hindsight-api 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hindsight_api/__init__.py CHANGED
@@ -46,4 +46,4 @@ __all__ = [
      "RemoteTEICrossEncoder",
      "LLMConfig",
  ]
- __version__ = "0.1.0"
+ __version__ = "0.4.2"
hindsight_api/api/http.py CHANGED
@@ -1323,7 +1323,7 @@ class VersionResponse(BaseModel):
  model_config = ConfigDict(
      json_schema_extra={
          "example": {
-             "api_version": "1.0.0",
+             "api_version": "0.4.0",
              "features": {
                  "observations": False,
                  "mcp": True,
@@ -1567,11 +1567,12 @@ def _register_routes(app: FastAPI):
  Returns version info and feature flags that can be used by clients
  to determine which capabilities are available.
  """
+ from hindsight_api import __version__
  from hindsight_api.config import get_config

  config = get_config()
  return VersionResponse(
-     api_version="1.0.0",
+     api_version=__version__,
      features=FeaturesInfo(
          observations=config.enable_observations,
          mcp=config.mcp_enabled,
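Editor's note: with the hunk above, the endpoint reports the real package version instead of a hardcoded "1.0.0". A minimal client-side sketch of consuming it; the `/version` path, host, and the use of httpx are assumptions for illustration, only the response fields come from the `VersionResponse` example above:

    # Sketch: read the version endpoint and branch on feature flags.
    # Route path and base URL are assumed, not taken from this diff.
    import httpx

    resp = httpx.get("http://localhost:8000/version")
    resp.raise_for_status()
    info = resp.json()
    print(info["api_version"])          # e.g. "0.4.2"
    if info["features"].get("mcp"):
        print("MCP support is enabled")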
hindsight_api/config.py CHANGED
@@ -20,11 +20,15 @@ logger = logging.getLogger(__name__)

  # Environment variable names
  ENV_DATABASE_URL = "HINDSIGHT_API_DATABASE_URL"
+ ENV_DATABASE_SCHEMA = "HINDSIGHT_API_DATABASE_SCHEMA"
  ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
  ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
  ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
  ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
  ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
+ ENV_LLM_MAX_RETRIES = "HINDSIGHT_API_LLM_MAX_RETRIES"
+ ENV_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_LLM_INITIAL_BACKOFF"
+ ENV_LLM_MAX_BACKOFF = "HINDSIGHT_API_LLM_MAX_BACKOFF"
  ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
  ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"

@@ -33,19 +37,35 @@ ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
  ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
  ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
  ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
+ ENV_RETAIN_LLM_MAX_CONCURRENT = "HINDSIGHT_API_RETAIN_LLM_MAX_CONCURRENT"
+ ENV_RETAIN_LLM_MAX_RETRIES = "HINDSIGHT_API_RETAIN_LLM_MAX_RETRIES"
+ ENV_RETAIN_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_INITIAL_BACKOFF"
+ ENV_RETAIN_LLM_MAX_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF"
+ ENV_RETAIN_LLM_TIMEOUT = "HINDSIGHT_API_RETAIN_LLM_TIMEOUT"

  ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
  ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
  ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
  ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
+ ENV_REFLECT_LLM_MAX_CONCURRENT = "HINDSIGHT_API_REFLECT_LLM_MAX_CONCURRENT"
+ ENV_REFLECT_LLM_MAX_RETRIES = "HINDSIGHT_API_REFLECT_LLM_MAX_RETRIES"
+ ENV_REFLECT_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_INITIAL_BACKOFF"
+ ENV_REFLECT_LLM_MAX_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_MAX_BACKOFF"
+ ENV_REFLECT_LLM_TIMEOUT = "HINDSIGHT_API_REFLECT_LLM_TIMEOUT"

  ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
  ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
  ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
  ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
+ ENV_CONSOLIDATION_LLM_MAX_CONCURRENT = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_CONCURRENT"
+ ENV_CONSOLIDATION_LLM_MAX_RETRIES = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_RETRIES"
+ ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_INITIAL_BACKOFF"
+ ENV_CONSOLIDATION_LLM_MAX_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_BACKOFF"
+ ENV_CONSOLIDATION_LLM_TIMEOUT = "HINDSIGHT_API_CONSOLIDATION_LLM_TIMEOUT"

  ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
  ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
+ ENV_EMBEDDINGS_LOCAL_FORCE_CPU = "HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"
  ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
  ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
  ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
@@ -65,6 +85,7 @@ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"

  ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
  ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
+ ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
  ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
  ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
  ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
@@ -98,6 +119,7 @@ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
  # Observations settings (consolidated knowledge from facts)
  ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
  ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
+ ENV_CONSOLIDATION_MAX_TOKENS = "HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"

  # Optimization flags
  ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
@@ -125,18 +147,24 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"

  # Default values
  DEFAULT_DATABASE_URL = "pg0"
+ DEFAULT_DATABASE_SCHEMA = "public"
  DEFAULT_LLM_PROVIDER = "openai"
  DEFAULT_LLM_MODEL = "gpt-5-mini"
  DEFAULT_LLM_MAX_CONCURRENT = 32
+ DEFAULT_LLM_MAX_RETRIES = 10  # Max retry attempts for LLM API calls
+ DEFAULT_LLM_INITIAL_BACKOFF = 1.0  # Initial backoff in seconds for retry exponential backoff
+ DEFAULT_LLM_MAX_BACKOFF = 60.0  # Max backoff cap in seconds for retry exponential backoff
  DEFAULT_LLM_TIMEOUT = 120.0  # seconds

  DEFAULT_EMBEDDINGS_PROVIDER = "local"
  DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
+ DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False  # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
  DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
  DEFAULT_EMBEDDING_DIMENSION = 384

  DEFAULT_RERANKER_PROVIDER = "local"
  DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+ DEFAULT_RERANKER_LOCAL_FORCE_CPU = False  # Force CPU mode for local reranker (avoids MPS/XPC issues on macOS)
  DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4  # Limit concurrent CPU-bound reranking to prevent thrashing
  DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
  DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
@@ -177,6 +205,7 @@ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False  # Run observation generation async (a
  # Observations defaults (consolidated knowledge from facts)
  DEFAULT_ENABLE_OBSERVATIONS = True  # Observations enabled by default
  DEFAULT_CONSOLIDATION_BATCH_SIZE = 50  # Memories to load per batch (internal memory optimization)
+ DEFAULT_CONSOLIDATION_MAX_TOKENS = 1024  # Max tokens for recall when finding related observations

  # Database migrations
  DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
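Editor's note: the new retry defaults read as a classic capped exponential backoff. Assuming the usual doubling schedule (the retry loop itself is not part of this diff), the delays these three constants imply would be:

    # Sketch of the schedule implied by the defaults; doubling per attempt
    # is an assumption, since the retry loop is outside this diff.
    initial, cap, retries = 1.0, 60.0, 10  # _INITIAL_BACKOFF, _MAX_BACKOFF, _MAX_RETRIES
    delays = [min(initial * 2**attempt, cap) for attempt in range(retries)]
    print(delays)  # [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 60.0, 60.0, 60.0, 60.0]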
@@ -270,6 +299,7 @@ class HindsightConfig:

  # Database
  database_url: str
+ database_schema: str

  # LLM (default, used as fallback for per-operation config)
  llm_provider: str
@@ -277,6 +307,9 @@
  llm_model: str
  llm_base_url: str | None
  llm_max_concurrent: int
+ llm_max_retries: int
+ llm_initial_backoff: float
+ llm_max_backoff: float
  llm_timeout: float

  # Per-operation LLM configuration (None = use default LLM config)
@@ -284,20 +317,36 @@
  retain_llm_api_key: str | None
  retain_llm_model: str | None
  retain_llm_base_url: str | None
+ retain_llm_max_concurrent: int | None
+ retain_llm_max_retries: int | None
+ retain_llm_initial_backoff: float | None
+ retain_llm_max_backoff: float | None
+ retain_llm_timeout: float | None

  reflect_llm_provider: str | None
  reflect_llm_api_key: str | None
  reflect_llm_model: str | None
  reflect_llm_base_url: str | None
+ reflect_llm_max_concurrent: int | None
+ reflect_llm_max_retries: int | None
+ reflect_llm_initial_backoff: float | None
+ reflect_llm_max_backoff: float | None
+ reflect_llm_timeout: float | None

  consolidation_llm_provider: str | None
  consolidation_llm_api_key: str | None
  consolidation_llm_model: str | None
  consolidation_llm_base_url: str | None
+ consolidation_llm_max_concurrent: int | None
+ consolidation_llm_max_retries: int | None
+ consolidation_llm_initial_backoff: float | None
+ consolidation_llm_max_backoff: float | None
+ consolidation_llm_timeout: float | None

  # Embeddings
  embeddings_provider: str
  embeddings_local_model: str
+ embeddings_local_force_cpu: bool
  embeddings_tei_url: str | None
  embeddings_openai_base_url: str | None
  embeddings_cohere_base_url: str | None
@@ -305,6 +354,8 @@
  # Reranker
  reranker_provider: str
  reranker_local_model: str
+ reranker_local_force_cpu: bool
+ reranker_local_max_concurrent: int
  reranker_tei_url: str | None
  reranker_tei_batch_size: int
  reranker_tei_max_concurrent: int
@@ -336,6 +387,7 @@
  # Observations settings (consolidated knowledge from facts)
  enable_observations: bool
  consolidation_batch_size: int
+ consolidation_max_tokens: int

  # Optimization flags
  skip_llm_verification: bool
@@ -367,35 +419,93 @@
  return cls(
      # Database
      database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
+     database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
      # LLM
      llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
      llm_api_key=os.getenv(ENV_LLM_API_KEY),
      llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
      llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
      llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
+     llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
+     llm_initial_backoff=float(os.getenv(ENV_LLM_INITIAL_BACKOFF, str(DEFAULT_LLM_INITIAL_BACKOFF))),
+     llm_max_backoff=float(os.getenv(ENV_LLM_MAX_BACKOFF, str(DEFAULT_LLM_MAX_BACKOFF))),
      llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
      # Per-operation LLM config (None = use default)
      retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
      retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
      retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
      retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
+     retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
+     if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
+     else None,
+     retain_llm_max_retries=int(os.getenv(ENV_RETAIN_LLM_MAX_RETRIES))
+     if os.getenv(ENV_RETAIN_LLM_MAX_RETRIES)
+     else None,
+     retain_llm_initial_backoff=float(os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF))
+     if os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF)
+     else None,
+     retain_llm_max_backoff=float(os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF))
+     if os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF)
+     else None,
+     retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
      reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
      reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
      reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
      reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
+     reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
+     if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
+     else None,
+     reflect_llm_max_retries=int(os.getenv(ENV_REFLECT_LLM_MAX_RETRIES))
+     if os.getenv(ENV_REFLECT_LLM_MAX_RETRIES)
+     else None,
+     reflect_llm_initial_backoff=float(os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF))
+     if os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF)
+     else None,
+     reflect_llm_max_backoff=float(os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF))
+     if os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF)
+     else None,
+     reflect_llm_timeout=float(os.getenv(ENV_REFLECT_LLM_TIMEOUT))
+     if os.getenv(ENV_REFLECT_LLM_TIMEOUT)
+     else None,
      consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
      consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
      consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
      consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
+     consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
+     if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
+     else None,
+     consolidation_llm_max_retries=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES))
+     if os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES)
+     else None,
+     consolidation_llm_initial_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF))
+     if os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF)
+     else None,
+     consolidation_llm_max_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF))
+     if os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF)
+     else None,
+     consolidation_llm_timeout=float(os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT))
+     if os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT)
+     else None,
      # Embeddings
      embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
      embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
+     embeddings_local_force_cpu=os.getenv(
+         ENV_EMBEDDINGS_LOCAL_FORCE_CPU, str(DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU)
+     ).lower()
+     in ("true", "1"),
      embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
      embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
      embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
      # Reranker
      reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
      reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
+     reranker_local_force_cpu=os.getenv(
+         ENV_RERANKER_LOCAL_FORCE_CPU, str(DEFAULT_RERANKER_LOCAL_FORCE_CPU)
+     ).lower()
+     in ("true", "1"),
+     reranker_local_max_concurrent=int(
+         os.getenv(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
+     ),
      reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
      reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
      reranker_tei_max_concurrent=int(
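Editor's note: every per-operation override in the hunk above follows one shape: cast the env var when it is set and non-empty, otherwise fall back to None so the default LLM settings apply. A hypothetical helper (not in the package) expressing that pattern:

    import os
    from typing import Callable, TypeVar

    T = TypeVar("T")

    def _opt_env(name: str, cast: Callable[[str], T]) -> T | None:
        """Cast the env var if set and non-empty, else return None."""
        raw = os.getenv(name)
        return cast(raw) if raw else None

    # Equivalent to the inline conditionals above, e.g.:
    #   retain_llm_timeout=_opt_env(ENV_RETAIN_LLM_TIMEOUT, float)
    #   reflect_llm_max_retries=_opt_env(ENV_REFLECT_LLM_MAX_RETRIES, int)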
@@ -444,6 +554,9 @@
      consolidation_batch_size=int(
          os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
      ),
+     consolidation_max_tokens=int(
+         os.getenv(ENV_CONSOLIDATION_MAX_TOKENS, str(DEFAULT_CONSOLIDATION_MAX_TOKENS))
+     ),
      # Database migrations
      run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
      # Database connection pool
@@ -515,7 +628,7 @@

  def log_config(self) -> None:
      """Log the current configuration (without sensitive values)."""
-     logger.info(f"Database: {self.database_url}")
+     logger.info(f"Database: {self.database_url} (schema: {self.database_schema})")
      logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
      if self.retain_llm_provider or self.retain_llm_model:
          retain_provider = self.retain_llm_provider or self.llm_provider
hindsight_api/daemon.py CHANGED
@@ -52,7 +52,10 @@ class IdleTimeoutMiddleware:
  logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
  # Give a moment for any in-flight requests
  await asyncio.sleep(1)
- os._exit(0)
+ # Send SIGTERM to ourselves to trigger graceful shutdown
+ import signal
+
+ os.kill(os.getpid(), signal.SIGTERM)


  class DaemonLock:
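Editor's note on the hunk above: `os._exit(0)` terminates the process immediately, skipping the ASGI server's shutdown sequence, whereas a self-delivered SIGTERM is picked up by the server's signal handlers (uvicorn installs them by default) so lifespan shutdown and connection draining still run. A standalone sketch of the mechanism:

    # Sketch: a registered handler sees the self-sent SIGTERM;
    # os._exit(0) would bypass it entirely. Unix-only, like the daemon.
    import os
    import signal

    def on_sigterm(signum, frame):
        print("graceful shutdown path runs")  # the ASGI server's role
        raise SystemExit(0)

    signal.signal(signal.SIGTERM, on_sigterm)
    os.kill(os.getpid(), signal.SIGTERM)  # same call the middleware now makes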
@@ -144,10 +144,14 @@ async def run_consolidation_job(
  }

  batch_num = 0
+ last_progress_timings = {}  # Track timings at last progress log
  while True:
      batch_num += 1
      batch_start = time.time()

+     # Snapshot timings at batch start for per-batch calculation
+     batch_start_timings = perf.timings.copy()
+
      # Fetch next batch of unconsolidated memories
      async with pool.acquire() as conn:
          t0 = time.time()
@@ -217,19 +221,44 @@ async def run_consolidation_job(
  elif action == "skipped":
      stats["skipped"] += 1

- # Log progress periodically
+ # Log progress periodically with timing breakdown
  if stats["memories_processed"] % 10 == 0:
+     # Calculate timing deltas since last progress log
+     timing_parts = []
+     for key in ["recall", "llm", "embedding", "db_write"]:
+         if key in perf.timings:
+             delta = perf.timings[key] - last_progress_timings.get(key, 0)
+             timing_parts.append(f"{key}={delta:.2f}s")
+
+     timing_str = f" | {', '.join(timing_parts)}" if timing_parts else ""
      logger.info(
          f"[CONSOLIDATION] bank={bank_id} progress: "
-         f"{stats['memories_processed']}/{total_count} memories processed"
+         f"{stats['memories_processed']}/{total_count} memories processed{timing_str}"
      )

+     # Update last progress snapshot
+     last_progress_timings = perf.timings.copy()
+
  batch_time = time.time() - batch_start
  perf.log(
      f"[2] Batch {batch_num}: {len(memories)} memories in {batch_time:.3f}s "
      f"(avg {batch_time / len(memories):.3f}s/memory)"
  )

+ # Log timing breakdown after each batch (delta from batch start)
+ timing_parts = []
+ for key in ["recall", "llm", "embedding", "db_write"]:
+     if key in perf.timings:
+         delta = perf.timings[key] - batch_start_timings.get(key, 0)
+         timing_parts.append(f"{key}={delta:.3f}s")
+
+ if timing_parts:
+     avg_per_memory = batch_time / len(memories) if memories else 0
+     logger.info(
+         f"[CONSOLIDATION] bank={bank_id} batch {batch_num}/{len(memories)} memories: "
+         f"{', '.join(timing_parts)} | avg={avg_per_memory:.3f}s/memory"
+     )
+
  # Build summary
  perf.log(
      f"[3] Results: {stats['memories_processed']} memories -> "
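Editor's note: both new log sites use the same snapshot-and-diff pattern over the cumulative `perf.timings` dict: copy the dict at an interval boundary, then report `current - snapshot` per key. In isolation, with `perf.timings` stubbed as a plain dict (its class is outside this diff):

    # Sketch of the delta pattern used for both progress and batch logs.
    timings = {"recall": 0.0, "llm": 0.0}  # stands in for perf.timings
    snapshot = timings.copy()              # last_progress_timings / batch_start_timings

    timings["recall"] += 1.5               # work happens, counters accumulate
    timings["llm"] += 3.0

    parts = [
        f"{key}={timings[key] - snapshot.get(key, 0):.2f}s"
        for key in ("recall", "llm", "embedding", "db_write")
        if key in timings
    ]
    print(", ".join(parts))    # recall=1.50s, llm=3.00s
    snapshot = timings.copy()  # reset the baseline for the next interval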
@@ -639,28 +668,27 @@ async def _find_related_observations(
  request_context: "RequestContext",
  ) -> list[dict[str, Any]]:
  """
- Find observations related to the given query using the full recall system.
+ Find observations related to the given query using optimized recall.

  IMPORTANT: We do NOT filter by tags here. Consolidation needs to see ALL
  potentially related observations regardless of scope, so the LLM can
  decide on tag routing (same scope update vs cross-scope create).

- This leverages:
- - Semantic search (embedding similarity)
- - BM25 text search (keyword matching)
- - Entity-based retrieval (shared entities)
- - Graph traversal (connected via entity links)
+ Uses max_tokens to naturally limit observations (no artificial count limit).
+ Includes source memories with dates for LLM context.

  Returns:
-     List of related observations with their tags for LLM tag routing
+     List of related observations with their tags, source memories, and dates
  """
- # Use recall to find related observations
- # NO tags parameter - we want ALL observations regardless of scope
- # Use low max_tokens since we only need observations, not memories
+ # Use recall to find related observations with token budget
+ # max_tokens naturally limits how many observations are returned
+ from ...config import get_config
+
+ config = get_config()
  recall_result = await memory_engine.recall_async(
      bank_id=bank_id,
      query=query,
-     max_tokens=5000,  # Token budget for observations
+     max_tokens=config.consolidation_max_tokens,  # Token budget for observations (configurable)
      fact_type=["observation"],  # Only retrieve observations
      request_context=request_context,
      _quiet=True,  # Suppress logging
@@ -668,43 +696,82 @@
  )

  # If no observations returned, return empty list
- # When fact_type=["observation"], results come back in `results` field
  if not recall_result.results:
      return []

- # Trust recall's relevance filtering - fetch full data for each observation
+ # Batch fetch all observations in a single query (no artificial limit)
+ observation_ids = [uuid.UUID(obs.id) for obs in recall_result.results]
+
+ rows = await conn.fetch(
+     f"""
+     SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at,
+            occurred_start, occurred_end, mentioned_at
+     FROM {fq_table("memory_units")}
+     WHERE id = ANY($1) AND bank_id = $2 AND fact_type = 'observation'
+     """,
+     observation_ids,
+     bank_id,
+ )
+
+ # Build results list preserving recall order
+ id_to_row = {row["id"]: row for row in rows}
  results = []
- for obs in recall_result.results:
-     # Fetch full observation data from DB to get history, source_memory_ids, tags
-     row = await conn.fetchrow(
-         f"""
-         SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
-         FROM {fq_table("memory_units")}
-         WHERE id = $1 AND bank_id = $2 AND fact_type = 'observation'
-         """,
-         uuid.UUID(obs.id),
-         bank_id,
-     )

-     if row:
-         history = row["history"]
-         if isinstance(history, str):
-             history = json.loads(history)
-         elif history is None:
-             history = []
-
-         results.append(
-             {
-                 "id": row["id"],
-                 "text": row["text"],
-                 "proof_count": row["proof_count"] or 1,
-                 "history": history,
-                 "tags": row["tags"] or [],  # Include tags for LLM tag routing
-                 "source_memory_ids": row["source_memory_ids"] or [],
-                 "similarity": 1.0,  # Retrieved via recall so assumed relevant
-             }
+ for obs in recall_result.results:
+     obs_id = uuid.UUID(obs.id)
+     if obs_id not in id_to_row:
+         continue
+
+     row = id_to_row[obs_id]
+     history = row["history"]
+     if isinstance(history, str):
+         history = json.loads(history)
+     elif history is None:
+         history = []
+
+     # Fetch source memories to include their text and dates
+     source_memory_ids = row["source_memory_ids"] or []
+     source_memories = []
+
+     if source_memory_ids:
+         source_rows = await conn.fetch(
+             f"""
+             SELECT text, occurred_start, occurred_end, mentioned_at, event_date
+             FROM {fq_table("memory_units")}
+             WHERE id = ANY($1) AND bank_id = $2
+             ORDER BY created_at ASC
+             LIMIT 5
+             """,
+             source_memory_ids[:5],  # Limit to first 5 source memories for token efficiency
+             bank_id,
          )

+         for src_row in source_rows:
+             source_memories.append(
+                 {
+                     "text": src_row["text"],
+                     "occurred_start": src_row["occurred_start"],
+                     "occurred_end": src_row["occurred_end"],
+                     "mentioned_at": src_row["mentioned_at"],
+                     "event_date": src_row["event_date"],
+                 }
+             )
+
+     results.append(
+         {
+             "id": row["id"],
+             "text": row["text"],
+             "proof_count": row["proof_count"] or 1,
+             "tags": row["tags"] or [],
+             "source_memories": source_memories,
+             "occurred_start": row["occurred_start"],
+             "occurred_end": row["occurred_end"],
+             "mentioned_at": row["mentioned_at"],
+             "created_at": row["created_at"],
+             "updated_at": row["updated_at"],
+         }
+     )
+
  return results

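Editor's note: the rewrite above collapses N sequential `fetchrow` round trips into one `WHERE id = ANY($1)` query, then restores recall's relevance ordering via the `id_to_row` map (SQL gives no ordering guarantee for `ANY`). The reusable shape of that pattern, sketched without asyncpg:

    # Sketch: batch-fetch by id, then reorder rows to match a ranked id list.
    # Rows are plain dicts here; in the daemon they are asyncpg Records.
    def in_rank_order(ranked_ids, rows):
        id_to_row = {row["id"]: row for row in rows}
        # Ids filtered out by the WHERE clause are simply skipped.
        return [id_to_row[i] for i in ranked_ids if i in id_to_row]

    rows = [{"id": 2, "text": "b"}, {"id": 1, "text": "a"}]
    print(in_rank_order([1, 2, 3], rows))
    # [{'id': 1, 'text': 'a'}, {'id': 2, 'text': 'b'}] -- rank kept, 3 dropped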
@@ -732,14 +799,43 @@
  - {"action": "create", "text": "...", "reason": "..."}
  - [] if fact is purely ephemeral (no durable knowledge)
  """
- # Format observations WITH their tags (or "None" if empty)
+ # Format observations as JSON with source memories and dates
  if observations:
-     observations_text = "\n".join(
-         f'- ID: {obs["id"]}, Tags: {json.dumps(obs["tags"])}, Text: "{obs["text"]}" (proof_count: {obs["proof_count"]})'
-         for obs in observations
-     )
+     obs_list = []
+     for obs in observations:
+         obs_data = {
+             "id": str(obs["id"]),
+             "text": obs["text"],
+             "proof_count": obs["proof_count"],
+             "tags": obs["tags"],
+             "created_at": obs["created_at"].isoformat() if obs.get("created_at") else None,
+             "updated_at": obs["updated_at"].isoformat() if obs.get("updated_at") else None,
+         }
+
+         # Include temporal info if available
+         if obs.get("occurred_start"):
+             obs_data["occurred_start"] = obs["occurred_start"].isoformat()
+         if obs.get("occurred_end"):
+             obs_data["occurred_end"] = obs["occurred_end"].isoformat()
+         if obs.get("mentioned_at"):
+             obs_data["mentioned_at"] = obs["mentioned_at"].isoformat()
+
+         # Include source memories (up to 3 for brevity)
+         if obs.get("source_memories"):
+             obs_data["source_memories"] = [
+                 {
+                     "text": sm["text"],
+                     "event_date": sm["event_date"].isoformat() if sm.get("event_date") else None,
+                     "occurred_start": sm["occurred_start"].isoformat() if sm.get("occurred_start") else None,
+                 }
+                 for sm in obs["source_memories"][:3]  # Limit to 3 for token efficiency
+             ]
+
+         obs_list.append(obs_data)
+
+     observations_text = json.dumps(obs_list, indent=2)
  else:
-     observations_text = "None (this is a new topic - create if fact contains durable knowledge)"
+     observations_text = "[]"

  # Only include mission section if mission is set and not the default
  mission_section = ""
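Editor's note: with the formatting above, `observations_text` becomes a pretty-printed JSON array instead of bullet lines. One entry would look roughly like this (all values illustrative, not from the package):

    # Illustrative shape of one serialized observation (made-up values).
    example_entry = {
        "id": "5b3f9c2e-0000-0000-0000-000000000000",
        "text": "User prefers PostgreSQL for new services",
        "proof_count": 3,
        "tags": ["scope:project-x"],
        "created_at": "2024-11-02T09:15:00",
        "updated_at": "2025-01-10T17:40:00",
        "occurred_start": "2024-10-30T00:00:00",
        "source_memories": [
            {
                "text": "Chose Postgres for the billing service",
                "event_date": "2024-10-30T00:00:00",
                "occurred_start": None,
            },
        ],
    }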
@@ -47,23 +47,31 @@ CONSOLIDATION_USER_PROMPT = """Analyze this new fact and consolidate into knowle
  {mission_section}
  NEW FACT: {fact_text}

- EXISTING OBSERVATIONS:
+ EXISTING OBSERVATIONS (JSON array with source memories and dates):
  {observations_text}

- Instructions:
- 1. First, extract the DURABLE KNOWLEDGE from the fact (not ephemeral state like "user is at X")
- 2. Then compare with existing observations:
-    - If an observation covers the same topic: UPDATE it with the new knowledge
-    - If no observation covers the topic: CREATE a new one
+ Each observation includes:
+ - id: unique identifier for updating
+ - text: the observation content
+ - proof_count: number of supporting memories
+ - tags: visibility scope (handled automatically)
+ - created_at/updated_at: when observation was created/modified
+ - occurred_start/occurred_end: temporal range of source facts
+ - source_memories: array of supporting facts with their text and dates

- Output JSON array of actions (ALWAYS an array, even for single action):
+ Instructions:
+ 1. Extract DURABLE KNOWLEDGE from the new fact (not ephemeral state)
+ 2. Review source_memories in existing observations to understand evidence
+ 3. Check dates to detect contradictions or updates
+ 4. Compare with observations:
+    - Same topic → UPDATE with learning_id
+    - New topic → CREATE new observation
+    - Purely ephemeral → return []
+
+ Output JSON array of actions:
  [
- {{"action": "update", "learning_id": "uuid", "text": "updated durable knowledge", "reason": "..."}},
+ {{"action": "update", "learning_id": "uuid-from-observations", "text": "updated knowledge", "reason": "..."}},
  {{"action": "create", "text": "new durable knowledge", "reason": "..."}}
  ]

- If NO consolidation is needed (fact is purely ephemeral with no durable knowledge):
- []
-
- If no observations exist and fact contains durable knowledge:
- [{{"action": "create", "text": "durable knowledge text", "reason": "new topic"}}]"""
+ Return [] if fact contains no durable knowledge."""
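Editor's note: on the consuming side, the prompt's contract is a JSON array of update/create actions, with [] for purely ephemeral facts. A hedged sketch of validating a reply against that contract (the package's own parsing code is not shown in this diff):

    import json

    def parse_actions(raw: str) -> list[dict]:
        """Validate a consolidation reply against the prompt contract."""
        actions = json.loads(raw)
        if not isinstance(actions, list):
            raise ValueError("expected a JSON array of actions")
        for action in actions:
            if action.get("action") == "update" and "learning_id" not in action:
                raise ValueError("update actions must carry a learning_id")
        return actions

    print(parse_actions('[{"action": "create", "text": "fact", "reason": "new topic"}]'))
    print(parse_actions("[]"))  # purely ephemeral fact: nothing to consolidate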