hindsight-api 0.4.0-py3-none-any.whl → 0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +1 -1
- hindsight_api/api/http.py +3 -2
- hindsight_api/config.py +114 -1
- hindsight_api/daemon.py +4 -1
- hindsight_api/engine/consolidation/consolidator.py +145 -49
- hindsight_api/engine/consolidation/prompts.py +21 -13
- hindsight_api/engine/cross_encoder.py +43 -109
- hindsight_api/engine/embeddings.py +35 -99
- hindsight_api/engine/memory_engine.py +11 -5
- hindsight_api/engine/reflect/tools.py +1 -1
- hindsight_api/engine/retain/fact_extraction.py +16 -0
- hindsight_api/extensions/builtin/tenant.py +8 -5
- hindsight_api/main.py +26 -2
- {hindsight_api-0.4.0.dist-info → hindsight_api-0.4.2.dist-info}/METADATA +1 -1
- {hindsight_api-0.4.0.dist-info → hindsight_api-0.4.2.dist-info}/RECORD +17 -17
- {hindsight_api-0.4.0.dist-info → hindsight_api-0.4.2.dist-info}/WHEEL +0 -0
- {hindsight_api-0.4.0.dist-info → hindsight_api-0.4.2.dist-info}/entry_points.txt +0 -0

hindsight_api/engine/cross_encoder.py
CHANGED

@@ -20,6 +20,7 @@ from ..config import (
     DEFAULT_RERANKER_FLASHRANK_CACHE_DIR,
     DEFAULT_RERANKER_FLASHRANK_MODEL,
     DEFAULT_RERANKER_LITELLM_MODEL,
+    DEFAULT_RERANKER_LOCAL_FORCE_CPU,
     DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT,
     DEFAULT_RERANKER_LOCAL_MODEL,
     DEFAULT_RERANKER_PROVIDER,
@@ -33,6 +34,7 @@ from ..config import (
     ENV_RERANKER_FLASHRANK_CACHE_DIR,
     ENV_RERANKER_FLASHRANK_MODEL,
     ENV_RERANKER_LITELLM_MODEL,
+    ENV_RERANKER_LOCAL_FORCE_CPU,
     ENV_RERANKER_LOCAL_MAX_CONCURRENT,
     ENV_RERANKER_LOCAL_MODEL,
     ENV_RERANKER_PROVIDER,
@@ -99,7 +101,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
     _executor: ThreadPoolExecutor | None = None
     _max_concurrent: int = 4  # Limit concurrent CPU-bound reranking calls

-    def __init__(self, model_name: str | None = None, max_concurrent: int = 4):
+    def __init__(self, model_name: str | None = None, max_concurrent: int = 4, force_cpu: bool = False):
         """
         Initialize local SentenceTransformers cross-encoder.

@@ -108,8 +110,11 @@ class LocalSTCrossEncoder(CrossEncoderModel):
                 Default: cross-encoder/ms-marco-MiniLM-L-6-v2
             max_concurrent: Maximum concurrent reranking calls (default: 2).
                 Higher values may cause CPU thrashing under load.
+            force_cpu: Force CPU mode (avoids MPS/XPC issues on macOS in daemon mode).
+                Default: False
         """
         self.model_name = model_name or DEFAULT_RERANKER_LOCAL_MODEL
+        self.force_cpu = force_cpu
         self._model = None
         LocalSTCrossEncoder._max_concurrent = max_concurrent

@@ -139,13 +144,23 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         # after loading, which conflicts with accelerate's device_map handling.
         import torch

-        #
-
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        # Force CPU mode if configured (used in daemon mode to avoid MPS/XPC issues on macOS)
+        if self.force_cpu:
             device = "cpu"
+            logger.info("Reranker: forcing CPU mode (HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU=1)")
+        else:
+            # Check for GPU (CUDA) or Apple Silicon (MPS)
+            # Wrap in try-except to gracefully handle any device detection issues
+            # (e.g., in CI environments or when PyTorch is built without GPU support)
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")

         self._model = CrossEncoder(
             self.model_name,
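
The hunk above replaces the old unconditional GPU/MPS auto-detection with a force-CPU short-circuit plus a guarded detection path. A minimal, self-contained sketch of that selection logic (the helper name is illustrative, not part of the package):

import logging

import torch

logger = logging.getLogger(__name__)


def select_device(force_cpu: bool = False) -> str | None:
    """Return "cpu", or None to let sentence-transformers auto-detect GPU/MPS."""
    if force_cpu:
        logger.info("forcing CPU mode")
        return "cpu"
    device = "cpu"  # default when detection fails or no accelerator is present
    try:
        has_gpu = torch.cuda.is_available() or (
            hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
        )
        if has_gpu:
            device = None
    except Exception as e:  # e.g. CI images or CPU-only PyTorch builds
        logger.warning("Failed to detect GPU/MPS, falling back to CPU: %s", e)
    return device
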
@@ -163,101 +178,16 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         else:
             logger.info("Reranker: local provider initialized (using existing executor)")

-    def _is_xpc_error(self, error: Exception) -> bool:
-        """
-        Check if an error is an XPC connection error (macOS daemon issue).
-
-        On macOS, long-running daemons can lose XPC connections to system services
-        when the process is idle for extended periods.
-        """
-        error_str = str(error).lower()
-        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
-
-    def _reinitialize_model_sync(self) -> None:
-        """
-        Clear and reinitialize the cross-encoder model synchronously.
-
-        This is used to recover from XPC errors on macOS where the
-        PyTorch/MPS backend loses its connection to system services.
-        """
-        logger.warning(f"Reinitializing reranker model {self.model_name} due to backend error")
-
-        # Clear existing model
-        self._model = None
-
-        # Force garbage collection to free resources
-        import gc
-
-        import torch
-
-        gc.collect()
-
-        # If using CUDA/MPS, clear the cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            try:
-                torch.mps.empty_cache()
-            except AttributeError:
-                pass  # Method might not exist in all PyTorch versions
-
-        # Reinitialize the model
-        try:
-            from sentence_transformers import CrossEncoder
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for LocalSTCrossEncoder. "
-                "Install it with: pip install sentence-transformers"
-            )
-
-        # Determine device based on hardware availability
-        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
-            device = "cpu"
-
-        self._model = CrossEncoder(
-            self.model_name,
-            device=device,
-            model_kwargs={"low_cpu_mem_usage": False},
-        )
-
-        logger.info("Reranker: local provider reinitialized successfully")
-
-    def _predict_with_recovery(self, pairs: list[tuple[str, str]]) -> list[float]:
-        """
-        Predict with automatic recovery from XPC errors.
-
-        This runs synchronously in the thread pool.
-        """
-        max_retries = 1
-        for attempt in range(max_retries + 1):
-            try:
-                scores = self._model.predict(pairs, show_progress_bar=False)
-                return scores.tolist() if hasattr(scores, "tolist") else list(scores)
-            except Exception as e:
-                # Check if this is an XPC error (macOS daemon issue)
-                if self._is_xpc_error(e) and attempt < max_retries:
-                    logger.warning(f"XPC error detected in reranker (attempt {attempt + 1}): {e}")
-                    try:
-                        self._reinitialize_model_sync()
-                        logger.info("Reranker reinitialized successfully, retrying prediction")
-                        continue
-                    except Exception as reinit_error:
-                        logger.error(f"Failed to reinitialize reranker: {reinit_error}")
-                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
-                else:
-                    # Not an XPC error or out of retries
-                    raise
+    def _predict_sync(self, pairs: list[tuple[str, str]]) -> list[float]:
+        """Synchronous prediction wrapper for thread pool execution."""
+        scores = self._model.predict(pairs, show_progress_bar=False)
+        return scores.tolist() if hasattr(scores, "tolist") else list(scores)

     async def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
         """
         Score query-document pairs for relevance.

         Uses a dedicated thread pool with limited workers to prevent CPU thrashing.
-        Automatically recovers from XPC errors on macOS by reinitializing the model.

         Args:
             pairs: List of (query, document) tuples to score
@@ -272,7 +202,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         loop = asyncio.get_event_loop()
         return await loop.run_in_executor(
             LocalSTCrossEncoder._executor,
-            self._predict_with_recovery,
+            self._predict_sync,
             pairs,
         )

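
With the recovery wrapper gone, predict() simply hands the CPU-bound scoring to the class-level thread pool. A self-contained sketch of that offloading pattern (pool size and function names here are illustrative):

import asyncio
from concurrent.futures import ThreadPoolExecutor

_executor = ThreadPoolExecutor(max_workers=4)  # bounded, mirrors _max_concurrent


def _predict_sync(model, pairs: list[tuple[str, str]]) -> list[float]:
    scores = model.predict(pairs, show_progress_bar=False)
    return scores.tolist() if hasattr(scores, "tolist") else list(scores)


async def predict(model, pairs: list[tuple[str, str]]) -> list[float]:
    # Run the CPU-bound scoring off the event loop, capped by the pool size.
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(_executor, _predict_sync, model, pairs)
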
@@ -873,29 +803,33 @@ class LiteLLMCrossEncoder(CrossEncoderModel):

 def create_cross_encoder_from_env() -> CrossEncoderModel:
     """
-    Create a CrossEncoderModel instance based on
+    Create a CrossEncoderModel instance based on configuration.

-
+    Reads configuration via get_config() to ensure consistency across the codebase.

     Returns:
         Configured CrossEncoderModel instance
     """
-
+    from ..config import get_config
+
+    config = get_config()
+    provider = config.reranker_provider.lower()

     if provider == "tei":
-        url =
+        url = config.reranker_tei_url
         if not url:
             raise ValueError(f"{ENV_RERANKER_TEI_URL} is required when {ENV_RERANKER_PROVIDER} is 'tei'")
-
-
-
+        return RemoteTEICrossEncoder(
+            base_url=url,
+            batch_size=config.reranker_tei_batch_size,
+            max_concurrent=config.reranker_tei_max_concurrent,
+        )
     elif provider == "local":
-
-
-
-
+        return LocalSTCrossEncoder(
+            model_name=config.reranker_local_model,
+            max_concurrent=config.reranker_local_max_concurrent,
+            force_cpu=config.reranker_local_force_cpu,
         )
-        return LocalSTCrossEncoder(model_name=model_name, max_concurrent=max_concurrent)
     elif provider == "cohere":
         api_key = os.environ.get(ENV_COHERE_API_KEY)
         if not api_key:
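
A hedged usage sketch of the rewritten factory: with the local provider selected, the reranker picks up its model, concurrency cap, and the new force-CPU switch from HindsightConfig instead of ad-hoc environment reads. The force-CPU variable name comes from the log message above; the provider variable name follows the HINDSIGHT_API_* pattern seen elsewhere in this diff and is an assumption, as is the need to set it before the config is first loaded.

import os

# Assumed env names (HINDSIGHT_API_* pattern); set before the config is first loaded.
os.environ.setdefault("HINDSIGHT_API_RERANKER_PROVIDER", "local")
os.environ.setdefault("HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU", "1")

from hindsight_api.engine.cross_encoder import create_cross_encoder_from_env

reranker = create_cross_encoder_from_env()  # expected: LocalSTCrossEncoder(..., force_cpu=True)
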

hindsight_api/engine/embeddings.py
CHANGED

@@ -18,6 +18,7 @@ import httpx
 from ..config import (
     DEFAULT_EMBEDDINGS_COHERE_MODEL,
     DEFAULT_EMBEDDINGS_LITELLM_MODEL,
+    DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU,
     DEFAULT_EMBEDDINGS_LOCAL_MODEL,
     DEFAULT_EMBEDDINGS_OPENAI_MODEL,
     DEFAULT_EMBEDDINGS_PROVIDER,
@@ -26,6 +27,7 @@ from ..config import (
     ENV_EMBEDDINGS_COHERE_BASE_URL,
     ENV_EMBEDDINGS_COHERE_MODEL,
     ENV_EMBEDDINGS_LITELLM_MODEL,
+    ENV_EMBEDDINGS_LOCAL_FORCE_CPU,
     ENV_EMBEDDINGS_LOCAL_MODEL,
     ENV_EMBEDDINGS_OPENAI_API_KEY,
     ENV_EMBEDDINGS_OPENAI_BASE_URL,
@@ -92,15 +94,18 @@ class LocalSTEmbeddings(Embeddings):
     The embedding dimension is auto-detected from the model.
     """

-    def __init__(self, model_name: str | None = None):
+    def __init__(self, model_name: str | None = None, force_cpu: bool = False):
         """
         Initialize local SentenceTransformers embeddings.

         Args:
             model_name: Name of the SentenceTransformer model to use.
                 Default: BAAI/bge-small-en-v1.5
+            force_cpu: Force CPU mode (avoids MPS/XPC issues on macOS in daemon mode).
+                Default: False
         """
         self.model_name = model_name or DEFAULT_EMBEDDINGS_LOCAL_MODEL
+        self.force_cpu = force_cpu
         self._model = None
         self._dimension: int | None = None

@@ -134,13 +139,23 @@ class LocalSTEmbeddings(Embeddings):
         # which can cause issues when accelerate is installed but no GPU is available.
         import torch

-        #
-
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        # Force CPU mode if configured (used in daemon mode to avoid MPS/XPC issues on macOS)
+        if self.force_cpu:
             device = "cpu"
+            logger.info("Embeddings: forcing CPU mode")
+        else:
+            # Check for GPU (CUDA) or Apple Silicon (MPS)
+            # Wrap in try-except to gracefully handle any device detection issues
+            # (e.g., in CI environments or when PyTorch is built without GPU support)
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")

         self._model = SentenceTransformer(
             self.model_name,
@@ -151,75 +166,10 @@ class LocalSTEmbeddings(Embeddings):
         self._dimension = self._model.get_sentence_embedding_dimension()
         logger.info(f"Embeddings: local provider initialized (dim: {self._dimension})")

-    def _is_xpc_error(self, error: Exception) -> bool:
-        """
-        Check if an error is an XPC connection error (macOS daemon issue).
-
-        On macOS, long-running daemons can lose XPC connections to system services
-        when the process is idle for extended periods.
-        """
-        error_str = str(error).lower()
-        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
-
-    def _reinitialize_model_sync(self) -> None:
-        """
-        Clear and reinitialize the embedding model synchronously.
-
-        This is used to recover from XPC errors on macOS where the
-        PyTorch/MPS backend loses its connection to system services.
-        """
-        logger.warning(f"Reinitializing embedding model {self.model_name} due to backend error")
-
-        # Clear existing model
-        self._model = None
-
-        # Force garbage collection to free resources
-        import gc
-
-        import torch
-
-        gc.collect()
-
-        # If using CUDA/MPS, clear the cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            try:
-                torch.mps.empty_cache()
-            except AttributeError:
-                pass  # Method might not exist in all PyTorch versions
-
-        # Reinitialize the model (inline version of initialize() but synchronous)
-        try:
-            from sentence_transformers import SentenceTransformer
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for LocalSTEmbeddings. "
-                "Install it with: pip install sentence-transformers"
-            )
-
-        # Determine device based on hardware availability
-        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
-            device = "cpu"
-
-        self._model = SentenceTransformer(
-            self.model_name,
-            device=device,
-            model_kwargs={"low_cpu_mem_usage": False},
-        )
-
-        logger.info("Embeddings: local provider reinitialized successfully")
-
     def encode(self, texts: list[str]) -> list[list[float]]:
         """
         Generate embeddings for a list of texts.

-        Automatically recovers from XPC errors on macOS by reinitializing the model.
-
         Args:
             texts: List of text strings to encode

@@ -229,26 +179,8 @@ class LocalSTEmbeddings(Embeddings):
         if self._model is None:
             raise RuntimeError("Embeddings not initialized. Call initialize() first.")

-
-
-        for attempt in range(max_retries + 1):
-            try:
-                embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
-                return [emb.tolist() for emb in embeddings]
-            except Exception as e:
-                # Check if this is an XPC error (macOS daemon issue)
-                if self._is_xpc_error(e) and attempt < max_retries:
-                    logger.warning(f"XPC error detected in embedding generation (attempt {attempt + 1}): {e}")
-                    try:
-                        self._reinitialize_model_sync()
-                        logger.info("Model reinitialized successfully, retrying embedding generation")
-                        continue
-                    except Exception as reinit_error:
-                        logger.error(f"Failed to reinitialize model: {reinit_error}")
-                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
-                else:
-                    # Not an XPC error or out of retries
-                    raise
+        embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
+        return [emb.tolist() for emb in embeddings]


 class RemoteTEIEmbeddings(Embeddings):
@@ -770,24 +702,28 @@ class LiteLLMEmbeddings(Embeddings):

 def create_embeddings_from_env() -> Embeddings:
     """
-    Create an Embeddings instance based on
+    Create an Embeddings instance based on configuration.

-
+    Reads configuration via get_config() to ensure consistency across the codebase.

     Returns:
         Configured Embeddings instance
     """
-
+    from ..config import get_config
+
+    config = get_config()
+    provider = config.embeddings_provider.lower()

     if provider == "tei":
-        url =
+        url = config.embeddings_tei_url
         if not url:
             raise ValueError(f"{ENV_EMBEDDINGS_TEI_URL} is required when {ENV_EMBEDDINGS_PROVIDER} is 'tei'")
         return RemoteTEIEmbeddings(base_url=url)
     elif provider == "local":
-
-
-
+        return LocalSTEmbeddings(
+            model_name=config.embeddings_local_model,
+            force_cpu=config.embeddings_local_force_cpu,
+        )
     elif provider == "openai":
         # Use dedicated embeddings API key, or fall back to LLM API key
         api_key = os.environ.get(ENV_EMBEDDINGS_OPENAI_API_KEY) or os.environ.get(ENV_LLM_API_KEY)

hindsight_api/engine/memory_engine.py
CHANGED

@@ -23,12 +23,17 @@ from ..metrics import get_metrics_collector
 from .db_budget import budgeted_operation

 # Context variable for current schema (async-safe, per-task isolation)
-
+# Note: default is None, actual default comes from config via get_current_schema()
+_current_schema: contextvars.ContextVar[str | None] = contextvars.ContextVar("current_schema", default=None)


 def get_current_schema() -> str:
-    """Get the current schema from context (default
-
+    """Get the current schema from context (falls back to config default)."""
+    schema = _current_schema.get()
+    if schema is None:
+        # Fall back to configured default schema
+        return get_config().database_schema
+    return schema


 def fq_table(table_name: str) -> str:
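
The schema context variable now defaults to None, and get_current_schema() falls back to the configured default. A self-contained sketch of that contextvar-with-fallback pattern (get_config() replaced by a stand-in):

import contextvars

_current_schema: contextvars.ContextVar[str | None] = contextvars.ContextVar("current_schema", default=None)


def _configured_default_schema() -> str:
    # Stand-in for get_config().database_schema
    return "public"


def get_current_schema() -> str:
    schema = _current_schema.get()
    return schema if schema is not None else _configured_default_schema()


# Per-task override, e.g. while serving a tenant-scoped request:
token = _current_schema.set("tenant_a")
try:
    assert get_current_schema() == "tenant_a"
finally:
    _current_schema.reset(token)

assert get_current_schema() == "public"
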
@@ -881,11 +886,12 @@ class MemoryEngine(MemoryEngineInterface):
         if not self.db_url:
             raise ValueError("Database URL is required for migrations")
         logger.info("Running database migrations...")
-
+        # Use configured database schema for migrations (defaults to "public")
+        run_migrations(self.db_url, schema=get_config().database_schema)

         # Ensure embedding column dimension matches the model's dimension
         # This is done after migrations and after embeddings.initialize()
-        ensure_embedding_dimension(self.db_url, self.embeddings.dimension)
+        ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=get_config().database_schema)

         logger.info(f"Connecting to PostgreSQL at {self.db_url}")


hindsight_api/engine/retain/fact_extraction.py
CHANGED

@@ -782,12 +782,28 @@ Text:
     usage = TokenUsage()  # Track cumulative usage across retries
     for attempt in range(max_retries):
         try:
+            # Use retain-specific overrides if set, otherwise fall back to global LLM config
+            max_retries = (
+                config.retain_llm_max_retries if config.retain_llm_max_retries is not None else config.llm_max_retries
+            )
+            initial_backoff = (
+                config.retain_llm_initial_backoff
+                if config.retain_llm_initial_backoff is not None
+                else config.llm_initial_backoff
+            )
+            max_backoff = (
+                config.retain_llm_max_backoff if config.retain_llm_max_backoff is not None else config.llm_max_backoff
+            )
+
             extraction_response_json, call_usage = await llm_config.call(
                 messages=[{"role": "system", "content": prompt}, {"role": "user", "content": user_message}],
                 response_format=response_schema,
                 scope="memory_extract_facts",
                 temperature=0.1,
                 max_completion_tokens=config.retain_max_completion_tokens,
+                max_retries=max_retries,
+                initial_backoff=initial_backoff,
+                max_backoff=max_backoff,
                 skip_validation=True,  # Get raw JSON, we'll validate leniently
                 return_usage=True,
             )
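
The additions above resolve each retry setting as "retain-specific override if set, else global LLM value". A minimal sketch of that resolution pattern; the dataclass below is illustrative, not the package's HindsightConfig:

from dataclasses import dataclass


@dataclass
class Cfg:
    llm_max_retries: int = 3
    llm_initial_backoff: float = 1.0
    llm_max_backoff: float = 30.0
    retain_llm_max_retries: int | None = None
    retain_llm_initial_backoff: float | None = None
    retain_llm_max_backoff: float | None = None


def resolve_retry_settings(cfg: Cfg) -> tuple[int, float, float]:
    max_retries = cfg.retain_llm_max_retries if cfg.retain_llm_max_retries is not None else cfg.llm_max_retries
    initial_backoff = (
        cfg.retain_llm_initial_backoff if cfg.retain_llm_initial_backoff is not None else cfg.llm_initial_backoff
    )
    max_backoff = cfg.retain_llm_max_backoff if cfg.retain_llm_max_backoff is not None else cfg.llm_max_backoff
    return max_retries, initial_backoff, max_backoff
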

hindsight_api/extensions/builtin/tenant.py
CHANGED

@@ -1,5 +1,6 @@
 """Built-in tenant extension implementations."""

+from hindsight_api.config import get_config
 from hindsight_api.extensions.tenant import AuthenticationError, Tenant, TenantContext, TenantExtension
 from hindsight_api.models import RequestContext

@@ -10,11 +11,13 @@ class ApiKeyTenantExtension(TenantExtension):

     This is a simple implementation that:
     1. Validates the API key matches HINDSIGHT_API_TENANT_API_KEY
-    2. Returns
+    2. Returns the configured schema (HINDSIGHT_API_DATABASE_SCHEMA, default 'public')
+       for all authenticated requests

     Configuration:
         HINDSIGHT_API_TENANT_EXTENSION=hindsight_api.extensions.builtin.tenant:ApiKeyTenantExtension
         HINDSIGHT_API_TENANT_API_KEY=your-secret-key
+        HINDSIGHT_API_DATABASE_SCHEMA=your-schema (optional, defaults to 'public')

     For multi-tenant setups with separate schemas per tenant, implement a custom
     TenantExtension that looks up the schema based on the API key or token claims.
@@ -27,11 +30,11 @@ class ApiKeyTenantExtension(TenantExtension):
             raise ValueError("HINDSIGHT_API_TENANT_API_KEY is required when using ApiKeyTenantExtension")

     async def authenticate(self, context: RequestContext) -> TenantContext:
-        """Validate API key and return
+        """Validate API key and return configured schema context."""
         if context.api_key != self.expected_api_key:
            raise AuthenticationError("Invalid API key")
-        return TenantContext(schema_name=
+        return TenantContext(schema_name=get_config().database_schema)

     async def list_tenants(self) -> list[Tenant]:
-        """Return
-        return [Tenant(schema=
+        """Return configured schema for single-tenant setup."""
+        return [Tenant(schema=get_config().database_schema)]
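
The docstring above points to a custom TenantExtension for real multi-tenancy. A rough sketch of what such an extension could look like, using only names visible in this diff; the constructor argument and key-to-schema mapping are illustrative, and the real extension loader may expect a no-argument constructor:

from hindsight_api.extensions.tenant import AuthenticationError, Tenant, TenantContext, TenantExtension
from hindsight_api.models import RequestContext


class StaticMapTenantExtension(TenantExtension):
    """Maps API keys to per-tenant schemas from a static dict (illustrative)."""

    def __init__(self, key_to_schema: dict[str, str] | None = None):
        self.key_to_schema = key_to_schema or {"key-a": "tenant_a", "key-b": "tenant_b"}

    async def authenticate(self, context: RequestContext) -> TenantContext:
        schema = self.key_to_schema.get(context.api_key)
        if schema is None:
            raise AuthenticationError("Invalid API key")
        return TenantContext(schema_name=schema)

    async def list_tenants(self) -> list[Tenant]:
        return [Tenant(schema=s) for s in self.key_to_schema.values()]
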
hindsight_api/main.py
CHANGED

@@ -170,31 +170,53 @@ def main():
     if args.log_level != config.log_level:
         config = HindsightConfig(
             database_url=config.database_url,
+            database_schema=config.database_schema,
             llm_provider=config.llm_provider,
             llm_api_key=config.llm_api_key,
             llm_model=config.llm_model,
             llm_base_url=config.llm_base_url,
             llm_max_concurrent=config.llm_max_concurrent,
+            llm_max_retries=config.llm_max_retries,
+            llm_initial_backoff=config.llm_initial_backoff,
+            llm_max_backoff=config.llm_max_backoff,
             llm_timeout=config.llm_timeout,
             retain_llm_provider=config.retain_llm_provider,
             retain_llm_api_key=config.retain_llm_api_key,
             retain_llm_model=config.retain_llm_model,
             retain_llm_base_url=config.retain_llm_base_url,
+            retain_llm_max_concurrent=config.retain_llm_max_concurrent,
+            retain_llm_max_retries=config.retain_llm_max_retries,
+            retain_llm_initial_backoff=config.retain_llm_initial_backoff,
+            retain_llm_max_backoff=config.retain_llm_max_backoff,
+            retain_llm_timeout=config.retain_llm_timeout,
             reflect_llm_provider=config.reflect_llm_provider,
             reflect_llm_api_key=config.reflect_llm_api_key,
             reflect_llm_model=config.reflect_llm_model,
             reflect_llm_base_url=config.reflect_llm_base_url,
+            reflect_llm_max_concurrent=config.reflect_llm_max_concurrent,
+            reflect_llm_max_retries=config.reflect_llm_max_retries,
+            reflect_llm_initial_backoff=config.reflect_llm_initial_backoff,
+            reflect_llm_max_backoff=config.reflect_llm_max_backoff,
+            reflect_llm_timeout=config.reflect_llm_timeout,
             consolidation_llm_provider=config.consolidation_llm_provider,
             consolidation_llm_api_key=config.consolidation_llm_api_key,
             consolidation_llm_model=config.consolidation_llm_model,
             consolidation_llm_base_url=config.consolidation_llm_base_url,
+            consolidation_llm_max_concurrent=config.consolidation_llm_max_concurrent,
+            consolidation_llm_max_retries=config.consolidation_llm_max_retries,
+            consolidation_llm_initial_backoff=config.consolidation_llm_initial_backoff,
+            consolidation_llm_max_backoff=config.consolidation_llm_max_backoff,
+            consolidation_llm_timeout=config.consolidation_llm_timeout,
             embeddings_provider=config.embeddings_provider,
             embeddings_local_model=config.embeddings_local_model,
+            embeddings_local_force_cpu=config.embeddings_local_force_cpu,
             embeddings_tei_url=config.embeddings_tei_url,
             embeddings_openai_base_url=config.embeddings_openai_base_url,
             embeddings_cohere_base_url=config.embeddings_cohere_base_url,
             reranker_provider=config.reranker_provider,
             reranker_local_model=config.reranker_local_model,
+            reranker_local_force_cpu=config.reranker_local_force_cpu,
+            reranker_local_max_concurrent=config.reranker_local_max_concurrent,
             reranker_tei_url=config.reranker_tei_url,
             reranker_tei_batch_size=config.reranker_tei_batch_size,
             reranker_tei_max_concurrent=config.reranker_tei_max_concurrent,
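
An aside on the rebuild above: HindsightConfig is copied field by field just to override log_level, which is why every new field has to be threaded through here. If HindsightConfig is a dataclass (an assumption), dataclasses.replace expresses the same rebuild without enumerating fields; toy example:

from dataclasses import dataclass, replace


@dataclass(frozen=True)
class ToyConfig:
    database_schema: str = "public"
    log_level: str = "INFO"


config = ToyConfig()
config = replace(config, log_level="DEBUG")  # all other fields copied unchanged
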
@@ -217,6 +239,7 @@ def main():
             retain_observations_async=config.retain_observations_async,
             enable_observations=config.enable_observations,
             consolidation_batch_size=config.consolidation_batch_size,
+            consolidation_max_tokens=config.consolidation_max_tokens,
             skip_llm_verification=config.skip_llm_verification,
             lazy_reranker=config.lazy_reranker,
             run_migrations_on_startup=config.run_migrations_on_startup,
@@ -341,6 +364,7 @@ def main():
     # Start idle checker in daemon mode
     if idle_middleware is not None:
         # Start the idle checker in a background thread with its own event loop
+        import logging
         import threading

         def run_idle_checker():
@@ -351,8 +375,8 @@ def main():
                 loop = asyncio.new_event_loop()
                 asyncio.set_event_loop(loop)
                 loop.run_until_complete(idle_middleware._check_idle())
-            except Exception:
-
+            except Exception as e:
+                logging.error(f"Idle checker error: {e}", exc_info=True)

         threading.Thread(target=run_idle_checker, daemon=True).start()

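
The last hunk replaces a silent except with a logged one inside the idle-checker thread. For reference, a generic sketch of the "daemon thread with its own event loop" pattern it runs in (the coroutine below is a stand-in for idle_middleware._check_idle()):

import asyncio
import logging
import threading


async def check_idle() -> None:
    # Stand-in for idle_middleware._check_idle()
    await asyncio.sleep(0)


def run_idle_checker() -> None:
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(check_idle())
    except Exception as e:
        logging.error(f"Idle checker error: {e}", exc_info=True)


threading.Thread(target=run_idle_checker, daemon=True).start()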