hindsight-api 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/PKG-INFO +1 -1
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/__init__.py +1 -1
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/config.py +85 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/daemon.py +4 -1
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/consolidator.py +31 -2
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/cross_encoder.py +5 -97
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/embeddings.py +2 -92
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/fact_extraction.py +16 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/main.py +21 -9
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/pyproject.toml +1 -1
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/.gitignore +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/README.md +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/admin/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/admin/cli.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/README +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/env.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/script.py.mako +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/api/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/api/http.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/api/mcp.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/banner.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/prompts.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/db_budget.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/db_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/directives/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/directives/models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/entity_resolver.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/interface.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/llm_wrapper.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/memory_engine.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/mental_models/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/mental_models/models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/query_analyzer.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/agent.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/observations.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/prompts.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/tools.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/tools_schema.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/response_models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/bank_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/chunk_storage.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/deduplication.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/embedding_processing.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/embedding_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/entity_processing.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/fact_storage.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/link_creation.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/link_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/orchestrator.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/types.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/fusion.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/graph_retrieval.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/reranking.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/retrieval.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/tags.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/temporal_extraction.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/think_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/trace.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/tracer.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/types.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/task_backend.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/base.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/builtin/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/builtin/tenant.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/context.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/http.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/loader.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/operation_validator.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/tenant.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/mcp_local.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/mcp_tools.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/metrics.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/migrations.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/pg0.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/server.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/worker/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/worker/main.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/worker/poller.py +0 -0
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/config.py

```diff
@@ -26,6 +26,9 @@ ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
 ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
 ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
 ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
+ENV_LLM_MAX_RETRIES = "HINDSIGHT_API_LLM_MAX_RETRIES"
+ENV_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_LLM_INITIAL_BACKOFF"
+ENV_LLM_MAX_BACKOFF = "HINDSIGHT_API_LLM_MAX_BACKOFF"
 ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
 ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
 
```
```diff
@@ -34,16 +37,31 @@ ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
 ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
 ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
 ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
+ENV_RETAIN_LLM_MAX_CONCURRENT = "HINDSIGHT_API_RETAIN_LLM_MAX_CONCURRENT"
+ENV_RETAIN_LLM_MAX_RETRIES = "HINDSIGHT_API_RETAIN_LLM_MAX_RETRIES"
+ENV_RETAIN_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_INITIAL_BACKOFF"
+ENV_RETAIN_LLM_MAX_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF"
+ENV_RETAIN_LLM_TIMEOUT = "HINDSIGHT_API_RETAIN_LLM_TIMEOUT"
 
 ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
 ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
 ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
 ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
+ENV_REFLECT_LLM_MAX_CONCURRENT = "HINDSIGHT_API_REFLECT_LLM_MAX_CONCURRENT"
+ENV_REFLECT_LLM_MAX_RETRIES = "HINDSIGHT_API_REFLECT_LLM_MAX_RETRIES"
+ENV_REFLECT_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_INITIAL_BACKOFF"
+ENV_REFLECT_LLM_MAX_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_MAX_BACKOFF"
+ENV_REFLECT_LLM_TIMEOUT = "HINDSIGHT_API_REFLECT_LLM_TIMEOUT"
 
 ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
 ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
 ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
 ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
+ENV_CONSOLIDATION_LLM_MAX_CONCURRENT = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_CONCURRENT"
+ENV_CONSOLIDATION_LLM_MAX_RETRIES = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_RETRIES"
+ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_INITIAL_BACKOFF"
+ENV_CONSOLIDATION_LLM_MAX_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_BACKOFF"
+ENV_CONSOLIDATION_LLM_TIMEOUT = "HINDSIGHT_API_CONSOLIDATION_LLM_TIMEOUT"
 
 ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
 ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
```
```diff
@@ -133,6 +151,9 @@ DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
 DEFAULT_LLM_MODEL = "gpt-5-mini"
 DEFAULT_LLM_MAX_CONCURRENT = 32
+DEFAULT_LLM_MAX_RETRIES = 10  # Max retry attempts for LLM API calls
+DEFAULT_LLM_INITIAL_BACKOFF = 1.0  # Initial backoff in seconds for retry exponential backoff
+DEFAULT_LLM_MAX_BACKOFF = 60.0  # Max backoff cap in seconds for retry exponential backoff
 DEFAULT_LLM_TIMEOUT = 120.0  # seconds
 
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
```
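The three new knobs describe a capped exponential backoff. The retry loop itself lives in the LLM wrapper, which this diff does not show; a minimal sketch of the delay schedule these defaults imply, assuming the common convention of doubling per attempt:

```python
# Sketch only: delays implied by DEFAULT_LLM_INITIAL_BACKOFF = 1.0,
# DEFAULT_LLM_MAX_BACKOFF = 60.0, DEFAULT_LLM_MAX_RETRIES = 10, assuming
# the wrapper doubles the delay per attempt (not shown in this diff).
def backoff_schedule(max_retries: int = 10, initial: float = 1.0, cap: float = 60.0) -> list[float]:
    return [min(initial * 2**attempt, cap) for attempt in range(max_retries)]

print(backoff_schedule())
# [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 60.0, 60.0, 60.0, 60.0]
```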
```diff
@@ -286,6 +307,9 @@ class HindsightConfig:
     llm_model: str
     llm_base_url: str | None
     llm_max_concurrent: int
+    llm_max_retries: int
+    llm_initial_backoff: float
+    llm_max_backoff: float
     llm_timeout: float
 
     # Per-operation LLM configuration (None = use default LLM config)
```
```diff
@@ -293,16 +317,31 @@ class HindsightConfig:
     retain_llm_api_key: str | None
     retain_llm_model: str | None
     retain_llm_base_url: str | None
+    retain_llm_max_concurrent: int | None
+    retain_llm_max_retries: int | None
+    retain_llm_initial_backoff: float | None
+    retain_llm_max_backoff: float | None
+    retain_llm_timeout: float | None
 
     reflect_llm_provider: str | None
     reflect_llm_api_key: str | None
     reflect_llm_model: str | None
     reflect_llm_base_url: str | None
+    reflect_llm_max_concurrent: int | None
+    reflect_llm_max_retries: int | None
+    reflect_llm_initial_backoff: float | None
+    reflect_llm_max_backoff: float | None
+    reflect_llm_timeout: float | None
 
     consolidation_llm_provider: str | None
     consolidation_llm_api_key: str | None
     consolidation_llm_model: str | None
     consolidation_llm_base_url: str | None
+    consolidation_llm_max_concurrent: int | None
+    consolidation_llm_max_retries: int | None
+    consolidation_llm_initial_backoff: float | None
+    consolidation_llm_max_backoff: float | None
+    consolidation_llm_timeout: float | None
 
     # Embeddings
     embeddings_provider: str
```
```diff
@@ -387,20 +426,66 @@ class HindsightConfig:
             llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
             llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
             llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
+            llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
+            llm_initial_backoff=float(os.getenv(ENV_LLM_INITIAL_BACKOFF, str(DEFAULT_LLM_INITIAL_BACKOFF))),
+            llm_max_backoff=float(os.getenv(ENV_LLM_MAX_BACKOFF, str(DEFAULT_LLM_MAX_BACKOFF))),
             llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
             # Per-operation LLM config (None = use default)
             retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
             retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
             retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
             retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
+            retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
+            if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
+            else None,
+            retain_llm_max_retries=int(os.getenv(ENV_RETAIN_LLM_MAX_RETRIES))
+            if os.getenv(ENV_RETAIN_LLM_MAX_RETRIES)
+            else None,
+            retain_llm_initial_backoff=float(os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF))
+            if os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF)
+            else None,
+            retain_llm_max_backoff=float(os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF))
+            if os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF)
+            else None,
+            retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
             reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
             reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
             reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
             reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
+            reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
+            if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
+            else None,
+            reflect_llm_max_retries=int(os.getenv(ENV_REFLECT_LLM_MAX_RETRIES))
+            if os.getenv(ENV_REFLECT_LLM_MAX_RETRIES)
+            else None,
+            reflect_llm_initial_backoff=float(os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF))
+            if os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF)
+            else None,
+            reflect_llm_max_backoff=float(os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF))
+            if os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF)
+            else None,
+            reflect_llm_timeout=float(os.getenv(ENV_REFLECT_LLM_TIMEOUT))
+            if os.getenv(ENV_REFLECT_LLM_TIMEOUT)
+            else None,
             consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
             consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
             consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
             consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
+            consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
+            if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
+            else None,
+            consolidation_llm_max_retries=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES))
+            if os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES)
+            else None,
+            consolidation_llm_initial_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF))
+            if os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF)
+            else None,
+            consolidation_llm_max_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF))
+            if os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF)
+            else None,
+            consolidation_llm_timeout=float(os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT))
+            if os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT)
+            else None,
             # Embeddings
             embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
             embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
```
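Each optional override repeats the same `int(os.getenv(X)) if os.getenv(X) else None` shape. A hypothetical helper (not part of the package) that captures the pattern:

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")

# Hypothetical helper illustrating the pattern above: parse an env var with a
# given converter, returning None when the variable is unset or empty.
def optional_env(name: str, convert: Callable[[str], T]) -> T | None:
    raw = os.getenv(name)
    return convert(raw) if raw else None

# Usage mirroring the diff:
retain_llm_max_retries = optional_env("HINDSIGHT_API_RETAIN_LLM_MAX_RETRIES", int)
retain_llm_max_backoff = optional_env("HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF", float)
```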
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/daemon.py

```diff
@@ -52,7 +52,10 @@ class IdleTimeoutMiddleware:
                 logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
                 # Give a moment for any in-flight requests
                 await asyncio.sleep(1)
-
+                # Send SIGTERM to ourselves to trigger graceful shutdown
+                import signal
+
+                os.kill(os.getpid(), signal.SIGTERM)
 
 
 class DaemonLock:
```
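Sending SIGTERM to your own PID hands shutdown to whatever handler the hosting server installed (uvicorn, for instance, traps SIGTERM and drains in-flight requests) instead of exiting abruptly. A standalone POSIX sketch of the mechanism; the handler here is a stand-in for the ASGI server's own:

```python
import os
import signal

def handle_sigterm(signum, frame):
    # Stand-in for the ASGI server's handler, which would stop accepting
    # connections and finish in-flight requests before exiting.
    print("SIGTERM received, shutting down gracefully")
    raise SystemExit(0)

signal.signal(signal.SIGTERM, handle_sigterm)
os.kill(os.getpid(), signal.SIGTERM)  # the same self-signal the diff adds
```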
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/consolidator.py

```diff
@@ -144,10 +144,14 @@ async def run_consolidation_job(
     }
 
     batch_num = 0
+    last_progress_timings = {}  # Track timings at last progress log
     while True:
         batch_num += 1
         batch_start = time.time()
 
+        # Snapshot timings at batch start for per-batch calculation
+        batch_start_timings = perf.timings.copy()
+
         # Fetch next batch of unconsolidated memories
         async with pool.acquire() as conn:
             t0 = time.time()
@@ -217,19 +221,44 @@ async def run_consolidation_job(
             elif action == "skipped":
                 stats["skipped"] += 1
 
-            # Log progress periodically
+            # Log progress periodically with timing breakdown
             if stats["memories_processed"] % 10 == 0:
+                # Calculate timing deltas since last progress log
+                timing_parts = []
+                for key in ["recall", "llm", "embedding", "db_write"]:
+                    if key in perf.timings:
+                        delta = perf.timings[key] - last_progress_timings.get(key, 0)
+                        timing_parts.append(f"{key}={delta:.2f}s")
+
+                timing_str = f" | {', '.join(timing_parts)}" if timing_parts else ""
                 logger.info(
                     f"[CONSOLIDATION] bank={bank_id} progress: "
-                    f"{stats['memories_processed']}/{total_count} memories processed"
+                    f"{stats['memories_processed']}/{total_count} memories processed{timing_str}"
                 )
 
+                # Update last progress snapshot
+                last_progress_timings = perf.timings.copy()
+
         batch_time = time.time() - batch_start
         perf.log(
             f"[2] Batch {batch_num}: {len(memories)} memories in {batch_time:.3f}s "
             f"(avg {batch_time / len(memories):.3f}s/memory)"
         )
 
+        # Log timing breakdown after each batch (delta from batch start)
+        timing_parts = []
+        for key in ["recall", "llm", "embedding", "db_write"]:
+            if key in perf.timings:
+                delta = perf.timings[key] - batch_start_timings.get(key, 0)
+                timing_parts.append(f"{key}={delta:.3f}s")
+
+        if timing_parts:
+            avg_per_memory = batch_time / len(memories) if memories else 0
+            logger.info(
+                f"[CONSOLIDATION] bank={bank_id} batch {batch_num}/{len(memories)} memories: "
+                f"{', '.join(timing_parts)} | avg={avg_per_memory:.3f}s/memory"
+            )
+
         # Build summary
         perf.log(
             f"[3] Results: {stats['memories_processed']} memories -> "
```
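The progress logging relies on `perf.timings` being a dict of cumulative seconds per phase; per-interval numbers come from snapshotting the dict and subtracting. The same pattern in isolation (the `perf` object itself is not shown in this diff, so the accumulator below is an assumption about its shape):

```python
import time

timings: dict[str, float] = {}  # cumulative seconds per phase, like perf.timings

def track(key: str, start: float) -> None:
    timings[key] = timings.get(key, 0.0) + (time.time() - start)

snapshot = timings.copy()  # taken at batch start / at the last progress log

t0 = time.time()
time.sleep(0.05)  # stand-in for a recall/llm/db phase
track("llm", t0)

# Delta since the snapshot, exactly as the progress log computes it
delta = timings["llm"] - snapshot.get("llm", 0)
print(f"llm={delta:.2f}s")
```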
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/cross_encoder.py

```diff
@@ -178,108 +178,16 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         else:
             logger.info("Reranker: local provider initialized (using existing executor)")
 
-    def _is_xpc_error(self, error: Exception) -> bool:
-        """
-        Check if an error is an XPC connection error (macOS daemon issue).
-
-        On macOS, long-running daemons can lose XPC connections to system services
-        when the process is idle for extended periods.
-        """
-        error_str = str(error).lower()
-        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
-
-    def _reinitialize_model_sync(self) -> None:
-        """
-        Clear and reinitialize the cross-encoder model synchronously.
-
-        This is used to recover from XPC errors on macOS where the
-        PyTorch/MPS backend loses its connection to system services.
-        """
-        logger.warning(f"Reinitializing reranker model {self.model_name} due to backend error")
-
-        # Clear existing model
-        self._model = None
-
-        # Force garbage collection to free resources
-        import gc
-
-        import torch
-
-        gc.collect()
-
-        # If using CUDA/MPS, clear the cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            try:
-                torch.mps.empty_cache()
-            except AttributeError:
-                pass  # Method might not exist in all PyTorch versions
-
-        # Reinitialize the model
-        try:
-            from sentence_transformers import CrossEncoder
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for LocalSTCrossEncoder. "
-                "Install it with: pip install sentence-transformers"
-            )
-
-        # Determine device based on hardware availability
-        if self.force_cpu:
-            device = "cpu"
-        else:
-            # Wrap in try-except to gracefully handle any device detection issues
-            device = "cpu"  # Default to CPU
-            try:
-                has_gpu = torch.cuda.is_available() or (
-                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
-                )
-                if has_gpu:
-                    device = None  # Let sentence-transformers auto-detect GPU/MPS
-            except Exception as e:
-                logger.warning(f"Failed to detect GPU/MPS during reinit, falling back to CPU: {e}")
-
-        self._model = CrossEncoder(
-            self.model_name,
-            device=device,
-            model_kwargs={"low_cpu_mem_usage": False},
-        )
-
-        logger.info("Reranker: local provider reinitialized successfully")
-
-    def _predict_with_recovery(self, pairs: list[tuple[str, str]]) -> list[float]:
-        """
-        Predict with automatic recovery from XPC errors.
-
-        This runs synchronously in the thread pool.
-        """
-        max_retries = 1
-        for attempt in range(max_retries + 1):
-            try:
-                scores = self._model.predict(pairs, show_progress_bar=False)
-                return scores.tolist() if hasattr(scores, "tolist") else list(scores)
-            except Exception as e:
-                # Check if this is an XPC error (macOS daemon issue)
-                if self._is_xpc_error(e) and attempt < max_retries:
-                    logger.warning(f"XPC error detected in reranker (attempt {attempt + 1}): {e}")
-                    try:
-                        self._reinitialize_model_sync()
-                        logger.info("Reranker reinitialized successfully, retrying prediction")
-                        continue
-                    except Exception as reinit_error:
-                        logger.error(f"Failed to reinitialize reranker: {reinit_error}")
-                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
-                else:
-                    # Not an XPC error or out of retries
-                    raise
+    def _predict_sync(self, pairs: list[tuple[str, str]]) -> list[float]:
+        """Synchronous prediction wrapper for thread pool execution."""
+        scores = self._model.predict(pairs, show_progress_bar=False)
+        return scores.tolist() if hasattr(scores, "tolist") else list(scores)
 
     async def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
         """
         Score query-document pairs for relevance.
 
         Uses a dedicated thread pool with limited workers to prevent CPU thrashing.
-        Automatically recovers from XPC errors on macOS by reinitializing the model.
 
         Args:
             pairs: List of (query, document) tuples to score
@@ -294,7 +202,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         loop = asyncio.get_event_loop()
         return await loop.run_in_executor(
             LocalSTCrossEncoder._executor,
-            self._predict_with_recovery,
+            self._predict_sync,
             pairs,
         )
```
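With the recovery path gone, `predict` reduces to the plain pattern of offloading a blocking model call to a bounded thread pool so it never stalls the event loop. A self-contained sketch of that pattern, with a trivial stand-in for the model:

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

_executor = ThreadPoolExecutor(max_workers=2)  # bounded, like the class-level executor

def _predict_sync(pairs: list[tuple[str, str]]) -> list[float]:
    # Stand-in for CrossEncoder.predict: any CPU-bound scoring function
    return [float(len(q) + len(d)) for q, d in pairs]

async def predict(pairs: list[tuple[str, str]]) -> list[float]:
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(_executor, _predict_sync, pairs)

print(asyncio.run(predict([("query", "document")])))  # [13.0]
```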
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/embeddings.py

```diff
@@ -166,82 +166,10 @@ class LocalSTEmbeddings(Embeddings):
         self._dimension = self._model.get_sentence_embedding_dimension()
         logger.info(f"Embeddings: local provider initialized (dim: {self._dimension})")
 
-    def _is_xpc_error(self, error: Exception) -> bool:
-        """
-        Check if an error is an XPC connection error (macOS daemon issue).
-
-        On macOS, long-running daemons can lose XPC connections to system services
-        when the process is idle for extended periods.
-        """
-        error_str = str(error).lower()
-        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
-
-    def _reinitialize_model_sync(self) -> None:
-        """
-        Clear and reinitialize the embedding model synchronously.
-
-        This is used to recover from XPC errors on macOS where the
-        PyTorch/MPS backend loses its connection to system services.
-        """
-        logger.warning(f"Reinitializing embedding model {self.model_name} due to backend error")
-
-        # Clear existing model
-        self._model = None
-
-        # Force garbage collection to free resources
-        import gc
-
-        import torch
-
-        gc.collect()
-
-        # If using CUDA/MPS, clear the cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            try:
-                torch.mps.empty_cache()
-            except AttributeError:
-                pass  # Method might not exist in all PyTorch versions
-
-        # Reinitialize the model (inline version of initialize() but synchronous)
-        try:
-            from sentence_transformers import SentenceTransformer
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for LocalSTEmbeddings. "
-                "Install it with: pip install sentence-transformers"
-            )
-
-        # Determine device based on hardware availability
-        if self.force_cpu:
-            device = "cpu"
-        else:
-            # Wrap in try-except to gracefully handle any device detection issues
-            device = "cpu"  # Default to CPU
-            try:
-                has_gpu = torch.cuda.is_available() or (
-                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
-                )
-                if has_gpu:
-                    device = None  # Let sentence-transformers auto-detect GPU/MPS
-            except Exception as e:
-                logger.warning(f"Failed to detect GPU/MPS during reinit, falling back to CPU: {e}")
-
-        self._model = SentenceTransformer(
-            self.model_name,
-            device=device,
-            model_kwargs={"low_cpu_mem_usage": False},
-        )
-
-        logger.info("Embeddings: local provider reinitialized successfully")
-
     def encode(self, texts: list[str]) -> list[list[float]]:
         """
         Generate embeddings for a list of texts.
 
-        Automatically recovers from XPC errors on macOS by reinitializing the model.
-
         Args:
             texts: List of text strings to encode
@@ -251,26 +179,8 @@ class LocalSTEmbeddings(Embeddings):
         if self._model is None:
             raise RuntimeError("Embeddings not initialized. Call initialize() first.")
 
-        max_retries = 1
-
-        for attempt in range(max_retries + 1):
-            try:
-                embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
-                return [emb.tolist() for emb in embeddings]
-            except Exception as e:
-                # Check if this is an XPC error (macOS daemon issue)
-                if self._is_xpc_error(e) and attempt < max_retries:
-                    logger.warning(f"XPC error detected in embedding generation (attempt {attempt + 1}): {e}")
-                    try:
-                        self._reinitialize_model_sync()
-                        logger.info("Model reinitialized successfully, retrying embedding generation")
-                        continue
-                    except Exception as reinit_error:
-                        logger.error(f"Failed to reinitialize model: {reinit_error}")
-                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
-                else:
-                    # Not an XPC error or out of retries
-                    raise
+        embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
+        return [emb.tolist() for emb in embeddings]
 
 
 class RemoteTEIEmbeddings(Embeddings):
```
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/fact_extraction.py

```diff
@@ -782,12 +782,28 @@ Text:
     usage = TokenUsage()  # Track cumulative usage across retries
     for attempt in range(max_retries):
         try:
+            # Use retain-specific overrides if set, otherwise fall back to global LLM config
+            max_retries = (
+                config.retain_llm_max_retries if config.retain_llm_max_retries is not None else config.llm_max_retries
+            )
+            initial_backoff = (
+                config.retain_llm_initial_backoff
+                if config.retain_llm_initial_backoff is not None
+                else config.llm_initial_backoff
+            )
+            max_backoff = (
+                config.retain_llm_max_backoff if config.retain_llm_max_backoff is not None else config.llm_max_backoff
+            )
+
             extraction_response_json, call_usage = await llm_config.call(
                 messages=[{"role": "system", "content": prompt}, {"role": "user", "content": user_message}],
                 response_format=response_schema,
                 scope="memory_extract_facts",
                 temperature=0.1,
                 max_completion_tokens=config.retain_max_completion_tokens,
+                max_retries=max_retries,
+                initial_backoff=initial_backoff,
+                max_backoff=max_backoff,
                 skip_validation=True,  # Get raw JSON, we'll validate leniently
                 return_usage=True,
             )
```
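Each retain-time setting resolves from the per-operation override down to the global value. A hypothetical helper (not in the package) showing the resolution order the diff spells out inline:

```python
# Hypothetical helper: prefer the per-operation override, else the global value.
def resolve(override, default):
    return override if override is not None else default

# Mirrors the diff's resolution for the retain operation:
#   max_retries     = resolve(config.retain_llm_max_retries, config.llm_max_retries)
#   initial_backoff = resolve(config.retain_llm_initial_backoff, config.llm_initial_backoff)
#   max_backoff     = resolve(config.retain_llm_max_backoff, config.llm_max_backoff)
print(resolve(None, 10), resolve(5, 10))  # 10 5
```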
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/main.py

```diff
@@ -140,13 +140,6 @@ def main():
         args.port = DEFAULT_DAEMON_PORT
         args.host = "127.0.0.1"  # Only bind to localhost for security
 
-        # Force CPU mode for daemon to avoid macOS MPS/XPC issues
-        # MPS (Metal Performance Shaders) has unstable XPC connections in background processes
-        # that can cause assertion failures and process crashes at the C++ level
-        # (which Python exception handlers cannot catch)
-        os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
-        os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
-
         # Check if another daemon is already running
         daemon_lock = DaemonLock()
         if not daemon_lock.acquire():
```
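With the unconditional force-CPU override removed, daemon-mode embedding and reranking are no longer pinned to CPU. Anyone who still wants the old behavior can set the same environment variables themselves before starting the daemon:

```python
import os

# Equivalent to what main.py used to set unconditionally in daemon mode
os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
```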
```diff
@@ -183,19 +176,37 @@ def main():
         llm_model=config.llm_model,
         llm_base_url=config.llm_base_url,
         llm_max_concurrent=config.llm_max_concurrent,
+        llm_max_retries=config.llm_max_retries,
+        llm_initial_backoff=config.llm_initial_backoff,
+        llm_max_backoff=config.llm_max_backoff,
         llm_timeout=config.llm_timeout,
         retain_llm_provider=config.retain_llm_provider,
         retain_llm_api_key=config.retain_llm_api_key,
         retain_llm_model=config.retain_llm_model,
         retain_llm_base_url=config.retain_llm_base_url,
+        retain_llm_max_concurrent=config.retain_llm_max_concurrent,
+        retain_llm_max_retries=config.retain_llm_max_retries,
+        retain_llm_initial_backoff=config.retain_llm_initial_backoff,
+        retain_llm_max_backoff=config.retain_llm_max_backoff,
+        retain_llm_timeout=config.retain_llm_timeout,
         reflect_llm_provider=config.reflect_llm_provider,
         reflect_llm_api_key=config.reflect_llm_api_key,
         reflect_llm_model=config.reflect_llm_model,
         reflect_llm_base_url=config.reflect_llm_base_url,
+        reflect_llm_max_concurrent=config.reflect_llm_max_concurrent,
+        reflect_llm_max_retries=config.reflect_llm_max_retries,
+        reflect_llm_initial_backoff=config.reflect_llm_initial_backoff,
+        reflect_llm_max_backoff=config.reflect_llm_max_backoff,
+        reflect_llm_timeout=config.reflect_llm_timeout,
         consolidation_llm_provider=config.consolidation_llm_provider,
         consolidation_llm_api_key=config.consolidation_llm_api_key,
         consolidation_llm_model=config.consolidation_llm_model,
         consolidation_llm_base_url=config.consolidation_llm_base_url,
+        consolidation_llm_max_concurrent=config.consolidation_llm_max_concurrent,
+        consolidation_llm_max_retries=config.consolidation_llm_max_retries,
+        consolidation_llm_initial_backoff=config.consolidation_llm_initial_backoff,
+        consolidation_llm_max_backoff=config.consolidation_llm_max_backoff,
+        consolidation_llm_timeout=config.consolidation_llm_timeout,
         embeddings_provider=config.embeddings_provider,
         embeddings_local_model=config.embeddings_local_model,
         embeddings_local_force_cpu=config.embeddings_local_force_cpu,
```
```diff
@@ -353,6 +364,7 @@ def main():
     # Start idle checker in daemon mode
     if idle_middleware is not None:
         # Start the idle checker in a background thread with its own event loop
+        import logging
         import threading
 
         def run_idle_checker():
```
```diff
@@ -363,8 +375,8 @@ def main():
                 loop = asyncio.new_event_loop()
                 asyncio.set_event_loop(loop)
                 loop.run_until_complete(idle_middleware._check_idle())
-            except Exception:
-                pass
+            except Exception as e:
+                logging.error(f"Idle checker error: {e}", exc_info=True)
 
         threading.Thread(target=run_idle_checker, daemon=True).start()
 
```
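The idle checker runs an async coroutine on its own event loop inside a daemon thread, so it lives independently of the main server loop. The pattern in isolation, with a trivial stand-in for the watchdog coroutine:

```python
import asyncio
import logging
import threading

async def check_idle() -> None:
    # Stand-in for IdleTimeoutMiddleware._check_idle: an async watchdog loop
    await asyncio.sleep(0.1)
    print("idle check ran")

def run_idle_checker() -> None:
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(check_idle())
    except Exception as e:
        logging.error(f"Idle checker error: {e}", exc_info=True)

t = threading.Thread(target=run_idle_checker, daemon=True)
t.start()
t.join()  # joined here only so the example completes deterministically
```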