hindsight-api 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/cli.py +59 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1119 -93
- hindsight_api/api/mcp.py +11 -191
- hindsight_api/config.py +145 -45
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +859 -0
- hindsight_api/engine/consolidation/prompts.py +69 -0
- hindsight_api/engine/cross_encoder.py +114 -9
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +102 -5
- hindsight_api/engine/interface.py +32 -13
- hindsight_api/engine/llm_wrapper.py +505 -43
- hindsight_api/engine/memory_engine.py +2090 -1089
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +130 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +81 -48
- hindsight_api/engine/retain/fact_storage.py +5 -8
- hindsight_api/engine/retain/link_utils.py +5 -8
- hindsight_api/engine/retain/orchestrator.py +1 -55
- hindsight_api/engine/retain/types.py +2 -2
- hindsight_api/engine/search/graph_retrieval.py +2 -2
- hindsight_api/engine/search/link_expansion_retrieval.py +164 -29
- hindsight_api/engine/search/mpfp_retrieval.py +1 -1
- hindsight_api/engine/search/retrieval.py +14 -14
- hindsight_api/engine/search/think_utils.py +41 -140
- hindsight_api/engine/search/trace.py +0 -1
- hindsight_api/engine/search/tracer.py +2 -5
- hindsight_api/engine/search/types.py +0 -3
- hindsight_api/engine/task_backend.py +112 -196
- hindsight_api/engine/utils.py +0 -151
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +5 -1
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +16 -5
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/models.py +0 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +12 -6
- hindsight_api-0.4.0.dist-info/RECORD +112 -0
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +1 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.3.0.dist-info/RECORD +0 -82
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/engine/consolidation/prompts.py

@@ -0,0 +1,69 @@
+"""Prompts for the consolidation engine."""
+
+CONSOLIDATION_SYSTEM_PROMPT = """You are a memory consolidation system. Your job is to convert facts into durable knowledge (observations) and merge with existing knowledge when appropriate.
+
+You must output ONLY valid JSON with no markdown formatting, no code blocks, and no additional text.
+
+## EXTRACT DURABLE KNOWLEDGE, NOT EPHEMERAL STATE
+Facts often describe events or actions. Extract the DURABLE KNOWLEDGE implied by the fact, not the transient state.
+
+Examples of extracting durable knowledge:
+- "User moved to Room 203" -> "Room 203 exists" (location exists, not where user is now)
+- "User visited Acme Corp at Room 105" -> "Acme Corp is located in Room 105"
+- "User took the elevator to floor 3" -> "Floor 3 is accessible by elevator"
+- "User met Sarah at the lobby" -> "Sarah can be found at the lobby"
+
+DO NOT track current user position/state as knowledge - that changes constantly.
+DO track permanent facts learned from the user's actions.
+
+## PRESERVE SPECIFIC DETAILS
+Keep names, locations, numbers, and other specifics. Do NOT:
+- Abstract into general principles
+- Generate business insights
+- Make knowledge generic
+
+GOOD examples:
+- Fact: "John likes pizza" -> "John likes pizza"
+- Fact: "Alice works at Google" -> "Alice works at Google"
+
+BAD examples:
+- "John likes pizza" -> "Understanding dietary preferences helps..." (TOO ABSTRACT)
+- "User is at Room 203" -> "User is currently at Room 203" (EPHEMERAL STATE)
+
+## MERGE RULES (when comparing to existing observations):
+1. REDUNDANT: Same information worded differently → update existing
+2. CONTRADICTION: Opposite information about same topic → update with history (e.g., "used to X, now Y")
+3. UPDATE: New state replacing old state → update with history
+
+## CRITICAL RULES:
+- NEVER merge facts about DIFFERENT people
+- NEVER merge unrelated topics (food preferences vs work vs hobbies)
+- When merging contradictions, capture the CHANGE (before → after)
+- Keep observations focused on ONE specific topic per person
+- The "text" field MUST contain durable knowledge, not ephemeral state
+- Do NOT include "tags" in output - tags are handled automatically"""
+
+CONSOLIDATION_USER_PROMPT = """Analyze this new fact and consolidate into knowledge.
+{mission_section}
+NEW FACT: {fact_text}
+
+EXISTING OBSERVATIONS:
+{observations_text}
+
+Instructions:
+1. First, extract the DURABLE KNOWLEDGE from the fact (not ephemeral state like "user is at X")
+2. Then compare with existing observations:
+   - If an observation covers the same topic: UPDATE it with the new knowledge
+   - If no observation covers the topic: CREATE a new one
+
+Output JSON array of actions (ALWAYS an array, even for single action):
+[
+  {{"action": "update", "learning_id": "uuid", "text": "updated durable knowledge", "reason": "..."}},
+  {{"action": "create", "text": "new durable knowledge", "reason": "..."}}
+]
+
+If NO consolidation is needed (fact is purely ephemeral with no durable knowledge):
+[]
+
+If no observations exist and fact contains durable knowledge:
+[{{"action": "create", "text": "durable knowledge text", "reason": "new topic"}}]"""
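For orientation, a minimal sketch of how these templates might be driven: fill CONSOLIDATION_USER_PROMPT with the fact and candidate observations, send it alongside CONSOLIDATION_SYSTEM_PROMPT, and parse the returned JSON array of actions. The helper names, the observation formatting, and the mission-section wording below are illustrative assumptions, not the package's actual consolidator API.

```python
import json

from hindsight_api.engine.consolidation.prompts import (
    CONSOLIDATION_SYSTEM_PROMPT,
    CONSOLIDATION_USER_PROMPT,
)


def build_consolidation_messages(
    fact_text: str, observations: list[dict], mission: str | None = None
) -> list[dict]:
    """Illustrative only: fill the prompt templates the way a consolidator might."""
    observations_text = "\n".join(
        f"- [{o['id']}] {o['text']}" for o in observations
    ) or "(none)"
    mission_section = f"BANK MISSION: {mission}\n" if mission else ""
    user_prompt = CONSOLIDATION_USER_PROMPT.format(
        mission_section=mission_section,
        fact_text=fact_text,
        observations_text=observations_text,
    )
    return [
        {"role": "system", "content": CONSOLIDATION_SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]


def parse_actions(raw_response: str) -> list[dict]:
    """The prompt requires a bare JSON array; an empty list means nothing durable to store."""
    actions = json.loads(raw_response)
    if not isinstance(actions, list):
        raise ValueError("Expected a JSON array of actions")
    return actions
```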
hindsight_api/engine/cross_encoder.py

@@ -130,13 +130,28 @@ class LocalSTCrossEncoder(CrossEncoderModel):
                 "Install it with: pip install sentence-transformers"
             )
 
-        # Note: We use CPU even when GPU/MPS is available because:
-        # 1. The reranker model (MiniLM) is tiny (~22M params)
-        # 2. Batch sizes are small (~100-200 pairs)
-        # 3. Data transfer overhead to GPU outweighs compute benefit
-        # 4. CPU inference is actually faster for this workload
         logger.info(f"Reranker: initializing local provider with model {self.model_name}")
-
+
+        # Determine device based on hardware availability.
+        # We always set low_cpu_mem_usage=False to prevent lazy loading (meta tensors)
+        # which can cause issues when accelerate is installed but no GPU is available.
+        # Note: We do NOT use device_map because CrossEncoder internally calls .to(device)
+        # after loading, which conflicts with accelerate's device_map handling.
+        import torch
+
+        # Check for GPU (CUDA) or Apple Silicon (MPS)
+        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
+
+        if has_gpu:
+            device = None  # Let sentence-transformers auto-detect GPU/MPS
+        else:
+            device = "cpu"
+
+        self._model = CrossEncoder(
+            self.model_name,
+            device=device,
+            model_kwargs={"low_cpu_mem_usage": False},
+        )
 
         # Initialize shared executor (limited workers naturally limits concurrency)
         if LocalSTCrossEncoder._executor is None:

@@ -148,11 +163,101 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         else:
             logger.info("Reranker: local provider initialized (using existing executor)")
 
+    def _is_xpc_error(self, error: Exception) -> bool:
+        """
+        Check if an error is an XPC connection error (macOS daemon issue).
+
+        On macOS, long-running daemons can lose XPC connections to system services
+        when the process is idle for extended periods.
+        """
+        error_str = str(error).lower()
+        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
+
+    def _reinitialize_model_sync(self) -> None:
+        """
+        Clear and reinitialize the cross-encoder model synchronously.
+
+        This is used to recover from XPC errors on macOS where the
+        PyTorch/MPS backend loses its connection to system services.
+        """
+        logger.warning(f"Reinitializing reranker model {self.model_name} due to backend error")
+
+        # Clear existing model
+        self._model = None
+
+        # Force garbage collection to free resources
+        import gc
+
+        import torch
+
+        gc.collect()
+
+        # If using CUDA/MPS, clear the cache
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            try:
+                torch.mps.empty_cache()
+            except AttributeError:
+                pass  # Method might not exist in all PyTorch versions
+
+        # Reinitialize the model
+        try:
+            from sentence_transformers import CrossEncoder
+        except ImportError:
+            raise ImportError(
+                "sentence-transformers is required for LocalSTCrossEncoder. "
+                "Install it with: pip install sentence-transformers"
+            )
+
+        # Determine device based on hardware availability
+        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
+
+        if has_gpu:
+            device = None  # Let sentence-transformers auto-detect GPU/MPS
+        else:
+            device = "cpu"
+
+        self._model = CrossEncoder(
+            self.model_name,
+            device=device,
+            model_kwargs={"low_cpu_mem_usage": False},
+        )
+
+        logger.info("Reranker: local provider reinitialized successfully")
+
+    def _predict_with_recovery(self, pairs: list[tuple[str, str]]) -> list[float]:
+        """
+        Predict with automatic recovery from XPC errors.
+
+        This runs synchronously in the thread pool.
+        """
+        max_retries = 1
+        for attempt in range(max_retries + 1):
+            try:
+                scores = self._model.predict(pairs, show_progress_bar=False)
+                return scores.tolist() if hasattr(scores, "tolist") else list(scores)
+            except Exception as e:
+                # Check if this is an XPC error (macOS daemon issue)
+                if self._is_xpc_error(e) and attempt < max_retries:
+                    logger.warning(f"XPC error detected in reranker (attempt {attempt + 1}): {e}")
+                    try:
+                        self._reinitialize_model_sync()
+                        logger.info("Reranker reinitialized successfully, retrying prediction")
+                        continue
+                    except Exception as reinit_error:
+                        logger.error(f"Failed to reinitialize reranker: {reinit_error}")
+                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
+                else:
+                    # Not an XPC error or out of retries
+                    raise
+
     async def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
         """
         Score query-document pairs for relevance.
 
         Uses a dedicated thread pool with limited workers to prevent CPU thrashing.
+        Automatically recovers from XPC errors on macOS by reinitializing the model.
 
         Args:
             pairs: List of (query, document) tuples to score

@@ -165,11 +270,11 @@ class LocalSTCrossEncoder(CrossEncoderModel):
 
         # Use dedicated executor - limited workers naturally limits concurrency
         loop = asyncio.get_event_loop()
-
+        return await loop.run_in_executor(
             LocalSTCrossEncoder._executor,
-
+            self._predict_with_recovery,
+            pairs,
         )
-        return scores.tolist() if hasattr(scores, "tolist") else list(scores)
 
 
 class RemoteTEICrossEncoder(CrossEncoderModel):
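A minimal usage sketch of the updated reranker. The constructor call and initialization step below are assumptions, since this diff does not show how LocalSTCrossEncoder is wired up; the point is that XPC recovery is transparent to callers, who simply await predict():

```python
from hindsight_api.engine.cross_encoder import LocalSTCrossEncoder


async def rerank(query: str, documents: list[str]) -> list[tuple[str, float]]:
    # Hypothetical wiring: the exact construction/initialization API is not shown in this diff.
    encoder = LocalSTCrossEncoder()
    pairs = [(query, doc) for doc in documents]
    # predict() runs in the shared thread pool; an XPC failure on macOS triggers a
    # one-shot model reinitialization and retry inside _predict_with_recovery().
    scores = await encoder.predict(pairs)
    return sorted(zip(documents, scores), key=lambda item: item[1], reverse=True)
```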
hindsight_api/engine/directives/models.py

@@ -0,0 +1,37 @@
+"""Pydantic models for directives."""
+
+from datetime import datetime, timezone
+from uuid import UUID
+
+from pydantic import BaseModel, Field
+
+
+class Directive(BaseModel):
+    """A directive is a hard rule injected into prompts.
+
+    Directives are user-defined rules that guide agent behavior. Unlike mental models
+    which are automatically consolidated from memories, directives are explicit
+    instructions that are always included in relevant prompts.
+
+    Examples:
+        - "Always respond in formal English"
+        - "Never share personal data with third parties"
+        - "Prefer conservative investment recommendations"
+    """
+
+    id: UUID = Field(description="Unique identifier")
+    bank_id: str = Field(description="Bank this directive belongs to")
+    name: str = Field(description="Human-readable name")
+    content: str = Field(description="The directive text to inject into prompts")
+    priority: int = Field(default=0, description="Higher priority directives are injected first")
+    is_active: bool = Field(default=True, description="Whether this directive is currently active")
+    tags: list[str] = Field(default_factory=list, description="Tags for filtering")
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc), description="When this directive was created"
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc), description="When this directive was last updated"
+    )
+
+    class Config:
+        from_attributes = True
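A quick sketch of constructing a Directive. The field values are made-up examples, and serialization assumes Pydantic v2 (which the from_attributes config suggests); how directives are persisted and injected into prompts is handled elsewhere in the package.

```python
from uuid import uuid4

from hindsight_api.engine.directives.models import Directive

directive = Directive(
    id=uuid4(),
    bank_id="support-bot",          # example bank id
    name="formal-tone",
    content="Always respond in formal English",
    priority=10,                    # higher-priority directives are injected first
    tags=["style"],
)

# created_at/updated_at default to the current UTC time.
print(directive.model_dump_json(indent=2))  # assumes Pydantic v2
```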
hindsight_api/engine/embeddings.py

@@ -128,20 +128,98 @@ class LocalSTEmbeddings(Embeddings):
             )
 
         logger.info(f"Embeddings: initializing local provider with model {self.model_name}")
-
-        #
+
+        # Determine device based on hardware availability.
+        # We always set low_cpu_mem_usage=False to prevent lazy loading (meta tensors)
+        # which can cause issues when accelerate is installed but no GPU is available.
+        import torch
+
+        # Check for GPU (CUDA) or Apple Silicon (MPS)
+        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
+
+        if has_gpu:
+            device = None  # Let sentence-transformers auto-detect GPU/MPS
+        else:
+            device = "cpu"
+
         self._model = SentenceTransformer(
             self.model_name,
-
+            device=device,
+            model_kwargs={"low_cpu_mem_usage": False},
         )
 
         self._dimension = self._model.get_sentence_embedding_dimension()
         logger.info(f"Embeddings: local provider initialized (dim: {self._dimension})")
 
+    def _is_xpc_error(self, error: Exception) -> bool:
+        """
+        Check if an error is an XPC connection error (macOS daemon issue).
+
+        On macOS, long-running daemons can lose XPC connections to system services
+        when the process is idle for extended periods.
+        """
+        error_str = str(error).lower()
+        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
+
+    def _reinitialize_model_sync(self) -> None:
+        """
+        Clear and reinitialize the embedding model synchronously.
+
+        This is used to recover from XPC errors on macOS where the
+        PyTorch/MPS backend loses its connection to system services.
+        """
+        logger.warning(f"Reinitializing embedding model {self.model_name} due to backend error")
+
+        # Clear existing model
+        self._model = None
+
+        # Force garbage collection to free resources
+        import gc
+
+        import torch
+
+        gc.collect()
+
+        # If using CUDA/MPS, clear the cache
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            try:
+                torch.mps.empty_cache()
+            except AttributeError:
+                pass  # Method might not exist in all PyTorch versions
+
+        # Reinitialize the model (inline version of initialize() but synchronous)
+        try:
+            from sentence_transformers import SentenceTransformer
+        except ImportError:
+            raise ImportError(
+                "sentence-transformers is required for LocalSTEmbeddings. "
+                "Install it with: pip install sentence-transformers"
+            )
+
+        # Determine device based on hardware availability
+        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
+
+        if has_gpu:
+            device = None  # Let sentence-transformers auto-detect GPU/MPS
+        else:
+            device = "cpu"
+
+        self._model = SentenceTransformer(
+            self.model_name,
+            device=device,
+            model_kwargs={"low_cpu_mem_usage": False},
+        )
+
+        logger.info("Embeddings: local provider reinitialized successfully")
+
     def encode(self, texts: list[str]) -> list[list[float]]:
         """
         Generate embeddings for a list of texts.
 
+        Automatically recovers from XPC errors on macOS by reinitializing the model.
+
         Args:
             texts: List of text strings to encode
 

@@ -150,8 +228,27 @@ class LocalSTEmbeddings(Embeddings):
         """
         if self._model is None:
             raise RuntimeError("Embeddings not initialized. Call initialize() first.")
-
-
+
+        # Try encoding with automatic recovery from XPC errors
+        max_retries = 1
+        for attempt in range(max_retries + 1):
+            try:
+                embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
+                return [emb.tolist() for emb in embeddings]
+            except Exception as e:
+                # Check if this is an XPC error (macOS daemon issue)
+                if self._is_xpc_error(e) and attempt < max_retries:
+                    logger.warning(f"XPC error detected in embedding generation (attempt {attempt + 1}): {e}")
+                    try:
+                        self._reinitialize_model_sync()
+                        logger.info("Model reinitialized successfully, retrying embedding generation")
+                        continue
+                    except Exception as reinit_error:
+                        logger.error(f"Failed to reinitialize model: {reinit_error}")
+                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
+                else:
+                    # Not an XPC error or out of retries
+                    raise
 
 
 class RemoteTEIEmbeddings(Embeddings):
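As with the reranker, XPC recovery is invisible to callers of encode(). A minimal sketch, assuming a no-argument constructor and an initialize() step that this diff does not show:

```python
from hindsight_api.engine.embeddings import LocalSTEmbeddings

# Hypothetical wiring: construction/initialization details are not part of this hunk.
embeddings = LocalSTEmbeddings()
# embeddings.initialize()  # encode() raises RuntimeError until initialize() has been called

vectors = embeddings.encode(
    ["Acme Corp is located in Room 105", "Sarah can be found at the lobby"]
)
print(len(vectors), len(vectors[0]))  # number of texts, embedding dimension
```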
hindsight_api/engine/interface.py

@@ -160,14 +160,14 @@ class MemoryEngineInterface(ABC):
         request_context: "RequestContext",
     ) -> dict[str, Any]:
         """
-        Get bank profile including disposition and
+        Get bank profile including disposition and mission.
 
         Args:
             bank_id: The memory bank ID.
             request_context: Request context for authentication.
 
         Returns:
-            Bank profile dict.
+            Bank profile dict with bank_id, name, disposition, and mission.
         """
         ...
 

@@ -190,25 +190,44 @@ class MemoryEngineInterface(ABC):
         ...
 
     @abstractmethod
-    async def
+    async def merge_bank_mission(
         self,
         bank_id: str,
         new_info: str,
         *,
-        update_disposition: bool = True,
         request_context: "RequestContext",
     ) -> dict[str, Any]:
         """
-        Merge new
+        Merge new mission information into bank profile.
 
         Args:
             bank_id: The memory bank ID.
-            new_info: New
-            update_disposition: Whether to infer disposition from background.
+            new_info: New mission information to merge.
             request_context: Request context for authentication.
 
         Returns:
-            Updated
+            Updated mission info.
+        """
+        ...
+
+    @abstractmethod
+    async def set_bank_mission(
+        self,
+        bank_id: str,
+        mission: str,
+        *,
+        request_context: "RequestContext",
+    ) -> dict[str, Any]:
+        """
+        Set the bank's mission (replaces existing).
+
+        Args:
+            bank_id: The memory bank ID.
+            mission: The mission text.
+            request_context: Request context for authentication.
+
+        Returns:
+            Dict with bank_id and mission.
         """
         ...
 
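A sketch of how a caller might drive the new mission API on a concrete MemoryEngineInterface implementation. The bank id, mission text, and request_context wiring are placeholders, since this diff only shows the abstract interface:

```python
from typing import Any


async def configure_bank_mission(
    engine: "MemoryEngineInterface", request_context: "RequestContext"
) -> dict[str, Any]:
    # Replace the mission outright...
    await engine.set_bank_mission(
        "support-bot",
        "Track customer preferences and open issues for the support team.",
        request_context=request_context,
    )
    # ...or fold new information into the existing mission.
    return await engine.merge_bank_mission(
        "support-bot",
        "The team now also handles billing questions.",
        request_context=request_context,
    )
```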
hindsight_api/engine/interface.py

@@ -518,7 +537,7 @@ class MemoryEngineInterface(ABC):
         bank_id: str,
         *,
         request_context: "RequestContext",
-    ) ->
+    ) -> dict[str, Any]:
         """
         List async operations for a bank.
 

@@ -527,7 +546,7 @@ class MemoryEngineInterface(ABC):
             request_context: Request context for authentication.
 
         Returns:
-
+            Dict with 'total' (int) and 'operations' (list of operation dicts).
         """
         ...
 

@@ -561,16 +580,16 @@ class MemoryEngineInterface(ABC):
         bank_id: str,
         *,
         name: str | None = None,
-
+        mission: str | None = None,
         request_context: "RequestContext",
     ) -> dict[str, Any]:
         """
-        Update bank name and/or
+        Update bank name and/or mission.
 
         Args:
             bank_id: The memory bank ID.
             name: New bank name (optional).
-
+            mission: New mission text (optional, replaces existing).
             request_context: Request context for authentication.
 
         Returns:
|