hindsight-api 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -504,12 +504,11 @@ class MemoryEngine(MemoryEngineInterface):
         if request_context is None:
             raise AuthenticationError("RequestContext is required when tenant extension is configured")

-        # For internal/background operations (e.g., worker tasks), skip extension authentication
-        # if the schema has already been set by execute_task via the _schema field.
+        # For internal/background operations (e.g., worker tasks), skip extension authentication.
+        # The task was already authenticated at submission time, and execute_task sets _current_schema
+        # from the task's _schema field. For public schema tasks, _current_schema keeps its default "public".
         if request_context.internal:
-            current = _current_schema.get()
-            if current and current != "public":
-                return current
+            return _current_schema.get()

         # Let AuthenticationError propagate - HTTP layer will convert to 401
         tenant_context = await self._tenant_extension.authenticate(request_context)
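The simplified "if request_context.internal" branch relies on execute_task having already restored the task's schema into the _current_schema contextvar before engine code runs. A minimal sketch of that contract, assuming a dict-shaped task payload and a generic handler callable (the package's real execute_task may differ):

import contextvars

# Default mirrors the new comment: tasks for the public schema leave the contextvar at "public".
_current_schema: contextvars.ContextVar[str] = contextvars.ContextVar("_current_schema", default="public")

async def execute_task(task: dict, handler) -> None:
    # The task was authenticated when it was submitted; here we only restore the
    # schema it was submitted under, then run the engine-facing handler.
    token = _current_schema.set(task.get("_schema") or "public")
    try:
        await handler(task)  # internal requests now read _current_schema.get() unconditionally
    finally:
        _current_schema.reset(token)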
@@ -789,7 +788,7 @@ class MemoryEngine(MemoryEngineInterface):
         kwargs = {"name": self._pg0_instance_name}
         if self._pg0_port is not None:
             kwargs["port"] = self._pg0_port
-        pg0 = EmbeddedPostgres(**kwargs)  # type: ignore[invalid-argument-type] - dict kwargs
+        pg0 = EmbeddedPostgres(**kwargs)
         # Check if pg0 is already running before we start it
         was_already_running = await pg0.is_running()
         self.db_url = await pg0.ensure_running()
@@ -889,6 +888,23 @@ class MemoryEngine(MemoryEngineInterface):
         # Use configured database schema for migrations (defaults to "public")
         run_migrations(self.db_url, schema=get_config().database_schema)

+        # Migrate all existing tenant schemas (if multi-tenant)
+        if self._tenant_extension is not None:
+            try:
+                tenants = await self._tenant_extension.list_tenants()
+                if tenants:
+                    logger.info(f"Running migrations on {len(tenants)} tenant schemas...")
+                    for tenant in tenants:
+                        schema = tenant.schema
+                        if schema and schema != "public":
+                            try:
+                                run_migrations(self.db_url, schema=schema)
+                            except Exception as e:
+                                logger.warning(f"Failed to migrate tenant schema {schema}: {e}")
+                    logger.info("Tenant schema migrations completed")
+            except Exception as e:
+                logger.warning(f"Failed to run tenant schema migrations: {e}")
+
         # Ensure embedding column dimension matches the model's dimension
         # This is done after migrations and after embeddings.initialize()
         ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=get_config().database_schema)
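The new startup loop assumes the tenant extension exposes an async list_tenants() whose items carry a schema attribute, and it deliberately logs and skips per-schema failures so one broken tenant cannot block startup. A rough sketch of that assumed interface (only list_tenants and schema are taken from the diff; everything else here is illustrative):

from dataclasses import dataclass

@dataclass
class TenantRecord:
    tenant_id: str
    schema: str | None  # Postgres schema for the tenant; "public"/None is skipped by the loop above

class StaticTenantExtension:
    """Illustrative tenant extension backed by an in-memory list."""

    def __init__(self, records: list[TenantRecord]):
        self._records = records

    async def list_tenants(self) -> list[TenantRecord]:
        # A real extension would query a control-plane table or external service here.
        return list(self._records)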
@@ -1175,15 +1191,15 @@ class MemoryEngine(MemoryEngineInterface):
             context: Context about when/why this memory was formed
             event_date: When the event occurred (defaults to now)
             document_id: Optional document ID for tracking (always upserts if document already exists)
-            fact_type_override: Override fact type ('world', 'experience', 'opinion')
-            confidence_score: Confidence score for opinions (0.0 to 1.0)
+            fact_type_override: Override fact type ('world', 'experience')
+            confidence_score: Confidence score (0.0 to 1.0)
             request_context: Request context for authentication.

         Returns:
             List of created unit IDs
         """
         # Build content dict
-        content_dict: RetainContentDict = {"content": content, "context": context}  # type: ignore[typeddict-item] - building incrementally
+        content_dict: RetainContentDict = {"content": content, "context": context}
         if event_date:
             content_dict["event_date"] = event_date
         if document_id:
@@ -1231,8 +1247,8 @@ class MemoryEngine(MemoryEngineInterface):
                 - "document_id" (optional): Document ID for this specific content item
             document_id: **DEPRECATED** - Use "document_id" key in each content dict instead.
                 Applies the same document_id to ALL content items that don't specify their own.
-            fact_type_override: Override fact type for all facts ('world', 'experience', 'opinion')
-            confidence_score: Confidence score for opinions (0.0 to 1.0)
+            fact_type_override: Override fact type for all facts ('world', 'experience')
+            confidence_score: Confidence score (0.0 to 1.0)
             return_usage: If True, returns tuple of (unit_ids, TokenUsage). Default False for backward compatibility.

         Returns:
@@ -1554,16 +1570,16 @@ class MemoryEngine(MemoryEngineInterface):
         if fact_type is None:
             fact_type = list(VALID_RECALL_FACT_TYPES)

-        # Validate fact types early
+        # Filter out 'opinion' early (deprecated, silently ignore)
+        fact_type = [ft for ft in fact_type if ft != "opinion"]
+
+        # Validate fact types
         invalid_types = set(fact_type) - VALID_RECALL_FACT_TYPES
         if invalid_types:
             raise ValueError(
                 f"Invalid fact type(s): {', '.join(sorted(invalid_types))}. "
                 f"Must be one of: {', '.join(sorted(VALID_RECALL_FACT_TYPES))}"
             )
-
-        # Filter out 'opinion' - opinions are no longer returned from recall
-        fact_type = [ft for ft in fact_type if ft != "opinion"]
         if not fact_type:
             # All requested types were opinions - return empty result
             return RecallResultModel(results=[], entities={}, chunks={})
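Moving the filter ahead of validation changes the caller-visible behavior: a deprecated "opinion" entry is now dropped silently instead of tripping the ValueError, and only genuinely unknown types still raise. A self-contained sketch of the new ordering (the contents of VALID_RECALL_FACT_TYPES are assumed here, not quoted from the package):

VALID_RECALL_FACT_TYPES = {"world", "experience"}  # assumed set of accepted types

def normalize_fact_types(fact_type: list[str] | None) -> list[str]:
    if fact_type is None:
        fact_type = list(VALID_RECALL_FACT_TYPES)
    # Drop the deprecated type before validating, mirroring the reordered hunk above.
    fact_type = [ft for ft in fact_type if ft != "opinion"]
    invalid = set(fact_type) - VALID_RECALL_FACT_TYPES
    if invalid:
        raise ValueError(f"Invalid fact type(s): {', '.join(sorted(invalid))}")
    return fact_type

assert normalize_fact_types(["opinion", "world"]) == ["world"]  # "opinion" silently ignored
assert normalize_fact_types(["opinion"]) == []  # caller then returns an empty RecallResultModel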
@@ -2219,44 +2235,15 @@ class MemoryEngine(MemoryEngineInterface):
             )
             top_results_dicts.append(result_dict)

-        # Get entities for each fact if include_entities is requested
-        fact_entity_map = {}  # unit_id -> list of (entity_id, entity_name)
-        if include_entities and top_scored:
-            unit_ids = [uuid.UUID(sr.id) for sr in top_scored]
-            if unit_ids:
-                async with acquire_with_retry(pool) as entity_conn:
-                    entity_rows = await entity_conn.fetch(
-                        f"""
-                        SELECT ue.unit_id, e.id as entity_id, e.canonical_name
-                        FROM {fq_table("unit_entities")} ue
-                        JOIN {fq_table("entities")} e ON ue.entity_id = e.id
-                        WHERE ue.unit_id = ANY($1::uuid[])
-                        """,
-                        unit_ids,
-                    )
-                for row in entity_rows:
-                    unit_id = str(row["unit_id"])
-                    if unit_id not in fact_entity_map:
-                        fact_entity_map[unit_id] = []
-                    fact_entity_map[unit_id].append(
-                        {"entity_id": str(row["entity_id"]), "canonical_name": row["canonical_name"]}
-                    )
-
         # Convert results to MemoryFact objects
         memory_facts = []
         for result_dict in top_results_dicts:
-            result_id = str(result_dict.get("id"))
-            # Get entity names for this fact
-            entity_names = None
-            if include_entities and result_id in fact_entity_map:
-                entity_names = [e["canonical_name"] for e in fact_entity_map[result_id]]
-
             memory_facts.append(
                 MemoryFact(
-                    id=result_id,
+                    id=str(result_dict.get("id")),
                     text=result_dict.get("text"),
                     fact_type=result_dict.get("fact_type", "world"),
-                    entities=entity_names,
+                    entities=None,  # Entity observations removed
                     context=result_dict.get("context"),
                     occurred_start=result_dict.get("occurred_start"),
                     occurred_end=result_dict.get("occurred_end"),
@@ -2267,38 +2254,12 @@ class MemoryEngine(MemoryEngineInterface):
                 )
             )

-        # Fetch entity observations if requested
+        # Entity observations removed - always set to None
         entities_dict = None
-        total_entity_tokens = 0
-        total_chunk_tokens = 0
-        if include_entities and fact_entity_map:
-            # Collect unique entities in order of fact relevance (preserving order from top_scored)
-            # Use a list to maintain order, but track seen entities to avoid duplicates
-            entities_ordered = []  # list of (entity_id, entity_name) tuples
-            seen_entity_ids = set()
-
-            # Iterate through facts in relevance order
-            for sr in top_scored:
-                unit_id = sr.id
-                if unit_id in fact_entity_map:
-                    for entity in fact_entity_map[unit_id]:
-                        entity_id = entity["entity_id"]
-                        entity_name = entity["canonical_name"]
-                        if entity_id not in seen_entity_ids:
-                            entities_ordered.append((entity_id, entity_name))
-                            seen_entity_ids.add(entity_id)
-
-            # Return entities with empty observations (summaries now live in mental models)
-            entities_dict = {}
-            for entity_id, entity_name in entities_ordered:
-                entities_dict[entity_name] = EntityState(
-                    entity_id=entity_id,
-                    canonical_name=entity_name,
-                    observations=[],  # Mental models provide this now
-                )

         # Fetch chunks if requested
         chunks_dict = None
+        total_chunk_tokens = 0
         if include_chunks and top_scored:
             from .response_models import ChunkInfo

@@ -2367,7 +2328,6 @@ class MemoryEngine(MemoryEngineInterface):
         # Log final recall stats
         total_time = time.time() - recall_start
         num_chunks = len(chunks_dict) if chunks_dict else 0
-        num_entities = len(entities_dict) if entities_dict else 0
         # Include wait times in log if significant
         wait_parts = []
         if semaphore_wait > 0.01:
@@ -2376,7 +2336,7 @@ class MemoryEngine(MemoryEngineInterface):
             wait_parts.append(f"conn={max_conn_wait:.3f}s")
         wait_info = f" | waits: {', '.join(wait_parts)}" if wait_parts else ""
         log_buffer.append(
-            f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok), {num_entities} entities ({total_entity_tokens} tok) | {fact_type_summary} | {total_time:.3f}s{wait_info}"
+            f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok) | {fact_type_summary} | {total_time:.3f}s{wait_info}"
         )
         if not quiet:
             logger.info("\n" + "\n".join(log_buffer))
@@ -3550,7 +3510,6 @@ class MemoryEngine(MemoryEngineInterface):
             ReflectResult containing:
             - text: Plain text answer
             - based_on: Empty dict (agent retrieves facts dynamically)
-            - new_opinions: Empty list
             - structured_output: None (not yet supported for agentic reflect)
         """
         # Use cached LLM config
@@ -3875,7 +3834,6 @@ class MemoryEngine(MemoryEngineInterface):
         result = ReflectResult(
             text=agent_result.text,
             based_on=based_on,
-            new_opinions=[],  # Learnings stored as mental models
             structured_output=agent_result.structured_output,
             usage=usage,
             tool_trace=tool_trace_result,
@@ -3904,32 +3862,6 @@ class MemoryEngine(MemoryEngineInterface):

         return result

-    async def get_entity_observations(
-        self,
-        bank_id: str,
-        entity_id: str,
-        *,
-        limit: int = 10,
-        request_context: "RequestContext",
-    ) -> list[Any]:
-        """
-        Get observations for an entity.
-
-        NOTE: Entity observations/summaries have been moved to mental models.
-        This method returns an empty list. Use mental models for entity summaries.
-
-        Args:
-            bank_id: bank IDentifier
-            entity_id: Entity UUID to get observations for
-            limit: Ignored (kept for backwards compatibility)
-            request_context: Request context for authentication.
-
-        Returns:
-            Empty list (observations now in mental models)
-        """
-        await self._authenticate_tenant(request_context)
-        return []
-
     async def list_entities(
         self,
         bank_id: str,
@@ -4116,36 +4048,6 @@ class MemoryEngine(MemoryEngineInterface):
         await self._authenticate_tenant(request_context)
         return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=[])

-    async def regenerate_entity_observations(
-        self,
-        bank_id: str,
-        entity_id: str,
-        entity_name: str,
-        *,
-        version: str | None = None,
-        conn=None,
-        request_context: "RequestContext",
-    ) -> list[str]:
-        """
-        Regenerate observations for an entity.
-
-        NOTE: Entity observations/summaries have been moved to mental models.
-        This method is now a no-op and returns an empty list.
-
-        Args:
-            bank_id: bank IDentifier
-            entity_id: Entity UUID
-            entity_name: Canonical name of the entity
-            version: Entity's last_seen timestamp when task was created (for deduplication)
-            conn: Optional database connection (ignored)
-            request_context: Request context for authentication.
-
-        Returns:
-            Empty list (observations now in mental models)
-        """
-        await self._authenticate_tenant(request_context)
-        return []
-
     # =========================================================================
     # Statistics & Operations (for HTTP API layer)
     # =========================================================================
@@ -4256,9 +4158,6 @@ class MemoryEngine(MemoryEngineInterface):
         if not entity_row:
             return None

-        # Get observations for the entity
-        observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
-
         return {
             "id": str(entity_row["id"]),
             "canonical_name": entity_row["canonical_name"],
@@ -4266,7 +4165,7 @@ class MemoryEngine(MemoryEngineInterface):
             "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
             "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
             "metadata": entity_row["metadata"] or {},
-            "observations": observations,
+            "observations": [],
         }

     def _parse_observations(self, observations_raw: list):
@@ -263,7 +263,6 @@ class ReflectResult(BaseModel):
                     }
                 ],
             },
-            "new_opinions": ["Machine learning has great potential in healthcare"],
             "structured_output": {"summary": "ML in healthcare", "confidence": 0.9},
             "usage": {"input_tokens": 1500, "output_tokens": 500, "total_tokens": 2000},
         }
@@ -272,9 +271,8 @@ class ReflectResult(BaseModel):

    text: str = Field(description="The formulated answer text")
    based_on: dict[str, Any] = Field(
-        description="Facts used to formulate the answer, organized by type (world, experience, opinion, mental_models, directives)"
+        description="Facts used to formulate the answer, organized by type (world, experience, mental_models, directives)"
    )
-    new_opinions: list[str] = Field(default_factory=list, description="List of newly formed opinions during reflection")
    structured_output: dict[str, Any] | None = Field(
        default=None,
        description="Structured output parsed according to the provided response schema. Only present when response_schema was provided.",
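Callers that previously read new_opinions off the reflect result should drop that access; the field is removed from the model rather than deprecated-but-present. A tiny stand-in (only the fields relevant here, not the package's full model) showing the effect:

from typing import Any

from pydantic import BaseModel, Field

class ReflectResultStandIn(BaseModel):
    # Illustrative subset of the 0.4.3 ReflectResult - new_opinions no longer exists.
    text: str = Field(description="The formulated answer text")
    based_on: dict[str, Any] = Field(default_factory=dict)

result = ReflectResultStandIn(text="answer", based_on={"world": []})
assert not hasattr(result, "new_opinions")  # attribute access now fails instead of returning []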
@@ -297,24 +295,6 @@ class ReflectResult(BaseModel):
    )


-class Opinion(BaseModel):
-    """
-    An opinion with confidence score.
-
-    Opinions represent the bank's formed perspectives on topics,
-    with a confidence level indicating strength of belief.
-    """
-
-    model_config = ConfigDict(
-        json_schema_extra={
-            "example": {"text": "Machine learning has great potential in healthcare", "confidence": 0.85}
-        }
-    )
-
-    text: str = Field(description="The opinion text")
-    confidence: float = Field(description="Confidence score between 0.0 and 1.0")
-
-
 class EntityObservation(BaseModel):
    """
    An observation about an entity.
@@ -693,7 +693,6 @@ async def _extract_facts_from_chunk(
    context: str,
    llm_config: "LLMConfig",
    agent_name: str = None,
-    extract_opinions: bool = False,
 ) -> tuple[list[dict[str, str]], TokenUsage]:
    """
    Extract facts from a single chunk (internal helper for parallel processing).
@@ -707,17 +706,9 @@

    logger = logging.getLogger(__name__)

-    memory_bank_context = f"\n- Your name: {agent_name}" if agent_name and extract_opinions else ""
-
-    # Determine which fact types to extract based on the flag
+    # Determine which fact types to extract
    # Note: We use "assistant" in the prompt but convert to "bank" for storage
-    if extract_opinions:
-        # Opinion extraction uses a separate prompt (not this one)
-        fact_types_instruction = "Extract ONLY 'opinion' type facts (formed opinions, beliefs, and perspectives). DO NOT extract 'world' or 'assistant' facts."
-    else:
-        fact_types_instruction = (
-            "Extract ONLY 'world' and 'assistant' type facts. DO NOT extract opinions - those are extracted separately."
-        )
+    fact_types_instruction = "Extract ONLY 'world' and 'assistant' type facts."

    # Check config for extraction mode and causal link extraction
    config = get_config()
@@ -770,7 +761,6 @@
    # Format event_date with day of week for better temporal reasoning
    event_date_formatted = event_date.strftime("%A, %B %d, %Y")  # e.g., "Monday, June 10, 2024"
    user_message = f"""Extract facts from the following text chunk.
-{memory_bank_context}

Chunk: {chunk_index + 1}/{total_chunks}
Event Date: {event_date_formatted} ({event_date.isoformat()})
@@ -782,12 +772,28 @@ Text:
    usage = TokenUsage()  # Track cumulative usage across retries
    for attempt in range(max_retries):
        try:
+            # Use retain-specific overrides if set, otherwise fall back to global LLM config
+            max_retries = (
+                config.retain_llm_max_retries if config.retain_llm_max_retries is not None else config.llm_max_retries
+            )
+            initial_backoff = (
+                config.retain_llm_initial_backoff
+                if config.retain_llm_initial_backoff is not None
+                else config.llm_initial_backoff
+            )
+            max_backoff = (
+                config.retain_llm_max_backoff if config.retain_llm_max_backoff is not None else config.llm_max_backoff
+            )
+
            extraction_response_json, call_usage = await llm_config.call(
                messages=[{"role": "system", "content": prompt}, {"role": "user", "content": user_message}],
                response_format=response_schema,
                scope="memory_extract_facts",
                temperature=0.1,
                max_completion_tokens=config.retain_max_completion_tokens,
+                max_retries=max_retries,
+                initial_backoff=initial_backoff,
+                max_backoff=max_backoff,
                skip_validation=True,  # Get raw JSON, we'll validate leniently
                return_usage=True,
            )
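The added block applies the same "scoped override, else global" precedence that the main.py wiring further below now threads through for retain, reflect, and consolidation settings. A self-contained sketch of that precedence (field names mirror the diff; the default values are made up for illustration):

from dataclasses import dataclass

@dataclass
class _Cfg:
    llm_max_retries: int = 5                   # global default (illustrative value)
    retain_llm_max_retries: int | None = None  # None means "inherit the global value"

def effective_retain_retries(cfg: _Cfg) -> int:
    # The per-scope override wins only when it is explicitly set.
    return cfg.retain_llm_max_retries if cfg.retain_llm_max_retries is not None else cfg.llm_max_retries

assert effective_retain_retries(_Cfg()) == 5
assert effective_retain_retries(_Cfg(retain_llm_max_retries=2)) == 2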
@@ -1013,7 +1019,6 @@ async def _extract_facts_with_auto_split(
    context: str,
    llm_config: LLMConfig,
    agent_name: str = None,
-    extract_opinions: bool = False,
 ) -> tuple[list[dict[str, str]], TokenUsage]:
    """
    Extract facts from a chunk with automatic splitting if output exceeds token limits.
@@ -1029,7 +1034,6 @@
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Optional agent name (memory owner)
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)

    Returns:
        Tuple of (facts list, token usage) extracted from the chunk (possibly from sub-chunks)
@@ -1048,7 +1052,6 @@
            context=context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
    except OutputTooLongError:
        # Output exceeded token limits - split the chunk in half and retry
@@ -1093,7 +1096,6 @@
                context=context,
                llm_config=llm_config,
                agent_name=agent_name,
-                extract_opinions=extract_opinions,
            ),
            _extract_facts_with_auto_split(
                chunk=second_half,
@@ -1103,7 +1105,6 @@
                context=context,
                llm_config=llm_config,
                agent_name=agent_name,
-                extract_opinions=extract_opinions,
            ),
        ]

@@ -1127,7 +1128,6 @@ async def extract_facts_from_text(
    llm_config: LLMConfig,
    agent_name: str,
    context: str = "",
-    extract_opinions: bool = False,
 ) -> tuple[list[Fact], list[tuple[str, int]], TokenUsage]:
    """
    Extract semantic facts from conversational or narrative text using LLM.
@@ -1144,7 +1144,6 @@
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Agent name (memory owner)
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and bank facts (no opinions)

    Returns:
        Tuple of (facts, chunks, usage) where:
@@ -1172,7 +1171,6 @@
            context=context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
        for i, chunk in enumerate(chunks)
    ]
@@ -1204,7 +1202,7 @@ SECONDS_PER_FACT = 10


 async def extract_facts_from_contents(
-    contents: list[RetainContent], llm_config, agent_name: str, extract_opinions: bool = False
+    contents: list[RetainContent], llm_config, agent_name: str
 ) -> tuple[list[ExtractedFactType], list[ChunkMetadata], TokenUsage]:
    """
    Extract facts from multiple content items in parallel.
@@ -1219,7 +1217,6 @@ async def extract_facts_from_contents(
        contents: List of RetainContent objects to process
        llm_config: LLM configuration for fact extraction
        agent_name: Name of the agent (for agent-related fact detection)
-        extract_opinions: If True, extract only opinions; otherwise world/bank facts

    Returns:
        Tuple of (extracted_facts, chunks_metadata, usage)
@@ -1238,7 +1235,6 @@ async def extract_facts_from_contents(
            context=item.context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
        fact_extraction_tasks.append(task)

@@ -101,11 +101,8 @@ async def retain_batch(

    # Step 1: Extract facts from all contents
    step_start = time.time()
-    extract_opinions = fact_type_override == "opinion"

-    extracted_facts, chunks, usage = await fact_extraction.extract_facts_from_contents(
-        contents, llm_config, agent_name, extract_opinions
-    )
+    extracted_facts, chunks, usage = await fact_extraction.extract_facts_from_contents(contents, llm_config, agent_name)
    log_buffer.append(
        f"[1] Extract facts: {len(extracted_facts)} facts, {len(chunks)} chunks from {len(contents)} contents in {time.time() - step_start:.3f}s"
    )
@@ -19,7 +19,6 @@ async def extract_facts(
    context: str = "",
    llm_config: "LLMConfig" = None,
    agent_name: str = None,
-    extract_opinions: bool = False,
 ) -> tuple[list["Fact"], list[tuple[str, int]]]:
    """
    Extract semantic facts from text using LLM.
@@ -36,7 +35,6 @@
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Optional agent name to help identify agent-related facts
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)

    Returns:
        Tuple of (facts, chunks) where:
@@ -55,7 +53,6 @@
        context=context,
        llm_config=llm_config,
        agent_name=agent_name,
-        extract_opinions=extract_opinions,
    )

    if not facts:
hindsight_api/main.py CHANGED
@@ -140,13 +140,6 @@ def main():
        args.port = DEFAULT_DAEMON_PORT
        args.host = "127.0.0.1"  # Only bind to localhost for security

-        # Force CPU mode for daemon to avoid macOS MPS/XPC issues
-        # MPS (Metal Performance Shaders) has unstable XPC connections in background processes
-        # that can cause assertion failures and process crashes at the C++ level
-        # (which Python exception handlers cannot catch)
-        os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
-        os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
-
        # Check if another daemon is already running
        daemon_lock = DaemonLock()
        if not daemon_lock.acquire():
@@ -183,19 +176,40 @@ def main():
        llm_model=config.llm_model,
        llm_base_url=config.llm_base_url,
        llm_max_concurrent=config.llm_max_concurrent,
+        llm_max_retries=config.llm_max_retries,
+        llm_initial_backoff=config.llm_initial_backoff,
+        llm_max_backoff=config.llm_max_backoff,
        llm_timeout=config.llm_timeout,
+        llm_vertexai_project_id=config.llm_vertexai_project_id,
+        llm_vertexai_region=config.llm_vertexai_region,
+        llm_vertexai_service_account_key=config.llm_vertexai_service_account_key,
        retain_llm_provider=config.retain_llm_provider,
        retain_llm_api_key=config.retain_llm_api_key,
        retain_llm_model=config.retain_llm_model,
        retain_llm_base_url=config.retain_llm_base_url,
+        retain_llm_max_concurrent=config.retain_llm_max_concurrent,
+        retain_llm_max_retries=config.retain_llm_max_retries,
+        retain_llm_initial_backoff=config.retain_llm_initial_backoff,
+        retain_llm_max_backoff=config.retain_llm_max_backoff,
+        retain_llm_timeout=config.retain_llm_timeout,
        reflect_llm_provider=config.reflect_llm_provider,
        reflect_llm_api_key=config.reflect_llm_api_key,
        reflect_llm_model=config.reflect_llm_model,
        reflect_llm_base_url=config.reflect_llm_base_url,
+        reflect_llm_max_concurrent=config.reflect_llm_max_concurrent,
+        reflect_llm_max_retries=config.reflect_llm_max_retries,
+        reflect_llm_initial_backoff=config.reflect_llm_initial_backoff,
+        reflect_llm_max_backoff=config.reflect_llm_max_backoff,
+        reflect_llm_timeout=config.reflect_llm_timeout,
        consolidation_llm_provider=config.consolidation_llm_provider,
        consolidation_llm_api_key=config.consolidation_llm_api_key,
        consolidation_llm_model=config.consolidation_llm_model,
        consolidation_llm_base_url=config.consolidation_llm_base_url,
+        consolidation_llm_max_concurrent=config.consolidation_llm_max_concurrent,
+        consolidation_llm_max_retries=config.consolidation_llm_max_retries,
+        consolidation_llm_initial_backoff=config.consolidation_llm_initial_backoff,
+        consolidation_llm_max_backoff=config.consolidation_llm_max_backoff,
+        consolidation_llm_timeout=config.consolidation_llm_timeout,
        embeddings_provider=config.embeddings_provider,
        embeddings_local_model=config.embeddings_local_model,
        embeddings_local_force_cpu=config.embeddings_local_force_cpu,
@@ -225,7 +239,6 @@ def main():
        retain_extract_causal_links=config.retain_extract_causal_links,
        retain_extraction_mode=config.retain_extraction_mode,
        retain_custom_instructions=config.retain_custom_instructions,
-        retain_observations_async=config.retain_observations_async,
        enable_observations=config.enable_observations,
        consolidation_batch_size=config.consolidation_batch_size,
        consolidation_max_tokens=config.consolidation_max_tokens,
@@ -240,8 +253,9 @@
        worker_id=config.worker_id,
        worker_poll_interval_ms=config.worker_poll_interval_ms,
        worker_max_retries=config.worker_max_retries,
-        worker_batch_size=config.worker_batch_size,
        worker_http_port=config.worker_http_port,
+        worker_max_slots=config.worker_max_slots,
+        worker_consolidation_max_slots=config.worker_consolidation_max_slots,
        reflect_max_iterations=config.reflect_max_iterations,
        mental_model_refresh_concurrency=config.mental_model_refresh_concurrency,
    )
@@ -353,6 +367,7 @@ def main():
    # Start idle checker in daemon mode
    if idle_middleware is not None:
        # Start the idle checker in a background thread with its own event loop
+        import logging
        import threading

        def run_idle_checker():
@@ -363,12 +378,12 @@ def main():
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                loop.run_until_complete(idle_middleware._check_idle())
-            except Exception:
-                pass
+            except Exception as e:
+                logging.error(f"Idle checker error: {e}", exc_info=True)

        threading.Thread(target=run_idle_checker, daemon=True).start()

-    uvicorn.run(**uvicorn_config)  # type: ignore[invalid-argument-type] - dict kwargs
+    uvicorn.run(**uvicorn_config)


 if __name__ == "__main__":