hindsight-api 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hindsight_api/__init__.py +10 -2
  2. hindsight_api/alembic/README +1 -0
  3. hindsight_api/alembic/env.py +146 -0
  4. hindsight_api/alembic/script.py.mako +28 -0
  5. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
  6. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  7. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  8. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
  9. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
  10. hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
  11. hindsight_api/api/__init__.py +2 -4
  12. hindsight_api/api/http.py +112 -164
  13. hindsight_api/api/mcp.py +2 -1
  14. hindsight_api/config.py +154 -0
  15. hindsight_api/engine/__init__.py +7 -2
  16. hindsight_api/engine/cross_encoder.py +225 -16
  17. hindsight_api/engine/embeddings.py +198 -19
  18. hindsight_api/engine/entity_resolver.py +56 -29
  19. hindsight_api/engine/llm_wrapper.py +147 -106
  20. hindsight_api/engine/memory_engine.py +337 -192
  21. hindsight_api/engine/response_models.py +15 -17
  22. hindsight_api/engine/retain/bank_utils.py +25 -35
  23. hindsight_api/engine/retain/entity_processing.py +5 -5
  24. hindsight_api/engine/retain/fact_extraction.py +86 -24
  25. hindsight_api/engine/retain/fact_storage.py +1 -1
  26. hindsight_api/engine/retain/link_creation.py +12 -6
  27. hindsight_api/engine/retain/link_utils.py +50 -56
  28. hindsight_api/engine/retain/observation_regeneration.py +264 -0
  29. hindsight_api/engine/retain/orchestrator.py +31 -44
  30. hindsight_api/engine/retain/types.py +14 -0
  31. hindsight_api/engine/search/reranking.py +6 -10
  32. hindsight_api/engine/search/retrieval.py +2 -2
  33. hindsight_api/engine/search/think_utils.py +59 -30
  34. hindsight_api/engine/search/tracer.py +1 -1
  35. hindsight_api/main.py +201 -0
  36. hindsight_api/migrations.py +61 -39
  37. hindsight_api/models.py +1 -2
  38. hindsight_api/pg0.py +17 -36
  39. hindsight_api/server.py +43 -0
  40. {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +2 -3
  41. hindsight_api-0.1.1.dist-info/RECORD +60 -0
  42. hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
  43. hindsight_api/cli.py +0 -128
  44. hindsight_api/web/__init__.py +0 -12
  45. hindsight_api/web/server.py +0 -109
  46. hindsight_api-0.0.21.dist-info/RECORD +0 -50
  47. hindsight_api-0.0.21.dist-info/entry_points.txt +0 -2
  48. {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
@@ -11,17 +11,20 @@ This implements a sophisticated memory architecture that combines:
  import json
  import os
  from datetime import datetime, timedelta, timezone
- from typing import Any, Dict, List, Optional, Tuple, Union, TypedDict
+ from typing import Any, Dict, List, Optional, Tuple, Union, TypedDict, TYPE_CHECKING
  import asyncpg
  import asyncio
- from .embeddings import Embeddings, SentenceTransformersEmbeddings
- from .cross_encoder import CrossEncoderModel
+ from .embeddings import Embeddings, create_embeddings_from_env
+ from .cross_encoder import CrossEncoderModel, create_cross_encoder_from_env
  import time
  import numpy as np
  import uuid
  import logging
  from pydantic import BaseModel, Field

+ if TYPE_CHECKING:
+ from ..config import HindsightConfig
+

  class RetainContentDict(TypedDict, total=False):
  """Type definition for content items in retain_batch_async.
@@ -48,7 +51,7 @@ from .entity_resolver import EntityResolver
  from .retain import embedding_utils, bank_utils
  from .search import think_utils, observation_utils
  from .llm_wrapper import LLMConfig
- from .response_models import RecallResult as RecallResultModel, ReflectResult, MemoryFact, EntityState, EntityObservation
+ from .response_models import RecallResult as RecallResultModel, ReflectResult, MemoryFact, EntityState, EntityObservation, VALID_RECALL_FACT_TYPES
  from .task_backend import TaskBackend, AsyncIOQueueBackend
  from .search.reranking import CrossEncoderReranker
  from ..pg0 import EmbeddedPostgres
@@ -99,10 +102,10 @@ class MemoryEngine:

  def __init__(
  self,
- db_url: str,
- memory_llm_provider: str,
- memory_llm_api_key: str,
- memory_llm_model: str,
+ db_url: Optional[str] = None,
+ memory_llm_provider: Optional[str] = None,
+ memory_llm_api_key: Optional[str] = None,
+ memory_llm_model: Optional[str] = None,
  memory_llm_base_url: Optional[str] = None,
  embeddings: Optional[Embeddings] = None,
  cross_encoder: Optional[CrossEncoderModel] = None,
@@ -110,35 +113,67 @@ class MemoryEngine:
  pool_min_size: int = 5,
  pool_max_size: int = 100,
  task_backend: Optional[TaskBackend] = None,
+ run_migrations: bool = True,
  ):
  """
  Initialize the temporal + semantic memory system.

+ All parameters are optional and will be read from environment variables if not provided.
+ See hindsight_api.config for environment variable names and defaults.
+
  Args:
- db_url: PostgreSQL connection URL (postgresql://user:pass@host:port/dbname). Required.
- memory_llm_provider: LLM provider for memory operations: "openai", "groq", or "ollama". Required.
- memory_llm_api_key: API key for the LLM provider. Required.
- memory_llm_model: Model name to use for all memory operations (put/think/opinions). Required.
- memory_llm_base_url: Base URL for the LLM API. Optional. Defaults based on provider:
- - groq: https://api.groq.com/openai/v1
- - ollama: http://localhost:11434/v1
- embeddings: Embeddings implementation to use. If not provided, uses SentenceTransformersEmbeddings
- cross_encoder: Cross-encoder model for reranking. If not provided, uses default when cross-encoder reranker is selected
- query_analyzer: Query analyzer implementation to use. If not provided, uses TransformerQueryAnalyzer
+ db_url: PostgreSQL connection URL. Defaults to HINDSIGHT_API_DATABASE_URL env var or "pg0".
+ Also supports pg0 URLs: "pg0" or "pg0://instance-name" or "pg0://instance-name:port"
+ memory_llm_provider: LLM provider. Defaults to HINDSIGHT_API_LLM_PROVIDER env var or "groq".
+ memory_llm_api_key: API key for the LLM provider. Defaults to HINDSIGHT_API_LLM_API_KEY env var.
+ memory_llm_model: Model name. Defaults to HINDSIGHT_API_LLM_MODEL env var.
+ memory_llm_base_url: Base URL for the LLM API. Defaults based on provider.
+ embeddings: Embeddings implementation. If not provided, created from env vars.
+ cross_encoder: Cross-encoder model. If not provided, created from env vars.
+ query_analyzer: Query analyzer implementation. If not provided, uses DateparserQueryAnalyzer.
  pool_min_size: Minimum number of connections in the pool (default: 5)
  pool_max_size: Maximum number of connections in the pool (default: 100)
- Increase for parallel think/search operations (e.g., 200-300 for 100+ parallel thinks)
- task_backend: Custom task backend for async task execution. If not provided, uses AsyncIOQueueBackend
- """
- if not db_url:
- raise ValueError("Database url is required")
+ task_backend: Custom task backend. If not provided, uses AsyncIOQueueBackend.
+ run_migrations: Whether to run database migrations during initialize(). Default: True
+ """
+ # Load config from environment for any missing parameters
+ from ..config import get_config
+ config = get_config()
+
+ # Apply defaults from config
+ db_url = db_url or config.database_url
+ memory_llm_provider = memory_llm_provider or config.llm_provider
+ memory_llm_api_key = memory_llm_api_key or config.llm_api_key
+ memory_llm_model = memory_llm_model or config.llm_model
+ memory_llm_base_url = memory_llm_base_url or config.get_llm_base_url() or None
  # Track pg0 instance (if used)
  self._pg0: Optional[EmbeddedPostgres] = None
+ self._pg0_instance_name: Optional[str] = None

  # Initialize PostgreSQL connection URL
  # The actual URL will be set during initialize() after starting the server
- self._use_pg0 = db_url == "pg0"
- self.db_url = db_url if not self._use_pg0 else None
+ # Supports: "pg0" (default instance), "pg0://instance-name" (named instance), or regular postgresql:// URL
+ if db_url == "pg0":
+ self._use_pg0 = True
+ self._pg0_instance_name = "hindsight"
+ self._pg0_port = None # Use default port
+ self.db_url = None
+ elif db_url.startswith("pg0://"):
+ self._use_pg0 = True
+ # Parse instance name and optional port: pg0://instance-name or pg0://instance-name:port
+ url_part = db_url[6:] # Remove "pg0://"
+ if ":" in url_part:
+ self._pg0_instance_name, port_str = url_part.rsplit(":", 1)
+ self._pg0_port = int(port_str)
+ else:
+ self._pg0_instance_name = url_part or "hindsight"
+ self._pg0_port = None # Use default port
+ self.db_url = None
+ else:
+ self._use_pg0 = False
+ self._pg0_instance_name = None
+ self._pg0_port = None
+ self.db_url = db_url


  # Set default base URL if not provided
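The constructor now accepts three pg0 forms ("pg0", "pg0://instance-name", "pg0://instance-name:port") alongside a regular postgresql:// URL. A minimal standalone sketch of the branch logic above; parse_pg0_url is a hypothetical helper used only for illustration:

from typing import Optional, Tuple

def parse_pg0_url(db_url: str) -> Tuple[bool, Optional[str], Optional[int]]:
    """Return (use_pg0, instance_name, port), mirroring the branches above."""
    if db_url == "pg0":
        return True, "hindsight", None            # default embedded instance
    if db_url.startswith("pg0://"):
        part = db_url[6:]                          # strip "pg0://"
        if ":" in part:
            name, port_str = part.rsplit(":", 1)
            return True, name, int(port_str)       # named instance with explicit port
        return True, part or "hindsight", None     # named instance, default port
    return False, None, None                       # regular postgresql:// URL

assert parse_pg0_url("pg0") == (True, "hindsight", None)
assert parse_pg0_url("pg0://analytics:5544") == (True, "analytics", 5544)
assert parse_pg0_url("postgresql://user:pass@host:5432/db") == (False, None, None)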
@@ -155,15 +190,16 @@ class MemoryEngine:
  self._initialized = False
  self._pool_min_size = pool_min_size
  self._pool_max_size = pool_max_size
+ self._run_migrations = run_migrations

  # Initialize entity resolver (will be created in initialize())
  self.entity_resolver = None

- # Initialize embeddings
+ # Initialize embeddings (from env vars if not provided)
  if embeddings is not None:
  self.embeddings = embeddings
  else:
- self.embeddings = SentenceTransformersEmbeddings("BAAI/bge-small-en-v1.5")
+ self.embeddings = create_embeddings_from_env()

  # Initialize query analyzer
  if query_analyzer is not None:
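Together with the env-driven defaults above, the engine can now be constructed with no arguments. A usage sketch, assuming the environment variables named in the docstring are set (the import path follows the file list at the top of this diff; the values below are placeholders):

import os
from hindsight_api.engine.memory_engine import MemoryEngine

os.environ.setdefault("HINDSIGHT_API_DATABASE_URL", "pg0")       # embedded Postgres
os.environ.setdefault("HINDSIGHT_API_LLM_PROVIDER", "groq")
os.environ.setdefault("HINDSIGHT_API_LLM_API_KEY", "placeholder-key")
os.environ.setdefault("HINDSIGHT_API_LLM_MODEL", "placeholder-model")

engine = MemoryEngine()                                   # every parameter falls back to env config
# engine = MemoryEngine(db_url="pg0://my-instance:5544")  # or override selectively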
@@ -294,7 +330,7 @@ class MemoryEngine:
  await self._handle_reinforce_opinion(task_dict)
  elif task_type == 'form_opinion':
  await self._handle_form_opinion(task_dict)
- elif task_type == 'batch_put':
+ elif task_type == 'batch_retain':
  await self._handle_batch_retain(task_dict)
  elif task_type == 'regenerate_observations':
  await self._handle_regenerate_observations(task_dict)
@@ -378,35 +414,58 @@ class MemoryEngine:
  async def start_pg0():
  """Start pg0 if configured."""
  if self._use_pg0:
- self._pg0 = EmbeddedPostgres()
- self.db_url = await self._pg0.ensure_running()
-
- def load_embeddings():
- """Load embedding model (CPU-bound)."""
- self.embeddings.load()
-
- def load_cross_encoder():
- """Load cross-encoder model (CPU-bound)."""
- self._cross_encoder_reranker.cross_encoder.load()
-
- def load_query_analyzer():
- """Load query analyzer model (CPU-bound)."""
- self.query_analyzer.load()
-
- # Run pg0 and all model loads in parallel
- # pg0 is async (IO-bound), models are sync (CPU-bound in thread pool)
- # Use 3 workers to load all models concurrently
- with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
- # Start all tasks
- pg0_task = asyncio.create_task(start_pg0())
- embeddings_future = loop.run_in_executor(executor, load_embeddings)
- cross_encoder_future = loop.run_in_executor(executor, load_cross_encoder)
- query_analyzer_future = loop.run_in_executor(executor, load_query_analyzer)
-
- # Wait for all to complete
- await asyncio.gather(
- pg0_task, embeddings_future, cross_encoder_future, query_analyzer_future
- )
+ kwargs = {"name": self._pg0_instance_name}
+ if self._pg0_port is not None:
+ kwargs["port"] = self._pg0_port
+ pg0 = EmbeddedPostgres(**kwargs)
+ # Check if pg0 is already running before we start it
+ was_already_running = await pg0.is_running()
+ self.db_url = await pg0.ensure_running()
+ # Only track pg0 (to stop later) if WE started it
+ if not was_already_running:
+ self._pg0 = pg0
+
+ async def init_embeddings():
+ """Initialize embedding model."""
+ # For local providers, run in thread pool to avoid blocking event loop
+ if self.embeddings.provider_name == "local":
+ await loop.run_in_executor(
+ None,
+ lambda: asyncio.run(self.embeddings.initialize())
+ )
+ else:
+ await self.embeddings.initialize()
+
+ async def init_cross_encoder():
+ """Initialize cross-encoder model."""
+ cross_encoder = self._cross_encoder_reranker.cross_encoder
+ # For local providers, run in thread pool to avoid blocking event loop
+ if cross_encoder.provider_name == "local":
+ await loop.run_in_executor(
+ None,
+ lambda: asyncio.run(cross_encoder.initialize())
+ )
+ else:
+ await cross_encoder.initialize()
+
+ async def init_query_analyzer():
+ """Initialize query analyzer model."""
+ # Query analyzer load is sync and CPU-bound
+ await loop.run_in_executor(None, self.query_analyzer.load)
+
+ # Run pg0 and all model initializations in parallel
+ await asyncio.gather(
+ start_pg0(),
+ init_embeddings(),
+ init_cross_encoder(),
+ init_query_analyzer(),
+ )
+
+ # Run database migrations if enabled
+ if self._run_migrations:
+ from ..migrations import run_migrations
+ logger.info("Running database migrations...")
+ run_migrations(self.db_url)

  logger.info(f"Connecting to PostgreSQL at {self.db_url}")

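Startup now mixes async work (starting pg0, remote providers) with CPU-bound local model loads under a single asyncio.gather(). A generic sketch of that pattern with stand-in names (start_db and load_model are placeholders, not hindsight_api functions):

import asyncio

async def initialize_all() -> None:
    loop = asyncio.get_running_loop()

    async def start_db() -> None:
        await asyncio.sleep(0.1)                  # stands in for starting pg0 (I/O-bound)

    def load_model() -> None:
        sum(i * i for i in range(1_000_000))      # stands in for a local model load (CPU-bound)

    # Async tasks and executor-wrapped sync loads all finish before gather returns.
    await asyncio.gather(
        start_db(),
        loop.run_in_executor(None, load_model),   # default thread pool
        loop.run_in_executor(None, load_model),
    )

asyncio.run(initialize_all())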
@@ -869,7 +928,6 @@ class MemoryEngine:
  task_backend=self._task_backend,
  format_date_fn=self._format_readable_date,
  duplicate_checker_fn=self._find_duplicate_facts_batch,
- regenerate_observations_fn=self._regenerate_observations_sync,
  bank_id=bank_id,
  contents_dicts=contents,
  document_id=document_id,
@@ -955,11 +1013,19 @@ class MemoryEngine:
  - entities: Optional dict of entity states (if include_entities=True)
  - chunks: Optional dict of chunks (if include_chunks=True)
  """
+ # Validate fact types early
+ invalid_types = set(fact_type) - VALID_RECALL_FACT_TYPES
+ if invalid_types:
+ raise ValueError(
+ f"Invalid fact type(s): {', '.join(sorted(invalid_types))}. "
+ f"Must be one of: {', '.join(sorted(VALID_RECALL_FACT_TYPES))}"
+ )
+
  # Map budget enum to thinking_budget number
  budget_mapping = {
  Budget.LOW: 100,
  Budget.MID: 300,
- Budget.HIGH: 600
+ Budget.HIGH: 1000
  }
  thinking_budget = budget_mapping[budget]

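The early validation and the budget mapping can be exercised on their own. A sketch in which both VALID_RECALL_FACT_TYPES and the Budget enum are assumed shapes (the real definitions live in the package; the three fact types match those passed to recall by reflect later in this diff):

from enum import Enum

VALID_RECALL_FACT_TYPES = {"experience", "world", "opinion"}   # assumed contents

class Budget(Enum):                                            # assumed shape
    LOW = "low"
    MID = "mid"
    HIGH = "high"

def resolve_recall_params(fact_type: list, budget: Budget) -> int:
    invalid_types = set(fact_type) - VALID_RECALL_FACT_TYPES
    if invalid_types:
        raise ValueError(
            f"Invalid fact type(s): {', '.join(sorted(invalid_types))}. "
            f"Must be one of: {', '.join(sorted(VALID_RECALL_FACT_TYPES))}"
        )
    return {Budget.LOW: 100, Budget.MID: 300, Budget.HIGH: 1000}[budget]

print(resolve_recall_params(["world", "opinion"], Budget.HIGH))  # 1000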
@@ -1040,12 +1106,12 @@ class MemoryEngine:
  tracer.start()

  pool = await self._get_pool()
- search_start = time.time()
+ recall_start = time.time()

  # Buffer logs for clean output in concurrent scenarios
- search_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
+ recall_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
  log_buffer = []
- log_buffer.append(f"[SEARCH {search_id}] Query: '{query[:50]}...' (budget={thinking_budget}, max_tokens={max_tokens})")
+ log_buffer.append(f"[RECALL {recall_id}] Query: '{query[:50]}...' (budget={thinking_budget}, max_tokens={max_tokens})")

  try:
  # Step 1: Generate query embedding (for semantic search)
@@ -1088,7 +1154,7 @@ class MemoryEngine:
  for idx, (ft_semantic, ft_bm25, ft_graph, ft_temporal, ft_timings, ft_temporal_constraint) in enumerate(all_retrievals):
  # Log fact types in this retrieval batch
  ft_name = fact_type[idx] if idx < len(fact_type) else "unknown"
- logger.debug(f"[SEARCH {search_id}] Fact type '{ft_name}': semantic={len(ft_semantic)}, bm25={len(ft_bm25)}, graph={len(ft_graph)}, temporal={len(ft_temporal) if ft_temporal else 0}")
+ logger.debug(f"[RECALL {recall_id}] Fact type '{ft_name}': semantic={len(ft_semantic)}, bm25={len(ft_bm25)}, graph={len(ft_graph)}, temporal={len(ft_temporal) if ft_temporal else 0}")

  semantic_results.extend(ft_semantic)
  bm25_results.extend(ft_bm25)
@@ -1209,7 +1275,6 @@ class MemoryEngine:
  # Step 4: Rerank using cross-encoder (MergedCandidate -> ScoredResult)
  step_start = time.time()
  reranker_instance = self._cross_encoder_reranker
- log_buffer.append(f" [4] Using cross-encoder reranker")

  # Rerank using cross-encoder
  scored_results = reranker_instance.rerank(query, merged_candidates)
@@ -1334,12 +1399,7 @@ class MemoryEngine:
  ft = sr.retrieval.fact_type
  fact_type_counts[ft] = fact_type_counts.get(ft, 0) + 1

- total_time = time.time() - search_start
  fact_type_summary = ", ".join([f"{ft}={count}" for ft, count in sorted(fact_type_counts.items())])
- log_buffer.append(f"[SEARCH {search_id}] Complete: {len(top_scored)} results ({fact_type_summary}) ({total_tokens} tokens) in {total_time:.3f}s")
-
- # Log all buffered logs at once
- logger.info("\n" + "\n".join(log_buffer))

  # Convert ScoredResult to dicts with ISO datetime strings
  top_results_dicts = []
@@ -1401,11 +1461,12 @@ class MemoryEngine:
  mentioned_at=result_dict.get("mentioned_at"),
  document_id=result_dict.get("document_id"),
  chunk_id=result_dict.get("chunk_id"),
- activation=result_dict.get("weight") # Use final weight as activation
  ))

  # Fetch entity observations if requested
  entities_dict = None
+ total_entity_tokens = 0
+ total_chunk_tokens = 0
  if include_entities and fact_entity_map:
  # Collect unique entities in order of fact relevance (preserving order from top_scored)
  # Use a list to maintain order, but track seen entities to avoid duplicates
@@ -1425,7 +1486,6 @@ class MemoryEngine:

  # Fetch observations for each entity (respect token budget, in order)
  entities_dict = {}
- total_entity_tokens = 0
  encoding = _get_tiktoken_encoding()

  for entity_id, entity_name in entities_ordered:
@@ -1485,7 +1545,6 @@ class MemoryEngine:

  # Apply token limit and build chunks_dict in the order of chunk_ids_ordered
  chunks_dict = {}
- total_chunk_tokens = 0
  encoding = _get_tiktoken_encoding()

  for chunk_id in chunk_ids_ordered:
@@ -1525,10 +1584,17 @@ class MemoryEngine:
  trace = tracer.finalize(top_results_dicts)
  trace_dict = trace.to_dict() if trace else None

+ # Log final recall stats
+ total_time = time.time() - recall_start
+ num_chunks = len(chunks_dict) if chunks_dict else 0
+ num_entities = len(entities_dict) if entities_dict else 0
+ log_buffer.append(f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok), {num_entities} entities ({total_entity_tokens} tok) | {fact_type_summary} | {total_time:.3f}s")
+ logger.info("\n" + "\n".join(log_buffer))
+
  return RecallResultModel(results=memory_facts, trace=trace_dict, entities=entities_dict, chunks=chunks_dict)

  except Exception as e:
- log_buffer.append(f"[SEARCH {search_id}] ERROR after {time.time() - search_start:.3f}s: {str(e)}")
+ log_buffer.append(f"[RECALL {recall_id}] ERROR after {time.time() - recall_start:.3f}s: {str(e)}")
  logger.error("\n" + "\n".join(log_buffer))
  raise Exception(f"Failed to search memories: {str(e)}")

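The [SEARCH ...] prefix becomes [RECALL ...], and the completion line now fires after entity and chunk accounting so their token counts can be reported. A reduced sketch of the buffered-logging pattern itself (run_recall is illustrative, not part of the API):

import logging
import time

logger = logging.getLogger("hindsight.recall")

def run_recall(bank_id: str, query: str) -> None:
    recall_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
    recall_start = time.time()
    log_buffer = [f"[RECALL {recall_id}] Query: '{query[:50]}...'"]
    try:
        # ... retrieval, reranking, and token budgeting would run here ...
        log_buffer.append(f"[RECALL {recall_id}] Complete in {time.time() - recall_start:.3f}s")
        logger.info("\n" + "\n".join(log_buffer))   # one call, so concurrent recalls don't interleave
    except Exception as e:
        log_buffer.append(f"[RECALL {recall_id}] ERROR after {time.time() - recall_start:.3f}s: {e}")
        logger.error("\n" + "\n".join(log_buffer))
        raise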
@@ -2502,14 +2568,14 @@ Guidelines:
  async def update_bank_disposition(
  self,
  bank_id: str,
- disposition: Dict[str, float]
+ disposition: Dict[str, int]
  ) -> None:
  """
  Update bank disposition traits.

  Args:
  bank_id: bank IDentifier
- disposition: Dict with Big Five traits + bias_strength (all 0-1)
+ disposition: Dict with skepticism, literalism, empathy (all 1-5)
  """
  pool = await self._get_pool()
  await bank_utils.update_bank_disposition(pool, bank_id, disposition)
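Per the updated docstring, callers now pass integer traits on a 1-5 scale instead of Big Five floats; for example (trait values here are arbitrary):

new_disposition = {"skepticism": 4, "literalism": 2, "empathy": 5}
# await engine.update_bank_disposition(bank_id, new_disposition)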
@@ -2584,7 +2650,13 @@ Guidelines:
  if self._llm_config is None:
  raise ValueError("Memory LLM API key not set. Set HINDSIGHT_API_LLM_API_KEY environment variable.")

+ reflect_start = time.time()
+ reflect_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
+ log_buffer = []
+ log_buffer.append(f"[REFLECT {reflect_id}] Query: '{query[:50]}...'")
+
  # Steps 1-3: Run multi-fact-type search (12-way retrieval: 4 methods × 3 fact types)
+ recall_start = time.time()
  search_result = await self.recall_async(
  bank_id=bank_id,
  query=query,
@@ -2594,24 +2666,22 @@ Guidelines:
  fact_type=['experience', 'world', 'opinion'],
  include_entities=True
  )
+ recall_time = time.time() - recall_start

  all_results = search_result.results
- logger.info(f"[THINK] Search returned {len(all_results)} results")

  # Split results by fact type for structured response
  agent_results = [r for r in all_results if r.fact_type == 'experience']
  world_results = [r for r in all_results if r.fact_type == 'world']
  opinion_results = [r for r in all_results if r.fact_type == 'opinion']

- logger.info(f"[THINK] Split results - agent: {len(agent_results)}, world: {len(world_results)}, opinion: {len(opinion_results)}")
+ log_buffer.append(f"[REFLECT {reflect_id}] Recall: {len(all_results)} facts (experience={len(agent_results)}, world={len(world_results)}, opinion={len(opinion_results)}) in {recall_time:.3f}s")

  # Format facts for LLM
  agent_facts_text = think_utils.format_facts_for_prompt(agent_results)
  world_facts_text = think_utils.format_facts_for_prompt(world_results)
  opinion_facts_text = think_utils.format_facts_for_prompt(opinion_results)

- logger.info(f"[THINK] Formatted facts - agent: {len(agent_facts_text)} chars, world: {len(world_facts_text)} chars, opinion: {len(opinion_facts_text)} chars")
-
  # Get bank profile (name, disposition + background)
  profile = await self.get_bank_profile(bank_id)
  name = profile["name"]
@@ -2630,10 +2700,11 @@ Guidelines:
  context=context,
  )

- logger.info(f"[THINK] Full prompt length: {len(prompt)} chars")
+ log_buffer.append(f"[REFLECT {reflect_id}] Prompt: {len(prompt)} chars")

  system_message = think_utils.get_system_message(disposition)

+ llm_start = time.time()
  answer_text = await self._llm_config.call(
  messages=[
  {"role": "system", "content": system_message},
@@ -2641,8 +2712,9 @@ Guidelines:
  ],
  scope="memory_think",
  temperature=0.9,
- max_tokens=1000
+ max_completion_tokens=1000
  )
+ llm_time = time.time() - llm_start

  answer_text = answer_text.strip()

@@ -2654,6 +2726,10 @@ Guidelines:
  'query': query
  })

+ total_time = time.time() - reflect_start
+ log_buffer.append(f"[REFLECT {reflect_id}] Complete: {len(answer_text)} chars response, LLM {llm_time:.3f}s, total {total_time:.3f}s")
+ logger.info("\n" + "\n".join(log_buffer))
+
  # Return response with facts split by type
  return ReflectResult(
  text=answer_text,
@@ -2702,7 +2778,7 @@ Guidelines:
  )

  except Exception as e:
- logger.warning(f"[THINK] Failed to extract/store opinions: {str(e)}")
+ logger.warning(f"[REFLECT] Failed to extract/store opinions: {str(e)}")

  async def get_entity_observations(
  self,
@@ -2828,7 +2904,8 @@ Guidelines:
  bank_id: str,
  entity_id: str,
  entity_name: str,
- version: str | None = None
+ version: str | None = None,
+ conn=None
  ) -> List[str]:
  """
  Regenerate observations for an entity by:
@@ -2843,43 +2920,58 @@ Guidelines:
  entity_id: Entity UUID
  entity_name: Canonical name of the entity
  version: Entity's last_seen timestamp when task was created (for deduplication)
+ conn: Optional database connection (for transactional atomicity with caller)

  Returns:
  List of created observation IDs
  """
  pool = await self._get_pool()
+ entity_uuid = uuid.UUID(entity_id)

- # Step 1: Check version for deduplication
- if version:
- async with acquire_with_retry(pool) as conn:
- current_last_seen = await conn.fetchval(
- """
- SELECT last_seen
- FROM entities
- WHERE id = $1 AND bank_id = $2
- """,
- uuid.UUID(entity_id), bank_id
- )
+ # Helper to run a query with provided conn or acquire one
+ async def fetch_with_conn(query, *args):
+ if conn is not None:
+ return await conn.fetch(query, *args)
+ else:
+ async with acquire_with_retry(pool) as acquired_conn:
+ return await acquired_conn.fetch(query, *args)

- if current_last_seen and current_last_seen.isoformat() != version:
- return []
+ async def fetchval_with_conn(query, *args):
+ if conn is not None:
+ return await conn.fetchval(query, *args)
+ else:
+ async with acquire_with_retry(pool) as acquired_conn:
+ return await acquired_conn.fetchval(query, *args)

- # Step 2: Get all facts mentioning this entity (exclude observations themselves)
- async with acquire_with_retry(pool) as conn:
- rows = await conn.fetch(
+ # Step 1: Check version for deduplication
+ if version:
+ current_last_seen = await fetchval_with_conn(
  """
- SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.fact_type
- FROM memory_units mu
- JOIN unit_entities ue ON mu.id = ue.unit_id
- WHERE mu.bank_id = $1
- AND ue.entity_id = $2
- AND mu.fact_type IN ('world', 'experience')
- ORDER BY mu.occurred_start DESC
- LIMIT 50
+ SELECT last_seen
+ FROM entities
+ WHERE id = $1 AND bank_id = $2
  """,
- bank_id, uuid.UUID(entity_id)
+ entity_uuid, bank_id
  )

+ if current_last_seen and current_last_seen.isoformat() != version:
+ return []
+
+ # Step 2: Get all facts mentioning this entity (exclude observations themselves)
+ rows = await fetch_with_conn(
+ """
+ SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.fact_type
+ FROM memory_units mu
+ JOIN unit_entities ue ON mu.id = ue.unit_id
+ WHERE mu.bank_id = $1
+ AND ue.entity_id = $2
+ AND mu.fact_type IN ('world', 'experience')
+ ORDER BY mu.occurred_start DESC
+ LIMIT 50
+ """,
+ bank_id, entity_uuid
+ )
+
  if not rows:
  return []
@@ -2905,120 +2997,173 @@ Guidelines:
  if not observations:
  return []

- # Step 4: Delete old observations and insert new ones in a transaction
- async with acquire_with_retry(pool) as conn:
- async with conn.transaction():
- # Delete old observations for this entity
- await conn.execute(
+ # Step 4: Delete old observations and insert new ones
+ # If conn provided, we're already in a transaction - don't start another
+ # If conn is None, acquire one and start a transaction
+ async def do_db_operations(db_conn):
+ # Delete old observations for this entity
+ await db_conn.execute(
+ """
+ DELETE FROM memory_units
+ WHERE id IN (
+ SELECT mu.id
+ FROM memory_units mu
+ JOIN unit_entities ue ON mu.id = ue.unit_id
+ WHERE mu.bank_id = $1
+ AND mu.fact_type = 'observation'
+ AND ue.entity_id = $2
+ )
+ """,
+ bank_id, entity_uuid
+ )
+
+ # Generate embeddings for new observations
+ embeddings = await embedding_utils.generate_embeddings_batch(
+ self.embeddings, observations
+ )
+
+ # Insert new observations
+ current_time = utcnow()
+ created_ids = []
+
+ for obs_text, embedding in zip(observations, embeddings):
+ result = await db_conn.fetchrow(
  """
- DELETE FROM memory_units
- WHERE id IN (
- SELECT mu.id
- FROM memory_units mu
- JOIN unit_entities ue ON mu.id = ue.unit_id
- WHERE mu.bank_id = $1
- AND mu.fact_type = 'observation'
- AND ue.entity_id = $2
+ INSERT INTO memory_units (
+ bank_id, text, embedding, context, event_date,
+ occurred_start, occurred_end, mentioned_at,
+ fact_type, access_count
  )
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'observation', 0)
+ RETURNING id
  """,
- bank_id, uuid.UUID(entity_id)
+ bank_id,
+ obs_text,
+ str(embedding),
+ f"observation about {entity_name}",
+ current_time,
+ current_time,
+ current_time,
+ current_time
  )
+ obs_id = str(result['id'])
+ created_ids.append(obs_id)

- # Generate embeddings for new observations
- embeddings = await embedding_utils.generate_embeddings_batch(
- self.embeddings, observations
+ # Link observation to entity
+ await db_conn.execute(
+ """
+ INSERT INTO unit_entities (unit_id, entity_id)
+ VALUES ($1, $2)
+ """,
+ uuid.UUID(obs_id), entity_uuid
  )

- # Insert new observations
- current_time = utcnow()
- created_ids = []
-
- for obs_text, embedding in zip(observations, embeddings):
- result = await conn.fetchrow(
- """
- INSERT INTO memory_units (
- bank_id, text, embedding, context, event_date,
- occurred_start, occurred_end, mentioned_at,
- fact_type, access_count
- )
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'observation', 0)
- RETURNING id
- """,
- bank_id,
- obs_text,
- str(embedding),
- f"observation about {entity_name}",
- current_time,
- current_time,
- current_time,
- current_time
- )
- obs_id = str(result['id'])
- created_ids.append(obs_id)
-
- # Link observation to entity
- await conn.execute(
- """
- INSERT INTO unit_entities (unit_id, entity_id)
- VALUES ($1, $2)
- """,
- uuid.UUID(obs_id), uuid.UUID(entity_id)
- )
+ return created_ids

- # Single consolidated log line
- logger.info(f"[OBSERVATIONS] {entity_name}: {len(facts)} facts -> {len(created_ids)} observations")
- return created_ids
+ if conn is not None:
+ # Use provided connection (already in a transaction)
+ return await do_db_operations(conn)
+ else:
+ # Acquire connection and start our own transaction
+ async with acquire_with_retry(pool) as acquired_conn:
+ async with acquired_conn.transaction():
+ return await do_db_operations(acquired_conn)

  async def _regenerate_observations_sync(
  self,
  bank_id: str,
  entity_ids: List[str],
- min_facts: int = 5
+ min_facts: int = 5,
+ conn=None
  ) -> None:
  """
  Regenerate observations for entities synchronously (called during retain).

+ Processes entities in PARALLEL for faster execution.
+
  Args:
  bank_id: Bank identifier
  entity_ids: List of entity IDs to process
  min_facts: Minimum facts required to regenerate observations
+ conn: Optional database connection (for transactional atomicity)
  """
  if not bank_id or not entity_ids:
  return

- pool = await self._get_pool()
- async with pool.acquire() as conn:
- for entity_id in entity_ids:
- try:
- entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id
-
- # Check if entity exists
- entity_exists = await conn.fetchrow(
- "SELECT canonical_name FROM entities WHERE id = $1 AND bank_id = $2",
- entity_uuid, bank_id
- )
+ # Convert to UUIDs
+ entity_uuids = [uuid.UUID(eid) if isinstance(eid, str) else eid for eid in entity_ids]

- if not entity_exists:
- logger.debug(f"[OBSERVATIONS] Entity {entity_id} not yet in bank {bank_id}, skipping")
- continue
+ # Use provided connection or acquire a new one
+ if conn is not None:
+ # Use the provided connection (transactional with caller)
+ entity_rows = await conn.fetch(
+ """
+ SELECT id, canonical_name FROM entities
+ WHERE id = ANY($1) AND bank_id = $2
+ """,
+ entity_uuids, bank_id
+ )
+ entity_names = {row['id']: row['canonical_name'] for row in entity_rows}

- entity_name = entity_exists['canonical_name']
+ fact_counts = await conn.fetch(
+ """
+ SELECT ue.entity_id, COUNT(*) as cnt
+ FROM unit_entities ue
+ JOIN memory_units mu ON ue.unit_id = mu.id
+ WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
+ GROUP BY ue.entity_id
+ """,
+ entity_uuids, bank_id
+ )
+ entity_fact_counts = {row['entity_id']: row['cnt'] for row in fact_counts}
+ else:
+ # Acquire a new connection (standalone call)
+ pool = await self._get_pool()
+ async with pool.acquire() as acquired_conn:
+ entity_rows = await acquired_conn.fetch(
+ """
+ SELECT id, canonical_name FROM entities
+ WHERE id = ANY($1) AND bank_id = $2
+ """,
+ entity_uuids, bank_id
+ )
+ entity_names = {row['id']: row['canonical_name'] for row in entity_rows}

- # Count facts linked to this entity
- fact_count = await conn.fetchval(
- "SELECT COUNT(*) FROM unit_entities WHERE entity_id = $1",
- entity_uuid
- ) or 0
+ fact_counts = await acquired_conn.fetch(
+ """
+ SELECT ue.entity_id, COUNT(*) as cnt
+ FROM unit_entities ue
+ JOIN memory_units mu ON ue.unit_id = mu.id
+ WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
+ GROUP BY ue.entity_id
+ """,
+ entity_uuids, bank_id
+ )
+ entity_fact_counts = {row['entity_id']: row['cnt'] for row in fact_counts}
+
+ # Filter entities that meet the threshold
+ entities_to_process = []
+ for entity_id in entity_ids:
+ entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id
+ if entity_uuid not in entity_names:
+ continue
+ fact_count = entity_fact_counts.get(entity_uuid, 0)
+ if fact_count >= min_facts:
+ entities_to_process.append((entity_id, entity_names[entity_uuid]))
+
+ if not entities_to_process:
+ return

- # Only regenerate if entity has enough facts
- if fact_count >= min_facts:
- await self.regenerate_entity_observations(bank_id, entity_id, entity_name, version=None)
- else:
- logger.debug(f"[OBSERVATIONS] Skipping {entity_name} ({fact_count} facts < {min_facts} threshold)")
+ # Process all entities in PARALLEL (LLM calls are the bottleneck)
+ async def process_entity(entity_id: str, entity_name: str):
+ try:
+ await self.regenerate_entity_observations(bank_id, entity_id, entity_name, version=None, conn=conn)
+ except Exception as e:
+ logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")

- except Exception as e:
- logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
- continue
+ await asyncio.gather(*[
+ process_entity(eid, name) for eid, name in entities_to_process
+ ])

  async def _handle_regenerate_observations(self, task_dict: Dict[str, Any]):
  """