hindsight-api 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -2
- hindsight_api/alembic/README +1 -0
- hindsight_api/alembic/env.py +146 -0
- hindsight_api/alembic/script.py.mako +28 -0
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
- hindsight_api/api/__init__.py +2 -4
- hindsight_api/api/http.py +112 -164
- hindsight_api/api/mcp.py +2 -1
- hindsight_api/config.py +154 -0
- hindsight_api/engine/__init__.py +7 -2
- hindsight_api/engine/cross_encoder.py +225 -16
- hindsight_api/engine/embeddings.py +198 -19
- hindsight_api/engine/entity_resolver.py +56 -29
- hindsight_api/engine/llm_wrapper.py +147 -106
- hindsight_api/engine/memory_engine.py +337 -192
- hindsight_api/engine/response_models.py +15 -17
- hindsight_api/engine/retain/bank_utils.py +25 -35
- hindsight_api/engine/retain/entity_processing.py +5 -5
- hindsight_api/engine/retain/fact_extraction.py +86 -24
- hindsight_api/engine/retain/fact_storage.py +1 -1
- hindsight_api/engine/retain/link_creation.py +12 -6
- hindsight_api/engine/retain/link_utils.py +50 -56
- hindsight_api/engine/retain/observation_regeneration.py +264 -0
- hindsight_api/engine/retain/orchestrator.py +31 -44
- hindsight_api/engine/retain/types.py +14 -0
- hindsight_api/engine/search/reranking.py +6 -10
- hindsight_api/engine/search/retrieval.py +2 -2
- hindsight_api/engine/search/think_utils.py +59 -30
- hindsight_api/engine/search/tracer.py +1 -1
- hindsight_api/main.py +201 -0
- hindsight_api/migrations.py +61 -39
- hindsight_api/models.py +1 -2
- hindsight_api/pg0.py +17 -36
- hindsight_api/server.py +43 -0
- {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +2 -3
- hindsight_api-0.1.1.dist-info/RECORD +60 -0
- hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
- hindsight_api/cli.py +0 -128
- hindsight_api/web/__init__.py +0 -12
- hindsight_api/web/server.py +0 -109
- hindsight_api-0.0.21.dist-info/RECORD +0 -50
- hindsight_api-0.0.21.dist-info/entry_points.txt +0 -2
- {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
```diff
@@ -6,6 +6,9 @@ import time
 import logging
 from typing import List
 from datetime import timedelta, datetime, timezone
+from uuid import UUID
+
+from .types import EntityLink

 logger = logging.getLogger(__name__)

```
```diff
@@ -202,47 +205,24 @@ async def extract_entities_batch_optimized(

     # Resolve ALL entities in one batch call
     if all_entities_flat:
-        # [6.2.2] Batch resolve entities
+        # [6.2.2] Batch resolve entities - single call with per-entity dates
         substep_6_2_2_start = time.time()
-        # Group by date for batch resolution (round to hour to reduce buckets)
-        entities_by_date = {}
-        for idx, (unit_id, local_idx, fact_date) in enumerate(entity_to_unit):
-            # Round to hour to group facts from same time period
-            date_key = fact_date.replace(minute=0, second=0, microsecond=0)
-            if date_key not in entities_by_date:
-                entities_by_date[date_key] = []
-            entities_by_date[date_key].append((idx, all_entities_flat[idx]))
-
-        _log(log_buffer, f" [6.2.2] Grouped into {len(entities_by_date)} date buckets, resolving sequentially...", level='debug')
-
-        # Resolve all date groups SEQUENTIALLY using main transaction connection
-        # This prevents race conditions where parallel tasks create duplicate entities
-        resolved_entity_ids = [None] * len(all_entities_flat)
-
-        for date_idx, (date_key, entities_group) in enumerate(entities_by_date.items(), 1):
-            date_bucket_start = time.time()
-            indices = [idx for idx, _ in entities_group]
-            entities_data = [entity_data for _, entity_data in entities_group]
-            # Use the first fact's date for this bucket (all should be in same hour)
-            fact_date = entity_to_unit[indices[0]][2]
-
-            # Use main transaction connection to ensure consistency
-            batch_resolved = await entity_resolver.resolve_entities_batch(
-                bank_id=bank_id,
-                entities_data=entities_data,
-                context=context,
-                unit_event_date=fact_date,
-                conn=conn  # Use main transaction connection
-            )

-
-
-
-
-
-
+        # Add per-entity dates to entity data for batch resolution
+        for idx, (unit_id, local_idx, fact_date) in enumerate(entity_to_unit):
+            all_entities_flat[idx]['event_date'] = fact_date
+
+        # Resolve ALL entities in ONE batch call (much faster than sequential buckets)
+        # INSERT ... ON CONFLICT handles any race conditions at the DB level
+        resolved_entity_ids = await entity_resolver.resolve_entities_batch(
+            bank_id=bank_id,
+            entities_data=all_entities_flat,
+            context=context,
+            unit_event_date=None,  # Not used when per-entity dates provided
+            conn=conn  # Use main transaction connection
+        )

-        _log(log_buffer, f" [6.2.2] Resolve entities: {len(all_entities_flat)} entities
+        _log(log_buffer, f" [6.2.2] Resolve entities: {len(all_entities_flat)} entities in single batch in {time.time() - substep_6_2_2_start:.3f}s", level='debug')

         # [6.2.3] Create unit-entity links in BATCH
         substep_6_2_3_start = time.time()
```
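In practice this hunk collapses what used to be one resolver call per hour bucket into a single call per retain batch, with each entity dict carrying its own `event_date`. Below is a minimal sketch of the new calling pattern; the resolver object, connection, and entity dict contents are stand-ins, and only the `event_date` key plus the `resolve_entities_batch` keyword arguments come from the diff above:

```python
from datetime import datetime
from typing import Any, Dict, List, Tuple

async def resolve_all_entities_once(
    entity_resolver: Any,
    bank_id: str,
    context: str,
    conn: Any,
    all_entities_flat: List[Dict[str, Any]],
    entity_to_unit: List[Tuple[str, int, datetime]],
) -> List[Any]:
    # Attach each fact's date to its entity payload...
    for idx, (_unit_id, _local_idx, fact_date) in enumerate(entity_to_unit):
        all_entities_flat[idx]["event_date"] = fact_date

    # ...then resolve everything in one round-trip instead of one call per hour bucket.
    return await entity_resolver.resolve_entities_batch(
        bank_id=bank_id,
        entities_data=all_entities_flat,
        context=context,
        unit_event_date=None,  # per-entity dates are used instead
        conn=conn,
    )
```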
```diff
@@ -305,10 +285,14 @@ async def extract_entities_batch_optimized(
     # Only link each new unit to the most recent MAX_LINKS_PER_ENTITY units
     MAX_LINKS_PER_ENTITY = 50  # Limit to prevent explosion when entity appears in many facts
     link_gen_start = time.time()
-    links = []
+    links: List[EntityLink] = []
     new_unit_set = set(unit_ids)  # Units from this batch

+    def to_uuid(val) -> UUID:
+        return UUID(val) if isinstance(val, str) else val
+
     for entity_id, units_with_entity in entity_to_units.items():
+        entity_uuid = to_uuid(entity_id)
         # Separate new units (from this batch) and existing units
         new_units = [u for u in units_with_entity if str(u) in new_unit_set or u in new_unit_set]
         existing_units = [u for u in units_with_entity if str(u) not in new_unit_set and u not in new_unit_set]
```
```diff
@@ -318,15 +302,15 @@ async def extract_entities_batch_optimized(
         new_units_to_link = new_units[-MAX_LINKS_PER_ENTITY:] if len(new_units) > MAX_LINKS_PER_ENTITY else new_units
         for i, unit_id_1 in enumerate(new_units_to_link):
             for unit_id_2 in new_units_to_link[i+1:]:
-                links.append((unit_id_1, unit_id_2,
-                links.append((unit_id_2, unit_id_1,
+                links.append(EntityLink(from_unit_id=to_uuid(unit_id_1), to_unit_id=to_uuid(unit_id_2), entity_id=entity_uuid))
+                links.append(EntityLink(from_unit_id=to_uuid(unit_id_2), to_unit_id=to_uuid(unit_id_1), entity_id=entity_uuid))

         # Link new units to LIMITED existing units (most recent)
         existing_to_link = existing_units[-MAX_LINKS_PER_ENTITY:]  # Take most recent
         for new_unit in new_units:
             for existing_unit in existing_to_link:
-                links.append((new_unit, existing_unit,
-                links.append((existing_unit, new_unit,
+                links.append(EntityLink(from_unit_id=to_uuid(new_unit), to_unit_id=to_uuid(existing_unit), entity_id=entity_uuid))
+                links.append(EntityLink(from_unit_id=to_uuid(existing_unit), to_unit_id=to_uuid(new_unit), entity_id=entity_uuid))

     _log(log_buffer, f" [6.3.3] Generate {len(links)} links: {time.time() - link_gen_start:.3f}s", level='debug')
     _log(log_buffer, f" [6.3] Entity link creation: {len(links)} links for {len(all_entity_ids)} unique entities in {time.time() - substep_start:.3f}s", level='debug')
```
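The loops above now emit typed `EntityLink` records in both directions and cap fan-out at `MAX_LINKS_PER_ENTITY`. A self-contained sketch of that pairing logic follows; the unit IDs are randomly generated just to show the shape, and `EntityLink` is the dataclass added to `hindsight_api/engine/retain/types.py` later in this diff:

```python
from typing import List
from uuid import UUID, uuid4

from hindsight_api.engine.retain.types import EntityLink

def pair_units_for_entity(units: List[UUID], entity_id: UUID, max_links: int = 50) -> List[EntityLink]:
    """Create bidirectional entity links between the most recent units sharing an entity."""
    to_link = units[-max_links:] if len(units) > max_links else units
    links: List[EntityLink] = []
    for i, a in enumerate(to_link):
        for b in to_link[i + 1:]:
            links.append(EntityLink(from_unit_id=a, to_unit_id=b, entity_id=entity_id))
            links.append(EntityLink(from_unit_id=b, to_unit_id=a, entity_id=entity_id))
    return links

# Three units sharing one entity yield 3 pairs, i.e. 6 directed links.
assert len(pair_units_for_entity([uuid4() for _ in range(3)], uuid4())) == 6
```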
```diff
@@ -346,7 +330,7 @@ async def create_temporal_links_batch_per_fact(
     unit_ids: List[str],
     time_window_hours: int = 24,
     log_buffer: List[str] = None,
-):
+) -> int:
     """
     Create temporal links for multiple units, each with their own event_date.

@@ -359,9 +343,12 @@ async def create_temporal_links_batch_per_fact(
         unit_ids: List of unit IDs
         time_window_hours: Time window in hours for temporal links
         log_buffer: Optional buffer for logging
+
+    Returns:
+        Number of temporal links created
     """
     if not unit_ids:
-        return
+        return 0

     try:
         import time as time_mod
@@ -417,6 +404,8 @@ async def create_temporal_links_batch_per_fact(
         )
         _log(log_buffer, f" [7.4] Insert {len(links)} temporal links: {time_mod.time() - insert_start:.3f}s")

+        return len(links)
+
     except Exception as e:
         logger.error(f"Failed to create temporal links: {str(e)}")
         import traceback
```
```diff
@@ -432,7 +421,7 @@ async def create_semantic_links_batch(
     top_k: int = 5,
     threshold: float = 0.7,
     log_buffer: List[str] = None,
-):
+) -> int:
     """
     Create semantic links for multiple units efficiently.

@@ -446,9 +435,12 @@ async def create_semantic_links_batch(
         top_k: Number of top similar units to link
         threshold: Minimum similarity threshold
         log_buffer: Optional buffer for logging
+
+    Returns:
+        Number of semantic links created
     """
     if not unit_ids or not embeddings:
-        return
+        return 0

     try:
         import time as time_mod
@@ -539,6 +531,8 @@ async def create_semantic_links_batch(
         )
         _log(log_buffer, f" [8.3] Insert {len(all_links)} semantic links: {time_mod.time() - insert_start:.3f}s")

+        return len(all_links)
+
     except Exception as e:
         logger.error(f"Failed to create semantic links: {str(e)}")
         import traceback
@@ -546,7 +540,7 @@ async def create_semantic_links_batch(
         raise


-async def insert_entity_links_batch(conn, links: List[
+async def insert_entity_links_batch(conn, links: List[EntityLink], chunk_size: int = 50000):
     """
     Insert all entity links using COPY to temp table + INSERT for maximum speed.

```
```diff
@@ -556,7 +550,7 @@ async def insert_entity_links_batch(conn, links: List[tuple], chunk_size: int =

     Args:
         conn: Database connection
-        links: List of
+        links: List of EntityLink objects
         chunk_size: Number of rows per batch (default 50000)
     """
     if not links:
@@ -585,16 +579,16 @@ async def insert_entity_links_batch(conn, links: List[tuple], chunk_size: int =
         await conn.execute("TRUNCATE _temp_entity_links")
         logger.debug(f" [9.2] Truncate temp table: {time_mod.time() - truncate_start:.3f}s")

-        # Convert
+        # Convert EntityLink objects to tuples for COPY
         convert_start = time_mod.time()
         records = []
-        for
+        for link in links:
             records.append((
-
-
-                link_type,
-                weight,
-
+                link.from_unit_id,
+                link.to_unit_id,
+                link.link_type,
+                link.weight,
+                link.entity_id
             ))
         logger.debug(f" [9.3] Convert {len(records)} records: {time_mod.time() - convert_start:.3f}s")

```
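Per its docstring, `insert_entity_links_batch` stages the converted tuples with COPY into a `_temp_entity_links` table before the final INSERT. A rough sketch of that staging step using asyncpg's `copy_records_to_table` is shown below; the temp-table column names and the destination table are assumptions, and only the tuple order (from, to, type, weight, entity) is taken from the diff:

```python
from typing import List

import asyncpg

from hindsight_api.engine.retain.types import EntityLink

async def stage_entity_links(conn: asyncpg.Connection, links: List[EntityLink]) -> None:
    # Flatten dataclasses into the column order used by the temp table.
    records = [
        (l.from_unit_id, l.to_unit_id, l.link_type, l.weight, l.entity_id)
        for l in links
    ]
    await conn.execute("TRUNCATE _temp_entity_links")
    # COPY is far faster than executemany for tens of thousands of rows.
    await conn.copy_records_to_table(
        "_temp_entity_links",
        records=records,
        columns=["from_unit_id", "to_unit_id", "link_type", "weight", "entity_id"],  # assumed names
    )
    # The real function then INSERTs from the temp table into the links table
    # (the target table name is not shown in this hunk).
```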
```diff
@@ -0,0 +1,264 @@
+"""
+Observation regeneration for retain pipeline.
+
+Regenerates entity observations as part of the retain transaction.
+"""
+import logging
+import time
+import uuid
+from datetime import datetime, timezone
+from typing import List, Dict, Optional
+
+from ..search import observation_utils
+from . import embedding_utils
+from ..db_utils import acquire_with_retry
+from .types import EntityLink
+
+logger = logging.getLogger(__name__)
+
+
+def utcnow():
+    """Get current UTC time."""
+    return datetime.now(timezone.utc)
+
+
+# Simple dataclass-like container for facts (avoid importing from memory_engine)
+class MemoryFactForObservation:
+    def __init__(self, id: str, text: str, fact_type: str, context: str, occurred_start: Optional[str]):
+        self.id = id
+        self.text = text
+        self.fact_type = fact_type
+        self.context = context
+        self.occurred_start = occurred_start
+
+
+async def regenerate_observations_batch(
+    conn,
+    embeddings_model,
+    llm_config,
+    bank_id: str,
+    entity_links: List[EntityLink],
+    log_buffer: List[str] = None
+) -> None:
+    """
+    Regenerate observations for top entities in this batch.
+
+    Called INSIDE the retain transaction for atomicity - if observations
+    fail, the entire retain batch is rolled back.
+
+    Args:
+        conn: Database connection (from the retain transaction)
+        embeddings_model: Embeddings model for generating observation embeddings
+        llm_config: LLM configuration for observation extraction
+        bank_id: Bank identifier
+        entity_links: Entity links from this batch
+        log_buffer: Optional log buffer for timing
+    """
+    TOP_N_ENTITIES = 5
+    MIN_FACTS_THRESHOLD = 5
+
+    if not entity_links:
+        return
+
+    # Count mentions per entity in this batch
+    entity_mention_counts: Dict[str, int] = {}
+    for link in entity_links:
+        if link.entity_id:
+            entity_id = str(link.entity_id)
+            entity_mention_counts[entity_id] = entity_mention_counts.get(entity_id, 0) + 1
+
+    if not entity_mention_counts:
+        return
+
+    # Sort by mention count descending and take top N
+    sorted_entities = sorted(
+        entity_mention_counts.items(),
+        key=lambda x: x[1],
+        reverse=True
+    )
+    entities_to_process = [e[0] for e in sorted_entities[:TOP_N_ENTITIES]]
+
+    obs_start = time.time()
+
+    # Convert to UUIDs
+    entity_uuids = [uuid.UUID(eid) if isinstance(eid, str) else eid for eid in entities_to_process]
+
+    # Batch query for entity names
+    entity_rows = await conn.fetch(
+        """
+        SELECT id, canonical_name FROM entities
+        WHERE id = ANY($1) AND bank_id = $2
+        """,
+        entity_uuids, bank_id
+    )
+    entity_names = {row['id']: row['canonical_name'] for row in entity_rows}
+
+    # Batch query for fact counts
+    fact_counts = await conn.fetch(
+        """
+        SELECT ue.entity_id, COUNT(*) as cnt
+        FROM unit_entities ue
+        JOIN memory_units mu ON ue.unit_id = mu.id
+        WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
+        GROUP BY ue.entity_id
+        """,
+        entity_uuids, bank_id
+    )
+    entity_fact_counts = {row['entity_id']: row['cnt'] for row in fact_counts}
+
+    # Filter entities that meet the threshold
+    entities_with_names = []
+    for entity_id in entities_to_process:
+        entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id
+        if entity_uuid not in entity_names:
+            continue
+        fact_count = entity_fact_counts.get(entity_uuid, 0)
+        if fact_count >= MIN_FACTS_THRESHOLD:
+            entities_with_names.append((entity_id, entity_names[entity_uuid]))
+
+    if not entities_with_names:
+        return
+
+    # Process entities SEQUENTIALLY (asyncpg doesn't allow concurrent queries on same connection)
+    # We must use the same connection to stay in the retain transaction
+    total_observations = 0
+
+    for entity_id, entity_name in entities_with_names:
+        try:
+            obs_ids = await _regenerate_entity_observations(
+                conn, embeddings_model, llm_config,
+                bank_id, entity_id, entity_name
+            )
+            total_observations += len(obs_ids)
+        except Exception as e:
+            logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
+
+    obs_time = time.time() - obs_start
+    if log_buffer is not None:
+        log_buffer.append(f"[11] Observations: {total_observations} observations for {len(entities_with_names)} entities in {obs_time:.3f}s")
+
+
+async def _regenerate_entity_observations(
+    conn,
+    embeddings_model,
+    llm_config,
+    bank_id: str,
+    entity_id: str,
+    entity_name: str
+) -> List[str]:
+    """
+    Regenerate observations for a single entity.
+
+    Uses the provided connection (part of retain transaction).
+
+    Args:
+        conn: Database connection (from the retain transaction)
+        embeddings_model: Embeddings model
+        llm_config: LLM configuration
+        bank_id: Bank identifier
+        entity_id: Entity UUID
+        entity_name: Canonical name of the entity
+
+    Returns:
+        List of created observation IDs
+    """
+    entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id
+
+    # Get all facts mentioning this entity (exclude observations themselves)
+    rows = await conn.fetch(
+        """
+        SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.fact_type
+        FROM memory_units mu
+        JOIN unit_entities ue ON mu.id = ue.unit_id
+        WHERE mu.bank_id = $1
+          AND ue.entity_id = $2
+          AND mu.fact_type IN ('world', 'experience')
+        ORDER BY mu.occurred_start DESC
+        LIMIT 50
+        """,
+        bank_id, entity_uuid
+    )
+
+    if not rows:
+        return []
+
+    # Convert to fact objects for observation extraction
+    facts = []
+    for row in rows:
+        occurred_start = row['occurred_start'].isoformat() if row['occurred_start'] else None
+        facts.append(MemoryFactForObservation(
+            id=str(row['id']),
+            text=row['text'],
+            fact_type=row['fact_type'],
+            context=row['context'],
+            occurred_start=occurred_start
+        ))
+
+    # Extract observations using LLM
+    observations = await observation_utils.extract_observations_from_facts(
+        llm_config,
+        entity_name,
+        facts
+    )
+
+    if not observations:
+        return []
+
+    # Delete old observations for this entity
+    await conn.execute(
+        """
+        DELETE FROM memory_units
+        WHERE id IN (
+            SELECT mu.id
+            FROM memory_units mu
+            JOIN unit_entities ue ON mu.id = ue.unit_id
+            WHERE mu.bank_id = $1
+              AND mu.fact_type = 'observation'
+              AND ue.entity_id = $2
+        )
+        """,
+        bank_id, entity_uuid
+    )
+
+    # Generate embeddings for new observations
+    embeddings = await embedding_utils.generate_embeddings_batch(
+        embeddings_model, observations
+    )
+
+    # Insert new observations
+    current_time = utcnow()
+    created_ids = []
+
+    for obs_text, embedding in zip(observations, embeddings):
+        result = await conn.fetchrow(
+            """
+            INSERT INTO memory_units (
+                bank_id, text, embedding, context, event_date,
+                occurred_start, occurred_end, mentioned_at,
+                fact_type, access_count
+            )
+            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'observation', 0)
+            RETURNING id
+            """,
+            bank_id,
+            obs_text,
+            str(embedding),
+            f"observation about {entity_name}",
+            current_time,
+            current_time,
+            current_time,
+            current_time
+        )
+        obs_id = str(result['id'])
+        created_ids.append(obs_id)
+
+        # Link observation to entity
+        await conn.execute(
+            """
+            INSERT INTO unit_entities (unit_id, entity_id)
+            VALUES ($1, $2)
+            """,
+            uuid.UUID(obs_id), entity_uuid
+        )
+
+    return created_ids
```
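The key design point of the new module is that it runs on the caller's connection, so it inherits the retain transaction: if observation extraction or any INSERT fails, the whole batch rolls back together. A minimal usage sketch, assuming an asyncpg pool and `entity_links` built earlier in the pipeline:

```python
import asyncpg

from hindsight_api.engine.retain import observation_regeneration

async def retain_with_observations(pool: asyncpg.Pool, embeddings_model, llm_config,
                                   bank_id: str, entity_links, log_buffer: list) -> None:
    async with pool.acquire() as conn:
        async with conn.transaction():
            # ... store facts, create temporal/semantic/entity/causal links ...
            # Same connection, same transaction: a failure here rolls everything back.
            await observation_regeneration.regenerate_observations_batch(
                conn, embeddings_model, llm_config, bank_id, entity_links, log_buffer
            )
```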
```diff
@@ -17,7 +17,7 @@ def utcnow():
     """Get current UTC time."""
     return datetime.now(timezone.utc)

-from .types import RetainContent, ExtractedFact, ProcessedFact
+from .types import RetainContent, ExtractedFact, ProcessedFact, EntityLink
 from . import (
     fact_extraction,
     embedding_processing,
@@ -25,7 +25,8 @@ from . import (
     chunk_storage,
     fact_storage,
     entity_processing,
-    link_creation
+    link_creation,
+    observation_regeneration
 )

 logger = logging.getLogger(__name__)
@@ -39,7 +40,6 @@ async def retain_batch(
     task_backend,
     format_date_fn,
     duplicate_checker_fn,
-    regenerate_observations_fn,
     bank_id: str,
     contents_dicts: List[Dict[str, Any]],
     document_id: Optional[str] = None,
@@ -58,7 +58,6 @@ async def retain_batch(
         task_backend: Task backend for background jobs
         format_date_fn: Function to format datetime to readable string
         duplicate_checker_fn: Function to check for duplicate facts
-        regenerate_observations_fn: Async function to regenerate observations for entities
         bank_id: Bank identifier
         contents_dicts: List of content dictionaries
         document_id: Optional document ID
```
```diff
@@ -288,50 +287,59 @@ async def retain_batch(

     # Create temporal links
     step_start = time.time()
-    await link_creation.create_temporal_links_batch(conn, bank_id, unit_ids)
-    log_buffer.append(f"[7] Temporal links: {time.time() - step_start:.3f}s")
+    temporal_link_count = await link_creation.create_temporal_links_batch(conn, bank_id, unit_ids)
+    log_buffer.append(f"[7] Temporal links: {temporal_link_count} links in {time.time() - step_start:.3f}s")

     # Create semantic links
     step_start = time.time()
     embeddings_for_links = [fact.embedding for fact in non_duplicate_facts]
-    await link_creation.create_semantic_links_batch(conn, bank_id, unit_ids, embeddings_for_links)
-    log_buffer.append(f"[8] Semantic links: {time.time() - step_start:.3f}s")
+    semantic_link_count = await link_creation.create_semantic_links_batch(conn, bank_id, unit_ids, embeddings_for_links)
+    log_buffer.append(f"[8] Semantic links: {semantic_link_count} links in {time.time() - step_start:.3f}s")

     # Insert entity links
     step_start = time.time()
     if entity_links:
         await entity_processing.insert_entity_links_batch(conn, entity_links)
-    log_buffer.append(f"[9] Entity links: {time.time() - step_start:.3f}s")
+    log_buffer.append(f"[9] Entity links: {len(entity_links) if entity_links else 0} links in {time.time() - step_start:.3f}s")

     # Create causal links
     step_start = time.time()
     causal_link_count = await link_creation.create_causal_links_batch(conn, unit_ids, non_duplicate_facts)
     log_buffer.append(f"[10] Causal links: {causal_link_count} links in {time.time() - step_start:.3f}s")

+    # Regenerate observations INSIDE transaction for atomicity
+    await observation_regeneration.regenerate_observations_batch(
+        conn,
+        embeddings_model,
+        llm_config,
+        bank_id,
+        entity_links,
+        log_buffer
+    )
+
     # Map results back to original content items
     result_unit_ids = _map_results_to_contents(
         contents, extracted_facts, is_duplicate_flags, unit_ids
     )

-
-    log_buffer.append(f"{'='*60}")
-    log_buffer.append(f"RETAIN_BATCH COMPLETE: {len(unit_ids)} units in {total_time:.3f}s")
-    if document_ids_added:
-        log_buffer.append(f"Documents: {', '.join(document_ids_added)}")
-    log_buffer.append(f"{'='*60}")
-
-    logger.info("\n" + "\n".join(log_buffer) + "\n")
-
-    # Trigger background tasks AFTER transaction commits
+    # Trigger background tasks AFTER transaction commits (opinion reinforcement only)
     await _trigger_background_tasks(
         task_backend,
-        regenerate_observations_fn,
         bank_id,
         unit_ids,
-        non_duplicate_facts
-        entity_links
+        non_duplicate_facts
     )

+    # Log final summary
+    total_time = time.time() - start_time
+    log_buffer.append(f"{'='*60}")
+    log_buffer.append(f"RETAIN_BATCH COMPLETE: {len(unit_ids)} units in {total_time:.3f}s")
+    if document_ids_added:
+        log_buffer.append(f"Documents: {', '.join(document_ids_added)}")
+    log_buffer.append(f"{'='*60}")
+
+    logger.info("\n" + "\n".join(log_buffer) + "\n")
+
     return result_unit_ids


```
```diff
@@ -367,13 +375,11 @@ def _map_results_to_contents(

 async def _trigger_background_tasks(
     task_backend,
-    regenerate_observations_fn,
     bank_id: str,
     unit_ids: List[str],
     facts: List[ProcessedFact],
-    entity_links: List
 ) -> None:
-    """Trigger opinion reinforcement
+    """Trigger opinion reinforcement as background task (after transaction commits)."""
     # Trigger opinion reinforcement if there are entities
     fact_entities = [[e.name for e in fact.entities] for fact in facts]
     if any(fact_entities):
@@ -384,22 +390,3 @@ async def _trigger_background_tasks(
             'unit_texts': [fact.fact_text for fact in facts],
             'unit_entities': fact_entities
         })
-
-    # Regenerate observations synchronously for top entities
-    TOP_N_ENTITIES = 5
-    MIN_FACTS_THRESHOLD = 5
-
-    if entity_links and regenerate_observations_fn:
-        unique_entity_ids = set()
-        for link in entity_links:
-            # links are tuples: (unit_id, entity_id, confidence)
-            if len(link) >= 2 and link[1]:
-                unique_entity_ids.add(str(link[1]))
-
-        if unique_entity_ids:
-            # Run observation regeneration synchronously
-            await regenerate_observations_fn(
-                bank_id=bank_id,
-                entity_ids=list(unique_entity_ids)[:TOP_N_ENTITIES],
-                min_facts=MIN_FACTS_THRESHOLD
-            )
```
```diff
@@ -176,6 +176,20 @@ class ProcessedFact:
     )


+@dataclass
+class EntityLink:
+    """
+    Link between two memory units through a shared entity.
+
+    Used for entity-based graph connections in the memory graph.
+    """
+    from_unit_id: UUID
+    to_unit_id: UUID
+    entity_id: UUID
+    link_type: str = 'entity'
+    weight: float = 1.0
+
+
 @dataclass
 class RetainBatch:
     """
```
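The new `EntityLink` dataclass replaces the positional tuples the link helpers previously passed around; `link_type` and `weight` default to `'entity'` and `1.0`, so most call sites only supply the three UUIDs:

```python
from uuid import uuid4

from hindsight_api.engine.retain.types import EntityLink

link = EntityLink(from_unit_id=uuid4(), to_unit_id=uuid4(), entity_id=uuid4())
assert link.link_type == "entity" and link.weight == 1.0
```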
```diff
@@ -10,10 +10,8 @@ class CrossEncoderReranker:
     """
     Neural reranking using a cross-encoder model.

-
-
-    - Small model (80MB)
-    - Trained for passage re-ranking
+    Configured via environment variables (see cross_encoder.py).
+    Default local model is cross-encoder/ms-marco-MiniLM-L-6-v2.
     """

     def __init__(self, cross_encoder=None):
@@ -21,14 +19,12 @@ class CrossEncoderReranker:
         Initialize cross-encoder reranker.

         Args:
-            cross_encoder:
-
-                (loaded lazily for faster startup)
+            cross_encoder: CrossEncoderModel instance. If None, creates one from
+                environment variables (defaults to local provider)
         """
         if cross_encoder is None:
-            from hindsight_api.engine.cross_encoder import
-
-            cross_encoder = SentenceTransformersCrossEncoder()
+            from hindsight_api.engine.cross_encoder import create_cross_encoder_from_env
+            cross_encoder = create_cross_encoder_from_env()
         self.cross_encoder = cross_encoder

     def rerank(
```
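With this change `CrossEncoderReranker` no longer hard-codes `SentenceTransformersCrossEncoder`; a bare constructor builds whatever cross-encoder the environment configures, defaulting to the local ms-marco model. A usage sketch, assuming the class lives in `hindsight_api/engine/search/reranking.py` as the file list above suggests (the specific environment variable names are defined in `cross_encoder.py` and not shown in this diff):

```python
from hindsight_api.engine.cross_encoder import create_cross_encoder_from_env
from hindsight_api.engine.search.reranking import CrossEncoderReranker

# Default path: provider and model are read from the environment.
reranker = CrossEncoderReranker()

# Equivalent explicit form, handy when one cross-encoder instance is shared.
shared = create_cross_encoder_from_env()
reranker = CrossEncoderReranker(cross_encoder=shared)
```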
|