hindsight-api 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +252 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/api/http.py +282 -20
- hindsight_api/api/mcp.py +47 -52
- hindsight_api/config.py +238 -6
- hindsight_api/engine/cross_encoder.py +599 -86
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/embeddings.py +453 -26
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +8 -4
- hindsight_api/engine/llm_wrapper.py +241 -27
- hindsight_api/engine/memory_engine.py +609 -122
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/response_models.py +38 -0
- hindsight_api/engine/retain/fact_extraction.py +388 -192
- hindsight_api/engine/retain/fact_storage.py +34 -8
- hindsight_api/engine/retain/link_utils.py +24 -16
- hindsight_api/engine/retain/orchestrator.py +52 -17
- hindsight_api/engine/retain/types.py +9 -0
- hindsight_api/engine/search/graph_retrieval.py +42 -13
- hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +847 -200
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +1 -1
- hindsight_api/engine/search/trace.py +12 -0
- hindsight_api/engine/search/tracer.py +24 -1
- hindsight_api/engine/search/types.py +21 -0
- hindsight_api/engine/task_backend.py +109 -18
- hindsight_api/engine/utils.py +1 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/main.py +56 -4
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -1
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
- hindsight_api-0.3.0.dist-info/RECORD +82 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.2.1.dist-info/RECORD +0 -75
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0

@@ -45,6 +45,7 @@ async def insert_facts_batch(
     metadata_jsons = []
     chunk_ids = []
     document_ids = []
+    tags_list = []
 
     for fact in facts:
         fact_texts.append(fact.fact_text)
@@ -65,16 +66,31 @@ async def insert_facts_batch(
         chunk_ids.append(fact.chunk_id)
         # Use per-fact document_id if available, otherwise fallback to batch-level document_id
         document_ids.append(fact.document_id if fact.document_id else document_id)
+        # Convert tags to JSON string for proper batch insertion (PostgreSQL unnest doesn't handle 2D arrays well)
+        tags_list.append(json.dumps(fact.tags if fact.tags else []))
 
     # Batch insert all facts
+    # Note: tags are passed as JSON strings and converted back to varchar[] via jsonb_array_elements_text + array_agg
     results = await conn.fetch(
         f"""
-
-
-
-
-
+        WITH input_data AS (
+            SELECT * FROM unnest(
+                $2::text[], $3::vector[], $4::timestamptz[], $5::timestamptz[], $6::timestamptz[], $7::timestamptz[],
+                $8::text[], $9::text[], $10::float[], $11::int[], $12::jsonb[], $13::text[], $14::text[], $15::jsonb[]
+            ) AS t(text, embedding, event_date, occurred_start, occurred_end, mentioned_at,
+                   context, fact_type, confidence_score, access_count, metadata, chunk_id, document_id, tags_json)
         )
+        INSERT INTO {fq_table("memory_units")} (bank_id, text, embedding, event_date, occurred_start, occurred_end, mentioned_at,
+                                                context, fact_type, confidence_score, access_count, metadata, chunk_id, document_id, tags)
+        SELECT
+            $1,
+            text, embedding, event_date, occurred_start, occurred_end, mentioned_at,
+            context, fact_type, confidence_score, access_count, metadata, chunk_id, document_id,
+            COALESCE(
+                (SELECT array_agg(elem) FROM jsonb_array_elements_text(tags_json) AS elem),
+                '{{}}'::varchar[]
+            )
+        FROM input_data
         RETURNING id
         """,
         bank_id,
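
Each fact's tag list is JSON-encoded on the Python side because, as the comment above notes, `unnest` cannot zip a ragged list-of-lists into a single array parameter. A minimal sketch of the same round trip in isolation, assuming a hypothetical `items(name text, tags varchar[])` table rather than the real `memory_units` schema:

```python
import json

import asyncpg


async def insert_items(conn: asyncpg.Connection, rows: list[tuple[str, list[str]]]) -> None:
    """Batch-insert rows whose per-row tag lists have different lengths."""
    names = [name for name, _ in rows]
    # Each tag list becomes one JSON string, so the whole column is a flat jsonb[].
    tags_json = [json.dumps(tags) for _, tags in rows]
    await conn.execute(
        """
        WITH input_data AS (
            SELECT * FROM unnest($1::text[], $2::jsonb[]) AS t(name, tags_json)
        )
        INSERT INTO items (name, tags)
        SELECT
            name,
            COALESCE(
                (SELECT array_agg(elem) FROM jsonb_array_elements_text(tags_json) AS elem),
                '{}'::varchar[]
            )
        FROM input_data
        """,
        names,
        tags_json,
    )
```

The `COALESCE` mirrors the change above: aggregating an empty JSON array yields NULL, which would otherwise be inserted instead of an empty array.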
@@ -91,6 +107,7 @@ async def insert_facts_batch(
         metadata_jsons,
         chunk_ids,
         document_ids,
+        tags_list,
     )
 
     unit_ids = [str(row["id"]) for row in results]
@@ -121,7 +138,13 @@ async def ensure_bank_exists(conn, bank_id: str) -> None:
 
 
 async def handle_document_tracking(
-    conn,
+    conn,
+    bank_id: str,
+    document_id: str,
+    combined_content: str,
+    is_first_batch: bool,
+    retain_params: dict | None = None,
+    document_tags: list[str] | None = None,
 ) -> None:
     """
     Handle document tracking in the database.
@@ -133,6 +156,7 @@ async def handle_document_tracking(
         combined_content: Combined content text from all content items
         is_first_batch: Whether this is the first batch (for chunked operations)
         retain_params: Optional parameters passed during retain (context, event_date, etc.)
+        document_tags: Optional list of tags to associate with the document
     """
     import hashlib
 
@@ -149,13 +173,14 @@ async def handle_document_tracking(
     # Insert document (or update if exists from concurrent operations)
     await conn.execute(
         f"""
-        INSERT INTO {fq_table("documents")} (id, bank_id, original_text, content_hash, metadata, retain_params)
-        VALUES ($1, $2, $3, $4, $5, $6)
+        INSERT INTO {fq_table("documents")} (id, bank_id, original_text, content_hash, metadata, retain_params, tags)
+        VALUES ($1, $2, $3, $4, $5, $6, $7)
         ON CONFLICT (id, bank_id) DO UPDATE
         SET original_text = EXCLUDED.original_text,
             content_hash = EXCLUDED.content_hash,
             metadata = EXCLUDED.metadata,
            retain_params = EXCLUDED.retain_params,
+            tags = EXCLUDED.tags,
            updated_at = NOW()
        """,
        document_id,
@@ -164,4 +189,5 @@ async def handle_document_tracking(
        content_hash,
        json.dumps({}),  # Empty metadata dict
        json.dumps(retain_params) if retain_params else None,
+        document_tags or [],
    )
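
Unlike the per-fact path above, `document_tags or []` is bound directly: asyncpg maps a Python `list[str]` to a PostgreSQL array parameter, so no JSON detour is needed when each statement carries a single array value. A reduced sketch, assuming a `documents` table shaped like the one in this diff:

```python
import asyncpg


async def update_document_tags(conn: asyncpg.Connection, doc_id: str, bank_id: str, tags: list[str]) -> None:
    # A plain list[str] binds straight to a varchar[] column; illustrative helper, not the package's API.
    await conn.execute(
        "UPDATE documents SET tags = $3, updated_at = NOW() WHERE id = $1 AND bank_id = $2",
        doc_id,
        bank_id,
        tags,
    )
```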
@@ -479,14 +479,18 @@ async def create_temporal_links_batch_per_fact(
 
     if links:
         insert_start = time_mod.time()
-
-
-
-
-
-
-
-
+        # Batch inserts to avoid timeout on large batches
+        BATCH_SIZE = 1000
+        for batch_start in range(0, len(links), BATCH_SIZE):
+            batch = links[batch_start : batch_start + BATCH_SIZE]
+            await conn.executemany(
+                f"""
+                INSERT INTO {fq_table("memory_links")} (from_unit_id, to_unit_id, link_type, weight, entity_id)
+                VALUES ($1, $2, $3, $4, $5)
+                ON CONFLICT (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid)) DO NOTHING
+                """,
+                batch,
+            )
        _log(log_buffer, f" [7.4] Insert {len(links)} temporal links: {time_mod.time() - insert_start:.3f}s")
 
    return len(links)
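
Both this hunk and the semantic-links hunk below replace one large insert with fixed-size `executemany` chunks. The pattern in isolation (the table name and tuple shape follow the diff, but the helper itself is illustrative, not the project's actual function):

```python
import asyncpg

BATCH_SIZE = 1000  # same cap as the diff; bounds the work done per round trip


async def insert_links_chunked(conn: asyncpg.Connection, links: list[tuple]) -> None:
    """Insert link tuples in fixed-size chunks so no single executemany call can stall."""
    for start in range(0, len(links), BATCH_SIZE):
        await conn.executemany(
            "INSERT INTO memory_links (from_unit_id, to_unit_id, link_type, weight, entity_id) "
            "VALUES ($1, $2, $3, $4, $5) ON CONFLICT DO NOTHING",
            links[start : start + BATCH_SIZE],
        )
```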
@@ -644,14 +648,18 @@ async def create_semantic_links_batch(
 
     if all_links:
         insert_start = time_mod.time()
-
-
-
-
-
-
-
-
+        # Batch inserts to avoid timeout on large batches
+        BATCH_SIZE = 1000
+        for batch_start in range(0, len(all_links), BATCH_SIZE):
+            batch = all_links[batch_start : batch_start + BATCH_SIZE]
+            await conn.executemany(
+                f"""
+                INSERT INTO {fq_table("memory_links")} (from_unit_id, to_unit_id, link_type, weight, entity_id)
+                VALUES ($1, $2, $3, $4, $5)
+                ON CONFLICT (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid)) DO NOTHING
+                """,
+                batch,
+            )
        _log(
            log_buffer, f" [8.3] Insert {len(all_links)} semantic links: {time_mod.time() - insert_start:.3f}s"
        )
@@ -9,6 +9,7 @@ import time
 import uuid
 from datetime import UTC, datetime
 
+from ...config import get_config
 from ..db_utils import acquire_with_retry
 from . import bank_utils
 
@@ -18,6 +19,7 @@ def utcnow():
     return datetime.now(UTC)
 
 
+from ..response_models import TokenUsage
 from . import (
     chunk_storage,
     deduplication,
@@ -47,7 +49,8 @@ async def retain_batch(
     is_first_batch: bool = True,
     fact_type_override: str | None = None,
     confidence_score: float | None = None,
-
+    document_tags: list[str] | None = None,
+) -> tuple[list[list[str]], TokenUsage]:
     """
     Process a batch of content through the retain pipeline.
 
@@ -65,9 +68,10 @@ async def retain_batch(
         is_first_batch: Whether this is the first batch
         fact_type_override: Override fact type for all facts
         confidence_score: Confidence score for opinions
+        document_tags: Tags applied to all items in this batch
 
     Returns:
-
+        Tuple of (unit ID lists, token usage for fact extraction)
     """
     start_time = time.time()
     total_chars = sum(len(item.get("content", "")) for item in contents_dicts)
@@ -86,12 +90,16 @@ async def retain_batch(
     # Convert dicts to RetainContent objects
     contents = []
     for item in contents_dicts:
+        # Merge item-level tags with document-level tags
+        item_tags = item.get("tags", []) or []
+        merged_tags = list(set(item_tags + (document_tags or [])))
         content = RetainContent(
             content=item["content"],
             context=item.get("context", ""),
             event_date=item.get("event_date") or utcnow(),
             metadata=item.get("metadata", {}),
             entities=item.get("entities", []),
+            tags=merged_tags,
         )
         contents.append(content)
 
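
The merge is a plain set union, so duplicate tags collapse and ordering is not preserved. The same logic distilled into a standalone helper (illustrative name, same behavior):

```python
def merge_tags(item_tags: list[str] | None, document_tags: list[str] | None) -> list[str]:
    """Union of per-item and document-level tags; duplicates collapse, order is not guaranteed."""
    return list(set((item_tags or []) + (document_tags or [])))


assert sorted(merge_tags(["alice"], ["team:research", "alice"])) == ["alice", "team:research"]
```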
@@ -99,7 +107,7 @@ async def retain_batch(
     step_start = time.time()
     extract_opinions = fact_type_override == "opinion"
 
-    extracted_facts, chunks = await fact_extraction.extract_facts_from_contents(
+    extracted_facts, chunks, usage = await fact_extraction.extract_facts_from_contents(
         contents, llm_config, agent_name, extract_opinions
     )
     log_buffer.append(
@@ -129,7 +137,7 @@ async def retain_batch(
             if first_item.get("metadata"):
                 retain_params["metadata"] = first_item["metadata"]
             await fact_storage.handle_document_tracking(
-                conn, bank_id, document_id, combined_content, is_first_batch, retain_params
+                conn, bank_id, document_id, combined_content, is_first_batch, retain_params, document_tags
             )
         else:
             # Check for per-item document_ids
@@ -157,14 +165,14 @@ async def retain_batch(
                 if first_item.get("metadata"):
                     retain_params["metadata"] = first_item["metadata"]
                 await fact_storage.handle_document_tracking(
-                    conn, bank_id, doc_id, combined_content, is_first_batch, retain_params
+                    conn, bank_id, doc_id, combined_content, is_first_batch, retain_params, document_tags
                 )
 
         total_time = time.time() - start_time
         logger.info(
             f"RETAIN_BATCH COMPLETE: 0 facts extracted from {len(contents)} contents in {total_time:.3f}s (document tracked, no facts)"
         )
-        return [[] for _ in contents]
+        return [[] for _ in contents], usage
 
     # Apply fact_type_override if provided
     if fact_type_override:
@@ -223,7 +231,7 @@ async def retain_batch(
                 retain_params["metadata"] = first_item["metadata"]
 
             await fact_storage.handle_document_tracking(
-                conn, bank_id, document_id, combined_content, is_first_batch, retain_params
+                conn, bank_id, document_id, combined_content, is_first_batch, retain_params, document_tags
            )
            document_ids_added.append(document_id)
            doc_id_mapping[None] = document_id  # For backwards compatibility
@@ -267,7 +275,13 @@ async def retain_batch(
                    retain_params["metadata"] = first_item["metadata"]
 
                await fact_storage.handle_document_tracking(
-                    conn,
+                    conn,
+                    bank_id,
+                    actual_doc_id,
+                    combined_content,
+                    is_first_batch,
+                    retain_params,
+                    document_tags,
                )
                document_ids_added.append(actual_doc_id)
 
@@ -344,7 +358,7 @@ async def retain_batch(
    non_duplicate_facts = deduplication.filter_duplicates(processed_facts, is_duplicate_flags)
 
    if not non_duplicate_facts:
-        return [[] for _ in contents]
+        return [[] for _ in contents], usage
 
    # Insert facts (document_id is now stored per-fact)
    step_start = time.time()
@@ -394,16 +408,26 @@ async def retain_batch(
     causal_link_count = await link_creation.create_causal_links_batch(conn, unit_ids, non_duplicate_facts)
     log_buffer.append(f"[10] Causal links: {causal_link_count} links in {time.time() - step_start:.3f}s")
 
-    # Regenerate observations
-
-
-
+    # Regenerate observations - sync (in transaction) or async (background task)
+    config = get_config()
+    if config.retain_observations_async:
+        # Queue for async processing after transaction commits
+        entity_ids_for_async = list(set(link.entity_id for link in entity_links)) if entity_links else []
+        log_buffer.append(
+            f"[11] Observations: queued {len(entity_ids_for_async)} entities for async processing"
+        )
+    else:
+        # Run synchronously inside transaction for atomicity
+        await observation_regeneration.regenerate_observations_batch(
+            conn, embeddings_model, llm_config, bank_id, entity_links, log_buffer
+        )
+        entity_ids_for_async = []
 
     # Map results back to original content items
     result_unit_ids = _map_results_to_contents(contents, extracted_facts, is_duplicate_flags, unit_ids)
 
-    # Trigger background tasks AFTER transaction commits
-    await _trigger_background_tasks(task_backend, bank_id, unit_ids, non_duplicate_facts)
+    # Trigger background tasks AFTER transaction commits
+    await _trigger_background_tasks(task_backend, bank_id, unit_ids, non_duplicate_facts, entity_ids_for_async)
 
     # Log final summary
     total_time = time.time() - start_time
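
The new flag chooses between regenerating observations inside the retain transaction (atomic, but slower commits) and deferring the affected entity IDs to a background task submitted after commit. A sketch of just that control flow; only the `retain_observations_async` flag name is taken from the diff, everything else is illustrative:

```python
from collections.abc import Awaitable, Callable
from dataclasses import dataclass


@dataclass
class Config:
    retain_observations_async: bool = False  # mirrors the flag read via get_config() above


async def regenerate_or_defer(
    config: Config,
    entity_ids: list[str],
    run_inline: Callable[[list[str]], Awaitable[None]],
) -> list[str]:
    """Return entity IDs to queue after commit, or [] if regeneration already ran inline."""
    if config.retain_observations_async:
        return entity_ids  # caller submits these to the task backend once the transaction commits
    await run_inline(entity_ids)  # synchronous path: atomic with the rest of the retain transaction
    return []
```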
@@ -415,7 +439,7 @@ async def retain_batch(
 
     logger.info("\n" + "\n".join(log_buffer) + "\n")
 
-    return result_unit_ids
+    return result_unit_ids, usage
 
 
 def _map_results_to_contents(
@@ -453,8 +477,9 @@ async def _trigger_background_tasks(
     bank_id: str,
     unit_ids: list[str],
     facts: list[ProcessedFact],
+    entity_ids_for_observations: list[str] | None = None,
 ) -> None:
-    """Trigger
+    """Trigger background tasks after transaction commits."""
     # Trigger opinion reinforcement if there are entities
     fact_entities = [[e.name for e in fact.entities] for fact in facts]
     if any(fact_entities):
@@ -467,3 +492,13 @@ async def _trigger_background_tasks(
                 "unit_entities": fact_entities,
             }
         )
+
+    # Trigger observation regeneration if async mode is enabled
+    if entity_ids_for_observations:
+        await task_backend.submit_task(
+            {
+                "type": "regenerate_observations",
+                "bank_id": bank_id,
+                "entity_ids": entity_ids_for_observations,
+            }
+        )
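
The payload above is consumed by whatever backend handles `submit_task`; the dispatch side is not part of this hunk. A generic sketch of such a consumer, with the handler registry and function names being purely illustrative:

```python
from collections.abc import Awaitable, Callable

Handler = Callable[[dict], Awaitable[None]]


async def dispatch(task: dict, handlers: dict[str, Handler]) -> None:
    """Route a submitted task payload to its handler; unknown types are surfaced loudly."""
    task_type = task.get("type")
    handler = handlers.get(task_type)
    if handler is None:
        raise ValueError(f"no handler registered for task type {task_type!r}")
    await handler(task)  # e.g. a regenerate_observations handler reading bank_id / entity_ids
```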
@@ -21,6 +21,7 @@ class RetainContentDict(TypedDict, total=False):
         metadata: Custom key-value metadata (optional)
         document_id: Document ID for this content item (optional)
         entities: User-provided entities to merge with extracted entities (optional)
+        tags: Visibility scope tags for this content item (optional)
     """
 
     content: str  # Required
@@ -29,6 +30,7 @@ class RetainContentDict(TypedDict, total=False):
     metadata: dict[str, str]
     document_id: str
     entities: list[dict[str, str]]  # [{"text": "...", "type": "..."}]
+    tags: list[str]  # Visibility scope tags
 
 
 def _now_utc() -> datetime:
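
With the new field, a retain payload can scope visibility per content item. An illustrative payload using only fields declared in `RetainContentDict` (the values themselves are made up):

```python
from hindsight_api.engine.retain.types import RetainContentDict

item: RetainContentDict = {
    "content": "Alice presented the Q3 roadmap.",
    "context": "weekly sync",
    "entities": [{"text": "Alice", "type": "person"}],
    "tags": ["team:research", "project:roadmap"],  # visibility scope used later when filtering recall
}
```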
@@ -49,6 +51,7 @@ class RetainContent:
     event_date: datetime = field(default_factory=_now_utc)
     metadata: dict[str, str] = field(default_factory=dict)
     entities: list[dict[str, str]] = field(default_factory=list)  # User-provided entities
+    tags: list[str] = field(default_factory=list)  # Visibility scope tags
 
 
 @dataclass
@@ -113,6 +116,7 @@ class ExtractedFact:
     context: str = ""
     mentioned_at: datetime | None = None
     metadata: dict[str, str] = field(default_factory=dict)
+    tags: list[str] = field(default_factory=list)  # Visibility scope tags
 
 
 @dataclass
@@ -158,6 +162,9 @@ class ProcessedFact:
     # Track which content this fact came from (for user entity merging)
     content_index: int = 0
 
+    # Visibility scope tags
+    tags: list[str] = field(default_factory=list)
+
     @property
     def is_duplicate(self) -> bool:
         """Check if this fact was marked as a duplicate."""
@@ -201,6 +208,7 @@ class ProcessedFact:
             causal_relations=extracted_fact.causal_relations,
             chunk_id=chunk_id,
             content_index=extracted_fact.content_index,
+            tags=extracted_fact.tags,
        )
 
 
@@ -232,6 +240,7 @@ class RetainBatch:
    document_id: str | None = None
    fact_type_override: str | None = None
    confidence_score: float | None = None
+    document_tags: list[str] = field(default_factory=list)  # Tags applied to all items
 
    # Extracted data (populated during processing)
    extracted_facts: list[ExtractedFact] = field(default_factory=list)
@@ -11,7 +11,8 @@ from abc import ABC, abstractmethod
 
 from ..db_utils import acquire_with_retry
 from ..memory_engine import fq_table
-from .
+from .tags import TagsMatch, filter_results_by_tags
+from .types import MPFPTimings, RetrievalResult
 
 logger = logging.getLogger(__name__)
 
@@ -42,7 +43,10 @@ class GraphRetriever(ABC):
         query_text: str | None = None,
         semantic_seeds: list[RetrievalResult] | None = None,
         temporal_seeds: list[RetrievalResult] | None = None,
-
+        adjacency=None,  # TypedAdjacency, optional pre-loaded graph
+        tags: list[str] | None = None,  # Visibility scope tags for filtering
+        tags_match: TagsMatch = "any",  # How to match tags: 'any' (OR) or 'all' (AND)
+    ) -> tuple[list[RetrievalResult], MPFPTimings | None]:
         """
         Retrieve relevant facts via graph traversal.
 
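
`TagsMatch` is imported from the new `.tags` module, whose contents this diff does not show. Given the `"any"` default and the OR/AND comment above, a plausible definition is simply a string literal alias:

```python
from typing import Literal

# Plausible shape of the alias imported from .tags (actual definition not shown in this diff):
TagsMatch = Literal["any", "all"]  # "any" = OR across the requested tags, "all" = AND
```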
@@ -55,9 +59,11 @@ class GraphRetriever(ABC):
             query_text: Original query text (optional, for some strategies)
             semantic_seeds: Pre-computed semantic entry points (from semantic retrieval)
             temporal_seeds: Pre-computed temporal entry points (from temporal retrieval)
+            adjacency: Pre-loaded typed adjacency graph (optional, for MPFP)
+            tags: Optional list of tags for visibility filtering (OR matching)
 
         Returns:
-            List of RetrievalResult
+            Tuple of (List of RetrievalResult with activation scores, optional timing info)
         """
         pass
 
@@ -111,7 +117,10 @@ class BFSGraphRetriever(GraphRetriever):
         query_text: str | None = None,
         semantic_seeds: list[RetrievalResult] | None = None,
         temporal_seeds: list[RetrievalResult] | None = None,
-
+        adjacency=None,  # Not used by BFS
+        tags: list[str] | None = None,
+        tags_match: TagsMatch = "any",
+    ) -> tuple[list[RetrievalResult], MPFPTimings | None]:
         """
         Retrieve facts using BFS spreading activation.
 
@@ -122,11 +131,14 @@ class BFSGraphRetriever(GraphRetriever):
         4. Return visited nodes up to budget
 
         Note: BFS finds its own entry points via embedding search.
-        The semantic_seeds and
+        The semantic_seeds, temporal_seeds, and adjacency parameters are accepted
         for interface compatibility but not used.
         """
         async with acquire_with_retry(pool) as conn:
-
+            results = await self._retrieve_with_conn(
+                conn, query_embedding_str, bank_id, fact_type, budget, tags=tags, tags_match=tags_match
+            )
+            return results, None
 
     async def _retrieve_with_conn(
         self,
@@ -135,33 +147,46 @@ class BFSGraphRetriever(GraphRetriever):
         bank_id: str,
         fact_type: str,
         budget: int,
+        tags: list[str] | None = None,
+        tags_match: TagsMatch = "any",
     ) -> list[RetrievalResult]:
         """Internal implementation with connection."""
+        from .tags import build_tags_where_clause_simple
+
+        tags_clause = build_tags_where_clause_simple(tags, 6, match=tags_match)
+        params = [query_embedding_str, bank_id, fact_type, self.entry_point_threshold, self.entry_point_limit]
+        if tags:
+            params.append(tags)
 
         # Step 1: Find entry points
         entry_points = await conn.fetch(
             f"""
             SELECT id, text, context, event_date, occurred_start, occurred_end,
-                   mentioned_at, access_count, embedding, fact_type, document_id, chunk_id,
+                   mentioned_at, access_count, embedding, fact_type, document_id, chunk_id, tags,
                    1 - (embedding <=> $1::vector) AS similarity
             FROM {fq_table("memory_units")}
             WHERE bank_id = $2
               AND embedding IS NOT NULL
              AND fact_type = $3
              AND (1 - (embedding <=> $1::vector)) >= $4
+              {tags_clause}
            ORDER BY embedding <=> $1::vector
            LIMIT $5
            """,
-
-            bank_id,
-            fact_type,
-            self.entry_point_threshold,
-            self.entry_point_limit,
+            *params,
        )
 
        if not entry_points:
+            logger.debug(
+                f"[BFS] No entry points found for fact_type={fact_type} (tags={tags}, tags_match={tags_match})"
+            )
            return []
 
+        logger.debug(
+            f"[BFS] Found {len(entry_points)} entry points for fact_type={fact_type} "
+            f"(tags={tags}, tags_match={tags_match})"
+        )
+
        # Step 2: BFS spreading activation
        visited = set()
        results = []
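
`build_tags_where_clause_simple` also lives in the new `tags.py`, which this diff does not include. Given how it is called here (a parameter index of 6 and an `any`/`all` mode, with the caller only binding the extra parameter when tags are present), one plausible shape uses PostgreSQL's array overlap and containment operators:

```python
# A guess at what such a helper could look like; the real implementation in
# hindsight_api/engine/search/tags.py is not shown in this diff and may differ.
def build_tags_where_clause_sketch(tags: list[str] | None, param_index: int, match: str = "any") -> str:
    if not tags:
        return ""  # no filtering; the caller does not bind the extra parameter
    if match == "all":
        return f"AND tags @> ${param_index}::varchar[]"  # row must contain every requested tag
    return f"AND tags && ${param_index}::varchar[]"      # row must share at least one requested tag
```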
@@ -192,7 +217,7 @@ class BFSGraphRetriever(GraphRetriever):
                 f"""
                 SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.occurred_end,
                        mu.mentioned_at, mu.access_count, mu.embedding, mu.fact_type,
-                       mu.document_id, mu.chunk_id,
+                       mu.document_id, mu.chunk_id, mu.tags,
                        ml.weight, ml.link_type, ml.from_unit_id
                 FROM {fq_table("memory_links")} ml
                 JOIN {fq_table("memory_units")} mu ON ml.to_unit_id = mu.id
@@ -232,4 +257,8 @@ class BFSGraphRetriever(GraphRetriever):
                     neighbor_result = RetrievalResult.from_db_row(dict(n))
                     queue.append((neighbor_result, new_activation))
 
+        # Apply tags filtering (BFS may traverse into memories that don't match tags criteria)
+        if tags:
+            results = filter_results_by_tags(results, tags, match=tags_match)
+
         return results