hindsight-api 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/api/mcp.py +1 -5
- hindsight_api/config.py +9 -0
- hindsight_api/engine/cross_encoder.py +1 -6
- hindsight_api/engine/llm_wrapper.py +33 -15
- hindsight_api/engine/memory_engine.py +71 -59
- hindsight_api/engine/search/__init__.py +15 -1
- hindsight_api/engine/search/graph_retrieval.py +235 -0
- hindsight_api/engine/search/mpfp_retrieval.py +454 -0
- hindsight_api/engine/search/retrieval.py +337 -163
- hindsight_api/engine/search/trace.py +1 -0
- hindsight_api/engine/search/tracer.py +8 -3
- hindsight_api/engine/search/types.py +4 -1
- hindsight_api/pg0.py +54 -326
- {hindsight_api-0.1.3.dist-info → hindsight_api-0.1.5.dist-info}/METADATA +6 -5
- {hindsight_api-0.1.3.dist-info → hindsight_api-0.1.5.dist-info}/RECORD +17 -15
- {hindsight_api-0.1.3.dist-info → hindsight_api-0.1.5.dist-info}/WHEEL +0 -0
- {hindsight_api-0.1.3.dist-info → hindsight_api-0.1.5.dist-info}/entry_points.txt +0 -0
hindsight_api/api/mcp.py
CHANGED

@@ -121,11 +121,7 @@ class MCPMiddleware:
         self.app = app
         self.memory = memory
         self.mcp_server = create_mcp_server(memory)
-
-        import warnings
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", DeprecationWarning)
-            self.mcp_app = self.mcp_server.sse_app()
+        self.mcp_app = self.mcp_server.http_app()

     async def __call__(self, scope, receive, send):
         if scope["type"] != "http":
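The removed sse_app() call was FastMCP's deprecated SSE transport; http_app() serves the newer streamable HTTP transport instead, so the DeprecationWarning suppression is no longer needed. A minimal sketch of how such a middleware can delegate to the mounted MCP ASGI app (the "/mcp" path and surrounding wiring are assumptions for illustration, not taken from this diff):

    # Sketch: route MCP traffic to the ASGI app returned by http_app().
    class MCPMiddlewareSketch:
        def __init__(self, app, mcp_app):
            self.app = app          # the wrapped ASGI application
            self.mcp_app = mcp_app  # e.g. mcp_server.http_app()

        async def __call__(self, scope, receive, send):
            if scope["type"] == "http" and scope["path"].startswith("/mcp"):
                await self.mcp_app(scope, receive, send)  # delegate to MCP
            else:
                await self.app(scope, receive, send)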
hindsight_api/config.py
CHANGED

@@ -29,6 +29,7 @@ ENV_HOST = "HINDSIGHT_API_HOST"
 ENV_PORT = "HINDSIGHT_API_PORT"
 ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
 ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
+ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"

 # Default values
 DEFAULT_DATABASE_URL = "pg0"
@@ -45,6 +46,7 @@ DEFAULT_HOST = "0.0.0.0"
 DEFAULT_PORT = 8888
 DEFAULT_LOG_LEVEL = "info"
 DEFAULT_MCP_ENABLED = True
+DEFAULT_GRAPH_RETRIEVER = "bfs"  # Options: "bfs", "mpfp"

 # Required embedding dimension for database schema
 EMBEDDING_DIMENSION = 384
@@ -79,6 +81,9 @@ class HindsightConfig:
     log_level: str
     mcp_enabled: bool

+    # Recall
+    graph_retriever: str
+
     @classmethod
     def from_env(cls) -> "HindsightConfig":
         """Create configuration from environment variables."""
@@ -107,6 +112,9 @@ class HindsightConfig:
             port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
             log_level=os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL),
             mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
+
+            # Recall
+            graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
         )

     def get_llm_base_url(self) -> str:
@@ -147,6 +155,7 @@ class HindsightConfig:
         logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
         logger.info(f"Embeddings: provider={self.embeddings_provider}")
         logger.info(f"Reranker: provider={self.reranker_provider}")
+        logger.info(f"Graph retriever: {self.graph_retriever}")


 def get_config() -> HindsightConfig:
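This makes the graph retrieval strategy switchable without code changes. A sketch of driving the choice from the environment (the env var name, default, and retriever class names come from this diff; the dispatch mapping itself is an illustrative assumption, not the package's actual wiring):

    import os

    from hindsight_api.engine.search import BFSGraphRetriever, MPFPGraphRetriever

    # Hypothetical dispatch table for illustration only.
    RETRIEVERS = {"bfs": BFSGraphRetriever, "mpfp": MPFPGraphRetriever}

    name = os.getenv("HINDSIGHT_API_GRAPH_RETRIEVER", "bfs")
    retriever = RETRIEVERS[name]()  # BFSGraphRetriever() unless overridden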
hindsight_api/engine/cross_encoder.py
CHANGED

@@ -101,12 +101,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         )

         logger.info(f"Reranker: initializing local provider with model {self.model_name}")
-
-        # Setting low_cpu_mem_usage=False and device_map=None ensures tensors are fully materialized
-        self._model = CrossEncoder(
-            self.model_name,
-            model_kwargs={"low_cpu_mem_usage": False, "device_map": None},
-        )
+        self._model = CrossEncoder(self.model_name)
         logger.info("Reranker: local provider initialized")

     def predict(self, pairs: List[Tuple[str, str]]) -> List[float]:
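For context, sentence-transformers' CrossEncoder scores (query, passage) pairs directly, which is how the reranker consumes it. A minimal sketch (the model name here is an illustrative choice, not read from the diff):

    from sentence_transformers import CrossEncoder

    model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")  # illustrative model
    scores = model.predict([
        ("when did the migration finish", "The cutover completed on March 3."),
        ("when did the migration finish", "Notes from the team lunch."),
    ])
    # Higher score = more relevant; these scores drive the reranking step.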
hindsight_api/engine/llm_wrapper.py
CHANGED

@@ -170,24 +170,42 @@ class LLMProvider:
             "messages": messages,
         }

-        if max_completion_tokens is not None:
-            call_params["max_completion_tokens"] = max_completion_tokens
         # Check if model supports reasoning parameter (o1, o3, gpt-5 families)
         model_lower = self.model.lower()
         is_reasoning_model = any(x in model_lower for x in ["gpt-5", "o1", "o3"])

+        # For GPT-4 and GPT-4.1 models, cap max_completion_tokens to 32000
+        # For GPT-4o models, cap to 16384
+        is_gpt4_model = any(x in model_lower for x in ["gpt-4.1", "gpt-4-"])
+        is_gpt4o_model = "gpt-4o" in model_lower
+        if max_completion_tokens is not None:
+            if is_gpt4o_model and max_completion_tokens > 16384:
+                max_completion_tokens = 16384
+            elif is_gpt4_model and max_completion_tokens > 32000:
+                max_completion_tokens = 32000
+            # For reasoning models, max_completion_tokens includes reasoning + output tokens
+            # Enforce minimum of 16000 to ensure enough space for both
+            if is_reasoning_model and max_completion_tokens < 16000:
+                max_completion_tokens = 16000
+            call_params["max_completion_tokens"] = max_completion_tokens
+
         # GPT-5/o1/o3 family doesn't support custom temperature (only default 1)
         if temperature is not None and not is_reasoning_model:
             call_params["temperature"] = temperature

+        # Set reasoning_effort for reasoning models (OpenAI gpt-5, o1, o3)
+        if is_reasoning_model and self.provider == "openai":
+            call_params["reasoning_effort"] = self.reasoning_effort
+
         # Provider-specific parameters
         if self.provider == "groq":
             call_params["seed"] = DEFAULT_LLM_SEED
-
-
-
-            "
-
+            extra_body = {"service_tier": "auto"}
+            # Only add reasoning parameters for reasoning models
+            if is_reasoning_model:
+                extra_body["reasoning_effort"] = self.reasoning_effort
+                extra_body["include_reasoning"] = False
+            call_params["extra_body"] = extra_body

         last_exception = None

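The clamping above is pure arithmetic on the model name and the requested budget, so it is easy to check in isolation. A standalone sketch of the same rules (the function name is illustrative):

    def clamp_completion_tokens(model, requested):
        # Mirrors the hunk above: 16384 cap for gpt-4o, 32000 for gpt-4/gpt-4.1,
        # and a 16000 floor for reasoning models (reasoning + output share it).
        if requested is None:
            return None
        m = model.lower()
        if "gpt-4o" in m:
            requested = min(requested, 16384)
        elif any(x in m for x in ["gpt-4.1", "gpt-4-"]):
            requested = min(requested, 32000)
        if any(x in m for x in ["gpt-5", "o1", "o3"]):
            requested = max(requested, 16000)
        return requested

    assert clamp_completion_tokens("gpt-4o-mini", 50000) == 16384
    assert clamp_completion_tokens("o3-mini", 4096) == 16000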
@@ -254,9 +272,9 @@ class LLMProvider:
                 raise

         except APIStatusError as e:
-            # Fast fail on
-            if
-                logger.error(f"
+            # Fast fail only on 401 (unauthorized) and 403 (forbidden) - these won't recover with retries
+            if e.status_code in (401, 403):
+                logger.error(f"Auth error (HTTP {e.status_code}), not retrying: {str(e)}")
                 raise

             last_exception = e
@@ -394,13 +412,13 @@ class LLMProvider:
                 raise

         except genai_errors.APIError as e:
-            # Fast fail on
-            if e.code
-                logger.error(f"Gemini
+            # Fast fail only on 401 (unauthorized) and 403 (forbidden) - these won't recover with retries
+            if e.code in (401, 403):
+                logger.error(f"Gemini auth error (HTTP {e.code}), not retrying: {str(e)}")
                 raise

-            # Retry on
-            if e.code in (429, 500, 502, 503, 504):
+            # Retry on retryable errors (rate limits, server errors, and other client errors like 400)
+            if e.code in (400, 429, 500, 502, 503, 504) or (e.code and e.code >= 500):
                 last_exception = e
                 if attempt < max_retries:
                     backoff = min(initial_backoff * (2 ** attempt), max_backoff)
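Both handlers follow the same policy: fail fast on auth errors, exponential backoff on everything retryable. A generic, self-contained sketch of that pattern (names and signature are illustrative, not the wrapper's actual API):

    import time

    class ApiError(Exception):
        def __init__(self, code):
            self.code = code

    FATAL = {401, 403}                          # retrying cannot fix bad credentials
    RETRYABLE = {400, 429, 500, 502, 503, 504}

    def call_with_retries(fn, max_retries=3, initial_backoff=1.0, max_backoff=30.0):
        for attempt in range(max_retries + 1):
            try:
                return fn()
            except ApiError as e:
                if e.code in FATAL:
                    raise                        # fast fail
                if e.code not in RETRYABLE and not (e.code and e.code >= 500):
                    raise                        # unknown error class: surface it
                if attempt == max_retries:
                    raise                        # retry budget exhausted
                time.sleep(min(initial_backoff * (2 ** attempt), max_backoff))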
hindsight_api/engine/memory_engine.py
CHANGED

@@ -1156,22 +1156,22 @@ class MemoryEngine:
         aggregated_timings = {"semantic": 0.0, "bm25": 0.0, "graph": 0.0, "temporal": 0.0}

         detected_temporal_constraint = None
-        for idx,
+        for idx, retrieval_result in enumerate(all_retrievals):
             # Log fact types in this retrieval batch
             ft_name = fact_type[idx] if idx < len(fact_type) else "unknown"
-            logger.debug(f"[RECALL {recall_id}] Fact type '{ft_name}': semantic={len(
+            logger.debug(f"[RECALL {recall_id}] Fact type '{ft_name}': semantic={len(retrieval_result.semantic)}, bm25={len(retrieval_result.bm25)}, graph={len(retrieval_result.graph)}, temporal={len(retrieval_result.temporal) if retrieval_result.temporal else 0}")

-            semantic_results.extend(
-            bm25_results.extend(
-            graph_results.extend(
-            if
-                temporal_results.extend(
+            semantic_results.extend(retrieval_result.semantic)
+            bm25_results.extend(retrieval_result.bm25)
+            graph_results.extend(retrieval_result.graph)
+            if retrieval_result.temporal:
+                temporal_results.extend(retrieval_result.temporal)
             # Track max timing for each method (since they run in parallel across fact types)
-            for method, duration in
-                aggregated_timings[method] = max(aggregated_timings
+            for method, duration in retrieval_result.timings.items():
+                aggregated_timings[method] = max(aggregated_timings.get(method, 0.0), duration)
             # Capture temporal constraint (same across all fact types)
-            if
-                detected_temporal_constraint =
+            if retrieval_result.temporal_constraint:
+                detected_temporal_constraint = retrieval_result.temporal_constraint

         # If no temporal results from any fact type, set to None
         if not temporal_results:
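Since the per-fact-type retrievals run concurrently, the wall-clock cost of each method is the maximum across fact types rather than the sum, which is exactly what the max() aggregation captures. A toy illustration with invented timings:

    timings_by_fact_type = [
        {"semantic": 0.12, "bm25": 0.05, "graph": 0.30},   # one fact type
        {"semantic": 0.20, "bm25": 0.04, "graph": 0.10},   # another, in parallel
    ]

    aggregated = {}
    for timings in timings_by_fact_type:
        for method, duration in timings.items():
            aggregated[method] = max(aggregated.get(method, 0.0), duration)

    print(aggregated)  # {'semantic': 0.2, 'bm25': 0.05, 'graph': 0.3}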
@@ -1203,49 +1203,57 @@ class MemoryEngine:
             temporal_info = f" | temporal_range={start_dt.strftime('%Y-%m-%d')} to {end_dt.strftime('%Y-%m-%d')}"
         log_buffer.append(f" [2] {total_retrievals}-way retrieval ({len(fact_type)} fact_types): {', '.join(timing_parts)} in {step_duration:.3f}s{temporal_info}")

-        # Record retrieval results for tracer
+        # Record retrieval results for tracer - per fact type
         if tracer:
             # Convert RetrievalResult to old tuple format for tracer
             def to_tuple_format(results):
                 return [(r.id, r.__dict__) for r in results]

-            # Add
-
-
-                results=to_tuple_format(semantic_results),
-                duration_seconds=aggregated_timings["semantic"],
-                score_field="similarity",
-                metadata={"limit": thinking_budget}
-            )
+            # Add retrieval results per fact type (to show parallel execution in UI)
+            for idx, rr in enumerate(all_retrievals):
+                ft_name = fact_type[idx] if idx < len(fact_type) else "unknown"

-
-
-
-
-
-
-
-
+                # Add semantic retrieval results for this fact type
+                tracer.add_retrieval_results(
+                    method_name="semantic",
+                    results=to_tuple_format(rr.semantic),
+                    duration_seconds=rr.timings.get("semantic", 0.0),
+                    score_field="similarity",
+                    metadata={"limit": thinking_budget},
+                    fact_type=ft_name
+                )

-
-
-
-
-
-
-
-
+                # Add BM25 retrieval results for this fact type
+                tracer.add_retrieval_results(
+                    method_name="bm25",
+                    results=to_tuple_format(rr.bm25),
+                    duration_seconds=rr.timings.get("bm25", 0.0),
+                    score_field="bm25_score",
+                    metadata={"limit": thinking_budget},
+                    fact_type=ft_name
+                )

-
-            if temporal_results:
+                # Add graph retrieval results for this fact type
                 tracer.add_retrieval_results(
-                    method_name="
-                    results=to_tuple_format(
-                    duration_seconds=
-                    score_field="
-                    metadata={"budget": thinking_budget}
+                    method_name="graph",
+                    results=to_tuple_format(rr.graph),
+                    duration_seconds=rr.timings.get("graph", 0.0),
+                    score_field="activation",
+                    metadata={"budget": thinking_budget},
+                    fact_type=ft_name
                 )

+                # Add temporal retrieval results for this fact type (even if empty, to show it ran)
+                if rr.temporal is not None:
+                    tracer.add_retrieval_results(
+                        method_name="temporal",
+                        results=to_tuple_format(rr.temporal),
+                        duration_seconds=rr.timings.get("temporal", 0.0),
+                        score_field="temporal_score",
+                        metadata={"budget": thinking_budget},
+                        fact_type=ft_name
+                    )
+
             # Record entry points (from semantic results) for legacy graph view
             for rank, retrieval in enumerate(semantic_results[:10], start=1):  # Top 10 as entry points
                 tracer.add_entry_point(retrieval.id, retrieval.text, retrieval.similarity or 0.0, rank)
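Note the temporal branch tests rr.temporal is not None rather than truthiness: an empty list means temporal retrieval ran and found nothing (still worth tracing), while None means it never ran. A tiny illustration:

    ran_but_empty = []    # temporal retrieval executed, zero hits
    never_ran = None      # no temporal constraint detected, retrieval skipped

    for temporal in (ran_but_empty, never_ran):
        if temporal is not None:
            print(f"traced temporal with {len(temporal)} results")  # fires once, for []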
@@ -1287,31 +1295,24 @@ class MemoryEngine:
         step_duration = time.time() - step_start
         log_buffer.append(f" [4] Reranking: {len(scored_results)} candidates scored in {step_duration:.3f}s")

-        if tracer:
-            # Convert to old format for tracer
-            results_dict = [sr.to_dict() for sr in scored_results]
-            tracer_merged = [(mc.id, mc.retrieval.__dict__, {"rrf_score": mc.rrf_score, **mc.source_ranks})
-                             for mc in merged_candidates]
-            tracer.add_reranked(results_dict, tracer_merged)
-            tracer.add_phase_metric("reranking", step_duration, {
-                "reranker_type": "cross-encoder",
-                "candidates_reranked": len(scored_results)
-            })
-
         # Step 4.5: Combine cross-encoder score with retrieval signals
         # This preserves retrieval work (RRF, temporal, recency) instead of pure cross-encoder ranking
         if scored_results:
-            # Normalize RRF scores to [0, 1] range
+            # Normalize RRF scores to [0, 1] range using min-max normalization
             rrf_scores = [sr.candidate.rrf_score for sr in scored_results]
-            max_rrf = max(rrf_scores) if rrf_scores else
+            max_rrf = max(rrf_scores) if rrf_scores else 0.0
             min_rrf = min(rrf_scores) if rrf_scores else 0.0
-            rrf_range = max_rrf - min_rrf
+            rrf_range = max_rrf - min_rrf  # Don't force to 1.0, let fallback handle it

             # Calculate recency based on occurred_start (more recent = higher score)
             now = utcnow()
             for sr in scored_results:
-                # Normalize RRF score
-
+                # Normalize RRF score (0-1 range, 0.5 if all same)
+                if rrf_range > 0:
+                    sr.rrf_normalized = (sr.candidate.rrf_score - min_rrf) / rrf_range
+                else:
+                    # All RRF scores are the same, use neutral value
+                    sr.rrf_normalized = 0.5

                 # Calculate recency (decay over 365 days, minimum 0.1)
                 sr.recency = 0.5  # default for missing dates
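Min-max normalization maps the raw RRF scores onto [0, 1] so they can be blended with the cross-encoder score; when every candidate has the same RRF score the range collapses to zero and the code falls back to a neutral 0.5. A standalone sketch with invented scores:

    rrf_scores = [0.032, 0.047, 0.016]   # invented example values

    lo, hi = min(rrf_scores), max(rrf_scores)
    rng = hi - lo
    normalized = [(s - lo) / rng if rng > 0 else 0.5 for s in rrf_scores]
    print([round(n, 3) for n in normalized])  # [0.516, 1.0, 0.0]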
@@ -1343,6 +1344,17 @@ class MemoryEngine:
         scored_results.sort(key=lambda x: x.weight, reverse=True)
         log_buffer.append(f" [4.6] Combined scoring: cross_encoder(0.6) + rrf(0.2) + temporal(0.1) + recency(0.1)")

+        # Add reranked results to tracer AFTER combined scoring (so normalized values are included)
+        if tracer:
+            results_dict = [sr.to_dict() for sr in scored_results]
+            tracer_merged = [(mc.id, mc.retrieval.__dict__, {"rrf_score": mc.rrf_score, **mc.source_ranks})
+                             for mc in merged_candidates]
+            tracer.add_reranked(results_dict, tracer_merged)
+            tracer.add_phase_metric("reranking", step_duration, {
+                "reranker_type": "cross-encoder",
+                "candidates_reranked": len(scored_results)
+            })
+
         # Step 5: Truncate to thinking_budget * 2 for token filtering
         rerank_limit = thinking_budget * 2
         top_scored = scored_results[:rerank_limit]
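The [4.6] log line fixes the blend weights; as a formula, with each input already normalized to [0, 1] per the previous hunk (the function wrapper is a sketch, only the weights come from the diff):

    def combined_weight(cross_encoder, rrf_normalized, temporal, recency):
        # Weights from the [4.6] log line above.
        return 0.6 * cross_encoder + 0.2 * rrf_normalized + 0.1 * temporal + 0.1 * recency

    # A candidate the cross-encoder loves but retrieval ranked mid-pack:
    print(combined_weight(0.9, 0.4, 0.0, 0.5))  # 0.67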
hindsight_api/engine/search/__init__.py
CHANGED

@@ -3,13 +3,27 @@ Search module for memory retrieval.

 Provides modular search architecture:
 - Retrieval: 4-way parallel (semantic + BM25 + graph + temporal)
+- Graph retrieval: Pluggable strategies (BFS, PPR)
 - Reranking: Pluggable strategies (heuristic, cross-encoder)
 """

-from .retrieval import
+from .retrieval import (
+    retrieve_parallel,
+    get_default_graph_retriever,
+    set_default_graph_retriever,
+    ParallelRetrievalResult,
+)
+from .graph_retrieval import GraphRetriever, BFSGraphRetriever
+from .mpfp_retrieval import MPFPGraphRetriever
 from .reranking import CrossEncoderReranker

 __all__ = [
     "retrieve_parallel",
+    "get_default_graph_retriever",
+    "set_default_graph_retriever",
+    "ParallelRetrievalResult",
+    "GraphRetriever",
+    "BFSGraphRetriever",
+    "MPFPGraphRetriever",
     "CrossEncoderReranker",
 ]
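The new exports make the strategy swappable at runtime. A plausible use (only the names come from the diff; the argument shape and return value are assumptions):

    from hindsight_api.engine.search import (
        MPFPGraphRetriever,
        set_default_graph_retriever,
        get_default_graph_retriever,
    )

    # Assumed: the setter takes a retriever instance and the getter returns it.
    set_default_graph_retriever(MPFPGraphRetriever())
    assert get_default_graph_retriever().name == "mpfp"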
hindsight_api/engine/search/graph_retrieval.py
ADDED

@@ -0,0 +1,235 @@
+"""
+Graph retrieval strategies for memory recall.
+
+This module provides an abstraction for graph-based memory retrieval,
+allowing different algorithms (BFS spreading activation, PPR, etc.) to be
+swapped without changing the rest of the recall pipeline.
+"""
+
+from abc import ABC, abstractmethod
+from typing import List, Optional
+from datetime import datetime
+import logging
+
+from .types import RetrievalResult
+from ..db_utils import acquire_with_retry
+
+logger = logging.getLogger(__name__)
+
+
+class GraphRetriever(ABC):
+    """
+    Abstract base class for graph-based memory retrieval.
+
+    Implementations traverse the memory graph (entity links, temporal links,
+    causal links) to find relevant facts that might not be found by
+    semantic or keyword search alone.
+    """
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Return identifier for this retrieval strategy (e.g., 'bfs', 'mpfp')."""
+        pass
+
+    @abstractmethod
+    async def retrieve(
+        self,
+        pool,
+        query_embedding_str: str,
+        bank_id: str,
+        fact_type: str,
+        budget: int,
+        query_text: Optional[str] = None,
+        semantic_seeds: Optional[List[RetrievalResult]] = None,
+        temporal_seeds: Optional[List[RetrievalResult]] = None,
+    ) -> List[RetrievalResult]:
+        """
+        Retrieve relevant facts via graph traversal.
+
+        Args:
+            pool: Database connection pool
+            query_embedding_str: Query embedding as string (for finding entry points)
+            bank_id: Memory bank identifier
+            fact_type: Fact type to filter ('world', 'experience', 'opinion', 'observation')
+            budget: Maximum number of nodes to explore/return
+            query_text: Original query text (optional, for some strategies)
+            semantic_seeds: Pre-computed semantic entry points (from semantic retrieval)
+            temporal_seeds: Pre-computed temporal entry points (from temporal retrieval)
+
+        Returns:
+            List of RetrievalResult objects with activation scores set
+        """
+        pass
+
+
+class BFSGraphRetriever(GraphRetriever):
+    """
+    Graph retrieval using BFS-style spreading activation.
+
+    Starting from semantic entry points, spreads activation through
+    the memory graph (entity, temporal, causal links) using breadth-first
+    traversal with decaying activation.
+
+    This is the original Hindsight graph retrieval algorithm.
+    """
+
+    def __init__(
+        self,
+        entry_point_limit: int = 5,
+        entry_point_threshold: float = 0.5,
+        activation_decay: float = 0.8,
+        min_activation: float = 0.1,
+        batch_size: int = 20,
+    ):
+        """
+        Initialize BFS graph retriever.
+
+        Args:
+            entry_point_limit: Maximum number of entry points to start from
+            entry_point_threshold: Minimum semantic similarity for entry points
+            activation_decay: Decay factor per hop (activation *= decay)
+            min_activation: Minimum activation to continue spreading
+            batch_size: Number of nodes to process per batch (for neighbor fetching)
+        """
+        self.entry_point_limit = entry_point_limit
+        self.entry_point_threshold = entry_point_threshold
+        self.activation_decay = activation_decay
+        self.min_activation = min_activation
+        self.batch_size = batch_size
+
+    @property
+    def name(self) -> str:
+        return "bfs"
+
+    async def retrieve(
+        self,
+        pool,
+        query_embedding_str: str,
+        bank_id: str,
+        fact_type: str,
+        budget: int,
+        query_text: Optional[str] = None,
+        semantic_seeds: Optional[List[RetrievalResult]] = None,
+        temporal_seeds: Optional[List[RetrievalResult]] = None,
+    ) -> List[RetrievalResult]:
+        """
+        Retrieve facts using BFS spreading activation.
+
+        Algorithm:
+        1. Find entry points (top semantic matches above threshold)
+        2. BFS traversal: visit neighbors, propagate decaying activation
+        3. Boost causal links (causes, enables, prevents)
+        4. Return visited nodes up to budget
+
+        Note: BFS finds its own entry points via embedding search.
+        The semantic_seeds and temporal_seeds parameters are accepted
+        for interface compatibility but not used.
+        """
+        async with acquire_with_retry(pool) as conn:
+            return await self._retrieve_with_conn(
+                conn, query_embedding_str, bank_id, fact_type, budget
+            )
+
+    async def _retrieve_with_conn(
+        self,
+        conn,
+        query_embedding_str: str,
+        bank_id: str,
+        fact_type: str,
+        budget: int,
+    ) -> List[RetrievalResult]:
+        """Internal implementation with connection."""
+
+        # Step 1: Find entry points
+        entry_points = await conn.fetch(
+            """
+            SELECT id, text, context, event_date, occurred_start, occurred_end,
+                   mentioned_at, access_count, embedding, fact_type, document_id, chunk_id,
+                   1 - (embedding <=> $1::vector) AS similarity
+            FROM memory_units
+            WHERE bank_id = $2
+              AND embedding IS NOT NULL
+              AND fact_type = $3
+              AND (1 - (embedding <=> $1::vector)) >= $4
+            ORDER BY embedding <=> $1::vector
+            LIMIT $5
+            """,
+            query_embedding_str, bank_id, fact_type,
+            self.entry_point_threshold, self.entry_point_limit
+        )
+
+        if not entry_points:
+            return []
+
+        # Step 2: BFS spreading activation
+        visited = set()
+        results = []
+        queue = [
+            (RetrievalResult.from_db_row(dict(r)), r["similarity"])
+            for r in entry_points
+        ]
+        budget_remaining = budget
+
+        while queue and budget_remaining > 0:
+            # Collect a batch of nodes to process
+            batch_nodes = []
+            batch_activations = {}
+
+            while queue and len(batch_nodes) < self.batch_size and budget_remaining > 0:
+                current, activation = queue.pop(0)
+                unit_id = current.id
+
+                if unit_id not in visited:
+                    visited.add(unit_id)
+                    budget_remaining -= 1
+                    current.activation = activation
+                    results.append(current)
+                    batch_nodes.append(current.id)
+                    batch_activations[unit_id] = activation
+
+            # Batch fetch neighbors
+            if batch_nodes and budget_remaining > 0:
+                max_neighbors = len(batch_nodes) * 20
+                neighbors = await conn.fetch(
+                    """
+                    SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.occurred_end,
+                           mu.mentioned_at, mu.access_count, mu.embedding, mu.fact_type,
+                           mu.document_id, mu.chunk_id,
+                           ml.weight, ml.link_type, ml.from_unit_id
+                    FROM memory_links ml
+                    JOIN memory_units mu ON ml.to_unit_id = mu.id
+                    WHERE ml.from_unit_id = ANY($1::uuid[])
+                      AND ml.weight >= $2
+                      AND mu.fact_type = $3
+                    ORDER BY ml.weight DESC
+                    LIMIT $4
+                    """,
+                    batch_nodes, self.min_activation, fact_type, max_neighbors
+                )
+
+                for n in neighbors:
+                    neighbor_id = str(n["id"])
+                    if neighbor_id not in visited:
+                        parent_id = str(n["from_unit_id"])
+                        parent_activation = batch_activations.get(parent_id, 0.5)
+
+                        # Boost causal links
+                        link_type = n["link_type"]
+                        base_weight = n["weight"]
+
+                        if link_type in ("causes", "caused_by"):
+                            causal_boost = 2.0
+                        elif link_type in ("enables", "prevents"):
+                            causal_boost = 1.5
+                        else:
+                            causal_boost = 1.0
+
+                        effective_weight = base_weight * causal_boost
+                        new_activation = parent_activation * effective_weight * self.activation_decay
+
+                        if new_activation > self.min_activation:
+                            neighbor_result = RetrievalResult.from_db_row(dict(n))
+                            queue.append((neighbor_result, new_activation))
+
+        return results