hindsight-api 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- hindsight_api/__init__.py +10 -9
- hindsight_api/alembic/env.py +5 -8
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
- hindsight_api/api/__init__.py +10 -10
- hindsight_api/api/http.py +575 -593
- hindsight_api/api/mcp.py +30 -28
- hindsight_api/banner.py +13 -6
- hindsight_api/config.py +9 -13
- hindsight_api/engine/__init__.py +9 -9
- hindsight_api/engine/cross_encoder.py +22 -21
- hindsight_api/engine/db_utils.py +5 -4
- hindsight_api/engine/embeddings.py +22 -21
- hindsight_api/engine/entity_resolver.py +81 -75
- hindsight_api/engine/llm_wrapper.py +61 -79
- hindsight_api/engine/memory_engine.py +603 -625
- hindsight_api/engine/query_analyzer.py +100 -97
- hindsight_api/engine/response_models.py +105 -106
- hindsight_api/engine/retain/__init__.py +9 -16
- hindsight_api/engine/retain/bank_utils.py +34 -58
- hindsight_api/engine/retain/chunk_storage.py +4 -12
- hindsight_api/engine/retain/deduplication.py +9 -28
- hindsight_api/engine/retain/embedding_processing.py +4 -11
- hindsight_api/engine/retain/embedding_utils.py +3 -4
- hindsight_api/engine/retain/entity_processing.py +7 -17
- hindsight_api/engine/retain/fact_extraction.py +155 -165
- hindsight_api/engine/retain/fact_storage.py +11 -23
- hindsight_api/engine/retain/link_creation.py +11 -39
- hindsight_api/engine/retain/link_utils.py +166 -95
- hindsight_api/engine/retain/observation_regeneration.py +39 -52
- hindsight_api/engine/retain/orchestrator.py +72 -62
- hindsight_api/engine/retain/types.py +49 -43
- hindsight_api/engine/search/__init__.py +5 -5
- hindsight_api/engine/search/fusion.py +6 -15
- hindsight_api/engine/search/graph_retrieval.py +22 -23
- hindsight_api/engine/search/mpfp_retrieval.py +76 -92
- hindsight_api/engine/search/observation_utils.py +9 -16
- hindsight_api/engine/search/reranking.py +4 -7
- hindsight_api/engine/search/retrieval.py +87 -66
- hindsight_api/engine/search/scoring.py +5 -7
- hindsight_api/engine/search/temporal_extraction.py +8 -11
- hindsight_api/engine/search/think_utils.py +115 -39
- hindsight_api/engine/search/trace.py +68 -39
- hindsight_api/engine/search/tracer.py +44 -35
- hindsight_api/engine/search/types.py +20 -17
- hindsight_api/engine/task_backend.py +21 -26
- hindsight_api/engine/utils.py +25 -10
- hindsight_api/main.py +21 -40
- hindsight_api/mcp_local.py +190 -0
- hindsight_api/metrics.py +44 -30
- hindsight_api/migrations.py +10 -8
- hindsight_api/models.py +60 -72
- hindsight_api/pg0.py +22 -23
- hindsight_api/server.py +3 -6
- hindsight_api-0.1.7.dist-info/METADATA +178 -0
- hindsight_api-0.1.7.dist-info/RECORD +64 -0
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.1.5.dist-info/METADATA +0 -42
- hindsight_api-0.1.5.dist-info/RECORD +0 -63
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/WHEEL +0 -0
`hindsight_api/engine/search/mpfp_retrieval.py`

(Deleted lines that the upstream diff viewer truncated are reproduced below as rendered, not reconstructed.)

```diff
@@ -16,13 +16,12 @@ Key properties:
 
 import asyncio
 import logging
-from dataclasses import dataclass, field
-from typing import List, Dict, Optional, Tuple
 from collections import defaultdict
+from dataclasses import dataclass, field
 
-from .types import RetrievalResult
-from .graph_retrieval import GraphRetriever
 from ..db_utils import acquire_with_retry
+from .graph_retrieval import GraphRetriever
+from .types import RetrievalResult
 
 logger = logging.getLogger(__name__)
 
@@ -31,9 +30,11 @@ logger = logging.getLogger(__name__)
 # Data Classes
 # -----------------------------------------------------------------------------
 
+
 @dataclass
 class EdgeTarget:
     """A neighbor node with its edge weight."""
+
     node_id: str
     weight: float
 
@@ -41,19 +42,15 @@ class EdgeTarget:
 @dataclass
 class TypedAdjacency:
     """Adjacency lists split by edge type."""
+
     # edge_type -> from_node_id -> list of (to_node_id, weight)
-    graphs:
+    graphs: dict[str, dict[str, list[EdgeTarget]]] = field(default_factory=dict)
 
-    def get_neighbors(self, edge_type: str, node_id: str) ->
+    def get_neighbors(self, edge_type: str, node_id: str) -> list[EdgeTarget]:
         """Get neighbors for a node via a specific edge type."""
         return self.graphs.get(edge_type, {}).get(node_id, [])
 
-    def get_normalized_neighbors(
-        self,
-        edge_type: str,
-        node_id: str,
-        top_k: int
-    ) -> List[EdgeTarget]:
+    def get_normalized_neighbors(self, edge_type: str, node_id: str, top_k: int) -> list[EdgeTarget]:
         """Get top-k neighbors with weights normalized to sum to 1."""
         neighbors = self.get_neighbors(edge_type, node_id)[:top_k]
         if not neighbors:
@@ -63,45 +60,49 @@ class TypedAdjacency:
         if total == 0:
             return []
 
-        return [
-            EdgeTarget(node_id=n.node_id, weight=n.weight / total)
-            for n in neighbors
-        ]
+        return [EdgeTarget(node_id=n.node_id, weight=n.weight / total) for n in neighbors]
 
 
 @dataclass
 class PatternResult:
     """Result from a single pattern traversal."""
-
-
+
+    pattern: list[str]
+    scores: dict[str, float]  # node_id -> accumulated mass
 
 
 @dataclass
 class MPFPConfig:
     """Configuration for MPFP algorithm."""
-
-
-
+
+    alpha: float = 0.15  # teleport/keep probability
+    threshold: float = 1e-6  # mass pruning threshold (lower = explore more)
+    top_k_neighbors: int = 20  # fan-out limit per node
 
     # Patterns from semantic seeds
-    patterns_semantic:
-    [
-
-
-
-
-
+    patterns_semantic: list[list[str]] = field(
+        default_factory=lambda: [
+            ["semantic", "semantic"],  # topic expansion
+            ["entity", "temporal"],  # entity timeline
+            ["semantic", "causes"],  # reasoning chains (forward)
+            ["semantic", "caused_by"],  # reasoning chains (backward)
+            ["entity", "semantic"],  # entity context
+        ]
+    )
 
     # Patterns from temporal seeds
-    patterns_temporal:
-    [
-
-
+    patterns_temporal: list[list[str]] = field(
+        default_factory=lambda: [
+            ["temporal", "semantic"],  # what was happening then
+            ["temporal", "entity"],  # who was involved then
+        ]
+    )
 
 
 @dataclass
 class SeedNode:
     """An entry point node with its initial score."""
+
     node_id: str
     score: float  # initial mass (e.g., similarity score)
 
@@ -110,9 +111,10 @@ class SeedNode:
 # Core Algorithm
 # -----------------------------------------------------------------------------
 
+
 def mpfp_traverse(
-    seeds:
-    pattern:
+    seeds: list[SeedNode],
+    pattern: list[str],
     adjacency: TypedAdjacency,
     config: MPFPConfig,
 ) -> PatternResult:
@@ -131,20 +133,18 @@ def mpfp_traverse(
     if not seeds:
         return PatternResult(pattern=pattern, scores={})
 
-    scores:
+    scores: dict[str, float] = {}
 
     # Initialize frontier with seed masses (normalized)
     total_seed_score = sum(s.score for s in seeds)
     if total_seed_score == 0:
         total_seed_score = len(seeds)  # fallback to uniform
 
-    frontier:
-        s.node_id: s.score / total_seed_score for s in seeds
-    }
+    frontier: dict[str, float] = {s.node_id: s.score / total_seed_score for s in seeds}
 
     # Follow pattern hop by hop
     for edge_type in pattern:
-        next_frontier:
+        next_frontier: dict[str, float] = {}
 
         for node_id, mass in frontier.items():
             if mass < config.threshold:
@@ -155,15 +155,10 @@ def mpfp_traverse(
 
             # Push (1-α) to neighbors
             push_mass = (1 - config.alpha) * mass
-            neighbors = adjacency.get_normalized_neighbors(
-                edge_type, node_id, config.top_k_neighbors
-            )
+            neighbors = adjacency.get_normalized_neighbors(edge_type, node_id, config.top_k_neighbors)
 
             for neighbor in neighbors:
-                next_frontier[neighbor.node_id] = (
-                    next_frontier.get(neighbor.node_id, 0) +
-                    push_mass * neighbor.weight
-                )
+                next_frontier[neighbor.node_id] = next_frontier.get(neighbor.node_id, 0) + push_mass * neighbor.weight
 
         frontier = next_frontier
 
```
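Taken together, `mpfp_traverse` is a forward-push walk along a fixed edge-type pattern: at each hop a node keeps an `alpha` fraction of its mass and pushes the rest to its top-k weight-normalized neighbors, with `threshold` pruning negligible frontier entries. Below is a minimal standalone sketch of that loop on a toy two-hop graph. The dataclass shapes mirror the diff, but the toy edges are invented, and the keep/deposit bookkeeping is a guess at the parts of the function the hunks do not show.

```python
from collections import defaultdict
from dataclasses import dataclass


@dataclass
class EdgeTarget:
    node_id: str
    weight: float


# Toy typed adjacency over four invented nodes.
graphs = {
    "semantic": {"a": [EdgeTarget("b", 0.6), EdgeTarget("c", 0.4)]},
    "entity": {"b": [EdgeTarget("d", 1.0)]},
}


def normalized_neighbors(edge_type: str, node_id: str, top_k: int = 20) -> list[EdgeTarget]:
    neighbors = graphs.get(edge_type, {}).get(node_id, [])[:top_k]
    total = sum(n.weight for n in neighbors)
    return [EdgeTarget(n.node_id, n.weight / total) for n in neighbors] if total else []


def traverse(seeds: dict[str, float], pattern: list[str], alpha: float = 0.15, threshold: float = 1e-6) -> dict[str, float]:
    scores: dict[str, float] = defaultdict(float)
    frontier = dict(seeds)  # node_id -> mass, assumed already normalized
    for edge_type in pattern:
        next_frontier: dict[str, float] = defaultdict(float)
        for node_id, mass in frontier.items():
            if mass < threshold:
                continue
            scores[node_id] += alpha * mass  # keep alpha at the current node (assumed)
            for n in normalized_neighbors(edge_type, node_id):
                next_frontier[n.node_id] += (1 - alpha) * mass * n.weight
        frontier = next_frontier
    for node_id, mass in frontier.items():  # deposit surviving mass (assumed)
        scores[node_id] += mass
    return dict(scores)


print(traverse({"a": 1.0}, ["semantic", "entity"]))
# ≈ {'a': 0.15, 'b': 0.0765, 'c': 0.051, 'd': 0.4335}
```

Mass flows `a -> {b, c}` on the semantic hop, then `b -> d` on the entity hop, so the two-hop target `d` ends up with the largest score.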
The same line joins and annotation fixes land in `rrf_fusion`:

```diff
@@ -176,10 +171,10 @@ def mpfp_traverse(
 
 
 def rrf_fusion(
-    results:
+    results: list[PatternResult],
     k: int = 60,
     top_k: int = 50,
-) ->
+) -> list[tuple[str, float]]:
     """
     Reciprocal Rank Fusion to combine pattern results.
 
@@ -191,28 +186,20 @@ def rrf_fusion(
     Returns:
         List of (node_id, fused_score) tuples, sorted by score descending
     """
-    fused:
+    fused: dict[str, float] = {}
 
     for result in results:
         if not result.scores:
             continue
 
         # Rank nodes by their score in this pattern
-        ranked = sorted(
-            result.scores.keys(),
-            key=lambda n: result.scores[n],
-            reverse=True
-        )
+        ranked = sorted(result.scores.keys(), key=lambda n: result.scores[n], reverse=True)
 
         for rank, node_id in enumerate(ranked):
             fused[node_id] = fused.get(node_id, 0) + 1.0 / (k + rank + 1)
 
     # Sort by fused score and return top-k
-    sorted_results = sorted(
-        fused.items(),
-        key=lambda x: x[1],
-        reverse=True
-    )
+    sorted_results = sorted(fused.items(), key=lambda x: x[1], reverse=True)
 
     return sorted_results[:top_k]
 
```
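The RRF arithmetic is easy to verify by hand: each pattern contributes `1 / (k + rank + 1)` per node, so with the default `k=60` a first-place vote is worth 1/61, and consensus across patterns beats a single high rank. A small self-contained check (node names and scores invented):

```python
def rrf(rankings: list[dict[str, float]], k: int = 60) -> list[tuple[str, float]]:
    fused: dict[str, float] = {}
    for scores in rankings:
        ranked = sorted(scores, key=scores.get, reverse=True)
        for rank, node in enumerate(ranked):
            fused[node] = fused.get(node, 0) + 1.0 / (k + rank + 1)
    return sorted(fused.items(), key=lambda x: x[1], reverse=True)


pattern_a = {"n1": 0.9, "n2": 0.5}  # ranks: n1 -> 0, n2 -> 1
pattern_b = {"n2": 0.8, "n3": 0.1}  # ranks: n2 -> 0, n3 -> 1

print(rrf([pattern_a, pattern_b]))
# n2 wins with 1/62 + 1/61 ≈ 0.0325; n1 and n3 each get a single
# vote (1/61 ≈ 0.0164 and 1/62 ≈ 0.0161) despite n1's top rank in pattern_a.
```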
The remaining hunks in this module cover database loading and the `MPFPGraphRetriever` class:

```diff
@@ -221,6 +208,7 @@ def rrf_fusion(
 # Database Loading
 # -----------------------------------------------------------------------------
 
+
 async def load_typed_adjacency(pool, bank_id: str) -> TypedAdjacency:
     """
     Load all edges for a bank, split by edge type.
@@ -237,31 +225,27 @@ async def load_typed_adjacency(pool, bank_id: str) -> TypedAdjacency:
             AND ml.weight >= 0.1
         ORDER BY ml.from_unit_id, ml.weight DESC
         """,
-        bank_id
+        bank_id,
     )
 
-    graphs:
-        lambda: defaultdict(list)
-    )
+    graphs: dict[str, dict[str, list[EdgeTarget]]] = defaultdict(lambda: defaultdict(list))
 
     for row in rows:
-        from_id = str(row[
-        to_id = str(row[
-        link_type = row[
-        weight = row[
+        from_id = str(row["from_unit_id"])
+        to_id = str(row["to_unit_id"])
+        link_type = row["link_type"]
+        weight = row["weight"]
 
-        graphs[link_type][from_id].append(
-            EdgeTarget(node_id=to_id, weight=weight)
-        )
+        graphs[link_type][from_id].append(EdgeTarget(node_id=to_id, weight=weight))
 
     return TypedAdjacency(graphs=dict(graphs))
 
 
 async def fetch_memory_units_by_ids(
     pool,
-    node_ids:
+    node_ids: list[str],
     fact_type: str,
-) ->
+) -> list[RetrievalResult]:
     """Fetch full memory unit details for a list of node IDs."""
     if not node_ids:
         return []
@@ -276,7 +260,7 @@ async def fetch_memory_units_by_ids(
             AND fact_type = $2
         """,
         node_ids,
-        fact_type
+        fact_type,
     )
 
     return [RetrievalResult.from_db_row(dict(r)) for r in rows]
@@ -286,6 +270,7 @@ async def fetch_memory_units_by_ids(
 # Graph Retriever Implementation
 # -----------------------------------------------------------------------------
 
+
 class MPFPGraphRetriever(GraphRetriever):
     """
     Graph retrieval using Meta-Path Forward Push.
@@ -294,7 +279,7 @@ class MPFPGraphRetriever(GraphRetriever):
     then fuses results via RRF.
     """
 
-    def __init__(self, config:
+    def __init__(self, config: MPFPConfig | None = None):
         """
         Initialize MPFP retriever.
 
@@ -302,7 +287,7 @@ class MPFPGraphRetriever(GraphRetriever):
             config: Algorithm configuration (uses defaults if None)
         """
         self.config = config or MPFPConfig()
-        self._adjacency_cache:
+        self._adjacency_cache: dict[str, TypedAdjacency] = {}
 
     @property
     def name(self) -> str:
@@ -315,10 +300,10 @@ class MPFPGraphRetriever(GraphRetriever):
         bank_id: str,
         fact_type: str,
         budget: int,
-        query_text:
-        semantic_seeds:
-        temporal_seeds:
-    ) ->
+        query_text: str | None = None,
+        semantic_seeds: list[RetrievalResult] | None = None,
+        temporal_seeds: list[RetrievalResult] | None = None,
+    ) -> list[RetrievalResult]:
         """
         Retrieve facts using MPFP algorithm.
 
@@ -339,14 +324,12 @@ class MPFPGraphRetriever(GraphRetriever):
         adjacency = await load_typed_adjacency(pool, bank_id)
 
         # Convert seeds to SeedNode format
-        semantic_seed_nodes = self._convert_seeds(semantic_seeds,
-        temporal_seed_nodes = self._convert_seeds(temporal_seeds,
+        semantic_seed_nodes = self._convert_seeds(semantic_seeds, "similarity")
+        temporal_seed_nodes = self._convert_seeds(temporal_seeds, "temporal_score")
 
         # If no semantic seeds provided, fall back to finding our own
         if not semantic_seed_nodes:
-            semantic_seed_nodes = await self._find_semantic_seeds(
-                pool, query_embedding_str, bank_id, fact_type
-            )
+            semantic_seed_nodes = await self._find_semantic_seeds(pool, query_embedding_str, bank_id, fact_type)
 
         # Run all patterns in parallel
         tasks = []
@@ -407,9 +390,9 @@ class MPFPGraphRetriever(GraphRetriever):
 
     def _convert_seeds(
         self,
-        seeds:
+        seeds: list[RetrievalResult] | None,
         score_attr: str,
-    ) ->
+    ) -> list[SeedNode]:
         """Convert RetrievalResult seeds to SeedNode format."""
         if not seeds:
             return []
@@ -431,7 +414,7 @@ class MPFPGraphRetriever(GraphRetriever):
         fact_type: str,
         limit: int = 20,
         threshold: float = 0.3,
-    ) ->
+    ) -> list[SeedNode]:
         """Fallback: find semantic seeds via embedding search."""
         async with acquire_with_retry(pool) as conn:
             rows = await conn.fetch(
@@ -445,10 +428,11 @@ class MPFPGraphRetriever(GraphRetriever):
                 ORDER BY embedding <=> $1::vector
                 LIMIT $5
                 """,
-                query_embedding_str,
+                query_embedding_str,
+                bank_id,
+                fact_type,
+                threshold,
+                limit,
             )
 
-        return [
-            SeedNode(node_id=str(r['id']), score=r['similarity'])
-            for r in rows
-        ]
+        return [SeedNode(node_id=str(r["id"]), score=r["similarity"]) for r in rows]
```
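Stepping back, nearly all of the churn in this module follows two mechanical patterns: `typing.List`/`Dict`/`Optional` annotations become PEP 585 builtin generics and PEP 604 `X | None` unions, and multi-line calls are joined onto one line with trailing commas added. This is consistent with a Ruff/Black-style reformat at a longer line length, though that is an inference from the hunks, not something the package states. In miniature (both functions below are illustrative, not from the package):

```python
# Before: Python 3.8-era typing imports, as in the removed lines above.
from typing import Dict, List, Optional


def get_neighbors_old(graphs: Dict[str, List[str]], node: Optional[str]) -> List[str]:
    return graphs.get(node or "", [])


# After: PEP 585 builtin generics (3.9+) and PEP 604 unions (3.10+).
def get_neighbors_new(graphs: dict[str, list[str]], node: str | None) -> list[str]:
    return graphs.get(node or "", [])
```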
`hindsight_api/engine/search/observation_utils.py`

```diff
@@ -6,7 +6,7 @@ about an entity, without personality influence.
 """
 
 import logging
-
+
 from pydantic import BaseModel, Field
 
 from ..response_models import MemoryFact
@@ -16,18 +16,17 @@ logger = logging.getLogger(__name__)
 
 class Observation(BaseModel):
     """An observation about an entity."""
+
     observation: str = Field(description="The observation text - a factual statement about the entity")
 
 
 class ObservationExtractionResponse(BaseModel):
     """Response containing extracted observations."""
-    observations: List[Observation] = Field(
-        default_factory=list,
-        description="List of observations about the entity"
-    )
 
+    observations: list[Observation] = Field(default_factory=list, description="List of observations about the entity")
 
-def format_facts_for_observation_prompt(facts: List[MemoryFact]) -> str:
+
+def format_facts_for_observation_prompt(facts: list[MemoryFact]) -> str:
     """Format facts as text for observation extraction prompt."""
     import json
 
@@ -35,9 +34,7 @@ def format_facts_for_observation_prompt(facts: List[MemoryFact]) -> str:
         return "[]"
     formatted = []
     for fact in facts:
-        fact_obj = {
-            "text": fact.text
-        }
+        fact_obj = {"text": fact.text}
 
         # Add context if available
         if fact.context:
@@ -92,11 +89,7 @@ def get_observation_system_message() -> str:
     return "You are an objective observer synthesizing facts about an entity. Generate clear, factual observations without opinions or personality influence. Be concise and accurate."
 
 
-async def extract_observations_from_facts(
-    llm_config,
-    entity_name: str,
-    facts: List[MemoryFact]
-) -> List[str]:
+async def extract_observations_from_facts(llm_config, entity_name: str, facts: list[MemoryFact]) -> list[str]:
     """
     Extract observations from facts about an entity using LLM.
 
@@ -118,10 +111,10 @@ async def extract_observations_from_facts(
     result = await llm_config.call(
         messages=[
             {"role": "system", "content": get_observation_system_message()},
-            {"role": "user", "content": prompt}
+            {"role": "user", "content": prompt},
         ],
         response_format=ObservationExtractionResponse,
-        scope="memory_extract_observation"
+        scope="memory_extract_observation",
    )
 
    observations = [op.observation for op in result.observations]
```
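The `response_format=ObservationExtractionResponse` argument implies the LLM wrapper parses the completion into the pydantic model before `result.observations` is read. A sketch of that contract, assuming pydantic v2 (`model_validate`; v1 would use `parse_obj`) and an invented payload:

```python
from pydantic import BaseModel, Field


class Observation(BaseModel):
    observation: str = Field(description="A factual statement about the entity")


class ObservationExtractionResponse(BaseModel):
    observations: list[Observation] = Field(default_factory=list)


# Hypothetical raw JSON from the model, validated into typed objects.
raw = {"observations": [{"observation": "Ada prefers morning meetings."}]}
result = ObservationExtractionResponse.model_validate(raw)
print([o.observation for o in result.observations])
# ['Ada prefers morning meetings.']
```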
`hindsight_api/engine/search/reranking.py`

```diff
@@ -2,7 +2,6 @@
 Cross-encoder neural reranking for search results.
 """
 
-from typing import List
 from .types import MergedCandidate, ScoredResult
 
 
@@ -24,14 +23,11 @@ class CrossEncoderReranker:
         """
         if cross_encoder is None:
             from hindsight_api.engine.cross_encoder import create_cross_encoder_from_env
+
             cross_encoder = create_cross_encoder_from_env()
         self.cross_encoder = cross_encoder
 
-    def rerank(
-        self,
-        query: str,
-        candidates: List[MergedCandidate]
-    ) -> List[ScoredResult]:
+    def rerank(self, query: str, candidates: list[MergedCandidate]) -> list[ScoredResult]:
         """
         Rerank candidates using cross-encoder scores.
 
@@ -77,6 +73,7 @@ class CrossEncoderReranker:
         # Normalize scores using sigmoid to [0, 1] range
         # Cross-encoder returns logits which can be negative
         import numpy as np
+
         def sigmoid(x):
             return 1 / (1 + np.exp(-x))
 
@@ -89,7 +86,7 @@ class CrossEncoderReranker:
                 candidate=candidate,
                 cross_encoder_score=float(raw_score),
                 cross_encoder_score_normalized=float(norm_score),
-                weight=float(norm_score)  # Initial weight is just cross-encoder score
+                weight=float(norm_score),  # Initial weight is just cross-encoder score
             )
             scored_results.append(scored_result)
 
```
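The sigmoid normalization touched in the last two hunks is worth a quick numeric check: it maps raw cross-encoder logits into [0, 1] monotonically, so relative ranking is unchanged while the downstream `weight` field gets a bounded score. With invented logits:

```python
import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


# Invented logits: strong match, borderline, clear non-match.
logits = np.array([4.2, 0.3, -2.8])
print(sigmoid(logits).round(3))
# [0.985 0.574 0.057]
```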