hindsight-api 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hindsight_api/__init__.py +38 -0
  2. hindsight_api/api/__init__.py +105 -0
  3. hindsight_api/api/http.py +1872 -0
  4. hindsight_api/api/mcp.py +157 -0
  5. hindsight_api/engine/__init__.py +47 -0
  6. hindsight_api/engine/cross_encoder.py +97 -0
  7. hindsight_api/engine/db_utils.py +93 -0
  8. hindsight_api/engine/embeddings.py +113 -0
  9. hindsight_api/engine/entity_resolver.py +575 -0
  10. hindsight_api/engine/llm_wrapper.py +269 -0
  11. hindsight_api/engine/memory_engine.py +3095 -0
  12. hindsight_api/engine/query_analyzer.py +519 -0
  13. hindsight_api/engine/response_models.py +222 -0
  14. hindsight_api/engine/retain/__init__.py +50 -0
  15. hindsight_api/engine/retain/bank_utils.py +423 -0
  16. hindsight_api/engine/retain/chunk_storage.py +82 -0
  17. hindsight_api/engine/retain/deduplication.py +104 -0
  18. hindsight_api/engine/retain/embedding_processing.py +62 -0
  19. hindsight_api/engine/retain/embedding_utils.py +54 -0
  20. hindsight_api/engine/retain/entity_processing.py +90 -0
  21. hindsight_api/engine/retain/fact_extraction.py +1027 -0
  22. hindsight_api/engine/retain/fact_storage.py +176 -0
  23. hindsight_api/engine/retain/link_creation.py +121 -0
  24. hindsight_api/engine/retain/link_utils.py +651 -0
  25. hindsight_api/engine/retain/orchestrator.py +405 -0
  26. hindsight_api/engine/retain/types.py +206 -0
  27. hindsight_api/engine/search/__init__.py +15 -0
  28. hindsight_api/engine/search/fusion.py +122 -0
  29. hindsight_api/engine/search/observation_utils.py +132 -0
  30. hindsight_api/engine/search/reranking.py +103 -0
  31. hindsight_api/engine/search/retrieval.py +503 -0
  32. hindsight_api/engine/search/scoring.py +161 -0
  33. hindsight_api/engine/search/temporal_extraction.py +64 -0
  34. hindsight_api/engine/search/think_utils.py +255 -0
  35. hindsight_api/engine/search/trace.py +215 -0
  36. hindsight_api/engine/search/tracer.py +447 -0
  37. hindsight_api/engine/search/types.py +160 -0
  38. hindsight_api/engine/task_backend.py +223 -0
  39. hindsight_api/engine/utils.py +203 -0
  40. hindsight_api/metrics.py +227 -0
  41. hindsight_api/migrations.py +163 -0
  42. hindsight_api/models.py +309 -0
  43. hindsight_api/pg0.py +425 -0
  44. hindsight_api/web/__init__.py +12 -0
  45. hindsight_api/web/server.py +143 -0
  46. hindsight_api-0.0.13.dist-info/METADATA +41 -0
  47. hindsight_api-0.0.13.dist-info/RECORD +48 -0
  48. hindsight_api-0.0.13.dist-info/WHEEL +4 -0
@@ -0,0 +1,64 @@
1
+ """
2
+ Temporal extraction for time-aware search queries.
3
+
4
+ Handles natural language temporal expressions using transformer-based query analysis.
5
+ """
6
+
7
+ from typing import Optional, Tuple
8
+ from datetime import datetime
9
+ import logging
10
+ from hindsight_api.engine.query_analyzer import QueryAnalyzer, DateparserQueryAnalyzer
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Global default analyzer instance
15
+ # Can be overridden by passing a custom analyzer to extract_temporal_constraint
16
+ _default_analyzer: Optional[QueryAnalyzer] = None
17
+
18
+
19
def get_default_analyzer() -> QueryAnalyzer:
    """Return the process-wide default query analyzer, creating it on first use.

    The DateparserQueryAnalyzer is constructed lazily so that nothing heavy is
    loaded at import time; every later call reuses the cached module-level
    instance.

    Returns:
        The shared DateparserQueryAnalyzer instance.
    """
    global _default_analyzer
    analyzer = _default_analyzer
    if analyzer is None:
        analyzer = DateparserQueryAnalyzer()
        _default_analyzer = analyzer
    return analyzer
32
+
33
+
34
def extract_temporal_constraint(
    query: str,
    reference_date: Optional[datetime] = None,
    analyzer: Optional[QueryAnalyzer] = None,
) -> Optional[Tuple[datetime, datetime]]:
    """Extract a temporal constraint from a search query.

    Args:
        query: Search query text.
        reference_date: Reference date for relative terms (defaults to now).
        analyzer: Custom query analyzer (defaults to the shared
            DateparserQueryAnalyzer from get_default_analyzer()).

    Returns:
        A (start_date, end_date) tuple when the analyzer finds a temporal
        constraint in the query, otherwise None.
    """
    if analyzer is None:
        analyzer = get_default_analyzer()

    analysis = analyzer.analyze(query, reference_date)

    constraint = analysis.temporal_constraint
    if not constraint:
        return None
    return (constraint.start_date, constraint.end_date)
@@ -0,0 +1,255 @@
1
+ """
2
+ Think operation utilities for formulating answers based on agent and world facts.
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ import re
8
+ from datetime import datetime, timezone
9
+ from typing import Dict, List, Any
10
+ from pydantic import BaseModel, Field
11
+
12
+ from ..response_models import ReflectResult, MemoryFact, PersonalityTraits
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class Opinion(BaseModel):
    """An opinion formed by the bank.

    Produced by the structured LLM extraction in extract_opinions_from_text;
    the opinion text is expected to be phrased in the first person with its
    supporting reasoning folded in.
    """
    opinion: str = Field(description="The opinion or perspective with reasoning included")
    # The 0.0-1.0 range is stated in the description only; no ge/le
    # validation constraints are applied to the field.
    confidence: float = Field(description="Confidence score for this opinion (0.0 to 1.0, where 1.0 is very confident)")
21
+
22
+
23
class OpinionExtractionResponse(BaseModel):
    """Response containing extracted opinions.

    Used as the structured response_format for the opinion-extraction LLM
    call; defaults to an empty list when no opinions are found.
    """
    opinions: List[Opinion] = Field(
        default_factory=list,
        description="List of opinions formed with their supporting reasons and confidence scores"
    )
29
+
30
+
31
def describe_trait(name: str, value: float) -> str:
    """Convert a numeric trait value into a descriptive phrase.

    Maps the value onto five qualitative bands ("very high" down to
    "very low") in 0.2 steps and prefixes the band to the trait name.

    Args:
        name: Human-readable trait name (e.g. "openness to new ideas").
        value: Trait strength, nominally in [0.0, 1.0].

    Returns:
        A phrase such as "high openness to new ideas".
    """
    bands = (
        (0.8, "very high"),
        (0.6, "high"),
        (0.4, "moderate"),
        (0.2, "low"),
    )
    for threshold, qualifier in bands:
        if value >= threshold:
            return f"{qualifier} {name}"
    return f"very low {name}"
43
+
44
+
45
def build_personality_description(personality: PersonalityTraits) -> str:
    """Build a personality description string from personality traits.

    Renders each of the five traits through describe_trait() and appends
    bias_strength as a whole percentage. Note that neuroticism is presented
    to the LLM as "emotional sensitivity".
    """
    return f"""Your personality traits:
- {describe_trait('openness to new ideas', personality.openness)}
- {describe_trait('conscientiousness and organization', personality.conscientiousness)}
- {describe_trait('extraversion and sociability', personality.extraversion)}
- {describe_trait('agreeableness and cooperation', personality.agreeableness)}
- {describe_trait('emotional sensitivity', personality.neuroticism)}

Personality influence strength: {int(personality.bias_strength * 100)}% (how much your personality shapes your opinions)"""
55
+
56
+
57
def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
    """Serialize retrieved facts into a JSON array string for an LLM prompt.

    Each fact contributes at least its text; context, occurred_start (as a
    string or a formatted datetime), and activation (emitted as "score") are
    included only when present.

    Args:
        facts: Facts to serialize; may be empty.

    Returns:
        A pretty-printed JSON array string ("[]" when facts is empty).
    """
    import json

    if not facts:
        return "[]"

    entries = []
    for fact in facts:
        entry = {"text": fact.text}

        if fact.context:
            entry["context"] = fact.context

        # When the fact occurred; accept either a preformatted string or a
        # datetime that needs formatting.
        when = fact.occurred_start
        if when:
            if isinstance(when, datetime):
                entry["occurred_start"] = when.strftime('%Y-%m-%d %H:%M:%S')
            elif isinstance(when, str):
                entry["occurred_start"] = when

        if fact.activation is not None:
            entry["score"] = fact.activation

        entries.append(entry)

    return json.dumps(entries, indent=2)
88
+
89
+
90
def build_think_prompt(
    agent_facts_text: str,
    world_facts_text: str,
    opinion_facts_text: str,
    query: str,
    name: str,
    personality: PersonalityTraits,
    background: str,
    context: str = None,  # NOTE(review): defaults to None, so Optional[str] would be the accurate annotation
) -> str:
    """Build the think prompt for the LLM.

    Args:
        agent_facts_text: Pre-formatted facts about the agent's identity/experiences.
        world_facts_text: Pre-formatted facts about the world.
        opinion_facts_text: Pre-formatted existing opinions and beliefs.
        query: The question being asked.
        name: The bank's name, always included in the prompt.
        personality: Traits rendered via build_personality_description().
        background: Optional background text; its section is omitted when falsy.
        context: Optional additional context; its section is omitted when falsy.

    Returns:
        The fully assembled first-person prompt string.
    """
    personality_desc = build_personality_description(personality)

    # Name section is unconditional (leading blank lines separate it from
    # the preceding section in the final template).
    name_section = f"""

Your name: {name}
"""

    background_section = ""
    if background:
        background_section = f"""

Your background:
{background}
"""

    context_section = ""
    if context:
        context_section = f"""
ADDITIONAL CONTEXT:
{context}

"""

    return f"""Here's what I know and have experienced:

MY IDENTITY & EXPERIENCES:
{agent_facts_text}

WHAT I KNOW ABOUT THE WORLD:
{world_facts_text}

MY EXISTING OPINIONS & BELIEFS:
{opinion_facts_text}

{context_section}{name_section}{personality_desc}{background_section}

QUESTION: {query}

Based on everything I know, believe, and who I am (including my name, personality and background), here's what I genuinely think about this question. I'll draw on my experiences, knowledge, opinions, and personal traits to give you my honest perspective."""
140
+
141
+
142
def get_system_message(personality: PersonalityTraits) -> str:
    """Return the system message for the think LLM call.

    The personality-influence sentence is selected from three bands of
    personality.bias_strength (>= 0.7 strong, >= 0.4 moderate, else minimal)
    and spliced into a fixed persona preamble.
    """
    strength = personality.bias_strength
    if strength >= 0.7:
        guidance = "Your personality strongly influences your thinking. Let your traits guide how you interpret facts and form opinions. Don't be afraid to be biased based on your personality."
    elif strength >= 0.4:
        guidance = "Your personality moderately influences your thinking. Balance your personal traits with objective analysis."
    else:
        guidance = "Your personality has minimal influence on your thinking. Focus primarily on facts while keeping your traits in mind."

    return (
        "You are a person with your own thoughts, experiences, opinions, and personality. "
        "When asked a question, respond naturally like a human would - share what you genuinely "
        "think based on what you know and have experienced. "
        + guidance
        + " Be direct, express your views confidently, and use 'I think', 'I believe', "
        "'in my view', etc. Respond in plain text without markdown formatting."
    )
153
+
154
+
155
async def extract_opinions_from_text(
    llm_config,
    text: str,
    query: str
) -> List[Opinion]:
    """
    Extract opinions with reasons and confidence from text using LLM.

    Args:
        llm_config: LLM configuration to use (must expose an async ``call`` method)
        text: Text to extract opinions from
        query: The original query that prompted this response

    Returns:
        List of Opinion objects with text and confidence. Returns an empty
        list when the LLM call (or post-processing) fails.
    """
    extraction_prompt = f"""Extract any NEW opinions or perspectives from the answer below and rewrite them in FIRST-PERSON as if YOU are stating the opinion directly.

ORIGINAL QUESTION:
{query}

ANSWER PROVIDED:
{text}

Your task: Find opinions in the answer and rewrite them AS IF YOU ARE THE ONE SAYING THEM.

An opinion is a judgment, viewpoint, or conclusion that goes beyond just stating facts.

IMPORTANT: Do NOT extract statements like:
- "I don't have enough information"
- "The facts don't contain information about X"
- "I cannot answer because..."

ONLY extract actual opinions about substantive topics.

CRITICAL FORMAT REQUIREMENTS:
1. **ALWAYS start with first-person phrases**: "I think...", "I believe...", "In my view...", "I've come to believe...", "Previously I thought... but now..."
2. **NEVER use third-person**: Do NOT say "The speaker thinks..." or "They believe..." - always use "I"
3. Include the reasoning naturally within the statement
4. Provide a confidence score (0.0 to 1.0)

CORRECT Examples (✓ FIRST-PERSON):
- "I think Alice is more reliable because she consistently delivers on time and writes clean code"
- "Previously I thought all engineers were equal, but now I feel that experience and track record really matter"
- "I believe reliability is best measured by consistent output over time"
- "I've come to believe that track records are more important than potential"

WRONG Examples (✗ THIRD-PERSON - DO NOT USE):
- "The speaker thinks Alice is more reliable"
- "They believe reliability matters"
- "It is believed that Alice is better"

If no genuine opinions are expressed (e.g., the response just says "I don't know"), return an empty list."""

    def _singularize_verb(verb: str) -> str:
        # Strip the third-person singular 's' ("believes" -> "believe",
        # "thinks" -> "think"). The previous 'es' branch was redundant:
        # both branches removed exactly one trailing character.
        if verb.endswith('s'):
            return verb[:-1]
        return verb

    try:
        result = await llm_config.call(
            messages=[
                {"role": "system", "content": "You are converting opinions from text into first-person statements. Always use 'I think', 'I believe', 'I feel', etc. NEVER use third-person like 'The speaker' or 'They'."},
                {"role": "user", "content": extraction_prompt}
            ],
            response_format=OpinionExtractionResponse,
            scope="memory_extract_opinion"
        )

        # Format opinions with confidence score and convert to first-person
        formatted_opinions = []
        for op in result.opinions:
            opinion_text = op.opinion

            # Guard: an empty opinion string would raise IndexError at
            # opinion_text[0] below, and the broad except would then discard
            # ALL extracted opinions. Skip empties instead.
            if not opinion_text:
                continue

            # Pattern: "The speaker/user [verb]..." -> "I [verb]..."
            match = re.match(
                r'^(The speaker|The user|They|It is believed) (believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$',
                opinion_text,
                re.IGNORECASE
            )
            if match:
                verb = _singularize_verb(match.group(2))
                that_part = match.group(3) or ""  # Keep " that" if present
                rest = match.group(4)
                opinion_text = f"I {verb}{that_part}{rest}"

            # If still doesn't start with first-person, prepend "I believe that "
            first_person_starters = ["I think", "I believe", "I feel", "In my view", "I've come to believe", "Previously I"]
            if not any(opinion_text.startswith(starter) for starter in first_person_starters):
                opinion_text = "I believe that " + opinion_text[0].lower() + opinion_text[1:]

            formatted_opinions.append(Opinion(
                opinion=opinion_text,
                confidence=op.confidence
            ))

        return formatted_opinions

    except Exception as e:
        logger.warning(f"Failed to extract opinions: {str(e)}")
        return []
@@ -0,0 +1,215 @@
1
+ """
2
+ Search trace models for debugging and visualization.
3
+
4
+ These Pydantic models define the structure of search traces, capturing
5
+ every step of the spreading activation search process for analysis.
6
+ """
7
+ from datetime import datetime
8
+ from typing import List, Optional, Dict, Any, Literal
9
+ from pydantic import BaseModel, Field
10
+
11
+
12
class QueryInfo(BaseModel):
    """Information about the search query.

    Captures the query text, its embedding, the execution timestamp, and the
    search limits (node budget and result token cap) in effect for the trace.
    """
    query_text: str = Field(description="Original query text")
    query_embedding: List[float] = Field(description="Generated query embedding vector")
    timestamp: datetime = Field(description="When the query was executed")
    budget: int = Field(description="Maximum nodes to explore")
    max_tokens: int = Field(description="Maximum tokens to return in results")
19
+
20
+
21
class EntryPoint(BaseModel):
    """An entry point node selected for search.

    Entry points seed the (legacy) spreading-activation traversal; rank is
    1-based among the selected entry points.
    """
    node_id: str = Field(description="Memory unit ID")
    text: str = Field(description="Memory unit text content")
    # Bounded to [0, 1] by field constraints, unlike activation values elsewhere.
    similarity_score: float = Field(description="Cosine similarity to query", ge=0.0, le=1.0)
    rank: int = Field(description="Rank among entry points (1-based)")
27
+
28
+
29
class WeightComponents(BaseModel):
    """Breakdown of weight calculation components.

    Per the contribution field descriptions, final_weight combines
    0.3*activation + 0.3*semantic_similarity + 0.25*recency + 0.15*frequency
    (coefficients sum to 1.0). Only activation may exceed 1.0.
    """
    activation: float = Field(description="Activation from spreading (can exceed 1.0 through accumulation)", ge=0.0)
    semantic_similarity: float = Field(description="Semantic similarity to query", ge=0.0, le=1.0)
    recency: float = Field(description="Recency weight", ge=0.0, le=1.0)
    frequency: float = Field(description="Normalized frequency weight", ge=0.0, le=1.0)
    final_weight: float = Field(description="Combined final weight")

    # Weight formula components (for transparency)
    activation_contribution: float = Field(description="0.3 * activation")
    semantic_contribution: float = Field(description="0.3 * semantic_similarity")
    recency_contribution: float = Field(description="0.25 * recency")
    frequency_contribution: float = Field(description="0.15 * frequency")
42
+
43
+
44
class LinkInfo(BaseModel):
    """Information about a link to a neighbor.

    Records whether the link was followed or pruned during traversal;
    supplementary links represent additional connections to a node already
    reached and carry no new activation.
    """
    to_node_id: str = Field(description="Target node ID")
    link_type: Literal["temporal", "semantic", "entity"] = Field(description="Type of link")
    link_weight: float = Field(description="Weight of the link (can exceed 1.0 when aggregating multiple connections)", ge=0.0)
    # Only meaningful for entity links; None otherwise.
    entity_id: Optional[str] = Field(default=None, description="Entity ID if link_type is 'entity'")
    new_activation: Optional[float] = Field(default=None, description="Activation that would be passed to neighbor (None for supplementary links)")
    followed: bool = Field(description="Whether this link was followed (or pruned)")
    prune_reason: Optional[str] = Field(default=None, description="Why link was not followed (if not followed)")
    is_supplementary: bool = Field(default=False, description="Whether this is a supplementary link (multiple connections to same node)")
54
+
55
+
56
class NodeVisit(BaseModel):
    """Information about visiting a node during search.

    One record per node visited by the (legacy) traversal: identity and
    content of the memory unit, how it was reached, its weight breakdown,
    the links explored from it, and its final rank if it made the results.
    """
    step: int = Field(description="Step number in search (1-based)")
    node_id: str = Field(description="Memory unit ID")
    text: str = Field(description="Memory unit text content")
    context: str = Field(description="Memory unit context")
    event_date: Optional[datetime] = Field(default=None, description="When the memory occurred")
    access_count: int = Field(description="Number of times accessed before this search")

    # How this node was reached
    # (entry points have no parent; parent/link fields are None for them)
    is_entry_point: bool = Field(description="Whether this is an entry point")
    parent_node_id: Optional[str] = Field(default=None, description="Node that led to this one")
    link_type: Optional[Literal["temporal", "semantic", "entity"]] = Field(default=None, description="Type of link from parent")
    link_weight: Optional[float] = Field(default=None, description="Weight of link from parent")

    # Weights
    weights: WeightComponents = Field(description="Weight calculation breakdown")

    # Neighbors discovered from this node
    neighbors_explored: List[LinkInfo] = Field(default_factory=list, description="Links explored from this node")

    # Ranking
    final_rank: Optional[int] = Field(default=None, description="Final rank in results (1-based, None if not in top-k)")
79
+
80
+
81
class PruningDecision(BaseModel):
    """Records when a node was considered but not visited.

    The reason is one of a closed set (already visited, activation below
    threshold, or search budget exhausted).
    """
    node_id: str = Field(description="Node that was pruned")
    reason: Literal["already_visited", "activation_too_low", "budget_exhausted"] = Field(description="Why it was pruned")
    activation: float = Field(description="Activation value when pruned")
    would_have_been_step: int = Field(description="What step it would have been if visited")
87
+
88
+
89
class SearchPhaseMetrics(BaseModel):
    """Performance metrics for a search phase.

    A named phase with its wall-clock duration plus free-form,
    phase-specific detail values.
    """
    phase_name: str = Field(description="Name of the phase")
    duration_seconds: float = Field(description="Time taken in seconds")
    details: Dict[str, Any] = Field(default_factory=dict, description="Additional phase-specific metrics")
94
+
95
+
96
class RetrievalResult(BaseModel):
    """A single result from a retrieval method.

    The score's meaning depends on the method that produced it; score_name
    identifies it (e.g. 'similarity', 'bm25_score', 'activation').
    """
    rank: int = Field(description="Rank in this retrieval method (1-based)")
    node_id: str = Field(description="Memory unit ID")
    text: str = Field(description="Memory unit text content")
    context: str = Field(default="", description="Memory unit context")
    event_date: Optional[datetime] = Field(default=None, description="When the memory occurred")
    fact_type: Optional[str] = Field(default=None, description="Fact type (world, bank, opinion)")
    score: float = Field(description="Score from this retrieval method")
    score_name: str = Field(description="Name of the score (e.g. 'similarity', 'bm25_score', 'activation')")
106
+
107
+
108
class RetrievalMethodResults(BaseModel):
    """Results from a single retrieval method.

    One of the four retrieval legs (semantic, bm25, graph, temporal) with
    its ranked results, timing, and method-specific metadata.
    """
    method_name: Literal["semantic", "bm25", "graph", "temporal"] = Field(description="Name of retrieval method")
    results: List[RetrievalResult] = Field(description="Retrieved results with ranks")
    duration_seconds: float = Field(description="Time taken for this retrieval")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Method-specific metadata")
114
+
115
+
116
class RRFMergeResult(BaseModel):
    """A result after RRF merging.

    source_ranks maps each contributing retrieval method's name to the rank
    the node held in that method's list.
    """
    node_id: str = Field(description="Memory unit ID")
    text: str = Field(description="Memory unit text content")
    rrf_score: float = Field(description="Reciprocal Rank Fusion score")
    source_ranks: Dict[str, int] = Field(description="Rank in each source that contributed (method_name -> rank)")
    final_rrf_rank: int = Field(description="Rank after RRF merge (1-based)")
123
+
124
+
125
class RerankedResult(BaseModel):
    """A result after reranking.

    Keeps both the pre-rerank RRF rank and the post-rerank rank so the
    movement (rank_change, positive = moved up) can be inspected.
    """
    node_id: str = Field(description="Memory unit ID")
    text: str = Field(description="Memory unit text content")
    rerank_score: float = Field(description="Final reranking score")
    rerank_rank: int = Field(description="Rank after reranking (1-based)")
    rrf_rank: int = Field(description="Original RRF rank before reranking")
    rank_change: int = Field(description="Change in rank (positive = moved up)")
    score_components: Dict[str, float] = Field(default_factory=dict, description="Score breakdown")
134
+
135
+
136
class SearchSummary(BaseModel):
    """Summary statistics about the search.

    Aggregate counts (visited/pruned/entry points), budget accounting,
    total duration, per-link-type follow counts, and per-phase timings.
    """
    total_nodes_visited: int = Field(description="Total nodes visited")
    total_nodes_pruned: int = Field(description="Total nodes pruned")
    entry_points_found: int = Field(description="Number of entry points")
    budget_used: int = Field(description="How much budget was used")
    budget_remaining: int = Field(description="How much budget remained")
    total_duration_seconds: float = Field(description="Total search duration")
    results_returned: int = Field(description="Number of results returned")

    # Link statistics
    temporal_links_followed: int = Field(default=0, description="Temporal links followed")
    semantic_links_followed: int = Field(default=0, description="Semantic links followed")
    entity_links_followed: int = Field(default=0, description="Entity links followed")

    # Phase timings
    phase_metrics: List[SearchPhaseMetrics] = Field(default_factory=list, description="Metrics for each phase")
153
+
154
+
155
class SearchTrace(BaseModel):
    """Complete trace of a search operation.

    Combines the current 4-way retrieval pipeline (per-method results ->
    RRF merge -> rerank) with legacy spreading-activation fields that are
    kept for older graph/temporal visualizations.
    """
    query: QueryInfo = Field(description="Query information")

    # New 4-way retrieval architecture
    retrieval_results: List[RetrievalMethodResults] = Field(default_factory=list, description="Results from each retrieval method")
    rrf_merged: List[RRFMergeResult] = Field(default_factory=list, description="Results after RRF merging")
    reranked: List[RerankedResult] = Field(default_factory=list, description="Results after reranking")

    # Legacy fields (kept for backward compatibility with graph/temporal visualizations)
    entry_points: List[EntryPoint] = Field(default_factory=list, description="Entry points selected for search (legacy)")
    visits: List[NodeVisit] = Field(default_factory=list, description="All nodes visited during search (legacy, for graph viz)")
    pruned: List[PruningDecision] = Field(default_factory=list, description="Nodes that were pruned (legacy)")

    summary: SearchSummary = Field(description="Summary statistics")

    # Final results (for comparison with visits)
    final_results: List[Dict[str, Any]] = Field(description="Final ranked results returned to user")

    # NOTE(review): "json_encoders" is deprecated in pydantic v2 and
    # model_dump_json already serializes datetimes to ISO format — confirm
    # before removing.
    model_config = {
        "json_encoders": {
            datetime: lambda v: v.isoformat()
        }
    }

    def to_json(self, **kwargs) -> str:
        """Export trace as a pretty-printed JSON string."""
        return self.model_dump_json(indent=2, **kwargs)

    def to_dict(self) -> dict:
        """Export trace as a dictionary."""
        return self.model_dump()

    def get_visit_by_node_id(self, node_id: str) -> Optional[NodeVisit]:
        """Find a visit by node ID (linear scan; first match wins)."""
        for visit in self.visits:
            if visit.node_id == node_id:
                return visit
        return None

    def get_search_path_to_node(self, node_id: str) -> List[NodeVisit]:
        """Get the path from entry point to a specific node.

        Walks parent_node_id links backwards from node_id. A seen-set guards
        against cyclic parent chains in recorded trace data, which previously
        caused an infinite loop; on a cycle the walk stops at the first
        repeated node.
        """
        path: List[NodeVisit] = []
        seen = set()
        current_visit = self.get_visit_by_node_id(node_id)

        while current_visit and current_visit.node_id not in seen:
            seen.add(current_visit.node_id)
            path.insert(0, current_visit)
            if not current_visit.parent_node_id:
                break
            current_visit = self.get_visit_by_node_id(current_visit.parent_node_id)

        return path

    def get_nodes_by_link_type(self, link_type: Literal["temporal", "semantic", "entity"]) -> List[NodeVisit]:
        """Get all nodes reached via a specific link type."""
        return [v for v in self.visits if v.link_type == link_type]

    def get_entry_point_nodes(self) -> List[NodeVisit]:
        """Get all entry point visits."""
        return [v for v in self.visits if v.is_entry_point]