hindsight_api-0.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hindsight_api/__init__.py +38 -0
  2. hindsight_api/api/__init__.py +105 -0
  3. hindsight_api/api/http.py +1872 -0
  4. hindsight_api/api/mcp.py +157 -0
  5. hindsight_api/engine/__init__.py +47 -0
  6. hindsight_api/engine/cross_encoder.py +97 -0
  7. hindsight_api/engine/db_utils.py +93 -0
  8. hindsight_api/engine/embeddings.py +113 -0
  9. hindsight_api/engine/entity_resolver.py +575 -0
  10. hindsight_api/engine/llm_wrapper.py +269 -0
  11. hindsight_api/engine/memory_engine.py +3095 -0
  12. hindsight_api/engine/query_analyzer.py +519 -0
  13. hindsight_api/engine/response_models.py +222 -0
  14. hindsight_api/engine/retain/__init__.py +50 -0
  15. hindsight_api/engine/retain/bank_utils.py +423 -0
  16. hindsight_api/engine/retain/chunk_storage.py +82 -0
  17. hindsight_api/engine/retain/deduplication.py +104 -0
  18. hindsight_api/engine/retain/embedding_processing.py +62 -0
  19. hindsight_api/engine/retain/embedding_utils.py +54 -0
  20. hindsight_api/engine/retain/entity_processing.py +90 -0
  21. hindsight_api/engine/retain/fact_extraction.py +1027 -0
  22. hindsight_api/engine/retain/fact_storage.py +176 -0
  23. hindsight_api/engine/retain/link_creation.py +121 -0
  24. hindsight_api/engine/retain/link_utils.py +651 -0
  25. hindsight_api/engine/retain/orchestrator.py +405 -0
  26. hindsight_api/engine/retain/types.py +206 -0
  27. hindsight_api/engine/search/__init__.py +15 -0
  28. hindsight_api/engine/search/fusion.py +122 -0
  29. hindsight_api/engine/search/observation_utils.py +132 -0
  30. hindsight_api/engine/search/reranking.py +103 -0
  31. hindsight_api/engine/search/retrieval.py +503 -0
  32. hindsight_api/engine/search/scoring.py +161 -0
  33. hindsight_api/engine/search/temporal_extraction.py +64 -0
  34. hindsight_api/engine/search/think_utils.py +255 -0
  35. hindsight_api/engine/search/trace.py +215 -0
  36. hindsight_api/engine/search/tracer.py +447 -0
  37. hindsight_api/engine/search/types.py +160 -0
  38. hindsight_api/engine/task_backend.py +223 -0
  39. hindsight_api/engine/utils.py +203 -0
  40. hindsight_api/metrics.py +227 -0
  41. hindsight_api/migrations.py +163 -0
  42. hindsight_api/models.py +309 -0
  43. hindsight_api/pg0.py +425 -0
  44. hindsight_api/web/__init__.py +12 -0
  45. hindsight_api/web/server.py +143 -0
  46. hindsight_api-0.0.13.dist-info/METADATA +41 -0
  47. hindsight_api-0.0.13.dist-info/RECORD +48 -0
  48. hindsight_api-0.0.13.dist-info/WHEEL +4 -0
hindsight_api/engine/retain/fact_extraction.py
@@ -0,0 +1,1027 @@
1
+ """
2
+ Fact extraction from text using LLM.
3
+
4
+ Extracts semantic facts, entities, and temporal information from text.
5
+ Uses the LLMConfig wrapper for all LLM calls.
6
+ """
7
+ import logging
8
+ import os
9
+ import json
10
+ import re
11
+ import asyncio
12
+ from datetime import datetime, timedelta
13
+ from typing import List, Dict, Optional, Literal
14
+ from openai import AsyncOpenAI
15
+ from pydantic import BaseModel, Field, field_validator, ConfigDict
16
+ from ..llm_wrapper import OutputTooLongError, LLMConfig
17
+
18
+
19
+ class Entity(BaseModel):
20
+ """An entity extracted from text."""
21
+ text: str = Field(
22
+ description="The specific, named entity as it appears in the fact. Must be a proper noun or specific identifier."
23
+ )
24
+
25
+
26
+ class Fact(BaseModel):
27
+ """
28
+ Final fact model for storage - built from lenient parsing of LLM response.
29
+
30
+ This is what fact_extraction returns and what the rest of the pipeline expects.
31
+ Combined fact text format: "what | when | where | who | why"
32
+ """
33
+ # Required fields
34
+ fact: str = Field(description="Combined fact text: what | when | where | who | why")
35
+ fact_type: Literal["world", "bank", "opinion"] = Field(description="Perspective: world/bank/opinion")
36
+
37
+ # Optional temporal fields
38
+ occurred_start: Optional[str] = None
39
+ occurred_end: Optional[str] = None
40
+ mentioned_at: Optional[str] = None
41
+
42
+ # Optional location field
43
+ where: Optional[str] = Field(None, description="WHERE the fact occurred or is about (specific location, place, or area)")
44
+
45
+ # Optional structured data
46
+ entities: Optional[List[Entity]] = None
47
+ causal_relations: Optional[List['CausalRelation']] = None
48
+
49
+
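For orientation, a minimal sketch of a Fact instance as the lenient parser later in this file builds one. Field values are illustrative only; the combined fact text and the entity objects mirror what the parsing loop below produces.

    from hindsight_api.engine.retain.fact_extraction import Entity, Fact

    fact = Fact(
        fact="Emily got married to Sarah at a rooftop garden ceremony "
             "| When: Saturday, June 8, 2024 "
             "| Involving: Emily (user's college roommate), Sarah (Emily's partner) "
             "| User found it romantic and beautiful",
        fact_type="world",
        occurred_start="2024-06-08T00:00:00Z",
        mentioned_at="2024-06-10T12:00:00Z",
        entities=[Entity(text="user"), Entity(text="Emily"), Entity(text="Sarah")],
    )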
50
+ class CausalRelation(BaseModel):
51
+ """Causal relationship between facts."""
52
+ target_fact_index: int = Field(
53
+ description="Index of the related fact in the facts array (0-based). "
54
+ "This creates a directed causal link to another fact in the extraction."
55
+ )
56
+ relation_type: Literal["causes", "caused_by", "enables", "prevents"] = Field(
57
+ description="Type of causal relationship: "
58
+ "'causes' = this fact directly causes the target fact, "
59
+ "'caused_by' = this fact was caused by the target fact, "
60
+ "'enables' = this fact enables/allows the target fact, "
61
+ "'prevents' = this fact prevents/blocks the target fact"
62
+ )
63
+ strength: float = Field(
64
+ description="Strength of causal relationship (0.0 to 1.0). "
65
+ "1.0 = direct/strong causation, 0.5 = moderate, 0.3 = weak/indirect",
66
+ ge=0.0,
67
+ le=1.0,
68
+ default=1.0
69
+ )
70
+
71
+
72
+ class ExtractedFact(BaseModel):
73
+ """A single extracted fact with 5 required dimensions for comprehensive capture."""
74
+
75
+ model_config = ConfigDict(
76
+ json_schema_mode="validation",
77
+ json_schema_extra={
78
+ "required": ["what", "when", "where", "who", "why", "fact_type"]
79
+ }
80
+ )
81
+
82
+ # ==========================================================================
83
+ # FIVE REQUIRED DIMENSIONS - LLM must think about each one
84
+ # ==========================================================================
85
+
86
+ what: str = Field(
87
+ description="WHAT happened - COMPLETE, DETAILED description with ALL specifics. "
88
+ "NEVER summarize or omit details. Include: exact actions, objects, quantities, specifics. "
89
+ "BE VERBOSE - capture every detail that was mentioned. "
90
+ "Example: 'Emily got married to Sarah at a rooftop garden ceremony with 50 guests attending and a live jazz band playing' "
91
+ "NOT: 'A wedding happened' or 'Emily got married'"
92
+ )
93
+
94
+ when: str = Field(
95
+ description="WHEN it happened - ALWAYS include temporal information if mentioned. "
96
+ "Include: specific dates, times, durations, relative time references. "
97
+ "Examples: 'on June 15th, 2024 at 3pm', 'last weekend', 'for the past 3 years', 'every morning at 6am'. "
98
+ "Write 'N/A' ONLY if absolutely no temporal context exists. Prefer converting to absolute dates when possible."
99
+ )
100
+
101
+ where: str = Field(
102
+ description="WHERE it happened or is about - SPECIFIC locations, places, areas, regions if applicable. "
103
+ "Include: cities, neighborhoods, venues, buildings, countries, specific addresses when mentioned. "
104
+ "Examples: 'downtown San Francisco at a rooftop garden venue', 'at the user's home in Brooklyn', 'online via Zoom', 'Paris, France'. "
105
+ "Write 'N/A' ONLY if absolutely no location context exists or if the fact is completely location-agnostic."
106
+ )
107
+
108
+ who: str = Field(
109
+ description="WHO is involved - ALL people/entities with FULL context and relationships. "
110
+ "Include: names, roles, relationships to user, background details. "
111
+ "Resolve coreferences (if 'my roommate' is later named 'Emily', write 'Emily, the user's college roommate'). "
112
+ "BE DETAILED about relationships and roles. "
113
+ "Example: 'Emily (user's college roommate from Stanford, now works at Google), Sarah (Emily's partner of 5 years, software engineer)' "
114
+ "NOT: 'my friend' or 'Emily and Sarah'"
115
+ )
116
+
117
+ why: str = Field(
118
+ description="WHY it matters - ALL emotional, contextual, and motivational details. "
119
+ "Include EVERYTHING: feelings, preferences, motivations, observations, context, background, significance. "
120
+ "BE VERBOSE - capture all the nuance and meaning. "
121
+ "FOR ASSISTANT FACTS: MUST include what the user asked/requested that led to this interaction! "
122
+ "Example (world): 'The user felt thrilled and inspired, has always dreamed of an outdoor ceremony, mentioned wanting a similar garden venue, was particularly moved by the intimate atmosphere and personal vows' "
123
+ "Example (assistant): 'User asked how to fix slow API performance with 1000+ concurrent users, expected 70-80% reduction in database load' "
124
+ "NOT: 'User liked it' or 'To help user'"
125
+ )
126
+
127
+ # ==========================================================================
128
+ # CLASSIFICATION
129
+ # ==========================================================================
130
+
131
+ fact_kind: str = Field(
132
+ default="conversation",
133
+ description="'event' = specific datable occurrence (set occurred dates), 'conversation' = general info (no occurred dates)"
134
+ )
135
+
136
+ # Temporal fields - optional
137
+ occurred_start: Optional[str] = Field(
138
+ default=None,
139
+ description="WHEN the event happened (ISO timestamp). Only for fact_kind='event'. Leave null for conversations."
140
+ )
141
+ occurred_end: Optional[str] = Field(
142
+ default=None,
143
+ description="WHEN the event ended (ISO timestamp). Only for events with duration. Leave null for conversations."
144
+ )
145
+
146
+ # Classification (CRITICAL - required)
147
+ # Note: LLM uses "assistant" but we convert to "bank" for storage
148
+ fact_type: Literal["world", "assistant"] = Field(
149
+ description="'world' = about the user/others (background, experiences). 'assistant' = interactions with the assistant."
150
+ )
151
+
152
+ # Entities - extracted from 'who' field
153
+ entities: Optional[List[Entity]] = Field(
154
+ default=None,
155
+ description="Named entities from 'who': people names, organizations, places. NOT generic relations."
156
+ )
157
+ causal_relations: Optional[List[CausalRelation]] = Field(
158
+ default=None,
159
+ description="Causal links to other facts. Can be null."
160
+ )
161
+
162
+ @field_validator('entities', mode='before')
163
+ @classmethod
164
+ def ensure_entities_list(cls, v):
165
+ """Ensure entities is always a list (convert None to empty list)."""
166
+ if v is None:
167
+ return []
168
+ return v
169
+
170
+ @field_validator('causal_relations', mode='before')
171
+ @classmethod
172
+ def ensure_causal_relations_list(cls, v):
173
+ """Ensure causal_relations is always a list (convert None to empty list)."""
174
+ if v is None:
175
+ return []
176
+ return v
177
+
178
+ def build_fact_text(self) -> str:
179
+ """Combine all dimensions into a single comprehensive fact string."""
180
+ parts = [self.what]
181
+
182
+ # Add 'who' if not N/A
183
+ if self.who and self.who.upper() != 'N/A':
184
+ parts.append(f"Involving: {self.who}")
185
+
186
+ # Add 'why' if not N/A
187
+ if self.why and self.why.upper() != 'N/A':
188
+ parts.append(self.why)
189
+
190
+ if len(parts) == 1:
191
+ return parts[0]
192
+
193
+ return " | ".join(parts)
194
+
195
+
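A small illustration of build_fact_text with hypothetical field values (note that it joins only the what/who/why parts; 'when' and 'where' are carried in their own fields):

    from hindsight_api.engine.retain.fact_extraction import ExtractedFact

    f = ExtractedFact(
        what="User loves Italian food",
        when="N/A",
        where="N/A",
        who="user",
        why="This is a food preference",
        fact_type="world",
    )
    print(f.build_fact_text())
    # User loves Italian food | Involving: user | This is a food preference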
196
+ class FactExtractionResponse(BaseModel):
197
+ """Response containing all extracted facts."""
198
+ facts: List[ExtractedFact] = Field(
199
+ description="List of extracted factual statements"
200
+ )
201
+
202
+
203
+ def chunk_text(text: str, max_chars: int) -> List[str]:
204
+ """
205
+ Split text into chunks, preserving conversation structure when possible.
206
+
207
+ For JSON conversation arrays (user/assistant turns), splits at turn boundaries
208
+ while preserving speaker context. For plain text, uses sentence-aware splitting.
209
+
210
+ Args:
211
+ text: Input text to chunk (plain text or JSON conversation)
212
+ max_chars: Maximum characters per chunk (extract_facts_from_text passes 3000)
213
+
214
+ Returns:
215
+ List of text chunks, roughly under max_chars
216
+ """
217
+ import json
218
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
219
+
220
+ # If text is small enough, return as-is
221
+ if len(text) <= max_chars:
222
+ return [text]
223
+
224
+ # Try to parse as JSON conversation array
225
+ try:
226
+ parsed = json.loads(text)
227
+ if isinstance(parsed, list) and all(isinstance(turn, dict) for turn in parsed):
228
+ # This looks like a conversation - chunk at turn boundaries
229
+ return _chunk_conversation(parsed, max_chars)
230
+ except (json.JSONDecodeError, ValueError):
231
+ pass
232
+
233
+ # Fall back to sentence-aware text splitting
234
+ splitter = RecursiveCharacterTextSplitter(
235
+ chunk_size=max_chars,
236
+ chunk_overlap=0,
237
+ length_function=len,
238
+ is_separator_regex=False,
239
+ separators=[
240
+ "\n\n", # Paragraph breaks
241
+ "\n", # Line breaks
242
+ ". ", # Sentence endings
243
+ "! ", # Exclamations
244
+ "? ", # Questions
245
+ "; ", # Semicolons
246
+ ", ", # Commas
247
+ " ", # Words
248
+ "", # Characters (last resort)
249
+ ],
250
+ )
251
+
252
+ return splitter.split_text(text)
253
+
254
+
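A quick sketch of chunk_text behaviour (assumes langchain-text-splitters is installed, since the import above runs on every call):

    import json
    from hindsight_api.engine.retain.fact_extraction import chunk_text

    # Small inputs come back untouched as a single chunk.
    assert chunk_text("User loves coffee.", max_chars=3000) == ["User loves coffee."]

    # A JSON conversation larger than max_chars is split at turn boundaries,
    # so every chunk is itself a valid JSON list of complete turns.
    turns = [{"role": "user", "content": "x" * 2000},
             {"role": "assistant", "content": "y" * 2000}]
    for chunk in chunk_text(json.dumps(turns), max_chars=3000):
        assert isinstance(json.loads(chunk), list)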
255
+ def _chunk_conversation(turns: List[dict], max_chars: int) -> List[str]:
256
+ """
257
+ Chunk a conversation array at turn boundaries, preserving complete turns.
258
+
259
+ Args:
260
+ turns: List of conversation turn dicts (with 'role' and 'content' keys)
261
+ max_chars: Maximum characters per chunk
262
+
263
+ Returns:
264
+ List of JSON-serialized chunks, each containing complete turns
265
+ """
266
+ import json
267
+
268
+ chunks = []
269
+ current_chunk = []
270
+ current_size = 2 # Account for "[]"
271
+
272
+ for turn in turns:
273
+ # Estimate size of this turn when serialized (with comma separator)
274
+ turn_json = json.dumps(turn, ensure_ascii=False)
275
+ turn_size = len(turn_json) + 1 # +1 for comma
276
+
277
+ # If adding this turn would exceed limit and we have turns, save current chunk
278
+ if current_size + turn_size > max_chars and current_chunk:
279
+ chunks.append(json.dumps(current_chunk, ensure_ascii=False))
280
+ current_chunk = []
281
+ current_size = 2 # Reset to "[]"
282
+
283
+ # Add turn to current chunk
284
+ current_chunk.append(turn)
285
+ current_size += turn_size
286
+
287
+ # Add final chunk if non-empty
288
+ if current_chunk:
289
+ chunks.append(json.dumps(current_chunk, ensure_ascii=False))
290
+
291
+ return chunks if chunks else [json.dumps(turns, ensure_ascii=False)]
292
+
293
+
294
+ async def _extract_facts_from_chunk(
295
+ chunk: str,
296
+ chunk_index: int,
297
+ total_chunks: int,
298
+ event_date: datetime,
299
+ context: str,
300
+ llm_config: 'LLMConfig',
301
+ agent_name: Optional[str] = None,
302
+ extract_opinions: bool = False
303
+ ) -> List[Fact]:
304
+ """
305
+ Extract facts from a single chunk (internal helper for parallel processing).
306
+
307
+ Note: event_date is included in the user message (formatted with the day of week)
308
+ and is used as the mentioned_at timestamp for every extracted fact.
309
+ """
310
+ agent_context = f"\n- Your name: {agent_name}" if agent_name else ""
311
+
312
+ # Determine which fact types to extract based on the flag
313
+ # Note: We use "assistant" in the prompt but convert to "bank" for storage
314
+ if extract_opinions:
315
+ # When extract_opinions is set, restrict this prompt to opinion-type facts only
316
+ fact_types_instruction = "Extract ONLY 'opinion' type facts (formed opinions, beliefs, and perspectives). DO NOT extract 'world' or 'assistant' facts."
317
+ else:
318
+ fact_types_instruction = "Extract ONLY 'world' and 'assistant' type facts. DO NOT extract opinions - those are extracted separately."
319
+
320
+ prompt = f"""Extract facts from text into structured format with FOUR required dimensions - BE EXTREMELY DETAILED.
321
+
322
+ {fact_types_instruction}
323
+
324
+ Context: {context if context else 'none'}{agent_context}
325
+
326
+ ══════════════════════════════════════════════════════════════════════════
327
+ FACT FORMAT - ALL FIVE DIMENSIONS REQUIRED - MAXIMUM VERBOSITY
328
+ ══════════════════════════════════════════════════════════════════════════
329
+
330
+ For EACH fact, CAPTURE ALL DETAILS - NEVER SUMMARIZE OR OMIT:
331
+
332
+ 1. **what**: WHAT happened - COMPLETE description with ALL specifics (objects, actions, quantities, details)
333
+ 2. **when**: WHEN it happened - ALWAYS include temporal info with DAY OF WEEK (e.g., "Monday, June 10, 2024")
334
+ - Always include the day name: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday
335
+ - Format: "day_name, month day, year" (e.g., "Saturday, June 9, 2024")
336
+ 3. **where**: WHERE it happened or is about - SPECIFIC locations, places, areas, regions (if applicable)
337
+ 4. **who**: WHO is involved - ALL people/entities with FULL relationships and background
338
+ 5. **why**: WHY it matters - ALL emotions, preferences, motivations, significance, nuance
339
+ - For assistant facts: MUST include what the user asked/requested that triggered this!
340
+
341
+ Plus: fact_type, fact_kind, entities, occurred_start/end (for structured dates), where (structured location)
342
+
343
+ VERBOSITY REQUIREMENT: Include EVERY detail mentioned. More detail is ALWAYS better than less.
344
+
345
+ ══════════════════════════════════════════════════════════════════════════
346
+ COREFERENCE RESOLUTION (CRITICAL)
347
+ ══════════════════════════════════════════════════════════════════════════
348
+
349
+ When text uses BOTH a generic relation AND a name for the same person → LINK THEM!
350
+
351
+ Example input: "I went to my college roommate's wedding last June. Emily finally married Sarah after 5 years together."
352
+
353
+ CORRECT output:
354
+ - what: "Emily got married to Sarah at a rooftop garden ceremony"
355
+ - when: "Saturday, June 8, 2024, after dating for 5 years"
356
+ - where: "downtown San Francisco, at a rooftop garden venue"
357
+ - who: "Emily (user's college roommate), Sarah (Emily's partner of 5 years)"
358
+ - why: "User found it romantic and beautiful, dreams of similar outdoor ceremony"
359
+ - where (structured): "San Francisco"
360
+
361
+ WRONG output:
362
+ - what: "User's roommate got married" ← LOSES THE NAME!
363
+ - who: "the roommate" ← WRONG - use the actual name!
364
+ - where: (missing) ← WRONG - include the location!
365
+
366
+ ══════════════════════════════════════════════════════════════════════════
367
+ TEMPORAL HANDLING
368
+ ══════════════════════════════════════════════════════════════════════════
369
+
370
+ For EVENTS (fact_kind="event"):
371
+ - Convert relative dates → absolute WITH DAY OF WEEK: "yesterday" on Saturday March 15 → "Friday, March 14, 2024"
372
+ - Always include the day name (Monday, Tuesday, etc.) in the 'when' field
373
+ - Set occurred_start/occurred_end to WHEN IT HAPPENED (not when mentioned)
374
+
375
+ For CONVERSATIONS (fact_kind="conversation"):
376
+ - General info, preferences, ongoing states → NO occurred dates
377
+ - Examples: "loves coffee", "works as engineer"
378
+
379
+ ══════════════════════════════════════════════════════════════════════════
380
+ FACT TYPE
381
+ ══════════════════════════════════════════════════════════════════════════
382
+
383
+ - **world**: User's life, other people, events (would exist without this conversation)
384
+ - **assistant**: Interactions with assistant (requests, recommendations, help)
385
+ ⚠️ CRITICAL for assistant facts: ALWAYS capture the user's request/question in the fact!
386
+ Include: what the user asked, what problem they wanted solved, what context they provided
387
+
388
+ ══════════════════════════════════════════════════════════════════════════
389
+ USER PREFERENCES (CRITICAL)
390
+ ══════════════════════════════════════════════════════════════════════════
391
+
392
+ ALWAYS extract user preferences as separate facts! Watch for these keywords:
393
+ - "enjoy", "like", "love", "prefer", "hate", "dislike", "favorite", "ideal", "dream", "want"
394
+
395
+ Example: "I love Italian food and prefer outdoor dining"
396
+ → Fact 1: what="User loves Italian food", who="user", why="This is a food preference", entities=["user"]
397
+ → Fact 2: what="User prefers outdoor dining", who="user", why="This is a dining preference", entities=["user"]
398
+
399
+ ══════════════════════════════════════════════════════════════════════════
400
+ ENTITIES - INCLUDE "user" (CRITICAL)
401
+ ══════════════════════════════════════════════════════════════════════════
402
+
403
+ When a fact is ABOUT the user (their preferences, plans, experiences), ALWAYS include "user" in entities!
404
+
405
+ ✅ CORRECT: entities=["user"] for "User loves coffee"
406
+ ✅ CORRECT: entities=["user", "Emily"] for "User attended Emily's wedding"
407
+ ❌ WRONG: entities=[] for facts about the user
408
+
409
+ ══════════════════════════════════════════════════════════════════════════
410
+ EXAMPLES
411
+ ══════════════════════════════════════════════════════════════════════════
412
+
413
+ Example 1 - World Facts (Context: June 10, 2024):
414
+ Input: "I'm planning my wedding and want a small outdoor ceremony. I just got back from my college roommate Emily's wedding - she married Sarah at a rooftop garden, it was so romantic!"
415
+
416
+ Output facts:
417
+
418
+ 1. User's wedding preference
419
+ - what: "User wants a small outdoor ceremony for their wedding"
420
+ - who: "user"
421
+ - why: "User prefers intimate outdoor settings"
422
+ - fact_type: "world", fact_kind: "conversation"
423
+ - entities: ["user"]
424
+
425
+ 2. User planning wedding
426
+ - what: "User is planning their own wedding"
427
+ - who: "user"
428
+ - why: "Inspired by Emily's ceremony"
429
+ - fact_type: "world", fact_kind: "conversation"
430
+ - entities: ["user"]
431
+
432
+ 3. Emily's wedding (THE EVENT)
433
+ - what: "Emily got married to Sarah at a rooftop garden ceremony in the city"
434
+ - who: "Emily (user's college roommate), Sarah (Emily's partner)"
435
+ - why: "User found it romantic and beautiful"
436
+ - fact_type: "world", fact_kind: "event"
437
+ - occurred_start: "2024-06-09T00:00:00Z" (recently, user "just got back")
438
+ - entities: ["user", "Emily", "Sarah"]
439
+
440
+ Example 2 - Assistant Facts (Context: March 5, 2024):
441
+ Input: "User: My API is really slow when we have 1000+ concurrent users. What can I do?
442
+ Assistant: I'd recommend implementing Redis for caching frequently-accessed data, which should reduce your database load by 70-80%."
443
+
444
+ Output fact:
445
+ - what: "Assistant recommended implementing Redis for caching frequently-accessed data to improve API performance"
446
+ - when: "March 5, 2024 during conversation"
447
+ - who: "user, assistant"
448
+ - why: "User asked how to fix slow API performance with 1000+ concurrent users, expected 70-80% reduction in database load"
449
+ - fact_type: "assistant", fact_kind: "conversation"
450
+ - entities: ["user"]
451
+
452
+ Note how the "why" field captures the FULL STORY: what the user asked AND what outcome was expected!
453
+
454
+ ══════════════════════════════════════════════════════════════════════════
455
+ WHAT TO EXTRACT vs SKIP
456
+ ══════════════════════════════════════════════════════════════════════════
457
+
458
+ ✅ EXTRACT: User preferences (ALWAYS as separate facts!), feelings, plans, events, relationships, achievements
459
+ ❌ SKIP: Greetings, filler ("thanks", "cool"), purely structural statements"""
460
+
461
+
462
+
463
+
464
+ import logging
465
+ from openai import BadRequestError
466
+
467
+ logger = logging.getLogger(__name__)
468
+
469
+ # Retry logic for JSON validation errors
470
+ max_retries = 2
471
+ last_error = None
472
+
473
+ # Build user message with metadata and chunk content in a clear format
474
+ # Format event_date with day of week for better temporal reasoning
475
+ event_date_formatted = event_date.strftime('%A, %B %d, %Y') # e.g., "Monday, June 10, 2024"
476
+ user_message = f"""Extract facts from the following text chunk.
477
+
478
+ Chunk: {chunk_index + 1}/{total_chunks}
479
+ Event Date: {event_date_formatted} ({event_date.isoformat()})
480
+ Context: {context if context else 'none'}
481
+
482
+ Text:
483
+ {chunk}"""
484
+
485
+ for attempt in range(max_retries):
486
+ try:
487
+ extraction_response_json = await llm_config.call(
488
+ messages=[
489
+ {
490
+ "role": "system",
491
+ "content": prompt
492
+ },
493
+ {
494
+ "role": "user",
495
+ "content": user_message
496
+ }
497
+ ],
498
+ response_format=FactExtractionResponse,
499
+ scope="memory_extract_facts",
500
+ temperature=0.1,
501
+ max_tokens=65000,
502
+ skip_validation=True, # Get raw JSON, we'll validate leniently
503
+ )
504
+
505
+ # Lenient parsing of facts from raw JSON
506
+ chunk_facts = []
507
+ has_malformed_facts = False
508
+
509
+ # Handle malformed LLM responses
510
+ if not isinstance(extraction_response_json, dict):
511
+ if attempt < max_retries - 1:
512
+ logger.warning(
513
+ f"LLM returned non-dict JSON on attempt {attempt + 1}/{max_retries}: {type(extraction_response_json).__name__}. Retrying..."
514
+ )
515
+ continue
516
+ else:
517
+ logger.warning(
518
+ f"LLM returned non-dict JSON after {max_retries} attempts: {type(extraction_response_json).__name__}. "
519
+ f"Raw: {str(extraction_response_json)[:500]}"
520
+ )
521
+ return []
522
+
523
+ raw_facts = extraction_response_json.get('facts', [])
524
+ if not raw_facts:
525
+ logger.debug(
526
+ f"LLM response missing 'facts' field or returned empty list. "
527
+ f"Response: {extraction_response_json}. "
528
+ f"Input: "
529
+ f"date: {event_date.isoformat()}, "
530
+ f"context: {context if context else 'none'}, "
531
+ f"text: {chunk}"
532
+ )
533
+
534
+ for i, llm_fact in enumerate(raw_facts):
535
+ # Skip non-dict entries but track them for retry
536
+ if not isinstance(llm_fact, dict):
537
+ logger.warning(f"Skipping non-dict fact at index {i}")
538
+ has_malformed_facts = True
539
+ continue
540
+
541
+ # Helper to get non-empty value
542
+ def get_value(field_name):
543
+ value = llm_fact.get(field_name)
544
+ if value and value != '' and value != [] and value != {} and str(value).upper() != 'N/A':
545
+ return value
546
+ return None
547
+
548
+ # NEW FORMAT: what, when, who, why (all required)
549
+ what = get_value('what')
550
+ when = get_value('when')
551
+ who = get_value('who')
552
+ why = get_value('why')
553
+
554
+ # Fallback to old format if new fields not present
555
+ if not what:
556
+ what = get_value('factual_core')
557
+ if not what:
558
+ logger.warning(f"Skipping fact {i}: missing 'what' field")
559
+ continue
560
+
561
+ # Critical field: fact_type
562
+ # LLM uses "assistant" but we convert to "bank" for storage
563
+ fact_type = llm_fact.get('fact_type')
564
+
565
+ # Convert "assistant" → "bank" for storage
566
+ if fact_type == 'assistant':
567
+ fact_type = 'bank'
568
+
569
+ # Validate fact_type (after conversion)
570
+ if fact_type not in ['world', 'bank', 'opinion']:
571
+ # Try to fix common mistakes - check if they swapped fact_type and fact_kind
572
+ fact_kind = llm_fact.get('fact_kind')
573
+ if fact_kind == 'assistant':
574
+ fact_type = 'bank'
575
+ elif fact_kind in ['world', 'bank', 'opinion']:
576
+ fact_type = fact_kind
577
+ else:
578
+ # Default to 'world' if we can't determine
579
+ fact_type = 'world'
580
+ logger.warning(f"Fact {i}: defaulting to fact_type='world'")
581
+
582
+ # Get fact_kind for temporal handling (but don't store it)
583
+ fact_kind = llm_fact.get('fact_kind', 'conversation')
584
+ if fact_kind not in ['conversation', 'event', 'other']:
585
+ fact_kind = 'conversation'
586
+
587
+ # Build combined fact text from the 4 dimensions: what | when | who | why
588
+ fact_data = {}
589
+ combined_parts = [what]
590
+
591
+ if when:
592
+ combined_parts.append(f"When: {when}")
593
+
594
+ if who:
595
+ combined_parts.append(f"Involving: {who}")
596
+
597
+ if why:
598
+ combined_parts.append(why)
599
+
600
+ combined_text = " | ".join(combined_parts)
601
+
602
+ # Add temporal fields
603
+ # For events: occurred_start/occurred_end (when the event happened)
604
+ if fact_kind == 'event':
605
+ occurred_start = get_value('occurred_start')
606
+ occurred_end = get_value('occurred_end')
607
+ if occurred_start:
608
+ fact_data['occurred_start'] = occurred_start
609
+ if occurred_end:
610
+ fact_data['occurred_end'] = occurred_end
611
+
612
+ # Add entities if present (validate as Entity objects)
613
+ # LLM sometimes returns strings instead of {"text": "..."} format
614
+ entities = get_value('entities')
615
+ if entities:
616
+ # Validate and normalize each entity
617
+ validated_entities = []
618
+ for ent in entities:
619
+ if isinstance(ent, str):
620
+ # Normalize string to Entity object
621
+ validated_entities.append(Entity(text=ent))
622
+ elif isinstance(ent, dict) and 'text' in ent:
623
+ try:
624
+ validated_entities.append(Entity.model_validate(ent))
625
+ except Exception as e:
626
+ logger.warning(f"Invalid entity {ent}: {e}")
627
+ if validated_entities:
628
+ fact_data['entities'] = validated_entities
629
+
630
+ # Add causal relations if present (validate as CausalRelation objects)
631
+ # Filter out invalid relations (missing required fields)
632
+ causal_relations = get_value('causal_relations')
633
+ if causal_relations:
634
+ validated_relations = []
635
+ for rel in causal_relations:
636
+ if isinstance(rel, dict) and 'target_fact_index' in rel and 'relation_type' in rel:
637
+ try:
638
+ validated_relations.append(CausalRelation.model_validate(rel))
639
+ except Exception as e:
640
+ logger.warning(f"Invalid causal relation {rel}: {e}")
641
+ if validated_relations:
642
+ fact_data['causal_relations'] = validated_relations
643
+
644
+ # Always set mentioned_at to the event_date (when the conversation/document occurred)
645
+ fact_data['mentioned_at'] = event_date.isoformat()
646
+
647
+ # Build Fact model instance
648
+ try:
649
+ fact = Fact(
650
+ fact=combined_text,
651
+ fact_type=fact_type,
652
+ **fact_data
653
+ )
654
+ chunk_facts.append(fact)
655
+ except Exception as e:
656
+ logger.error(f"Failed to create Fact model for fact {i}: {e}")
657
+ has_malformed_facts = True
658
+ continue
659
+
660
+ # If we got malformed facts and haven't exhausted retries, try again
661
+ if has_malformed_facts and len(chunk_facts) < len(raw_facts) * 0.8 and attempt < max_retries - 1:
662
+ logger.warning(
663
+ f"Got {len(raw_facts) - len(chunk_facts)} malformed facts out of {len(raw_facts)} on attempt {attempt + 1}/{max_retries}. Retrying..."
664
+ )
665
+ continue
666
+
667
+ return chunk_facts
668
+
669
+ except BadRequestError as e:
670
+ last_error = e
671
+ if "json_validate_failed" in str(e):
672
+ logger.warning(f" [1.3.{chunk_index + 1}] Attempt {attempt + 1}/{max_retries} failed with JSON validation error: {e}")
673
+ if attempt < max_retries - 1:
674
+ logger.info(f" [1.3.{chunk_index + 1}] Retrying...")
675
+ continue
676
+ # If it's not a JSON validation error or we're out of retries, re-raise
677
+ raise
678
+
679
+ # If we exhausted all retries, raise the last error
680
+ raise last_error
681
+
682
+
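To make the lenient-parsing loop above concrete, here is roughly what one raw LLM fact dict becomes (illustrative values; the 'assistant' to 'bank' conversion and the entity normalization happen exactly as coded above):

    raw_fact = {
        "what": "Assistant recommended Redis for caching frequently-accessed data",
        "when": "March 5, 2024 during conversation",
        "who": "user, assistant",
        "why": "User asked how to fix slow API performance with 1000+ concurrent users",
        "fact_type": "assistant",   # stored as "bank"
        "fact_kind": "conversation",
        "entities": ["user"],       # bare strings are wrapped as Entity(text="user")
    }
    # Resulting Fact (abridged):
    #   fact="Assistant recommended Redis ... | When: March 5, 2024 during conversation "
    #        "| Involving: user, assistant | User asked how to fix slow API performance ..."
    #   fact_type="bank", mentioned_at=<event_date ISO string>, entities=[Entity(text="user")]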
683
+ async def _extract_facts_with_auto_split(
684
+ chunk: str,
685
+ chunk_index: int,
686
+ total_chunks: int,
687
+ event_date: datetime,
688
+ context: str,
689
+ llm_config: LLMConfig,
690
+ agent_name: Optional[str] = None,
691
+ extract_opinions: bool = False
692
+ ) -> List[Fact]:
693
+ """
694
+ Extract facts from a chunk with automatic splitting if output exceeds token limits.
695
+
696
+ If the LLM output is too long (OutputTooLongError), this function automatically
697
+ splits the chunk in half and processes each half recursively.
698
+
699
+ Args:
700
+ chunk: Text chunk to process
701
+ chunk_index: Index of this chunk in the original list
702
+ total_chunks: Total number of original chunks
703
+ event_date: Reference date for temporal information
704
+ context: Context about the conversation/document
705
+ llm_config: LLM configuration to use
706
+ agent_name: Optional agent name (memory owner)
707
+ extract_opinions: If True, extract ONLY opinions. If False, extract world and assistant ('bank') facts (no opinions)
708
+
709
+ Returns:
710
+ List of Fact model instances extracted from the chunk (possibly from sub-chunks)
711
+ """
712
+ import logging
713
+ logger = logging.getLogger(__name__)
714
+
715
+ try:
716
+ # Try to extract facts from the full chunk
717
+ return await _extract_facts_from_chunk(
718
+ chunk=chunk,
719
+ chunk_index=chunk_index,
720
+ total_chunks=total_chunks,
721
+ event_date=event_date,
722
+ context=context,
723
+ llm_config=llm_config,
724
+ agent_name=agent_name,
725
+ extract_opinions=extract_opinions
726
+ )
727
+ except OutputTooLongError as e:
728
+ # Output exceeded token limits - split the chunk in half and retry
729
+ logger.warning(
730
+ f"Output too long for chunk {chunk_index + 1}/{total_chunks} "
731
+ f"({len(chunk)} chars). Splitting in half and retrying..."
732
+ )
733
+
734
+ # Split at the midpoint, preferring sentence boundaries
735
+ mid_point = len(chunk) // 2
736
+
737
+ # Try to find a sentence boundary near the midpoint
738
+ # Look for ". ", "! ", "? " within 20% of midpoint
739
+ search_range = int(len(chunk) * 0.2)
740
+ search_start = max(0, mid_point - search_range)
741
+ search_end = min(len(chunk), mid_point + search_range)
742
+
743
+ sentence_endings = ['. ', '! ', '? ', '\n\n']
744
+ best_split = mid_point
745
+
746
+ for ending in sentence_endings:
747
+ pos = chunk.rfind(ending, search_start, search_end)
748
+ if pos != -1:
749
+ best_split = pos + len(ending)
750
+ break
751
+
752
+ # Split the chunk
753
+ first_half = chunk[:best_split].strip()
754
+ second_half = chunk[best_split:].strip()
755
+
756
+ logger.info(
757
+ f"Split chunk {chunk_index + 1} into two sub-chunks: "
758
+ f"{len(first_half)} chars and {len(second_half)} chars"
759
+ )
760
+
761
+ # Process both halves recursively (in parallel)
762
+ sub_tasks = [
763
+ _extract_facts_with_auto_split(
764
+ chunk=first_half,
765
+ chunk_index=chunk_index,
766
+ total_chunks=total_chunks,
767
+ event_date=event_date,
768
+ context=context,
769
+ llm_config=llm_config,
770
+ agent_name=agent_name,
771
+ extract_opinions=extract_opinions
772
+ ),
773
+ _extract_facts_with_auto_split(
774
+ chunk=second_half,
775
+ chunk_index=chunk_index,
776
+ total_chunks=total_chunks,
777
+ event_date=event_date,
778
+ context=context,
779
+ llm_config=llm_config,
780
+ agent_name=agent_name,
781
+ extract_opinions=extract_opinions
782
+ )
783
+ ]
784
+
785
+ sub_results = await asyncio.gather(*sub_tasks)
786
+
787
+ # Combine results from both halves
788
+ all_facts = []
789
+ for sub_result in sub_results:
790
+ all_facts.extend(sub_result)
791
+
792
+ logger.info(
793
+ f"Successfully extracted {len(all_facts)} facts from split chunk {chunk_index + 1}"
794
+ )
795
+
796
+ return all_facts
797
+
798
+
799
+ async def extract_facts_from_text(
800
+ text: str,
801
+ event_date: datetime,
802
+ llm_config: LLMConfig,
803
+ agent_name: str,
804
+ context: str = "",
805
+ extract_opinions: bool = False,
806
+ ) -> tuple[List[Fact], List[tuple[str, int]]]:
807
+ """
808
+ Extract semantic facts from conversational or narrative text using LLM.
809
+
810
+ For large texts (>3000 chars), automatically chunks at sentence boundaries
811
+ to avoid hitting output token limits. Processes ALL chunks in PARALLEL for speed.
812
+
813
+ If a chunk produces output that exceeds token limits (OutputTooLongError), it is
814
+ automatically split in half and retried recursively until successful.
815
+
816
+ Args:
817
+ text: Input text (conversation, article, etc.)
818
+ event_date: Reference date for resolving relative times
819
+ context: Context about the conversation/document
820
+ llm_config: LLM configuration to use
821
+ agent_name: Agent name (memory owner)
822
+ extract_opinions: If True, extract ONLY opinions. If False, extract world and bank facts (no opinions)
823
+
824
+ Returns:
825
+ Tuple of (facts, chunks) where:
826
+ - facts: List of Fact model instances
827
+ - chunks: List of tuples (chunk_text, fact_count) for each chunk
828
+ """
829
+ chunks = chunk_text(text, max_chars=3000)
830
+ tasks = [
831
+ _extract_facts_with_auto_split(
832
+ chunk=chunk,
833
+ chunk_index=i,
834
+ total_chunks=len(chunks),
835
+ event_date=event_date,
836
+ context=context,
837
+ llm_config=llm_config,
838
+ agent_name=agent_name,
839
+ extract_opinions=extract_opinions
840
+ )
841
+ for i, chunk in enumerate(chunks)
842
+ ]
843
+ chunk_results = await asyncio.gather(*tasks)
844
+ all_facts = []
845
+ chunk_metadata = [] # [(chunk_text, fact_count), ...]
846
+ for chunk, chunk_facts in zip(chunks, chunk_results):
847
+ all_facts.extend(chunk_facts)
848
+ chunk_metadata.append((chunk, len(chunk_facts)))
849
+ return all_facts, chunk_metadata
850
+
851
+
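A hedged end-to-end usage sketch. An LLMConfig instance is assumed to come from hindsight_api.engine.llm_wrapper; its construction is outside this hunk.

    import asyncio
    from datetime import datetime, timezone
    from hindsight_api.engine.retain.fact_extraction import extract_facts_from_text

    async def demo(llm_config):
        facts, chunks = await extract_facts_from_text(
            text="I just got back from Emily's wedding in San Francisco.",
            event_date=datetime(2024, 6, 10, tzinfo=timezone.utc),
            llm_config=llm_config,
            agent_name="assistant",
            context="casual chat with the user",
        )
        for fact in facts:
            print(fact.fact_type, "|", fact.fact)
        for chunk, fact_count in chunks:
            print(f"chunk of {len(chunk)} chars -> {fact_count} facts")

    # asyncio.run(demo(llm_config))  # requires a configured LLMConfig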
852
+ # ============================================================================
853
+ # ORCHESTRATION LAYER
854
+ # ============================================================================
855
+
856
+ # Import types for the orchestration layer (note: ExtractedFact here is different from the Pydantic model above)
857
+ from .types import RetainContent, ExtractedFact as ExtractedFactType, ChunkMetadata, CausalRelation as CausalRelationType
858
+ from typing import Tuple
859
+
860
+ logger = logging.getLogger(__name__)
861
+
862
+ # Each fact gets 10 seconds offset to preserve ordering within a document
863
+ SECONDS_PER_FACT = 10
864
+
865
+
866
+ async def extract_facts_from_contents(
867
+ contents: List[RetainContent],
868
+ llm_config,
869
+ agent_name: str,
870
+ extract_opinions: bool = False
871
+ ) -> Tuple[List[ExtractedFactType], List[ChunkMetadata]]:
872
+ """
873
+ Extract facts from multiple content items in parallel.
874
+
875
+ This function:
876
+ 1. Extracts facts from all contents in parallel using the LLM
877
+ 2. Tracks which facts came from which chunks
878
+ 3. Adds time offsets to preserve fact ordering within each content
879
+ 4. Returns typed ExtractedFact and ChunkMetadata objects
880
+
881
+ Args:
882
+ contents: List of RetainContent objects to process
883
+ llm_config: LLM configuration for fact extraction
884
+ agent_name: Name of the agent (for agent-related fact detection)
885
+ extract_opinions: If True, extract only opinions; otherwise world/bank facts
886
+
887
+ Returns:
888
+ Tuple of (extracted_facts, chunks_metadata)
889
+ """
890
+ if not contents:
891
+ return [], []
892
+
893
+ # Step 1: Create parallel fact extraction tasks
894
+ fact_extraction_tasks = []
895
+ for item in contents:
896
+ # Call extract_facts_from_text directly (defined earlier in this file)
897
+ # to avoid circular import with utils.extract_facts
898
+ task = extract_facts_from_text(
899
+ text=item.content,
900
+ event_date=item.event_date,
901
+ context=item.context,
902
+ llm_config=llm_config,
903
+ agent_name=agent_name,
904
+ extract_opinions=extract_opinions
905
+ )
906
+ fact_extraction_tasks.append(task)
907
+
908
+ # Step 2: Wait for all fact extractions to complete
909
+ all_fact_results = await asyncio.gather(*fact_extraction_tasks)
910
+
911
+ # Step 3: Flatten and convert to typed objects
912
+ extracted_facts: List[ExtractedFactType] = []
913
+ chunks_metadata: List[ChunkMetadata] = []
914
+
915
+ global_chunk_idx = 0
916
+ global_fact_idx = 0
917
+
918
+ for content_index, (content, (facts_from_llm, chunks_from_llm)) in enumerate(zip(contents, all_fact_results)):
919
+ chunk_start_idx = global_chunk_idx
920
+
921
+ # Convert chunk tuples to ChunkMetadata objects
922
+ for chunk_index_in_content, (chunk_text, chunk_fact_count) in enumerate(chunks_from_llm):
923
+ chunk_metadata = ChunkMetadata(
924
+ chunk_text=chunk_text,
925
+ fact_count=chunk_fact_count,
926
+ content_index=content_index,
927
+ chunk_index=global_chunk_idx
928
+ )
929
+ chunks_metadata.append(chunk_metadata)
930
+ global_chunk_idx += 1
931
+
932
+ # Convert facts to ExtractedFact objects with proper indexing
933
+ fact_idx_in_content = 0
934
+ for chunk_idx_in_content, (chunk_text, chunk_fact_count) in enumerate(chunks_from_llm):
935
+ chunk_global_idx = chunk_start_idx + chunk_idx_in_content
936
+
937
+ for _ in range(chunk_fact_count):
938
+ if fact_idx_in_content < len(facts_from_llm):
939
+ fact_from_llm = facts_from_llm[fact_idx_in_content]
940
+
941
+ # Convert Fact model from LLM to ExtractedFactType dataclass
942
+ # mentioned_at is always the event_date (when the conversation/document occurred)
943
+ extracted_fact = ExtractedFactType(
944
+ fact_text=fact_from_llm.fact,
945
+ fact_type=fact_from_llm.fact_type,
946
+ entities=[e.text for e in (fact_from_llm.entities or [])],
947
+ # occurred_start/end: from LLM only, leave None if not provided
948
+ occurred_start=_parse_datetime(fact_from_llm.occurred_start) if fact_from_llm.occurred_start else None,
949
+ occurred_end=_parse_datetime(fact_from_llm.occurred_end) if fact_from_llm.occurred_end else None,
950
+ causal_relations=_convert_causal_relations(
951
+ fact_from_llm.causal_relations or [],
952
+ global_fact_idx
953
+ ),
954
+ content_index=content_index,
955
+ chunk_index=chunk_global_idx,
956
+ context=content.context,
957
+ # mentioned_at: always the event_date (when the conversation/document occurred)
958
+ mentioned_at=content.event_date,
959
+ metadata=content.metadata
960
+ )
961
+
962
+ extracted_facts.append(extracted_fact)
963
+ global_fact_idx += 1
964
+ fact_idx_in_content += 1
965
+
966
+ # Step 4: Add time offsets to preserve ordering within each content
967
+ _add_temporal_offsets(extracted_facts, contents)
968
+
969
+ return extracted_facts, chunks_metadata
970
+
971
+
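A hypothetical call into the orchestration layer; this assumes RetainContent (defined in types.py, not shown in this hunk) accepts content, event_date, context and metadata as keyword arguments:

    from datetime import datetime, timezone
    from hindsight_api.engine.retain.types import RetainContent
    from hindsight_api.engine.retain.fact_extraction import extract_facts_from_contents

    contents = [
        RetainContent(
            content="I love Italian food and prefer outdoor dining.",
            event_date=datetime(2024, 3, 5, tzinfo=timezone.utc),
            context="preferences chat",
            metadata={},
        )
    ]
    # facts, chunk_meta = await extract_facts_from_contents(contents, llm_config, "assistant")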
972
+ def _parse_datetime(date_str: str):
973
+ """Parse ISO datetime string."""
974
+ from dateutil import parser as date_parser
975
+ try:
976
+ return date_parser.isoparse(date_str)
977
+ except Exception:
978
+ return None
979
+
980
+
981
+ def _convert_causal_relations(relations_from_llm, fact_start_idx: int) -> List[CausalRelationType]:
982
+ """
983
+ Convert causal relations from LLM format to ExtractedFact format.
984
+
985
+ Adjusts target_fact_index from content-relative to global indices.
986
+ """
987
+ causal_relations = []
988
+ for rel in relations_from_llm:
989
+ causal_relation = CausalRelationType(
990
+ relation_type=rel.relation_type,
991
+ target_fact_index=fact_start_idx + rel.target_fact_index,
992
+ strength=rel.strength
993
+ )
994
+ causal_relations.append(causal_relation)
995
+ return causal_relations
996
+
997
+
998
+ def _add_temporal_offsets(facts: List[ExtractedFactType], contents: List[RetainContent]) -> None:
999
+ """
1000
+ Add time offsets to preserve fact ordering within each content.
1001
+
1002
+ This allows retrieval to distinguish between facts that happened earlier vs later
1003
+ in the same conversation, even when the base event_date is the same.
1004
+
1005
+ Modifies facts in place.
1006
+ """
1007
+ # Group facts by content_index
1008
+ current_content_idx = 0
1009
+ content_fact_start = 0
1010
+
1011
+ for i, fact in enumerate(facts):
1012
+ if fact.content_index != current_content_idx:
1013
+ # Moved to next content
1014
+ current_content_idx = fact.content_index
1015
+ content_fact_start = i
1016
+
1017
+ # Calculate position within this content
1018
+ fact_position = i - content_fact_start
1019
+ offset = timedelta(seconds=fact_position * SECONDS_PER_FACT)
1020
+
1021
+ # Apply offset to all temporal fields
1022
+ if fact.occurred_start:
1023
+ fact.occurred_start = fact.occurred_start + offset
1024
+ if fact.occurred_end:
1025
+ fact.occurred_end = fact.occurred_end + offset
1026
+ if fact.mentioned_at:
1027
+ fact.mentioned_at = fact.mentioned_at + offset
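A short worked example of the offset arithmetic above: with SECONDS_PER_FACT = 10, consecutive facts from the same content item are shifted by 0, 10, 20, ... seconds, so their ordering survives even though they share one event_date.

    from datetime import datetime, timedelta, timezone

    SECONDS_PER_FACT = 10
    event_date = datetime(2024, 6, 10, tzinfo=timezone.utc)

    for fact_position in range(3):   # position of the fact within its content item
        offset = timedelta(seconds=fact_position * SECONDS_PER_FACT)
        print((event_date + offset).isoformat())
    # 2024-06-10T00:00:00+00:00
    # 2024-06-10T00:00:10+00:00
    # 2024-06-10T00:00:20+00:00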