PyPI - hindsight-api - Versions diffs - 0.1.15__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

hindsight-api 0.1.15__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

hindsight_api/api/__init__.py +38 -14
hindsight_api/api/http.py +100 -9
hindsight_api/api/mcp.py +203 -52
hindsight_api/config.py +27 -0
hindsight_api/engine/interface.py +4 -0
hindsight_api/engine/llm_wrapper.py +275 -45
hindsight_api/engine/memory_engine.py +69 -16
hindsight_api/engine/response_models.py +7 -1
hindsight_api/engine/retain/entity_processing.py +37 -8
hindsight_api/engine/retain/fact_extraction.py +49 -6
hindsight_api/engine/retain/observation_regeneration.py +4 -2
hindsight_api/engine/retain/orchestrator.py +12 -1
hindsight_api/engine/retain/types.py +7 -0
hindsight_api/extensions/context.py +8 -1
hindsight_api/extensions/operation_validator.py +6 -4
hindsight_api/main.py +29 -1
hindsight_api/models.py +3 -0
{hindsight_api-0.1.15.dist-info → hindsight_api-0.2.0.dist-info}/METADATA +3 -2
{hindsight_api-0.1.15.dist-info → hindsight_api-0.2.0.dist-info}/RECORD +21 -21
{hindsight_api-0.1.15.dist-info → hindsight_api-0.2.0.dist-info}/WHEEL +0 -0
{hindsight_api-0.1.15.dist-info → hindsight_api-0.2.0.dist-info}/entry_points.txt +0 -0

hindsight_api/api/mcp.py CHANGED Viewed

@@ -8,6 +8,7 @@ from contextvars import ContextVar
 from fastmcp import FastMCP
 from hindsight_api import MemoryEngine
+from hindsight_api.api.http import BankListItem, BankListResponse, BankProfileResponse, DispositionTraits
 from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
 from hindsight_api.models import RequestContext
@@ -27,12 +28,15 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
-# Context variable to hold the current bank_id from the URL path
+# Default bank_id from environment variable
+DEFAULT_BANK_ID = os.environ.get("HINDSIGHT_MCP_BANK_ID", "default")
+# Context variable to hold the current bank_id
 _current_bank_id: ContextVar[str | None] = ContextVar("current_bank_id", default=None)
 def get_current_bank_id() -> str | None:
-    """Get the current bank_id from context (set from URL path)."""
+    """Get the current bank_id from context."""
     return _current_bank_id.get()
@@ -44,12 +48,13 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
         memory: MemoryEngine instance (required)
     Returns:
-        Configured FastMCP server instance
+        Configured FastMCP server instance with stateless_http enabled
     """
-    mcp = FastMCP("hindsight-mcp-server")
+    # Use stateless_http=True for Claude Code compatibility
+    mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
     @mcp.tool()
-    async def retain(content: str, context: str = "general") -> str:
+    async def retain(content: str, context: str = "general", bank_id: str | None = None) -> str:
         """
         Store important information to long-term memory.
@@ -65,21 +70,24 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
         Args:
             content: The fact/memory to store (be specific and include relevant details)
             context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
+            bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
         """
         try:
-            bank_id = get_current_bank_id()
-            if bank_id is None:
+            target_bank = bank_id or get_current_bank_id()
+            if target_bank is None:
                 return "Error: No bank_id configured"
             await memory.retain_batch_async(
-                bank_id=bank_id, contents=[{"content": content, "context": context}], request_context=RequestContext()
+                bank_id=target_bank,
+                contents=[{"content": content, "context": context}],
+                request_context=RequestContext(),
             )
-            return "Memory stored successfully"
+            return f"Memory stored successfully in bank '{target_bank}'"
         except Exception as e:
             logger.error(f"Error storing memory: {e}", exc_info=True)
             return f"Error: {str(e)}"
     @mcp.tool()
-    async def recall(query: str, max_results: int = 10) -> str:
+    async def recall(query: str, max_tokens: int = 4096, bank_id: str | None = None) -> str:
         """
         Search memories to provide personalized, context-aware responses.
@@ -91,49 +99,184 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
         Args:
             query: Natural language search query (e.g., "user's food preferences", "what projects is user working on")
-            max_results: Maximum number of results to return (default: 10)
+            max_tokens: Maximum tokens in the response (default: 4096)
+            bank_id: Optional bank to search in (defaults to session bank). Use for cross-bank operations.
         """
         try:
-            bank_id = get_current_bank_id()
-            if bank_id is None:
+            target_bank = bank_id or get_current_bank_id()
+            if target_bank is None:
                 return "Error: No bank_id configured"
             from hindsight_api.engine.memory_engine import Budget
-            search_result = await memory.recall_async(
-                bank_id=bank_id,
+            recall_result = await memory.recall_async(
+                bank_id=target_bank,
                 query=query,
                 fact_type=list(VALID_RECALL_FACT_TYPES),
-                budget=Budget.LOW,
+                budget=Budget.HIGH,
+                max_tokens=max_tokens,
+                request_context=RequestContext(),
+            )
+            # Use model's JSON serialization
+            return recall_result.model_dump_json(indent=2)
+        except Exception as e:
+            logger.error(f"Error searching: {e}", exc_info=True)
+            return f'{{"error": "{e}", "results": []}}'
+    @mcp.tool()
+    async def reflect(query: str, context: str | None = None, budget: str = "low", bank_id: str | None = None) -> str:
+        """
+        Generate thoughtful analysis by synthesizing stored memories with the bank's personality.
+        WHEN TO USE THIS TOOL:
+        Use reflect when you need reasoned analysis, not just fact retrieval. This tool
+        thinks through the question using everything the bank knows and its personality traits.
+        EXAMPLES OF GOOD QUERIES:
+        - "What patterns have emerged in how I approach debugging?"
+        - "Based on my past decisions, what architectural style do I prefer?"
+        - "What might be the best approach for this problem given what you know about me?"
+        - "How should I prioritize these tasks based on my goals?"
+        HOW IT DIFFERS FROM RECALL:
+        - recall: Returns raw facts matching your search (fast lookup)
+        - reflect: Reasons across memories to form a synthesized answer (deeper analysis)
+        Use recall for "what did I say about X?" and reflect for "what should I do about X?"
+        Args:
+            query: The question or topic to reflect on
+            context: Optional context about why this reflection is needed
+            budget: Search budget - 'low', 'mid', or 'high' (default: 'low')
+            bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
+        """
+        try:
+            target_bank = bank_id or get_current_bank_id()
+            if target_bank is None:
+                return "Error: No bank_id configured"
+            from hindsight_api.engine.memory_engine import Budget
+            # Map string budget to enum
+            budget_map = {"low": Budget.LOW, "mid": Budget.MID, "high": Budget.HIGH}
+            budget_enum = budget_map.get(budget.lower(), Budget.LOW)
+            reflect_result = await memory.reflect_async(
+                bank_id=target_bank,
+                query=query,
+                budget=budget_enum,
+                context=context,
                 request_context=RequestContext(),
             )
-            results = [
-                {
-                    "id": fact.id,
-                    "text": fact.text,
-                    "type": fact.fact_type,
-                    "context": fact.context,
-                    "occurred_start": fact.occurred_start,
-                }
-                for fact in search_result.results[:max_results]
+            return reflect_result.model_dump_json(indent=2)
+        except Exception as e:
+            logger.error(f"Error reflecting: {e}", exc_info=True)
+            return f'{{"error": "{e}", "text": ""}}'
+    @mcp.tool()
+    async def list_banks() -> str:
+        """
+        List all available memory banks.
+        Use this to discover banks for orchestration or to find
+        the correct bank_id for cross-bank operations.
+        Returns:
+            JSON object with banks array containing bank_id, name, disposition, background, and timestamps
+        """
+        try:
+            banks = await memory.list_banks(request_context=RequestContext())
+            bank_items = [
+                BankListItem(
+                    bank_id=b.get("bank_id") or b.get("id"),
+                    name=b.get("name"),
+                    disposition=DispositionTraits(
+                        **b.get("disposition", {"skepticism": 3, "literalism": 3, "empathy": 3})
+                    ),
+                    background=b.get("background"),
+                    created_at=str(b.get("created_at")) if b.get("created_at") else None,
+                    updated_at=str(b.get("updated_at")) if b.get("updated_at") else None,
+                )
+                for b in banks
             ]
+            return BankListResponse(banks=bank_items).model_dump_json(indent=2)
+        except Exception as e:
+            logger.error(f"Error listing banks: {e}", exc_info=True)
+            return f'{{"error": "{e}", "banks": []}}'
-            return json.dumps({"results": results}, indent=2)
+    @mcp.tool()
+    async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
+        """
+        Create or update a memory bank.
+        Use this to create new banks for different agents, sessions, or purposes.
+        Banks are isolated memory stores - each bank has its own memories and personality.
+        Args:
+            bank_id: Unique identifier for the bank (e.g., 'orchestrator-memory', 'agent-1')
+            name: Human-readable name for the bank
+            background: Context about what this bank stores or its purpose
+        """
+        try:
+            # Get or create the bank profile (auto-creates with defaults)
+            await memory.get_bank_profile(bank_id, request_context=RequestContext())
+            # Update name and/or background if provided
+            if name is not None or background is not None:
+                await memory.update_bank(bank_id, name=name, background=background, request_context=RequestContext())
+            # Get final profile and return using BankProfileResponse model
+            profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
+            disposition = profile.get("disposition")
+            if hasattr(disposition, "model_dump"):
+                disposition_traits = DispositionTraits(**disposition.model_dump())
+            else:
+                disposition_traits = DispositionTraits(
+                    **dict(disposition or {"skepticism": 3, "literalism": 3, "empathy": 3})
+                )
+            response = BankProfileResponse(
+                bank_id=bank_id,
+                name=profile.get("name") or "",
+                disposition=disposition_traits,
+                background=profile.get("background") or "",
+            )
+            return response.model_dump_json(indent=2)
         except Exception as e:
-            logger.error(f"Error searching: {e}", exc_info=True)
-            return json.dumps({"error": str(e), "results": []})
+            logger.error(f"Error creating bank: {e}", exc_info=True)
+            return json.dumps({"error": str(e)})
     return mcp
 class MCPMiddleware:
-    """ASGI middleware that extracts bank_id from path and sets context."""
+    """ASGI middleware that extracts bank_id from header or path and sets context.
+    Bank ID can be provided via:
+    1. X-Bank-Id header (recommended for Claude Code)
+    2. URL path: /mcp/{bank_id}/
+    3. Environment variable HINDSIGHT_MCP_BANK_ID (fallback default)
+    For Claude Code, configure with:
+        claude mcp add --transport http hindsight http://localhost:8888/mcp \\
+            --header "X-Bank-Id: my-bank"
+    """
     def __init__(self, app, memory: MemoryEngine):
         self.app = app
         self.memory = memory
         self.mcp_server = create_mcp_server(memory)
-        self.mcp_app = self.mcp_server.http_app()
+        self.mcp_app = self.mcp_server.http_app(path="/")
+        # Expose the lifespan for the parent app to chain
+        self.lifespan = self.mcp_app.lifespan_handler if hasattr(self.mcp_app, "lifespan_handler") else None
+    def _get_header(self, scope: dict, name: str) -> str | None:
+        """Extract a header value from ASGI scope."""
+        name_lower = name.lower().encode()
+        for header_name, header_value in scope.get("headers", []):
+            if header_name.lower() == name_lower:
+                return header_value.decode()
+        return None
     async def __call__(self, scope, receive, send):
         if scope["type"] != "http":
@@ -150,32 +293,39 @@ class MCPMiddleware:
         # Also handle case where mount path wasn't stripped (e.g., /mcp/...)
         if path.startswith("/mcp/"):
             path = path[4:]  # Remove /mcp prefix
-        # Extract bank_id from path: /{bank_id}/ or /{bank_id}
-        # http_app expects requests at /
-        if not path.startswith("/") or len(path) <= 1:
-            # No bank_id in path - return error
-            await self._send_error(send, 400, "bank_id required in path: /mcp/{bank_id}/")
-            return
-        # Extract bank_id from first path segment
-        parts = path[1:].split("/", 1)
-        if not parts[0]:
-            await self._send_error(send, 400, "bank_id required in path: /mcp/{bank_id}/")
-            return
-        bank_id = parts[0]
-        new_path = "/" + parts[1] if len(parts) > 1 else "/"
+        elif path == "/mcp":
+            path = "/"
+        # Try to get bank_id from header first (for Claude Code compatibility)
+        bank_id = self._get_header(scope, "X-Bank-Id")
+        # MCP endpoint paths that should not be treated as bank_ids
+        MCP_ENDPOINTS = {"sse", "messages"}
+        # If no header, try to extract from path: /{bank_id}/...
+        new_path = path
+        if not bank_id and path.startswith("/") and len(path) > 1:
+            parts = path[1:].split("/", 1)
+            # Don't treat MCP endpoints as bank_ids
+            if parts[0] and parts[0] not in MCP_ENDPOINTS:
+                # First segment looks like a bank_id
+                bank_id = parts[0]
+                new_path = "/" + parts[1] if len(parts) > 1 else "/"
+        # Fall back to default bank_id
+        if not bank_id:
+            bank_id = DEFAULT_BANK_ID
+            logger.debug(f"Using default bank_id: {bank_id}")
         # Set bank_id context
         token = _current_bank_id.set(bank_id)
         try:
             new_scope = scope.copy()
             new_scope["path"] = new_path
+            # Clear root_path since we're passing directly to the app
+            new_scope["root_path"] = ""
-            # Wrap send to rewrite the SSE endpoint URL to include bank_id
-            # The SSE app sends "event: endpoint\ndata: /messages\n" but we need
-            # the client to POST to /{bank_id}/messages instead
+            # Wrap send to rewrite the SSE endpoint URL to include bank_id if using path-based routing
             async def send_wrapper(message):
                 if message["type"] == "http.response.body":
                     body = message.get("body", b"")
@@ -211,9 +361,10 @@ def create_mcp_app(memory: MemoryEngine):
     """
     Create an ASGI app that handles MCP requests.
-    URL pattern: /mcp/{bank_id}/
-    The bank_id is extracted from the URL path and made available to tools.
+    Bank ID can be provided via:
+    1. X-Bank-Id header: claude mcp add --transport http hindsight http://localhost:8888/mcp --header "X-Bank-Id: my-bank"
+    2. URL path: /mcp/{bank_id}/
+    3. Environment variable HINDSIGHT_MCP_BANK_ID (fallback, default: "default")
     Args:
         memory: MemoryEngine instance

hindsight_api/config.py CHANGED Viewed

@@ -16,6 +16,8 @@ ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
 ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
 ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
 ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
+ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
+ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
 ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
 ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
@@ -33,6 +35,10 @@ ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
 ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
 ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
+# Observation thresholds
+ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
+ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
 # Optimization flags
 ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
 ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
@@ -41,6 +47,8 @@ ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
 DEFAULT_DATABASE_URL = "pg0"
 DEFAULT_LLM_PROVIDER = "openai"
 DEFAULT_LLM_MODEL = "gpt-5-mini"
+DEFAULT_LLM_MAX_CONCURRENT = 32
+DEFAULT_LLM_TIMEOUT = 120.0  # seconds
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
 DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
@@ -55,6 +63,10 @@ DEFAULT_MCP_ENABLED = True
 DEFAULT_GRAPH_RETRIEVER = "bfs"  # Options: "bfs", "mpfp"
 DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
+# Observation thresholds
+DEFAULT_OBSERVATION_MIN_FACTS = 5  # Min facts required to generate entity observations
+DEFAULT_OBSERVATION_TOP_ENTITIES = 5  # Max entities to process per retain batch
 # Default MCP tool descriptions (can be customized via env vars)
 DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
@@ -91,6 +103,8 @@ class HindsightConfig:
     llm_api_key: str | None
     llm_model: str
     llm_base_url: str | None
+    llm_max_concurrent: int
+    llm_timeout: float
     # Embeddings
     embeddings_provider: str
@@ -111,6 +125,10 @@ class HindsightConfig:
     # Recall
     graph_retriever: str
+    # Observation thresholds
+    observation_min_facts: int
+    observation_top_entities: int
     # Optimization flags
     skip_llm_verification: bool
     lazy_reranker: bool
@@ -126,6 +144,8 @@ class HindsightConfig:
             llm_api_key=os.getenv(ENV_LLM_API_KEY),
             llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
             llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
+            llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
+            llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
             # Embeddings
             embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
             embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
@@ -144,6 +164,11 @@ class HindsightConfig:
             # Optimization flags
             skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
             lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
+            # Observation thresholds
+            observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
+            observation_top_entities=int(
+                os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
+            ),
         )
     def get_llm_base_url(self) -> str:
@@ -156,6 +181,8 @@ class HindsightConfig:
             return "https://api.groq.com/openai/v1"
         elif provider == "ollama":
             return "http://localhost:11434/v1"
+        elif provider == "lmstudio":
+            return "http://localhost:1234/v1"
         else:
             return ""

hindsight_api/engine/interface.py CHANGED Viewed

@@ -110,6 +110,8 @@ class MemoryEngineInterface(ABC):
         *,
         budget: "Budget | None" = None,
         context: str | None = None,
+        max_tokens: int = 4096,
+        response_schema: dict | None = None,
         request_context: "RequestContext",
     ) -> "ReflectResult":
         """
@@ -120,6 +122,8 @@ class MemoryEngineInterface(ABC):
             query: The question to reflect on.
             budget: Search budget for retrieving context.
             context: Additional context for the reflection.
+            max_tokens: Maximum tokens for the response.
+            response_schema: Optional JSON Schema for structured output.
             request_context: Request context for authentication.
         Returns:

hindsight-api 0.1.15__py3-none-any.whl → 0.2.0__py3-none-any.whl

hindsight-api 0.1.15__py3-none-any.whl → 0.2.0__py3-none-any.whl