hindsight-api 0.4.7__tar.gz → 0.4.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/PKG-INFO +2 -1
  2. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/__init__.py +1 -1
  3. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
  4. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/api/http.py +39 -1
  5. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/banner.py +3 -0
  6. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/config.py +44 -6
  7. hindsight_api-0.4.8/hindsight_api/daemon.py +113 -0
  8. hindsight_api-0.4.8/hindsight_api/engine/llm_interface.py +146 -0
  9. hindsight_api-0.4.8/hindsight_api/engine/llm_wrapper.py +597 -0
  10. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/memory_engine.py +71 -37
  11. hindsight_api-0.4.8/hindsight_api/engine/providers/__init__.py +14 -0
  12. hindsight_api-0.4.8/hindsight_api/engine/providers/anthropic_llm.py +434 -0
  13. hindsight_api-0.4.8/hindsight_api/engine/providers/claude_code_llm.py +352 -0
  14. hindsight_api-0.4.8/hindsight_api/engine/providers/codex_llm.py +527 -0
  15. hindsight_api-0.4.8/hindsight_api/engine/providers/gemini_llm.py +502 -0
  16. hindsight_api-0.4.8/hindsight_api/engine/providers/mock_llm.py +234 -0
  17. hindsight_api-0.4.8/hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
  18. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/__init__.py +2 -0
  19. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/builtin/tenant.py +36 -0
  20. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/operation_validator.py +26 -0
  21. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/main.py +6 -21
  22. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/migrations.py +75 -0
  23. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/worker/main.py +35 -10
  24. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/worker/poller.py +15 -11
  25. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/pyproject.toml +11 -10
  26. hindsight_api-0.4.7/hindsight_api/daemon.py +0 -207
  27. hindsight_api-0.4.7/hindsight_api/engine/llm_wrapper.py +0 -1620
  28. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/.gitignore +0 -0
  29. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/README.md +0 -0
  30. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/admin/__init__.py +0 -0
  31. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/admin/cli.py +0 -0
  32. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/README +0 -0
  33. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/env.py +0 -0
  34. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/script.py.mako +0 -0
  35. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
  36. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
  37. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
  38. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
  39. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
  40. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
  41. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
  42. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
  43. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
  44. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
  45. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
  46. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
  47. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
  48. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
  49. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
  50. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
  51. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
  52. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
  53. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
  54. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
  55. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
  56. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
  57. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/api/__init__.py +0 -0
  58. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/api/mcp.py +0 -0
  59. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/__init__.py +0 -0
  60. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/consolidation/__init__.py +0 -0
  61. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/consolidation/consolidator.py +0 -0
  62. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/consolidation/prompts.py +0 -0
  63. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/cross_encoder.py +0 -0
  64. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/db_budget.py +0 -0
  65. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/db_utils.py +0 -0
  66. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/directives/__init__.py +0 -0
  67. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/directives/models.py +0 -0
  68. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/embeddings.py +0 -0
  69. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/entity_resolver.py +0 -0
  70. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/interface.py +0 -0
  71. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/mental_models/__init__.py +0 -0
  72. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/mental_models/models.py +0 -0
  73. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/query_analyzer.py +0 -0
  74. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/__init__.py +0 -0
  75. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/agent.py +0 -0
  76. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/models.py +0 -0
  77. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/observations.py +0 -0
  78. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/prompts.py +0 -0
  79. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/tools.py +0 -0
  80. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/tools_schema.py +0 -0
  81. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/response_models.py +0 -0
  82. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/__init__.py +0 -0
  83. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/bank_utils.py +0 -0
  84. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/chunk_storage.py +0 -0
  85. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/deduplication.py +0 -0
  86. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/embedding_processing.py +0 -0
  87. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/embedding_utils.py +0 -0
  88. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/entity_processing.py +0 -0
  89. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/fact_extraction.py +0 -0
  90. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/fact_storage.py +0 -0
  91. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/link_creation.py +0 -0
  92. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/link_utils.py +0 -0
  93. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/orchestrator.py +0 -0
  94. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/types.py +0 -0
  95. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/__init__.py +0 -0
  96. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/fusion.py +0 -0
  97. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/graph_retrieval.py +0 -0
  98. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
  99. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
  100. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/reranking.py +0 -0
  101. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/retrieval.py +0 -0
  102. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/tags.py +0 -0
  103. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/temporal_extraction.py +0 -0
  104. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/think_utils.py +0 -0
  105. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/trace.py +0 -0
  106. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/tracer.py +0 -0
  107. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/types.py +0 -0
  108. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/task_backend.py +0 -0
  109. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/utils.py +0 -0
  110. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/base.py +0 -0
  111. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/builtin/__init__.py +0 -0
  112. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/context.py +0 -0
  113. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/http.py +0 -0
  114. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/loader.py +0 -0
  115. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/tenant.py +0 -0
  116. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/mcp_local.py +0 -0
  117. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/mcp_tools.py +0 -0
  118. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/metrics.py +0 -0
  119. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/models.py +0 -0
  120. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/pg0.py +0 -0
  121. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/server.py +0 -0
  122. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/worker/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hindsight-api
3
- Version: 0.4.7
3
+ Version: 0.4.8
4
4
  Summary: Hindsight: Agent Memory That Works Like Human Memory
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: aiohttp>=3.13.3
@@ -8,6 +8,7 @@ Requires-Dist: alembic>=1.17.1
8
8
  Requires-Dist: anthropic>=0.40.0
9
9
  Requires-Dist: asyncpg>=0.29.0
10
10
  Requires-Dist: authlib>=1.6.6
11
+ Requires-Dist: claude-agent-sdk>=0.1.27
11
12
  Requires-Dist: cohere>=5.0.0
12
13
  Requires-Dist: dateparser>=1.2.2
13
14
  Requires-Dist: fastapi[standard]>=0.120.3
@@ -46,4 +46,4 @@ __all__ = [
46
46
  "RemoteTEICrossEncoder",
47
47
  "LLMConfig",
48
48
  ]
49
- __version__ = "0.4.7"
49
+ __version__ = "0.4.8"
@@ -11,6 +11,7 @@ from collections.abc import Sequence
11
11
  import sqlalchemy as sa
12
12
  from alembic import op
13
13
  from pgvector.sqlalchemy import Vector
14
+ from sqlalchemy import text
14
15
  from sqlalchemy.dialects import postgresql
15
16
 
16
17
  # revision identifiers, used by Alembic.
@@ -23,8 +24,21 @@ depends_on: str | Sequence[str] | None = None
23
24
  def upgrade() -> None:
24
25
  """Upgrade schema - create all tables from scratch."""
25
26
 
26
- # Enable required extensions
27
- op.execute("CREATE EXTENSION IF NOT EXISTS vector")
27
+ # Note: pgvector extension is installed globally BEFORE migrations run
28
+ # See migrations.py:run_migrations() - this ensures the extension is available
29
+ # to all schemas, not just the one being migrated
30
+
31
+ # We keep this here as a fallback for backwards compatibility
32
+ # This may fail if user lacks permissions, which is fine if extension already exists
33
+ try:
34
+ op.execute("CREATE EXTENSION IF NOT EXISTS vector")
35
+ except Exception:
36
+ # Extension might already exist or user lacks permissions - verify it exists
37
+ conn = op.get_bind()
38
+ result = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vector'")).fetchone()
39
+ if not result:
40
+ # Extension truly doesn't exist - re-raise the error
41
+ raise
28
42
 
29
43
  # Create banks table
30
44
  op.create_table(
@@ -1410,7 +1410,7 @@ def create_app(
1410
1410
  poll_interval_ms=config.worker_poll_interval_ms,
1411
1411
  max_retries=config.worker_max_retries,
1412
1412
  schema=schema,
1413
- tenant_extension=getattr(memory, "_tenant_extension", None),
1413
+ tenant_extension=memory._tenant_extension,
1414
1414
  max_slots=config.worker_max_slots,
1415
1415
  consolidation_max_slots=config.worker_consolidation_max_slots,
1416
1416
  )
@@ -2363,6 +2363,23 @@ def _register_routes(app: FastAPI):
2363
2363
  ):
2364
2364
  """Create a mental model (async - returns operation_id)."""
2365
2365
  try:
2366
+ # Pre-operation validation hook
2367
+ validator = app.state.memory._operation_validator
2368
+ if validator:
2369
+ from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
2370
+
2371
+ ctx = MentalModelRefreshContext(
2372
+ bank_id=bank_id,
2373
+ mental_model_id=None, # Not yet created
2374
+ request_context=request_context,
2375
+ )
2376
+ validation = await validator.validate_mental_model_refresh(ctx)
2377
+ if not validation.allowed:
2378
+ raise OperationValidationError(
2379
+ validation.reason or "Operation not allowed",
2380
+ status_code=validation.status_code,
2381
+ )
2382
+
2366
2383
  # 1. Create the mental model with placeholder content
2367
2384
  mental_model = await app.state.memory.create_mental_model(
2368
2385
  bank_id=bank_id,
@@ -2385,6 +2402,8 @@ def _register_routes(app: FastAPI):
2385
2402
  raise HTTPException(status_code=400, detail=str(e))
2386
2403
  except (AuthenticationError, HTTPException):
2387
2404
  raise
2405
+ except OperationValidationError as e:
2406
+ raise HTTPException(status_code=e.status_code, detail=e.reason)
2388
2407
  except Exception as e:
2389
2408
  import traceback
2390
2409
 
@@ -2407,6 +2426,23 @@ def _register_routes(app: FastAPI):
2407
2426
  ):
2408
2427
  """Refresh a mental model by re-running its source query (async)."""
2409
2428
  try:
2429
+ # Pre-operation validation hook
2430
+ validator = app.state.memory._operation_validator
2431
+ if validator:
2432
+ from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
2433
+
2434
+ ctx = MentalModelRefreshContext(
2435
+ bank_id=bank_id,
2436
+ mental_model_id=mental_model_id,
2437
+ request_context=request_context,
2438
+ )
2439
+ validation = await validator.validate_mental_model_refresh(ctx)
2440
+ if not validation.allowed:
2441
+ raise OperationValidationError(
2442
+ validation.reason or "Operation not allowed",
2443
+ status_code=validation.status_code,
2444
+ )
2445
+
2410
2446
  result = await app.state.memory.submit_async_refresh_mental_model(
2411
2447
  bank_id=bank_id,
2412
2448
  mental_model_id=mental_model_id,
@@ -2417,6 +2453,8 @@ def _register_routes(app: FastAPI):
2417
2453
  raise HTTPException(status_code=404, detail=str(e))
2418
2454
  except (AuthenticationError, HTTPException):
2419
2455
  raise
2456
+ except OperationValidationError as e:
2457
+ raise HTTPException(status_code=e.status_code, detail=e.reason)
2420
2458
  except Exception as e:
2421
2459
  import traceback
2422
2460
 
@@ -83,9 +83,12 @@ def print_startup_info(
83
83
  embeddings_provider: str,
84
84
  reranker_provider: str,
85
85
  mcp_enabled: bool = False,
86
+ version: str | None = None,
86
87
  ):
87
88
  """Print styled startup information."""
88
89
  print(color_start("Starting Hindsight API..."))
90
+ if version:
91
+ print(f" {dim('Version:')} {color(f'v{version}', 0.1)}")
89
92
  print(f" {dim('URL:')} {color(f'http://{host}:{port}', 0.2)}")
90
93
  print(f" {dim('Database:')} {color(database_url, 0.4)}")
91
94
  print(f" {dim('LLM:')} {color(f'{llm_provider} / {llm_model}', 0.6)}")
@@ -154,7 +154,21 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
154
154
  DEFAULT_DATABASE_URL = "pg0"
155
155
  DEFAULT_DATABASE_SCHEMA = "public"
156
156
  DEFAULT_LLM_PROVIDER = "openai"
157
- DEFAULT_LLM_MODEL = "gpt-5-mini"
157
+
158
+ # Provider-specific default models
159
+ PROVIDER_DEFAULT_MODELS = {
160
+ "openai": "o3-mini",
161
+ "anthropic": "claude-haiku-4-5-20251001",
162
+ "gemini": "gemini-2.5-flash",
163
+ "groq": "openai/gpt-oss-120b",
164
+ "ollama": "gemma3:12b",
165
+ "lmstudio": "local-model",
166
+ "vertexai": "gemini-2.0-flash-001",
167
+ "openai-codex": "gpt-5.2-codex",
168
+ "claude-code": "claude-sonnet-4-5-20250929",
169
+ "mock": "mock-model",
170
+ }
171
+ DEFAULT_LLM_MODEL = "o3-mini" # Fallback if provider not in table
158
172
  DEFAULT_LLM_MAX_CONCURRENT = 32
159
173
  DEFAULT_LLM_MAX_RETRIES = 10 # Max retry attempts for LLM API calls
160
174
  DEFAULT_LLM_INITIAL_BACKOFF = 1.0 # Initial backoff in seconds for retry exponential backoff
@@ -303,6 +317,11 @@ def _validate_extraction_mode(mode: str) -> str:
303
317
  return mode_lower
304
318
 
305
319
 
320
+ def _get_default_model_for_provider(provider: str) -> str:
321
+ """Get the default model for a given provider."""
322
+ return PROVIDER_DEFAULT_MODELS.get(provider.lower(), DEFAULT_LLM_MODEL)
323
+
324
+
306
325
  @dataclass
307
326
  class HindsightConfig:
308
327
  """Configuration container for Hindsight API."""
@@ -431,14 +450,18 @@ class HindsightConfig:
431
450
  @classmethod
432
451
  def from_env(cls) -> "HindsightConfig":
433
452
  """Create configuration from environment variables."""
453
+ # Get provider first to determine default model
454
+ llm_provider = os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER)
455
+ llm_model = os.getenv(ENV_LLM_MODEL) or _get_default_model_for_provider(llm_provider)
456
+
434
457
  return cls(
435
458
  # Database
436
459
  database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
437
460
  database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
438
461
  # LLM
439
- llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
462
+ llm_provider=llm_provider,
440
463
  llm_api_key=os.getenv(ENV_LLM_API_KEY),
441
- llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
464
+ llm_model=llm_model,
442
465
  llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
443
466
  llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
444
467
  llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
@@ -453,7 +476,12 @@ class HindsightConfig:
453
476
  # Per-operation LLM config (None = use default)
454
477
  retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
455
478
  retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
456
- retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
479
+ retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL)
480
+ or (
481
+ _get_default_model_for_provider(os.getenv(ENV_RETAIN_LLM_PROVIDER))
482
+ if os.getenv(ENV_RETAIN_LLM_PROVIDER)
483
+ else None
484
+ ),
457
485
  retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
458
486
  retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
459
487
  if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
@@ -470,7 +498,12 @@ class HindsightConfig:
470
498
  retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
471
499
  reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
472
500
  reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
473
- reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
501
+ reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL)
502
+ or (
503
+ _get_default_model_for_provider(os.getenv(ENV_REFLECT_LLM_PROVIDER))
504
+ if os.getenv(ENV_REFLECT_LLM_PROVIDER)
505
+ else None
506
+ ),
474
507
  reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
475
508
  reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
476
509
  if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
@@ -489,7 +522,12 @@ class HindsightConfig:
489
522
  else None,
490
523
  consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
491
524
  consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
492
- consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
525
+ consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL)
526
+ or (
527
+ _get_default_model_for_provider(os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER))
528
+ if os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER)
529
+ else None
530
+ ),
493
531
  consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
494
532
  consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
495
533
  if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
@@ -0,0 +1,113 @@
1
+ """
2
+ Daemon mode support for Hindsight API.
3
+
4
+ Provides idle timeout for running as a background daemon.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ import os
10
+ import sys
11
+ import time
12
+ from pathlib import Path
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Default daemon configuration
17
+ DEFAULT_DAEMON_PORT = 8888
18
+ DEFAULT_IDLE_TIMEOUT = 0 # 0 = no auto-exit (hindsight-embed passes its own timeout)
19
+
20
+ # Allow override via environment variable for profile-specific logs
21
+ DAEMON_LOG_PATH = Path(os.getenv("HINDSIGHT_API_DAEMON_LOG", str(Path.home() / ".hindsight" / "daemon.log")))
22
+
23
+
24
+ class IdleTimeoutMiddleware:
25
+ """ASGI middleware that tracks activity and exits after idle timeout."""
26
+
27
+ def __init__(self, app, idle_timeout: int = DEFAULT_IDLE_TIMEOUT):
28
+ self.app = app
29
+ self.idle_timeout = idle_timeout
30
+ self.last_activity = time.time()
31
+ self._checker_task = None
32
+
33
+ async def __call__(self, scope, receive, send):
34
+ # Update activity timestamp on each request
35
+ self.last_activity = time.time()
36
+ await self.app(scope, receive, send)
37
+
38
+ def start_idle_checker(self):
39
+ """Start the background task that checks for idle timeout."""
40
+ self._checker_task = asyncio.create_task(self._check_idle())
41
+
42
+ async def _check_idle(self):
43
+ """Background task that exits the process after idle timeout."""
44
+ # If idle_timeout is 0, don't auto-exit
45
+ if self.idle_timeout <= 0:
46
+ return
47
+
48
+ while True:
49
+ await asyncio.sleep(30) # Check every 30 seconds
50
+ idle_time = time.time() - self.last_activity
51
+ if idle_time > self.idle_timeout:
52
+ logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
53
+ # Give a moment for any in-flight requests
54
+ await asyncio.sleep(1)
55
+ # Send SIGTERM to ourselves to trigger graceful shutdown
56
+ import signal
57
+
58
+ os.kill(os.getpid(), signal.SIGTERM)
59
+
def daemonize():
    """
    Fork the current process into a background daemon.

    Uses the classic double-fork technique to fully detach from the
    controlling terminal, then redirects stdin to /dev/null and
    stdout/stderr to DAEMON_LOG_PATH.

    Exits the parent (and intermediate) process via sys.exit(0); on fork
    failure, writes the error to stderr and exits with status 1.
    """
    # First fork - detach from parent
    try:
        if os.fork() > 0:
            sys.exit(0)
    except OSError as e:
        sys.stderr.write(f"fork #1 failed: {e}\n")
        sys.exit(1)

    # Decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)

    # Second fork - prevent zombie / reacquisition of a controlling terminal.
    # Guarded like fork #1: an unhandled OSError here would propagate after we
    # have already detached, leaving a half-daemonized process.
    try:
        if os.fork() > 0:
            sys.exit(0)
    except OSError as e:
        sys.stderr.write(f"fork #2 failed: {e}\n")
        sys.exit(1)

    # Redirect standard file descriptors to log file
    DAEMON_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

    sys.stdout.flush()
    sys.stderr.flush()

    # Redirect stdin to /dev/null
    with open("/dev/null", "r") as devnull:
        os.dup2(devnull.fileno(), sys.stdin.fileno())

    # Redirect stdout/stderr to log file. dup2 gives fds 1 and 2 their own
    # copies of the descriptor, so the original file object can be closed
    # deterministically instead of being left to the garbage collector.
    with open(DAEMON_LOG_PATH, "a", encoding="utf-8") as log_file:
        os.dup2(log_file.fileno(), sys.stdout.fileno())
        os.dup2(log_file.fileno(), sys.stderr.fileno())
def check_daemon_running(port: int = DEFAULT_DAEMON_PORT) -> bool:
    """
    Check if a daemon is running and responsive on the given port.

    Attempts a TCP connection to 127.0.0.1:port with a 1-second timeout.

    Args:
        port: TCP port to probe (defaults to DEFAULT_DAEMON_PORT).

    Returns:
        True if something accepted the connection, False otherwise
        (including on any socket error).
    """
    import socket

    try:
        # Context manager guarantees the socket is closed even if
        # settimeout/connect_ex raises (the original leaked it on error).
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)
            # connect_ex returns 0 on success instead of raising.
            return sock.connect_ex(("127.0.0.1", port)) == 0
    except Exception:
        return False
@@ -0,0 +1,146 @@
1
+ """
2
+ Abstract interface for LLM providers.
3
+
4
+ This module defines the interface that all LLM providers must implement,
5
+ enabling support for multiple LLM backends (OpenAI, Anthropic, Gemini, Codex, etc.)
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Any
10
+
11
+ from .response_models import LLMToolCallResult, TokenUsage
12
+
13
+
class LLMInterface(ABC):
    """
    Base contract shared by every LLM backend.

    Concrete providers (OpenAI, Anthropic, Gemini, Codex, ...) subclass this
    and implement each abstract method below.
    """

    def __init__(
        self,
        provider: str,
        api_key: str,
        base_url: str,
        model: str,
        reasoning_effort: str = "low",
        **kwargs: Any,
    ):
        """
        Store the configuration common to all providers.

        Args:
            provider: Provider name (e.g., "openai", "codex", "anthropic", "gemini").
            api_key: API key or authentication token.
            base_url: Base URL for the API.
            model: Model name.
            reasoning_effort: Reasoning effort level for providers that support it.
            **kwargs: Extra provider-specific options (unused by the base class).
        """
        # Normalized so provider comparisons are case-insensitive.
        self.provider = provider.lower()
        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        self.reasoning_effort = reasoning_effort

    @abstractmethod
    async def verify_connection(self) -> None:
        """
        Issue a minimal test call to confirm the provider is reachable and configured.

        Raises:
            RuntimeError: If the connectivity check fails.
        """
        pass

    @abstractmethod
    async def call(
        self,
        messages: list[dict[str, str]],
        response_format: Any | None = None,
        max_completion_tokens: int | None = None,
        temperature: float | None = None,
        scope: str = "memory",
        max_retries: int = 10,
        initial_backoff: float = 1.0,
        max_backoff: float = 60.0,
        skip_validation: bool = False,
        strict_schema: bool = False,
        return_usage: bool = False,
    ) -> Any:
        """
        Perform a chat-completion request with retry/backoff handling.

        Args:
            messages: Chat messages as dicts with 'role' and 'content'.
            response_format: Pydantic model to parse structured output into, if any.
            max_completion_tokens: Cap on tokens generated in the response.
            temperature: Sampling temperature (0.0-2.0).
            scope: Label used for usage tracking.
            max_retries: Upper bound on retry attempts.
            initial_backoff: First backoff delay for exponential backoff, in seconds.
            max_backoff: Ceiling on the backoff delay, in seconds.
            skip_validation: If True, return raw JSON without Pydantic validation.
            strict_schema: Enforce strict JSON schema (OpenAI only).
            return_usage: If True, also return token usage alongside the result.

        Returns:
            The parsed response when response_format is given, otherwise the
            text content; when return_usage is True, a (result, TokenUsage)
            tuple instead.

        Raises:
            OutputTooLongError: If the output exceeds token limits.
            Exception: The underlying API error once retries are exhausted.
        """
        pass

    @abstractmethod
    async def call_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        max_completion_tokens: int | None = None,
        temperature: float | None = None,
        scope: str = "tools",
        max_retries: int = 5,
        initial_backoff: float = 1.0,
        max_backoff: float = 30.0,
        tool_choice: str | dict[str, Any] = "auto",
    ) -> LLMToolCallResult:
        """
        Perform a chat-completion request with tool/function calling enabled.

        Args:
            messages: Chat messages; entries with role='tool' carry tool results.
            tools: Tool definitions in OpenAI function-calling format.
            max_completion_tokens: Cap on tokens generated in the response.
            temperature: Sampling temperature (0.0-2.0).
            scope: Label used for usage tracking.
            max_retries: Upper bound on retry attempts.
            initial_backoff: First backoff delay, in seconds.
            max_backoff: Ceiling on the backoff delay, in seconds.
            tool_choice: Tool selection policy - "auto", "none", "required",
                or a specific function spec.

        Returns:
            LLMToolCallResult carrying text content and/or requested tool calls.
        """
        pass

    @abstractmethod
    async def cleanup(self) -> None:
        """Release held resources (e.g., close open connections)."""
        pass
class OutputTooLongError(Exception):
    """
    Bridge exception raised when LLM output exceeds token limits.

    Wraps provider-specific failures (such as OpenAI's
    LengthFinishReasonError) so callers can react to over-long output
    without importing any provider-specific error types.
    """

    pass