hindsight-api 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. hindsight_api/__init__.py +1 -1
  2. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
  3. hindsight_api/api/http.py +83 -1
  4. hindsight_api/banner.py +3 -0
  5. hindsight_api/config.py +44 -6
  6. hindsight_api/daemon.py +18 -112
  7. hindsight_api/engine/llm_interface.py +146 -0
  8. hindsight_api/engine/llm_wrapper.py +304 -1327
  9. hindsight_api/engine/memory_engine.py +125 -41
  10. hindsight_api/engine/providers/__init__.py +14 -0
  11. hindsight_api/engine/providers/anthropic_llm.py +434 -0
  12. hindsight_api/engine/providers/claude_code_llm.py +352 -0
  13. hindsight_api/engine/providers/codex_llm.py +527 -0
  14. hindsight_api/engine/providers/gemini_llm.py +502 -0
  15. hindsight_api/engine/providers/mock_llm.py +234 -0
  16. hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
  17. hindsight_api/engine/retain/fact_extraction.py +13 -9
  18. hindsight_api/engine/retain/fact_storage.py +5 -3
  19. hindsight_api/extensions/__init__.py +10 -0
  20. hindsight_api/extensions/builtin/tenant.py +36 -0
  21. hindsight_api/extensions/operation_validator.py +129 -0
  22. hindsight_api/main.py +6 -21
  23. hindsight_api/migrations.py +75 -0
  24. hindsight_api/worker/main.py +41 -11
  25. hindsight_api/worker/poller.py +26 -14
  26. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/METADATA +2 -1
  27. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/RECORD +29 -21
  28. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/WHEEL +0 -0
  29. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/entry_points.txt +0 -0
hindsight_api/__init__.py CHANGED
@@ -46,4 +46,4 @@ __all__ = [
     "RemoteTEICrossEncoder",
     "LLMConfig",
 ]
-__version__ = "0.4.6"
+__version__ = "0.4.8"
hindsight_api/alembic/versions/5a366d414dce_initial_schema.py CHANGED
@@ -11,6 +11,7 @@ from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 from pgvector.sqlalchemy import Vector
+from sqlalchemy import text
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
@@ -23,8 +24,21 @@ depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
     """Upgrade schema - create all tables from scratch."""
 
-    # Enable required extensions
-    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+    # Note: pgvector extension is installed globally BEFORE migrations run
+    # See migrations.py:run_migrations() - this ensures the extension is available
+    # to all schemas, not just the one being migrated
+
+    # We keep this here as a fallback for backwards compatibility
+    # This may fail if user lacks permissions, which is fine if extension already exists
+    try:
+        op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+    except Exception:
+        # Extension might already exist or user lacks permissions - verify it exists
+        conn = op.get_bind()
+        result = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vector'")).fetchone()
+        if not result:
+            # Extension truly doesn't exist - re-raise the error
+            raise
 
     # Create banks table
     op.create_table(
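The comment above points at the new `migrations.py` (+75 lines), which installs pgvector once before Alembic runs. That file is not shown in this diff, so the following is only a sketch of the pattern: `run_migrations` is named in the comment, but the connection handling and `asyncpg` usage here are assumptions for illustration.

```python
# Hypothetical sketch of "install the extension before migrations run";
# not the actual contents of hindsight_api/migrations.py.
import asyncpg


async def run_migrations(database_url: str, schema: str = "public") -> None:
    conn = await asyncpg.connect(database_url)
    try:
        # Installed once at the database level, so every schema migrated
        # afterwards can create vector columns.
        await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
    finally:
        await conn.close()
    # ...then run Alembic against the target schema as usual.
```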
hindsight_api/api/http.py CHANGED
@@ -1398,14 +1398,19 @@ def create_app(
 
     # Start worker poller if enabled (standalone mode)
     if config.worker_enabled and memory._pool is not None:
+        from ..config import DEFAULT_DATABASE_SCHEMA
+
        worker_id = config.worker_id or socket.gethostname()
+        # Convert default schema to None for SQL compatibility (no schema prefix)
+        schema = None if config.database_schema == DEFAULT_DATABASE_SCHEMA else config.database_schema
         poller = WorkerPoller(
             pool=memory._pool,
             worker_id=worker_id,
             executor=memory.execute_task,
             poll_interval_ms=config.worker_poll_interval_ms,
             max_retries=config.worker_max_retries,
-            tenant_extension=getattr(memory, "_tenant_extension", None),
+            schema=schema,
+            tenant_extension=memory._tenant_extension,
             max_slots=config.worker_max_slots,
             consolidation_max_slots=config.worker_consolidation_max_slots,
         )
@@ -2285,6 +2290,23 @@ def _register_routes(app: FastAPI):
     ):
         """Get a mental model by ID."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelGetContext
+
+                ctx = MentalModelGetContext(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_get(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             mental_model = await app.state.memory.get_mental_model(
                 bank_id=bank_id,
                 mental_model_id=mental_model_id,
@@ -2292,9 +2314,31 @@ def _register_routes(app: FastAPI):
             )
             if mental_model is None:
                 raise HTTPException(status_code=404, detail=f"Mental model '{mental_model_id}' not found")
+
+            # Post-operation hook
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelGetResult
+
+                content = mental_model.get("content", "")
+                output_tokens = len(content) // 4 if content else 0
+
+                result_ctx = MentalModelGetResult(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                    output_tokens=output_tokens,
+                    success=True,
+                )
+                try:
+                    await validator.on_mental_model_get_complete(result_ctx)
+                except Exception as hook_err:
+                    logger.warning(f"Post-mental-model-get hook error (non-fatal): {hook_err}")
+
             return MentalModelResponse(**mental_model)
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
@@ -2319,6 +2363,23 @@ def _register_routes(app: FastAPI):
     ):
         """Create a mental model (async - returns operation_id)."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
+
+                ctx = MentalModelRefreshContext(
+                    bank_id=bank_id,
+                    mental_model_id=None,  # Not yet created
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_refresh(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             # 1. Create the mental model with placeholder content
             mental_model = await app.state.memory.create_mental_model(
                 bank_id=bank_id,
@@ -2341,6 +2402,8 @@ def _register_routes(app: FastAPI):
             raise HTTPException(status_code=400, detail=str(e))
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
@@ -2363,6 +2426,23 @@ def _register_routes(app: FastAPI):
     ):
         """Refresh a mental model by re-running its source query (async)."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
+
+                ctx = MentalModelRefreshContext(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_refresh(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             result = await app.state.memory.submit_async_refresh_mental_model(
                 bank_id=bank_id,
                 mental_model_id=mental_model_id,
@@ -2373,6 +2453,8 @@ def _register_routes(app: FastAPI):
             raise HTTPException(status_code=404, detail=str(e))
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
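All three routes follow the same shape: a pre-operation `validate_*` call that can veto the request, and a best-effort post-operation hook whose failures are logged and swallowed. Note that `output_tokens` is estimated with the rough 4-characters-per-token heuristic. A minimal sketch of a validator that plugs into this, duck-typed against the methods the routes call; the real base class and result type live in the new `extensions/operation_validator.py` (+129 lines) and are not shown here, so the field names below are inferred from the route code (`allowed`, `reason`, `status_code`, `output_tokens`).

```python
# Hypothetical validator; only the mental-model-get hooks are sketched.
from dataclasses import dataclass


@dataclass
class ValidationResult:  # stand-in for the real result type
    allowed: bool
    reason: str | None = None
    status_code: int = 403


class QuotaValidator:
    """Deny reads once an (illustrative) output-token budget is spent."""

    def __init__(self, max_output_tokens: int):
        self.max_output_tokens = max_output_tokens
        self.tokens_used = 0

    async def validate_mental_model_get(self, ctx) -> ValidationResult:
        if self.tokens_used >= self.max_output_tokens:
            return ValidationResult(False, "Token quota exceeded", status_code=429)
        return ValidationResult(True)

    async def on_mental_model_get_complete(self, result_ctx) -> None:
        # Post-hooks are best-effort: the route logs and ignores errors here.
        self.tokens_used += result_ctx.output_tokens
```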
hindsight_api/banner.py CHANGED
@@ -83,9 +83,12 @@ def print_startup_info(
     embeddings_provider: str,
     reranker_provider: str,
     mcp_enabled: bool = False,
+    version: str | None = None,
 ):
     """Print styled startup information."""
     print(color_start("Starting Hindsight API..."))
+    if version:
+        print(f" {dim('Version:')} {color(f'v{version}', 0.1)}")
     print(f" {dim('URL:')} {color(f'http://{host}:{port}', 0.2)}")
     print(f" {dim('Database:')} {color(database_url, 0.4)}")
     print(f" {dim('LLM:')} {color(f'{llm_provider} / {llm_model}', 0.6)}")
hindsight_api/config.py CHANGED
@@ -154,7 +154,21 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
 DEFAULT_DATABASE_URL = "pg0"
 DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
-DEFAULT_LLM_MODEL = "gpt-5-mini"
+
+# Provider-specific default models
+PROVIDER_DEFAULT_MODELS = {
+    "openai": "o3-mini",
+    "anthropic": "claude-haiku-4-5-20251001",
+    "gemini": "gemini-2.5-flash",
+    "groq": "openai/gpt-oss-120b",
+    "ollama": "gemma3:12b",
+    "lmstudio": "local-model",
+    "vertexai": "gemini-2.0-flash-001",
+    "openai-codex": "gpt-5.2-codex",
+    "claude-code": "claude-sonnet-4-5-20250929",
+    "mock": "mock-model",
+}
+DEFAULT_LLM_MODEL = "o3-mini"  # Fallback if provider not in table
 DEFAULT_LLM_MAX_CONCURRENT = 32
 DEFAULT_LLM_MAX_RETRIES = 10  # Max retry attempts for LLM API calls
 DEFAULT_LLM_INITIAL_BACKOFF = 1.0  # Initial backoff in seconds for retry exponential backoff
@@ -303,6 +317,11 @@ def _validate_extraction_mode(mode: str) -> str:
     return mode_lower
 
 
+def _get_default_model_for_provider(provider: str) -> str:
+    """Get the default model for a given provider."""
+    return PROVIDER_DEFAULT_MODELS.get(provider.lower(), DEFAULT_LLM_MODEL)
+
+
 @dataclass
 class HindsightConfig:
     """Configuration container for Hindsight API."""
@@ -431,14 +450,18 @@ class HindsightConfig:
     @classmethod
     def from_env(cls) -> "HindsightConfig":
         """Create configuration from environment variables."""
+        # Get provider first to determine default model
+        llm_provider = os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER)
+        llm_model = os.getenv(ENV_LLM_MODEL) or _get_default_model_for_provider(llm_provider)
+
         return cls(
             # Database
             database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
             database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
             # LLM
-            llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
+            llm_provider=llm_provider,
             llm_api_key=os.getenv(ENV_LLM_API_KEY),
-            llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
+            llm_model=llm_model,
             llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
             llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
             llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
@@ -453,7 +476,12 @@ class HindsightConfig:
             # Per-operation LLM config (None = use default)
             retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
             retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
-            retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
+            retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_RETAIN_LLM_PROVIDER))
+                if os.getenv(ENV_RETAIN_LLM_PROVIDER)
+                else None
+            ),
             retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
             retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
@@ -470,7 +498,12 @@ class HindsightConfig:
             retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
             reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
             reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
-            reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
+            reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_REFLECT_LLM_PROVIDER))
+                if os.getenv(ENV_REFLECT_LLM_PROVIDER)
+                else None
+            ),
             reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
             reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
@@ -489,7 +522,12 @@ class HindsightConfig:
             else None,
             consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
             consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
-            consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
+            consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER))
+                if os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER)
+                else None
+            ),
             consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
             consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
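The practical effect: setting only a provider now picks a matching default model instead of the old single `gpt-5-mini` fallback. A quick illustration of the resolution order, with the environment variable names assumed from the `HINDSIGHT_API_*` pattern visible elsewhere in config.py:

```python
# Illustrative: mirrors _get_default_model_for_provider + from_env above.
import os

PROVIDER_DEFAULT_MODELS = {"anthropic": "claude-haiku-4-5-20251001"}
DEFAULT_LLM_MODEL = "o3-mini"

os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "anthropic"  # assumed var name
os.environ.pop("HINDSIGHT_API_LLM_MODEL", None)  # no explicit model set

provider = os.getenv("HINDSIGHT_API_LLM_PROVIDER", "openai")
model = os.getenv("HINDSIGHT_API_LLM_MODEL") or PROVIDER_DEFAULT_MODELS.get(
    provider.lower(), DEFAULT_LLM_MODEL
)
print(model)  # -> claude-haiku-4-5-20251001
```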
hindsight_api/daemon.py CHANGED
@@ -1,11 +1,10 @@
 """
 Daemon mode support for Hindsight API.
 
-Provides idle timeout and lockfile management for running as a background daemon.
+Provides idle timeout for running as a background daemon.
 """
 
 import asyncio
-import fcntl
 import logging
 import os
 import sys
@@ -15,10 +14,11 @@ from pathlib import Path
 logger = logging.getLogger(__name__)
 
 # Default daemon configuration
-DEFAULT_DAEMON_PORT = 8889
+DEFAULT_DAEMON_PORT = 8888
 DEFAULT_IDLE_TIMEOUT = 0  # 0 = no auto-exit (hindsight-embed passes its own timeout)
-LOCKFILE_PATH = Path.home() / ".hindsight" / "daemon.lock"
-DAEMON_LOG_PATH = Path.home() / ".hindsight" / "daemon.log"
+
+# Allow override via environment variable for profile-specific logs
+DAEMON_LOG_PATH = Path(os.getenv("HINDSIGHT_API_DAEMON_LOG", str(Path.home() / ".hindsight" / "daemon.log")))
 
 
 class IdleTimeoutMiddleware:
@@ -58,97 +58,27 @@ class IdleTimeoutMiddleware:
         os.kill(os.getpid(), signal.SIGTERM)
 
 
-class DaemonLock:
-    """
-    File-based lock to prevent multiple daemon instances.
-
-    Uses fcntl.flock for atomic locking on Unix systems.
-    """
-
-    def __init__(self, lockfile: Path = LOCKFILE_PATH):
-        self.lockfile = lockfile
-        self._fd = None
-
-    def acquire(self) -> bool:
-        """
-        Try to acquire the daemon lock.
-
-        Returns True if lock acquired, False if another daemon is running.
-        """
-        self.lockfile.parent.mkdir(parents=True, exist_ok=True)
-
-        try:
-            self._fd = open(self.lockfile, "w")
-            fcntl.flock(self._fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-            # Write PID for debugging
-            self._fd.write(str(os.getpid()))
-            self._fd.flush()
-            return True
-        except (IOError, OSError):
-            # Lock is held by another process
-            if self._fd:
-                self._fd.close()
-                self._fd = None
-            return False
-
-    def release(self):
-        """Release the daemon lock."""
-        if self._fd:
-            try:
-                fcntl.flock(self._fd.fileno(), fcntl.LOCK_UN)
-                self._fd.close()
-            except Exception:
-                pass
-            finally:
-                self._fd = None
-            # Remove lockfile
-            try:
-                self.lockfile.unlink()
-            except Exception:
-                pass
-
-    def is_locked(self) -> bool:
-        """Check if the lock is held by another process."""
-        if not self.lockfile.exists():
-            return False
-
-        try:
-            fd = open(self.lockfile, "r")
-            fcntl.flock(fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-            # We got the lock, so no one else has it
-            fcntl.flock(fd.fileno(), fcntl.LOCK_UN)
-            fd.close()
-            return False
-        except (IOError, OSError):
-            return True
-
-    def get_pid(self) -> int | None:
-        """Get the PID of the daemon holding the lock."""
-        if not self.lockfile.exists():
-            return None
-        try:
-            with open(self.lockfile, "r") as f:
-                return int(f.read().strip())
-        except (ValueError, IOError):
-            return None
-
-
 def daemonize():
     """
     Fork the current process into a background daemon.
 
     Uses double-fork technique to properly detach from terminal.
     """
-    # First fork
-    pid = os.fork()
-    if pid > 0:
-        # Parent exits
-        sys.exit(0)
-
-    # Create new session
+    # First fork - detach from parent
+    try:
+        pid = os.fork()
+        if pid > 0:
+            sys.exit(0)
+    except OSError as e:
+        sys.stderr.write(f"fork #1 failed: {e}\n")
+        sys.exit(1)
+
+    # Decouple from parent environment
+    os.chdir("/")
     os.setsid()
+    os.umask(0)
 
-    # Second fork to prevent zombie processes
+    # Second fork - prevent zombie
     pid = os.fork()
     if pid > 0:
         sys.exit(0)
@@ -181,27 +111,3 @@ def check_daemon_running(port: int = DEFAULT_DAEMON_PORT) -> bool:
         return result == 0
     except Exception:
         return False
-
-
-def stop_daemon(port: int = DEFAULT_DAEMON_PORT) -> bool:
-    """Stop a running daemon by sending SIGTERM to the process."""
-    lock = DaemonLock()
-    pid = lock.get_pid()
-
-    if pid is None:
-        return False
-
-    try:
-        import signal
-
-        os.kill(pid, signal.SIGTERM)
-        # Wait for process to exit
-        for _ in range(50):  # Wait up to 5 seconds
-            time.sleep(0.1)
-            try:
-                os.kill(pid, 0)  # Check if process exists
-            except OSError:
-                return True  # Process exited
-        return False
-    except OSError:
-        return False
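With `DaemonLock` and `stop_daemon` removed, detecting a running daemon rests entirely on the TCP probe in `check_daemon_running` (its body sits mostly outside these hunks; the visible context suggests a connect check). A sketch of that style of probe, with the localhost target and timeout assumed:

```python
import socket


def check_daemon_running(port: int = 8888) -> bool:
    # connect_ex returns 0 when something is listening on the port.
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(0.5)
            result = sock.connect_ex(("127.0.0.1", port))
        return result == 0
    except Exception:
        return False
```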
hindsight_api/engine/llm_interface.py ADDED
@@ -0,0 +1,146 @@
+"""
+Abstract interface for LLM providers.
+
+This module defines the interface that all LLM providers must implement,
+enabling support for multiple LLM backends (OpenAI, Anthropic, Gemini, Codex, etc.)
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from .response_models import LLMToolCallResult, TokenUsage
+
+
+class LLMInterface(ABC):
+    """
+    Abstract interface for LLM providers.
+
+    All LLM provider implementations must inherit from this class and implement
+    the required methods.
+    """
+
+    def __init__(
+        self,
+        provider: str,
+        api_key: str,
+        base_url: str,
+        model: str,
+        reasoning_effort: str = "low",
+        **kwargs: Any,
+    ):
+        """
+        Initialize LLM provider.
+
+        Args:
+            provider: Provider name (e.g., "openai", "codex", "anthropic", "gemini").
+            api_key: API key or authentication token.
+            base_url: Base URL for the API.
+            model: Model name.
+            reasoning_effort: Reasoning effort level for supported providers.
+            **kwargs: Additional provider-specific parameters.
+        """
+        self.provider = provider.lower()
+        self.api_key = api_key
+        self.base_url = base_url
+        self.model = model
+        self.reasoning_effort = reasoning_effort
+
+    @abstractmethod
+    async def verify_connection(self) -> None:
+        """
+        Verify that the LLM provider is configured correctly by making a simple test call.
+
+        Raises:
+            RuntimeError: If the connection test fails.
+        """
+        pass
+
+    @abstractmethod
+    async def call(
+        self,
+        messages: list[dict[str, str]],
+        response_format: Any | None = None,
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "memory",
+        max_retries: int = 10,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 60.0,
+        skip_validation: bool = False,
+        strict_schema: bool = False,
+        return_usage: bool = False,
+    ) -> Any:
+        """
+        Make an LLM API call with retry logic.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'.
+            response_format: Optional Pydantic model for structured output.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature (0.0-2.0).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            skip_validation: Return raw JSON without Pydantic validation.
+            strict_schema: Use strict JSON schema enforcement (OpenAI only).
+            return_usage: If True, return tuple (result, TokenUsage) instead of just result.
+
+        Returns:
+            If return_usage=False: Parsed response if response_format is provided, otherwise text content.
+            If return_usage=True: Tuple of (result, TokenUsage) with token counts.
+
+        Raises:
+            OutputTooLongError: If output exceeds token limits.
+            Exception: Re-raises API errors after retries exhausted.
+        """
+        pass
+
+    @abstractmethod
+    async def call_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "tools",
+        max_retries: int = 5,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 30.0,
+        tool_choice: str | dict[str, Any] = "auto",
+    ) -> LLMToolCallResult:
+        """
+        Make an LLM API call with tool/function calling support.
+
+        Args:
+            messages: List of message dicts. Can include tool results with role='tool'.
+            tools: List of tool definitions in OpenAI format.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature (0.0-2.0).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            tool_choice: How to choose tools - "auto", "none", "required", or specific function.
+
+        Returns:
+            LLMToolCallResult with content and/or tool_calls.
+        """
+        pass
+
+    @abstractmethod
+    async def cleanup(self) -> None:
+        """Clean up resources (close connections, etc.)."""
+        pass
+
+
+class OutputTooLongError(Exception):
+    """
+    Bridge exception raised when LLM output exceeds token limits.
+
+    This wraps provider-specific errors (e.g., OpenAI's LengthFinishReasonError)
+    to allow callers to handle output length issues without depending on
+    provider-specific implementations.
+    """
+
+    pass
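The ABC above is the seam the six new `providers/*` modules implement. A minimal conforming subclass, in the spirit of the new `mock_llm.py` (whose actual contents are not shown in this diff, so the return shapes and `LLMToolCallResult` constructor arguments below are assumptions):

```python
# Illustrative stub against the LLMInterface contract above.
from typing import Any


class EchoLLM(LLMInterface):
    """Echoes the last user message; useful only for wiring tests."""

    def __init__(self, **kwargs: Any):
        super().__init__(provider="mock", api_key="", base_url="", model="echo", **kwargs)

    async def verify_connection(self) -> None:
        return None  # nothing to verify for a local stub

    async def call(self, messages, response_format=None, **kwargs: Any) -> Any:
        # A real provider would call its API here and validate the reply
        # against response_format when one is given.
        return messages[-1]["content"]

    async def call_with_tools(self, messages, tools, **kwargs: Any):
        # Constructor arguments assumed; the real LLMToolCallResult lives in
        # engine/response_models.py.
        return LLMToolCallResult(content=messages[-1]["content"], tool_calls=[])

    async def cleanup(self) -> None:
        return None
```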