PyPI - hindsight-api - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

hindsight_api/admin/__init__.py +1 -0
hindsight_api/admin/cli.py +252 -0
hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
hindsight_api/api/http.py +282 -20
hindsight_api/api/mcp.py +47 -52
hindsight_api/config.py +238 -6
hindsight_api/engine/cross_encoder.py +599 -86
hindsight_api/engine/db_budget.py +284 -0
hindsight_api/engine/db_utils.py +11 -0
hindsight_api/engine/embeddings.py +453 -26
hindsight_api/engine/entity_resolver.py +8 -5
hindsight_api/engine/interface.py +8 -4
hindsight_api/engine/llm_wrapper.py +241 -27
hindsight_api/engine/memory_engine.py +609 -122
hindsight_api/engine/query_analyzer.py +4 -3
hindsight_api/engine/response_models.py +38 -0
hindsight_api/engine/retain/fact_extraction.py +388 -192
hindsight_api/engine/retain/fact_storage.py +34 -8
hindsight_api/engine/retain/link_utils.py +24 -16
hindsight_api/engine/retain/orchestrator.py +52 -17
hindsight_api/engine/retain/types.py +9 -0
hindsight_api/engine/search/graph_retrieval.py +42 -13
hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
hindsight_api/engine/search/mpfp_retrieval.py +362 -117
hindsight_api/engine/search/reranking.py +2 -2
hindsight_api/engine/search/retrieval.py +847 -200
hindsight_api/engine/search/tags.py +172 -0
hindsight_api/engine/search/think_utils.py +1 -1
hindsight_api/engine/search/trace.py +12 -0
hindsight_api/engine/search/tracer.py +24 -1
hindsight_api/engine/search/types.py +21 -0
hindsight_api/engine/task_backend.py +109 -18
hindsight_api/engine/utils.py +1 -1
hindsight_api/extensions/context.py +10 -1
hindsight_api/main.py +56 -4
hindsight_api/metrics.py +433 -48
hindsight_api/migrations.py +141 -1
hindsight_api/models.py +3 -1
hindsight_api/pg0.py +53 -0
hindsight_api/server.py +39 -2
{hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
hindsight_api-0.3.0.dist-info/RECORD +82 -0
{hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
hindsight_api-0.2.0.dist-info/RECORD +0 -75
{hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0

hindsight_api/engine/embeddings.py CHANGED Viewed

@@ -3,8 +3,8 @@ Embeddings abstraction for the memory system.
 Provides an interface for generating embeddings with different backends.
-IMPORTANT: All embeddings must produce 384-dimensional vectors to match
-the database schema (pgvector column defined as vector(384)).
+The embedding dimension is auto-detected from the model at initialization.
+The database schema is automatically adjusted to match the model's dimension.
 Configuration via environment variables - see hindsight_api.config for all env var names.
 """
@@ -16,12 +16,25 @@ from abc import ABC, abstractmethod
 import httpx
 from ..config import (
+    DEFAULT_EMBEDDINGS_COHERE_MODEL,
+    DEFAULT_EMBEDDINGS_LITELLM_MODEL,
     DEFAULT_EMBEDDINGS_LOCAL_MODEL,
+    DEFAULT_EMBEDDINGS_OPENAI_MODEL,
     DEFAULT_EMBEDDINGS_PROVIDER,
-    EMBEDDING_DIMENSION,
+    DEFAULT_LITELLM_API_BASE,
+    ENV_COHERE_API_KEY,
+    ENV_EMBEDDINGS_COHERE_BASE_URL,
+    ENV_EMBEDDINGS_COHERE_MODEL,
+    ENV_EMBEDDINGS_LITELLM_MODEL,
     ENV_EMBEDDINGS_LOCAL_MODEL,
+    ENV_EMBEDDINGS_OPENAI_API_KEY,
+    ENV_EMBEDDINGS_OPENAI_BASE_URL,
+    ENV_EMBEDDINGS_OPENAI_MODEL,
     ENV_EMBEDDINGS_PROVIDER,
     ENV_EMBEDDINGS_TEI_URL,
+    ENV_LITELLM_API_BASE,
+    ENV_LITELLM_API_KEY,
+    ENV_LLM_API_KEY,
 )
 logger = logging.getLogger(__name__)
@@ -31,8 +44,8 @@ class Embeddings(ABC):
     """
     Abstract base class for embedding generation.
-    All implementations MUST generate 384-dimensional embeddings to match
-    the database schema.
+    The embedding dimension is determined by the model and detected at initialization.
+    The database schema is automatically adjusted to match the model's dimension.
     """
     @property
@@ -41,6 +54,12 @@ class Embeddings(ABC):
         """Return a human-readable name for this provider (e.g., 'local', 'tei')."""
         pass
+    @property
+    @abstractmethod
+    def dimension(self) -> int:
+        """Return the embedding dimension produced by this model."""
+        pass
     @abstractmethod
     async def initialize(self) -> None:
         """
@@ -54,13 +73,13 @@ class Embeddings(ABC):
     @abstractmethod
     def encode(self, texts: list[str]) -> list[list[float]]:
         """
-        Generate 384-dimensional embeddings for a list of texts.
+        Generate embeddings for a list of texts.
         Args:
             texts: List of text strings to encode
         Returns:
-            List of 384-dimensional embedding vectors (each is a list of floats)
+            List of embedding vectors (each is a list of floats)
         """
         pass
@@ -70,9 +89,7 @@ class LocalSTEmbeddings(Embeddings):
     Local embeddings implementation using SentenceTransformers.
     Call initialize() during startup to load the model and avoid cold starts.
-    Default model is BAAI/bge-small-en-v1.5 which produces 384-dimensional
-    embeddings matching the database schema.
+    The embedding dimension is auto-detected from the model.
     """
     def __init__(self, model_name: str | None = None):
@@ -81,16 +98,22 @@ class LocalSTEmbeddings(Embeddings):
         Args:
             model_name: Name of the SentenceTransformer model to use.
-                       Must produce 384-dimensional embeddings.
                        Default: BAAI/bge-small-en-v1.5
         """
         self.model_name = model_name or DEFAULT_EMBEDDINGS_LOCAL_MODEL
         self._model = None
+        self._dimension: int | None = None
     @property
     def provider_name(self) -> str:
         return "local"
+    @property
+    def dimension(self) -> int:
+        if self._dimension is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+        return self._dimension
     async def initialize(self) -> None:
         """Load the embedding model."""
         if self._model is not None:
@@ -112,26 +135,18 @@ class LocalSTEmbeddings(Embeddings):
             model_kwargs={"low_cpu_mem_usage": False, "device_map": None},
         )
-        # Validate dimension matches database schema
-        model_dim = self._model.get_sentence_embedding_dimension()
-        if model_dim != EMBEDDING_DIMENSION:
-            raise ValueError(
-                f"Model {self.model_name} produces {model_dim}-dimensional embeddings, "
-                f"but database schema requires {EMBEDDING_DIMENSION} dimensions. "
-                f"Use a model that produces {EMBEDDING_DIMENSION}-dimensional embeddings."
-            )
-        logger.info(f"Embeddings: local provider initialized (dim: {model_dim})")
+        self._dimension = self._model.get_sentence_embedding_dimension()
+        logger.info(f"Embeddings: local provider initialized (dim: {self._dimension})")
     def encode(self, texts: list[str]) -> list[list[float]]:
         """
-        Generate 384-dimensional embeddings for a list of texts.
+        Generate embeddings for a list of texts.
         Args:
             texts: List of text strings to encode
         Returns:
-            List of 384-dimensional embedding vectors
+            List of embedding vectors
         """
         if self._model is None:
             raise RuntimeError("Embeddings not initialized. Call initialize() first.")
@@ -146,7 +161,7 @@ class RemoteTEIEmbeddings(Embeddings):
     TEI provides a high-performance inference server for embedding models.
     See: https://github.com/huggingface/text-embeddings-inference
-    The server should be running a model that produces 384-dimensional embeddings.
+    The embedding dimension is auto-detected from the server at initialization.
     """
     def __init__(
@@ -174,11 +189,18 @@ class RemoteTEIEmbeddings(Embeddings):
         self.retry_delay = retry_delay
         self._client: httpx.Client | None = None
         self._model_id: str | None = None
+        self._dimension: int | None = None
     @property
     def provider_name(self) -> str:
         return "tei"
+    @property
+    def dimension(self) -> int:
+        if self._dimension is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+        return self._dimension
     def _request_with_retry(self, method: str, url: str, **kwargs) -> httpx.Response:
         """Make an HTTP request with automatic retries on transient errors."""
         import time
@@ -229,7 +251,24 @@ class RemoteTEIEmbeddings(Embeddings):
             response = self._request_with_retry("GET", f"{self.base_url}/info")
             info = response.json()
             self._model_id = info.get("model_id", "unknown")
-            logger.info(f"Embeddings: TEI provider initialized (model: {self._model_id})")
+            # Get dimension from server info or by doing a test embedding
+            if "max_input_length" in info and "model_dtype" in info:
+                # Try to get dimension from info endpoint (some TEI versions expose it)
+                # If not available, do a test embedding
+                pass
+            # Do a test embedding to detect dimension
+            test_response = self._request_with_retry(
+                "POST",
+                f"{self.base_url}/embed",
+                json={"inputs": ["test"]},
+            )
+            test_embeddings = test_response.json()
+            if test_embeddings and len(test_embeddings) > 0:
+                self._dimension = len(test_embeddings[0])
+            logger.info(f"Embeddings: TEI provider initialized (model: {self._model_id}, dim: {self._dimension})")
         except httpx.HTTPError as e:
             raise RuntimeError(f"Failed to connect to TEI server at {self.base_url}: {e}")
@@ -269,6 +308,369 @@ class RemoteTEIEmbeddings(Embeddings):
         return all_embeddings
+class OpenAIEmbeddings(Embeddings):
+    """
+    OpenAI embeddings implementation using the OpenAI API.
+    Supports text-embedding-3-small (1536 dims), text-embedding-3-large (3072 dims),
+    and text-embedding-ada-002 (1536 dims, legacy).
+    The embedding dimension is auto-detected from the model at initialization.
+    """
+    # Known dimensions for OpenAI embedding models
+    MODEL_DIMENSIONS = {
+        "text-embedding-3-small": 1536,
+        "text-embedding-3-large": 3072,
+        "text-embedding-ada-002": 1536,
+    }
+    def __init__(
+        self,
+        api_key: str,
+        model: str = DEFAULT_EMBEDDINGS_OPENAI_MODEL,
+        base_url: str | None = None,
+        batch_size: int = 100,
+        max_retries: int = 3,
+    ):
+        """
+        Initialize OpenAI embeddings client.
+        Args:
+            api_key: OpenAI API key
+            model: OpenAI embedding model name (default: text-embedding-3-small)
+            base_url: Custom base URL for OpenAI-compatible API (e.g., Azure OpenAI endpoint)
+            batch_size: Maximum batch size for embedding requests (default: 100)
+            max_retries: Maximum number of retries for failed requests (default: 3)
+        """
+        self.api_key = api_key
+        self.model = model
+        self.base_url = base_url
+        self.batch_size = batch_size
+        self.max_retries = max_retries
+        self._client = None
+        self._dimension: int | None = None
+    @property
+    def provider_name(self) -> str:
+        return "openai"
+    @property
+    def dimension(self) -> int:
+        if self._dimension is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+        return self._dimension
+    async def initialize(self) -> None:
+        """Initialize the OpenAI client and detect dimension."""
+        if self._client is not None:
+            return
+        try:
+            from openai import OpenAI
+        except ImportError:
+            raise ImportError("openai is required for OpenAIEmbeddings. Install it with: pip install openai")
+        base_url_msg = f" at {self.base_url}" if self.base_url else ""
+        logger.info(f"Embeddings: initializing OpenAI provider with model {self.model}{base_url_msg}")
+        # Build client kwargs, only including base_url if set (for Azure or custom endpoints)
+        client_kwargs = {"api_key": self.api_key, "max_retries": self.max_retries}
+        if self.base_url:
+            client_kwargs["base_url"] = self.base_url
+        self._client = OpenAI(**client_kwargs)
+        # Try to get dimension from known models, otherwise do a test embedding
+        if self.model in self.MODEL_DIMENSIONS:
+            self._dimension = self.MODEL_DIMENSIONS[self.model]
+        else:
+            # Do a test embedding to detect dimension
+            response = self._client.embeddings.create(
+                model=self.model,
+                input=["test"],
+            )
+            if response.data:
+                self._dimension = len(response.data[0].embedding)
+        logger.info(f"Embeddings: OpenAI provider initialized (model: {self.model}, dim: {self._dimension})")
+    def encode(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings using the OpenAI API.
+        Args:
+            texts: List of text strings to encode
+        Returns:
+            List of embedding vectors
+        """
+        if self._client is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+        if not texts:
+            return []
+        all_embeddings = []
+        # Process in batches
+        for i in range(0, len(texts), self.batch_size):
+            batch = texts[i : i + self.batch_size]
+            response = self._client.embeddings.create(
+                model=self.model,
+                input=batch,
+            )
+            # Sort by index to ensure correct order
+            batch_embeddings = sorted(response.data, key=lambda x: x.index)
+            all_embeddings.extend([e.embedding for e in batch_embeddings])
+        return all_embeddings
+class CohereEmbeddings(Embeddings):
+    """
+    Cohere embeddings implementation using the Cohere API.
+    Supports embed-english-v3.0 (1024 dims) and embed-multilingual-v3.0 (1024 dims).
+    The embedding dimension is auto-detected from the model at initialization.
+    """
+    # Known dimensions for Cohere embedding models
+    MODEL_DIMENSIONS = {
+        "embed-english-v3.0": 1024,
+        "embed-multilingual-v3.0": 1024,
+        "embed-english-light-v3.0": 384,
+        "embed-multilingual-light-v3.0": 384,
+        "embed-english-v2.0": 4096,
+        "embed-multilingual-v2.0": 768,
+    }
+    def __init__(
+        self,
+        api_key: str,
+        model: str = DEFAULT_EMBEDDINGS_COHERE_MODEL,
+        base_url: str | None = None,
+        batch_size: int = 96,
+        timeout: float = 60.0,
+        input_type: str = "search_document",
+    ):
+        """
+        Initialize Cohere embeddings client.
+        Args:
+            api_key: Cohere API key
+            model: Cohere embedding model name (default: embed-english-v3.0)
+            base_url: Custom base URL for Cohere-compatible API (e.g., Azure-hosted endpoint)
+            batch_size: Maximum batch size for embedding requests (default: 96, Cohere's limit)
+            timeout: Request timeout in seconds (default: 60.0)
+            input_type: Input type for embeddings (default: search_document).
+                       Options: search_document, search_query, classification, clustering
+        """
+        self.api_key = api_key
+        self.model = model
+        self.base_url = base_url
+        self.batch_size = batch_size
+        self.timeout = timeout
+        self.input_type = input_type
+        self._client = None
+        self._dimension: int | None = None
+    @property
+    def provider_name(self) -> str:
+        return "cohere"
+    @property
+    def dimension(self) -> int:
+        if self._dimension is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+        return self._dimension
+    async def initialize(self) -> None:
+        """Initialize the Cohere client and detect dimension."""
+        if self._client is not None:
+            return
+        try:
+            import cohere
+        except ImportError:
+            raise ImportError("cohere is required for CohereEmbeddings. Install it with: pip install cohere")
+        base_url_msg = f" at {self.base_url}" if self.base_url else ""
+        logger.info(f"Embeddings: initializing Cohere provider with model {self.model}{base_url_msg}")
+        # Build client kwargs, only including base_url if set (for Azure or custom endpoints)
+        client_kwargs = {"api_key": self.api_key, "timeout": self.timeout}
+        if self.base_url:
+            client_kwargs["base_url"] = self.base_url
+        self._client = cohere.Client(**client_kwargs)
+        # Try to get dimension from known models, otherwise do a test embedding
+        if self.model in self.MODEL_DIMENSIONS:
+            self._dimension = self.MODEL_DIMENSIONS[self.model]
+        else:
+            # Do a test embedding to detect dimension
+            response = self._client.embed(
+                texts=["test"],
+                model=self.model,
+                input_type=self.input_type,
+            )
+            if response.embeddings:
+                self._dimension = len(response.embeddings[0])
+        logger.info(f"Embeddings: Cohere provider initialized (model: {self.model}, dim: {self._dimension})")
+    def encode(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings using the Cohere API.
+        Args:
+            texts: List of text strings to encode
+        Returns:
+            List of embedding vectors
+        """
+        if self._client is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+        if not texts:
+            return []
+        all_embeddings = []
+        # Process in batches
+        for i in range(0, len(texts), self.batch_size):
+            batch = texts[i : i + self.batch_size]
+            response = self._client.embed(
+                texts=batch,
+                model=self.model,
+                input_type=self.input_type,
+            )
+            all_embeddings.extend(response.embeddings)
+        return all_embeddings
+class LiteLLMEmbeddings(Embeddings):
+    """
+    LiteLLM embeddings implementation using LiteLLM proxy's /embeddings endpoint.
+    LiteLLM provides a unified interface for multiple embedding providers.
+    The proxy exposes an OpenAI-compatible /embeddings endpoint.
+    See: https://docs.litellm.ai/docs/embedding/supported_embedding
+    Supported providers via LiteLLM:
+    - OpenAI (text-embedding-3-small, text-embedding-ada-002, etc.)
+    - Cohere (embed-english-v3.0, etc.) - prefix with cohere/
+    - Vertex AI (textembedding-gecko, etc.) - prefix with vertex_ai/
+    - HuggingFace, Mistral, Voyage AI, etc.
+    The embedding dimension is auto-detected from the model at initialization.
+    """
+    def __init__(
+        self,
+        api_base: str = DEFAULT_LITELLM_API_BASE,
+        api_key: str | None = None,
+        model: str = DEFAULT_EMBEDDINGS_LITELLM_MODEL,
+        batch_size: int = 100,
+        timeout: float = 60.0,
+    ):
+        """
+        Initialize LiteLLM embeddings client.
+        Args:
+            api_base: Base URL of the LiteLLM proxy (default: http://localhost:4000)
+            api_key: API key for the LiteLLM proxy (optional, depends on proxy config)
+            model: Embedding model name (default: text-embedding-3-small)
+                   Use provider prefix for non-OpenAI models (e.g., cohere/embed-english-v3.0)
+            batch_size: Maximum batch size for embedding requests (default: 100)
+            timeout: Request timeout in seconds (default: 60.0)
+        """
+        self.api_base = api_base.rstrip("/")
+        self.api_key = api_key
+        self.model = model
+        self.batch_size = batch_size
+        self.timeout = timeout
+        self._client: httpx.Client | None = None
+        self._dimension: int | None = None
+    @property
+    def provider_name(self) -> str:
+        return "litellm"
+    @property
+    def dimension(self) -> int:
+        if self._dimension is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+        return self._dimension
+    async def initialize(self) -> None:
+        """Initialize the HTTP client and detect embedding dimension."""
+        if self._client is not None:
+            return
+        logger.info(f"Embeddings: initializing LiteLLM provider at {self.api_base} with model {self.model}")
+        headers = {"Content-Type": "application/json"}
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        self._client = httpx.Client(timeout=self.timeout, headers=headers)
+        # Do a test embedding to detect dimension
+        try:
+            response = self._client.post(
+                f"{self.api_base}/embeddings",
+                json={"model": self.model, "input": ["test"]},
+            )
+            response.raise_for_status()
+            result = response.json()
+            if result.get("data") and len(result["data"]) > 0:
+                self._dimension = len(result["data"][0]["embedding"])
+            logger.info(f"Embeddings: LiteLLM provider initialized (model: {self.model}, dim: {self._dimension})")
+        except httpx.HTTPError as e:
+            raise RuntimeError(f"Failed to connect to LiteLLM proxy at {self.api_base}: {e}")
+    def encode(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings using the LiteLLM proxy.
+        Args:
+            texts: List of text strings to encode
+        Returns:
+            List of embedding vectors
+        """
+        if self._client is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+        if not texts:
+            return []
+        all_embeddings = []
+        # Process in batches
+        for i in range(0, len(texts), self.batch_size):
+            batch = texts[i : i + self.batch_size]
+            response = self._client.post(
+                f"{self.api_base}/embeddings",
+                json={"model": self.model, "input": batch},
+            )
+            response.raise_for_status()
+            result = response.json()
+            # Sort by index to ensure correct order
+            batch_embeddings = sorted(result["data"], key=lambda x: x["index"])
+            all_embeddings.extend([e["embedding"] for e in batch_embeddings])
+        return all_embeddings
 def create_embeddings_from_env() -> Embeddings:
     """
     Create an Embeddings instance based on environment variables.
@@ -289,5 +691,30 @@ def create_embeddings_from_env() -> Embeddings:
         model = os.environ.get(ENV_EMBEDDINGS_LOCAL_MODEL)
         model_name = model or DEFAULT_EMBEDDINGS_LOCAL_MODEL
         return LocalSTEmbeddings(model_name=model_name)
+    elif provider == "openai":
+        # Use dedicated embeddings API key, or fall back to LLM API key
+        api_key = os.environ.get(ENV_EMBEDDINGS_OPENAI_API_KEY) or os.environ.get(ENV_LLM_API_KEY)
+        if not api_key:
+            raise ValueError(
+                f"{ENV_EMBEDDINGS_OPENAI_API_KEY} or {ENV_LLM_API_KEY} is required "
+                f"when {ENV_EMBEDDINGS_PROVIDER} is 'openai'"
+            )
+        model = os.environ.get(ENV_EMBEDDINGS_OPENAI_MODEL, DEFAULT_EMBEDDINGS_OPENAI_MODEL)
+        base_url = os.environ.get(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None
+        return OpenAIEmbeddings(api_key=api_key, model=model, base_url=base_url)
+    elif provider == "cohere":
+        api_key = os.environ.get(ENV_COHERE_API_KEY)
+        if not api_key:
+            raise ValueError(f"{ENV_COHERE_API_KEY} is required when {ENV_EMBEDDINGS_PROVIDER} is 'cohere'")
+        model = os.environ.get(ENV_EMBEDDINGS_COHERE_MODEL, DEFAULT_EMBEDDINGS_COHERE_MODEL)
+        base_url = os.environ.get(ENV_EMBEDDINGS_COHERE_BASE_URL) or None
+        return CohereEmbeddings(api_key=api_key, model=model, base_url=base_url)
+    elif provider == "litellm":
+        api_base = os.environ.get(ENV_LITELLM_API_BASE, DEFAULT_LITELLM_API_BASE)
+        api_key = os.environ.get(ENV_LITELLM_API_KEY)
+        model = os.environ.get(ENV_EMBEDDINGS_LITELLM_MODEL, DEFAULT_EMBEDDINGS_LITELLM_MODEL)
+        return LiteLLMEmbeddings(api_base=api_base, api_key=api_key, model=model)
     else:
-        raise ValueError(f"Unknown embeddings provider: {provider}. Supported: 'local', 'tei'")
+        raise ValueError(
+            f"Unknown embeddings provider: {provider}. Supported: 'local', 'tei', 'openai', 'cohere', 'litellm'"
+        )

hindsight_api/engine/entity_resolver.py CHANGED Viewed

@@ -209,7 +209,7 @@ class EntityResolver:
         # This handles duplicates via ON CONFLICT and returns all IDs
         if entities_to_create:
             # Group entities by canonical name (lowercase) to handle duplicates within batch
-            # For duplicates, we only insert once and reuse the ID
+            # For duplicates, we only insert once and reuse the ID, but track the count
             unique_entities = {}  # lowercase_name -> (entity_data, event_date, [indices])
             for idx, entity_data, event_date in entities_to_create:
                 name_lower = entity_data["text"].lower()
@@ -223,29 +223,32 @@ class EntityResolver:
             # Use a single query with unnest for speed
             entity_names = []
             entity_dates = []
+            entity_counts = []  # Track how many times each entity appears in this batch
             indices_map = []  # Maps result index -> list of original indices
             for name_lower, (entity_data, event_date, indices) in unique_entities.items():
                 entity_names.append(entity_data["text"])
                 entity_dates.append(event_date)
+                entity_counts.append(len(indices))  # Count of occurrences in this batch
                 indices_map.append(indices)
             # Batch INSERT ... ON CONFLICT with RETURNING
-            # This is much faster than individual inserts
+            # Uses the batch count for mention_count instead of always 1
             rows = await conn.fetch(
                 f"""
                 INSERT INTO {fq_table("entities")} (bank_id, canonical_name, first_seen, last_seen, mention_count)
-                SELECT $1, name, event_date, event_date, 1
-                FROM unnest($2::text[], $3::timestamptz[]) AS t(name, event_date)
+                SELECT $1, name, event_date, event_date, cnt
+                FROM unnest($2::text[], $3::timestamptz[], $4::int[]) AS t(name, event_date, cnt)
                 ON CONFLICT (bank_id, LOWER(canonical_name))
                 DO UPDATE SET
-                    mention_count = {fq_table("entities")}.mention_count + 1,
+                    mention_count = {fq_table("entities")}.mention_count + EXCLUDED.mention_count,
                     last_seen = EXCLUDED.last_seen
                 RETURNING id
                 """,
                 bank_id,
                 entity_names,
                 entity_dates,
+                entity_counts,
             )
             # Map returned IDs back to original indices

hindsight_api/engine/interface.py CHANGED Viewed

@@ -289,6 +289,7 @@ class MemoryEngineInterface(ABC):
         bank_id: str,
         *,
         fact_type: str | None = None,
+        limit: int = 1000,
         request_context: "RequestContext",
     ) -> dict[str, Any]:
         """
@@ -297,10 +298,11 @@ class MemoryEngineInterface(ABC):
         Args:
             bank_id: The memory bank ID.
             fact_type: Filter by fact type.
+            limit: Maximum number of items to return (default: 1000).
             request_context: Request context for authentication.
         Returns:
-            Dict with nodes, edges, table_rows, total_units.
+            Dict with nodes, edges, table_rows, total_units, limit.
         """
         ...
@@ -404,18 +406,20 @@ class MemoryEngineInterface(ABC):
         bank_id: str,
         *,
         limit: int = 100,
+        offset: int = 0,
         request_context: "RequestContext",
-    ) -> list[dict[str, Any]]:
+    ) -> dict[str, Any]:
         """
-        List entities for a bank.
+        List entities for a bank with pagination.
         Args:
             bank_id: The memory bank ID.
             limit: Maximum results.
+            offset: Offset for pagination.
             request_context: Request context for authentication.
         Returns:
-            List of entity dicts.
+            Dict with items, total, limit, offset.
         """
         ...

hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl