PyPI - endee - Versions diffs - 0.1.6__tar.gz → 0.1.8__tar.gz - Mend

endee 0.1.6.tar.gz → 0.1.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

{endee-0.1.6 → endee-0.1.8}/PKG-INFO +19 -3
{endee-0.1.6 → endee-0.1.8}/README.md +18 -0
{endee-0.1.6 → endee-0.1.8}/endee/endee.py +36 -44
{endee-0.1.6 → endee-0.1.8}/endee/index.py +334 -222
{endee-0.1.6 → endee-0.1.8}/endee.egg-info/PKG-INFO +19 -3
{endee-0.1.6 → endee-0.1.8}/setup.py +9 -17
{endee-0.1.6 → endee-0.1.8}/LICENSE +0 -0
{endee-0.1.6 → endee-0.1.8}/endee/__init__.py +0 -0
{endee-0.1.6 → endee-0.1.8}/endee/compression.py +0 -0
{endee-0.1.6 → endee-0.1.8}/endee/constants.py +0 -0
{endee-0.1.6 → endee-0.1.8}/endee/exceptions.py +0 -0
{endee-0.1.6 → endee-0.1.8}/endee/utils.py +0 -0
{endee-0.1.6 → endee-0.1.8}/endee.egg-info/SOURCES.txt +0 -0
{endee-0.1.6 → endee-0.1.8}/endee.egg-info/dependency_links.txt +0 -0
{endee-0.1.6 → endee-0.1.8}/endee.egg-info/requires.txt +0 -0
{endee-0.1.6 → endee-0.1.8}/endee.egg-info/top_level.txt +0 -0
{endee-0.1.6 → endee-0.1.8}/setup.cfg +0 -0

{endee-0.1.6 → endee-0.1.8}/PKG-INFO RENAMED Viewed

@@ -1,13 +1,11 @@
 Metadata-Version: 2.4
 Name: endee
-Version: 0.1.6
+Version: 0.1.8
 Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
 Home-page: https://endee.io
 Author: Endee Labs
 Author-email: dev@endee.io
 Project-URL: Documentation, https://docs.endee.io
-Project-URL: Source, https://github.com/endee-labs/endee-python
-Project-URL: Bug Reports, https://github.com/endee-labs/endee-python/issues
 Keywords: vector database,embeddings,machine learning,AI,similarity search,HNSW,nearest neighbors
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
@@ -604,6 +602,23 @@ index = client.get_index(name="your-index-name")
 index.delete_vector("vec1")
 ```
+### Filtered Deletion
+In cases where you don't know the exact vector `id`, but want to delete vectors based on filter fields, you can use filtered deletion. This is especially useful for:
+- Bulk deleting vectors by tag, type, or timestamp
+- Enforcing access control or data expiration policies
+```python
+from endee import Endee
+client = Endee(token="your-token-here")
+index = client.get_index(name="your-index-name")
+# Delete all vectors matching filter conditions
+index.delete_with_filter([{"tags": {"$eq": "important"}}])
+```
 ### Index Deletion
 Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -671,6 +686,7 @@ info = index.describe()
 | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
 | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
 | `delete_vector(id)` | Delete a vector by ID |
+| `delete_with_filter(filter)` | Delete vectors matching a filter |
 | `get_vector(id)` | Get a specific vector by ID |
 | `describe()` | Get index statistics and configuration |

{endee-0.1.6 → endee-0.1.8}/README.md RENAMED Viewed

@@ -558,6 +558,23 @@ index = client.get_index(name="your-index-name")
 index.delete_vector("vec1")
 ```
+### Filtered Deletion
+In cases where you don't know the exact vector `id`, but want to delete vectors based on filter fields, you can use filtered deletion. This is especially useful for:
+- Bulk deleting vectors by tag, type, or timestamp
+- Enforcing access control or data expiration policies
+```python
+from endee import Endee
+client = Endee(token="your-token-here")
+index = client.get_index(name="your-index-name")
+# Delete all vectors matching filter conditions
+index.delete_with_filter([{"tags": {"$eq": "important"}}])
+```
 ### Index Deletion
 Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -625,6 +642,7 @@ info = index.describe()
 | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
 | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
 | `delete_vector(id)` | Delete a vector by ID |
+| `delete_with_filter(filter)` | Delete vectors matching a filter |
 | `get_vector(id)` | Get a specific vector by ID |
 | `describe()` | Get index statistics and configuration |

{endee-0.1.6 → endee-0.1.8}/endee/endee.py RENAMED Viewed

@@ -6,6 +6,7 @@ vector database service. It includes session management, index operations.
 """
 import os
+import sys
 from functools import lru_cache
 import httpx
@@ -68,7 +69,7 @@ class SessionManager:
         pool_connections: int = SESSION_POOL_CONNECTIONS,
         pool_maxsize: int = SESSION_POOL_MAXSIZE,
         max_retries: int = SESSION_MAX_RETRIES,
-        pool_block: bool = True
+        pool_block: bool = True,
     ):
         """
         Initialize the SessionManager.
@@ -122,9 +123,9 @@ class SessionManager:
                     total=self.max_retries,
                     backoff_factor=0.5,
                     status_forcelist=HTTP_STATUS_CODES,
-                    allowed_methods=HTTP_METHODS_ALLOWED
+                    allowed_methods=HTTP_METHODS_ALLOWED,
                 ),
-                pool_block=self.pool_block
+                pool_block=self.pool_block,
             )
             session.mount(HTTP_PROTOCOL, adapter)
@@ -174,7 +175,7 @@ class ClientManager:
         max_keepalive_connections: int = HTTPX_MAX_KEEPALIVE_CONNECTIONS,
         max_retries: int = HTTPX_MAX_RETRIES,
         timeout: float = HTTPX_TIMEOUT_SEC,
-        enable_http2: bool = False
+        enable_http2: bool = False,
     ):
         """
         Initialize the ClientManager.
@@ -226,9 +227,7 @@ class ClientManager:
                 max_keepalive_connections=self.max_keepalive_connections,
             )
-            transport = httpx.HTTPTransport(
-                retries=self.max_retries
-            )
+            transport = httpx.HTTPTransport(retries=self.max_retries)
             self._client = httpx.Client(
                 http2=self.http2,
@@ -269,9 +268,7 @@ class Endee:
     """
     def __init__(
-        self,
-        token: str | None = None,
-        http_library: str = HTTP_REQUESTS_LIBRARY
+        self, token: str | None = None, http_library: str = HTTP_REQUESTS_LIBRARY
     ):
         """
         Initialize the Endee client.
@@ -306,16 +303,12 @@ class Endee:
         if self.library == HTTP_REQUESTS_LIBRARY:
             # Centralized session manager - shared across all Index objects
             self.session_manager = SessionManager(
-                pool_connections=10,
-                pool_maxsize=10,
-                max_retries=3
+                pool_connections=10, pool_maxsize=10, max_retries=3
             )
         elif self.library == HTTP_HTTPX_1_1_LIBRARY:
             # httpx.Client based manager for HTTP/1.1
             self.client_manager = ClientManager(
-                max_connections=10,
-                max_keepalive_connections=10,
-                max_retries=3
+                max_connections=10, max_keepalive_connections=10, max_retries=3
             )
         elif self.library == HTTP_HTTPX_2_LIBRARY:
             # httpx.Client based manager for HTTP/2
@@ -323,7 +316,7 @@ class Endee:
                 http2=True,
                 max_connections=10,
                 max_keepalive_connections=10,
-                max_retries=3
+                max_retries=3,
             )
         else:
             raise ValueError(
@@ -394,7 +387,7 @@ class Endee:
         ef_con: int = DEFAULT_EF_CON,
         precision: str | Precision = Precision.INT8D,
         version: int = None,
-        sparse_dim: int = DEFAULT_SPARSE_DIMENSION
+        sparse_dim: int = DEFAULT_SPARSE_DIMENSION,
     ):
         """
         Create a new vector index.
@@ -439,9 +432,14 @@ class Endee:
             )
         # Validate sparse dimension
+        # Lower bound check
         if sparse_dim < 0:
             raise ValueError("sparse_dim cannot be negative")
+        # Upper bound check
+        if sparse_dim > sys.maxsize:
+            raise ValueError(f"sparse_dim cannot exceed {sys.maxsize}")
         # Validate space type
         space_type = space_type.lower()
         if space_type not in SPACE_TYPES_SUPPORTED:
@@ -456,26 +454,23 @@ class Endee:
             )
         # Prepare request headers and data
-        headers = {
-            'Authorization': f'{self.token}',
-            'Content-Type': 'application/json'
-        }
+        headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
         data = {
-            'index_name': name,
-            'dim': dimension,
-            'space_type': space_type,
-            'M': M,
-            'ef_con': ef_con,
-            'checksum': CHECKSUM,
-            'precision': precision,
-            'version': version
+            "index_name": name,
+            "dim": dimension,
+            "space_type": space_type,
+            "M": M,
+            "ef_con": ef_con,
+            "checksum": CHECKSUM,
+            "precision": precision,
+            "version": version,
         }
         # Add sparse dimension if specified
         if sparse_dim > 0:
-            data['sparse_dim'] = sparse_dim
+            data["sparse_dim"] = sparse_dim
-        url = f'{self.base_url}/index/create'
+        url = f"{self.base_url}/index/create"
         # Make API request using appropriate library
         if self.library == HTTP_REQUESTS_LIBRARY:
@@ -502,10 +497,10 @@ class Endee:
             HTTPError: If API request fails
         """
         headers = {
-            'Authorization': f'{self.token}',
+            "Authorization": f"{self.token}",
         }
-        url = f'{self.base_url}/index/list'
+        url = f"{self.base_url}/index/list"
         # Make API request using appropriate library
         if self.library == HTTP_REQUESTS_LIBRARY:
@@ -539,10 +534,10 @@ class Endee:
             TODO - Clear the index from LRU cache when deleted
         """
         headers = {
-            'Authorization': f'{self.token}',
+            "Authorization": f"{self.token}",
         }
-        url = f'{self.base_url}/index/{name}/delete'
+        url = f"{self.base_url}/index/{name}/delete"
         # Make API request using appropriate library
         if self.library == HTTP_REQUESTS_LIBRARY:
@@ -556,7 +551,7 @@ class Endee:
         if response.status_code != 200:
             raise_exception(response.status_code, response.text)
-        return f'Index {name} deleted successfully'
+        return f"Index {name} deleted successfully"
     @lru_cache(maxsize=10)  # noqa: B019
     def get_index(self, name: str):
@@ -575,12 +570,9 @@ class Endee:
         Raises:
             HTTPError: If API request fails
         """
-        headers = {
-            'Authorization': f'{self.token}',
-            'Content-Type': 'application/json'
-        }
+        headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
-        url = f'{self.base_url}/index/{name}/info'
+        url = f"{self.base_url}/index/{name}/info"
         # Get index details from the server
         if self.library == HTTP_REQUESTS_LIBRARY:
@@ -604,7 +596,7 @@ class Endee:
                 url=self.base_url,
                 version=self.version,
                 params=data,
-                session_client_manager=self.session_manager
+                session_client_manager=self.session_manager,
             )
         else:
             idx = Index(
@@ -613,7 +605,7 @@ class Endee:
                 url=self.base_url,
                 version=self.version,
                 params=data,
-                session_client_manager=self.client_manager
+                session_client_manager=self.client_manager,
             )
         return idx

{endee-0.1.6 → endee-0.1.8}/endee/index.py RENAMED Viewed

@@ -62,7 +62,7 @@ class Index:
         url: str,
         version: int = 1,
         params=None,
-        session_client_manager=None
+        session_client_manager=None,
     ):
         """
         Initialize an Index object.
@@ -105,9 +105,9 @@ class Index:
         Raises:
             ValueError: If manager doesn't have required methods
         """
-        if hasattr(self.session_client_manager, 'get_session'):
+        if hasattr(self.session_client_manager, "get_session"):
             return self.session_client_manager.get_session()
-        elif hasattr(self.session_client_manager, 'get_client'):
+        elif hasattr(self.session_client_manager, "get_client"):
             return self.session_client_manager.get_client()
         else:
             raise ValueError(
@@ -134,7 +134,125 @@ class Index:
         """
         return self.name
+    def _validate_and_prepare_vectors(self, input_array):
+        """
+        Validate and prepare vectors from input array.
+        Args:
+            input_array: List of vector dictionaries
+        Returns:
+            tuple: (vectors_array, norms_array, vectors_list)
+        Raises:
+            ValueError: If vector data is invalid
+        """
+        # Extract vectors
+        try:
+            vectors = np.asarray(
+                [item["vector"] for item in input_array], dtype=np.float32
+            )
+        except Exception as e:
+            raise ValueError(f"Invalid vector data: {e}") from e
+        # Validate vector shape
+        if vectors.ndim != 2 or vectors.shape[1] != self.dimension:
+            raise ValueError(
+                f"Expected shape (N, {self.dimension}), got {vectors.shape}"
+            )
+        # Validate finite values
+        if not np.isfinite(vectors).all():
+            raise ValueError("Vectors contain NaN or infinity")
+        # Normalize vectors for cosine similarity
+        N = len(input_array)
+        if self.space_type == "cosine":
+            norms = np.sqrt(np.einsum("ij,ij->i", vectors, vectors))
+            np.maximum(norms, 1e-10, out=norms)  # Prevent division by zero
+            vectors /= norms[:, None]
+        else:
+            norms = np.ones(N, dtype=np.float32)
+        return vectors, norms, vectors.tolist()
+    def _validate_sparse_data(self, sparse_indices, sparse_values):
+        """
+        Validate sparse data for hybrid indexes.
+        Args:
+            sparse_indices: List of sparse vector indices
+            sparse_values: List of sparse vector values
+        Raises:
+            ValueError: If sparse data is invalid
+        """
+        if len(sparse_indices) != len(sparse_values):
+            raise ValueError("sparse_indices and sparse_values must match in length")
+        if sparse_indices:
+            min_idx = min(sparse_indices)
+            max_idx = max(sparse_indices)
+            if min_idx < 0 or max_idx >= self.sparse_dim:
+                raise ValueError(f"Sparse indices out of bounds [0, {self.sparse_dim})")
+    def _build_vector_batch_item(self, item, i, norms, vectors_list, is_hybrid):
+        """
+        Build a single vector batch item.
+        Args:
+            item: Input dictionary for one vector
+            i: Index in the batch
+            norms: Array of vector norms
+            vectors_list: List of vectors
+            is_hybrid: Whether index is hybrid
+        Returns:
+            list: Vector batch item
+        Raises:
+            ValueError: If sparse data is invalid
+        """
+        get_func = dict.get
+        dumps_func = orjson.dumps
+        str_func = str
+        float_func = float
+        sparse_indices = get_func(item, "sparse_indices", None)
+        sparse_values = get_func(item, "sparse_values", None)
+        has_sparse = sparse_indices is not None or sparse_values is not None
+        # XOR logic: hybrid index requires sparse data,
+        # dense-only forbids it
+        if has_sparse != is_hybrid:
+            raise ValueError(
+                "Hybrid index requires sparse data(along with dense vectors), "
+                "and dense-only index forbids it."
+            )
+        # Validate sparse data if present
+        if is_hybrid:
+            self._validate_sparse_data(sparse_indices, sparse_values)
+        # Build vector object: [id, meta, filter, norm, vector, ...]
+        obj = [
+            str_func(get_func(item, "id", "")),
+            json_zip(get_func(item, "meta", {})),
+            dumps_func(get_func(item, "filter", {})).decode("utf-8"),
+            float_func(norms[i]),
+            vectors_list[i],
+        ]
+        # Add sparse components for hybrid indexes
+        if is_hybrid:
+            obj.extend(
+                (
+                    sparse_indices,
+                    [float(v) for v in sparse_values],
+                )
+            )
+        return obj
     def upsert(self, input_array):
         """
@@ -172,119 +290,52 @@ class Index:
             ... ])
         """
         if len(input_array) > MAX_VECTORS_PER_BATCH:
-            raise ValueError("Cannot insert more than 1000 vectors at a time")
-        N = len(input_array)
-        is_hybrid = self.is_hybrid
-        sparse_dim = self.sparse_dim
-        # ---------- Vector extraction ----------
-        try:
-            vectors = np.asarray(
-                [item["vector"] for item in input_array],
-                dtype=np.float32
-            )
-        except Exception as e:
-            raise ValueError(f"Invalid vector data: {e}") from e
-        # Validate vector shape
-        if vectors.ndim != 2 or vectors.shape[1] != self.dimension:
             raise ValueError(
-                f"Expected shape (N, {self.dimension}), got {vectors.shape}"
+                f"Cannot insert more than {MAX_VECTORS_PER_BATCH} vectors at a time"
             )
-        # ---------- Validation (single pass) ----------
-        if not np.isfinite(vectors).all():
-            raise ValueError("Vectors contain NaN or infinity")
-        # Note: Negative zero check disabled as it's expensive and rarely useful
-        # if np.any((vectors == 0.0) & np.signbit(vectors)):
-        #     raise ValueError("Vectors contain negative zero (-0.0)")
-        # ---------- Normalization ----------
-        # Normalize vectors for cosine similarity
-        if self.space_type == "cosine":
-            norms = np.sqrt(np.einsum("ij,ij->i", vectors, vectors))
-            np.maximum(norms, 1e-10, out=norms)  # Prevent division by zero
-            vectors /= norms[:, None]
-        else:
-            norms = np.ones(N, dtype=np.float32)
+        # Validate IDs upfront and check for duplicates
+        seen_ids = set()
+        duplicate_ids = set()
-        # Convert to Python list once to avoid repeated conversions
-        vectors_list = vectors.tolist()
+        for item in input_array:
+            id_val = item.get("id", "")
+            if not id_val or id_val is None:
+                raise ValueError("All vectors must have a non-empty ID")
-        # ---------- Batch construction ----------
-        vector_batch = []
-         # Use local references for speed
-        vector_append = vector_batch.append
-        get_func = dict.get
-        dumps_func = orjson.dumps
-        str_func = str
-        float_func = float
+            if id_val in seen_ids:
+                duplicate_ids.add(id_val)
+            else:
+                seen_ids.add(id_val)
-        for i, item in enumerate(input_array):
-            sparse_indices = get_func(item, "sparse_indices", None)
-            sparse_values = get_func(item, "sparse_values", None)
-            has_sparse = (
-                sparse_indices is not None or sparse_values is not None
+        if duplicate_ids:
+            raise ValueError(
+                f"Duplicate IDs found in input array: {sorted(duplicate_ids)}"
             )
-            # XOR logic: hybrid index requires sparse data,
-            # dense-only forbids it
-            if has_sparse != is_hybrid:
-                raise ValueError(
-                    "Hybrid index requires sparse data(along with dense vectors), "
-                    "and dense-only index forbids it."
-                )
+        is_hybrid = self.is_hybrid
-            # Validate sparse data if present
-            if is_hybrid:
-                if len(sparse_indices) != len(sparse_values):
-                    raise ValueError(
-                        "sparse_indices and sparse_values must match in length"
-                    )
-                if sparse_indices:
-                    min_idx = min(sparse_indices)
-                    max_idx = max(sparse_indices)
-                    if min_idx < 0 or max_idx >= sparse_dim:
-                        raise ValueError(
-                            f"Sparse indices out of bounds [0, {sparse_dim})"
-                        )
-            # Build vector object: [id, meta, filter, norm, vector, ...]
-            obj = [
-                str_func(get_func(item, "id", "")),
-                json_zip(get_func(item, "meta", {})),
-                dumps_func(get_func(item, "filter", {})).decode('utf-8'),
-                float_func(norms[i]),
-                vectors_list[i],
-            ]
-            # Add sparse components for hybrid indexes
-            if is_hybrid:
-                obj.extend((
-                    sparse_indices,
-                    [float(v) for v in sparse_values],
-                ))
+        # Validate and prepare vectors
+        vectors, norms, vectors_list = self._validate_and_prepare_vectors(input_array)
-            vector_append(obj)
+        # Build batch
+        vector_batch = [
+            self._build_vector_batch_item(item, i, norms, vectors_list, is_hybrid)
+            for i, item in enumerate(input_array)
+        ]
         serialized_data = msgpack.packb(
             vector_batch, use_bin_type=True, use_single_float=True
         )
-        headers = {
-            'Authorization': self.token,
-            'Content-Type': 'application/msgpack'
-        }
+        headers = {"Authorization": self.token, "Content-Type": "application/msgpack"}
         http_client = self._get_session_client()
         # Sending the batch to the server
         response = http_client.post(
-            f'{self.url}/index/{self.name}/vector/insert',
+            f"{self.url}/index/{self.name}/vector/insert",
             headers=headers,
-            data=serialized_data
+            data=serialized_data,
         )
         if response.status_code != 200:
@@ -292,7 +343,140 @@ class Index:
         return "Vectors inserted successfully"
+    def _validate_query_params(
+        self, top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
+    ):
+        """
+        Validate query parameters.
+        Args:
+            top_k: Number of results to return
+            ef: HNSW ef_search parameter
+            has_sparse: Whether sparse query is provided
+            has_dense: Whether dense query is provided
+            sparse_indices: Sparse vector indices
+            sparse_values: Sparse vector values
+        Raises:
+            ValueError: If parameters are invalid
+        """
+        # Validate top_k parameter
+        if top_k > MAX_TOP_K_ALLOWED or top_k <= 0:
+            raise ValueError(
+                f"top_k must be between 1 and {MAX_TOP_K_ALLOWED}, got {top_k}"
+            )
+        # Validate ef parameter
+        if ef > MAX_EF_SEARCH_ALLOWED:
+            raise ValueError(
+                f"ef search cannot be greater than {MAX_EF_SEARCH_ALLOWED}"
+            )
+        # At least one query type must be provided
+        if not has_dense and not has_sparse:
+            raise ValueError(
+                "At least one of 'vector' or 'sparse_indices'/'sparse_values' "
+                "must be provided."
+            )
+        # Cannot use sparse query on dense-only index
+        if has_sparse and not self.is_hybrid:
+            raise ValueError(
+                "Cannot perform sparse search on a dense-only index. "
+                "Create index with sparse_dim > 0 for hybrid support."
+            )
+        # If one sparse parameter is provided, both must be provided
+        if has_sparse:
+            if sparse_indices is None or sparse_values is None:
+                raise ValueError(
+                    "Both sparse_indices and sparse_values must be provided together."
+                )
+            if len(sparse_indices) != len(sparse_values):
+                raise ValueError(
+                    f"sparse_indices and sparse_values must have the same "
+                    f"length. Got {len(sparse_indices)} indices and "
+                    f"{len(sparse_values)} values."
+                )
+    def _prepare_dense_vector(self, vector):
+        """
+        Prepare and validate dense query vector.
+        Args:
+            vector: Input vector
+        Returns:
+            list: Normalized vector as list
+        Raises:
+            ValueError: If vector is invalid
+        """
+        # Convert to numpy array
+        vec = np.asarray(vector, dtype=np.float32)
+        # Validate shape
+        if vec.shape != (self.dimension,):
+            raise ValueError(
+                f"Vector must have shape ({self.dimension},), got {vec.shape}"
+            )
+        # Validate finite values
+        if not np.isfinite(vec).all():
+            raise ValueError("Vector contains NaN or infinity")
+        # Normalize for cosine similarity using einsum
+        if self.space_type == "cosine":
+            norm = np.sqrt(np.einsum("i,i->", vec, vec))
+            norm = max(norm, 1e-10)  # Prevent division by zero
+            vec = vec / norm
+        return vec.tolist()
+    def _process_query_results(self, results, top_k, include_vectors):
+        """
+        Process and format query results.
+        Args:
+            results: Raw msgpack results from server
+            top_k: Number of results requested
+            include_vectors: Whether to include vector data
+        Returns:
+            list: Processed results
+        """
+        processed_results = []
+        results = results[:top_k]
+        for result in results:
+            similarity = result[0]
+            vector_id = result[1]
+            meta_data = result[2]
+            filter_str = result[3]
+            norm_value = result[4]
+            vector_data = result[5] if len(result) > 5 else []
+            processed = {
+                "id": vector_id,
+                "similarity": similarity,
+                "distance": 1.0 - similarity,
+                "meta": json_unzip(meta_data),
+                "norm": norm_value,
+            }
+            # Add filter if present
+            if filter_str:
+                processed["filter"] = orjson.loads(filter_str)
+            # Add vector data if requested
+            if include_vectors and vector_data:
+                processed["vector"] = list(vector_data)
+            else:
+                processed["vector"] = []
+            processed_results.append(processed)
+        return processed_results
     def query(
         self,
@@ -303,7 +487,7 @@ class Index:
         include_vectors=False,
         log=False,
         sparse_indices=None,
-        sparse_values=None
+        sparse_values=None,
     ):
         """
         Search for similar vectors in the index.
@@ -343,97 +527,35 @@ class Index:
             ...     filter={"category": "A"}
             ... )
         """
-        # Validate top_k parameter
-        if top_k > MAX_TOP_K_ALLOWED or top_k <= 0:
-            raise ValueError(
-                f"top_k must be between 1 and {MAX_TOP_K_ALLOWED}, got {top_k}"
-            )
-        # Validate ef parameter
-        if ef > MAX_EF_SEARCH_ALLOWED:
-            raise ValueError(
-                f"ef search cannot be greater than {MAX_EF_SEARCH_ALLOWED}"
-            )
         # Validate sparse query parameters
         has_sparse = sparse_indices is not None or sparse_values is not None
         has_dense = vector is not None
-        # At least one query type must be provided
-        if not has_dense and not has_sparse:
-            raise ValueError(
-                "At least one of 'vector' or 'sparse_indices'/'sparse_values' "
-                "must be provided."
-            )
-        # Cannot use sparse query on dense-only index
-        if has_sparse and not self.is_hybrid:
-            raise ValueError(
-                "Cannot perform sparse search on a dense-only index. "
-                "Create index with sparse_dim > 0 for hybrid support."
-            )
-        # If one sparse parameter is provided, both must be provided
-        if has_sparse:
-            if sparse_indices is None or sparse_values is None:
-                raise ValueError(
-                    "Both sparse_indices and sparse_values must be provided "
-                    "together."
-                )
-            if len(sparse_indices) != len(sparse_values):
-                raise ValueError(
-                    f"sparse_indices and sparse_values must have the same "
-                    f"length. Got {len(sparse_indices)} indices and "
-                    f"{len(sparse_values)} values."
-                )
+        # Validate all query parameters
+        self._validate_query_params(
+            top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
+        )
         # Prepare search request headers
-        headers = {
-            'Authorization': f'{self.token}',
-            'Content-Type': 'application/json'
-        }
+        headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
         # Prepare search request data
-        data = {
-            'k': top_k,
-            'ef': ef,
-            'include_vectors': include_vectors
-        }
+        data = {"k": top_k, "ef": ef, "include_vectors": include_vectors}
         # Add dense vector if provided
         if has_dense:
-            # Convert to numpy array
-            vec = np.asarray(vector, dtype=np.float32)
-            # Validate shape
-            if vec.shape != (self.dimension,):
-                raise ValueError(
-                    f"Vector must have shape ({self.dimension},), "
-                    f"got {vec.shape}"
-                )
-            # Validate finite values
-            if not np.isfinite(vec).all():
-                raise ValueError("Vector contains NaN or infinity")
-            # Normalize for cosine similarity using einsum
-            if self.space_type == "cosine":
-                norm = np.sqrt(np.einsum("i,i->", vec, vec))
-                norm = max(norm, 1e-10)  # Prevent division by zero
-                vec = vec / norm
-            data['vector'] = vec.tolist()
+            data["vector"] = self._prepare_dense_vector(vector)
         # Add sparse query if provided
         if has_sparse:
-            data['sparse_indices'] = list(sparse_indices)
-            data['sparse_values'] = [float(v) for v in sparse_values]
+            data["sparse_indices"] = list(sparse_indices)
+            data["sparse_values"] = [float(v) for v in sparse_values]
         # Add filter if provided
         if filter:
-            data['filter'] = orjson.dumps(filter).decode('utf-8')
+            data["filter"] = orjson.dumps(filter).decode("utf-8")
-        url = f'{self.url}/index/{self.name}/search'
+        url = f"{self.url}/index/{self.name}/search"
         # Make API request
         http_client = self._get_session_client()
@@ -447,39 +569,7 @@ class Index:
         results = msgpack.unpackb(response.content, raw=False)
         # Process and format results
-        # Result format: [similarity, id, meta, filter, norm, vector]
-        processed_results = []
-        results = results[:top_k]
-        for result in results:
-            similarity = result[0]
-            vector_id = result[1]
-            meta_data = result[2]
-            filter_str = result[3]
-            norm_value = result[4]
-            vector_data = result[5] if len(result) > 5 else []
-            processed = {
-                'id': vector_id,
-                'similarity': similarity,
-                'distance': 1.0 - similarity,
-                'meta': json_unzip(meta_data),
-                'norm': norm_value
-            }
-            # Add filter if present
-            if filter_str:
-                processed['filter'] = orjson.loads(filter_str)
-            # Add vector data if requested
-            if include_vectors and vector_data:
-                processed['vector'] = list(vector_data)
-            else:
-                processed['vector'] = []
-            processed_results.append(processed)
-        return processed_results
+        return self._process_query_results(results, top_k, include_vectors)
     def delete_vector(self, id):
         """
@@ -495,10 +585,10 @@ class Index:
             HTTPError: If deletion fails
         """
         headers = {
-            'Authorization': f'{self.token}',
+            "Authorization": f"{self.token}",
         }
-        url = f'{self.url}/index/{self.name}/vector/{id}/delete'
+        url = f"{self.url}/index/{self.name}/vector/{id}/delete"
         http_client = self._get_session_client()
         response = http_client.delete(url, headers=headers)
@@ -508,6 +598,35 @@ class Index:
         return response.text + " rows deleted"
+    def delete_with_filter(self, filter):
+        """
+        Delete multiple vectors based on a filter.
+        Deletes all vectors that match the provided filter criteria.
+        Args:
+            filter: Filter criteria to match, e.g. [{"tags": {"$eq": "important"}}]
+        Returns:
+            str: Server response with deletion details
+        Raises:
+            HTTPError: If deletion fails
+        """
+        headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
+        data = {"filter": filter}
+        url = f"{self.url}/index/{self.name}/vectors/delete"
+        http_client = self._get_session_client()
+        response = http_client.delete(url, headers=headers, json=data)
+        if response.status_code != 200:
+            raise_exception(response.status_code, response.text)
+        return response.text
     def get_vector(self, id):
         """
         Retrieve a single vector by ID.
@@ -535,16 +654,13 @@ class Index:
             >>> vec = index.get_vector("vec1")
             >>> print(vec['meta'])
         """
-        headers = {
-            'Authorization': f'{self.token}',
-            'Content-Type': 'application/json'
-        }
+        headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
-        url = f'{self.url}/index/{self.name}/vector/get'
+        url = f"{self.url}/index/{self.name}/vector/get"
         # Use POST method with the ID in the request body
         http_client = self._get_session_client()
-        response = http_client.post(url, headers=headers, json={'id': id})
+        response = http_client.post(url, headers=headers, json={"id": id})
         if response.status_code != 200:
             raise_exception(response.status_code, response.text)
@@ -554,22 +670,18 @@ class Index:
         vector_obj = msgpack.unpackb(response.content, raw=False)
         result = {
-            'id': vector_obj[0],
-            'meta': json_unzip(vector_obj[1]),
-            'filter': vector_obj[2],
-            'norm': vector_obj[3],
-            'vector': list(vector_obj[4])
+            "id": vector_obj[0],
+            "meta": json_unzip(vector_obj[1]),
+            "filter": vector_obj[2],
+            "norm": vector_obj[3],
+            "vector": list(vector_obj[4]),
         }
         # Include sparse data if present (for hybrid indexes)
         if len(vector_obj) > 5:
-            result['sparse_indices'] = (
-                list(vector_obj[5]) if vector_obj[5] else []
-            )
+            result["sparse_indices"] = list(vector_obj[5]) if vector_obj[5] else []
         if len(vector_obj) > 6:
-            result['sparse_values'] = (
-                list(vector_obj[6]) if vector_obj[6] else []
-            )
+            result["sparse_values"] = list(vector_obj[6]) if vector_obj[6] else []
         return result

{endee-0.1.6 → endee-0.1.8}/endee.egg-info/PKG-INFO RENAMED Viewed

@@ -1,13 +1,11 @@
 Metadata-Version: 2.4
 Name: endee
-Version: 0.1.6
+Version: 0.1.8
 Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
 Home-page: https://endee.io
 Author: Endee Labs
 Author-email: dev@endee.io
 Project-URL: Documentation, https://docs.endee.io
-Project-URL: Source, https://github.com/endee-labs/endee-python
-Project-URL: Bug Reports, https://github.com/endee-labs/endee-python/issues
 Keywords: vector database,embeddings,machine learning,AI,similarity search,HNSW,nearest neighbors
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
@@ -604,6 +602,23 @@ index = client.get_index(name="your-index-name")
 index.delete_vector("vec1")
 ```
+### Filtered Deletion
+When you don't know the exact vector `id` but want to delete vectors by their filter fields, use filtered deletion. This is especially useful for:
+- Bulk deleting vectors by tag, type, or timestamp
+- Enforcing access control or data expiration policies
+```python
+from endee import Endee
+client = Endee(token="your-token-here")
+index = client.get_index(name="your-index-name")
+# Delete all vectors matching filter conditions
+index.delete_with_filter([{"tags": {"$eq": "important"}}])
+```
 ### Index Deletion
 Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -671,6 +686,7 @@ info = index.describe()
 | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
 | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
 | `delete_vector(id)` | Delete a vector by ID |
+| `delete_with_filter(filter)` | Delete vectors matching a filter |
 | `get_vector(id)` | Get a specific vector by ID |
 | `describe()` | Get index statistics and configuration |

{endee-0.1.6 → endee-0.1.8}/setup.py RENAMED Viewed

@@ -8,39 +8,34 @@ requirements for the Endee Python client library.
 from setuptools import find_packages, setup
 # Read the long description from README
-with open('README.md', encoding='utf-8') as f:
+with open("README.md", encoding="utf-8") as f:
     long_description = f.read()
 setup(
     # Package Metadata
     name="endee",
-    version="0.1.6",
+    version="0.1.8",
     author="Endee Labs",
     author_email="dev@endee.io",
     description=(
-        "Endee is the Next-Generation Vector Database for Scalable, "
-        "High-Performance AI"
+        "Endee is the Next-Generation Vector Database for Scalable, High-Performance AI"
     ),
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://endee.io",
     # Package Discovery
     packages=find_packages(),
     # Dependencies
     install_requires=[
-        "requests>=2.28.0",       # HTTP library for API requests
-        "httpx[http2]>=0.28.1",   # Alternative HTTP library with HTTP/2 support
-        "numpy>=2.2.4",           # Array operations and vector normalization
-        "msgpack>=1.1.0",         # Efficient binary serialization
-        "orjson>=3.11.5",         # Ultra-fast JSON serialization/deserialization
+        "requests>=2.28.0",  # HTTP library for API requests
+        "httpx[http2]>=0.28.1",  # Alternative HTTP library with HTTP/2 support
+        "numpy>=2.2.4",  # Array operations and vector normalization
+        "msgpack>=1.1.0",  # Efficient binary serialization
+        "orjson>=3.11.5",  # Ultra-fast JSON serialization/deserialization
     ],
     # Python Version Requirements
-    python_requires='>=3.6',
+    python_requires=">=3.6",
     # Package Classification
     classifiers=[
         "Development Status :: 4 - Beta",
@@ -58,7 +53,6 @@ setup(
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ],
     # Additional Metadata
     keywords=[
         "vector database",
@@ -71,7 +65,5 @@ setup(
     ],
     project_urls={
         "Documentation": "https://docs.endee.io",
-        "Source": "https://github.com/endee-labs/endee-python",
-        "Bug Reports": "https://github.com/endee-labs/endee-python/issues",
     },
 )