endee 0.1.7__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: endee
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
  Home-page: https://endee.io
  Author: Endee Labs
@@ -29,6 +29,7 @@ Requires-Dist: httpx[http2]>=0.28.1
  Requires-Dist: numpy>=2.2.4
  Requires-Dist: msgpack>=1.1.0
  Requires-Dist: orjson>=3.11.5
+ Requires-Dist: pydantic>=2.0.0
  Dynamic: author
  Dynamic: author-email
  Dynamic: classifier
@@ -602,6 +603,23 @@ index = client.get_index(name="your-index-name")
  index.delete_vector("vec1")
  ```

+ ### Filtered Deletion
+
+ In cases where you don't know the exact vector `id`, but want to delete vectors based on filter fields, you can use filtered deletion. This is especially useful for:
+
+ - Bulk deleting vectors by tag, type, or timestamp
+ - Enforcing access control or data expiration policies
+
+ ```python
+ from endee import Endee
+
+ client = Endee(token="your-token-here")
+ index = client.get_index(name="your-index-name")
+
+ # Delete all vectors matching filter conditions
+ index.delete_with_filter([{"tags": {"$eq": "important"}}])
+ ```
+
  ### Index Deletion

  Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -669,6 +687,7 @@ info = index.describe()
  | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
  | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
  | `delete_vector(id)` | Delete a vector by ID |
+ | `delete_with_filter(filter)` | Delete vectors matching a filter |
  | `get_vector(id)` | Get a specific vector by ID |
  | `describe()` | Get index statistics and configuration |

@@ -558,6 +558,23 @@ index = client.get_index(name="your-index-name")
  index.delete_vector("vec1")
  ```

+ ### Filtered Deletion
+
+ In cases where you don't know the exact vector `id`, but want to delete vectors based on filter fields, you can use filtered deletion. This is especially useful for:
+
+ - Bulk deleting vectors by tag, type, or timestamp
+ - Enforcing access control or data expiration policies
+
+ ```python
+ from endee import Endee
+
+ client = Endee(token="your-token-here")
+ index = client.get_index(name="your-index-name")
+
+ # Delete all vectors matching filter conditions
+ index.delete_with_filter([{"tags": {"$eq": "important"}}])
+ ```
+
  ### Index Deletion

  Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -625,6 +642,7 @@ info = index.describe()
  | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
  | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
  | `delete_vector(id)` | Delete a vector by ID |
+ | `delete_with_filter(filter)` | Delete vectors matching a filter |
  | `get_vector(id)` | Get a specific vector by ID |
  | `describe()` | Get index statistics and configuration |

@@ -6,9 +6,10 @@ metadata. Metadata is serialized to JSON and compressed using zlib
  to reduce storage size, memory usage, and network transfer overhead.
  """

- import json
  import zlib

+ import orjson
+

  def json_zip(data: dict) -> bytes:
      """
@@ -24,10 +25,9 @@ def json_zip(data: dict) -> bytes:
      >>> compressed = json_zip({"user": "alice", "age": 30})
      """
      if not data:
-         return b''
+         return b""

-     json_bytes = json.dumps(data).encode("utf-8")
-     return zlib.compress(json_bytes)
+     return zlib.compress(orjson.dumps(data))


  def json_unzip(compressed_data: bytes) -> dict:
@@ -46,5 +46,4 @@ def json_unzip(compressed_data: bytes) -> dict:
      if not compressed_data:
          return {}

-     decompressed = zlib.decompress(compressed_data)
-     return json.loads(decompressed.decode("utf-8"))
+     return orjson.loads(zlib.decompress(compressed_data))
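The replacement helpers above are small enough to sanity-check in isolation. The sketch below mirrors the new `json_zip`/`json_unzip` bodies from the hunk and round-trips a sample payload; it assumes only that `orjson` is installed, and the `payload` dict is an arbitrary example, not from the package. Note that `orjson.dumps` returns `bytes` directly, which is why the old `.encode("utf-8")` step disappears.

```python
# Round-trip sketch of the reworked compression helpers; the function
# bodies mirror the "+" lines in the hunk above.
import zlib

import orjson


def json_zip(data: dict) -> bytes:
    # Empty dicts become an empty marker rather than a zlib stream.
    if not data:
        return b""
    return zlib.compress(orjson.dumps(data))


def json_unzip(compressed_data: bytes) -> dict:
    if not compressed_data:
        return {}
    return orjson.loads(zlib.decompress(compressed_data))


payload = {"user": "alice", "age": 30}  # sample payload, not from the package
assert json_unzip(json_zip(payload)) == payload
```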
@@ -21,13 +21,15 @@ class Precision(str, Enum):
      INT16D: 16-bit integer
      INT8D: 8-bit integer
      """
+
      BINARY2 = "binary"
      FLOAT16 = "float16"
      FLOAT32 = "float32"
      INT16D = "int16d"
      INT8D = "int8d"

- #Checksum Value while creating an index
+
+ # Checksum Value while creating an index
  CHECKSUM = -1

  # HTTP Configuration
@@ -69,13 +71,7 @@ LOCAL_REGION = "local"

  # Supported Types
  # List of precision types supported by the vector database
- PRECISION_TYPES_SUPPORTED = [
-     "binary",
-     "float16",
-     "float32",
-     "int16d",
-     "int8d"
- ]
+ PRECISION_TYPES_SUPPORTED = ["binary", "float16", "float32", "int16d", "int8d"]

  # Distance metric types
  COSINE = "cosine"  # Cosine similarity (normalized dot product)
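One reason the flattened `PRECISION_TYPES_SUPPORTED` literal can stay in sync with the enum is that `Precision` subclasses `str`, so each member compares equal to its plain-string value. A self-contained sketch (names copied from the hunk above; the asserts are illustrative, not part of the package):

```python
# Precision members are str subclasses, so they compare equal to the
# raw strings in PRECISION_TYPES_SUPPORTED.
from enum import Enum


class Precision(str, Enum):
    BINARY2 = "binary"
    FLOAT16 = "float16"
    FLOAT32 = "float32"
    INT16D = "int16d"
    INT8D = "int8d"


PRECISION_TYPES_SUPPORTED = ["binary", "float16", "float32", "int16d", "int8d"]

# Definition order matches the flattened list...
assert [p.value for p in Precision] == PRECISION_TYPES_SUPPORTED
# ...and str-subclass equality means enum members pass plain `in` checks,
# which is what lets validators accept either an enum member or a string.
assert Precision.FLOAT32 in PRECISION_TYPES_SUPPORTED
```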
@@ -6,7 +6,6 @@ vector database service. It includes session management, index operations.
  """

  import os
- import sys
  from functools import lru_cache

  import httpx
@@ -30,18 +29,14 @@ from endee.constants import (
      HTTPX_TIMEOUT_SEC,
      LOCAL_BASE_URL,
      LOCAL_REGION,
-     MAX_DIMENSION_ALLOWED,
-     MAX_INDEX_NAME_LENGTH_ALLOWED,
-     PRECISION_TYPES_SUPPORTED,
      SESSION_MAX_RETRIES,
      SESSION_POOL_CONNECTIONS,
      SESSION_POOL_MAXSIZE,
-     SPACE_TYPES_SUPPORTED,
      Precision,
  )
  from endee.exceptions import raise_exception
  from endee.index import Index
- from endee.utils import is_valid_index_name
+ from endee.schema import IndexCreateRequest, IndexMetadata


  class SessionManager:
@@ -417,58 +412,34 @@ class Endee:
              ValueError: If parameters are invalid
              HTTPError: If API request fails
          """
-         # Validate index name
-         if not is_valid_index_name(name):
-             raise ValueError(
-                 f"Invalid index name. Index name must be alphanumeric and can "
-                 f"contain underscores and should be less than "
-                 f"{MAX_INDEX_NAME_LENGTH_ALLOWED} characters"
-             )
-
-         # Validate dimension
-         if dimension > MAX_DIMENSION_ALLOWED:
-             raise ValueError(
-                 f"Dimension cannot be greater than {MAX_DIMENSION_ALLOWED}"
-             )
-
-         # Validate sparse dimension
-         # Lower bound check
-         if sparse_dim < 0:
-             raise ValueError("sparse_dim cannot be negative")
-
-         # Upper bound check
-         if sparse_dim > sys.maxsize:
-             raise ValueError(f"sparse_dim cannot exceed {sys.maxsize}")
-
-         # Validate space type
-         space_type = space_type.lower()
-         if space_type not in SPACE_TYPES_SUPPORTED:
-             raise ValueError(f"Invalid space type: {space_type}")
-
-         # Validate precision
-         if precision not in PRECISION_TYPES_SUPPORTED:
-             raise ValueError(
-                 f"Invalid precision: {precision}. Use one of Precision enum "
-                 f"values: Precision.BINARY2, Precision.INT8D, "
-                 f"Precision.INT16D, Precision.FLOAT16, or Precision.FLOAT32"
-             )
+         # Validate parameters using Pydantic
+         request_data = IndexCreateRequest(
+             name=name,
+             dimension=dimension,
+             space_type=space_type,
+             M=M,
+             ef_con=ef_con,
+             precision=precision,
+             version=version,
+             sparse_dim=sparse_dim,
+         )

          # Prepare request headers and data
          headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
          data = {
-             "index_name": name,
-             "dim": dimension,
-             "space_type": space_type,
-             "M": M,
-             "ef_con": ef_con,
+             "index_name": request_data.name,
+             "dim": request_data.dimension,
+             "space_type": request_data.space_type,
+             "M": request_data.M,
+             "ef_con": request_data.ef_con,
              "checksum": CHECKSUM,
-             "precision": precision,
-             "version": version,
+             "precision": request_data.precision,
+             "version": request_data.version,
          }

          # Add sparse dimension if specified
-         if sparse_dim > 0:
-             data["sparse_dim"] = sparse_dim
+         if request_data.sparse_dim > 0:
+             data["sparse_dim"] = request_data.sparse_dim

          url = f"{self.base_url}/index/create"

@@ -588,6 +559,9 @@ class Endee:

          data = response.json()

+         # Validate index metadata
+         metadata = IndexMetadata(**data)
+
          # Create Index object with appropriate manager
          if self.library == HTTP_REQUESTS_LIBRARY:
              idx = Index(
@@ -595,7 +569,7 @@
                  token=self.token,
                  url=self.base_url,
                  version=self.version,
-                 params=data,
+                 params=metadata.model_dump(by_alias=True),
                  session_client_manager=self.session_manager,
              )
          else:
@@ -604,7 +578,7 @@
                  token=self.token,
                  url=self.base_url,
                  version=self.version,
-                 params=data,
+                 params=metadata.model_dump(by_alias=True),
                  session_client_manager=self.client_manager,
              )

@@ -6,7 +6,7 @@ conditions that can occur when interacting with the Endee API. Each exception
  type corresponds to specific HTTP status codes and error scenarios.
  """

- import json
+ import orjson


  class EndeeException(Exception):
@@ -231,8 +231,8 @@ def raise_exception(code: int, text: str = None):
      # Try to parse JSON error message
      message = None
      try:
-         message = json.loads(text).get("error", "Unknown error")
-     except (json.JSONDecodeError, TypeError, AttributeError):
+         message = orjson.loads(text).get("error", "Unknown error")
+     except (orjson.JSONDecodeError, TypeError, AttributeError):
          # Fall back to raw text or default message
          message = text or "Unknown error"

@@ -6,6 +6,8 @@ upsert, query, delete, and retrieval on vector indices. It supports both dense
  and hybrid (dense + sparse) vector operations.
  """

+ from typing import List
+
  import msgpack
  import numpy as np
  import orjson
@@ -20,8 +22,6 @@ from .constants import (
      DIMENSION_FIELD,
      IS_HYBRID_FIELD,
      MAX_CONNECTIONS_FIELD,
-     MAX_EF_SEARCH_ALLOWED,
-     MAX_TOP_K_ALLOWED,
      MAX_VECTORS_PER_BATCH,
      NAME_FIELD,
      PRECISION_FIELD,
@@ -29,6 +29,7 @@ from .constants import (
      SPARSE_DIM_FIELD,
  )
  from .exceptions import raise_exception
+ from .schema import IndexMetadata, QueryRequest, VectorItem


  class Index:
@@ -76,18 +77,19 @@ class Index:
              session_client_manager: Shared SessionManager or ClientManager
                  from parent Endee client
          """
+         metadata = IndexMetadata(**params)
          self.name = name
          self.token = token
          self.url = url
          self.version = version
          self.checksum = CHECKSUM
-         self.lib_token = params["lib_token"]
-         self.count = params["total_elements"]
-         self.space_type = params[SPACE_TYPE_FIELD]
-         self.dimension = params[DIMENSION_FIELD]
-         self.precision = params.get(PRECISION_FIELD)
-         self.M = params[MAX_CONNECTIONS_FIELD]
-         self.sparse_dim = params.get(SPARSE_DIM_FIELD, 0)
+         self.lib_token = metadata.lib_token
+         self.count = metadata.total_elements
+         self.space_type = metadata.space_type
+         self.dimension = metadata.dimension
+         self.precision = metadata.precision
+         self.M = metadata.M
+         self.sparse_dim = metadata.sparse_dim

          # Use shared HTTP manager from Endee client
          self.session_client_manager = session_client_manager
@@ -134,12 +136,12 @@ class Index:
          """
          return self.name

-     def _validate_and_prepare_vectors(self, input_array):
+     def _validate_and_prepare_vectors(self, raw_vectors: List[List[float]]):
          """
-         Validate and prepare vectors from input array.
+         Validate and prepare vectors from raw input lists.

          Args:
-             input_array: List of vector dictionaries
+             raw_vectors: List of dense vector lists

          Returns:
              tuple: (vectors_array, norms_array, vectors_list)
@@ -149,9 +151,7 @@ class Index:
          """
          # Extract vectors
          try:
-             vectors = np.asarray(
-                 [item["vector"] for item in input_array], dtype=np.float32
-             )
+             vectors = np.asarray(raw_vectors, dtype=np.float32)
          except Exception as e:
              raise ValueError(f"Invalid vector data: {e}") from e

@@ -166,42 +166,29 @@ class Index:
              raise ValueError("Vectors contain NaN or infinity")

          # Normalize vectors for cosine similarity
-         N = len(input_array)
+         n_vectors = len(raw_vectors)
          if self.space_type == "cosine":
              norms = np.sqrt(np.einsum("ij,ij->i", vectors, vectors))
              np.maximum(norms, 1e-10, out=norms)  # Prevent division by zero
              vectors /= norms[:, None]
          else:
-             norms = np.ones(N, dtype=np.float32)
+             norms = np.ones(n_vectors, dtype=np.float32)

          return vectors, norms, vectors.tolist()

-     def _validate_sparse_data(self, sparse_indices, sparse_values):
-         """
-         Validate sparse data for hybrid indexes.
-
-         Args:
-             sparse_indices: List of sparse vector indices
-             sparse_values: List of sparse vector values
-
-         Raises:
-             ValueError: If sparse data is invalid
-         """
-         if len(sparse_indices) != len(sparse_values):
-             raise ValueError("sparse_indices and sparse_values must match in length")
-
-         if sparse_indices:
-             min_idx = min(sparse_indices)
-             max_idx = max(sparse_indices)
-             if min_idx < 0 or max_idx >= self.sparse_dim:
-                 raise ValueError(f"Sparse indices out of bounds [0, {self.sparse_dim})")
-
-     def _build_vector_batch_item(self, item, i, norms, vectors_list, is_hybrid):
+     def _build_vector_batch_item(
+         self,
+         item: VectorItem,
+         i: int,
+         norms: np.ndarray,
+         vectors_list: list,
+         is_hybrid: bool,
+     ):
          """
          Build a single vector batch item.

          Args:
-             item: Input dictionary for one vector
+             item: Validated VectorItem
              i: Index in the batch
              norms: Array of vector norms
              vectors_list: List of vectors
@@ -213,14 +200,19 @@ class Index:
          Raises:
              ValueError: If sparse data is invalid
          """
-         get_func = dict.get
+         # Localize functions for performance
          dumps_func = orjson.dumps
+         zip_func = json_zip
          str_func = str
          float_func = float

-         sparse_indices = get_func(item, "sparse_indices", None)
-         sparse_values = get_func(item, "sparse_values", None)
-         has_sparse = sparse_indices is not None or sparse_values is not None
+         item_id = item.id
+         item_meta = item.meta
+         item_filter = item.filter
+         sparse_indices = item.sparse_indices
+         sparse_values = item.sparse_values
+
+         has_sparse = sparse_indices is not None

          # XOR logic: hybrid index requires sparse data,
          # dense-only forbids it
@@ -230,15 +222,18 @@ class Index:
                  "and dense-only index forbids it."
              )

-         # Validate sparse data if present
-         if is_hybrid:
-             self._validate_sparse_data(sparse_indices, sparse_values)
+         # Validate sparse indices if present
+         if has_sparse:
+             max_idx = max(sparse_indices)
+             min_idx = min(sparse_indices)
+             if min_idx < 0 or max_idx >= self.sparse_dim:
+                 raise ValueError(f"Sparse indices out of bounds [0, {self.sparse_dim})")

          # Build vector object: [id, meta, filter, norm, vector, ...]
          obj = [
-             str_func(get_func(item, "id", "")),
-             json_zip(get_func(item, "meta", {})),
-             dumps_func(get_func(item, "filter", {})).decode("utf-8"),
+             str_func(item_id),
+             zip_func(item_meta),
+             dumps_func(item_filter).decode("utf-8"),
              float_func(norms[i]),
              vectors_list[i],
          ]
@@ -248,7 +243,7 @@ class Index:
          obj.extend(
              (
                  sparse_indices,
-                 [float(v) for v in sparse_values],
+                 [float_func(v) for v in sparse_values],
              )
          )

@@ -294,20 +289,40 @@ class Index:
                  f"Cannot insert more than {MAX_VECTORS_PER_BATCH} vectors at a time"
              )

-         # Validate IDs upfront
-         ids = [item.get("id", "") for item in input_array]
-         if any(not id_val or id_val is None for id_val in ids):
-             raise ValueError("All vectors must have a non-empty ID")
-
+         # Localize for the loop
          is_hybrid = self.is_hybrid
+         seen_ids = set()
+         duplicate_ids = []
+         validated_items = []
+         vector_item_cls = VectorItem
+
+         # Combine validation, duplicate check, and vector extraction
+         vectors_to_process = []
+         for item in input_array:
+             v_item = vector_item_cls(**item)
+             item_id = v_item.id
+             if item_id in seen_ids:
+                 duplicate_ids.append(item_id)
+             else:
+                 seen_ids.add(item_id)
+                 validated_items.append(v_item)
+                 vectors_to_process.append(v_item.vector)

-         # Validate and prepare vectors
-         vectors, norms, vectors_list = self._validate_and_prepare_vectors(input_array)
+         if duplicate_ids:
+             raise ValueError(
+                 f"Duplicate IDs found in input array: {sorted(duplicate_ids)}"
+             )
+
+         # Validate and prepare vectors - Passing pre-extracted vectors
+         vectors, norms, vectors_list = self._validate_and_prepare_vectors(
+             vectors_to_process
+         )

-         # Build batch
+         # Build batch - localizing method call for performance
+         build_item = self._build_vector_batch_item
          vector_batch = [
-             self._build_vector_batch_item(item, i, norms, vectors_list, is_hybrid)
-             for i, item in enumerate(input_array)
+             build_item(item, i, norms, vectors_list, is_hybrid)
+             for i, item in enumerate(validated_items)
          ]

          serialized_data = msgpack.packb(
@@ -329,62 +344,23 @@ class Index:

          return "Vectors inserted successfully"

-     def _validate_query_params(
-         self, top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
-     ):
+     def _validate_query_params(self, query: QueryRequest):
          """
-         Validate query parameters.
+         Validate query parameters against index configuration.

          Args:
-             top_k: Number of results to return
-             ef: HNSW ef_search parameter
-             has_sparse: Whether sparse query is provided
-             has_dense: Whether dense query is provided
-             sparse_indices: Sparse vector indices
-             sparse_values: Sparse vector values
+             query: Validated QueryRequest model

          Raises:
-             ValueError: If parameters are invalid
+             ValueError: If parameters are invalid for this index
          """
-         # Validate top_k parameter
-         if top_k > MAX_TOP_K_ALLOWED or top_k <= 0:
-             raise ValueError(
-                 f"top_k must be between 1 and {MAX_TOP_K_ALLOWED}, got {top_k}"
-             )
-
-         # Validate ef parameter
-         if ef > MAX_EF_SEARCH_ALLOWED:
-             raise ValueError(
-                 f"ef search cannot be greater than {MAX_EF_SEARCH_ALLOWED}"
-             )
-
-         # At least one query type must be provided
-         if not has_dense and not has_sparse:
-             raise ValueError(
-                 "At least one of 'vector' or 'sparse_indices'/'sparse_values' "
-                 "must be provided."
-             )
-
          # Cannot use sparse query on dense-only index
-         if has_sparse and not self.is_hybrid:
+         if query.sparse_indices is not None and not self.is_hybrid:
              raise ValueError(
                  "Cannot perform sparse search on a dense-only index. "
                  "Create index with sparse_dim > 0 for hybrid support."
              )

-         # If one sparse parameter is provided, both must be provided
-         if has_sparse:
-             if sparse_indices is None or sparse_values is None:
-                 raise ValueError(
-                     "Both sparse_indices and sparse_values must be provided together."
-                 )
-             if len(sparse_indices) != len(sparse_values):
-                 raise ValueError(
-                     f"sparse_indices and sparse_values must have the same "
-                     f"length. Got {len(sparse_indices)} indices and "
-                     f"{len(sparse_values)} values."
-                 )
-
      def _prepare_dense_vector(self, vector):
          """
          Prepare and validate dense query vector.
@@ -513,33 +489,42 @@ class Index:
              ... filter={"category": "A"}
              ... )
          """
-         # Validate sparse query parameters
-         has_sparse = sparse_indices is not None or sparse_values is not None
-         has_dense = vector is not None
-
-         # Validate all query parameters
-         self._validate_query_params(
-             top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
+         # Validate and prepare query using Pydantic
+         query_params = QueryRequest(
+             vector=vector,
+             top_k=top_k,
+             filter=filter,
+             ef=ef,
+             include_vectors=include_vectors,
+             sparse_indices=sparse_indices,
+             sparse_values=sparse_values,
          )

+         # Additional index-specific validation
+         self._validate_query_params(query_params)
+
          # Prepare search request headers
          headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}

          # Prepare search request data
-         data = {"k": top_k, "ef": ef, "include_vectors": include_vectors}
+         data = {
+             "k": query_params.top_k,
+             "ef": query_params.ef,
+             "include_vectors": query_params.include_vectors,
+         }

          # Add dense vector if provided
-         if has_dense:
-             data["vector"] = self._prepare_dense_vector(vector)
+         if query_params.vector is not None:
+             data["vector"] = self._prepare_dense_vector(query_params.vector)

          # Add sparse query if provided
-         if has_sparse:
-             data["sparse_indices"] = list(sparse_indices)
-             data["sparse_values"] = [float(v) for v in sparse_values]
+         if query_params.sparse_indices is not None:
+             data["sparse_indices"] = list(query_params.sparse_indices)
+             data["sparse_values"] = [float(v) for v in query_params.sparse_values]

          # Add filter if provided
-         if filter:
-             data["filter"] = orjson.dumps(filter).decode("utf-8")
+         if query_params.filter:
+             data["filter"] = orjson.dumps(query_params.filter).decode("utf-8")

          url = f"{self.url}/index/{self.name}/search"

@@ -555,7 +540,7 @@
          results = msgpack.unpackb(response.content, raw=False)

          # Process and format results
-         return self._process_query_results(results, top_k, include_vectors)
+         return self._process_query_results(results, query_params.top_k, include_vectors)

      def delete_vector(self, id):
          """
@@ -584,6 +569,35 @@

          return response.text + " rows deleted"

+     def delete_with_filter(self, filter):
+         """
+         Delete multiple vectors based on a filter.
+
+         Deletes all vectors that match the provided filter criteria.
+
+         Args:
+             filter: Dictionary containing filter criteria
+
+         Returns:
+             str: Server response with deletion details
+
+         Raises:
+             HTTPError: If deletion fails
+         """
+         headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
+
+         data = {"filter": filter}
+
+         url = f"{self.url}/index/{self.name}/vectors/delete"
+
+         http_client = self._get_session_client()
+         response = http_client.delete(url, headers=headers, json=data)
+
+         if response.status_code != 200:
+             raise_exception(response.status_code, response.text)
+
+         return response.text
+
      def get_vector(self, id):
          """
          Retrieve a single vector by ID.
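The rewritten `upsert()` path above now surfaces repeated IDs as a single error instead of letting later rows silently overwrite earlier ones. A hedged sketch of how that looks from the caller's side — it assumes a reachable Endee deployment, and the token and `"my-index"` name are placeholders:

```python
# Sketch of the new upsert-time duplicate check (per the loop added in
# the @@ -294 hunk above, which runs before any network serialization).
from endee import Endee

client = Endee(token="your-token-here")          # placeholder token
index = client.get_index(name="my-index")        # hypothetical index name

try:
    index.upsert([
        {"id": "a", "vector": [0.1, 0.2]},
        {"id": "a", "vector": [0.3, 0.4]},       # same ID again
    ])
except ValueError as exc:
    print(exc)  # Duplicate IDs found in input array: ['a']
```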
@@ -0,0 +1,138 @@
+ import re
+ import sys
+ from typing import Any, Dict, List, Optional, Union
+
+ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+
+ from .constants import (
+     DEFAULT_EF_SEARCH,
+     DEFAULT_TOPK,
+     MAX_DIMENSION_ALLOWED,
+     MAX_EF_SEARCH_ALLOWED,
+     MAX_INDEX_NAME_LENGTH_ALLOWED,
+     MAX_TOP_K_ALLOWED,
+     PRECISION_TYPES_SUPPORTED,
+     SPACE_TYPES_SUPPORTED,
+     Precision,
+ )
+
+
+ class VectorItem(BaseModel):
+     """Model for a single vector item in an upsert operation."""
+
+     id: str = Field(..., min_length=1)
+     vector: List[float]
+     meta: Optional[Dict[str, Any]] = Field(default_factory=dict)
+     filter: Optional[Dict[str, Any]] = Field(default_factory=dict)
+     sparse_indices: Optional[List[int]] = None
+     sparse_values: Optional[List[float]] = None
+
+     @model_validator(mode="after")
+     def validate_sparse_data(self) -> "VectorItem":
+         if (self.sparse_indices is None) != (self.sparse_values is None):
+             raise ValueError(
+                 "Both sparse_indices and sparse_values must be provided together"
+             )
+         if self.sparse_indices is not None and len(self.sparse_indices) != len(
+             self.sparse_values
+         ):
+             raise ValueError("sparse_indices and sparse_values must match in length")
+         return self
+
+
+ class QueryRequest(BaseModel):
+     """Model for query parameters."""
+
+     vector: Optional[List[float]] = None
+     top_k: int = Field(default=DEFAULT_TOPK, gt=0, le=MAX_TOP_K_ALLOWED)
+     filter: Optional[List[Dict[str, Any]]] = None
+     ef: int = Field(default=DEFAULT_EF_SEARCH, le=MAX_EF_SEARCH_ALLOWED)
+     include_vectors: bool = False
+     sparse_indices: Optional[List[int]] = None
+     sparse_values: Optional[List[float]] = None
+
+     @model_validator(mode="after")
+     def validate_query_type(self) -> "QueryRequest":
+         has_dense = self.vector is not None
+         has_sparse = self.sparse_indices is not None or self.sparse_values is not None
+
+         if not has_dense and not has_sparse:
+             raise ValueError(
+                 "At least one of 'vector' or 'sparse_indices'/'sparse_values'"
+                 " must be provided."
+             )
+
+         if (self.sparse_indices is None) != (self.sparse_values is None):
+             raise ValueError(
+                 "Both sparse_indices and sparse_values must be provided together"
+             )
+
+         if self.sparse_indices is not None and len(self.sparse_indices) != len(
+             self.sparse_values
+         ):
+             raise ValueError("sparse_indices and sparse_values must match in length")
+
+         return self
+
+
+ class IndexCreateRequest(BaseModel):
+     """Model for index creation parameters."""
+
+     name: str
+     dimension: int = Field(..., gt=0, le=MAX_DIMENSION_ALLOWED)
+     space_type: str
+     M: int = Field(..., gt=0)
+     ef_con: int = Field(..., gt=0)
+     precision: Union[str, Precision]
+     version: Optional[int] = None
+     sparse_dim: int = Field(default=0, ge=0, le=sys.maxsize)
+
+     @field_validator("name")
+     @classmethod
+     def validate_name(cls, v: str) -> str:
+         if not re.match(r"^[a-zA-Z0-9_]+$", v):
+             raise ValueError(
+                 "Index name must be alphanumeric and can contain underscores"
+             )
+         if len(v) > MAX_INDEX_NAME_LENGTH_ALLOWED:
+             raise ValueError(
+                 f"Index name should be less than {MAX_INDEX_NAME_LENGTH_ALLOWED}"
+                 " characters"
+             )
+         return v
+
+     @field_validator("space_type")
+     @classmethod
+     def validate_space_type(cls, v: str) -> str:
+         v = v.lower()
+         if v not in SPACE_TYPES_SUPPORTED:
+             raise ValueError(
+                 f"Invalid space type: {v}. Must be one of {SPACE_TYPES_SUPPORTED}"
+             )
+         return v
+
+     @field_validator("precision")
+     @classmethod
+     def validate_precision(cls, v: Union[str, Precision]) -> Union[str, Precision]:
+         if isinstance(v, Precision):
+             return v
+         if v not in PRECISION_TYPES_SUPPORTED:
+             raise ValueError(
+                 f"Invalid precision: {v}. Must be one of {PRECISION_TYPES_SUPPORTED}"
+             )
+         return v
+
+
+ class IndexMetadata(BaseModel):
+     """Model for index metadata returned by the server."""
+
+     model_config = ConfigDict(populate_by_name=True)
+
+     name: Optional[str] = Field(None, alias="name")
+     lib_token: str
+     total_elements: int = Field(..., alias="total_elements")
+     space_type: str = Field(..., alias="space_type")
+     dimension: int = Field(..., alias="dimension")
+     precision: Optional[str] = Field(None, alias="precision")
+     M: int = Field(..., alias="M")
+     sparse_dim: int = Field(0, alias="sparse_dim")
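Since `endee/schema.py` is entirely new in this release, here is a short sketch of the validators driven directly. It assumes endee 0.1.9 with `pydantic>=2.0.0` is installed; all model and field names are taken from the file above, and the concrete argument values are illustrative.

```python
# Driving the new Pydantic models directly (pydantic v2 semantics: the
# validators' ValueErrors surface wrapped in a ValidationError).
from pydantic import ValidationError

from endee.schema import IndexCreateRequest, VectorItem

# Defaults are applied for omitted optional fields.
item = VectorItem(id="vec1", vector=[0.1, 0.2, 0.3])
print(item.meta, item.filter)  # {} {}

# Pairing rule: sparse_indices without sparse_values is rejected at
# construction time by the model_validator.
try:
    VectorItem(id="vec2", vector=[0.1], sparse_indices=[3])
except ValidationError as exc:
    print(exc.errors()[0]["msg"])

# Index names must match ^[a-zA-Z0-9_]+$ per validate_name.
try:
    IndexCreateRequest(
        name="bad name!",   # space and '!' are rejected
        dimension=128,      # assumes MAX_DIMENSION_ALLOWED >= 128
        space_type="cosine",
        M=16,
        ef_con=128,
        precision="float32",
    )
except ValidationError as exc:
    print(exc.errors()[0]["msg"])
```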
@@ -35,10 +35,10 @@ def is_valid_index_name(index_name):
          False
      """
      # Pattern matches alphanumeric characters and underscores only
-     pattern = re.compile(r'^[a-zA-Z0-9_]+$')
+     pattern = re.compile(r"^[a-zA-Z0-9_]+$")

      # Check both pattern match and length constraint
      return (
-         pattern.match(index_name) is not None and
-         len(index_name) <= MAX_INDEX_NAME_LENGTH_ALLOWED
+         pattern.match(index_name) is not None
+         and len(index_name) <= MAX_INDEX_NAME_LENGTH_ALLOWED
      )
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: endee
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
  Home-page: https://endee.io
  Author: Endee Labs
@@ -29,6 +29,7 @@ Requires-Dist: httpx[http2]>=0.28.1
  Requires-Dist: numpy>=2.2.4
  Requires-Dist: msgpack>=1.1.0
  Requires-Dist: orjson>=3.11.5
+ Requires-Dist: pydantic>=2.0.0
  Dynamic: author
  Dynamic: author-email
  Dynamic: classifier
@@ -602,6 +603,23 @@ index = client.get_index(name="your-index-name")
  index.delete_vector("vec1")
  ```

+ ### Filtered Deletion
+
+ In cases where you don't know the exact vector `id`, but want to delete vectors based on filter fields, you can use filtered deletion. This is especially useful for:
+
+ - Bulk deleting vectors by tag, type, or timestamp
+ - Enforcing access control or data expiration policies
+
+ ```python
+ from endee import Endee
+
+ client = Endee(token="your-token-here")
+ index = client.get_index(name="your-index-name")
+
+ # Delete all vectors matching filter conditions
+ index.delete_with_filter([{"tags": {"$eq": "important"}}])
+ ```
+
  ### Index Deletion

  Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -669,6 +687,7 @@ info = index.describe()
  | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
  | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
  | `delete_vector(id)` | Delete a vector by ID |
+ | `delete_with_filter(filter)` | Delete vectors matching a filter |
  | `get_vector(id)` | Get a specific vector by ID |
  | `describe()` | Get index statistics and configuration |

@@ -7,6 +7,7 @@ endee/constants.py
  endee/endee.py
  endee/exceptions.py
  endee/index.py
+ endee/schema.py
  endee/utils.py
  endee.egg-info/PKG-INFO
  endee.egg-info/SOURCES.txt
@@ -3,3 +3,4 @@ httpx[http2]>=0.28.1
  numpy>=2.2.4
  msgpack>=1.1.0
  orjson>=3.11.5
+ pydantic>=2.0.0
@@ -15,7 +15,7 @@ with open("README.md", encoding="utf-8") as f:
  setup(
      # Package Metadata
      name="endee",
-     version="0.1.7",
+     version="0.1.9",
      author="Endee Labs",
      author_email="dev@endee.io",
      description=(
@@ -33,6 +33,7 @@ setup(
          "numpy>=2.2.4",  # Array operations and vector normalization
          "msgpack>=1.1.0",  # Efficient binary serialization
          "orjson>=3.11.5",  # Ultra-fast JSON serialization/deserialization
+         "pydantic>=2.0.0",  # Data validation and settings management
      ],
      # Python Version Requirements
      python_requires=">=3.6",