endee 0.1.6__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: endee
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
5
5
  Home-page: https://endee.io
6
6
  Author: Endee Labs
7
7
  Author-email: dev@endee.io
8
8
  Project-URL: Documentation, https://docs.endee.io
9
- Project-URL: Source, https://github.com/endee-labs/endee-python
10
- Project-URL: Bug Reports, https://github.com/endee-labs/endee-python/issues
11
9
  Keywords: vector database,embeddings,machine learning,AI,similarity search,HNSW,nearest neighbors
12
10
  Classifier: Development Status :: 4 - Beta
13
11
  Classifier: Intended Audience :: Developers
@@ -604,6 +602,23 @@ index = client.get_index(name="your-index-name")
604
602
  index.delete_vector("vec1")
605
603
  ```
606
604
 
605
+ ### Filtered Deletion
606
+
607
+ In cases where you don't know the exact vector `id`, but want to delete vectors based on filter fields, you can use filtered deletion. This is especially useful for:
608
+
609
+ - Bulk deleting vectors by tag, type, or timestamp
610
+ - Enforcing access control or data expiration policies
611
+
612
+ ```python
613
+ from endee import Endee
614
+
615
+ client = Endee(token="your-token-here")
616
+ index = client.get_index(name="your-index-name")
617
+
618
+ # Delete all vectors matching filter conditions
619
+ index.delete_with_filter([{"tags": {"$eq": "important"}}])
620
+ ```
621
+
607
622
  ### Index Deletion
608
623
 
609
624
  Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -671,6 +686,7 @@ info = index.describe()
671
686
  | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
672
687
  | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
673
688
  | `delete_vector(id)` | Delete a vector by ID |
689
+ | `delete_with_filter(filter)` | Delete vectors matching a filter |
674
690
  | `get_vector(id)` | Get a specific vector by ID |
675
691
  | `describe()` | Get index statistics and configuration |
676
692
 
@@ -558,6 +558,23 @@ index = client.get_index(name="your-index-name")
558
558
  index.delete_vector("vec1")
559
559
  ```
560
560
 
561
+ ### Filtered Deletion
562
+
563
+ In cases where you don't know the exact vector `id`, but want to delete vectors based on filter fields, you can use filtered deletion. This is especially useful for:
564
+
565
+ - Bulk deleting vectors by tag, type, or timestamp
566
+ - Enforcing access control or data expiration policies
567
+
568
+ ```python
569
+ from endee import Endee
570
+
571
+ client = Endee(token="your-token-here")
572
+ index = client.get_index(name="your-index-name")
573
+
574
+ # Delete all vectors matching filter conditions
575
+ index.delete_with_filter([{"tags": {"$eq": "important"}}])
576
+ ```
577
+
561
578
  ### Index Deletion
562
579
 
563
580
  Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -625,6 +642,7 @@ info = index.describe()
625
642
  | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
626
643
  | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
627
644
  | `delete_vector(id)` | Delete a vector by ID |
645
+ | `delete_with_filter(filter)` | Delete vectors matching a filter |
628
646
  | `get_vector(id)` | Get a specific vector by ID |
629
647
  | `describe()` | Get index statistics and configuration |
630
648
 
@@ -6,6 +6,7 @@ vector database service. It includes session management, index operations.
6
6
  """
7
7
 
8
8
  import os
9
+ import sys
9
10
  from functools import lru_cache
10
11
 
11
12
  import httpx
@@ -68,7 +69,7 @@ class SessionManager:
68
69
  pool_connections: int = SESSION_POOL_CONNECTIONS,
69
70
  pool_maxsize: int = SESSION_POOL_MAXSIZE,
70
71
  max_retries: int = SESSION_MAX_RETRIES,
71
- pool_block: bool = True
72
+ pool_block: bool = True,
72
73
  ):
73
74
  """
74
75
  Initialize the SessionManager.
@@ -122,9 +123,9 @@ class SessionManager:
122
123
  total=self.max_retries,
123
124
  backoff_factor=0.5,
124
125
  status_forcelist=HTTP_STATUS_CODES,
125
- allowed_methods=HTTP_METHODS_ALLOWED
126
+ allowed_methods=HTTP_METHODS_ALLOWED,
126
127
  ),
127
- pool_block=self.pool_block
128
+ pool_block=self.pool_block,
128
129
  )
129
130
 
130
131
  session.mount(HTTP_PROTOCOL, adapter)
@@ -174,7 +175,7 @@ class ClientManager:
174
175
  max_keepalive_connections: int = HTTPX_MAX_KEEPALIVE_CONNECTIONS,
175
176
  max_retries: int = HTTPX_MAX_RETRIES,
176
177
  timeout: float = HTTPX_TIMEOUT_SEC,
177
- enable_http2: bool = False
178
+ enable_http2: bool = False,
178
179
  ):
179
180
  """
180
181
  Initialize the ClientManager.
@@ -226,9 +227,7 @@ class ClientManager:
226
227
  max_keepalive_connections=self.max_keepalive_connections,
227
228
  )
228
229
 
229
- transport = httpx.HTTPTransport(
230
- retries=self.max_retries
231
- )
230
+ transport = httpx.HTTPTransport(retries=self.max_retries)
232
231
 
233
232
  self._client = httpx.Client(
234
233
  http2=self.http2,
@@ -269,9 +268,7 @@ class Endee:
269
268
  """
270
269
 
271
270
  def __init__(
272
- self,
273
- token: str | None = None,
274
- http_library: str = HTTP_REQUESTS_LIBRARY
271
+ self, token: str | None = None, http_library: str = HTTP_REQUESTS_LIBRARY
275
272
  ):
276
273
  """
277
274
  Initialize the Endee client.
@@ -306,16 +303,12 @@ class Endee:
306
303
  if self.library == HTTP_REQUESTS_LIBRARY:
307
304
  # Centralized session manager - shared across all Index objects
308
305
  self.session_manager = SessionManager(
309
- pool_connections=10,
310
- pool_maxsize=10,
311
- max_retries=3
306
+ pool_connections=10, pool_maxsize=10, max_retries=3
312
307
  )
313
308
  elif self.library == HTTP_HTTPX_1_1_LIBRARY:
314
309
  # httpx.Client based manager for HTTP/1.1
315
310
  self.client_manager = ClientManager(
316
- max_connections=10,
317
- max_keepalive_connections=10,
318
- max_retries=3
311
+ max_connections=10, max_keepalive_connections=10, max_retries=3
319
312
  )
320
313
  elif self.library == HTTP_HTTPX_2_LIBRARY:
321
314
  # httpx.Client based manager for HTTP/2
@@ -323,7 +316,7 @@ class Endee:
323
316
  http2=True,
324
317
  max_connections=10,
325
318
  max_keepalive_connections=10,
326
- max_retries=3
319
+ max_retries=3,
327
320
  )
328
321
  else:
329
322
  raise ValueError(
@@ -394,7 +387,7 @@ class Endee:
394
387
  ef_con: int = DEFAULT_EF_CON,
395
388
  precision: str | Precision = Precision.INT8D,
396
389
  version: int = None,
397
- sparse_dim: int = DEFAULT_SPARSE_DIMENSION
390
+ sparse_dim: int = DEFAULT_SPARSE_DIMENSION,
398
391
  ):
399
392
  """
400
393
  Create a new vector index.
@@ -439,9 +432,14 @@ class Endee:
439
432
  )
440
433
 
441
434
  # Validate sparse dimension
435
+ # Lower bound check
442
436
  if sparse_dim < 0:
443
437
  raise ValueError("sparse_dim cannot be negative")
444
438
 
439
+ # Upper bound check
440
+ if sparse_dim > sys.maxsize:
441
+ raise ValueError(f"sparse_dim cannot exceed {sys.maxsize}")
442
+
445
443
  # Validate space type
446
444
  space_type = space_type.lower()
447
445
  if space_type not in SPACE_TYPES_SUPPORTED:
@@ -456,26 +454,23 @@ class Endee:
456
454
  )
457
455
 
458
456
  # Prepare request headers and data
459
- headers = {
460
- 'Authorization': f'{self.token}',
461
- 'Content-Type': 'application/json'
462
- }
457
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
463
458
  data = {
464
- 'index_name': name,
465
- 'dim': dimension,
466
- 'space_type': space_type,
467
- 'M': M,
468
- 'ef_con': ef_con,
469
- 'checksum': CHECKSUM,
470
- 'precision': precision,
471
- 'version': version
459
+ "index_name": name,
460
+ "dim": dimension,
461
+ "space_type": space_type,
462
+ "M": M,
463
+ "ef_con": ef_con,
464
+ "checksum": CHECKSUM,
465
+ "precision": precision,
466
+ "version": version,
472
467
  }
473
468
 
474
469
  # Add sparse dimension if specified
475
470
  if sparse_dim > 0:
476
- data['sparse_dim'] = sparse_dim
471
+ data["sparse_dim"] = sparse_dim
477
472
 
478
- url = f'{self.base_url}/index/create'
473
+ url = f"{self.base_url}/index/create"
479
474
 
480
475
  # Make API request using appropriate library
481
476
  if self.library == HTTP_REQUESTS_LIBRARY:
@@ -502,10 +497,10 @@ class Endee:
502
497
  HTTPError: If API request fails
503
498
  """
504
499
  headers = {
505
- 'Authorization': f'{self.token}',
500
+ "Authorization": f"{self.token}",
506
501
  }
507
502
 
508
- url = f'{self.base_url}/index/list'
503
+ url = f"{self.base_url}/index/list"
509
504
 
510
505
  # Make API request using appropriate library
511
506
  if self.library == HTTP_REQUESTS_LIBRARY:
@@ -539,10 +534,10 @@ class Endee:
539
534
  TODO - Clear the index from LRU cache when deleted
540
535
  """
541
536
  headers = {
542
- 'Authorization': f'{self.token}',
537
+ "Authorization": f"{self.token}",
543
538
  }
544
539
 
545
- url = f'{self.base_url}/index/{name}/delete'
540
+ url = f"{self.base_url}/index/{name}/delete"
546
541
 
547
542
  # Make API request using appropriate library
548
543
  if self.library == HTTP_REQUESTS_LIBRARY:
@@ -556,7 +551,7 @@ class Endee:
556
551
  if response.status_code != 200:
557
552
  raise_exception(response.status_code, response.text)
558
553
 
559
- return f'Index {name} deleted successfully'
554
+ return f"Index {name} deleted successfully"
560
555
 
561
556
  @lru_cache(maxsize=10) # noqa: B019
562
557
  def get_index(self, name: str):
@@ -575,12 +570,9 @@ class Endee:
575
570
  Raises:
576
571
  HTTPError: If API request fails
577
572
  """
578
- headers = {
579
- 'Authorization': f'{self.token}',
580
- 'Content-Type': 'application/json'
581
- }
573
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
582
574
 
583
- url = f'{self.base_url}/index/{name}/info'
575
+ url = f"{self.base_url}/index/{name}/info"
584
576
 
585
577
  # Get index details from the server
586
578
  if self.library == HTTP_REQUESTS_LIBRARY:
@@ -604,7 +596,7 @@ class Endee:
604
596
  url=self.base_url,
605
597
  version=self.version,
606
598
  params=data,
607
- session_client_manager=self.session_manager
599
+ session_client_manager=self.session_manager,
608
600
  )
609
601
  else:
610
602
  idx = Index(
@@ -613,7 +605,7 @@ class Endee:
613
605
  url=self.base_url,
614
606
  version=self.version,
615
607
  params=data,
616
- session_client_manager=self.client_manager
608
+ session_client_manager=self.client_manager,
617
609
  )
618
610
 
619
611
  return idx
@@ -62,7 +62,7 @@ class Index:
62
62
  url: str,
63
63
  version: int = 1,
64
64
  params=None,
65
- session_client_manager=None
65
+ session_client_manager=None,
66
66
  ):
67
67
  """
68
68
  Initialize an Index object.
@@ -105,9 +105,9 @@ class Index:
105
105
  Raises:
106
106
  ValueError: If manager doesn't have required methods
107
107
  """
108
- if hasattr(self.session_client_manager, 'get_session'):
108
+ if hasattr(self.session_client_manager, "get_session"):
109
109
  return self.session_client_manager.get_session()
110
- elif hasattr(self.session_client_manager, 'get_client'):
110
+ elif hasattr(self.session_client_manager, "get_client"):
111
111
  return self.session_client_manager.get_client()
112
112
  else:
113
113
  raise ValueError(
@@ -134,7 +134,125 @@ class Index:
134
134
  """
135
135
  return self.name
136
136
 
137
+ def _validate_and_prepare_vectors(self, input_array):
138
+ """
139
+ Validate and prepare vectors from input array.
140
+
141
+ Args:
142
+ input_array: List of vector dictionaries
143
+
144
+ Returns:
145
+ tuple: (vectors_array, norms_array, vectors_list)
146
+
147
+ Raises:
148
+ ValueError: If vector data is invalid
149
+ """
150
+ # Extract vectors
151
+ try:
152
+ vectors = np.asarray(
153
+ [item["vector"] for item in input_array], dtype=np.float32
154
+ )
155
+ except Exception as e:
156
+ raise ValueError(f"Invalid vector data: {e}") from e
157
+
158
+ # Validate vector shape
159
+ if vectors.ndim != 2 or vectors.shape[1] != self.dimension:
160
+ raise ValueError(
161
+ f"Expected shape (N, {self.dimension}), got {vectors.shape}"
162
+ )
163
+
164
+ # Validate finite values
165
+ if not np.isfinite(vectors).all():
166
+ raise ValueError("Vectors contain NaN or infinity")
167
+
168
+ # Normalize vectors for cosine similarity
169
+ N = len(input_array)
170
+ if self.space_type == "cosine":
171
+ norms = np.sqrt(np.einsum("ij,ij->i", vectors, vectors))
172
+ np.maximum(norms, 1e-10, out=norms) # Prevent division by zero
173
+ vectors /= norms[:, None]
174
+ else:
175
+ norms = np.ones(N, dtype=np.float32)
176
+
177
+ return vectors, norms, vectors.tolist()
178
+
179
+ def _validate_sparse_data(self, sparse_indices, sparse_values):
180
+ """
181
+ Validate sparse data for hybrid indexes.
182
+
183
+ Args:
184
+ sparse_indices: List of sparse vector indices
185
+ sparse_values: List of sparse vector values
137
186
 
187
+ Raises:
188
+ ValueError: If sparse data is invalid
189
+ """
190
+ if len(sparse_indices) != len(sparse_values):
191
+ raise ValueError("sparse_indices and sparse_values must match in length")
192
+
193
+ if sparse_indices:
194
+ min_idx = min(sparse_indices)
195
+ max_idx = max(sparse_indices)
196
+ if min_idx < 0 or max_idx >= self.sparse_dim:
197
+ raise ValueError(f"Sparse indices out of bounds [0, {self.sparse_dim})")
198
+
199
+ def _build_vector_batch_item(self, item, i, norms, vectors_list, is_hybrid):
200
+ """
201
+ Build a single vector batch item.
202
+
203
+ Args:
204
+ item: Input dictionary for one vector
205
+ i: Index in the batch
206
+ norms: Array of vector norms
207
+ vectors_list: List of vectors
208
+ is_hybrid: Whether index is hybrid
209
+
210
+ Returns:
211
+ list: Vector batch item
212
+
213
+ Raises:
214
+ ValueError: If sparse data is invalid
215
+ """
216
+ get_func = dict.get
217
+ dumps_func = orjson.dumps
218
+ str_func = str
219
+ float_func = float
220
+
221
+ sparse_indices = get_func(item, "sparse_indices", None)
222
+ sparse_values = get_func(item, "sparse_values", None)
223
+ has_sparse = sparse_indices is not None or sparse_values is not None
224
+
225
+ # XOR logic: hybrid index requires sparse data,
226
+ # dense-only forbids it
227
+ if has_sparse != is_hybrid:
228
+ raise ValueError(
229
+ "Hybrid index requires sparse data(along with dense vectors), "
230
+ "and dense-only index forbids it."
231
+ )
232
+
233
+ # Validate sparse data if present
234
+ if is_hybrid:
235
+ self._validate_sparse_data(sparse_indices, sparse_values)
236
+
237
+ # Build vector object: [id, meta, filter, norm, vector, ...]
238
+ obj = [
239
+ str_func(get_func(item, "id", "")),
240
+ json_zip(get_func(item, "meta", {})),
241
+ dumps_func(get_func(item, "filter", {})).decode("utf-8"),
242
+ float_func(norms[i]),
243
+ vectors_list[i],
244
+ ]
245
+
246
+ # Add sparse components for hybrid indexes
247
+ if is_hybrid:
248
+ obj.extend(
249
+ (
250
+ sparse_indices,
251
+ [float(v) for v in sparse_values],
252
+ )
253
+ )
254
+
255
+ return obj
138
256
 
139
257
  def upsert(self, input_array):
140
258
  """
@@ -172,119 +290,52 @@ class Index:
172
290
  ... ])
173
291
  """
174
292
  if len(input_array) > MAX_VECTORS_PER_BATCH:
175
- raise ValueError("Cannot insert more than 1000 vectors at a time")
176
-
177
- N = len(input_array)
178
- is_hybrid = self.is_hybrid
179
- sparse_dim = self.sparse_dim
180
-
181
- # ---------- Vector extraction ----------
182
- try:
183
- vectors = np.asarray(
184
- [item["vector"] for item in input_array],
185
- dtype=np.float32
186
- )
187
- except Exception as e:
188
- raise ValueError(f"Invalid vector data: {e}") from e
189
-
190
- # Validate vector shape
191
- if vectors.ndim != 2 or vectors.shape[1] != self.dimension:
192
293
  raise ValueError(
193
- f"Expected shape (N, {self.dimension}), got {vectors.shape}"
294
+ f"Cannot insert more than {MAX_VECTORS_PER_BATCH} vectors at a time"
194
295
  )
195
296
 
196
- # ---------- Validation (single pass) ----------
197
- if not np.isfinite(vectors).all():
198
- raise ValueError("Vectors contain NaN or infinity")
199
-
200
- # Note: Negative zero check disabled as it's expensive and rarely useful
201
- # if np.any((vectors == 0.0) & np.signbit(vectors)):
202
- # raise ValueError("Vectors contain negative zero (-0.0)")
203
-
204
- # ---------- Normalization ----------
205
- # Normalize vectors for cosine similarity
206
- if self.space_type == "cosine":
207
- norms = np.sqrt(np.einsum("ij,ij->i", vectors, vectors))
208
- np.maximum(norms, 1e-10, out=norms) # Prevent division by zero
209
- vectors /= norms[:, None]
210
- else:
211
- norms = np.ones(N, dtype=np.float32)
297
+ # Validate IDs upfront and check for duplicates
298
+ seen_ids = set()
299
+ duplicate_ids = set()
212
300
 
213
- # Convert to Python list once to avoid repeated conversions
214
- vectors_list = vectors.tolist()
301
+ for item in input_array:
302
+ id_val = item.get("id", "")
303
+ if not id_val or id_val is None:
304
+ raise ValueError("All vectors must have a non-empty ID")
215
305
 
216
- # ---------- Batch construction ----------
217
- vector_batch = []
218
- # Use local references for speed
219
- vector_append = vector_batch.append
220
- get_func = dict.get
221
- dumps_func = orjson.dumps
222
- str_func = str
223
- float_func = float
306
+ if id_val in seen_ids:
307
+ duplicate_ids.add(id_val)
308
+ else:
309
+ seen_ids.add(id_val)
224
310
 
225
- for i, item in enumerate(input_array):
226
- sparse_indices = get_func(item, "sparse_indices", None)
227
- sparse_values = get_func(item, "sparse_values", None)
228
- has_sparse = (
229
- sparse_indices is not None or sparse_values is not None
311
+ if duplicate_ids:
312
+ raise ValueError(
313
+ f"Duplicate IDs found in input array: {sorted(duplicate_ids)}"
230
314
  )
231
315
 
232
- # XOR logic: hybrid index requires sparse data,
233
- # dense-only forbids it
234
- if has_sparse != is_hybrid:
235
- raise ValueError(
236
- "Hybrid index requires sparse data(along with dense vectors), "
237
- "and dense-only index forbids it."
238
- )
316
+ is_hybrid = self.is_hybrid
239
317
 
240
- # Validate sparse data if present
241
- if is_hybrid:
242
- if len(sparse_indices) != len(sparse_values):
243
- raise ValueError(
244
- "sparse_indices and sparse_values must match in length"
245
- )
246
-
247
- if sparse_indices:
248
- min_idx = min(sparse_indices)
249
- max_idx = max(sparse_indices)
250
- if min_idx < 0 or max_idx >= sparse_dim:
251
- raise ValueError(
252
- f"Sparse indices out of bounds [0, {sparse_dim})"
253
- )
254
-
255
- # Build vector object: [id, meta, filter, norm, vector, ...]
256
- obj = [
257
- str_func(get_func(item, "id", "")),
258
- json_zip(get_func(item, "meta", {})),
259
- dumps_func(get_func(item, "filter", {})).decode('utf-8'),
260
- float_func(norms[i]),
261
- vectors_list[i],
262
- ]
263
-
264
- # Add sparse components for hybrid indexes
265
- if is_hybrid:
266
- obj.extend((
267
- sparse_indices,
268
- [float(v) for v in sparse_values],
269
- ))
318
+ # Validate and prepare vectors
319
+ vectors, norms, vectors_list = self._validate_and_prepare_vectors(input_array)
270
320
 
271
- vector_append(obj)
321
+ # Build batch
322
+ vector_batch = [
323
+ self._build_vector_batch_item(item, i, norms, vectors_list, is_hybrid)
324
+ for i, item in enumerate(input_array)
325
+ ]
272
326
 
273
327
  serialized_data = msgpack.packb(
274
328
  vector_batch, use_bin_type=True, use_single_float=True
275
329
  )
276
- headers = {
277
- 'Authorization': self.token,
278
- 'Content-Type': 'application/msgpack'
279
- }
330
+ headers = {"Authorization": self.token, "Content-Type": "application/msgpack"}
280
331
 
281
332
  http_client = self._get_session_client()
282
333
 
283
334
  # Sending the batch to the server
284
335
  response = http_client.post(
285
- f'{self.url}/index/{self.name}/vector/insert',
336
+ f"{self.url}/index/{self.name}/vector/insert",
286
337
  headers=headers,
287
- data=serialized_data
338
+ data=serialized_data,
288
339
  )
289
340
 
290
341
  if response.status_code != 200:
@@ -292,7 +343,140 @@ class Index:
292
343
 
293
344
  return "Vectors inserted successfully"
294
345
 
346
+ def _validate_query_params(
347
+ self, top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
348
+ ):
349
+ """
350
+ Validate query parameters.
351
+
352
+ Args:
353
+ top_k: Number of results to return
354
+ ef: HNSW ef_search parameter
355
+ has_sparse: Whether sparse query is provided
356
+ has_dense: Whether dense query is provided
357
+ sparse_indices: Sparse vector indices
358
+ sparse_values: Sparse vector values
295
359
 
360
+ Raises:
361
+ ValueError: If parameters are invalid
362
+ """
363
+ # Validate top_k parameter
364
+ if top_k > MAX_TOP_K_ALLOWED or top_k <= 0:
365
+ raise ValueError(
366
+ f"top_k must be between 1 and {MAX_TOP_K_ALLOWED}, got {top_k}"
367
+ )
368
+
369
+ # Validate ef parameter
370
+ if ef > MAX_EF_SEARCH_ALLOWED:
371
+ raise ValueError(
372
+ f"ef search cannot be greater than {MAX_EF_SEARCH_ALLOWED}"
373
+ )
374
+
375
+ # At least one query type must be provided
376
+ if not has_dense and not has_sparse:
377
+ raise ValueError(
378
+ "At least one of 'vector' or 'sparse_indices'/'sparse_values' "
379
+ "must be provided."
380
+ )
381
+
382
+ # Cannot use sparse query on dense-only index
383
+ if has_sparse and not self.is_hybrid:
384
+ raise ValueError(
385
+ "Cannot perform sparse search on a dense-only index. "
386
+ "Create index with sparse_dim > 0 for hybrid support."
387
+ )
388
+
389
+ # If one sparse parameter is provided, both must be provided
390
+ if has_sparse:
391
+ if sparse_indices is None or sparse_values is None:
392
+ raise ValueError(
393
+ "Both sparse_indices and sparse_values must be provided together."
394
+ )
395
+ if len(sparse_indices) != len(sparse_values):
396
+ raise ValueError(
397
+ f"sparse_indices and sparse_values must have the same "
398
+ f"length. Got {len(sparse_indices)} indices and "
399
+ f"{len(sparse_values)} values."
400
+ )
401
+
402
+ def _prepare_dense_vector(self, vector):
403
+ """
404
+ Prepare and validate dense query vector.
405
+
406
+ Args:
407
+ vector: Input vector
408
+
409
+ Returns:
410
+ list: Normalized vector as list
411
+
412
+ Raises:
413
+ ValueError: If vector is invalid
414
+ """
415
+ # Convert to numpy array
416
+ vec = np.asarray(vector, dtype=np.float32)
417
+
418
+ # Validate shape
419
+ if vec.shape != (self.dimension,):
420
+ raise ValueError(
421
+ f"Vector must have shape ({self.dimension},), got {vec.shape}"
422
+ )
423
+
424
+ # Validate finite values
425
+ if not np.isfinite(vec).all():
426
+ raise ValueError("Vector contains NaN or infinity")
427
+
428
+ # Normalize for cosine similarity using einsum
429
+ if self.space_type == "cosine":
430
+ norm = np.sqrt(np.einsum("i,i->", vec, vec))
431
+ norm = max(norm, 1e-10) # Prevent division by zero
432
+ vec = vec / norm
433
+
434
+ return vec.tolist()
435
+
436
+ def _process_query_results(self, results, top_k, include_vectors):
437
+ """
438
+ Process and format query results.
439
+
440
+ Args:
441
+ results: Raw msgpack results from server
442
+ top_k: Number of results requested
443
+ include_vectors: Whether to include vector data
444
+
445
+ Returns:
446
+ list: Processed results
447
+ """
448
+ processed_results = []
449
+ results = results[:top_k]
450
+
451
+ for result in results:
452
+ similarity = result[0]
453
+ vector_id = result[1]
454
+ meta_data = result[2]
455
+ filter_str = result[3]
456
+ norm_value = result[4]
457
+ vector_data = result[5] if len(result) > 5 else []
458
+
459
+ processed = {
460
+ "id": vector_id,
461
+ "similarity": similarity,
462
+ "distance": 1.0 - similarity,
463
+ "meta": json_unzip(meta_data),
464
+ "norm": norm_value,
465
+ }
466
+
467
+ # Add filter if present
468
+ if filter_str:
469
+ processed["filter"] = orjson.loads(filter_str)
470
+
471
+ # Add vector data if requested
472
+ if include_vectors and vector_data:
473
+ processed["vector"] = list(vector_data)
474
+ else:
475
+ processed["vector"] = []
476
+
477
+ processed_results.append(processed)
478
+
479
+ return processed_results
296
480
 
297
481
  def query(
298
482
  self,
@@ -303,7 +487,7 @@ class Index:
303
487
  include_vectors=False,
304
488
  log=False,
305
489
  sparse_indices=None,
306
- sparse_values=None
490
+ sparse_values=None,
307
491
  ):
308
492
  """
309
493
  Search for similar vectors in the index.
@@ -343,97 +527,35 @@ class Index:
343
527
  ... filter={"category": "A"}
344
528
  ... )
345
529
  """
346
- # Validate top_k parameter
347
- if top_k > MAX_TOP_K_ALLOWED or top_k <= 0:
348
- raise ValueError(
349
- f"top_k must be between 1 and {MAX_TOP_K_ALLOWED}, got {top_k}"
350
- )
351
-
352
- # Validate ef parameter
353
- if ef > MAX_EF_SEARCH_ALLOWED:
354
- raise ValueError(
355
- f"ef search cannot be greater than {MAX_EF_SEARCH_ALLOWED}"
356
- )
357
-
358
530
  # Validate sparse query parameters
359
531
  has_sparse = sparse_indices is not None or sparse_values is not None
360
532
  has_dense = vector is not None
361
533
 
362
- # At least one query type must be provided
363
- if not has_dense and not has_sparse:
364
- raise ValueError(
365
- "At least one of 'vector' or 'sparse_indices'/'sparse_values' "
366
- "must be provided."
367
- )
368
-
369
- # Cannot use sparse query on dense-only index
370
- if has_sparse and not self.is_hybrid:
371
- raise ValueError(
372
- "Cannot perform sparse search on a dense-only index. "
373
- "Create index with sparse_dim > 0 for hybrid support."
374
- )
375
-
376
- # If one sparse parameter is provided, both must be provided
377
- if has_sparse:
378
- if sparse_indices is None or sparse_values is None:
379
- raise ValueError(
380
- "Both sparse_indices and sparse_values must be provided "
381
- "together."
382
- )
383
- if len(sparse_indices) != len(sparse_values):
384
- raise ValueError(
385
- f"sparse_indices and sparse_values must have the same "
386
- f"length. Got {len(sparse_indices)} indices and "
387
- f"{len(sparse_values)} values."
388
- )
534
+ # Validate all query parameters
535
+ self._validate_query_params(
536
+ top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
537
+ )
389
538
 
390
539
  # Prepare search request headers
391
- headers = {
392
- 'Authorization': f'{self.token}',
393
- 'Content-Type': 'application/json'
394
- }
540
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
395
541
 
396
542
  # Prepare search request data
397
- data = {
398
- 'k': top_k,
399
- 'ef': ef,
400
- 'include_vectors': include_vectors
401
- }
543
+ data = {"k": top_k, "ef": ef, "include_vectors": include_vectors}
402
544
 
403
545
  # Add dense vector if provided
404
546
  if has_dense:
405
- # Convert to numpy array
406
- vec = np.asarray(vector, dtype=np.float32)
407
-
408
- # Validate shape
409
- if vec.shape != (self.dimension,):
410
- raise ValueError(
411
- f"Vector must have shape ({self.dimension},), "
412
- f"got {vec.shape}"
413
- )
414
-
415
- # Validate finite values
416
- if not np.isfinite(vec).all():
417
- raise ValueError("Vector contains NaN or infinity")
418
-
419
- # Normalize for cosine similarity using einsum
420
- if self.space_type == "cosine":
421
- norm = np.sqrt(np.einsum("i,i->", vec, vec))
422
- norm = max(norm, 1e-10) # Prevent division by zero
423
- vec = vec / norm
424
-
425
- data['vector'] = vec.tolist()
547
+ data["vector"] = self._prepare_dense_vector(vector)
426
548
 
427
549
  # Add sparse query if provided
428
550
  if has_sparse:
429
- data['sparse_indices'] = list(sparse_indices)
430
- data['sparse_values'] = [float(v) for v in sparse_values]
551
+ data["sparse_indices"] = list(sparse_indices)
552
+ data["sparse_values"] = [float(v) for v in sparse_values]
431
553
 
432
554
  # Add filter if provided
433
555
  if filter:
434
- data['filter'] = orjson.dumps(filter).decode('utf-8')
556
+ data["filter"] = orjson.dumps(filter).decode("utf-8")
435
557
 
436
- url = f'{self.url}/index/{self.name}/search'
558
+ url = f"{self.url}/index/{self.name}/search"
437
559
 
438
560
  # Make API request
439
561
  http_client = self._get_session_client()
@@ -447,39 +569,7 @@ class Index:
447
569
  results = msgpack.unpackb(response.content, raw=False)
448
570
 
449
571
  # Process and format results
450
- # Result format: [similarity, id, meta, filter, norm, vector]
451
- processed_results = []
452
- results = results[:top_k]
453
-
454
- for result in results:
455
- similarity = result[0]
456
- vector_id = result[1]
457
- meta_data = result[2]
458
- filter_str = result[3]
459
- norm_value = result[4]
460
- vector_data = result[5] if len(result) > 5 else []
461
-
462
- processed = {
463
- 'id': vector_id,
464
- 'similarity': similarity,
465
- 'distance': 1.0 - similarity,
466
- 'meta': json_unzip(meta_data),
467
- 'norm': norm_value
468
- }
469
-
470
- # Add filter if present
471
- if filter_str:
472
- processed['filter'] = orjson.loads(filter_str)
473
-
474
- # Add vector data if requested
475
- if include_vectors and vector_data:
476
- processed['vector'] = list(vector_data)
477
- else:
478
- processed['vector'] = []
479
-
480
- processed_results.append(processed)
481
-
482
- return processed_results
572
+ return self._process_query_results(results, top_k, include_vectors)
483
573
 
484
574
  def delete_vector(self, id):
485
575
  """
@@ -495,10 +585,10 @@ class Index:
495
585
  HTTPError: If deletion fails
496
586
  """
497
587
  headers = {
498
- 'Authorization': f'{self.token}',
588
+ "Authorization": f"{self.token}",
499
589
  }
500
590
 
501
- url = f'{self.url}/index/{self.name}/vector/{id}/delete'
591
+ url = f"{self.url}/index/{self.name}/vector/{id}/delete"
502
592
 
503
593
  http_client = self._get_session_client()
504
594
  response = http_client.delete(url, headers=headers)
@@ -508,6 +598,35 @@ class Index:
508
598
 
509
599
  return response.text + " rows deleted"
510
600
 
601
+ def delete_with_filter(self, filter):
602
+ """
603
+ Delete multiple vectors based on a filter.
604
+
605
+ Deletes all vectors that match the provided filter criteria.
606
+
607
+ Args:
608
+ filter: Dictionary containing filter criteria
609
+
610
+ Returns:
611
+ str: Server response with deletion details
612
+
613
+ Raises:
614
+ HTTPError: If deletion fails
615
+ """
616
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
617
+
618
+ data = {"filter": filter}
619
+
620
+ url = f"{self.url}/index/{self.name}/vectors/delete"
621
+
622
+ http_client = self._get_session_client()
623
+ response = http_client.delete(url, headers=headers, json=data)
624
+
625
+ if response.status_code != 200:
626
+ raise_exception(response.status_code, response.text)
627
+
628
+ return response.text
629
+
511
630
  def get_vector(self, id):
512
631
  """
513
632
  Retrieve a single vector by ID.
@@ -535,16 +654,13 @@ class Index:
535
654
  >>> vec = index.get_vector("vec1")
536
655
  >>> print(vec['meta'])
537
656
  """
538
- headers = {
539
- 'Authorization': f'{self.token}',
540
- 'Content-Type': 'application/json'
541
- }
657
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
542
658
 
543
- url = f'{self.url}/index/{self.name}/vector/get'
659
+ url = f"{self.url}/index/{self.name}/vector/get"
544
660
 
545
661
  # Use POST method with the ID in the request body
546
662
  http_client = self._get_session_client()
547
- response = http_client.post(url, headers=headers, json={'id': id})
663
+ response = http_client.post(url, headers=headers, json={"id": id})
548
664
 
549
665
  if response.status_code != 200:
550
666
  raise_exception(response.status_code, response.text)
@@ -554,22 +670,18 @@ class Index:
554
670
  vector_obj = msgpack.unpackb(response.content, raw=False)
555
671
 
556
672
  result = {
557
- 'id': vector_obj[0],
558
- 'meta': json_unzip(vector_obj[1]),
559
- 'filter': vector_obj[2],
560
- 'norm': vector_obj[3],
561
- 'vector': list(vector_obj[4])
673
+ "id": vector_obj[0],
674
+ "meta": json_unzip(vector_obj[1]),
675
+ "filter": vector_obj[2],
676
+ "norm": vector_obj[3],
677
+ "vector": list(vector_obj[4]),
562
678
  }
563
679
 
564
680
  # Include sparse data if present (for hybrid indexes)
565
681
  if len(vector_obj) > 5:
566
- result['sparse_indices'] = (
567
- list(vector_obj[5]) if vector_obj[5] else []
568
- )
682
+ result["sparse_indices"] = list(vector_obj[5]) if vector_obj[5] else []
569
683
  if len(vector_obj) > 6:
570
- result['sparse_values'] = (
571
- list(vector_obj[6]) if vector_obj[6] else []
572
- )
684
+ result["sparse_values"] = list(vector_obj[6]) if vector_obj[6] else []
573
685
 
574
686
  return result
575
687
 
@@ -1,13 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: endee
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
5
5
  Home-page: https://endee.io
6
6
  Author: Endee Labs
7
7
  Author-email: dev@endee.io
8
8
  Project-URL: Documentation, https://docs.endee.io
9
- Project-URL: Source, https://github.com/endee-labs/endee-python
10
- Project-URL: Bug Reports, https://github.com/endee-labs/endee-python/issues
11
9
  Keywords: vector database,embeddings,machine learning,AI,similarity search,HNSW,nearest neighbors
12
10
  Classifier: Development Status :: 4 - Beta
13
11
  Classifier: Intended Audience :: Developers
@@ -604,6 +602,23 @@ index = client.get_index(name="your-index-name")
604
602
  index.delete_vector("vec1")
605
603
  ```
606
604
 
605
+ ### Filtered Deletion
606
+
607
+ If you don't know the exact vector `id` but want to delete vectors based on their filter fields, use filtered deletion. This is especially useful for:
608
+
609
+ - Bulk deleting vectors by tag, type, or timestamp
610
+ - Enforcing access control or data expiration policies
611
+
612
+ ```python
613
+ from endee import Endee
614
+
615
+ client = Endee(token="your-token-here")
616
+ index = client.get_index(name="your-index-name")
617
+
618
+ # Delete all vectors matching filter conditions
619
+ index.delete_with_filter([{"tags": {"$eq": "important"}}])
620
+ ```
621
+
607
622
  ### Index Deletion
608
623
 
609
624
  Index deletion permanently removes the entire index and all vectors associated with it. This should be used when:
@@ -671,6 +686,7 @@ info = index.describe()
671
686
  | `upsert(input_array)` | Insert or update vectors (max 1000 per batch) |
672
687
  | `query(vector, top_k, filter, ef, include_vectors, sparse_indices, sparse_values)` | Search for similar vectors (sparse params optional for hybrid) |
673
688
  | `delete_vector(id)` | Delete a vector by ID |
689
+ | `delete_with_filter(filter)` | Delete vectors matching a filter |
674
690
  | `get_vector(id)` | Get a specific vector by ID |
675
691
  | `describe()` | Get index statistics and configuration |
676
692
 
@@ -8,39 +8,34 @@ requirements for the Endee Python client library.
8
8
  from setuptools import find_packages, setup
9
9
 
10
10
  # Read the long description from README
11
- with open('README.md', encoding='utf-8') as f:
11
+ with open("README.md", encoding="utf-8") as f:
12
12
  long_description = f.read()
13
13
 
14
14
 
15
15
  setup(
16
16
  # Package Metadata
17
17
  name="endee",
18
- version="0.1.6",
18
+ version="0.1.8",
19
19
  author="Endee Labs",
20
20
  author_email="dev@endee.io",
21
21
  description=(
22
- "Endee is the Next-Generation Vector Database for Scalable, "
23
- "High-Performance AI"
22
+ "Endee is the Next-Generation Vector Database for Scalable, High-Performance AI"
24
23
  ),
25
24
  long_description=long_description,
26
25
  long_description_content_type="text/markdown",
27
26
  url="https://endee.io",
28
-
29
27
  # Package Discovery
30
28
  packages=find_packages(),
31
-
32
29
  # Dependencies
33
30
  install_requires=[
34
- "requests>=2.28.0", # HTTP library for API requests
35
- "httpx[http2]>=0.28.1", # Alternative HTTP library with HTTP/2 support
36
- "numpy>=2.2.4", # Array operations and vector normalization
37
- "msgpack>=1.1.0", # Efficient binary serialization
38
- "orjson>=3.11.5", # Ultra-fast JSON serialization/deserialization
31
+ "requests>=2.28.0", # HTTP library for API requests
32
+ "httpx[http2]>=0.28.1", # Alternative HTTP library with HTTP/2 support
33
+ "numpy>=2.2.4", # Array operations and vector normalization
34
+ "msgpack>=1.1.0", # Efficient binary serialization
35
+ "orjson>=3.11.5", # Ultra-fast JSON serialization/deserialization
39
36
  ],
40
-
41
37
  # Python Version Requirements
42
- python_requires='>=3.6',
43
-
38
+ python_requires=">=3.6",
44
39
  # Package Classification
45
40
  classifiers=[
46
41
  "Development Status :: 4 - Beta",
@@ -58,7 +53,6 @@ setup(
58
53
  "License :: OSI Approved :: MIT License",
59
54
  "Operating System :: OS Independent",
60
55
  ],
61
-
62
56
  # Additional Metadata
63
57
  keywords=[
64
58
  "vector database",
@@ -71,7 +65,5 @@ setup(
71
65
  ],
72
66
  project_urls={
73
67
  "Documentation": "https://docs.endee.io",
74
- "Source": "https://github.com/endee-labs/endee-python",
75
- "Bug Reports": "https://github.com/endee-labs/endee-python/issues",
76
68
  },
77
69
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes