endee 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
endee/endee.py CHANGED
@@ -6,6 +6,7 @@ vector database service. It includes session management, index operations.
6
6
  """
7
7
 
8
8
  import os
9
+ import sys
9
10
  from functools import lru_cache
10
11
 
11
12
  import httpx
@@ -68,7 +69,7 @@ class SessionManager:
68
69
  pool_connections: int = SESSION_POOL_CONNECTIONS,
69
70
  pool_maxsize: int = SESSION_POOL_MAXSIZE,
70
71
  max_retries: int = SESSION_MAX_RETRIES,
71
- pool_block: bool = True
72
+ pool_block: bool = True,
72
73
  ):
73
74
  """
74
75
  Initialize the SessionManager.
@@ -122,9 +123,9 @@ class SessionManager:
122
123
  total=self.max_retries,
123
124
  backoff_factor=0.5,
124
125
  status_forcelist=HTTP_STATUS_CODES,
125
- allowed_methods=HTTP_METHODS_ALLOWED
126
+ allowed_methods=HTTP_METHODS_ALLOWED,
126
127
  ),
127
- pool_block=self.pool_block
128
+ pool_block=self.pool_block,
128
129
  )
129
130
 
130
131
  session.mount(HTTP_PROTOCOL, adapter)
@@ -174,7 +175,7 @@ class ClientManager:
174
175
  max_keepalive_connections: int = HTTPX_MAX_KEEPALIVE_CONNECTIONS,
175
176
  max_retries: int = HTTPX_MAX_RETRIES,
176
177
  timeout: float = HTTPX_TIMEOUT_SEC,
177
- enable_http2: bool = False
178
+ enable_http2: bool = False,
178
179
  ):
179
180
  """
180
181
  Initialize the ClientManager.
@@ -226,9 +227,7 @@ class ClientManager:
226
227
  max_keepalive_connections=self.max_keepalive_connections,
227
228
  )
228
229
 
229
- transport = httpx.HTTPTransport(
230
- retries=self.max_retries
231
- )
230
+ transport = httpx.HTTPTransport(retries=self.max_retries)
232
231
 
233
232
  self._client = httpx.Client(
234
233
  http2=self.http2,
@@ -269,9 +268,7 @@ class Endee:
269
268
  """
270
269
 
271
270
  def __init__(
272
- self,
273
- token: str | None = None,
274
- http_library: str = HTTP_REQUESTS_LIBRARY
271
+ self, token: str | None = None, http_library: str = HTTP_REQUESTS_LIBRARY
275
272
  ):
276
273
  """
277
274
  Initialize the Endee client.
@@ -306,16 +303,12 @@ class Endee:
306
303
  if self.library == HTTP_REQUESTS_LIBRARY:
307
304
  # Centralized session manager - shared across all Index objects
308
305
  self.session_manager = SessionManager(
309
- pool_connections=10,
310
- pool_maxsize=10,
311
- max_retries=3
306
+ pool_connections=10, pool_maxsize=10, max_retries=3
312
307
  )
313
308
  elif self.library == HTTP_HTTPX_1_1_LIBRARY:
314
309
  # httpx.Client based manager for HTTP/1.1
315
310
  self.client_manager = ClientManager(
316
- max_connections=10,
317
- max_keepalive_connections=10,
318
- max_retries=3
311
+ max_connections=10, max_keepalive_connections=10, max_retries=3
319
312
  )
320
313
  elif self.library == HTTP_HTTPX_2_LIBRARY:
321
314
  # httpx.Client based manager for HTTP/2
@@ -323,7 +316,7 @@ class Endee:
323
316
  http2=True,
324
317
  max_connections=10,
325
318
  max_keepalive_connections=10,
326
- max_retries=3
319
+ max_retries=3,
327
320
  )
328
321
  else:
329
322
  raise ValueError(
@@ -394,7 +387,7 @@ class Endee:
394
387
  ef_con: int = DEFAULT_EF_CON,
395
388
  precision: str | Precision = Precision.INT8D,
396
389
  version: int = None,
397
- sparse_dim: int = DEFAULT_SPARSE_DIMENSION
390
+ sparse_dim: int = DEFAULT_SPARSE_DIMENSION,
398
391
  ):
399
392
  """
400
393
  Create a new vector index.
@@ -439,9 +432,14 @@ class Endee:
439
432
  )
440
433
 
441
434
  # Validate sparse dimension
435
+ # Lower bound check
442
436
  if sparse_dim < 0:
443
437
  raise ValueError("sparse_dim cannot be negative")
444
438
 
439
+ # Upper bound check
440
+ if sparse_dim > sys.maxsize:
441
+ raise ValueError(f"sparse_dim cannot exceed {sys.maxsize}")
442
+
445
443
  # Validate space type
446
444
  space_type = space_type.lower()
447
445
  if space_type not in SPACE_TYPES_SUPPORTED:
@@ -456,26 +454,23 @@ class Endee:
456
454
  )
457
455
 
458
456
  # Prepare request headers and data
459
- headers = {
460
- 'Authorization': f'{self.token}',
461
- 'Content-Type': 'application/json'
462
- }
457
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
463
458
  data = {
464
- 'index_name': name,
465
- 'dim': dimension,
466
- 'space_type': space_type,
467
- 'M': M,
468
- 'ef_con': ef_con,
469
- 'checksum': CHECKSUM,
470
- 'precision': precision,
471
- 'version': version
459
+ "index_name": name,
460
+ "dim": dimension,
461
+ "space_type": space_type,
462
+ "M": M,
463
+ "ef_con": ef_con,
464
+ "checksum": CHECKSUM,
465
+ "precision": precision,
466
+ "version": version,
472
467
  }
473
468
 
474
469
  # Add sparse dimension if specified
475
470
  if sparse_dim > 0:
476
- data['sparse_dim'] = sparse_dim
471
+ data["sparse_dim"] = sparse_dim
477
472
 
478
- url = f'{self.base_url}/index/create'
473
+ url = f"{self.base_url}/index/create"
479
474
 
480
475
  # Make API request using appropriate library
481
476
  if self.library == HTTP_REQUESTS_LIBRARY:
@@ -502,10 +497,10 @@ class Endee:
502
497
  HTTPError: If API request fails
503
498
  """
504
499
  headers = {
505
- 'Authorization': f'{self.token}',
500
+ "Authorization": f"{self.token}",
506
501
  }
507
502
 
508
- url = f'{self.base_url}/index/list'
503
+ url = f"{self.base_url}/index/list"
509
504
 
510
505
  # Make API request using appropriate library
511
506
  if self.library == HTTP_REQUESTS_LIBRARY:
@@ -539,10 +534,10 @@ class Endee:
539
534
  TODO - Clear the index from LRU cache when deleted
540
535
  """
541
536
  headers = {
542
- 'Authorization': f'{self.token}',
537
+ "Authorization": f"{self.token}",
543
538
  }
544
539
 
545
- url = f'{self.base_url}/index/{name}/delete'
540
+ url = f"{self.base_url}/index/{name}/delete"
546
541
 
547
542
  # Make API request using appropriate library
548
543
  if self.library == HTTP_REQUESTS_LIBRARY:
@@ -556,7 +551,7 @@ class Endee:
556
551
  if response.status_code != 200:
557
552
  raise_exception(response.status_code, response.text)
558
553
 
559
- return f'Index {name} deleted successfully'
554
+ return f"Index {name} deleted successfully"
560
555
 
561
556
  @lru_cache(maxsize=10) # noqa: B019
562
557
  def get_index(self, name: str):
@@ -575,12 +570,9 @@ class Endee:
575
570
  Raises:
576
571
  HTTPError: If API request fails
577
572
  """
578
- headers = {
579
- 'Authorization': f'{self.token}',
580
- 'Content-Type': 'application/json'
581
- }
573
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
582
574
 
583
- url = f'{self.base_url}/index/{name}/info'
575
+ url = f"{self.base_url}/index/{name}/info"
584
576
 
585
577
  # Get index details from the server
586
578
  if self.library == HTTP_REQUESTS_LIBRARY:
@@ -604,7 +596,7 @@ class Endee:
604
596
  url=self.base_url,
605
597
  version=self.version,
606
598
  params=data,
607
- session_client_manager=self.session_manager
599
+ session_client_manager=self.session_manager,
608
600
  )
609
601
  else:
610
602
  idx = Index(
@@ -613,7 +605,7 @@ class Endee:
613
605
  url=self.base_url,
614
606
  version=self.version,
615
607
  params=data,
616
- session_client_manager=self.client_manager
608
+ session_client_manager=self.client_manager,
617
609
  )
618
610
 
619
611
  return idx
endee/index.py CHANGED
@@ -62,7 +62,7 @@ class Index:
62
62
  url: str,
63
63
  version: int = 1,
64
64
  params=None,
65
- session_client_manager=None
65
+ session_client_manager=None,
66
66
  ):
67
67
  """
68
68
  Initialize an Index object.
@@ -105,9 +105,9 @@ class Index:
105
105
  Raises:
106
106
  ValueError: If manager doesn't have required methods
107
107
  """
108
- if hasattr(self.session_client_manager, 'get_session'):
108
+ if hasattr(self.session_client_manager, "get_session"):
109
109
  return self.session_client_manager.get_session()
110
- elif hasattr(self.session_client_manager, 'get_client'):
110
+ elif hasattr(self.session_client_manager, "get_client"):
111
111
  return self.session_client_manager.get_client()
112
112
  else:
113
113
  raise ValueError(
@@ -134,7 +134,125 @@ class Index:
134
134
  """
135
135
  return self.name
136
136
 
137
+ def _validate_and_prepare_vectors(self, input_array):
138
+ """
139
+ Validate and prepare vectors from input array.
140
+
141
+ Args:
142
+ input_array: List of vector dictionaries
143
+
144
+ Returns:
145
+ tuple: (vectors_array, norms_array, vectors_list)
146
+
147
+ Raises:
148
+ ValueError: If vector data is invalid
149
+ """
150
+ # Extract vectors
151
+ try:
152
+ vectors = np.asarray(
153
+ [item["vector"] for item in input_array], dtype=np.float32
154
+ )
155
+ except Exception as e:
156
+ raise ValueError(f"Invalid vector data: {e}") from e
157
+
158
+ # Validate vector shape
159
+ if vectors.ndim != 2 or vectors.shape[1] != self.dimension:
160
+ raise ValueError(
161
+ f"Expected shape (N, {self.dimension}), got {vectors.shape}"
162
+ )
137
163
 
164
+ # Validate finite values
165
+ if not np.isfinite(vectors).all():
166
+ raise ValueError("Vectors contain NaN or infinity")
167
+
168
+ # Normalize vectors for cosine similarity
169
+ N = len(input_array)
170
+ if self.space_type == "cosine":
171
+ norms = np.sqrt(np.einsum("ij,ij->i", vectors, vectors))
172
+ np.maximum(norms, 1e-10, out=norms) # Prevent division by zero
173
+ vectors /= norms[:, None]
174
+ else:
175
+ norms = np.ones(N, dtype=np.float32)
176
+
177
+ return vectors, norms, vectors.tolist()
178
+
179
+ def _validate_sparse_data(self, sparse_indices, sparse_values):
180
+ """
181
+ Validate sparse data for hybrid indexes.
182
+
183
+ Args:
184
+ sparse_indices: List of sparse vector indices
185
+ sparse_values: List of sparse vector values
186
+
187
+ Raises:
188
+ ValueError: If sparse data is invalid
189
+ """
190
+ if len(sparse_indices) != len(sparse_values):
191
+ raise ValueError("sparse_indices and sparse_values must match in length")
192
+
193
+ if sparse_indices:
194
+ min_idx = min(sparse_indices)
195
+ max_idx = max(sparse_indices)
196
+ if min_idx < 0 or max_idx >= self.sparse_dim:
197
+ raise ValueError(f"Sparse indices out of bounds [0, {self.sparse_dim})")
198
+
199
+ def _build_vector_batch_item(self, item, i, norms, vectors_list, is_hybrid):
200
+ """
201
+ Build a single vector batch item.
202
+
203
+ Args:
204
+ item: Input dictionary for one vector
205
+ i: Index in the batch
206
+ norms: Array of vector norms
207
+ vectors_list: List of vectors
208
+ is_hybrid: Whether index is hybrid
209
+
210
+ Returns:
211
+ list: Vector batch item
212
+
213
+ Raises:
214
+ ValueError: If sparse data is invalid
215
+ """
216
+ get_func = dict.get
217
+ dumps_func = orjson.dumps
218
+ str_func = str
219
+ float_func = float
220
+
221
+ sparse_indices = get_func(item, "sparse_indices", None)
222
+ sparse_values = get_func(item, "sparse_values", None)
223
+ has_sparse = sparse_indices is not None or sparse_values is not None
224
+
225
+ # XOR logic: hybrid index requires sparse data,
226
+ # dense-only forbids it
227
+ if has_sparse != is_hybrid:
228
+ raise ValueError(
229
+ "Hybrid index requires sparse data(along with dense vectors), "
230
+ "and dense-only index forbids it."
231
+ )
232
+
233
+ # Validate sparse data if present
234
+ if is_hybrid:
235
+ self._validate_sparse_data(sparse_indices, sparse_values)
236
+
237
+ # Build vector object: [id, meta, filter, norm, vector, ...]
238
+ obj = [
239
+ str_func(get_func(item, "id", "")),
240
+ json_zip(get_func(item, "meta", {})),
241
+ dumps_func(get_func(item, "filter", {})).decode("utf-8"),
242
+ float_func(norms[i]),
243
+ vectors_list[i],
244
+ ]
245
+
246
+ # Add sparse components for hybrid indexes
247
+ if is_hybrid:
248
+ obj.extend(
249
+ (
250
+ sparse_indices,
251
+ [float(v) for v in sparse_values],
252
+ )
253
+ )
254
+
255
+ return obj
138
256
 
139
257
  def upsert(self, input_array):
140
258
  """
@@ -172,119 +290,38 @@ class Index:
172
290
  ... ])
173
291
  """
174
292
  if len(input_array) > MAX_VECTORS_PER_BATCH:
175
- raise ValueError("Cannot insert more than 1000 vectors at a time")
176
-
177
- N = len(input_array)
178
- is_hybrid = self.is_hybrid
179
- sparse_dim = self.sparse_dim
180
-
181
- # ---------- Vector extraction ----------
182
- try:
183
- vectors = np.asarray(
184
- [item["vector"] for item in input_array],
185
- dtype=np.float32
186
- )
187
- except Exception as e:
188
- raise ValueError(f"Invalid vector data: {e}") from e
189
-
190
- # Validate vector shape
191
- if vectors.ndim != 2 or vectors.shape[1] != self.dimension:
192
293
  raise ValueError(
193
- f"Expected shape (N, {self.dimension}), got {vectors.shape}"
294
+ f"Cannot insert more than {MAX_VECTORS_PER_BATCH} vectors at a time"
194
295
  )
195
296
 
196
- # ---------- Validation (single pass) ----------
197
- if not np.isfinite(vectors).all():
198
- raise ValueError("Vectors contain NaN or infinity")
199
-
200
- # Note: Negative zero check disabled as it's expensive and rarely useful
201
- # if np.any((vectors == 0.0) & np.signbit(vectors)):
202
- # raise ValueError("Vectors contain negative zero (-0.0)")
203
-
204
- # ---------- Normalization ----------
205
- # Normalize vectors for cosine similarity
206
- if self.space_type == "cosine":
207
- norms = np.sqrt(np.einsum("ij,ij->i", vectors, vectors))
208
- np.maximum(norms, 1e-10, out=norms) # Prevent division by zero
209
- vectors /= norms[:, None]
210
- else:
211
- norms = np.ones(N, dtype=np.float32)
212
-
213
- # Convert to Python list once to avoid repeated conversions
214
- vectors_list = vectors.tolist()
215
-
216
- # ---------- Batch construction ----------
217
- vector_batch = []
218
- # Use local references for speed
219
- vector_append = vector_batch.append
220
- get_func = dict.get
221
- dumps_func = orjson.dumps
222
- str_func = str
223
- float_func = float
297
+ # Validate IDs upfront
298
+ ids = [item.get("id", "") for item in input_array]
299
+ if any(not id_val or id_val is None for id_val in ids):
300
+ raise ValueError("All vectors must have a non-empty ID")
224
301
 
225
- for i, item in enumerate(input_array):
226
- sparse_indices = get_func(item, "sparse_indices", None)
227
- sparse_values = get_func(item, "sparse_values", None)
228
- has_sparse = (
229
- sparse_indices is not None or sparse_values is not None
230
- )
231
-
232
- # XOR logic: hybrid index requires sparse data,
233
- # dense-only forbids it
234
- if has_sparse != is_hybrid:
235
- raise ValueError(
236
- "Hybrid index requires sparse data(along with dense vectors), "
237
- "and dense-only index forbids it."
238
- )
302
+ is_hybrid = self.is_hybrid
239
303
 
240
- # Validate sparse data if present
241
- if is_hybrid:
242
- if len(sparse_indices) != len(sparse_values):
243
- raise ValueError(
244
- "sparse_indices and sparse_values must match in length"
245
- )
246
-
247
- if sparse_indices:
248
- min_idx = min(sparse_indices)
249
- max_idx = max(sparse_indices)
250
- if min_idx < 0 or max_idx >= sparse_dim:
251
- raise ValueError(
252
- f"Sparse indices out of bounds [0, {sparse_dim})"
253
- )
254
-
255
- # Build vector object: [id, meta, filter, norm, vector, ...]
256
- obj = [
257
- str_func(get_func(item, "id", "")),
258
- json_zip(get_func(item, "meta", {})),
259
- dumps_func(get_func(item, "filter", {})).decode('utf-8'),
260
- float_func(norms[i]),
261
- vectors_list[i],
262
- ]
263
-
264
- # Add sparse components for hybrid indexes
265
- if is_hybrid:
266
- obj.extend((
267
- sparse_indices,
268
- [float(v) for v in sparse_values],
269
- ))
304
+ # Validate and prepare vectors
305
+ vectors, norms, vectors_list = self._validate_and_prepare_vectors(input_array)
270
306
 
271
- vector_append(obj)
307
+ # Build batch
308
+ vector_batch = [
309
+ self._build_vector_batch_item(item, i, norms, vectors_list, is_hybrid)
310
+ for i, item in enumerate(input_array)
311
+ ]
272
312
 
273
313
  serialized_data = msgpack.packb(
274
314
  vector_batch, use_bin_type=True, use_single_float=True
275
315
  )
276
- headers = {
277
- 'Authorization': self.token,
278
- 'Content-Type': 'application/msgpack'
279
- }
316
+ headers = {"Authorization": self.token, "Content-Type": "application/msgpack"}
280
317
 
281
318
  http_client = self._get_session_client()
282
319
 
283
320
  # Sending the batch to the server
284
321
  response = http_client.post(
285
- f'{self.url}/index/{self.name}/vector/insert',
322
+ f"{self.url}/index/{self.name}/vector/insert",
286
323
  headers=headers,
287
- data=serialized_data
324
+ data=serialized_data,
288
325
  )
289
326
 
290
327
  if response.status_code != 200:
@@ -292,7 +329,140 @@ class Index:
292
329
 
293
330
  return "Vectors inserted successfully"
294
331
 
332
+ def _validate_query_params(
333
+ self, top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
334
+ ):
335
+ """
336
+ Validate query parameters.
337
+
338
+ Args:
339
+ top_k: Number of results to return
340
+ ef: HNSW ef_search parameter
341
+ has_sparse: Whether sparse query is provided
342
+ has_dense: Whether dense query is provided
343
+ sparse_indices: Sparse vector indices
344
+ sparse_values: Sparse vector values
345
+
346
+ Raises:
347
+ ValueError: If parameters are invalid
348
+ """
349
+ # Validate top_k parameter
350
+ if top_k > MAX_TOP_K_ALLOWED or top_k <= 0:
351
+ raise ValueError(
352
+ f"top_k must be between 1 and {MAX_TOP_K_ALLOWED}, got {top_k}"
353
+ )
354
+
355
+ # Validate ef parameter
356
+ if ef > MAX_EF_SEARCH_ALLOWED:
357
+ raise ValueError(
358
+ f"ef search cannot be greater than {MAX_EF_SEARCH_ALLOWED}"
359
+ )
360
+
361
+ # At least one query type must be provided
362
+ if not has_dense and not has_sparse:
363
+ raise ValueError(
364
+ "At least one of 'vector' or 'sparse_indices'/'sparse_values' "
365
+ "must be provided."
366
+ )
367
+
368
+ # Cannot use sparse query on dense-only index
369
+ if has_sparse and not self.is_hybrid:
370
+ raise ValueError(
371
+ "Cannot perform sparse search on a dense-only index. "
372
+ "Create index with sparse_dim > 0 for hybrid support."
373
+ )
374
+
375
+ # If one sparse parameter is provided, both must be provided
376
+ if has_sparse:
377
+ if sparse_indices is None or sparse_values is None:
378
+ raise ValueError(
379
+ "Both sparse_indices and sparse_values must be provided together."
380
+ )
381
+ if len(sparse_indices) != len(sparse_values):
382
+ raise ValueError(
383
+ f"sparse_indices and sparse_values must have the same "
384
+ f"length. Got {len(sparse_indices)} indices and "
385
+ f"{len(sparse_values)} values."
386
+ )
387
+
388
+ def _prepare_dense_vector(self, vector):
389
+ """
390
+ Prepare and validate dense query vector.
391
+
392
+ Args:
393
+ vector: Input vector
394
+
395
+ Returns:
396
+ list: Normalized vector as list
295
397
 
398
+ Raises:
399
+ ValueError: If vector is invalid
400
+ """
401
+ # Convert to numpy array
402
+ vec = np.asarray(vector, dtype=np.float32)
403
+
404
+ # Validate shape
405
+ if vec.shape != (self.dimension,):
406
+ raise ValueError(
407
+ f"Vector must have shape ({self.dimension},), got {vec.shape}"
408
+ )
409
+
410
+ # Validate finite values
411
+ if not np.isfinite(vec).all():
412
+ raise ValueError("Vector contains NaN or infinity")
413
+
414
+ # Normalize for cosine similarity using einsum
415
+ if self.space_type == "cosine":
416
+ norm = np.sqrt(np.einsum("i,i->", vec, vec))
417
+ norm = max(norm, 1e-10) # Prevent division by zero
418
+ vec = vec / norm
419
+
420
+ return vec.tolist()
421
+
422
+ def _process_query_results(self, results, top_k, include_vectors):
423
+ """
424
+ Process and format query results.
425
+
426
+ Args:
427
+ results: Raw msgpack results from server
428
+ top_k: Number of results requested
429
+ include_vectors: Whether to include vector data
430
+
431
+ Returns:
432
+ list: Processed results
433
+ """
434
+ processed_results = []
435
+ results = results[:top_k]
436
+
437
+ for result in results:
438
+ similarity = result[0]
439
+ vector_id = result[1]
440
+ meta_data = result[2]
441
+ filter_str = result[3]
442
+ norm_value = result[4]
443
+ vector_data = result[5] if len(result) > 5 else []
444
+
445
+ processed = {
446
+ "id": vector_id,
447
+ "similarity": similarity,
448
+ "distance": 1.0 - similarity,
449
+ "meta": json_unzip(meta_data),
450
+ "norm": norm_value,
451
+ }
452
+
453
+ # Add filter if present
454
+ if filter_str:
455
+ processed["filter"] = orjson.loads(filter_str)
456
+
457
+ # Add vector data if requested
458
+ if include_vectors and vector_data:
459
+ processed["vector"] = list(vector_data)
460
+ else:
461
+ processed["vector"] = []
462
+
463
+ processed_results.append(processed)
464
+
465
+ return processed_results
296
466
 
297
467
  def query(
298
468
  self,
@@ -303,7 +473,7 @@ class Index:
303
473
  include_vectors=False,
304
474
  log=False,
305
475
  sparse_indices=None,
306
- sparse_values=None
476
+ sparse_values=None,
307
477
  ):
308
478
  """
309
479
  Search for similar vectors in the index.
@@ -343,97 +513,35 @@ class Index:
343
513
  ... filter={"category": "A"}
344
514
  ... )
345
515
  """
346
- # Validate top_k parameter
347
- if top_k > MAX_TOP_K_ALLOWED or top_k <= 0:
348
- raise ValueError(
349
- f"top_k must be between 1 and {MAX_TOP_K_ALLOWED}, got {top_k}"
350
- )
351
-
352
- # Validate ef parameter
353
- if ef > MAX_EF_SEARCH_ALLOWED:
354
- raise ValueError(
355
- f"ef search cannot be greater than {MAX_EF_SEARCH_ALLOWED}"
356
- )
357
-
358
516
  # Validate sparse query parameters
359
517
  has_sparse = sparse_indices is not None or sparse_values is not None
360
518
  has_dense = vector is not None
361
519
 
362
- # At least one query type must be provided
363
- if not has_dense and not has_sparse:
364
- raise ValueError(
365
- "At least one of 'vector' or 'sparse_indices'/'sparse_values' "
366
- "must be provided."
367
- )
368
-
369
- # Cannot use sparse query on dense-only index
370
- if has_sparse and not self.is_hybrid:
371
- raise ValueError(
372
- "Cannot perform sparse search on a dense-only index. "
373
- "Create index with sparse_dim > 0 for hybrid support."
374
- )
375
-
376
- # If one sparse parameter is provided, both must be provided
377
- if has_sparse:
378
- if sparse_indices is None or sparse_values is None:
379
- raise ValueError(
380
- "Both sparse_indices and sparse_values must be provided "
381
- "together."
382
- )
383
- if len(sparse_indices) != len(sparse_values):
384
- raise ValueError(
385
- f"sparse_indices and sparse_values must have the same "
386
- f"length. Got {len(sparse_indices)} indices and "
387
- f"{len(sparse_values)} values."
388
- )
520
+ # Validate all query parameters
521
+ self._validate_query_params(
522
+ top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
523
+ )
389
524
 
390
525
  # Prepare search request headers
391
- headers = {
392
- 'Authorization': f'{self.token}',
393
- 'Content-Type': 'application/json'
394
- }
526
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
395
527
 
396
528
  # Prepare search request data
397
- data = {
398
- 'k': top_k,
399
- 'ef': ef,
400
- 'include_vectors': include_vectors
401
- }
529
+ data = {"k": top_k, "ef": ef, "include_vectors": include_vectors}
402
530
 
403
531
  # Add dense vector if provided
404
532
  if has_dense:
405
- # Convert to numpy array
406
- vec = np.asarray(vector, dtype=np.float32)
407
-
408
- # Validate shape
409
- if vec.shape != (self.dimension,):
410
- raise ValueError(
411
- f"Vector must have shape ({self.dimension},), "
412
- f"got {vec.shape}"
413
- )
414
-
415
- # Validate finite values
416
- if not np.isfinite(vec).all():
417
- raise ValueError("Vector contains NaN or infinity")
418
-
419
- # Normalize for cosine similarity using einsum
420
- if self.space_type == "cosine":
421
- norm = np.sqrt(np.einsum("i,i->", vec, vec))
422
- norm = max(norm, 1e-10) # Prevent division by zero
423
- vec = vec / norm
424
-
425
- data['vector'] = vec.tolist()
533
+ data["vector"] = self._prepare_dense_vector(vector)
426
534
 
427
535
  # Add sparse query if provided
428
536
  if has_sparse:
429
- data['sparse_indices'] = list(sparse_indices)
430
- data['sparse_values'] = [float(v) for v in sparse_values]
537
+ data["sparse_indices"] = list(sparse_indices)
538
+ data["sparse_values"] = [float(v) for v in sparse_values]
431
539
 
432
540
  # Add filter if provided
433
541
  if filter:
434
- data['filter'] = orjson.dumps(filter).decode('utf-8')
542
+ data["filter"] = orjson.dumps(filter).decode("utf-8")
435
543
 
436
- url = f'{self.url}/index/{self.name}/search'
544
+ url = f"{self.url}/index/{self.name}/search"
437
545
 
438
546
  # Make API request
439
547
  http_client = self._get_session_client()
@@ -447,39 +555,7 @@ class Index:
447
555
  results = msgpack.unpackb(response.content, raw=False)
448
556
 
449
557
  # Process and format results
450
- # Result format: [similarity, id, meta, filter, norm, vector]
451
- processed_results = []
452
- results = results[:top_k]
453
-
454
- for result in results:
455
- similarity = result[0]
456
- vector_id = result[1]
457
- meta_data = result[2]
458
- filter_str = result[3]
459
- norm_value = result[4]
460
- vector_data = result[5] if len(result) > 5 else []
461
-
462
- processed = {
463
- 'id': vector_id,
464
- 'similarity': similarity,
465
- 'distance': 1.0 - similarity,
466
- 'meta': json_unzip(meta_data),
467
- 'norm': norm_value
468
- }
469
-
470
- # Add filter if present
471
- if filter_str:
472
- processed['filter'] = orjson.loads(filter_str)
473
-
474
- # Add vector data if requested
475
- if include_vectors and vector_data:
476
- processed['vector'] = list(vector_data)
477
- else:
478
- processed['vector'] = []
479
-
480
- processed_results.append(processed)
481
-
482
- return processed_results
558
+ return self._process_query_results(results, top_k, include_vectors)
483
559
 
484
560
  def delete_vector(self, id):
485
561
  """
@@ -495,10 +571,10 @@ class Index:
495
571
  HTTPError: If deletion fails
496
572
  """
497
573
  headers = {
498
- 'Authorization': f'{self.token}',
574
+ "Authorization": f"{self.token}",
499
575
  }
500
576
 
501
- url = f'{self.url}/index/{self.name}/vector/{id}/delete'
577
+ url = f"{self.url}/index/{self.name}/vector/{id}/delete"
502
578
 
503
579
  http_client = self._get_session_client()
504
580
  response = http_client.delete(url, headers=headers)
@@ -535,16 +611,13 @@ class Index:
535
611
  >>> vec = index.get_vector("vec1")
536
612
  >>> print(vec['meta'])
537
613
  """
538
- headers = {
539
- 'Authorization': f'{self.token}',
540
- 'Content-Type': 'application/json'
541
- }
614
+ headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
542
615
 
543
- url = f'{self.url}/index/{self.name}/vector/get'
616
+ url = f"{self.url}/index/{self.name}/vector/get"
544
617
 
545
618
  # Use POST method with the ID in the request body
546
619
  http_client = self._get_session_client()
547
- response = http_client.post(url, headers=headers, json={'id': id})
620
+ response = http_client.post(url, headers=headers, json={"id": id})
548
621
 
549
622
  if response.status_code != 200:
550
623
  raise_exception(response.status_code, response.text)
@@ -554,22 +627,18 @@ class Index:
554
627
  vector_obj = msgpack.unpackb(response.content, raw=False)
555
628
 
556
629
  result = {
557
- 'id': vector_obj[0],
558
- 'meta': json_unzip(vector_obj[1]),
559
- 'filter': vector_obj[2],
560
- 'norm': vector_obj[3],
561
- 'vector': list(vector_obj[4])
630
+ "id": vector_obj[0],
631
+ "meta": json_unzip(vector_obj[1]),
632
+ "filter": vector_obj[2],
633
+ "norm": vector_obj[3],
634
+ "vector": list(vector_obj[4]),
562
635
  }
563
636
 
564
637
  # Include sparse data if present (for hybrid indexes)
565
638
  if len(vector_obj) > 5:
566
- result['sparse_indices'] = (
567
- list(vector_obj[5]) if vector_obj[5] else []
568
- )
639
+ result["sparse_indices"] = list(vector_obj[5]) if vector_obj[5] else []
569
640
  if len(vector_obj) > 6:
570
- result['sparse_values'] = (
571
- list(vector_obj[6]) if vector_obj[6] else []
572
- )
641
+ result["sparse_values"] = list(vector_obj[6]) if vector_obj[6] else []
573
642
 
574
643
  return result
575
644
 
@@ -1,13 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: endee
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
5
5
  Home-page: https://endee.io
6
6
  Author: Endee Labs
7
7
  Author-email: dev@endee.io
8
8
  Project-URL: Documentation, https://docs.endee.io
9
- Project-URL: Source, https://github.com/endee-labs/endee-python
10
- Project-URL: Bug Reports, https://github.com/endee-labs/endee-python/issues
11
9
  Keywords: vector database,embeddings,machine learning,AI,similarity search,HNSW,nearest neighbors
12
10
  Classifier: Development Status :: 4 - Beta
13
11
  Classifier: Intended Audience :: Developers
@@ -0,0 +1,12 @@
1
+ endee/__init__.py,sha256=PZnr7T97fJlLfv9bMPeiSy8vcySBpFgBF2b4VJZbE1s,58
2
+ endee/compression.py,sha256=LiQiHiUslFe-jdJxGUIB-kFil99aRGo-_KFebA5mnt4,1219
3
+ endee/constants.py,sha256=wTKW99Uzg-sKlMx3HgSJcnHasl-KzKMJoe1JsvXjkuY,4316
4
+ endee/endee.py,sha256=vNq7lWmZUK3dzr4OAKLYyjpkQ-rLcEwwh8p-fozghJ8,20723
5
+ endee/exceptions.py,sha256=RTBm6dZ42tw6-PoodEC1W46DLjCmhHxlPj4EtYh-Su4,7499
6
+ endee/index.py,sha256=MN0FqVevHBqQtPRIairB0LjQB8IIf8nry745935KaF8,22907
7
+ endee/utils.py,sha256=CFdr0Qsxo77y00cvtCiuuYHqImOkwtaUIaIqPX2BsyQ,1258
8
+ endee-0.1.7.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ endee-0.1.7.dist-info/METADATA,sha256=ug2Buug0pMnuYmHsBZZNAW0oB_1qarjHIF3a5nWhiUg,23702
10
+ endee-0.1.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
11
+ endee-0.1.7.dist-info/top_level.txt,sha256=zOEvXIfzdm7vXJaVX_jq5OX3fTftKq14KzynxlAp8ZQ,6
12
+ endee-0.1.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- endee/__init__.py,sha256=PZnr7T97fJlLfv9bMPeiSy8vcySBpFgBF2b4VJZbE1s,58
2
- endee/compression.py,sha256=LiQiHiUslFe-jdJxGUIB-kFil99aRGo-_KFebA5mnt4,1219
3
- endee/constants.py,sha256=wTKW99Uzg-sKlMx3HgSJcnHasl-KzKMJoe1JsvXjkuY,4316
4
- endee/endee.py,sha256=xkTGCv-caZjKxlpzwRKkxJCh1XgQWmKIBxgke7L1EoU,20715
5
- endee/exceptions.py,sha256=RTBm6dZ42tw6-PoodEC1W46DLjCmhHxlPj4EtYh-Su4,7499
6
- endee/index.py,sha256=ENYJDNnpimRgoC1SDzVpIaHCqOpSNfilD3Txj6Lg5mg,20825
7
- endee/utils.py,sha256=CFdr0Qsxo77y00cvtCiuuYHqImOkwtaUIaIqPX2BsyQ,1258
8
- endee-0.1.6.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- endee-0.1.6.dist-info/METADATA,sha256=430qjrG8vFFqPLmFnCwREn3w1PKLp4fqteqc1_C1q1g,23842
10
- endee-0.1.6.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
11
- endee-0.1.6.dist-info/top_level.txt,sha256=zOEvXIfzdm7vXJaVX_jq5OX3fTftKq14KzynxlAp8ZQ,6
12
- endee-0.1.6.dist-info/RECORD,,