endee 0.1.8__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {endee-0.1.8 → endee-0.1.10}/PKG-INFO +2 -1
- {endee-0.1.8 → endee-0.1.10}/endee/compression.py +5 -6
- {endee-0.1.8 → endee-0.1.10}/endee/constants.py +4 -8
- {endee-0.1.8 → endee-0.1.10}/endee/endee.py +26 -52
- {endee-0.1.8 → endee-0.1.10}/endee/exceptions.py +3 -3
- {endee-0.1.8 → endee-0.1.10}/endee/index.py +97 -126
- endee-0.1.10/endee/schema.py +151 -0
- {endee-0.1.8 → endee-0.1.10}/endee/utils.py +3 -3
- {endee-0.1.8 → endee-0.1.10}/endee.egg-info/PKG-INFO +2 -1
- {endee-0.1.8 → endee-0.1.10}/endee.egg-info/SOURCES.txt +1 -0
- {endee-0.1.8 → endee-0.1.10}/endee.egg-info/requires.txt +1 -0
- {endee-0.1.8 → endee-0.1.10}/setup.py +2 -1
- {endee-0.1.8 → endee-0.1.10}/LICENSE +0 -0
- {endee-0.1.8 → endee-0.1.10}/README.md +0 -0
- {endee-0.1.8 → endee-0.1.10}/endee/__init__.py +0 -0
- {endee-0.1.8 → endee-0.1.10}/endee.egg-info/dependency_links.txt +0 -0
- {endee-0.1.8 → endee-0.1.10}/endee.egg-info/top_level.txt +0 -0
- {endee-0.1.8 → endee-0.1.10}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: endee
|
|
3
|
-
Version: 0.1.8
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
|
|
5
5
|
Home-page: https://endee.io
|
|
6
6
|
Author: Endee Labs
|
|
@@ -29,6 +29,7 @@ Requires-Dist: httpx[http2]>=0.28.1
|
|
|
29
29
|
Requires-Dist: numpy>=2.2.4
|
|
30
30
|
Requires-Dist: msgpack>=1.1.0
|
|
31
31
|
Requires-Dist: orjson>=3.11.5
|
|
32
|
+
Requires-Dist: pydantic==1.10.26
|
|
32
33
|
Dynamic: author
|
|
33
34
|
Dynamic: author-email
|
|
34
35
|
Dynamic: classifier
|
|
@@ -6,9 +6,10 @@ metadata. Metadata is serialized to JSON and compressed using zlib
|
|
|
6
6
|
to reduce storage size, memory usage, and network transfer overhead.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import json
|
|
10
9
|
import zlib
|
|
11
10
|
|
|
11
|
+
import orjson
|
|
12
|
+
|
|
12
13
|
|
|
13
14
|
def json_zip(data: dict) -> bytes:
|
|
14
15
|
"""
|
|
@@ -24,10 +25,9 @@ def json_zip(data: dict) -> bytes:
|
|
|
24
25
|
>>> compressed = json_zip({"user": "alice", "age": 30})
|
|
25
26
|
"""
|
|
26
27
|
if not data:
|
|
27
|
-
return b
|
|
28
|
+
return b""
|
|
28
29
|
|
|
29
|
-
|
|
30
|
-
return zlib.compress(json_bytes)
|
|
30
|
+
return zlib.compress(orjson.dumps(data))
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def json_unzip(compressed_data: bytes) -> dict:
|
|
@@ -46,5 +46,4 @@ def json_unzip(compressed_data: bytes) -> dict:
|
|
|
46
46
|
if not compressed_data:
|
|
47
47
|
return {}
|
|
48
48
|
|
|
49
|
-
|
|
50
|
-
return json.loads(decompressed.decode("utf-8"))
|
|
49
|
+
return orjson.loads(zlib.decompress(compressed_data))
|
|
@@ -21,13 +21,15 @@ class Precision(str, Enum):
|
|
|
21
21
|
INT16D: 16-bit integer
|
|
22
22
|
INT8D: 8-bit integer
|
|
23
23
|
"""
|
|
24
|
+
|
|
24
25
|
BINARY2 = "binary"
|
|
25
26
|
FLOAT16 = "float16"
|
|
26
27
|
FLOAT32 = "float32"
|
|
27
28
|
INT16D = "int16d"
|
|
28
29
|
INT8D = "int8d"
|
|
29
30
|
|
|
30
|
-
|
|
31
|
+
|
|
32
|
+
# Checksum Value while creating an index
|
|
31
33
|
CHECKSUM = -1
|
|
32
34
|
|
|
33
35
|
# HTTP Configuration
|
|
@@ -69,13 +71,7 @@ LOCAL_REGION = "local"
|
|
|
69
71
|
|
|
70
72
|
# Supported Types
|
|
71
73
|
# List of precision types supported by the vector database
|
|
72
|
-
PRECISION_TYPES_SUPPORTED = [
|
|
73
|
-
"binary",
|
|
74
|
-
"float16",
|
|
75
|
-
"float32",
|
|
76
|
-
"int16d",
|
|
77
|
-
"int8d"
|
|
78
|
-
]
|
|
74
|
+
PRECISION_TYPES_SUPPORTED = ["binary", "float16", "float32", "int16d", "int8d"]
|
|
79
75
|
|
|
80
76
|
# Distance metric types
|
|
81
77
|
COSINE = "cosine" # Cosine similarity (normalized dot product)
|
|
@@ -6,7 +6,6 @@ vector database service. It includes session management, index operations.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import os
|
|
9
|
-
import sys
|
|
10
9
|
from functools import lru_cache
|
|
11
10
|
|
|
12
11
|
import httpx
|
|
@@ -30,18 +29,14 @@ from endee.constants import (
|
|
|
30
29
|
HTTPX_TIMEOUT_SEC,
|
|
31
30
|
LOCAL_BASE_URL,
|
|
32
31
|
LOCAL_REGION,
|
|
33
|
-
MAX_DIMENSION_ALLOWED,
|
|
34
|
-
MAX_INDEX_NAME_LENGTH_ALLOWED,
|
|
35
|
-
PRECISION_TYPES_SUPPORTED,
|
|
36
32
|
SESSION_MAX_RETRIES,
|
|
37
33
|
SESSION_POOL_CONNECTIONS,
|
|
38
34
|
SESSION_POOL_MAXSIZE,
|
|
39
|
-
SPACE_TYPES_SUPPORTED,
|
|
40
35
|
Precision,
|
|
41
36
|
)
|
|
42
37
|
from endee.exceptions import raise_exception
|
|
43
38
|
from endee.index import Index
|
|
44
|
-
from endee.
|
|
39
|
+
from endee.schema import IndexCreateRequest, IndexMetadata
|
|
45
40
|
|
|
46
41
|
|
|
47
42
|
class SessionManager:
|
|
@@ -417,58 +412,34 @@ class Endee:
|
|
|
417
412
|
ValueError: If parameters are invalid
|
|
418
413
|
HTTPError: If API request fails
|
|
419
414
|
"""
|
|
420
|
-
# Validate
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
f"Dimension cannot be greater than {MAX_DIMENSION_ALLOWED}"
|
|
432
|
-
)
|
|
433
|
-
|
|
434
|
-
# Validate sparse dimension
|
|
435
|
-
# Lower bound check
|
|
436
|
-
if sparse_dim < 0:
|
|
437
|
-
raise ValueError("sparse_dim cannot be negative")
|
|
438
|
-
|
|
439
|
-
# Upper bound check
|
|
440
|
-
if sparse_dim > sys.maxsize:
|
|
441
|
-
raise ValueError(f"sparse_dim cannot exceed {sys.maxsize}")
|
|
442
|
-
|
|
443
|
-
# Validate space type
|
|
444
|
-
space_type = space_type.lower()
|
|
445
|
-
if space_type not in SPACE_TYPES_SUPPORTED:
|
|
446
|
-
raise ValueError(f"Invalid space type: {space_type}")
|
|
447
|
-
|
|
448
|
-
# Validate precision
|
|
449
|
-
if precision not in PRECISION_TYPES_SUPPORTED:
|
|
450
|
-
raise ValueError(
|
|
451
|
-
f"Invalid precision: {precision}. Use one of Precision enum "
|
|
452
|
-
f"values: Precision.BINARY2, Precision.INT8D, "
|
|
453
|
-
f"Precision.INT16D, Precision.FLOAT16, or Precision.FLOAT32"
|
|
454
|
-
)
|
|
415
|
+
# Validate parameters using Pydantic
|
|
416
|
+
request_data = IndexCreateRequest(
|
|
417
|
+
name=name,
|
|
418
|
+
dimension=dimension,
|
|
419
|
+
space_type=space_type,
|
|
420
|
+
M=M,
|
|
421
|
+
ef_con=ef_con,
|
|
422
|
+
precision=precision,
|
|
423
|
+
version=version,
|
|
424
|
+
sparse_dim=sparse_dim,
|
|
425
|
+
)
|
|
455
426
|
|
|
456
427
|
# Prepare request headers and data
|
|
457
428
|
headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
|
|
458
429
|
data = {
|
|
459
|
-
"index_name": name,
|
|
460
|
-
"dim": dimension,
|
|
461
|
-
"space_type": space_type,
|
|
462
|
-
"M": M,
|
|
463
|
-
"ef_con": ef_con,
|
|
430
|
+
"index_name": request_data.name,
|
|
431
|
+
"dim": request_data.dimension,
|
|
432
|
+
"space_type": request_data.space_type,
|
|
433
|
+
"M": request_data.M,
|
|
434
|
+
"ef_con": request_data.ef_con,
|
|
464
435
|
"checksum": CHECKSUM,
|
|
465
|
-
"precision": precision,
|
|
466
|
-
"version": version,
|
|
436
|
+
"precision": request_data.precision,
|
|
437
|
+
"version": request_data.version,
|
|
467
438
|
}
|
|
468
439
|
|
|
469
440
|
# Add sparse dimension if specified
|
|
470
|
-
if sparse_dim > 0:
|
|
471
|
-
data["sparse_dim"] = sparse_dim
|
|
441
|
+
if request_data.sparse_dim > 0:
|
|
442
|
+
data["sparse_dim"] = request_data.sparse_dim
|
|
472
443
|
|
|
473
444
|
url = f"{self.base_url}/index/create"
|
|
474
445
|
|
|
@@ -588,6 +559,9 @@ class Endee:
|
|
|
588
559
|
|
|
589
560
|
data = response.json()
|
|
590
561
|
|
|
562
|
+
# Validate index metadata
|
|
563
|
+
metadata = IndexMetadata(**data)
|
|
564
|
+
|
|
591
565
|
# Create Index object with appropriate manager
|
|
592
566
|
if self.library == HTTP_REQUESTS_LIBRARY:
|
|
593
567
|
idx = Index(
|
|
@@ -595,7 +569,7 @@ class Endee:
|
|
|
595
569
|
token=self.token,
|
|
596
570
|
url=self.base_url,
|
|
597
571
|
version=self.version,
|
|
598
|
-
params=
|
|
572
|
+
params=metadata.dict(by_alias=True),
|
|
599
573
|
session_client_manager=self.session_manager,
|
|
600
574
|
)
|
|
601
575
|
else:
|
|
@@ -604,7 +578,7 @@ class Endee:
|
|
|
604
578
|
token=self.token,
|
|
605
579
|
url=self.base_url,
|
|
606
580
|
version=self.version,
|
|
607
|
-
params=
|
|
581
|
+
params=metadata.dict(by_alias=True),
|
|
608
582
|
session_client_manager=self.client_manager,
|
|
609
583
|
)
|
|
610
584
|
|
|
@@ -6,7 +6,7 @@ conditions that can occur when interacting with the Endee API. Each exception
|
|
|
6
6
|
type corresponds to specific HTTP status codes and error scenarios.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import
|
|
9
|
+
import orjson
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class EndeeException(Exception):
|
|
@@ -231,8 +231,8 @@ def raise_exception(code: int, text: str = None):
|
|
|
231
231
|
# Try to parse JSON error message
|
|
232
232
|
message = None
|
|
233
233
|
try:
|
|
234
|
-
message =
|
|
235
|
-
except (
|
|
234
|
+
message = orjson.loads(text).get("error", "Unknown error")
|
|
235
|
+
except (orjson.JSONDecodeError, TypeError, AttributeError):
|
|
236
236
|
# Fall back to raw text or default message
|
|
237
237
|
message = text or "Unknown error"
|
|
238
238
|
|
|
@@ -6,6 +6,8 @@ upsert, query, delete, and retrieval on vector indices. It supports both dense
|
|
|
6
6
|
and hybrid (dense + sparse) vector operations.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
+
from typing import List
|
|
10
|
+
|
|
9
11
|
import msgpack
|
|
10
12
|
import numpy as np
|
|
11
13
|
import orjson
|
|
@@ -20,8 +22,6 @@ from .constants import (
|
|
|
20
22
|
DIMENSION_FIELD,
|
|
21
23
|
IS_HYBRID_FIELD,
|
|
22
24
|
MAX_CONNECTIONS_FIELD,
|
|
23
|
-
MAX_EF_SEARCH_ALLOWED,
|
|
24
|
-
MAX_TOP_K_ALLOWED,
|
|
25
25
|
MAX_VECTORS_PER_BATCH,
|
|
26
26
|
NAME_FIELD,
|
|
27
27
|
PRECISION_FIELD,
|
|
@@ -29,6 +29,7 @@ from .constants import (
|
|
|
29
29
|
SPARSE_DIM_FIELD,
|
|
30
30
|
)
|
|
31
31
|
from .exceptions import raise_exception
|
|
32
|
+
from .schema import IndexMetadata, QueryRequest, VectorItem
|
|
32
33
|
|
|
33
34
|
|
|
34
35
|
class Index:
|
|
@@ -76,18 +77,19 @@ class Index:
|
|
|
76
77
|
session_client_manager: Shared SessionManager or ClientManager
|
|
77
78
|
from parent Endee client
|
|
78
79
|
"""
|
|
80
|
+
metadata = IndexMetadata(**params)
|
|
79
81
|
self.name = name
|
|
80
82
|
self.token = token
|
|
81
83
|
self.url = url
|
|
82
84
|
self.version = version
|
|
83
85
|
self.checksum = CHECKSUM
|
|
84
|
-
self.lib_token =
|
|
85
|
-
self.count =
|
|
86
|
-
self.space_type =
|
|
87
|
-
self.dimension =
|
|
88
|
-
self.precision =
|
|
89
|
-
self.M =
|
|
90
|
-
self.sparse_dim =
|
|
86
|
+
self.lib_token = metadata.lib_token
|
|
87
|
+
self.count = metadata.total_elements
|
|
88
|
+
self.space_type = metadata.space_type
|
|
89
|
+
self.dimension = metadata.dimension
|
|
90
|
+
self.precision = metadata.precision
|
|
91
|
+
self.M = metadata.M
|
|
92
|
+
self.sparse_dim = metadata.sparse_dim
|
|
91
93
|
|
|
92
94
|
# Use shared HTTP manager from Endee client
|
|
93
95
|
self.session_client_manager = session_client_manager
|
|
@@ -134,12 +136,12 @@ class Index:
|
|
|
134
136
|
"""
|
|
135
137
|
return self.name
|
|
136
138
|
|
|
137
|
-
def _validate_and_prepare_vectors(self,
|
|
139
|
+
def _validate_and_prepare_vectors(self, raw_vectors: List[List[float]]):
|
|
138
140
|
"""
|
|
139
|
-
Validate and prepare vectors from input
|
|
141
|
+
Validate and prepare vectors from raw input lists.
|
|
140
142
|
|
|
141
143
|
Args:
|
|
142
|
-
|
|
144
|
+
raw_vectors: List of dense vector lists
|
|
143
145
|
|
|
144
146
|
Returns:
|
|
145
147
|
tuple: (vectors_array, norms_array, vectors_list)
|
|
@@ -149,9 +151,7 @@ class Index:
|
|
|
149
151
|
"""
|
|
150
152
|
# Extract vectors
|
|
151
153
|
try:
|
|
152
|
-
vectors = np.asarray(
|
|
153
|
-
[item["vector"] for item in input_array], dtype=np.float32
|
|
154
|
-
)
|
|
154
|
+
vectors = np.asarray(raw_vectors, dtype=np.float32)
|
|
155
155
|
except Exception as e:
|
|
156
156
|
raise ValueError(f"Invalid vector data: {e}") from e
|
|
157
157
|
|
|
@@ -166,42 +166,29 @@ class Index:
|
|
|
166
166
|
raise ValueError("Vectors contain NaN or infinity")
|
|
167
167
|
|
|
168
168
|
# Normalize vectors for cosine similarity
|
|
169
|
-
|
|
169
|
+
n_vectors = len(raw_vectors)
|
|
170
170
|
if self.space_type == "cosine":
|
|
171
171
|
norms = np.sqrt(np.einsum("ij,ij->i", vectors, vectors))
|
|
172
172
|
np.maximum(norms, 1e-10, out=norms) # Prevent division by zero
|
|
173
173
|
vectors /= norms[:, None]
|
|
174
174
|
else:
|
|
175
|
-
norms = np.ones(
|
|
175
|
+
norms = np.ones(n_vectors, dtype=np.float32)
|
|
176
176
|
|
|
177
177
|
return vectors, norms, vectors.tolist()
|
|
178
178
|
|
|
179
|
-
def
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
Raises:
|
|
188
|
-
ValueError: If sparse data is invalid
|
|
189
|
-
"""
|
|
190
|
-
if len(sparse_indices) != len(sparse_values):
|
|
191
|
-
raise ValueError("sparse_indices and sparse_values must match in length")
|
|
192
|
-
|
|
193
|
-
if sparse_indices:
|
|
194
|
-
min_idx = min(sparse_indices)
|
|
195
|
-
max_idx = max(sparse_indices)
|
|
196
|
-
if min_idx < 0 or max_idx >= self.sparse_dim:
|
|
197
|
-
raise ValueError(f"Sparse indices out of bounds [0, {self.sparse_dim})")
|
|
198
|
-
|
|
199
|
-
def _build_vector_batch_item(self, item, i, norms, vectors_list, is_hybrid):
|
|
179
|
+
def _build_vector_batch_item(
|
|
180
|
+
self,
|
|
181
|
+
item: VectorItem,
|
|
182
|
+
i: int,
|
|
183
|
+
norms: np.ndarray,
|
|
184
|
+
vectors_list: list,
|
|
185
|
+
is_hybrid: bool,
|
|
186
|
+
):
|
|
200
187
|
"""
|
|
201
188
|
Build a single vector batch item.
|
|
202
189
|
|
|
203
190
|
Args:
|
|
204
|
-
item:
|
|
191
|
+
item: Validated VectorItem
|
|
205
192
|
i: Index in the batch
|
|
206
193
|
norms: Array of vector norms
|
|
207
194
|
vectors_list: List of vectors
|
|
@@ -213,14 +200,19 @@ class Index:
|
|
|
213
200
|
Raises:
|
|
214
201
|
ValueError: If sparse data is invalid
|
|
215
202
|
"""
|
|
216
|
-
|
|
203
|
+
# Localize functions for performance
|
|
217
204
|
dumps_func = orjson.dumps
|
|
205
|
+
zip_func = json_zip
|
|
218
206
|
str_func = str
|
|
219
207
|
float_func = float
|
|
220
208
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
209
|
+
item_id = item.id
|
|
210
|
+
item_meta = item.meta
|
|
211
|
+
item_filter = item.filter
|
|
212
|
+
sparse_indices = item.sparse_indices
|
|
213
|
+
sparse_values = item.sparse_values
|
|
214
|
+
|
|
215
|
+
has_sparse = sparse_indices is not None
|
|
224
216
|
|
|
225
217
|
# XOR logic: hybrid index requires sparse data,
|
|
226
218
|
# dense-only forbids it
|
|
@@ -230,15 +222,18 @@ class Index:
|
|
|
230
222
|
"and dense-only index forbids it."
|
|
231
223
|
)
|
|
232
224
|
|
|
233
|
-
# Validate sparse
|
|
234
|
-
if
|
|
235
|
-
|
|
225
|
+
# Validate sparse indices if present
|
|
226
|
+
if has_sparse:
|
|
227
|
+
max_idx = max(sparse_indices)
|
|
228
|
+
min_idx = min(sparse_indices)
|
|
229
|
+
if min_idx < 0 or max_idx >= self.sparse_dim:
|
|
230
|
+
raise ValueError(f"Sparse indices out of bounds [0, {self.sparse_dim})")
|
|
236
231
|
|
|
237
232
|
# Build vector object: [id, meta, filter, norm, vector, ...]
|
|
238
233
|
obj = [
|
|
239
|
-
str_func(
|
|
240
|
-
|
|
241
|
-
dumps_func(
|
|
234
|
+
str_func(item_id),
|
|
235
|
+
zip_func(item_meta),
|
|
236
|
+
dumps_func(item_filter).decode("utf-8"),
|
|
242
237
|
float_func(norms[i]),
|
|
243
238
|
vectors_list[i],
|
|
244
239
|
]
|
|
@@ -248,7 +243,7 @@ class Index:
|
|
|
248
243
|
obj.extend(
|
|
249
244
|
(
|
|
250
245
|
sparse_indices,
|
|
251
|
-
[
|
|
246
|
+
[float_func(v) for v in sparse_values],
|
|
252
247
|
)
|
|
253
248
|
)
|
|
254
249
|
|
|
@@ -294,34 +289,40 @@ class Index:
|
|
|
294
289
|
f"Cannot insert more than {MAX_VECTORS_PER_BATCH} vectors at a time"
|
|
295
290
|
)
|
|
296
291
|
|
|
297
|
-
#
|
|
292
|
+
# Localize for the loop
|
|
293
|
+
is_hybrid = self.is_hybrid
|
|
298
294
|
seen_ids = set()
|
|
299
|
-
duplicate_ids =
|
|
295
|
+
duplicate_ids = []
|
|
296
|
+
validated_items = []
|
|
297
|
+
vector_item_cls = VectorItem
|
|
300
298
|
|
|
299
|
+
# Combine validation, duplicate check, and vector extraction
|
|
300
|
+
vectors_to_process = []
|
|
301
301
|
for item in input_array:
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
if id_val in seen_ids:
|
|
307
|
-
duplicate_ids.add(id_val)
|
|
302
|
+
v_item = vector_item_cls(**item)
|
|
303
|
+
item_id = v_item.id
|
|
304
|
+
if item_id in seen_ids:
|
|
305
|
+
duplicate_ids.append(item_id)
|
|
308
306
|
else:
|
|
309
|
-
seen_ids.add(
|
|
307
|
+
seen_ids.add(item_id)
|
|
308
|
+
validated_items.append(v_item)
|
|
309
|
+
vectors_to_process.append(v_item.vector)
|
|
310
310
|
|
|
311
311
|
if duplicate_ids:
|
|
312
312
|
raise ValueError(
|
|
313
313
|
f"Duplicate IDs found in input array: {sorted(duplicate_ids)}"
|
|
314
314
|
)
|
|
315
315
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
316
|
+
# Validate and prepare vectors - Passing pre-extracted vectors
|
|
317
|
+
vectors, norms, vectors_list = self._validate_and_prepare_vectors(
|
|
318
|
+
vectors_to_process
|
|
319
|
+
)
|
|
320
320
|
|
|
321
|
-
# Build batch
|
|
321
|
+
# Build batch - localizing method call for performance
|
|
322
|
+
build_item = self._build_vector_batch_item
|
|
322
323
|
vector_batch = [
|
|
323
|
-
|
|
324
|
-
for i, item in enumerate(
|
|
324
|
+
build_item(item, i, norms, vectors_list, is_hybrid)
|
|
325
|
+
for i, item in enumerate(validated_items)
|
|
325
326
|
]
|
|
326
327
|
|
|
327
328
|
serialized_data = msgpack.packb(
|
|
@@ -343,62 +344,23 @@ class Index:
|
|
|
343
344
|
|
|
344
345
|
return "Vectors inserted successfully"
|
|
345
346
|
|
|
346
|
-
def _validate_query_params(
|
|
347
|
-
self, top_k, ef, has_sparse, has_dense, sparse_indices, sparse_values
|
|
348
|
-
):
|
|
347
|
+
def _validate_query_params(self, query: QueryRequest):
|
|
349
348
|
"""
|
|
350
|
-
Validate query parameters.
|
|
349
|
+
Validate query parameters against index configuration.
|
|
351
350
|
|
|
352
351
|
Args:
|
|
353
|
-
|
|
354
|
-
ef: HNSW ef_search parameter
|
|
355
|
-
has_sparse: Whether sparse query is provided
|
|
356
|
-
has_dense: Whether dense query is provided
|
|
357
|
-
sparse_indices: Sparse vector indices
|
|
358
|
-
sparse_values: Sparse vector values
|
|
352
|
+
query: Validated QueryRequest model
|
|
359
353
|
|
|
360
354
|
Raises:
|
|
361
|
-
ValueError: If parameters are invalid
|
|
355
|
+
ValueError: If parameters are invalid for this index
|
|
362
356
|
"""
|
|
363
|
-
# Validate top_k parameter
|
|
364
|
-
if top_k > MAX_TOP_K_ALLOWED or top_k <= 0:
|
|
365
|
-
raise ValueError(
|
|
366
|
-
f"top_k must be between 1 and {MAX_TOP_K_ALLOWED}, got {top_k}"
|
|
367
|
-
)
|
|
368
|
-
|
|
369
|
-
# Validate ef parameter
|
|
370
|
-
if ef > MAX_EF_SEARCH_ALLOWED:
|
|
371
|
-
raise ValueError(
|
|
372
|
-
f"ef search cannot be greater than {MAX_EF_SEARCH_ALLOWED}"
|
|
373
|
-
)
|
|
374
|
-
|
|
375
|
-
# At least one query type must be provided
|
|
376
|
-
if not has_dense and not has_sparse:
|
|
377
|
-
raise ValueError(
|
|
378
|
-
"At least one of 'vector' or 'sparse_indices'/'sparse_values' "
|
|
379
|
-
"must be provided."
|
|
380
|
-
)
|
|
381
|
-
|
|
382
357
|
# Cannot use sparse query on dense-only index
|
|
383
|
-
if
|
|
358
|
+
if query.sparse_indices is not None and not self.is_hybrid:
|
|
384
359
|
raise ValueError(
|
|
385
360
|
"Cannot perform sparse search on a dense-only index. "
|
|
386
361
|
"Create index with sparse_dim > 0 for hybrid support."
|
|
387
362
|
)
|
|
388
363
|
|
|
389
|
-
# If one sparse parameter is provided, both must be provided
|
|
390
|
-
if has_sparse:
|
|
391
|
-
if sparse_indices is None or sparse_values is None:
|
|
392
|
-
raise ValueError(
|
|
393
|
-
"Both sparse_indices and sparse_values must be provided together."
|
|
394
|
-
)
|
|
395
|
-
if len(sparse_indices) != len(sparse_values):
|
|
396
|
-
raise ValueError(
|
|
397
|
-
f"sparse_indices and sparse_values must have the same "
|
|
398
|
-
f"length. Got {len(sparse_indices)} indices and "
|
|
399
|
-
f"{len(sparse_values)} values."
|
|
400
|
-
)
|
|
401
|
-
|
|
402
364
|
def _prepare_dense_vector(self, vector):
|
|
403
365
|
"""
|
|
404
366
|
Prepare and validate dense query vector.
|
|
@@ -527,33 +489,42 @@ class Index:
|
|
|
527
489
|
... filter={"category": "A"}
|
|
528
490
|
... )
|
|
529
491
|
"""
|
|
530
|
-
# Validate
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
492
|
+
# Validate and prepare query using Pydantic
|
|
493
|
+
query_params = QueryRequest(
|
|
494
|
+
vector=vector,
|
|
495
|
+
top_k=top_k,
|
|
496
|
+
filter=filter,
|
|
497
|
+
ef=ef,
|
|
498
|
+
include_vectors=include_vectors,
|
|
499
|
+
sparse_indices=sparse_indices,
|
|
500
|
+
sparse_values=sparse_values,
|
|
537
501
|
)
|
|
538
502
|
|
|
503
|
+
# Additional index-specific validation
|
|
504
|
+
self._validate_query_params(query_params)
|
|
505
|
+
|
|
539
506
|
# Prepare search request headers
|
|
540
507
|
headers = {"Authorization": f"{self.token}", "Content-Type": "application/json"}
|
|
541
508
|
|
|
542
509
|
# Prepare search request data
|
|
543
|
-
data = {
|
|
510
|
+
data = {
|
|
511
|
+
"k": query_params.top_k,
|
|
512
|
+
"ef": query_params.ef,
|
|
513
|
+
"include_vectors": query_params.include_vectors,
|
|
514
|
+
}
|
|
544
515
|
|
|
545
516
|
# Add dense vector if provided
|
|
546
|
-
if
|
|
547
|
-
data["vector"] = self._prepare_dense_vector(vector)
|
|
517
|
+
if query_params.vector is not None:
|
|
518
|
+
data["vector"] = self._prepare_dense_vector(query_params.vector)
|
|
548
519
|
|
|
549
520
|
# Add sparse query if provided
|
|
550
|
-
if
|
|
551
|
-
data["sparse_indices"] = list(sparse_indices)
|
|
552
|
-
data["sparse_values"] = [float(v) for v in sparse_values]
|
|
521
|
+
if query_params.sparse_indices is not None:
|
|
522
|
+
data["sparse_indices"] = list(query_params.sparse_indices)
|
|
523
|
+
data["sparse_values"] = [float(v) for v in query_params.sparse_values]
|
|
553
524
|
|
|
554
525
|
# Add filter if provided
|
|
555
|
-
if filter:
|
|
556
|
-
data["filter"] = orjson.dumps(filter).decode("utf-8")
|
|
526
|
+
if query_params.filter:
|
|
527
|
+
data["filter"] = orjson.dumps(query_params.filter).decode("utf-8")
|
|
557
528
|
|
|
558
529
|
url = f"{self.url}/index/{self.name}/search"
|
|
559
530
|
|
|
@@ -569,7 +540,7 @@ class Index:
|
|
|
569
540
|
results = msgpack.unpackb(response.content, raw=False)
|
|
570
541
|
|
|
571
542
|
# Process and format results
|
|
572
|
-
return self._process_query_results(results, top_k, include_vectors)
|
|
543
|
+
return self._process_query_results(results, query_params.top_k, include_vectors)
|
|
573
544
|
|
|
574
545
|
def delete_vector(self, id):
|
|
575
546
|
"""
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import sys
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field, root_validator, validator
|
|
6
|
+
|
|
7
|
+
from .constants import (
|
|
8
|
+
DEFAULT_EF_SEARCH,
|
|
9
|
+
DEFAULT_TOPK,
|
|
10
|
+
MAX_DIMENSION_ALLOWED,
|
|
11
|
+
MAX_EF_SEARCH_ALLOWED,
|
|
12
|
+
MAX_INDEX_NAME_LENGTH_ALLOWED,
|
|
13
|
+
MAX_TOP_K_ALLOWED,
|
|
14
|
+
PRECISION_TYPES_SUPPORTED,
|
|
15
|
+
SPACE_TYPES_SUPPORTED,
|
|
16
|
+
Precision,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class VectorItem(BaseModel):
|
|
21
|
+
"""Model for a single vector item in an upsert operation."""
|
|
22
|
+
|
|
23
|
+
id: str
|
|
24
|
+
vector: List[float]
|
|
25
|
+
meta: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
|
26
|
+
filter: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
|
27
|
+
sparse_indices: Optional[List[int]] = None
|
|
28
|
+
sparse_values: Optional[List[float]] = None
|
|
29
|
+
|
|
30
|
+
@validator("id")
|
|
31
|
+
@classmethod
|
|
32
|
+
def validate_id(cls, v: str) -> str:
|
|
33
|
+
if len(v) < 1:
|
|
34
|
+
raise ValueError("id must not be empty")
|
|
35
|
+
return v
|
|
36
|
+
|
|
37
|
+
@root_validator
|
|
38
|
+
@classmethod
|
|
39
|
+
def validate_sparse_data(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
40
|
+
sparse_indices = values.get("sparse_indices")
|
|
41
|
+
sparse_values = values.get("sparse_values")
|
|
42
|
+
|
|
43
|
+
if (sparse_indices is None) != (sparse_values is None):
|
|
44
|
+
raise ValueError(
|
|
45
|
+
"Both sparse_indices and sparse_values must be provided together"
|
|
46
|
+
)
|
|
47
|
+
if sparse_indices is not None and len(sparse_indices) != len(sparse_values):
|
|
48
|
+
raise ValueError("sparse_indices and sparse_values must match in length")
|
|
49
|
+
return values
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class QueryRequest(BaseModel):
|
|
53
|
+
"""Model for query parameters."""
|
|
54
|
+
|
|
55
|
+
vector: Optional[List[float]] = None
|
|
56
|
+
top_k: int = Field(default=DEFAULT_TOPK, gt=0, le=MAX_TOP_K_ALLOWED)
|
|
57
|
+
filter: Optional[List[Dict[str, Any]]] = None
|
|
58
|
+
ef: int = Field(default=DEFAULT_EF_SEARCH, le=MAX_EF_SEARCH_ALLOWED)
|
|
59
|
+
include_vectors: bool = False
|
|
60
|
+
sparse_indices: Optional[List[int]] = None
|
|
61
|
+
sparse_values: Optional[List[float]] = None
|
|
62
|
+
|
|
63
|
+
@root_validator
|
|
64
|
+
@classmethod
|
|
65
|
+
def validate_query_type(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
66
|
+
vector = values.get("vector")
|
|
67
|
+
sparse_indices = values.get("sparse_indices")
|
|
68
|
+
sparse_values = values.get("sparse_values")
|
|
69
|
+
|
|
70
|
+
has_dense = vector is not None
|
|
71
|
+
has_sparse = sparse_indices is not None or sparse_values is not None
|
|
72
|
+
|
|
73
|
+
if not has_dense and not has_sparse:
|
|
74
|
+
raise ValueError(
|
|
75
|
+
"At least one of 'vector' or 'sparse_indices'/'sparse_values'"
|
|
76
|
+
" must be provided."
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
if (sparse_indices is None) != (sparse_values is None):
|
|
80
|
+
raise ValueError(
|
|
81
|
+
"Both sparse_indices and sparse_values must be provided together"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
if sparse_indices is not None and len(sparse_indices) != len(sparse_values):
|
|
85
|
+
raise ValueError("sparse_indices and sparse_values must match in length")
|
|
86
|
+
|
|
87
|
+
return values
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class IndexCreateRequest(BaseModel):
|
|
91
|
+
"""Model for index creation parameters."""
|
|
92
|
+
|
|
93
|
+
name: str
|
|
94
|
+
dimension: int = Field(..., gt=0, le=MAX_DIMENSION_ALLOWED)
|
|
95
|
+
space_type: str
|
|
96
|
+
M: int = Field(..., gt=0)
|
|
97
|
+
ef_con: int = Field(..., gt=0)
|
|
98
|
+
precision: Union[str, Precision]
|
|
99
|
+
version: Optional[int] = None
|
|
100
|
+
sparse_dim: int = Field(default=0, ge=0, le=sys.maxsize)
|
|
101
|
+
|
|
102
|
+
@validator("name")
|
|
103
|
+
@classmethod
|
|
104
|
+
def validate_name(cls, v: str) -> str:
|
|
105
|
+
if not re.match(r"^[a-zA-Z0-9_]+$", v):
|
|
106
|
+
raise ValueError(
|
|
107
|
+
"Index name must be alphanumeric and can contain underscores"
|
|
108
|
+
)
|
|
109
|
+
if len(v) > MAX_INDEX_NAME_LENGTH_ALLOWED:
|
|
110
|
+
raise ValueError(
|
|
111
|
+
f"Index name should be less than {MAX_INDEX_NAME_LENGTH_ALLOWED}"
|
|
112
|
+
" characters"
|
|
113
|
+
)
|
|
114
|
+
return v
|
|
115
|
+
|
|
116
|
+
@validator("space_type")
|
|
117
|
+
@classmethod
|
|
118
|
+
def validate_space_type(cls, v: str) -> str:
|
|
119
|
+
v = v.lower()
|
|
120
|
+
if v not in SPACE_TYPES_SUPPORTED:
|
|
121
|
+
raise ValueError(
|
|
122
|
+
f"Invalid space type: {v}. Must be one of {SPACE_TYPES_SUPPORTED}"
|
|
123
|
+
)
|
|
124
|
+
return v
|
|
125
|
+
|
|
126
|
+
@validator("precision")
|
|
127
|
+
@classmethod
|
|
128
|
+
def validate_precision(cls, v: Union[str, Precision]) -> Union[str, Precision]:
|
|
129
|
+
if isinstance(v, Precision):
|
|
130
|
+
return v
|
|
131
|
+
if v not in PRECISION_TYPES_SUPPORTED:
|
|
132
|
+
raise ValueError(
|
|
133
|
+
f"Invalid precision: {v}. Must be one of {PRECISION_TYPES_SUPPORTED}"
|
|
134
|
+
)
|
|
135
|
+
return v
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class IndexMetadata(BaseModel):
|
|
139
|
+
"""Model for index metadata returned by the server."""
|
|
140
|
+
|
|
141
|
+
class Config:
|
|
142
|
+
allow_population_by_field_name = True
|
|
143
|
+
|
|
144
|
+
name: Optional[str] = Field(None, alias="name")
|
|
145
|
+
lib_token: str
|
|
146
|
+
total_elements: int = Field(..., alias="total_elements")
|
|
147
|
+
space_type: str = Field(..., alias="space_type")
|
|
148
|
+
dimension: int = Field(..., alias="dimension")
|
|
149
|
+
precision: Optional[str] = Field(None, alias="precision")
|
|
150
|
+
M: int = Field(..., alias="M")
|
|
151
|
+
sparse_dim: int = Field(0, alias="sparse_dim")
|
|
@@ -35,10 +35,10 @@ def is_valid_index_name(index_name):
|
|
|
35
35
|
False
|
|
36
36
|
"""
|
|
37
37
|
# Pattern matches alphanumeric characters and underscores only
|
|
38
|
-
pattern = re.compile(r
|
|
38
|
+
pattern = re.compile(r"^[a-zA-Z0-9_]+$")
|
|
39
39
|
|
|
40
40
|
# Check both pattern match and length constraint
|
|
41
41
|
return (
|
|
42
|
-
pattern.match(index_name) is not None
|
|
43
|
-
len(index_name) <= MAX_INDEX_NAME_LENGTH_ALLOWED
|
|
42
|
+
pattern.match(index_name) is not None
|
|
43
|
+
and len(index_name) <= MAX_INDEX_NAME_LENGTH_ALLOWED
|
|
44
44
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: endee
|
|
3
|
-
Version: 0.1.8
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: Endee is the Next-Generation Vector Database for Scalable, High-Performance AI
|
|
5
5
|
Home-page: https://endee.io
|
|
6
6
|
Author: Endee Labs
|
|
@@ -29,6 +29,7 @@ Requires-Dist: httpx[http2]>=0.28.1
|
|
|
29
29
|
Requires-Dist: numpy>=2.2.4
|
|
30
30
|
Requires-Dist: msgpack>=1.1.0
|
|
31
31
|
Requires-Dist: orjson>=3.11.5
|
|
32
|
+
Requires-Dist: pydantic==1.10.26
|
|
32
33
|
Dynamic: author
|
|
33
34
|
Dynamic: author-email
|
|
34
35
|
Dynamic: classifier
|
|
@@ -15,7 +15,7 @@ with open("README.md", encoding="utf-8") as f:
|
|
|
15
15
|
setup(
|
|
16
16
|
# Package Metadata
|
|
17
17
|
name="endee",
|
|
18
|
-
version="0.1.8",
|
|
18
|
+
version="0.1.10",
|
|
19
19
|
author="Endee Labs",
|
|
20
20
|
author_email="dev@endee.io",
|
|
21
21
|
description=(
|
|
@@ -33,6 +33,7 @@ setup(
|
|
|
33
33
|
"numpy>=2.2.4", # Array operations and vector normalization
|
|
34
34
|
"msgpack>=1.1.0", # Efficient binary serialization
|
|
35
35
|
"orjson>=3.11.5", # Ultra-fast JSON serialization/deserialization
|
|
36
|
+
"pydantic==1.10.26", # Data validation and settings management
|
|
36
37
|
],
|
|
37
38
|
# Python Version Requirements
|
|
38
39
|
python_requires=">=3.6",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|