hyperspacedb 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
+ Metadata-Version: 2.4
2
+ Name: hyperspacedb
3
+ Version: 2.0.0
4
+ Summary: Fastest Hyperbolic Vector DB Client
5
+ Author: YARlabs
6
+ Keywords: vector-database,ann,grpc,embeddings,hyperspace
7
+ Requires-Python: >=3.8
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: grpcio>=1.50.0
10
+ Requires-Dist: protobuf>=4.21.0
11
+ Requires-Dist: numpy>=1.20.0
12
+ Provides-Extra: openai
13
+ Requires-Dist: openai>=1.0.0; extra == "openai"
14
+ Provides-Extra: cohere
15
+ Requires-Dist: cohere>=4.0.0; extra == "cohere"
16
+ Provides-Extra: voyage
17
+ Requires-Dist: voyageai>=0.1.0; extra == "voyage"
18
+ Provides-Extra: google
19
+ Requires-Dist: google-generativeai>=0.3.0; extra == "google"
20
+ Provides-Extra: sentence-transformers
21
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "sentence-transformers"
22
+ Provides-Extra: all
23
+ Requires-Dist: openai>=1.0.0; extra == "all"
24
+ Requires-Dist: cohere>=4.0.0; extra == "all"
25
+ Requires-Dist: voyageai>=0.1.0; extra == "all"
26
+ Requires-Dist: google-generativeai>=0.3.0; extra == "all"
27
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "all"
28
+
29
+ # HyperspaceDB Python SDK
30
+
31
+ Official Python client for HyperspaceDB gRPC API.
32
+
33
+ The SDK is designed for production services and benchmark tooling:
34
+ - collection management
35
+ - single and batch insert
36
+ - single and batch vector search
37
+ - optional embedder integrations
38
+ - multi-tenant metadata headers
39
+
40
+ ## Requirements
41
+
42
+ - Python 3.8+
43
+ - Running HyperspaceDB server (default gRPC endpoint: `localhost:50051`)
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ pip install hyperspacedb
49
+ ```
50
+
51
+ Optional embedder extras:
52
+
53
+ ```bash
54
+ pip install "hyperspacedb[openai]"
55
+ pip install "hyperspacedb[all]"
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ ```python
61
+ from hyperspace import HyperspaceClient
62
+
63
+ client = HyperspaceClient("localhost:50051", api_key="I_LOVE_HYPERSPACEDB")
64
+ collection = "docs_py"
65
+
66
+ client.delete_collection(collection)
67
+ client.create_collection(collection, dimension=3, metric="cosine")
68
+
69
+ client.insert(
70
+ id=1,
71
+ vector=[0.1, 0.2, 0.3],
72
+ metadata={"source": "demo"},
73
+ collection=collection,
74
+ )
75
+
76
+ results = client.search(
77
+ vector=[0.1, 0.2, 0.3],
78
+ top_k=5,
79
+ collection=collection,
80
+ )
81
+ print(results)
82
+
83
+ client.close()
84
+ ```
85
+
86
+ ## Batch Search (Recommended for Throughput)
87
+
88
+ ```python
89
+ queries = [
90
+ [0.1, 0.2, 0.3],
91
+ [0.3, 0.1, 0.4],
92
+ ]
93
+
94
+ batch_results = client.search_batch(
95
+ vectors=queries,
96
+ top_k=10,
97
+ collection="docs_py",
98
+ )
99
+ ```
100
+
101
+ `search_batch` reduces per-request RPC overhead and should be preferred for high concurrency.
102
+
103
+ ## API Summary
104
+
105
+ ### Collection Operations
106
+
107
+ - `create_collection(name, dimension, metric) -> bool`
108
+ - `delete_collection(name) -> bool`
109
+ - `list_collections() -> list[str]`
110
+ - `get_collection_stats(name) -> dict`
111
+
112
+ ### Data Operations
113
+
114
+ - `insert(id, vector=None, document=None, metadata=None, collection="", durability=Durability.DEFAULT) -> bool`
115
+ - `batch_insert(vectors, ids, metadatas=None, collection="", durability=Durability.DEFAULT) -> bool`
116
+ - `search(vector=None, query_text=None, top_k=10, filter=None, filters=None, hybrid_query=None, hybrid_alpha=None, collection="") -> list[dict]`
117
+ - `search_batch(vectors, top_k=10, collection="") -> list[list[dict]]`
118
+
119
+ ### Maintenance Operations
120
+
121
+ - `rebuild_index(collection) -> bool`
122
+ - `trigger_vacuum() -> bool`
123
+ - `trigger_snapshot() -> bool`
124
+ - `configure(ef_search=None, ef_construction=None, collection="") -> bool`
125
+
126
+ ## Durability Levels
127
+
128
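+ Pass one of the `Durability` constants (defined in `hyperspace.client`) as the `durability` argument of `insert` / `batch_insert`: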
129
+ - `Durability.DEFAULT`
130
+ - `Durability.ASYNC`
131
+ - `Durability.BATCH`
132
+ - `Durability.STRICT`
133
+
134
+ ## Multi-Tenancy
135
+
136
+ Pass `user_id` to include `x-hyperspace-user-id` on all requests:
137
+
138
+ ```python
139
+ client = HyperspaceClient(
140
+ "localhost:50051",
141
+ api_key="I_LOVE_HYPERSPACEDB",
142
+ user_id="tenant_a",
143
+ )
144
+ ```
145
+
146
+ ## Best Practices
147
+
148
+ - Reuse one client instance per worker/process.
149
+ - Prefer `search_batch` for benchmark and high-QPS paths.
150
+ - Chunk large inserts instead of one huge request.
151
+ - Keep vector dimensionality aligned with collection configuration.
152
+
153
+ ## Error Handling
154
+
155
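+ The SDK catches gRPC errors and returns `False`, `[]`, or `{}` in many methods instead of raising; invalid arguments (for example, calling `insert` with neither `vector` nor `document`) raise `ValueError`.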
156
+ For strict production observability, log return values and attach metrics around failed operations.
157
+
@@ -0,0 +1,129 @@
1
+ # HyperspaceDB Python SDK
2
+
3
+ Official Python client for HyperspaceDB gRPC API.
4
+
5
+ The SDK is designed for production services and benchmark tooling:
6
+ - collection management
7
+ - single and batch insert
8
+ - single and batch vector search
9
+ - optional embedder integrations
10
+ - multi-tenant metadata headers
11
+
12
+ ## Requirements
13
+
14
+ - Python 3.8+
15
+ - Running HyperspaceDB server (default gRPC endpoint: `localhost:50051`)
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install hyperspacedb
21
+ ```
22
+
23
+ Optional embedder extras:
24
+
25
+ ```bash
26
+ pip install "hyperspacedb[openai]"
27
+ pip install "hyperspacedb[all]"
28
+ ```
29
+
30
+ ## Quick Start
31
+
32
+ ```python
33
+ from hyperspace import HyperspaceClient
34
+
35
+ client = HyperspaceClient("localhost:50051", api_key="I_LOVE_HYPERSPACEDB")
36
+ collection = "docs_py"
37
+
38
+ client.delete_collection(collection)
39
+ client.create_collection(collection, dimension=3, metric="cosine")
40
+
41
+ client.insert(
42
+ id=1,
43
+ vector=[0.1, 0.2, 0.3],
44
+ metadata={"source": "demo"},
45
+ collection=collection,
46
+ )
47
+
48
+ results = client.search(
49
+ vector=[0.1, 0.2, 0.3],
50
+ top_k=5,
51
+ collection=collection,
52
+ )
53
+ print(results)
54
+
55
+ client.close()
56
+ ```
57
+
58
+ ## Batch Search (Recommended for Throughput)
59
+
60
+ ```python
61
+ queries = [
62
+ [0.1, 0.2, 0.3],
63
+ [0.3, 0.1, 0.4],
64
+ ]
65
+
66
+ batch_results = client.search_batch(
67
+ vectors=queries,
68
+ top_k=10,
69
+ collection="docs_py",
70
+ )
71
+ ```
72
+
73
+ `search_batch` reduces per-request RPC overhead and should be preferred for high concurrency.
74
+
75
+ ## API Summary
76
+
77
+ ### Collection Operations
78
+
79
+ - `create_collection(name, dimension, metric) -> bool`
80
+ - `delete_collection(name) -> bool`
81
+ - `list_collections() -> list[str]`
82
+ - `get_collection_stats(name) -> dict`
83
+
84
+ ### Data Operations
85
+
86
+ - `insert(id, vector=None, document=None, metadata=None, collection="", durability=Durability.DEFAULT) -> bool`
87
+ - `batch_insert(vectors, ids, metadatas=None, collection="", durability=Durability.DEFAULT) -> bool`
88
+ - `search(vector=None, query_text=None, top_k=10, filter=None, filters=None, hybrid_query=None, hybrid_alpha=None, collection="") -> list[dict]`
89
+ - `search_batch(vectors, top_k=10, collection="") -> list[list[dict]]`
90
+
91
+ ### Maintenance Operations
92
+
93
+ - `rebuild_index(collection) -> bool`
94
+ - `trigger_vacuum() -> bool`
95
+ - `trigger_snapshot() -> bool`
96
+ - `configure(ef_search=None, ef_construction=None, collection="") -> bool`
97
+
98
+ ## Durability Levels
99
+
100
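+ Pass one of the `Durability` constants (defined in `hyperspace.client`) as the `durability` argument of `insert` / `batch_insert`: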
101
+ - `Durability.DEFAULT`
102
+ - `Durability.ASYNC`
103
+ - `Durability.BATCH`
104
+ - `Durability.STRICT`
105
+
106
+ ## Multi-Tenancy
107
+
108
+ Pass `user_id` to include `x-hyperspace-user-id` on all requests:
109
+
110
+ ```python
111
+ client = HyperspaceClient(
112
+ "localhost:50051",
113
+ api_key="I_LOVE_HYPERSPACEDB",
114
+ user_id="tenant_a",
115
+ )
116
+ ```
117
+
118
+ ## Best Practices
119
+
120
+ - Reuse one client instance per worker/process.
121
+ - Prefer `search_batch` for benchmark and high-QPS paths.
122
+ - Chunk large inserts instead of one huge request.
123
+ - Keep vector dimensionality aligned with collection configuration.
124
+
125
+ ## Error Handling
126
+
127
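+ The SDK catches gRPC errors and returns `False`, `[]`, or `{}` in many methods instead of raising; invalid arguments (for example, calling `insert` with neither `vector` nor `document`) raise `ValueError`.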
128
+ For strict production observability, log return values and attach metrics around failed operations.
129
+
@@ -0,0 +1,21 @@
1
"""Public interface of the HyperspaceDB Python SDK package."""

# Durability is re-exported alongside the client because it is the documented
# value set for the `durability` argument of insert / batch_insert.
from .client import Durability, HyperspaceClient
from .embedders import (
    BaseEmbedder,
    OpenAIEmbedder,
    OpenRouterEmbedder,
    CohereEmbedder,
    VoyageEmbedder,
    GoogleEmbedder,
    SentenceTransformerEmbedder,
)

__all__ = [
    "HyperspaceClient",
    "Durability",
    "BaseEmbedder",
    "OpenAIEmbedder",
    "OpenRouterEmbedder",
    "CohereEmbedder",
    "VoyageEmbedder",
    "GoogleEmbedder",
    "SentenceTransformerEmbedder",
]
@@ -0,0 +1,308 @@
1
+ import grpc
2
+ from typing import List, Dict, Optional, Union
3
+ import sys
4
+ import os
5
+
6
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
7
+ sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "proto"))
8
+
9
+ from .proto import hyperspace_pb2
10
+ from .proto import hyperspace_pb2_grpc
11
+ from .embedders import BaseEmbedder
12
+
13
class Durability:
    """Integer codes accepted by the ``durability`` argument of inserts.

    The values are plain ints (0 through 3) that are copied unchanged into
    the ``durability`` field of the insert request messages.
    """

    # Assigned positionally so the codes stay consecutive: 0, 1, 2, 3.
    DEFAULT, ASYNC, BATCH, STRICT = range(4)
18
+
19
class HyperspaceClient:
    """Blocking gRPC client for a HyperspaceDB server.

    Most methods catch ``grpc.RpcError`` and return a falsy value
    (``False``, ``[]`` or ``{}``) instead of raising. Argument validation
    errors are raised as ``ValueError``.

    The client can be used as a context manager; leaving the ``with`` block
    closes the underlying channel.
    """

    def __init__(
        self,
        host: str = "localhost:50051",
        api_key: Optional[str] = None,
        embedder: Optional[BaseEmbedder] = None,
        user_id: Optional[str] = None,
    ):
        """Open a channel to ``host`` and prepare per-call metadata headers.

        Args:
            host: ``host:port`` target of the gRPC endpoint.
            api_key: If truthy, sent as the ``x-api-key`` header on each call.
            embedder: Optional object whose ``encode(text)`` result is used as
                the vector when ``insert``/``search`` receive text only.
            user_id: If truthy, sent as the ``x-hyperspace-user-id`` header
                on each call.
        """
        max_message_bytes = 64 * 1024 * 1024  # 64MB in each direction
        options = [
            ('grpc.max_send_message_length', max_message_bytes),
            ('grpc.max_receive_message_length', max_message_bytes),
            ('grpc.keepalive_time_ms', 10000),
            ('grpc.keepalive_timeout_ms', 5000),
            ('grpc.keepalive_permit_without_calls', 1),
            ('grpc.http2.max_pings_without_data', 0),
            ('grpc.http2.min_time_between_pings_ms', 10000),
            ('grpc.http2.min_ping_interval_without_data_ms', 5000),
        ]
        # NOTE(review): insecure_channel means no transport security, so the
        # API key header below travels in plaintext. Confirm this is intended
        # for non-local deployments.
        self.channel = grpc.insecure_channel(host, options=options)
        self.stub = hyperspace_pb2_grpc.DatabaseStub(self.channel)

        headers = []
        if api_key:
            headers.append(('x-api-key', api_key))
        if user_id:
            headers.append(('x-hyperspace-user-id', user_id))
        # None (rather than an empty tuple) when there are no headers to send.
        self.metadata = tuple(headers) if headers else None
        self.embedder = embedder

    @staticmethod
    def _normalize_vector(vector: Union[List[float], tuple]) -> List[float]:
        """Return ``vector`` as a list, reusing it when it already is one."""
        if isinstance(vector, list):
            return vector
        # Tuples, arrays and other iterables are copied into a list once.
        return list(vector)

    @staticmethod
    def _results_to_dicts(results) -> List[Dict]:
        """Convert search result messages into ``id/distance/metadata`` dicts."""
        return [
            {
                "id": r.id,
                "distance": r.distance,
                "metadata": (dict(r.metadata) if r.metadata else {}),
            }
            for r in results
        ]

    def create_collection(self, name: str, dimension: int, metric: str) -> bool:
        """Send a CreateCollection request.

        Returns:
            True if the RPC completed, False if it raised ``grpc.RpcError``.
            The response body is not inspected.
        """
        req = hyperspace_pb2.CreateCollectionRequest(name=name, dimension=dimension, metric=metric)
        try:
            self.stub.CreateCollection(req, metadata=self.metadata)
        except grpc.RpcError:
            return False
        return True

    def delete_collection(self, name: str) -> bool:
        """Send a DeleteCollection request.

        Returns:
            True if the RPC completed, False if it raised ``grpc.RpcError``.
        """
        req = hyperspace_pb2.DeleteCollectionRequest(name=name)
        try:
            self.stub.DeleteCollection(req, metadata=self.metadata)
        except grpc.RpcError:
            return False
        return True

    def list_collections(self) -> List[str]:
        """Return the ``collections`` field of the ListCollections response.

        Returns:
            A plain list copy of the response field, or ``[]`` if the RPC
            raised ``grpc.RpcError`` (the error is printed).
        """
        req = hyperspace_pb2.Empty()
        try:
            resp = self.stub.ListCollections(req, metadata=self.metadata)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return []
        # Copy out of the protobuf container so callers get a real list that
        # matches the annotation and does not reference the response message.
        return list(resp.collections)

    def get_collection_stats(self, name: str) -> Dict:
        """Fetch statistics for the collection ``name``.

        Returns:
            Dict with keys ``count``, ``dimension``, ``metric`` and
            ``indexing_queue``, or ``{}`` if the RPC raised ``grpc.RpcError``.
        """
        req = hyperspace_pb2.CollectionStatsRequest(name=name)
        try:
            resp = self.stub.GetCollectionStats(req, metadata=self.metadata)
        except grpc.RpcError:
            return {}
        return {
            "count": resp.count,
            "dimension": resp.dimension,
            "metric": resp.metric,
            "indexing_queue": resp.indexing_queue,
        }

    def insert(
        self,
        id: int,
        vector: Optional[List[float]] = None,
        document: Optional[str] = None,
        metadata: Optional[Dict[str, str]] = None,
        collection: str = "",
        durability: int = Durability.DEFAULT,
    ) -> bool:
        """Insert one vector, embedding ``document`` when no vector is given.

        Args:
            id: Identifier stored with the vector.
            vector: The vector to insert. Takes precedence over ``document``.
            document: Text passed to ``self.embedder.encode`` when ``vector``
                is None.
            metadata: Optional key/value pairs attached to the request.
            collection: Target collection name.
            durability: One of the ``Durability`` integer codes.

        Returns:
            The ``success`` field of the response, or False if the RPC raised
            ``grpc.RpcError`` (the error is printed).

        Raises:
            ValueError: If neither ``vector`` nor ``document`` is given, or
                ``document`` is given without a configured embedder.
        """
        if vector is None and document is not None:
            if self.embedder is None:
                raise ValueError("No embedder configured. Please pass 'vector' or init client with an embedder.")
            vector = self.embedder.encode(document)

        if vector is None:
            raise ValueError("Either 'vector' or 'document' must be provided.")
        vector = self._normalize_vector(vector)

        req = hyperspace_pb2.InsertRequest(
            id=id,
            vector=vector,
            collection=collection,
            origin_node_id="",
            logical_clock=0,
            durability=durability,
        )
        if metadata:
            req.metadata.update(metadata)
        try:
            resp = self.stub.Insert(req, metadata=self.metadata)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False
        return resp.success

    def batch_insert(
        self,
        vectors: List[List[float]],
        ids: List[int],
        metadatas: Optional[List[Dict[str, str]]] = None,
        collection: str = "",
        durability: int = Durability.DEFAULT,
    ) -> bool:
        """Insert many vectors in a single BatchInsert request.

        Args:
            vectors: Vectors to insert.
            ids: One identifier per vector, in the same order.
            metadatas: Optional per-vector metadata, in the same order. Falsy
                entries (``None`` or ``{}``) attach no metadata.
            collection: Target collection name.
            durability: One of the ``Durability`` integer codes.

        Returns:
            The ``success`` field of the response, or False if the RPC raised
            ``grpc.RpcError`` (the error is printed).

        Raises:
            ValueError: If ``ids`` or ``metadatas`` (when given) differ in
                length from ``vectors``.
        """
        if len(vectors) != len(ids):
            raise ValueError("Vectors and IDs length mismatch")
        # zip() stops at the shortest input, so a short metadatas list would
        # otherwise silently drop the trailing vectors from the request.
        if metadatas is not None and len(metadatas) != len(vectors):
            raise ValueError("Vectors and metadatas length mismatch")

        per_vector_meta = metadatas if metadatas is not None else [None] * len(vectors)
        proto_vectors = []
        for vec, vec_id, meta in zip(vectors, ids, per_vector_meta):
            fields = {"vector": self._normalize_vector(vec), "id": vec_id}
            if meta:
                fields["metadata"] = meta
            proto_vectors.append(hyperspace_pb2.VectorData(**fields))

        req = hyperspace_pb2.BatchInsertRequest(
            collection=collection,
            vectors=proto_vectors,
            origin_node_id="",
            logical_clock=0,
            durability=durability,
        )
        try:
            resp = self.stub.BatchInsert(req, metadata=self.metadata)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False
        return resp.success

    def search(
        self,
        vector: Optional[List[float]] = None,
        query_text: Optional[str] = None,
        top_k: int = 10,
        filter: Optional[Dict[str, str]] = None,
        filters: Optional[List[Dict]] = None,
        hybrid_query: Optional[str] = None,
        hybrid_alpha: Optional[float] = None,
        collection: str = "",
    ) -> List[Dict]:
        """Run a single search and return the hits as dicts.

        Args:
            vector: Query vector. Takes precedence over ``query_text``.
            query_text: Text passed to ``self.embedder.encode`` when
                ``vector`` is None.
            top_k: Value of the request's ``top_k`` field.
            filter: Optional key/value pairs merged into the request's
                ``filter`` map.
            filters: Optional list of dicts. ``{"type": "match", "key", "value"}``
                and ``{"type": "range", "key", "gte"?, "lte"?}`` are
                translated; range bounds are converted with ``int()``.
                Entries with any other ``type`` are ignored.
            hybrid_query: Optional value for the request's ``hybrid_query``.
            hybrid_alpha: Optional value for the request's ``hybrid_alpha``.
            collection: Collection name to search.

        Returns:
            A list of ``{"id", "distance", "metadata"}`` dicts, or ``[]`` if
            the RPC raised ``grpc.RpcError`` (the error is printed).

        Raises:
            ValueError: If neither ``vector`` nor ``query_text`` is given, or
                ``query_text`` is given without a configured embedder.
        """
        if vector is None and query_text is not None:
            if self.embedder is None:
                raise ValueError("No embedder configured. Please pass 'vector' or init client with an embedder.")
            # For pure vector search using text query
            vector = self.embedder.encode(query_text)

            # NOTE(review): reuse the text as the hybrid query when only
            # hybrid_alpha was supplied. The nesting of this check under the
            # text-embedding branch is assumed - confirm against upstream.
            if hybrid_query is None and hybrid_alpha is not None:
                hybrid_query = query_text

        if vector is None:
            raise ValueError("Either 'vector' or 'query_text' must be provided.")
        vector = self._normalize_vector(vector)

        proto_filters = []
        for spec in filters or ():
            kind = spec.get("type")
            if kind == "match":
                proto_filters.append(hyperspace_pb2.Filter(
                    match=hyperspace_pb2.Match(key=spec["key"], value=spec["value"])
                ))
            elif kind == "range":
                bounds = {"key": spec["key"]}
                for bound in ("gte", "lte"):
                    if bound in spec:
                        bounds[bound] = int(spec[bound])
                proto_filters.append(hyperspace_pb2.Filter(
                    range=hyperspace_pb2.Range(**bounds)
                ))

        req = hyperspace_pb2.SearchRequest(
            vector=vector,
            top_k=top_k,
            collection=collection,
        )
        if filter:
            req.filter.update(filter)
        if proto_filters:
            req.filters.extend(proto_filters)
        if hybrid_query is not None:
            req.hybrid_query = hybrid_query
        if hybrid_alpha is not None:
            req.hybrid_alpha = hybrid_alpha
        try:
            resp = self.stub.Search(req, metadata=self.metadata)
            return self._results_to_dicts(resp.results)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return []

    def search_batch(
        self,
        vectors: List[List[float]],
        top_k: int = 10,
        collection: str = "",
    ) -> List[List[Dict]]:
        """Run one search per vector in a single SearchBatch request.

        Returns:
            One list of ``{"id", "distance", "metadata"}`` dicts per entry of
            the response's ``responses`` field, or ``[]`` if the RPC raised
            ``grpc.RpcError`` (the error is printed).
        """
        searches = [
            hyperspace_pb2.SearchRequest(
                vector=self._normalize_vector(vector),
                top_k=top_k,
                collection=collection,
            )
            for vector in vectors
        ]
        req = hyperspace_pb2.BatchSearchRequest(searches=searches)
        try:
            resp = self.stub.SearchBatch(req, metadata=self.metadata)
            return [
                self._results_to_dicts(search_resp.results)
                for search_resp in resp.responses
            ]
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return []

    def trigger_vacuum(self) -> bool:
        """Send a TriggerVacuum request; False if it raised ``grpc.RpcError``."""
        try:
            self.stub.TriggerVacuum(hyperspace_pb2.Empty(), metadata=self.metadata)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False
        return True

    def rebuild_index(self, collection: str) -> bool:
        """Send a RebuildIndex request for ``collection``.

        Returns:
            True if the RPC completed, False if it raised ``grpc.RpcError``
            (the error is printed).
        """
        req = hyperspace_pb2.RebuildIndexRequest(name=collection)
        try:
            self.stub.RebuildIndex(req, metadata=self.metadata)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False
        return True

    def trigger_snapshot(self) -> bool:
        """Send a TriggerSnapshot request; False if it raised ``grpc.RpcError``."""
        try:
            self.stub.TriggerSnapshot(hyperspace_pb2.Empty(), metadata=self.metadata)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False
        return True

    def configure(
        self,
        ef_search: Optional[int] = None,
        ef_construction: Optional[int] = None,
        collection: str = "",
    ) -> bool:
        """Send a ConfigUpdate carrying only the parameters that were given.

        Args:
            ef_search: Set on the request when not None.
            ef_construction: Set on the request when not None.
            collection: Collection name the update applies to.

        Returns:
            True if the RPC completed, False if it raised ``grpc.RpcError``
            (the error is printed).
        """
        req = hyperspace_pb2.ConfigUpdate(collection=collection)
        if ef_search is not None:
            req.ef_search = ef_search
        if ef_construction is not None:
            req.ef_construction = ef_construction

        try:
            self.stub.Configure(req, metadata=self.metadata)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False
        return True

    def get_digest(self, collection: str = "") -> Dict:
        """Fetch the digest for ``collection``.

        Returns:
            Dict with keys ``logical_clock``, ``state_hash`` and ``count``, or
            ``{}`` if the RPC raised ``grpc.RpcError`` (the error is printed).
        """
        req = hyperspace_pb2.DigestRequest(collection=collection)
        try:
            resp = self.stub.GetDigest(req, metadata=self.metadata)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return {}
        return {
            "logical_clock": resp.logical_clock,
            "state_hash": resp.state_hash,
            "count": resp.count,
        }

    def close(self):
        """Close the underlying gRPC channel."""
        self.channel.close()

    def __enter__(self):
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the channel on leaving the ``with`` block; exceptions propagate."""
        self.close()