hyperspacedb 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hyperspacedb-2.0.0/PKG-INFO +157 -0
- hyperspacedb-2.0.0/README.md +129 -0
- hyperspacedb-2.0.0/hyperspace/__init__.py +21 -0
- hyperspacedb-2.0.0/hyperspace/client.py +308 -0
- hyperspacedb-2.0.0/hyperspace/embedders.py +122 -0
- hyperspacedb-2.0.0/hyperspace/proto/__init__.py +0 -0
- hyperspacedb-2.0.0/hyperspace/proto/hyperspace_pb2.py +134 -0
- hyperspacedb-2.0.0/hyperspace/proto/hyperspace_pb2_grpc.py +794 -0
- hyperspacedb-2.0.0/hyperspacedb.egg-info/PKG-INFO +157 -0
- hyperspacedb-2.0.0/hyperspacedb.egg-info/SOURCES.txt +13 -0
- hyperspacedb-2.0.0/hyperspacedb.egg-info/dependency_links.txt +1 -0
- hyperspacedb-2.0.0/hyperspacedb.egg-info/requires.txt +25 -0
- hyperspacedb-2.0.0/hyperspacedb.egg-info/top_level.txt +1 -0
- hyperspacedb-2.0.0/pyproject.toml +31 -0
- hyperspacedb-2.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hyperspacedb
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Fastest Hyperbolic Vector DB Client
|
|
5
|
+
Author: YARlabs
|
|
6
|
+
Keywords: vector-database,ann,grpc,embeddings,hyperspace
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: grpcio>=1.50.0
|
|
10
|
+
Requires-Dist: protobuf>=4.21.0
|
|
11
|
+
Requires-Dist: numpy>=1.20.0
|
|
12
|
+
Provides-Extra: openai
|
|
13
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
14
|
+
Provides-Extra: cohere
|
|
15
|
+
Requires-Dist: cohere>=4.0.0; extra == "cohere"
|
|
16
|
+
Provides-Extra: voyage
|
|
17
|
+
Requires-Dist: voyageai>=0.1.0; extra == "voyage"
|
|
18
|
+
Provides-Extra: google
|
|
19
|
+
Requires-Dist: google-generativeai>=0.3.0; extra == "google"
|
|
20
|
+
Provides-Extra: sentence-transformers
|
|
21
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "sentence-transformers"
|
|
22
|
+
Provides-Extra: all
|
|
23
|
+
Requires-Dist: openai>=1.0.0; extra == "all"
|
|
24
|
+
Requires-Dist: cohere>=4.0.0; extra == "all"
|
|
25
|
+
Requires-Dist: voyageai>=0.1.0; extra == "all"
|
|
26
|
+
Requires-Dist: google-generativeai>=0.3.0; extra == "all"
|
|
27
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "all"
|
|
28
|
+
|
|
29
|
+
# HyperspaceDB Python SDK
|
|
30
|
+
|
|
31
|
+
Official Python client for HyperspaceDB gRPC API.
|
|
32
|
+
|
|
33
|
+
The SDK is designed for production services and benchmark tooling:
|
|
34
|
+
- collection management
|
|
35
|
+
- single and batch insert
|
|
36
|
+
- single and batch vector search
|
|
37
|
+
- optional embedder integrations
|
|
38
|
+
- multi-tenant metadata headers
|
|
39
|
+
|
|
40
|
+
## Requirements
|
|
41
|
+
|
|
42
|
+
- Python 3.8+
|
|
43
|
+
- Running HyperspaceDB server (default gRPC endpoint: `localhost:50051`)
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install hyperspacedb
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Optional embedder extras:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install "hyperspacedb[openai]"
|
|
55
|
+
pip install "hyperspacedb[all]"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from hyperspace import HyperspaceClient
|
|
62
|
+
|
|
63
|
+
client = HyperspaceClient("localhost:50051", api_key="I_LOVE_HYPERSPACEDB")
|
|
64
|
+
collection = "docs_py"
|
|
65
|
+
|
|
66
|
+
client.delete_collection(collection)
|
|
67
|
+
client.create_collection(collection, dimension=3, metric="cosine")
|
|
68
|
+
|
|
69
|
+
client.insert(
|
|
70
|
+
id=1,
|
|
71
|
+
vector=[0.1, 0.2, 0.3],
|
|
72
|
+
metadata={"source": "demo"},
|
|
73
|
+
collection=collection,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
results = client.search(
|
|
77
|
+
vector=[0.1, 0.2, 0.3],
|
|
78
|
+
top_k=5,
|
|
79
|
+
collection=collection,
|
|
80
|
+
)
|
|
81
|
+
print(results)
|
|
82
|
+
|
|
83
|
+
client.close()
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Batch Search (Recommended for Throughput)
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
queries = [
|
|
90
|
+
[0.1, 0.2, 0.3],
|
|
91
|
+
[0.3, 0.1, 0.4],
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
batch_results = client.search_batch(
|
|
95
|
+
vectors=queries,
|
|
96
|
+
top_k=10,
|
|
97
|
+
collection="docs_py",
|
|
98
|
+
)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
`search_batch` reduces per-request RPC overhead and should be preferred for high concurrency.
|
|
102
|
+
|
|
103
|
+
## API Summary
|
|
104
|
+
|
|
105
|
+
### Collection Operations
|
|
106
|
+
|
|
107
|
+
- `create_collection(name, dimension, metric) -> bool`
|
|
108
|
+
- `delete_collection(name) -> bool`
|
|
109
|
+
- `list_collections() -> list[str]`
|
|
110
|
+
- `get_collection_stats(name) -> dict`
|
|
111
|
+
|
|
112
|
+
### Data Operations
|
|
113
|
+
|
|
114
|
+
- `insert(id, vector=None, document=None, metadata=None, collection="", durability=Durability.DEFAULT) -> bool`
|
|
115
|
+
- `batch_insert(vectors, ids, metadatas=None, collection="", durability=Durability.DEFAULT) -> bool`
|
|
116
|
+
- `search(vector=None, query_text=None, top_k=10, filter=None, filters=None, hybrid_query=None, hybrid_alpha=None, collection="") -> list[dict]`
|
|
117
|
+
- `search_batch(vectors, top_k=10, collection="") -> list[list[dict]]`
|
|
118
|
+
|
|
119
|
+
### Maintenance Operations
|
|
120
|
+
|
|
121
|
+
- `rebuild_index(collection) -> bool`
|
|
122
|
+
- `trigger_vacuum() -> bool`
|
|
123
|
+
- `trigger_snapshot() -> bool`
|
|
124
|
+
- `configure(ef_search=None, ef_construction=None, collection="") -> bool`
|
|
125
|
+
|
|
126
|
+
## Durability Levels
|
|
127
|
+
|
|
128
|
+
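Pass one of the `Durability` constants (defined in `hyperspace.client`) as the `durability` argument of `insert` / `batch_insert`: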
|
|
129
|
+
- `Durability.DEFAULT`
|
|
130
|
+
- `Durability.ASYNC`
|
|
131
|
+
- `Durability.BATCH`
|
|
132
|
+
- `Durability.STRICT`
|
|
133
|
+
|
|
134
|
+
## Multi-Tenancy
|
|
135
|
+
|
|
136
|
+
Pass `user_id` to include `x-hyperspace-user-id` on all requests:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
client = HyperspaceClient(
|
|
140
|
+
"localhost:50051",
|
|
141
|
+
api_key="I_LOVE_HYPERSPACEDB",
|
|
142
|
+
user_id="tenant_a",
|
|
143
|
+
)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Best Practices
|
|
147
|
+
|
|
148
|
+
- Reuse one client instance per worker/process.
|
|
149
|
+
- Prefer `search_batch` for benchmark and high-QPS paths.
|
|
150
|
+
- Chunk large inserts instead of one huge request.
|
|
151
|
+
- Keep vector dimensionality aligned with collection configuration.
|
|
152
|
+
|
|
153
|
+
## Error Handling
|
|
154
|
+
|
|
155
|
+
The SDK catches gRPC errors and, in many methods, returns an empty value (`False`, `[]`, or `{}`) instead of raising; invalid arguments raise `ValueError`.
|
|
156
|
+
For strict production observability, log return values and attach metrics around failed operations.
|
|
157
|
+
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# HyperspaceDB Python SDK
|
|
2
|
+
|
|
3
|
+
Official Python client for HyperspaceDB gRPC API.
|
|
4
|
+
|
|
5
|
+
The SDK is designed for production services and benchmark tooling:
|
|
6
|
+
- collection management
|
|
7
|
+
- single and batch insert
|
|
8
|
+
- single and batch vector search
|
|
9
|
+
- optional embedder integrations
|
|
10
|
+
- multi-tenant metadata headers
|
|
11
|
+
|
|
12
|
+
## Requirements
|
|
13
|
+
|
|
14
|
+
- Python 3.8+
|
|
15
|
+
- Running HyperspaceDB server (default gRPC endpoint: `localhost:50051`)
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install hyperspacedb
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Optional embedder extras:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install "hyperspacedb[openai]"
|
|
27
|
+
pip install "hyperspacedb[all]"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from hyperspace import HyperspaceClient
|
|
34
|
+
|
|
35
|
+
client = HyperspaceClient("localhost:50051", api_key="I_LOVE_HYPERSPACEDB")
|
|
36
|
+
collection = "docs_py"
|
|
37
|
+
|
|
38
|
+
client.delete_collection(collection)
|
|
39
|
+
client.create_collection(collection, dimension=3, metric="cosine")
|
|
40
|
+
|
|
41
|
+
client.insert(
|
|
42
|
+
id=1,
|
|
43
|
+
vector=[0.1, 0.2, 0.3],
|
|
44
|
+
metadata={"source": "demo"},
|
|
45
|
+
collection=collection,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
results = client.search(
|
|
49
|
+
vector=[0.1, 0.2, 0.3],
|
|
50
|
+
top_k=5,
|
|
51
|
+
collection=collection,
|
|
52
|
+
)
|
|
53
|
+
print(results)
|
|
54
|
+
|
|
55
|
+
client.close()
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Batch Search (Recommended for Throughput)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
queries = [
|
|
62
|
+
[0.1, 0.2, 0.3],
|
|
63
|
+
[0.3, 0.1, 0.4],
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
batch_results = client.search_batch(
|
|
67
|
+
vectors=queries,
|
|
68
|
+
top_k=10,
|
|
69
|
+
collection="docs_py",
|
|
70
|
+
)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`search_batch` reduces per-request RPC overhead and should be preferred for high concurrency.
|
|
74
|
+
|
|
75
|
+
## API Summary
|
|
76
|
+
|
|
77
|
+
### Collection Operations
|
|
78
|
+
|
|
79
|
+
- `create_collection(name, dimension, metric) -> bool`
|
|
80
|
+
- `delete_collection(name) -> bool`
|
|
81
|
+
- `list_collections() -> list[str]`
|
|
82
|
+
- `get_collection_stats(name) -> dict`
|
|
83
|
+
|
|
84
|
+
### Data Operations
|
|
85
|
+
|
|
86
|
+
- `insert(id, vector=None, document=None, metadata=None, collection="", durability=Durability.DEFAULT) -> bool`
|
|
87
|
+
- `batch_insert(vectors, ids, metadatas=None, collection="", durability=Durability.DEFAULT) -> bool`
|
|
88
|
+
- `search(vector=None, query_text=None, top_k=10, filter=None, filters=None, hybrid_query=None, hybrid_alpha=None, collection="") -> list[dict]`
|
|
89
|
+
- `search_batch(vectors, top_k=10, collection="") -> list[list[dict]]`
|
|
90
|
+
|
|
91
|
+
### Maintenance Operations
|
|
92
|
+
|
|
93
|
+
- `rebuild_index(collection) -> bool`
|
|
94
|
+
- `trigger_vacuum() -> bool`
|
|
95
|
+
- `trigger_snapshot() -> bool`
|
|
96
|
+
- `configure(ef_search=None, ef_construction=None, collection="") -> bool`
|
|
97
|
+
|
|
98
|
+
## Durability Levels
|
|
99
|
+
|
|
100
|
+
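Pass one of the `Durability` constants (defined in `hyperspace.client`) as the `durability` argument of `insert` / `batch_insert`: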
|
|
101
|
+
- `Durability.DEFAULT`
|
|
102
|
+
- `Durability.ASYNC`
|
|
103
|
+
- `Durability.BATCH`
|
|
104
|
+
- `Durability.STRICT`
|
|
105
|
+
|
|
106
|
+
## Multi-Tenancy
|
|
107
|
+
|
|
108
|
+
Pass `user_id` to include `x-hyperspace-user-id` on all requests:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
client = HyperspaceClient(
|
|
112
|
+
"localhost:50051",
|
|
113
|
+
api_key="I_LOVE_HYPERSPACEDB",
|
|
114
|
+
user_id="tenant_a",
|
|
115
|
+
)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Best Practices
|
|
119
|
+
|
|
120
|
+
- Reuse one client instance per worker/process.
|
|
121
|
+
- Prefer `search_batch` for benchmark and high-QPS paths.
|
|
122
|
+
- Chunk large inserts instead of one huge request.
|
|
123
|
+
- Keep vector dimensionality aligned with collection configuration.
|
|
124
|
+
|
|
125
|
+
## Error Handling
|
|
126
|
+
|
|
127
|
+
The SDK catches gRPC errors and, in many methods, returns an empty value (`False`, `[]`, or `{}`) instead of raising; invalid arguments raise `ValueError`.
|
|
128
|
+
For strict production observability, log return values and attach metrics around failed operations.
|
|
129
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Public API of the ``hyperspace`` package."""

# Durability is re-exported alongside the client because callers pass its
# constants as the ``durability`` argument of insert/batch_insert.
from .client import Durability, HyperspaceClient
from .embedders import (
    BaseEmbedder,
    OpenAIEmbedder,
    OpenRouterEmbedder,
    CohereEmbedder,
    VoyageEmbedder,
    GoogleEmbedder,
    SentenceTransformerEmbedder,
)

# Names exposed by ``from hyperspace import *``.
__all__ = [
    "HyperspaceClient",
    "Durability",
    "BaseEmbedder",
    "OpenAIEmbedder",
    "OpenRouterEmbedder",
    "CohereEmbedder",
    "VoyageEmbedder",
    "GoogleEmbedder",
    "SentenceTransformerEmbedder",
]
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
import grpc
|
|
2
|
+
from typing import List, Dict, Optional, Union
|
|
3
|
+
import sys
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
7
|
+
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "proto"))
|
|
8
|
+
|
|
9
|
+
from .proto import hyperspace_pb2
|
|
10
|
+
from .proto import hyperspace_pb2_grpc
|
|
11
|
+
from .embedders import BaseEmbedder
|
|
12
|
+
|
|
13
|
+
class Durability:
    """Integer durability levels for ``insert`` / ``batch_insert``.

    The selected value is copied unchanged into the ``durability`` field of
    the request message.
    """

    DEFAULT = 0
    ASYNC = 1
    BATCH = 2
    STRICT = 3


class HyperspaceClient:
    """Blocking gRPC client for a HyperspaceDB server.

    Most methods catch ``grpc.RpcError`` and return an empty value (``False``,
    ``[]`` or ``{}``) instead of raising; invalid arguments raise
    ``ValueError``. The client can be used as a context manager, in which case
    the channel is closed on exit.
    """

    def __init__(
        self,
        host: str = "localhost:50051",
        api_key: Optional[str] = None,
        embedder: Optional["BaseEmbedder"] = None,
        user_id: Optional[str] = None,
    ):
        """Open a channel to ``host`` and prepare the per-call metadata.

        Args:
            host: ``host:port`` target of the gRPC server.
            api_key: Sent as the ``x-api-key`` header on every call when set.
            embedder: Object whose ``encode(text)`` turns text into a vector;
                only needed for ``insert(document=...)`` and
                ``search(query_text=...)``.
            user_id: Sent as the ``x-hyperspace-user-id`` header when set.
        """
        max_message_bytes = 64 * 1024 * 1024  # 64MB in both directions
        options = [
            ('grpc.max_send_message_length', max_message_bytes),
            ('grpc.max_receive_message_length', max_message_bytes),
            ('grpc.keepalive_time_ms', 10000),
            ('grpc.keepalive_timeout_ms', 5000),
            ('grpc.keepalive_permit_without_calls', 1),
            ('grpc.http2.max_pings_without_data', 0),
            ('grpc.http2.min_time_between_pings_ms', 10000),
            ('grpc.http2.min_ping_interval_without_data_ms', 5000),
        ]
        # NOTE: insecure_channel means the connection is not TLS-protected.
        self.channel = grpc.insecure_channel(host, options=options)
        self.stub = hyperspace_pb2_grpc.DatabaseStub(self.channel)

        headers = []
        if api_key:
            headers.append(('x-api-key', api_key))
        if user_id:
            headers.append(('x-hyperspace-user-id', user_id))
        # None (rather than an empty tuple) when no header is configured.
        self.metadata = tuple(headers) if headers else None
        self.embedder = embedder

    @staticmethod
    def _normalize_vector(vector: Union[List[float], tuple]) -> List[float]:
        """Return ``vector`` as a list, reusing it when it already is one."""
        if isinstance(vector, list):
            return vector
        # Tuples and other iterables are copied into a list exactly once.
        return list(vector)

    @staticmethod
    def _result_to_dict(result) -> Dict:
        """Convert one search result message into a plain dict."""
        return {
            "id": result.id,
            "distance": result.distance,
            "metadata": (dict(result.metadata) if result.metadata else {}),
        }

    @staticmethod
    def _build_filters(filters: Optional[List[Dict]]) -> list:
        """Translate filter dicts into ``hyperspace_pb2.Filter`` messages.

        ``{"type": "match", "key": ..., "value": ...}`` becomes a match filter
        and ``{"type": "range", "key": ..., "gte": ..., "lte": ...}`` a range
        filter (both bounds optional, coerced to ``int``). Entries with any
        other ``type`` are skipped.
        """
        built = []
        for spec in filters or []:
            kind = spec.get("type")
            if kind == "match":
                built.append(hyperspace_pb2.Filter(
                    match=hyperspace_pb2.Match(key=spec["key"], value=spec["value"])
                ))
            elif kind == "range":
                bounds = {"key": spec["key"]}
                for bound in ("gte", "lte"):
                    if bound in spec:
                        bounds[bound] = int(spec[bound])
                built.append(hyperspace_pb2.Filter(
                    range=hyperspace_pb2.Range(**bounds)
                ))
        return built

    def create_collection(self, name: str, dimension: int, metric: str) -> bool:
        """Create a collection; return ``False`` (silently) on an RPC error."""
        req = hyperspace_pb2.CreateCollectionRequest(name=name, dimension=dimension, metric=metric)
        try:
            self.stub.CreateCollection(req, metadata=self.metadata)
            return True
        except grpc.RpcError:
            return False

    def delete_collection(self, name: str) -> bool:
        """Delete a collection; return ``False`` (silently) on an RPC error."""
        req = hyperspace_pb2.DeleteCollectionRequest(name=name)
        try:
            self.stub.DeleteCollection(req, metadata=self.metadata)
            return True
        except grpc.RpcError:
            return False

    def list_collections(self) -> List[str]:
        """Return the server's collections, or ``[]`` on an RPC error."""
        req = hyperspace_pb2.Empty()
        try:
            resp = self.stub.ListCollections(req, metadata=self.metadata)
            # Copy out of the protobuf container so callers get a real list.
            return list(resp.collections)
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return []

    def get_collection_stats(self, name: str) -> Dict:
        """Return count/dimension/metric/indexing_queue, or ``{}`` on an RPC error."""
        req = hyperspace_pb2.CollectionStatsRequest(name=name)
        try:
            resp = self.stub.GetCollectionStats(req, metadata=self.metadata)
            return {
                "count": resp.count,
                "dimension": resp.dimension,
                "metric": resp.metric,
                "indexing_queue": resp.indexing_queue,
            }
        except grpc.RpcError:
            return {}

    def insert(
        self,
        id: int,
        vector: Optional[List[float]] = None,
        document: Optional[str] = None,
        metadata: Optional[Dict[str, str]] = None,
        collection: str = "",
        durability: int = Durability.DEFAULT,
    ) -> bool:
        """Insert one vector, embedding ``document`` when no vector is given.

        Returns:
            The server's ``success`` flag, or ``False`` on an RPC error.

        Raises:
            ValueError: if neither ``vector`` nor ``document`` is given, or a
                document is given but the client has no embedder.
        """
        if vector is None and document is not None:
            if self.embedder is None:
                raise ValueError("No embedder configured. Please pass 'vector' or init client with an embedder.")
            vector = self.embedder.encode(document)

        if vector is None:
            raise ValueError("Either 'vector' or 'document' must be provided.")
        vector = self._normalize_vector(vector)

        req = hyperspace_pb2.InsertRequest(
            id=id,
            vector=vector,
            collection=collection,
            origin_node_id="",
            logical_clock=0,
            durability=durability,
        )
        if metadata:
            req.metadata.update(metadata)
        try:
            resp = self.stub.Insert(req, metadata=self.metadata)
            return resp.success
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False

    def batch_insert(
        self,
        vectors: List[List[float]],
        ids: List[int],
        metadatas: Optional[List[Dict[str, str]]] = None,
        collection: str = "",
        durability: int = Durability.DEFAULT,
    ) -> bool:
        """Insert many vectors in a single request.

        Args:
            vectors: One vector per item.
            ids: One id per vector, same length as ``vectors``.
            metadatas: Optional per-item metadata, same length as ``vectors``;
                falsy entries (``None`` / ``{}``) attach no metadata.
            collection: Target collection name.
            durability: One of the ``Durability`` constants.

        Returns:
            The server's ``success`` flag, or ``False`` on an RPC error.

        Raises:
            ValueError: if ``ids`` or ``metadatas`` differ in length from
                ``vectors``.
        """
        if len(vectors) != len(ids):
            raise ValueError("Vectors and IDs length mismatch")
        # zip() would otherwise silently drop the vectors that have no
        # matching metadata entry.
        if metadatas is not None and len(metadatas) != len(vectors):
            raise ValueError("Vectors and metadatas length mismatch")

        per_item_meta = metadatas if metadatas is not None else [None] * len(vectors)
        proto_vectors = []
        for vec, vec_id, meta in zip(vectors, ids, per_item_meta):
            fields = {"vector": self._normalize_vector(vec), "id": vec_id}
            if meta:
                fields["metadata"] = meta
            proto_vectors.append(hyperspace_pb2.VectorData(**fields))

        req = hyperspace_pb2.BatchInsertRequest(
            collection=collection,
            vectors=proto_vectors,
            origin_node_id="",
            logical_clock=0,
            durability=durability,
        )
        try:
            resp = self.stub.BatchInsert(req, metadata=self.metadata)
            return resp.success
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False

    def search(
        self,
        vector: Optional[List[float]] = None,
        query_text: Optional[str] = None,
        top_k: int = 10,
        filter: Optional[Dict[str, str]] = None,
        filters: Optional[List[Dict]] = None,
        hybrid_query: Optional[str] = None,
        hybrid_alpha: Optional[float] = None,
        collection: str = "",
    ) -> List[Dict]:
        """Search by vector, or by text embedded with the configured embedder.

        Args:
            vector: Query vector; takes precedence over ``query_text``.
            query_text: Text to embed when ``vector`` is not given.
            top_k: Number of results requested.
            filter: Key/value pairs copied into the request's ``filter`` map.
            filters: Structured filters; see ``_build_filters`` for the shape.
            hybrid_query: Text for the request's ``hybrid_query`` field.
            hybrid_alpha: Value for the request's ``hybrid_alpha`` field.
            collection: Collection to search.

        Returns:
            A list of ``{"id", "distance", "metadata"}`` dicts, or ``[]`` on
            an RPC error.

        Raises:
            ValueError: if neither ``vector`` nor ``query_text`` is given, or
                text is given but the client has no embedder.
        """
        if vector is None and query_text is not None:
            if self.embedder is None:
                raise ValueError("No embedder configured. Please pass 'vector' or init client with an embedder.")
            # Pure vector search driven by a text query.
            vector = self.embedder.encode(query_text)

            # A hybrid weight without explicit hybrid text reuses the query.
            # NOTE(review): applied on the text-query path only — confirm.
            if hybrid_query is None and hybrid_alpha is not None:
                hybrid_query = query_text

        if vector is None:
            raise ValueError("Either 'vector' or 'query_text' must be provided.")
        vector = self._normalize_vector(vector)

        proto_filters = self._build_filters(filters)

        req = hyperspace_pb2.SearchRequest(
            vector=vector,
            top_k=top_k,
            collection=collection,
        )
        if filter:
            req.filter.update(filter)
        if proto_filters:
            req.filters.extend(proto_filters)
        if hybrid_query is not None:
            req.hybrid_query = hybrid_query
        if hybrid_alpha is not None:
            req.hybrid_alpha = hybrid_alpha
        try:
            resp = self.stub.Search(req, metadata=self.metadata)
            return [self._result_to_dict(r) for r in resp.results]
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return []

    def search_batch(
        self,
        vectors: List[List[float]],
        top_k: int = 10,
        collection: str = "",
    ) -> List[List[Dict]]:
        """Run one search per vector in a single ``SearchBatch`` call.

        Returns:
            One result list per response (same dict shape as ``search``), or
            ``[]`` on an RPC error.
        """
        searches = [
            hyperspace_pb2.SearchRequest(
                vector=self._normalize_vector(vector),
                top_k=top_k,
                collection=collection,
            )
            for vector in vectors
        ]
        req = hyperspace_pb2.BatchSearchRequest(searches=searches)
        try:
            resp = self.stub.SearchBatch(req, metadata=self.metadata)
            return [
                [self._result_to_dict(r) for r in search_resp.results]
                for search_resp in resp.responses
            ]
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return []

    def trigger_vacuum(self) -> bool:
        """Call ``TriggerVacuum``; return ``False`` on an RPC error."""
        try:
            self.stub.TriggerVacuum(hyperspace_pb2.Empty(), metadata=self.metadata)
            return True
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False

    def rebuild_index(self, collection: str) -> bool:
        """Call ``RebuildIndex`` for ``collection``; return ``False`` on an RPC error."""
        req = hyperspace_pb2.RebuildIndexRequest(name=collection)
        try:
            self.stub.RebuildIndex(req, metadata=self.metadata)
            return True
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False

    def trigger_snapshot(self) -> bool:
        """Call ``TriggerSnapshot``; return ``False`` on an RPC error."""
        try:
            self.stub.TriggerSnapshot(hyperspace_pb2.Empty(), metadata=self.metadata)
            return True
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False

    def configure(
        self,
        ef_search: Optional[int] = None,
        ef_construction: Optional[int] = None,
        collection: str = "",
    ) -> bool:
        """Send a ``ConfigUpdate`` carrying only the parameters that are set.

        Returns:
            ``True`` when the call completes, ``False`` on an RPC error.
        """
        req = hyperspace_pb2.ConfigUpdate(collection=collection)
        if ef_search is not None:
            req.ef_search = ef_search
        if ef_construction is not None:
            req.ef_construction = ef_construction

        try:
            self.stub.Configure(req, metadata=self.metadata)
            return True
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return False

    def get_digest(self, collection: str = "") -> Dict:
        """Return logical_clock/state_hash/count, or ``{}`` on an RPC error."""
        req = hyperspace_pb2.DigestRequest(collection=collection)
        try:
            resp = self.stub.GetDigest(req, metadata=self.metadata)
            return {
                "logical_clock": resp.logical_clock,
                "state_hash": resp.state_hash,
                "count": resp.count,
            }
        except grpc.RpcError as e:
            print(f"RPC Error: {e}")
            return {}

    def close(self):
        """Close the underlying gRPC channel."""
        self.channel.close()

    def __enter__(self):
        """Return the client itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the channel on leaving the ``with`` block."""
        self.close()
|