hermes-client-python 1.4.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hermes_client_python/__init__.py +14 -0
- hermes_client_python/client.py +560 -0
- hermes_client_python/hermes_pb2.py +108 -0
- hermes_client_python/hermes_pb2_grpc.py +485 -0
- hermes_client_python/types.py +48 -0
- hermes_client_python-1.4.11.dist-info/METADATA +232 -0
- hermes_client_python-1.4.11.dist-info/RECORD +8 -0
- hermes_client_python-1.4.11.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Async Python client for Hermes search server."""
|
|
2
|
+
|
|
3
|
+
from .client import HermesClient
|
|
4
|
+
from .types import Document, IndexInfo, SearchHit, SearchResponse
|
|
5
|
+
|
|
6
|
+
# Names exported as the package's public API; each one is imported above
# from the .client or .types submodule.
__all__ = [
    "HermesClient",
    "Document",
    "SearchHit",
    "SearchResponse",
    "IndexInfo",
]
|
|
13
|
+
|
|
14
|
+
# Must match the distribution version recorded in the wheel metadata
# (hermes_client_python-1.4.11.dist-info); the previous value was stale.
__version__ = "1.4.11"
|
|
@@ -0,0 +1,560 @@
|
|
|
1
|
+
"""Async Hermes client implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections.abc import AsyncIterator
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import grpc
|
|
10
|
+
from grpc import aio
|
|
11
|
+
|
|
12
|
+
from . import hermes_pb2 as pb
|
|
13
|
+
from . import hermes_pb2_grpc as pb_grpc
|
|
14
|
+
from .types import Document, IndexInfo, SearchHit, SearchResponse
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class HermesClient:
    """Async client for Hermes search server.

    The client owns one insecure ``grpc.aio`` channel and the generated
    ``IndexService`` / ``SearchService`` stubs bound to it. Every RPC method
    raises ``RuntimeError`` if the client has not been connected yet.

    Example:
        async with HermesClient("localhost:50051") as client:
            # Create index
            await client.create_index("articles", '''
                index articles {
                    title: text indexed stored
                    body: text indexed stored
                }
            ''')

            # Index documents
            await client.index_documents("articles", [
                {"title": "Hello", "body": "World"},
                {"title": "Foo", "body": "Bar"},
            ])
            await client.commit("articles")

            # Search
            results = await client.search("articles", term=("title", "hello"))
            for hit in results.hits:
                print(hit.doc_id, hit.score)
    """

    def __init__(self, address: str = "localhost:50051"):
        """Initialize client.

        No network activity happens here; call ``connect()`` or use
        ``async with`` to open the channel.

        Args:
            address: Server address in format "host:port"
        """
        self.address = address
        self._channel: aio.Channel | None = None
        self._index_stub: pb_grpc.IndexServiceStub | None = None
        self._search_stub: pb_grpc.SearchServiceStub | None = None

    async def connect(self) -> None:
        """Connect to the server.

        Calling this on an already-connected client is a no-op, so a second
        call cannot replace (and thereby leak) the channel that is still open.
        """
        if self._channel is not None:
            return
        self._channel = aio.insecure_channel(self.address)
        self._index_stub = pb_grpc.IndexServiceStub(self._channel)
        self._search_stub = pb_grpc.SearchServiceStub(self._channel)

    async def close(self) -> None:
        """Close the connection.

        Safe to call when the client was never connected. The client state is
        reset before the channel is closed, so the client reports itself as
        disconnected even if closing the channel raises.
        """
        channel, self._channel = self._channel, None
        self._index_stub = None
        self._search_stub = None
        if channel is not None:
            await channel.close()

    async def __aenter__(self) -> HermesClient:
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        await self.close()

    def _ensure_connected(self) -> None:
        """Raise ``RuntimeError`` unless both service stubs have been created."""
        if self._index_stub is None or self._search_stub is None:
            raise RuntimeError(
                "Client not connected. Use 'async with' or call connect() first."
            )

    # =========================================================================
    # Index Management
    # =========================================================================

    async def create_index(self, index_name: str, schema: str) -> bool:
        """Create a new index.

        Args:
            index_name: Name of the index
            schema: Schema definition in SDL or JSON format

        Returns:
            The ``success`` flag of the server response

        Raises:
            RuntimeError: If the client is not connected.

        Example SDL schema:
            index myindex {
                title: text indexed stored
                body: text indexed stored
                score: f64 stored
            }

        Example JSON schema:
            {
                "fields": [
                    {"name": "title", "type": "text", "indexed": true, "stored": true}
                ]
            }
        """
        self._ensure_connected()
        request = pb.CreateIndexRequest(index_name=index_name, schema=schema)
        response = await self._index_stub.CreateIndex(request)
        return response.success

    async def delete_index(self, index_name: str) -> bool:
        """Delete an index.

        Args:
            index_name: Name of the index to delete

        Returns:
            The ``success`` flag of the server response

        Raises:
            RuntimeError: If the client is not connected.
        """
        self._ensure_connected()
        request = pb.DeleteIndexRequest(index_name=index_name)
        response = await self._index_stub.DeleteIndex(request)
        return response.success

    async def get_index_info(self, index_name: str) -> IndexInfo:
        """Get information about an index.

        Args:
            index_name: Name of the index

        Returns:
            IndexInfo with document count, segments, and schema

        Raises:
            RuntimeError: If the client is not connected.
        """
        self._ensure_connected()
        request = pb.GetIndexInfoRequest(index_name=index_name)
        response = await self._search_stub.GetIndexInfo(request)
        return IndexInfo(
            index_name=response.index_name,
            num_docs=response.num_docs,
            num_segments=response.num_segments,
            schema=response.schema,
        )

    # =========================================================================
    # Document Indexing
    # =========================================================================

    async def index_documents(
        self, index_name: str, documents: list[dict[str, Any]]
    ) -> tuple[int, int]:
        """Index multiple documents in batch.

        Args:
            index_name: Name of the index
            documents: List of documents (dicts with field names as keys)

        Returns:
            Tuple of (indexed_count, error_count)

        Raises:
            RuntimeError: If the client is not connected.
        """
        self._ensure_connected()

        named_docs = [
            pb.NamedDocument(fields=_to_field_entries(doc)) for doc in documents
        ]
        request = pb.BatchIndexDocumentsRequest(
            index_name=index_name, documents=named_docs
        )
        response = await self._index_stub.BatchIndexDocuments(request)
        return response.indexed_count, response.error_count

    async def index_document(self, index_name: str, document: dict[str, Any]) -> None:
        """Index a single document.

        Delegates to ``index_documents`` with a one-element batch. The
        (indexed_count, error_count) result is discarded, so a per-document
        error reported by the server is not surfaced here; call
        ``index_documents`` directly when the counts matter.

        Args:
            index_name: Name of the index
            document: Document as dict with field names as keys
        """
        await self.index_documents(index_name, [document])

    async def index_documents_stream(
        self, index_name: str, documents: AsyncIterator[dict[str, Any]]
    ) -> int:
        """Stream documents for indexing.

        Each document from the iterator is converted to one
        ``IndexDocumentRequest`` and sent on a client-streaming RPC.

        Args:
            index_name: Name of the index
            documents: Async iterator of documents

        Returns:
            Number of indexed documents

        Raises:
            RuntimeError: If the client is not connected.
        """
        self._ensure_connected()

        async def request_iterator():
            async for doc in documents:
                yield pb.IndexDocumentRequest(
                    index_name=index_name, fields=_to_field_entries(doc)
                )

        response = await self._index_stub.IndexDocuments(request_iterator())
        return response.indexed_count

    async def commit(self, index_name: str) -> int:
        """Commit pending changes.

        Args:
            index_name: Name of the index

        Returns:
            Total number of documents in the index

        Raises:
            RuntimeError: If the client is not connected.
        """
        self._ensure_connected()
        request = pb.CommitRequest(index_name=index_name)
        response = await self._index_stub.Commit(request)
        return response.num_docs

    async def force_merge(self, index_name: str) -> int:
        """Force merge all segments.

        Args:
            index_name: Name of the index

        Returns:
            Number of segments after merge

        Raises:
            RuntimeError: If the client is not connected.
        """
        self._ensure_connected()
        request = pb.ForceMergeRequest(index_name=index_name)
        response = await self._index_stub.ForceMerge(request)
        return response.num_segments

    # =========================================================================
    # Search
    # =========================================================================

    async def search(
        self,
        index_name: str,
        *,
        term: tuple[str, str] | None = None,
        boolean: dict[str, list[tuple[str, str]]] | None = None,
        sparse_vector: tuple[str, list[int], list[float]] | None = None,
        sparse_text: tuple[str, str] | None = None,
        dense_vector: tuple[str, list[float]] | None = None,
        nprobe: int = 0,
        rerank_factor: int = 0,
        limit: int = 10,
        offset: int = 0,
        fields_to_load: list[str] | None = None,
    ) -> SearchResponse:
        """Search for documents.

        If several query arguments are given, only the first one in the
        order term, boolean, sparse_vector, sparse_text, dense_vector is
        used (see ``_build_query``).

        Args:
            index_name: Name of the index
            term: Term query as (field, term) tuple
            boolean: Boolean query with "must", "should", "must_not" keys
            sparse_vector: Sparse vector query as (field, indices, values) tuple
            sparse_text: Sparse vector query with server-side tokenization as (field, text) tuple
            dense_vector: Dense vector query as (field, vector) tuple
            nprobe: Number of clusters to probe for dense vector (IVF indexes)
            rerank_factor: Re-ranking factor for dense vector search
            limit: Maximum number of results
            offset: Offset for pagination
            fields_to_load: List of fields to include in results

        Returns:
            SearchResponse with hits

        Raises:
            RuntimeError: If the client is not connected.

        Examples:
            # Term query
            results = await client.search("articles", term=("title", "hello"))

            # Boolean query
            results = await client.search("articles", boolean={
                "must": [("title", "hello")],
                "should": [("body", "world")],
            })

            # Sparse vector query (pre-tokenized)
            results = await client.search("docs",
                sparse_vector=("embedding", [1, 5, 10], [0.5, 0.3, 0.2]),
                fields_to_load=["title", "body"]
            )

            # Sparse text query (server-side tokenization)
            results = await client.search("docs",
                sparse_text=("embedding", "what is machine learning?"),
                fields_to_load=["title", "body"]
            )

            # Dense vector query
            results = await client.search("docs",
                dense_vector=("embedding", [0.1, 0.2, 0.3, ...]),
                fields_to_load=["title"]
            )
        """
        self._ensure_connected()

        query = _build_query(
            term=term,
            boolean=boolean,
            sparse_vector=sparse_vector,
            sparse_text=sparse_text,
            dense_vector=dense_vector,
            nprobe=nprobe,
            rerank_factor=rerank_factor,
        )

        request = pb.SearchRequest(
            index_name=index_name,
            query=query,
            limit=limit,
            offset=offset,
            fields_to_load=fields_to_load or [],
        )

        response = await self._search_stub.Search(request)

        hits = [
            SearchHit(
                doc_id=hit.doc_id,
                score=hit.score,
                fields={k: _from_field_value(v) for k, v in hit.fields.items()},
            )
            for hit in response.hits
        ]

        return SearchResponse(
            hits=hits,
            total_hits=response.total_hits,
            took_ms=response.took_ms,
        )

    async def get_document(self, index_name: str, doc_id: int) -> Document | None:
        """Get a document by ID.

        Args:
            index_name: Name of the index
            doc_id: Document ID

        Returns:
            Document or None if not found

        Raises:
            RuntimeError: If the client is not connected.
            grpc.RpcError: For any RPC failure other than NOT_FOUND.
        """
        self._ensure_connected()
        request = pb.GetDocumentRequest(index_name=index_name, doc_id=doc_id)
        # Only the RPC itself is guarded, so a failure while decoding the
        # response can never be mistaken for a transport error.
        try:
            response = await self._search_stub.GetDocument(request)
        except grpc.RpcError as e:
            if e.code() == grpc.StatusCode.NOT_FOUND:
                return None
            raise
        fields = {k: _from_field_value(v) for k, v in response.fields.items()}
        return Document(fields=fields)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
# =============================================================================
|
|
360
|
+
# Helper functions
|
|
361
|
+
# =============================================================================
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _is_sparse_vector(value: list) -> bool:
    """Check if list is a sparse vector: list of (int, float) pairs.

    Every element must be a 2-item list or tuple whose first item is an
    integer index and whose second item is a numeric weight. ``bool`` is
    rejected in both positions (it is an ``int`` subclass), matching the
    rule ``_is_dense_vector`` applies to its elements.

    Args:
        value: Candidate list taken from a document field.

    Returns:
        True if ``value`` is non-empty and every element is a valid
        (index, weight) pair; False otherwise, including for an empty list.
    """
    if not value:
        return False
    for item in value:
        if not isinstance(item, (list, tuple)) or len(item) != 2:
            return False
        idx, val = item
        if isinstance(idx, bool) or not isinstance(idx, int):
            return False
        if isinstance(val, bool) or not isinstance(val, (int, float)):
            return False
    return True
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _is_multi_sparse_vector(value: list) -> bool:
    """Tell whether ``value`` is a non-empty list of sparse vectors.

    Each element has to be a ``list`` (tuples do not qualify at this level)
    that ``_is_sparse_vector`` accepts. An empty outer list yields False.
    """
    return bool(value) and all(
        isinstance(candidate, list) and _is_sparse_vector(candidate)
        for candidate in value
    )
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _is_dense_vector(value: list) -> bool:
    """Tell whether ``value`` is a non-empty flat list of numbers.

    Elements must be ``int`` or ``float``; ``bool`` is explicitly excluded
    even though it subclasses ``int``. An empty list yields False.
    """
    if not value:
        return False
    for element in value:
        if isinstance(element, bool) or not isinstance(element, (int, float)):
            return False
    return True
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def _is_multi_dense_vector(value: list) -> bool:
    """Tell whether ``value`` is a non-empty list of dense vectors.

    Each element has to be a ``list`` that ``_is_dense_vector`` accepts.
    An empty outer list yields False.
    """
    return bool(value) and all(
        isinstance(row, list) and _is_dense_vector(row) for row in value
    )
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def _to_field_entries(doc: dict[str, Any]) -> list[pb.FieldEntry]:
    """Flatten a document dict into protobuf ``FieldEntry`` messages.

    A field whose value is a list of sparse vectors or a list of dense
    vectors produces one ``FieldEntry`` per vector, all sharing the field
    name. Every other value produces a single entry via ``_to_field_value``.
    The multi-sparse check runs before the multi-dense check.
    """
    result = []
    for field_name, raw in doc.items():
        if isinstance(raw, list) and _is_multi_sparse_vector(raw):
            # [[(idx, val), ...], ...] -> one SparseVector entry per inner list
            result.extend(
                pb.FieldEntry(
                    name=field_name,
                    value=pb.FieldValue(
                        sparse_vector=pb.SparseVector(
                            indices=[int(pair[0]) for pair in vec],
                            values=[float(pair[1]) for pair in vec],
                        )
                    ),
                )
                for vec in raw
            )
        elif isinstance(raw, list) and _is_multi_dense_vector(raw):
            # [[f1, f2, ...], ...] -> one DenseVector entry per inner list
            result.extend(
                pb.FieldEntry(
                    name=field_name,
                    value=pb.FieldValue(
                        dense_vector=pb.DenseVector(
                            values=[float(component) for component in vec]
                        )
                    ),
                )
                for vec in raw
            )
        else:
            # Scalar, dict, or single-vector list: standard conversion
            result.append(
                pb.FieldEntry(name=field_name, value=_to_field_value(raw))
            )
    return result
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _to_field_value(value: Any) -> pb.FieldValue:
    """Wrap a Python value in the matching ``FieldValue`` oneof member.

    Mapping, checked in this order:
    - str -> text
    - bool -> u64 (1 or 0)
    - int -> u64 when non-negative, i64 when negative
    - float -> f64
    - bytes -> bytes_value
    - dict -> json_value (JSON-encoded)
    - list of (index, value) pairs -> sparse_vector
    - flat list of numbers -> dense_vector
    - any other list -> json_value (JSON-encoded)
    - anything else -> text via ``str(value)``
    """
    if isinstance(value, str):
        return pb.FieldValue(text=value)
    # bool has to be tested ahead of int because bool subclasses int.
    if isinstance(value, bool):
        return pb.FieldValue(u64=1 if value else 0)
    if isinstance(value, int):
        if value < 0:
            return pb.FieldValue(i64=value)
        return pb.FieldValue(u64=value)
    if isinstance(value, float):
        return pb.FieldValue(f64=value)
    if isinstance(value, bytes):
        return pb.FieldValue(bytes_value=value)
    if isinstance(value, dict):
        return pb.FieldValue(json_value=json.dumps(value))
    if not isinstance(value, list):
        return pb.FieldValue(text=str(value))

    if _is_sparse_vector(value):
        sparse = pb.SparseVector(
            indices=[int(pair[0]) for pair in value],
            values=[float(pair[1]) for pair in value],
        )
        return pb.FieldValue(sparse_vector=sparse)
    if _is_dense_vector(value):
        dense = pb.DenseVector(values=[float(component) for component in value])
        return pb.FieldValue(dense_vector=dense)
    # Lists that are neither vector shape are shipped as JSON.
    return pb.FieldValue(json_value=json.dumps(value))
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def _from_field_value(fv: pb.FieldValue) -> Any:
    """Unwrap a ``FieldValue`` into a plain Python value.

    Scalar members (text, u64, i64, f64, bytes_value) are returned as-is,
    json_value is JSON-decoded, sparse_vector becomes a dict with
    "indices" and "values" lists, and dense_vector becomes a list.
    Returns None when no member of the "value" oneof is set (or it is not
    one of the members handled here).
    """
    member = fv.WhichOneof("value")
    if member in ("text", "u64", "i64", "f64", "bytes_value"):
        return getattr(fv, member)
    if member == "json_value":
        return json.loads(fv.json_value)
    if member == "sparse_vector":
        sparse = fv.sparse_vector
        return {"indices": list(sparse.indices), "values": list(sparse.values)}
    if member == "dense_vector":
        return list(fv.dense_vector.values)
    return None
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def _build_query(
    *,
    term: tuple[str, str] | None = None,
    boolean: dict[str, list[tuple[str, str]]] | None = None,
    sparse_vector: tuple[str, list[int], list[float]] | None = None,
    sparse_text: tuple[str, str] | None = None,
    dense_vector: tuple[str, list[float]] | None = None,
    nprobe: int = 0,
    rerank_factor: int = 0,
) -> pb.Query:
    """Translate keyword search parameters into a protobuf ``Query``.

    Only one query kind is emitted. The first argument that is not None
    wins, in the order term, boolean, sparse_vector, sparse_text,
    dense_vector. ``nprobe`` and ``rerank_factor`` are used only by the
    dense-vector query. With no query argument an empty ``BooleanQuery``
    is returned.
    """

    def term_clauses(key: str) -> list:
        # Missing keys in the boolean dict contribute no clauses.
        return [
            pb.Query(term=pb.TermQuery(field=name, term=text))
            for name, text in boolean.get(key, [])
        ]

    if term is not None:
        term_field, term_text = term
        return pb.Query(term=pb.TermQuery(field=term_field, term=term_text))

    if boolean is not None:
        must_clauses = term_clauses("must")
        should_clauses = term_clauses("should")
        must_not_clauses = term_clauses("must_not")
        combined = pb.BooleanQuery(
            must=must_clauses, should=should_clauses, must_not=must_not_clauses
        )
        return pb.Query(boolean=combined)

    if sparse_vector is not None:
        sv_field, sv_indices, sv_values = sparse_vector
        sparse_query = pb.SparseVectorQuery(
            field=sv_field, indices=sv_indices, values=sv_values
        )
        return pb.Query(sparse_vector=sparse_query)

    if sparse_text is not None:
        st_field, st_text = sparse_text
        return pb.Query(
            sparse_vector=pb.SparseVectorQuery(field=st_field, text=st_text)
        )

    if dense_vector is not None:
        dv_field, dv_values = dense_vector
        dense_query = pb.DenseVectorQuery(
            field=dv_field,
            vector=dv_values,
            nprobe=nprobe,
            rerank_factor=rerank_factor,
        )
        return pb.Query(dense_vector=dense_query)

    # Fallback: an empty boolean query, used as the "match all" default.
    return pb.Query(boolean=pb.BooleanQuery())
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# NO CHECKED-IN PROTOBUF GENCODE
|
|
4
|
+
# source: hermes.proto
|
|
5
|
+
# Protobuf Python Version: 6.31.1
|
|
6
|
+
"""Generated protocol buffer code."""
|
|
7
|
+
from google.protobuf import descriptor as _descriptor
|
|
8
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
9
|
+
from google.protobuf import runtime_version as _runtime_version
|
|
10
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
11
|
+
from google.protobuf.internal import builder as _builder
|
|
12
|
+
# Import-time compatibility check: passes the gencode version (6.31.1, see the
# "Protobuf Python Version" header) and the source file name to the protobuf
# runtime. NOTE(review): presumably raises if the installed runtime is
# incompatible with this generated code -- confirm against protobuf docs.
_runtime_version.ValidateProtobufRuntimeVersion(
    _runtime_version.Domain.PUBLIC,
    6,
    31,
    1,
    '',
    'hermes.proto'
)
# @@protoc_insertion_point(imports)

# Default symbol database handle emitted by protoc; not referenced again in
# the visible part of this module.
_sym_db = _symbol_database.Default()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0chermes.proto\x12\x06hermes\"\x88\x02\n\x05Query\x12!\n\x04term\x18\x01 \x01(\x0b\x32\x11.hermes.TermQueryH\x00\x12\'\n\x07\x62oolean\x18\x02 \x01(\x0b\x32\x14.hermes.BooleanQueryH\x00\x12#\n\x05\x62oost\x18\x03 \x01(\x0b\x32\x12.hermes.BoostQueryH\x00\x12\x1f\n\x03\x61ll\x18\x04 \x01(\x0b\x32\x10.hermes.AllQueryH\x00\x12\x32\n\rsparse_vector\x18\x05 \x01(\x0b\x32\x19.hermes.SparseVectorQueryH\x00\x12\x30\n\x0c\x64\x65nse_vector\x18\x06 \x01(\x0b\x32\x18.hermes.DenseVectorQueryH\x00\x42\x07\n\x05query\"Q\n\x11SparseVectorQuery\x12\r\n\x05\x66ield\x18\x01 \x01(\t\x12\x0f\n\x07indices\x18\x02 \x03(\r\x12\x0e\n\x06values\x18\x03 \x03(\x02\x12\x0c\n\x04text\x18\x04 \x01(\t\"X\n\x10\x44\x65nseVectorQuery\x12\r\n\x05\x66ield\x18\x01 \x01(\t\x12\x0e\n\x06vector\x18\x02 \x03(\x02\x12\x0e\n\x06nprobe\x18\x03 \x01(\r\x12\x15\n\rrerank_factor\x18\x04 \x01(\r\"(\n\tTermQuery\x12\r\n\x05\x66ield\x18\x01 \x01(\t\x12\x0c\n\x04term\x18\x02 \x01(\t\"k\n\x0c\x42ooleanQuery\x12\x1b\n\x04must\x18\x01 \x03(\x0b\x32\r.hermes.Query\x12\x1d\n\x06should\x18\x02 \x03(\x0b\x32\r.hermes.Query\x12\x1f\n\x08must_not\x18\x03 \x03(\x0b\x32\r.hermes.Query\"9\n\nBoostQuery\x12\x1c\n\x05query\x18\x01 \x01(\x0b\x32\r.hermes.Query\x12\r\n\x05\x62oost\x18\x02 \x01(\x02\"\n\n\x08\x41llQuery\"x\n\rSearchRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\x12\x1c\n\x05query\x18\x02 \x01(\x0b\x32\r.hermes.Query\x12\r\n\x05limit\x18\x03 \x01(\r\x12\x0e\n\x06offset\x18\x04 \x01(\r\x12\x16\n\x0e\x66ields_to_load\x18\x05 \x03(\t\"\x9c\x01\n\tSearchHit\x12\x0e\n\x06\x64oc_id\x18\x01 \x01(\r\x12\r\n\x05score\x18\x02 \x01(\x02\x12-\n\x06\x66ields\x18\x03 \x03(\x0b\x32\x1d.hermes.SearchHit.FieldsEntry\x1a\x41\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.hermes.FieldValue:\x02\x38\x01\"\xdb\x01\n\nFieldValue\x12\x0e\n\x04text\x18\x01 \x01(\tH\x00\x12\r\n\x03u64\x18\x02 \x01(\x04H\x00\x12\r\n\x03i64\x18\x03 
\x01(\x03H\x00\x12\r\n\x03\x66\x36\x34\x18\x04 \x01(\x01H\x00\x12\x15\n\x0b\x62ytes_value\x18\x05 \x01(\x0cH\x00\x12-\n\rsparse_vector\x18\x06 \x01(\x0b\x32\x14.hermes.SparseVectorH\x00\x12+\n\x0c\x64\x65nse_vector\x18\x07 \x01(\x0b\x32\x13.hermes.DenseVectorH\x00\x12\x14\n\njson_value\x18\x08 \x01(\tH\x00\x42\x07\n\x05value\"/\n\x0cSparseVector\x12\x0f\n\x07indices\x18\x01 \x03(\r\x12\x0e\n\x06values\x18\x02 \x03(\x02\"\x1d\n\x0b\x44\x65nseVector\x12\x0e\n\x06values\x18\x01 \x03(\x02\"V\n\x0eSearchResponse\x12\x1f\n\x04hits\x18\x01 \x03(\x0b\x32\x11.hermes.SearchHit\x12\x12\n\ntotal_hits\x18\x02 \x01(\r\x12\x0f\n\x07took_ms\x18\x03 \x01(\x04\"8\n\x12GetDocumentRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\x12\x0e\n\x06\x64oc_id\x18\x02 \x01(\r\"\x91\x01\n\x13GetDocumentResponse\x12\x37\n\x06\x66ields\x18\x01 \x03(\x0b\x32\'.hermes.GetDocumentResponse.FieldsEntry\x1a\x41\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.hermes.FieldValue:\x02\x38\x01\")\n\x13GetIndexInfoRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\"b\n\x14GetIndexInfoResponse\x12\x12\n\nindex_name\x18\x01 \x01(\t\x12\x10\n\x08num_docs\x18\x02 \x01(\r\x12\x14\n\x0cnum_segments\x18\x03 \x01(\r\x12\x0e\n\x06schema\x18\x04 \x01(\t\"8\n\x12\x43reateIndexRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\x12\x0e\n\x06schema\x18\x02 \x01(\t\"&\n\x13\x43reateIndexResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"=\n\nFieldEntry\x12\x0c\n\x04name\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.hermes.FieldValue\"3\n\rNamedDocument\x12\"\n\x06\x66ields\x18\x01 \x03(\x0b\x32\x12.hermes.FieldEntry\"Z\n\x1a\x42\x61tchIndexDocumentsRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\x12(\n\tdocuments\x18\x02 \x03(\x0b\x32\x15.hermes.NamedDocument\"I\n\x1b\x42\x61tchIndexDocumentsResponse\x12\x15\n\rindexed_count\x18\x01 \x01(\r\x12\x13\n\x0b\x65rror_count\x18\x02 \x01(\r\"N\n\x14IndexDocumentRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\x12\"\n\x06\x66ields\x18\x02 
\x03(\x0b\x32\x12.hermes.FieldEntry\"/\n\x16IndexDocumentsResponse\x12\x15\n\rindexed_count\x18\x01 \x01(\r\"#\n\rCommitRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\"3\n\x0e\x43ommitResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x10\n\x08num_docs\x18\x02 \x01(\r\"\'\n\x11\x46orceMergeRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\";\n\x12\x46orceMergeResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x14\n\x0cnum_segments\x18\x02 \x01(\r\"(\n\x12\x44\x65leteIndexRequest\x12\x12\n\nindex_name\x18\x01 \x01(\t\"&\n\x13\x44\x65leteIndexResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x32\xdb\x01\n\rSearchService\x12\x37\n\x06Search\x12\x15.hermes.SearchRequest\x1a\x16.hermes.SearchResponse\x12\x46\n\x0bGetDocument\x12\x1a.hermes.GetDocumentRequest\x1a\x1b.hermes.GetDocumentResponse\x12I\n\x0cGetIndexInfo\x12\x1b.hermes.GetIndexInfoRequest\x1a\x1c.hermes.GetIndexInfoResponse2\xce\x03\n\x0cIndexService\x12\x46\n\x0b\x43reateIndex\x12\x1a.hermes.CreateIndexRequest\x1a\x1b.hermes.CreateIndexResponse\x12P\n\x0eIndexDocuments\x12\x1c.hermes.IndexDocumentRequest\x1a\x1e.hermes.IndexDocumentsResponse(\x01\x12^\n\x13\x42\x61tchIndexDocuments\x12\".hermes.BatchIndexDocumentsRequest\x1a#.hermes.BatchIndexDocumentsResponse\x12\x37\n\x06\x43ommit\x12\x15.hermes.CommitRequest\x1a\x16.hermes.CommitResponse\x12\x43\n\nForceMerge\x12\x19.hermes.ForceMergeRequest\x1a\x1a.hermes.ForceMergeResponse\x12\x46\n\x0b\x44\x65leteIndex\x12\x1a.hermes.DeleteIndexRequest\x1a\x1b.hermes.DeleteIndexResponseb\x06proto3')
|
|
28
|
+
|
|
29
|
+
# Build the message classes and descriptors from DESCRIPTOR and inject them
# into this module's namespace (generated code -- regenerate from
# hermes.proto instead of editing by hand).
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'hermes_pb2', _globals)
# The block below only runs when the C descriptor implementation is not in use.
# It resets options, marks the two map-entry messages with their serialized
# options, and records a start/end pair per message and service.
# NOTE(review): the pairs are presumably byte offsets into the serialized
# file descriptor above -- confirm against protobuf gencode docs.
if not _descriptor._USE_C_DESCRIPTORS:
  DESCRIPTOR._loaded_options = None
  _globals['_SEARCHHIT_FIELDSENTRY']._loaded_options = None
  _globals['_SEARCHHIT_FIELDSENTRY']._serialized_options = b'8\001'
  _globals['_GETDOCUMENTRESPONSE_FIELDSENTRY']._loaded_options = None
  _globals['_GETDOCUMENTRESPONSE_FIELDSENTRY']._serialized_options = b'8\001'
  _globals['_QUERY']._serialized_start=25
  _globals['_QUERY']._serialized_end=289
  _globals['_SPARSEVECTORQUERY']._serialized_start=291
  _globals['_SPARSEVECTORQUERY']._serialized_end=372
  _globals['_DENSEVECTORQUERY']._serialized_start=374
  _globals['_DENSEVECTORQUERY']._serialized_end=462
  _globals['_TERMQUERY']._serialized_start=464
  _globals['_TERMQUERY']._serialized_end=504
  _globals['_BOOLEANQUERY']._serialized_start=506
  _globals['_BOOLEANQUERY']._serialized_end=613
  _globals['_BOOSTQUERY']._serialized_start=615
  _globals['_BOOSTQUERY']._serialized_end=672
  _globals['_ALLQUERY']._serialized_start=674
  _globals['_ALLQUERY']._serialized_end=684
  _globals['_SEARCHREQUEST']._serialized_start=686
  _globals['_SEARCHREQUEST']._serialized_end=806
  _globals['_SEARCHHIT']._serialized_start=809
  _globals['_SEARCHHIT']._serialized_end=965
  _globals['_SEARCHHIT_FIELDSENTRY']._serialized_start=900
  _globals['_SEARCHHIT_FIELDSENTRY']._serialized_end=965
  _globals['_FIELDVALUE']._serialized_start=968
  _globals['_FIELDVALUE']._serialized_end=1187
  _globals['_SPARSEVECTOR']._serialized_start=1189
  _globals['_SPARSEVECTOR']._serialized_end=1236
  _globals['_DENSEVECTOR']._serialized_start=1238
  _globals['_DENSEVECTOR']._serialized_end=1267
  _globals['_SEARCHRESPONSE']._serialized_start=1269
  _globals['_SEARCHRESPONSE']._serialized_end=1355
  _globals['_GETDOCUMENTREQUEST']._serialized_start=1357
  _globals['_GETDOCUMENTREQUEST']._serialized_end=1413
  _globals['_GETDOCUMENTRESPONSE']._serialized_start=1416
  _globals['_GETDOCUMENTRESPONSE']._serialized_end=1561
  _globals['_GETDOCUMENTRESPONSE_FIELDSENTRY']._serialized_start=900
  _globals['_GETDOCUMENTRESPONSE_FIELDSENTRY']._serialized_end=965
  _globals['_GETINDEXINFOREQUEST']._serialized_start=1563
  _globals['_GETINDEXINFOREQUEST']._serialized_end=1604
  _globals['_GETINDEXINFORESPONSE']._serialized_start=1606
  _globals['_GETINDEXINFORESPONSE']._serialized_end=1704
  _globals['_CREATEINDEXREQUEST']._serialized_start=1706
  _globals['_CREATEINDEXREQUEST']._serialized_end=1762
  _globals['_CREATEINDEXRESPONSE']._serialized_start=1764
  _globals['_CREATEINDEXRESPONSE']._serialized_end=1802
  _globals['_FIELDENTRY']._serialized_start=1804
  _globals['_FIELDENTRY']._serialized_end=1865
  _globals['_NAMEDDOCUMENT']._serialized_start=1867
  _globals['_NAMEDDOCUMENT']._serialized_end=1918
  _globals['_BATCHINDEXDOCUMENTSREQUEST']._serialized_start=1920
  _globals['_BATCHINDEXDOCUMENTSREQUEST']._serialized_end=2010
  _globals['_BATCHINDEXDOCUMENTSRESPONSE']._serialized_start=2012
  _globals['_BATCHINDEXDOCUMENTSRESPONSE']._serialized_end=2085
  _globals['_INDEXDOCUMENTREQUEST']._serialized_start=2087
  _globals['_INDEXDOCUMENTREQUEST']._serialized_end=2165
  _globals['_INDEXDOCUMENTSRESPONSE']._serialized_start=2167
  _globals['_INDEXDOCUMENTSRESPONSE']._serialized_end=2214
  _globals['_COMMITREQUEST']._serialized_start=2216
  _globals['_COMMITREQUEST']._serialized_end=2251
  _globals['_COMMITRESPONSE']._serialized_start=2253
  _globals['_COMMITRESPONSE']._serialized_end=2304
  _globals['_FORCEMERGEREQUEST']._serialized_start=2306
  _globals['_FORCEMERGEREQUEST']._serialized_end=2345
  _globals['_FORCEMERGERESPONSE']._serialized_start=2347
  _globals['_FORCEMERGERESPONSE']._serialized_end=2406
  _globals['_DELETEINDEXREQUEST']._serialized_start=2408
  _globals['_DELETEINDEXREQUEST']._serialized_end=2448
  _globals['_DELETEINDEXRESPONSE']._serialized_start=2450
  _globals['_DELETEINDEXRESPONSE']._serialized_end=2488
  _globals['_SEARCHSERVICE']._serialized_start=2491
  _globals['_SEARCHSERVICE']._serialized_end=2710
  _globals['_INDEXSERVICE']._serialized_start=2713
  _globals['_INDEXSERVICE']._serialized_end=3175
|
|
108
|
+
# @@protoc_insertion_point(module_scope)
|