hermes-client-python 1.7.14__tar.gz → 1.7.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/PKG-INFO +1 -1
- {hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/pyproject.toml +1 -1
- hermes_client_python-1.7.15/src/hermes_client_python/__init__.py +45 -0
- {hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/src/hermes_client_python/client.py +180 -197
- hermes_client_python-1.7.15/src/hermes_client_python/types.py +185 -0
- hermes_client_python-1.7.14/src/hermes_client_python/__init__.py +0 -23
- hermes_client_python-1.7.14/src/hermes_client_python/types.py +0 -78
- {hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/.gitignore +0 -0
- {hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/README.md +0 -0
- {hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/src/hermes_client_python/hermes_pb2.py +0 -0
- {hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/src/hermes_client_python/hermes_pb2_grpc.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hermes-client-python
|
|
3
|
-
Version: 1.7.14
|
|
3
|
+
Version: 1.7.15
|
|
4
4
|
Summary: Async Python client for Hermes search server
|
|
5
5
|
Project-URL: Homepage, https://github.com/SpaceFrontiers/hermes
|
|
6
6
|
Project-URL: Repository, https://github.com/SpaceFrontiers/hermes
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Async Python client for Hermes search server."""
|
|
2
|
+
|
|
3
|
+
from .client import HermesClient
|
|
4
|
+
from .types import (
|
|
5
|
+
AllQuery,
|
|
6
|
+
BooleanQuery,
|
|
7
|
+
BoostQuery,
|
|
8
|
+
Combiner,
|
|
9
|
+
DenseVectorQuery,
|
|
10
|
+
DocAddress,
|
|
11
|
+
Document,
|
|
12
|
+
Filter,
|
|
13
|
+
IndexInfo,
|
|
14
|
+
MatchQuery,
|
|
15
|
+
Reranker,
|
|
16
|
+
SearchHit,
|
|
17
|
+
SearchResponse,
|
|
18
|
+
SearchTimings,
|
|
19
|
+
SparseVectorQuery,
|
|
20
|
+
TermQuery,
|
|
21
|
+
VectorFieldStats,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"HermesClient",
|
|
26
|
+
"AllQuery",
|
|
27
|
+
"BooleanQuery",
|
|
28
|
+
"BoostQuery",
|
|
29
|
+
"Combiner",
|
|
30
|
+
"DenseVectorQuery",
|
|
31
|
+
"DocAddress",
|
|
32
|
+
"Document",
|
|
33
|
+
"Filter",
|
|
34
|
+
"IndexInfo",
|
|
35
|
+
"MatchQuery",
|
|
36
|
+
"Reranker",
|
|
37
|
+
"SearchHit",
|
|
38
|
+
"SearchResponse",
|
|
39
|
+
"SearchTimings",
|
|
40
|
+
"SparseVectorQuery",
|
|
41
|
+
"TermQuery",
|
|
42
|
+
"VectorFieldStats",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
__version__ = "1.0.2"
|
{hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/src/hermes_client_python/client.py
RENAMED
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
"""Async Hermes client implementation.
|
|
1
|
+
"""Async Hermes client implementation.
|
|
2
|
+
|
|
3
|
+
All search types mirror the proto API structure exactly.
|
|
4
|
+
See types.py for Query, Reranker, Filter definitions.
|
|
5
|
+
"""
|
|
2
6
|
|
|
3
7
|
from __future__ import annotations
|
|
4
8
|
|
|
@@ -25,6 +29,8 @@ from .types import (
|
|
|
25
29
|
class HermesClient:
|
|
26
30
|
"""Async client for Hermes search server.
|
|
27
31
|
|
|
32
|
+
All search types mirror the proto API structure exactly.
|
|
33
|
+
|
|
28
34
|
Example:
|
|
29
35
|
async with HermesClient("localhost:50051") as client:
|
|
30
36
|
# Create index
|
|
@@ -43,9 +49,10 @@ class HermesClient:
|
|
|
43
49
|
await client.commit("articles")
|
|
44
50
|
|
|
45
51
|
# Search
|
|
46
|
-
results = await client.search("articles",
|
|
52
|
+
results = await client.search("articles",
|
|
53
|
+
query={"match": {"field": "title", "text": "hello"}})
|
|
47
54
|
for hit in results.hits:
|
|
48
|
-
print(hit.
|
|
55
|
+
print(hit.address, hit.score)
|
|
49
56
|
"""
|
|
50
57
|
|
|
51
58
|
def __init__(self, address: str = "localhost:50051"):
|
|
@@ -297,124 +304,95 @@ class HermesClient:
|
|
|
297
304
|
self,
|
|
298
305
|
index_name: str,
|
|
299
306
|
*,
|
|
300
|
-
|
|
301
|
-
match: tuple[str, str] | None = None,
|
|
302
|
-
boolean: dict[str, list[tuple[str, str]]] | None = None,
|
|
303
|
-
sparse_vector: tuple[str, list[int], list[float]] | None = None,
|
|
304
|
-
sparse_text: tuple[str, str] | None = None,
|
|
305
|
-
dense_vector: tuple[str, list[float]] | None = None,
|
|
306
|
-
nprobe: int = 0,
|
|
307
|
-
rerank_factor: int = 0,
|
|
308
|
-
heap_factor: float = 1.0,
|
|
309
|
-
combiner: str = "sum",
|
|
307
|
+
query: dict[str, Any],
|
|
310
308
|
limit: int = 10,
|
|
311
309
|
offset: int = 0,
|
|
312
310
|
fields_to_load: list[str] | None = None,
|
|
313
|
-
reranker:
|
|
314
|
-
reranker_combiner: str = "weighted_top_k",
|
|
315
|
-
matryoshka_dims: int = 0,
|
|
311
|
+
reranker: dict[str, Any] | None = None,
|
|
316
312
|
filters: list[dict[str, Any]] | None = None,
|
|
317
313
|
) -> SearchResponse:
|
|
318
314
|
"""Search for documents.
|
|
319
315
|
|
|
316
|
+
All parameters mirror the proto SearchRequest structure exactly.
|
|
317
|
+
``query`` is a dict with exactly one key matching the proto Query oneof.
|
|
318
|
+
|
|
320
319
|
Args:
|
|
321
320
|
index_name: Name of the index
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
and searches as OR of individual tokens (use for natural language)
|
|
325
|
-
boolean: Boolean query with "must", "should", "must_not" keys
|
|
326
|
-
sparse_vector: Sparse vector query as (field, indices, values) tuple
|
|
327
|
-
sparse_text: Sparse vector query with server-side tokenization as (field, text) tuple
|
|
328
|
-
dense_vector: Dense vector query as (field, vector) tuple
|
|
329
|
-
nprobe: Number of clusters to probe for dense vector (IVF indexes)
|
|
330
|
-
rerank_factor: Re-ranking factor for dense vector search
|
|
331
|
-
heap_factor: Approximate search factor for sparse vectors (1.0=exact, 0.8=faster)
|
|
332
|
-
combiner: Score combiner for multi-value fields: "sum", "max", or "avg"
|
|
321
|
+
query: Query dict with one key: "term", "match", "boolean",
|
|
322
|
+
"sparse_vector", "dense_vector", "boost", or "all".
|
|
333
323
|
limit: Maximum number of results
|
|
334
324
|
offset: Offset for pagination
|
|
335
325
|
fields_to_load: List of fields to include in results
|
|
336
|
-
reranker:
|
|
337
|
-
|
|
338
|
-
l1_limit=0 defaults to 10x the final limit.
|
|
339
|
-
reranker_combiner: Score combiner for reranker multi-value fields:
|
|
340
|
-
"log_sum_exp" (default), "max", "avg", "sum"
|
|
341
|
-
matryoshka_dims: Matryoshka pre-filter dimensions for reranker.
|
|
342
|
-
When > 0, scores candidates on leading N dimensions first,
|
|
343
|
-
keeps top survivors, then does full-dimension exact scoring.
|
|
344
|
-
0 = disabled (default).
|
|
345
|
-
filters: Fast-field filters. Each dict has "field" and one condition key:
|
|
346
|
-
- {"field": "status", "eq_text": "active"}
|
|
347
|
-
- {"field": "price", "eq_u64": 100}
|
|
348
|
-
- {"field": "price", "range": {"min": 10.0, "max": 100.0}}
|
|
349
|
-
- {"field": "category", "in_text": ["books", "movies"]}
|
|
350
|
-
- {"field": "count", "in_u64": [1, 2, 3]}
|
|
326
|
+
reranker: Reranker dict matching proto Reranker message
|
|
327
|
+
filters: List of filter dicts matching proto Filter message
|
|
351
328
|
|
|
352
329
|
Returns:
|
|
353
330
|
SearchResponse with hits
|
|
354
331
|
|
|
355
332
|
Examples:
|
|
356
333
|
# Term query (exact single token)
|
|
357
|
-
results = await client.search("articles",
|
|
334
|
+
results = await client.search("articles",
|
|
335
|
+
query={"term": {"field": "title", "term": "hello"}})
|
|
358
336
|
|
|
359
337
|
# Match query (full-text, tokenized server-side)
|
|
360
|
-
results = await client.search("articles",
|
|
338
|
+
results = await client.search("articles",
|
|
339
|
+
query={"match": {"field": "title", "text": "what is hemoglobin"}})
|
|
361
340
|
|
|
362
341
|
# Boolean query
|
|
363
|
-
results = await client.search("articles",
|
|
364
|
-
"
|
|
365
|
-
|
|
366
|
-
|
|
342
|
+
results = await client.search("articles",
|
|
343
|
+
query={"boolean": {
|
|
344
|
+
"must": [{"match": {"field": "title", "text": "hello"}}],
|
|
345
|
+
"should": [{"match": {"field": "body", "text": "world"}}],
|
|
346
|
+
}})
|
|
367
347
|
|
|
368
|
-
# Sparse
|
|
348
|
+
# Sparse text query (server-side tokenization) with pruning
|
|
369
349
|
results = await client.search("docs",
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
350
|
+
query={"sparse_vector": {
|
|
351
|
+
"field": "embedding",
|
|
352
|
+
"text": "machine learning",
|
|
353
|
+
"pruning": 0.5,
|
|
354
|
+
}},
|
|
355
|
+
fields_to_load=["title", "body"])
|
|
356
|
+
|
|
357
|
+
# Sparse vector query (pre-computed)
|
|
375
358
|
results = await client.search("docs",
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
359
|
+
query={"sparse_vector": {
|
|
360
|
+
"field": "embedding",
|
|
361
|
+
"indices": [1, 5, 10],
|
|
362
|
+
"values": [0.5, 0.3, 0.2],
|
|
363
|
+
}})
|
|
379
364
|
|
|
380
|
-
# Dense vector query
|
|
365
|
+
# Dense vector query with reranker
|
|
381
366
|
results = await client.search("docs",
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
367
|
+
query={"dense_vector": {
|
|
368
|
+
"field": "embedding",
|
|
369
|
+
"vector": [0.1, 0.2, 0.3],
|
|
370
|
+
"nprobe": 10,
|
|
371
|
+
}},
|
|
372
|
+
reranker={
|
|
373
|
+
"field": "embedding",
|
|
374
|
+
"vector": [0.1, 0.2, 0.3],
|
|
375
|
+
"limit": 100,
|
|
376
|
+
},
|
|
377
|
+
fields_to_load=["title"])
|
|
378
|
+
|
|
379
|
+
# Filters
|
|
380
|
+
results = await client.search("docs",
|
|
381
|
+
query={"match": {"field": "title", "text": "hello"}},
|
|
382
|
+
filters=[
|
|
383
|
+
{"field": "status", "eq_text": "active"},
|
|
384
|
+
{"field": "price", "range": {"min": 10.0, "max": 100.0}},
|
|
385
|
+
])
|
|
385
386
|
"""
|
|
386
387
|
self._ensure_connected()
|
|
387
388
|
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
boolean=boolean,
|
|
392
|
-
sparse_vector=sparse_vector,
|
|
393
|
-
sparse_text=sparse_text,
|
|
394
|
-
dense_vector=dense_vector,
|
|
395
|
-
nprobe=nprobe,
|
|
396
|
-
rerank_factor=rerank_factor,
|
|
397
|
-
heap_factor=heap_factor,
|
|
398
|
-
combiner=combiner,
|
|
399
|
-
)
|
|
400
|
-
|
|
401
|
-
pb_reranker = None
|
|
402
|
-
if reranker is not None:
|
|
403
|
-
field_name, query_vector, l1_limit = reranker
|
|
404
|
-
reranker_combiner_value = _reranker_combiner_to_proto(reranker_combiner)
|
|
405
|
-
pb_reranker = pb.Reranker(
|
|
406
|
-
field=field_name,
|
|
407
|
-
vector=query_vector,
|
|
408
|
-
limit=l1_limit,
|
|
409
|
-
combiner=reranker_combiner_value,
|
|
410
|
-
matryoshka_dims=matryoshka_dims,
|
|
411
|
-
)
|
|
412
|
-
|
|
413
|
-
pb_filters = _build_filters(filters) if filters else []
|
|
389
|
+
pb_query = _build_query(query)
|
|
390
|
+
pb_reranker = _build_reranker(reranker) if reranker else None
|
|
391
|
+
pb_filters = [_build_filter(f) for f in filters] if filters else []
|
|
414
392
|
|
|
415
393
|
request = pb.SearchRequest(
|
|
416
394
|
index_name=index_name,
|
|
417
|
-
query=
|
|
395
|
+
query=pb_query,
|
|
418
396
|
limit=limit,
|
|
419
397
|
offset=offset,
|
|
420
398
|
fields_to_load=fields_to_load or [],
|
|
@@ -625,129 +603,134 @@ def _from_field_value(fv: pb.FieldValue) -> Any:
|
|
|
625
603
|
return None
|
|
626
604
|
|
|
627
605
|
|
|
628
|
-
|
|
629
|
-
""
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
def _build_filters(filters: list[dict[str, Any]]) -> list[pb.Filter]:
|
|
645
|
-
"""Convert filter dicts to protobuf Filter messages."""
|
|
646
|
-
result = []
|
|
647
|
-
for f in filters:
|
|
648
|
-
field = f["field"]
|
|
649
|
-
kwargs: dict[str, Any] = {"field": field}
|
|
650
|
-
if "eq_u64" in f:
|
|
651
|
-
kwargs["eq_u64"] = int(f["eq_u64"])
|
|
652
|
-
elif "eq_i64" in f:
|
|
653
|
-
kwargs["eq_i64"] = int(f["eq_i64"])
|
|
654
|
-
elif "eq_f64" in f:
|
|
655
|
-
kwargs["eq_f64"] = float(f["eq_f64"])
|
|
656
|
-
elif "eq_text" in f:
|
|
657
|
-
kwargs["eq_text"] = str(f["eq_text"])
|
|
658
|
-
elif "range" in f:
|
|
659
|
-
r = f["range"]
|
|
660
|
-
range_kwargs = {}
|
|
661
|
-
if "min" in r:
|
|
662
|
-
range_kwargs["min"] = float(r["min"])
|
|
663
|
-
if "max" in r:
|
|
664
|
-
range_kwargs["max"] = float(r["max"])
|
|
665
|
-
kwargs["range"] = pb.RangeFilter(**range_kwargs)
|
|
666
|
-
elif "in_text" in f:
|
|
667
|
-
kwargs["in_values"] = pb.InFilter(text_values=list(f["in_text"]))
|
|
668
|
-
elif "in_u64" in f:
|
|
669
|
-
kwargs["in_values"] = pb.InFilter(u64_values=[int(v) for v in f["in_u64"]])
|
|
670
|
-
elif "in_i64" in f:
|
|
671
|
-
kwargs["in_values"] = pb.InFilter(i64_values=[int(v) for v in f["in_i64"]])
|
|
672
|
-
result.append(pb.Filter(**kwargs))
|
|
673
|
-
return result
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
def _build_query(
|
|
677
|
-
*,
|
|
678
|
-
term: tuple[str, str] | None = None,
|
|
679
|
-
match: tuple[str, str] | None = None,
|
|
680
|
-
boolean: dict[str, list[tuple[str, str]]] | None = None,
|
|
681
|
-
sparse_vector: tuple[str, list[int], list[float]] | None = None,
|
|
682
|
-
sparse_text: tuple[str, str] | None = None,
|
|
683
|
-
dense_vector: tuple[str, list[float]] | None = None,
|
|
684
|
-
nprobe: int = 0,
|
|
685
|
-
rerank_factor: int = 0,
|
|
686
|
-
heap_factor: float = 1.0,
|
|
687
|
-
combiner: str = "sum",
|
|
688
|
-
) -> pb.Query:
|
|
689
|
-
"""Build a protobuf Query from parameters."""
|
|
690
|
-
if term is not None:
|
|
691
|
-
field, value = term
|
|
692
|
-
return pb.Query(term=pb.TermQuery(field=field, term=value))
|
|
693
|
-
|
|
694
|
-
if match is not None:
|
|
695
|
-
field, text = match
|
|
696
|
-
return pb.Query(match=pb.MatchQuery(field=field, text=text))
|
|
697
|
-
|
|
698
|
-
if boolean is not None:
|
|
699
|
-
must = [
|
|
700
|
-
pb.Query(match=pb.MatchQuery(field=f, text=t))
|
|
701
|
-
for f, t in boolean.get("must", [])
|
|
702
|
-
]
|
|
703
|
-
should = [
|
|
704
|
-
pb.Query(match=pb.MatchQuery(field=f, text=t))
|
|
705
|
-
for f, t in boolean.get("should", [])
|
|
706
|
-
]
|
|
707
|
-
must_not = [
|
|
708
|
-
pb.Query(match=pb.MatchQuery(field=f, text=t))
|
|
709
|
-
for f, t in boolean.get("must_not", [])
|
|
710
|
-
]
|
|
711
|
-
return pb.Query(
|
|
712
|
-
boolean=pb.BooleanQuery(must=must, should=should, must_not=must_not)
|
|
713
|
-
)
|
|
606
|
+
_COMBINER_MAP: dict[str, int] = {
|
|
607
|
+
"log_sum_exp": 0,
|
|
608
|
+
"max": 1,
|
|
609
|
+
"avg": 2,
|
|
610
|
+
"sum": 3,
|
|
611
|
+
"weighted_top_k": 4,
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _combiner_to_proto(combiner: str | None) -> int:
|
|
616
|
+
"""Convert combiner string to proto MultiValueCombiner enum value."""
|
|
617
|
+
if combiner is None:
|
|
618
|
+
return 0 # LOG_SUM_EXP default
|
|
619
|
+
return _COMBINER_MAP.get(combiner.lower(), 0)
|
|
620
|
+
|
|
714
621
|
|
|
715
|
-
|
|
622
|
+
def _build_query(q: dict[str, Any]) -> pb.Query:
|
|
623
|
+
"""Recursively convert a Query dict to protobuf Query.
|
|
716
624
|
|
|
717
|
-
|
|
718
|
-
|
|
625
|
+
The dict must have exactly one key matching the proto Query oneof:
|
|
626
|
+
"term", "match", "boolean", "sparse_vector", "dense_vector", "boost", "all".
|
|
627
|
+
"""
|
|
628
|
+
if "term" in q:
|
|
629
|
+
t = q["term"]
|
|
630
|
+
return pb.Query(term=pb.TermQuery(field=t["field"], term=t["term"]))
|
|
631
|
+
|
|
632
|
+
if "match" in q:
|
|
633
|
+
m = q["match"]
|
|
634
|
+
return pb.Query(match=pb.MatchQuery(field=m["field"], text=m["text"]))
|
|
635
|
+
|
|
636
|
+
if "boolean" in q:
|
|
637
|
+
b = q["boolean"]
|
|
719
638
|
return pb.Query(
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
combiner=combiner_value,
|
|
725
|
-
heap_factor=heap_factor,
|
|
639
|
+
boolean=pb.BooleanQuery(
|
|
640
|
+
must=[_build_query(sq) for sq in b.get("must", [])],
|
|
641
|
+
should=[_build_query(sq) for sq in b.get("should", [])],
|
|
642
|
+
must_not=[_build_query(sq) for sq in b.get("must_not", [])],
|
|
726
643
|
)
|
|
727
644
|
)
|
|
728
645
|
|
|
729
|
-
if
|
|
730
|
-
|
|
646
|
+
if "sparse_vector" in q:
|
|
647
|
+
sv = q["sparse_vector"]
|
|
731
648
|
return pb.Query(
|
|
732
649
|
sparse_vector=pb.SparseVectorQuery(
|
|
733
|
-
field=field,
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
650
|
+
field=sv["field"],
|
|
651
|
+
indices=sv.get("indices", []),
|
|
652
|
+
values=sv.get("values", []),
|
|
653
|
+
text=sv.get("text", ""),
|
|
654
|
+
combiner=_combiner_to_proto(sv.get("combiner")),
|
|
655
|
+
heap_factor=sv.get("heap_factor", 0),
|
|
656
|
+
combiner_temperature=sv.get("combiner_temperature", 0),
|
|
657
|
+
combiner_top_k=sv.get("combiner_top_k", 0),
|
|
658
|
+
combiner_decay=sv.get("combiner_decay", 0),
|
|
659
|
+
weight_threshold=sv.get("weight_threshold", 0),
|
|
660
|
+
max_query_dims=sv.get("max_query_dims", 0),
|
|
661
|
+
pruning=sv.get("pruning", 0),
|
|
737
662
|
)
|
|
738
663
|
)
|
|
739
664
|
|
|
740
|
-
if dense_vector is not None:
|
|
741
|
-
|
|
665
|
+
if "dense_vector" in q:
|
|
666
|
+
dv = q["dense_vector"]
|
|
742
667
|
return pb.Query(
|
|
743
668
|
dense_vector=pb.DenseVectorQuery(
|
|
744
|
-
field=field,
|
|
745
|
-
vector=vector,
|
|
746
|
-
nprobe=nprobe,
|
|
747
|
-
rerank_factor=rerank_factor,
|
|
748
|
-
combiner=
|
|
669
|
+
field=dv["field"],
|
|
670
|
+
vector=dv["vector"],
|
|
671
|
+
nprobe=dv.get("nprobe", 0),
|
|
672
|
+
rerank_factor=dv.get("rerank_factor", 0),
|
|
673
|
+
combiner=_combiner_to_proto(dv.get("combiner")),
|
|
674
|
+
combiner_temperature=dv.get("combiner_temperature", 0),
|
|
675
|
+
combiner_top_k=dv.get("combiner_top_k", 0),
|
|
676
|
+
combiner_decay=dv.get("combiner_decay", 0),
|
|
749
677
|
)
|
|
750
678
|
)
|
|
751
679
|
|
|
752
|
-
|
|
680
|
+
if "boost" in q:
|
|
681
|
+
bq = q["boost"]
|
|
682
|
+
return pb.Query(
|
|
683
|
+
boost=pb.BoostQuery(
|
|
684
|
+
query=_build_query(bq["query"]),
|
|
685
|
+
boost=bq["boost"],
|
|
686
|
+
)
|
|
687
|
+
)
|
|
688
|
+
|
|
689
|
+
if "all" in q:
|
|
690
|
+
return pb.Query(all=pb.AllQuery())
|
|
691
|
+
|
|
692
|
+
# Default: match all
|
|
753
693
|
return pb.Query(boolean=pb.BooleanQuery())
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def _build_reranker(r: dict[str, Any]) -> pb.Reranker:
|
|
697
|
+
"""Convert a Reranker dict to protobuf Reranker."""
|
|
698
|
+
return pb.Reranker(
|
|
699
|
+
field=r["field"],
|
|
700
|
+
vector=r["vector"],
|
|
701
|
+
limit=r.get("limit", 0),
|
|
702
|
+
combiner=_combiner_to_proto(r.get("combiner")),
|
|
703
|
+
combiner_temperature=r.get("combiner_temperature", 0),
|
|
704
|
+
combiner_top_k=r.get("combiner_top_k", 0),
|
|
705
|
+
combiner_decay=r.get("combiner_decay", 0),
|
|
706
|
+
matryoshka_dims=r.get("matryoshka_dims", 0),
|
|
707
|
+
)
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
def _build_filter(f: dict[str, Any]) -> pb.Filter:
|
|
711
|
+
"""Convert a Filter dict to protobuf Filter."""
|
|
712
|
+
kwargs: dict[str, Any] = {"field": f["field"]}
|
|
713
|
+
if "eq_u64" in f:
|
|
714
|
+
kwargs["eq_u64"] = int(f["eq_u64"])
|
|
715
|
+
elif "eq_i64" in f:
|
|
716
|
+
kwargs["eq_i64"] = int(f["eq_i64"])
|
|
717
|
+
elif "eq_f64" in f:
|
|
718
|
+
kwargs["eq_f64"] = float(f["eq_f64"])
|
|
719
|
+
elif "eq_text" in f:
|
|
720
|
+
kwargs["eq_text"] = str(f["eq_text"])
|
|
721
|
+
elif "range" in f:
|
|
722
|
+
r = f["range"]
|
|
723
|
+
range_kwargs = {}
|
|
724
|
+
if "min" in r:
|
|
725
|
+
range_kwargs["min"] = float(r["min"])
|
|
726
|
+
if "max" in r:
|
|
727
|
+
range_kwargs["max"] = float(r["max"])
|
|
728
|
+
kwargs["range"] = pb.RangeFilter(**range_kwargs)
|
|
729
|
+
elif "in_values" in f:
|
|
730
|
+
iv = f["in_values"]
|
|
731
|
+
kwargs["in_values"] = pb.InFilter(
|
|
732
|
+
text_values=iv.get("text_values", []),
|
|
733
|
+
u64_values=[int(v) for v in iv.get("u64_values", [])],
|
|
734
|
+
i64_values=[int(v) for v in iv.get("i64_values", [])],
|
|
735
|
+
)
|
|
736
|
+
return pb.Filter(**kwargs)
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Type definitions for Hermes client.
|
|
2
|
+
|
|
3
|
+
All search-related types mirror the proto API structure exactly.
|
|
4
|
+
Query is a dict with exactly one key matching the proto Query oneof variant.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Any, Literal, TypedDict
|
|
9
|
+
|
|
10
|
+
# =============================================================================
|
|
11
|
+
# Multi-value score combiner (mirrors proto MultiValueCombiner)
|
|
12
|
+
# =============================================================================
|
|
13
|
+
|
|
14
|
+
Combiner = Literal["log_sum_exp", "max", "avg", "sum", "weighted_top_k"]
|
|
15
|
+
|
|
16
|
+
# =============================================================================
|
|
17
|
+
# Query types (mirrors proto Query oneof)
|
|
18
|
+
# =============================================================================
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TermQuery(TypedDict):
|
|
22
|
+
field: str
|
|
23
|
+
term: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MatchQuery(TypedDict):
|
|
27
|
+
field: str
|
|
28
|
+
text: str
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class BooleanQuery(TypedDict, total=False):
|
|
32
|
+
must: list["Query"]
|
|
33
|
+
should: list["Query"]
|
|
34
|
+
must_not: list["Query"]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class BoostQuery(TypedDict):
|
|
38
|
+
query: "Query"
|
|
39
|
+
boost: float
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AllQuery(TypedDict):
|
|
43
|
+
pass
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SparseVectorQuery(TypedDict, total=False):
|
|
47
|
+
field: str # required but total=False for optional fields
|
|
48
|
+
indices: list[int]
|
|
49
|
+
values: list[float]
|
|
50
|
+
text: str
|
|
51
|
+
combiner: Combiner
|
|
52
|
+
heap_factor: float
|
|
53
|
+
combiner_temperature: float
|
|
54
|
+
combiner_top_k: int
|
|
55
|
+
combiner_decay: float
|
|
56
|
+
weight_threshold: float
|
|
57
|
+
max_query_dims: int
|
|
58
|
+
pruning: float
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class DenseVectorQuery(TypedDict, total=False):
|
|
62
|
+
field: str # required but total=False for optional fields
|
|
63
|
+
vector: list[float]
|
|
64
|
+
nprobe: int
|
|
65
|
+
rerank_factor: int
|
|
66
|
+
combiner: Combiner
|
|
67
|
+
combiner_temperature: float
|
|
68
|
+
combiner_top_k: int
|
|
69
|
+
combiner_decay: float
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Query is a dict with exactly one key: "term", "match", "boolean",
|
|
73
|
+
# "sparse_vector", "dense_vector", "boost", or "all".
|
|
74
|
+
Query = dict[str, Any]
|
|
75
|
+
|
|
76
|
+
# =============================================================================
|
|
77
|
+
# Reranker (mirrors proto Reranker)
|
|
78
|
+
# =============================================================================
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class Reranker(TypedDict, total=False):
|
|
82
|
+
field: str
|
|
83
|
+
vector: list[float]
|
|
84
|
+
limit: int
|
|
85
|
+
combiner: Combiner
|
|
86
|
+
combiner_temperature: float
|
|
87
|
+
combiner_top_k: int
|
|
88
|
+
combiner_decay: float
|
|
89
|
+
matryoshka_dims: int
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# =============================================================================
|
|
93
|
+
# Filter (mirrors proto Filter)
|
|
94
|
+
# =============================================================================
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class Filter(TypedDict, total=False):
|
|
98
|
+
field: str
|
|
99
|
+
eq_u64: int
|
|
100
|
+
eq_i64: int
|
|
101
|
+
eq_f64: float
|
|
102
|
+
eq_text: str
|
|
103
|
+
range: dict[str, float] # {"min": ..., "max": ...}
|
|
104
|
+
in_values: dict[
|
|
105
|
+
str, list
|
|
106
|
+
] # {"text_values": [...], "u64_values": [...], "i64_values": [...]}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# =============================================================================
|
|
110
|
+
# Response types
|
|
111
|
+
# =============================================================================
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass
|
|
115
|
+
class Document:
|
|
116
|
+
"""A document with field values."""
|
|
117
|
+
|
|
118
|
+
fields: dict[str, Any] = field(default_factory=dict)
|
|
119
|
+
|
|
120
|
+
def __getitem__(self, key: str) -> Any:
|
|
121
|
+
return self.fields[key]
|
|
122
|
+
|
|
123
|
+
def __setitem__(self, key: str, value: Any) -> None:
|
|
124
|
+
self.fields[key] = value
|
|
125
|
+
|
|
126
|
+
def get(self, key: str, default: Any = None) -> Any:
|
|
127
|
+
return self.fields.get(key, default)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass
|
|
131
|
+
class DocAddress:
|
|
132
|
+
"""Unique document address: segment + local doc_id."""
|
|
133
|
+
|
|
134
|
+
segment_id: str
|
|
135
|
+
doc_id: int
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@dataclass
|
|
139
|
+
class SearchHit:
|
|
140
|
+
"""A single search result."""
|
|
141
|
+
|
|
142
|
+
address: DocAddress
|
|
143
|
+
score: float
|
|
144
|
+
fields: dict[str, Any] = field(default_factory=dict)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@dataclass
|
|
148
|
+
class SearchTimings:
|
|
149
|
+
"""Detailed timing breakdown for search phases (all values in microseconds)."""
|
|
150
|
+
|
|
151
|
+
search_us: int
|
|
152
|
+
rerank_us: int
|
|
153
|
+
load_us: int
|
|
154
|
+
total_us: int
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@dataclass
|
|
158
|
+
class SearchResponse:
|
|
159
|
+
"""Search response with hits and metadata."""
|
|
160
|
+
|
|
161
|
+
hits: list[SearchHit]
|
|
162
|
+
total_hits: int
|
|
163
|
+
took_ms: int
|
|
164
|
+
timings: SearchTimings | None = None
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@dataclass
|
|
168
|
+
class VectorFieldStats:
|
|
169
|
+
"""Per-field vector statistics."""
|
|
170
|
+
|
|
171
|
+
field_name: str
|
|
172
|
+
vector_type: str # "dense" or "sparse"
|
|
173
|
+
total_vectors: int
|
|
174
|
+
dimension: int
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@dataclass
|
|
178
|
+
class IndexInfo:
|
|
179
|
+
"""Information about an index."""
|
|
180
|
+
|
|
181
|
+
index_name: str
|
|
182
|
+
num_docs: int
|
|
183
|
+
num_segments: int
|
|
184
|
+
schema: str
|
|
185
|
+
vector_stats: list[VectorFieldStats] = field(default_factory=list)
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
"""Async Python client for Hermes search server."""
|
|
2
|
-
|
|
3
|
-
from .client import HermesClient
|
|
4
|
-
from .types import (
|
|
5
|
-
DocAddress,
|
|
6
|
-
Document,
|
|
7
|
-
IndexInfo,
|
|
8
|
-
SearchHit,
|
|
9
|
-
SearchResponse,
|
|
10
|
-
VectorFieldStats,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
__all__ = [
|
|
14
|
-
"HermesClient",
|
|
15
|
-
"DocAddress",
|
|
16
|
-
"Document",
|
|
17
|
-
"SearchHit",
|
|
18
|
-
"SearchResponse",
|
|
19
|
-
"IndexInfo",
|
|
20
|
-
"VectorFieldStats",
|
|
21
|
-
]
|
|
22
|
-
|
|
23
|
-
__version__ = "1.0.2"
|
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
"""Type definitions for Hermes client."""
|
|
2
|
-
|
|
3
|
-
from dataclasses import dataclass, field
|
|
4
|
-
from typing import Any
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@dataclass
|
|
8
|
-
class Document:
|
|
9
|
-
"""A document with field values."""
|
|
10
|
-
|
|
11
|
-
fields: dict[str, Any] = field(default_factory=dict)
|
|
12
|
-
|
|
13
|
-
def __getitem__(self, key: str) -> Any:
|
|
14
|
-
return self.fields[key]
|
|
15
|
-
|
|
16
|
-
def __setitem__(self, key: str, value: Any) -> None:
|
|
17
|
-
self.fields[key] = value
|
|
18
|
-
|
|
19
|
-
def get(self, key: str, default: Any = None) -> Any:
|
|
20
|
-
return self.fields.get(key, default)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@dataclass
|
|
24
|
-
class DocAddress:
|
|
25
|
-
"""Unique document address: segment + local doc_id."""
|
|
26
|
-
|
|
27
|
-
segment_id: str
|
|
28
|
-
doc_id: int
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
@dataclass
|
|
32
|
-
class SearchHit:
|
|
33
|
-
"""A single search result."""
|
|
34
|
-
|
|
35
|
-
address: DocAddress
|
|
36
|
-
score: float
|
|
37
|
-
fields: dict[str, Any] = field(default_factory=dict)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@dataclass
|
|
41
|
-
class SearchTimings:
|
|
42
|
-
"""Detailed timing breakdown for search phases (all values in microseconds)."""
|
|
43
|
-
|
|
44
|
-
search_us: int
|
|
45
|
-
rerank_us: int
|
|
46
|
-
load_us: int
|
|
47
|
-
total_us: int
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
@dataclass
|
|
51
|
-
class SearchResponse:
|
|
52
|
-
"""Search response with hits and metadata."""
|
|
53
|
-
|
|
54
|
-
hits: list[SearchHit]
|
|
55
|
-
total_hits: int
|
|
56
|
-
took_ms: int
|
|
57
|
-
timings: SearchTimings | None = None
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
@dataclass
|
|
61
|
-
class VectorFieldStats:
|
|
62
|
-
"""Per-field vector statistics."""
|
|
63
|
-
|
|
64
|
-
field_name: str
|
|
65
|
-
vector_type: str # "dense" or "sparse"
|
|
66
|
-
total_vectors: int
|
|
67
|
-
dimension: int
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
@dataclass
|
|
71
|
-
class IndexInfo:
|
|
72
|
-
"""Information about an index."""
|
|
73
|
-
|
|
74
|
-
index_name: str
|
|
75
|
-
num_docs: int
|
|
76
|
-
num_segments: int
|
|
77
|
-
schema: str
|
|
78
|
-
vector_stats: list[VectorFieldStats] = field(default_factory=list)
|
|
File without changes
|
|
File without changes
|
{hermes_client_python-1.7.14 → hermes_client_python-1.7.15}/src/hermes_client_python/hermes_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|