betterdb-valkey-search-kit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- betterdb_valkey_search_kit-0.1.0/.gitignore +6 -0
- betterdb_valkey_search_kit-0.1.0/CHANGELOG.md +12 -0
- betterdb_valkey_search_kit-0.1.0/PKG-INFO +106 -0
- betterdb_valkey_search_kit-0.1.0/README.md +94 -0
- betterdb_valkey_search_kit-0.1.0/betterdb_valkey_search_kit/__init__.py +26 -0
- betterdb_valkey_search_kit-0.1.0/betterdb_valkey_search_kit/encoding.py +17 -0
- betterdb_valkey_search_kit-0.1.0/betterdb_valkey_search_kit/errors.py +19 -0
- betterdb_valkey_search_kit-0.1.0/betterdb_valkey_search_kit/ft_info.py +97 -0
- betterdb_valkey_search_kit-0.1.0/betterdb_valkey_search_kit/ft_search.py +87 -0
- betterdb_valkey_search_kit-0.1.0/betterdb_valkey_search_kit/tags.py +15 -0
- betterdb_valkey_search_kit-0.1.0/pyproject.toml +28 -0
- betterdb_valkey_search_kit-0.1.0/tests/__init__.py +0 -0
- betterdb_valkey_search_kit-0.1.0/tests/test_encoding.py +28 -0
- betterdb_valkey_search_kit-0.1.0/tests/test_errors.py +50 -0
- betterdb_valkey_search_kit-0.1.0/tests/test_ft_info.py +76 -0
- betterdb_valkey_search_kit-0.1.0/tests/test_ft_search.py +107 -0
- betterdb_valkey_search_kit-0.1.0/tests/test_tags.py +21 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.1.0] - 2026-06-23
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- Initial release. Python equivalent of the TypeScript `@betterdb/valkey-search-kit`.
|
|
8
|
+
- `encode_float32` / `decode_float32` — little-endian Float32 vector encoding for embeddings.
|
|
9
|
+
- `escape_tag` — TAG filter value escaping (including spaces).
|
|
10
|
+
- `parse_ft_search_response` — bytes-aware FT.SEARCH reply parsing; never raises.
|
|
11
|
+
- `parse_dimension_from_info` / `parse_ft_info_stats` — version-skew-tolerant FT.INFO parsing.
|
|
12
|
+
- `is_index_not_found_error` — "index does not exist" error classification.
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: betterdb-valkey-search-kit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared Valkey Search (FT.*) helpers: float32 vector encoding, FT.SEARCH reply parsing, version-skew FT.INFO parsing, TAG escaping, and error classification.
|
|
5
|
+
Project-URL: Repository, https://github.com/BetterDB-inc/monitor
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: ft,redis,redisearch,valkey,valkey-search,vector-search
|
|
8
|
+
Requires-Python: >=3.11
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# @betterdb/valkey-search-kit (Python)
|
|
14
|
+
|
|
15
|
+
`betterdb-valkey-search-kit` — shared low-level helpers for working with Valkey
|
|
16
|
+
Search (`FT.*`) from Python. This is the Python equivalent of the TypeScript
|
|
17
|
+
`@betterdb/valkey-search-kit` package, and the shared foundation the
|
|
18
|
+
`betterdb-retrieval` and `betterdb-agent-memory` packages build on.
|
|
19
|
+
|
|
20
|
+
It has **no runtime dependencies** and exposes only pure functions, so it stays
|
|
21
|
+
trivial to vendor and test.
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install betterdb-valkey-search-kit
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## API
|
|
30
|
+
|
|
31
|
+
### Vector encoding
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from betterdb_valkey_search_kit import encode_float32, decode_float32
|
|
35
|
+
|
|
36
|
+
blob = encode_float32([0.1, 0.2, 0.3]) # little-endian Float32 bytes
|
|
37
|
+
vec = decode_float32(blob) # back to list[float]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Use `encode_float32` to store embeddings as binary `HSET` field values and as
|
|
41
|
+
the `PARAMS` vector for a KNN `FT.SEARCH`.
|
|
42
|
+
|
|
43
|
+
### TAG escaping
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
from betterdb_valkey_search_kit import escape_tag
|
|
47
|
+
|
|
48
|
+
f"@model:{{{escape_tag('gpt-4o')}}}" # -> "@model:{gpt\\-4o}"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Escapes every character with special meaning in the TAG filter syntax,
|
|
52
|
+
**including spaces** (unescaped spaces are treated as OR term separators).
|
|
53
|
+
|
|
54
|
+
### FT.SEARCH reply parsing
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from betterdb_valkey_search_kit import parse_ft_search_response
|
|
58
|
+
|
|
59
|
+
raw = await client.execute_command("FT.SEARCH", index, query, ...)
|
|
60
|
+
hits = parse_ft_search_response(raw)
|
|
61
|
+
# [{"key": "cache:entry:abc", "fields": {"prompt": "...", "__score": "0.05"}}]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Handles valkey-py's mixed `bytes`/`str` replies, `RETURN 0` mode (keys with no
|
|
65
|
+
field list), and odd-length field lists. Binary field values that are not valid
|
|
66
|
+
UTF-8 (e.g. raw embedding bytes) are skipped. **Never raises** — returns `[]` on
|
|
67
|
+
any malformed input, including a hit key or field name that is not valid UTF-8.
|
|
68
|
+
|
|
69
|
+
### FT.INFO parsing (version-skew tolerant)
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from betterdb_valkey_search_kit import (
|
|
73
|
+
parse_dimension_from_info,
|
|
74
|
+
parse_ft_info_stats,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
info = await client.execute_command("FT.INFO", index)
|
|
78
|
+
dims = parse_dimension_from_info(info) # 1536, or 0 if no vector field
|
|
79
|
+
stats = parse_ft_info_stats(info) # FtIndexStats(num_docs=..., indexing_state=...)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
`parse_dimension_from_info` understands both the flat `DIM` attribute pair and
|
|
83
|
+
the nested `index/dimensions` shape introduced in Valkey Search 1.2.
|
|
84
|
+
|
|
85
|
+
### Error classification
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from betterdb_valkey_search_kit import is_index_not_found_error
|
|
89
|
+
|
|
90
|
+
try:
|
|
91
|
+
await client.execute_command("FT.INFO", index)
|
|
92
|
+
except Exception as err:
|
|
93
|
+
if is_index_not_found_error(err):
|
|
94
|
+
... # index does not exist yet
|
|
95
|
+
else:
|
|
96
|
+
raise
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Matches the "index does not exist" message variants emitted across Valkey
|
|
100
|
+
Search / RediSearch versions, case-insensitively.
|
|
101
|
+
|
|
102
|
+
## Development
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
uv run --extra dev pytest tests -q
|
|
106
|
+
```
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# @betterdb/valkey-search-kit (Python)
|
|
2
|
+
|
|
3
|
+
`betterdb-valkey-search-kit` — shared low-level helpers for working with Valkey
|
|
4
|
+
Search (`FT.*`) from Python. This is the Python equivalent of the TypeScript
|
|
5
|
+
`@betterdb/valkey-search-kit` package, and the shared foundation the
|
|
6
|
+
`betterdb-retrieval` and `betterdb-agent-memory` packages build on.
|
|
7
|
+
|
|
8
|
+
It has **no runtime dependencies** and exposes only pure functions, so it stays
|
|
9
|
+
trivial to vendor and test.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install betterdb-valkey-search-kit
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## API
|
|
18
|
+
|
|
19
|
+
### Vector encoding
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from betterdb_valkey_search_kit import encode_float32, decode_float32
|
|
23
|
+
|
|
24
|
+
blob = encode_float32([0.1, 0.2, 0.3]) # little-endian Float32 bytes
|
|
25
|
+
vec = decode_float32(blob) # back to list[float]
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Use `encode_float32` to store embeddings as binary `HSET` field values and as
|
|
29
|
+
the `PARAMS` vector for a KNN `FT.SEARCH`.
|
|
30
|
+
|
|
31
|
+
### TAG escaping
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from betterdb_valkey_search_kit import escape_tag
|
|
35
|
+
|
|
36
|
+
f"@model:{{{escape_tag('gpt-4o')}}}" # -> "@model:{gpt\\-4o}"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Escapes every character with special meaning in the TAG filter syntax,
|
|
40
|
+
**including spaces** (unescaped spaces are treated as OR term separators).
|
|
41
|
+
|
|
42
|
+
### FT.SEARCH reply parsing
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from betterdb_valkey_search_kit import parse_ft_search_response
|
|
46
|
+
|
|
47
|
+
raw = await client.execute_command("FT.SEARCH", index, query, ...)
|
|
48
|
+
hits = parse_ft_search_response(raw)
|
|
49
|
+
# [{"key": "cache:entry:abc", "fields": {"prompt": "...", "__score": "0.05"}}]
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Handles valkey-py's mixed `bytes`/`str` replies, `RETURN 0` mode (keys with no
|
|
53
|
+
field list), and odd-length field lists. Binary field values that are not valid
|
|
54
|
+
UTF-8 (e.g. raw embedding bytes) are skipped. **Never raises** — returns `[]` on
|
|
55
|
+
any malformed input, including a hit key or field name that is not valid UTF-8.
|
|
56
|
+
|
|
57
|
+
### FT.INFO parsing (version-skew tolerant)
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from betterdb_valkey_search_kit import (
|
|
61
|
+
parse_dimension_from_info,
|
|
62
|
+
parse_ft_info_stats,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
info = await client.execute_command("FT.INFO", index)
|
|
66
|
+
dims = parse_dimension_from_info(info) # 1536, or 0 if no vector field
|
|
67
|
+
stats = parse_ft_info_stats(info) # FtIndexStats(num_docs=..., indexing_state=...)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
`parse_dimension_from_info` understands both the flat `DIM` attribute pair and
|
|
71
|
+
the nested `index/dimensions` shape introduced in Valkey Search 1.2.
|
|
72
|
+
|
|
73
|
+
### Error classification
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from betterdb_valkey_search_kit import is_index_not_found_error
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
await client.execute_command("FT.INFO", index)
|
|
80
|
+
except Exception as err:
|
|
81
|
+
if is_index_not_found_error(err):
|
|
82
|
+
... # index does not exist yet
|
|
83
|
+
else:
|
|
84
|
+
raise
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Matches the "index does not exist" message variants emitted across Valkey
|
|
88
|
+
Search / RediSearch versions, case-insensitively.
|
|
89
|
+
|
|
90
|
+
## Development
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
uv run --extra dev pytest tests -q
|
|
94
|
+
```
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Shared Valkey Search (FT.*) helpers for BetterDB packages.
|
|
2
|
+
|
|
3
|
+
Mirrors the TypeScript ``@betterdb/valkey-search-kit`` package: float32 vector
|
|
4
|
+
encoding, FT.SEARCH reply parsing, version-skew-tolerant FT.INFO parsing, TAG
|
|
5
|
+
escaping, and "index does not exist" error classification.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from .encoding import decode_float32, encode_float32
|
|
11
|
+
from .errors import is_index_not_found_error
|
|
12
|
+
from .ft_info import FtIndexStats, parse_dimension_from_info, parse_ft_info_stats
|
|
13
|
+
from .ft_search import FtSearchHit, parse_ft_search_response
|
|
14
|
+
from .tags import escape_tag
|
|
15
|
+
|
|
16
|
+
# Names exported by ``from betterdb_valkey_search_kit import *``; every entry
# is one of the names imported from the package's submodules above.
__all__ = [
    "encode_float32",
    "decode_float32",
    "escape_tag",
    "parse_ft_search_response",
    "FtSearchHit",
    "parse_dimension_from_info",
    "parse_ft_info_stats",
    "FtIndexStats",
    "is_index_not_found_error",
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import struct
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def encode_float32(vec: list[float]) -> bytes:
    """Pack a list of floats into little-endian Float32 bytes.

    Every element occupies four bytes, in input order. This is the binary
    form used to store embeddings as HSET field values for KNN search.
    """
    layout = struct.Struct("<" + str(len(vec)) + "f")
    return layout.pack(*vec)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def decode_float32(data: bytes) -> list[float]:
    """Unpack little-endian Float32 bytes into a list of floats.

    Only whole four-byte groups are read; any trailing bytes that do not fill
    a complete group are ignored.
    """
    count, _leftover = divmod(len(data), 4)
    layout = "<%df" % count
    return list(struct.unpack_from(layout, data))
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def is_index_not_found_error(err: Any) -> bool:
    """Tell whether ``err`` is a Valkey Search "index does not exist" error.

    The exception message is lower-cased and checked for the phrasings
    "unknown index name", "no such index", or any message containing both
    "not found" and "index". Values that are not exceptions never match.
    """
    if isinstance(err, BaseException):
        text = str(err).lower()
        if "unknown index name" in text or "no such index" in text:
            return True
        # A bare "not found" is too generic; require index context as well.
        return "not found" in text and "index" in text
    return False
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _s(x: Any) -> str:
    """Return a raw FT.INFO token as text.

    ``bytes`` tokens (as returned by valkey-py) are decoded; bytes that cannot
    be decoded yield an empty string. Anything else goes through ``str()``.
    """
    if not isinstance(x, bytes):
        return str(x)
    try:
        text = x.decode()
    except UnicodeDecodeError:
        text = ""
    return text
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _to_int(x: Any) -> int:
    """Coerce a raw FT.INFO token to an ``int``, returning 0 when it is not numeric.

    The token is first converted to text with ``_s``. Plain integer text is
    parsed exactly with ``int()``, so values above 2**53 keep every digit
    instead of being rounded by a float round-trip. Text that is not a plain
    integer (e.g. a RESP3 double rendered as "42.0") is parsed via ``float()``
    and truncated toward zero.

    Returns 0 for non-numeric tokens, for "nan", and for "inf"/"Infinity".
    """
    try:
        text = _s(x)
        try:
            # Exact path: no precision loss for large integer tokens.
            return int(text)
        except ValueError:
            # Float-formatted token: "42.0" -> 42. int(nan) raises ValueError
            # and int(inf) raises OverflowError; both are handled below.
            return int(float(text))
    except (ValueError, TypeError, OverflowError):
        return 0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def parse_dimension_from_info(info: list[Any]) -> int:
    """Return the vector field dimension found in a raw FT.INFO reply.

    The reply is read as flat key/value pairs. Under an ``attributes`` or
    ``fields`` key, each attribute list is scanned for:

    - a ``type`` token followed by ``VECTOR``
    - a ``DIM`` token (any letter case) followed by the dimension
    - an ``index`` token followed by a sub-list holding a ``dimensions``
      entry (the Valkey Search 1.2 shape); a positive value found there
      replaces any dimension read so far

    The first attribute that is a vector with a positive dimension wins.
    Returns 0 when there is none.
    """
    for pos in range(0, len(info) - 1, 2):
        if _s(info[pos]) not in ("attributes", "fields"):
            continue
        section = info[pos + 1]
        if not isinstance(section, (list, tuple)):
            continue

        for entry in section:
            if not isinstance(entry, (list, tuple)):
                continue

            saw_vector = False
            found_dim = 0

            # Step one token at a time (not in pairs): every adjacent pair of
            # tokens is inspected as a potential key/value.
            for idx in range(len(entry) - 1):
                label = _s(entry[idx])
                following = entry[idx + 1]
                if label == "type" and _s(following) == "VECTOR":
                    saw_vector = True
                if label.lower() == "dim":
                    found_dim = _to_int(following)
                if label == "index" and isinstance(following, (list, tuple)):
                    for sub in range(len(following) - 1):
                        if _s(following[sub]) != "dimensions":
                            continue
                        nested = _to_int(following[sub + 1])
                        if nested > 0:
                            found_dim = nested

            if saw_vector and found_dim > 0:
                return found_dim

    return 0
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass(frozen=True)
class FtIndexStats:
    """Immutable pair of values that ``parse_ft_info_stats`` reads from FT.INFO."""

    # Value of the ``num_docs`` entry coerced to an int; 0 when the entry is
    # absent or not numeric.
    num_docs: int
    # Value of the ``indexing`` entry as text; "unknown" when the entry is absent.
    indexing_state: str
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def parse_ft_info_stats(info: list[Any]) -> FtIndexStats:
    """Read ``num_docs`` and the indexing state from a raw FT.INFO reply.

    The reply is treated as flat key/value pairs. ``num_docs`` defaults to 0
    and the indexing state to "unknown" when the matching key is absent; if a
    key occurs more than once, the last occurrence wins.
    """
    doc_count, state = 0, "unknown"
    for offset in range(0, len(info) - 1, 2):
        label = _s(info[offset])
        payload = info[offset + 1]
        if label == "indexing":
            state = _s(payload)
        elif label == "num_docs":
            doc_count = _to_int(payload)
    return FtIndexStats(num_docs=doc_count, indexing_state=state)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, TypedDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FtSearchHit(TypedDict):
    """A single FT.SEARCH hit: the matched key and its returned fields."""

    # The matched key, as text.
    key: str
    # Returned field names mapped to their values, both as text. Empty when
    # the reply carries no field list for this key.
    fields: dict[str, str]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def parse_ft_search_response(raw: Any) -> list[FtSearchHit]:
    """Turn a raw FT.SEARCH reply from valkey-py's execute_command() into hits.

    The expected reply is a list mixing bytes and str::

        [totalCount, key1, [field1, val1, ...], key2, [...], ...]

    Each hit becomes ``{"key": str, "fields": dict[str, str]}``. A key that is
    not followed by a field list (``RETURN 0`` mode) gets empty ``fields``,
    and the trailing element of an odd-length field list is ignored. Field
    values that are bytes but not valid UTF-8 (e.g. embedding bytes) are
    left out.

    Returns ``[]`` when the total count is not positive or the reply is
    empty or malformed. Never raises: any error during parsing yields ``[]``.
    """
    try:
        if not isinstance(raw, (list, tuple)) or not raw:
            return []

        count_token = raw[0]
        if isinstance(count_token, bytes):
            count_token = count_token.decode()
        # Go through float() so a float-formatted total (e.g. "2.0" from a
        # RESP3 double) is still read as its integer value.
        if int(float(count_token)) <= 0:
            return []

        hits: list[FtSearchHit] = []
        size = len(raw)
        pos = 1
        while pos < size:
            entry = raw[pos]
            if isinstance(entry, bytes):
                entry = entry.decode()
            elif not isinstance(entry, str):
                # Neither bytes nor str: not a key, step over it.
                pos += 1
                continue

            following = raw[pos + 1] if pos + 1 < size else None
            if not isinstance(following, (list, tuple)):
                # No field list after this key; the next element (if any) is
                # examined as a key on the next pass.
                hits.append({"key": entry, "fields": {}})
                pos += 1
                continue

            parsed: dict[str, str] = {}
            # Pair even-indexed names with odd-indexed values; zip drops an
            # unpaired trailing element.
            for name, value in zip(following[0::2], following[1::2]):
                name = name.decode() if isinstance(name, bytes) else str(name)
                if isinstance(value, bytes):
                    try:
                        value = value.decode()
                    except (UnicodeDecodeError, AttributeError):
                        # Binary field (e.g. embedding bytes): leave it out.
                        continue
                else:
                    value = str(value)
                parsed[name] = value

            hits.append({"key": entry, "fields": parsed})
            pos += 2

        return hits
    except Exception:
        return []
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# Characters that get a backslash prefix in a TAG filter value, including the
# space character.
_TAG_ESCAPE_RE = re.compile(r'([,.<>{}\[\]"\'!@#$%^&*()\-+=~|/\\:; ])')


def escape_tag(value: str) -> str:
    """Backslash-escape ``value`` for use as a Valkey Search TAG filter value.

    Spaces are escaped too: Valkey Search treats an unescaped space inside a
    TAG value as a term separator (OR semantics), which would unintentionally
    broaden the filter.
    """
    return _TAG_ESCAPE_RE.sub(lambda hit: "\\" + hit.group(1), value)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "betterdb-valkey-search-kit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Shared Valkey Search (FT.*) helpers: float32 vector encoding, FT.SEARCH reply parsing, version-skew FT.INFO parsing, TAG escaping, and error classification."
|
|
9
|
+
keywords = ["valkey", "redis", "valkey-search", "vector-search", "ft", "redisearch"]
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
requires-python = ">=3.11"
|
|
13
|
+
dependencies = []
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
dev = [
|
|
17
|
+
"pytest>=8.0.0",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
Repository = "https://github.com/BetterDB-inc/monitor"
|
|
22
|
+
|
|
23
|
+
[tool.hatch.build.targets.wheel]
|
|
24
|
+
packages = ["betterdb_valkey_search_kit"]
|
|
25
|
+
|
|
26
|
+
[tool.ruff]
|
|
27
|
+
target-version = "py311"
|
|
28
|
+
line-length = 100
|
|
File without changes
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import struct
|
|
2
|
+
|
|
3
|
+
from betterdb_valkey_search_kit import decode_float32, encode_float32
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_byte_length_is_four_per_element():
    """Each encoded element must occupy exactly four bytes."""
    values = [1.0, 2.0, 3.0, 4.0]
    assert len(encode_float32(values)) == len(values) * 4
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_little_endian_float32_values():
    """Every four-byte slot must read back as the little-endian float32 of its element."""
    source = [0.5, -1.25, 3.75]
    packed = encode_float32(source)
    for slot, expected in enumerate(source):
        (decoded,) = struct.unpack_from("<f", packed, slot * 4)
        assert decoded == expected
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_decode_inverts_encode():
    """Decoding an encoded vector must give back the original values."""
    # All three values have an exact float32 representation, so strict
    # equality is safe here.
    original = [0.5, -1.25, 3.75]
    round_tripped = decode_float32(encode_float32(original))
    assert round_tripped == original
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_empty_vector():
    """An empty vector encodes to zero bytes and empty bytes decode to an empty list."""
    encoded = encode_float32([])
    assert len(encoded) == 0
    decoded = decode_float32(b"")
    assert decoded == []
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from betterdb_valkey_search_kit import is_index_not_found_error
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_matches_unknown_index_name_case_insensitively():
    """The "unknown index name" phrasing must match in any letter case."""
    for message in ("Unknown Index Name", "UNKNOWN INDEX NAME sc:idx"):
        assert is_index_not_found_error(Exception(message)) is True
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_matches_no_such_index_case_insensitively():
    """The "no such index" phrasing must match in any letter case."""
    for message in ("no such index", "sc:idx: No Such Index"):
        assert is_index_not_found_error(Exception(message)) is True
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_matches_redis8_ft_search_phrasing():
    """A "No such index <name>" message must classify as index-not-found."""
    err = Exception("No such index nonexistent_idx_xyz")
    assert is_index_not_found_error(err) is True
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_matches_index_scoped_not_found_phrasings():
    """Messages that mention both "index" and "not found" must match."""
    messages = (
        "Index sc:idx: not found",
        "index not found",
        "Index with name foo not found",
    )
    for message in messages:
        assert is_index_not_found_error(Exception(message)) is True
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_matches_the_valkey_search_12_phrasing():
    """The "Index with name '...' not found in database N" message must match."""
    err = Exception("Index with name 'nonexistent_idx_xyz' not found in database 0")
    assert is_index_not_found_error(err) is True
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_rejects_not_found_messages_without_index_context():
    """A "not found" message that never mentions an index must not match."""
    for message in ("key not found", "function not found", "ERR value not found"):
        assert is_index_not_found_error(Exception(message)) is False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_rejects_index_messages_without_not_found_context():
    """A message that mentions an index but not "not found" must not match."""
    err = Exception("index is being created")
    assert is_index_not_found_error(err) is False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_rejects_unrelated_error_messages():
    """An error with an unrelated message must not match."""
    err = Exception("connection refused")
    assert is_index_not_found_error(err) is False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_rejects_non_exception_values():
    """Values that are not exceptions must never match, whatever text they carry."""
    for candidate in ("index not found", None, {"message": "index not found"}):
        assert is_index_not_found_error(candidate) is False
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from betterdb_valkey_search_kit import (
|
|
2
|
+
FtIndexStats,
|
|
3
|
+
parse_dimension_from_info,
|
|
4
|
+
parse_ft_info_stats,
|
|
5
|
+
)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_parses_the_flat_dim_pair_shape():
    """A flat ``DIM`` pair on a VECTOR attribute yields its dimension."""
    vector_attr = ["identifier", "embedding", "type", "VECTOR", "DIM", "1536"]
    reply = ["index_name", "sc:idx", "attributes", [vector_attr]]
    assert parse_dimension_from_info(reply) == 1536
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_parses_the_nested_v12_index_dimensions_shape():
    """A ``dimensions`` entry nested under ``index`` yields the dimension."""
    vector_attr = ["identifier", "embedding", "type", "VECTOR", "index", ["dimensions", "768"]]
    reply = ["index_name", "sc:idx", "attributes", [vector_attr]]
    assert parse_dimension_from_info(reply) == 768
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_reads_attributes_under_the_legacy_fields_key():
    """Attributes listed under ``fields`` are read, with a lower-case ``dim`` key."""
    vector_attr = ["identifier", "embedding", "type", "VECTOR", "dim", "384"]
    reply = ["fields", [vector_attr]]
    assert parse_dimension_from_info(reply) == 384
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_ignores_non_vector_attributes_with_a_dim_pair():
    """A ``DIM`` pair on a non-VECTOR attribute must not count as a dimension."""
    text_attr = ["identifier", "prompt", "type", "TEXT", "DIM", "99"]
    reply = ["attributes", [text_attr]]
    assert parse_dimension_from_info(reply) == 0
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_returns_zero_when_no_vector_attribute_exists():
    """A reply with no attribute section at all yields 0."""
    reply = ["index_name", "sc:idx", "num_docs", "5"]
    dimension = parse_dimension_from_info(reply)
    assert dimension == 0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_parses_bytes_info_from_valkey_py():
    """An all-bytes reply is parsed the same way as a str reply."""
    vector_attr = [b"identifier", b"embedding", b"type", b"VECTOR", b"DIM", b"1536"]
    reply = [b"attributes", [vector_attr]]
    assert parse_dimension_from_info(reply) == 1536
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_stats_extracts_num_docs_and_indexing_state():
    """Both ``num_docs`` and ``indexing`` are picked out of the flat pairs."""
    reply = ["index_name", "sc:idx", "num_docs", "42", "indexing", "0"]
    expected = FtIndexStats(num_docs=42, indexing_state="0")
    assert parse_ft_info_stats(reply) == expected
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_stats_defaults_when_keys_absent():
    """Without the relevant keys the stats fall back to 0 and "unknown"."""
    stats = parse_ft_info_stats(["index_name", "sc:idx"])
    expected = FtIndexStats(num_docs=0, indexing_state="unknown")
    assert stats == expected
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_stats_coerces_unparseable_num_docs_to_zero():
    """A non-numeric ``num_docs`` value is reported as 0."""
    stats = parse_ft_info_stats(["num_docs", "garbage"])
    expected = FtIndexStats(num_docs=0, indexing_state="unknown")
    assert stats == expected
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_stats_reads_a_float_formatted_num_docs():
    """A float-formatted ``num_docs`` such as "42.0" is read as 42."""
    # A RESP3 double may surface as "42.0"; the parser must read 42 rather
    # than falling back to 0.
    stats = parse_ft_info_stats(["num_docs", "42.0"])
    assert stats.num_docs == 42
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_parses_a_float_formatted_dim():
    """A float-formatted ``DIM`` such as "1536.0" is read as 1536."""
    vector_attr = ["identifier", "embedding", "type", "VECTOR", "DIM", "1536.0"]
    reply = ["attributes", [vector_attr]]
    assert parse_dimension_from_info(reply) == 1536
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from betterdb_valkey_search_kit import parse_ft_search_response
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_returns_empty_for_none():
    """``None`` in place of a reply yields no hits."""
    hits = parse_ft_search_response(None)
    assert hits == []
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_returns_empty_for_empty_list():
    """An empty reply list yields no hits."""
    hits = parse_ft_search_response([])
    assert hits == []
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_returns_empty_for_zero_count():
    """A reply whose total count is zero yields no hits."""
    hits = parse_ft_search_response(["0"])
    assert hits == []
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_parses_a_single_entry():
    """A one-hit str reply yields the key and each returned field."""
    field_list = ["prompt", "hello", "response", "world", "__score", "0.05"]
    hits = parse_ft_search_response(["1", "cache:entry:abc", field_list])
    assert len(hits) == 1
    hit = hits[0]
    assert hit["key"] == "cache:entry:abc"
    assert hit["fields"]["prompt"] == "hello"
    assert hit["fields"]["response"] == "world"
    assert hit["fields"]["__score"] == "0.05"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_parses_bytes_response_from_valkey_py():
    """An all-bytes reply is decoded to str keys, field names and values."""
    field_list = [b"prompt", b"hello", b"__score", b"0.05"]
    hits = parse_ft_search_response([b"1", b"cache:entry:abc", field_list])
    assert len(hits) == 1
    hit = hits[0]
    assert hit["key"] == "cache:entry:abc"
    assert hit["fields"]["prompt"] == "hello"
    assert hit["fields"]["__score"] == "0.05"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_skips_undecodable_binary_field_value():
    """A field whose bytes value is not valid UTF-8 is left out; other fields remain."""
    field_list = ["embedding", b"\xff\xfe\x00\x01", "prompt", "hi"]
    hits = parse_ft_search_response(["1", "k", field_list])
    assert len(hits) == 1
    fields = hits[0]["fields"]
    assert "embedding" not in fields
    assert fields["prompt"] == "hi"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_extracts_score_from_two_results():
    """Each of two hits carries its own ``__score`` field."""
    first_fields = ["prompt", "q1", "response", "a1", "__score", "0.0234", "model", "gpt-4o"]
    second_fields = ["prompt", "q2", "response", "a2", "__score", "0.1500", "model", "gpt-4o"]
    hits = parse_ft_search_response(
        ["2", "sc:entry:111", first_fields, "sc:entry:222", second_fields]
    )
    assert len(hits) == 2
    first_score = float(hits[0]["fields"]["__score"])
    second_score = float(hits[1]["fields"]["__score"])
    assert abs(first_score - 0.0234) < 1e-4
    assert abs(second_score - 0.15) < 1e-4
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_malformed_odd_length_field_list_skips_orphan():
    """The unpaired trailing element of an odd-length field list is ignored."""
    hits = parse_ft_search_response(["1", "key1", ["field1", "val1", "orphan"]])
    assert len(hits) == 1
    fields = hits[0]["fields"]
    assert fields["field1"] == "val1"
    assert len(fields) == 1
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_two_result_response():
    """Two key/field-list pairs yield two hits in reply order."""
    hits = parse_ft_search_response(["2", "key:a", ["f1", "v1"], "key:b", ["f2", "v2"]])
    assert len(hits) == 2
    first, second = hits[0], hits[1]
    assert first["key"] == "key:a"
    assert first["fields"]["f1"] == "v1"
    assert second["key"] == "key:b"
    assert second["fields"]["f2"] == "v2"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_return_zero_mode_keys_without_field_list():
    """Keys with no field list after them become hits with empty fields."""
    hits = parse_ft_search_response(["2", "key:a", "key:b"])
    assert len(hits) == 2
    first, second = hits[0], hits[1]
    assert first == {"key": "key:a", "fields": {}}
    assert second == {"key": "key:b", "fields": {}}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_parses_a_float_formatted_total():
    """A float-formatted total such as "2.0" still yields the hits."""
    # A RESP3 double may surface the total as "2.0"; the hits must still be
    # returned instead of the result collapsing to [].
    hits = parse_ft_search_response(["2.0", "key:a", ["f1", "v1"], "key:b", ["f2", "v2"]])
    assert len(hits) == 2
    assert hits[0]["key"] == "key:a"
    assert hits[1]["key"] == "key:b"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_parses_a_float_formatted_total_in_bytes():
    """A float-formatted total given as bytes (b"1.0") still yields the hit."""
    reply = [b"1.0", b"key:a", [b"f1", b"v1"]]
    hits = parse_ft_search_response(reply)
    assert len(hits) == 1
    assert hits[0]["key"] == "key:a"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_never_raises_on_garbage():
    """Inputs that are not a list or tuple yield no hits rather than raising."""
    for junk in ("garbage", 123):
        assert parse_ft_search_response(junk) == []
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from betterdb_valkey_search_kit import escape_tag
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_escapes_tag_punctuation():
    """Comma, dot, braces and pipe each get a backslash prefix."""
    cases = [
        ("a,b", "a\\,b"),
        ("a.b", "a\\.b"),
        ("a{b}", "a\\{b\\}"),
        ("a|b", "a\\|b"),
    ]
    for raw_value, expected in cases:
        assert escape_tag(raw_value) == expected
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_escapes_spaces_to_prevent_or_semantics():
    """A space inside the value gets a backslash prefix."""
    escaped = escape_tag("gpt 4o")
    assert escaped == "gpt\\ 4o"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_escapes_hyphens_and_slashes():
    """Hyphens, forward slashes and backslashes each get a backslash prefix."""
    cases = [
        ("gpt-4o", "gpt\\-4o"),
        ("a/b\\c", "a\\/b\\\\c"),
    ]
    for raw_value, expected in cases:
        assert escape_tag(raw_value) == expected
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_leaves_alphanumerics_and_underscores_untouched():
    """Letters, digits and underscores pass through unchanged."""
    plain = "model_v2"
    assert escape_tag(plain) == plain
|