betterdb-valkey-search-kit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .venv/
5
+ dist/
6
+ *.egg-info/
@@ -0,0 +1,12 @@
1
+ # Changelog
2
+
3
+ ## [0.1.0] - 2026-06-23
4
+
5
+ ### Added
6
+
7
+ - Initial release. Python equivalent of the TypeScript `@betterdb/valkey-search-kit`.
8
+ - `encode_float32` / `decode_float32` — little-endian Float32 vector encoding for embeddings.
9
+ - `escape_tag` — TAG filter value escaping (including spaces).
10
+ - `parse_ft_search_response` — bytes-aware FT.SEARCH reply parsing; never raises.
11
+ - `parse_dimension_from_info` / `parse_ft_info_stats` — version-skew-tolerant FT.INFO parsing.
12
+ - `is_index_not_found_error` — "index does not exist" error classification.
@@ -0,0 +1,106 @@
1
+ Metadata-Version: 2.4
2
+ Name: betterdb-valkey-search-kit
3
+ Version: 0.1.0
4
+ Summary: Shared Valkey Search (FT.*) helpers: float32 vector encoding, FT.SEARCH reply parsing, version-skew FT.INFO parsing, TAG escaping, and error classification.
5
+ Project-URL: Repository, https://github.com/BetterDB-inc/monitor
6
+ License: MIT
7
+ Keywords: ft,redis,redisearch,valkey,valkey-search,vector-search
8
+ Requires-Python: >=3.11
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
11
+ Description-Content-Type: text/markdown
12
+
13
+ # @betterdb/valkey-search-kit (Python)
14
+
15
+ `betterdb-valkey-search-kit` — shared low-level helpers for working with Valkey
16
+ Search (`FT.*`) from Python. This is the Python equivalent of the TypeScript
17
+ `@betterdb/valkey-search-kit` package, and the shared foundation the
18
+ `betterdb-retrieval` and `betterdb-agent-memory` packages build on.
19
+
20
+ It has **no runtime dependencies** and exposes only pure functions, so it stays
21
+ trivial to vendor and test.
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ pip install betterdb-valkey-search-kit
27
+ ```
28
+
29
+ ## API
30
+
31
+ ### Vector encoding
32
+
33
+ ```python
34
+ from betterdb_valkey_search_kit import encode_float32, decode_float32
35
+
36
+ blob = encode_float32([0.1, 0.2, 0.3]) # little-endian Float32 bytes
37
+ vec = decode_float32(blob) # back to list[float]
38
+ ```
39
+
40
+ Use `encode_float32` to store embeddings as binary `HSET` field values and as
41
+ the `PARAMS` vector for a KNN `FT.SEARCH`.
42
+
43
+ ### TAG escaping
44
+
45
+ ```python
46
+ from betterdb_valkey_search_kit import escape_tag
47
+
48
+ f"@model:{{{escape_tag('gpt-4o')}}}" # -> "@model:{gpt\\-4o}"
49
+ ```
50
+
51
+ Escapes every character with special meaning in the TAG filter syntax,
52
+ **including spaces** (unescaped spaces are treated as OR term separators).
53
+
54
+ ### FT.SEARCH reply parsing
55
+
56
+ ```python
57
+ from betterdb_valkey_search_kit import parse_ft_search_response
58
+
59
+ raw = await client.execute_command("FT.SEARCH", index, query, ...)
60
+ hits = parse_ft_search_response(raw)
61
+ # [{"key": "cache:entry:abc", "fields": {"prompt": "...", "__score": "0.05"}}]
62
+ ```
63
+
64
+ Handles valkey-py's mixed `bytes`/`str` replies, `RETURN 0` mode (keys with no
65
+ field list), and odd-length field lists. Binary field values that are not valid
66
+ UTF-8 (e.g. raw embedding bytes) are skipped. **Never raises** — returns `[]` on
67
+ any malformed input.
68
+
69
+ ### FT.INFO parsing (version-skew tolerant)
70
+
71
+ ```python
72
+ from betterdb_valkey_search_kit import (
73
+ parse_dimension_from_info,
74
+ parse_ft_info_stats,
75
+ )
76
+
77
+ info = await client.execute_command("FT.INFO", index)
78
+ dims = parse_dimension_from_info(info) # 1536, or 0 if no vector field
79
+ stats = parse_ft_info_stats(info) # FtIndexStats(num_docs=..., indexing_state=...)
80
+ ```
81
+
82
+ `parse_dimension_from_info` understands both the flat `DIM` attribute pair and
83
+ the nested `index/dimensions` shape introduced in Valkey Search 1.2.
84
+
85
+ ### Error classification
86
+
87
+ ```python
88
+ from betterdb_valkey_search_kit import is_index_not_found_error
89
+
90
+ try:
91
+ await client.execute_command("FT.INFO", index)
92
+ except Exception as err:
93
+ if is_index_not_found_error(err):
94
+ ... # index does not exist yet
95
+ else:
96
+ raise
97
+ ```
98
+
99
+ Matches the "index does not exist" message variants emitted across Valkey
100
+ Search / RediSearch versions, case-insensitively.
101
+
102
+ ## Development
103
+
104
+ ```bash
105
+ uv run --extra dev pytest tests -q
106
+ ```
@@ -0,0 +1,94 @@
1
+ # @betterdb/valkey-search-kit (Python)
2
+
3
+ `betterdb-valkey-search-kit` — shared low-level helpers for working with Valkey
4
+ Search (`FT.*`) from Python. This is the Python equivalent of the TypeScript
5
+ `@betterdb/valkey-search-kit` package, and the shared foundation the
6
+ `betterdb-retrieval` and `betterdb-agent-memory` packages build on.
7
+
8
+ It has **no runtime dependencies** and exposes only pure functions, so it stays
9
+ trivial to vendor and test.
10
+
11
+ ## Install
12
+
13
+ ```bash
14
+ pip install betterdb-valkey-search-kit
15
+ ```
16
+
17
+ ## API
18
+
19
+ ### Vector encoding
20
+
21
+ ```python
22
+ from betterdb_valkey_search_kit import encode_float32, decode_float32
23
+
24
+ blob = encode_float32([0.1, 0.2, 0.3]) # little-endian Float32 bytes
25
+ vec = decode_float32(blob) # back to list[float]
26
+ ```
27
+
28
+ Use `encode_float32` to store embeddings as binary `HSET` field values and as
29
+ the `PARAMS` vector for a KNN `FT.SEARCH`.
30
+
31
+ ### TAG escaping
32
+
33
+ ```python
34
+ from betterdb_valkey_search_kit import escape_tag
35
+
36
+ f"@model:{{{escape_tag('gpt-4o')}}}" # -> "@model:{gpt\\-4o}"
37
+ ```
38
+
39
+ Escapes every character with special meaning in the TAG filter syntax,
40
+ **including spaces** (unescaped spaces are treated as OR term separators).
41
+
42
+ ### FT.SEARCH reply parsing
43
+
44
+ ```python
45
+ from betterdb_valkey_search_kit import parse_ft_search_response
46
+
47
+ raw = await client.execute_command("FT.SEARCH", index, query, ...)
48
+ hits = parse_ft_search_response(raw)
49
+ # [{"key": "cache:entry:abc", "fields": {"prompt": "...", "__score": "0.05"}}]
50
+ ```
51
+
52
+ Handles valkey-py's mixed `bytes`/`str` replies, `RETURN 0` mode (keys with no
53
+ field list), and odd-length field lists. Binary field values that are not valid
54
+ UTF-8 (e.g. raw embedding bytes) are skipped. **Never raises** — returns `[]` on
55
+ any malformed input.
56
+
57
+ ### FT.INFO parsing (version-skew tolerant)
58
+
59
+ ```python
60
+ from betterdb_valkey_search_kit import (
61
+ parse_dimension_from_info,
62
+ parse_ft_info_stats,
63
+ )
64
+
65
+ info = await client.execute_command("FT.INFO", index)
66
+ dims = parse_dimension_from_info(info) # 1536, or 0 if no vector field
67
+ stats = parse_ft_info_stats(info) # FtIndexStats(num_docs=..., indexing_state=...)
68
+ ```
69
+
70
+ `parse_dimension_from_info` understands both the flat `DIM` attribute pair and
71
+ the nested `index/dimensions` shape introduced in Valkey Search 1.2.
72
+
73
+ ### Error classification
74
+
75
+ ```python
76
+ from betterdb_valkey_search_kit import is_index_not_found_error
77
+
78
+ try:
79
+ await client.execute_command("FT.INFO", index)
80
+ except Exception as err:
81
+ if is_index_not_found_error(err):
82
+ ... # index does not exist yet
83
+ else:
84
+ raise
85
+ ```
86
+
87
+ Matches the "index does not exist" message variants emitted across Valkey
88
+ Search / RediSearch versions, case-insensitively.
89
+
90
+ ## Development
91
+
92
+ ```bash
93
+ uv run --extra dev pytest tests -q
94
+ ```
@@ -0,0 +1,26 @@
1
+ """Shared Valkey Search (FT.*) helpers for BetterDB packages.
2
+
3
+ Mirrors the TypeScript ``@betterdb/valkey-search-kit`` package: float32 vector
4
+ encoding, FT.SEARCH reply parsing, version-skew-tolerant FT.INFO parsing, TAG
5
+ escaping, and "index does not exist" error classification.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from .encoding import decode_float32, encode_float32
11
+ from .errors import is_index_not_found_error
12
+ from .ft_info import FtIndexStats, parse_dimension_from_info, parse_ft_info_stats
13
+ from .ft_search import FtSearchHit, parse_ft_search_response
14
+ from .tags import escape_tag
15
+
16
+ __all__ = [
17
+ "encode_float32",
18
+ "decode_float32",
19
+ "escape_tag",
20
+ "parse_ft_search_response",
21
+ "FtSearchHit",
22
+ "parse_dimension_from_info",
23
+ "parse_ft_info_stats",
24
+ "FtIndexStats",
25
+ "is_index_not_found_error",
26
+ ]
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ import struct
4
+
5
+
6
def encode_float32(vec: list[float]) -> bytes:
    """Pack a list of floats into little-endian Float32 bytes.

    The resulting blob is what gets stored as a binary HSET field value so
    the embedding can take part in KNN search. Each element occupies four
    bytes, in input order.
    """
    layout = struct.Struct("<%df" % len(vec))
    return layout.pack(*vec)
12
+
13
+
14
def decode_float32(data: bytes) -> list[float]:
    """Unpack little-endian Float32 bytes into a list of floats.

    Only complete four-byte groups are read; trailing bytes that do not
    fill a whole group are ignored.
    """
    count, _leftover = divmod(len(data), 4)
    values = struct.unpack_from("<%df" % count, data)
    return list(values)
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
def is_index_not_found_error(err: Any) -> bool:
    """Tell whether ``err`` is a Valkey Search "index does not exist" error.

    The exception message is lower-cased and checked against the phrasings
    seen across Valkey Search / RediSearch versions: "unknown index name",
    "no such index", or any message containing both "index" and
    "not found". Anything that is not an exception instance returns False.
    """
    if isinstance(err, BaseException):
        text = str(err).lower()
        if "unknown index name" in text or "no such index" in text:
            return True
        # A bare "not found" is too generic; require index context as well.
        return "index" in text and "not found" in text
    return False
@@ -0,0 +1,97 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+
7
def _s(x: Any) -> str:
    """Turn a raw FT.INFO token into a string.

    ``bytes`` tokens (as returned by valkey-py) are decoded with the default
    codec; bytes that cannot be decoded become ``""``. Every other value is
    passed through ``str()``.
    """
    if not isinstance(x, bytes):
        return str(x)
    try:
        decoded = x.decode()
    except UnicodeDecodeError:
        decoded = ""
    return decoded
15
+
16
+
17
def _to_int(x: Any) -> int:
    """Coerce a raw FT.INFO numeric token to an ``int``, defaulting to 0.

    Mirrors the TS ``parseInt(String(x), 10) || 0`` fallback semantics.

    Args:
        x: A raw token (``bytes`` or ``str`` from the client, or any value
            that stringifies to a number).

    Returns:
        The integer value of the token, or 0 when it is not numeric.
    """
    text = _s(x)
    try:
        # Exact path first: sending a plain integer token through float()
        # would silently round values above 2**53.
        return int(text)
    except (ValueError, TypeError):
        pass
    try:
        # Float-formatted tokens (e.g. a RESP3 double rendered as "42.0")
        # still yield their integer part instead of falling back to 0.
        # OverflowError covers "inf"/"Infinity"; ValueError covers "nan"
        # and non-numeric text.
        return int(float(text))
    except (ValueError, TypeError, OverflowError):
        return 0
26
+
27
+
28
def parse_dimension_from_info(info: list[Any]) -> int:
    """Extract the vector field dimension from a raw FT.INFO reply.

    Two attribute layouts are understood:

    - flat attribute tokens carrying a ``DIM`` key (matched
      case-insensitively)
    - the Valkey Search 1.2 layout, where the dimension sits under a
      ``dimensions`` key inside a nested ``index`` sub-array

    Args:
        info: The raw FT.INFO reply as a flat ``[key, value, ...]`` sequence
            whose tokens may be ``bytes`` or ``str``. Attribute lists are
            read from the ``attributes`` or ``fields`` entry.

    Returns:
        The dimension of the first attribute that is typed ``VECTOR`` and
        carries a positive dimension, or 0 when there is none.
    """
    for i in range(0, len(info) - 1, 2):
        if _s(info[i]) not in ("attributes", "fields"):
            continue

        attributes = info[i + 1]
        if not isinstance(attributes, (list, tuple)):
            continue

        for attr in attributes:
            if not isinstance(attr, (list, tuple)):
                continue

            is_vector = False
            dim = 0

            # Every position is tried as a key (step 1, not 2), so value
            # tokens are inspected as keys too. A token that merely equals
            # "dim" (for example a field identifier) must therefore never
            # reset a dimension that was already parsed: only positive
            # values are accepted, in both the flat and the nested branch.
            for j in range(len(attr) - 1):
                attr_key = _s(attr[j])
                value = attr[j + 1]

                if attr_key == "type" and _s(value) == "VECTOR":
                    is_vector = True

                if attr_key.lower() == "dim":
                    flat_dim = _to_int(value)
                    if flat_dim > 0:
                        dim = flat_dim

                if attr_key == "index" and isinstance(value, (list, tuple)):
                    for k in range(len(value) - 1):
                        if _s(value[k]) == "dimensions":
                            nested_dim = _to_int(value[k + 1])
                            if nested_dim > 0:
                                dim = nested_dim

            if is_vector and dim > 0:
                return dim

    return 0
77
+
78
+
79
@dataclass(frozen=True)
class FtIndexStats:
    """Immutable pair of index statistics taken from an FT.INFO reply."""

    # Document count reported under the reply's ``num_docs`` key.
    num_docs: int
    # Text reported under the reply's ``indexing`` key.
    indexing_state: str
83
+
84
+
85
def parse_ft_info_stats(info: list[Any]) -> FtIndexStats:
    """Read ``num_docs`` and the indexing state from a raw FT.INFO reply.

    The reply is walked as flat key/value pairs; a trailing key without a
    value is ignored. Missing keys leave the defaults in place: 0 documents
    and an indexing state of ``"unknown"``. When a key repeats, the last
    occurrence wins.
    """
    doc_count = 0
    state = "unknown"
    pos = 0
    last_key_pos = len(info) - 2
    while pos <= last_key_pos:
        name = _s(info[pos])
        if name == "num_docs":
            doc_count = _to_int(info[pos + 1])
        elif name == "indexing":
            state = _s(info[pos + 1])
        pos += 2
    return FtIndexStats(num_docs=doc_count, indexing_state=state)
@@ -0,0 +1,87 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, TypedDict
4
+
5
+
6
class FtSearchHit(TypedDict):
    """One FT.SEARCH result: the matched key plus the fields returned for it."""

    # Key of the matched document.
    key: str
    # Returned field names mapped to their string values.
    fields: dict[str, str]
11
+
12
+
13
def parse_ft_search_response(raw: Any) -> list[FtSearchHit]:
    """Parse a raw FT.SEARCH reply obtained via valkey-py's execute_command().

    The reply is a list whose tokens may be ``bytes`` or ``str``::

        [totalCount, key1, [field1, val1, ...], key2, [...], ...]

    Each hit becomes ``{"key": str, "fields": dict[str, str]}``. A key that
    is not followed by a field list (``RETURN 0`` mode) gets empty fields,
    and a dangling field name without a value is dropped. Field values that
    are bytes but not valid UTF-8 (e.g. embedding bytes) are skipped.

    Returns ``[]`` when totalCount is not positive or the reply is empty or
    malformed. Never raises: any parse error also yields ``[]``.
    """
    try:
        if not isinstance(raw, (list, tuple)) or not raw:
            return []

        count_token = raw[0]
        if isinstance(count_token, bytes):
            count_token = count_token.decode()
        # Go through float() so a float-formatted total (e.g. "2.0" from a
        # RESP3 double) is read as its integer value rather than erroring
        # out and collapsing the reply to no hits.
        if int(float(count_token)) <= 0:
            return []

        hits: list[FtSearchHit] = []
        size = len(raw)
        pos = 1
        while pos < size:
            entry = raw[pos]
            if isinstance(entry, bytes):
                entry = entry.decode()
            elif not isinstance(entry, str):
                # Neither a key nor something attached to one; move on.
                pos += 1
                continue

            pairs = raw[pos + 1] if pos + 1 < size else None
            if not isinstance(pairs, (list, tuple)):
                # Key without a field list: record it and re-examine the
                # following token (if any) as the next key.
                hits.append({"key": entry, "fields": {}})
                pos += 1
                continue

            parsed: dict[str, str] = {}
            for idx in range(0, len(pairs) - 1, 2):
                name = pairs[idx]
                value = pairs[idx + 1]
                name = name.decode() if isinstance(name, bytes) else str(name)
                if isinstance(value, bytes):
                    try:
                        value = value.decode()
                    except (UnicodeDecodeError, AttributeError):
                        # Binary field (e.g. embedding bytes) — skip it.
                        continue
                else:
                    value = str(value)
                parsed[name] = value

            hits.append({"key": entry, "fields": parsed})
            pos += 2

        return hits
    except Exception:
        return []
@@ -0,0 +1,15 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
# Matches, one character at a time, the punctuation (and the space) that
# escape_tag prefixes with a backslash.
_TAG_ESCAPE_RE = re.compile(r'([,.<>{}\[\]"\'!@#$%^&*()\-+=~|/\\:; ])')


def escape_tag(value: str) -> str:
    """Backslash-escape ``value`` for use as a Valkey Search TAG filter value.

    The space character is escaped along with the punctuation: Valkey Search
    treats an unescaped space inside a TAG value as a term separator (OR
    semantics), which would unintentionally broaden the filter.
    """
    return _TAG_ESCAPE_RE.sub(lambda match: "\\" + match.group(1), value)
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "betterdb-valkey-search-kit"
7
+ version = "0.1.0"
8
+ description = "Shared Valkey Search (FT.*) helpers: float32 vector encoding, FT.SEARCH reply parsing, version-skew FT.INFO parsing, TAG escaping, and error classification."
9
+ keywords = ["valkey", "redis", "valkey-search", "vector-search", "ft", "redisearch"]
10
+ license = { text = "MIT" }
11
+ readme = "README.md"
12
+ requires-python = ">=3.11"
13
+ dependencies = []
14
+
15
+ [project.optional-dependencies]
16
+ dev = [
17
+ "pytest>=8.0.0",
18
+ ]
19
+
20
+ [project.urls]
21
+ Repository = "https://github.com/BetterDB-inc/monitor"
22
+
23
+ [tool.hatch.build.targets.wheel]
24
+ packages = ["betterdb_valkey_search_kit"]
25
+
26
+ [tool.ruff]
27
+ target-version = "py311"
28
+ line-length = 100
File without changes
@@ -0,0 +1,28 @@
1
+ import struct
2
+
3
+ from betterdb_valkey_search_kit import decode_float32, encode_float32
4
+
5
+
6
def test_byte_length_is_four_per_element():
    """Each encoded element occupies exactly four bytes."""
    values = [1.0, 2.0, 3.0, 4.0]
    assert len(encode_float32(values)) == 4 * len(values)
10
+
11
+
12
def test_little_endian_float32_values():
    """Every 4-byte slot reads back as the matching little-endian float32."""
    expected = [0.5, -1.25, 3.75]
    blob = encode_float32(expected)
    for slot, want in enumerate(expected):
        (got,) = struct.unpack_from("<f", blob, slot * 4)
        assert got == want
18
+
19
+
20
def test_decode_inverts_encode():
    """Encoding then decoding returns the original list."""
    # All three values are exactly representable in float32, so strict
    # equality is safe.
    original = [0.5, -1.25, 3.75]
    round_tripped = decode_float32(encode_float32(original))
    assert round_tripped == original
24
+
25
+
26
def test_empty_vector():
    """An empty vector encodes to zero bytes; empty bytes decode to ``[]``."""
    encoded = encode_float32([])
    assert len(encoded) == 0
    decoded = decode_float32(b"")
    assert decoded == []
@@ -0,0 +1,50 @@
1
+ from betterdb_valkey_search_kit import is_index_not_found_error
2
+
3
+
4
def test_matches_unknown_index_name_case_insensitively():
    """The "unknown index name" phrasing matches regardless of letter case."""
    for message in ("Unknown Index Name", "UNKNOWN INDEX NAME sc:idx"):
        assert is_index_not_found_error(Exception(message)) is True
7
+
8
+
9
def test_matches_no_such_index_case_insensitively():
    """The "no such index" phrasing matches regardless of letter case."""
    for message in ("no such index", "sc:idx: No Such Index"):
        assert is_index_not_found_error(Exception(message)) is True
12
+
13
+
14
def test_matches_redis8_ft_search_phrasing():
    """A "No such index <name>" message is classified as index-not-found."""
    error = Exception("No such index nonexistent_idx_xyz")
    assert is_index_not_found_error(error) is True
16
+
17
+
18
def test_matches_index_scoped_not_found_phrasings():
    """Messages containing both "index" and "not found" are matched."""
    messages = (
        "Index sc:idx: not found",
        "index not found",
        "Index with name foo not found",
    )
    for message in messages:
        assert is_index_not_found_error(Exception(message)) is True
22
+
23
+
24
def test_matches_the_valkey_search_12_phrasing():
    """The quoted-name "... not found in database N" message is matched."""
    error = Exception("Index with name 'nonexistent_idx_xyz' not found in database 0")
    assert is_index_not_found_error(error) is True
31
+
32
+
33
def test_rejects_not_found_messages_without_index_context():
    """A "not found" message that never mentions an index is not matched."""
    for message in ("key not found", "function not found", "ERR value not found"):
        assert is_index_not_found_error(Exception(message)) is False
37
+
38
+
39
def test_rejects_index_messages_without_not_found_context():
    """Mentioning an index without "not found" is not enough to match."""
    error = Exception("index is being created")
    assert is_index_not_found_error(error) is False
41
+
42
+
43
def test_rejects_unrelated_error_messages():
    """An unrelated error message is not matched."""
    error = Exception("connection refused")
    assert is_index_not_found_error(error) is False
45
+
46
+
47
def test_rejects_non_exception_values():
    """Values that are not exception instances never match, whatever they say."""
    non_exceptions = ("index not found", None, {"message": "index not found"})
    for value in non_exceptions:
        assert is_index_not_found_error(value) is False
@@ -0,0 +1,76 @@
1
+ from betterdb_valkey_search_kit import (
2
+ FtIndexStats,
3
+ parse_dimension_from_info,
4
+ parse_ft_info_stats,
5
+ )
6
+
7
+
8
def test_parses_the_flat_dim_pair_shape():
    """A flat ``DIM`` pair on a VECTOR attribute yields its dimension."""
    vector_attr = ["identifier", "embedding", "type", "VECTOR", "DIM", "1536"]
    reply = ["index_name", "sc:idx", "attributes", [vector_attr]]
    assert parse_dimension_from_info(reply) == 1536
16
+
17
+
18
def test_parses_the_nested_v12_index_dimensions_shape():
    """A ``dimensions`` entry nested under ``index`` yields the dimension."""
    nested_index = ["dimensions", "768"]
    vector_attr = ["identifier", "embedding", "type", "VECTOR", "index", nested_index]
    reply = ["index_name", "sc:idx", "attributes", [vector_attr]]
    assert parse_dimension_from_info(reply) == 768
26
+
27
+
28
def test_reads_attributes_under_the_legacy_fields_key():
    """Attribute lists stored under the ``fields`` key are read as well."""
    vector_attr = ["identifier", "embedding", "type", "VECTOR", "dim", "384"]
    assert parse_dimension_from_info(["fields", [vector_attr]]) == 384
31
+
32
+
33
def test_ignores_non_vector_attributes_with_a_dim_pair():
    """A ``DIM`` pair on a non-VECTOR attribute does not count."""
    text_attr = ["identifier", "prompt", "type", "TEXT", "DIM", "99"]
    assert parse_dimension_from_info(["attributes", [text_attr]]) == 0
36
+
37
+
38
def test_returns_zero_when_no_vector_attribute_exists():
    """A reply without any attribute list reports dimension 0."""
    reply = ["index_name", "sc:idx", "num_docs", "5"]
    result = parse_dimension_from_info(reply)
    assert result == 0
41
+
42
+
43
def test_parses_bytes_info_from_valkey_py():
    """An all-``bytes`` reply is parsed the same way as a ``str`` reply."""
    vector_attr = [b"identifier", b"embedding", b"type", b"VECTOR", b"DIM", b"1536"]
    reply = [b"attributes", [vector_attr]]
    assert parse_dimension_from_info(reply) == 1536
49
+
50
+
51
def test_stats_extracts_num_docs_and_indexing_state():
    """Both ``num_docs`` and ``indexing`` are picked out of the flat pairs."""
    stats = parse_ft_info_stats(["index_name", "sc:idx", "num_docs", "42", "indexing", "0"])
    expected = FtIndexStats(num_docs=42, indexing_state="0")
    assert stats == expected
54
+
55
+
56
def test_stats_defaults_when_keys_absent():
    """Missing keys fall back to 0 documents and an "unknown" state."""
    stats = parse_ft_info_stats(["index_name", "sc:idx"])
    expected = FtIndexStats(num_docs=0, indexing_state="unknown")
    assert stats == expected
60
+
61
+
62
def test_stats_coerces_unparseable_num_docs_to_zero():
    """A non-numeric ``num_docs`` value is reported as 0."""
    stats = parse_ft_info_stats(["num_docs", "garbage"])
    expected = FtIndexStats(num_docs=0, indexing_state="unknown")
    assert stats == expected
66
+
67
+
68
def test_stats_reads_a_float_formatted_num_docs():
    """A float-formatted ``num_docs`` is read as its integer value."""
    # A RESP3 double may surface as "42.0"; the parser should read 42, as TS
    # parseInt does, rather than fall back to 0.
    stats = parse_ft_info_stats(["num_docs", "42.0"])
    assert stats.num_docs == 42
72
+
73
+
74
def test_parses_a_float_formatted_dim():
    """A float-formatted ``DIM`` value is read as its integer value."""
    vector_attr = ["identifier", "embedding", "type", "VECTOR", "DIM", "1536.0"]
    assert parse_dimension_from_info(["attributes", [vector_attr]]) == 1536
@@ -0,0 +1,107 @@
1
+ from betterdb_valkey_search_kit import parse_ft_search_response
2
+
3
+
4
def test_returns_empty_for_none():
    """``None`` input yields no hits."""
    hits = parse_ft_search_response(None)
    assert hits == []
6
+
7
+
8
def test_returns_empty_for_empty_list():
    """An empty reply yields no hits."""
    hits = parse_ft_search_response([])
    assert hits == []
10
+
11
+
12
def test_returns_empty_for_zero_count():
    """A reply whose total count is zero yields no hits."""
    hits = parse_ft_search_response(["0"])
    assert hits == []
14
+
15
+
16
def test_parses_a_single_entry():
    """A single key with its field list becomes one hit with those fields."""
    field_list = ["prompt", "hello", "response", "world", "__score", "0.05"]
    hits = parse_ft_search_response(["1", "cache:entry:abc", field_list])
    assert len(hits) == 1
    hit = hits[0]
    assert hit["key"] == "cache:entry:abc"
    expected_fields = (("prompt", "hello"), ("response", "world"), ("__score", "0.05"))
    for name, value in expected_fields:
        assert hit["fields"][name] == value
28
+
29
+
30
def test_parses_bytes_response_from_valkey_py():
    """An all-``bytes`` reply is decoded into ``str`` keys, names and values."""
    hits = parse_ft_search_response(
        [b"1", b"cache:entry:abc", [b"prompt", b"hello", b"__score", b"0.05"]]
    )
    assert len(hits) == 1
    hit = hits[0]
    assert hit["key"] == "cache:entry:abc"
    assert hit["fields"]["prompt"] == "hello"
    assert hit["fields"]["__score"] == "0.05"
37
+
38
+
39
def test_skips_undecodable_binary_field_value():
    """A field whose bytes are not valid UTF-8 is dropped; others are kept."""
    field_list = ["embedding", b"\xff\xfe\x00\x01", "prompt", "hi"]
    hits = parse_ft_search_response(["1", "k", field_list])
    assert len(hits) == 1
    fields = hits[0]["fields"]
    assert "embedding" not in fields
    assert fields["prompt"] == "hi"
45
+
46
+
47
def test_extracts_score_from_two_results():
    """Each of two hits exposes its own ``__score`` field."""
    first = ["prompt", "q1", "response", "a1", "__score", "0.0234", "model", "gpt-4o"]
    second = ["prompt", "q2", "response", "a2", "__score", "0.1500", "model", "gpt-4o"]
    hits = parse_ft_search_response(["2", "sc:entry:111", first, "sc:entry:222", second])
    assert len(hits) == 2
    for hit, expected_score in zip(hits, (0.0234, 0.15)):
        assert abs(float(hit["fields"]["__score"]) - expected_score) < 1e-4
59
+
60
+
61
def test_malformed_odd_length_field_list_skips_orphan():
    """A trailing field name without a value is ignored."""
    hits = parse_ft_search_response(["1", "key1", ["field1", "val1", "orphan"]])
    assert len(hits) == 1
    fields = hits[0]["fields"]
    assert fields["field1"] == "val1"
    assert len(fields) == 1
67
+
68
+
69
def test_two_result_response():
    """Two key/field-list pairs become two hits, in reply order."""
    hits = parse_ft_search_response(["2", "key:a", ["f1", "v1"], "key:b", ["f2", "v2"]])
    assert len(hits) == 2
    first, second = hits
    assert first["key"] == "key:a"
    assert first["fields"]["f1"] == "v1"
    assert second["key"] == "key:b"
    assert second["fields"]["f2"] == "v2"
77
+
78
+
79
def test_return_zero_mode_keys_without_field_list():
    """Keys that arrive without field lists become hits with empty fields."""
    hits = parse_ft_search_response(["2", "key:a", "key:b"])
    assert len(hits) == 2
    for hit, expected_key in zip(hits, ("key:a", "key:b")):
        assert hit == {"key": expected_key, "fields": {}}
85
+
86
+
87
def test_parses_a_float_formatted_total():
    """A float-formatted total count still lets the hits through."""
    # A RESP3 double may surface the total as "2.0"; the hits should still be
    # returned, as with TS parseInt, instead of collapsing to [].
    hits = parse_ft_search_response(["2.0", "key:a", ["f1", "v1"], "key:b", ["f2", "v2"]])
    assert len(hits) == 2
    assert [hits[0]["key"], hits[1]["key"]] == ["key:a", "key:b"]
95
+
96
+
97
def test_parses_a_float_formatted_total_in_bytes():
    """A float-formatted total delivered as ``bytes`` is handled too."""
    reply = [b"1.0", b"key:a", [b"f1", b"v1"]]
    hits = parse_ft_search_response(reply)
    assert len(hits) == 1
    assert hits[0]["key"] == "key:a"
103
+
104
+
105
def test_never_raises_on_garbage():
    """Inputs that are not reply lists produce ``[]`` instead of an error."""
    for garbage in ("garbage", 123):
        assert parse_ft_search_response(garbage) == []
@@ -0,0 +1,21 @@
1
+ from betterdb_valkey_search_kit import escape_tag
2
+
3
+
4
def test_escapes_tag_punctuation():
    """Commas, dots, braces and pipes each gain a leading backslash."""
    cases = {
        "a,b": "a\\,b",
        "a.b": "a\\.b",
        "a{b}": "a\\{b\\}",
        "a|b": "a\\|b",
    }
    for raw, escaped in cases.items():
        assert escape_tag(raw) == escaped
9
+
10
+
11
def test_escapes_spaces_to_prevent_or_semantics():
    """A space inside the value is backslash-escaped."""
    escaped = escape_tag("gpt 4o")
    assert escaped == "gpt\\ 4o"
13
+
14
+
15
def test_escapes_hyphens_and_slashes():
    """Hyphens, forward slashes and backslashes are all escaped."""
    cases = (("gpt-4o", "gpt\\-4o"), ("a/b\\c", "a\\/b\\\\c"))
    for raw, escaped in cases:
        assert escape_tag(raw) == escaped
18
+
19
+
20
def test_leaves_alphanumerics_and_underscores_untouched():
    """Letters, digits and underscores pass through unchanged."""
    plain = "model_v2"
    assert escape_tag(plain) == plain