cachekit 0.2.3__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cachekit-0.2.3 → cachekit-0.3.1}/Cargo.lock +3 -3
- {cachekit-0.2.3 → cachekit-0.3.1}/PKG-INFO +4 -4
- {cachekit-0.2.3 → cachekit-0.3.1}/pyproject.toml +1 -1
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/Cargo.toml +1 -1
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/__init__.py +1 -1
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/decorator.py +6 -1
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/wrapper.py +27 -5
- cachekit-0.3.1/src/cachekit/key_generator.py +348 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/logging.py +2 -1
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/__init__.py +51 -14
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/auto_serializer.py +13 -0
- cachekit-0.2.3/src/cachekit/key_generator.py +0 -158
- {cachekit-0.2.3 → cachekit-0.3.1}/Cargo.toml +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/LICENSE +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/README.md +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/Makefile +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/README.md +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/TEST_EXPANSION_SUMMARY.md +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/src/lib.rs +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/src/python_bindings.rs +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/supply-chain/audits.toml +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/supply-chain/config.toml +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/supply-chain/imports.lock +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/rust/tsan_suppressions.txt +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/base.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/errors.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/file/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/file/backend.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/file/config.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/provider.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/backend.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/client.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/config.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/error_handler.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/provider.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/cache_handler.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/nested.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/settings.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/singleton.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/validation.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/intent.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/main.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/orchestrator.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/session.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/stats_context.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/tenant_context.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/utils/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/di.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/hash_utils.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/health.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/hiredis_compat.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/imports.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/invalidation/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/invalidation/channel.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/invalidation/event.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/invalidation/redis_channel.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/l1_cache.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/monitoring/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/monitoring/correlation_tracking.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/monitoring/pool_monitor.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/monitoring/protocols.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/py.typed +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/__init__.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/adaptive_timeout.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/async_metrics.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/circuit_breaker.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/error_classification.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/load_control.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/metrics_collection.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/profiles.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/arrow_serializer.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/base.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/encryption_wrapper.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/orjson_serializer.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/standard_serializer.py +0 -0
- {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/wrapper.py +0 -0
|
@@ -202,9 +202,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
|
|
202
202
|
|
|
203
203
|
[[package]]
|
|
204
204
|
name = "bytes"
|
|
205
|
-
version = "1.
|
|
205
|
+
version = "1.11.1"
|
|
206
206
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
207
|
-
checksum = "
|
|
207
|
+
checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
|
|
208
208
|
|
|
209
209
|
[[package]]
|
|
210
210
|
name = "cachekit-core"
|
|
@@ -231,7 +231,7 @@ dependencies = [
|
|
|
231
231
|
|
|
232
232
|
[[package]]
|
|
233
233
|
name = "cachekit-rs"
|
|
234
|
-
version = "0.
|
|
234
|
+
version = "0.3.1"
|
|
235
235
|
dependencies = [
|
|
236
236
|
"cachekit-core",
|
|
237
237
|
"criterion",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cachekit
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Intended Audience :: Developers
|
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -43,11 +43,11 @@ Maintainer-email: cachekit Contributors <noreply@cachekit.io>
|
|
|
43
43
|
License: MIT
|
|
44
44
|
Requires-Python: >=3.9
|
|
45
45
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
46
|
-
Project-URL:
|
|
46
|
+
Project-URL: Changelog, https://github.com/cachekit-io/cachekit-py/blob/main/CHANGELOG.md
|
|
47
47
|
Project-URL: Documentation, https://github.com/cachekit-io/cachekit-py#readme
|
|
48
|
-
Project-URL:
|
|
48
|
+
Project-URL: Homepage, https://github.com/cachekit-io/cachekit-py
|
|
49
49
|
Project-URL: Issues, https://github.com/cachekit-io/cachekit-py/issues
|
|
50
|
-
Project-URL:
|
|
50
|
+
Project-URL: Repository, https://github.com/cachekit-io/cachekit-py.git
|
|
51
51
|
|
|
52
52
|
<div align="center">
|
|
53
53
|
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "cachekit"
|
|
7
|
-
version = "0.2.3"
|
|
7
|
+
version = "0.3.1"
|
|
8
8
|
description = "Production-ready Redis caching for Python with intelligent reliability features and Rust-powered performance"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -172,6 +172,9 @@ class DecoratorConfig:
|
|
|
172
172
|
integrity_checking: Enable checksums for corruption detection (default: True)
|
|
173
173
|
All serializers use xxHash3-64 (8 bytes).
|
|
174
174
|
Set to False for @cache.minimal (speed-first, no integrity guarantee)
|
|
175
|
+
key: Custom key function for complex types. Receives (*args, **kwargs) and returns str.
|
|
176
|
+
Use for numpy arrays, DataFrames, or cross-language cache sharing.
|
|
177
|
+
Example: @cache(key=lambda arr: hashlib.blake2b(arr.tobytes()).hexdigest())
|
|
175
178
|
refresh_ttl_on_get: Extend TTL on cache hit
|
|
176
179
|
ttl_refresh_threshold: Minimum remaining TTL fraction (0.0-1.0) to trigger refresh
|
|
177
180
|
backend: L2 backend (RedisBackend, HTTPBackend, None for L1-only)
|
|
@@ -183,12 +186,13 @@ class DecoratorConfig:
|
|
|
183
186
|
encryption: Client-side encryption configuration
|
|
184
187
|
"""
|
|
185
188
|
|
|
186
|
-
# Core settings (5 fields)
|
|
189
|
+
# Core settings (6 fields)
|
|
187
190
|
ttl: int | None = None
|
|
188
191
|
namespace: str | None = None
|
|
189
192
|
serializer: Union[str, SerializerProtocol] = "default" # type: ignore[assignment] # String name or protocol instance
|
|
190
193
|
safe_mode: bool = False
|
|
191
194
|
integrity_checking: bool = True # Checksums for corruption detection (xxHash3-64 for all serializers)
|
|
195
|
+
key: Callable[..., str] | None = None # Custom key function (escape hatch for complex types)
|
|
192
196
|
|
|
193
197
|
# Performance (2 fields)
|
|
194
198
|
refresh_ttl_on_get: bool = False
|
|
@@ -251,6 +255,7 @@ class DecoratorConfig:
|
|
|
251
255
|
"namespace": self.namespace,
|
|
252
256
|
"serializer": self.serializer,
|
|
253
257
|
"safe_mode": self.safe_mode,
|
|
258
|
+
"key": self.key,
|
|
254
259
|
"refresh_ttl_on_get": self.refresh_ttl_on_get,
|
|
255
260
|
"ttl_refresh_threshold": self.ttl_refresh_threshold,
|
|
256
261
|
"backend": self.backend,
|
|
@@ -412,6 +412,15 @@ def create_cache_wrapper(
|
|
|
412
412
|
deployment_uuid = config.encryption.deployment_uuid
|
|
413
413
|
master_key = config.encryption.master_key
|
|
414
414
|
|
|
415
|
+
# Custom key function (escape hatch for complex types)
|
|
416
|
+
custom_key_func = config.key
|
|
417
|
+
else:
|
|
418
|
+
custom_key_func = None
|
|
419
|
+
|
|
420
|
+
# Re-scope custom_key_func for closure
|
|
421
|
+
if "custom_key_func" not in dir():
|
|
422
|
+
custom_key_func = None
|
|
423
|
+
|
|
415
424
|
# Fast mode: Disable monitoring overhead, keep performance features
|
|
416
425
|
use_circuit_breaker = circuit_breaker and not fast_mode
|
|
417
426
|
use_adaptive_timeout = adaptive_timeout and not fast_mode
|
|
@@ -541,7 +550,13 @@ def create_cache_wrapper(
|
|
|
541
550
|
|
|
542
551
|
# Key generation - needed for both L1-only and L1+L2 modes
|
|
543
552
|
try:
|
|
544
|
-
|
|
553
|
+
# Custom key function takes priority (escape hatch for complex types)
|
|
554
|
+
if custom_key_func is not None:
|
|
555
|
+
custom_key = custom_key_func(*args, **kwargs)
|
|
556
|
+
if not isinstance(custom_key, str):
|
|
557
|
+
raise TypeError(f"key function must return str, got {type(custom_key).__name__}")
|
|
558
|
+
cache_key = f"{namespace or 'default'}:{custom_key}"
|
|
559
|
+
elif fast_mode:
|
|
545
560
|
# Minimal key generation - no string formatting overhead
|
|
546
561
|
from ..hash_utils import cache_key_hash
|
|
547
562
|
|
|
@@ -878,12 +893,17 @@ def create_cache_wrapper(
|
|
|
878
893
|
cache_key = None
|
|
879
894
|
func_start_time: float | None = None # Initialize for exception handlers
|
|
880
895
|
try:
|
|
881
|
-
#
|
|
882
|
-
if fast_mode:
|
|
896
|
+
# Custom key function takes priority (escape hatch for complex types)
|
|
897
|
+
if custom_key_func is not None:
|
|
898
|
+
custom_key = custom_key_func(*args, **kwargs)
|
|
899
|
+
if not isinstance(custom_key, str):
|
|
900
|
+
raise TypeError(f"key function must return str, got {type(custom_key).__name__}")
|
|
901
|
+
cache_key = f"{namespace or 'default'}:{custom_key}"
|
|
902
|
+
elif fast_mode:
|
|
883
903
|
# Ultra-fast key generation for hot paths (10-50μs savings)
|
|
884
904
|
from ..hash_utils import cache_key_hash
|
|
885
905
|
|
|
886
|
-
cache_namespace = namespace or
|
|
906
|
+
cache_namespace = namespace or "default"
|
|
887
907
|
args_kwargs_str = str(args) + str(kwargs)
|
|
888
908
|
cache_key = cache_namespace + ":" + func_hash + ":" + cache_key_hash(args_kwargs_str)
|
|
889
909
|
else:
|
|
@@ -1372,7 +1392,9 @@ def create_cache_wrapper(
|
|
|
1372
1392
|
"""Clear cache statistics and invalidate all cached entries."""
|
|
1373
1393
|
_stats.clear()
|
|
1374
1394
|
# Also invalidate actual cache entries
|
|
1375
|
-
|
|
1395
|
+
if inspect.iscoroutinefunction(func):
|
|
1396
|
+
raise TypeError("cache_clear() cannot clear cache for async functions. Use 'await fn.ainvalidate_cache()' instead.")
|
|
1397
|
+
invalidate_cache()
|
|
1376
1398
|
|
|
1377
1399
|
if inspect.iscoroutinefunction(func):
|
|
1378
1400
|
async_wrapper.invalidate_cache = ainvalidate_cache # type: ignore[attr-defined]
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""Cache key generation functionality."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import sys
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from decimal import Decimal
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from pathlib import Path, PurePath
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast
|
|
12
|
+
from uuid import UUID
|
|
13
|
+
|
|
14
|
+
import msgpack
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
pass
|
|
18
|
+
|
|
19
|
+
# Constants for constrained array support (per round-table review 2025-12-18)
|
|
20
|
+
ARRAY_MAX_BYTES = 100_000 # 100KB per array
|
|
21
|
+
ARRAY_AGGREGATE_MAX = 5_000_000 # 5MB total across all args
|
|
22
|
+
SUPPORTED_ARRAY_DTYPES = {"int32", "int64", "float32", "float64"}
|
|
23
|
+
DTYPE_MAP = {"int32": "i32", "int64": "i64", "float32": "f32", "float64": "f64"}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CacheKeyGenerator:
|
|
27
|
+
"""Generates consistent cache keys from function calls.
|
|
28
|
+
|
|
29
|
+
Uses MessagePack + Blake2b-256 for cross-language compatibility.
|
|
30
|
+
Implements protocol-v1.0.md Section 3.3 (MessagePack-based approach).
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
# Key length constants
|
|
34
|
+
MAX_KEY_LENGTH = 250 # Practical cache key length limit (Redis, Memcached, etc.)
|
|
35
|
+
KEY_PREFIX_LENGTH = 50 # Length of prefix to keep when shortening keys
|
|
36
|
+
|
|
37
|
+
# Serializer codes for compact metadata encoding (1 char each)
|
|
38
|
+
SERIALIZER_CODES = {
|
|
39
|
+
"std": "s", # StandardSerializer (multi-language MessagePack)
|
|
40
|
+
"auto": "a", # AutoSerializer (Python-specific, NumPy/pandas)
|
|
41
|
+
"orjson": "o", # OrjsonSerializer (JSON-based)
|
|
42
|
+
"arrow": "w", # ArrowSerializer (columnar format, w=arroW)
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
def __init__(self):
|
|
46
|
+
"""Initialize the key generator.
|
|
47
|
+
|
|
48
|
+
Uses MessagePack + Blake2b-256 per protocol-v1.0.md Section 3.3.
|
|
49
|
+
"""
|
|
50
|
+
pass
|
|
51
|
+
|
|
52
|
+
def generate_key(
|
|
53
|
+
self,
|
|
54
|
+
func: Callable[..., Any],
|
|
55
|
+
args: tuple[Any, ...],
|
|
56
|
+
kwargs: dict[str, Any],
|
|
57
|
+
namespace: str | None = None,
|
|
58
|
+
integrity_checking: bool = True,
|
|
59
|
+
serializer_type: str = "std",
|
|
60
|
+
) -> str:
|
|
61
|
+
"""Generate a cache key from function and arguments.
|
|
62
|
+
|
|
63
|
+
Args:
|
|
64
|
+
func: The function being cached
|
|
65
|
+
args: Positional arguments passed to the function
|
|
66
|
+
kwargs: Keyword arguments passed to the function
|
|
67
|
+
namespace: Optional namespace prefix for the key
|
|
68
|
+
integrity_checking: Whether integrity checking is enabled (ByteStorage vs plain MessagePack)
|
|
69
|
+
serializer_type: Serializer type code ("std", "auto", "orjson", "arrow")
|
|
70
|
+
|
|
71
|
+
Returns:
|
|
72
|
+
A consistent string key for caching
|
|
73
|
+
|
|
74
|
+
Note:
|
|
75
|
+
Uses compact metadata suffix format: :<ic><serializer_code>
|
|
76
|
+
Example: ":1s" = integrity_checking=True, serializer=StandardSerializer
|
|
77
|
+
"""
|
|
78
|
+
# Build key components efficiently (avoid f-strings in hot path)
|
|
79
|
+
key_parts = []
|
|
80
|
+
|
|
81
|
+
# Add namespace if provided
|
|
82
|
+
if namespace:
|
|
83
|
+
key_parts.extend(["ns:", namespace, ":"])
|
|
84
|
+
|
|
85
|
+
# Add function identifier (module + name) - single string operation
|
|
86
|
+
key_parts.extend(["func:", func.__module__, ".", func.__qualname__, ":"])
|
|
87
|
+
|
|
88
|
+
# Generate args hash using Blake2b-256
|
|
89
|
+
args_hash = self._blake2b_hash(args, kwargs)
|
|
90
|
+
|
|
91
|
+
key_parts.extend(["args:", args_hash, ":"])
|
|
92
|
+
|
|
93
|
+
# Add compact metadata suffix: :<ic><serializer_code>
|
|
94
|
+
# Example: ":1s" = integrity_checking=True, serializer=std
|
|
95
|
+
ic_flag = "1" if integrity_checking else "0"
|
|
96
|
+
serializer_code = self.SERIALIZER_CODES.get(serializer_type, "s") # Default to "s" if unknown
|
|
97
|
+
key_parts.extend([ic_flag, serializer_code])
|
|
98
|
+
|
|
99
|
+
# Single join operation reduces string allocations
|
|
100
|
+
key = "".join(key_parts)
|
|
101
|
+
|
|
102
|
+
# Ensure key is within practical limits and contains no problematic characters
|
|
103
|
+
return self._normalize_key(key)
|
|
104
|
+
|
|
105
|
+
def _blake2b_hash(self, args: tuple, kwargs: dict) -> str:
|
|
106
|
+
"""Generate hash using MessagePack + Blake2b-256.
|
|
107
|
+
|
|
108
|
+
Blake2b-256 (32 bytes = 64 hex chars) for collision resistance.
|
|
109
|
+
MessagePack ensures cross-language compatibility.
|
|
110
|
+
|
|
111
|
+
Raises:
|
|
112
|
+
TypeError: If args/kwargs contain unsupported types (custom objects, numpy arrays, etc.)
|
|
113
|
+
"""
|
|
114
|
+
# Track aggregate array bytes for DoS prevention
|
|
115
|
+
array_bytes_seen: list[int] = [0]
|
|
116
|
+
|
|
117
|
+
# Step 1: Normalize recursively
|
|
118
|
+
normalized_args = [self._normalize(arg, array_bytes_seen) for arg in args]
|
|
119
|
+
normalized_kwargs = {k: self._normalize(v, array_bytes_seen) for k, v in sorted(kwargs.items())}
|
|
120
|
+
|
|
121
|
+
# Step 2: Serialize with MessagePack
|
|
122
|
+
try:
|
|
123
|
+
msgpack_bytes = cast(
|
|
124
|
+
bytes, msgpack.packb([normalized_args, normalized_kwargs], use_bin_type=True, strict_types=True)
|
|
125
|
+
)
|
|
126
|
+
except TypeError as e:
|
|
127
|
+
# Wrap msgpack's TypeError with a more descriptive message
|
|
128
|
+
raise TypeError(f"Unsupported type for cache key generation: {e}") from e
|
|
129
|
+
|
|
130
|
+
# Step 3: Hash with Blake2b-256
|
|
131
|
+
return hashlib.blake2b(msgpack_bytes, digest_size=32).hexdigest()
|
|
132
|
+
|
|
133
|
+
def _normalize(self, obj: Any, _array_bytes_seen: list[int] | None = None) -> Any:
|
|
134
|
+
"""Normalize object for deterministic MessagePack encoding.
|
|
135
|
+
|
|
136
|
+
CRITICAL: Cross-language compatible types ONLY per Protocol v1.1.
|
|
137
|
+
|
|
138
|
+
Supported types (per round-table review 2025-12-18):
|
|
139
|
+
- Primitives: int, str, bytes, bool, None, float
|
|
140
|
+
- Collections: dict (sorted keys), list, tuple
|
|
141
|
+
- Extended: Path, UUID, Decimal, Enum, datetime (UTC only)
|
|
142
|
+
- Arrays: numpy.ndarray (1D, ≤100KB, i32/i64/f32/f64)
|
|
143
|
+
|
|
144
|
+
Args:
|
|
145
|
+
obj: Object to normalize
|
|
146
|
+
_array_bytes_seen: Internal tracker for aggregate array size (DoS prevention)
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
Normalized object safe for MessagePack serialization
|
|
150
|
+
|
|
151
|
+
Raises:
|
|
152
|
+
TypeError: For unsupported types with helpful guidance
|
|
153
|
+
"""
|
|
154
|
+
# Initialize aggregate tracker if not provided
|
|
155
|
+
if _array_bytes_seen is None:
|
|
156
|
+
_array_bytes_seen = [0]
|
|
157
|
+
|
|
158
|
+
# === COLLECTIONS (recursive) ===
|
|
159
|
+
if isinstance(obj, dict):
|
|
160
|
+
return {k: self._normalize(v, _array_bytes_seen) for k, v in sorted(obj.items())}
|
|
161
|
+
|
|
162
|
+
if isinstance(obj, (list, tuple)):
|
|
163
|
+
return [self._normalize(x, _array_bytes_seen) for x in obj]
|
|
164
|
+
|
|
165
|
+
# === FLOAT (cross-language compat) ===
|
|
166
|
+
if isinstance(obj, float):
|
|
167
|
+
# CRITICAL: Normalize -0.0 → 0.0 for cross-language compatibility
|
|
168
|
+
return 0.0 if obj == 0.0 else obj
|
|
169
|
+
|
|
170
|
+
# === EXTENDED TYPES ===
|
|
171
|
+
|
|
172
|
+
# Path: normalize to POSIX format for cross-platform consistency
|
|
173
|
+
if isinstance(obj, (Path, PurePath)):
|
|
174
|
+
return obj.as_posix()
|
|
175
|
+
|
|
176
|
+
# UUID: standard string format
|
|
177
|
+
if isinstance(obj, UUID):
|
|
178
|
+
return str(obj)
|
|
179
|
+
|
|
180
|
+
# Decimal: exact string representation
|
|
181
|
+
if isinstance(obj, Decimal):
|
|
182
|
+
return str(obj)
|
|
183
|
+
|
|
184
|
+
# Enum: use value (recursively normalize in case value is complex)
|
|
185
|
+
if isinstance(obj, Enum):
|
|
186
|
+
return self._normalize(obj.value, _array_bytes_seen)
|
|
187
|
+
|
|
188
|
+
# datetime: UTC only, reject naive datetimes
|
|
189
|
+
if isinstance(obj, datetime):
|
|
190
|
+
if obj.tzinfo is None:
|
|
191
|
+
raise TypeError(
|
|
192
|
+
"Naive datetime not allowed in cache keys (timezone ambiguity). "
|
|
193
|
+
"Use timezone-aware datetime: datetime(..., tzinfo=timezone.utc)"
|
|
194
|
+
)
|
|
195
|
+
return obj.isoformat()
|
|
196
|
+
|
|
197
|
+
# === NUMPY ARRAY (constrained support) ===
|
|
198
|
+
if self._is_numpy_array(obj):
|
|
199
|
+
return self._normalize_array(obj, _array_bytes_seen)
|
|
200
|
+
|
|
201
|
+
# === PRIMITIVES (pass through) ===
|
|
202
|
+
if isinstance(obj, (int, str, bytes, bool, type(None))):
|
|
203
|
+
return obj
|
|
204
|
+
|
|
205
|
+
# === UNSUPPORTED: Fail fast with helpful message ===
|
|
206
|
+
return self._raise_unsupported_type(obj)
|
|
207
|
+
|
|
208
|
+
def _is_numpy_array(self, obj: Any) -> bool:
|
|
209
|
+
"""Check if object is numpy array without importing numpy."""
|
|
210
|
+
return type(obj).__module__ == "numpy" and type(obj).__name__ == "ndarray"
|
|
211
|
+
|
|
212
|
+
def _normalize_array(self, arr: Any, _array_bytes_seen: list[int]) -> list[Any]:
|
|
213
|
+
"""Normalize numpy array with strict constraints.
|
|
214
|
+
|
|
215
|
+
Constraints (per round-table review 2025-12-18):
|
|
216
|
+
- 1D only (cross-language simplicity)
|
|
217
|
+
- ≤100KB (memory safety)
|
|
218
|
+
- 4 dtypes: i32, i64, f32, f64 (cross-language compatibility)
|
|
219
|
+
- Little-endian byte order (platform determinism)
|
|
220
|
+
- 256-bit Blake2b hash (collision resistance)
|
|
221
|
+
- Version prefix for future protocol changes
|
|
222
|
+
|
|
223
|
+
Args:
|
|
224
|
+
arr: numpy.ndarray to normalize
|
|
225
|
+
_array_bytes_seen: Aggregate byte counter for DoS prevention
|
|
226
|
+
|
|
227
|
+
Returns:
|
|
228
|
+
List of ["__array_v1__", shape_list, dtype_str, content_hash]
|
|
229
|
+
(list format for MessagePack compatibility with strict_types=True)
|
|
230
|
+
|
|
231
|
+
Raises:
|
|
232
|
+
TypeError: If array doesn't meet constraints
|
|
233
|
+
"""
|
|
234
|
+
import numpy as np
|
|
235
|
+
|
|
236
|
+
# Constraint 1: Size limit per array
|
|
237
|
+
if arr.nbytes > ARRAY_MAX_BYTES:
|
|
238
|
+
raise TypeError(
|
|
239
|
+
f"Array too large ({arr.nbytes:,} bytes, max {ARRAY_MAX_BYTES:,}). Use key= parameter for large arrays."
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
# Constraint 2: Aggregate size limit (DoS prevention)
|
|
243
|
+
_array_bytes_seen[0] += arr.nbytes
|
|
244
|
+
if _array_bytes_seen[0] > ARRAY_AGGREGATE_MAX:
|
|
245
|
+
raise TypeError(
|
|
246
|
+
f"Total array size exceeds {ARRAY_AGGREGATE_MAX:,} bytes. Use key= parameter for batch array operations."
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
# Constraint 3: 1D only
|
|
250
|
+
if arr.ndim != 1:
|
|
251
|
+
raise TypeError(
|
|
252
|
+
f"Only 1D arrays supported in cache keys (got {arr.ndim}D). "
|
|
253
|
+
f"Use key= parameter for multidimensional arrays, or flatten with arr.ravel()."
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
# Constraint 4: Supported dtypes only
|
|
257
|
+
dtype_name = arr.dtype.name
|
|
258
|
+
if dtype_name not in SUPPORTED_ARRAY_DTYPES:
|
|
259
|
+
raise TypeError(
|
|
260
|
+
f"Unsupported array dtype '{dtype_name}'. "
|
|
261
|
+
f"Supported: {', '.join(sorted(SUPPORTED_ARRAY_DTYPES))}. "
|
|
262
|
+
f"Cast with arr.astype(np.float64) or use key= parameter."
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
# Ensure C-contiguous memory layout
|
|
266
|
+
arr = np.ascontiguousarray(arr)
|
|
267
|
+
|
|
268
|
+
# Force little-endian byte order for cross-platform determinism
|
|
269
|
+
if arr.dtype.byteorder not in ("=", "<", "|"):
|
|
270
|
+
arr = arr.astype(arr.dtype.newbyteorder("<"))
|
|
271
|
+
elif arr.dtype.byteorder == "=" and sys.byteorder == "big":
|
|
272
|
+
arr = arr.byteswap().newbyteorder("<")
|
|
273
|
+
|
|
274
|
+
# 256-bit Blake2b hash (per security review)
|
|
275
|
+
content_hash = hashlib.blake2b(arr.tobytes(), digest_size=32).hexdigest()
|
|
276
|
+
|
|
277
|
+
# Standardized dtype string for cross-language compatibility
|
|
278
|
+
dtype_str = DTYPE_MAP[dtype_name]
|
|
279
|
+
|
|
280
|
+
# Version prefix for protocol evolution
|
|
281
|
+
# Return as list (not tuple) for MessagePack compatibility with strict_types=True
|
|
282
|
+
# Shape converted to list as well
|
|
283
|
+
return ["__array_v1__", list(arr.shape), dtype_str, content_hash]
|
|
284
|
+
|
|
285
|
+
def _raise_unsupported_type(self, obj: Any) -> NoReturn:
|
|
286
|
+
"""Raise helpful TypeError for unsupported types.
|
|
287
|
+
|
|
288
|
+
Args:
|
|
289
|
+
obj: The unsupported object
|
|
290
|
+
|
|
291
|
+
Raises:
|
|
292
|
+
TypeError: Always, with guidance on how to handle the type
|
|
293
|
+
"""
|
|
294
|
+
type_name = type(obj).__module__ + "." + type(obj).__qualname__
|
|
295
|
+
|
|
296
|
+
# Specific guidance for numpy arrays that don't meet constraints
|
|
297
|
+
if "numpy" in type_name and "ndarray" in type_name:
|
|
298
|
+
raise TypeError(
|
|
299
|
+
"numpy array doesn't meet cache key constraints. "
|
|
300
|
+
"Requirements: 1D, ≤100KB, dtype in (i32, i64, f32, f64). "
|
|
301
|
+
"Use key= parameter for other arrays."
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
if "pandas" in type_name:
|
|
305
|
+
raise TypeError(
|
|
306
|
+
"pandas objects not supported as cache key arguments "
|
|
307
|
+
"(Parquet serialization is non-deterministic). "
|
|
308
|
+
"Recommended patterns:\n"
|
|
309
|
+
" 1. Pass identifier, return DataFrame: @cache def load(id: int) -> pd.DataFrame\n"
|
|
310
|
+
" 2. Use explicit key: @cache(key=lambda df: hashlib.blake2b(df.to_parquet()).hexdigest())"
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
if isinstance(obj, (set, frozenset)):
|
|
314
|
+
raise TypeError(
|
|
315
|
+
"set/frozenset not supported in cache keys (mixed-type sorting crashes). "
|
|
316
|
+
"Convert to sorted list: sorted(list(your_set))"
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
raise TypeError(
|
|
320
|
+
f"Unsupported type '{type_name}' for cache key. "
|
|
321
|
+
f"Supported: dict, list, tuple, int, float, str, bytes, bool, None, "
|
|
322
|
+
f"Path, UUID, Decimal, Enum, datetime (UTC), 1D numpy arrays (≤100KB, i32/i64/f32/f64). "
|
|
323
|
+
f"For custom types, use key= parameter."
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
def _normalize_key(self, key: str) -> str:
|
|
327
|
+
"""Normalize key to ensure it's valid for cache backends.
|
|
328
|
+
|
|
329
|
+
Args:
|
|
330
|
+
key: Raw cache key
|
|
331
|
+
|
|
332
|
+
Returns:
|
|
333
|
+
Normalized key safe for cache backends (Redis, Memcached, etc.)
|
|
334
|
+
"""
|
|
335
|
+
# Replace problematic characters
|
|
336
|
+
normalized = key.replace(" ", "_").replace("\n", "_").replace("\r", "_")
|
|
337
|
+
|
|
338
|
+
# Ensure key length is within practical limits for cache backends
|
|
339
|
+
if len(normalized) > self.MAX_KEY_LENGTH:
|
|
340
|
+
# If too long, hash the key to get consistent shorter version
|
|
341
|
+
# Use Blake2b-256 (32 bytes) for consistency
|
|
342
|
+
key_hash = hashlib.blake2b(normalized.encode("utf-8"), digest_size=32).hexdigest()
|
|
343
|
+
|
|
344
|
+
# Keep first part of original key for readability + hash
|
|
345
|
+
prefix = normalized[: self.KEY_PREFIX_LENGTH] if len(normalized) > self.KEY_PREFIX_LENGTH else normalized
|
|
346
|
+
normalized = f"{prefix}:{key_hash[:32]}"
|
|
347
|
+
|
|
348
|
+
return normalized
|
|
@@ -7,6 +7,7 @@ that reduces overhead from 570% to <5% while maintaining functionality.
|
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
9
|
import os
|
|
10
|
+
import platform
|
|
10
11
|
import random
|
|
11
12
|
import threading
|
|
12
13
|
import time
|
|
@@ -170,7 +171,7 @@ class UltraOptimizedStructuredLogger:
|
|
|
170
171
|
|
|
171
172
|
# Pre-computed values for performance
|
|
172
173
|
self._sampling_threshold = int(SAMPLING_RATE * 100)
|
|
173
|
-
self._hostname =
|
|
174
|
+
self._hostname = platform.node()
|
|
174
175
|
self._pid = os.getpid()
|
|
175
176
|
|
|
176
177
|
# PII patterns to mask (pre-compiled for speed)
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from threading import Lock
|
|
3
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
4
6
|
|
|
5
7
|
from cachekit._rust_serializer import ByteStorage
|
|
6
8
|
|
|
7
|
-
from .arrow_serializer import ArrowSerializer
|
|
8
9
|
from .auto_serializer import AutoSerializer
|
|
9
10
|
from .base import (
|
|
10
11
|
SerializationError,
|
|
@@ -16,8 +17,25 @@ from .encryption_wrapper import EncryptionWrapper
|
|
|
16
17
|
from .orjson_serializer import OrjsonSerializer
|
|
17
18
|
from .standard_serializer import StandardSerializer
|
|
18
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from .arrow_serializer import ArrowSerializer
|
|
22
|
+
|
|
19
23
|
logger = logging.getLogger(__name__)
|
|
20
24
|
|
|
25
|
+
# Lazy import for optional ArrowSerializer (requires pyarrow from [data] extra)
|
|
26
|
+
_ArrowSerializer: type | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _get_arrow_serializer() -> type:
|
|
30
|
+
"""Lazy-load ArrowSerializer. Raises ImportError if pyarrow not installed."""
|
|
31
|
+
global _ArrowSerializer
|
|
32
|
+
if _ArrowSerializer is None:
|
|
33
|
+
from .arrow_serializer import ArrowSerializer
|
|
34
|
+
|
|
35
|
+
_ArrowSerializer = ArrowSerializer
|
|
36
|
+
return _ArrowSerializer
|
|
37
|
+
|
|
38
|
+
|
|
21
39
|
# Validate ByteStorage works correctly
|
|
22
40
|
test_storage = ByteStorage("msgpack")
|
|
23
41
|
test_data = b"test validation data"
|
|
@@ -36,7 +54,7 @@ SERIALIZER_REGISTRY = {
|
|
|
36
54
|
"auto": AutoSerializer, # Python-specific types (NumPy, pandas, datetime optimization)
|
|
37
55
|
"default": StandardSerializer, # Language-agnostic MessagePack for multi-language caches
|
|
38
56
|
"std": StandardSerializer, # Explicit StandardSerializer alias
|
|
39
|
-
"arrow":
|
|
57
|
+
"arrow": None, # Lazy-loaded: requires pyarrow from [data] extra
|
|
40
58
|
"orjson": OrjsonSerializer,
|
|
41
59
|
"encrypted": EncryptionWrapper, # AutoSerializer + AES-256-GCM encryption
|
|
42
60
|
}
|
|
@@ -96,8 +114,13 @@ def get_serializer(name: str, enable_integrity_checking: bool = True) -> Seriali
|
|
|
96
114
|
f"@cache(serializer=MySerializer())"
|
|
97
115
|
)
|
|
98
116
|
|
|
117
|
+
# Get serializer class (lazy-load arrow if needed)
|
|
118
|
+
if name == "arrow":
|
|
119
|
+
serializer_class = _get_arrow_serializer()
|
|
120
|
+
else:
|
|
121
|
+
serializer_class = SERIALIZER_REGISTRY[name]
|
|
122
|
+
|
|
99
123
|
# Instantiate with integrity checking configuration
|
|
100
|
-
serializer_class = SERIALIZER_REGISTRY[name]
|
|
101
124
|
if name in ("default", "std", "auto", "arrow", "orjson"):
|
|
102
125
|
# All core serializers use enable_integrity_checking parameter
|
|
103
126
|
serializer = serializer_class(enable_integrity_checking=enable_integrity_checking)
|
|
@@ -167,9 +190,9 @@ def get_available_serializers() -> dict[str, Any]:
|
|
|
167
190
|
def benchmark_serializers() -> dict[str, Any]:
|
|
168
191
|
"""Get instantiated serializers for benchmarking."""
|
|
169
192
|
serializers = {}
|
|
170
|
-
for name
|
|
193
|
+
for name in SERIALIZER_REGISTRY:
|
|
171
194
|
try:
|
|
172
|
-
serializers[name] =
|
|
195
|
+
serializers[name] = get_serializer(name)
|
|
173
196
|
except Exception as e:
|
|
174
197
|
logger.warning(f"Failed to instantiate {name} serializer: {e}")
|
|
175
198
|
return serializers
|
|
@@ -178,28 +201,42 @@ def benchmark_serializers() -> dict[str, Any]:
|
|
|
178
201
|
def get_serializer_info() -> dict[str, dict[str, Any]]:
|
|
179
202
|
"""Get information about available serializers."""
|
|
180
203
|
info = {}
|
|
181
|
-
for name
|
|
204
|
+
for name in SERIALIZER_REGISTRY:
|
|
182
205
|
try:
|
|
183
|
-
instance =
|
|
206
|
+
instance = get_serializer(name)
|
|
184
207
|
info[name] = {
|
|
185
|
-
"class":
|
|
186
|
-
"module":
|
|
208
|
+
"class": type(instance).__name__,
|
|
209
|
+
"module": type(instance).__module__,
|
|
187
210
|
"available": True,
|
|
188
|
-
"description":
|
|
211
|
+
"description": type(instance).__doc__ or "No description available",
|
|
189
212
|
}
|
|
190
213
|
# Add method info if available
|
|
191
214
|
if hasattr(instance, "get_info"):
|
|
192
|
-
info[name].update(instance.get_info())
|
|
215
|
+
info[name].update(instance.get_info()) # type: ignore[attr-defined]
|
|
216
|
+
except ImportError as e:
|
|
217
|
+
info[name] = {
|
|
218
|
+
"class": "ArrowSerializer" if name == "arrow" else "Unknown",
|
|
219
|
+
"module": "cachekit.serializers.arrow_serializer",
|
|
220
|
+
"available": False,
|
|
221
|
+
"error": str(e),
|
|
222
|
+
}
|
|
193
223
|
except Exception as e:
|
|
194
224
|
info[name] = {
|
|
195
|
-
"class":
|
|
196
|
-
"module":
|
|
225
|
+
"class": "Unknown",
|
|
226
|
+
"module": "unknown",
|
|
197
227
|
"available": False,
|
|
198
228
|
"error": str(e),
|
|
199
229
|
}
|
|
200
230
|
return info
|
|
201
231
|
|
|
202
232
|
|
|
233
|
+
def __getattr__(name: str) -> Any:
|
|
234
|
+
"""Lazy attribute access for optional ArrowSerializer."""
|
|
235
|
+
if name == "ArrowSerializer":
|
|
236
|
+
return _get_arrow_serializer()
|
|
237
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
238
|
+
|
|
239
|
+
|
|
203
240
|
# Export the main interface
|
|
204
241
|
__all__ = [
|
|
205
242
|
"ArrowSerializer",
|
|
@@ -131,6 +131,7 @@ def _auto_default(obj: Any) -> Any:
|
|
|
131
131
|
- datetime/date/time → ISO-8601 strings
|
|
132
132
|
- UUID → string representation
|
|
133
133
|
- set/frozenset → list (with type marker for roundtrip)
|
|
134
|
+
- NumPy arrays → dict with binary data, shape, and dtype (nested in dicts/lists)
|
|
134
135
|
|
|
135
136
|
Provides helpful errors for:
|
|
136
137
|
- Pydantic models (suggest .model_dump())
|
|
@@ -162,6 +163,10 @@ def _auto_default(obj: Any) -> Any:
|
|
|
162
163
|
if isinstance(obj, (set, frozenset)):
|
|
163
164
|
return {"__set__": True, "value": list(obj), "frozen": isinstance(obj, frozenset)}
|
|
164
165
|
|
|
166
|
+
# NumPy array support (nested in dicts/lists via msgpack custom encoder)
|
|
167
|
+
if HAS_NUMPY and isinstance(obj, np.ndarray):
|
|
168
|
+
return {"__ndarray__": True, "data": obj.tobytes(), "shape": list(obj.shape), "dtype": str(obj.dtype)}
|
|
169
|
+
|
|
165
170
|
# NEW: Helpful error detection for common unsupported types
|
|
166
171
|
if _safe_hasattr(obj, "model_dump"): # Pydantic BaseModel
|
|
167
172
|
raise TypeError(PYDANTIC_ERROR_MESSAGE)
|
|
@@ -184,6 +189,7 @@ def _auto_object_hook(obj: Any) -> Any:
|
|
|
184
189
|
- datetime/date/time from ISO-8601 strings
|
|
185
190
|
- UUID from string representation
|
|
186
191
|
- set/frozenset from list (type-safe roundtrip)
|
|
192
|
+
- NumPy arrays from binary data with shape and dtype
|
|
187
193
|
|
|
188
194
|
Args:
|
|
189
195
|
obj: Object from MessagePack decoder
|
|
@@ -232,6 +238,13 @@ def _auto_object_hook(obj: Any) -> Any:
|
|
|
232
238
|
else:
|
|
233
239
|
return set(value_list)
|
|
234
240
|
|
|
241
|
+
if obj.get("__ndarray__") is True:
|
|
242
|
+
if not HAS_NUMPY:
|
|
243
|
+
raise SerializationError("Cannot deserialize numpy array: numpy is not installed")
|
|
244
|
+
if "data" not in obj or "shape" not in obj or "dtype" not in obj:
|
|
245
|
+
raise SerializationError("Invalid ndarray format: missing required fields in cached data")
|
|
246
|
+
return np.frombuffer(obj["data"], dtype=obj["dtype"]).reshape(obj["shape"])
|
|
247
|
+
|
|
235
248
|
return obj
|
|
236
249
|
|
|
237
250
|
|
|
@@ -1,158 +0,0 @@
|
|
|
1
|
-
"""Cache key generation functionality."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import hashlib
|
|
6
|
-
from typing import Any, Callable, cast
|
|
7
|
-
|
|
8
|
-
import msgpack
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class CacheKeyGenerator:
|
|
12
|
-
"""Generates consistent cache keys from function calls.
|
|
13
|
-
|
|
14
|
-
Uses MessagePack + Blake2b-256 for cross-language compatibility.
|
|
15
|
-
Implements protocol-v1.0.md Section 3.3 (MessagePack-based approach).
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
# Key length constants
|
|
19
|
-
MAX_KEY_LENGTH = 250 # Practical cache key length limit (Redis, Memcached, etc.)
|
|
20
|
-
KEY_PREFIX_LENGTH = 50 # Length of prefix to keep when shortening keys
|
|
21
|
-
|
|
22
|
-
# Serializer codes for compact metadata encoding (1 char each)
|
|
23
|
-
SERIALIZER_CODES = {
|
|
24
|
-
"std": "s", # StandardSerializer (multi-language MessagePack)
|
|
25
|
-
"auto": "a", # AutoSerializer (Python-specific, NumPy/pandas)
|
|
26
|
-
"orjson": "o", # OrjsonSerializer (JSON-based)
|
|
27
|
-
"arrow": "w", # ArrowSerializer (columnar format, w=arroW)
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
def __init__(self):
|
|
31
|
-
"""Initialize the key generator.
|
|
32
|
-
|
|
33
|
-
Uses MessagePack + Blake2b-256 per protocol-v1.0.md Section 3.3.
|
|
34
|
-
"""
|
|
35
|
-
pass
|
|
36
|
-
|
|
37
|
-
def generate_key(
|
|
38
|
-
self,
|
|
39
|
-
func: Callable[..., Any],
|
|
40
|
-
args: tuple[Any, ...],
|
|
41
|
-
kwargs: dict[str, Any],
|
|
42
|
-
namespace: str | None = None,
|
|
43
|
-
integrity_checking: bool = True,
|
|
44
|
-
serializer_type: str = "std",
|
|
45
|
-
) -> str:
|
|
46
|
-
"""Generate a cache key from function and arguments.
|
|
47
|
-
|
|
48
|
-
Args:
|
|
49
|
-
func: The function being cached
|
|
50
|
-
args: Positional arguments passed to the function
|
|
51
|
-
kwargs: Keyword arguments passed to the function
|
|
52
|
-
namespace: Optional namespace prefix for the key
|
|
53
|
-
integrity_checking: Whether integrity checking is enabled (ByteStorage vs plain MessagePack)
|
|
54
|
-
serializer_type: Serializer type code ("std", "auto", "orjson", "arrow")
|
|
55
|
-
|
|
56
|
-
Returns:
|
|
57
|
-
A consistent string key for caching
|
|
58
|
-
|
|
59
|
-
Note:
|
|
60
|
-
Uses compact metadata suffix format: :<ic><serializer_code>
|
|
61
|
-
Example: ":1s" = integrity_checking=True, serializer=StandardSerializer
|
|
62
|
-
"""
|
|
63
|
-
# Build key components efficiently (avoid f-strings in hot path)
|
|
64
|
-
key_parts = []
|
|
65
|
-
|
|
66
|
-
# Add namespace if provided
|
|
67
|
-
if namespace:
|
|
68
|
-
key_parts.extend(["ns:", namespace, ":"])
|
|
69
|
-
|
|
70
|
-
# Add function identifier (module + name) - single string operation
|
|
71
|
-
key_parts.extend(["func:", func.__module__, ".", func.__qualname__, ":"])
|
|
72
|
-
|
|
73
|
-
# Generate args hash using Blake2b-256
|
|
74
|
-
args_hash = self._blake2b_hash(args, kwargs)
|
|
75
|
-
|
|
76
|
-
key_parts.extend(["args:", args_hash, ":"])
|
|
77
|
-
|
|
78
|
-
# Add compact metadata suffix: :<ic><serializer_code>
|
|
79
|
-
# Example: ":1s" = integrity_checking=True, serializer=std
|
|
80
|
-
ic_flag = "1" if integrity_checking else "0"
|
|
81
|
-
serializer_code = self.SERIALIZER_CODES.get(serializer_type, "s") # Default to "s" if unknown
|
|
82
|
-
key_parts.extend([ic_flag, serializer_code])
|
|
83
|
-
|
|
84
|
-
# Single join operation reduces string allocations
|
|
85
|
-
key = "".join(key_parts)
|
|
86
|
-
|
|
87
|
-
# Ensure key is within practical limits and contains no problematic characters
|
|
88
|
-
return self._normalize_key(key)
|
|
89
|
-
|
|
90
|
-
def _blake2b_hash(self, args: tuple, kwargs: dict) -> str:
|
|
91
|
-
"""Generate hash using MessagePack + Blake2b-256.
|
|
92
|
-
|
|
93
|
-
Blake2b-256 (32 bytes = 64 hex chars) for collision resistance.
|
|
94
|
-
MessagePack ensures cross-language compatibility.
|
|
95
|
-
|
|
96
|
-
Raises:
|
|
97
|
-
TypeError: If args/kwargs contain unsupported types (custom objects, numpy arrays, etc.)
|
|
98
|
-
"""
|
|
99
|
-
# Step 1: Normalize recursively
|
|
100
|
-
normalized_args = [self._normalize(arg) for arg in args]
|
|
101
|
-
normalized_kwargs = {k: self._normalize(v) for k, v in sorted(kwargs.items())}
|
|
102
|
-
|
|
103
|
-
# Step 2: Serialize with MessagePack
|
|
104
|
-
try:
|
|
105
|
-
msgpack_bytes = cast(
|
|
106
|
-
bytes, msgpack.packb([normalized_args, normalized_kwargs], use_bin_type=True, strict_types=True)
|
|
107
|
-
)
|
|
108
|
-
except TypeError as e:
|
|
109
|
-
# Wrap msgpack's TypeError with a more descriptive message
|
|
110
|
-
raise TypeError(f"Unsupported type for cache key generation: {e}") from e
|
|
111
|
-
|
|
112
|
-
# Step 3: Hash with Blake2b-256
|
|
113
|
-
return hashlib.blake2b(msgpack_bytes, digest_size=32).hexdigest()
|
|
114
|
-
|
|
115
|
-
def _normalize(self, obj: Any) -> Any:
|
|
116
|
-
"""Normalize object for deterministic MessagePack encoding.
|
|
117
|
-
|
|
118
|
-
CRITICAL: Ensures identical serialization across Python, TypeScript, Go, PHP.
|
|
119
|
-
"""
|
|
120
|
-
if isinstance(obj, dict):
|
|
121
|
-
# Recursively normalize dict with sorted keys
|
|
122
|
-
return {k: self._normalize(v) for k, v in sorted(obj.items())}
|
|
123
|
-
|
|
124
|
-
elif isinstance(obj, (list, tuple)):
|
|
125
|
-
# Recursively normalize collections (tuple→list)
|
|
126
|
-
return [self._normalize(x) for x in obj]
|
|
127
|
-
|
|
128
|
-
elif isinstance(obj, float):
|
|
129
|
-
# CRITICAL: Normalize -0.0 → 0.0 for cross-language compatibility
|
|
130
|
-
return 0.0 if obj == 0.0 else obj
|
|
131
|
-
|
|
132
|
-
else:
|
|
133
|
-
# Primitives (int, str, bytes, bool, None) pass through unchanged
|
|
134
|
-
return obj
|
|
135
|
-
|
|
136
|
-
def _normalize_key(self, key: str) -> str:
|
|
137
|
-
"""Normalize key to ensure it's valid for cache backends.
|
|
138
|
-
|
|
139
|
-
Args:
|
|
140
|
-
key: Raw cache key
|
|
141
|
-
|
|
142
|
-
Returns:
|
|
143
|
-
Normalized key safe for cache backends (Redis, Memcached, etc.)
|
|
144
|
-
"""
|
|
145
|
-
# Replace problematic characters
|
|
146
|
-
normalized = key.replace(" ", "_").replace("\n", "_").replace("\r", "_")
|
|
147
|
-
|
|
148
|
-
# Ensure key length is within practical limits for cache backends
|
|
149
|
-
if len(normalized) > self.MAX_KEY_LENGTH:
|
|
150
|
-
# If too long, hash the key to get consistent shorter version
|
|
151
|
-
# Use Blake2b-256 (32 bytes) for consistency
|
|
152
|
-
key_hash = hashlib.blake2b(normalized.encode("utf-8"), digest_size=32).hexdigest()
|
|
153
|
-
|
|
154
|
-
# Keep first part of original key for readability + hash
|
|
155
|
-
prefix = normalized[: self.KEY_PREFIX_LENGTH] if len(normalized) > self.KEY_PREFIX_LENGTH else normalized
|
|
156
|
-
normalized = f"{prefix}:{key_hash[:32]}"
|
|
157
|
-
|
|
158
|
-
return normalized
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|