cachekit 0.2.3__tar.gz → 0.3.1__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (80):
  1. {cachekit-0.2.3 → cachekit-0.3.1}/Cargo.lock +3 -3
  2. {cachekit-0.2.3 → cachekit-0.3.1}/PKG-INFO +4 -4
  3. {cachekit-0.2.3 → cachekit-0.3.1}/pyproject.toml +1 -1
  4. {cachekit-0.2.3 → cachekit-0.3.1}/rust/Cargo.toml +1 -1
  5. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/__init__.py +1 -1
  6. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/decorator.py +6 -1
  7. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/wrapper.py +27 -5
  8. cachekit-0.3.1/src/cachekit/key_generator.py +348 -0
  9. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/logging.py +2 -1
  10. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/__init__.py +51 -14
  11. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/auto_serializer.py +13 -0
  12. cachekit-0.2.3/src/cachekit/key_generator.py +0 -158
  13. {cachekit-0.2.3 → cachekit-0.3.1}/Cargo.toml +0 -0
  14. {cachekit-0.2.3 → cachekit-0.3.1}/LICENSE +0 -0
  15. {cachekit-0.2.3 → cachekit-0.3.1}/README.md +0 -0
  16. {cachekit-0.2.3 → cachekit-0.3.1}/rust/Makefile +0 -0
  17. {cachekit-0.2.3 → cachekit-0.3.1}/rust/README.md +0 -0
  18. {cachekit-0.2.3 → cachekit-0.3.1}/rust/TEST_EXPANSION_SUMMARY.md +0 -0
  19. {cachekit-0.2.3 → cachekit-0.3.1}/rust/src/lib.rs +0 -0
  20. {cachekit-0.2.3 → cachekit-0.3.1}/rust/src/python_bindings.rs +0 -0
  21. {cachekit-0.2.3 → cachekit-0.3.1}/rust/supply-chain/audits.toml +0 -0
  22. {cachekit-0.2.3 → cachekit-0.3.1}/rust/supply-chain/config.toml +0 -0
  23. {cachekit-0.2.3 → cachekit-0.3.1}/rust/supply-chain/imports.lock +0 -0
  24. {cachekit-0.2.3 → cachekit-0.3.1}/rust/tsan_suppressions.txt +0 -0
  25. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/__init__.py +0 -0
  26. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/base.py +0 -0
  27. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/errors.py +0 -0
  28. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/file/__init__.py +0 -0
  29. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/file/backend.py +0 -0
  30. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/file/config.py +0 -0
  31. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/provider.py +0 -0
  32. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/__init__.py +0 -0
  33. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/backend.py +0 -0
  34. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/client.py +0 -0
  35. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/config.py +0 -0
  36. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/error_handler.py +0 -0
  37. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/backends/redis/provider.py +0 -0
  38. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/cache_handler.py +0 -0
  39. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/__init__.py +0 -0
  40. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/nested.py +0 -0
  41. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/settings.py +0 -0
  42. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/singleton.py +0 -0
  43. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/config/validation.py +0 -0
  44. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/__init__.py +0 -0
  45. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/intent.py +0 -0
  46. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/main.py +0 -0
  47. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/orchestrator.py +0 -0
  48. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/session.py +0 -0
  49. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/stats_context.py +0 -0
  50. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/tenant_context.py +0 -0
  51. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/decorators/utils/__init__.py +0 -0
  52. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/di.py +0 -0
  53. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/hash_utils.py +0 -0
  54. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/health.py +0 -0
  55. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/hiredis_compat.py +0 -0
  56. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/imports.py +0 -0
  57. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/invalidation/__init__.py +0 -0
  58. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/invalidation/channel.py +0 -0
  59. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/invalidation/event.py +0 -0
  60. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/invalidation/redis_channel.py +0 -0
  61. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/l1_cache.py +0 -0
  62. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/monitoring/__init__.py +0 -0
  63. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/monitoring/correlation_tracking.py +0 -0
  64. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/monitoring/pool_monitor.py +0 -0
  65. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/monitoring/protocols.py +0 -0
  66. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/py.typed +0 -0
  67. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/__init__.py +0 -0
  68. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/adaptive_timeout.py +0 -0
  69. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/async_metrics.py +0 -0
  70. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/circuit_breaker.py +0 -0
  71. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/error_classification.py +0 -0
  72. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/load_control.py +0 -0
  73. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/metrics_collection.py +0 -0
  74. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/reliability/profiles.py +0 -0
  75. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/arrow_serializer.py +0 -0
  76. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/base.py +0 -0
  77. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/encryption_wrapper.py +0 -0
  78. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/orjson_serializer.py +0 -0
  79. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/standard_serializer.py +0 -0
  80. {cachekit-0.2.3 → cachekit-0.3.1}/src/cachekit/serializers/wrapper.py +0 -0
@@ -202,9 +202,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
202
202
 
203
203
  [[package]]
204
204
  name = "bytes"
205
- version = "1.10.1"
205
+ version = "1.11.1"
206
206
  source = "registry+https://github.com/rust-lang/crates.io-index"
207
- checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
207
+ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
208
208
 
209
209
  [[package]]
210
210
  name = "cachekit-core"
@@ -231,7 +231,7 @@ dependencies = [
231
231
 
232
232
  [[package]]
233
233
  name = "cachekit-rs"
234
- version = "0.2.3"
234
+ version = "0.3.1"
235
235
  dependencies = [
236
236
  "cachekit-core",
237
237
  "criterion",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cachekit
3
- Version: 0.2.3
3
+ Version: 0.3.1
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -43,11 +43,11 @@ Maintainer-email: cachekit Contributors <noreply@cachekit.io>
43
43
  License: MIT
44
44
  Requires-Python: >=3.9
45
45
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
46
- Project-URL: Homepage, https://github.com/cachekit-io/cachekit-py
46
+ Project-URL: Changelog, https://github.com/cachekit-io/cachekit-py/blob/main/CHANGELOG.md
47
47
  Project-URL: Documentation, https://github.com/cachekit-io/cachekit-py#readme
48
- Project-URL: Repository, https://github.com/cachekit-io/cachekit-py.git
48
+ Project-URL: Homepage, https://github.com/cachekit-io/cachekit-py
49
49
  Project-URL: Issues, https://github.com/cachekit-io/cachekit-py/issues
50
- Project-URL: Changelog, https://github.com/cachekit-io/cachekit-py/blob/main/CHANGELOG.md
50
+ Project-URL: Repository, https://github.com/cachekit-io/cachekit-py.git
51
51
 
52
52
  <div align="center">
53
53
 
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "cachekit"
7
- version = "0.2.3"
7
+ version = "0.3.1"
8
8
  description = "Production-ready Redis caching for Python with intelligent reliability features and Rust-powered performance"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "cachekit-rs"
3
- version = "0.2.3"
3
+ version = "0.3.1"
4
4
  edition = "2021"
5
5
  authors = ["cachekit Contributors"]
6
6
  description = "High-performance storage engine for caching with compression and encryption"
@@ -62,7 +62,7 @@ Example Usage:
62
62
  ```
63
63
  """
64
64
 
65
- __version__ = "0.2.3"
65
+ __version__ = "0.3.1"
66
66
 
67
67
  from typing import Any, Callable, TypeVar
68
68
 
@@ -172,6 +172,9 @@ class DecoratorConfig:
172
172
  integrity_checking: Enable checksums for corruption detection (default: True)
173
173
  All serializers use xxHash3-64 (8 bytes).
174
174
  Set to False for @cache.minimal (speed-first, no integrity guarantee)
175
+ key: Custom key function for complex types. Receives (*args, **kwargs) and returns str.
176
+ Use for numpy arrays, DataFrames, or cross-language cache sharing.
177
+ Example: @cache(key=lambda arr: hashlib.blake2b(arr.tobytes()).hexdigest())
175
178
  refresh_ttl_on_get: Extend TTL on cache hit
176
179
  ttl_refresh_threshold: Minimum remaining TTL fraction (0.0-1.0) to trigger refresh
177
180
  backend: L2 backend (RedisBackend, HTTPBackend, None for L1-only)
@@ -183,12 +186,13 @@ class DecoratorConfig:
183
186
  encryption: Client-side encryption configuration
184
187
  """
185
188
 
186
- # Core settings (5 fields)
189
+ # Core settings (6 fields)
187
190
  ttl: int | None = None
188
191
  namespace: str | None = None
189
192
  serializer: Union[str, SerializerProtocol] = "default" # type: ignore[assignment] # String name or protocol instance
190
193
  safe_mode: bool = False
191
194
  integrity_checking: bool = True # Checksums for corruption detection (xxHash3-64 for all serializers)
195
+ key: Callable[..., str] | None = None # Custom key function (escape hatch for complex types)
192
196
 
193
197
  # Performance (2 fields)
194
198
  refresh_ttl_on_get: bool = False
@@ -251,6 +255,7 @@ class DecoratorConfig:
251
255
  "namespace": self.namespace,
252
256
  "serializer": self.serializer,
253
257
  "safe_mode": self.safe_mode,
258
+ "key": self.key,
254
259
  "refresh_ttl_on_get": self.refresh_ttl_on_get,
255
260
  "ttl_refresh_threshold": self.ttl_refresh_threshold,
256
261
  "backend": self.backend,
@@ -412,6 +412,15 @@ def create_cache_wrapper(
412
412
  deployment_uuid = config.encryption.deployment_uuid
413
413
  master_key = config.encryption.master_key
414
414
 
415
+ # Custom key function (escape hatch for complex types)
416
+ custom_key_func = config.key
417
+ else:
418
+ custom_key_func = None
419
+
420
+ # Re-scope custom_key_func for closure
421
+ if "custom_key_func" not in dir():
422
+ custom_key_func = None
423
+
415
424
  # Fast mode: Disable monitoring overhead, keep performance features
416
425
  use_circuit_breaker = circuit_breaker and not fast_mode
417
426
  use_adaptive_timeout = adaptive_timeout and not fast_mode
@@ -541,7 +550,13 @@ def create_cache_wrapper(
541
550
 
542
551
  # Key generation - needed for both L1-only and L1+L2 modes
543
552
  try:
544
- if fast_mode:
553
+ # Custom key function takes priority (escape hatch for complex types)
554
+ if custom_key_func is not None:
555
+ custom_key = custom_key_func(*args, **kwargs)
556
+ if not isinstance(custom_key, str):
557
+ raise TypeError(f"key function must return str, got {type(custom_key).__name__}")
558
+ cache_key = f"{namespace or 'default'}:{custom_key}"
559
+ elif fast_mode:
545
560
  # Minimal key generation - no string formatting overhead
546
561
  from ..hash_utils import cache_key_hash
547
562
 
@@ -878,12 +893,17 @@ def create_cache_wrapper(
878
893
  cache_key = None
879
894
  func_start_time: float | None = None # Initialize for exception handlers
880
895
  try:
881
- # Fast key generation path (for simple types)
882
- if fast_mode:
896
+ # Custom key function takes priority (escape hatch for complex types)
897
+ if custom_key_func is not None:
898
+ custom_key = custom_key_func(*args, **kwargs)
899
+ if not isinstance(custom_key, str):
900
+ raise TypeError(f"key function must return str, got {type(custom_key).__name__}")
901
+ cache_key = f"{namespace or 'default'}:{custom_key}"
902
+ elif fast_mode:
883
903
  # Ultra-fast key generation for hot paths (10-50μs savings)
884
904
  from ..hash_utils import cache_key_hash
885
905
 
886
- cache_namespace = namespace or namespace or "default"
906
+ cache_namespace = namespace or "default"
887
907
  args_kwargs_str = str(args) + str(kwargs)
888
908
  cache_key = cache_namespace + ":" + func_hash + ":" + cache_key_hash(args_kwargs_str)
889
909
  else:
@@ -1372,7 +1392,9 @@ def create_cache_wrapper(
1372
1392
  """Clear cache statistics and invalidate all cached entries."""
1373
1393
  _stats.clear()
1374
1394
  # Also invalidate actual cache entries
1375
- invalidate_cache() if not inspect.iscoroutinefunction(func) else ainvalidate_cache()
1395
+ if inspect.iscoroutinefunction(func):
1396
+ raise TypeError("cache_clear() cannot clear cache for async functions. Use 'await fn.ainvalidate_cache()' instead.")
1397
+ invalidate_cache()
1376
1398
 
1377
1399
  if inspect.iscoroutinefunction(func):
1378
1400
  async_wrapper.invalidate_cache = ainvalidate_cache # type: ignore[attr-defined]
@@ -0,0 +1,348 @@
1
+ """Cache key generation functionality."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import sys
7
+ from datetime import datetime
8
+ from decimal import Decimal
9
+ from enum import Enum
10
+ from pathlib import Path, PurePath
11
+ from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast
12
+ from uuid import UUID
13
+
14
+ import msgpack
15
+
16
+ if TYPE_CHECKING:
17
+ pass
18
+
19
+ # Constants for constrained array support (per round-table review 2025-12-18)
20
+ ARRAY_MAX_BYTES = 100_000 # 100KB per array
21
+ ARRAY_AGGREGATE_MAX = 5_000_000 # 5MB total across all args
22
+ SUPPORTED_ARRAY_DTYPES = {"int32", "int64", "float32", "float64"}
23
+ DTYPE_MAP = {"int32": "i32", "int64": "i64", "float32": "f32", "float64": "f64"}
24
+
25
+
26
+ class CacheKeyGenerator:
27
+ """Generates consistent cache keys from function calls.
28
+
29
+ Uses MessagePack + Blake2b-256 for cross-language compatibility.
30
+ Implements protocol-v1.0.md Section 3.3 (MessagePack-based approach).
31
+ """
32
+
33
+ # Key length constants
34
+ MAX_KEY_LENGTH = 250 # Practical cache key length limit (Redis, Memcached, etc.)
35
+ KEY_PREFIX_LENGTH = 50 # Length of prefix to keep when shortening keys
36
+
37
+ # Serializer codes for compact metadata encoding (1 char each)
38
+ SERIALIZER_CODES = {
39
+ "std": "s", # StandardSerializer (multi-language MessagePack)
40
+ "auto": "a", # AutoSerializer (Python-specific, NumPy/pandas)
41
+ "orjson": "o", # OrjsonSerializer (JSON-based)
42
+ "arrow": "w", # ArrowSerializer (columnar format, w=arroW)
43
+ }
44
+
45
+ def __init__(self):
46
+ """Initialize the key generator.
47
+
48
+ Uses MessagePack + Blake2b-256 per protocol-v1.0.md Section 3.3.
49
+ """
50
+ pass
51
+
52
+ def generate_key(
53
+ self,
54
+ func: Callable[..., Any],
55
+ args: tuple[Any, ...],
56
+ kwargs: dict[str, Any],
57
+ namespace: str | None = None,
58
+ integrity_checking: bool = True,
59
+ serializer_type: str = "std",
60
+ ) -> str:
61
+ """Generate a cache key from function and arguments.
62
+
63
+ Args:
64
+ func: The function being cached
65
+ args: Positional arguments passed to the function
66
+ kwargs: Keyword arguments passed to the function
67
+ namespace: Optional namespace prefix for the key
68
+ integrity_checking: Whether integrity checking is enabled (ByteStorage vs plain MessagePack)
69
+ serializer_type: Serializer type code ("std", "auto", "orjson", "arrow")
70
+
71
+ Returns:
72
+ A consistent string key for caching
73
+
74
+ Note:
75
+ Uses compact metadata suffix format: :<ic><serializer_code>
76
+ Example: ":1s" = integrity_checking=True, serializer=StandardSerializer
77
+ """
78
+ # Build key components efficiently (avoid f-strings in hot path)
79
+ key_parts = []
80
+
81
+ # Add namespace if provided
82
+ if namespace:
83
+ key_parts.extend(["ns:", namespace, ":"])
84
+
85
+ # Add function identifier (module + name) - single string operation
86
+ key_parts.extend(["func:", func.__module__, ".", func.__qualname__, ":"])
87
+
88
+ # Generate args hash using Blake2b-256
89
+ args_hash = self._blake2b_hash(args, kwargs)
90
+
91
+ key_parts.extend(["args:", args_hash, ":"])
92
+
93
+ # Add compact metadata suffix: :<ic><serializer_code>
94
+ # Example: ":1s" = integrity_checking=True, serializer=std
95
+ ic_flag = "1" if integrity_checking else "0"
96
+ serializer_code = self.SERIALIZER_CODES.get(serializer_type, "s") # Default to "s" if unknown
97
+ key_parts.extend([ic_flag, serializer_code])
98
+
99
+ # Single join operation reduces string allocations
100
+ key = "".join(key_parts)
101
+
102
+ # Ensure key is within practical limits and contains no problematic characters
103
+ return self._normalize_key(key)
104
+
105
+ def _blake2b_hash(self, args: tuple, kwargs: dict) -> str:
106
+ """Generate hash using MessagePack + Blake2b-256.
107
+
108
+ Blake2b-256 (32 bytes = 64 hex chars) for collision resistance.
109
+ MessagePack ensures cross-language compatibility.
110
+
111
+ Raises:
112
+ TypeError: If args/kwargs contain unsupported types (custom objects, numpy arrays, etc.)
113
+ """
114
+ # Track aggregate array bytes for DoS prevention
115
+ array_bytes_seen: list[int] = [0]
116
+
117
+ # Step 1: Normalize recursively
118
+ normalized_args = [self._normalize(arg, array_bytes_seen) for arg in args]
119
+ normalized_kwargs = {k: self._normalize(v, array_bytes_seen) for k, v in sorted(kwargs.items())}
120
+
121
+ # Step 2: Serialize with MessagePack
122
+ try:
123
+ msgpack_bytes = cast(
124
+ bytes, msgpack.packb([normalized_args, normalized_kwargs], use_bin_type=True, strict_types=True)
125
+ )
126
+ except TypeError as e:
127
+ # Wrap msgpack's TypeError with a more descriptive message
128
+ raise TypeError(f"Unsupported type for cache key generation: {e}") from e
129
+
130
+ # Step 3: Hash with Blake2b-256
131
+ return hashlib.blake2b(msgpack_bytes, digest_size=32).hexdigest()
132
+
133
+ def _normalize(self, obj: Any, _array_bytes_seen: list[int] | None = None) -> Any:
134
+ """Normalize object for deterministic MessagePack encoding.
135
+
136
+ CRITICAL: Cross-language compatible types ONLY per Protocol v1.1.
137
+
138
+ Supported types (per round-table review 2025-12-18):
139
+ - Primitives: int, str, bytes, bool, None, float
140
+ - Collections: dict (sorted keys), list, tuple
141
+ - Extended: Path, UUID, Decimal, Enum, datetime (UTC only)
142
+ - Arrays: numpy.ndarray (1D, ≤100KB, i32/i64/f32/f64)
143
+
144
+ Args:
145
+ obj: Object to normalize
146
+ _array_bytes_seen: Internal tracker for aggregate array size (DoS prevention)
147
+
148
+ Returns:
149
+ Normalized object safe for MessagePack serialization
150
+
151
+ Raises:
152
+ TypeError: For unsupported types with helpful guidance
153
+ """
154
+ # Initialize aggregate tracker if not provided
155
+ if _array_bytes_seen is None:
156
+ _array_bytes_seen = [0]
157
+
158
+ # === COLLECTIONS (recursive) ===
159
+ if isinstance(obj, dict):
160
+ return {k: self._normalize(v, _array_bytes_seen) for k, v in sorted(obj.items())}
161
+
162
+ if isinstance(obj, (list, tuple)):
163
+ return [self._normalize(x, _array_bytes_seen) for x in obj]
164
+
165
+ # === FLOAT (cross-language compat) ===
166
+ if isinstance(obj, float):
167
+ # CRITICAL: Normalize -0.0 → 0.0 for cross-language compatibility
168
+ return 0.0 if obj == 0.0 else obj
169
+
170
+ # === EXTENDED TYPES ===
171
+
172
+ # Path: normalize to POSIX format for cross-platform consistency
173
+ if isinstance(obj, (Path, PurePath)):
174
+ return obj.as_posix()
175
+
176
+ # UUID: standard string format
177
+ if isinstance(obj, UUID):
178
+ return str(obj)
179
+
180
+ # Decimal: exact string representation
181
+ if isinstance(obj, Decimal):
182
+ return str(obj)
183
+
184
+ # Enum: use value (recursively normalize in case value is complex)
185
+ if isinstance(obj, Enum):
186
+ return self._normalize(obj.value, _array_bytes_seen)
187
+
188
+ # datetime: UTC only, reject naive datetimes
189
+ if isinstance(obj, datetime):
190
+ if obj.tzinfo is None:
191
+ raise TypeError(
192
+ "Naive datetime not allowed in cache keys (timezone ambiguity). "
193
+ "Use timezone-aware datetime: datetime(..., tzinfo=timezone.utc)"
194
+ )
195
+ return obj.isoformat()
196
+
197
+ # === NUMPY ARRAY (constrained support) ===
198
+ if self._is_numpy_array(obj):
199
+ return self._normalize_array(obj, _array_bytes_seen)
200
+
201
+ # === PRIMITIVES (pass through) ===
202
+ if isinstance(obj, (int, str, bytes, bool, type(None))):
203
+ return obj
204
+
205
+ # === UNSUPPORTED: Fail fast with helpful message ===
206
+ return self._raise_unsupported_type(obj)
207
+
208
+ def _is_numpy_array(self, obj: Any) -> bool:
209
+ """Check if object is numpy array without importing numpy."""
210
+ return type(obj).__module__ == "numpy" and type(obj).__name__ == "ndarray"
211
+
212
+ def _normalize_array(self, arr: Any, _array_bytes_seen: list[int]) -> list[Any]:
213
+ """Normalize numpy array with strict constraints.
214
+
215
+ Constraints (per round-table review 2025-12-18):
216
+ - 1D only (cross-language simplicity)
217
+ - ≤100KB (memory safety)
218
+ - 4 dtypes: i32, i64, f32, f64 (cross-language compatibility)
219
+ - Little-endian byte order (platform determinism)
220
+ - 256-bit Blake2b hash (collision resistance)
221
+ - Version prefix for future protocol changes
222
+
223
+ Args:
224
+ arr: numpy.ndarray to normalize
225
+ _array_bytes_seen: Aggregate byte counter for DoS prevention
226
+
227
+ Returns:
228
+ List of ["__array_v1__", shape_list, dtype_str, content_hash]
229
+ (list format for MessagePack compatibility with strict_types=True)
230
+
231
+ Raises:
232
+ TypeError: If array doesn't meet constraints
233
+ """
234
+ import numpy as np
235
+
236
+ # Constraint 1: Size limit per array
237
+ if arr.nbytes > ARRAY_MAX_BYTES:
238
+ raise TypeError(
239
+ f"Array too large ({arr.nbytes:,} bytes, max {ARRAY_MAX_BYTES:,}). Use key= parameter for large arrays."
240
+ )
241
+
242
+ # Constraint 2: Aggregate size limit (DoS prevention)
243
+ _array_bytes_seen[0] += arr.nbytes
244
+ if _array_bytes_seen[0] > ARRAY_AGGREGATE_MAX:
245
+ raise TypeError(
246
+ f"Total array size exceeds {ARRAY_AGGREGATE_MAX:,} bytes. Use key= parameter for batch array operations."
247
+ )
248
+
249
+ # Constraint 3: 1D only
250
+ if arr.ndim != 1:
251
+ raise TypeError(
252
+ f"Only 1D arrays supported in cache keys (got {arr.ndim}D). "
253
+ f"Use key= parameter for multidimensional arrays, or flatten with arr.ravel()."
254
+ )
255
+
256
+ # Constraint 4: Supported dtypes only
257
+ dtype_name = arr.dtype.name
258
+ if dtype_name not in SUPPORTED_ARRAY_DTYPES:
259
+ raise TypeError(
260
+ f"Unsupported array dtype '{dtype_name}'. "
261
+ f"Supported: {', '.join(sorted(SUPPORTED_ARRAY_DTYPES))}. "
262
+ f"Cast with arr.astype(np.float64) or use key= parameter."
263
+ )
264
+
265
+ # Ensure C-contiguous memory layout
266
+ arr = np.ascontiguousarray(arr)
267
+
268
+ # Force little-endian byte order for cross-platform determinism
269
+ if arr.dtype.byteorder not in ("=", "<", "|"):
270
+ arr = arr.astype(arr.dtype.newbyteorder("<"))
271
+ elif arr.dtype.byteorder == "=" and sys.byteorder == "big":
272
+ arr = arr.byteswap().newbyteorder("<")
273
+
274
+ # 256-bit Blake2b hash (per security review)
275
+ content_hash = hashlib.blake2b(arr.tobytes(), digest_size=32).hexdigest()
276
+
277
+ # Standardized dtype string for cross-language compatibility
278
+ dtype_str = DTYPE_MAP[dtype_name]
279
+
280
+ # Version prefix for protocol evolution
281
+ # Return as list (not tuple) for MessagePack compatibility with strict_types=True
282
+ # Shape converted to list as well
283
+ return ["__array_v1__", list(arr.shape), dtype_str, content_hash]
284
+
285
+ def _raise_unsupported_type(self, obj: Any) -> NoReturn:
286
+ """Raise helpful TypeError for unsupported types.
287
+
288
+ Args:
289
+ obj: The unsupported object
290
+
291
+ Raises:
292
+ TypeError: Always, with guidance on how to handle the type
293
+ """
294
+ type_name = type(obj).__module__ + "." + type(obj).__qualname__
295
+
296
+ # Specific guidance for numpy arrays that don't meet constraints
297
+ if "numpy" in type_name and "ndarray" in type_name:
298
+ raise TypeError(
299
+ "numpy array doesn't meet cache key constraints. "
300
+ "Requirements: 1D, ≤100KB, dtype in (i32, i64, f32, f64). "
301
+ "Use key= parameter for other arrays."
302
+ )
303
+
304
+ if "pandas" in type_name:
305
+ raise TypeError(
306
+ "pandas objects not supported as cache key arguments "
307
+ "(Parquet serialization is non-deterministic). "
308
+ "Recommended patterns:\n"
309
+ " 1. Pass identifier, return DataFrame: @cache def load(id: int) -> pd.DataFrame\n"
310
+ " 2. Use explicit key: @cache(key=lambda df: hashlib.blake2b(df.to_parquet()).hexdigest())"
311
+ )
312
+
313
+ if isinstance(obj, (set, frozenset)):
314
+ raise TypeError(
315
+ "set/frozenset not supported in cache keys (mixed-type sorting crashes). "
316
+ "Convert to sorted list: sorted(list(your_set))"
317
+ )
318
+
319
+ raise TypeError(
320
+ f"Unsupported type '{type_name}' for cache key. "
321
+ f"Supported: dict, list, tuple, int, float, str, bytes, bool, None, "
322
+ f"Path, UUID, Decimal, Enum, datetime (UTC), 1D numpy arrays (≤100KB, i32/i64/f32/f64). "
323
+ f"For custom types, use key= parameter."
324
+ )
325
+
326
+ def _normalize_key(self, key: str) -> str:
327
+ """Normalize key to ensure it's valid for cache backends.
328
+
329
+ Args:
330
+ key: Raw cache key
331
+
332
+ Returns:
333
+ Normalized key safe for cache backends (Redis, Memcached, etc.)
334
+ """
335
+ # Replace problematic characters
336
+ normalized = key.replace(" ", "_").replace("\n", "_").replace("\r", "_")
337
+
338
+ # Ensure key length is within practical limits for cache backends
339
+ if len(normalized) > self.MAX_KEY_LENGTH:
340
+ # If too long, hash the key to get consistent shorter version
341
+ # Use Blake2b-256 (32 bytes) for consistency
342
+ key_hash = hashlib.blake2b(normalized.encode("utf-8"), digest_size=32).hexdigest()
343
+
344
+ # Keep first part of original key for readability + hash
345
+ prefix = normalized[: self.KEY_PREFIX_LENGTH] if len(normalized) > self.KEY_PREFIX_LENGTH else normalized
346
+ normalized = f"{prefix}:{key_hash[:32]}"
347
+
348
+ return normalized
@@ -7,6 +7,7 @@ that reduces overhead from 570% to <5% while maintaining functionality.
7
7
  import json
8
8
  import logging
9
9
  import os
10
+ import platform
10
11
  import random
11
12
  import threading
12
13
  import time
@@ -170,7 +171,7 @@ class UltraOptimizedStructuredLogger:
170
171
 
171
172
  # Pre-computed values for performance
172
173
  self._sampling_threshold = int(SAMPLING_RATE * 100)
173
- self._hostname = os.uname().nodename
174
+ self._hostname = platform.node()
174
175
  self._pid = os.getpid()
175
176
 
176
177
  # PII patterns to mask (pre-compiled for speed)
@@ -1,10 +1,11 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from threading import Lock
3
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
4
6
 
5
7
  from cachekit._rust_serializer import ByteStorage
6
8
 
7
- from .arrow_serializer import ArrowSerializer
8
9
  from .auto_serializer import AutoSerializer
9
10
  from .base import (
10
11
  SerializationError,
@@ -16,8 +17,25 @@ from .encryption_wrapper import EncryptionWrapper
16
17
  from .orjson_serializer import OrjsonSerializer
17
18
  from .standard_serializer import StandardSerializer
18
19
 
20
+ if TYPE_CHECKING:
21
+ from .arrow_serializer import ArrowSerializer
22
+
19
23
  logger = logging.getLogger(__name__)
20
24
 
25
+ # Lazy import for optional ArrowSerializer (requires pyarrow from [data] extra)
26
+ _ArrowSerializer: type | None = None
27
+
28
+
29
+ def _get_arrow_serializer() -> type:
30
+ """Lazy-load ArrowSerializer. Raises ImportError if pyarrow not installed."""
31
+ global _ArrowSerializer
32
+ if _ArrowSerializer is None:
33
+ from .arrow_serializer import ArrowSerializer
34
+
35
+ _ArrowSerializer = ArrowSerializer
36
+ return _ArrowSerializer
37
+
38
+
21
39
  # Validate ByteStorage works correctly
22
40
  test_storage = ByteStorage("msgpack")
23
41
  test_data = b"test validation data"
@@ -36,7 +54,7 @@ SERIALIZER_REGISTRY = {
36
54
  "auto": AutoSerializer, # Python-specific types (NumPy, pandas, datetime optimization)
37
55
  "default": StandardSerializer, # Language-agnostic MessagePack for multi-language caches
38
56
  "std": StandardSerializer, # Explicit StandardSerializer alias
39
- "arrow": ArrowSerializer,
57
+ "arrow": None, # Lazy-loaded: requires pyarrow from [data] extra
40
58
  "orjson": OrjsonSerializer,
41
59
  "encrypted": EncryptionWrapper, # AutoSerializer + AES-256-GCM encryption
42
60
  }
@@ -96,8 +114,13 @@ def get_serializer(name: str, enable_integrity_checking: bool = True) -> Seriali
96
114
  f"@cache(serializer=MySerializer())"
97
115
  )
98
116
 
117
+ # Get serializer class (lazy-load arrow if needed)
118
+ if name == "arrow":
119
+ serializer_class = _get_arrow_serializer()
120
+ else:
121
+ serializer_class = SERIALIZER_REGISTRY[name]
122
+
99
123
  # Instantiate with integrity checking configuration
100
- serializer_class = SERIALIZER_REGISTRY[name]
101
124
  if name in ("default", "std", "auto", "arrow", "orjson"):
102
125
  # All core serializers use enable_integrity_checking parameter
103
126
  serializer = serializer_class(enable_integrity_checking=enable_integrity_checking)
@@ -167,9 +190,9 @@ def get_available_serializers() -> dict[str, Any]:
167
190
  def benchmark_serializers() -> dict[str, Any]:
168
191
  """Get instantiated serializers for benchmarking."""
169
192
  serializers = {}
170
- for name, cls in get_available_serializers().items():
193
+ for name in SERIALIZER_REGISTRY:
171
194
  try:
172
- serializers[name] = cls()
195
+ serializers[name] = get_serializer(name)
173
196
  except Exception as e:
174
197
  logger.warning(f"Failed to instantiate {name} serializer: {e}")
175
198
  return serializers
@@ -178,28 +201,42 @@ def benchmark_serializers() -> dict[str, Any]:
178
201
  def get_serializer_info() -> dict[str, dict[str, Any]]:
179
202
  """Get information about available serializers."""
180
203
  info = {}
181
- for name, cls in get_available_serializers().items():
204
+ for name in SERIALIZER_REGISTRY:
182
205
  try:
183
- instance = cls()
206
+ instance = get_serializer(name)
184
207
  info[name] = {
185
- "class": cls.__name__,
186
- "module": cls.__module__,
208
+ "class": type(instance).__name__,
209
+ "module": type(instance).__module__,
187
210
  "available": True,
188
- "description": cls.__doc__ or "No description available",
211
+ "description": type(instance).__doc__ or "No description available",
189
212
  }
190
213
  # Add method info if available
191
214
  if hasattr(instance, "get_info"):
192
- info[name].update(instance.get_info())
215
+ info[name].update(instance.get_info()) # type: ignore[attr-defined]
216
+ except ImportError as e:
217
+ info[name] = {
218
+ "class": "ArrowSerializer" if name == "arrow" else "Unknown",
219
+ "module": "cachekit.serializers.arrow_serializer",
220
+ "available": False,
221
+ "error": str(e),
222
+ }
193
223
  except Exception as e:
194
224
  info[name] = {
195
- "class": cls.__name__,
196
- "module": cls.__module__,
225
+ "class": "Unknown",
226
+ "module": "unknown",
197
227
  "available": False,
198
228
  "error": str(e),
199
229
  }
200
230
  return info
201
231
 
202
232
 
233
def __getattr__(name: str) -> Any:
    """Resolve the optional ``ArrowSerializer`` attribute on first access.

    Args:
        name: Attribute that was not found on the module by normal lookup.

    Returns:
        The class returned by ``_get_arrow_serializer()`` when ``name`` is
        ``"ArrowSerializer"``.

    Raises:
        AttributeError: For every other attribute name.
    """
    if name != "ArrowSerializer":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _get_arrow_serializer()
238
+
239
+
203
240
  # Export the main interface
204
241
  __all__ = [
205
242
  "ArrowSerializer",
@@ -131,6 +131,7 @@ def _auto_default(obj: Any) -> Any:
131
131
  - datetime/date/time → ISO-8601 strings
132
132
  - UUID → string representation
133
133
  - set/frozenset → list (with type marker for roundtrip)
134
+ - NumPy arrays → dict with binary data, shape, and dtype (nested in dicts/lists)
134
135
 
135
136
  Provides helpful errors for:
136
137
  - Pydantic models (suggest .model_dump())
@@ -162,6 +163,10 @@ def _auto_default(obj: Any) -> Any:
162
163
  if isinstance(obj, (set, frozenset)):
163
164
  return {"__set__": True, "value": list(obj), "frozen": isinstance(obj, frozenset)}
164
165
 
166
+ # NumPy array support (nested in dicts/lists via msgpack custom encoder)
167
+ if HAS_NUMPY and isinstance(obj, np.ndarray):
168
+ return {"__ndarray__": True, "data": obj.tobytes(), "shape": list(obj.shape), "dtype": str(obj.dtype)}
169
+
165
170
  # NEW: Helpful error detection for common unsupported types
166
171
  if _safe_hasattr(obj, "model_dump"): # Pydantic BaseModel
167
172
  raise TypeError(PYDANTIC_ERROR_MESSAGE)
@@ -184,6 +189,7 @@ def _auto_object_hook(obj: Any) -> Any:
184
189
  - datetime/date/time from ISO-8601 strings
185
190
  - UUID from string representation
186
191
  - set/frozenset from list (type-safe roundtrip)
192
+ - NumPy arrays from binary data with shape and dtype
187
193
 
188
194
  Args:
189
195
  obj: Object from MessagePack decoder
@@ -232,6 +238,13 @@ def _auto_object_hook(obj: Any) -> Any:
232
238
  else:
233
239
  return set(value_list)
234
240
 
241
+ if obj.get("__ndarray__") is True:
242
+ if not HAS_NUMPY:
243
+ raise SerializationError("Cannot deserialize numpy array: numpy is not installed")
244
+ if "data" not in obj or "shape" not in obj or "dtype" not in obj:
245
+ raise SerializationError("Invalid ndarray format: missing required fields in cached data")
246
+ return np.frombuffer(obj["data"], dtype=obj["dtype"]).reshape(obj["shape"])
247
+
235
248
  return obj
236
249
 
237
250
 
@@ -1,158 +0,0 @@
1
- """Cache key generation functionality."""
2
-
3
- from __future__ import annotations
4
-
5
- import hashlib
6
- from typing import Any, Callable, cast
7
-
8
- import msgpack
9
-
10
-
11
class CacheKeyGenerator:
    """Generates consistent cache keys from function calls.

    Uses MessagePack + Blake2b-256 for cross-language compatibility.
    Implements protocol-v1.0.md Section 3.3 (MessagePack-based approach).

    The class keeps no per-instance state; construct it without arguments.
    """

    # Key length constants
    MAX_KEY_LENGTH = 250  # Practical cache key length limit (Redis, Memcached, etc.)
    KEY_PREFIX_LENGTH = 50  # Length of prefix to keep when shortening keys

    # Serializer codes for compact metadata encoding (1 char each)
    SERIALIZER_CODES = {
        "std": "s",  # StandardSerializer (multi-language MessagePack)
        "auto": "a",  # AutoSerializer (Python-specific, NumPy/pandas)
        "orjson": "o",  # OrjsonSerializer (JSON-based)
        "arrow": "w",  # ArrowSerializer (columnar format, w=arroW)
    }

    # Characters rewritten to "_" by _normalize_key (space, LF, CR).
    _UNSAFE_CHAR_TABLE = str.maketrans({" ": "_", "\n": "_", "\r": "_"})

    def generate_key(
        self,
        func: Callable[..., Any],
        args: tuple[Any, ...],
        kwargs: dict[str, Any],
        namespace: str | None = None,
        integrity_checking: bool = True,
        serializer_type: str = "std",
    ) -> str:
        """Generate a cache key from function and arguments.

        Args:
            func: The function being cached
            args: Positional arguments passed to the function
            kwargs: Keyword arguments passed to the function
            namespace: Optional namespace prefix for the key
            integrity_checking: Whether integrity checking is enabled (ByteStorage vs plain MessagePack)
            serializer_type: Serializer type code ("std", "auto", "orjson", "arrow")

        Returns:
            A consistent string key for caching, laid out as
            ``[ns:<namespace>:]func:<module>.<qualname>:args:<hash>:<ic><code>``
            before normalization.

        Raises:
            TypeError: If args/kwargs contain values that cannot be encoded.

        Note:
            Uses compact metadata suffix format: :<ic><serializer_code>
            Example: ":1s" = integrity_checking=True, serializer=StandardSerializer
        """
        key_parts: list[str] = []

        # Falsy namespaces (None or "") add no prefix at all.
        if namespace:
            key_parts += ["ns:", namespace, ":"]

        key_parts += ["func:", func.__module__, ".", func.__qualname__, ":"]
        key_parts += ["args:", self._blake2b_hash(args, kwargs), ":"]

        # Compact metadata suffix; unknown serializer types fall back to "s".
        key_parts.append("1" if integrity_checking else "0")
        key_parts.append(self.SERIALIZER_CODES.get(serializer_type, "s"))

        # Single join, then make the result safe for cache backends.
        return self._normalize_key("".join(key_parts))

    def _blake2b_hash(self, args: tuple, kwargs: dict) -> str:
        """Generate hash using MessagePack + Blake2b-256.

        Blake2b-256 (32 bytes = 64 hex chars) for collision resistance.
        MessagePack ensures cross-language compatibility.

        Returns:
            Hex digest (64 characters) of the packed ``[args, kwargs]`` pair.

        Raises:
            TypeError: If args/kwargs contain unsupported types (custom objects, numpy arrays, etc.)
                or dicts whose keys cannot be ordered against each other.
        """
        try:
            # Normalization sorts dict items and can itself raise TypeError
            # (e.g. mixed int/str keys), so it lives inside the same guard as
            # packing and every failure carries the same descriptive message.
            normalized_args = [self._normalize(arg) for arg in args]
            normalized_kwargs = {k: self._normalize(v) for k, v in sorted(kwargs.items())}
            msgpack_bytes = cast(
                bytes, msgpack.packb([normalized_args, normalized_kwargs], use_bin_type=True, strict_types=True)
            )
        except TypeError as e:
            raise TypeError(f"Unsupported type for cache key generation: {e}") from e

        return hashlib.blake2b(msgpack_bytes, digest_size=32).hexdigest()

    def _normalize(self, obj: Any) -> Any:
        """Normalize object for deterministic MessagePack encoding.

        CRITICAL: Ensures identical serialization across Python, TypeScript, Go, PHP.

        Dicts are rebuilt in sorted-item order, lists and tuples both become
        lists, and any float equal to zero becomes ``0.0``. Everything else is
        returned unchanged.

        Raises:
            TypeError: If a dict's keys cannot be compared for sorting.
        """
        if isinstance(obj, dict):
            return {k: self._normalize(v) for k, v in sorted(obj.items())}

        if isinstance(obj, (list, tuple)):
            return [self._normalize(x) for x in obj]

        if isinstance(obj, float):
            # CRITICAL: Normalize -0.0 → 0.0 for cross-language compatibility
            return 0.0 if obj == 0.0 else obj

        # Primitives (int, str, bytes, bool, None) pass through unchanged
        return obj

    def _normalize_key(self, key: str) -> str:
        """Normalize key to ensure it's valid for cache backends.

        Args:
            key: Raw cache key

        Returns:
            The key with spaces, line feeds and carriage returns replaced by
            ``_``. Keys longer than ``MAX_KEY_LENGTH`` are shortened to their
            first ``KEY_PREFIX_LENGTH`` characters, a colon, and the first 32
            hex characters of the Blake2b digest of the full normalized key.
        """
        normalized = key.translate(self._UNSAFE_CHAR_TABLE)
        if len(normalized) <= self.MAX_KEY_LENGTH:
            return normalized

        # Slicing already copes with strings shorter than the prefix length,
        # so no separate length check is needed here.
        key_hash = hashlib.blake2b(normalized.encode("utf-8"), digest_size=32).hexdigest()
        return f"{normalized[: self.KEY_PREFIX_LENGTH]}:{key_hash[:32]}"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes