cachekit 0.9.1__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {cachekit-0.9.1 → cachekit-0.10.0}/Cargo.lock +1 -1
  2. {cachekit-0.9.1 → cachekit-0.10.0}/PKG-INFO +1 -1
  3. {cachekit-0.9.1 → cachekit-0.10.0}/pyproject.toml +1 -1
  4. {cachekit-0.9.1 → cachekit-0.10.0}/rust/Cargo.toml +1 -1
  5. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/__init__.py +1 -1
  6. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/base.py +35 -0
  7. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/file/backend.py +114 -0
  8. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/cache_handler.py +66 -2
  9. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/settings.py +3 -2
  10. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/l1_cache.py +16 -0
  11. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/arrow_serializer.py +7 -4
  12. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/auto_serializer.py +3 -1
  13. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/base.py +1 -1
  14. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/encryption_wrapper.py +5 -2
  15. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/orjson_serializer.py +2 -1
  16. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/standard_serializer.py +2 -1
  17. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/wrapper.py +11 -4
  18. {cachekit-0.9.1 → cachekit-0.10.0}/Cargo.toml +0 -0
  19. {cachekit-0.9.1 → cachekit-0.10.0}/LICENSE +0 -0
  20. {cachekit-0.9.1 → cachekit-0.10.0}/README.md +0 -0
  21. {cachekit-0.9.1 → cachekit-0.10.0}/rust/Makefile +0 -0
  22. {cachekit-0.9.1 → cachekit-0.10.0}/rust/README.md +0 -0
  23. {cachekit-0.9.1 → cachekit-0.10.0}/rust/TEST_EXPANSION_SUMMARY.md +0 -0
  24. {cachekit-0.9.1 → cachekit-0.10.0}/rust/src/lib.rs +0 -0
  25. {cachekit-0.9.1 → cachekit-0.10.0}/rust/src/python_bindings.rs +0 -0
  26. {cachekit-0.9.1 → cachekit-0.10.0}/rust/supply-chain/audits.toml +0 -0
  27. {cachekit-0.9.1 → cachekit-0.10.0}/rust/supply-chain/config.toml +0 -0
  28. {cachekit-0.9.1 → cachekit-0.10.0}/rust/supply-chain/imports.lock +0 -0
  29. {cachekit-0.9.1 → cachekit-0.10.0}/rust/tsan_suppressions.txt +0 -0
  30. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/__init__.py +0 -0
  31. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/base_config.py +0 -0
  32. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/__init__.py +0 -0
  33. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/backend.py +0 -0
  34. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/client.py +0 -0
  35. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/config.py +0 -0
  36. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/error_handler.py +0 -0
  37. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/session.py +0 -0
  38. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/errors.py +0 -0
  39. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/file/__init__.py +0 -0
  40. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/file/config.py +0 -0
  41. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/memcached/__init__.py +0 -0
  42. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/memcached/backend.py +0 -0
  43. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/memcached/config.py +0 -0
  44. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/memcached/error_handler.py +0 -0
  45. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/provider.py +0 -0
  46. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/__init__.py +0 -0
  47. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/backend.py +0 -0
  48. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/client.py +0 -0
  49. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/config.py +0 -0
  50. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/error_handler.py +0 -0
  51. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/provider.py +0 -0
  52. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/__init__.py +0 -0
  53. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/decorator.py +0 -0
  54. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/nested.py +0 -0
  55. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/singleton.py +0 -0
  56. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/validation.py +0 -0
  57. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/__init__.py +0 -0
  58. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/intent.py +0 -0
  59. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/local_wrapper.py +0 -0
  60. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/main.py +0 -0
  61. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/orchestrator.py +0 -0
  62. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/session.py +0 -0
  63. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/stats_context.py +0 -0
  64. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/tenant_context.py +0 -0
  65. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/utils/__init__.py +0 -0
  66. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/wrapper.py +0 -0
  67. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/di.py +0 -0
  68. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/hash_utils.py +0 -0
  69. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/health.py +0 -0
  70. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/hiredis_compat.py +0 -0
  71. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/imports.py +0 -0
  72. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/invalidation/__init__.py +0 -0
  73. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/invalidation/channel.py +0 -0
  74. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/invalidation/event.py +0 -0
  75. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/invalidation/redis_channel.py +0 -0
  76. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/key_generator.py +0 -0
  77. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/logging.py +0 -0
  78. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/monitoring/__init__.py +0 -0
  79. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/monitoring/correlation_tracking.py +0 -0
  80. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/monitoring/pool_monitor.py +0 -0
  81. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/monitoring/protocols.py +0 -0
  82. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/object_cache.py +0 -0
  83. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/py.typed +0 -0
  84. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/__init__.py +0 -0
  85. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/adaptive_timeout.py +0 -0
  86. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/async_metrics.py +0 -0
  87. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/circuit_breaker.py +0 -0
  88. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/error_classification.py +0 -0
  89. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/load_control.py +0 -0
  90. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/metrics_collection.py +0 -0
  91. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/profiles.py +0 -0
  92. {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/__init__.py +0 -0
@@ -271,7 +271,7 @@ dependencies = [
271
271
 
272
272
  [[package]]
273
273
  name = "cachekit-rs"
274
- version = "0.9.1"
274
+ version = "0.10.0"
275
275
  dependencies = [
276
276
  "cachekit-core",
277
277
  "criterion",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cachekit
3
- Version: 0.9.1
3
+ Version: 0.10.0
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "cachekit"
7
- version = "0.9.1"
7
+ version = "0.10.0"
8
8
  description = "Production-ready Redis caching for Python with intelligent reliability features and Rust-powered performance"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "cachekit-rs"
3
- version = "0.9.1"
3
+ version = "0.10.0"
4
4
  edition = "2021"
5
5
  authors = ["cachekit Contributors"]
6
6
  description = "High-performance storage engine for caching with compression and encryption"
@@ -68,7 +68,7 @@ Example Usage:
68
68
  ```
69
69
  """
70
70
 
71
- __version__ = "0.9.1"
71
+ __version__ = "0.10.0"
72
72
 
73
73
  from collections.abc import Callable
74
74
  from typing import Any, TypeVar
@@ -175,6 +175,41 @@ class TTLInspectableBackend(Protocol):
175
175
  ...
176
176
 
177
177
 
178
+ @runtime_checkable
179
+ class BufferHandle(Protocol):
180
+ """A borrowed, zero-copy view of a cached value plus the resource backing it.
181
+
182
+ Returned by ``BufferReadableBackend.get_buffer``. ``view`` aliases backend-owned memory (e.g.
183
+ mmap'd file pages), not a heap copy — so the consumer must finish reading before it calls
184
+ ``close()`` and must not touch the view afterwards. The view DANGLES after close (touching it can segfault), so
185
+ it must never be stored (e.g. in L1) nor returned past the read call frame (#171).
186
+ """
187
+
188
+ view: memoryview
189
+ """Zero-copy view of the payload (valid only until close())."""
190
+
191
+ def close(self) -> None:
192
+ """Release the view and its backing resource. Idempotent."""
193
+ ...
194
+
195
+
196
+ @runtime_checkable
197
+ class BufferReadableBackend(Protocol):
198
+ """Optional protocol for backends that can return a zero-copy buffer instead of materializing
199
+ the whole value on the heap.
200
+
201
+ Lets large plaintext values (e.g. uncompressed Arrow IPC) be read without copying the payload.
202
+ Only the File backend implements this today (mmap, POSIX). Backends that don't implement it
203
+ are simply read via ``get`` as usual.
204
+ """
205
+
206
+ def get_buffer(self, key: str) -> Optional[BufferHandle]:
207
+ """Return a borrowed zero-copy handle for ``key``, or None when the value is not mappable
208
+ (missing/expired/too large/non-POSIX) — the caller then falls back to ``get``. The caller
209
+ MUST ``close()`` the handle when done reading."""
210
+ ...
211
+
212
+
178
213
  @runtime_checkable
179
214
  class LockableBackend(Protocol):
180
215
  """Optional protocol for backends supporting distributed locking.
@@ -13,6 +13,7 @@ from __future__ import annotations
13
13
 
14
14
  import errno
15
15
  import hashlib
16
+ import mmap
16
17
  import os
17
18
  import platform
18
19
  import struct
@@ -47,6 +48,40 @@ TEMP_FILE_MAX_AGE_SECONDS: int = 60 # Delete orphaned temp files older than 60s
47
48
  # TTL bounds (security: prevent integer overflow)
48
49
  MAX_TTL_SECONDS: int = 10 * 365 * 24 * 60 * 60 # 10 years max
49
50
 
51
+ # Read-side mmap ceiling (#171, fork 4): a fixed internal cap independent of max_value_mb so a
52
+ # misconfigured huge max_value_mb (or an out-of-band file dropped in cache_dir) can't map an
53
+ # unbounded region. Above this, get_buffer() returns None and the caller falls back to os.read.
54
+ MMAP_MAX_BYTES: int = 512 * 1024 * 1024 # 512 MB
55
+
56
+
57
+ class _MmapHandle:
58
+ """Owns a read-only mmap of a cache file plus a memoryview of its payload (past the 14-byte
59
+ header). Zero-copy: the view aliases mapped pages, never a heap copy.
60
+
61
+ The CALLER must ``close()`` once the consumer is done (after Arrow deserialize has copied the
62
+ data out via to_pandas). The view DANGLES after close — touching it segfaults — so the handle
63
+ must never escape the deserialize call frame and must never be stored in L1 (#171 blocker C).
64
+ """
65
+
66
+ __slots__ = ("_mm", "view")
67
+
68
+ def __init__(self, mm: mmap.mmap) -> None:
69
+ self._mm = mm
70
+ # Slice past the 14-byte header; the slice exports its buffer directly from `mm`, so it is
71
+ # the only export to release before mm.close().
72
+ self.view: memoryview = memoryview(mm)[HEADER_SIZE:]
73
+
74
+ def close(self) -> None:
75
+ """Release the view then the mapping. Idempotent (safe to call more than once)."""
76
+ try:
77
+ self.view.release() # must release exports before mmap.close(), else BufferError
78
+ except (ValueError, BufferError): # pragma: no cover - defensive (already released / lingering sub-export)
79
+ pass
80
+ try:
81
+ self._mm.close()
82
+ except (ValueError, BufferError): # pragma: no cover - defensive (already closed)
83
+ pass
84
+
50
85
 
51
86
  class FileBackend:
52
87
  """File-based backend for local disk caching.
@@ -189,6 +224,85 @@ class FileBackend:
189
224
  key=key,
190
225
  ) from exc
191
226
 
227
+ def get_buffer(self, key: str) -> _MmapHandle | None:
228
+ """Memory-map a cache value for a zero-copy read of its payload (POSIX only; #171).
229
+
230
+ Returns an `_MmapHandle` owning the mmap + a memoryview of the payload (past the 14-byte
231
+ header), or `None` when mmap does not apply — the caller then falls back to `get()`:
232
+ - non-POSIX platform (Windows pins mapped files against rename/unlink);
233
+ - missing / expired / corrupt entry (corrupt + expired are unlinked, mirroring `get`);
234
+ - empty payload (nothing to map);
235
+ - file larger than ``MMAP_MAX_BYTES``.
236
+
237
+ Security: the fd is opened with ``O_NOFOLLOW`` and the header is validated from the fd
238
+ BEFORE mapping. We never use ``pa.memory_map(path)`` / a path-based mmap — that re-opens
239
+ by path and would follow an attacker-swapped symlink, reintroducing the TOCTOU that
240
+ ``O_NOFOLLOW`` closes. The mapping survives the fd close on POSIX.
241
+ """
242
+ if os.name != "posix": # pragma: no cover - Windows-only branch; CI is Linux
243
+ return None # mapped files can't be renamed/unlinked on Windows; caller uses get()
244
+
245
+ file_path = self._key_to_path(key)
246
+
247
+ with self._lock:
248
+ try:
249
+ fd = os.open(file_path, os.O_RDONLY | os.O_NOFOLLOW)
250
+ except FileNotFoundError:
251
+ return None
252
+ except OSError as exc: # pragma: no cover - rare open errors (ELOOP/EACCES); defensive
253
+ if exc.errno in (errno.ENOENT, errno.ELOOP):
254
+ return None # missing, or symlink rejected by O_NOFOLLOW
255
+ raise BackendError(
256
+ f"Failed to open cache file for mmap: {exc}",
257
+ error_type=self._classify_os_error(exc, is_directory=False),
258
+ original_exception=exc,
259
+ operation="get_buffer",
260
+ key=key,
261
+ ) from exc
262
+
263
+ mm: mmap.mmap | None = None
264
+ try:
265
+ self._acquire_file_lock(fd, exclusive=False)
266
+ try:
267
+ st_size = os.fstat(fd).st_size
268
+
269
+ # Validate-then-map: never map a file we're about to delete.
270
+ if st_size < HEADER_SIZE:
271
+ self._safe_unlink(file_path)
272
+ return None
273
+ header = os.read(fd, HEADER_SIZE)
274
+ if header[0:2] != MAGIC or header[2] != FORMAT_VERSION:
275
+ self._safe_unlink(file_path)
276
+ return None
277
+ expiry_timestamp = struct.unpack(">Q", header[6:14])[0]
278
+ if expiry_timestamp > 0 and time.time() > expiry_timestamp:
279
+ self._safe_unlink(file_path)
280
+ return None
281
+
282
+ # Empty payload (header only): nothing to map (mmap rejects length 0 anyway).
283
+ # Too large: fall back to os.read so we never map an unbounded region.
284
+ if st_size <= HEADER_SIZE or st_size > MMAP_MAX_BYTES:
285
+ return None
286
+
287
+ mm = mmap.mmap(fd, st_size, access=mmap.ACCESS_READ)
288
+ handle = _MmapHandle(mm)
289
+ mm = None # ownership transferred to the handle; don't close it in finally
290
+ return handle
291
+ finally:
292
+ self._release_file_lock(fd)
293
+ except OSError as exc:
294
+ raise BackendError(
295
+ f"Failed to mmap cache file: {exc}",
296
+ error_type=self._classify_os_error(exc, is_directory=False),
297
+ original_exception=exc,
298
+ operation="get_buffer",
299
+ key=key,
300
+ ) from exc
301
+ finally:
302
+ if mm is not None: # pragma: no cover - only on a mid-map exception; ownership normally moved to the handle
303
+ mm.close()
304
+ os.close(fd)
305
+
192
306
  def set(self, key: str, value: bytes, ttl: int | None = None) -> None:
193
307
  """Store value in file storage with atomic write.
194
308
 
@@ -11,7 +11,7 @@ import threading
11
11
  from collections.abc import Callable
12
12
  from typing import TYPE_CHECKING, Any, Optional, Protocol, TypeGuard, Union, runtime_checkable
13
13
 
14
- from cachekit.backends.base import BackendError, BaseBackend, TTLInspectableBackend
14
+ from cachekit.backends.base import BackendError, BaseBackend, BufferHandle, BufferReadableBackend, TTLInspectableBackend
15
15
  from cachekit.backends.provider import (
16
16
  BackendProviderInterface,
17
17
  DefaultBackendProvider,
@@ -85,6 +85,15 @@ def supports_ttl_inspection(backend: BaseBackend) -> TypeGuard[TTLInspectableBac
85
85
  return hasattr(backend, "get_ttl") and hasattr(backend, "refresh_ttl")
86
86
 
87
87
 
88
+ def supports_buffer_read(backend: BaseBackend) -> TypeGuard[BufferReadableBackend]:
89
+ """Type guard: backend can return a zero-copy buffer via get_buffer (#171, File/POSIX only).
90
+
91
+ Returns:
92
+ True if backend implements BufferReadableBackend (used for the mmap Arrow read fast path).
93
+ """
94
+ return hasattr(backend, "get_buffer")
95
+
96
+
88
97
  # Import caching for serializer modules
89
98
  #
90
99
  # PERFORMANCE OPTIMIZATION: Dynamic imports are expensive (~100μs per import)
@@ -640,7 +649,25 @@ class CacheSerializationHandler:
640
649
  get_logger().error(f"Serialization failed with {self.serializer_name}: {e}")
641
650
  raise SerializationError(f"Failed to serialize data with {self.serializer_name}: {e}") from e
642
651
 
643
- def deserialize_data(self, data: str | bytes, cache_key: str = "") -> Any:
652
+ def supports_mmap_read(self) -> bool:
653
+ """True iff reads can use the zero-copy mmap fast path (#171).
654
+
655
+ Eligible only for PLAINTEXT Arrow that returns pandas:
656
+ - encrypted values can never mmap (AES-GCM decrypt owns its buffer);
657
+ - non-Arrow serializers gain nothing (they copy at the Rust/C boundary, rebuild objects);
658
+ - the "arrow" return_format yields a table that ALIASES the mapped pages, so closing the
659
+ handle would be a use-after-free — pandas (which copies out via to_pandas) only.
660
+
661
+ The backend must also support buffer reads (File/POSIX); that is checked separately, so a
662
+ True here on a non-File backend simply means get_buffer returns None and we fall back.
663
+ """
664
+ return (
665
+ not self.encryption
666
+ and self._serializer_string_name == "arrow"
667
+ and getattr(self._base_serializer, "return_format", None) == "pandas"
668
+ )
669
+
670
+ def deserialize_data(self, data: str | bytes | memoryview, cache_key: str = "") -> Any:
644
671
  """Deserialize data from cache storage with cache_key verification.
645
672
 
646
673
  Args:
@@ -845,6 +872,19 @@ class CacheOperationHandler:
845
872
  if self._cache_handler is None:
846
873
  raise RuntimeError("Cache handler must be set before calling get_cached_value")
847
874
 
875
+ # mmap fast path (#171): plaintext Arrow -> pandas on a buffer-readable backend (File,
876
+ # POSIX) reads zero-copy. The handle is confined to this frame and closed in `finally`,
877
+ # so the mmap never becomes the returned value and never reaches L1 (blocker C). A None
878
+ # from get_buffer (ineligible file, or a non-buffer backend) falls through to bytes.
879
+ if self.serialization_handler.supports_mmap_read():
880
+ handle = self._cache_handler.get_buffer(cache_key)
881
+ if handle is not None:
882
+ try:
883
+ get_logger().cache_hit(cache_key, "Backend(mmap)")
884
+ return (True, self.serialization_handler.deserialize_data(handle.view, cache_key))
885
+ finally:
886
+ handle.close()
887
+
848
888
  cached_data = self._cache_handler.get(cache_key, refresh_ttl)
849
889
  if cached_data is not None:
850
890
  get_logger().cache_hit(cache_key, "Backend")
@@ -886,6 +926,9 @@ class CacheOperationHandler:
886
926
  if self._cache_handler is None:
887
927
  raise RuntimeError("Cache handler must be set before calling get_cached_value_async")
888
928
 
929
+ # NOTE: no mmap fast path here. The async decorator path inlines get_async (it does not
930
+ # route through this method today), so an mmap branch would be dead code. The mmap read
931
+ # lives on the sync get_cached_value; add it here only when an async caller routes through.
889
932
  cached_data = await self._cache_handler.get_async(cache_key, refresh_ttl)
890
933
  if cached_data is not None:
891
934
  get_logger().cache_hit(cache_key, "Backend")
@@ -1100,6 +1143,10 @@ class CacheHandlerStrategy(Protocol):
1100
1143
  """Get value from cache with optional TTL refresh."""
1101
1144
  ...
1102
1145
 
1146
+ def get_buffer(self, key: str) -> Optional[BufferHandle]:
1147
+ """Return a zero-copy buffer handle if the backend supports it (#171), else None."""
1148
+ ...
1149
+
1103
1150
  def set(self, key: str, value: Union[str, bytes], ttl: Optional[int] = None, **metadata) -> bool:
1104
1151
  """Set value in cache with TTL and optional metadata."""
1105
1152
  ...
@@ -1266,6 +1313,23 @@ class StandardCacheHandler:
1266
1313
  get_logger().error(f"Unexpected error getting key {key}: {e}")
1267
1314
  return None
1268
1315
 
1316
+ def get_buffer(self, key: str) -> Optional[BufferHandle]:
1317
+ """Return a zero-copy buffer handle for key if the backend supports it (#171), else None.
1318
+
1319
+ Mirrors get()'s backpressure/timeout wrapping. Returns None when the backend can't map the
1320
+ value (or on any backend error) so the caller transparently falls back to get().
1321
+ """
1322
+ if not supports_buffer_read(self.backend):
1323
+ return None
1324
+ try:
1325
+ return self._with_backpressure_and_timeout(self.backend.get_buffer, key)
1326
+ except BackendError as e:
1327
+ get_logger().error(f"Backend error mmapping key {key}: {e}")
1328
+ return None
1329
+ except Exception as e:
1330
+ get_logger().error(f"Unexpected error mmapping key {key}: {e}")
1331
+ return None
1332
+
1269
1333
  def set(self, key: str, value: Union[str, bytes], ttl: Optional[int] = None, **metadata) -> bool:
1270
1334
  """Set value in cache using backend.
1271
1335
 
@@ -121,8 +121,9 @@ class CachekitConfig(BaseSettings):
121
121
  description=(
122
122
  "Arrow IPC compression codec for DataFrame caching (ArrowSerializer, compression='auto'). "
123
123
  "'zstd'/'lz4' shrink the stored payload but must be decompressed into the heap on read. "
124
- "'none' stores uncompressed Arrow IPC, which enables zero-copy memory-mapped reads "
125
- "(lowest read memory) at the cost of a larger payload. Env: CACHEKIT_ARROW_COMPRESSION."
124
+ "'none' stores uncompressed Arrow IPC, which lets the File backend serve plaintext "
125
+ "DataFrame reads via a zero-copy mmap (low steady-state read RSS; peak transiently "
126
+ "higher) at the cost of a larger payload. Env: CACHEKIT_ARROW_COMPRESSION."
126
127
  ),
127
128
  )
128
129
  retry_on_timeout: bool = Field(
@@ -266,7 +266,23 @@ class L1Cache:
266
266
  redis_ttl: TTL in seconds from Redis (used to calculate expiry)
267
267
  expires_at: Absolute expiry timestamp (overrides redis_ttl)
268
268
  namespace: Optional namespace for invalidation support
269
+
270
+ Raises:
271
+ TypeError: if `value` is not exactly `bytes`. L1 stores raw bytes only; a memoryview
272
+ (e.g. an mmap-backed view from the File backend) or a mutable bytearray must never
273
+ be stored — the former would pin a mapped file's inode for the whole TTL, the
274
+ latter could mutate underneath the cache (#171 blocker C). Loud-fail a regression
275
+ rather than silently alias.
269
276
  """
277
+ # Runtime guard: the annotation says bytes, but callers reach here across dynamic
278
+ # boundaries (backend.get returns, decorator paths) where the type isn't enforced.
279
+ if not isinstance(value, bytes): # pyright: ignore[reportUnnecessaryIsInstance]
280
+ raise TypeError(
281
+ f"L1Cache.put requires bytes, got {type(value).__name__}. "
282
+ "Storing a memoryview/bytearray in L1 is forbidden: an mmap-backed view would pin "
283
+ "the mapped file for the entry's TTL. Materialize to bytes before caching."
284
+ )
285
+
270
286
  # Calculate expiry time
271
287
  current_time = time.time()
272
288
  if expires_at is not None:
@@ -142,8 +142,10 @@ class ArrowSerializer:
142
142
  compression: Arrow IPC compression codec.
143
143
  - "auto" (default): use the CACHEKIT_ARROW_COMPRESSION setting (itself "zstd" by default)
144
144
  - "zstd" / "lz4": compress the payload (smaller wire/L1; must be decompressed on read)
145
- - None or "none": store uncompressed Arrow IPC, enabling zero-copy memory-mapped reads
146
- (lowest read memory) at the cost of a larger payload
145
+ - None or "none": store uncompressed Arrow IPC. Lets the File backend serve plaintext
146
+ DataFrame reads (returned as pandas) via a zero-copy mmap — low steady-state read
147
+ RSS (~0.32x), though peak is transiently higher from checksum verification + pandas
148
+ materialization — at the cost of a larger stored payload. No effect on wire backends.
147
149
 
148
150
  Raises:
149
151
  ValueError: If return_format or compression is not a valid option
@@ -226,7 +228,8 @@ class ArrowSerializer:
226
228
  # writing in bounded batches keeps the compressor's working set bounded (one big
227
229
  # batch makes the codec allocate a full-size working buffer — measured ~3.6x the
228
230
  # payload). Size each batch to ~8 MiB regardless of schema width. compression=None
229
- # writes uncompressed IPC, which a reader can memory-map zero-copy.
231
+ # writes uncompressed IPC, which the File backend reads zero-copy via mmap (#171,
232
+ # plaintext, pandas return only).
230
233
  max_chunksize = _bounded_chunksize(table)
231
234
  sink = pa.BufferOutputStream()
232
235
  write_options = pa.ipc.IpcWriteOptions(compression=self.compression) if self.compression else None
@@ -258,7 +261,7 @@ class ArrowSerializer:
258
261
  except (pa.ArrowInvalid, pa.ArrowTypeError, ValueError) as e:
259
262
  raise SerializationError(f"Failed to serialize DataFrame to Arrow IPC format: {e}") from e
260
263
 
261
- def deserialize(self, data: bytes, metadata: SerializationMetadata | None = None) -> Any:
264
+ def deserialize(self, data: bytes | memoryview, metadata: SerializationMetadata | None = None) -> Any:
262
265
  """Deserialize Arrow IPC bytes with optional xxHash3-64 integrity validation.
263
266
 
264
267
  Args:
@@ -478,7 +478,7 @@ class AutoSerializer:
478
478
  metadata = SerializationMetadata(serialization_format=SerializationFormat.MSGPACK, original_type="msgpack")
479
479
  return data, metadata
480
480
 
481
- def deserialize(self, data: bytes, metadata: Optional[SerializationMetadata] = None) -> Any:
481
+ def deserialize(self, data: bytes | memoryview, metadata: Optional[SerializationMetadata] = None) -> Any:
482
482
  """Deserialize bytes back to Python object.
483
483
 
484
484
  Automatically detects format from envelope and deserializes accordingly.
@@ -490,6 +490,8 @@ class AutoSerializer:
490
490
  Returns:
491
491
  Any: Deserialized Python object
492
492
  """
493
+ # coerce unwrap's zero-copy memoryview; no-op when already bytes (enables .startswith below + Rust retrieve)
494
+ data = bytes(data)
493
495
  # Check for custom NumPy format
494
496
  if data.startswith(b"NUMPY_RAW"):
495
497
  return self._deserialize_numpy(data)
@@ -95,7 +95,7 @@ class SerializerProtocol(Protocol):
95
95
  """
96
96
  ...
97
97
 
98
- def deserialize(self, data: bytes, metadata: Any = None) -> Any:
98
+ def deserialize(self, data: bytes | memoryview, metadata: Any = None) -> Any:
99
99
  """Deserialize bytes to Python object.
100
100
 
101
101
  Args:
@@ -241,7 +241,7 @@ class EncryptionWrapper:
241
241
  except Exception as e:
242
242
  raise EncryptionError(f"Encryption failed: {e}") from e
243
243
 
244
- def deserialize(self, data: bytes, metadata: SerializationMetadata, cache_key: str = "") -> Any:
244
+ def deserialize(self, data: bytes | memoryview, metadata: SerializationMetadata, cache_key: str = "") -> Any:
245
245
  """Decrypt and deserialize data with cache_key verification.
246
246
 
247
247
  Args:
@@ -329,7 +329,10 @@ class EncryptionWrapper:
329
329
  # NOTE: If cache_key doesn't match the one used during encryption,
330
330
  # the AAD will be different and AES-GCM authentication will fail.
331
331
  # This is the SECURITY mechanism that detects ciphertext substitution.
332
- decrypted_data = self.encryptor.decrypt_with_keys(data, aad, self.tenant_keys)
332
+ # `unwrap` may hand us a memoryview; the AES-GCM binding requires owned bytes, and an
333
+ # encrypted value can never be zero-copy anyway (decrypt reads the whole ciphertext
334
+ # into an owned buffer), so coercing here costs nothing the cipher wasn't already paying.
335
+ decrypted_data = self.encryptor.decrypt_with_keys(bytes(data), aad, self.tenant_keys)
333
336
 
334
337
  # Deserialize the decrypted data using base serializer
335
338
  return self.serializer.deserialize(decrypted_data, raw_metadata)
@@ -156,7 +156,7 @@ class OrjsonSerializer:
156
156
  # ValueError = data encoding error
157
157
  raise SerializationError(f"Failed to serialize object to JSON: {e}") from e
158
158
 
159
- def deserialize(self, data: bytes, metadata: SerializationMetadata | None = None) -> Any:
159
+ def deserialize(self, data: bytes | memoryview, metadata: SerializationMetadata | None = None) -> Any:
160
160
  """Deserialize JSON bytes with optional xxHash3-64 integrity validation.
161
161
 
162
162
  Args:
@@ -176,6 +176,7 @@ class OrjsonSerializer:
176
176
  >>> result == {"test": 123}
177
177
  True
178
178
  """
179
+ data = bytes(data) # coerce unwrap's zero-copy memoryview; no-op when already bytes
179
180
  try:
180
181
  if self.enable_integrity_checking:
181
182
  # Guard clause: Minimum size check (8 bytes checksum + at least 2 bytes JSON: {})
@@ -308,7 +308,7 @@ class StandardSerializer:
308
308
  # ValueError = data encoding error
309
309
  raise SerializationError(f"Failed to serialize object to MessagePack: {e}") from e
310
310
 
311
- def deserialize(self, data: bytes, metadata: SerializationMetadata | None = None) -> Any:
311
+ def deserialize(self, data: bytes | memoryview, metadata: SerializationMetadata | None = None) -> Any:
312
312
  """Deserialize MessagePack bytes with optional ByteStorage unwrapping.
313
313
 
314
314
  Args:
@@ -328,6 +328,7 @@ class StandardSerializer:
328
328
  >>> result == {"test": 123}
329
329
  True
330
330
  """
331
+ data = bytes(data) # coerce unwrap's zero-copy memoryview; no-op when already bytes (Rust retrieve needs bytes)
331
332
  try:
332
333
  if self.enable_integrity_checking:
333
334
  # Unwrap ByteStorage envelope (decompress + validate integrity)
@@ -99,15 +99,18 @@ class SerializationWrapper:
99
99
  )
100
100
 
101
101
  @staticmethod
102
- def unwrap(wrapped_data: Union[str, bytes]) -> tuple[bytes, dict[str, Any], str]:
102
+ def unwrap(
103
+ wrapped_data: Union[str, bytes, bytearray, memoryview],
104
+ ) -> tuple[Union[bytes, memoryview], dict[str, Any], str]:
103
105
  """Unwrap a cache envelope, reading either the v3 frame or the legacy format.
104
106
 
105
107
  Args:
106
- wrapped_data: v3 frame (bytes starting with MAGIC) OR legacy base64+JSON
108
+ wrapped_data: v3 frame (bytes-like starting with MAGIC) OR legacy base64+JSON
107
109
  envelope (bytes/str starting with '{').
108
110
 
109
111
  Returns:
110
- tuple: (data_bytes, metadata_dict, serializer_name)
112
+ tuple: (payload, metadata_dict, serializer_name). For a v3 frame the payload is a
113
+ zero-copy ``memoryview`` aliasing ``wrapped_data``; the legacy path returns ``bytes``.
111
114
  """
112
115
  # v3 binary frame: only bytes-like can be a frame (str is always legacy JSON).
113
116
  if isinstance(wrapped_data, (bytes, bytearray, memoryview)):
@@ -123,7 +126,11 @@ class SerializationWrapper:
123
126
  if header_end > mv.nbytes:
124
127
  raise ValueError(f"Invalid cache envelope header length {hdr_len}: frame has only {mv.nbytes} bytes")
125
128
  header = json.loads(bytes(mv[_PREFIX_LEN:header_end]))
126
- payload = bytes(mv[header_end:]) # single copy of the raw payload
129
+ # Zero-copy: a memoryview slice past the header aliases the input frame (no
130
+ # full-payload copy on every read). It flows into pa.py_buffer (Arrow) and the
131
+ # mmap read path without materializing. The view keeps `wrapped_data` alive, so
132
+ # it never dangles; consumers needing owned bytes coerce at their own boundary.
133
+ payload = mv[header_end:]
127
134
  return payload, header.get("m", {}), header.get("s", "unknown")
128
135
 
129
136
  # Legacy base64+JSON envelope (pre-v3 entries; backward compatible read path).
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes