cachekit 0.9.1__tar.gz → 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cachekit-0.9.1 → cachekit-0.10.0}/Cargo.lock +1 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/PKG-INFO +1 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/pyproject.toml +1 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/Cargo.toml +1 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/__init__.py +1 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/base.py +35 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/file/backend.py +114 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/cache_handler.py +66 -2
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/settings.py +3 -2
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/l1_cache.py +16 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/arrow_serializer.py +7 -4
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/auto_serializer.py +3 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/base.py +1 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/encryption_wrapper.py +5 -2
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/orjson_serializer.py +2 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/standard_serializer.py +2 -1
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/wrapper.py +11 -4
- {cachekit-0.9.1 → cachekit-0.10.0}/Cargo.toml +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/LICENSE +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/README.md +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/Makefile +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/README.md +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/TEST_EXPANSION_SUMMARY.md +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/src/lib.rs +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/src/python_bindings.rs +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/supply-chain/audits.toml +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/supply-chain/config.toml +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/supply-chain/imports.lock +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/rust/tsan_suppressions.txt +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/base_config.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/backend.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/client.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/config.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/error_handler.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/cachekitio/session.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/errors.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/file/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/file/config.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/memcached/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/memcached/backend.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/memcached/config.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/memcached/error_handler.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/provider.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/backend.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/client.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/config.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/error_handler.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/backends/redis/provider.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/decorator.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/nested.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/singleton.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/config/validation.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/intent.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/local_wrapper.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/main.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/orchestrator.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/session.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/stats_context.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/tenant_context.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/utils/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/decorators/wrapper.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/di.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/hash_utils.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/health.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/hiredis_compat.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/imports.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/invalidation/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/invalidation/channel.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/invalidation/event.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/invalidation/redis_channel.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/key_generator.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/logging.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/monitoring/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/monitoring/correlation_tracking.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/monitoring/pool_monitor.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/monitoring/protocols.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/object_cache.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/py.typed +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/__init__.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/adaptive_timeout.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/async_metrics.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/circuit_breaker.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/error_classification.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/load_control.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/metrics_collection.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/reliability/profiles.py +0 -0
- {cachekit-0.9.1 → cachekit-0.10.0}/src/cachekit/serializers/__init__.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "cachekit"
|
|
7
|
-
version = "0.9.1"
|
|
7
|
+
version = "0.10.0"
|
|
8
8
|
description = "Production-ready Redis caching for Python with intelligent reliability features and Rust-powered performance"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -175,6 +175,41 @@ class TTLInspectableBackend(Protocol):
|
|
|
175
175
|
...
|
|
176
176
|
|
|
177
177
|
|
|
178
|
+
@runtime_checkable
|
|
179
|
+
class BufferHandle(Protocol):
|
|
180
|
+
"""A borrowed, zero-copy view of a cached value plus the resource backing it.
|
|
181
|
+
|
|
182
|
+
Returned by ``BufferReadableBackend.get_buffer``. ``view`` aliases backend-owned memory (e.g.
|
|
183
|
+
mmap'd file pages), not a heap copy — so the consumer must finish reading and ``close()``
|
|
184
|
+
before the view is touched again. The view DANGLES after close (touching it can segfault), so
|
|
185
|
+
it must never be stored (e.g. in L1) nor returned past the read call frame (#171).
|
|
186
|
+
"""
|
|
187
|
+
|
|
188
|
+
view: memoryview
|
|
189
|
+
"""Zero-copy view of the payload (valid only until close())."""
|
|
190
|
+
|
|
191
|
+
def close(self) -> None:
|
|
192
|
+
"""Release the view and its backing resource. Idempotent."""
|
|
193
|
+
...
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
@runtime_checkable
|
|
197
|
+
class BufferReadableBackend(Protocol):
|
|
198
|
+
"""Optional protocol for backends that can return a zero-copy buffer instead of materializing
|
|
199
|
+
the whole value on the heap.
|
|
200
|
+
|
|
201
|
+
Lets large plaintext values (e.g. uncompressed Arrow IPC) be read without copying the payload.
|
|
202
|
+
Only the File backend implements this today (mmap, POSIX). Backends that don't implement it
|
|
203
|
+
are simply read via ``get`` as usual.
|
|
204
|
+
"""
|
|
205
|
+
|
|
206
|
+
def get_buffer(self, key: str) -> Optional[BufferHandle]:
|
|
207
|
+
"""Return a borrowed zero-copy handle for ``key``, or None when the value is not mappable
|
|
208
|
+
(missing/expired/too large/non-POSIX) — the caller then falls back to ``get``. The caller
|
|
209
|
+
MUST ``close()`` the handle when done reading."""
|
|
210
|
+
...
|
|
211
|
+
|
|
212
|
+
|
|
178
213
|
@runtime_checkable
|
|
179
214
|
class LockableBackend(Protocol):
|
|
180
215
|
"""Optional protocol for backends supporting distributed locking.
|
|
@@ -13,6 +13,7 @@ from __future__ import annotations
|
|
|
13
13
|
|
|
14
14
|
import errno
|
|
15
15
|
import hashlib
|
|
16
|
+
import mmap
|
|
16
17
|
import os
|
|
17
18
|
import platform
|
|
18
19
|
import struct
|
|
@@ -47,6 +48,40 @@ TEMP_FILE_MAX_AGE_SECONDS: int = 60 # Delete orphaned temp files older than 60s
|
|
|
47
48
|
# TTL bounds (security: prevent integer overflow)
|
|
48
49
|
MAX_TTL_SECONDS: int = 10 * 365 * 24 * 60 * 60 # 10 years max
|
|
49
50
|
|
|
51
|
+
# Read-side mmap ceiling (#171, fork 4): a fixed internal cap independent of max_value_mb so a
|
|
52
|
+
# misconfigured huge max_value_mb (or an out-of-band file dropped in cache_dir) can't map an
|
|
53
|
+
# unbounded region. Above this, get_buffer() returns None and the caller falls back to os.read.
|
|
54
|
+
MMAP_MAX_BYTES: int = 512 * 1024 * 1024 # 512 MB
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class _MmapHandle:
|
|
58
|
+
"""Owns a read-only mmap of a cache file plus a memoryview of its payload (past the 14-byte
|
|
59
|
+
header). Zero-copy: the view aliases mapped pages, never a heap copy.
|
|
60
|
+
|
|
61
|
+
The CALLER must ``close()`` once the consumer is done (after Arrow deserialize has copied the
|
|
62
|
+
data out via to_pandas). The view DANGLES after close — touching it segfaults — so the handle
|
|
63
|
+
must never escape the deserialize call frame and must never be stored in L1 (#171 blocker C).
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
__slots__ = ("_mm", "view")
|
|
67
|
+
|
|
68
|
+
def __init__(self, mm: mmap.mmap) -> None:
|
|
69
|
+
self._mm = mm
|
|
70
|
+
# Slice past the 14-byte header; the slice exports its buffer directly from `mm`, so it is
|
|
71
|
+
# the only export to release before mm.close().
|
|
72
|
+
self.view: memoryview = memoryview(mm)[HEADER_SIZE:]
|
|
73
|
+
|
|
74
|
+
def close(self) -> None:
|
|
75
|
+
"""Release the view then the mapping. Idempotent (safe to call more than once)."""
|
|
76
|
+
try:
|
|
77
|
+
self.view.release() # must release exports before mmap.close(), else BufferError
|
|
78
|
+
except (ValueError, BufferError): # pragma: no cover - defensive (already released / lingering sub-export)
|
|
79
|
+
pass
|
|
80
|
+
try:
|
|
81
|
+
self._mm.close()
|
|
82
|
+
except (ValueError, BufferError): # pragma: no cover - defensive (already closed)
|
|
83
|
+
pass
|
|
84
|
+
|
|
50
85
|
|
|
51
86
|
class FileBackend:
|
|
52
87
|
"""File-based backend for local disk caching.
|
|
@@ -189,6 +224,85 @@ class FileBackend:
|
|
|
189
224
|
key=key,
|
|
190
225
|
) from exc
|
|
191
226
|
|
|
227
|
+
def get_buffer(self, key: str) -> _MmapHandle | None:
|
|
228
|
+
"""Memory-map a cache value for a zero-copy read of its payload (POSIX only; #171).
|
|
229
|
+
|
|
230
|
+
Returns an `_MmapHandle` owning the mmap + a memoryview of the payload (past the 14-byte
|
|
231
|
+
header), or `None` when mmap does not apply — the caller then falls back to `get()`:
|
|
232
|
+
- non-POSIX platform (Windows pins mapped files against rename/unlink);
|
|
233
|
+
- missing / expired / corrupt entry (corrupt + expired are unlinked, mirroring `get`);
|
|
234
|
+
- empty payload (nothing to map);
|
|
235
|
+
- file larger than ``MMAP_MAX_BYTES``.
|
|
236
|
+
|
|
237
|
+
Security: the fd is opened with ``O_NOFOLLOW`` and the header is validated from the fd
|
|
238
|
+
BEFORE mapping. We never use ``pa.memory_map(path)`` / a path-based mmap — that re-opens
|
|
239
|
+
by path and would follow an attacker-swapped symlink, reintroducing the TOCTOU that
|
|
240
|
+
``O_NOFOLLOW`` closes. The mapping survives the fd close on POSIX.
|
|
241
|
+
"""
|
|
242
|
+
if os.name != "posix": # pragma: no cover - Windows-only branch; CI is Linux
|
|
243
|
+
return None # mapped files can't be renamed/unlinked on Windows; caller uses get()
|
|
244
|
+
|
|
245
|
+
file_path = self._key_to_path(key)
|
|
246
|
+
|
|
247
|
+
with self._lock:
|
|
248
|
+
try:
|
|
249
|
+
fd = os.open(file_path, os.O_RDONLY | os.O_NOFOLLOW)
|
|
250
|
+
except FileNotFoundError:
|
|
251
|
+
return None
|
|
252
|
+
except OSError as exc: # pragma: no cover - rare open errors (ELOOP/EACCES); defensive
|
|
253
|
+
if exc.errno in (errno.ENOENT, errno.ELOOP):
|
|
254
|
+
return None # missing, or symlink rejected by O_NOFOLLOW
|
|
255
|
+
raise BackendError(
|
|
256
|
+
f"Failed to open cache file for mmap: {exc}",
|
|
257
|
+
error_type=self._classify_os_error(exc, is_directory=False),
|
|
258
|
+
original_exception=exc,
|
|
259
|
+
operation="get_buffer",
|
|
260
|
+
key=key,
|
|
261
|
+
) from exc
|
|
262
|
+
|
|
263
|
+
mm: mmap.mmap | None = None
|
|
264
|
+
try:
|
|
265
|
+
self._acquire_file_lock(fd, exclusive=False)
|
|
266
|
+
try:
|
|
267
|
+
st_size = os.fstat(fd).st_size
|
|
268
|
+
|
|
269
|
+
# Validate-then-map: never map a file we're about to delete.
|
|
270
|
+
if st_size < HEADER_SIZE:
|
|
271
|
+
self._safe_unlink(file_path)
|
|
272
|
+
return None
|
|
273
|
+
header = os.read(fd, HEADER_SIZE)
|
|
274
|
+
if header[0:2] != MAGIC or header[2] != FORMAT_VERSION:
|
|
275
|
+
self._safe_unlink(file_path)
|
|
276
|
+
return None
|
|
277
|
+
expiry_timestamp = struct.unpack(">Q", header[6:14])[0]
|
|
278
|
+
if expiry_timestamp > 0 and time.time() > expiry_timestamp:
|
|
279
|
+
self._safe_unlink(file_path)
|
|
280
|
+
return None
|
|
281
|
+
|
|
282
|
+
# Empty payload (header only): nothing to map (mmap rejects length 0 anyway).
|
|
283
|
+
# Too large: fall back to os.read so we never map an unbounded region.
|
|
284
|
+
if st_size <= HEADER_SIZE or st_size > MMAP_MAX_BYTES:
|
|
285
|
+
return None
|
|
286
|
+
|
|
287
|
+
mm = mmap.mmap(fd, st_size, access=mmap.ACCESS_READ)
|
|
288
|
+
handle = _MmapHandle(mm)
|
|
289
|
+
mm = None # ownership transferred to the handle; don't close it in finally
|
|
290
|
+
return handle
|
|
291
|
+
finally:
|
|
292
|
+
self._release_file_lock(fd)
|
|
293
|
+
except OSError as exc:
|
|
294
|
+
raise BackendError(
|
|
295
|
+
f"Failed to mmap cache file: {exc}",
|
|
296
|
+
error_type=self._classify_os_error(exc, is_directory=False),
|
|
297
|
+
original_exception=exc,
|
|
298
|
+
operation="get_buffer",
|
|
299
|
+
key=key,
|
|
300
|
+
) from exc
|
|
301
|
+
finally:
|
|
302
|
+
if mm is not None: # pragma: no cover - only on a mid-map exception; ownership normally moved to the handle
|
|
303
|
+
mm.close()
|
|
304
|
+
os.close(fd)
|
|
305
|
+
|
|
192
306
|
def set(self, key: str, value: bytes, ttl: int | None = None) -> None:
|
|
193
307
|
"""Store value in file storage with atomic write.
|
|
194
308
|
|
|
@@ -11,7 +11,7 @@ import threading
|
|
|
11
11
|
from collections.abc import Callable
|
|
12
12
|
from typing import TYPE_CHECKING, Any, Optional, Protocol, TypeGuard, Union, runtime_checkable
|
|
13
13
|
|
|
14
|
-
from cachekit.backends.base import BackendError, BaseBackend, TTLInspectableBackend
|
|
14
|
+
from cachekit.backends.base import BackendError, BaseBackend, BufferHandle, BufferReadableBackend, TTLInspectableBackend
|
|
15
15
|
from cachekit.backends.provider import (
|
|
16
16
|
BackendProviderInterface,
|
|
17
17
|
DefaultBackendProvider,
|
|
@@ -85,6 +85,15 @@ def supports_ttl_inspection(backend: BaseBackend) -> TypeGuard[TTLInspectableBac
|
|
|
85
85
|
return hasattr(backend, "get_ttl") and hasattr(backend, "refresh_ttl")
|
|
86
86
|
|
|
87
87
|
|
|
88
|
+
def supports_buffer_read(backend: BaseBackend) -> TypeGuard[BufferReadableBackend]:
|
|
89
|
+
"""Type guard: backend can return a zero-copy buffer via get_buffer (#171, File/POSIX only).
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
True if backend implements BufferReadableBackend (used for the mmap Arrow read fast path).
|
|
93
|
+
"""
|
|
94
|
+
return hasattr(backend, "get_buffer")
|
|
95
|
+
|
|
96
|
+
|
|
88
97
|
# Import caching for serializer modules
|
|
89
98
|
#
|
|
90
99
|
# PERFORMANCE OPTIMIZATION: Dynamic imports are expensive (~100μs per import)
|
|
@@ -640,7 +649,25 @@ class CacheSerializationHandler:
|
|
|
640
649
|
get_logger().error(f"Serialization failed with {self.serializer_name}: {e}")
|
|
641
650
|
raise SerializationError(f"Failed to serialize data with {self.serializer_name}: {e}") from e
|
|
642
651
|
|
|
643
|
-
def
|
|
652
|
+
def supports_mmap_read(self) -> bool:
|
|
653
|
+
"""True iff reads can use the zero-copy mmap fast path (#171).
|
|
654
|
+
|
|
655
|
+
Eligible only for PLAINTEXT Arrow that returns pandas:
|
|
656
|
+
- encrypted values can never mmap (AES-GCM decrypt owns its buffer);
|
|
657
|
+
- non-Arrow serializers gain nothing (they copy at the Rust/C boundary, rebuild objects);
|
|
658
|
+
- the "arrow" return_format yields a table that ALIASES the mapped pages, so closing the
|
|
659
|
+
handle would be a use-after-free — pandas (which copies out via to_pandas) only.
|
|
660
|
+
|
|
661
|
+
The backend must also support buffer reads (File/POSIX); that is checked separately, so a
|
|
662
|
+
True here on a non-File backend simply means get_buffer returns None and we fall back.
|
|
663
|
+
"""
|
|
664
|
+
return (
|
|
665
|
+
not self.encryption
|
|
666
|
+
and self._serializer_string_name == "arrow"
|
|
667
|
+
and getattr(self._base_serializer, "return_format", None) == "pandas"
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
def deserialize_data(self, data: str | bytes | memoryview, cache_key: str = "") -> Any:
|
|
644
671
|
"""Deserialize data from cache storage with cache_key verification.
|
|
645
672
|
|
|
646
673
|
Args:
|
|
@@ -845,6 +872,19 @@ class CacheOperationHandler:
|
|
|
845
872
|
if self._cache_handler is None:
|
|
846
873
|
raise RuntimeError("Cache handler must be set before calling get_cached_value")
|
|
847
874
|
|
|
875
|
+
# mmap fast path (#171): plaintext Arrow -> pandas on a buffer-readable backend (File,
|
|
876
|
+
# POSIX) reads zero-copy. The handle is confined to this frame and closed in `finally`,
|
|
877
|
+
# so the mmap never becomes the returned value and never reaches L1 (blocker C). A None
|
|
878
|
+
# from get_buffer (ineligible file, or a non-buffer backend) falls through to bytes.
|
|
879
|
+
if self.serialization_handler.supports_mmap_read():
|
|
880
|
+
handle = self._cache_handler.get_buffer(cache_key)
|
|
881
|
+
if handle is not None:
|
|
882
|
+
try:
|
|
883
|
+
get_logger().cache_hit(cache_key, "Backend(mmap)")
|
|
884
|
+
return (True, self.serialization_handler.deserialize_data(handle.view, cache_key))
|
|
885
|
+
finally:
|
|
886
|
+
handle.close()
|
|
887
|
+
|
|
848
888
|
cached_data = self._cache_handler.get(cache_key, refresh_ttl)
|
|
849
889
|
if cached_data is not None:
|
|
850
890
|
get_logger().cache_hit(cache_key, "Backend")
|
|
@@ -886,6 +926,9 @@ class CacheOperationHandler:
|
|
|
886
926
|
if self._cache_handler is None:
|
|
887
927
|
raise RuntimeError("Cache handler must be set before calling get_cached_value_async")
|
|
888
928
|
|
|
929
|
+
# NOTE: no mmap fast path here. The async decorator path inlines get_async (it does not
|
|
930
|
+
# route through this method today), so an mmap branch would be dead code. The mmap read
|
|
931
|
+
# lives on the sync get_cached_value; add it here only when an async caller routes through.
|
|
889
932
|
cached_data = await self._cache_handler.get_async(cache_key, refresh_ttl)
|
|
890
933
|
if cached_data is not None:
|
|
891
934
|
get_logger().cache_hit(cache_key, "Backend")
|
|
@@ -1100,6 +1143,10 @@ class CacheHandlerStrategy(Protocol):
|
|
|
1100
1143
|
"""Get value from cache with optional TTL refresh."""
|
|
1101
1144
|
...
|
|
1102
1145
|
|
|
1146
|
+
def get_buffer(self, key: str) -> Optional[BufferHandle]:
|
|
1147
|
+
"""Return a zero-copy buffer handle if the backend supports it (#171), else None."""
|
|
1148
|
+
...
|
|
1149
|
+
|
|
1103
1150
|
def set(self, key: str, value: Union[str, bytes], ttl: Optional[int] = None, **metadata) -> bool:
|
|
1104
1151
|
"""Set value in cache with TTL and optional metadata."""
|
|
1105
1152
|
...
|
|
@@ -1266,6 +1313,23 @@ class StandardCacheHandler:
|
|
|
1266
1313
|
get_logger().error(f"Unexpected error getting key {key}: {e}")
|
|
1267
1314
|
return None
|
|
1268
1315
|
|
|
1316
|
+
def get_buffer(self, key: str) -> Optional[BufferHandle]:
|
|
1317
|
+
"""Return a zero-copy buffer handle for key if the backend supports it (#171), else None.
|
|
1318
|
+
|
|
1319
|
+
Mirrors get()'s backpressure/timeout wrapping. Returns None when the backend can't map the
|
|
1320
|
+
value (or on any backend error) so the caller transparently falls back to get().
|
|
1321
|
+
"""
|
|
1322
|
+
if not supports_buffer_read(self.backend):
|
|
1323
|
+
return None
|
|
1324
|
+
try:
|
|
1325
|
+
return self._with_backpressure_and_timeout(self.backend.get_buffer, key)
|
|
1326
|
+
except BackendError as e:
|
|
1327
|
+
get_logger().error(f"Backend error mmapping key {key}: {e}")
|
|
1328
|
+
return None
|
|
1329
|
+
except Exception as e:
|
|
1330
|
+
get_logger().error(f"Unexpected error mmapping key {key}: {e}")
|
|
1331
|
+
return None
|
|
1332
|
+
|
|
1269
1333
|
def set(self, key: str, value: Union[str, bytes], ttl: Optional[int] = None, **metadata) -> bool:
|
|
1270
1334
|
"""Set value in cache using backend.
|
|
1271
1335
|
|
|
@@ -121,8 +121,9 @@ class CachekitConfig(BaseSettings):
|
|
|
121
121
|
description=(
|
|
122
122
|
"Arrow IPC compression codec for DataFrame caching (ArrowSerializer, compression='auto'). "
|
|
123
123
|
"'zstd'/'lz4' shrink the stored payload but must be decompressed into the heap on read. "
|
|
124
|
-
"'none' stores uncompressed Arrow IPC, which
|
|
125
|
-
"
|
|
124
|
+
"'none' stores uncompressed Arrow IPC, which lets the File backend serve plaintext "
|
|
125
|
+
"DataFrame reads via a zero-copy mmap (low steady-state read RSS; peak transiently "
|
|
126
|
+
"higher) at the cost of a larger payload. Env: CACHEKIT_ARROW_COMPRESSION."
|
|
126
127
|
),
|
|
127
128
|
)
|
|
128
129
|
retry_on_timeout: bool = Field(
|
|
@@ -266,7 +266,23 @@ class L1Cache:
|
|
|
266
266
|
redis_ttl: TTL in seconds from Redis (used to calculate expiry)
|
|
267
267
|
expires_at: Absolute expiry timestamp (overrides redis_ttl)
|
|
268
268
|
namespace: Optional namespace for invalidation support
|
|
269
|
+
|
|
270
|
+
Raises:
|
|
271
|
+
TypeError: if `value` is not exactly `bytes`. L1 stores raw bytes only; a memoryview
|
|
272
|
+
(e.g. an mmap-backed view from the File backend) or a mutable bytearray must never
|
|
273
|
+
be stored — the former would pin a mapped file's inode for the whole TTL, the
|
|
274
|
+
latter could mutate underneath the cache (#171 blocker C). Loud-fail a regression
|
|
275
|
+
rather than silently alias.
|
|
269
276
|
"""
|
|
277
|
+
# Runtime guard: the annotation says bytes, but callers reach here across dynamic
|
|
278
|
+
# boundaries (backend.get returns, decorator paths) where the type isn't enforced.
|
|
279
|
+
if not isinstance(value, bytes): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
280
|
+
raise TypeError(
|
|
281
|
+
f"L1Cache.put requires bytes, got {type(value).__name__}. "
|
|
282
|
+
"Storing a memoryview/bytearray in L1 is forbidden: an mmap-backed view would pin "
|
|
283
|
+
"the mapped file for the entry's TTL. Materialize to bytes before caching."
|
|
284
|
+
)
|
|
285
|
+
|
|
270
286
|
# Calculate expiry time
|
|
271
287
|
current_time = time.time()
|
|
272
288
|
if expires_at is not None:
|
|
@@ -142,8 +142,10 @@ class ArrowSerializer:
|
|
|
142
142
|
compression: Arrow IPC compression codec.
|
|
143
143
|
- "auto" (default): use the CACHEKIT_ARROW_COMPRESSION setting (itself "zstd" by default)
|
|
144
144
|
- "zstd" / "lz4": compress the payload (smaller wire/L1; must be decompressed on read)
|
|
145
|
-
- None or "none": store uncompressed Arrow IPC
|
|
146
|
-
(
|
|
145
|
+
- None or "none": store uncompressed Arrow IPC. Lets the File backend serve plaintext
|
|
146
|
+
DataFrame reads (returned as pandas) via a zero-copy mmap — low steady-state read
|
|
147
|
+
RSS (~0.32x), though peak is transiently higher from checksum verification + pandas
|
|
148
|
+
materialization — at the cost of a larger stored payload. No effect on wire backends.
|
|
147
149
|
|
|
148
150
|
Raises:
|
|
149
151
|
ValueError: If return_format or compression is not a valid option
|
|
@@ -226,7 +228,8 @@ class ArrowSerializer:
|
|
|
226
228
|
# writing in bounded batches keeps the compressor's working set bounded (one big
|
|
227
229
|
# batch makes the codec allocate a full-size working buffer — measured ~3.6x the
|
|
228
230
|
# payload). Size each batch to ~8 MiB regardless of schema width. compression=None
|
|
229
|
-
# writes uncompressed IPC, which
|
|
231
|
+
# writes uncompressed IPC, which the File backend reads zero-copy via mmap (#171,
|
|
232
|
+
# plaintext, pandas return only).
|
|
230
233
|
max_chunksize = _bounded_chunksize(table)
|
|
231
234
|
sink = pa.BufferOutputStream()
|
|
232
235
|
write_options = pa.ipc.IpcWriteOptions(compression=self.compression) if self.compression else None
|
|
@@ -258,7 +261,7 @@ class ArrowSerializer:
|
|
|
258
261
|
except (pa.ArrowInvalid, pa.ArrowTypeError, ValueError) as e:
|
|
259
262
|
raise SerializationError(f"Failed to serialize DataFrame to Arrow IPC format: {e}") from e
|
|
260
263
|
|
|
261
|
-
def deserialize(self, data: bytes, metadata: SerializationMetadata | None = None) -> Any:
|
|
264
|
+
def deserialize(self, data: bytes | memoryview, metadata: SerializationMetadata | None = None) -> Any:
|
|
262
265
|
"""Deserialize Arrow IPC bytes with optional xxHash3-64 integrity validation.
|
|
263
266
|
|
|
264
267
|
Args:
|
|
@@ -478,7 +478,7 @@ class AutoSerializer:
|
|
|
478
478
|
metadata = SerializationMetadata(serialization_format=SerializationFormat.MSGPACK, original_type="msgpack")
|
|
479
479
|
return data, metadata
|
|
480
480
|
|
|
481
|
-
def deserialize(self, data: bytes, metadata: Optional[SerializationMetadata] = None) -> Any:
|
|
481
|
+
def deserialize(self, data: bytes | memoryview, metadata: Optional[SerializationMetadata] = None) -> Any:
|
|
482
482
|
"""Deserialize bytes back to Python object.
|
|
483
483
|
|
|
484
484
|
Automatically detects format from envelope and deserializes accordingly.
|
|
@@ -490,6 +490,8 @@ class AutoSerializer:
|
|
|
490
490
|
Returns:
|
|
491
491
|
Any: Deserialized Python object
|
|
492
492
|
"""
|
|
493
|
+
# coerce unwrap's zero-copy memoryview; no-op when already bytes (enables .startswith below + Rust retrieve)
|
|
494
|
+
data = bytes(data)
|
|
493
495
|
# Check for custom NumPy format
|
|
494
496
|
if data.startswith(b"NUMPY_RAW"):
|
|
495
497
|
return self._deserialize_numpy(data)
|
|
@@ -241,7 +241,7 @@ class EncryptionWrapper:
|
|
|
241
241
|
except Exception as e:
|
|
242
242
|
raise EncryptionError(f"Encryption failed: {e}") from e
|
|
243
243
|
|
|
244
|
-
def deserialize(self, data: bytes, metadata: SerializationMetadata, cache_key: str = "") -> Any:
|
|
244
|
+
def deserialize(self, data: bytes | memoryview, metadata: SerializationMetadata, cache_key: str = "") -> Any:
|
|
245
245
|
"""Decrypt and deserialize data with cache_key verification.
|
|
246
246
|
|
|
247
247
|
Args:
|
|
@@ -329,7 +329,10 @@ class EncryptionWrapper:
|
|
|
329
329
|
# NOTE: If cache_key doesn't match the one used during encryption,
|
|
330
330
|
# the AAD will be different and AES-GCM authentication will fail.
|
|
331
331
|
# This is the SECURITY mechanism that detects ciphertext substitution.
|
|
332
|
-
|
|
332
|
+
# `unwrap` may hand us a memoryview; the AES-GCM binding requires owned bytes, and an
|
|
333
|
+
# encrypted value can never be zero-copy anyway (decrypt reads the whole ciphertext
|
|
334
|
+
# into an owned buffer), so coercing here costs nothing the cipher wasn't already paying.
|
|
335
|
+
decrypted_data = self.encryptor.decrypt_with_keys(bytes(data), aad, self.tenant_keys)
|
|
333
336
|
|
|
334
337
|
# Deserialize the decrypted data using base serializer
|
|
335
338
|
return self.serializer.deserialize(decrypted_data, raw_metadata)
|
|
@@ -156,7 +156,7 @@ class OrjsonSerializer:
|
|
|
156
156
|
# ValueError = data encoding error
|
|
157
157
|
raise SerializationError(f"Failed to serialize object to JSON: {e}") from e
|
|
158
158
|
|
|
159
|
-
def deserialize(self, data: bytes, metadata: SerializationMetadata | None = None) -> Any:
|
|
159
|
+
def deserialize(self, data: bytes | memoryview, metadata: SerializationMetadata | None = None) -> Any:
|
|
160
160
|
"""Deserialize JSON bytes with optional xxHash3-64 integrity validation.
|
|
161
161
|
|
|
162
162
|
Args:
|
|
@@ -176,6 +176,7 @@ class OrjsonSerializer:
|
|
|
176
176
|
>>> result == {"test": 123}
|
|
177
177
|
True
|
|
178
178
|
"""
|
|
179
|
+
data = bytes(data) # coerce unwrap's zero-copy memoryview; no-op when already bytes
|
|
179
180
|
try:
|
|
180
181
|
if self.enable_integrity_checking:
|
|
181
182
|
# Guard clause: Minimum size check (8 bytes checksum + at least 2 bytes JSON: {})
|
|
@@ -308,7 +308,7 @@ class StandardSerializer:
|
|
|
308
308
|
# ValueError = data encoding error
|
|
309
309
|
raise SerializationError(f"Failed to serialize object to MessagePack: {e}") from e
|
|
310
310
|
|
|
311
|
-
def deserialize(self, data: bytes, metadata: SerializationMetadata | None = None) -> Any:
|
|
311
|
+
def deserialize(self, data: bytes | memoryview, metadata: SerializationMetadata | None = None) -> Any:
|
|
312
312
|
"""Deserialize MessagePack bytes with optional ByteStorage unwrapping.
|
|
313
313
|
|
|
314
314
|
Args:
|
|
@@ -328,6 +328,7 @@ class StandardSerializer:
|
|
|
328
328
|
>>> result == {"test": 123}
|
|
329
329
|
True
|
|
330
330
|
"""
|
|
331
|
+
data = bytes(data) # coerce unwrap's zero-copy memoryview; no-op when already bytes (Rust retrieve needs bytes)
|
|
331
332
|
try:
|
|
332
333
|
if self.enable_integrity_checking:
|
|
333
334
|
# Unwrap ByteStorage envelope (decompress + validate integrity)
|
|
@@ -99,15 +99,18 @@ class SerializationWrapper:
|
|
|
99
99
|
)
|
|
100
100
|
|
|
101
101
|
@staticmethod
|
|
102
|
-
def unwrap(
|
|
102
|
+
def unwrap(
|
|
103
|
+
wrapped_data: Union[str, bytes, bytearray, memoryview],
|
|
104
|
+
) -> tuple[Union[bytes, memoryview], dict[str, Any], str]:
|
|
103
105
|
"""Unwrap a cache envelope, reading either the v3 frame or the legacy format.
|
|
104
106
|
|
|
105
107
|
Args:
|
|
106
|
-
wrapped_data: v3 frame (bytes starting with MAGIC) OR legacy base64+JSON
|
|
108
|
+
wrapped_data: v3 frame (bytes-like starting with MAGIC) OR legacy base64+JSON
|
|
107
109
|
envelope (bytes/str starting with '{').
|
|
108
110
|
|
|
109
111
|
Returns:
|
|
110
|
-
tuple: (
|
|
112
|
+
tuple: (payload, metadata_dict, serializer_name). For a v3 frame the payload is a
|
|
113
|
+
zero-copy ``memoryview`` aliasing ``wrapped_data``; the legacy path returns ``bytes``.
|
|
111
114
|
"""
|
|
112
115
|
# v3 binary frame: only bytes-like can be a frame (str is always legacy JSON).
|
|
113
116
|
if isinstance(wrapped_data, (bytes, bytearray, memoryview)):
|
|
@@ -123,7 +126,11 @@ class SerializationWrapper:
|
|
|
123
126
|
if header_end > mv.nbytes:
|
|
124
127
|
raise ValueError(f"Invalid cache envelope header length {hdr_len}: frame has only {mv.nbytes} bytes")
|
|
125
128
|
header = json.loads(bytes(mv[_PREFIX_LEN:header_end]))
|
|
126
|
-
|
|
129
|
+
# Zero-copy: a memoryview slice past the header aliases the input frame (no
|
|
130
|
+
# full-payload copy on every read). It flows into pa.py_buffer (Arrow) and the
|
|
131
|
+
# mmap read path without materializing. The view keeps `wrapped_data` alive, so
|
|
132
|
+
# it never dangles; consumers needing owned bytes coerce at their own boundary.
|
|
133
|
+
payload = mv[header_end:]
|
|
127
134
|
return payload, header.get("m", {}), header.get("s", "unknown")
|
|
128
135
|
|
|
129
136
|
# Legacy base64+JSON envelope (pre-v3 entries; backward compatible read path).
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|