cachecore-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cachecore/__init__.py +367 -0
- cachecore/_context.py +42 -0
- cachecore/_transport.py +99 -0
- cachecore/errors.py +24 -0
- cachecore/py.typed +0 -0
- cachecore_python-0.1.0.dist-info/METADATA +230 -0
- cachecore_python-0.1.0.dist-info/RECORD +9 -0
- cachecore_python-0.1.0.dist-info/WHEEL +4 -0
- cachecore_python-0.1.0.dist-info/licenses/LICENSE +21 -0
cachecore/__init__.py
ADDED
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CacheCore Python client — thin adapter for the CacheCore caching proxy.
|
|
3
|
+
|
|
4
|
+
This library handles CacheCore-specific concerns (header injection,
|
|
5
|
+
dependency encoding, invalidation) without replacing or wrapping the
|
|
6
|
+
LLM SDK. You keep using ``openai.AsyncOpenAI``, ``ChatOpenAI``, etc.
|
|
7
|
+
exactly as before.
|
|
8
|
+
|
|
9
|
+
Quick start::
|
|
10
|
+
|
|
11
|
+
from cachecore import CacheCoreClient, Dep
|
|
12
|
+
|
|
13
|
+
cc = CacheCoreClient(
|
|
14
|
+
gateway_url="http://localhost:8080",
|
|
15
|
+
tenant_jwt="ey...",
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Inject into any OpenAI-compatible SDK
|
|
19
|
+
import httpx
|
|
20
|
+
from openai import AsyncOpenAI
|
|
21
|
+
|
|
22
|
+
oai = AsyncOpenAI(
|
|
23
|
+
api_key="ignored",
|
|
24
|
+
base_url="http://localhost:8080/v1",
|
|
25
|
+
http_client=httpx.AsyncClient(transport=cc.transport),
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Read path — declare deps so entries can be invalidated later
|
|
29
|
+
with cc.request_context(deps=[Dep("table:products")]):
|
|
30
|
+
resp = await oai.chat.completions.create(model="gpt-4o", messages=[...])
|
|
31
|
+
|
|
32
|
+
# Write path — bypass cache, then invalidate stale deps
|
|
33
|
+
with cc.request_context(bypass=True):
|
|
34
|
+
resp = await oai.chat.completions.create(model="gpt-4o", messages=[...])
|
|
35
|
+
await cc.invalidate("table:products")
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from __future__ import annotations

import asyncio
import logging
import uuid
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Iterator, Literal

import httpx

from cachecore._context import _RequestContext, _ctx
from cachecore._transport import (
    CacheCoreTransport,
    _HDR_AGE,
    _HDR_CACHE,
    _HDR_SIMILARITY,
    _HDR_TOKEN,
)
from cachecore.errors import (
    CacheCoreAuthError,
    CacheCoreError,
    CacheCoreRateLimitError,
)
|
|
61
|
+
|
|
62
|
+
__all__ = [
|
|
63
|
+
"CacheCoreClient",
|
|
64
|
+
"CacheCoreTransport",
|
|
65
|
+
"CacheStatus",
|
|
66
|
+
"Dep",
|
|
67
|
+
"DepDeclaration",
|
|
68
|
+
"InvalidateResult",
|
|
69
|
+
"CacheCoreError",
|
|
70
|
+
"CacheCoreAuthError",
|
|
71
|
+
"CacheCoreRateLimitError",
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
logger = logging.getLogger("cachecore")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
78
|
+
# Data classes
|
|
79
|
+
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class DepDeclaration:
    """
    A dependency declaration to attach to a cached entry.

    Parameters
    ----------
    dep_id:
        Arbitrary string key, e.g. ``"table:products"``, ``"doc:policy-42"``.
    hash:
        The expected hash of this dependency at time of caching. When the
        dep is later invalidated with a *different* hash, every entry that
        declared this dep_id + hash is evicted.

        For simple use cases (invalidate everything with this dep_id),
        leave as ``"v1"`` — any stable value works. What matters is that
        ``invalidate()`` changes the hash.

    Examples
    --------
    ::

        Dep("table:products")                  # simple — just a dep ID
        Dep("table:products", hash="abc123")   # explicit multi-version hash
    """

    __slots__ = ("dep_id", "expected_hash")

    def __init__(self, dep_id: str, hash: str = "v1") -> None:
        self.dep_id = dep_id
        self.expected_hash = hash

    def _key(self) -> tuple[str, str]:
        # Identity tuple shared by __eq__ and __hash__ so the two can
        # never drift apart.
        return (self.dep_id, self.expected_hash)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, DepDeclaration):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self) -> int:
        return hash(self._key())

    def __repr__(self) -> str:
        if self.expected_hash != "v1":
            return f"Dep({self.dep_id!r}, hash={self.expected_hash!r})"
        return f"Dep({self.dep_id!r})"


# Short public alias — the name users will actually type.
Dep = DepDeclaration
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass(frozen=True, slots=True)
class CacheStatus:
    """
    Cache outcome parsed from the gateway's ``X-Cache``,
    ``X-Cache-Similarity`` and ``X-Cache-Age`` response headers.
    """

    # Which cache tier (if any) served the response.
    status: Literal["HIT_L1", "HIT_L1_STALE", "HIT_L2", "MISS", "BYPASS", "UNKNOWN"]
    # Semantic similarity score reported by the gateway, 0.0 – 1.0.
    similarity: float
    # Age of the cached entry in seconds; 0 for MISS / BYPASS.
    age_seconds: int

    @classmethod
    def from_headers(cls, headers: httpx.Headers) -> CacheStatus:
        """Build a CacheStatus from response headers, tolerating missing
        or malformed values (unknown statuses map to ``"UNKNOWN"``,
        unparseable numbers fall back to 0)."""
        known = ("HIT_L1", "HIT_L1_STALE", "HIT_L2", "MISS", "BYPASS")
        raw = headers.get(_HDR_CACHE, "UNKNOWN")
        status = raw if raw in known else "UNKNOWN"

        def _number(text, cast, fallback):
            # Defensive parse: the gateway should send clean numbers, but a
            # proxy in between could mangle them.
            try:
                return cast(text)
            except ValueError:
                return fallback

        return cls(
            status=status,  # type: ignore[arg-type]
            similarity=_number(headers.get(_HDR_SIMILARITY, "0.0"), float, 0.0),
            age_seconds=_number(headers.get(_HDR_AGE, "0"), int, 0),
        )
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@dataclass(frozen=True, slots=True)
class InvalidateResult:
    """Result of a single invalidation call.

    Returned by :meth:`CacheCoreClient.invalidate`, which never raises —
    failures are reported via ``ok=False`` plus ``error`` instead.
    """

    # The dependency key that was invalidated, e.g. "table:products".
    dep_id: str
    # True when the gateway accepted the invalidation.
    ok: bool
    # Stringified exception message when ok is False; None on success.
    error: str | None = None
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
179
|
+
# Client
|
|
180
|
+
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class CacheCoreClient:
    """
    Connection-level object. Constructed once per tenant.

    Provides:

    - An httpx async transport to inject into any OpenAI / Anthropic SDK.
    - A context manager for per-request dep declarations and bypass.
    - Invalidation helpers.

    The client is **not** a replacement for the LLM SDK. It handles
    CacheCore-specific concerns only.

    Parameters
    ----------
    gateway_url:
        CacheCore proxy base URL, e.g. ``"http://localhost:8080"``.
        No trailing slash.
    tenant_jwt:
        HS256/RS256 JWT for this tenant.
    timeout:
        HTTP timeout in seconds for management calls (invalidate).
    debug:
        If True, log cache status for every proxied request.
    """

    __slots__ = ("_gateway_url", "_jwt", "_timeout", "_debug", "_transport", "_http")

    def __init__(
        self,
        gateway_url: str,
        tenant_jwt: str,
        *,
        timeout: float = 30.0,
        debug: bool = False,
    ) -> None:
        self._gateway_url = gateway_url.rstrip("/")
        self._jwt = tenant_jwt
        self._timeout = timeout
        self._debug = debug
        self._transport = CacheCoreTransport(jwt=tenant_jwt, debug=debug)

        # Separate httpx client for management calls (invalidate).
        # Not routed through the CacheCore transport — these go direct —
        # so the tenant token must be attached here explicitly; previously
        # no credential was sent at all, which left the invalidation
        # request with no tenant scope.
        # NOTE(review): assumes the management endpoints authenticate via
        # the same X-CacheCore-Token header as the proxy path — confirm
        # against the gateway's auth handling.
        self._http = httpx.AsyncClient(
            base_url=self._gateway_url,
            timeout=httpx.Timeout(timeout),
            headers={_HDR_TOKEN: tenant_jwt},
        )

    # ── Transport ───────────────────────────────────────────────────────

    @property
    def transport(self) -> CacheCoreTransport:
        """
        The httpx async transport to pass to your SDK's http_client.

        Example::

            http = httpx.AsyncClient(transport=client.transport)
            oai = AsyncOpenAI(..., http_client=http)
        """
        return self._transport

    # ── Per-request context ─────────────────────────────────────────────

    @contextmanager
    def request_context(
        self,
        deps: list[DepDeclaration] | None = None,
        bypass: bool = False,
    ) -> Iterator[None]:
        """
        Set per-request cache headers for the duration of the ``with`` block.

        Uses a ``ContextVar`` so concurrent asyncio Tasks never interfere.

        Parameters
        ----------
        deps:
            Dependency declarations to tag this cache entry with.
            Pass ``[Dep("table:products")]`` etc.
        bypass:
            If True, omits ``X-CacheCore-Token`` so the gateway skips
            caching. Use for write operations whose LLM response should
            not be cached.
        """
        token = _ctx.set(_RequestContext(deps=deps or [], bypass=bypass))
        try:
            yield
        finally:
            # Restore the previous context even if the body raised.
            _ctx.reset(token)

    # ── Invalidation ────────────────────────────────────────────────────

    async def invalidate(
        self,
        dep_id: str,
        new_hash: str | None = None,
    ) -> InvalidateResult:
        """
        Invalidate all cache entries that declared the given dependency.

        Never raises — failures are reported via ``InvalidateResult.ok``
        and ``InvalidateResult.error``.

        Parameters
        ----------
        dep_id:
            The dependency key to invalidate, e.g. ``"table:products"``.
        new_hash:
            The new hash to store for this dep. If None, a random UUID is
            generated — this guarantees all existing entries are stale.
        """
        if new_hash is None:
            new_hash = uuid.uuid4().hex

        try:
            resp = await self._http.post(
                "/v1/invalidate",
                json={"dep_id": dep_id, "new_hash": new_hash},
            )
            _raise_for_status(resp)
            return InvalidateResult(dep_id=dep_id, ok=True)
        except CacheCoreError as exc:
            return InvalidateResult(dep_id=dep_id, ok=False, error=str(exc))
        except httpx.HTTPError as exc:
            # Transport-level failure (DNS, timeout, connect) — report, don't raise.
            return InvalidateResult(dep_id=dep_id, ok=False, error=str(exc))

    async def invalidate_many(
        self,
        dep_ids: list[str],
        new_hash: str | None = None,
    ) -> list[InvalidateResult]:
        """
        Invalidate multiple deps concurrently.

        Each dep gets the same ``new_hash`` (or individual UUIDs if None).
        Results are returned in the same order as ``dep_ids``.
        """
        tasks = [
            self.invalidate(dep_id, new_hash=new_hash)
            for dep_id in dep_ids
        ]
        return list(await asyncio.gather(*tasks))

    # ── Lifecycle ───────────────────────────────────────────────────────

    async def aclose(self) -> None:
        """Close the underlying HTTP clients."""
        await self._http.aclose()
        await self._transport.aclose()

    async def __aenter__(self) -> CacheCoreClient:
        return self

    async def __aexit__(self, *exc: object) -> None:
        await self.aclose()
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
# ── Helpers ─────────────────────────────────────────────────────────────
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _raise_for_status(resp: httpx.Response) -> None:
    """Map gateway HTTP error responses to typed CacheCore exceptions.

    Returns silently for any status < 400.

    Raises
    ------
    CacheCoreAuthError
        On 401 / 403.
    CacheCoreRateLimitError
        On 429; ``retry_after`` carries the Retry-After header value in
        seconds when it parses as a number, else None.
    CacheCoreError
        On any other 4xx / 5xx.
    """
    if resp.status_code < 400:
        return

    if resp.status_code in (401, 403):
        raise CacheCoreAuthError(
            f"CacheCore auth error {resp.status_code}: {resp.text}"
        )

    if resp.status_code == 429:
        retry_after: float | None = None
        raw = resp.headers.get("Retry-After")
        if raw is not None:
            try:
                retry_after = float(raw)
            except ValueError:
                # Retry-After may also be an HTTP-date; treat as unknown.
                pass
        raise CacheCoreRateLimitError(
            f"CacheCore rate limit (429): {resp.text}",
            retry_after=retry_after,
        )

    # Every remaining status is >= 400 at this point, so the original
    # trailing `if resp.status_code >= 400` guard was redundant.
    raise CacheCoreError(
        f"CacheCore error {resp.status_code}: {resp.text}"
    )
|
cachecore/_context.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Internal context-variable plumbing for per-request cache headers.
|
|
3
|
+
|
|
4
|
+
ContextVar is safe in asyncio: each Task gets its own snapshot,
|
|
5
|
+
so concurrent LLM calls from different Tasks never interfere.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import base64
|
|
11
|
+
import json
|
|
12
|
+
from contextvars import ContextVar
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import TYPE_CHECKING
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from cachecore import DepDeclaration
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(slots=True)
class _RequestContext:
    # Per-request state snapshot read by CacheCoreTransport on each
    # outgoing request.
    # Dependency declarations to encode into the X-CacheCore-Deps header.
    deps: list[DepDeclaration] = field(default_factory=list)
    # When True, the transport omits the tenant token so the gateway
    # skips caching for this request.
    bypass: bool = False


# Module-wide ContextVar holding the active per-request context; None when
# no request_context() block is active. Each asyncio Task sees its own
# value, so concurrent requests cannot clobber each other's deps/bypass.
_ctx: ContextVar[_RequestContext | None] = ContextVar(
    "cachecore_request_ctx", default=None
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _encode_deps(deps: list[DepDeclaration]) -> str:
    """
    Serialize dep declarations into the X-CacheCore-Deps header value.

    The wire format is base64url (padding stripped) over a compact JSON
    array: ``[{"dep_id": "...", "expected_hash": "..."}]``.

    This is the canonical encoding the gateway expects (manifest.rs:37-51).
    """
    manifest = json.dumps(
        [{"dep_id": dep.dep_id, "expected_hash": dep.expected_hash} for dep in deps],
        separators=(",", ":"),
    )
    # Strip the "=" padding after decoding — equivalent to stripping the
    # b"=" bytes before, since base64url output is pure ASCII.
    return base64.urlsafe_b64encode(manifest.encode()).decode().rstrip("=")
|
cachecore/_transport.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CacheCore httpx async transport.
|
|
3
|
+
|
|
4
|
+
Sits between the LLM SDK and the network. On every outgoing request it:
|
|
5
|
+
1. Injects X-CacheCore-Token (unless bypass is active).
|
|
6
|
+
2. Injects X-CacheCore-Deps (base64url-JSON, if deps declared).
|
|
7
|
+
3. Captures X-Cache / X-Cache-Similarity / X-Cache-Age from the response.
|
|
8
|
+
|
|
9
|
+
Why a transport and not default_headers?
|
|
10
|
+
The OpenAI Python SDK constructs each httpx.Request internally and its
|
|
11
|
+
header-merging logic can drop or overwrite client-level default_headers.
|
|
12
|
+
Injecting at the transport layer — after Request construction, before the
|
|
13
|
+
TCP write — is the only guaranteed injection point.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
import httpx
|
|
22
|
+
|
|
23
|
+
from cachecore._context import _ctx, _encode_deps
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from cachecore import CacheStatus
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger("cachecore")
|
|
29
|
+
|
|
30
|
+
# ── Header constants ────────────────────────────────────────────────────
# These MUST match proxy.rs exactly. Bug 1 in the design doc: both test
# agents used "X-Cache-Deps" — the gateway reads "X-CacheCore-Deps".
# Baking the correct names here makes the bug impossible by construction.
_HDR_TOKEN = "X-CacheCore-Token"  # request: tenant JWT; omitted ⇒ bypass mode
_HDR_DEPS = "X-CacheCore-Deps"  # request: base64url-JSON dep manifest
_HDR_CACHE = "X-Cache"  # response: cache outcome (HIT_L1 / HIT_L2 / MISS / ...)
_HDR_SIMILARITY = "X-Cache-Similarity"  # response: L2 similarity score, 0.0–1.0
_HDR_AGE = "X-Cache-Age"  # response: cached entry age in seconds
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class CacheCoreTransport(httpx.AsyncBaseTransport):
    """
    Drop-in httpx async transport that performs all CacheCore header
    plumbing on behalf of the LLM SDK.

    Usage::

        transport = CacheCoreTransport(jwt="ey...")
        http = httpx.AsyncClient(transport=transport)
        oai = AsyncOpenAI(base_url="http://localhost:8080/v1",
                          api_key="dummy", http_client=http)
    """

    __slots__ = ("_jwt", "_wrapped", "_debug")

    def __init__(
        self,
        jwt: str,
        *,
        wrapped: httpx.AsyncBaseTransport | None = None,
        debug: bool = False,
    ) -> None:
        self._jwt = jwt
        self._debug = debug
        # Delegate actual network I/O to a plain HTTP transport unless the
        # caller supplies their own.
        self._wrapped = httpx.AsyncHTTPTransport() if wrapped is None else wrapped

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        ctx = _ctx.get()
        bypassing = bool(ctx and ctx.bypass)

        # Token injection: bypass means omitting the token entirely — the
        # gateway treats a missing token as bypass mode (proxy.rs:55-68).
        # This fixes Bug 3: the old agents sent "X-Cache-Bypass: true",
        # which the gateway ignores.
        if not bypassing:
            request.headers[_HDR_TOKEN] = self._jwt

        # Dep injection: correct header name + base64url-JSON encoding by
        # construction (fixes Bug 1, wrong name, and Bug 2, wrong encoding).
        if ctx is not None and ctx.deps:
            request.headers[_HDR_DEPS] = _encode_deps(ctx.deps)

        response = await self._wrapped.handle_async_request(request)

        if self._debug:
            logger.debug(
                "CacheCore %s sim=%s age=%ss %s %s",
                response.headers.get(_HDR_CACHE, "?"),
                response.headers.get(_HDR_SIMILARITY, "?"),
                response.headers.get(_HDR_AGE, "?"),
                request.method,
                request.url,
            )

        return response

    async def aclose(self) -> None:
        """Release the wrapped transport's connections."""
        await self._wrapped.aclose()
|
cachecore/errors.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""CacheCore client exceptions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CacheCoreError(Exception):
    """Root of the CacheCore client exception hierarchy."""


class CacheCoreAuthError(CacheCoreError):
    """The gateway or an admin endpoint rejected our credentials (401/403)."""


class CacheCoreRateLimitError(CacheCoreError):
    """
    The gateway answered 429 Too Many Requests.

    Attributes:
        retry_after: Seconds to wait, from the Retry-After header when
            present and numeric; otherwise None.
    """

    def __init__(self, message: str, retry_after: float | None = None) -> None:
        super().__init__(message)
        self.retry_after = retry_after
|
cachecore/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cachecore-python
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client for CacheCore — semantic cache gateway for LLM agent workloads
|
|
5
|
+
Project-URL: Homepage, https://cachecore.it
|
|
6
|
+
Project-URL: Repository, https://github.com/cachecore/cachecore-python
|
|
7
|
+
Author: Fabrizio
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agents,cache,llm,openai,proxy,semantic-cache
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Framework :: AsyncIO
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: httpx>=0.25.0
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# cachecore
|
|
29
|
+
|
|
30
|
+
Python client for [CacheCore](https://cachecore.it) — the LLM API caching proxy that reduces cost and latency for AI agent workloads.
|
|
31
|
+
|
|
32
|
+
CacheCore sits transparently between your application and LLM providers (OpenAI, Anthropic via OpenAI-compat, etc.) and caches responses at two levels: L1 exact-match and L2 semantic similarity. This client handles the CacheCore-specific plumbing — header injection, dependency encoding, invalidation — without replacing your LLM SDK.
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install cachecore-python
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import cachecore # the import name is 'cachecore'
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick start
|
|
45
|
+
|
|
46
|
+
### Rung 1 — zero code changes: swap `base_url`
|
|
47
|
+
|
|
48
|
+
Point your existing SDK at CacheCore and get L1 exact-match caching immediately.
|
|
49
|
+
No `import cachecore` required.
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from openai import AsyncOpenAI
|
|
53
|
+
|
|
54
|
+
oai = AsyncOpenAI(
|
|
55
|
+
api_key="your-openai-key",
|
|
56
|
+
base_url="https://gateway.cachecore.it/v1", # ← only change
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# Identical requests are now served from cache.
|
|
60
|
+
resp = await oai.chat.completions.create(
|
|
61
|
+
model="gpt-4o",
|
|
62
|
+
messages=[{"role": "user", "content": "What is 2+2?"}],
|
|
63
|
+
)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Rung 2 — tenant isolation (3 lines)
|
|
67
|
+
|
|
68
|
+
Add `CacheCoreClient` to unlock tenant-scoped namespaces, L2 semantic caching, and per-tenant
|
|
69
|
+
metrics. Three extra lines wired into the SDK's `http_client`.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from cachecore import CacheCoreClient
|
|
73
|
+
import httpx
|
|
74
|
+
from openai import AsyncOpenAI
|
|
75
|
+
|
|
76
|
+
cc = CacheCoreClient(
|
|
77
|
+
gateway_url="https://gateway.cachecore.it",
|
|
78
|
+
tenant_jwt="ey...", # your tenant JWT from the CacheCore dashboard
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
oai = AsyncOpenAI(
|
|
82
|
+
api_key="ignored", # gateway injects its own upstream key
|
|
83
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
84
|
+
http_client=httpx.AsyncClient(transport=cc.transport),
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Requests now carry your tenant identity.
|
|
88
|
+
# Semantically similar prompts hit L2 cache.
|
|
89
|
+
resp = await oai.chat.completions.create(
|
|
90
|
+
model="gpt-4o",
|
|
91
|
+
messages=[{"role": "user", "content": "Explain photosynthesis"}],
|
|
92
|
+
)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Rung 3 — dep invalidation
|
|
96
|
+
|
|
97
|
+
Declare which data a cached response depends on. When that data changes, invalidate the dep
|
|
98
|
+
and all stale entries are evicted automatically.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from cachecore import CacheCoreClient, Dep
|
|
102
|
+
import httpx
|
|
103
|
+
from openai import AsyncOpenAI
|
|
104
|
+
|
|
105
|
+
cc = CacheCoreClient(
|
|
106
|
+
gateway_url="https://gateway.cachecore.it",
|
|
107
|
+
tenant_jwt="ey...",
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
oai = AsyncOpenAI(
|
|
111
|
+
api_key="ignored",
|
|
112
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
113
|
+
http_client=httpx.AsyncClient(transport=cc.transport),
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
# Read path — declare what data this response depends on
|
|
117
|
+
with cc.request_context(deps=[Dep("table:products"), Dep("table:orders")]):
|
|
118
|
+
resp = await oai.chat.completions.create(
|
|
119
|
+
model="gpt-4o",
|
|
120
|
+
messages=[{"role": "user", "content": "List all products under $50"}],
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
# Write path — bypass cache for the LLM call, then invalidate
|
|
124
|
+
with cc.request_context(bypass=True):
|
|
125
|
+
resp = await oai.chat.completions.create(
|
|
126
|
+
model="gpt-4o",
|
|
127
|
+
messages=[{"role": "user", "content": "Confirm order created."}],
|
|
128
|
+
)
|
|
129
|
+
await cc.invalidate("table:products")
|
|
130
|
+
|
|
131
|
+
# Invalidate multiple deps at once
|
|
132
|
+
await cc.invalidate_many(["table:orders", "table:products"])
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Works with LangChain / LangGraph
|
|
136
|
+
|
|
137
|
+
The transport works with any SDK that accepts an `httpx.AsyncClient`:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from langchain_openai import ChatOpenAI
|
|
141
|
+
import httpx
|
|
142
|
+
from cachecore import CacheCoreClient, Dep
|
|
143
|
+
|
|
144
|
+
cc = CacheCoreClient(gateway_url="https://gateway.cachecore.it", tenant_jwt="ey...")
|
|
145
|
+
|
|
146
|
+
llm = ChatOpenAI(
|
|
147
|
+
model="gpt-4o",
|
|
148
|
+
api_key="ignored",
|
|
149
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
150
|
+
http_async_client=httpx.AsyncClient(transport=cc.transport),
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
# Use request_context() around any ainvoke / astream call
|
|
154
|
+
with cc.request_context(deps=[Dep("doc:policy-42")]):
|
|
155
|
+
result = await llm.ainvoke("Summarise the compliance policy")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## API reference
|
|
159
|
+
|
|
160
|
+
### `CacheCoreClient`
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
CacheCoreClient(
|
|
164
|
+
gateway_url: str, # "https://gateway.cachecore.it"
|
|
165
|
+
tenant_jwt: str, # tenant HS256/RS256 JWT
|
|
166
|
+
timeout: float = 30.0, # for invalidation calls
|
|
167
|
+
debug: bool = False, # log cache status per request
|
|
168
|
+
)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
| Property / Method | Description |
|
|
172
|
+
|---|---|
|
|
173
|
+
| `.transport` | `httpx.AsyncBaseTransport` — pass to `httpx.AsyncClient(transport=...)` |
|
|
174
|
+
| `.request_context(deps, bypass)` | Context manager — sets per-request deps / bypass |
|
|
175
|
+
| `await .invalidate(dep_id)` | Evict all entries tagged with this dep |
|
|
176
|
+
| `await .invalidate_many(dep_ids)` | Invalidate multiple deps concurrently |
|
|
177
|
+
| `await .aclose()` | Close HTTP clients. Also works as `async with CacheCoreClient(...):` |
|
|
178
|
+
|
|
179
|
+
### `Dep` / `DepDeclaration`
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
Dep("table:products") # simple — hash defaults to "v1"
|
|
183
|
+
Dep("table:products", hash="abc123") # explicit hash for versioned deps
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### `CacheStatus`
|
|
187
|
+
|
|
188
|
+
Parsed from response headers after a proxied request:
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from cachecore import CacheStatus
|
|
192
|
+
|
|
193
|
+
status = CacheStatus.from_headers(response.headers)
|
|
194
|
+
# status.status → "HIT_L1" | "HIT_L1_STALE" | "HIT_L2" | "MISS" | "BYPASS" | "UNKNOWN"
|
|
195
|
+
# status.similarity → float 0.0–1.0 (non-zero on L2 hits)
|
|
196
|
+
# status.age_seconds → int
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Exceptions
|
|
200
|
+
|
|
201
|
+
| Exception | When |
|
|
202
|
+
|---|---|
|
|
203
|
+
| `CacheCoreError` | Base class for all CacheCore errors |
|
|
204
|
+
| `CacheCoreAuthError` | 401 / 403 from the gateway |
|
|
205
|
+
| `CacheCoreRateLimitError` | 429 — check `.retry_after` attribute (seconds, or `None`) |
|
|
206
|
+
|
|
207
|
+
## How it works
|
|
208
|
+
|
|
209
|
+
The client injects headers at the httpx transport layer — below the LLM SDK, above the network. Your SDK continues to work exactly as before:
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
Your code → openai SDK → httpx → [CacheCoreTransport] → CacheCore proxy → OpenAI API
|
|
213
|
+
↑
|
|
214
|
+
injects X-CacheCore-Token
|
|
215
|
+
injects X-CacheCore-Deps
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Requirements
|
|
219
|
+
|
|
220
|
+
- Python 3.10+
|
|
221
|
+
- `httpx >= 0.25.0`
|
|
222
|
+
|
|
223
|
+
## Links
|
|
224
|
+
|
|
225
|
+
- Website: [cachecore.it](https://cachecore.it)
|
|
226
|
+
- Source: [github.com/cachecore/cachecore-python](https://github.com/cachecore/cachecore-python)
|
|
227
|
+
|
|
228
|
+
## License
|
|
229
|
+
|
|
230
|
+
MIT — see [LICENSE](LICENSE)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
cachecore/__init__.py,sha256=mMEIWjAxEAPty9j-VbY7rWZsWmM1DhUW8YuwRcVdJ0A,11915
|
|
2
|
+
cachecore/_context.py,sha256=JB5kbnJFViX6MEOTjFvW-VNeRhLi-uisf2i8ufrxVsM,1205
|
|
3
|
+
cachecore/_transport.py,sha256=vfCLEtH3W-A0eW0rh17KFIu28hUwEVoQwV5hoVyC1bk,4049
|
|
4
|
+
cachecore/errors.py,sha256=Jy4JXmINWgG7ItgPb7o4pI5tW2O8N5xmSFe47P8XdoU,646
|
|
5
|
+
cachecore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
cachecore_python-0.1.0.dist-info/METADATA,sha256=6O5qCrZEirdoOZ7T5XKz8NmEIEqKpi7Q6bvGCrhRnpA,7230
|
|
7
|
+
cachecore_python-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
8
|
+
cachecore_python-0.1.0.dist-info/licenses/LICENSE,sha256=lQU0_mqn8xkIyItjElpsMJyRpESqDBaJlCpy1fmYzW0,1065
|
|
9
|
+
cachecore_python-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Fabrizio
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|