cachecore-python 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cachecore_python-0.1.0/.claude/settings.local.json +14 -0
- cachecore_python-0.1.0/CHANGELOG.md +32 -0
- cachecore_python-0.1.0/LICENSE +21 -0
- cachecore_python-0.1.0/PKG-INFO +230 -0
- cachecore_python-0.1.0/README.md +203 -0
- cachecore_python-0.1.0/cachecore/__init__.py +367 -0
- cachecore_python-0.1.0/cachecore/_context.py +42 -0
- cachecore_python-0.1.0/cachecore/_transport.py +99 -0
- cachecore_python-0.1.0/cachecore/errors.py +24 -0
- cachecore_python-0.1.0/cachecore/py.typed +0 -0
- cachecore_python-0.1.0/pyproject.toml +54 -0
- cachecore_python-0.1.0/tests/__init__.py +0 -0
- cachecore_python-0.1.0/tests/generate_token.py +38 -0
- cachecore_python-0.1.0/tests/test_cachecore.py +289 -0
- cachecore_python-0.1.0/tests/test_live.py +40 -0
- cachecore_python-0.1.0/tests/test_scenario_a_role_isolation.py +205 -0
- cachecore_python-0.1.0/tests/test_scenario_b_tenant_isolation.py +258 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(tree -a -L 3 /Users/fabrizio/Desktop/cachecore/client/cachecore-python 2>/dev/null || find /Users/fabrizio/Desktop/cachecore/client/cachecore-python -not -path '*/\\\\.*' -not -path '*/__pycache__/*' -not -path '*.pyc' | sort)",
|
|
5
|
+
"Bash(python3 --version && pip3 show hatchling 2>/dev/null && pip3 show build 2>/dev/null && pip3 show twine 2>/dev/null)",
|
|
6
|
+
"Bash(pip3 show hatchling build twine 2>&1 | grep -E \"^\\(Name|Version|---\\)\")",
|
|
7
|
+
"Bash(pip3 list 2>/dev/null | grep -iE \"hatch|build|twine|wheel|setuptools\")",
|
|
8
|
+
"Bash(pip3 list 2>/dev/null | grep -iE \"hatch\")",
|
|
9
|
+
"Bash(pip install build twine -q 2>&1 | tail -3)",
|
|
10
|
+
"Bash(python -m build 2>&1)",
|
|
11
|
+
"Bash(python3 -m build 2>&1)"
|
|
12
|
+
]
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|
6
|
+
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2024-01-01
|
|
9
|
+
|
|
10
|
+
Initial public release.
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- `CacheCoreClient` — connection-level object constructed once per tenant. Provides
|
|
15
|
+
the transport, per-request context manager, and invalidation helpers.
|
|
16
|
+
- `CacheCoreTransport` — `httpx.AsyncBaseTransport` that injects `X-CacheCore-Token`
|
|
17
|
+
and `X-CacheCore-Deps` headers at the transport layer, below the LLM SDK, guaranteeing
|
|
18
|
+
correct injection regardless of SDK header-merging behaviour.
|
|
19
|
+
- `CacheStatus` — dataclass parsed from `X-Cache`, `X-Cache-Similarity`, and
|
|
20
|
+
`X-Cache-Age` response headers. Reports `HIT_L1`, `HIT_L1_STALE`, `HIT_L2`,
|
|
21
|
+
`MISS`, `BYPASS`, or `UNKNOWN`.
|
|
22
|
+
- `Dep` / `DepDeclaration` — dependency declaration objects. Pass to
|
|
23
|
+
`request_context(deps=[Dep("table:products")])` to tag cache entries for later
|
|
24
|
+
invalidation.
|
|
25
|
+
- `InvalidateResult` — result dataclass returned by `invalidate()` and
|
|
26
|
+
`invalidate_many()`, carrying `dep_id`, `ok`, and an optional `error` message.
|
|
27
|
+
- `CacheCoreError` — base exception class for all CacheCore client errors.
|
|
28
|
+
- `CacheCoreAuthError` — raised on 401 / 403 from the gateway.
|
|
29
|
+
- `CacheCoreRateLimitError` — raised on 429; carries a `.retry_after` attribute (seconds, or `None`).
|
|
30
|
+
- `py.typed` marker (PEP 561) — package ships inline type annotations.
|
|
31
|
+
- Python 3.10, 3.11, 3.12, and 3.13 support.
|
|
32
|
+
- Single runtime dependency: `httpx >= 0.25.0`.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Fabrizio
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cachecore-python
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client for CacheCore — semantic cache gateway for LLM agent workloads
|
|
5
|
+
Project-URL: Homepage, https://cachecore.it
|
|
6
|
+
Project-URL: Repository, https://github.com/cachecore/cachecore-python
|
|
7
|
+
Author: Fabrizio
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agents,cache,llm,openai,proxy,semantic-cache
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Framework :: AsyncIO
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: httpx>=0.25.0
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# cachecore
|
|
29
|
+
|
|
30
|
+
Python client for [CacheCore](https://cachecore.it) — the LLM API caching proxy that reduces cost and latency for AI agent workloads.
|
|
31
|
+
|
|
32
|
+
CacheCore sits transparently between your application and LLM providers (OpenAI, Anthropic via an OpenAI-compatible endpoint, etc.) and caches responses at two levels: L1 exact-match and L2 semantic similarity. This client handles the CacheCore-specific plumbing — header injection, dependency encoding, invalidation — without replacing your LLM SDK.
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install cachecore-python
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import cachecore # the import name is 'cachecore'
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick start
|
|
45
|
+
|
|
46
|
+
### Rung 1 — one-line change: swap `base_url`
|
|
47
|
+
|
|
48
|
+
Point your existing SDK at CacheCore and get L1 exact-match caching immediately.
|
|
49
|
+
No `import cachecore` required.
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from openai import AsyncOpenAI
|
|
53
|
+
|
|
54
|
+
oai = AsyncOpenAI(
|
|
55
|
+
api_key="your-openai-key",
|
|
56
|
+
base_url="https://gateway.cachecore.it/v1", # ← only change
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# Identical requests are now served from cache.
|
|
60
|
+
resp = await oai.chat.completions.create(
|
|
61
|
+
model="gpt-4o",
|
|
62
|
+
messages=[{"role": "user", "content": "What is 2+2?"}],
|
|
63
|
+
)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Rung 2 — tenant isolation (3 lines)
|
|
67
|
+
|
|
68
|
+
Add `CacheCoreClient` to unlock tenant-scoped namespaces, L2 semantic caching, and per-tenant
|
|
69
|
+
metrics. It takes three extra lines, wired into the SDK via its `http_client` argument.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from cachecore import CacheCoreClient
|
|
73
|
+
import httpx
|
|
74
|
+
from openai import AsyncOpenAI
|
|
75
|
+
|
|
76
|
+
cc = CacheCoreClient(
|
|
77
|
+
gateway_url="https://gateway.cachecore.it",
|
|
78
|
+
tenant_jwt="ey...", # your tenant JWT from the CacheCore dashboard
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
oai = AsyncOpenAI(
|
|
82
|
+
api_key="ignored", # gateway injects its own upstream key
|
|
83
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
84
|
+
http_client=httpx.AsyncClient(transport=cc.transport),
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Requests now carry your tenant identity.
|
|
88
|
+
# Semantically similar prompts hit L2 cache.
|
|
89
|
+
resp = await oai.chat.completions.create(
|
|
90
|
+
model="gpt-4o",
|
|
91
|
+
messages=[{"role": "user", "content": "Explain photosynthesis"}],
|
|
92
|
+
)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Rung 3 — dep invalidation
|
|
96
|
+
|
|
97
|
+
Declare which data a cached response depends on. When that data changes, invalidate the dep
|
|
98
|
+
and all stale entries are evicted automatically.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from cachecore import CacheCoreClient, Dep
|
|
102
|
+
import httpx
|
|
103
|
+
from openai import AsyncOpenAI
|
|
104
|
+
|
|
105
|
+
cc = CacheCoreClient(
|
|
106
|
+
gateway_url="https://gateway.cachecore.it",
|
|
107
|
+
tenant_jwt="ey...",
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
oai = AsyncOpenAI(
|
|
111
|
+
api_key="ignored",
|
|
112
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
113
|
+
http_client=httpx.AsyncClient(transport=cc.transport),
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
# Read path — declare what data this response depends on
|
|
117
|
+
with cc.request_context(deps=[Dep("table:products"), Dep("table:orders")]):
|
|
118
|
+
resp = await oai.chat.completions.create(
|
|
119
|
+
model="gpt-4o",
|
|
120
|
+
messages=[{"role": "user", "content": "List all products under $50"}],
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
# Write path — bypass cache for the LLM call, then invalidate
|
|
124
|
+
with cc.request_context(bypass=True):
|
|
125
|
+
resp = await oai.chat.completions.create(
|
|
126
|
+
model="gpt-4o",
|
|
127
|
+
messages=[{"role": "user", "content": "Confirm order created."}],
|
|
128
|
+
)
|
|
129
|
+
await cc.invalidate("table:products")
|
|
130
|
+
|
|
131
|
+
# Invalidate multiple deps at once
|
|
132
|
+
await cc.invalidate_many(["table:orders", "table:products"])
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Works with LangChain / LangGraph
|
|
136
|
+
|
|
137
|
+
The transport works with any SDK that accepts an `httpx.AsyncClient`:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from langchain_openai import ChatOpenAI
|
|
141
|
+
import httpx
|
|
142
|
+
from cachecore import CacheCoreClient, Dep
|
|
143
|
+
|
|
144
|
+
cc = CacheCoreClient(gateway_url="https://gateway.cachecore.it", tenant_jwt="ey...")
|
|
145
|
+
|
|
146
|
+
llm = ChatOpenAI(
|
|
147
|
+
model="gpt-4o",
|
|
148
|
+
api_key="ignored",
|
|
149
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
150
|
+
http_async_client=httpx.AsyncClient(transport=cc.transport),
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
# Use request_context() around any ainvoke / astream call
|
|
154
|
+
with cc.request_context(deps=[Dep("doc:policy-42")]):
|
|
155
|
+
result = await llm.ainvoke("Summarise the compliance policy")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## API reference
|
|
159
|
+
|
|
160
|
+
### `CacheCoreClient`
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
CacheCoreClient(
|
|
164
|
+
gateway_url: str, # "https://gateway.cachecore.it"
|
|
165
|
+
tenant_jwt: str, # tenant HS256/RS256 JWT
|
|
166
|
+
timeout: float = 30.0, # for invalidation calls
|
|
167
|
+
debug: bool = False, # log cache status per request
|
|
168
|
+
)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
| Property / Method | Description |
|
|
172
|
+
|---|---|
|
|
173
|
+
| `.transport` | `httpx.AsyncBaseTransport` — pass to `httpx.AsyncClient(transport=...)` |
|
|
174
|
+
| `.request_context(deps, bypass)` | Context manager — sets per-request deps / bypass |
|
|
175
|
+
| `await .invalidate(dep_id)` | Evict all entries tagged with this dep |
|
|
176
|
+
| `await .invalidate_many(dep_ids)` | Invalidate multiple deps concurrently |
|
|
177
|
+
| `await .aclose()` | Close HTTP clients. Also works as `async with CacheCoreClient(...):` |
|
|
178
|
+
|
|
179
|
+
### `Dep` / `DepDeclaration`
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
Dep("table:products") # simple — hash defaults to "v1"
|
|
183
|
+
Dep("table:products", hash="abc123") # explicit hash for versioned deps
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### `CacheStatus`
|
|
187
|
+
|
|
188
|
+
Parsed from response headers after a proxied request:
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from cachecore import CacheStatus
|
|
192
|
+
|
|
193
|
+
status = CacheStatus.from_headers(response.headers)
|
|
194
|
+
# status.status → "HIT_L1" | "HIT_L1_STALE" | "HIT_L2" | "MISS" | "BYPASS" | "UNKNOWN"
|
|
195
|
+
# status.similarity → float 0.0–1.0 (non-zero on L2 hits)
|
|
196
|
+
# status.age_seconds → int
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Exceptions
|
|
200
|
+
|
|
201
|
+
| Exception | When |
|
|
202
|
+
|---|---|
|
|
203
|
+
| `CacheCoreError` | Base class for all CacheCore errors |
|
|
204
|
+
| `CacheCoreAuthError` | 401 / 403 from the gateway |
|
|
205
|
+
| `CacheCoreRateLimitError` | 429 — check `.retry_after` attribute (seconds, or `None`) |
|
|
206
|
+
|
|
207
|
+
## How it works
|
|
208
|
+
|
|
209
|
+
The client injects headers at the httpx transport layer — below the LLM SDK, above the network. Your SDK continues to work exactly as before:
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
Your code → openai SDK → httpx → [CacheCoreTransport] → CacheCore proxy → OpenAI API
|
|
213
|
+
↑
|
|
214
|
+
injects X-CacheCore-Token
|
|
215
|
+
injects X-CacheCore-Deps
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Requirements
|
|
219
|
+
|
|
220
|
+
- Python 3.10+
|
|
221
|
+
- `httpx >= 0.25.0`
|
|
222
|
+
|
|
223
|
+
## Links
|
|
224
|
+
|
|
225
|
+
- Website: [cachecore.it](https://cachecore.it)
|
|
226
|
+
- Source: [github.com/cachecore/cachecore-python](https://github.com/cachecore/cachecore-python)
|
|
227
|
+
|
|
228
|
+
## License
|
|
229
|
+
|
|
230
|
+
MIT — see [LICENSE](LICENSE)
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# cachecore
|
|
2
|
+
|
|
3
|
+
Python client for [CacheCore](https://cachecore.it) — the LLM API caching proxy that reduces cost and latency for AI agent workloads.
|
|
4
|
+
|
|
5
|
+
CacheCore sits transparently between your application and LLM providers (OpenAI, Anthropic via an OpenAI-compatible endpoint, etc.) and caches responses at two levels: L1 exact-match and L2 semantic similarity. This client handles the CacheCore-specific plumbing — header injection, dependency encoding, invalidation — without replacing your LLM SDK.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install cachecore-python
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import cachecore # the import name is 'cachecore'
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick start
|
|
18
|
+
|
|
19
|
+
### Rung 1 — one-line change: swap `base_url`
|
|
20
|
+
|
|
21
|
+
Point your existing SDK at CacheCore and get L1 exact-match caching immediately.
|
|
22
|
+
No `import cachecore` required.
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from openai import AsyncOpenAI
|
|
26
|
+
|
|
27
|
+
oai = AsyncOpenAI(
|
|
28
|
+
api_key="your-openai-key",
|
|
29
|
+
base_url="https://gateway.cachecore.it/v1", # ← only change
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# Identical requests are now served from cache.
|
|
33
|
+
resp = await oai.chat.completions.create(
|
|
34
|
+
model="gpt-4o",
|
|
35
|
+
messages=[{"role": "user", "content": "What is 2+2?"}],
|
|
36
|
+
)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Rung 2 — tenant isolation (3 lines)
|
|
40
|
+
|
|
41
|
+
Add `CacheCoreClient` to unlock tenant-scoped namespaces, L2 semantic caching, and per-tenant
|
|
42
|
+
metrics. It takes three extra lines, wired into the SDK via its `http_client` argument.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from cachecore import CacheCoreClient
|
|
46
|
+
import httpx
|
|
47
|
+
from openai import AsyncOpenAI
|
|
48
|
+
|
|
49
|
+
cc = CacheCoreClient(
|
|
50
|
+
gateway_url="https://gateway.cachecore.it",
|
|
51
|
+
tenant_jwt="ey...", # your tenant JWT from the CacheCore dashboard
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
oai = AsyncOpenAI(
|
|
55
|
+
api_key="ignored", # gateway injects its own upstream key
|
|
56
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
57
|
+
http_client=httpx.AsyncClient(transport=cc.transport),
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Requests now carry your tenant identity.
|
|
61
|
+
# Semantically similar prompts hit L2 cache.
|
|
62
|
+
resp = await oai.chat.completions.create(
|
|
63
|
+
model="gpt-4o",
|
|
64
|
+
messages=[{"role": "user", "content": "Explain photosynthesis"}],
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Rung 3 — dep invalidation
|
|
69
|
+
|
|
70
|
+
Declare which data a cached response depends on. When that data changes, invalidate the dep
|
|
71
|
+
and all stale entries are evicted automatically.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from cachecore import CacheCoreClient, Dep
|
|
75
|
+
import httpx
|
|
76
|
+
from openai import AsyncOpenAI
|
|
77
|
+
|
|
78
|
+
cc = CacheCoreClient(
|
|
79
|
+
gateway_url="https://gateway.cachecore.it",
|
|
80
|
+
tenant_jwt="ey...",
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
oai = AsyncOpenAI(
|
|
84
|
+
api_key="ignored",
|
|
85
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
86
|
+
http_client=httpx.AsyncClient(transport=cc.transport),
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# Read path — declare what data this response depends on
|
|
90
|
+
with cc.request_context(deps=[Dep("table:products"), Dep("table:orders")]):
|
|
91
|
+
resp = await oai.chat.completions.create(
|
|
92
|
+
model="gpt-4o",
|
|
93
|
+
messages=[{"role": "user", "content": "List all products under $50"}],
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Write path — bypass cache for the LLM call, then invalidate
|
|
97
|
+
with cc.request_context(bypass=True):
|
|
98
|
+
resp = await oai.chat.completions.create(
|
|
99
|
+
model="gpt-4o",
|
|
100
|
+
messages=[{"role": "user", "content": "Confirm order created."}],
|
|
101
|
+
)
|
|
102
|
+
await cc.invalidate("table:products")
|
|
103
|
+
|
|
104
|
+
# Invalidate multiple deps at once
|
|
105
|
+
await cc.invalidate_many(["table:orders", "table:products"])
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Works with LangChain / LangGraph
|
|
109
|
+
|
|
110
|
+
The transport works with any SDK that accepts an `httpx.AsyncClient`:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from langchain_openai import ChatOpenAI
|
|
114
|
+
import httpx
|
|
115
|
+
from cachecore import CacheCoreClient, Dep
|
|
116
|
+
|
|
117
|
+
cc = CacheCoreClient(gateway_url="https://gateway.cachecore.it", tenant_jwt="ey...")
|
|
118
|
+
|
|
119
|
+
llm = ChatOpenAI(
|
|
120
|
+
model="gpt-4o",
|
|
121
|
+
api_key="ignored",
|
|
122
|
+
base_url="https://gateway.cachecore.it/v1",
|
|
123
|
+
http_async_client=httpx.AsyncClient(transport=cc.transport),
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
# Use request_context() around any ainvoke / astream call
|
|
127
|
+
with cc.request_context(deps=[Dep("doc:policy-42")]):
|
|
128
|
+
result = await llm.ainvoke("Summarise the compliance policy")
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## API reference
|
|
132
|
+
|
|
133
|
+
### `CacheCoreClient`
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
CacheCoreClient(
|
|
137
|
+
gateway_url: str, # "https://gateway.cachecore.it"
|
|
138
|
+
tenant_jwt: str, # tenant HS256/RS256 JWT
|
|
139
|
+
timeout: float = 30.0, # for invalidation calls
|
|
140
|
+
debug: bool = False, # log cache status per request
|
|
141
|
+
)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
| Property / Method | Description |
|
|
145
|
+
|---|---|
|
|
146
|
+
| `.transport` | `httpx.AsyncBaseTransport` — pass to `httpx.AsyncClient(transport=...)` |
|
|
147
|
+
| `.request_context(deps, bypass)` | Context manager — sets per-request deps / bypass |
|
|
148
|
+
| `await .invalidate(dep_id)` | Evict all entries tagged with this dep |
|
|
149
|
+
| `await .invalidate_many(dep_ids)` | Invalidate multiple deps concurrently |
|
|
150
|
+
| `await .aclose()` | Close HTTP clients. Also works as `async with CacheCoreClient(...):` |
|
|
151
|
+
|
|
152
|
+
### `Dep` / `DepDeclaration`
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
Dep("table:products") # simple — hash defaults to "v1"
|
|
156
|
+
Dep("table:products", hash="abc123") # explicit hash for versioned deps
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### `CacheStatus`
|
|
160
|
+
|
|
161
|
+
Parsed from response headers after a proxied request:
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
from cachecore import CacheStatus
|
|
165
|
+
|
|
166
|
+
status = CacheStatus.from_headers(response.headers)
|
|
167
|
+
# status.status → "HIT_L1" | "HIT_L1_STALE" | "HIT_L2" | "MISS" | "BYPASS" | "UNKNOWN"
|
|
168
|
+
# status.similarity → float 0.0–1.0 (non-zero on L2 hits)
|
|
169
|
+
# status.age_seconds → int
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Exceptions
|
|
173
|
+
|
|
174
|
+
| Exception | When |
|
|
175
|
+
|---|---|
|
|
176
|
+
| `CacheCoreError` | Base class for all CacheCore errors |
|
|
177
|
+
| `CacheCoreAuthError` | 401 / 403 from the gateway |
|
|
178
|
+
| `CacheCoreRateLimitError` | 429 — check `.retry_after` attribute (seconds, or `None`) |
|
|
179
|
+
|
|
180
|
+
## How it works
|
|
181
|
+
|
|
182
|
+
The client injects headers at the httpx transport layer — below the LLM SDK, above the network. Your SDK continues to work exactly as before:
|
|
183
|
+
|
|
184
|
+
```
|
|
185
|
+
Your code → openai SDK → httpx → [CacheCoreTransport] → CacheCore proxy → OpenAI API
|
|
186
|
+
↑
|
|
187
|
+
injects X-CacheCore-Token
|
|
188
|
+
injects X-CacheCore-Deps
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Requirements
|
|
192
|
+
|
|
193
|
+
- Python 3.10+
|
|
194
|
+
- `httpx >= 0.25.0`
|
|
195
|
+
|
|
196
|
+
## Links
|
|
197
|
+
|
|
198
|
+
- Website: [cachecore.it](https://cachecore.it)
|
|
199
|
+
- Source: [github.com/cachecore/cachecore-python](https://github.com/cachecore/cachecore-python)
|
|
200
|
+
|
|
201
|
+
## License
|
|
202
|
+
|
|
203
|
+
MIT — see [LICENSE](LICENSE)
|