hypercache-kv 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hypercache_kv-0.1.0/LICENSE +27 -0
- hypercache_kv-0.1.0/PKG-INFO +131 -0
- hypercache_kv-0.1.0/README.md +100 -0
- hypercache_kv-0.1.0/pyproject.toml +38 -0
- hypercache_kv-0.1.0/setup.cfg +4 -0
- hypercache_kv-0.1.0/src/hypercache/__init__.py +1310 -0
- hypercache_kv-0.1.0/src/hypercache/workflows.py +442 -0
- hypercache_kv-0.1.0/src/hypercache_kv.egg-info/PKG-INFO +131 -0
- hypercache_kv-0.1.0/src/hypercache_kv.egg-info/SOURCES.txt +11 -0
- hypercache_kv-0.1.0/src/hypercache_kv.egg-info/dependency_links.txt +1 -0
- hypercache_kv-0.1.0/src/hypercache_kv.egg-info/requires.txt +10 -0
- hypercache_kv-0.1.0/src/hypercache_kv.egg-info/top_level.txt +1 -0
- hypercache_kv-0.1.0/tests/test_smoke.py +100 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Hyper Cache
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
Note: this license applies to the SDK source code in this repository only.
|
|
24
|
+
The Hyper Cache codec algorithm — which runs only inside Cloudflare Workers
|
|
25
|
+
as compiled WebAssembly and is accessed via HTTP — is proprietary, patent-
|
|
26
|
+
pending, and not licensed under MIT. This SDK is a thin HTTP wrapper and
|
|
27
|
+
contains zero codec algorithm code.
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hypercache-kv
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Skip repeated LLM calls, skip GPU prefill, prove what happened — a thin client for the Hyper Cache API.
|
|
5
|
+
Author-email: Hyper Cache <contact@hypercache.ai>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: homepage, https://hypercache.ai
|
|
8
|
+
Project-URL: documentation, https://hypercache.ai/docs
|
|
9
|
+
Project-URL: repository, https://github.com/Hyper-Cache/hypercache-kv
|
|
10
|
+
Project-URL: issues, https://github.com/Hyper-Cache/hypercache-kv/issues
|
|
11
|
+
Keywords: ai,ml,llm,cache,fingerprint,audit,inference
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Provides-Extra: numpy
|
|
24
|
+
Requires-Dist: numpy>=1.20; extra == "numpy"
|
|
25
|
+
Provides-Extra: torch
|
|
26
|
+
Requires-Dist: torch>=2.0; extra == "torch"
|
|
27
|
+
Provides-Extra: test
|
|
28
|
+
Requires-Dist: pytest>=7; extra == "test"
|
|
29
|
+
Requires-Dist: numpy>=1.20; extra == "test"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# hypercache (Python SDK)
|
|
33
|
+
|
|
34
|
+
Thin client for the [Hyper Cache](https://hypercache.ai) API. Zero runtime dependencies (stdlib only).
|
|
35
|
+
|
|
36
|
+
Hyper Cache is a single service running on Cloudflare: a small, fast, server-locked codec that gives any input a tamper-evident 90-byte ID, plus a content-addressed cache and chain. **This SDK is a thin HTTP wrapper** — the codec algorithm runs only inside our WASM binary on Cloudflare, never on your machine.
|
|
37
|
+
|
|
38
|
+
## Three gains, one primitive
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from hypercache.workflows import Pipeline
|
|
42
|
+
|
|
43
|
+
with Pipeline("my_pipeline") as p:
|
|
44
|
+
# Skip repeated LLM calls — same input next time, cached response back
|
|
45
|
+
answer, was_hit = p.cached(
|
|
46
|
+
label="gpt_call",
|
|
47
|
+
input_bytes=prompt.encode("utf-8"),
|
|
48
|
+
compute=lambda: call_openai(prompt),
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
# Prove what happened — every step gets a verifiable fingerprint
|
|
52
|
+
p.record("output", answer.encode("utf-8"))
|
|
53
|
+
|
|
54
|
+
print(f"{p.report.n_hits} hits / {p.report.n_misses} misses")
|
|
55
|
+
audit_chain = p.report.export_audit()
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
One template, three gains: skip repeated work (cache), prove what happened (chain), stats for your dashboard (report).
|
|
59
|
+
|
|
60
|
+
## Install
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install hypercache-kv
|
|
64
|
+
export HYPERCACHE_KEY=hck_...
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Get a key at [hypercache.ai](https://hypercache.ai).
|
|
68
|
+
|
|
69
|
+
## The three gains in detail
|
|
70
|
+
|
|
71
|
+
**1. Skip repeated LLM calls.** Same prompt → cached response in milliseconds. Measured 7.6× faster on a cache hit than a real Phi-3-mini call.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from hypercache.workflows import cached_completion
|
|
75
|
+
|
|
76
|
+
text, was_hit = cached_completion(
|
|
77
|
+
prompt="Translate to French: Hello",
|
|
78
|
+
compute=lambda p: call_openai(p),
|
|
79
|
+
)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Or wrap your OpenAI / Anthropic client directly:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from openai import OpenAI
|
|
86
|
+
from hypercache.workflows import wrap_openai
|
|
87
|
+
|
|
88
|
+
client = wrap_openai(OpenAI())
|
|
89
|
+
resp = client.chat.completions.create(model="gpt-4o-mini", messages=[...])
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**2. Skip repeated GPU prefill.** For self-hosted inference (vLLM, llama-cpp, SGLang, TRT-LLM) with reused system prompts or RAG contexts. Measured 21.8× faster than cold prefill on Phi-3-mini at 1199 tokens. See `docs/02_skip_gpu_prefill.md` in the [repository](https://github.com/Hyper-Cache/hypercache-kv) or the [online documentation](https://hypercache.ai/docs).
|
|
93
|
+
|
|
94
|
+
**3. Prove what happened.** Every fingerprint chains algebraically to the prior one. The chain is mathematically verifiable, server-locked against forgery, and exportable for compliance.
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from hypercache.workflows import audit_chain
|
|
98
|
+
|
|
99
|
+
with audit_chain() as chain:
|
|
100
|
+
r1 = chain.fingerprint(input_bytes)
|
|
101
|
+
r2 = chain.fingerprint(model_output)
|
|
102
|
+
r3 = chain.fingerprint(reviewer_note)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Low-level API
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
import hypercache
|
|
109
|
+
|
|
110
|
+
result = hypercache.cache_lookup(b"some input bytes")
|
|
111
|
+
if result.hit:
|
|
112
|
+
print(result.value)
|
|
113
|
+
else:
|
|
114
|
+
hypercache.cache_put(result.fingerprint_hex, b"my expensive output", ttl=3600)
|
|
115
|
+
|
|
116
|
+
results = hypercache.cache_lookup_batch([b"in 1", b"in 2", b"in 3"])
|
|
117
|
+
|
|
118
|
+
fp = hypercache.fingerprint(b"any bytes")
|
|
119
|
+
print(fp.record_hex)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## What's open / what's closed
|
|
123
|
+
|
|
124
|
+
- **Open (this SDK):** thin HTTP wrappers + workflow templates. MIT licensed.
|
|
125
|
+
- **Closed:** the codec algorithm itself runs only inside our WASM binary on Cloudflare. You interact with it via HTTP.
|
|
126
|
+
|
|
127
|
+
The separation is intentional: the codec's mathematical integrity (forgery resistance for audit, byte-precision for caching) requires that the algorithm cannot be replicated or modified by anyone, including customers. This SDK contains zero codec code; PRs that try to add codec algorithm code locally will be closed.
|
|
128
|
+
|
|
129
|
+
## License
|
|
130
|
+
|
|
131
|
+
MIT. See [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# hypercache (Python SDK)
|
|
2
|
+
|
|
3
|
+
Thin client for the [Hyper Cache](https://hypercache.ai) API. Zero runtime dependencies (stdlib only).
|
|
4
|
+
|
|
5
|
+
Hyper Cache is a single service running on Cloudflare: a small, fast, server-locked codec that gives any input a tamper-evident 90-byte ID, plus a content-addressed cache and chain. **This SDK is a thin HTTP wrapper** — the codec algorithm runs only inside our WASM binary on Cloudflare, never on your machine.
|
|
6
|
+
|
|
7
|
+
## Three gains, one primitive
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from hypercache.workflows import Pipeline
|
|
11
|
+
|
|
12
|
+
with Pipeline("my_pipeline") as p:
|
|
13
|
+
# Skip repeated LLM calls — same input next time, cached response back
|
|
14
|
+
answer, was_hit = p.cached(
|
|
15
|
+
label="gpt_call",
|
|
16
|
+
input_bytes=prompt.encode("utf-8"),
|
|
17
|
+
compute=lambda: call_openai(prompt),
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Prove what happened — every step gets a verifiable fingerprint
|
|
21
|
+
p.record("output", answer.encode("utf-8"))
|
|
22
|
+
|
|
23
|
+
print(f"{p.report.n_hits} hits / {p.report.n_misses} misses")
|
|
24
|
+
audit_chain = p.report.export_audit()
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
One template, three gains: skip repeated work (cache), prove what happened (chain), stats for your dashboard (report).
|
|
28
|
+
|
|
29
|
+
## Install
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install hypercache-kv
|
|
33
|
+
export HYPERCACHE_KEY=hck_...
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Get a key at [hypercache.ai](https://hypercache.ai).
|
|
37
|
+
|
|
38
|
+
## The three gains in detail
|
|
39
|
+
|
|
40
|
+
**1. Skip repeated LLM calls.** Same prompt → cached response in milliseconds. Measured 7.6× faster on a cache hit than a real Phi-3-mini call.
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from hypercache.workflows import cached_completion
|
|
44
|
+
|
|
45
|
+
text, was_hit = cached_completion(
|
|
46
|
+
prompt="Translate to French: Hello",
|
|
47
|
+
compute=lambda p: call_openai(p),
|
|
48
|
+
)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Or wrap your OpenAI / Anthropic client directly:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from openai import OpenAI
|
|
55
|
+
from hypercache.workflows import wrap_openai
|
|
56
|
+
|
|
57
|
+
client = wrap_openai(OpenAI())
|
|
58
|
+
resp = client.chat.completions.create(model="gpt-4o-mini", messages=[...])
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**2. Skip repeated GPU prefill.** For self-hosted inference (vLLM, llama-cpp, SGLang, TRT-LLM) with reused system prompts or RAG contexts. Measured 21.8× faster than cold prefill on Phi-3-mini at 1199 tokens. See `docs/02_skip_gpu_prefill.md` in the [repository](https://github.com/Hyper-Cache/hypercache-kv) or the [online documentation](https://hypercache.ai/docs).
|
|
62
|
+
|
|
63
|
+
**3. Prove what happened.** Every fingerprint chains algebraically to the prior one. The chain is mathematically verifiable, server-locked against forgery, and exportable for compliance.
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from hypercache.workflows import audit_chain
|
|
67
|
+
|
|
68
|
+
with audit_chain() as chain:
|
|
69
|
+
r1 = chain.fingerprint(input_bytes)
|
|
70
|
+
r2 = chain.fingerprint(model_output)
|
|
71
|
+
r3 = chain.fingerprint(reviewer_note)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Low-level API
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import hypercache
|
|
78
|
+
|
|
79
|
+
result = hypercache.cache_lookup(b"some input bytes")
|
|
80
|
+
if result.hit:
|
|
81
|
+
print(result.value)
|
|
82
|
+
else:
|
|
83
|
+
hypercache.cache_put(result.fingerprint_hex, b"my expensive output", ttl=3600)
|
|
84
|
+
|
|
85
|
+
results = hypercache.cache_lookup_batch([b"in 1", b"in 2", b"in 3"])
|
|
86
|
+
|
|
87
|
+
fp = hypercache.fingerprint(b"any bytes")
|
|
88
|
+
print(fp.record_hex)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## What's open / what's closed
|
|
92
|
+
|
|
93
|
+
- **Open (this SDK):** thin HTTP wrappers + workflow templates. MIT licensed.
|
|
94
|
+
- **Closed:** the codec algorithm itself runs only inside our WASM binary on Cloudflare. You interact with it via HTTP.
|
|
95
|
+
|
|
96
|
+
The separation is intentional: the codec's mathematical integrity (forgery resistance for audit, byte-precision for caching) requires that the algorithm cannot be replicated or modified by anyone, including customers. This SDK contains zero codec code; PRs that try to add codec algorithm code locally will be closed.
|
|
97
|
+
|
|
98
|
+
## License
|
|
99
|
+
|
|
100
|
+
MIT. See [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "hypercache-kv"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Skip repeated LLM calls, skip GPU prefill, prove what happened — a thin client for the Hyper Cache API."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Hyper Cache", email = "contact@hypercache.ai" }]
|
|
13
|
+
keywords = ["ai", "ml", "llm", "cache", "fingerprint", "audit", "inference"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
23
|
+
]
|
|
24
|
+
# Zero runtime dependencies — stdlib only.
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
numpy = ["numpy>=1.20"]
|
|
28
|
+
torch = ["torch>=2.0"]
|
|
29
|
+
test = ["pytest>=7", "numpy>=1.20"]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
homepage = "https://hypercache.ai"
|
|
33
|
+
documentation = "https://hypercache.ai/docs"
|
|
34
|
+
repository = "https://github.com/Hyper-Cache/hypercache-kv"
|
|
35
|
+
issues = "https://github.com/Hyper-Cache/hypercache-kv/issues"
|
|
36
|
+
|
|
37
|
+
[tool.setuptools.packages.find]
|
|
38
|
+
where = ["src"]
|