hypercache-kv 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hyper Cache
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ Note: this license applies to the SDK source code in this repository only.
24
+ The Hyper Cache codec algorithm — which runs only inside Cloudflare Workers
25
+ as compiled WebAssembly and is accessed via HTTP — is proprietary, patent-
26
+ pending, and not licensed under MIT. This SDK is a thin HTTP wrapper and
27
+ contains zero codec algorithm code.
@@ -0,0 +1,131 @@
1
+ Metadata-Version: 2.4
2
+ Name: hypercache-kv
3
+ Version: 0.1.0
4
+ Summary: Skip repeated LLM calls, skip GPU prefill, prove what happened — a thin client for the Hyper Cache API.
5
+ Author-email: Hyper Cache <contact@hypercache.ai>
6
+ License: MIT
7
+ Project-URL: homepage, https://hypercache.ai
8
+ Project-URL: documentation, https://hypercache.ai/docs
9
+ Project-URL: repository, https://github.com/Hyper-Cache/hypercache-kv
10
+ Project-URL: issues, https://github.com/Hyper-Cache/hypercache-kv/issues
11
+ Keywords: ai,ml,llm,cache,fingerprint,audit,inference
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Provides-Extra: numpy
24
+ Requires-Dist: numpy>=1.20; extra == "numpy"
25
+ Provides-Extra: torch
26
+ Requires-Dist: torch>=2.0; extra == "torch"
27
+ Provides-Extra: test
28
+ Requires-Dist: pytest>=7; extra == "test"
29
+ Requires-Dist: numpy>=1.20; extra == "test"
30
+ Dynamic: license-file
31
+
32
+ # hypercache (Python SDK)
33
+
34
+ Thin client for the [Hyper Cache](https://hypercache.ai) API. Zero runtime dependencies (stdlib only).
35
+
36
+ Hyper Cache is a single service on Cloudflare: a small, fast, server-locked codec that gives any input a tamper-evident 90-byte ID, plus a content-addressed cache and chain. **This SDK is a thin HTTP wrapper** — the codec algorithm runs only inside our WASM binary on Cloudflare, never on your machine.
37
+
38
+ ## Three gains, one primitive
39
+
40
+ ```python
41
+ from hypercache.workflows import Pipeline
42
+
43
+ with Pipeline("my_pipeline") as p:
44
+ # Skip repeated LLM calls — same input next time, cached response back
45
+ answer, was_hit = p.cached(
46
+ label="gpt_call",
47
+ input_bytes=prompt.encode("utf-8"),
48
+ compute=lambda: call_openai(prompt),
49
+ )
50
+
51
+ # Prove what happened — every step gets a verifiable fingerprint
52
+ p.record("output", answer.encode("utf-8"))
53
+
54
+ print(f"{p.report.n_hits} hits / {p.report.n_misses} misses")
55
+ audit_chain = p.report.export_audit()
56
+ ```
57
+
58
+ One template, three gains: skip repeated work (cache), prove what happened (chain), and get stats for your dashboard (report).
59
+
60
+ ## Install
61
+
62
+ ```bash
63
+ pip install hypercache-kv
64
+ export HYPERCACHE_KEY=hck_...
65
+ ```
66
+
67
+ Get a key at [hypercache.ai](https://hypercache.ai).
68
+
69
+ ## The three gains in detail
70
+
71
+ **1. Skip repeated LLM calls.** Same prompt → cached response in milliseconds. Measured 7.6× faster on cache hit against real Phi-3-mini calls.
72
+
73
+ ```python
74
+ from hypercache.workflows import cached_completion
75
+
76
+ text, was_hit = cached_completion(
77
+ prompt="Translate to French: Hello",
78
+ compute=lambda p: call_openai(p),
79
+ )
80
+ ```
81
+
82
+ Or wrap your OpenAI / Anthropic client directly:
83
+
84
+ ```python
85
+ from openai import OpenAI
86
+ from hypercache.workflows import wrap_openai
87
+
88
+ client = wrap_openai(OpenAI())
89
+ resp = client.chat.completions.create(model="gpt-4o-mini", messages=[...])
90
+ ```
91
+
92
+ **2. Skip repeated GPU prefill.** For self-hosted inference (vLLM, llama-cpp, SGLang, TRT-LLM) with reused system prompts or RAG contexts. Measured 21.8× faster than cold prefill on Phi-3-mini at 1199 tokens. See [docs/02_skip_gpu_prefill.md](../../docs/02_skip_gpu_prefill.md).
93
+
94
+ **3. Prove what happened.** Every fingerprint chains algebraically to the prior one. The chain is mathematically verifiable, server-locked against forgery, and exportable for compliance.
95
+
96
+ ```python
97
+ from hypercache.workflows import audit_chain
98
+
99
+ with audit_chain() as chain:
100
+ r1 = chain.fingerprint(input_bytes)
101
+ r2 = chain.fingerprint(model_output)
102
+ r3 = chain.fingerprint(reviewer_note)
103
+ ```
104
+
105
+ ## Low-level API
106
+
107
+ ```python
108
+ import hypercache
109
+
110
+ result = hypercache.cache_lookup(b"some input bytes")
111
+ if result.hit:
112
+ print(result.value)
113
+ else:
114
+ hypercache.cache_put(result.fingerprint_hex, b"my expensive output", ttl=3600)
115
+
116
+ results = hypercache.cache_lookup_batch([b"in 1", b"in 2", b"in 3"])
117
+
118
+ fp = hypercache.fingerprint(b"any bytes")
119
+ print(fp.record_hex)
120
+ ```
121
+
122
+ ## What's open / what's closed
123
+
124
+ - **Open (this SDK):** thin HTTP wrappers + workflow templates. MIT licensed.
125
+ - **Closed:** the codec algorithm itself runs only inside our WASM binary on Cloudflare. You interact with it via HTTP.
126
+
127
+ The separation is intentional: the codec's mathematical integrity (forgery resistance for audit, byte-precision for caching) requires that no one, including customers, be able to replicate or modify the algorithm. This SDK contains zero codec code; PRs that try to add codec algorithm code locally will be closed.
128
+
129
+ ## License
130
+
131
+ MIT. See [LICENSE](./LICENSE).
@@ -0,0 +1,100 @@
1
+ # hypercache (Python SDK)
2
+
3
+ Thin client for the [Hyper Cache](https://hypercache.ai) API. Zero runtime dependencies (stdlib only).
4
+
5
+ Hyper Cache is a single service on Cloudflare: a small, fast, server-locked codec that gives any input a tamper-evident 90-byte ID, plus a content-addressed cache and chain. **This SDK is a thin HTTP wrapper** — the codec algorithm runs only inside our WASM binary on Cloudflare, never on your machine.
6
+
7
+ ## Three gains, one primitive
8
+
9
+ ```python
10
+ from hypercache.workflows import Pipeline
11
+
12
+ with Pipeline("my_pipeline") as p:
13
+ # Skip repeated LLM calls — same input next time, cached response back
14
+ answer, was_hit = p.cached(
15
+ label="gpt_call",
16
+ input_bytes=prompt.encode("utf-8"),
17
+ compute=lambda: call_openai(prompt),
18
+ )
19
+
20
+ # Prove what happened — every step gets a verifiable fingerprint
21
+ p.record("output", answer.encode("utf-8"))
22
+
23
+ print(f"{p.report.n_hits} hits / {p.report.n_misses} misses")
24
+ audit_chain = p.report.export_audit()
25
+ ```
26
+
27
+ One template, three gains: skip repeated work (cache), prove what happened (chain), and get stats for your dashboard (report).
28
+
29
+ ## Install
30
+
31
+ ```bash
32
+ pip install hypercache-kv
33
+ export HYPERCACHE_KEY=hck_...
34
+ ```
35
+
36
+ Get a key at [hypercache.ai](https://hypercache.ai).
37
+
38
+ ## The three gains in detail
39
+
40
+ **1. Skip repeated LLM calls.** Same prompt → cached response in milliseconds. Measured 7.6× faster on cache hit against real Phi-3-mini calls.
41
+
42
+ ```python
43
+ from hypercache.workflows import cached_completion
44
+
45
+ text, was_hit = cached_completion(
46
+ prompt="Translate to French: Hello",
47
+ compute=lambda p: call_openai(p),
48
+ )
49
+ ```
50
+
51
+ Or wrap your OpenAI / Anthropic client directly:
52
+
53
+ ```python
54
+ from openai import OpenAI
55
+ from hypercache.workflows import wrap_openai
56
+
57
+ client = wrap_openai(OpenAI())
58
+ resp = client.chat.completions.create(model="gpt-4o-mini", messages=[...])
59
+ ```
60
+
61
+ **2. Skip repeated GPU prefill.** For self-hosted inference (vLLM, llama-cpp, SGLang, TRT-LLM) with reused system prompts or RAG contexts. Measured 21.8× faster than cold prefill on Phi-3-mini at 1199 tokens. See [docs/02_skip_gpu_prefill.md](../../docs/02_skip_gpu_prefill.md).
62
+
63
+ **3. Prove what happened.** Every fingerprint chains algebraically to the prior one. The chain is mathematically verifiable, server-locked against forgery, and exportable for compliance.
64
+
65
+ ```python
66
+ from hypercache.workflows import audit_chain
67
+
68
+ with audit_chain() as chain:
69
+ r1 = chain.fingerprint(input_bytes)
70
+ r2 = chain.fingerprint(model_output)
71
+ r3 = chain.fingerprint(reviewer_note)
72
+ ```
73
+
74
+ ## Low-level API
75
+
76
+ ```python
77
+ import hypercache
78
+
79
+ result = hypercache.cache_lookup(b"some input bytes")
80
+ if result.hit:
81
+ print(result.value)
82
+ else:
83
+ hypercache.cache_put(result.fingerprint_hex, b"my expensive output", ttl=3600)
84
+
85
+ results = hypercache.cache_lookup_batch([b"in 1", b"in 2", b"in 3"])
86
+
87
+ fp = hypercache.fingerprint(b"any bytes")
88
+ print(fp.record_hex)
89
+ ```
90
+
91
+ ## What's open / what's closed
92
+
93
+ - **Open (this SDK):** thin HTTP wrappers + workflow templates. MIT licensed.
94
+ - **Closed:** the codec algorithm itself runs only inside our WASM binary on Cloudflare. You interact with it via HTTP.
95
+
96
+ The separation is intentional: the codec's mathematical integrity (forgery resistance for audit, byte-precision for caching) requires that no one, including customers, be able to replicate or modify the algorithm. This SDK contains zero codec code; PRs that try to add codec algorithm code locally will be closed.
97
+
98
+ ## License
99
+
100
+ MIT. See [LICENSE](./LICENSE).
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hypercache-kv"
7
+ version = "0.1.0"
8
+ description = "Skip repeated LLM calls, skip GPU prefill, prove what happened — a thin client for the Hyper Cache API."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Hyper Cache", email = "contact@hypercache.ai" }]
13
+ keywords = ["ai", "ml", "llm", "cache", "fingerprint", "audit", "inference"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.9",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ ]
24
+ # Zero runtime dependencies — stdlib only.
25
+
26
+ [project.optional-dependencies]
27
+ numpy = ["numpy>=1.20"]
28
+ torch = ["torch>=2.0"]
29
+ test = ["pytest>=7", "numpy>=1.20"]
30
+
31
+ [project.urls]
32
+ homepage = "https://hypercache.ai"
33
+ documentation = "https://hypercache.ai/docs"
34
+ repository = "https://github.com/Hyper-Cache/hypercache-kv"
35
+ issues = "https://github.com/Hyper-Cache/hypercache-kv/issues"
36
+
37
+ [tool.setuptools.packages.find]
38
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+