prismlib 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prismlib-0.3.0/PKG-INFO +529 -0
- prismlib-0.3.0/README.md +463 -0
- prismlib-0.3.0/prism/__init__.py +9 -0
- prismlib-0.3.0/prism/bridge/__init__.py +32 -0
- prismlib-0.3.0/prism/bridge/vector.py +704 -0
- prismlib-0.3.0/prism/cache/__init__.py +54 -0
- prismlib-0.3.0/prism/cache/cache.py +597 -0
- prismlib-0.3.0/prism/cache/embedder.py +438 -0
- prismlib-0.3.0/prism/cache/metrics.py +273 -0
- prismlib-0.3.0/prism/cache/store.py +370 -0
- prismlib-0.3.0/prism/ffi/__init__.py +34 -0
- prismlib-0.3.0/prism/ffi/bindings.py +841 -0
- prismlib-0.3.0/prism/lib/__init__.py +17 -0
- prismlib-0.3.0/prism/lib/fabric.py +567 -0
- prismlib-0.3.0/prism/lib/lang.py +531 -0
- prismlib-0.3.0/prism/lib/resonance.py +691 -0
- prismlib-0.3.0/prism/wrapper/__init__.py +49 -0
- prismlib-0.3.0/prism/wrapper/config.py +116 -0
- prismlib-0.3.0/prism/wrapper/daemon.py +236 -0
- prismlib-0.3.0/prism/wrapper/interceptor.py +524 -0
- prismlib-0.3.0/prism/wrapper/publisher.py +229 -0
- prismlib-0.3.0/prismlib.egg-info/PKG-INFO +529 -0
- prismlib-0.3.0/prismlib.egg-info/SOURCES.txt +34 -0
- prismlib-0.3.0/prismlib.egg-info/dependency_links.txt +1 -0
- prismlib-0.3.0/prismlib.egg-info/entry_points.txt +2 -0
- prismlib-0.3.0/prismlib.egg-info/requires.txt +54 -0
- prismlib-0.3.0/prismlib.egg-info/top_level.txt +1 -0
- prismlib-0.3.0/pyproject.toml +125 -0
- prismlib-0.3.0/setup.cfg +4 -0
- prismlib-0.3.0/tests/test_bridge_vector.py +245 -0
- prismlib-0.3.0/tests/test_cache.py +450 -0
- prismlib-0.3.0/tests/test_fabric.py +140 -0
- prismlib-0.3.0/tests/test_ffi.py +144 -0
- prismlib-0.3.0/tests/test_lang.py +175 -0
- prismlib-0.3.0/tests/test_resonance.py +194 -0
- prismlib-0.3.0/tests/test_wrapper.py +331 -0
prismlib-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,529 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: prismlib
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Tensor-native semantic cache and distributed data plane — PrismLib
|
|
5
|
+
Author-email: InsightIts <insightits.info@gmail.com>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/insightitsGit/prismlib
|
|
8
|
+
Project-URL: Repository, https://github.com/insightitsGit/prismlib
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/insightitsGit/prismlib/issues
|
|
10
|
+
Keywords: llm,cache,semantic,vector,tensor,grpc
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: numpy>=1.26.0
|
|
22
|
+
Requires-Dist: onnxruntime>=1.17.0
|
|
23
|
+
Requires-Dist: onnx>=1.15.0
|
|
24
|
+
Provides-Extra: cache
|
|
25
|
+
Requires-Dist: sentence-transformers>=2.7.0; extra == "cache"
|
|
26
|
+
Provides-Extra: cache-openai
|
|
27
|
+
Requires-Dist: openai>=1.30.0; extra == "cache-openai"
|
|
28
|
+
Provides-Extra: cache-anthropic
|
|
29
|
+
Requires-Dist: anthropic>=0.28.0; extra == "cache-anthropic"
|
|
30
|
+
Requires-Dist: voyageai>=0.2.0; extra == "cache-anthropic"
|
|
31
|
+
Provides-Extra: cache-ollama
|
|
32
|
+
Requires-Dist: httpx>=0.27.0; extra == "cache-ollama"
|
|
33
|
+
Provides-Extra: fabric
|
|
34
|
+
Requires-Dist: grpcio>=1.62.0; extra == "fabric"
|
|
35
|
+
Requires-Dist: grpcio-tools>=1.62.0; extra == "fabric"
|
|
36
|
+
Requires-Dist: protobuf>=4.25.0; extra == "fabric"
|
|
37
|
+
Provides-Extra: wrapper
|
|
38
|
+
Requires-Dist: grpcio>=1.62.0; extra == "wrapper"
|
|
39
|
+
Requires-Dist: grpcio-tools>=1.62.0; extra == "wrapper"
|
|
40
|
+
Requires-Dist: protobuf>=4.25.0; extra == "wrapper"
|
|
41
|
+
Requires-Dist: asyncpg>=0.29.0; extra == "wrapper"
|
|
42
|
+
Requires-Dist: aiomysql>=0.2.0; extra == "wrapper"
|
|
43
|
+
Provides-Extra: vector
|
|
44
|
+
Requires-Dist: asyncpg>=0.29.0; extra == "vector"
|
|
45
|
+
Requires-Dist: chromadb>=0.5.0; extra == "vector"
|
|
46
|
+
Requires-Dist: qdrant-client>=1.9.0; extra == "vector"
|
|
47
|
+
Provides-Extra: all
|
|
48
|
+
Requires-Dist: sentence-transformers>=2.7.0; extra == "all"
|
|
49
|
+
Requires-Dist: openai>=1.30.0; extra == "all"
|
|
50
|
+
Requires-Dist: anthropic>=0.28.0; extra == "all"
|
|
51
|
+
Requires-Dist: voyageai>=0.2.0; extra == "all"
|
|
52
|
+
Requires-Dist: httpx>=0.27.0; extra == "all"
|
|
53
|
+
Requires-Dist: grpcio>=1.62.0; extra == "all"
|
|
54
|
+
Requires-Dist: grpcio-tools>=1.62.0; extra == "all"
|
|
55
|
+
Requires-Dist: protobuf>=4.25.0; extra == "all"
|
|
56
|
+
Requires-Dist: asyncpg>=0.29.0; extra == "all"
|
|
57
|
+
Requires-Dist: aiomysql>=0.2.0; extra == "all"
|
|
58
|
+
Requires-Dist: chromadb>=0.5.0; extra == "all"
|
|
59
|
+
Requires-Dist: qdrant-client>=1.9.0; extra == "all"
|
|
60
|
+
Provides-Extra: dev
|
|
61
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
62
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
63
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
|
|
64
|
+
Requires-Dist: mypy>=1.10.0; extra == "dev"
|
|
65
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
66
|
+
|
|
67
|
+
# PrismLib
|
|
68
|
+
|
|
69
|
+
[PyPI version](https://pypi.org/project/prismlib/)
|
|
70
|
+
[Python versions](https://pypi.org/project/prismlib/)
|
|
71
|
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
|
72
|
+
[GitHub repository](https://github.com/insightitsGit/prismlib)
|
|
73
|
+
|
|
74
|
+
**Tensor-native semantic cache and distributed data plane.**
|
|
75
|
+
|
|
76
|
+
Two products, one mathematical core:
|
|
77
|
+
|
|
78
|
+
| Product | Component | Deployed on | Install |
|
|
79
|
+
|---------|-----------|-------------|---------|
|
|
80
|
+
| **PrismCache** | In-process LLM cache | App node | `pip install "prismlib[cache]"` |
|
|
81
|
+
| **PrismDriver** | **Server Wrapper** (daemon) | **DB node** | `pip install "prismlib[wrapper]"` |
|
|
82
|
+
| **PrismDriver** | **DLL Driver** (in-process) | **App node** | `pip install "prismlib[fabric]"` |
|
|
83
|
+
|
|
84
|
+
PrismDriver is a two-node system: the **Server Wrapper** runs as an OS daemon on the same machine as your database, intercepts WAL/binlog changes, vectorizes rows, and streams them over CHORUS Fabric to the **DLL Driver** on your app server. The driver keeps a local PrismResonance index warm so reads never leave the process.
|
|
85
|
+
|
|
86
|
+
Built on two open-source InsightIts libraries:
|
|
87
|
+
- **[PrismResonance](https://github.com/insightitsGit/prismresonance)** — the wave-memory similarity engine powering every cache lookup and local vector index
|
|
88
|
+
- **[CHORUS Fabric](https://github.com/insightitsGit/chorus_fabric)** — the gRPC binary streaming protocol that carries encrypted float32 tensor frames from the Server Wrapper to the DLL Driver
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Installation
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Semantic LLM cache only
|
|
96
|
+
pip install "prismlib[cache]"
|
|
97
|
+
|
|
98
|
+
# With OpenAI embeddings
|
|
99
|
+
pip install "prismlib[cache,cache-openai]"
|
|
100
|
+
|
|
101
|
+
# With Anthropic/Voyage embeddings
|
|
102
|
+
pip install "prismlib[cache,cache-anthropic]"
|
|
103
|
+
|
|
104
|
+
# With Ollama (local models)
|
|
105
|
+
pip install "prismlib[cache,cache-ollama]"
|
|
106
|
+
|
|
107
|
+
# DB driver (app node)
|
|
108
|
+
pip install "prismlib[fabric]"
|
|
109
|
+
|
|
110
|
+
# Server Wrapper daemon (DB node — Linux/macOS)
|
|
111
|
+
pip install "prismlib[wrapper]"
|
|
112
|
+
prism-wrapper --config /etc/prism/wrapper.toml
|
|
113
|
+
|
|
114
|
+
# Everything
|
|
115
|
+
pip install "prismlib[all]"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Use Cases
|
|
121
|
+
|
|
122
|
+
### PrismCache
|
|
123
|
+
|
|
124
|
+
#### Drop-in LLM response cache
|
|
125
|
+
|
|
126
|
+
Save 60–80% of LLM API calls by serving semantically equivalent queries from cache.
|
|
127
|
+
Paraphrases hit the cache — "How do I reset my password?" and "I forgot my password, help" return the same answer without a second LLM call.
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from prism.cache import PrismCache
|
|
131
|
+
|
|
132
|
+
cache = PrismCache.build(tenant_id="my-app", llm_model="gpt-4o")
|
|
133
|
+
|
|
134
|
+
def ask(question: str) -> str:
|
|
135
|
+
return cache.get_or_call(
|
|
136
|
+
query=question,
|
|
137
|
+
call_fn=lambda: openai_client.chat.completions.create(
|
|
138
|
+
model="gpt-4o",
|
|
139
|
+
messages=[{"role": "user", "content": question}],
|
|
140
|
+
).choices[0].message.content,
|
|
141
|
+
)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
#### Multi-tenant SaaS — isolated caches per customer
|
|
145
|
+
|
|
146
|
+
Each tenant gets a mathematically isolated cache space (JL projection seeded by tenant ID).
|
|
147
|
+
One customer's cached answers never bleed into another's.
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from prism.cache import PrismCache
|
|
151
|
+
|
|
152
|
+
def get_cache(tenant_id: str) -> PrismCache:
|
|
153
|
+
return PrismCache.build(tenant_id=tenant_id, llm_model="gpt-4o-mini")
|
|
154
|
+
|
|
155
|
+
# Tenant A and tenant B share no cache state
|
|
156
|
+
cache_a = get_cache("acme-corp")
|
|
157
|
+
cache_b = get_cache("globex-inc")
|
|
158
|
+
|
|
159
|
+
answer = cache_a.get_or_call(query="What is my plan limit?", call_fn=llm_call)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
#### FastAPI / Django middleware — transparent caching
|
|
163
|
+
|
|
164
|
+
Wrap your existing LLM endpoint without changing any business logic.
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
# FastAPI
|
|
168
|
+
from fastapi import FastAPI, Request
|
|
169
|
+
from prism.cache import PrismCache
|
|
170
|
+
|
|
171
|
+
app = FastAPI()
|
|
172
|
+
cache = PrismCache.build(tenant_id="api", llm_model="gpt-4o")
|
|
173
|
+
|
|
174
|
+
@app.post("/chat")
|
|
175
|
+
async def chat(request: Request):
|
|
176
|
+
body = await request.json()
|
|
177
|
+
question = body["message"]
|
|
178
|
+
answer = await cache.aget_or_call(
|
|
179
|
+
query=question,
|
|
180
|
+
call_fn=lambda: llm_client.ask(question),
|
|
181
|
+
)
|
|
182
|
+
return {"answer": answer}
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
# Django — add to MIDDLEWARE in settings.py
|
|
187
|
+
# prism/middleware.py
|
|
188
|
+
from prism.cache import PrismCache
|
|
189
|
+
|
|
190
|
+
_cache = PrismCache.build(tenant_id="django-app", llm_model="gpt-4o")
|
|
191
|
+
|
|
192
|
+
class PrismCacheMiddleware:
|
|
193
|
+
def __init__(self, get_response):
|
|
194
|
+
self.get_response = get_response
|
|
195
|
+
|
|
196
|
+
def __call__(self, request):
|
|
197
|
+
return self.get_response(request)
|
|
198
|
+
|
|
199
|
+
def process_llm_query(self, question: str, call_fn) -> str:
|
|
200
|
+
return _cache.get_or_call(query=question, call_fn=call_fn)
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
#### Async batch queries
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
import asyncio
|
|
207
|
+
from prism.cache import PrismCache
|
|
208
|
+
|
|
209
|
+
cache = PrismCache.build(tenant_id="batch", llm_model="gpt-4o-mini")
|
|
210
|
+
|
|
211
|
+
async def process_batch(questions: list[str]) -> list[str]:
|
|
212
|
+
tasks = [
|
|
213
|
+
cache.aget_or_call(query=q, call_fn=lambda q=q: llm_call(q))
|
|
214
|
+
for q in questions
|
|
215
|
+
]
|
|
216
|
+
return await asyncio.gather(*tasks)
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
#### Cost estimation
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
from prism.cache import PrismCache
|
|
223
|
+
|
|
224
|
+
cache = PrismCache.build(tenant_id="finance", llm_model="gpt-4o")
|
|
225
|
+
|
|
226
|
+
# After processing queries...
|
|
227
|
+
metrics = cache.metrics()
|
|
228
|
+
print(f"Hit rate: {metrics.hit_rate:.0%}")
|
|
229
|
+
print(f"Tokens saved: {metrics.tokens_saved:,}")
|
|
230
|
+
print(f"Cost saved today: ${metrics.cost_saved_usd:.2f}")
|
|
231
|
+
print(f"Projected monthly: ${metrics.cost_saved_usd * 30:.0f}")
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
### PrismDriver
|
|
237
|
+
|
|
238
|
+
PrismDriver has two components that work together. Install each on the right machine.
|
|
239
|
+
|
|
240
|
+
**On the DB node — Server Wrapper**
|
|
241
|
+
|
|
242
|
+
The Server Wrapper is an OS daemon that sits next to your database. It reads WAL/binlog changes, vectorizes rows using `RowVectorizer`, encrypts them with `TensorCipher` (via CHORUS Fabric), and streams float32 frames to every connected DLL Driver.
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
# Install on the DB node (Linux or macOS)
|
|
246
|
+
pip install "prismlib[wrapper]"
|
|
247
|
+
|
|
248
|
+
# Configure and start
|
|
249
|
+
prism-wrapper --config /etc/prism/wrapper.toml
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
```toml
|
|
253
|
+
# /etc/prism/wrapper.toml
|
|
254
|
+
[database]
|
|
255
|
+
flavor = "postgresql"
|
|
256
|
+
dsn = "postgresql://user:pass@localhost/mydb"
|
|
257
|
+
|
|
258
|
+
[chorus]
|
|
259
|
+
listen_port = 50051
|
|
260
|
+
tenant_id = "products-service"
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
Supported databases: PostgreSQL (WAL / wal2json), MySQL (binlog), CockroachDB (EXPERIMENTAL CHANGEFEED), TiDB (push model).
|
|
264
|
+
|
|
265
|
+
**On the app node — DLL Driver**
|
|
266
|
+
|
|
267
|
+
The DLL Driver is an in-process library that replaces your DB connection string. On startup it connects to the Server Wrapper, subscribes to the CHORUS Fabric stream, and keeps a local PrismResonance index warm. All reads hit the in-process index — no network round-trip, sub-millisecond latency.
|
|
268
|
+
|
|
269
|
+
```bash
|
|
270
|
+
# Install on the app node
|
|
271
|
+
pip install "prismlib[fabric]"
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
#### Replace your DB connection string
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
# Before
|
|
278
|
+
import psycopg2
|
|
279
|
+
conn = psycopg2.connect("postgresql://user:secret@db-host:5432/mydb")
|
|
280
|
+
|
|
281
|
+
# After — no DB password or DB hostname in app config; the app only knows the wrapper's address
|
|
282
|
+
from prism.ffi import PrismDriver, DriverConfig
|
|
283
|
+
|
|
284
|
+
async with PrismDriver(DriverConfig(wrapper_host="db-proxy-1")) as driver:
|
|
285
|
+
results = await driver.query(
|
|
286
|
+
embedding=my_embedding_vector,
|
|
287
|
+
top_k=5,
|
|
288
|
+
threshold=0.85,
|
|
289
|
+
)
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
#### Sub-millisecond row lookups via local cache
|
|
293
|
+
|
|
294
|
+
The driver keeps a local PrismResonance cache warm via a background WAL subscription.
|
|
295
|
+
Reads never touch the DB — they hit the in-process float32 index.
|
|
296
|
+
|
|
297
|
+
```python
|
|
298
|
+
from prism.ffi import PrismDriver, DriverConfig
|
|
299
|
+
import numpy as np
|
|
300
|
+
|
|
301
|
+
config = DriverConfig(
|
|
302
|
+
wrapper_host="10.0.1.50",
|
|
303
|
+
wrapper_port=50051,
|
|
304
|
+
tenant_id="products-service",
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
async with PrismDriver(config) as driver:
|
|
308
|
+
# Typical hit: < 1ms, no network round-trip
|
|
309
|
+
query_vec = np.array([...], dtype=np.float32)
|
|
310
|
+
matches = await driver.query(embedding=query_vec, top_k=10)
|
|
311
|
+
for m in matches:
|
|
312
|
+
print(f"{m.row_id} score={m.score:.3f} {m.text_repr}")
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
#### Write through to DB
|
|
316
|
+
|
|
317
|
+
```python
|
|
318
|
+
async with PrismDriver(config) as driver:
|
|
319
|
+
ack = await driver.write(
|
|
320
|
+
row_id="product-42",
|
|
321
|
+
data={"name": "Widget Pro", "price": 29.99, "stock": 150},
|
|
322
|
+
)
|
|
323
|
+
print(f"Written: event_id={ack.event_id}")
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
#### Go, C#, PHP, Java — same DLL, native bindings
|
|
327
|
+
|
|
328
|
+
```go
|
|
329
|
+
// Go
|
|
330
|
+
import prism "github.com/insightitsGit/prismlib/go"
|
|
331
|
+
|
|
332
|
+
driver, _ := prism.Connect("db-proxy-1:50051", "my-tenant")
|
|
333
|
+
defer driver.Close()
|
|
334
|
+
results, _ := driver.Query(embedding, prism.QueryOpts{TopK: 5, Threshold: 0.85})
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
```csharp
|
|
338
|
+
// C#
|
|
339
|
+
using InsightIts.Prism;
|
|
340
|
+
|
|
341
|
+
await using var driver = new PrismDriver("db-proxy-1:50051", tenantId: "my-tenant");
|
|
342
|
+
await driver.ConnectAsync();
|
|
343
|
+
var results = await driver.QueryAsync(embedding, topK: 5, threshold: 0.85f);
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
```php
|
|
347
|
+
// PHP 8.0+
|
|
348
|
+
$driver = new PrismDriver('db-proxy-1', 50051, 'my-tenant');
|
|
349
|
+
$driver->connect();
|
|
350
|
+
$results = $driver->query($embedding, topK: 5, threshold: 0.85);
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
---
|
|
354
|
+
|
|
355
|
+
## Architecture
|
|
356
|
+
|
|
357
|
+
```
|
|
358
|
+
┌─ DB Node ──────────────────────────────────────────────────────┐
|
|
359
|
+
│ PostgreSQL / MySQL / CockroachDB / TiDB │
|
|
360
|
+
│ │ WAL / binlog / changefeed │
|
|
361
|
+
│ ┌────▼────────────────────────────────────────────────────┐ │
|
|
362
|
+
│ │ prism-wrapper (pip install "prismlib[wrapper]") │ │
|
|
363
|
+
│ │ RowVectorizer → TensorCipher (V_enc = V @ K) │ │
|
|
364
|
+
│ │ → HMAC-SHA256 watermark → CHORUSPublisher │ │
|
|
365
|
+
│ └────────────────────────┬────────────────────────────────┘ │
|
|
366
|
+
└───────────────────────────┼────────────────────────────────────┘
|
|
367
|
+
│ CHORUS Fabric (gRPC, port 50051)
|
|
368
|
+
│ encrypted float32 frames
|
|
369
|
+
┌─ App Node ────────────────┼────────────────────────────────────┐
|
|
370
|
+
│ ┌────────────────────────▼───────────────────────────────┐ │
|
|
371
|
+
│ │ PrismDriver DLL (pip install "prismlib[fabric]") │ │
|
|
372
|
+
│ │ Subscribe loop → decrypt → PrismResonance index │ │
|
|
373
|
+
│ └──────────────────────────────────────────┬─────────────┘ │
|
|
374
|
+
│ │ sub-ms query │
|
|
375
|
+
│ ┌──────────────────────────────────────────▼─────────────┐ │
|
|
376
|
+
│ │ Your Application │ │
|
|
377
|
+
│ │ ┌──────────────────┐ ┌───────────────────────────┐ │ │
|
|
378
|
+
│ │ │ PrismCache │ │ PrismDriver │ │ │
|
|
379
|
+
│ │ │ LLM cache │ │ local PrismResonance │ │ │
|
|
380
|
+
│ │ │ pip install │ │ (no DB round-trip) │ │ │
|
|
381
|
+
│ │ │ prismlib[cache] │ │ │ │ │
|
|
382
|
+
│ │ └──────────────────┘ └───────────────────────────┘ │ │
|
|
383
|
+
│ └─────────────────────────────────────────────────────────┘ │
|
|
384
|
+
└────────────────────────────────────────────────────────────────┘
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
---
|
|
388
|
+
|
|
389
|
+
## Benchmark
|
|
390
|
+
|
|
391
|
+
### PrismCache — semantic LLM cache
|
|
392
|
+
|
|
393
|
+
Live results from Azure Container Apps (`westus2`, 1 vCPU / 2 GiB, mock LLM baseline):
|
|
394
|
+
|
|
395
|
+
| Scenario | Users | Duration | Hit rate | Queries | Tokens saved | Monthly est. |
|
|
396
|
+
|----------|-------|----------|----------|---------|-------------|-------------|
|
|
397
|
+
| Light | 20 | 60s | **91.0%** | 5,936 | 1,374,464 | **$594** |
|
|
398
|
+
| Mixed | 50 | 300s | **95.9%** | 6,973 | 1,673,216 | **$723** |
|
|
399
|
+
|
|
400
|
+
> Numbers use a mock LLM (80ms sleep). With real GPT-4o calls (1–3s), latency speedup is 4–13×; token savings are identical.
|
|
401
|
+
|
|
402
|
+
### PrismDriver — two-node baseline vs local index
|
|
403
|
+
|
|
404
|
+
Live two-node benchmark (Azure Container Apps `westus2`, 30 users × 60s per phase):
|
|
405
|
+
|
|
406
|
+
| Phase | Path | Avg latency | Queries |
|
|
407
|
+
|-------|------|-------------|---------|
|
|
408
|
+
| **Baseline** (no driver) | App → DB node, network | **142.8 ms** | 3,864 |
|
|
409
|
+
| **Driver** (local index) | App → in-process PrismResonance | **2.0 ms** | 1,479 |
|
|
410
|
+
|
|
411
|
+
**70.7× faster · 98.6% latency reduction**
|
|
412
|
+
|
|
413
|
+
The 98.6% reduction is a direct result of CHORUS Fabric doing its job. The subscription loop streamed 11,000 rows at **26,000 rows/s** from the DB node into the local PrismResonance index before the load test began. By the time the first `/driver/query` hit arrived, there were zero network hops — the answer was already in-process. This is what CHORUS Fabric was designed for: getting tensor data to where the query is, before the query arrives.
|
|
414
|
+
|
|
415
|
+
```bash
|
|
416
|
+
# Two-node benchmark (requires both container apps running)
|
|
417
|
+
python benchmark/load/run_driver_benchmark.py \
|
|
418
|
+
--app-url https://prism-benchmark.nicestone-720c6a9b.westus2.azurecontainerapps.io \
|
|
419
|
+
--db-url https://prism-wrapper-sim.nicestone-720c6a9b.westus2.azurecontainerapps.io \
|
|
420
|
+
--users 30 --duration 60
|
|
421
|
+
|
|
422
|
+
# PrismCache load test
|
|
423
|
+
python benchmark/load/run_benchmark.py \
|
|
424
|
+
--host https://prism-benchmark.nicestone-720c6a9b.westus2.azurecontainerapps.io \
|
|
425
|
+
--scenario mixed
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
See [`benchmark/`](benchmark/) for full results JSON, Locust CSV files, and the Azure deploy script.
|
|
429
|
+
|
|
430
|
+
---
|
|
431
|
+
|
|
432
|
+
## Core libraries
|
|
433
|
+
|
|
434
|
+
PrismLib is built on two InsightIts open-source libraries. You can use them directly if you need lower-level access.
|
|
435
|
+
|
|
436
|
+
### PrismResonance
|
|
437
|
+
|
|
438
|
+
> **[github.com/insightitsGit/prismresonance](https://github.com/insightitsGit/prismresonance)** · `pip install prismresonance`
|
|
439
|
+
|
|
440
|
+
The wave-memory similarity engine. Every cache lookup and local vector index in PrismLib goes through PrismResonance.
|
|
441
|
+
|
|
442
|
+
How it works:
|
|
443
|
+
- Receives a float32 embedding vector
|
|
444
|
+
- Reduces it to 64 dimensions with a Johnson–Lindenstrauss projection whose matrix is seeded by `SHA-256(tenant_id)` — this is what gives each tenant a mathematically isolated address space
|
|
445
|
+
- Computes similarity as wave interference (cosine in projected space) in three lock-free phases: snapshot → ONNX MatMul → rank
|
|
446
|
+
- Returns ranked candidates in sub-millisecond time entirely in-process
|
|
447
|
+
|
|
448
|
+
PrismCache wraps this for LLM response caching. PrismDriver's local replica is a PrismResonance index kept warm by WAL streaming.
|
|
449
|
+
|
|
450
|
+
```python
|
|
451
|
+
from prismresonance import PrismProjector, WaveIndex
|
|
452
|
+
|
|
453
|
+
projector = PrismProjector(dim=64, tenant_id="my-tenant")
|
|
454
|
+
index = WaveIndex(projector)
|
|
455
|
+
|
|
456
|
+
index.add(vector=my_embedding, payload={"row_id": "product-1", "text": "Widget"})
|
|
457
|
+
results = index.query(vector=query_embedding, top_k=5, threshold=0.85)
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
### CHORUS Fabric
|
|
461
|
+
|
|
462
|
+
> **[github.com/insightitsGit/chorus_fabric](https://github.com/insightitsGit/chorus_fabric)** · `pip install chorus-fabric`
|
|
463
|
+
|
|
464
|
+
The secure gRPC binary streaming protocol for machine-to-machine tensor communication. PrismDriver uses CHORUS Fabric as its transport layer between the server wrapper on the DB node and the DLL driver on the app node.
|
|
465
|
+
|
|
466
|
+
How it works:
|
|
467
|
+
- `prism-wrapper` (DB node) vectorizes WAL row events via `RowVectorizer`, encrypts them with `TensorCipher` (`V_enc = V @ K`), appends an HMAC-SHA256 watermark, and publishes batches of raw float32 frames
|
|
468
|
+
- `PrismDriver` (app node) opens a persistent `WrapperService.Subscribe()` gRPC stream, receives encrypted frames, decrypts, and feeds them into the local PrismResonance index
|
|
469
|
+
- Transport is pure binary float32 over gRPC server-streaming — no JSON serialization, no REST overhead
|
|
470
|
+
- The `WrapperService` proto also exposes `Query`, `Write`, `Health`, and `Hello` RPCs for direct interaction
|
|
471
|
+
|
|
472
|
+
```python
|
|
473
|
+
from chorus_fabric import CHORUSPublisher, DriverEndpoint
|
|
474
|
+
|
|
475
|
+
publisher = CHORUSPublisher(config)
|
|
476
|
+
publisher.add_driver(DriverEndpoint(host="10.0.1.50", port=50051, tenant_id="prod"))
|
|
477
|
+
await publisher.run(event_queue) # streams WAL events to all connected drivers
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
CHORUS Fabric is the same protocol used in the CHORUS M2M system — InsightIts' 4-container gRPC topology for tensor communication between AI agents. The 98.6% latency reduction in the PrismDriver benchmark is direct proof that the protocol works at production scale: 11,000 rows streamed at 26,000 rows/s across Azure inter-container networking, then served locally at 2ms.
|
|
481
|
+
|
|
482
|
+
---
|
|
483
|
+
|
|
484
|
+
## Enterprise
|
|
485
|
+
|
|
486
|
+
PrismLib is open source (Apache 2.0) and free to use. If your team needs any of the following, contact us for enterprise pricing:
|
|
487
|
+
|
|
488
|
+
- **On-premises deployment support** — air-gapped installs, hardened Docker images, SOC 2 documentation
|
|
489
|
+
- **SLA-backed support** — guaranteed response times, incident escalation, dedicated Slack channel
|
|
490
|
+
- **Custom embedding model integration** — fine-tuned domain-specific embedders for higher hit rates in specialized domains (legal, medical, finance, code)
|
|
491
|
+
- **Multi-region CHORUS Fabric topology** — active-active DB node clusters, cross-region WAL fan-out, geo-aware driver routing
|
|
492
|
+
- **Audit logging and compliance exports** — per-query access logs, tenant isolation attestation reports, GDPR data lineage
|
|
493
|
+
- **Professional services** — architecture review, migration from Redis/GPTCache, custom RowVectorizer schemas
|
|
494
|
+
|
|
495
|
+
**Contact: [insightits.info@gmail.com](mailto:insightits.info@gmail.com)**
|
|
496
|
+
**GitHub: [github.com/insightitsGit/prismlib](https://github.com/insightitsGit/prismlib)**
|
|
497
|
+
|
|
498
|
+
---
|
|
499
|
+
|
|
500
|
+
## Publishing to PyPI
|
|
501
|
+
|
|
502
|
+
**It is one package** — `prismlib` — published once. The wrapper, driver, and cache are all extras of the same package. Users install what they need:
|
|
503
|
+
|
|
504
|
+
```bash
|
|
505
|
+
pip install "prismlib[cache]" # PrismCache only
|
|
506
|
+
pip install "prismlib[wrapper]" # Server Wrapper (DB node)
|
|
507
|
+
pip install "prismlib[fabric]" # DLL Driver (App node)
|
|
508
|
+
pip install "prismlib[all]" # Everything
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
**To publish a new version:**
|
|
512
|
+
|
|
513
|
+
```bash
|
|
514
|
+
# 1. Bump version in pyproject.toml (currently 0.3.0)
|
|
515
|
+
# 2. Build the distribution
|
|
516
|
+
pip install build twine
|
|
517
|
+
python -m build
|
|
518
|
+
|
|
519
|
+
# 3. Upload to PyPI (use your token from pypi.org/manage/account/token/)
|
|
520
|
+
python -m twine upload dist/* --username __token__ --password pypi-YOUR_TOKEN
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
That's it. One upload covers every install variant — pip resolves the requested extras from the package metadata at install time.
|
|
524
|
+
|
|
525
|
+
---
|
|
526
|
+
|
|
527
|
+
## License
|
|
528
|
+
|
|
529
|
+
Apache 2.0 — InsightIts © 2026
|