prismcortex 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,381 @@
1
+ """Reference adapters — real, in-memory implementations of every port.
2
+
3
+ These are NOT mocks: the embeddings use the hashing trick (deterministic, no
4
+ randomness), the graph is a genuine bitemporal store, the cache is content-addressed
5
+ and optionally durable, and resonance applies real weight/decay. They make the engine
6
+ run and test today; the production adapters wrap the real Prism packages behind the
7
+ same ports.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import json
13
+ import re
14
+ from pathlib import Path
15
+ from typing import Optional
16
+
17
+ import numpy as np
18
+
19
+ from ..determinism import graph_content_hash
20
+ from ..labels import canonical_label, token_overlap
21
+ from ..models import (
22
+ AssetPointer,
23
+ Band,
24
+ DeltaOp,
25
+ Edge,
26
+ GraphVersion,
27
+ Node,
28
+ Operation,
29
+ StateDelta,
30
+ Subgraph,
31
+ utcnow,
32
+ )
33
+
34
+ _WORD = re.compile(r"[a-z0-9]+")
35
+
36
+
37
def _stable_hash_int(token: str, nbytes: int = 4) -> int:
    """Map *token* to a non-negative integer that is identical across runs.

    Uses a BLAKE2b digest of ``nbytes`` bytes rather than the builtin ``hash``,
    which is salted per process and therefore not reproducible.
    """
    digest = hashlib.blake2b(token.encode(), digest_size=nbytes).digest()
    return int.from_bytes(digest, "big")
40
+
41
+
42
+ # --------------------------------------------------------------------------- #
43
+ # PrismLang stand-in: deterministic feature-hashing projector.
44
+ # --------------------------------------------------------------------------- #
45
class HashingProjector:
    """Deterministic feature-hashing text projector (no randomness, no model)."""

    def __init__(self, dim: int = 384) -> None:
        # Width of the embedding vector; also the number of hash buckets.
        self.dim = dim

    def embed(self, text: str) -> list[float]:
        """Return a signed feature-hashed embedding of *text* as a list of floats.

        Each lowercase alphanumeric token is hashed with BLAKE2b: the first four
        digest bytes pick the bucket, the low bit of the fifth byte picks the sign.
        The vector is L2-normalized unless it is all zeros.
        """
        vec = np.zeros(self.dim, dtype=np.float32)
        for token in _WORD.findall(text.lower()):
            digest = hashlib.blake2b(token.encode(), digest_size=8).digest()
            bucket = int.from_bytes(digest[:4], "big") % self.dim
            sign = 1.0 if digest[4] & 1 else -1.0
            vec[bucket] += sign
        length = float(np.linalg.norm(vec))
        if length > 0:
            return (vec / length).tolist()
        return vec.tolist()

    def classify(self, text: str) -> str:
        """Return the category for *text*; this stand-in always answers ``"general"``."""
        return "general"
60
+
61
+
62
+ # --------------------------------------------------------------------------- #
63
+ # PrismRAG stand-in: bitemporal in-memory graph store.
64
+ # --------------------------------------------------------------------------- #
65
class InMemoryGraphStore:
    """In-memory graph store whose edges are soft-invalidated rather than deleted.

    Nodes and edges live in plain dicts keyed by id. Edges are taken out of the
    recall path by setting ``valid_to``; only ``forget_source`` removes content.
    A unit-normalized embedding matrix is cached for vectorized similarity search
    and rebuilt lazily whenever the node set changes.
    """

    def __init__(self) -> None:
        self._nodes: dict[str, Node] = {}
        self._edges: dict[str, Edge] = {}
        self._label_index: dict[str, str] = {}  # lower(label) -> node_id
        self._version = 0
        # cached unit-normalized embedding matrix for vectorized retrieval (rebuilt lazily)
        self._emb_ids: list[str] = []
        self._emb_unit = None  # np.ndarray [n_nodes, dim]
        self._matrix_dirty = True
        self._tombstones: list[dict] = []  # audit log of erasures (content not retained)

    def _ensure_matrix(self) -> None:
        """Rebuild the cached unit-normalized embedding matrix if it is stale."""
        if not self._matrix_dirty:
            return
        ids, vecs = [], []
        for nid, node in self._nodes.items():
            if node.embedding:
                ids.append(nid)
                vecs.append(node.embedding)
        self._emb_ids = ids
        if vecs:
            m = np.asarray(vecs, dtype=np.float32)
            norms = np.linalg.norm(m, axis=1, keepdims=True)
            norms[norms == 0] = 1.0  # avoid dividing zero vectors by zero
            self._emb_unit = m / norms
        else:
            self._emb_unit = None
        self._matrix_dirty = False

    def _cosine_to_all(self, embedding: list[float]):
        """Cosine similarity of `embedding` against every cached row (matrix must be built)."""
        q = np.asarray(embedding, dtype=np.float32)
        qn = float(np.linalg.norm(q)) or 1.0
        return self._emb_unit @ (q / qn)  # rows are unit-normalized → cosine

    def _index_label(self, node_id: str, label: str) -> None:
        """Register `label` for `node_id`: the lowercase form always, the canonical
        form only when no other node already claimed it."""
        self._label_index[label.strip().lower()] = node_id
        canon = canonical_label(label)
        if canon not in self._label_index:
            self._label_index[canon] = node_id

    # -- reads --
    def find_node_by_label(self, label: str) -> Optional[str]:
        """Look up a node id by stripped/lowercased label, then by canonical label."""
        key = label.strip().lower()
        if key in self._label_index:
            return self._label_index[key]
        return self._label_index.get(canonical_label(label))

    def node_label(self, node_id: str) -> Optional[str]:
        """Return the label of `node_id`, or None when the node is unknown."""
        n = self._nodes.get(node_id)
        return n.label if n else None

    def find_node_by_token_overlap(self, label: str, threshold: float = 0.34) -> Optional[str]:
        """Resolve a paraphrased subject when embeddings differ ('product launch' vs 'launch')."""
        best_id, best = None, threshold
        for nid, node in self._nodes.items():
            score = token_overlap(label, node.label)
            if score > best:  # strictly above the threshold / current best
                best, best_id = score, nid
        return best_id

    def find_nodes_by_label_overlap(self, text: str, threshold: float = 0.34, limit: int = 4) -> list[str]:
        """Return up to `limit` node ids whose label overlap with `text` is at least
        `threshold`, best score first with ties broken by node id."""
        scored = []
        for nid, node in self._nodes.items():
            score = token_overlap(text, node.label)
            if score >= threshold:
                scored.append((score, nid))
        scored.sort(key=lambda x: (-x[0], x[1]))
        return [nid for _, nid in scored[:limit]]

    def current_edge(self, src: str, relation: str) -> Optional[Edge]:
        """Return the first current edge from `src` with `relation`, or None."""
        for e in self._edges.values():
            if e.is_current and e.src == src and e.relation == relation:
                return e
        return None

    def current_edges_from(self, src: str) -> list[Edge]:
        """Return every current edge whose source is `src`."""
        return [e for e in self._edges.values() if e.is_current and e.src == src]

    def find_similar_node(self, embedding: list[float], threshold: float = 0.88) -> Optional[str]:
        """Entity resolution: the existing node whose embedding is closest to `embedding`,
        if above `threshold`. Lets a paraphrased subject ("the budget" vs "deploy budget")
        resolve to the same node without relying on the LLM to canonicalize perfectly."""
        if not embedding:
            return None
        self._ensure_matrix()
        if self._emb_unit is None:
            return None
        sims = self._cosine_to_all(embedding)
        i = int(np.argmax(sims))
        return self._emb_ids[i] if float(sims[i]) >= threshold else None

    def retrieve(self, embedding: list[float], k: int = 8) -> Subgraph:
        """Return the top-`k` most similar nodes plus their current edges and neighbors.

        An empty store, an empty `embedding`, or a non-positive `k` yields an empty
        Subgraph. Nodes are returned sorted by id so the result does not depend on
        set iteration order.
        """
        if not self._nodes or not embedding or k <= 0:
            return Subgraph()
        self._ensure_matrix()
        if self._emb_unit is None:
            return Subgraph()
        sims = self._cosine_to_all(embedding)
        kk = min(k, len(self._emb_ids))
        # stable top-k → deterministic retrieval set
        order = np.argsort(-sims, kind="stable")[:kk]
        chosen = {self._emb_ids[int(i)] for i in order}

        edges = [e for e in self._edges.values() if e.is_current and (e.src in chosen or e.dst in chosen)]
        # pull in neighbor nodes so the subgraph is self-contained and renderable.
        for e in edges:
            chosen.add(e.src)
            chosen.add(e.dst)
        nodes = [self._nodes[n] for n in sorted(chosen) if n in self._nodes]
        return Subgraph(nodes=nodes, edges=edges)

    def version(self) -> GraphVersion:
        """Return the current version counter together with the content hash."""
        return GraphVersion(version=self._version, content_hash=self._content_hash())

    # -- write (the only mutation entry point) --
    def apply(self, delta: StateDelta) -> GraphVersion:
        """Apply every op in `delta`, bump the version once, and return the new version.

        An empty delta changes nothing and returns the current version.
        """
        if delta.is_empty:
            return self.version()
        now = utcnow()
        for op in delta.ops:
            if op.operation is Operation.ASSIMILATE:
                if op.node is not None:
                    self._nodes[op.node.id] = op.node
                    self._index_label(op.node.id, op.node.label)
                    self._matrix_dirty = True  # invalidate the cached embedding matrix
                if op.edge is not None:
                    self._edges[op.edge.id] = op.edge
            elif op.operation is Operation.ACCOMMODATE:
                if op.target_id and op.target_id in self._edges:
                    self._edges[op.target_id].valid_to = now  # invalidate, never delete
                if op.edge is not None:
                    self._edges[op.edge.id] = op.edge
            elif op.operation is Operation.REINFORCE:
                self._reinforce(op.target_id)
            elif op.operation is Operation.PRUNE:
                if op.target_id and op.target_id in self._edges:
                    self._edges[op.target_id].valid_to = now
        self._version += 1
        return self.version()

    def prune_to(self, max_current_edges: int) -> int:
        """Bound the active working set: soft-invalidate the coldest (lowest-weight, then
        oldest) current edges until at most `max_current_edges` remain. Invalidated facts
        are retained (valid_to set) for audit/time-travel, just out of the recall path.

        Returns the number of edges invalidated. A negative bound is treated as 0 so the
        returned count always matches the edges actually touched.
        """
        keep = max(max_current_edges, 0)
        current = [e for e in self._edges.values() if e.is_current]
        excess = len(current) - keep
        if excess <= 0:
            return 0
        now = utcnow()
        current.sort(key=lambda e: (e.weight, e.recorded_at))  # coldest first
        for e in current[:excess]:
            e.valid_to = now
        return excess

    def _reinforce(self, target_id: Optional[str]) -> None:
        """Add 0.5 to the weight of the node (or else edge) `target_id`, capped at 100."""
        if not target_id:
            return
        if target_id in self._nodes:
            self._nodes[target_id].weight = min(self._nodes[target_id].weight + 0.5, 100.0)
        elif target_id in self._edges:
            self._edges[target_id].weight = min(self._edges[target_id].weight + 0.5, 100.0)

    def _content_hash(self) -> str:
        """Hash the sorted ``src|relation|dst`` triples of all current edges."""
        current = sorted(
            f"{e.src}|{e.relation}|{e.dst}" for e in self._edges.values() if e.is_current
        )
        return graph_content_hash(json.dumps(current, separators=(",", ":")))

    # -- introspection helpers (used by the audit/time-travel + memory-savings demo) --
    def history(self, src: str, relation: str) -> list[Edge]:
        """Return every edge, current or not, from `src` with `relation`."""
        return [e for e in self._edges.values() if e.src == src and e.relation == relation]

    def all_edges(self) -> list[Edge]:
        """Return a new list of all stored edges."""
        return list(self._edges.values())

    def all_nodes(self) -> list[Node]:
        """Return a new list of all stored nodes."""
        return list(self._nodes.values())

    def forget_source(self, source_id: str) -> dict:
        """Right-to-be-forgotten: hard-remove every fact (and now-orphaned node) derived
        from `source_id`. The *content* is erased (GDPR); only a tombstone receipt is kept
        for audit ("N facts from source X erased at T")."""
        edge_ids = [eid for eid, e in self._edges.items()
                    if e.provenance and e.provenance.source_id == source_id]
        for eid in edge_ids:
            del self._edges[eid]
        # A node from this source survives while any remaining edge still references it.
        referenced = {e.src for e in self._edges.values()} | {e.dst for e in self._edges.values()}
        node_ids = [nid for nid, n in self._nodes.items()
                    if n.provenance and n.provenance.source_id == source_id and nid not in referenced]
        for nid in node_ids:
            del self._nodes[nid]
        # Rebuild the label index from scratch so erased labels cannot linger.
        self._label_index = {}
        for nid, n in self._nodes.items():
            self._index_label(nid, n.label)
        self._matrix_dirty = True
        if edge_ids or node_ids:
            self._version += 1
        receipt = {"source_id": source_id, "edges_erased": len(edge_ids),
                   "nodes_erased": len(node_ids), "at": utcnow().isoformat()}
        self._tombstones.append(receipt)
        return receipt

    def tombstones(self) -> list[dict]:
        """Return a copy of the erasure receipts recorded so far."""
        return list(self._tombstones)
269
+
270
+
271
+ # --------------------------------------------------------------------------- #
272
+ # PrismResonance stand-in: synaptic weight + discrete consolidation.
273
+ # --------------------------------------------------------------------------- #
274
# Initial resonance weight assigned to a chunk, keyed by band value.
_BAND_AMP = dict(
    (
        (Band.EMERGENCY.value, 4.0),
        (Band.ALERT.value, 3.0),
        (Band.NORMAL.value, 1.0),
        (Band.RECOVERY.value, 1.0),
        (Band.NEUTRAL.value, 0.5),
        (Band.ARCHIVE.value, 0.25),
    )
)
282
+
283
+
284
class InProcessResonance:
    """Per-chunk weight table with reinforcement and a discrete decay pass."""

    def __init__(self, decay: float = 0.95) -> None:
        self._weights: dict[str, float] = {}
        self._decay = decay

    def ingest(self, chunk_id: str, amplitude: list[float], band: str) -> None:
        """Raise the chunk's weight to its band amplitude (1.0 for unknown bands);
        an existing higher weight is kept. `amplitude` is not used here."""
        band_weight = _BAND_AMP.get(band, 1.0)
        existing = self._weights.get(chunk_id, 0.0)
        self._weights[chunk_id] = max(existing, band_weight)

    def reinforce(self, chunk_id: str) -> None:
        """Add 1.0 to the chunk's weight; an unseen chunk starts from 1.0 (LTP)."""
        previous = self._weights.get(chunk_id, 1.0)
        self._weights[chunk_id] = previous + 1.0

    def rank(self, candidate_ids: list[str]) -> list[str]:
        """Order candidates by descending weight, ties broken by id; unknown ids weigh 0."""
        def sort_key(cid: str):
            return (-self._weights.get(cid, 0.0), cid)

        return sorted(candidate_ids, key=sort_key)

    def consolidate(self) -> None:
        """Discrete decay pass (the "sleep" heartbeat): scale every weight by the
        decay factor and drop entries that fall below 0.05."""
        for chunk_id, weight in list(self._weights.items()):
            faded = weight * self._decay
            if faded < 0.05:
                del self._weights[chunk_id]
            else:
                self._weights[chunk_id] = faded
304
+
305
+
306
+ # --------------------------------------------------------------------------- #
307
+ # PrismLib cache-as-failover stand-in: content-addressed, optionally durable.
308
+ # --------------------------------------------------------------------------- #
309
class DurableCache:
    """Key/value answer cache held in memory and optionally mirrored to a JSON file.

    When `path` is given, every `put` persists the whole store so a cached answer
    survives a restart. Writes go to a sibling temporary file that is then renamed
    over the target, so an interrupted write cannot leave a truncated cache behind.
    """

    def __init__(self, path: Optional[str] = None) -> None:
        self._path = Path(path) if path else None
        self._store: dict[str, str] = {}
        if self._path is not None and self._path.exists():
            self._store = self._load()

    def _load(self) -> dict[str, str]:
        """Read the backing file; an unreadable or non-object document yields an empty store."""
        try:
            loaded = json.loads(self._path.read_text(encoding="utf-8"))
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError: a damaged cache
            # is treated as a cold cache rather than a startup failure.
            return {}
        return loaded if isinstance(loaded, dict) else {}

    def _persist(self) -> None:
        """Write the store to disk via write-to-temp + rename."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self._path.with_name(self._path.name + ".tmp")
        tmp.write_text(json.dumps(self._store), encoding="utf-8")
        tmp.replace(self._path)

    def get(self, key: str) -> Optional[str]:
        """Return the cached value for `key`, or None when absent."""
        return self._store.get(key)

    def has(self, key: str) -> bool:
        """Return True when `key` is cached."""
        return key in self._store

    def put(self, key: str, value: str) -> None:
        """Store `value` under `key` and, in durable mode, persist the whole store."""
        self._store[key] = value
        if self._path is not None:  # durable: a frozen answer survives restart / eviction.
            self._persist()

    def clear(self) -> None:
        """Drop all cached answers — used on erasure so deleted content can't linger."""
        self._store = {}
        # Only rewrite a file that already exists; never create one just to empty it.
        if self._path is not None and self._path.exists():
            self._persist()
332
+
333
+
334
+ # --------------------------------------------------------------------------- #
335
+ # Chorus / PrismLib cluster stand-in: in-process version broadcast.
336
+ # --------------------------------------------------------------------------- #
337
class InProcessMesh:
    """In-process stand-in for a cluster broadcast: it just records each event."""

    def __init__(self) -> None:
        # One (version number, invalidated keys) tuple per broadcast, in call order.
        self.events: list[tuple[int, list[str]]] = []

    def broadcast_version(self, version: GraphVersion, invalidated: list[str]) -> None:
        """Record the version number together with the list of invalidated keys."""
        event = (version.version, invalidated)
        self.events.append(event)
343
+
344
+
345
+ # --------------------------------------------------------------------------- #
346
+ # Labile working-memory staging buffer.
347
+ # --------------------------------------------------------------------------- #
348
class ListStaging:
    """List-backed staging buffer for deltas waiting to be applied."""

    def __init__(self) -> None:
        self._buf: list[tuple[StateDelta, str]] = []

    def stage(self, delta: StateDelta, reason: str) -> None:
        """Append `delta` with its `reason` to the buffer."""
        entry = (delta, reason)
        self._buf.append(entry)

    def drain(self) -> list[tuple[StateDelta, str]]:
        """Hand back everything staged so far and start a fresh, empty buffer."""
        drained = self._buf
        self._buf = []
        return drained

    def pending_count(self) -> int:
        """Return how many entries are currently staged."""
        return len(self._buf)
361
+
362
+
363
+ # --------------------------------------------------------------------------- #
364
+ # Immutable blob storage (local filesystem).
365
+ # --------------------------------------------------------------------------- #
366
class LocalBlobStore:
    """Content-addressed blob storage on the local filesystem.

    Each blob is written to ``<root>/<sha256 hex>``; the asset id handed back to
    callers is the first 16 hex characters of that digest.
    """

    def __init__(self, root: str = ".prismcortex_blobs") -> None:
        self._root = Path(root)
        self._root.mkdir(parents=True, exist_ok=True)

    def put(self, data: bytes, modality: str, uri_hint: str = "") -> AssetPointer:
        """Store `data` under its SHA-256 digest and return a pointer to it.

        `uri_hint` is accepted for interface compatibility and not used here.
        """
        sha = hashlib.sha256(data).hexdigest()
        path = self._root / sha
        path.write_bytes(data)
        return AssetPointer(asset_id=sha[:16], uri=str(path), modality=modality, sha256=sha)

    def get(self, asset_id: str) -> bytes:
        """Return the bytes of the blob whose file name starts with `asset_id`.

        Raises:
            KeyError: if `asset_id` is empty or no stored blob matches it.
        """
        # An empty id is a prefix of every name and would return an arbitrary blob.
        if not asset_id:
            raise KeyError(asset_id)
        # Sorted scan: the match does not depend on directory listing order.
        for p in sorted(self._root.iterdir()):
            if p.name.startswith(asset_id) and p.is_file():
                return p.read_bytes()
        raise KeyError(asset_id)
prismcortex/auth.py ADDED
@@ -0,0 +1,81 @@
1
+ """API-key authentication with tenant scoping and RBAC.
2
+
3
+ Single-key mode (dev): ``PRISMCORTEX_API_KEY`` → tenant ``default``, all roles.
4
+
5
+ Multi-key mode (enterprise): ``PRISMCORTEX_API_KEYS`` JSON map::
6
+
7
+ {"keyhex": {"tenant": "acme", "roles": ["read", "write", "admin"]}}
8
+
9
+ Or a path via ``PRISMCORTEX_API_KEYS_FILE``.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import os
15
+ from dataclasses import dataclass, field
16
+ from typing import Optional
17
+
18
# Role names understood by the RBAC checks.
ROLE_READ = "read"
ROLE_WRITE = "write"
ROLE_ADMIN = "admin"
ROLE_FORGET = "forget"
ALL_ROLES = frozenset({ROLE_READ, ROLE_WRITE, ROLE_ADMIN, ROLE_FORGET})


@dataclass(frozen=True)
class AuthContext:
    """Immutable identity attached to an authenticated request.

    With no arguments it describes tenant ``default`` holding every role.
    """

    tenant_id: str = "default"
    roles: frozenset[str] = field(default_factory=lambda: ALL_ROLES)
    region: str = "default"

    def allows(self, *required: str) -> bool:
        """Return True if this context is admin or holds at least one of `required`.

        With no `required` roles, only an admin context is allowed.
        """
        if ROLE_ADMIN in self.roles:
            return True
        for role in required:
            if role in self.roles:
                return True
        return False
35
+
36
+
37
def _load_key_map() -> dict[str, dict]:
    """Build the API-key map from the environment.

    Sources, in order of precedence:

    1. ``PRISMCORTEX_API_KEYS_FILE`` — path to a JSON file (used when the file exists).
    2. ``PRISMCORTEX_API_KEYS`` — inline JSON.
    3. ``PRISMCORTEX_API_KEY`` — single key mapped to the tenant named by
       ``PRISMCORTEX_TENANT`` (default ``"default"``) with every role.

    Returns an empty dict when none of these is set.

    Raises:
        ValueError: if the JSON is malformed or is not an object at the top level.
    """
    raw = os.environ.get("PRISMCORTEX_API_KEYS")
    path = os.environ.get("PRISMCORTEX_API_KEYS_FILE")
    if path and os.path.isfile(path):
        # Context manager so the file handle is closed deterministically.
        with open(path, encoding="utf-8") as fh:
            raw = fh.read()
    if raw:
        loaded = json.loads(raw)
        if not isinstance(loaded, dict):
            # Anything else would only fail later, on the first lookup.
            raise ValueError("PrismCortex API key map must be a JSON object")
        return loaded
    single = os.environ.get("PRISMCORTEX_API_KEY")
    if single:
        return {single: {"tenant": os.environ.get("PRISMCORTEX_TENANT", "default"), "roles": list(ALL_ROLES)}}
    return {}
48
+
49
+
50
+ _KEY_MAP: Optional[dict[str, dict]] = None
51
+
52
+
53
def key_map() -> dict[str, dict]:
    """Return the API-key map, loading it on first use and caching it module-wide."""
    global _KEY_MAP
    cached = _KEY_MAP
    if cached is None:
        cached = _load_key_map()
        _KEY_MAP = cached
    return cached
58
+
59
+
60
def reload_keys() -> None:
    """Discard the cached key map by resetting the module-level cache to None."""
    global _KEY_MAP
    _KEY_MAP = None
63
+
64
+
65
def authenticate(token: Optional[str]) -> Optional[AuthContext]:
    """Resolve an API token to its AuthContext.

    Returns None when `token` is empty or not present in the key map. An entry
    without roles gets ``read`` + ``write``; an entry without a region falls back
    to ``PRISMCORTEX_REGION`` (default ``"default"``); an entry without a tenant
    gets ``"default"``.
    """
    if not token:
        return None
    entry = key_map().get(token)
    if entry is None:
        return None
    granted = entry.get("roles") or [ROLE_READ, ROLE_WRITE]
    if isinstance(granted, str):
        # A bare string would otherwise be split into single-character "roles".
        granted = [granted]
    region = entry.get("region") or os.environ.get("PRISMCORTEX_REGION", "default")
    return AuthContext(
        tenant_id=str(entry.get("tenant") or "default"),
        roles=frozenset(granted),
        region=region,
    )
78
+
79
+
80
def auth_required() -> bool:
    """Return True when at least one API key is configured."""
    configured = key_map()
    return bool(configured)
@@ -0,0 +1,75 @@
1
+ """Content-addressing — the mechanism that makes the system deterministic.
2
+
3
+ The cache key is a hash of *the exact context that produced the answer*. Because the
4
+ key IS the content, a changed fact yields a changed key, so a stale answer is simply
5
+ unreachable — invalidation and determinism are the same mechanism. Timestamps are
6
+ deliberately excluded: the key depends on the current *knowledge*, not on when it was
7
+ recorded, so re-deriving identical facts hits the same answer.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import json
13
+
14
+ from .models import Subgraph
15
+
16
+ _SEP = "\x00"
17
+
18
+
19
def canonical_subgraph(subgraph: Subgraph) -> str:
    """Serialize the *current* knowledge in a subgraph to a stable JSON string.

    Nodes are ordered by id and current edges by (src, dst, relation), so the order
    in which the retrieval layer returned them cannot change the result. Validity
    timestamps are left out on purpose; non-current edges are skipped.
    """
    node_rows = [
        {
            "id": node.id,
            "label": node.label,
            "kind": node.kind,
            "attributes": _canonical_attrs(node.attributes),
        }
        for node in subgraph.nodes
    ]
    node_rows.sort(key=lambda row: row["id"])

    edge_rows = [
        {"src": edge.src, "dst": edge.dst, "relation": edge.relation}
        for edge in subgraph.edges
        if edge.is_current
    ]
    edge_rows.sort(key=lambda row: (row["src"], row["dst"], row["relation"]))

    document = {"nodes": node_rows, "edges": edge_rows}
    return json.dumps(document, sort_keys=True, separators=(",", ":"))
47
+
48
+
49
def _canonical_attrs(attrs: dict) -> dict:
    """Return the str/int/float/bool-valued attributes of `attrs`, keys in sorted order.

    Only stable, JSON-serializable scalar attributes contribute to the address;
    every other value (None, lists, dicts, ...) is dropped.
    """
    scalars = {}
    for name in sorted(attrs):
        value = attrs[name]
        if isinstance(value, (str, int, float, bool)):
            scalars[name] = value
    return scalars
52
+
53
+
54
def content_address(query: str, subgraph: Subgraph, template_id: str, model_id: str) -> str:
    """Compute the cache key: SHA-256 over query, canonical subgraph, template and model.

    The query is lowercased and its whitespace collapsed before hashing. Pinning the
    model id means a model revision produces a different key and so re-renders.
    """
    normalized_query = " ".join(query.lower().split())  # normalize whitespace/case
    parts = [normalized_query, canonical_subgraph(subgraph), template_id, model_id]
    payload = _SEP.join(parts)
    digest = hashlib.sha256(payload.encode("utf-8"))
    return digest.hexdigest()
65
+
66
+
67
def extraction_memo_key(text: str, extractor_model_id: str) -> str:
    """Write-path memo key: ``"memo:"`` plus a SHA-256 hex digest.

    The digest covers the literal tag ``"extract"``, the lowercased and
    whitespace-collapsed `text`, and the extractor model id, so identical input
    always yields the same key.
    """
    normalized_text = " ".join(text.lower().split())
    fields = ["extract", normalized_text, extractor_model_id]
    digest = hashlib.sha256(_SEP.join(fields).encode("utf-8")).hexdigest()
    return "memo:" + digest
71
+
72
+
73
def graph_content_hash(serialized_current_edges: str) -> str:
    """Return the SHA-256 hex digest of the UTF-8 encoded input string.

    Used as the integrity stamp on GraphVersion, independent of the monotonic counter.
    """
    encoded = serialized_current_edges.encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()