prismcortex 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
prismcortex/engine.py ADDED
@@ -0,0 +1,524 @@
1
+ """The Memory engine — the single front door (`digest` / `recall` / `sleep`).
2
+
3
+ All five Prism packages live behind ports; this class owns the lifecycle logic that
4
+ none of them own individually: salience routing, the in-RAM delta calculation, the
5
+ fast/slow (inline vs staging) split, bitemporal commits, and the content-addressed
6
+ deterministic render path.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ from typing import Optional
12
+
13
+ from . import salience
14
+ from .determinism import content_address, extraction_memo_key
15
+ from .models import (
16
+ Band,
17
+ DeltaOp,
18
+ DigestOutcome,
19
+ DigestResult,
20
+ Edge,
21
+ Evidence,
22
+ Explanation,
23
+ FAST_TRACK_BANDS,
24
+ SKIP_BANDS,
25
+ ExtractedGist,
26
+ GraphVersion,
27
+ Node,
28
+ Operation,
29
+ Provenance,
30
+ RecallResult,
31
+ StateDelta,
32
+ Subgraph,
33
+ )
34
+
35
+
36
+ def _confidence(weight: float) -> float:
37
+ """Map reinforcement (edge/subject weight) to a 0..1 confidence. A fact stated once
38
+ (weight 1.0) → 0.5; confirmed repeatedly → approaches 1.0."""
39
+ return round(1.0 - 0.5 ** max(weight, 0.0), 3)
40
+
41
+
42
+ from .labels import (
43
+ canonical_label,
44
+ looks_like_correctable_value,
45
+ norm_relation,
46
+ relations_compatible,
47
+ resolve_alias,
48
+ )
49
+ from .ports import (
50
+ EntityExtractor,
51
+ GistProjector,
52
+ GraphStore,
53
+ MeshBroadcast,
54
+ Renderer,
55
+ ResonanceEngine,
56
+ ResponseCache,
57
+ StagingBuffer,
58
+ )
59
+
60
+
61
+ def _node_id(label: str) -> str:
62
+ return "n_" + hashlib.blake2b(label.strip().lower().encode(), digest_size=8).hexdigest()
63
+
64
+
65
+ def _edge_id(src: str, relation: str, dst: str) -> str:
66
+ raw = f"{src}|{relation}|{dst}".encode()
67
+ return "e_" + hashlib.blake2b(raw, digest_size=8).hexdigest()
68
+
69
+
70
+ class Memory:
71
+ """Deterministic, auditable agent memory.
72
+
73
+ >>> mem = reference_memory() # see prismcortex.factory
74
+ >>> mem.digest("My deploy budget is $40k.")
75
+ >>> mem.recall("What's my deploy budget?").answer
76
+ """
77
+
78
+ def __init__(
79
+ self,
80
+ *,
81
+ projector: GistProjector,
82
+ extractor: EntityExtractor,
83
+ renderer: Renderer,
84
+ store: GraphStore,
85
+ resonance: ResonanceEngine,
86
+ cache: ResponseCache,
87
+ mesh: MeshBroadcast,
88
+ staging: StagingBuffer,
89
+ template_id: str = "render-v1",
90
+ k: int = 8,
91
+ resolve_threshold: float = 0.88,
92
+ max_facts: Optional[int] = None,
93
+ tenant_id: str = "default",
94
+ ) -> None:
95
+ self.projector = projector
96
+ self.extractor = extractor
97
+ self.renderer = renderer
98
+ self.store = store
99
+ self.resonance = resonance
100
+ self.cache = cache
101
+ self.mesh = mesh
102
+ self.staging = staging
103
+ self.template_id = template_id
104
+ self.k = k
105
+ self.resolve_threshold = resolve_threshold
106
+ self.max_facts = max_facts
107
+ self.tenant_id = tenant_id
108
+
109
+ # ------------------------------------------------------------------ write
110
def digest(self, text: str, *, source_id: Optional[str] = None, agent_id: Optional[str] = None) -> DigestResult:
    """Digest one piece of text into the graph (or decide not to).

    Steps, in order:
      1. Salience gate: bands in ``SKIP_BANDS`` return SKIPPED before any
         embedding or extraction call.
      2. Idempotence gate: if the extraction memo key is already cached, the
         text is SKIPPED.
      3. Embed, retrieve context, extract a gist, and compute the delta.
      4. Empty delta -> SKIPPED; uncertain delta outside ``FAST_TRACK_BANDS``
         -> STAGED for ``sleep()``; otherwise commit and report REINFORCED
         (only reinforce ops) or COMMITTED.

    The memo key is recorded only after the delta has reached a final state
    (empty, staged, or committed). If staging or committing raises, the text
    is therefore not marked as digested and can be retried.

    Args:
        text: The raw input to digest.
        source_id: Provenance source id; defaults to a blake2b digest of ``text``.
        agent_id: Optional provenance agent id.

    Returns:
        A ``DigestResult`` describing the outcome, the salience band, the
        delta (when one was staged or committed) and the store version.
    """
    band = salience.assess(text)
    if band in SKIP_BANDS:  # cost gate: never call the LLM on "ok thanks"
        return DigestResult(outcome=DigestOutcome.SKIPPED, band=band, version=self.store.version(), reason="low salience")

    memo = extraction_memo_key(text, self.extractor.model_id)
    if self.cache.has(memo):  # idempotent: identical input never re-digested
        return DigestResult(outcome=DigestOutcome.SKIPPED, band=band, version=self.store.version(), reason="already digested (idempotent)")

    emb = self.projector.embed(text)
    context = self.store.retrieve(emb, k=self.k)
    gist = self.extractor.extract(text, context)

    prov = Provenance(
        source_id=source_id or hashlib.blake2b(text.encode(), digest_size=8).hexdigest(),
        agent_id=agent_id,
    )
    delta, uncertain = self._calculate_delta(gist, context, band, prov)

    if delta.is_empty:
        self.cache.put(memo, "1")  # nothing to learn; safe to mark digested
        return DigestResult(outcome=DigestOutcome.SKIPPED, band=band, version=self.store.version(), reason="no new knowledge")

    # Uncertain writes are deferred to sleep() — unless salience fast-tracks them.
    if uncertain and band not in FAST_TRACK_BANDS:
        # NOTE(review): notes is assumed to be a string; tolerate None defensively.
        notes = gist.notes or ""
        self.staging.stage(delta, reason=f"uncertain: {notes[:80]}")
        self.cache.put(memo, "1")  # mark digested only once the delta is safely parked
        return DigestResult(outcome=DigestOutcome.STAGED, band=band, delta=delta, version=self.store.version(), reason="parked for consolidation")

    version = self._commit(delta)
    self.cache.put(memo, "1")  # mark digested only after a successful commit
    only_reinforce = all(op.operation is Operation.REINFORCE for op in delta.ops)
    outcome = DigestOutcome.REINFORCED if only_reinforce else DigestOutcome.COMMITTED
    return DigestResult(outcome=outcome, band=band, delta=delta, version=version)
142
+
143
+ def _label_for(self, node_id: str) -> Optional[str]:
144
+ if hasattr(self.store, "node_label"):
145
+ return self.store.node_label(node_id)
146
+ nodes = self.store.all_nodes() if hasattr(self.store, "all_nodes") else []
147
+ for n in nodes:
148
+ if n.id == node_id:
149
+ return n.label
150
+ return None
151
+
152
def _resolve_subject(self, label: str, resolved: dict[str, str], ops: list[DeltaOp]) -> str:
    """Resolve a relation subject to a node id, appending the matching op to ``ops``.

    Lookup order: ids already resolved in this delta (by raw key, then by
    alias/canonical key), exact store label, canonical store label, token
    overlap (when the store supports it), then embedding similarity at
    ``self.resolve_threshold``.

    A hit appends a REINFORCE op for the existing node; a miss appends an
    ASSIMILATE op carrying a new node. Reads ``self._ent_meta``,
    ``self._band`` and ``self._prov``, which ``_calculate_delta`` sets before
    calling this method.

    Args:
        label: Subject label as extracted.
        resolved: Per-delta map of label key -> node id; updated in place.
        ops: Per-delta op list; appended to in place.

    Returns:
        The node id the subject resolved to.
    """
    raw_key = label.strip().lower()
    alias_key = resolve_alias(label, tenant_id=self.tenant_id)

    # Already resolved earlier in this delta.
    if raw_key in resolved:
        return resolved[raw_key]
    if alias_key in resolved:
        return resolved[alias_key]

    vector = self.projector.embed(label)
    store = self.store
    node_id = store.find_node_by_label(label)
    if node_id is None:
        node_id = store.find_node_by_label(alias_key)
    if node_id is None and hasattr(store, "find_node_by_token_overlap"):
        node_id = store.find_node_by_token_overlap(label, threshold=0.34)
    if node_id is None:
        node_id = store.find_similar_node(vector, self.resolve_threshold)

    if not node_id:
        meta = self._ent_meta
        kind, attributes = meta.get(raw_key, meta.get(alias_key, ("entity", {})))
        node_id = _node_id(alias_key if alias_key else label)
        fresh_node = Node(id=node_id, label=label, kind=kind, attributes=attributes or {},
                          embedding=vector, band=self._band, provenance=self._prov)
        ops.append(DeltaOp(operation=Operation.ASSIMILATE, node=fresh_node))
    else:
        ops.append(DeltaOp(operation=Operation.REINFORCE, target_id=node_id, reason="resolved to existing"))

    resolved[raw_key] = node_id
    if alias_key != raw_key:
        resolved[alias_key] = node_id
    return node_id
183
+
184
+ def _prior_conflicting_edge(self, src_id: str, relation: str, dst_id: str, *, dst_label: str = "") -> Optional[Edge]:
185
+ """Find a current edge from `src` that this new fact would contradict.
186
+
187
+ Matches on normalized relation *or* subject + correctable-value kind so extraction
188
+ drift ("is scheduled for March" vs "scheduled for June") still consolidates.
189
+ """
190
+ if not hasattr(self.store, "current_edges_from"):
191
+ prior = self.store.current_edge(src_id, relation)
192
+ return prior if prior is not None and prior.dst != dst_id else None
193
+
194
+ new_val = dst_label or self._label_for(dst_id) or ""
195
+ for e in self.store.current_edges_from(src_id):
196
+ if e.dst == dst_id:
197
+ continue
198
+ if relations_compatible(e.relation, relation):
199
+ return e
200
+ old_label = self._label_for(e.dst) or ""
201
+ if (new_val and old_label
202
+ and looks_like_correctable_value(new_val)
203
+ and looks_like_correctable_value(old_label)):
204
+ return e
205
+ return None
206
+
207
def _prior_edge(self, src_id: str, relation: str):
    """Return a current edge from ``src_id`` whose normalized relation matches.

    When the store offers ``current_edges_from``, the first edge whose
    normalized relation equals the normalized ``relation`` is returned (or
    ``None``). Otherwise the store's ``current_edge`` lookup is used with the
    relation as given.
    """
    wanted = norm_relation(relation)
    store = self.store
    if not hasattr(store, "current_edges_from"):
        return store.current_edge(src_id, relation)
    matches = (edge for edge in store.current_edges_from(src_id)
               if norm_relation(edge.relation) == wanted)
    return next(matches, None)
216
+
217
def _calculate_delta(self, gist: ExtractedGist, context: Subgraph, band: Band, prov: Provenance):
    """Turn an extracted gist into graph mutations without touching the store.

    For each relation the subject is resolved via ``_resolve_subject`` and the
    value by exact label only. The resulting edge is then classified:

    * correction with a conflicting prior edge -> ACCOMMODATE (replaces it);
    * correction without a prior -> ASSIMILATE, flagged uncertain;
    * non-correction that conflicts with a prior -> ASSIMILATE, flagged uncertain;
    * otherwise -> plain ASSIMILATE.

    Finally every extracted entity is resolved as a value, so entities that
    appear in no relation still get a node op.

    Side effect: sets ``self._ent_meta``, ``self._band`` and ``self._prov``,
    which ``_resolve_subject`` reads.

    Returns:
        ``(StateDelta, uncertain)`` where ``uncertain`` is True when at least
        one edge was flagged.
    """
    ops: list[DeltaOp] = []
    resolved: dict[str, str] = {}  # lower(label) -> node_id
    needs_review = False

    # Entity metadata keyed by raw label first, then by canonical label
    # (canonical keys overwrite raw ones on collision).
    meta = {}
    for ent in gist.entities:
        meta[ent.label.strip().lower()] = (ent.kind, ent.attributes)
    for ent in gist.entities:
        meta[canonical_label(ent.label)] = (ent.kind, ent.attributes)
    self._ent_meta = meta
    self._band = band
    self._prov = prov

    def resolve_value(label: str) -> str:
        """Resolve a value label by exact store match, creating a node on a miss."""
        key = label.strip().lower()
        if key in resolved:
            return resolved[key]
        vector = self.projector.embed(label)
        node_id = self.store.find_node_by_label(label)
        if not node_id:
            kind, attributes = meta.get(key, ("entity", {}))
            node_id = _node_id(label)
            fresh_node = Node(id=node_id, label=label, kind=kind, attributes=attributes or {},
                              embedding=vector, band=band, provenance=prov)
            ops.append(DeltaOp(operation=Operation.ASSIMILATE, node=fresh_node))
        else:
            ops.append(DeltaOp(operation=Operation.REINFORCE, target_id=node_id, reason="resolved to existing"))
        resolved[key] = node_id
        return node_id

    # Subjects (relation src) coref by similarity + token overlap; values exact only.
    for rel in gist.relations:
        subject_id = self._resolve_subject(rel.src, resolved, ops)
        value_id = resolve_value(rel.dst)
        candidate = Edge(id=_edge_id(subject_id, rel.relation, value_id), src=subject_id, dst=value_id,
                         relation=rel.relation, band=band, provenance=prov)
        clash = self._prior_conflicting_edge(subject_id, rel.relation, value_id, dst_label=rel.dst)

        if gist.is_correction and clash is not None:
            ops.append(DeltaOp(operation=Operation.ACCOMMODATE, edge=candidate, target_id=clash.id, reason="correction"))
        elif gist.is_correction:
            ops.append(DeltaOp(operation=Operation.ASSIMILATE, edge=candidate, reason="claimed correction, no prior"))
            needs_review = True
        elif clash is not None:
            ops.append(DeltaOp(operation=Operation.ASSIMILATE, edge=candidate, reason="conflicts with existing fact"))
            needs_review = True
        else:
            ops.append(DeltaOp(operation=Operation.ASSIMILATE, edge=candidate))

    for ent in gist.entities:
        resolve_value(ent.label)

    return StateDelta(ops=ops), needs_review
270
+
271
def _commit(self, delta: StateDelta):
    """Apply ``delta`` to the store and notify the resonance engine and the mesh.

    After ``store.apply``:
      * every ASSIMILATE op that carries a node is ingested into the resonance
        engine and its id is added to the invalidation list;
      * every REINFORCE op with a target id reinforces that target.
    The new version and the invalidation list are then broadcast.

    Returns:
        The version returned by ``store.apply``.
    """
    new_version = self.store.apply(delta)
    touched: list[str] = []
    for op in delta.ops:
        kind = op.operation
        if kind is Operation.ASSIMILATE and op.node is not None:
            node = op.node
            self.resonance.ingest(node.id, node.embedding or [], node.band.value)
            touched.append(node.id)
            continue
        if kind is Operation.REINFORCE and op.target_id:
            self.resonance.reinforce(op.target_id)
    self.mesh.broadcast_version(new_version, touched)
    return new_version
282
+
283
+ # ------------------------------------------------------------------- read
284
def _evidence(self, subgraph: Subgraph) -> list[Evidence]:
    """Build the audit trail for a subgraph: one ``Evidence`` per current edge.

    Each entry carries the fact as "<src label> <relation> <dst label>"
    (falling back to raw ids for nodes missing from the subgraph), the
    provenance source and timestamp, and a confidence derived from the
    subject node's weight (or the edge's own weight when the subject is not
    in the subgraph).
    """
    label_of = {}
    weight_of = {}
    for node in subgraph.nodes:
        label_of[node.id] = node.label
        weight_of[node.id] = node.weight

    trail: list[Evidence] = []
    for edge in subgraph.edges:
        if not edge.is_current:
            continue
        weight = weight_of.get(edge.src, edge.weight)
        prov = edge.provenance
        subject = label_of.get(edge.src, edge.src)
        value = label_of.get(edge.dst, edge.dst)
        trail.append(Evidence(
            fact=f"{subject} {edge.relation} {value}",
            source_id=prov.source_id if prov else None,
            recorded_at=prov.recorded_at if prov else edge.recorded_at,
            confirmations=weight,
            confidence=_confidence(weight),
        ))
    return trail
302
+
303
+ def _confidence_freshness(self, subgraph: Subgraph):
304
+ weights = {n.id: n.weight for n in subgraph.nodes}
305
+ cur = [e for e in subgraph.edges if e.is_current]
306
+ if not cur:
307
+ return 1.0, None
308
+ conf = round(sum(_confidence(weights.get(e.src, e.weight)) for e in cur) / len(cur), 3)
309
+ fresh = max((e.provenance.recorded_at if e.provenance else e.recorded_at) for e in cur)
310
+ return conf, fresh
311
+
312
+ def _expand_subgraph(self, subgraph: Subgraph, query: str) -> Subgraph:
313
+ """Pull in nodes whose labels overlap the query — helps recall in crowded graphs."""
314
+ if not hasattr(self.store, "find_nodes_by_label_overlap"):
315
+ return subgraph
316
+ extra = self.store.find_nodes_by_label_overlap(query, threshold=0.34, limit=4)
317
+ if not extra:
318
+ return subgraph
319
+ chosen = {n.id for n in subgraph.nodes} | set(extra)
320
+ edges = list(subgraph.edges)
321
+ if hasattr(self.store, "current_edges_from"):
322
+ seen = {e.id for e in edges}
323
+ for nid in extra:
324
+ for e in self.store.current_edges_from(nid):
325
+ if e.is_current and e.id not in seen:
326
+ edges.append(e)
327
+ seen.add(e.id)
328
+ chosen.add(e.src)
329
+ chosen.add(e.dst)
330
+ nodes = subgraph.nodes
331
+ have = {n.id for n in nodes}
332
+ if hasattr(self.store, "node_label"):
333
+ for nid in chosen:
334
+ if nid not in have and self.store.node_label(nid):
335
+ label = self.store.node_label(nid)
336
+ emb = self.projector.embed(label) if label else None
337
+ nodes = nodes + [Node(id=nid, label=label, embedding=emb)]
338
+ have.add(nid)
339
+ elif hasattr(self.store, "all_nodes"):
340
+ by_id = {n.id: n for n in self.store.all_nodes()}
341
+ for nid in chosen:
342
+ if nid not in have and nid in by_id:
343
+ nodes = nodes + [by_id[nid]]
344
+ have.add(nid)
345
+ return Subgraph(nodes=nodes, edges=edges)
346
+
347
def recall(self, query: str) -> RecallResult:
    """Answer ``query`` from the graph, serving a cached answer when one exists.

    The query is embedded, a subgraph is retrieved and expanded, and a content
    address is computed from the query, subgraph, template id and renderer
    model id. An answer cached under ``"ans:" + address`` is returned as a
    cache hit; otherwise the renderer is called once and its answer is cached
    under that key.

    Returns:
        A ``RecallResult`` with the answer, cache-hit flag, content address,
        store version, model id, node ids, current edge ids, confidence and
        freshness.
    """
    query_vec = self.projector.embed(query)
    snapshot = self.store.version()
    retrieved = self.store.retrieve(query_vec, k=self.k)
    subgraph = self._expand_subgraph(retrieved, query)
    address = content_address(query, subgraph, self.template_id, self.renderer.model_id)
    cache_key = "ans:" + address

    confidence, freshness = None, None
    node_ids = [node.id for node in subgraph.nodes]
    edge_ids = [edge.id for edge in subgraph.edges if edge.is_current]
    confidence, freshness = self._confidence_freshness(subgraph)
    shared = {
        "subgraph_hash": address,
        "version": snapshot.version,
        "model_id": self.renderer.model_id,
        "node_ids": node_ids,
        "edge_ids": edge_ids,
        "confidence": confidence,
        "freshness": freshness,
    }

    hit = self.cache.get(cache_key)
    if hit is None:
        rendered = self.renderer.render(query, subgraph)  # the one stochastic draw
        self.cache.put(cache_key, rendered)  # frozen → byte-identical hereafter
        return RecallResult(answer=rendered, cache_hit=False, **shared)
    return RecallResult(answer=hit, cache_hit=True, **shared)
367
+
368
def forget(self, source_id: str) -> dict:
    """Erase every fact derived from ``source_id`` and return the store's receipt.

    After the store forgets the source, the whole cache is cleared when the
    cache supports ``clear`` (cached answers may still contain the erased
    content), and the current store version is broadcast with an empty
    invalidation list.
    """
    receipt = self.store.forget_source(source_id)
    cache = self.cache
    if hasattr(cache, "clear"):
        cache.clear()  # cached answers may contain the erased content
    current = self.store.version()
    self.mesh.broadcast_version(current, invalidated=[])
    return receipt
377
+
378
def conflicts(self) -> list[dict]:
    """List contested facts: subjects with more than one current value for a relation.

    Edges with ``valid_to is None`` are grouped by (subject id, normalized
    relation). A group is contested when its edges point to more than one
    distinct destination. Each entry has the keys ``subject``, ``relation``
    and ``values``; labels are used where known, raw ids otherwise. Stores
    without ``all_edges`` yield an empty list.
    """
    store = self.store
    edges = store.all_edges() if hasattr(store, "all_edges") else []
    nodes = store.all_nodes() if hasattr(store, "all_nodes") else []
    label_of = {node.id: node.label for node in nodes}

    by_slot: dict[tuple, list] = {}
    for edge in edges:
        if edge.valid_to is not None:
            continue  # superseded facts are not contested
        slot = (edge.src, norm_relation(edge.relation))
        by_slot.setdefault(slot, []).append(edge)

    contested = []
    for (src, rel), members in by_slot.items():
        if len({member.dst for member in members}) <= 1:
            continue
        contested.append({
            "subject": label_of.get(src, src),
            "relation": rel,
            "values": [label_of.get(member.dst, member.dst) for member in members],
        })
    return contested
395
+
396
def explain(self, query: str) -> Explanation:
    """Report the evidence behind the answer to ``query`` without rendering it.

    Retrieves and expands the subgraph the same way ``recall`` does, then
    returns its content address, store version, confidence, freshness and the
    per-fact evidence list.
    """
    query_vec = self.projector.embed(query)
    snapshot = self.store.version()
    retrieved = self.store.retrieve(query_vec, k=self.k)
    subgraph = self._expand_subgraph(retrieved, query)
    address = content_address(query, subgraph, self.template_id, self.renderer.model_id)
    confidence, freshness = self._confidence_freshness(subgraph)
    return Explanation(
        query=query,
        version=snapshot.version,
        subgraph_hash=address,
        confidence=confidence,
        freshness=freshness,
        evidence=self._evidence(subgraph),
    )
406
+
407
+ # ----------------------------------------------------------------- sleep
408
def sleep(self) -> int:
    """Consolidation pass: drain the staging buffer and commit what it held.

    Each staged ASSIMILATE edge is checked against the latest edge seen in
    this pass for the same (subject, normalized relation) and, failing that,
    against the committed store. When a different prior edge exists, the
    staged op becomes an ACCOMMODATE targeting it; all other ops pass through
    unchanged. The combined ops are committed as a single delta.

    Afterwards the resonance engine is always asked to consolidate, and the
    store is pruned to ``max_facts`` when that is set and the store supports
    ``prune_to``.

    Returns:
        The number of staged items drained.
    """
    drained = self.staging.drain()
    if drained:
        merged: list[DeltaOp] = []
        latest: dict[tuple, str] = {}  # (src, norm_relation) -> latest edge id this pass
        for delta, _reason in drained:
            for op in delta.ops:
                if not (op.operation is Operation.ASSIMILATE and op.edge is not None):
                    merged.append(op)
                    continue
                edge = op.edge
                slot = (edge.src, norm_relation(edge.relation))
                superseded = latest.get(slot)
                if superseded is None:  # also resolve against the committed store
                    committed = self._prior_edge(edge.src, edge.relation)
                    superseded = committed.id if committed else None
                latest[slot] = edge.id
                if superseded and superseded != edge.id:
                    merged.append(DeltaOp(
                        operation=Operation.ACCOMMODATE, edge=edge,
                        target_id=superseded, reason="consolidated conflict",
                    ))
                else:
                    merged.append(op)
        if merged:
            self._commit(StateDelta(ops=merged))

    self.resonance.consolidate()  # discrete decay heartbeat → new version semantics
    if self.max_facts and hasattr(self.store, "prune_to"):
        # Bound the active working set so memory size plateaus.
        self.store.prune_to(self.max_facts)
    return len(drained)
443
+
444
+ # ----------------------------------------------------------- enterprise API
445
def subgraph_at(self, query: str, at) -> Subgraph:
    """Facts valid at a point in time (bitemporal time-travel).

    With ``at=None`` this is the same retrieval + expansion ``recall`` uses.
    Otherwise every edge from ``store.all_edges()`` whose validity interval
    ``[valid_from, valid_to)`` contains ``at`` is included, together with the
    nodes retrieved for the query and the endpoints of the included edges.

    Args:
        query: Query text used for the node retrieval.
        at: ``None``, a ``datetime``, or an ISO-8601 string (a trailing "Z" is
            accepted). Naive timestamps are treated as UTC, both for ``at``
            and for edge validity bounds.

    Returns:
        A ``Subgraph`` of the nodes and edges valid at ``at``.
    """
    from datetime import datetime, timezone

    if at is None:
        return self._expand_subgraph(self.store.retrieve(self.projector.embed(query), k=self.k), query)
    if isinstance(at, str):
        at = datetime.fromisoformat(at.replace("Z", "+00:00"))
    if at.tzinfo is None:
        at = at.replace(tzinfo=timezone.utc)

    store = self.store
    emb = self.projector.embed(query)
    live = store.retrieve(emb, k=max(self.k, 16))
    nodes_map = {n.id: n for n in live.nodes}

    # Index all nodes once instead of rescanning the store for every missing
    # edge endpoint. On duplicate ids the last node wins.
    by_id = {n.id: n for n in store.all_nodes()} if hasattr(store, "all_nodes") else {}

    edges = []
    for e in (store.all_edges() if hasattr(store, "all_edges") else []):
        vf = e.valid_from if e.valid_from.tzinfo else e.valid_from.replace(tzinfo=timezone.utc)
        vt = e.valid_to
        if vt is not None and vt.tzinfo is None:
            vt = vt.replace(tzinfo=timezone.utc)
        if vf <= at and (vt is None or at < vt):
            edges.append(e)
            for nid in (e.src, e.dst):
                if nid not in nodes_map and nid in by_id:
                    nodes_map[nid] = by_id[nid]
    return Subgraph(nodes=list(nodes_map.values()), edges=edges)
475
+
476
def recall_at(self, query: str, at=None) -> RecallResult:
    """Answer ``query`` from the facts valid at ``at`` (see ``subgraph_at``).

    The answer is always rendered fresh: the cache is neither read nor
    written, and ``cache_hit`` is False. ``edge_ids`` lists every edge of the
    time-travel subgraph.
    """
    subgraph = self.subgraph_at(query, at)
    address = content_address(query, subgraph, self.template_id, self.renderer.model_id)
    confidence, freshness = self._confidence_freshness(subgraph)
    rendered = self.renderer.render(query, subgraph)
    fields = {
        "answer": rendered,
        "cache_hit": False,
        "subgraph_hash": address,
        "version": self.store.version().version,
        "model_id": self.renderer.model_id,
        "node_ids": [node.id for node in subgraph.nodes],
        "edge_ids": [edge.id for edge in subgraph.edges],
        "confidence": confidence,
        "freshness": freshness,
    }
    return RecallResult(**fields)
488
+
489
def replay_certificate(self, query: str) -> dict:
    """Build an exportable record of an answer: answer, content address and evidence.

    Calls ``explain`` first and then ``recall``. The answer fields come from
    the recall result; the evidence list comes from the explanation, with
    each item dumped in JSON mode. ``freshness`` is an ISO-8601 string, or
    ``None`` when the recall result has no freshness.
    """
    explanation = self.explain(query)
    result = self.recall(query)
    stamp = result.freshness
    certificate = {
        "query": query,
        "answer": result.answer,
        "cache_hit": result.cache_hit,
        "subgraph_hash": result.subgraph_hash,
        "version": result.version,
        "model_id": result.model_id,
        "confidence": result.confidence,
        "freshness": stamp.isoformat() if stamp else None,
        "evidence": [],
    }
    for item in explanation.evidence:
        certificate["evidence"].append(item.model_dump(mode="json"))
    return certificate
504
+
505
def resolve_conflict(self, subject: str, relation: str, chosen_value: str) -> GraphVersion:
    """Human-in-the-loop: make ``chosen_value`` the winner for a contested fact.

    The subject is looked up by exact label, then by alias/canonical label,
    then by token overlap (when the store supports it). The chosen value is
    looked up by exact label and created as a new node when unknown. The
    conflicting prior edge is then replaced through an ACCOMMODATE op and the
    delta is committed.

    Raises:
        ValueError: If the subject cannot be found, or if no conflicting edge
            exists for the subject/relation.

    Returns:
        The version produced by the commit.
    """
    store = self.store
    subject_id = store.find_node_by_label(subject) or store.find_node_by_label(
        resolve_alias(subject, tenant_id=self.tenant_id)
    )
    if subject_id is None and hasattr(store, "find_node_by_token_overlap"):
        subject_id = store.find_node_by_token_overlap(subject, threshold=0.34)
    if subject_id is None:
        raise ValueError(f"unknown subject: {subject!r}")

    ops = []
    value_id = store.find_node_by_label(chosen_value)
    if value_id is None:
        # Unknown value: create its node as part of the same delta.
        vector = self.projector.embed(chosen_value)
        value_id = _node_id(chosen_value)
        ops.append(DeltaOp(operation=Operation.ASSIMILATE,
                           node=Node(id=value_id, label=chosen_value, embedding=vector)))

    loser = self._prior_conflicting_edge(subject_id, relation, value_id, dst_label=chosen_value)
    if loser is None:
        raise ValueError("no conflict found for that subject/relation")

    winner = Edge(id=_edge_id(subject_id, relation, value_id), src=subject_id, dst=value_id, relation=relation)
    ops.append(DeltaOp(operation=Operation.ACCOMMODATE, edge=winner, target_id=loser.id, reason="human resolved"))
    return self._commit(StateDelta(ops=ops))
prismcortex/factory.py ADDED
@@ -0,0 +1,48 @@
1
+ """Convenience builders that wire a ready-to-run Memory."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional
5
+
6
+ from .adapters.reference import (
7
+ DurableCache,
8
+ HashingProjector,
9
+ InMemoryGraphStore,
10
+ InProcessMesh,
11
+ InProcessResonance,
12
+ ListStaging,
13
+ )
14
+ from .engine import Memory
15
+
16
+
17
def reference_memory(
    *,
    model: Optional[str] = None,
    cache_path: Optional[str] = None,
    embedding_dim: int = 384,
    k: int = 8,
    max_facts: Optional[int] = None,
    llm=None,
    tenant_id: str = "default",
    resolve_threshold: float = 0.88,
    template_id: str = "render-v1",
) -> Memory:
    """A fully wired Memory: reference adapters + the real Gemini client.

    Needs `google-genai` and GEMINI_API_KEY / GOOGLE_API_KEY (extraction & rendering
    are real Gemini calls). The Gemini import is lazy so the rest of the package stays
    importable without it. Pass ``llm`` to inject a custom extractor/renderer (e.g. a
    call-counting wrapper for benchmarks).

    Args:
        model: Model name handed to ``GeminiClient``; unused when ``llm`` is given.
        cache_path: Path handed to ``DurableCache``.
        embedding_dim: Dimension handed to ``HashingProjector``.
        k: Retrieval size handed to ``Memory``.
        max_facts: Optional working-set cap handed to ``Memory``.
        llm: Object used as both extractor and renderer; a ``GeminiClient`` is
            created when omitted.
        tenant_id: Tenant scope handed to ``Memory``.
        resolve_threshold: Subject-resolution similarity threshold handed to ``Memory``.
        template_id: Render template id handed to ``Memory``.

    Returns:
        A ``Memory`` wired with the in-process reference adapters.
    """
    if llm is None:
        # Lazy import: keeps the package importable without the Gemini dependency.
        from .llm.gemini import GeminiClient

        llm = GeminiClient(model=model)
    return Memory(
        projector=HashingProjector(dim=embedding_dim),
        extractor=llm,
        renderer=llm,
        store=InMemoryGraphStore(),
        resonance=InProcessResonance(),
        cache=DurableCache(path=cache_path),
        mesh=InProcessMesh(),
        staging=ListStaging(),
        template_id=template_id,
        k=k,
        resolve_threshold=resolve_threshold,
        max_facts=max_facts,
        tenant_id=tenant_id,
    )
prismcortex/labels.py ADDED
@@ -0,0 +1,114 @@
1
+ """Label normalization and lightweight entity matching helpers.
2
+
3
+ Used by the engine and graph store so paraphrased subjects ("the deploy budget" vs
4
+ "deploy budget") and relation wording drift ("is scheduled for" vs "scheduled for")
5
+ do not fork facts or miss conflicts.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import re
11
+ from pathlib import Path
12
+
13
+ _WORD = re.compile(r"[a-z0-9]+")
14
+ _CANON_PREFIX = re.compile(r"^(?:(?:the|my|our|their|its|a|an)\s+)+", re.I)
15
+
16
+ _REL_STOP = frozenset({
17
+ "is", "are", "the", "a", "an", "of", "for", "to", "at", "in", "on", "was",
18
+ "were", "be", "been", "has", "have", "had", "by", "with", "as", "now",
19
+ })
20
+
21
+ _MONTHS = frozenset({
22
+ "january", "february", "march", "april", "may", "june", "july", "august",
23
+ "september", "october", "november", "december",
24
+ "jan", "feb", "mar", "apr", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
25
+ })
26
+
27
+
28
def canonical_label(label: str) -> str:
    """Return the lower-cased label with leading articles/possessives removed.

    Prefixes matched by ``_CANON_PREFIX`` are stripped repeatedly until none
    remain. If that leaves nothing, the trimmed, lower-cased original is
    returned instead.
    """
    trimmed = label.strip()
    core = trimmed
    prefix = _CANON_PREFIX.match(core)
    while prefix is not None:
        core = core[prefix.end():].strip()
        prefix = _CANON_PREFIX.match(core)
    lowered = core.lower()
    return lowered if lowered else trimmed.lower()
37
+
38
+
39
def content_tokens(text: str) -> set[str]:
    """Return the set of word tokens in ``text`` (lower-cased) minus stop words."""
    words = _WORD.findall(text.lower())
    return set(words) - _REL_STOP
41
+
42
+
43
def token_overlap(a: str, b: str) -> float:
    """Return the Jaccard similarity of the content tokens of ``a`` and ``b``.

    1.0 means identical token sets, 0.0 means disjoint. When either side has
    no content tokens the result is 0.0.
    """
    left = content_tokens(a)
    right = content_tokens(b)
    if not (left and right):
        return 0.0
    shared = left & right
    combined = left | right
    return len(shared) / len(combined)
49
+
50
+
51
def norm_relation(relation: str) -> str:
    """Normalize a relation phrase: lower-case word tokens, stop words removed.

    The surviving tokens are joined with single spaces. If nothing survives,
    the trimmed, lower-cased input is returned instead.
    """
    kept = " ".join(
        word for word in _WORD.findall(relation.lower()) if word not in _REL_STOP
    )
    if kept:
        return kept
    return relation.strip().lower()
54
+
55
+
56
def relations_compatible(a: str, b: str) -> bool:
    """Return True when two relation phrasings likely describe the same fact slot.

    The relations are compatible when their normalized forms are equal, or
    when those forms share at least one content token.
    """
    first = norm_relation(a)
    second = norm_relation(b)
    if first == second:
        return True
    tokens_a = content_tokens(first)
    tokens_b = content_tokens(second)
    return not tokens_a.isdisjoint(tokens_b)
63
+
64
+
65
def looks_like_correctable_value(label: str) -> bool:
    """Dates, amounts, durations, and other facts that get corrected over time.

    A label qualifies when, after trimming and lower-casing, it
      * contains any digit (this covers amounts, dates and durations such as
        "3 days" — a separate duration pattern could never add a match,
        because every duration it recognizes already contains a digit),
      * is exactly a month name or month abbreviation, or
      * starts with a currency symbol (``$``, ``€``, ``£``).
    """
    s = label.strip().lower()
    if any(ch.isdigit() for ch in s):
        return True
    return s in _MONTHS or s.startswith(("$", "€", "£"))
77
+
78
+
79
+ _alias_to_canon: dict[str, dict[str, str]] = {}
80
+ _canon_aliases: dict[str, dict[str, set[str]]] = {}
81
+
82
+
83
def register_alias(canonical: str, alias: str, *, tenant_id: str = "default") -> None:
    """Register ``alias`` as another name for ``canonical`` within a tenant.

    The canonical side is normalized with ``canonical_label``; the alias is
    trimmed and lower-cased. Both the alias -> canonical map and the
    canonical -> aliases reverse index are updated.

    Re-registering an alias under a different canonical moves it: the alias
    is removed from the previous canonical's reverse entry, so the snapshot
    never lists one alias under two canonicals. Aliases that are empty after
    trimming are ignored, since they would map every blank label to this
    canonical.
    """
    canon = canonical_label(canonical)
    al = alias.strip().lower()
    if not al:
        return

    forward = _alias_to_canon.setdefault(tenant_id, {})
    reverse = _canon_aliases.setdefault(tenant_id, {})

    previous = forward.get(al)
    if previous is not None and previous != canon:
        stale = reverse.get(previous)
        if stale is not None:
            stale.discard(al)
            if not stale:
                # Keep the invariant that every listed canonical has an alias.
                del reverse[previous]

    forward[al] = canon
    reverse.setdefault(canon, set()).add(al)
88
+
89
+
90
def resolve_alias(label: str, *, tenant_id: str = "default") -> str:
    """Map a label to its canonical form for the given tenant.

    If the trimmed, lower-cased label is a registered alias, its canonical
    label is returned; otherwise the result of ``canonical_label(label)``.
    """
    tenant_map = _alias_to_canon.get(tenant_id, {})
    hit = tenant_map.get(label.strip().lower())
    return hit if hit else canonical_label(label)
96
+
97
+
98
def aliases_snapshot(*, tenant_id: str = "default") -> dict[str, list[str]]:
    """Return a copy of the tenant's aliases as ``{canonical: sorted aliases}``."""
    snapshot: dict[str, list[str]] = {}
    for canon, names in _canon_aliases.get(tenant_id, {}).items():
        snapshot[canon] = sorted(names)
    return snapshot
100
+
101
+
102
def load_aliases(path: str, *, tenant_id: str = "default") -> None:
    """Load aliases from a JSON file and register them for the tenant.

    The file is expected to hold an object mapping each canonical label to a
    list of aliases (the format ``save_aliases`` writes). A bare string value
    is treated as a single alias rather than being iterated character by
    character. A missing file is ignored.
    """
    p = Path(path)
    if not p.exists():
        return
    data = json.loads(p.read_text(encoding="utf-8"))
    for canon, aliases in data.items():
        if isinstance(aliases, str):
            # Hand-edited files may give one alias as a plain string.
            aliases = [aliases]
        for al in aliases:
            register_alias(canon, al, tenant_id=tenant_id)
109
+
110
+
111
def save_aliases(path: str, *, tenant_id: str = "default") -> None:
    """Write the tenant's alias snapshot to ``path`` as indented JSON (UTF-8).

    Missing parent directories are created first.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(aliases_snapshot(tenant_id=tenant_id), indent=2)
    target.write_text(payload, encoding="utf-8")