superlocalmemory 3.2.1 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/CHANGELOG.md +23 -1
  2. package/README.md +61 -1
  3. package/package.json +1 -1
  4. package/pyproject.toml +26 -1
  5. package/src/superlocalmemory/attribution/signer.py +6 -1
  6. package/src/superlocalmemory/core/config.py +114 -1
  7. package/src/superlocalmemory/core/consolidation_engine.py +595 -0
  8. package/src/superlocalmemory/core/embeddings.py +0 -1
  9. package/src/superlocalmemory/core/engine.py +164 -674
  10. package/src/superlocalmemory/core/engine_wiring.py +474 -0
  11. package/src/superlocalmemory/core/graph_analyzer.py +199 -0
  12. package/src/superlocalmemory/core/recall_pipeline.py +247 -0
  13. package/src/superlocalmemory/core/store_pipeline.py +483 -0
  14. package/src/superlocalmemory/core/worker_pool.py +35 -12
  15. package/src/superlocalmemory/encoding/auto_linker.py +308 -0
  16. package/src/superlocalmemory/encoding/context_generator.py +175 -0
  17. package/src/superlocalmemory/encoding/temporal_validator.py +513 -0
  18. package/src/superlocalmemory/hooks/auto_invoker.py +484 -0
  19. package/src/superlocalmemory/retrieval/channel_registry.py +154 -0
  20. package/src/superlocalmemory/retrieval/engine.py +12 -0
  21. package/src/superlocalmemory/retrieval/semantic_channel.py +87 -3
  22. package/src/superlocalmemory/retrieval/spreading_activation.py +311 -0
  23. package/src/superlocalmemory/retrieval/strategy.py +6 -6
  24. package/src/superlocalmemory/retrieval/vector_store.py +386 -0
  25. package/src/superlocalmemory/server/routes/v3_api.py +576 -0
  26. package/src/superlocalmemory/storage/access_log.py +169 -0
  27. package/src/superlocalmemory/storage/database.py +288 -0
  28. package/src/superlocalmemory/storage/schema.py +10 -0
  29. package/src/superlocalmemory/storage/schema_v32.py +252 -0
  30. package/src/superlocalmemory/storage/v2_migrator.py +24 -2
@@ -0,0 +1,483 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Store pipeline — extracted free functions for MemoryEngine.store().
6
+
7
+ Direction: engine.py imports this module. This module NEVER imports engine.py.
8
+
9
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ from typing import TYPE_CHECKING, Any
16
+
17
+ if TYPE_CHECKING:
18
+ from superlocalmemory.core.config import SLMConfig
19
+ from superlocalmemory.core.hooks import HookRegistry
20
+ from superlocalmemory.storage.database import DatabaseManager
21
+
22
+ from superlocalmemory.storage.models import (
23
+ AtomicFact, FactType, MemoryRecord,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # enrich_fact (was MemoryEngine._enrich_fact)
31
+ # ---------------------------------------------------------------------------
32
+
33
def enrich_fact(
    fact: AtomicFact,
    record: MemoryRecord,
    profile_id: str,
    *,
    embedder: Any,
    entity_resolver: Any,
    temporal_parser: Any,
) -> AtomicFact:
    """Return an enriched copy of *fact* bound to *record* and *profile_id*.

    The input fact is not mutated; a new ``AtomicFact`` is built from it.

    Args:
        fact: Extracted fact supplying content, type, entities and any
            dates it already carries.
        record: Parent memory record; supplies ``memory_id``,
            ``session_id`` and the fallback ``session_date``.
        profile_id: Profile the enriched fact is stored under.
        embedder: Optional embedder. When falsy, the embedding and both
            Fisher parameters are left as ``None``.
        entity_resolver: Optional resolver. Consulted only when the fact
            has entities; otherwise ``canonical_entities`` is empty.
        temporal_parser: Optional parser. When falsy, no dates are
            extracted from the text.

    Returns:
        A new ``AtomicFact`` carrying embedding, Fisher parameters,
        canonical entities, temporal fields, emotional valence/arousal,
        signal type, and an importance boosted by emotion (capped at 1.0).
    """
    from superlocalmemory.encoding.emotional import tag_emotion, emotional_importance_boost
    from superlocalmemory.encoding.signal_inference import infer_signal

    text = fact.content

    # Fisher parameters are derived from the embedding, so they are only
    # computed when an embedder exists and produced a non-empty vector.
    vector = embedder.embed(text) if embedder else None
    if embedder and vector:
        mean, variance = embedder.compute_fisher_params(vector)
    else:
        mean, variance = None, None

    if entity_resolver and fact.entities:
        resolved = entity_resolver.resolve(fact.entities, profile_id)
    else:
        resolved = {}

    dates = temporal_parser.extract_dates_from_text(text) if temporal_parser else {}

    emotion = tag_emotion(text)
    signal = infer_signal(text)

    # Values already present on the fact win over record / parsed fallbacks.
    return AtomicFact(
        fact_id=fact.fact_id,
        memory_id=record.memory_id,
        profile_id=profile_id,
        content=text,
        fact_type=fact.fact_type,
        entities=fact.entities,
        canonical_entities=list(resolved.values()),
        observation_date=fact.observation_date or record.session_date,
        referenced_date=fact.referenced_date or dates.get("referenced_date"),
        interval_start=fact.interval_start or dates.get("interval_start"),
        interval_end=fact.interval_end or dates.get("interval_end"),
        confidence=fact.confidence,
        importance=min(1.0, fact.importance + emotional_importance_boost(emotion)),
        evidence_count=fact.evidence_count,
        source_turn_ids=fact.source_turn_ids,
        session_id=record.session_id,
        embedding=vector,
        fisher_mean=mean,
        fisher_variance=variance,
        emotional_valence=emotion.valence,
        emotional_arousal=emotion.arousal,
        signal_type=signal,
        created_at=fact.created_at,
    )
79
+
80
+
81
+ # ---------------------------------------------------------------------------
82
+ # run_store (was MemoryEngine.store)
83
+ # ---------------------------------------------------------------------------
84
+
85
# Confidence adjustments applied to stored opinions during consolidation.
_OPINION_REINFORCE_STEP = 0.1
_OPINION_DECAY_STEP = 0.2
# A contradiction must exceed this severity to produce a SUPERSEDES edge.
_SHEAF_SEVERITY_THRESHOLD = 0.45


def _track_opinion_confidence(fact: AtomicFact, action: Any, db: Any) -> None:
    """Reinforce (update) or decay (supersede) the confidence of a stored opinion."""
    if fact.fact_type != FactType.OPINION:
        return
    kind = action.action_type.value
    try:
        if kind == "update":
            existing = db.get_fact(action.new_fact_id)
            if existing and existing.fact_type == FactType.OPINION:
                new_conf = min(1.0, existing.confidence + _OPINION_REINFORCE_STEP)
                db.update_fact(action.new_fact_id, {"confidence": new_conf})
        elif kind == "supersede":
            old_id = getattr(action, "old_fact_id", None)
            if old_id:
                old_fact = db.get_fact(old_id)
                if old_fact:
                    new_conf = max(0.0, old_fact.confidence - _OPINION_DECAY_STEP)
                    db.update_fact(old_id, {"confidence": new_conf})
    except Exception as exc:
        # Best-effort bookkeeping: never fail the store, but leave a trace.
        logger.debug(
            "Opinion confidence tracking skipped for %s: %s", fact.fact_id, exc,
        )


def _refresh_consolidated_fact(
    action: Any,
    profile_id: str,
    *,
    db: Any,
    graph_builder: Any,
    observation_builder: Any,
) -> None:
    """Rebuild graph edges and entity profiles for a fact the consolidator rewrote."""
    updated_fact = db.get_fact(action.new_fact_id)
    if not updated_fact:
        return
    if graph_builder:
        graph_builder.build_edges(updated_fact, profile_id)
    if observation_builder:
        for eid in updated_fact.canonical_entities:
            observation_builder.update_profile(eid, updated_fact, profile_id)


def _index_embedding(
    fact: AtomicFact, profile_id: str, *, ann_index: Any, vector_store: Any,
) -> None:
    """Dual-write the fact embedding to the ANN index and the VectorStore."""
    if not fact.embedding:
        return
    if ann_index:
        ann_index.add(fact.fact_id, fact.embedding)
    # V3.2: VectorStore upsert (sqlite-vec) -- dual-write (Rule 12)
    if vector_store and vector_store.available:
        vector_store.upsert(
            fact_id=fact.fact_id,
            profile_id=profile_id,
            embedding=fact.embedding,
        )


def _store_fact_context(
    fact: AtomicFact,
    profile_id: str,
    *,
    config: Any,
    db: Any,
    context_generator: Any,
) -> None:
    """Generate and persist the contextual description for a fact (best effort)."""
    if not context_generator:
        return
    import json

    try:
        ctx_result = context_generator.generate(fact, config.mode.value)
        db.store_fact_context(
            fact_id=fact.fact_id,
            profile_id=profile_id,
            contextual_description=ctx_result.description,
            keywords=json.dumps(ctx_result.keywords),
            generated_by=ctx_result.generated_by,
        )
    except Exception as exc:
        logger.debug("Context generation skipped for %s: %s", fact.fact_id, exc)


def _check_sheaf_consistency(
    fact: AtomicFact,
    profile_id: str,
    *,
    config: Any,
    db: Any,
    sheaf_checker: Any,
) -> None:
    """Store SUPERSEDES edges for contradictions found by the sheaf checker."""
    if not (sheaf_checker and fact.embedding and fact.canonical_entities):
        return
    from superlocalmemory.storage.models import EdgeType, GraphEdge

    try:
        edges_for_fact = db.get_edges_for_node(fact.fact_id, profile_id)
        # Skip the check once the node has reached the configured edge limit.
        if len(edges_for_fact) >= config.math.sheaf_max_edges_per_check:
            return
        for contradiction in sheaf_checker.check_consistency(fact, profile_id):
            if contradiction.severity > _SHEAF_SEVERITY_THRESHOLD:
                db.store_edge(GraphEdge(
                    profile_id=profile_id,
                    source_id=fact.fact_id,
                    target_id=contradiction.fact_id_b,
                    edge_type=EdgeType.SUPERSEDES,
                    weight=contradiction.severity,
                ))
    except Exception as exc:
        logger.debug("Sheaf check skipped: %s", exc)


def _validate_temporal(
    fact: AtomicFact, profile_id: str, *, db: Any, temporal_validator: Any,
) -> None:
    """Record temporal validity for the fact and invalidate contradicted facts."""
    if not temporal_validator:
        return
    try:
        db.store_temporal_validity(
            fact_id=fact.fact_id,
            profile_id=profile_id,
            valid_from=fact.observation_date,
            valid_until=None,
        )
        invalidations = temporal_validator.validate_and_invalidate(
            new_fact=fact,
            profile_id=profile_id,
        )
        if invalidations:
            logger.info(
                "Temporal: %d facts invalidated by new fact %s",
                len(invalidations), fact.fact_id,
            )
    except Exception as exc:
        logger.debug(
            "Temporal validation skipped for fact %s: %s",
            fact.fact_id, exc,
        )


def _update_entity_state(
    fact: AtomicFact, profile_id: str, *, db: Any, observation_builder: Any,
) -> None:
    """Refresh entity profiles and bump fact_count for each canonical entity."""
    if observation_builder:
        for eid in fact.canonical_entities:
            observation_builder.update_profile(eid, fact, profile_id)

    for eid in fact.canonical_entities:
        try:
            db.increment_entity_fact_count(eid)
        except Exception as exc:
            # Non-critical — entity may have been deleted
            logger.debug("fact_count increment skipped for entity %s: %s", eid, exc)


def _record_temporal_events(fact: AtomicFact, profile_id: str, *, db: Any) -> None:
    """Write one temporal event per canonical entity when the fact carries dates."""
    has_dates = (fact.observation_date or fact.referenced_date
                 or fact.interval_start)
    if not (fact.canonical_entities and has_dates):
        return
    from superlocalmemory.storage.models import TemporalEvent

    for eid in fact.canonical_entities:
        db.store_temporal_event(TemporalEvent(
            profile_id=profile_id,
            entity_id=eid,
            fact_id=fact.fact_id,
            observation_date=fact.observation_date,
            referenced_date=fact.referenced_date,
            interval_start=fact.interval_start,
            interval_end=fact.interval_end,
            description=fact.content[:200],
        ))


def _record_foresight(fact: AtomicFact, profile_id: str, *, db: Any) -> None:
    """Persist time-bounded predictions extracted from the fact (best effort)."""
    try:
        from superlocalmemory.encoding.foresight import extract_foresight_signals
        from superlocalmemory.storage.models import TemporalEvent

        for sig in extract_foresight_signals(fact):
            db.store_temporal_event(TemporalEvent(
                profile_id=profile_id,
                entity_id=sig.get("entity_id", ""),
                fact_id=fact.fact_id,
                interval_start=sig.get("start_time"),
                interval_end=sig.get("end_time"),
                description=sig.get("description", ""),
            ))
    except Exception as exc:
        logger.debug("Foresight extraction: %s", exc)


def run_store(
    content: str,
    profile_id: str,
    session_id: str = "",
    session_date: str | None = None,
    speaker: str = "",
    role: str = "user",
    metadata: dict[str, Any] | None = None,
    *,
    config: SLMConfig,
    db: DatabaseManager,
    embedder: Any,
    fact_extractor: Any,
    entity_resolver: Any,
    temporal_parser: Any,
    type_router: Any,
    graph_builder: Any,
    consolidator: Any,
    observation_builder: Any,
    scene_builder: Any,
    entropy_gate: Any,
    ann_index: Any,
    sheaf_checker: Any,
    retrieval_engine: Any,
    provenance: Any,
    hooks: HookRegistry,
    vector_store: Any = None,
    temporal_validator: Any = None,
    auto_linker: Any = None,
    context_generator: Any = None,
    consolidation_engine: Any = None,
) -> list[str]:
    """Store content and extract structured facts. Returns fact_ids.

    Pipeline, in order: pre-hooks, entropy gate, memory record, fact
    extraction and type routing, then per fact: enrichment, consolidation
    (or direct store), embedding indexing, context generation, graph
    edges, auto-linking, sheaf check, temporal validation, entity
    updates, scene assignment, temporal events, foresight, BM25 and
    provenance. Post-hooks and the consolidation step counter run last.

    Every collaborator typed ``Any`` is optional: a falsy value disables
    that stage. Stages wrapped in ``try`` are best effort and only log
    their failure at debug level.

    Args:
        content: Raw text to store.
        profile_id: Profile that owns the memory and its facts.
        session_id: Session identifier attached to record and facts.
        session_date: Unparsed session date; parsed only when truthy.
        speaker: Speaker name, also used as provenance ``created_by``.
        role: Role stored on the memory record.
        metadata: Optional record metadata; ``agent_id`` is read from it
            for the hook context.

    Returns:
        IDs of the facts stored or updated. An empty list is returned
        when the entropy gate rejects the content or no facts are
        extracted; post-hooks are not run in either case.
    """
    # Pre-operation hooks (trust gate, ABAC, rate limiter)
    hook_ctx = {
        "operation": "store",
        "agent_id": metadata.get("agent_id", "unknown") if metadata else "unknown",
        "profile_id": profile_id,
        "content_preview": content[:100],
    }
    hooks.run_pre("store", hook_ctx)

    if entropy_gate and not entropy_gate.should_pass(content):
        return []

    from superlocalmemory.encoding.temporal_parser import TemporalParser
    parser = temporal_parser or TemporalParser()
    parsed_date = parser.parse_session_date(session_date) if session_date else None

    record = MemoryRecord(
        profile_id=profile_id, content=content,
        session_id=session_id, speaker=speaker, role=role,
        session_date=parsed_date, metadata=metadata or {},
    )
    db.store_memory(record)

    facts = fact_extractor.extract_facts(
        turns=[content], session_id=session_id,
        session_date=parsed_date, speaker_a=speaker,
    )
    if not facts:
        return []

    if type_router:
        facts = type_router.route_facts(facts)

    # Loop-invariant: the BM25 index hangs off the retrieval engine.
    bm25 = getattr(retrieval_engine, '_bm25', None) if retrieval_engine else None

    stored_ids: list[str] = []
    for fact in facts:
        # NOTE(review): enrich_fact receives the caller-supplied
        # temporal_parser, not the fallback `parser` above, so no dates are
        # extracted from text when none was injected — confirm intended.
        fact = enrich_fact(
            fact, record, profile_id,
            embedder=embedder,
            entity_resolver=entity_resolver,
            temporal_parser=temporal_parser,
        )

        if consolidator:
            action = consolidator.consolidate(fact, profile_id)
            kind = action.action_type.value
            if kind == "noop":
                continue

            # Opinion confidence tracking: reinforce or decay
            _track_opinion_confidence(fact, action, db)

            if kind in ("update", "supersede"):
                _refresh_consolidated_fact(
                    action, profile_id,
                    db=db,
                    graph_builder=graph_builder,
                    observation_builder=observation_builder,
                )
                stored_ids.append(action.new_fact_id)
                continue
            # ADD case: consolidator already stored the fact (F8 fix)
            # Fall through to post-processing below
        else:
            db.store_fact(fact)

        stored_ids.append(fact.fact_id)

        _index_embedding(
            fact, profile_id, ann_index=ann_index, vector_store=vector_store,
        )

        # Phase 2: Generate contextual description (after consolidator, before graph_builder)
        _store_fact_context(
            fact, profile_id,
            config=config, db=db, context_generator=context_generator,
        )

        if graph_builder:
            graph_builder.build_edges(fact, profile_id)

        # Phase 3: AutoLinker creates association_edges (AFTER GraphBuilder)
        if auto_linker is not None:
            try:
                auto_linker.link_new_fact(fact, profile_id)
            except Exception as exc:
                logger.debug("AutoLinker.link_new_fact: %s", exc)

        # Sheaf consistency check (runs after edges exist)
        _check_sheaf_consistency(
            fact, profile_id, config=config, db=db, sheaf_checker=sheaf_checker,
        )

        # Phase 4: Temporal validation and contradiction detection
        _validate_temporal(
            fact, profile_id, db=db, temporal_validator=temporal_validator,
        )

        _update_entity_state(
            fact, profile_id, db=db, observation_builder=observation_builder,
        )

        if scene_builder:
            scene_builder.assign_to_scene(fact, profile_id)

        # Populate temporal_events for temporal retrieval
        _record_temporal_events(fact, profile_id, db=db)

        # Foresight: extract time-bounded predictions
        _record_foresight(fact, profile_id, db=db)

        # Persist BM25 tokens at ingestion
        if bm25:
            bm25.add(fact.fact_id, fact.content, profile_id)

        # Record provenance for data lineage (EU AI Act Art. 10)
        if provenance:
            try:
                provenance.record(
                    fact_id=fact.fact_id,
                    profile_id=profile_id,
                    source_type="store",
                    source_id=session_id,
                    created_by=speaker or "unknown",
                )
            except Exception as exc:
                logger.debug(
                    "Provenance record skipped for %s: %s", fact.fact_id, exc,
                )

    logger.info("Stored %d facts (session=%s)", len(stored_ids), session_id)

    # Post-operation hooks (audit, trust signal, event bus)
    hook_ctx["fact_ids"] = stored_ids
    hook_ctx["fact_count"] = len(stored_ids)
    hooks.run_post("store", hook_ctx)

    # Phase 5: Step-count trigger for lightweight consolidation (L7)
    if consolidation_engine is not None:
        try:
            consolidation_engine.increment_store_count(profile_id)
        except Exception as _cons_exc:
            logger.debug("Consolidation step-count trigger: %s", _cons_exc)

    return stored_ids
369
+
370
+
371
+ # ---------------------------------------------------------------------------
372
+ # run_store_fact_direct (was MemoryEngine.store_fact_direct)
373
+ # ---------------------------------------------------------------------------
374
+
375
def run_store_fact_direct(
    fact: AtomicFact,
    profile_id: str,
    *,
    db: DatabaseManager,
    embedder: Any,
    entity_resolver: Any,
    ann_index: Any,
    graph_builder: Any,
    retrieval_engine: Any,
    vector_store: Any = None,
) -> str:
    """Store a pre-built fact with full enrichment.

    Ensures embedding, Fisher params, canonical entities, BM25 tokens,
    and graph edges are all populated — even for auxiliary data.
    Creates a parent memory record to satisfy FK constraint.

    The fact is mutated in place (``memory_id``, ``embedding``, Fisher
    parameters, ``canonical_entities``) before being written.

    Args:
        fact: Fact to persist.
        profile_id: Profile that owns the fact.
        db: Database used for the parent record and the fact itself.
        embedder: Optional embedder. When falsy, an existing embedding
            is kept as-is and no Fisher parameters are computed.
        entity_resolver: Optional resolver, used only when the fact has
            entities.
        ann_index: Optional ANN index that receives the embedding.
        graph_builder: Optional graph builder run after the fact is stored.
        retrieval_engine: Optional engine whose ``_bm25`` attribute, if
            present and truthy, indexes the fact content.
        vector_store: Optional store that receives the embedding when
            its ``available`` attribute is truthy.

    Returns:
        The ``fact_id`` of the stored fact.
    """
    # Create parent memory record (FK: atomic_facts.memory_id → memories.memory_id)
    if not fact.memory_id:
        record = MemoryRecord(
            profile_id=profile_id,
            content=fact.content[:500],
            session_id=fact.session_id,
        )
        db.store_memory(record)
        fact.memory_id = record.memory_id

    # Fisher parameters come from the embedder, so a fact that arrives
    # pre-embedded must not reach compute_fisher_params without one.
    if embedder:
        if not fact.embedding:
            fact.embedding = embedder.embed(fact.content)
        if fact.embedding:
            fact.fisher_mean, fact.fisher_variance = (
                embedder.compute_fisher_params(fact.embedding)
            )

    if entity_resolver and fact.entities:
        canonical = entity_resolver.resolve(fact.entities, profile_id)
        fact.canonical_entities = list(canonical.values())

    db.store_fact(fact)

    if fact.embedding:
        if ann_index:
            ann_index.add(fact.fact_id, fact.embedding)
        # V3.2: VectorStore upsert (dual-write)
        if vector_store and vector_store.available:
            vector_store.upsert(
                fact_id=fact.fact_id,
                profile_id=profile_id,
                embedding=fact.embedding,
            )

    if graph_builder:
        graph_builder.build_edges(fact, profile_id)

    # BM25 indexing
    bm25 = getattr(retrieval_engine, '_bm25', None) if retrieval_engine else None
    if bm25:
        bm25.add(fact.fact_id, fact.content, profile_id)
    return fact.fact_id
431
+
432
+
433
+ # ---------------------------------------------------------------------------
434
+ # run_close_session (was MemoryEngine.close_session)
435
+ # ---------------------------------------------------------------------------
436
+
437
def run_close_session(
    session_id: str,
    profile_id: str,
    *,
    db: DatabaseManager,
) -> int:
    """Write per-entity summary events for a finished session.

    All facts of the profile are loaded and filtered down to those whose
    ``session_id`` matches. They are grouped by canonical entity, and one
    temporal event is stored per entity. Each event description joins the
    first 80 characters of up to five facts and is capped at 500
    characters. Facts without canonical entities produce no event.

    Args:
        session_id: Session to summarise.
        profile_id: Profile whose facts are scanned.
        db: Database used to read facts and store the summary events.

    Returns:
        Number of session summary events created (0 when the session
        has no facts).
    """
    from superlocalmemory.storage.models import TemporalEvent

    in_session = [
        fact for fact in db.get_all_facts(profile_id)
        if fact.session_id == session_id
    ]
    if len(in_session) == 0:
        return 0

    # Group by entity, preserving first-seen entity order.
    by_entity: dict[str, list[AtomicFact]] = {}
    for fact in in_session:
        for entity_id in fact.canonical_entities:
            if entity_id not in by_entity:
                by_entity[entity_id] = []
            by_entity[entity_id].append(fact)

    # Every summary event is dated by the session's first fact.
    observed_on = in_session[0].observation_date or ""
    for entity_id, members in by_entity.items():
        snippets = "; ".join(member.content[:80] for member in members[:5])
        description = f"Session {session_id}: " + snippets
        db.store_temporal_event(TemporalEvent(
            profile_id=profile_id,
            entity_id=entity_id,
            fact_id=members[0].fact_id,
            observation_date=observed_on,
            description=description[:500],
        ))

    # One event is stored per entity group.
    created = len(by_entity)
    logger.info(
        "Session %s closed: %d summary events for %d facts",
        session_id, created, len(in_session),
    )
    return created
@@ -155,19 +155,13 @@ class WorkerPool:
155
155
  self._proc.stdin.write(req_line)
156
156
  self._proc.stdin.flush()
157
157
 
158
- # Read response with timeout
159
- import selectors
160
- sel = selectors.DefaultSelector()
161
- sel.register(self._proc.stdout, selectors.EVENT_READ)
162
- ready = sel.select(timeout=timeout)
163
- sel.close()
164
-
165
- if not ready:
166
- logger.error("Worker timed out after %ds", _REQUEST_TIMEOUT)
167
- self._kill()
168
- return {"ok": False, "error": "Worker timed out"}
158
+ # Read response with timeout using a thread.
159
+ # selectors/select do NOT work with pipes on Windows,
160
+ # so we use the same thread-based approach as EmbeddingService.
161
+ resp_line = self._readline_with_timeout(
162
+ self._proc.stdout, timeout,
163
+ )
169
164
 
170
- resp_line = self._proc.stdout.readline()
171
165
  if not resp_line:
172
166
  logger.warning("Worker returned empty, restarting. Run 'slm doctor' to diagnose.")
173
167
  self._kill()
@@ -181,6 +175,35 @@ class WorkerPool:
181
175
  self._kill()
182
176
  return {"ok": False, "error": str(exc)}
183
177
 
178
@staticmethod
def _readline_with_timeout(stream, timeout_seconds: float) -> str:
    """Read one line from *stream*, waiting at most *timeout_seconds*.

    The read runs on a daemon thread so the caller is never blocked
    longer than the timeout. This is the cross-platform replacement for
    ``selectors``, which fails on Windows pipes.

    Args:
        stream: Object exposing ``readline()``.
        timeout_seconds: Maximum time to wait for the reader thread.

    Returns:
        The line read, or ``""`` when the timeout expires or the read
        produced no result. A timeout is therefore indistinguishable
        from an empty read. On timeout the reader thread is left
        running in the background.

    Raises:
        Exception: Whatever ``stream.readline()`` raised, re-raised in
            the calling thread.
    """
    lines: list[str] = []
    failures: list[Exception] = []

    def _read() -> None:
        # Runs on the reader thread; hand the outcome back via the lists.
        try:
            lines.append(stream.readline())
        except Exception as exc:
            failures.append(exc)

    # The thread is named so a stuck reader is identifiable in thread dumps.
    reader = threading.Thread(
        target=_read, name="slm-worker-readline", daemon=True,
    )
    reader.start()
    reader.join(timeout=timeout_seconds)

    if reader.is_alive():
        return ""
    if failures:
        raise failures[0]
    return lines[0] if lines else ""
206
+
184
207
  def _ensure_worker(self) -> None:
185
208
  """Spawn worker if not running."""
186
209
  if self._proc is not None and self._proc.poll() is None: