superlocalmemory 3.4.35 → 3.4.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -66,6 +66,75 @@ class ObserveRequest(BaseModel):
66
66
  content: str
67
67
 
68
68
 
69
+ # ---------------------------------------------------------------------------
70
+ # V3.4.37: Engine recall adapter — routes QueueConsumer through the daemon's
71
+ # in-process MemoryEngine instead of spawning a recall_worker subprocess.
72
+ # Saves ~800 MB by eliminating the duplicate engine.
73
+ # ---------------------------------------------------------------------------
74
+
75
class EngineRecallAdapter:
    """Adapts MemoryEngine.recall() to RecallPoolProtocol for QueueConsumer.

    The daemon already has a full MemoryEngine in-process. The QueueConsumer
    previously routed through WorkerPool → recall_worker subprocess, which
    loaded a SECOND MemoryEngine. This adapter eliminates that duplication
    by calling the in-process engine and flattening its response into a
    plain ``dict`` of built-in types.
    """

    # Maximum number of characters of fact content echoed back per result.
    _CONTENT_PREVIEW_CHARS = 300

    def __init__(self, engine) -> None:
        """Store the engine whose ``recall()`` and ``_db`` this adapter uses."""
        self._engine = engine

    def recall(self, query: str, limit: int = 10, session_id: str = "") -> dict:
        """Run a recall on the wrapped engine and return a plain-dict payload.

        Args:
            query: Text to search for; echoed back under ``"query"``.
            limit: Forwarded to the engine and also used to cap the number of
                results that are serialized.
            session_id: Forwarded to the engine; an empty string is passed
                on as ``None``.

        Returns:
            A dict with ``"ok"``, ``"query"``, ``"query_type"``,
            ``"result_count"``, ``"retrieval_time_ms"``,
            ``"channel_weights"``, ``"total_candidates"`` and ``"results"``
            (one dict per hit, see ``_serialize_hit``).
        """
        response = self._engine.recall(
            query, limit=limit, session_id=session_id or None,
        )

        # Slice once and reuse; tolerate a response whose results are None.
        hits = list(response.results or [])[:limit]

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # batch lookup receives the same ID sequence on every run (a set
        # would reorder string IDs from one process to the next).
        memory_ids = list(dict.fromkeys(
            hit.fact.memory_id for hit in hits if hit.fact.memory_id
        ))

        # NOTE: reaches into the engine's private ``_db`` handle. The lookup
        # is skipped entirely when no hit carries a memory_id.
        memory_map = (
            self._engine._db.get_memory_content_batch(memory_ids)
            if memory_ids else {}
        ) or {}

        results = [self._serialize_hit(hit, memory_map) for hit in hits]

        return {
            "ok": True,
            "query": query,
            "query_type": response.query_type,
            "result_count": len(results),
            "retrieval_time_ms": round(response.retrieval_time_ms, 1),
            "channel_weights": {
                name: round(weight, 3)
                for name, weight in (response.channel_weights or {}).items()
            },
            "total_candidates": getattr(response, "total_candidates", 0),
            "results": results,
        }

    @staticmethod
    def _enum_value(member):
        """Return ``member.value`` for a truthy object exposing it, else ``""``."""
        if member and hasattr(member, "value"):
            return member.value
        return ""

    @classmethod
    def _serialize_hit(cls, hit, memory_map) -> dict:
        """Flatten one recall result into a dict of built-in types.

        ``memory_map`` maps memory_id to source content; hits whose
        memory_id is absent from it get an empty ``"source_content"``.
        """
        fact = hit.fact
        return {
            "fact_id": fact.fact_id,
            "memory_id": fact.memory_id,
            # A fact with None content yields "" instead of raising on the
            # slice and aborting the whole recall.
            "content": (fact.content or "")[:cls._CONTENT_PREVIEW_CHARS],
            "source_content": memory_map.get(fact.memory_id, ""),
            "score": round(hit.score, 4),
            "confidence": round(hit.confidence, 4),
            "trust_score": round(hit.trust_score, 4),
            "channel_scores": {
                name: round(value, 4)
                for name, value in (hit.channel_scores or {}).items()
            },
            "fact_type": cls._enum_value(getattr(fact, "fact_type", None)),
            "lifecycle": cls._enum_value(getattr(fact, "lifecycle", None)),
            "access_count": getattr(fact, "access_count", 0),
            "evidence_chain": list(getattr(hit, "evidence_chain", []) or []),
        }
136
+
137
+
69
138
  # ---------------------------------------------------------------------------
70
139
  # v3.4.32: Recall-priority gate for the pending materializer.
71
140
  # All /remember writes go to pending.db and return fast; a background
@@ -397,9 +466,10 @@ async def lifespan(application: FastAPI):
397
466
  # Set up observe buffer
398
467
  _observe_buffer.set_engine(engine)
399
468
 
400
- # Pre-warm workers (background)
401
- from superlocalmemory.core.worker_pool import WorkerPool
402
- WorkerPool.shared().warmup()
469
+ # V3.4.37: Removed WorkerPool.warmup() — the recall_worker subprocess
470
+ # duplicated the daemon's MemoryEngine (800+ MB). QueueConsumer now
471
+ # uses the daemon's engine directly via EngineRecallAdapter.
472
+ # WorkerPool is still available as fallback for dashboard/chat routes.
403
473
 
404
474
  # Force reranker warmup
405
475
  retrieval_eng = getattr(engine, '_retrieval_engine', None)
@@ -422,8 +492,9 @@ async def lifespan(application: FastAPI):
422
492
  logger.warning("Embedding warmup failed: %s", exc)
423
493
  threading.Thread(target=_warmup_embedder, daemon=True, name="embed-warmup").start()
424
494
 
425
- # v3.4.26: Start QueueConsumer drains recall_queue.db via pool.recall().
426
- # Must start AFTER WorkerPool.warmup() so the worker is ready.
495
+ # v3.4.37: QueueConsumer uses daemon's engine directly via adapter.
496
+ # Previously routed through WorkerPool recall_worker subprocess,
497
+ # which loaded a duplicate MemoryEngine (~800 MB waste).
427
498
  try:
428
499
  from pathlib import Path as _QP
429
500
  from superlocalmemory.core.queue_consumer import QueueConsumer
@@ -432,12 +503,23 @@ async def lifespan(application: FastAPI):
432
503
  _recall_queue = RecallQueue(_queue_db)
433
504
  _queue_consumer = QueueConsumer(
434
505
  queue=_recall_queue,
435
- pool=WorkerPool.shared(),
506
+ pool=EngineRecallAdapter(engine),
436
507
  )
437
508
  _queue_consumer.start()
438
509
  application.state.queue_consumer = _queue_consumer
439
510
  application.state.recall_queue = _recall_queue
440
511
  logger.info("QueueConsumer started (recall_queue.db)")
512
+
513
+ # v3.4.36: Start persistent hook daemon (Unix socket server).
514
+ # Eliminates Python subprocess startup for each recall hook call.
515
+ try:
516
+ from superlocalmemory.hooks.hook_daemon import HookDaemon
517
+ _hook_daemon = HookDaemon(queue_db_path=_queue_db)
518
+ _hook_daemon.start()
519
+ application.state.hook_daemon = _hook_daemon
520
+ except Exception as _hd_exc:
521
+ logger.warning("HookDaemon start failed (non-fatal): %s", _hd_exc)
522
+ application.state.hook_daemon = None
441
523
  except Exception as _qc_exc:
442
524
  logger.warning("QueueConsumer start failed (non-fatal): %s", _qc_exc)
443
525
  application.state.queue_consumer = None
@@ -455,9 +537,9 @@ async def lifespan(application: FastAPI):
455
537
  from superlocalmemory.core.health_monitor import HealthMonitor
456
538
  health_config = getattr(config, 'health', None)
457
539
  monitor = HealthMonitor(
458
- global_rss_budget_mb=getattr(health_config, 'global_rss_budget_mb', 4096) if health_config else 4096,
540
+ global_rss_budget_mb=getattr(health_config, 'global_rss_budget_mb', 2500) if health_config else 2500,
459
541
  heartbeat_timeout_sec=getattr(health_config, 'heartbeat_timeout_sec', 60) if health_config else 60,
460
- check_interval_sec=getattr(health_config, 'health_check_interval_sec', 30) if health_config else 30,
542
+ check_interval_sec=getattr(health_config, 'health_check_interval_sec', 15) if health_config else 15,
461
543
  enable_structured_logging=getattr(health_config, 'enable_structured_logging', True) if health_config else True,
462
544
  )
463
545
  monitor.start()
@@ -592,6 +674,14 @@ async def lifespan(application: FastAPI):
592
674
  except Exception as exc: # pragma: no cover — defensive
593
675
  logger.warning("bandit_tasks cancel failed: %s", exc)
594
676
 
677
+ # v3.4.36: Stop HookDaemon (Unix socket server).
678
+ _hd = getattr(application.state, "hook_daemon", None)
679
+ if _hd is not None:
680
+ try:
681
+ _hd.stop()
682
+ except Exception as exc: # pragma: no cover — defensive
683
+ logger.warning("hook_daemon stop failed: %s", exc)
684
+
595
685
  # v3.4.26: Stop QueueConsumer (recall_queue.db drainer).
596
686
  _qc = getattr(application.state, "queue_consumer", None)
597
687
  if _qc is not None:
@@ -1240,11 +1330,11 @@ def _start_memory_watchdog() -> None:
1240
1330
  """
1241
1331
  import threading
1242
1332
 
1243
- MAX_WORKER_MB = 4096 # 4GB per worker — ONNX full model is 1.6GB + overhead
1333
+ MAX_WORKER_MB = 1800 # V3.4.37: 1.8GB — ONNX nomic-embed is ~1.7GB loaded
1244
1334
 
1245
1335
  def watchdog_loop():
1246
1336
  while True:
1247
- time.sleep(60)
1337
+ time.sleep(15) # V3.4.37: 15s (was 60s) — catch spikes faster
1248
1338
  try:
1249
1339
  import psutil
1250
1340
  parent = psutil.Process(os.getpid())