superlocalmemory 3.4.35 → 3.4.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +68 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/superlocalmemory/__init__.py +1 -1
- package/src/superlocalmemory/cli/commands.py +1 -0
- package/src/superlocalmemory/core/embedding_worker.py +1 -1
- package/src/superlocalmemory/core/embeddings.py +5 -8
- package/src/superlocalmemory/core/health_monitor.py +2 -2
- package/src/superlocalmemory/core/recall_worker.py +3 -1
- package/src/superlocalmemory/hooks/auto_recall_hook.py +32 -0
- package/src/superlocalmemory/hooks/hook_daemon.py +276 -0
- package/src/superlocalmemory/retrieval/reranker.py +2 -1
- package/src/superlocalmemory/server/unified_daemon.py +100 -10
- package/src/superlocalmemory.egg-info/PKG-INFO +663 -0
- package/src/superlocalmemory.egg-info/SOURCES.txt +451 -0
- package/src/superlocalmemory.egg-info/dependency_links.txt +1 -0
- package/src/superlocalmemory.egg-info/entry_points.txt +2 -0
- package/src/superlocalmemory.egg-info/requires.txt +59 -0
- package/src/superlocalmemory.egg-info/top_level.txt +1 -0
|
@@ -66,6 +66,75 @@ class ObserveRequest(BaseModel):
|
|
|
66
66
|
content: str
|
|
67
67
|
|
|
68
68
|
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# V3.4.37: Engine recall adapter — routes QueueConsumer through the daemon's
|
|
71
|
+
# in-process MemoryEngine instead of spawning a recall_worker subprocess.
|
|
72
|
+
# Saves ~800 MB by eliminating the duplicate engine.
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
class EngineRecallAdapter:
    """Expose an in-process engine's ``recall()`` in the dict shape QueueConsumer uses.

    The adapter wraps an already-constructed engine object and converts the
    response of ``engine.recall()`` into a plain, JSON-friendly ``dict``.
    It is passed as the ``pool`` argument of ``QueueConsumer`` so recall
    requests are served by the engine living in this process rather than by
    a separate ``recall_worker`` subprocess holding a second engine.

    NOTE(review): the exact ``RecallPoolProtocol`` contract is not visible
    from here; the payload keys below mirror what this class has always
    returned — confirm against the protocol definition before changing them.
    """

    def __init__(self, engine) -> None:
        """Store the engine whose ``recall()`` and ``_db`` this adapter uses."""
        self._engine = engine

    @staticmethod
    def _enum_value(member) -> str:
        """Return ``member.value`` for a truthy enum-like object, else ``""``."""
        if member and hasattr(member, "value"):
            return member.value
        return ""

    def recall(self, query: str, limit: int = 10, session_id: str = "") -> dict:
        """Run a recall on the wrapped engine and serialise the top results.

        Args:
            query: Text forwarded unchanged to ``engine.recall()``.
            limit: Maximum number of results requested and returned.
            session_id: Session identifier; an empty string is forwarded to
                the engine as ``None``.

        Returns:
            A dict with ``ok``, ``query``, ``query_type``, ``result_count``,
            ``retrieval_time_ms``, ``channel_weights``, ``total_candidates``
            and ``results`` (one dict per returned fact).
        """
        response = self._engine.recall(
            query, limit=limit, session_id=session_id or None,
        )

        # Slice once; the engine may hand back more rows than requested.
        top_results = response.results[:limit]

        # De-duplicate memory ids while keeping rank order so the batch
        # lookup receives a deterministic argument (a set would not).
        memory_ids = list(dict.fromkeys(
            r.fact.memory_id for r in top_results if r.fact.memory_id
        ))
        # Skip the database round-trip entirely when there is nothing to fetch.
        memory_map = (
            self._engine._db.get_memory_content_batch(memory_ids)
            if memory_ids else {}
        )

        results = []
        for r in top_results:
            fact = r.fact
            results.append({
                "fact_id": fact.fact_id,
                "memory_id": fact.memory_id,
                # Fact content is capped at 300 characters in the payload.
                "content": fact.content[:300],
                "source_content": memory_map.get(fact.memory_id, ""),
                "score": round(r.score, 4),
                "confidence": round(r.confidence, 4),
                "trust_score": round(r.trust_score, 4),
                "channel_scores": {
                    k: round(v, 4)
                    for k, v in (r.channel_scores or {}).items()
                },
                # Optional attributes: absent or falsy values serialise as "".
                "fact_type": self._enum_value(getattr(fact, "fact_type", None)),
                "lifecycle": self._enum_value(getattr(fact, "lifecycle", None)),
                "access_count": getattr(fact, "access_count", 0),
                "evidence_chain": list(
                    getattr(r, "evidence_chain", []) or []
                ),
            })

        return {
            "ok": True,
            "query": query,
            "query_type": response.query_type,
            "result_count": len(results),
            "retrieval_time_ms": round(response.retrieval_time_ms, 1),
            "channel_weights": {
                k: round(v, 3)
                for k, v in (response.channel_weights or {}).items()
            },
            "total_candidates": getattr(response, "total_candidates", 0),
            "results": results,
        }
|
|
136
|
+
|
|
137
|
+
|
|
69
138
|
# ---------------------------------------------------------------------------
|
|
70
139
|
# v3.4.32: Recall-priority gate for the pending materializer.
|
|
71
140
|
# All /remember writes go to pending.db and return fast; a background
|
|
@@ -397,9 +466,10 @@ async def lifespan(application: FastAPI):
|
|
|
397
466
|
# Set up observe buffer
|
|
398
467
|
_observe_buffer.set_engine(engine)
|
|
399
468
|
|
|
400
|
-
#
|
|
401
|
-
|
|
402
|
-
|
|
469
|
+
# V3.4.37: Removed WorkerPool.warmup() — the recall_worker subprocess
|
|
470
|
+
# duplicated the daemon's MemoryEngine (800+ MB). QueueConsumer now
|
|
471
|
+
# uses the daemon's engine directly via EngineRecallAdapter.
|
|
472
|
+
# WorkerPool is still available as fallback for dashboard/chat routes.
|
|
403
473
|
|
|
404
474
|
# Force reranker warmup
|
|
405
475
|
retrieval_eng = getattr(engine, '_retrieval_engine', None)
|
|
@@ -422,8 +492,9 @@ async def lifespan(application: FastAPI):
|
|
|
422
492
|
logger.warning("Embedding warmup failed: %s", exc)
|
|
423
493
|
threading.Thread(target=_warmup_embedder, daemon=True, name="embed-warmup").start()
|
|
424
494
|
|
|
425
|
-
# v3.4.
|
|
426
|
-
#
|
|
495
|
+
# v3.4.37: QueueConsumer uses daemon's engine directly via adapter.
|
|
496
|
+
# Previously routed through WorkerPool → recall_worker subprocess,
|
|
497
|
+
# which loaded a duplicate MemoryEngine (~800 MB waste).
|
|
427
498
|
try:
|
|
428
499
|
from pathlib import Path as _QP
|
|
429
500
|
from superlocalmemory.core.queue_consumer import QueueConsumer
|
|
@@ -432,12 +503,23 @@ async def lifespan(application: FastAPI):
|
|
|
432
503
|
_recall_queue = RecallQueue(_queue_db)
|
|
433
504
|
_queue_consumer = QueueConsumer(
|
|
434
505
|
queue=_recall_queue,
|
|
435
|
-
pool=
|
|
506
|
+
pool=EngineRecallAdapter(engine),
|
|
436
507
|
)
|
|
437
508
|
_queue_consumer.start()
|
|
438
509
|
application.state.queue_consumer = _queue_consumer
|
|
439
510
|
application.state.recall_queue = _recall_queue
|
|
440
511
|
logger.info("QueueConsumer started (recall_queue.db)")
|
|
512
|
+
|
|
513
|
+
# v3.4.36: Start persistent hook daemon (Unix socket server).
|
|
514
|
+
# Eliminates Python subprocess startup for each recall hook call.
|
|
515
|
+
try:
|
|
516
|
+
from superlocalmemory.hooks.hook_daemon import HookDaemon
|
|
517
|
+
_hook_daemon = HookDaemon(queue_db_path=_queue_db)
|
|
518
|
+
_hook_daemon.start()
|
|
519
|
+
application.state.hook_daemon = _hook_daemon
|
|
520
|
+
except Exception as _hd_exc:
|
|
521
|
+
logger.warning("HookDaemon start failed (non-fatal): %s", _hd_exc)
|
|
522
|
+
application.state.hook_daemon = None
|
|
441
523
|
except Exception as _qc_exc:
|
|
442
524
|
logger.warning("QueueConsumer start failed (non-fatal): %s", _qc_exc)
|
|
443
525
|
application.state.queue_consumer = None
|
|
@@ -455,9 +537,9 @@ async def lifespan(application: FastAPI):
|
|
|
455
537
|
from superlocalmemory.core.health_monitor import HealthMonitor
|
|
456
538
|
health_config = getattr(config, 'health', None)
|
|
457
539
|
monitor = HealthMonitor(
|
|
458
|
-
global_rss_budget_mb=getattr(health_config, 'global_rss_budget_mb',
|
|
540
|
+
global_rss_budget_mb=getattr(health_config, 'global_rss_budget_mb', 2500) if health_config else 2500,
|
|
459
541
|
heartbeat_timeout_sec=getattr(health_config, 'heartbeat_timeout_sec', 60) if health_config else 60,
|
|
460
|
-
check_interval_sec=getattr(health_config, 'health_check_interval_sec',
|
|
542
|
+
check_interval_sec=getattr(health_config, 'health_check_interval_sec', 15) if health_config else 15,
|
|
461
543
|
enable_structured_logging=getattr(health_config, 'enable_structured_logging', True) if health_config else True,
|
|
462
544
|
)
|
|
463
545
|
monitor.start()
|
|
@@ -592,6 +674,14 @@ async def lifespan(application: FastAPI):
|
|
|
592
674
|
except Exception as exc: # pragma: no cover — defensive
|
|
593
675
|
logger.warning("bandit_tasks cancel failed: %s", exc)
|
|
594
676
|
|
|
677
|
+
# v3.4.36: Stop HookDaemon (Unix socket server).
|
|
678
|
+
_hd = getattr(application.state, "hook_daemon", None)
|
|
679
|
+
if _hd is not None:
|
|
680
|
+
try:
|
|
681
|
+
_hd.stop()
|
|
682
|
+
except Exception as exc: # pragma: no cover — defensive
|
|
683
|
+
logger.warning("hook_daemon stop failed: %s", exc)
|
|
684
|
+
|
|
595
685
|
# v3.4.26: Stop QueueConsumer (recall_queue.db drainer).
|
|
596
686
|
_qc = getattr(application.state, "queue_consumer", None)
|
|
597
687
|
if _qc is not None:
|
|
@@ -1240,11 +1330,11 @@ def _start_memory_watchdog() -> None:
|
|
|
1240
1330
|
"""
|
|
1241
1331
|
import threading
|
|
1242
1332
|
|
|
1243
|
-
MAX_WORKER_MB =
|
|
1333
|
+
MAX_WORKER_MB = 1800 # V3.4.37: 1.8GB — ONNX nomic-embed is ~1.7GB loaded
|
|
1244
1334
|
|
|
1245
1335
|
def watchdog_loop():
|
|
1246
1336
|
while True:
|
|
1247
|
-
time.sleep(
|
|
1337
|
+
time.sleep(15) # V3.4.37: 15s (was 60s) — catch spikes faster
|
|
1248
1338
|
try:
|
|
1249
1339
|
import psutil
|
|
1250
1340
|
parent = psutil.Process(os.getpid())
|