superlocalmemory 3.3.11 → 3.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +2 -3
- package/src/superlocalmemory/core/config.py +9 -6
- package/src/superlocalmemory/core/embedding_worker.py +5 -1
- package/src/superlocalmemory/core/embeddings.py +3 -1
- package/src/superlocalmemory/core/engine.py +14 -0
- package/src/superlocalmemory/core/engine_wiring.py +16 -1
- package/src/superlocalmemory/core/maintenance_scheduler.py +94 -0
- package/src/superlocalmemory/core/recall_pipeline.py +24 -0
- package/src/superlocalmemory/core/recall_worker.py +22 -4
- package/src/superlocalmemory/core/reranker_worker.py +246 -0
- package/src/superlocalmemory/core/store_pipeline.py +12 -2
- package/src/superlocalmemory/encoding/fact_extractor.py +16 -8
- package/src/superlocalmemory/encoding/graph_builder.py +21 -1
- package/src/superlocalmemory/learning/adaptive.py +2 -2
- package/src/superlocalmemory/math/fisher_quantized.py +8 -4
- package/src/superlocalmemory/math/langevin.py +15 -2
- package/src/superlocalmemory/mcp/resources.py +2 -2
- package/src/superlocalmemory/mcp/shared.py +27 -0
- package/src/superlocalmemory/mcp/tools_active.py +31 -1
- package/src/superlocalmemory/mcp/tools_core.py +15 -9
- package/src/superlocalmemory/mcp/tools_v28.py +2 -2
- package/src/superlocalmemory/mcp/tools_v3.py +3 -0
- package/src/superlocalmemory/mcp/tools_v33.py +68 -7
- package/src/superlocalmemory/retrieval/agentic.py +1 -1
- package/src/superlocalmemory/retrieval/bm25_channel.py +21 -1
- package/src/superlocalmemory/retrieval/engine.py +44 -9
- package/src/superlocalmemory/retrieval/entity_channel.py +6 -0
- package/src/superlocalmemory/retrieval/fusion.py +2 -2
- package/src/superlocalmemory/retrieval/hopfield_channel.py +2 -2
- package/src/superlocalmemory/retrieval/reranker.py +24 -7
- package/src/superlocalmemory/retrieval/semantic_channel.py +2 -2
- package/src/superlocalmemory/retrieval/temporal_channel.py +14 -1
- package/src/superlocalmemory/storage/schema.py +2 -0
|
@@ -142,17 +142,25 @@ class GraphBuilder:
|
|
|
142
142
|
|
|
143
143
|
# -- Edge builders (private) -------------------------------------------
|
|
144
144
|
|
|
145
|
+
# V3.3.12: Cap entity edges per entity to prevent O(n²) explosion.
|
|
146
|
+
# With 500+ facts sharing a popular entity, creating an edge to each
|
|
147
|
+
# produced 44K+ edges and 22-min ingestion. Cap to 20 most recent per entity.
|
|
148
|
+
_MAX_ENTITY_EDGES_PER_ENTITY: int = 20
|
|
149
|
+
|
|
145
150
|
def _build_entity_edges(
|
|
146
151
|
self, new_fact: AtomicFact, profile_id: str,
|
|
147
152
|
) -> list[GraphEdge]:
|
|
148
|
-
"""ENTITY edges: shared canonical entity —
|
|
153
|
+
"""ENTITY edges: shared canonical entity — capped to most recent per entity."""
|
|
149
154
|
if not new_fact.canonical_entities:
|
|
150
155
|
return []
|
|
151
156
|
edges: list[GraphEdge] = []
|
|
152
157
|
seen: set[str] = set()
|
|
153
158
|
|
|
154
159
|
for entity_id in new_fact.canonical_entities:
|
|
160
|
+
entity_edge_count = 0
|
|
155
161
|
for other in self._db.get_facts_by_entity(entity_id, profile_id):
|
|
162
|
+
if entity_edge_count >= self._MAX_ENTITY_EDGES_PER_ENTITY:
|
|
163
|
+
break
|
|
156
164
|
if other.fact_id == new_fact.fact_id or other.fact_id in seen:
|
|
157
165
|
continue
|
|
158
166
|
if self._edge_exists(new_fact.fact_id, other.fact_id, EdgeType.ENTITY, profile_id):
|
|
@@ -163,6 +171,7 @@ class GraphBuilder:
|
|
|
163
171
|
target_id=other.fact_id, edge_type=EdgeType.ENTITY,
|
|
164
172
|
weight=_ENTITY_WEIGHT,
|
|
165
173
|
))
|
|
174
|
+
entity_edge_count += 1
|
|
166
175
|
return edges
|
|
167
176
|
|
|
168
177
|
def _build_temporal_edges(
|
|
@@ -184,7 +193,10 @@ class GraphBuilder:
|
|
|
184
193
|
seen_pairs: set[tuple[str, str]] = set()
|
|
185
194
|
|
|
186
195
|
for entity_id in new_fact.canonical_entities:
|
|
196
|
+
temporal_edge_count = 0
|
|
187
197
|
for other in self._db.get_facts_by_entity(entity_id, profile_id):
|
|
198
|
+
if temporal_edge_count >= self._MAX_ENTITY_EDGES_PER_ENTITY:
|
|
199
|
+
break # V3.3.12: cap temporal edges like entity edges
|
|
188
200
|
if other.fact_id == new_fact.fact_id:
|
|
189
201
|
continue
|
|
190
202
|
other_dt = _parse_date(other.observation_date)
|
|
@@ -212,6 +224,7 @@ class GraphBuilder:
|
|
|
212
224
|
target_id=other.fact_id, edge_type=EdgeType.TEMPORAL,
|
|
213
225
|
weight=weight,
|
|
214
226
|
))
|
|
227
|
+
temporal_edge_count += 1
|
|
215
228
|
# Reverse: other -> new
|
|
216
229
|
if not self._edge_exists(other.fact_id, new_fact.fact_id, EdgeType.TEMPORAL, profile_id):
|
|
217
230
|
edges.append(GraphEdge(
|
|
@@ -248,6 +261,9 @@ class GraphBuilder:
|
|
|
248
261
|
break
|
|
249
262
|
return edges
|
|
250
263
|
|
|
264
|
+
# V3.3.13: Cap causal edges per entity to prevent O(n²) explosion (same as entity/temporal).
|
|
265
|
+
_MAX_CAUSAL_EDGES_PER_ENTITY: int = 20
|
|
266
|
+
|
|
251
267
|
def _build_causal_edges(
|
|
252
268
|
self, new_fact: AtomicFact, profile_id: str,
|
|
253
269
|
) -> list[GraphEdge]:
|
|
@@ -260,7 +276,10 @@ class GraphBuilder:
|
|
|
260
276
|
edges: list[GraphEdge] = []
|
|
261
277
|
seen: set[str] = set()
|
|
262
278
|
for entity_id in new_fact.canonical_entities:
|
|
279
|
+
causal_edge_count = 0
|
|
263
280
|
for other in self._db.get_facts_by_entity(entity_id, profile_id):
|
|
281
|
+
if causal_edge_count >= self._MAX_CAUSAL_EDGES_PER_ENTITY:
|
|
282
|
+
break
|
|
264
283
|
if other.fact_id == new_fact.fact_id or other.fact_id in seen:
|
|
265
284
|
continue
|
|
266
285
|
if self._edge_exists(other.fact_id, new_fact.fact_id, EdgeType.CAUSAL, profile_id):
|
|
@@ -271,6 +290,7 @@ class GraphBuilder:
|
|
|
271
290
|
target_id=new_fact.fact_id, edge_type=EdgeType.CAUSAL,
|
|
272
291
|
weight=_CAUSAL_WEIGHT,
|
|
273
292
|
))
|
|
293
|
+
causal_edge_count += 1
|
|
274
294
|
return edges
|
|
275
295
|
|
|
276
296
|
# -- Helpers -----------------------------------------------------------
|
|
@@ -145,10 +145,14 @@ class FRQADMetric:
|
|
|
145
145
|
if bit_width >= 32:
|
|
146
146
|
return np.array(base_variance, dtype=np.float64)
|
|
147
147
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
148
|
+
# V3.3.12: Paper-correct ADDITIVE variance combination (was multiplicative).
|
|
149
|
+
# sigma²_total = sigma²_obs + sigma²_quant
|
|
150
|
+
# sigma²_quant = Delta²/12 where Delta = 2/2^b (uniform quantization step)
|
|
151
|
+
delta = 2.0 / (2 ** bit_width) # Quantization step size
|
|
152
|
+
sigma_q_sq = (delta ** 2) / 12.0 # Uniform quantization noise variance
|
|
153
|
+
sigma_total = np.asarray(base_variance, dtype=np.float64) + sigma_q_sq
|
|
154
|
+
|
|
155
|
+
return np.clip(sigma_total, self._config.variance_floor, self._config.variance_ceiling)
|
|
152
156
|
|
|
153
157
|
# ------------------------------------------------------------------
|
|
154
158
|
# Core distance (THE novel contribution)
|
|
@@ -178,6 +178,19 @@ class LangevinDynamics:
|
|
|
178
178
|
# --- Drift: -lambda^{-2} * grad_U * dt (Eq. 5 term 1) ---
|
|
179
179
|
drift = -(lam_inv ** 2) * grad * self.dt
|
|
180
180
|
|
|
181
|
+
# --- V3.3.12: Ebbinghaus forgetting drift (Eq. 6 in Paper 3) ---
|
|
182
|
+
# λ(m) = 1/S(m) pushes toward boundary (forgetting) based on memory strength.
|
|
183
|
+
# S(m) is computed from access_count + importance. Higher S → less drift.
|
|
184
|
+
strength = max(0.5, 0.3 * math.log(1.0 + access_count) + 0.4 * importance)
|
|
185
|
+
forget_rate = 1.0 / strength # λ(m)
|
|
186
|
+
# F(ξ) = ξ/||ξ|| points outward (toward boundary = archived zone)
|
|
187
|
+
xi_norm = float(np.linalg.norm(xi))
|
|
188
|
+
if xi_norm > _EPS:
|
|
189
|
+
forget_direction = xi / xi_norm
|
|
190
|
+
else:
|
|
191
|
+
forget_direction = np.zeros(self.dim)
|
|
192
|
+
forgetting_drift = forget_rate * forget_direction * self.dt * 0.1 # Scaled down to prevent instability
|
|
193
|
+
|
|
181
194
|
# --- Curvature correction: 0.5 * T * (d-2) * lambda^{-1} * xi * dt (Eq. 5 term 3) ---
|
|
182
195
|
correction = 0.5 * self.temperature * (self.dim - 2) * lam_inv * xi * self.dt
|
|
183
196
|
|
|
@@ -186,8 +199,8 @@ class LangevinDynamics:
|
|
|
186
199
|
noise = rng.standard_normal(self.dim)
|
|
187
200
|
diffusion = math.sqrt(2.0 * self.temperature * self.dt) * lam_inv * noise
|
|
188
201
|
|
|
189
|
-
# --- Full Euler-Maruyama update (Girolami & Calderhead 2011) ---
|
|
190
|
-
new_xi = xi + drift + correction + diffusion
|
|
202
|
+
# --- Full Euler-Maruyama update with forgetting (Eq. 6, Girolami & Calderhead 2011) ---
|
|
203
|
+
new_xi = xi + drift + forgetting_drift + correction + diffusion
|
|
191
204
|
|
|
192
205
|
# --- Project back into the open ball ---
|
|
193
206
|
new_xi = _project_to_ball(new_xi)
|
|
@@ -197,8 +197,8 @@ def register_resources(server, get_engine: Callable) -> None:
|
|
|
197
197
|
|
|
198
198
|
# Behavioral patterns summary
|
|
199
199
|
try:
|
|
200
|
-
from superlocalmemory.learning.behavioral import
|
|
201
|
-
store =
|
|
200
|
+
from superlocalmemory.learning.behavioral import BehavioralPatternStore
|
|
201
|
+
store = BehavioralPatternStore(engine._db.db_path)
|
|
202
202
|
summary = store.get_summary(pid)
|
|
203
203
|
except Exception:
|
|
204
204
|
summary = {}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""Shared MCP utilities — single source of truth for helpers used
|
|
6
|
+
across tools_core, tools_active, tools_v28, tools_v3, tools_v33.
|
|
7
|
+
|
|
8
|
+
V3.3.12: Extracted _emit_event to eliminate code duplication.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
_DB_PATH = Path.home() / ".superlocalmemory" / "memory.db"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def emit_event(event_type: str, payload: dict | None = None,
|
|
19
|
+
source_agent: str = "mcp_client") -> None:
|
|
20
|
+
"""Emit an event to the EventBus (best-effort, never raises)."""
|
|
21
|
+
try:
|
|
22
|
+
from superlocalmemory.infra.event_bus import EventBus
|
|
23
|
+
bus = EventBus.get_instance(_DB_PATH)
|
|
24
|
+
bus.emit(event_type, payload=payload, source_agent=source_agent,
|
|
25
|
+
source_protocol="mcp")
|
|
26
|
+
except Exception:
|
|
27
|
+
pass
|
|
@@ -27,7 +27,7 @@ DB_PATH = MEMORY_DIR / "memory.db"
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def _emit_event(event_type: str, payload: dict | None = None,
|
|
30
|
-
source_agent: str = "mcp_client") -> None:
|
|
30
|
+
source_agent: str = "mcp_client") -> None: # V3.3.12: see also mcp/shared.py
|
|
31
31
|
"""Emit an event to the EventBus (best-effort, never raises)."""
|
|
32
32
|
try:
|
|
33
33
|
from superlocalmemory.infra.event_bus import EventBus
|
|
@@ -253,3 +253,33 @@ def register_active_tools(server, get_engine: Callable) -> None:
|
|
|
253
253
|
except Exception as exc:
|
|
254
254
|
logger.exception("report_feedback failed")
|
|
255
255
|
return {"success": False, "error": str(exc)}
|
|
256
|
+
|
|
257
|
+
# ------------------------------------------------------------------
|
|
258
|
+
# close_session — V3.3.12: Expose session closure via MCP
|
|
259
|
+
# ------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
@server.tool()
async def close_session(session_id: str = "") -> dict:
    """Close a session and create its temporal summary events.

    Delegates to ``engine.close_session`` and reports how many summary
    events it returned.

    Args:
        session_id: Session to close. When empty, falls back to the
            engine's ``_last_session_id`` attribute if it has one.

    Returns:
        On success: ``{"success": True, "session_id": ...,
        "summary_events_created": ...}``. On failure (no session id
        resolvable, or any exception): ``{"success": False, "error": ...}``.
    """
    try:
        engine = get_engine()
        # The original reads the profile id here although the value is not
        # used afterwards; the attribute access is kept so behaviour is
        # unchanged.
        pid = engine.profile_id

        target_session = session_id
        if not target_session:
            target_session = getattr(engine, '_last_session_id', '')
        if not target_session:
            return {"success": False, "error": "No session_id provided"}

        created = engine.close_session(target_session)
        response = {
            "success": True,
            "session_id": target_session,
            "summary_events_created": created,
        }
        return response
    except Exception as exc:
        logger.exception("close_session failed")
        return {"success": False, "error": str(exc)}
|
|
@@ -139,6 +139,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
139
139
|
"results": result.get("results", []),
|
|
140
140
|
"count": result.get("result_count", 0),
|
|
141
141
|
"query_type": result.get("query_type", "unknown"),
|
|
142
|
+
"channel_weights": result.get("channel_weights", {}),
|
|
143
|
+
"retrieval_time_ms": result.get("retrieval_time_ms", 0),
|
|
142
144
|
}
|
|
143
145
|
return {"success": False, "error": result.get("error", "Recall failed")}
|
|
144
146
|
except Exception as exc:
|
|
@@ -280,11 +282,15 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
280
282
|
engine.profile_id = profile_id
|
|
281
283
|
|
|
282
284
|
# Persist to both config stores so CLI and Dashboard stay in sync
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
285
|
+
try:
|
|
286
|
+
from superlocalmemory.server.routes.helpers import (
|
|
287
|
+
ensure_profile_in_db, set_active_profile_everywhere,
|
|
288
|
+
)
|
|
289
|
+
ensure_profile_in_db(profile_id)
|
|
290
|
+
set_active_profile_everywhere(profile_id)
|
|
291
|
+
except ImportError:
|
|
292
|
+
# Dashboard not installed — profile switch still works for MCP/CLI
|
|
293
|
+
logger.debug("Dashboard routes not available, profile set in engine only")
|
|
288
294
|
|
|
289
295
|
return {
|
|
290
296
|
"success": True,
|
|
@@ -337,8 +343,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
337
343
|
"""Get learned behavioral patterns (interests, refinements, archival habits)."""
|
|
338
344
|
try:
|
|
339
345
|
engine = get_engine()
|
|
340
|
-
from superlocalmemory.learning.behavioral import
|
|
341
|
-
store =
|
|
346
|
+
from superlocalmemory.learning.behavioral import BehavioralPatternStore
|
|
347
|
+
store = BehavioralPatternStore(engine._db.db_path)
|
|
342
348
|
ptype = pattern_type if pattern_type else None
|
|
343
349
|
patterns = store.get_patterns(
|
|
344
350
|
engine.profile_id, pattern_type=ptype, limit=limit,
|
|
@@ -353,8 +359,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
353
359
|
"""Correct or annotate a learned behavioral pattern to improve retrieval."""
|
|
354
360
|
try:
|
|
355
361
|
engine = get_engine()
|
|
356
|
-
from superlocalmemory.learning.behavioral import
|
|
357
|
-
store =
|
|
362
|
+
from superlocalmemory.learning.behavioral import BehavioralPatternStore
|
|
363
|
+
store = BehavioralPatternStore(engine._db.db_path)
|
|
358
364
|
store.record(
|
|
359
365
|
engine.profile_id,
|
|
360
366
|
pattern_type="correction",
|
|
@@ -181,8 +181,8 @@ def register_v28_tools(server, get_engine: Callable) -> None:
|
|
|
181
181
|
"""
|
|
182
182
|
try:
|
|
183
183
|
engine = get_engine()
|
|
184
|
-
from superlocalmemory.learning.behavioral import
|
|
185
|
-
store =
|
|
184
|
+
from superlocalmemory.learning.behavioral import BehavioralPatternStore
|
|
185
|
+
store = BehavioralPatternStore(engine._db.db_path)
|
|
186
186
|
patterns = store.get_patterns(engine.profile_id, limit=limit)
|
|
187
187
|
summary = store.get_summary(engine.profile_id)
|
|
188
188
|
return {
|
|
@@ -228,6 +228,7 @@ def register_v3_tools(server, get_engine: Callable) -> None:
|
|
|
228
228
|
|
|
229
229
|
facts = engine._db.get_all_facts(pid)[:limit]
|
|
230
230
|
all_contradictions: list[dict] = []
|
|
231
|
+
errors_count = 0
|
|
231
232
|
for fact in facts:
|
|
232
233
|
if not fact.embedding or not fact.canonical_entities:
|
|
233
234
|
continue
|
|
@@ -243,11 +244,13 @@ def register_v3_tools(server, get_engine: Callable) -> None:
|
|
|
243
244
|
"content_a": fact.content[:80],
|
|
244
245
|
})
|
|
245
246
|
except Exception:
|
|
247
|
+
errors_count += 1
|
|
246
248
|
continue
|
|
247
249
|
|
|
248
250
|
return {
|
|
249
251
|
"success": True,
|
|
250
252
|
"facts_checked": len(facts),
|
|
253
|
+
"facts_errored": errors_count,
|
|
251
254
|
"contradictions": all_contradictions[:50],
|
|
252
255
|
"total_contradictions": len(all_contradictions),
|
|
253
256
|
}
|
|
@@ -27,7 +27,7 @@ DB_PATH = MEMORY_DIR / "memory.db"
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def _emit_event(event_type: str, payload: dict | None = None,
|
|
30
|
-
source_agent: str = "mcp_client") -> None:
|
|
30
|
+
source_agent: str = "mcp_client") -> None: # V3.3.12: see also mcp/shared.py
|
|
31
31
|
"""Emit an event to the EventBus (best-effort, never raises)."""
|
|
32
32
|
try:
|
|
33
33
|
from superlocalmemory.infra.event_bus import EventBus
|
|
@@ -76,8 +76,15 @@ def register_v33_tools(server, get_engine: Callable) -> None:
|
|
|
76
76
|
)
|
|
77
77
|
|
|
78
78
|
if dry_run:
|
|
79
|
-
#
|
|
80
|
-
|
|
79
|
+
# Dry run: compute retention stats without applying changes
|
|
80
|
+
from superlocalmemory.math.ebbinghaus import EbbinghausCurve as _EC
|
|
81
|
+
facts = engine._db.get_all_facts(pid)
|
|
82
|
+
zones = {"active": 0, "warm": 0, "cold": 0, "archive": 0, "forgotten": 0}
|
|
83
|
+
for f in facts:
|
|
84
|
+
r = ebbinghaus.compute_retention(f.access_count or 0, f.importance or 0.5, 0, 0.0)
|
|
85
|
+
zone = ebbinghaus.classify_zone(r)
|
|
86
|
+
zones[zone] = zones.get(zone, 0) + 1
|
|
87
|
+
result = {"total": len(facts), "transitions": 0, "dry_run_zones": zones}
|
|
81
88
|
else:
|
|
82
89
|
result = scheduler.run_decay_cycle(pid, force=True)
|
|
83
90
|
|
|
@@ -137,8 +144,9 @@ def register_v33_tools(server, get_engine: Callable) -> None:
|
|
|
137
144
|
)
|
|
138
145
|
|
|
139
146
|
if dry_run:
|
|
140
|
-
#
|
|
141
|
-
|
|
147
|
+
# Dry run: report current quantization state without changes
|
|
148
|
+
facts = engine._db.get_all_facts(pid)
|
|
149
|
+
result = {"total": len(facts), "would_quantize": 0, "dry_run": True}
|
|
142
150
|
else:
|
|
143
151
|
result = scheduler.run_eap_cycle(pid)
|
|
144
152
|
|
|
@@ -185,13 +193,13 @@ def register_v33_tools(server, get_engine: Callable) -> None:
|
|
|
185
193
|
|
|
186
194
|
_emit_event("ccq.consolidation_complete", {
|
|
187
195
|
"profile_id": pid,
|
|
188
|
-
"
|
|
196
|
+
"clusters_processed": result.clusters_processed,
|
|
189
197
|
"blocks_created": result.blocks_created,
|
|
190
198
|
})
|
|
191
199
|
|
|
192
200
|
return {
|
|
193
201
|
"success": True,
|
|
194
|
-
"
|
|
202
|
+
"clusters_processed": result.clusters_processed,
|
|
195
203
|
"blocks_created": result.blocks_created,
|
|
196
204
|
"facts_archived": result.facts_archived,
|
|
197
205
|
"compression_ratio": round(result.compression_ratio, 3),
|
|
@@ -349,3 +357,56 @@ def register_v33_tools(server, get_engine: Callable) -> None:
|
|
|
349
357
|
except Exception as exc:
|
|
350
358
|
logger.exception("get_retention_stats tool failed")
|
|
351
359
|
return {"success": False, "error": str(exc)}
|
|
360
|
+
|
|
361
|
+
# ------------------------------------------------------------------
|
|
362
|
+
# 7. run_maintenance — V3.3.12: Combined periodic maintenance cycle
|
|
363
|
+
# ------------------------------------------------------------------
|
|
364
|
+
@server.tool()
async def run_maintenance(profile_id: str = "") -> dict:
    """Run all periodic maintenance tasks in a single call.

    Executes three independent steps: the core maintenance routine
    (reported under ``"langevin"``), a non-forced forgetting decay cycle
    (``"forgetting"``), and behavioral pattern generation (``"behavioral"``).
    Each step is isolated: if one raises, its entry becomes
    ``{"error": <message>}`` and the remaining steps still run.

    Args:
        profile_id: Profile to maintain (default: the engine's active profile).

    Returns:
        ``{"success": True, "profile": <pid>, "langevin": ..., "forgetting":
        ..., "behavioral": ...}``, or ``{"success": False, "error": ...}`` if
        the engine itself could not be obtained or queried.
    """
    try:
        engine = get_engine()
        pid = profile_id if profile_id else engine.profile_id
        report: dict = {}

        # Step 1: core maintenance pass (lifecycle evolution).
        try:
            from superlocalmemory.core.maintenance import run_maintenance as _run_maint
            outcome = _run_maint(engine._db, engine._config, pid)
            report["langevin"] = {"updated": outcome.get("updated", 0)}
        except Exception as exc:
            report["langevin"] = {"error": str(exc)}

        # Step 2: Ebbinghaus forgetting decay (force=False, so the scheduler
        # is allowed to decide for itself whether a cycle is due).
        try:
            from superlocalmemory.math.ebbinghaus import EbbinghausCurve
            from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
            curve = EbbinghausCurve(engine._config.forgetting)
            decay_scheduler = ForgettingScheduler(
                engine._db, curve, engine._config.forgetting,
            )
            report["forgetting"] = decay_scheduler.run_decay_cycle(pid, force=False)
        except Exception as exc:
            report["forgetting"] = {"error": str(exc)}

        # Step 3: behavioral pattern mining.
        # NOTE(review): this calls the worker's private _generate_patterns
        # directly — confirm there is no public entry point to use instead.
        try:
            from superlocalmemory.learning.consolidation_worker import ConsolidationWorker
            worker = ConsolidationWorker(engine._db, engine._config)
            mined = worker._generate_patterns(pid)
            report["behavioral"] = {"patterns_mined": len(mined)}
        except Exception as exc:
            report["behavioral"] = {"error": str(exc)}

        response = {"success": True, "profile": pid}
        response.update(report)
        return response

    except Exception as exc:
        logger.exception("run_maintenance failed")
        return {"success": False, "error": str(exc)}
|
|
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
|
|
|
31
31
|
|
|
32
32
|
_MAX_ROUNDS = 2
|
|
33
33
|
_SUFFICIENCY_SCORE_THRESHOLD = 0.6
|
|
34
|
-
_SKIP_TYPES = frozenset() #
|
|
34
|
+
_SKIP_TYPES = frozenset({"temporal"}) # S15: agentic harms temporal queries
|
|
35
35
|
|
|
36
36
|
_SUFFICIENCY_SYSTEM = (
|
|
37
37
|
"You evaluate whether retrieved context is sufficient to answer a query. "
|
|
@@ -68,6 +68,7 @@ class BM25Channel:
|
|
|
68
68
|
self._corpus: list[list[str]] = []
|
|
69
69
|
self._fact_ids: list[str] = []
|
|
70
70
|
self._fact_id_set: set[str] = set()
|
|
71
|
+
self._raw_texts: list[str] = [] # V3.3.12: raw content for phrase matching
|
|
71
72
|
self._bm25: BM25Plus | None = None
|
|
72
73
|
self._dirty: bool = False
|
|
73
74
|
self._loaded_profiles: set[str] = set()
|
|
@@ -96,15 +97,24 @@ class BM25Channel:
|
|
|
96
97
|
self._corpus.append(tokens)
|
|
97
98
|
self._fact_ids.append(fact.fact_id)
|
|
98
99
|
self._fact_id_set.add(fact.fact_id)
|
|
100
|
+
self._raw_texts.append(fact.content)
|
|
99
101
|
# Persist for next cold start
|
|
100
102
|
self._db.store_bm25_tokens(fact.fact_id, profile_id, tokens)
|
|
101
103
|
else:
|
|
104
|
+
# Load raw texts for phrase matching (V3.3.12)
|
|
105
|
+
fact_content_map = {}
|
|
106
|
+
try:
|
|
107
|
+
facts = self._db.get_all_facts(profile_id)
|
|
108
|
+
fact_content_map = {f.fact_id: f.content for f in facts}
|
|
109
|
+
except Exception:
|
|
110
|
+
pass
|
|
102
111
|
for fid, tokens in token_map.items():
|
|
103
112
|
if fid in self._fact_id_set:
|
|
104
113
|
continue
|
|
105
114
|
self._corpus.append(tokens)
|
|
106
115
|
self._fact_ids.append(fid)
|
|
107
116
|
self._fact_id_set.add(fid)
|
|
117
|
+
self._raw_texts.append(fact_content_map.get(fid, ""))
|
|
108
118
|
|
|
109
119
|
self._dirty = True
|
|
110
120
|
self._loaded_profiles.add(profile_id)
|
|
@@ -128,6 +138,9 @@ class BM25Channel:
|
|
|
128
138
|
self._corpus.append(tokens)
|
|
129
139
|
self._fact_ids.append(fact_id)
|
|
130
140
|
self._fact_id_set.add(fact_id)
|
|
141
|
+
if not hasattr(self, '_raw_texts'):
|
|
142
|
+
self._raw_texts = []
|
|
143
|
+
self._raw_texts.append(content)
|
|
131
144
|
self._dirty = True
|
|
132
145
|
|
|
133
146
|
# Persist for cold start
|
|
@@ -168,9 +181,16 @@ class BM25Channel:
|
|
|
168
181
|
scores = self._bm25.get_scores(query_tokens)
|
|
169
182
|
|
|
170
183
|
scored: list[tuple[str, float]] = []
|
|
184
|
+
# V3.3.12: Exact phrase bonus — boost facts containing the full query phrase
|
|
185
|
+
query_lower = query.lower().strip()
|
|
171
186
|
for i, score in enumerate(scores):
|
|
172
187
|
if score > 0.0:
|
|
173
|
-
|
|
188
|
+
bonus = score
|
|
189
|
+
# Exact phrase match bonus: if the query appears as a substring in the document
|
|
190
|
+
if len(query_lower) >= 5 and i < len(self._raw_texts):
|
|
191
|
+
if query_lower in self._raw_texts[i].lower():
|
|
192
|
+
bonus *= 1.5 # 50% boost for exact phrase match
|
|
193
|
+
scored.append((self._fact_ids[i], bonus))
|
|
174
194
|
|
|
175
195
|
scored.sort(key=lambda x: x[1], reverse=True)
|
|
176
196
|
return scored[:top_k]
|
|
@@ -75,6 +75,8 @@ class RetrievalEngine:
|
|
|
75
75
|
self._temporal: TemporalChannel | None = channels.get("temporal")
|
|
76
76
|
# Phase G: Hopfield channel (6th)
|
|
77
77
|
self._hopfield: HopfieldChannel | None = channels.get("hopfield")
|
|
78
|
+
# Phase 3: Spreading Activation channel
|
|
79
|
+
self._spreading_activation = channels.get("spreading_activation")
|
|
78
80
|
self._embedder = embedder
|
|
79
81
|
self._reranker = reranker
|
|
80
82
|
self._strategy = strategy or QueryStrategyClassifier()
|
|
@@ -101,6 +103,11 @@ class RetrievalEngine:
|
|
|
101
103
|
# Phase G: Hopfield channel (6th) — needs embedding input
|
|
102
104
|
if self._hopfield is not None:
|
|
103
105
|
self._registry.register_channel("hopfield", self._hopfield, needs_embedding=True)
|
|
106
|
+
# Phase 3: Spreading Activation (5th channel) — needs embedding input
|
|
107
|
+
if self._spreading_activation is not None:
|
|
108
|
+
self._registry.register_channel(
|
|
109
|
+
"spreading_activation", self._spreading_activation, needs_embedding=True,
|
|
110
|
+
)
|
|
104
111
|
|
|
105
112
|
def recall(
|
|
106
113
|
self, query: str, profile_id: str,
|
|
@@ -139,7 +146,7 @@ class RetrievalEngine:
|
|
|
139
146
|
fused = weighted_rrf(ch_results, strat.weights, k=self._config.rrf_k)
|
|
140
147
|
|
|
141
148
|
# Bridge discovery for multi-hop queries
|
|
142
|
-
if self._bridge is not None and strat.query_type
|
|
149
|
+
if self._bridge is not None and strat.query_type in ("multi_hop", "entity", "factual", "general"):
|
|
143
150
|
try:
|
|
144
151
|
seed_ids = [fr.fact_id for fr in fused[:10]]
|
|
145
152
|
bridges = self._bridge.discover(seed_ids, profile_id, max_bridges=10)
|
|
@@ -221,6 +228,7 @@ class RetrievalEngine:
|
|
|
221
228
|
needs_embedding = (
|
|
222
229
|
(self._semantic is not None and "semantic" not in disabled)
|
|
223
230
|
or (self._hopfield is not None and "hopfield" not in disabled)
|
|
231
|
+
or (self._spreading_activation is not None and "spreading_activation" not in disabled)
|
|
224
232
|
)
|
|
225
233
|
if needs_embedding:
|
|
226
234
|
try:
|
|
@@ -269,6 +277,23 @@ class RetrievalEngine:
|
|
|
269
277
|
except Exception as exc:
|
|
270
278
|
logger.warning("Hopfield channel: %s", exc)
|
|
271
279
|
|
|
280
|
+
# Phase 3: Spreading Activation channel (5th) — graph-based associative recall
|
|
281
|
+
if self._spreading_activation is not None and q_emb is not None and "spreading_activation" not in disabled:
|
|
282
|
+
try:
|
|
283
|
+
r = self._spreading_activation.search(q_emb, profile_id, self._config.bm25_top_k)
|
|
284
|
+
if r:
|
|
285
|
+
out["spreading_activation"] = r
|
|
286
|
+
except Exception as exc:
|
|
287
|
+
logger.warning("Spreading activation channel: %s", exc)
|
|
288
|
+
|
|
289
|
+
# Apply registered post-retrieval filters (forgetting filter, etc.)
|
|
290
|
+
if hasattr(self, '_registry') and self._registry._filters:
|
|
291
|
+
for fn in self._registry._filters:
|
|
292
|
+
try:
|
|
293
|
+
out = fn(out, profile_id, None)
|
|
294
|
+
except Exception as exc:
|
|
295
|
+
logger.warning("Post-retrieval filter failed: %s", exc)
|
|
296
|
+
|
|
272
297
|
return out
|
|
273
298
|
|
|
274
299
|
# -- Fact loading -------------------------------------------------------
|
|
@@ -336,12 +361,24 @@ class RetrievalEngine:
|
|
|
336
361
|
|
|
337
362
|
score_map = {fact.fact_id: score for fact, score in scored}
|
|
338
363
|
|
|
364
|
+
# Min-max normalize CE scores to [0, 1] within the batch instead of
|
|
365
|
+
# sigmoid (which compresses the useful discrimination range).
|
|
366
|
+
ce_values = list(score_map.values())
|
|
367
|
+
ce_min = min(ce_values) if ce_values else 0.0
|
|
368
|
+
ce_max = max(ce_values) if ce_values else 1.0
|
|
369
|
+
ce_range = ce_max - ce_min if ce_max > ce_min else 1.0
|
|
370
|
+
|
|
371
|
+
# Also normalize RRF scores so both terms contribute meaningfully
|
|
372
|
+
rrf_values = [fr.fused_score for fr in fused]
|
|
373
|
+
rrf_max = max(rrf_values) if rrf_values else 1.0
|
|
374
|
+
rrf_max = rrf_max if rrf_max > 0 else 1.0
|
|
375
|
+
|
|
339
376
|
updated = [
|
|
340
377
|
FusionResult(
|
|
341
378
|
fact_id=fr.fact_id,
|
|
342
379
|
fused_score=(
|
|
343
|
-
alpha *
|
|
344
|
-
+ (1.0 - alpha) * fr.fused_score
|
|
380
|
+
alpha * ((score_map.get(fr.fact_id, ce_min) - ce_min) / ce_range)
|
|
381
|
+
+ (1.0 - alpha) * (fr.fused_score / rrf_max)
|
|
345
382
|
),
|
|
346
383
|
channel_ranks=fr.channel_ranks,
|
|
347
384
|
channel_scores=fr.channel_scores,
|
|
@@ -425,12 +462,10 @@ class RetrievalEngine:
|
|
|
425
462
|
# due to BM25 name-matching (greetings like "Hey Caroline!" score high
|
|
426
463
|
# on BM25 but have zero retrieval value)
|
|
427
464
|
content_len = len(fact.content.strip())
|
|
428
|
-
if content_len <
|
|
429
|
-
quality = 0.
|
|
430
|
-
elif content_len <
|
|
431
|
-
quality = 0.
|
|
432
|
-
elif content_len < 80:
|
|
433
|
-
quality = 0.8
|
|
465
|
+
if content_len < 10:
|
|
466
|
+
quality = 0.3
|
|
467
|
+
elif content_len < 25:
|
|
468
|
+
quality = 0.7
|
|
434
469
|
else:
|
|
435
470
|
quality = 1.0
|
|
436
471
|
|
|
@@ -67,6 +67,12 @@ def extract_query_entities(query: str) -> list[str]:
|
|
|
67
67
|
_add(m.group(0))
|
|
68
68
|
for m in re.finditer(r'"([^"]+)"', query):
|
|
69
69
|
_add(m.group(1).strip())
|
|
70
|
+
# Also extract multi-word capitalized sequences (e.g. "New York", "San Francisco")
|
|
71
|
+
for m in re.finditer(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b', query):
|
|
72
|
+
_add(m.group(1))
|
|
73
|
+
# Extract all-caps abbreviations (e.g. NYU, MIT, UCLA) — min 2 chars
|
|
74
|
+
for m in re.finditer(r'\b([A-Z]{2,})\b', query):
|
|
75
|
+
_add(m.group(1))
|
|
70
76
|
|
|
71
77
|
return candidates
|
|
72
78
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
"""SuperLocalMemory V3 — Weighted Reciprocal Rank Fusion.
|
|
6
6
|
|
|
7
|
-
Single-pass RRF with k=
|
|
7
|
+
Single-pass RRF with k=15 for sharp rank discrimination on small candidate pools.
|
|
8
8
|
V1 had triple re-fusion which destroyed rankings — fixed in V2.
|
|
9
9
|
|
|
10
10
|
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
@@ -27,7 +27,7 @@ class FusionResult:
|
|
|
27
27
|
def weighted_rrf(
|
|
28
28
|
channels: dict[str, list[tuple[str, float]]],
|
|
29
29
|
weights: dict[str, float],
|
|
30
|
-
k: int =
|
|
30
|
+
k: int = 15,
|
|
31
31
|
max_rank_penalty: int = 1000,
|
|
32
32
|
) -> list[FusionResult]:
|
|
33
33
|
"""Fuse ranked lists via Weighted Reciprocal Rank Fusion.
|
|
@@ -288,8 +288,8 @@ class HopfieldChannel:
|
|
|
288
288
|
):
|
|
289
289
|
return (self._cached_matrix, self._cached_fact_ids)
|
|
290
290
|
|
|
291
|
-
# Step 2: Load
|
|
292
|
-
facts = self._db.get_all_facts(profile_id)
|
|
291
|
+
# Step 2: Load facts (V3.3.12: cap to most recent 5000 to bound memory)
|
|
292
|
+
facts = self._db.get_all_facts(profile_id)[:5000]
|
|
293
293
|
if not facts:
|
|
294
294
|
return (None, [])
|
|
295
295
|
|