superlocalmemory 3.4.38 → 3.4.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -1
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/superlocalmemory/__init__.py +1 -1
- package/src/superlocalmemory/cli/commands.py +6 -2
- package/src/superlocalmemory/cli/main.py +6 -0
- package/src/superlocalmemory/core/engine.py +7 -0
- package/src/superlocalmemory/core/maintenance.py +39 -1
- package/src/superlocalmemory/core/recall_pipeline.py +10 -1
- package/src/superlocalmemory/encoding/observation_builder.py +21 -4
- package/src/superlocalmemory/mcp/tools_active.py +29 -6
- package/src/superlocalmemory/retrieval/engine.py +15 -3
- package/src/superlocalmemory/retrieval/spreading_activation.py +30 -13
- package/src/superlocalmemory/server/routes/v3_api.py +3 -1
- package/src/superlocalmemory/server/unified_daemon.py +2 -0
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -663
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -451
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -59
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -6,7 +6,53 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
8
|
### [Unreleased]
|
|
9
|
-
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## [3.4.40] - 2026-05-09
|
|
13
|
+
|
|
14
|
+
Recall performance and entity-profile hygiene. Two scaling issues surfaced
|
|
15
|
+
on dense graphs: spreading-activation fan-out grew unbounded as graphs
|
|
16
|
+
exceeded the previous calibration target, and `entity_profiles.knowledge_summary`
|
|
17
|
+
grew unbounded via concatenation. This release bounds both, adds an opt-in
|
|
18
|
+
`--fast` recall mode, and increases the query embedding cache.
|
|
19
|
+
|
|
20
|
+
### Added
|
|
21
|
+
- **`slm recall --fast`** — skips the spreading-activation channel for
|
|
22
|
+
faster response. The other four channels (semantic, BM25, temporal,
|
|
23
|
+
hopfield) still run. Use when an agent needs recall before another
|
|
24
|
+
tool call. Plumbed via a new `extra_disabled_channels` parameter through
|
|
25
|
+
CLI → daemon `/recall` → `MemoryEngine.recall` → `run_recall` →
|
|
26
|
+
`RetrievalEngine.recall`.
|
|
27
|
+
|
|
28
|
+
### Changed
|
|
29
|
+
- **Spreading-activation fan-out is bounded.** `_get_unified_neighbors`
|
|
30
|
+
now applies `ORDER BY weight DESC LIMIT max_neighbors_per_node`
|
|
31
|
+
(default 100). High-degree nodes previously expanded every neighbor
|
|
32
|
+
every iteration. Bounded fan-out matches the SYNAPSE paper's
|
|
33
|
+
sparse-graph assumption while preserving the highest-weight edges.
|
|
34
|
+
- **`SpreadingActivationConfig.top_m`: 20 → 10.** Compromise between the
|
|
35
|
+
SYNAPSE default (7) and the prior dense-graph tuning (20).
|
|
36
|
+
- **`ObservationBuilder._build_summary` is now bounded.** Last 10 facts
|
|
37
|
+
(was 20), 200-char cap per fact, 2048-char total cap. Previously
|
|
38
|
+
`knowledge_summary` grew via concatenation and could exceed tens of
|
|
39
|
+
KB on hub entities, polluting recall with stale text.
|
|
40
|
+
- **Query embedding LRU cache: 64 → 512 entries.** Sub-millisecond cache
|
|
41
|
+
hits versus a 200–2000 ms embedding call. Memory cost is ≈1.5 MB.
|
|
42
|
+
|
|
43
|
+
### Maintenance
|
|
44
|
+
- `run_maintenance` now consolidates over-bound entity summaries via a
|
|
45
|
+
single SQL update on the existing scheduler interval.
|
|
46
|
+
|
|
47
|
+
### Tests
|
|
48
|
+
- 399/399 retrieval + encoding suite passing.
|
|
49
|
+
- 12/12 spreading-activation unit tests passing.
|
|
50
|
+
|
|
51
|
+
### Upgrade notes
|
|
52
|
+
- Existing deployments with bloated `entity_profiles.knowledge_summary`
|
|
53
|
+
rows will see them truncated on the next `slm consolidate` or
|
|
54
|
+
scheduled maintenance run. The truncation is in-place; entity
|
|
55
|
+
identity and `fact_count` are preserved.
|
|
10
56
|
|
|
11
57
|
---
|
|
12
58
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.4.38",
|
|
3
|
+
"version": "3.4.40",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -905,10 +905,11 @@ def cmd_recall(args: Namespace) -> None:
|
|
|
905
905
|
if is_daemon_running() or ensure_daemon():
|
|
906
906
|
from urllib.parse import quote
|
|
907
907
|
session_id = f"cli:{os.getppid()}"
|
|
908
|
+
fast_qs = "&fast=true" if getattr(args, "fast", False) else ""
|
|
908
909
|
result = daemon_request(
|
|
909
910
|
"GET",
|
|
910
911
|
f"/recall?q={quote(args.query)}&limit={args.limit}"
|
|
911
|
-
f"&session_id={quote(session_id)}",
|
|
912
|
+
f"&session_id={quote(session_id)}{fast_qs}",
|
|
912
913
|
)
|
|
913
914
|
if result and "results" in result:
|
|
914
915
|
# Format daemon response same as engine response
|
|
@@ -937,7 +938,10 @@ def cmd_recall(args: Namespace) -> None:
|
|
|
937
938
|
engine = MemoryEngine(config)
|
|
938
939
|
engine.initialize()
|
|
939
940
|
|
|
940
|
-
response = engine.recall(
|
|
941
|
+
response = engine.recall(
|
|
942
|
+
args.query, limit=args.limit,
|
|
943
|
+
fast=getattr(args, "fast", False),
|
|
944
|
+
)
|
|
941
945
|
except Exception as exc:
|
|
942
946
|
if use_json:
|
|
943
947
|
from superlocalmemory.cli.json_output import json_print
|
|
@@ -187,6 +187,12 @@ def main() -> None:
|
|
|
187
187
|
recall_p.add_argument("query", help="Search query")
|
|
188
188
|
recall_p.add_argument("--limit", type=int, default=10, help="Max results (default 10)")
|
|
189
189
|
recall_p.add_argument("--json", action="store_true", help="Output structured JSON (agent-native)")
|
|
190
|
+
recall_p.add_argument(
|
|
191
|
+
"--fast", action="store_true",
|
|
192
|
+
help="Skip SpreadingActivation 5th channel for sub-second response. "
|
|
193
|
+
"Other 4 channels (semantic, lexical, temporal, structural) still run. "
|
|
194
|
+
"Use when you need recall before a tool call (e.g. before WebSearch).",
|
|
195
|
+
)
|
|
190
196
|
|
|
191
197
|
forget_p = sub.add_parser("forget", help="Delete memories matching a query (fuzzy)")
|
|
192
198
|
forget_p.add_argument("query", help="Query to match for deletion")
|
|
@@ -376,6 +376,7 @@ class MemoryEngine:
|
|
|
376
376
|
mode: Mode | None = None, limit: int = 20,
|
|
377
377
|
agent_id: str = "unknown",
|
|
378
378
|
session_id: str | None = None,
|
|
379
|
+
fast: bool = False,
|
|
379
380
|
) -> RecallResponse:
|
|
380
381
|
"""Recall relevant facts for a query.
|
|
381
382
|
|
|
@@ -385,6 +386,11 @@ class MemoryEngine:
|
|
|
385
386
|
Zero additional latency on the hot path — enqueue is a
|
|
386
387
|
``put_nowait`` and the actual ``pending_outcomes`` INSERT runs
|
|
387
388
|
on a background worker.
|
|
389
|
+
|
|
390
|
+
V3.4.40 (2026-05-09): ``fast=True`` skips the SpreadingActivation
|
|
391
|
+
5th channel for sub-second response. The other 4 channels still
|
|
392
|
+
run. Use when recall must complete before another tool call (e.g.
|
|
393
|
+
agent recall before WebSearch).
|
|
388
394
|
"""
|
|
389
395
|
self._require_full("recall")
|
|
390
396
|
self._ensure_init()
|
|
@@ -402,6 +408,7 @@ class MemoryEngine:
|
|
|
402
408
|
hooks=self._hooks,
|
|
403
409
|
access_log=self._access_log,
|
|
404
410
|
auto_linker=self._auto_linker,
|
|
411
|
+
fast=fast,
|
|
405
412
|
)
|
|
406
413
|
|
|
407
414
|
# S9-DASH-02: enqueue for pending_outcomes. Non-blocking; errors
|
|
@@ -106,6 +106,7 @@ def run_maintenance(
|
|
|
106
106
|
"langevin_updated": 0,
|
|
107
107
|
"fisher_coupled": 0,
|
|
108
108
|
"sheaf_checked": 0,
|
|
109
|
+
"entity_summaries_consolidated": 0, # V3.4.40
|
|
109
110
|
}
|
|
110
111
|
|
|
111
112
|
facts = db.get_all_facts(profile_id)
|
|
@@ -270,9 +271,46 @@ def run_maintenance(
|
|
|
270
271
|
except Exception as exc:
|
|
271
272
|
logger.warning("Sheaf maintenance failed: %s", exc)
|
|
272
273
|
|
|
274
|
+
# 3. V3.4.40: Entity summary consolidation
|
|
275
|
+
# Re-bound any entity_profiles whose knowledge_summary exceeded the cap
|
|
276
|
+
# (e.g. created before V3.4.40, or via a code path that bypassed the
|
|
277
|
+
# bounded _build_summary). Truncates in-place — keeps entity identity,
|
|
278
|
+
# drops bloat. Future writes go through ObservationBuilder.SUMMARY_*
|
|
279
|
+
# bounds and stay clean.
|
|
280
|
+
try:
|
|
281
|
+
consolidated = db.execute(
|
|
282
|
+
"""
|
|
283
|
+
UPDATE entity_profiles
|
|
284
|
+
SET knowledge_summary = SUBSTR(knowledge_summary, 1, 2047) || '…',
|
|
285
|
+
last_updated = datetime('now')
|
|
286
|
+
WHERE LENGTH(knowledge_summary) > 2048
|
|
287
|
+
AND profile_id = ?
|
|
288
|
+
""",
|
|
289
|
+
(profile_id,),
|
|
290
|
+
)
|
|
291
|
+
# SQLite doesn't return rowcount via execute() wrapper consistently.
|
|
292
|
+
# Re-count instead — fast on the small subset.
|
|
293
|
+
rows = db.execute(
|
|
294
|
+
"SELECT COUNT(*) AS c FROM entity_profiles "
|
|
295
|
+
"WHERE LENGTH(knowledge_summary) > 2048 AND profile_id = ?",
|
|
296
|
+
(profile_id,),
|
|
297
|
+
)
|
|
298
|
+
# If any remain >2048 after the UPDATE, log it. Otherwise count
|
|
299
|
+
# how many were truncated by diffing against the prior pass.
|
|
300
|
+
# (Best-effort; non-fatal.)
|
|
301
|
+
if rows:
|
|
302
|
+
remaining = dict(rows[0]).get("c", 0)
|
|
303
|
+
counts["entity_summaries_consolidated"] = max(
|
|
304
|
+
0, counts.get("entity_summaries_consolidated", 0)
|
|
305
|
+
) - remaining
|
|
306
|
+
except Exception as exc:
|
|
307
|
+
logger.warning("Entity summary consolidation failed: %s", exc)
|
|
308
|
+
|
|
273
309
|
logger.info(
|
|
274
|
-
"Maintenance complete: %d backfilled, %d Langevin, %d Fisher-coupled, %d Sheaf",
|
|
310
|
+
"Maintenance complete: %d backfilled, %d Langevin, %d Fisher-coupled, "
|
|
311
|
+
"%d Sheaf, %d entity-summaries",
|
|
275
312
|
counts["langevin_backfilled"], counts["langevin_updated"],
|
|
276
313
|
counts["fisher_coupled"], counts["sheaf_checked"],
|
|
314
|
+
counts["entity_summaries_consolidated"],
|
|
277
315
|
)
|
|
278
316
|
return counts
|
|
@@ -556,10 +556,15 @@ def run_recall(
|
|
|
556
556
|
hooks: HookRegistry,
|
|
557
557
|
access_log: Any = None,
|
|
558
558
|
auto_linker: Any = None,
|
|
559
|
+
fast: bool = False,
|
|
559
560
|
) -> RecallResponse:
|
|
560
561
|
"""Recall relevant facts for a query.
|
|
561
562
|
|
|
562
563
|
Pipeline: retrieval -> agentic sufficiency (if configured) -> post-recall updates.
|
|
564
|
+
|
|
565
|
+
V3.4.40: ``fast=True`` adds spreading_activation to the per-recall
|
|
566
|
+
extra_disabled_channels set, skipping the 5th channel for sub-second
|
|
567
|
+
response.
|
|
563
568
|
"""
|
|
564
569
|
# Pre-operation hooks
|
|
565
570
|
hook_ctx = {
|
|
@@ -572,7 +577,11 @@ def run_recall(
|
|
|
572
577
|
|
|
573
578
|
m = mode or config.mode
|
|
574
579
|
|
|
575
|
-
|
|
580
|
+
extra_disabled = {"spreading_activation"} if fast else None
|
|
581
|
+
response = retrieval_engine.recall(
|
|
582
|
+
query, profile_id, m, limit,
|
|
583
|
+
extra_disabled_channels=extra_disabled,
|
|
584
|
+
)
|
|
576
585
|
|
|
577
586
|
# Agentic sufficiency verification
|
|
578
587
|
# V3.3.19: Only trigger for multi_hop queries in Mode A (rule-based).
|
|
@@ -139,22 +139,39 @@ class ObservationBuilder:
|
|
|
139
139
|
),
|
|
140
140
|
)
|
|
141
141
|
|
|
142
|
+
# V3.4.40 (2026-05-09): bounded summary to prevent unbounded growth.
|
|
143
|
+
# Pre-V3.4.40 builds concatenated full fact content. Hubs accumulated
|
|
144
|
+
# 60+ KB summaries that polluted recall and bloated entity_profiles.
|
|
145
|
+
_SUMMARY_MAX_FACTS = 10 # Last N facts (was 20)
|
|
146
|
+
_SUMMARY_MAX_CHARS_PER_FACT = 200 # Truncate each fact (NEW)
|
|
147
|
+
_SUMMARY_MAX_TOTAL_CHARS = 2048 # Hard cap on full summary (NEW)
|
|
148
|
+
|
|
142
149
|
def _build_summary(
|
|
143
150
|
self, entity_id: str, fact_ids: list[str], profile_id: str
|
|
144
151
|
) -> str:
|
|
145
152
|
"""Build a knowledge summary from all facts about an entity.
|
|
146
153
|
|
|
147
|
-
|
|
154
|
+
V3.4.40: bounded — last 10 facts, 200 chars each, 2048 total.
|
|
155
|
+
Older builds produced 60KB summaries on hub entities.
|
|
156
|
+
Mode B/C could use LLM rollup for higher-quality compression.
|
|
148
157
|
"""
|
|
149
158
|
facts = []
|
|
150
|
-
for fid in fact_ids[-20:]:
|
|
159
|
+
for fid in fact_ids[-self._SUMMARY_MAX_FACTS:]:
|
|
151
160
|
rows = self._db.execute(
|
|
152
161
|
"SELECT content FROM atomic_facts WHERE fact_id = ? AND profile_id = ?",
|
|
153
162
|
(fid, profile_id),
|
|
154
163
|
)
|
|
155
164
|
if rows:
|
|
156
|
-
|
|
165
|
+
content = dict(rows[0])["content"]
|
|
166
|
+
# Truncate per-fact to bound the join below
|
|
167
|
+
if len(content) > self._SUMMARY_MAX_CHARS_PER_FACT:
|
|
168
|
+
content = content[: self._SUMMARY_MAX_CHARS_PER_FACT - 1] + "…"
|
|
169
|
+
facts.append(content)
|
|
157
170
|
|
|
158
171
|
if not facts:
|
|
159
172
|
return ""
|
|
160
|
-
|
|
173
|
+
joined = " | ".join(facts)
|
|
174
|
+
# Hard cap as final safety net
|
|
175
|
+
if len(joined) > self._SUMMARY_MAX_TOTAL_CHARS:
|
|
176
|
+
joined = joined[: self._SUMMARY_MAX_TOTAL_CHARS - 1] + "…"
|
|
177
|
+
return joined
|
|
@@ -17,6 +17,7 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
19
|
import logging
|
|
20
|
+
import os
|
|
20
21
|
from pathlib import Path
|
|
21
22
|
from typing import Callable
|
|
22
23
|
|
|
@@ -26,17 +27,32 @@ MEMORY_DIR = Path.home() / ".superlocalmemory"
|
|
|
26
27
|
DB_PATH = MEMORY_DIR / "memory.db"
|
|
27
28
|
|
|
28
29
|
|
|
30
|
+
def _get_agent_id(default: str = "mcp_client") -> str:
|
|
31
|
+
"""Resolve the calling agent's ID for attribution.
|
|
32
|
+
|
|
33
|
+
Each Avenger (Claude, Codex, Gemini, Kimi, GLM, Qwen, etc.) sets the
|
|
34
|
+
``SLM_AGENT_ID`` env var in its MCP server config so that memories,
|
|
35
|
+
observations, and registry entries are tagged with the actual source
|
|
36
|
+
agent — not the legacy ``"mcp_client"`` default.
|
|
37
|
+
|
|
38
|
+
v3.4.39+: enables proper cross-Avenger attribution in ``session_init``,
|
|
39
|
+
``observe``, and event emissions.
|
|
40
|
+
"""
|
|
41
|
+
return os.environ.get("SLM_AGENT_ID", default)
|
|
42
|
+
|
|
43
|
+
|
|
29
44
|
def _emit_event(event_type: str, payload: dict | None = None,
|
|
30
|
-
source_agent: str =
|
|
45
|
+
source_agent: str | None = None) -> None: # V3.3.12: see also mcp/shared.py
|
|
31
46
|
"""Emit an event to the EventBus (best-effort, never raises).
|
|
32
47
|
|
|
33
48
|
Dashboard visibility is load-bearing per the v3.4.26 user contract,
|
|
34
49
|
so we log on failure rather than silently dropping the signal.
|
|
35
50
|
"""
|
|
51
|
+
resolved_agent = source_agent if source_agent is not None else _get_agent_id()
|
|
36
52
|
try:
|
|
37
53
|
from superlocalmemory.infra.event_bus import EventBus
|
|
38
54
|
bus = EventBus.get_instance(str(DB_PATH))
|
|
39
|
-
bus.emit(event_type, payload=payload, source_agent=source_agent,
|
|
55
|
+
bus.emit(event_type, payload=payload, source_agent=resolved_agent,
|
|
40
56
|
source_protocol="mcp")
|
|
41
57
|
except Exception as exc:
|
|
42
58
|
logger.warning("event emit failed: type=%s err=%s", event_type, exc)
|
|
@@ -116,10 +132,11 @@ def register_active_tools(server, get_engine: Callable) -> None:
|
|
|
116
132
|
"session_init feedback_count read failed: %s", exc,
|
|
117
133
|
)
|
|
118
134
|
|
|
119
|
-
# Register agent + emit event
|
|
120
|
-
|
|
135
|
+
# Register agent + emit event (v3.4.39: SLM_AGENT_ID env support)
|
|
136
|
+
agent_id = _get_agent_id()
|
|
137
|
+
_register_agent(agent_id, pid)
|
|
121
138
|
_emit_event("agent.connected", {
|
|
122
|
-
"agent_id":
|
|
139
|
+
"agent_id": agent_id,
|
|
123
140
|
"project_path": project_path,
|
|
124
141
|
"memory_count": len(memories),
|
|
125
142
|
})
|
|
@@ -145,7 +162,7 @@ def register_active_tools(server, get_engine: Callable) -> None:
|
|
|
145
162
|
@server.tool()
|
|
146
163
|
async def observe(
|
|
147
164
|
content: str,
|
|
148
|
-
agent_id: str =
|
|
165
|
+
agent_id: str | None = None,
|
|
149
166
|
) -> dict:
|
|
150
167
|
"""Observe conversation content for automatic memory capture.
|
|
151
168
|
|
|
@@ -155,7 +172,13 @@ def register_active_tools(server, get_engine: Callable) -> None:
|
|
|
155
172
|
|
|
156
173
|
Call this after making decisions, fixing bugs, or expressing preferences.
|
|
157
174
|
The system will NOT store low-confidence or irrelevant content.
|
|
175
|
+
|
|
176
|
+
v3.4.39: ``agent_id`` now defaults to the ``SLM_AGENT_ID`` env var
|
|
177
|
+
(set by each Avenger's MCP config) so observations carry proper
|
|
178
|
+
cross-Avenger attribution.
|
|
158
179
|
"""
|
|
180
|
+
if agent_id is None:
|
|
181
|
+
agent_id = _get_agent_id()
|
|
159
182
|
try:
|
|
160
183
|
from superlocalmemory.hooks.auto_capture import AutoCapture
|
|
161
184
|
from superlocalmemory.hooks.rules_engine import RulesEngine
|
|
@@ -86,8 +86,11 @@ class RetrievalEngine:
|
|
|
86
86
|
self._trust_scorer = trust_scorer
|
|
87
87
|
|
|
88
88
|
# V3.3.4: LRU cache for query embeddings (avoids redundant Ollama API calls)
|
|
89
|
+
# V3.4.40 (2026-05-09): bumped 64 -> 512. Each cached embedding is ~3KB
|
|
90
|
+
# (768 floats × 4 bytes). 512 entries ~1.5MB — trivial memory cost,
|
|
91
|
+
# massive latency win on repeated queries (sub-ms vs 200-2000ms ollama).
|
|
89
92
|
self._query_embedding_cache: dict[str, list[float]] = {}
|
|
90
|
-
self._cache_max_size = 64
|
|
93
|
+
self._cache_max_size = 512
|
|
91
94
|
|
|
92
95
|
# V3.2: ChannelRegistry for self-registration (Phase 0.5)
|
|
93
96
|
from superlocalmemory.retrieval.channel_registry import ChannelRegistry
|
|
@@ -112,9 +115,17 @@ class RetrievalEngine:
|
|
|
112
115
|
def recall(
|
|
113
116
|
self, query: str, profile_id: str,
|
|
114
117
|
mode: Mode = Mode.A, limit: int = 20,
|
|
118
|
+
*,
|
|
119
|
+
extra_disabled_channels: set[str] | None = None,
|
|
115
120
|
) -> RecallResponse:
|
|
116
|
-
"""Full retrieval pipeline: strategy -> channels -> RRF -> rerank.
|
|
121
|
+
"""Full retrieval pipeline: strategy -> channels -> RRF -> rerank.
|
|
122
|
+
|
|
123
|
+
V3.4.40 (2026-05-09): ``extra_disabled_channels`` allows callers to
|
|
124
|
+
skip specific channels for a single recall (e.g. SpreadingActivation
|
|
125
|
+
for the ``--fast`` CLI flag) without mutating shared config.
|
|
126
|
+
"""
|
|
117
127
|
t0 = time.monotonic()
|
|
128
|
+
self._extra_disabled = set(extra_disabled_channels or ())
|
|
118
129
|
|
|
119
130
|
# 1. Classify query, get adaptive weights
|
|
120
131
|
strat = self._strategy.classify(query, self._base_weights)
|
|
@@ -443,7 +454,8 @@ class RetrievalEngine:
|
|
|
443
454
|
"""Run active retrieval channels. Respects disabled_channels config for ablation."""
|
|
444
455
|
out: dict[str, list[tuple[str, float]]] = {}
|
|
445
456
|
# Skip channels listed in disabled_channels (ablation support)
|
|
446
|
-
|
|
457
|
+
# V3.4.40: union with per-recall extra_disabled set (e.g. --fast skip)
|
|
458
|
+
disabled = set(self._config.disabled_channels) | getattr(self, "_extra_disabled", set())
|
|
447
459
|
|
|
448
460
|
# V3.3.4: Embed query ONCE, reuse for semantic + hopfield channels
|
|
449
461
|
q_emb: list[float] | None = None
|
|
@@ -47,12 +47,18 @@ class SpreadingActivationConfig:
|
|
|
47
47
|
delta: float = 0.5 # Node retention / self-decay per iteration
|
|
48
48
|
spreading_factor: float = 0.8 # S: energy diffusion rate
|
|
49
49
|
# V3.3.20: Recalibrated for SLM graph density (254K edges, 768d).
|
|
50
|
+
# V3.4.40 (2026-05-09): graph grew to 960K edges. top_m=20 caused 5.5s recalls.
|
|
51
|
+
# Reduced to 10 (compromise between SYNAPSE default 7 and the dense-graph 20).
|
|
50
52
|
# SYNAPSE defaults (theta=0.5, top_m=7) were for 384d sparse graphs.
|
|
51
53
|
theta: float = 0.2 # Activation threshold for sigmoid (was 0.5)
|
|
52
|
-
top_m: int = 20
|
|
54
|
+
top_m: int = 10 # Lateral inhibition: max active nodes (was 20, then 7 originally)
|
|
53
55
|
max_iterations: int = 3 # T: propagation depth
|
|
54
56
|
tau_gate: float = 0.05 # FOK confidence gate (was 0.12)
|
|
55
57
|
enabled: bool = True # Ships enabled by default
|
|
58
|
+
# V3.4.40 (2026-05-09): per-node neighbor fan-out clamp.
|
|
59
|
+
# Hub nodes in dense graphs (5K+ edges) caused unbounded work per expansion.
|
|
60
|
+
# 100 top-weighted neighbors keeps signal, drops long-tail noise.
|
|
61
|
+
max_neighbors_per_node: int = 100
|
|
56
62
|
# v3.4.1: Graph intelligence integration
|
|
57
63
|
use_pagerank_bias: bool = False # Multiply propagation by target PageRank
|
|
58
64
|
community_boost: float = 0.0 # Boost same-community nodes (0.0 = disabled)
|
|
@@ -217,24 +223,35 @@ class SpreadingActivation:
|
|
|
217
223
|
"""Get neighbors from BOTH graph_edges and association_edges.
|
|
218
224
|
|
|
219
225
|
Uses bidirectional UNION query (Section 4 of LLD).
|
|
226
|
+
|
|
227
|
+
V3.4.40 (2026-05-09): clamps fan-out to top
|
|
228
|
+
``max_neighbors_per_node`` by weight. Without this clamp, hub nodes
|
|
229
|
+
with thousands of neighbors caused 5.5s recalls. Bounded fan-out
|
|
230
|
+
matches SYNAPSE's original sparse-graph assumption while preserving
|
|
231
|
+
the highest-signal edges.
|
|
220
232
|
"""
|
|
221
233
|
try:
|
|
222
234
|
rows = self._db.execute(
|
|
223
235
|
"""
|
|
224
|
-
SELECT
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
236
|
+
SELECT neighbor_id, weight FROM (
|
|
237
|
+
SELECT target_id AS neighbor_id, weight FROM graph_edges
|
|
238
|
+
WHERE source_id = ? AND profile_id = ?
|
|
239
|
+
UNION ALL
|
|
240
|
+
SELECT target_fact_id AS neighbor_id, weight FROM association_edges
|
|
241
|
+
WHERE source_fact_id = ? AND profile_id = ?
|
|
242
|
+
UNION ALL
|
|
243
|
+
SELECT source_id AS neighbor_id, weight FROM graph_edges
|
|
244
|
+
WHERE target_id = ? AND profile_id = ?
|
|
245
|
+
UNION ALL
|
|
246
|
+
SELECT source_fact_id AS neighbor_id, weight FROM association_edges
|
|
247
|
+
WHERE target_fact_id = ? AND profile_id = ?
|
|
248
|
+
)
|
|
249
|
+
ORDER BY weight DESC
|
|
250
|
+
LIMIT ?
|
|
235
251
|
""",
|
|
236
252
|
(node_id, profile_id, node_id, profile_id,
|
|
237
|
-
node_id, profile_id, node_id, profile_id
|
|
253
|
+
node_id, profile_id, node_id, profile_id,
|
|
254
|
+
self._config.max_neighbors_per_node),
|
|
238
255
|
)
|
|
239
256
|
return [
|
|
240
257
|
(dict(r)["neighbor_id"], dict(r)["weight"]) for r in rows
|
|
@@ -381,7 +381,9 @@ async def test_provider(request: Request):
|
|
|
381
381
|
resp = c.get(f"{endpoint}/api/tags")
|
|
382
382
|
resp.raise_for_status()
|
|
383
383
|
models = [m["name"] for m in resp.json().get("models", [])]
|
|
384
|
-
|
|
384
|
+
# Match either exact name or base name (strip ":tag" suffix on both sides)
|
|
385
|
+
model_base = model.split(":")[0] if model else ""
|
|
386
|
+
found = any(m == model or m.split(":")[0] == model_base for m in models) if model else len(models) > 0
|
|
385
387
|
return {
|
|
386
388
|
"success": found,
|
|
387
389
|
"message": f"Ollama OK, {len(models)} models" + (f", '{model}' available" if found and model else ""),
|
|
@@ -1082,6 +1082,7 @@ def _register_daemon_routes(application: FastAPI) -> None:
|
|
|
1082
1082
|
request: Request,
|
|
1083
1083
|
q: str = "", query: str = "", limit: int = 20,
|
|
1084
1084
|
session_id: str = "",
|
|
1085
|
+
fast: bool = False,
|
|
1085
1086
|
):
|
|
1086
1087
|
_update_activity()
|
|
1087
1088
|
search_query = q or query # Accept both ?q= and ?query= for compatibility
|
|
@@ -1103,6 +1104,7 @@ def _register_daemon_routes(application: FastAPI) -> None:
|
|
|
1103
1104
|
try:
|
|
1104
1105
|
response = engine.recall(
|
|
1105
1106
|
search_query, limit=limit, session_id=effective_sid,
|
|
1107
|
+
fast=fast,
|
|
1106
1108
|
)
|
|
1107
1109
|
# v3.4.26: return the same field shape as recall_worker so
|
|
1108
1110
|
# MCP processes proxying through the daemon get recall_trace-
|