superlocalmemory 3.4.38 → 3.4.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -6,7 +6,53 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
8
  ### [Unreleased]
9
- - **License:** Changed from Elastic-2.0 to AGPL-3.0-or-later to protect research IP
9
+
10
+ ---
11
+
12
+ ## [3.4.40] - 2026-05-09
13
+
14
+ Recall performance and entity-profile hygiene. Two scaling issues surfaced
15
+ on dense graphs: spreading-activation fan-out grew unbounded as graphs
16
+ exceeded the previous calibration target, and `entity_profiles.knowledge_summary`
17
+ grew unbounded via concatenation. This release bounds both, adds an opt-in
18
+ `--fast` recall mode, and increases the query embedding cache.
19
+
20
+ ### Added
21
+ - **`slm recall --fast`** — skips the spreading-activation channel for
22
+ faster response. The other four channels (semantic, BM25, temporal,
23
+ hopfield) still run. Use when an agent needs recall before another
24
+ tool call. Plumbed via a new `extra_disabled_channels` parameter through
25
+ CLI → daemon `/recall` → `MemoryEngine.recall` → `run_recall` →
26
+ `RetrievalEngine.recall`.
27
+
28
+ ### Changed
29
+ - **Spreading-activation fan-out is bounded.** `_get_unified_neighbors`
30
+ now applies `ORDER BY weight DESC LIMIT max_neighbors_per_node`
31
+ (default 100). High-degree nodes previously expanded every neighbor
32
+ every iteration. Bounded fan-out matches the SYNAPSE paper's
33
+ sparse-graph assumption while preserving the highest-weight edges.
34
+ - **`SpreadingActivationConfig.top_m`: 20 → 10.** Compromise between the
35
+ SYNAPSE default (7) and the prior dense-graph tuning (20).
36
+ - **`ObservationBuilder._build_summary` is now bounded.** Last 10 facts
37
+ (was 20), 200-char cap per fact, 2048-char total cap. Previously
38
+ `knowledge_summary` grew via concatenation and could exceed tens of
39
+ KB on hub entities, polluting recall with stale text.
40
+ - **Query embedding LRU cache: 64 → 512 entries.** Sub-millisecond cache
41
+ hits versus a 200–2000 ms embedding call. Memory cost is ≈1.5 MB.
42
+
43
+ ### Maintenance
44
+ - `run_maintenance` now consolidates over-bound entity summaries via a
45
+ single SQL update on the existing scheduler interval.
46
+
47
+ ### Tests
48
+ - 399/399 retrieval + encoding suite passing.
49
+ - 12/12 spreading-activation unit tests passing.
50
+
51
+ ### Upgrade notes
52
+ - Existing deployments with bloated `entity_profiles.knowledge_summary`
53
+ rows will see them truncated on the next `slm consolidate` or
54
+ scheduled maintenance run. The truncation is in-place; entity
55
+ identity and `fact_count` are preserved.
10
56
 
11
57
  ---
12
58
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.4.38",
3
+ "version": "3.4.40",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.4.38"
3
+ version = "3.4.40"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "AGPL-3.0-or-later"}
@@ -1,3 +1,3 @@
1
1
  """SuperLocalMemory — information-geometric agent memory."""
2
2
 
3
- __version__ = "3.4.38"
3
+ __version__ = "3.4.39"
@@ -905,10 +905,11 @@ def cmd_recall(args: Namespace) -> None:
905
905
  if is_daemon_running() or ensure_daemon():
906
906
  from urllib.parse import quote
907
907
  session_id = f"cli:{os.getppid()}"
908
+ fast_qs = "&fast=true" if getattr(args, "fast", False) else ""
908
909
  result = daemon_request(
909
910
  "GET",
910
911
  f"/recall?q={quote(args.query)}&limit={args.limit}"
911
- f"&session_id={quote(session_id)}",
912
+ f"&session_id={quote(session_id)}{fast_qs}",
912
913
  )
913
914
  if result and "results" in result:
914
915
  # Format daemon response same as engine response
@@ -937,7 +938,10 @@ def cmd_recall(args: Namespace) -> None:
937
938
  engine = MemoryEngine(config)
938
939
  engine.initialize()
939
940
 
940
- response = engine.recall(args.query, limit=args.limit)
941
+ response = engine.recall(
942
+ args.query, limit=args.limit,
943
+ fast=getattr(args, "fast", False),
944
+ )
941
945
  except Exception as exc:
942
946
  if use_json:
943
947
  from superlocalmemory.cli.json_output import json_print
@@ -187,6 +187,12 @@ def main() -> None:
187
187
  recall_p.add_argument("query", help="Search query")
188
188
  recall_p.add_argument("--limit", type=int, default=10, help="Max results (default 10)")
189
189
  recall_p.add_argument("--json", action="store_true", help="Output structured JSON (agent-native)")
190
+ recall_p.add_argument(
191
+ "--fast", action="store_true",
192
+ help="Skip SpreadingActivation 5th channel for sub-second response. "
193
+ "Other 4 channels (semantic, lexical, temporal, structural) still run. "
194
+ "Use when you need recall before a tool call (e.g. before WebSearch).",
195
+ )
190
196
 
191
197
  forget_p = sub.add_parser("forget", help="Delete memories matching a query (fuzzy)")
192
198
  forget_p.add_argument("query", help="Query to match for deletion")
@@ -376,6 +376,7 @@ class MemoryEngine:
376
376
  mode: Mode | None = None, limit: int = 20,
377
377
  agent_id: str = "unknown",
378
378
  session_id: str | None = None,
379
+ fast: bool = False,
379
380
  ) -> RecallResponse:
380
381
  """Recall relevant facts for a query.
381
382
 
@@ -385,6 +386,11 @@ class MemoryEngine:
385
386
  Zero additional latency on the hot path — enqueue is a
386
387
  ``put_nowait`` and the actual ``pending_outcomes`` INSERT runs
387
388
  on a background worker.
389
+
390
+ V3.4.40 (2026-05-09): ``fast=True`` skips the SpreadingActivation
391
+ 5th channel for sub-second response. The other 4 channels still
392
+ run. Use when recall must complete before another tool call (e.g.
393
+ agent recall before WebSearch).
388
394
  """
389
395
  self._require_full("recall")
390
396
  self._ensure_init()
@@ -402,6 +408,7 @@ class MemoryEngine:
402
408
  hooks=self._hooks,
403
409
  access_log=self._access_log,
404
410
  auto_linker=self._auto_linker,
411
+ fast=fast,
405
412
  )
406
413
 
407
414
  # S9-DASH-02: enqueue for pending_outcomes. Non-blocking; errors
@@ -106,6 +106,7 @@ def run_maintenance(
106
106
  "langevin_updated": 0,
107
107
  "fisher_coupled": 0,
108
108
  "sheaf_checked": 0,
109
+ "entity_summaries_consolidated": 0, # V3.4.40
109
110
  }
110
111
 
111
112
  facts = db.get_all_facts(profile_id)
@@ -270,9 +271,46 @@ def run_maintenance(
270
271
  except Exception as exc:
271
272
  logger.warning("Sheaf maintenance failed: %s", exc)
272
273
 
274
+ # 3. V3.4.40: Entity summary consolidation
275
+ # Re-bound any entity_profiles whose knowledge_summary exceeded the cap
276
+ # (e.g. created before V3.4.40, or via a code path that bypassed the
277
+ # bounded _build_summary). Truncates in-place — keeps entity identity,
278
+ # drops bloat. Future writes go through ObservationBuilder.SUMMARY_*
279
+ # bounds and stay clean.
280
+ try:
281
+ consolidated = db.execute(
282
+ """
283
+ UPDATE entity_profiles
284
+ SET knowledge_summary = SUBSTR(knowledge_summary, 1, 2047) || '…',
285
+ last_updated = datetime('now')
286
+ WHERE LENGTH(knowledge_summary) > 2048
287
+ AND profile_id = ?
288
+ """,
289
+ (profile_id,),
290
+ )
291
+ # SQLite doesn't return rowcount via execute() wrapper consistently.
292
+ # Re-count instead — fast on the small subset.
293
+ rows = db.execute(
294
+ "SELECT COUNT(*) AS c FROM entity_profiles "
295
+ "WHERE LENGTH(knowledge_summary) > 2048 AND profile_id = ?",
296
+ (profile_id,),
297
+ )
298
+ # If any remain >2048 after the UPDATE, log it. Otherwise count
299
+ # how many were truncated by diffing against the prior pass.
300
+ # (Best-effort; non-fatal.)
301
+ if rows:
302
+ remaining = dict(rows[0]).get("c", 0)
303
+ counts["entity_summaries_consolidated"] = max(
304
+ 0, counts.get("entity_summaries_consolidated", 0)
305
+ ) - remaining
306
+ except Exception as exc:
307
+ logger.warning("Entity summary consolidation failed: %s", exc)
308
+
273
309
  logger.info(
274
- "Maintenance complete: %d backfilled, %d Langevin, %d Fisher-coupled, %d Sheaf",
310
+ "Maintenance complete: %d backfilled, %d Langevin, %d Fisher-coupled, "
311
+ "%d Sheaf, %d entity-summaries",
275
312
  counts["langevin_backfilled"], counts["langevin_updated"],
276
313
  counts["fisher_coupled"], counts["sheaf_checked"],
314
+ counts["entity_summaries_consolidated"],
277
315
  )
278
316
  return counts
@@ -556,10 +556,15 @@ def run_recall(
556
556
  hooks: HookRegistry,
557
557
  access_log: Any = None,
558
558
  auto_linker: Any = None,
559
+ fast: bool = False,
559
560
  ) -> RecallResponse:
560
561
  """Recall relevant facts for a query.
561
562
 
562
563
  Pipeline: retrieval -> agentic sufficiency (if configured) -> post-recall updates.
564
+
565
+ V3.4.40: ``fast=True`` adds spreading_activation to the per-recall
566
+ extra_disabled_channels set, skipping the 5th channel for sub-second
567
+ response.
563
568
  """
564
569
  # Pre-operation hooks
565
570
  hook_ctx = {
@@ -572,7 +577,11 @@ def run_recall(
572
577
 
573
578
  m = mode or config.mode
574
579
 
575
- response = retrieval_engine.recall(query, profile_id, m, limit)
580
+ extra_disabled = {"spreading_activation"} if fast else None
581
+ response = retrieval_engine.recall(
582
+ query, profile_id, m, limit,
583
+ extra_disabled_channels=extra_disabled,
584
+ )
576
585
 
577
586
  # Agentic sufficiency verification
578
587
  # V3.3.19: Only trigger for multi_hop queries in Mode A (rule-based).
@@ -139,22 +139,39 @@ class ObservationBuilder:
139
139
  ),
140
140
  )
141
141
 
142
+ # V3.4.40 (2026-05-09): bounded summary to prevent unbounded growth.
143
+ # Pre-V3.4.40 builds concatenated full fact content. Hubs accumulated
144
+ # 60+ KB summaries that polluted recall and bloated entity_profiles.
145
+ _SUMMARY_MAX_FACTS = 10 # Last N facts (was 20)
146
+ _SUMMARY_MAX_CHARS_PER_FACT = 200 # Truncate each fact (NEW)
147
+ _SUMMARY_MAX_TOTAL_CHARS = 2048 # Hard cap on full summary (NEW)
148
+
142
149
  def _build_summary(
143
150
  self, entity_id: str, fact_ids: list[str], profile_id: str
144
151
  ) -> str:
145
152
  """Build a knowledge summary from all facts about an entity.
146
153
 
147
- Simple concatenation for now. Mode B/C could use LLM summarization.
154
+ V3.4.40: bounded last 10 facts, 200 chars each, 2048 total.
155
+ Older builds produced 60KB summaries on hub entities.
156
+ Mode B/C could use LLM rollup for higher-quality compression.
148
157
  """
149
158
  facts = []
150
- for fid in fact_ids[-20:]: # Last 20 facts to keep summary manageable
159
+ for fid in fact_ids[-self._SUMMARY_MAX_FACTS:]:
151
160
  rows = self._db.execute(
152
161
  "SELECT content FROM atomic_facts WHERE fact_id = ? AND profile_id = ?",
153
162
  (fid, profile_id),
154
163
  )
155
164
  if rows:
156
- facts.append(dict(rows[0])["content"])
165
+ content = dict(rows[0])["content"]
166
+ # Truncate per-fact to bound the join below
167
+ if len(content) > self._SUMMARY_MAX_CHARS_PER_FACT:
168
+ content = content[: self._SUMMARY_MAX_CHARS_PER_FACT - 1] + "…"
169
+ facts.append(content)
157
170
 
158
171
  if not facts:
159
172
  return ""
160
- return " | ".join(facts)
173
+ joined = " | ".join(facts)
174
+ # Hard cap as final safety net
175
+ if len(joined) > self._SUMMARY_MAX_TOTAL_CHARS:
176
+ joined = joined[: self._SUMMARY_MAX_TOTAL_CHARS - 1] + "…"
177
+ return joined
@@ -17,6 +17,7 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
17
17
  from __future__ import annotations
18
18
 
19
19
  import logging
20
+ import os
20
21
  from pathlib import Path
21
22
  from typing import Callable
22
23
 
@@ -26,17 +27,32 @@ MEMORY_DIR = Path.home() / ".superlocalmemory"
26
27
  DB_PATH = MEMORY_DIR / "memory.db"
27
28
 
28
29
 
30
+ def _get_agent_id(default: str = "mcp_client") -> str:
31
+ """Resolve the calling agent's ID for attribution.
32
+
33
+ Each Avenger (Claude, Codex, Gemini, Kimi, GLM, Qwen, etc.) sets the
34
+ ``SLM_AGENT_ID`` env var in its MCP server config so that memories,
35
+ observations, and registry entries are tagged with the actual source
36
+ agent — not the legacy ``"mcp_client"`` default.
37
+
38
+ v3.4.39+: enables proper cross-Avenger attribution in ``session_init``,
39
+ ``observe``, and event emissions.
40
+ """
41
+ return os.environ.get("SLM_AGENT_ID", default)
42
+
43
+
29
44
  def _emit_event(event_type: str, payload: dict | None = None,
30
- source_agent: str = "mcp_client") -> None: # V3.3.12: see also mcp/shared.py
45
+ source_agent: str | None = None) -> None: # V3.3.12: see also mcp/shared.py
31
46
  """Emit an event to the EventBus (best-effort, never raises).
32
47
 
33
48
  Dashboard visibility is load-bearing per the v3.4.26 user contract,
34
49
  so we log on failure rather than silently dropping the signal.
35
50
  """
51
+ resolved_agent = source_agent if source_agent is not None else _get_agent_id()
36
52
  try:
37
53
  from superlocalmemory.infra.event_bus import EventBus
38
54
  bus = EventBus.get_instance(str(DB_PATH))
39
- bus.emit(event_type, payload=payload, source_agent=source_agent,
55
+ bus.emit(event_type, payload=payload, source_agent=resolved_agent,
40
56
  source_protocol="mcp")
41
57
  except Exception as exc:
42
58
  logger.warning("event emit failed: type=%s err=%s", event_type, exc)
@@ -116,10 +132,11 @@ def register_active_tools(server, get_engine: Callable) -> None:
116
132
  "session_init feedback_count read failed: %s", exc,
117
133
  )
118
134
 
119
- # Register agent + emit event
120
- _register_agent("mcp_client", pid)
135
+ # Register agent + emit event (v3.4.39: SLM_AGENT_ID env support)
136
+ agent_id = _get_agent_id()
137
+ _register_agent(agent_id, pid)
121
138
  _emit_event("agent.connected", {
122
- "agent_id": "mcp_client",
139
+ "agent_id": agent_id,
123
140
  "project_path": project_path,
124
141
  "memory_count": len(memories),
125
142
  })
@@ -145,7 +162,7 @@ def register_active_tools(server, get_engine: Callable) -> None:
145
162
  @server.tool()
146
163
  async def observe(
147
164
  content: str,
148
- agent_id: str = "mcp_client",
165
+ agent_id: str | None = None,
149
166
  ) -> dict:
150
167
  """Observe conversation content for automatic memory capture.
151
168
 
@@ -155,7 +172,13 @@ def register_active_tools(server, get_engine: Callable) -> None:
155
172
 
156
173
  Call this after making decisions, fixing bugs, or expressing preferences.
157
174
  The system will NOT store low-confidence or irrelevant content.
175
+
176
+ v3.4.39: ``agent_id`` now defaults to the ``SLM_AGENT_ID`` env var
177
+ (set by each Avenger's MCP config) so observations carry proper
178
+ cross-Avenger attribution.
158
179
  """
180
+ if agent_id is None:
181
+ agent_id = _get_agent_id()
159
182
  try:
160
183
  from superlocalmemory.hooks.auto_capture import AutoCapture
161
184
  from superlocalmemory.hooks.rules_engine import RulesEngine
@@ -86,8 +86,11 @@ class RetrievalEngine:
86
86
  self._trust_scorer = trust_scorer
87
87
 
88
88
  # V3.3.4: LRU cache for query embeddings (avoids redundant Ollama API calls)
89
+ # V3.4.40 (2026-05-09): bumped 64 -> 512. Each cached embedding is ~3KB
90
+ # (768 floats × 4 bytes). 512 entries ~1.5MB — trivial memory cost,
91
+ # massive latency win on repeated queries (sub-ms vs 200-2000ms ollama).
89
92
  self._query_embedding_cache: dict[str, list[float]] = {}
90
- self._cache_max_size = 64
93
+ self._cache_max_size = 512
91
94
 
92
95
  # V3.2: ChannelRegistry for self-registration (Phase 0.5)
93
96
  from superlocalmemory.retrieval.channel_registry import ChannelRegistry
@@ -112,9 +115,17 @@ class RetrievalEngine:
112
115
  def recall(
113
116
  self, query: str, profile_id: str,
114
117
  mode: Mode = Mode.A, limit: int = 20,
118
+ *,
119
+ extra_disabled_channels: set[str] | None = None,
115
120
  ) -> RecallResponse:
116
- """Full retrieval pipeline: strategy -> channels -> RRF -> rerank."""
121
+ """Full retrieval pipeline: strategy -> channels -> RRF -> rerank.
122
+
123
+ V3.4.40 (2026-05-09): ``extra_disabled_channels`` allows callers to
124
+ skip specific channels for a single recall (e.g. SpreadingActivation
125
+ for the ``--fast`` CLI flag) without mutating shared config.
126
+ """
117
127
  t0 = time.monotonic()
128
+ self._extra_disabled = set(extra_disabled_channels or ())
118
129
 
119
130
  # 1. Classify query, get adaptive weights
120
131
  strat = self._strategy.classify(query, self._base_weights)
@@ -443,7 +454,8 @@ class RetrievalEngine:
443
454
  """Run active retrieval channels. Respects disabled_channels config for ablation."""
444
455
  out: dict[str, list[tuple[str, float]]] = {}
445
456
  # Skip channels listed in disabled_channels (ablation support)
446
- disabled = set(self._config.disabled_channels)
457
+ # V3.4.40: union with per-recall extra_disabled set (e.g. --fast skip)
458
+ disabled = set(self._config.disabled_channels) | getattr(self, "_extra_disabled", set())
447
459
 
448
460
  # V3.3.4: Embed query ONCE, reuse for semantic + hopfield channels
449
461
  q_emb: list[float] | None = None
@@ -47,12 +47,18 @@ class SpreadingActivationConfig:
47
47
  delta: float = 0.5 # Node retention / self-decay per iteration
48
48
  spreading_factor: float = 0.8 # S: energy diffusion rate
49
49
  # V3.3.20: Recalibrated for SLM graph density (254K edges, 768d).
50
+ # V3.4.40 (2026-05-09): graph grew to 960K edges. top_m=20 caused 5.5s recalls.
51
+ # Reduced to 10 (compromise between SYNAPSE default 7 and the dense-graph 20).
50
52
  # SYNAPSE defaults (theta=0.5, top_m=7) were for 384d sparse graphs.
51
53
  theta: float = 0.2 # Activation threshold for sigmoid (was 0.5)
52
- top_m: int = 20 # Lateral inhibition: max active nodes (was 7)
54
+ top_m: int = 10 # Lateral inhibition: max active nodes (was 20, then 7 originally)
53
55
  max_iterations: int = 3 # T: propagation depth
54
56
  tau_gate: float = 0.05 # FOK confidence gate (was 0.12)
55
57
  enabled: bool = True # Ships enabled by default
58
+ # V3.4.40 (2026-05-09): per-node neighbor fan-out clamp.
59
+ # Hub nodes in dense graphs (5K+ edges) caused unbounded work per expansion.
60
+ # 100 top-weighted neighbors keeps signal, drops long-tail noise.
61
+ max_neighbors_per_node: int = 100
56
62
  # v3.4.1: Graph intelligence integration
57
63
  use_pagerank_bias: bool = False # Multiply propagation by target PageRank
58
64
  community_boost: float = 0.0 # Boost same-community nodes (0.0 = disabled)
@@ -217,24 +223,35 @@ class SpreadingActivation:
217
223
  """Get neighbors from BOTH graph_edges and association_edges.
218
224
 
219
225
  Uses bidirectional UNION query (Section 4 of LLD).
226
+
227
+ V3.4.40 (2026-05-09): clamps fan-out to top
228
+ ``max_neighbors_per_node`` by weight. Without this clamp, hub nodes
229
+ with thousands of neighbors caused 5.5s recalls. Bounded fan-out
230
+ matches SYNAPSE's original sparse-graph assumption while preserving
231
+ the highest-signal edges.
220
232
  """
221
233
  try:
222
234
  rows = self._db.execute(
223
235
  """
224
- SELECT target_id AS neighbor_id, weight FROM graph_edges
225
- WHERE source_id = ? AND profile_id = ?
226
- UNION ALL
227
- SELECT target_fact_id AS neighbor_id, weight FROM association_edges
228
- WHERE source_fact_id = ? AND profile_id = ?
229
- UNION ALL
230
- SELECT source_id AS neighbor_id, weight FROM graph_edges
231
- WHERE target_id = ? AND profile_id = ?
232
- UNION ALL
233
- SELECT source_fact_id AS neighbor_id, weight FROM association_edges
234
- WHERE target_fact_id = ? AND profile_id = ?
236
+ SELECT neighbor_id, weight FROM (
237
+ SELECT target_id AS neighbor_id, weight FROM graph_edges
238
+ WHERE source_id = ? AND profile_id = ?
239
+ UNION ALL
240
+ SELECT target_fact_id AS neighbor_id, weight FROM association_edges
241
+ WHERE source_fact_id = ? AND profile_id = ?
242
+ UNION ALL
243
+ SELECT source_id AS neighbor_id, weight FROM graph_edges
244
+ WHERE target_id = ? AND profile_id = ?
245
+ UNION ALL
246
+ SELECT source_fact_id AS neighbor_id, weight FROM association_edges
247
+ WHERE target_fact_id = ? AND profile_id = ?
248
+ )
249
+ ORDER BY weight DESC
250
+ LIMIT ?
235
251
  """,
236
252
  (node_id, profile_id, node_id, profile_id,
237
- node_id, profile_id, node_id, profile_id),
253
+ node_id, profile_id, node_id, profile_id,
254
+ self._config.max_neighbors_per_node),
238
255
  )
239
256
  return [
240
257
  (dict(r)["neighbor_id"], dict(r)["weight"]) for r in rows
@@ -381,7 +381,9 @@ async def test_provider(request: Request):
381
381
  resp = c.get(f"{endpoint}/api/tags")
382
382
  resp.raise_for_status()
383
383
  models = [m["name"] for m in resp.json().get("models", [])]
384
- found = model in models if model else len(models) > 0
384
+ # Match either exact name or base name (strip ":tag" suffix on both sides)
385
+ model_base = model.split(":")[0] if model else ""
386
+ found = any(m == model or m.split(":")[0] == model_base for m in models) if model else len(models) > 0
385
387
  return {
386
388
  "success": found,
387
389
  "message": f"Ollama OK, {len(models)} models" + (f", '{model}' available" if found and model else ""),
@@ -1082,6 +1082,7 @@ def _register_daemon_routes(application: FastAPI) -> None:
1082
1082
  request: Request,
1083
1083
  q: str = "", query: str = "", limit: int = 20,
1084
1084
  session_id: str = "",
1085
+ fast: bool = False,
1085
1086
  ):
1086
1087
  _update_activity()
1087
1088
  search_query = q or query # Accept both ?q= and ?query= for compatibility
@@ -1103,6 +1104,7 @@ def _register_daemon_routes(application: FastAPI) -> None:
1103
1104
  try:
1104
1105
  response = engine.recall(
1105
1106
  search_query, limit=limit, session_id=effective_sid,
1107
+ fast=fast,
1106
1108
  )
1107
1109
  # v3.4.26: return the same field shape as recall_worker so
1108
1110
  # MCP processes proxying through the daemon get recall_trace-