nexo-brain 7.32.0 → 7.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -173,6 +173,46 @@ def applies_overlap(left: str, right: str) -> bool:
173
173
  return False
174
174
 
175
175
 
176
def normalized_key(title: str, applies_to: str = "") -> str:
    """Build the dedup key for a learning from its title and applies scope.

    The title is passed through ``_normalize_text``. ``applies_to`` is split
    with ``_split_applies_to`` and each piece is run through
    ``_normalize_applies_token``; empty tokens are discarded, duplicates
    collapse, and the survivors are sorted, so the order of tokens in
    ``applies_to`` never affects the key.

    This is a public thin wrapper so callers (e.g. the nightly consolidation
    brief builder) can rely on the resolver's public surface for dedup math.

    Returns:
        ``"<title>|<tok1>,<tok2>,..."`` when at least one scope token remains,
        otherwise just the normalized title.
    """
    normalized_title = _normalize_text(title)
    scopes = set()
    for piece in _split_applies_to(applies_to):
        token = _normalize_applies_token(piece)
        if token != "":
            scopes.add(token)
    if not scopes:
        return normalized_title
    ordered = ",".join(sorted(scopes))
    return f"{normalized_title}|{ordered}"
191
+
192
+
193
def candidate_similarity(text_a: str, text_b: str) -> float:
    """Score how similar two free-text snippets are.

    Both inputs are coerced to ``str`` and stripped; if either ends up empty
    the result is ``0.0`` and no scoring is attempted. Otherwise the score is
    delegated to ``hybrid_similarity_score`` with ``extract_keywords`` as the
    keyword extractor and fixed thresholds (strong semantic 0.82, moderate
    semantic 0.74, moderate keyword floor 0.08).

    NOTE(review): these thresholds are intended to match the ones used by the
    resolver's per-candidate ``_similarity`` path so both stay in lockstep —
    keep them in sync when either changes.
    """
    first, second = (str(value or "").strip() for value in (text_a, text_b))
    if not (first and second):
        return 0.0
    score = hybrid_similarity_score(
        first,
        second,
        keyword_extractor=extract_keywords,
        strong_semantic_threshold=0.82,
        moderate_semantic_threshold=0.74,
        moderate_keyword_floor=0.08,
    )
    return float(score)
214
+
215
+
176
216
  def _table_columns(conn: sqlite3.Connection, table: str) -> set[str]:
177
217
  try:
178
218
  return {str(row["name"]) for row in conn.execute(f"PRAGMA table_info({table})").fetchall()}
@@ -413,7 +453,9 @@ __all__ = [
413
453
  "CANONICAL_ACTIONS",
414
454
  "applies_overlap",
415
455
  "authority_rank",
456
+ "candidate_similarity",
416
457
  "looks_contradictory",
417
458
  "normalize_authority",
459
+ "normalized_key",
418
460
  "resolve_learning_candidate",
419
461
  ]
@@ -50,6 +50,12 @@ INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
50
50
  PERFORMANCE_PROFILE_KEY = "performance_profile"
51
51
  DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
52
52
  VALID_CONTEXT_MODES = {"compact", "full"}
53
# FTS5 keyword recall over local_chunks. Additive, guarded, reversible.
# Backfill batch size; 0 disables the incremental backfill entirely.
# A malformed NEXO_LOCAL_FTS_BACKFILL_BATCH (e.g. "abc") must not raise at
# import time and take the whole module down, so unparsable values fall back
# to the default batch size instead.
try:
    FTS_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_FTS_BACKFILL_BATCH", "500") or "500")
except ValueError:
    FTS_BACKFILL_BATCH = 500
# Keys under which backfill progress is persisted in the local index state.
FTS_MIGRATION_CURSOR_KEY = "fts_migration_cursor"
FTS_MIGRATION_DONE_KEY = "fts_migration_done"
FTS_BACKFILL_TOTAL_KEY = "fts_backfill_total"
53
59
  EMBEDDING_REFRESH_JOB = "embedding_refresh"
54
60
  ENTITY_FACTS_JOB = "entity_facts"
55
61
  BACKGROUND_INDEX_JOB_TYPES = {ENTITY_FACTS_JOB}
@@ -3541,6 +3547,14 @@ def run_once(
3541
3547
  }
3542
3548
  scan_result = scan_once(limit=effective_scan_limit)
3543
3549
  job_result = process_jobs(limit=effective_process_limit)
3550
+ # Incremental FTS backfill: bounded one-batch-per-tick, after the disk-budget
3551
+ # gate (above) and after process_jobs. Best-effort — never let it break the
3552
+ # cron tick. Skips itself when disabled (batch=0) or already done.
3553
+ if FTS_BACKFILL_BATCH > 0:
3554
+ try:
3555
+ _backfill_fts_rows(conn, batch_limit=FTS_BACKFILL_BATCH)
3556
+ except Exception:
3557
+ pass
3544
3558
  conn_after = _conn()
3545
3559
  initial_after = _initial_scan_status(conn_after, list_roots(readonly=False))
3546
3560
  blocking_active_after = _active_job_count(conn_after, blocking_only=True)
@@ -4109,6 +4123,32 @@ def _status_from_conn(conn, *, readonly: bool = False) -> dict:
4109
4123
  "permissions": [],
4110
4124
  "models": model_status()["models"],
4111
4125
  "support_log_available": True,
4126
+ "fts_recall": _fts_status(conn),
4127
+ }
4128
+
4129
+
4130
def _fts_status(conn) -> dict:
    """Report operator-facing FTS5 backfill progress (so progress can be watched).

    Each state read is guarded on its own so that an unreadable state value
    degrades to a default rather than failing the whole status payload.

    Returns a dict with:
        enabled: result of ``_fts_enabled_env()``.
        available: result of ``_fts_available(conn)``.
        done: True when the stored done flag equals ``"1"``.
        cursor: stored backfill cursor as an int (0 when unreadable).
        total: stored row total as an int (0 when unreadable).
        read_path: ``"fts"`` when ``_fts_ready(conn)`` is true, else ``"like"``.
        backfill_batch: the configured ``FTS_BACKFILL_BATCH``.
    """
    try:
        done = _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1"
    except Exception:
        done = False
    try:
        cursor = int(_get_state_conn(conn, FTS_MIGRATION_CURSOR_KEY, "0") or "0")
    except Exception:
        cursor = 0
    try:
        total = int(_get_state_conn(conn, FTS_BACKFILL_TOTAL_KEY, "0") or "0")
    except Exception:
        total = 0
    # _fts_ready() re-reads the done flag without a guard of its own; protect
    # it like the reads above so status never raises. "like" is the safe
    # answer when readiness cannot be determined.
    try:
        read_path = "fts" if _fts_ready(conn) else "like"
    except Exception:
        read_path = "like"
    return {
        "enabled": _fts_enabled_env(),
        "available": _fts_available(conn),
        "done": done,
        "cursor": cursor,
        "total": total,
        "read_path": read_path,
        "backfill_batch": FTS_BACKFILL_BATCH,
    }
4113
4153
 
4114
4154
 
@@ -4433,6 +4473,136 @@ def _context_prefilter_limit(default: int = 1200) -> int:
4433
4473
  return max(100, min(value, 5000))
4434
4474
 
4435
4475
 
4476
def _fts_enabled_env() -> bool:
    """Return whether the FTS5 read path is switched on via the environment.

    Reads ``NEXO_LOCAL_CONTEXT_FTS_ENABLED`` (default ``"1"``). After trimming
    and lower-casing, only ``0``, ``false``, ``no`` and ``off`` disable the
    feature; every other value, including the empty string, leaves it enabled.
    """
    raw_flag = os.environ.get("NEXO_LOCAL_CONTEXT_FTS_ENABLED", "1")
    normalized = raw_flag.strip().lower()
    disabling_values = ("0", "false", "no", "off")
    return not any(normalized == candidate for candidate in disabling_values)
4480
+
4481
+
4482
def _fts_available(conn) -> bool:
    """Return True when ``local_chunks_fts`` exists and accepts a MATCH query.

    Two steps, both cheap enough to run inline on every call (no caching, so
    there is no per-connection cache to go stale across reconnects):

    1. Look the name up in ``sqlite_master``; absent means not available.
    2. Run a trivial ``MATCH ... LIMIT 0`` against it. An object of that name
       that is not a real FTS5 table rejects the statement.
       NOTE(review): the schema migration is expected to create a plain
       fallback table on hosts without FTS5 — confirm against the migration.

    Any exception at either step (``sqlite3.OperationalError`` is the expected
    one) is treated as "not available".
    """
    try:
        registered = conn.execute(
            "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name='local_chunks_fts' LIMIT 1"
        ).fetchone()
        if not registered:
            return False
        # The probe term is irrelevant; only whether MATCH is accepted matters.
        probe = conn.execute(
            "SELECT rowid FROM local_chunks_fts WHERE local_chunks_fts MATCH ? LIMIT 0",
            ("nexo_fts_probe",),
        )
        probe.fetchall()
    except Exception:
        return False
    return True
4507
+
4508
+
4509
def _fts_ready(conn) -> bool:
    """Decide whether the FTS read path may be used on this connection.

    All three conditions must hold, checked in this order with short-circuit
    evaluation so later (more expensive) checks are skipped once one fails:
    the feature flag is on, the stored migration-done flag equals ``"1"``, and
    ``_fts_available(conn)`` confirms a working FTS table.
    """
    return (
        _fts_enabled_env()
        and _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1"
        and _fts_available(conn)
    )
4517
+
4518
+
4519
def _fts_match_expr(terms: list[str]) -> str:
    """Turn query terms into an OR-joined FTS5 MATCH expression.

    Every term is coerced to ``str`` and stripped; falsy or blank terms are
    skipped. Each survivor is wrapped in double quotes (FTS5 phrase syntax)
    with any embedded double quote doubled, so operators and special
    characters inside a term are treated as literal text.

    Returns:
        The phrases joined with ``" OR "``, or ``""`` when no term survives.
    """
    stripped_terms = (str(term or "").strip() for term in terms)
    phrases = [
        '"{}"'.format(text.replace('"', '""'))
        for text in stripped_terms
        if text
    ]
    return " OR ".join(phrases)
4533
+
4534
+
4535
def _backfill_fts_rows(conn, *, batch_limit: int | None = None) -> dict:
    """Mirror one batch of existing ``local_chunks`` rows into ``local_chunks_fts``.

    Resumable: the highest processed rowid is stored under
    ``FTS_MIGRATION_CURSOR_KEY`` and committed together with the batch, so the
    next call continues after the last committed rowid. Each row is written as
    DELETE-then-INSERT on the same rowid, which makes reprocessing a row safe.
    When no rows remain past the cursor, ``FTS_MIGRATION_DONE_KEY`` is set to
    ``"1"`` and committed.

    NOTE(review): rows created after the schema migration are expected to be
    mirrored by triggers on ``local_chunks``, leaving only pre-existing rows
    for this function — confirm against the migration.

    Args:
        conn: SQLite connection; rows are accessed by column name.
        batch_limit: maximum rows to mirror in this call. ``None`` means use
            ``FTS_BACKFILL_BATCH``; a value <= 0 disables the backfill.

    Returns:
        A status dict. Skips return ``{"ok", "skipped", "done"}``; a processed
        batch returns ``{"ok", "done", "processed", "cursor"}``.
    """
    limit = int(FTS_BACKFILL_BATCH if batch_limit is None else batch_limit)
    if limit <= 0:
        already_done = _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1"
        return {"ok": True, "skipped": "disabled", "done": already_done}
    if not _fts_available(conn):
        return {"ok": True, "skipped": "fts_unavailable", "done": False}
    if _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1":
        return {"ok": True, "skipped": "already_done", "done": True}

    def _run() -> dict:
        # Resume point; an unreadable cursor restarts from the beginning,
        # which is safe because each row is rewritten idempotently.
        try:
            resume_from = int(_get_state_conn(conn, FTS_MIGRATION_CURSOR_KEY, "0") or "0")
        except Exception:
            resume_from = 0

        # Record the table size only once so status reporting does not need a
        # COUNT(*) over the full table on every tick. Failure is non-fatal.
        recorded_total = _get_state_conn(conn, FTS_BACKFILL_TOTAL_KEY, "")
        if recorded_total == "":
            try:
                count_row = conn.execute("SELECT COUNT(*) AS total FROM local_chunks").fetchone()
                _set_state_conn(conn, FTS_BACKFILL_TOTAL_KEY, str(int(count_row["total"] or 0)))
            except Exception:
                pass

        batch = conn.execute(
            """
            SELECT c.rowid AS rid, c.text AS text,
            COALESCE(a.privacy_class, 'normal') AS privacy_class,
            COALESCE(a.status, 'active') AS asset_status
            FROM local_chunks c
            LEFT JOIN local_assets a ON a.asset_id = c.asset_id
            WHERE c.rowid > ?
            ORDER BY c.rowid ASC
            LIMIT ?
            """,
            (resume_from, limit),
        ).fetchall()

        if not batch:
            # Nothing left past the cursor: the backfill is complete.
            _set_state_conn(conn, FTS_MIGRATION_DONE_KEY, "1")
            conn.commit()
            return {"ok": True, "done": True, "processed": 0, "cursor": resume_from}

        high_water = resume_from
        for record in batch:
            rid = int(record["rid"])
            fts_values = (
                rid,
                str(record["text"] or ""),
                str(record["privacy_class"] or "normal"),
                str(record["asset_status"] or "active"),
            )
            # Remove any earlier mirror of this rowid before inserting.
            conn.execute("DELETE FROM local_chunks_fts WHERE rowid = ?", (rid,))
            conn.execute(
                """
                INSERT INTO local_chunks_fts(rowid, text, privacy_class, asset_status)
                VALUES (?, ?, ?, ?)
                """,
                fts_values,
            )
            high_water = max(high_water, rid)

        # Cursor and mirrored rows are committed together.
        _set_state_conn(conn, FTS_MIGRATION_CURSOR_KEY, str(high_water))
        conn.commit()
        return {"ok": True, "done": False, "processed": len(batch), "cursor": high_water}

    return _with_sqlite_busy_retry(_run)
4604
+
4605
+
4436
4606
  def _context_candidate_rows(
4437
4607
  conn,
4438
4608
  entity_asset_ids: list[str],
@@ -4444,39 +4614,73 @@ def _context_candidate_rows(
4444
4614
  prefilter_limit = min(int(base_limit or 5000), _context_prefilter_limit())
4445
4615
  prefilter_rows = []
4446
4616
  if terms:
4447
- term_clauses = []
4448
- params: list[str] = []
4449
- for term in terms:
4450
- term_clauses.append("(lower(a.path) LIKE ? OR lower(COALESCE(v.summary, '')) LIKE ? OR lower(c.text) LIKE ?)")
4451
- like = f"%{term}%"
4452
- params.extend([like, like, like])
4453
- prefilter_rows = conn.execute(
4454
- f"""
4455
- SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4456
- e.vector_json, e.model_id, e.model_revision, e.dimension
4457
- FROM local_chunks c
4458
- JOIN local_assets a ON a.asset_id = c.asset_id
4459
- LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
4460
- LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
4461
- WHERE a.status='active'
4462
- AND a.privacy_class='normal'
4463
- AND ({" OR ".join(term_clauses)})
4464
- ORDER BY
4465
- CASE
4466
- WHEN {" OR ".join("lower(a.path) LIKE ?" for _ in terms)} THEN 0
4467
- WHEN {" OR ".join("lower(COALESCE(v.summary, '')) LIKE ?" for _ in terms)} THEN 1
4468
- ELSE 2
4469
- END,
4470
- c.created_at DESC
4471
- LIMIT ?
4472
- """,
4473
- [
4474
- *params,
4475
- *(f"%{term}%" for term in terms),
4476
- *(f"%{term}%" for term in terms),
4477
- prefilter_limit,
4478
- ],
4479
- ).fetchall()
4617
+ used_fts = False
4618
+ # DUAL-READ: only take the FTS path once the backfill is done AND the
4619
+ # flag is on AND FTS5 is available. Until then (or on rollback) the
4620
+ # EXACT legacy LIKE path runs, so retrieval is unaffected mid-migration.
4621
+ if _fts_ready(conn):
4622
+ match_expr = _fts_match_expr(terms)
4623
+ if match_expr:
4624
+ try:
4625
+ prefilter_rows = conn.execute(
4626
+ """
4627
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4628
+ e.vector_json, e.model_id, e.model_revision, e.dimension
4629
+ FROM local_chunks_fts f
4630
+ JOIN local_chunks c ON c.rowid = f.rowid
4631
+ JOIN local_assets a ON a.asset_id = c.asset_id
4632
+ LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
4633
+ LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
4634
+ WHERE local_chunks_fts MATCH ?
4635
+ AND f.privacy_class='normal'
4636
+ AND f.asset_status='active'
4637
+ AND a.status='active'
4638
+ AND a.privacy_class='normal'
4639
+ ORDER BY bm25(local_chunks_fts), c.created_at DESC
4640
+ LIMIT ?
4641
+ """,
4642
+ (match_expr, prefilter_limit),
4643
+ ).fetchall()
4644
+ used_fts = True
4645
+ except sqlite3.OperationalError:
4646
+ # Malformed FTS expression (odd user input) -> fall back to
4647
+ # the legacy LIKE path below instead of erroring the answer.
4648
+ prefilter_rows = []
4649
+ used_fts = False
4650
+ if not used_fts:
4651
+ term_clauses = []
4652
+ params: list[str] = []
4653
+ for term in terms:
4654
+ term_clauses.append("(lower(a.path) LIKE ? OR lower(COALESCE(v.summary, '')) LIKE ? OR lower(c.text) LIKE ?)")
4655
+ like = f"%{term}%"
4656
+ params.extend([like, like, like])
4657
+ prefilter_rows = conn.execute(
4658
+ f"""
4659
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4660
+ e.vector_json, e.model_id, e.model_revision, e.dimension
4661
+ FROM local_chunks c
4662
+ JOIN local_assets a ON a.asset_id = c.asset_id
4663
+ LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
4664
+ LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
4665
+ WHERE a.status='active'
4666
+ AND a.privacy_class='normal'
4667
+ AND ({" OR ".join(term_clauses)})
4668
+ ORDER BY
4669
+ CASE
4670
+ WHEN {" OR ".join("lower(a.path) LIKE ?" for _ in terms)} THEN 0
4671
+ WHEN {" OR ".join("lower(COALESCE(v.summary, '')) LIKE ?" for _ in terms)} THEN 1
4672
+ ELSE 2
4673
+ END,
4674
+ c.created_at DESC
4675
+ LIMIT ?
4676
+ """,
4677
+ [
4678
+ *params,
4679
+ *(f"%{term}%" for term in terms),
4680
+ *(f"%{term}%" for term in terms),
4681
+ prefilter_limit,
4682
+ ],
4683
+ ).fetchall()
4480
4684
 
4481
4685
  fallback_limit = prefilter_limit if not terms else max(120, min(500, prefilter_limit // 3))
4482
4686
  base_rows = conn.execute(
@@ -8,7 +8,7 @@ from typing import Iterable
8
8
  from urllib.parse import quote
9
9
 
10
10
  import paths
11
- from db._schema import _m63_local_context_layer, _m64_local_context_live_dirs
11
+ from db._schema import _m63_local_context_layer, _m64_local_context_live_dirs, _m84_local_chunks_fts
12
12
 
13
13
  LOCAL_CONTEXT_DB_NAME = "local-context.db"
14
14
  MIGRATION_STATE_KEY = "local_context_db_migrated_from_main"
@@ -118,7 +118,8 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
118
118
  _m64_local_context_live_dirs(conn)
119
119
  _ensure_entity_dossier_schema(conn)
120
120
  _ensure_local_context_v2_schema(conn)
121
- conn.execute("PRAGMA user_version=65")
121
+ _m84_local_chunks_fts(conn)
122
+ conn.execute("PRAGMA user_version=84")
122
123
  conn.commit()
123
124
 
124
125
 
@@ -352,6 +352,7 @@ def record_router_usage(
352
352
  elapsed_ms: int | None = None,
353
353
  deadline_ms: int | None = None,
354
354
  used_before_response: bool = True,
355
+ cache_hit: bool = False,
355
356
  db_path: str | os.PathLike[str] | None = None,
356
357
  ) -> dict[str, Any]:
357
358
  evidence_refs = router_payload.get("evidence_refs") or []
@@ -374,6 +375,7 @@ def record_router_usage(
374
375
  "escalated_from": router_payload.get("escalated_from") or budget_policy.get("escalated_from") or "",
375
376
  "escalated_to": router_payload.get("escalated_to") or budget_policy.get("escalated_to") or "",
376
377
  "route_cache_key": budget_policy.get("route_cache_key") or "",
378
+ "cache_hit": bool(cache_hit or router_payload.get("cache_hit")),
377
379
  "max_sources": budget_policy.get("max_sources") or 0,
378
380
  "max_source_timeout_ms": budget_policy.get("max_source_timeout_ms") or 0,
379
381
  "allowed_sources": budget_policy.get("allowed_sources") or [],
@@ -9,12 +9,24 @@ from typing import Any
9
9
 
10
10
  from db import (
11
11
  build_pre_action_context,
12
+ get_memory_observations_by_uids,
12
13
  list_memory_events,
13
14
  list_memory_observations,
14
15
  process_memory_observation_queue,
15
16
  search_memory_observations_fts,
17
+ vector_scan_observations,
16
18
  )
17
19
 
20
+ # Weight for the semantic (vector) signal when fused with the lexical/FTS score.
21
+ # A strong paraphrase match (high cosine) can carry an observation that the
22
+ # token-overlap score missed entirely, while still ranking below an exact
23
+ # lexical hit on the same query.
24
+ _VECTOR_FUSION_WEIGHT = 0.85
25
+ # Minimum cosine for a semantic-only candidate to survive the relaxed filter.
26
+ # Below this, a vector "match" is noise and must not resurrect an observation
27
+ # that the lexical path already rejected.
28
+ _VECTOR_MIN_SCORE = 0.30
29
+
18
30
 
19
31
  def _tokens(text: str) -> set[str]:
20
32
  return {
@@ -37,6 +49,41 @@ def _score(query: str, text: str, base: float = 0.0) -> float:
37
49
  return min(1.0, base + len(overlap) / max(1, len(query_tokens)))
38
50
 
39
51
 
52
def _model_is_warm() -> bool:
    """Report whether embedding a query can proceed without a cold model load.

    Returns False if ``cognitive._core`` cannot be imported or if its
    ``_model_download_disabled()`` check raises. Returns True when that check
    is truthy (presumably the offline fallback path — verify in
    ``cognitive._core``). Otherwise returns whether the module's ``_model``
    attribute is already populated.
    """
    try:
        import cognitive._core as core

        if core._model_download_disabled():
            return True
    except Exception:
        return False
    loaded_model = getattr(core, "_model", None)
    return loaded_model is not None
64
+
65
+
66
def _maybe_query_embedding(query: str):
    """Return an embedding for ``query``, or None when one is not cheaply available.

    Latency guard: the query is embedded only when ``_model_is_warm()`` says
    so; otherwise None is returned and the caller is expected to stay on the
    lexical path. A blank query, or any failure while importing
    ``cognitive._core`` or calling its ``embed``, also yields None.
    """
    text = (query or "").strip()
    if not text or not _model_is_warm():
        return None
    try:
        import cognitive._core as core

        vector = core.embed(text)
    except Exception:
        return None
    return vector
85
+
86
+
40
87
  def _project_hint_values(project_hint: str = "") -> set[str]:
41
88
  clean = (project_hint or "").strip()
42
89
  if not clean:
@@ -225,6 +272,30 @@ def memory_search(
225
272
  ):
226
273
  uid = item.get("observation_uid") or f"id:{item.get('id')}"
227
274
  observations_by_uid.setdefault(uid, item)
275
+ # Semantic fusion: embed the query ONCE (only when a model is already warm —
276
+ # never trigger a cold model load on this latency path) and run a bounded
277
+ # vector scan over precomputed observation embeddings. Paraphrases that the
278
+ # lexical/FTS path missed are pulled in here.
279
+ vector_scores: dict[str, float] = {}
280
+ if clean_query:
281
+ query_vector = _maybe_query_embedding(clean_query)
282
+ if query_vector is not None:
283
+ for hit in vector_scan_observations(
284
+ query_vector,
285
+ limit=max_items * 3,
286
+ start_ts=start,
287
+ end_ts=end,
288
+ min_score=_VECTOR_MIN_SCORE,
289
+ ):
290
+ uid = hit.get("observation_uid")
291
+ if uid:
292
+ vector_scores[uid] = float(hit.get("vector_score") or 0.0)
293
+ # Materialise semantic-only observations the lexical scan did not see.
294
+ missing_uids = [uid for uid in vector_scores if uid not in observations_by_uid]
295
+ if missing_uids:
296
+ for uid, item in get_memory_observations_by_uids(missing_uids).items():
297
+ observations_by_uid.setdefault(uid, item)
298
+
228
299
  observations = list(observations_by_uid.values())
229
300
  events = list_memory_events(
230
301
  query=clean_query,
@@ -234,12 +305,23 @@ def memory_search(
234
305
  end_ts=end,
235
306
  )
236
307
 
237
- candidates = [
238
- _observation_to_candidate(item, clean_query)
239
- for item in observations
240
- if _within_range(item.get("created_at"), start, end)
241
- and _project_matches(item.get("project_key") or "", project_hint)
242
- ]
308
+ candidates = []
309
+ for item in observations:
310
+ if not _within_range(item.get("created_at"), start, end):
311
+ continue
312
+ if not _project_matches(item.get("project_key") or "", project_hint):
313
+ continue
314
+ candidate = _observation_to_candidate(item, clean_query)
315
+ uid = item.get("observation_uid") or f"id:{item.get('id')}"
316
+ vector_score = vector_scores.get(uid, 0.0)
317
+ if vector_score > 0:
318
+ # Fuse: keep the higher of the lexical score and the weighted vector
319
+ # signal so a strong paraphrase survives while exact lexical hits
320
+ # still outrank weak semantic ones.
321
+ fused = max(float(candidate.get("score") or 0.0), _VECTOR_FUSION_WEIGHT * vector_score)
322
+ candidate["score"] = round(fused, 4)
323
+ candidate["vector_score"] = round(vector_score, 4)
324
+ candidates.append(candidate)
243
325
  candidates.extend(
244
326
  _event_to_candidate(item, clean_query)
245
327
  for item in events
@@ -248,7 +330,14 @@ def memory_search(
248
330
  )
249
331
 
250
332
  if clean_query:
251
- candidates = [item for item in candidates if item.get("score", 0) > 0]
333
+ # Relaxed filter: a candidate survives if it has a positive lexical score
334
+ # OR a qualifying semantic (vector) match. Previously the hard score>0
335
+ # filter dropped semantic-only paraphrase hits before they could rank.
336
+ candidates = [
337
+ item
338
+ for item in candidates
339
+ if item.get("score", 0) > 0 or item.get("vector_score", 0) > 0
340
+ ]
252
341
  candidates.sort(key=lambda item: (item.get("score", 0), item.get("created_at") or 0), reverse=True)
253
342
  candidates = candidates[:max_items]
254
343