nexo-brain 7.32.0 → 7.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/consolidation_prep.py +380 -0
- package/src/db/__init__.py +5 -1
- package/src/db/_episodic.py +32 -0
- package/src/db/_memory_v2.py +276 -0
- package/src/db/_protocol.py +35 -0
- package/src/db/_schema.py +207 -0
- package/src/hooks/auto_capture.py +60 -24
- package/src/learning_resolver.py +42 -0
- package/src/local_context/api.py +237 -33
- package/src/local_context/db.py +3 -2
- package/src/local_context/usage_events.py +2 -0
- package/src/memory_retrieval.py +96 -7
- package/src/message_batch_preview.py +290 -0
- package/src/plugins/protocol.py +218 -27
- package/src/ppr.py +473 -0
- package/src/pre_answer_router.py +316 -3
- package/src/pre_answer_runtime.py +156 -1
- package/src/resolution_cache.py +1119 -0
- package/src/scripts/deep-sleep/apply_findings.py +86 -9
- package/src/scripts/deep-sleep/rewrite.py +625 -0
- package/src/scripts/nexo-deep-sleep.sh +10 -0
- package/src/scripts/nexo-followup-runner.py +110 -8
- package/src/scripts/nexo-morning-agent.py +43 -2
- package/src/scripts/nexo-postmortem-consolidator.py +44 -1
- package/src/self_error_detector.py +414 -0
- package/src/semantic_layers.py +30 -3
- package/templates/core-prompts/morning-agent.md +3 -0
- package/templates/core-prompts/postmortem-consolidator.md +29 -2
package/src/learning_resolver.py
CHANGED
|
@@ -173,6 +173,46 @@ def applies_overlap(left: str, right: str) -> bool:
|
|
|
173
173
|
return False
|
|
174
174
|
|
|
175
175
|
|
|
176
|
+
def normalized_key(title: str, applies_to: str = "") -> str:
    """Build a stable dedup key for a learning.

    The key is the normalized title, optionally followed by ``|`` and the
    comma-joined, sorted set of normalized ``applies_to`` tokens. Empty tokens
    are discarded, so a learning without any usable scope yields just the
    normalized title.

    Exposed as a public wrapper so callers can compute dedup keys without
    reaching into the resolver's private helpers.

    Args:
        title: Learning title; normalized via ``_normalize_text``.
        applies_to: Raw scope string; split via ``_split_applies_to`` and each
            piece normalized via ``_normalize_applies_token``.

    Returns:
        ``"<title>"`` or ``"<title>|<scope1>,<scope2>,..."``.
    """
    base = _normalize_text(title)
    unique_scopes = set()
    for raw_scope in _split_applies_to(applies_to):
        token = _normalize_applies_token(raw_scope)
        # Tokens that normalize to the empty string carry no scope information.
        if token != "":
            unique_scopes.add(token)
    if not unique_scopes:
        return base
    # Sorting makes the key independent of the order scopes were written in.
    ordered = sorted(unique_scopes)
    return f"{base}|{','.join(ordered)}"
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def candidate_similarity(text_a: str, text_b: str) -> float:
    """Score how similar two free-text snippets are.

    Both inputs are coerced to ``str`` (``None``/falsy becomes ``""``) and
    stripped. If either side is empty after stripping the result is ``0.0``
    and the scorer is not invoked. Otherwise the call is delegated to
    ``hybrid_similarity_score`` with fixed thresholds (0.82 strong, 0.74
    moderate, 0.08 keyword floor).

    NOTE(review): the original author states these thresholds mirror the
    resolver's per-candidate ``_similarity`` path; that function is not visible
    here, so keep the two in sync when changing either.

    Args:
        text_a: First snippet.
        text_b: Second snippet.

    Returns:
        The similarity score as a ``float``; ``0.0`` for empty input.
    """
    cleaned = [str(value or "").strip() for value in (text_a, text_b)]
    if not all(cleaned):
        return 0.0
    first, second = cleaned
    score = hybrid_similarity_score(
        first,
        second,
        keyword_extractor=extract_keywords,
        strong_semantic_threshold=0.82,
        moderate_semantic_threshold=0.74,
        moderate_keyword_floor=0.08,
    )
    return float(score)
|
|
214
|
+
|
|
215
|
+
|
|
176
216
|
def _table_columns(conn: sqlite3.Connection, table: str) -> set[str]:
|
|
177
217
|
try:
|
|
178
218
|
return {str(row["name"]) for row in conn.execute(f"PRAGMA table_info({table})").fetchall()}
|
|
@@ -413,7 +453,9 @@ __all__ = [
|
|
|
413
453
|
"CANONICAL_ACTIONS",
|
|
414
454
|
"applies_overlap",
|
|
415
455
|
"authority_rank",
|
|
456
|
+
"candidate_similarity",
|
|
416
457
|
"looks_contradictory",
|
|
417
458
|
"normalize_authority",
|
|
459
|
+
"normalized_key",
|
|
418
460
|
"resolve_learning_candidate",
|
|
419
461
|
]
|
package/src/local_context/api.py
CHANGED
|
@@ -50,6 +50,12 @@ INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
|
|
|
50
50
|
PERFORMANCE_PROFILE_KEY = "performance_profile"
|
|
51
51
|
DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
|
|
52
52
|
VALID_CONTEXT_MODES = {"compact", "full"}
|
|
53
|
+
# FTS5 keyword recall over local_chunks. Additive, guarded, reversible.
# Backfill batch size; 0 disables the incremental backfill entirely.
try:
    FTS_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_FTS_BACKFILL_BATCH", "500") or "500")
except ValueError:
    # A malformed override (e.g. "abc") must not make the whole module fail to
    # import; fall back to the documented default instead.
    FTS_BACKFILL_BATCH = 500
# local_index_state keys used by the backfill:
# highest local_chunks rowid already mirrored into the FTS table.
FTS_MIGRATION_CURSOR_KEY = "fts_migration_cursor"
# "1" once the backfill found no rows past the cursor.
FTS_MIGRATION_DONE_KEY = "fts_migration_done"
# COUNT(*) of local_chunks, snapshotted on the first backfill tick.
FTS_BACKFILL_TOTAL_KEY = "fts_backfill_total"
|
|
53
59
|
EMBEDDING_REFRESH_JOB = "embedding_refresh"
|
|
54
60
|
ENTITY_FACTS_JOB = "entity_facts"
|
|
55
61
|
BACKGROUND_INDEX_JOB_TYPES = {ENTITY_FACTS_JOB}
|
|
@@ -3541,6 +3547,14 @@ def run_once(
|
|
|
3541
3547
|
}
|
|
3542
3548
|
scan_result = scan_once(limit=effective_scan_limit)
|
|
3543
3549
|
job_result = process_jobs(limit=effective_process_limit)
|
|
3550
|
+
# Incremental FTS backfill: bounded one-batch-per-tick, after the disk-budget
|
|
3551
|
+
# gate (above) and after process_jobs. Best-effort — never let it break the
|
|
3552
|
+
# cron tick. Skips itself when disabled (batch=0) or already done.
|
|
3553
|
+
if FTS_BACKFILL_BATCH > 0:
|
|
3554
|
+
try:
|
|
3555
|
+
_backfill_fts_rows(conn, batch_limit=FTS_BACKFILL_BATCH)
|
|
3556
|
+
except Exception:
|
|
3557
|
+
pass
|
|
3544
3558
|
conn_after = _conn()
|
|
3545
3559
|
initial_after = _initial_scan_status(conn_after, list_roots(readonly=False))
|
|
3546
3560
|
blocking_active_after = _active_job_count(conn_after, blocking_only=True)
|
|
@@ -4109,6 +4123,32 @@ def _status_from_conn(conn, *, readonly: bool = False) -> dict:
|
|
|
4109
4123
|
"permissions": [],
|
|
4110
4124
|
"models": model_status()["models"],
|
|
4111
4125
|
"support_log_available": True,
|
|
4126
|
+
"fts_recall": _fts_status(conn),
|
|
4127
|
+
}
|
|
4128
|
+
|
|
4129
|
+
|
|
4130
|
+
def _fts_status(conn) -> dict:
    """Report operator-facing FTS5 backfill progress.

    Every state read is best-effort: a failing or malformed state value
    degrades to a neutral default (``False`` / ``0``) rather than breaking the
    status surface.

    Args:
        conn: Open connection to the local-context database.

    Returns:
        A dict with:
          * ``enabled`` - feature flag from the environment.
          * ``available`` - whether the FTS table exists and answers MATCH.
          * ``done`` - whether the backfill has been marked complete.
          * ``cursor`` - last persisted backfill cursor (a rowid).
          * ``total`` - row count snapshotted when the backfill started.
          * ``read_path`` - ``"fts"`` when the FTS read path is in effect,
            otherwise ``"like"``.
          * ``backfill_batch`` - configured batch size.
    """

    def _state_int(key: str) -> int:
        # Missing, empty or non-numeric state collapses to 0.
        try:
            return int(_get_state_conn(conn, key, "0") or "0")
        except Exception:
            return 0

    try:
        done = _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1"
    except Exception:
        done = False
    cursor = _state_int(FTS_MIGRATION_CURSOR_KEY)
    total = _state_int(FTS_BACKFILL_TOTAL_KEY)
    enabled = _fts_enabled_env()
    available = _fts_available(conn)
    # Same condition as _fts_ready (flag on AND backfill done AND FTS usable),
    # derived from the values above so the MATCH probe runs once and an
    # unreadable state table cannot raise out of the status call.
    read_path = "fts" if (enabled and done and available) else "like"
    return {
        "enabled": enabled,
        "available": available,
        "done": done,
        "cursor": cursor,
        "total": total,
        "read_path": read_path,
        "backfill_batch": FTS_BACKFILL_BATCH,
    }
|
|
4113
4153
|
|
|
4114
4154
|
|
|
@@ -4433,6 +4473,136 @@ def _context_prefilter_limit(default: int = 1200) -> int:
|
|
|
4433
4473
|
return max(100, min(value, 5000))
|
|
4434
4474
|
|
|
4435
4475
|
|
|
4476
|
+
def _fts_enabled_env() -> bool:
    """Return whether the FTS5 read path is enabled via the environment.

    Reads ``NEXO_LOCAL_CONTEXT_FTS_ENABLED`` (default ``"1"``). The value is
    stripped and lower-cased; only ``0``, ``false``, ``no`` and ``off`` turn
    the feature off. Anything else, including an empty string, leaves it on.
    """
    raw = os.environ.get("NEXO_LOCAL_CONTEXT_FTS_ENABLED", "1")
    normalized = raw.strip().lower()
    if normalized in ("0", "false", "no", "off"):
        return False
    return True
|
|
4480
|
+
|
|
4481
|
+
|
|
4482
|
+
def _fts_available(conn) -> bool:
    """Return True when ``local_chunks_fts`` exists and answers a MATCH query.

    Two checks run on every call (nothing is cached):

    1. ``sqlite_master`` must list a table or view named ``local_chunks_fts``.
    2. A trivial ``MATCH ... LIMIT 0`` query against it must execute. Per the
       original author's note, hosts without FTS5 get a plain shadow table
       instead, on which this query raises.

    Any exception, ``sqlite3.OperationalError`` or otherwise, is treated as
    "not available" and yields False.
    """
    lookup_sql = (
        "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name='local_chunks_fts' LIMIT 1"
    )
    probe_sql = "SELECT rowid FROM local_chunks_fts WHERE local_chunks_fts MATCH ? LIMIT 0"
    try:
        if not conn.execute(lookup_sql).fetchone():
            return False
        # Only a real FTS table accepts MATCH; the result set is irrelevant.
        conn.execute(probe_sql, ("nexo_fts_probe",)).fetchall()
    except Exception:
        # Covers sqlite3.OperationalError as well as any other failure.
        return False
    return True
|
|
4507
|
+
|
|
4508
|
+
|
|
4509
|
+
def _fts_ready(conn) -> bool:
    """Decide whether queries may use the FTS read path.

    All three must hold, checked in this order so the cheaper tests
    short-circuit the MATCH probe:

    1. the environment feature flag is on,
    2. the backfill-done state flag equals ``"1"``,
    3. the FTS table is actually usable on this connection.
    """
    flag_on = _fts_enabled_env()
    if flag_on and _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1":
        return _fts_available(conn)
    return False
|
|
4517
|
+
|
|
4518
|
+
|
|
4519
|
+
def _fts_match_expr(terms: list[str]) -> str:
    """Turn query terms into an FTS5 MATCH expression.

    Each term is coerced to ``str`` (falsy becomes ``""``), stripped, and
    skipped if empty. The rest are wrapped in double quotes with any embedded
    double quote doubled, then joined with `` OR ``.

    Args:
        terms: Raw query terms.

    Returns:
        The OR-joined quoted phrases, or ``""`` when no term survives.
    """
    stripped = (str(term or "").strip() for term in terms)
    phrases = ['"{}"'.format(text.replace('"', '""')) for text in stripped if text]
    return " OR ".join(phrases)
|
|
4533
|
+
|
|
4534
|
+
|
|
4535
|
+
def _backfill_fts_rows(conn, *, batch_limit: int | None = None) -> dict:
    """Mirror one batch of ``local_chunks`` rows into ``local_chunks_fts``.

    Resumable and safe to re-run:

    * The highest processed rowid is stored under
      ``FTS_MIGRATION_CURSOR_KEY`` and committed with each batch, so an
      interrupted run resumes after the last committed rowid.
    * Each row is written as a DELETE of that rowid followed by an INSERT, so
      re-processing a row does not duplicate it.
    * When no rows remain past the cursor, ``FTS_MIGRATION_DONE_KEY`` is set
      to ``"1"`` and committed.

    On the first tick (total key still empty) a ``COUNT(*)`` of
    ``local_chunks`` is stored under ``FTS_BACKFILL_TOTAL_KEY``; a failure
    there is ignored because the total is only informational.

    NOTE(review): per the original author, rows written after the schema
    migration are mirrored by triggers on ``local_chunks``, so this only needs
    to cover pre-existing rows. The triggers are not visible here.

    Args:
        conn: Open connection to the local-context database. Rows are read by
            column name, so a name-addressable row factory is required.
        batch_limit: Maximum rows to process; ``None`` uses
            ``FTS_BACKFILL_BATCH``. A value ``<= 0`` disables the backfill.

    Returns:
        A status dict. Skips return ``{"ok", "skipped", "done"}``; a processed
        tick returns ``{"ok", "done", "processed", "cursor"}``.
    """
    limit = int(FTS_BACKFILL_BATCH if batch_limit is None else batch_limit)
    if limit <= 0:
        already_done = _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1"
        return {"ok": True, "skipped": "disabled", "done": already_done}
    if not _fts_available(conn):
        return {"ok": True, "skipped": "fts_unavailable", "done": False}
    if _get_state_conn(conn, FTS_MIGRATION_DONE_KEY, "0") == "1":
        return {"ok": True, "skipped": "already_done", "done": True}

    def _run() -> dict:
        # Resume point; unreadable state restarts from the beginning, which is
        # safe because each row is deleted before it is re-inserted.
        try:
            resume_from = int(_get_state_conn(conn, FTS_MIGRATION_CURSOR_KEY, "0") or "0")
        except Exception:
            resume_from = 0

        # Snapshot the total once so status reads never need their own COUNT(*).
        if _get_state_conn(conn, FTS_BACKFILL_TOTAL_KEY, "") == "":
            try:
                count_row = conn.execute("SELECT COUNT(*) AS total FROM local_chunks").fetchone()
                _set_state_conn(conn, FTS_BACKFILL_TOTAL_KEY, str(int(count_row["total"] or 0)))
            except Exception:
                pass

        batch = conn.execute(
            """
            SELECT c.rowid AS rid, c.text AS text,
            COALESCE(a.privacy_class, 'normal') AS privacy_class,
            COALESCE(a.status, 'active') AS asset_status
            FROM local_chunks c
            LEFT JOIN local_assets a ON a.asset_id = c.asset_id
            WHERE c.rowid > ?
            ORDER BY c.rowid ASC
            LIMIT ?
            """,
            (resume_from, limit),
        ).fetchall()

        if not batch:
            # Nothing past the cursor: the backfill is complete.
            _set_state_conn(conn, FTS_MIGRATION_DONE_KEY, "1")
            conn.commit()
            return {"ok": True, "done": True, "processed": 0, "cursor": resume_from}

        highest = resume_from
        for record in batch:
            rowid = int(record["rid"])
            text_value = str(record["text"] or "")
            privacy_value = str(record["privacy_class"] or "normal")
            status_value = str(record["asset_status"] or "active")
            # Delete-then-insert keeps a re-run from duplicating the row.
            conn.execute("DELETE FROM local_chunks_fts WHERE rowid = ?", (rowid,))
            conn.execute(
                """
                INSERT INTO local_chunks_fts(rowid, text, privacy_class, asset_status)
                VALUES (?, ?, ?, ?)
                """,
                (rowid, text_value, privacy_value, status_value),
            )
            highest = max(highest, rowid)

        # Persist the cursor in the same commit as the mirrored rows.
        _set_state_conn(conn, FTS_MIGRATION_CURSOR_KEY, str(highest))
        conn.commit()
        return {"ok": True, "done": False, "processed": len(batch), "cursor": highest}

    return _with_sqlite_busy_retry(_run)
|
|
4604
|
+
|
|
4605
|
+
|
|
4436
4606
|
def _context_candidate_rows(
|
|
4437
4607
|
conn,
|
|
4438
4608
|
entity_asset_ids: list[str],
|
|
@@ -4444,39 +4614,73 @@ def _context_candidate_rows(
|
|
|
4444
4614
|
prefilter_limit = min(int(base_limit or 5000), _context_prefilter_limit())
|
|
4445
4615
|
prefilter_rows = []
|
|
4446
4616
|
if terms:
|
|
4447
|
-
|
|
4448
|
-
|
|
4449
|
-
|
|
4450
|
-
|
|
4451
|
-
|
|
4452
|
-
|
|
4453
|
-
|
|
4454
|
-
|
|
4455
|
-
|
|
4456
|
-
|
|
4457
|
-
|
|
4458
|
-
|
|
4459
|
-
|
|
4460
|
-
|
|
4461
|
-
|
|
4462
|
-
|
|
4463
|
-
|
|
4464
|
-
|
|
4465
|
-
|
|
4466
|
-
|
|
4467
|
-
|
|
4468
|
-
|
|
4469
|
-
|
|
4470
|
-
|
|
4471
|
-
|
|
4472
|
-
|
|
4473
|
-
|
|
4474
|
-
|
|
4475
|
-
|
|
4476
|
-
|
|
4477
|
-
|
|
4478
|
-
|
|
4479
|
-
|
|
4617
|
+
used_fts = False
|
|
4618
|
+
# DUAL-READ: only take the FTS path once the backfill is done AND the
|
|
4619
|
+
# flag is on AND FTS5 is available. Until then (or on rollback) the
|
|
4620
|
+
# EXACT legacy LIKE path runs, so retrieval is unaffected mid-migration.
|
|
4621
|
+
if _fts_ready(conn):
|
|
4622
|
+
match_expr = _fts_match_expr(terms)
|
|
4623
|
+
if match_expr:
|
|
4624
|
+
try:
|
|
4625
|
+
prefilter_rows = conn.execute(
|
|
4626
|
+
"""
|
|
4627
|
+
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
4628
|
+
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
4629
|
+
FROM local_chunks_fts f
|
|
4630
|
+
JOIN local_chunks c ON c.rowid = f.rowid
|
|
4631
|
+
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
4632
|
+
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
4633
|
+
LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
|
|
4634
|
+
WHERE local_chunks_fts MATCH ?
|
|
4635
|
+
AND f.privacy_class='normal'
|
|
4636
|
+
AND f.asset_status='active'
|
|
4637
|
+
AND a.status='active'
|
|
4638
|
+
AND a.privacy_class='normal'
|
|
4639
|
+
ORDER BY bm25(local_chunks_fts), c.created_at DESC
|
|
4640
|
+
LIMIT ?
|
|
4641
|
+
""",
|
|
4642
|
+
(match_expr, prefilter_limit),
|
|
4643
|
+
).fetchall()
|
|
4644
|
+
used_fts = True
|
|
4645
|
+
except sqlite3.OperationalError:
|
|
4646
|
+
# Malformed FTS expression (odd user input) -> fall back to
|
|
4647
|
+
# the legacy LIKE path below instead of erroring the answer.
|
|
4648
|
+
prefilter_rows = []
|
|
4649
|
+
used_fts = False
|
|
4650
|
+
if not used_fts:
|
|
4651
|
+
term_clauses = []
|
|
4652
|
+
params: list[str] = []
|
|
4653
|
+
for term in terms:
|
|
4654
|
+
term_clauses.append("(lower(a.path) LIKE ? OR lower(COALESCE(v.summary, '')) LIKE ? OR lower(c.text) LIKE ?)")
|
|
4655
|
+
like = f"%{term}%"
|
|
4656
|
+
params.extend([like, like, like])
|
|
4657
|
+
prefilter_rows = conn.execute(
|
|
4658
|
+
f"""
|
|
4659
|
+
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
4660
|
+
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
4661
|
+
FROM local_chunks c
|
|
4662
|
+
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
4663
|
+
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
4664
|
+
LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
|
|
4665
|
+
WHERE a.status='active'
|
|
4666
|
+
AND a.privacy_class='normal'
|
|
4667
|
+
AND ({" OR ".join(term_clauses)})
|
|
4668
|
+
ORDER BY
|
|
4669
|
+
CASE
|
|
4670
|
+
WHEN {" OR ".join("lower(a.path) LIKE ?" for _ in terms)} THEN 0
|
|
4671
|
+
WHEN {" OR ".join("lower(COALESCE(v.summary, '')) LIKE ?" for _ in terms)} THEN 1
|
|
4672
|
+
ELSE 2
|
|
4673
|
+
END,
|
|
4674
|
+
c.created_at DESC
|
|
4675
|
+
LIMIT ?
|
|
4676
|
+
""",
|
|
4677
|
+
[
|
|
4678
|
+
*params,
|
|
4679
|
+
*(f"%{term}%" for term in terms),
|
|
4680
|
+
*(f"%{term}%" for term in terms),
|
|
4681
|
+
prefilter_limit,
|
|
4682
|
+
],
|
|
4683
|
+
).fetchall()
|
|
4480
4684
|
|
|
4481
4685
|
fallback_limit = prefilter_limit if not terms else max(120, min(500, prefilter_limit // 3))
|
|
4482
4686
|
base_rows = conn.execute(
|
package/src/local_context/db.py
CHANGED
|
@@ -8,7 +8,7 @@ from typing import Iterable
|
|
|
8
8
|
from urllib.parse import quote
|
|
9
9
|
|
|
10
10
|
import paths
|
|
11
|
-
from db._schema import _m63_local_context_layer, _m64_local_context_live_dirs
|
|
11
|
+
from db._schema import _m63_local_context_layer, _m64_local_context_live_dirs, _m84_local_chunks_fts
|
|
12
12
|
|
|
13
13
|
LOCAL_CONTEXT_DB_NAME = "local-context.db"
|
|
14
14
|
MIGRATION_STATE_KEY = "local_context_db_migrated_from_main"
|
|
@@ -118,7 +118,8 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
|
|
|
118
118
|
_m64_local_context_live_dirs(conn)
|
|
119
119
|
_ensure_entity_dossier_schema(conn)
|
|
120
120
|
_ensure_local_context_v2_schema(conn)
|
|
121
|
-
conn
|
|
121
|
+
_m84_local_chunks_fts(conn)
|
|
122
|
+
conn.execute("PRAGMA user_version=84")
|
|
122
123
|
conn.commit()
|
|
123
124
|
|
|
124
125
|
|
|
@@ -352,6 +352,7 @@ def record_router_usage(
|
|
|
352
352
|
elapsed_ms: int | None = None,
|
|
353
353
|
deadline_ms: int | None = None,
|
|
354
354
|
used_before_response: bool = True,
|
|
355
|
+
cache_hit: bool = False,
|
|
355
356
|
db_path: str | os.PathLike[str] | None = None,
|
|
356
357
|
) -> dict[str, Any]:
|
|
357
358
|
evidence_refs = router_payload.get("evidence_refs") or []
|
|
@@ -374,6 +375,7 @@ def record_router_usage(
|
|
|
374
375
|
"escalated_from": router_payload.get("escalated_from") or budget_policy.get("escalated_from") or "",
|
|
375
376
|
"escalated_to": router_payload.get("escalated_to") or budget_policy.get("escalated_to") or "",
|
|
376
377
|
"route_cache_key": budget_policy.get("route_cache_key") or "",
|
|
378
|
+
"cache_hit": bool(cache_hit or router_payload.get("cache_hit")),
|
|
377
379
|
"max_sources": budget_policy.get("max_sources") or 0,
|
|
378
380
|
"max_source_timeout_ms": budget_policy.get("max_source_timeout_ms") or 0,
|
|
379
381
|
"allowed_sources": budget_policy.get("allowed_sources") or [],
|
package/src/memory_retrieval.py
CHANGED
|
@@ -9,12 +9,24 @@ from typing import Any
|
|
|
9
9
|
|
|
10
10
|
from db import (
|
|
11
11
|
build_pre_action_context,
|
|
12
|
+
get_memory_observations_by_uids,
|
|
12
13
|
list_memory_events,
|
|
13
14
|
list_memory_observations,
|
|
14
15
|
process_memory_observation_queue,
|
|
15
16
|
search_memory_observations_fts,
|
|
17
|
+
vector_scan_observations,
|
|
16
18
|
)
|
|
17
19
|
|
|
20
|
+
# Weight applied to the semantic (vector) score before it is fused with the
# lexical/FTS score. memory_search takes max(lexical, weight * vector_score),
# so a strong paraphrase match (high cosine) can carry an observation that the
# token-overlap score missed entirely, while still ranking below an exact
# lexical hit on the same query.
_VECTOR_FUSION_WEIGHT = 0.85
# Minimum cosine for a semantic-only candidate to survive the relaxed filter;
# passed as min_score to the vector scan. Below this, a vector "match" is noise
# and must not resurrect an observation that the lexical path already rejected.
_VECTOR_MIN_SCORE = 0.30
|
|
29
|
+
|
|
18
30
|
|
|
19
31
|
def _tokens(text: str) -> set[str]:
|
|
20
32
|
return {
|
|
@@ -37,6 +49,41 @@ def _score(query: str, text: str, base: float = 0.0) -> float:
|
|
|
37
49
|
return min(1.0, base + len(overlap) / max(1, len(query_tokens)))
|
|
38
50
|
|
|
39
51
|
|
|
52
|
+
def _model_is_warm() -> bool:
    """Report whether embedding a query can proceed without a cold model load.

    Returns True when ``cognitive._core._model_download_disabled()`` is truthy,
    or when ``cognitive._core._model`` is already set. Returns False if the
    module cannot be imported or the download check raises.

    NOTE(review): the original author describes the download-disabled case as a
    deterministic offline fallback; that helper is not visible here.
    """
    try:
        import cognitive._core as cog

        offline_fallback = cog._model_download_disabled()
    except Exception:
        # Import failure and a failing download check both mean "not warm".
        return False
    if offline_fallback:
        return True
    loaded_model = getattr(cog, "_model", None)
    return loaded_model is not None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _maybe_query_embedding(query: str):
    """Embed ``query`` once for semantic fusion, or return None.

    Latency guard: the embedder is only called when ``_model_is_warm()`` says
    no cold model load would be triggered. A blank query, a cold model, or any
    exception from the import or the ``embed`` call all yield None, in which
    case the caller falls back to the lexical path.

    Args:
        query: Raw query text; ``None``/falsy is treated as empty.

    Returns:
        Whatever ``cognitive._core.embed`` returns for the stripped query, or
        None when embedding is skipped or fails.
    """
    text = (query or "").strip()
    if text and _model_is_warm():
        try:
            import cognitive._core as cog

            return cog.embed(text)
        except Exception:
            # Best-effort: semantic fusion is optional.
            pass
    return None
|
|
85
|
+
|
|
86
|
+
|
|
40
87
|
def _project_hint_values(project_hint: str = "") -> set[str]:
|
|
41
88
|
clean = (project_hint or "").strip()
|
|
42
89
|
if not clean:
|
|
@@ -225,6 +272,30 @@ def memory_search(
|
|
|
225
272
|
):
|
|
226
273
|
uid = item.get("observation_uid") or f"id:{item.get('id')}"
|
|
227
274
|
observations_by_uid.setdefault(uid, item)
|
|
275
|
+
# Semantic fusion: embed the query ONCE (only when a model is already warm —
|
|
276
|
+
# never trigger a cold model load on this latency path) and run a bounded
|
|
277
|
+
# vector scan over precomputed observation embeddings. Paraphrases that the
|
|
278
|
+
# lexical/FTS path missed are pulled in here.
|
|
279
|
+
vector_scores: dict[str, float] = {}
|
|
280
|
+
if clean_query:
|
|
281
|
+
query_vector = _maybe_query_embedding(clean_query)
|
|
282
|
+
if query_vector is not None:
|
|
283
|
+
for hit in vector_scan_observations(
|
|
284
|
+
query_vector,
|
|
285
|
+
limit=max_items * 3,
|
|
286
|
+
start_ts=start,
|
|
287
|
+
end_ts=end,
|
|
288
|
+
min_score=_VECTOR_MIN_SCORE,
|
|
289
|
+
):
|
|
290
|
+
uid = hit.get("observation_uid")
|
|
291
|
+
if uid:
|
|
292
|
+
vector_scores[uid] = float(hit.get("vector_score") or 0.0)
|
|
293
|
+
# Materialise semantic-only observations the lexical scan did not see.
|
|
294
|
+
missing_uids = [uid for uid in vector_scores if uid not in observations_by_uid]
|
|
295
|
+
if missing_uids:
|
|
296
|
+
for uid, item in get_memory_observations_by_uids(missing_uids).items():
|
|
297
|
+
observations_by_uid.setdefault(uid, item)
|
|
298
|
+
|
|
228
299
|
observations = list(observations_by_uid.values())
|
|
229
300
|
events = list_memory_events(
|
|
230
301
|
query=clean_query,
|
|
@@ -234,12 +305,23 @@ def memory_search(
|
|
|
234
305
|
end_ts=end,
|
|
235
306
|
)
|
|
236
307
|
|
|
237
|
-
candidates = [
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
308
|
+
candidates = []
|
|
309
|
+
for item in observations:
|
|
310
|
+
if not _within_range(item.get("created_at"), start, end):
|
|
311
|
+
continue
|
|
312
|
+
if not _project_matches(item.get("project_key") or "", project_hint):
|
|
313
|
+
continue
|
|
314
|
+
candidate = _observation_to_candidate(item, clean_query)
|
|
315
|
+
uid = item.get("observation_uid") or f"id:{item.get('id')}"
|
|
316
|
+
vector_score = vector_scores.get(uid, 0.0)
|
|
317
|
+
if vector_score > 0:
|
|
318
|
+
# Fuse: keep the higher of the lexical score and the weighted vector
|
|
319
|
+
# signal so a strong paraphrase survives while exact lexical hits
|
|
320
|
+
# still outrank weak semantic ones.
|
|
321
|
+
fused = max(float(candidate.get("score") or 0.0), _VECTOR_FUSION_WEIGHT * vector_score)
|
|
322
|
+
candidate["score"] = round(fused, 4)
|
|
323
|
+
candidate["vector_score"] = round(vector_score, 4)
|
|
324
|
+
candidates.append(candidate)
|
|
243
325
|
candidates.extend(
|
|
244
326
|
_event_to_candidate(item, clean_query)
|
|
245
327
|
for item in events
|
|
@@ -248,7 +330,14 @@ def memory_search(
|
|
|
248
330
|
)
|
|
249
331
|
|
|
250
332
|
if clean_query:
|
|
251
|
-
|
|
333
|
+
# Relaxed filter: a candidate survives if it has a positive lexical score
|
|
334
|
+
# OR a qualifying semantic (vector) match. Previously the hard score>0
|
|
335
|
+
# filter dropped semantic-only paraphrase hits before they could rank.
|
|
336
|
+
candidates = [
|
|
337
|
+
item
|
|
338
|
+
for item in candidates
|
|
339
|
+
if item.get("score", 0) > 0 or item.get("vector_score", 0) > 0
|
|
340
|
+
]
|
|
252
341
|
candidates.sort(key=lambda item: (item.get("score", 0), item.get("created_at") or 0), reverse=True)
|
|
253
342
|
candidates = candidates[:max_items]
|
|
254
343
|
|