nexo-brain 7.34.0 → 7.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ import re
7
7
  import shutil
8
8
  import sqlite3
9
9
  import stat
10
+ import struct
10
11
  import subprocess
11
12
  import sys
12
13
  import time
@@ -56,6 +57,16 @@ FTS_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_FTS_BACKFILL_BATCH", "500")
56
57
  FTS_MIGRATION_CURSOR_KEY = "fts_migration_cursor"
57
58
  FTS_MIGRATION_DONE_KEY = "fts_migration_done"
58
59
  FTS_BACKFILL_TOTAL_KEY = "fts_backfill_total"
60
+ # Compact float32 BLOB embedding storage (replaces JSON-text vector_json, which
61
+ # bloated the index ~4-6x). Dual-write both columns, read prefers the BLOB and
62
+ # falls back to JSON, backfill converts old rows incrementally. Feature flags
63
+ # are kill switches that revert to JSON-only with no redeploy.
64
+ EMB_BLOB_WRITE_ENABLED = os.environ.get("NEXO_LOCAL_EMB_BLOB_WRITE", "1") != "0"
65
+ EMB_BLOB_READ_ENABLED = os.environ.get("NEXO_LOCAL_EMB_BLOB_READ", "1") != "0"
66
+ EMB_BLOB_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_EMB_BLOB_BACKFILL_BATCH", "500") or "500")
67
+ EMB_BLOB_CURSOR_KEY = "emb_blob_backfill_cursor"
68
+ EMB_BLOB_DONE_KEY = "emb_blob_backfill_done"
69
+ EMB_BLOB_TOTAL_KEY = "emb_blob_backfill_total"
59
70
  EMBEDDING_REFRESH_JOB = "embedding_refresh"
60
71
  ENTITY_FACTS_JOB = "entity_facts"
61
72
  BACKGROUND_INDEX_JOB_TYPES = {ENTITY_FACTS_JOB}
@@ -2888,6 +2899,47 @@ def _latest_version_id(conn, asset_id: str) -> str:
2888
2899
  return row["version_id"] if row else stable_id("ver", asset_id)
2889
2900
 
2890
2901
 
2902
+ def _encode_embedding_blob(vector) -> bytes | None:
2903
+ """Pack a vector of floats into a little-endian float32 BLOB (dimension*4
2904
+ bytes). Returns None when blob writes are disabled or the vector is empty,
2905
+ so the caller still writes vector_json (the source of truth during the
2906
+ transition). float32 vs the legacy float64 JSON is a deliberate, negligible
2907
+ cosine drift (vectors are L2-normalized / already 8-dp-rounded)."""
2908
+ if not EMB_BLOB_WRITE_ENABLED:
2909
+ return None
2910
+ try:
2911
+ floats = [float(v) for v in (vector or [])]
2912
+ if not floats:
2913
+ return None
2914
+ return struct.pack(f"<{len(floats)}f", *floats)
2915
+ except (TypeError, ValueError, struct.error):
2916
+ return None
2917
+
2918
+
2919
def _decode_embedding(row) -> list:
    """Read a stored embedding, preferring the compact BLOB over legacy JSON.

    The BLOB is trusted only when it is bytes-like and its length matches
    ``dimension * 4`` (4 bytes per float32). Anything else -- a missing
    column, a short or garbage blob, a value of the wrong type -- falls
    through to the JSON text so it can never reach the cosine loop.

    Args:
        row: Mapping-style row exposing ``vector_json`` and, optionally,
            ``vector_blob`` and ``dimension``.

    Returns:
        A plain Python list, so embeddings.cosine() and the ``elif vector:``
        truthiness need no changes.
    """
    if EMB_BLOB_READ_ENABLED:
        try:
            blob = row["vector_blob"]
        except (KeyError, IndexError):
            # The row does not carry the column (dicts raise KeyError,
            # sqlite3.Row raises IndexError).
            blob = None
        # SQLite columns are dynamically typed, so a BLOB column can hold text
        # or a number. Only a non-empty bytes-like value is a candidate; any
        # other type would raise TypeError in len() or struct.unpack below.
        if isinstance(blob, (bytes, bytearray, memoryview)) and blob:
            try:
                dim = int(row["dimension"] or 0)
            except (KeyError, IndexError, TypeError, ValueError):
                dim = 0
            if dim > 0 and len(blob) == dim * 4:
                try:
                    return list(struct.unpack(f"<{dim}f", blob))
                except struct.error:
                    pass  # fall through to JSON
    return json_loads(row["vector_json"], [])
2941
+
2942
+
2891
2943
  def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
2892
2944
  record = embeddings.embed_record(text)
2893
2945
  model_id = str(record["model_id"])
@@ -2895,8 +2947,8 @@ def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> No
2895
2947
  dimension = int(record["dimension"])
2896
2948
  conn.execute(
2897
2949
  """
2898
- INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
2899
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
2950
+ INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, vector_blob, created_at)
2951
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
2900
2952
  """,
2901
2953
  (
2902
2954
  stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}"),
@@ -2906,6 +2958,7 @@ def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> No
2906
2958
  model_revision,
2907
2959
  dimension,
2908
2960
  json_dumps(record["vector"]),
2961
+ _encode_embedding_blob(record["vector"]),
2909
2962
  now(),
2910
2963
  ),
2911
2964
  )
@@ -3555,6 +3608,13 @@ def run_once(
3555
3608
  _backfill_fts_rows(conn, batch_limit=FTS_BACKFILL_BATCH)
3556
3609
  except Exception:
3557
3610
  pass
3611
+ # Incremental embedding TEXT->BLOB backfill: same bounded one-batch-per-tick
3612
+ # discipline. Best-effort; skips when disabled or already done.
3613
+ if EMB_BLOB_BACKFILL_BATCH > 0:
3614
+ try:
3615
+ _backfill_embedding_blobs(conn, batch_limit=EMB_BLOB_BACKFILL_BATCH)
3616
+ except Exception:
3617
+ pass
3558
3618
  conn_after = _conn()
3559
3619
  initial_after = _initial_scan_status(conn_after, list_roots(readonly=False))
3560
3620
  blocking_active_after = _active_job_count(conn_after, blocking_only=True)
@@ -4603,6 +4663,76 @@ def _backfill_fts_rows(conn, *, batch_limit: int | None = None) -> dict:
4603
4663
  return _with_sqlite_busy_retry(_run)
4604
4664
 
4605
4665
 
4666
def _backfill_embedding_blobs(conn, *, batch_limit: int | None = None) -> dict:
    """Incrementally convert legacy vector_json TEXT rows to float32 vector_blob.

    Idempotent and resumable via a rowid cursor kept in local_index_state,
    committing once per batch. Converts the EXISTING JSON in place (never
    re-embeds -- re-embedding could re-stamp model_id if fastembed
    availability differs). New rows already get vector_blob from the
    dual-write, so this only handles pre-existing rows.

    Rows that cannot be converted (JSON length != dimension, malformed JSON
    payload, a component that does not fit a float32) are skipped and left
    JSON-only -- the dual-read falls back -- but still advance the cursor so
    they are not retried forever.

    Args:
        conn: Open SQLite connection to the local index database.
        batch_limit: Maximum rows examined in this call. None uses
            EMB_BLOB_BACKFILL_BATCH; a value <= 0 disables the backfill.

    Returns:
        A status dict. Always has ``ok`` and ``done``; carries ``skipped``
        when nothing was attempted, otherwise ``processed`` and ``cursor``
        (plus ``converted`` when a batch was examined).
    """
    if batch_limit is None:
        batch_limit = EMB_BLOB_BACKFILL_BATCH
    batch_limit = int(batch_limit)
    if batch_limit <= 0:
        return {"ok": True, "skipped": "disabled", "done": _get_state_conn(conn, EMB_BLOB_DONE_KEY, "0") == "1"}
    if not EMB_BLOB_WRITE_ENABLED:
        return {"ok": True, "skipped": "blob_write_disabled", "done": False}
    if _get_state_conn(conn, EMB_BLOB_DONE_KEY, "0") == "1":
        return {"ok": True, "skipped": "already_done", "done": True}

    def _run() -> dict:
        try:
            cursor = int(_get_state_conn(conn, EMB_BLOB_CURSOR_KEY, "0") or "0")
        except Exception:
            cursor = 0
        # Record the amount of work once, the first time the backfill runs.
        if _get_state_conn(conn, EMB_BLOB_TOTAL_KEY, "") == "":
            try:
                total_row = conn.execute(
                    "SELECT COUNT(*) AS total FROM local_embeddings WHERE vector_blob IS NULL"
                ).fetchone()
                _set_state_conn(conn, EMB_BLOB_TOTAL_KEY, str(int(total_row["total"] or 0)))
            except Exception:
                pass
        rows = conn.execute(
            """
            SELECT rowid AS rid, dimension, vector_json
            FROM local_embeddings
            WHERE rowid > ? AND vector_blob IS NULL
            ORDER BY rowid ASC
            LIMIT ?
            """,
            (cursor, batch_limit),
        ).fetchall()
        if not rows:
            _set_state_conn(conn, EMB_BLOB_DONE_KEY, "1")
            conn.commit()
            return {"ok": True, "done": True, "processed": 0, "cursor": cursor}
        max_rid = cursor
        converted = 0
        for row in rows:
            rid = int(row["rid"])
            max_rid = max(max_rid, rid)
            dim = 0
            blob = None
            # Isolate per-row data problems: an exception here would abort the
            # batch before the cursor is stored, so the same rows would be
            # re-read (and fail again) on every tick. Database errors are not
            # caught, so the busy-retry wrapper still sees them.
            try:
                dim = int(row["dimension"] or 0)
                vec = json_loads(row["vector_json"], [])
                if dim > 0 and len(vec) == dim:
                    blob = _encode_embedding_blob(vec)
            except (TypeError, ValueError, OverflowError):
                blob = None
            if blob is not None and len(blob) == dim * 4:
                conn.execute("UPDATE local_embeddings SET vector_blob=? WHERE rowid=?", (blob, rid))
                converted += 1
        _set_state_conn(conn, EMB_BLOB_CURSOR_KEY, str(max_rid))
        conn.commit()
        return {"ok": True, "done": False, "processed": len(rows), "converted": converted, "cursor": max_rid}

    return _with_sqlite_busy_retry(_run)
4734
+
4735
+
4606
4736
  def _context_candidate_rows(
4607
4737
  conn,
4608
4738
  entity_asset_ids: list[str],
@@ -4625,7 +4755,7 @@ def _context_candidate_rows(
4625
4755
  prefilter_rows = conn.execute(
4626
4756
  """
4627
4757
  SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4628
- e.vector_json, e.model_id, e.model_revision, e.dimension
4758
+ e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
4629
4759
  FROM local_chunks_fts f
4630
4760
  JOIN local_chunks c ON c.rowid = f.rowid
4631
4761
  JOIN local_assets a ON a.asset_id = c.asset_id
@@ -4657,7 +4787,7 @@ def _context_candidate_rows(
4657
4787
  prefilter_rows = conn.execute(
4658
4788
  f"""
4659
4789
  SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4660
- e.vector_json, e.model_id, e.model_revision, e.dimension
4790
+ e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
4661
4791
  FROM local_chunks c
4662
4792
  JOIN local_assets a ON a.asset_id = c.asset_id
4663
4793
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -4686,7 +4816,7 @@ def _context_candidate_rows(
4686
4816
  base_rows = conn.execute(
4687
4817
  """
4688
4818
  SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4689
- e.vector_json, e.model_id, e.model_revision, e.dimension
4819
+ e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
4690
4820
  FROM local_chunks c
4691
4821
  JOIN local_assets a ON a.asset_id = c.asset_id
4692
4822
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -4713,7 +4843,7 @@ def _context_candidate_rows(
4713
4843
  entity_rows = conn.execute(
4714
4844
  f"""
4715
4845
  SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
4716
- e.vector_json, e.model_id, e.model_revision, e.dimension
4846
+ e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
4717
4847
  FROM local_chunks c
4718
4848
  JOIN local_assets a ON a.asset_id = c.asset_id
4719
4849
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -5200,7 +5330,7 @@ def _context_query_conn(
5200
5330
  for row in rows:
5201
5331
  if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
5202
5332
  continue
5203
- vector = json_loads(row["vector_json"], [])
5333
+ vector = _decode_embedding(row)
5204
5334
  text_score = _search_text_score(search_query, row["text"])
5205
5335
  path_score = _search_text_score(search_query, row["path"] or "")
5206
5336
  summary_score = _search_text_score(search_query, row["summary"] or "")
@@ -5756,6 +5886,14 @@ def purge_asset(asset_id: str) -> dict:
5756
5886
  conn = _conn()
5757
5887
  _purge_asset_ids(conn, [asset_id])
5758
5888
  conn.commit()
5889
+ # Reclaim the just-freed pages. Cheap incremental_vacuum (not a full VACUUM
5890
+ # — this is a frequent single-asset op; a 19GB rewrite per purge would be
5891
+ # catastrophic). No-op unless auto_vacuum=INCREMENTAL is active. Best-effort.
5892
+ try:
5893
+ conn.execute("PRAGMA incremental_vacuum")
5894
+ conn.commit()
5895
+ except Exception:
5896
+ pass
5759
5897
  log_event("info", "asset_purged", "Asset purged", asset_id=asset_id)
5760
5898
  return {"ok": True, "asset_id": asset_id}
5761
5899
 
@@ -5790,6 +5928,18 @@ def clear_index() -> dict:
5790
5928
  )
5791
5929
  _set_initial_index_complete(conn, False)
5792
5930
  conn.commit()
5931
+ # The index is now near-empty, so a full VACUUM rewrites a tiny file and
5932
+ # actually returns the freed disk to the OS (DELETE alone only moves pages
5933
+ # to the free-list). Checkpoint the WAL first so its pages are folded in,
5934
+ # VACUUM, then checkpoint again — in WAL mode VACUUM's rewrite lands in the
5935
+ # WAL, so the main file is only truncated by the trailing checkpoint. Works
5936
+ # regardless of auto_vacuum mode. Best-effort — never fail the clear.
5937
+ try:
5938
+ conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
5939
+ conn.execute("VACUUM")
5940
+ conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
5941
+ except Exception:
5942
+ pass
5793
5943
  log_event("warn", "index_cleared", "Local memory index cleared")
5794
5944
  return {"ok": True}
5795
5945
 
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import os
4
+ import shutil
4
5
  import sqlite3
5
6
  import time
6
7
  from pathlib import Path
@@ -14,6 +15,11 @@ LOCAL_CONTEXT_DB_NAME = "local-context.db"
14
15
  MIGRATION_STATE_KEY = "local_context_db_migrated_from_main"
15
16
  MIGRATION_SKIPPED_KEY = "local_context_db_migration_skipped"
16
17
  MAIN_CLEANUP_STATE_KEY = "local_context_main_tables_drained"
18
+ # One-time conversion flag: auto_vacuum=INCREMENTAL is a no-op on an already
19
+ # populated DB until exactly one full VACUUM runs. We do that conversion once
20
+ # per never-converted DB (guarded by free disk) and record it here so it never
21
+ # re-runs the expensive rewrite. See ensure_local_context_db().
22
+ AUTO_VACUUM_CONVERTED_KEY = "auto_vacuum_converted"
17
23
 
18
24
  LOCAL_CONTEXT_TABLES: tuple[str, ...] = (
19
25
  "local_index_roots",
@@ -77,6 +83,12 @@ def _connect(db_path: Path) -> sqlite3.Connection:
77
83
  conn = sqlite3.connect(str(db_path), timeout=max(_busy_timeout_ms() / 1000.0, 1.0), check_same_thread=False)
78
84
  conn.row_factory = sqlite3.Row
79
85
  conn.execute(f"PRAGMA busy_timeout={_busy_timeout_ms()}")
86
+ # auto_vacuum must be set BEFORE the first table is created to take effect on
87
+ # a brand-new DB (it is a no-op on an already-populated file — those are
88
+ # converted once via a guarded full VACUUM in ensure_local_context_db()).
89
+ # INCREMENTAL lets deletes (privacy purge, reconcile, purge_asset) reclaim
90
+ # pages via `PRAGMA incremental_vacuum` instead of growing the file forever.
91
+ conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
80
92
  conn.execute("PRAGMA journal_mode=WAL")
81
93
  conn.execute("PRAGMA synchronous=NORMAL")
82
94
  conn.execute("PRAGMA temp_store=MEMORY")
@@ -119,10 +131,20 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
119
131
  _ensure_entity_dossier_schema(conn)
120
132
  _ensure_local_context_v2_schema(conn)
121
133
  _m84_local_chunks_fts(conn)
122
- conn.execute("PRAGMA user_version=84")
134
+ _m85_local_embeddings_blob(conn)
135
+ conn.execute("PRAGMA user_version=85")
123
136
  conn.commit()
124
137
 
125
138
 
139
def _m85_local_embeddings_blob(conn: sqlite3.Connection) -> None:
    """Schema v85: add the ``vector_blob`` column to ``local_embeddings``.

    The column holds the compact float32 embedding next to the legacy
    vector_json TEXT. It is declared nullable and without a DEFAULT so the
    ALTER stays metadata-only (a DEFAULT would rewrite the whole table).
    Writers dual-write both columns and readers prefer the BLOB with a JSON
    fallback, so adding the column is safe even before any backfill runs.
    """
    table = "local_embeddings"
    column, sql_type = "vector_blob", "BLOB"
    _add_column_if_missing(conn, table, column, sql_type)
146
+
147
+
126
148
  def _table_columns(conn: sqlite3.Connection, table: str) -> set[str]:
127
149
  rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
128
150
  return {str(row["name"] if isinstance(row, sqlite3.Row) else row[1]) for row in rows}
@@ -421,11 +443,49 @@ def ensure_local_context_db() -> None:
421
443
  pass
422
444
  return
423
445
  _ensure_schema(_CONN)
446
+ _convert_auto_vacuum_once(_CONN, db_path)
424
447
  _LAST_MIGRATION_ATTEMPT = now
425
448
  migration = migrate_from_main_if_needed(_CONN)
426
449
  _READY = True
427
450
 
428
451
 
452
def _convert_auto_vacuum_once(conn: sqlite3.Connection, db_path: Path) -> None:
    """Flip an existing DB from auto_vacuum=NONE to INCREMENTAL, once.

    Setting the PRAGMA only takes effect after one full VACUUM that writes the
    pointer-map pages. This rewrites the whole file once, so we guard on free
    disk (VACUUM needs ~1x the DB size of scratch; require 2x margin) and only
    record the done-flag once the mode is actually INCREMENTAL, so a machine
    that was too full retries on a later boot. Best-effort: a failure here
    must never block index startup. Runs on the writer connection only.

    Args:
        conn: Writer connection to the local-context database.
        db_path: Filesystem path of that database, used for the size and
            free-space guard.
    """
    try:
        if _state(conn, AUTO_VACUUM_CONVERTED_KEY) == "1":
            return
        mode = int(conn.execute("PRAGMA auto_vacuum").fetchone()[0])
        if mode == 2:  # already INCREMENTAL (e.g. freshly created DB)
            _set_state(conn, AUTO_VACUUM_CONVERTED_KEY, "1")
            conn.commit()
            return
        try:
            db_size = db_path.stat().st_size
            free = shutil.disk_usage(db_path.parent).free
        except OSError:
            return
        if free <= db_size * 2:
            # Not enough scratch room -- leave NONE mode, retry on a later boot.
            return
        conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
        # VACUUM builds its copy in a temporary database, which follows the
        # connection's temp_store setting. With temp_store=MEMORY the whole
        # copy would be staged in RAM, so force a file-backed temp store for
        # the rewrite and restore the previous setting afterwards.
        # NOTE(review): changing temp_store drops existing temp tables, and
        # the free-space guard above only inspects the database's directory --
        # SQLite's temp directory may live on another filesystem.
        previous_temp_store = int(conn.execute("PRAGMA temp_store").fetchone()[0])
        if previous_temp_store != 1:
            conn.execute("PRAGMA temp_store=FILE")
        try:
            conn.execute("VACUUM")
        finally:
            if previous_temp_store != 1:
                conn.execute(f"PRAGMA temp_store={previous_temp_store}")
        new_mode = int(conn.execute("PRAGMA auto_vacuum").fetchone()[0])
        if new_mode == 2:
            _set_state(conn, AUTO_VACUUM_CONVERTED_KEY, "1")
            conn.commit()
        # In WAL mode the rewritten pages land in the WAL; fold them into the
        # main file and truncate the WAL so the scratch space is returned.
        try:
            conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        except sqlite3.Error:
            pass
    except Exception:
        # Conversion is an optimization; never break startup over it.
        pass
487
+
488
+
429
489
  def get_local_context_db() -> sqlite3.Connection:
430
490
  ensure_local_context_db()
431
491
  assert _CONN is not None