nexo-brain 7.34.0 → 7.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/db/_hot_context.py +30 -1
- package/src/db/_schema.py +64 -0
- package/src/deep_sleep_retention.py +8 -0
- package/src/email_sent_events.py +7 -0
- package/src/enforcement_engine.py +58 -0
- package/src/hooks/post_tool_use.py +114 -0
- package/src/local_context/api.py +157 -7
- package/src/local_context/db.py +61 -1
- package/src/memory_forget.py +1249 -0
- package/src/plugins/protocol.py +30 -0
- package/src/plugins/schema_abstraction.py +66 -0
- package/src/schema_abstraction.py +763 -0
- package/src/scripts/nexo-daily-self-audit.py +97 -13
- package/src/server.py +54 -0
- package/src/tools_credentials.py +60 -2
- package/tool-enforcement-map.json +66 -0
package/src/local_context/api.py
CHANGED
|
@@ -7,6 +7,7 @@ import re
|
|
|
7
7
|
import shutil
|
|
8
8
|
import sqlite3
|
|
9
9
|
import stat
|
|
10
|
+
import struct
|
|
10
11
|
import subprocess
|
|
11
12
|
import sys
|
|
12
13
|
import time
|
|
@@ -56,6 +57,16 @@ FTS_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_FTS_BACKFILL_BATCH", "500")
|
|
|
56
57
|
FTS_MIGRATION_CURSOR_KEY = "fts_migration_cursor"
|
|
57
58
|
FTS_MIGRATION_DONE_KEY = "fts_migration_done"
|
|
58
59
|
FTS_BACKFILL_TOTAL_KEY = "fts_backfill_total"
|
|
60
|
+
# Compact float32 BLOB embedding storage (replaces JSON-text vector_json, which
|
|
61
|
+
# bloated the index ~4-6x). Dual-write both columns, read prefers the BLOB and
|
|
62
|
+
# falls back to JSON, backfill converts old rows incrementally. Feature flags
|
|
63
|
+
# are kill switches that revert to JSON-only with no redeploy.
|
|
64
|
+
EMB_BLOB_WRITE_ENABLED = os.environ.get("NEXO_LOCAL_EMB_BLOB_WRITE", "1") != "0"
|
|
65
|
+
EMB_BLOB_READ_ENABLED = os.environ.get("NEXO_LOCAL_EMB_BLOB_READ", "1") != "0"
|
|
66
|
+
EMB_BLOB_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_EMB_BLOB_BACKFILL_BATCH", "500") or "500")
|
|
67
|
+
EMB_BLOB_CURSOR_KEY = "emb_blob_backfill_cursor"
|
|
68
|
+
EMB_BLOB_DONE_KEY = "emb_blob_backfill_done"
|
|
69
|
+
EMB_BLOB_TOTAL_KEY = "emb_blob_backfill_total"
|
|
59
70
|
EMBEDDING_REFRESH_JOB = "embedding_refresh"
|
|
60
71
|
ENTITY_FACTS_JOB = "entity_facts"
|
|
61
72
|
BACKGROUND_INDEX_JOB_TYPES = {ENTITY_FACTS_JOB}
|
|
@@ -2888,6 +2899,47 @@ def _latest_version_id(conn, asset_id: str) -> str:
|
|
|
2888
2899
|
return row["version_id"] if row else stable_id("ver", asset_id)
|
|
2889
2900
|
|
|
2890
2901
|
|
|
2902
|
+
def _encode_embedding_blob(vector) -> bytes | None:
|
|
2903
|
+
"""Pack a vector of floats into a little-endian float32 BLOB (dimension*4
|
|
2904
|
+
bytes). Returns None when blob writes are disabled or the vector is empty,
|
|
2905
|
+
so the caller still writes vector_json (the source of truth during the
|
|
2906
|
+
transition). float32 vs the legacy float64 JSON is a deliberate, negligible
|
|
2907
|
+
cosine drift (vectors are L2-normalized / already 8-dp-rounded)."""
|
|
2908
|
+
if not EMB_BLOB_WRITE_ENABLED:
|
|
2909
|
+
return None
|
|
2910
|
+
try:
|
|
2911
|
+
floats = [float(v) for v in (vector or [])]
|
|
2912
|
+
if not floats:
|
|
2913
|
+
return None
|
|
2914
|
+
return struct.pack(f"<{len(floats)}f", *floats)
|
|
2915
|
+
except (TypeError, ValueError, struct.error):
|
|
2916
|
+
return None
|
|
2917
|
+
|
|
2918
|
+
|
|
2919
|
+
def _decode_embedding(row) -> list:
|
|
2920
|
+
"""Read a stored embedding, preferring the compact BLOB and falling back to
|
|
2921
|
+
the legacy JSON text. The BLOB is trusted only when its length matches
|
|
2922
|
+
dimension*4 (4 bytes per float32); a short/garbage blob falls through to
|
|
2923
|
+
JSON so it can never reach the cosine loop. Returns a plain Python list so
|
|
2924
|
+
embeddings.cosine() and the `elif vector:` truthiness need no changes."""
|
|
2925
|
+
if EMB_BLOB_READ_ENABLED:
|
|
2926
|
+
try:
|
|
2927
|
+
blob = row["vector_blob"]
|
|
2928
|
+
except (KeyError, IndexError):
|
|
2929
|
+
blob = None
|
|
2930
|
+
if blob:
|
|
2931
|
+
try:
|
|
2932
|
+
dim = int(row["dimension"] or 0)
|
|
2933
|
+
except (KeyError, IndexError, TypeError, ValueError):
|
|
2934
|
+
dim = 0
|
|
2935
|
+
if dim and len(blob) == dim * 4:
|
|
2936
|
+
try:
|
|
2937
|
+
return list(struct.unpack(f"<{dim}f", blob))
|
|
2938
|
+
except struct.error:
|
|
2939
|
+
pass # fall through to JSON
|
|
2940
|
+
return json_loads(row["vector_json"], [])
|
|
2941
|
+
|
|
2942
|
+
|
|
2891
2943
|
def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
|
|
2892
2944
|
record = embeddings.embed_record(text)
|
|
2893
2945
|
model_id = str(record["model_id"])
|
|
@@ -2895,8 +2947,8 @@ def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> No
|
|
|
2895
2947
|
dimension = int(record["dimension"])
|
|
2896
2948
|
conn.execute(
|
|
2897
2949
|
"""
|
|
2898
|
-
INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
|
|
2899
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
2950
|
+
INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, vector_blob, created_at)
|
|
2951
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2900
2952
|
""",
|
|
2901
2953
|
(
|
|
2902
2954
|
stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}"),
|
|
@@ -2906,6 +2958,7 @@ def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> No
|
|
|
2906
2958
|
model_revision,
|
|
2907
2959
|
dimension,
|
|
2908
2960
|
json_dumps(record["vector"]),
|
|
2961
|
+
_encode_embedding_blob(record["vector"]),
|
|
2909
2962
|
now(),
|
|
2910
2963
|
),
|
|
2911
2964
|
)
|
|
@@ -3555,6 +3608,13 @@ def run_once(
|
|
|
3555
3608
|
_backfill_fts_rows(conn, batch_limit=FTS_BACKFILL_BATCH)
|
|
3556
3609
|
except Exception:
|
|
3557
3610
|
pass
|
|
3611
|
+
# Incremental embedding TEXT->BLOB backfill: same bounded one-batch-per-tick
|
|
3612
|
+
# discipline. Best-effort; skips when disabled or already done.
|
|
3613
|
+
if EMB_BLOB_BACKFILL_BATCH > 0:
|
|
3614
|
+
try:
|
|
3615
|
+
_backfill_embedding_blobs(conn, batch_limit=EMB_BLOB_BACKFILL_BATCH)
|
|
3616
|
+
except Exception:
|
|
3617
|
+
pass
|
|
3558
3618
|
conn_after = _conn()
|
|
3559
3619
|
initial_after = _initial_scan_status(conn_after, list_roots(readonly=False))
|
|
3560
3620
|
blocking_active_after = _active_job_count(conn_after, blocking_only=True)
|
|
@@ -4603,6 +4663,76 @@ def _backfill_fts_rows(conn, *, batch_limit: int | None = None) -> dict:
|
|
|
4603
4663
|
return _with_sqlite_busy_retry(_run)
|
|
4604
4664
|
|
|
4605
4665
|
|
|
4666
|
+
def _backfill_embedding_blobs(conn, *, batch_limit: int | None = None) -> dict:
    """Convert one batch of legacy ``vector_json`` rows to float32 ``vector_blob``.

    Resumable via a rowid cursor stored under ``EMB_BLOB_CURSOR_KEY``; each
    call processes at most ``batch_limit`` rows whose ``vector_blob`` is NULL
    and commits once at the end of the batch. The existing JSON is converted
    in place; nothing is re-embedded. When a batch query returns no rows the
    done-flag (``EMB_BLOB_DONE_KEY``) is set and later calls return early.

    Rows that cannot be converted (dimension missing/zero, JSON that is not a
    list of the declared length, values that do not fit float32) are left
    JSON-only but still advance the cursor, so a single bad row can never
    stall the backfill.

    Args:
        conn: Open SQLite connection to the local-context database.
        batch_limit: Maximum rows per call; defaults to
            ``EMB_BLOB_BACKFILL_BATCH``. Values <= 0 disable the backfill.

    Returns:
        A status dict. Always contains ``ok`` and ``done``; early exits add
        ``skipped`` with the reason, processed batches add ``processed``,
        ``cursor`` and (for non-empty batches) ``converted``.
    """
    if batch_limit is None:
        batch_limit = EMB_BLOB_BACKFILL_BATCH
    batch_limit = int(batch_limit)
    if batch_limit <= 0:
        return {"ok": True, "skipped": "disabled", "done": _get_state_conn(conn, EMB_BLOB_DONE_KEY, "0") == "1"}
    if not EMB_BLOB_WRITE_ENABLED:
        # With writes off, _encode_embedding_blob would return None for every
        # row; bail out instead of burning through the cursor.
        return {"ok": True, "skipped": "blob_write_disabled", "done": False}
    if _get_state_conn(conn, EMB_BLOB_DONE_KEY, "0") == "1":
        return {"ok": True, "skipped": "already_done", "done": True}

    def _run() -> dict:
        try:
            cursor = int(_get_state_conn(conn, EMB_BLOB_CURSOR_KEY, "0") or "0")
        except Exception:
            cursor = 0
        if _get_state_conn(conn, EMB_BLOB_TOTAL_KEY, "") == "":
            # Record the initial amount of work once (progress reporting only).
            try:
                total_row = conn.execute(
                    "SELECT COUNT(*) AS total FROM local_embeddings WHERE vector_blob IS NULL"
                ).fetchone()
                _set_state_conn(conn, EMB_BLOB_TOTAL_KEY, str(int(total_row["total"] or 0)))
            except Exception:
                pass
        rows = conn.execute(
            """
            SELECT rowid AS rid, dimension, vector_json
            FROM local_embeddings
            WHERE rowid > ? AND vector_blob IS NULL
            ORDER BY rowid ASC
            LIMIT ?
            """,
            (cursor, batch_limit),
        ).fetchall()
        if not rows:
            _set_state_conn(conn, EMB_BLOB_DONE_KEY, "1")
            conn.commit()
            return {"ok": True, "done": True, "processed": 0, "cursor": cursor}
        max_rid = cursor
        converted = 0
        for row in rows:
            rid = int(row["rid"])
            if rid > max_rid:
                max_rid = rid
            # Only data-shaped failures are swallowed here. Database errors
            # from the UPDATE below still propagate so the busy-retry wrapper
            # can see them.
            try:
                dim = int(row["dimension"] or 0)
                vec = json_loads(row["vector_json"], [])
                if not dim or not isinstance(vec, (list, tuple)) or len(vec) != dim:
                    continue
                blob = _encode_embedding_blob(vec)
            except (TypeError, ValueError, OverflowError):
                # Poison row: skip it but keep the cursor moving, otherwise
                # the same batch would fail on every tick forever.
                continue
            if blob is not None and len(blob) == dim * 4:
                conn.execute("UPDATE local_embeddings SET vector_blob=? WHERE rowid=?", (blob, rid))
                converted += 1
        _set_state_conn(conn, EMB_BLOB_CURSOR_KEY, str(max_rid))
        conn.commit()
        return {"ok": True, "done": False, "processed": len(rows), "converted": converted, "cursor": max_rid}

    # NOTE(review): presumably re-invokes _run when SQLite reports busy/locked;
    # confirm against _with_sqlite_busy_retry.
    return _with_sqlite_busy_retry(_run)
|
|
4734
|
+
|
|
4735
|
+
|
|
4606
4736
|
def _context_candidate_rows(
|
|
4607
4737
|
conn,
|
|
4608
4738
|
entity_asset_ids: list[str],
|
|
@@ -4625,7 +4755,7 @@ def _context_candidate_rows(
|
|
|
4625
4755
|
prefilter_rows = conn.execute(
|
|
4626
4756
|
"""
|
|
4627
4757
|
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
4628
|
-
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
4758
|
+
e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
|
|
4629
4759
|
FROM local_chunks_fts f
|
|
4630
4760
|
JOIN local_chunks c ON c.rowid = f.rowid
|
|
4631
4761
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
@@ -4657,7 +4787,7 @@ def _context_candidate_rows(
|
|
|
4657
4787
|
prefilter_rows = conn.execute(
|
|
4658
4788
|
f"""
|
|
4659
4789
|
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
4660
|
-
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
4790
|
+
e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
|
|
4661
4791
|
FROM local_chunks c
|
|
4662
4792
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
4663
4793
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -4686,7 +4816,7 @@ def _context_candidate_rows(
|
|
|
4686
4816
|
base_rows = conn.execute(
|
|
4687
4817
|
"""
|
|
4688
4818
|
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
4689
|
-
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
4819
|
+
e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
|
|
4690
4820
|
FROM local_chunks c
|
|
4691
4821
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
4692
4822
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -4713,7 +4843,7 @@ def _context_candidate_rows(
|
|
|
4713
4843
|
entity_rows = conn.execute(
|
|
4714
4844
|
f"""
|
|
4715
4845
|
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
4716
|
-
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
4846
|
+
e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
|
|
4717
4847
|
FROM local_chunks c
|
|
4718
4848
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
4719
4849
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -5200,7 +5330,7 @@ def _context_query_conn(
|
|
|
5200
5330
|
for row in rows:
|
|
5201
5331
|
if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
|
|
5202
5332
|
continue
|
|
5203
|
-
vector =
|
|
5333
|
+
vector = _decode_embedding(row)
|
|
5204
5334
|
text_score = _search_text_score(search_query, row["text"])
|
|
5205
5335
|
path_score = _search_text_score(search_query, row["path"] or "")
|
|
5206
5336
|
summary_score = _search_text_score(search_query, row["summary"] or "")
|
|
@@ -5756,6 +5886,14 @@ def purge_asset(asset_id: str) -> dict:
|
|
|
5756
5886
|
conn = _conn()
|
|
5757
5887
|
_purge_asset_ids(conn, [asset_id])
|
|
5758
5888
|
conn.commit()
|
|
5889
|
+
# Reclaim the just-freed pages. Cheap incremental_vacuum (not a full VACUUM
|
|
5890
|
+
# — this is a frequent single-asset op; a 19GB rewrite per purge would be
|
|
5891
|
+
# catastrophic). No-op unless auto_vacuum=INCREMENTAL is active. Best-effort.
|
|
5892
|
+
try:
|
|
5893
|
+
conn.execute("PRAGMA incremental_vacuum")
|
|
5894
|
+
conn.commit()
|
|
5895
|
+
except Exception:
|
|
5896
|
+
pass
|
|
5759
5897
|
log_event("info", "asset_purged", "Asset purged", asset_id=asset_id)
|
|
5760
5898
|
return {"ok": True, "asset_id": asset_id}
|
|
5761
5899
|
|
|
@@ -5790,6 +5928,18 @@ def clear_index() -> dict:
|
|
|
5790
5928
|
)
|
|
5791
5929
|
_set_initial_index_complete(conn, False)
|
|
5792
5930
|
conn.commit()
|
|
5931
|
+
# The index is now near-empty, so a full VACUUM rewrites a tiny file and
|
|
5932
|
+
# actually returns the freed disk to the OS (DELETE alone only moves pages
|
|
5933
|
+
# to the free-list). Checkpoint the WAL first so its pages are folded in,
|
|
5934
|
+
# VACUUM, then checkpoint again — in WAL mode VACUUM's rewrite lands in the
|
|
5935
|
+
# WAL, so the main file is only truncated by the trailing checkpoint. Works
|
|
5936
|
+
# regardless of auto_vacuum mode. Best-effort — never fail the clear.
|
|
5937
|
+
try:
|
|
5938
|
+
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
|
|
5939
|
+
conn.execute("VACUUM")
|
|
5940
|
+
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
|
|
5941
|
+
except Exception:
|
|
5942
|
+
pass
|
|
5793
5943
|
log_event("warn", "index_cleared", "Local memory index cleared")
|
|
5794
5944
|
return {"ok": True}
|
|
5795
5945
|
|
package/src/local_context/db.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import shutil
|
|
4
5
|
import sqlite3
|
|
5
6
|
import time
|
|
6
7
|
from pathlib import Path
|
|
@@ -14,6 +15,11 @@ LOCAL_CONTEXT_DB_NAME = "local-context.db"
|
|
|
14
15
|
MIGRATION_STATE_KEY = "local_context_db_migrated_from_main"
|
|
15
16
|
MIGRATION_SKIPPED_KEY = "local_context_db_migration_skipped"
|
|
16
17
|
MAIN_CLEANUP_STATE_KEY = "local_context_main_tables_drained"
|
|
18
|
+
# One-time conversion flag: auto_vacuum=INCREMENTAL is a no-op on an already
|
|
19
|
+
# populated DB until exactly one full VACUUM runs. We do that conversion once
|
|
20
|
+
# per never-converted DB (guarded by free disk) and record it here so it never
|
|
21
|
+
# re-runs the expensive rewrite. See ensure_local_context_db().
|
|
22
|
+
AUTO_VACUUM_CONVERTED_KEY = "auto_vacuum_converted"
|
|
17
23
|
|
|
18
24
|
LOCAL_CONTEXT_TABLES: tuple[str, ...] = (
|
|
19
25
|
"local_index_roots",
|
|
@@ -77,6 +83,12 @@ def _connect(db_path: Path) -> sqlite3.Connection:
|
|
|
77
83
|
conn = sqlite3.connect(str(db_path), timeout=max(_busy_timeout_ms() / 1000.0, 1.0), check_same_thread=False)
|
|
78
84
|
conn.row_factory = sqlite3.Row
|
|
79
85
|
conn.execute(f"PRAGMA busy_timeout={_busy_timeout_ms()}")
|
|
86
|
+
# auto_vacuum must be set BEFORE the first table is created to take effect on
|
|
87
|
+
# a brand-new DB (it is a no-op on an already-populated file — those are
|
|
88
|
+
# converted once via a guarded full VACUUM in ensure_local_context_db()).
|
|
89
|
+
# INCREMENTAL lets deletes (privacy purge, reconcile, purge_asset) reclaim
|
|
90
|
+
# pages via `PRAGMA incremental_vacuum` instead of growing the file forever.
|
|
91
|
+
conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
|
|
80
92
|
conn.execute("PRAGMA journal_mode=WAL")
|
|
81
93
|
conn.execute("PRAGMA synchronous=NORMAL")
|
|
82
94
|
conn.execute("PRAGMA temp_store=MEMORY")
|
|
@@ -119,10 +131,20 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
|
|
|
119
131
|
_ensure_entity_dossier_schema(conn)
|
|
120
132
|
_ensure_local_context_v2_schema(conn)
|
|
121
133
|
_m84_local_chunks_fts(conn)
|
|
122
|
-
conn
|
|
134
|
+
_m85_local_embeddings_blob(conn)
|
|
135
|
+
conn.execute("PRAGMA user_version=85")
|
|
123
136
|
conn.commit()
|
|
124
137
|
|
|
125
138
|
|
|
139
|
+
def _m85_local_embeddings_blob(conn: sqlite3.Connection) -> None:
    """Schema step v85: add the ``vector_blob`` BLOB column to ``local_embeddings``.

    The column sits next to the legacy ``vector_json`` TEXT column and is
    declared with no NOT NULL / DEFAULT clause, which per the original author
    keeps the ALTER metadata-only rather than a table rewrite. Delegates to
    ``_add_column_if_missing`` to add the column.
    """
    table_name = "local_embeddings"
    column_name = "vector_blob"
    column_decl = "BLOB"
    _add_column_if_missing(conn, table_name, column_name, column_decl)
|
|
146
|
+
|
|
147
|
+
|
|
126
148
|
def _table_columns(conn: sqlite3.Connection, table: str) -> set[str]:
|
|
127
149
|
rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
|
|
128
150
|
return {str(row["name"] if isinstance(row, sqlite3.Row) else row[1]) for row in rows}
|
|
@@ -421,11 +443,49 @@ def ensure_local_context_db() -> None:
|
|
|
421
443
|
pass
|
|
422
444
|
return
|
|
423
445
|
_ensure_schema(_CONN)
|
|
446
|
+
_convert_auto_vacuum_once(_CONN, db_path)
|
|
424
447
|
_LAST_MIGRATION_ATTEMPT = now
|
|
425
448
|
migration = migrate_from_main_if_needed(_CONN)
|
|
426
449
|
_READY = True
|
|
427
450
|
|
|
428
451
|
|
|
452
|
+
def _convert_auto_vacuum_once(conn: sqlite3.Connection, db_path: Path) -> None:
    """Flip an existing DB from auto_vacuum=NONE to INCREMENTAL, at most once.

    On an already-populated database the PRAGMA only takes effect after one
    full VACUUM, which rewrites the whole file. The rewrite is therefore
    guarded on free disk (more than 2x the current DB file size must be
    available) and the done-flag ``AUTO_VACUUM_CONVERTED_KEY`` is recorded
    only once the mode actually reads back as INCREMENTAL, so a machine that
    was too full retries on a later call.

    The WAL is checkpointed before and after the VACUUM: in WAL mode the
    rewrite lands in the WAL, so without the trailing checkpoint the main
    file is not truncated and a WAL roughly the size of the database is left
    behind.

    Best-effort: every failure is swallowed so this can never block startup.

    Args:
        conn: Writer connection to the local-context database.
        db_path: Filesystem path of that database (used for the size and
            free-space check).
    """
    try:
        if _state(conn, AUTO_VACUUM_CONVERTED_KEY) == "1":
            return
        mode = int(conn.execute("PRAGMA auto_vacuum").fetchone()[0])
        if mode == 2:  # already INCREMENTAL (e.g. freshly created DB)
            _set_state(conn, AUTO_VACUUM_CONVERTED_KEY, "1")
            conn.commit()
            return
        try:
            db_size = db_path.stat().st_size
            free = shutil.disk_usage(db_path.parent).free
        except OSError:
            return
        if free <= db_size * 2:
            # Not enough scratch room: leave NONE mode, retry on a later boot.
            return
        try:
            # Fold pending WAL pages into the main file before rewriting it.
            conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        except sqlite3.Error:
            pass
        # NOTE(review): SQLite documents that VACUUM may change rowids of
        # tables without an explicit INTEGER PRIMARY KEY; confirm that no
        # rowid-keyed state (FTS rowid joins, rowid cursors) depends on them.
        conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
        conn.execute("VACUUM")
        new_mode = int(conn.execute("PRAGMA auto_vacuum").fetchone()[0])
        if new_mode == 2:
            _set_state(conn, AUTO_VACUUM_CONVERTED_KEY, "1")
            conn.commit()
        try:
            # Truncate the WAL that now holds the rewritten database pages.
            conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        except sqlite3.Error:
            pass
    except Exception:
        # Conversion is an optimization; never break startup over it.
        pass
|
|
487
|
+
|
|
488
|
+
|
|
429
489
|
def get_local_context_db() -> sqlite3.Connection:
|
|
430
490
|
ensure_local_context_db()
|
|
431
491
|
assert _CONN is not None
|