simplevecdb 2.5.0__tar.gz → 2.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. simplevecdb-2.6.1/.bandit +9 -0
  2. simplevecdb-2.6.1/.github/FUNDING.yml +5 -0
  3. simplevecdb-2.6.1/.gitignore +70 -0
  4. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/CHANGELOG.md +214 -0
  5. simplevecdb-2.6.1/PKG-INFO +377 -0
  6. simplevecdb-2.6.1/README.md +330 -0
  7. simplevecdb-2.6.1/docs/CHANGELOG.md +796 -0
  8. simplevecdb-2.6.1/docs/Features.md +206 -0
  9. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/core.md +0 -1
  10. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/encryption.md +53 -6
  11. simplevecdb-2.6.1/docs/examples.md +286 -0
  12. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/examples/rag/langchain_rag.ipynb +10 -7
  13. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/examples/rag/llama_rag.ipynb +10 -4
  14. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/examples/rag/ollama_rag.ipynb +21 -16
  15. simplevecdb-2.6.1/lefthook.yml +64 -0
  16. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/mkdocs.yml +1 -0
  17. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/pyproject.toml +39 -4
  18. simplevecdb-2.6.1/scripts/bump_version.py +88 -0
  19. simplevecdb-2.6.1/scripts/check_version_sync.py +88 -0
  20. simplevecdb-2.6.1/scripts/exercise_async_collection.py +358 -0
  21. simplevecdb-2.6.1/scripts/track_metrics.py +91 -0
  22. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/__init__.py +5 -2
  23. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/async_core.py +359 -206
  24. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/config.py +18 -3
  25. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/constants.py +32 -0
  26. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/core.py +1141 -293
  27. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/embeddings/models.py +39 -9
  28. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/embeddings/server.py +122 -18
  29. simplevecdb-2.6.1/src/simplevecdb/encryption.py +675 -0
  30. simplevecdb-2.6.1/src/simplevecdb/engine/catalog.py +2267 -0
  31. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/engine/clustering.py +13 -2
  32. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/engine/quantization.py +36 -5
  33. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/engine/search.py +124 -45
  34. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/engine/usearch_index.py +57 -15
  35. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/integrations/langchain.py +27 -5
  36. simplevecdb-2.6.1/src/simplevecdb/integrations/llamaindex.py +353 -0
  37. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/logging.py +9 -42
  38. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/types.py +60 -35
  39. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/utils.py +238 -42
  40. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/integration/test_llamaindex.py +4 -4
  41. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/integration/test_rag.py +21 -10
  42. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/integration/test_server.py +8 -3
  43. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_core_additional_coverage.py +7 -4
  44. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_filters.py +5 -4
  45. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_missing_coverage.py +8 -189
  46. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_v25_correctness.py +10 -6
  47. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_v25_features.py +9 -3
  48. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_v25_robustness.py +31 -23
  49. simplevecdb-2.6.1/tests/unit/core/test_v26_safety.py +182 -0
  50. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/embeddings/test_models.py +1 -0
  51. simplevecdb-2.6.1/tests/unit/embeddings/test_repo_id_validation.py +102 -0
  52. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/embeddings/test_server.py +9 -3
  53. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/embeddings/test_server_coverage.py +1 -2
  54. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/embeddings/test_v25_enhancements.py +16 -18
  55. simplevecdb-2.6.1/tests/unit/engine/test_v26_quantization_clustering.py +139 -0
  56. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/integrations/test_langchain_coverage.py +3 -1
  57. simplevecdb-2.6.1/tests/unit/integrations/test_llamaindex_review_pass_3.py +154 -0
  58. simplevecdb-2.6.1/tests/unit/integrations/test_llamaindex_v26.py +188 -0
  59. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_async_coverage.py +3 -1
  60. simplevecdb-2.6.1/tests/unit/test_async_v26.py +115 -0
  61. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_catalog_coverage.py +12 -74
  62. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_core.py +2 -40
  63. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_encryption_coverage.py +23 -8
  64. simplevecdb-2.6.1/tests/unit/test_encryption_salt.py +98 -0
  65. simplevecdb-2.6.1/tests/unit/test_encryption_v1_format.py +218 -0
  66. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_error_handling.py +23 -74
  67. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_multi_collection.py +14 -11
  68. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_search.py +27 -18
  69. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_search_missing_coverage.py +6 -7
  70. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_usearch_index_missing_coverage.py +13 -8
  71. simplevecdb-2.6.1/tests/unit/test_v26_1_features.py +655 -0
  72. simplevecdb-2.6.1/tests/unit/test_v26_encryption_review_pass_3.py +206 -0
  73. simplevecdb-2.6.1/tests/unit/test_v26_misc.py +183 -0
  74. simplevecdb-2.6.1/tests/unit/test_v26_review_pass_3.py +264 -0
  75. simplevecdb-2.6.1/tests/unit/test_v26_review_pass_4.py +173 -0
  76. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/uv.lock +239 -235
  77. simplevecdb-2.5.0/.bandit +0 -9
  78. simplevecdb-2.5.0/.github/FUNDING.yml +0 -6
  79. simplevecdb-2.5.0/.gitignore +0 -36
  80. simplevecdb-2.5.0/.pre-commit-config.yaml +0 -37
  81. simplevecdb-2.5.0/PKG-INFO +0 -527
  82. simplevecdb-2.5.0/README.md +0 -498
  83. simplevecdb-2.5.0/docs/CHANGELOG.md +0 -533
  84. simplevecdb-2.5.0/docs/LICENSE +0 -0
  85. simplevecdb-2.5.0/docs/examples.md +0 -356
  86. simplevecdb-2.5.0/src/simplevecdb/encryption.py +0 -417
  87. simplevecdb-2.5.0/src/simplevecdb/engine/catalog.py +0 -1013
  88. simplevecdb-2.5.0/src/simplevecdb/integrations/llamaindex.py +0 -227
  89. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.env.example +0 -0
  90. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  91. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  92. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  93. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.github/dependabot.yml +0 -0
  94. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.github/workflows/ci.yml +0 -0
  95. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.github/workflows/publish.yml +0 -0
  96. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.github/workflows/security.yml +0 -0
  97. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.github/workflows/update-sponsors.yml +0 -0
  98. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/.python-version +0 -0
  99. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/CODE_OF_CONDUCT.md +0 -0
  100. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/CONTRIBUTING.md +0 -0
  101. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/LICENSE +0 -0
  102. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/SECURITY.md +0 -0
  103. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/CONTRIBUTING.md +0 -0
  104. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/ENV_SETUP.md +0 -0
  105. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/async.md +0 -0
  106. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/config.md +0 -0
  107. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/embeddings.md +0 -0
  108. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/engine/catalog.md +0 -0
  109. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/engine/quantization.md +0 -0
  110. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/engine/search.md +0 -0
  111. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/integrations.md +0 -0
  112. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/api/types.md +0 -0
  113. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/benchmarks.md +0 -0
  114. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/guides/clustering.md +0 -0
  115. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/docs/index.md +0 -0
  116. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/examples/auto_embed.py +0 -0
  117. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/examples/backend_benchmark.py +0 -0
  118. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/examples/embeddings/perf_benchmark.py +0 -0
  119. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/examples/quant_benchmark.py +0 -0
  120. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/examples/smoke_test.py +0 -0
  121. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/embeddings/__init__.py +0 -0
  122. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/engine/__init__.py +0 -0
  123. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/src/simplevecdb/integrations/__init__.py +0 -0
  124. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/conftest.py +0 -0
  125. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/integration/test_langchain.py +0 -0
  126. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/integration/test_v21_features.py +0 -0
  127. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/perf/test_batch_detection.py +0 -0
  128. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/perf/test_performance.py +0 -0
  129. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/__init__.py +0 -0
  130. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_batch_detection.py +0 -0
  131. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_factory_methods.py +0 -0
  132. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_initialization.py +0 -0
  133. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_quantization.py +0 -0
  134. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/core/test_similarity_search.py +0 -0
  135. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/embeddings/__init__.py +0 -0
  136. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/integrations/__init__.py +0 -0
  137. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/integrations/test_llamaindex_coverage.py +0 -0
  138. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_async.py +0 -0
  139. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_clustering.py +0 -0
  140. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_config.py +0 -0
  141. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_cross_collection_search.py +0 -0
  142. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_encryption.py +0 -0
  143. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_hierarchy.py +0 -0
  144. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_search_coverage.py +0 -0
  145. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_streaming.py +0 -0
  146. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_types.py +0 -0
  147. {simplevecdb-2.5.0 → simplevecdb-2.6.1}/tests/unit/test_utils.py +0 -0
@@ -0,0 +1,9 @@
1
+ exclude_dirs:
2
+
3
+ - /tests
4
+ - /examples
5
+
6
+ skips:
7
+
8
+ - B104 # 0.0.0.0 binding: SERVER_HOST defaults to 127.0.0.1; bandit can't see runtime defaults, so the warning is a false positive on this codebase. Keep it skipped only because the default is safe — if anyone introduces a hardcoded "0.0.0.0", remove this skip.
9
+ - B608 # SQL injection false positive: table names are validated via _validate_table_name()
@@ -0,0 +1,5 @@
1
+ github: [coderdayton]
2
+ custom:
3
+ [
4
+ "https://ko-fi.com/xbbvii",
5
+ ]
@@ -0,0 +1,70 @@
1
+ # Python / uv
2
+ .venv/
3
+ __pycache__/
4
+ *.py[cod]
5
+ *.egg-info/
6
+ .eggs/
7
+ build/
8
+ dist/
9
+ .tox/
10
+ .nox/
11
+
12
+ # Tooling caches
13
+ .mypy_cache/
14
+ .pytest_cache/
15
+ .ruff_cache/
16
+ .cache/
17
+ .hypothesis/
18
+ cython_debug/
19
+
20
+ # Coverage
21
+ .coverage
22
+ .coverage.*
23
+ coverage.xml
24
+ *.cover
25
+ htmlcov/
26
+
27
+ # Jupyter
28
+ .ipynb_checkpoints/
29
+
30
+ # Environment / secrets
31
+ .env
32
+ .envrc
33
+ .direnv/
34
+
35
+ # Databases (SimpleVecDB writes WAL/SHM sidecars)
36
+ *.db
37
+ *.db-journal
38
+ *.db-shm
39
+ *.db-wal
40
+ *.sqlite
41
+ *.sqlite3
42
+
43
+ # Editor / IDE
44
+ .vscode/
45
+ .idea/
46
+ .history/
47
+ *.iml
48
+ *.swp
49
+ *.swo
50
+ *~
51
+
52
+ # OS
53
+ .DS_Store
54
+ Thumbs.db
55
+ desktop.ini
56
+
57
+ # Docs build
58
+ site/
59
+
60
+ # Agentic CLI tools (per-developer state)
61
+ .opencode/
62
+ opencode.json
63
+ .claude/
64
+ .codex
65
+
66
+ # Project-specific scratch
67
+ simplevecdb_plan.md
68
+ AGENTS.md
69
+ NEXT_UPDATES.md
70
+ pro_pack/
@@ -5,6 +5,220 @@ All notable changes to SimpleVecDB will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.6.1] - 2026-05-10
9
+
10
+ ### Storage, mutation, and eventing improvements
11
+
12
+ This release closes ten long-standing gaps in the catalog layer with a coherent
13
+ set of additive primitives. No public API breaks; existing 2.6.0 databases
14
+ upgrade transparently (the new tables are created on first open).
15
+
16
+ #### New features
17
+
18
+ - **Native vector update via pending buffer** — `collection.update_embedding(id, vector)`
19
+ writes a row to a per-collection `_pending_vectors` overlay inside one SQL
20
+ transaction; the new vector becomes visible to reads immediately and is
21
+ promoted to the HNSW index on `collection.pending.flush()`. Removes the
22
+ HNSW remove+re-add churn previously required for in-place updates.
23
+ - **Bulk vector math** — `collection.pending.update_many([(id, vec), …])` and
24
+ `collection.pending.blend_toward(ids, centroid, alpha)` for batched edits.
25
+ - **Atomic transaction boundary** — `with db.transaction() as tx: …` and
26
+ `with collection.tx(): …` wrap a SAVEPOINT around catalog writes
27
+ (metadata, counters, edges, events, TTL, and `update_embedding`'s
28
+ pending overlay) so a raised exception rolls all SQL writes back.
29
+ Coarse vector mutations (`add_texts`, `delete`) are NOT rolled back —
30
+ use `update_embedding` + `pending.flush()` for vector changes that
31
+ must be commit-gated. Nested contexts share a single savepoint stack
32
+ via the new `_TxState` helper.
33
+ - **Weighted directed edges** — new `collection.edges` namespace with
34
+ `add_edge / get_edges / update_edge / delete_edge / prune` over a
35
+ per-collection `_edges` table. Numeric columns (`weight`, `bonus`, `hits`,
36
+ `last_touch`) are addressable by the new range-filter grammar; deltas
37
+ (`dweight=+0.02, dhits=+1`) compile to a single atomic SQL UPDATE.
38
+ - **Atomic counter increments** — `collection.increment_metadata(id, {"hits": 1, "drift": 0.02})`
39
+ applies a dict of numeric deltas to JSON metadata in one statement using
40
+ chained `json_set(... json_extract + ?)` calls. WAL-atomic; safe under
41
+ concurrent writers.
42
+ - **Mongo-style range filters** — `filter={"score": {"$gt": 0.5, "$lte": 0.9}}`
43
+ on `similarity_search`, `keyword_search`, `hybrid_search`, `edges.get_edges`,
44
+ and `events.read`. Supported operators: `$eq $ne $gt $gte $lt $lte $in $nin
45
+ $exists $between`. Tuple shorthand (`("range", lo, hi)`, `(">", x)`) is
46
+ normalised into the operator-dict form.
47
+ - **Append-only change feed** — every mutating method now appends one row to
48
+ a per-collection `_events` table (kind, doc_id, payload, monotonic seq).
49
+ `collection.events.read(since=, kind=, limit=)`,
50
+ `collection.events.subscribe(since=, poll_interval=)`, and
51
+ `collection.events.prune(before_seq=)` expose the feed; cross-process
52
+ visibility comes from the existing WAL mode.
53
+ - **TTL / expiry hooks** — `collection.ttl.set(id, seconds=…, on_expire="delete"|"callback")`,
54
+ `collection.ttl.clear(id)`, and `collection.ttl.sweep()` over a
55
+ `_ttl` table; `start_background(interval=…)` runs the sweep in a daemon
56
+ thread (off by default).
57
+ - **Incremental rebuild scheduler** — `collection.maintenance.rebuild_if_needed(max_pending=, max_deleted=)`
58
+ triggers a full `rebuild_index()` only when the configured pending /
59
+ tombstone / wall-time thresholds are crossed.
60
+ - **Multi-process write safety** — added `PRAGMA busy_timeout=5000` and
61
+ `PRAGMA foreign_keys=ON` at every connection-open site (encrypted and
62
+ unencrypted). The native 5 s wait window reduces `DatabaseLockedError`
63
+ pressure under contention; foreign keys cascade-delete pending /
64
+ edges / TTL rows when a doc is deleted. The events table is
65
+ intentionally FK-less so the audit trail survives deletions.
66
+ - **Async wrappers** — `AsyncVectorCollection` gains async equivalents of the
67
+ new methods (`update_embedding`, `flush_pending`, `increment_metadata`,
68
+ `add_edge`, `update_edge`, `delete_edge`, `get_edges`, `set_ttl`,
69
+ `clear_ttl`, `sweep_ttl`, `read_events`, `last_event_seq`,
70
+ `rebuild_if_needed`).
71
+
72
+ #### New types & constants
73
+
74
+ - `simplevecdb.types`: `Edge`, `Event`, `TTLEntry` frozen dataclasses.
75
+ - `simplevecdb.constants`: `PENDING_FLUSH_DEFAULT_BATCH=1000`,
76
+ `EVENTS_POLL_INTERVAL_S=0.1`, `EVENTS_RETENTION_LIMIT=100_000`,
77
+ `TTL_SWEEP_DEFAULT_INTERVAL_S=60.0`, `REBUILD_PENDING_THRESHOLD=5_000`,
78
+ `REBUILD_TOMBSTONE_THRESHOLD=5_000`, `REBUILD_MIN_INTERVAL_S=3600.0`,
79
+ `SQLITE_BUSY_TIMEOUT_MS=5000`.
80
+
81
+ #### Test coverage
82
+
83
+ - `tests/unit/test_v26_1_features.py` — 25 tests covering the five must-have
84
+ primitives end-to-end: `update_embedding` + pending buffer + flush; edges
85
+ CRUD with atomic deltas, range filtering, and prune; `increment_metadata`
86
+ under 800-thread contention (exact total preserved); transaction rollback
87
+ and commit semantics; Mongo-style and tuple-shorthand range filters in
88
+ `similarity_search`; events append on every mutation; TTL sweep with
89
+ `delete` and `callback` paths; threshold-driven rebuild scheduler.
90
+
91
+ #### Removed
92
+
93
+ - **`sqlite-vec` dependency** dropped from `pyproject.toml`. The package was
94
+ never imported and the v1.x → v2.0 auto-migration code path could not have
95
+ worked without explicitly loading the extension at connection time.
96
+ - **`MigrationRequiredError`**, **`VectorDB.check_migration()`**, and the
97
+ **`auto_migrate=`** constructor flag have been removed. Databases written
98
+ by `simplevecdb < 2.0.0` (sqlite-vec backend) are no longer auto-migrated
99
+ on open. To upgrade a v1.x database, dump the rows with a v1.x install and
100
+ re-ingest them through the v2 API; or stay on the last release that shipped
101
+ the migration path (v2.6.0 or earlier).
102
+ - The catalog helpers `check_legacy_sqlite_vec`, `get_legacy_vectors`, and
103
+ `drop_legacy_vec_table` are gone alongside the migration entry point.
104
+
105
+ #### Out of scope
106
+
107
+ - No external pub/sub for events — polling only.
108
+ - No multi-master writer support; single-writer + many readers remains the
109
+ recommended topology.
110
+
111
+ ## [2.6.0] - 2026-05-06
112
+
113
+ ### Review pass 3 — final correctness/security pass before tag
114
+
115
+ #### Critical fixes
116
+
117
+ - **`UsearchIndex.save` lost-update race** — the `_dirty = False` clear was outside the `file_lock` window, so a concurrent `add()` between `os.replace()` and the dirty-flag clear could be silently overwritten. Moved inside `file_lock`.
118
+ - **`UsearchIndex.save` data fsync on `O_RDONLY` fd** — `fsync(2)` on a read-only file descriptor has implementation-defined behavior on Linux (some kernels return `EBADF`, swallowed by the warning branch). Switched to `O_RDWR` so the data fsync is guaranteed.
119
+ - **`_rebuild_index_locked` bare `conn.execute`** — replaced the bare `self.conn.execute("SELECT id FROM ...")` with the new `CatalogManager.list_all_ids()`, which routes the read through `self._lock` instead of relying on RLock re-entrancy from a single caller.
120
+ - **PBKDF2 iteration bump** — raised from 480 000 → 600 000 to match the OWASP 2024 minimum for PBKDF2-HMAC-SHA256.
121
+ - **AES-GCM AAD now binds the v1 header** — `encrypt_file` / `decrypt_file` pass the magic+version bytes as `associated_data`, so any tampering with the header (including downgrade attempts) fails authentication instead of silently succeeding.
122
+ - **Bounded normalize-key cache** — `_NORMALIZE_KEY_CACHE` is now an LRU capped at 64 entries, serialized by a `threading.Lock`. Long-running multi-tenant processes no longer leak derived key material indefinitely.
123
+ - **LlamaIndex `delete()` no longer swallows `sqlite3.DatabaseError`** — narrowed the exception in the metadata-fallback path to `(TypeError, NotImplementedError)`. A locked DB, closed connection, or schema mismatch now propagates to the caller instead of becoming a silent no-op.
124
+ - **Hybrid-search RRF rank symmetry** — vector candidates now use the original HNSW position as their RRF rank (via `enumerate(vector_keys_list)`), matching how keyword candidates use raw BM25 position. Previously, a metadata filter that rejected vector candidates inflated surviving vector scores relative to keyword scores, corrupting result ordering.
125
+ - **`add_documents` FTS sentinel guard** — added a defense-in-depth check that raises `RuntimeError` if any `-1` sentinel rowid remains in `real_ids` before the FTS upsert. Prevents a hypothetical retry-loop interaction from corrupting the FTS index with rowid `-1`.
126
+
127
+ #### Important fixes
128
+
129
+ - **`delete_collection` TOCTOU** — moved the `list_collections()` existence check inside the `with self._lock:` block so two concurrent `delete_collection(name)` calls cannot both pass the check; the second now sees a clean `KeyError` instead of a SQLite error.
130
+ - **Salt sidecar `O_EXCL` guard** — `_resolve_salt(create_if_missing=True)` now creates the sidecar with `O_CREAT | O_EXCL`. If two processes race, the loser reads the winner's salt; if a sidecar already exists out-of-band, it is preserved instead of being clobbered (which would have rendered an existing DB unreadable).
131
+ - **`encrypt_index_file` v0→v1 sidecar migration** — re-encrypting a legacy v0 blob (no sidecar) now creates a fresh sidecar, completing the migration path to per-DB salts. Previously, `is_first_encryption` was keyed on `.enc` presence rather than `.salt` presence.
132
+ - **LlamaIndex legacy-collection warning** — `SimpleVecDBLlamaStore.__init__` now emits a one-shot `DeprecationWarning` when it detects rows lacking `_simplevecdb_node_id`, telling the operator to call `migrate_node_id_metadata()` and noting the inherent limitation that pre-2.6 rows can only be stamped with `str(doc_id)` (the original LlamaIndex node ids were never persisted).
133
+ - **INT8 quantization range break softened** — instead of raising `ValueError` on `max(|x|) > 1.0 + 1e-5`, the strategy now emits a one-shot `DeprecationWarning` and clips. Restores backwards compatibility for callers that relied on the prior silent-clip behavior.
134
+ - **`scripts/check_version_sync.py` now validates `CHANGELOG.md`** — the hook fails if the latest CHANGELOG entry header does not match `pyproject.toml`'s version, preventing a release from shipping with a stale changelog.
135
+
136
+ #### Test coverage added (review pass 3 gaps)
137
+
138
+ - `tests/unit/test_v26_review_pass_3.py` — covers parent-directory fsync on save, `.tmp` cleanup on save failure, `db._lock is catalog._lock` shared-RLock identity, adversarial inputs to `_validate_table_name`, hybrid-search RRF rank symmetry under filter, and same-text-different-id deduplication.
139
+ - `tests/unit/test_v26_encryption_review_pass_3.py` — covers nonce uniqueness across saves, wrong-key decrypt does not create the output file, AAD-bound header tampering fails authentication, salt sidecar O_EXCL preservation, and v0→v1 migration round-trip.
140
+ - `tests/unit/integrations/test_llamaindex_review_pass_3.py` — covers the `add → query` round-trip preserving the original LlamaIndex node id, end-to-end migration-then-delete on v2.5-shaped data, the legacy-collection `DeprecationWarning` at `__init__` time, and that `sqlite3.DatabaseError` from the metadata-fallback path now propagates instead of being swallowed.
141
+
142
+ ### Fixed (concurrency & durability)
143
+
144
+ - **Atomic `UsearchIndex.save`** — now writes to a sibling `.tmp`, fsyncs, then `os.replace()`s onto the live path and fsyncs the parent directory. A crash mid-save can no longer corrupt the only copy of the index. Also moved the `_dirty` short-circuit inside `_write_lock` so a concurrent `add` cannot have its dirty flag silently cleared.
145
+ - **Atomic `rebuild_index`** — builds the new index at a sibling `.rebuild` path and atomically swaps it onto the live path; the old index remains the canonical copy until the swap succeeds.
146
+ - **Atomic encrypted save** — `encrypt_file` / `decrypt_file` now write to a sibling `.tmp`, fsync, set mode `0o600`, then `os.replace()`. `encrypt_index_file` only unlinks the plaintext after the encrypted output is durably on disk. A torn write can no longer leave the index unrecoverable.
147
+ - **`VectorDB`-level `RLock`** — a single re-entrant lock now serializes the `_collections` cache (no more check-then-insert TOCTOU on `collection()`) and is shared with every `CatalogManager` so all `with self.conn:` blocks across collections cannot interleave on the shared `sqlite3.Connection`. Reads remain lock-free at the SQLite level via WAL.
148
+ - **`AsyncVectorDB.close` drains** — switched from `executor.shutdown(wait=False)` to `wait=True` so in-flight pool tasks finish their cursors before the SQLite connection is closed. Pending (not-yet-started) work is still cancelled.
149
+ - **`set_parent` cycle check is transactional** — descendant lookup and parent UPDATE now run inside the same `with self._lock, self.conn:` block, closing a TOCTOU window where a concurrent edge could form a cycle.
150
+ - **Cluster persistence** — `_ensure_cluster_table`, `save_cluster_state`, `delete_cluster_state` now use `with self._lock, self.conn:` instead of bare `conn.commit()`; an exception during the execute is properly rolled back.
151
+ - **`add_documents` ID recovery is correct under upsert** — replaced the `last_insert_rowid()` arithmetic (which silently returned wrong IDs for batches mixing explicit and `None` IDs because UPSERTs do not advance the auto-increment counter) with a single `INSERT … RETURNING id` for the auto-ID rows. Explicit-ID rows still take the upsert path.
152
+ - **`delete_collection` closes cached indexes first** — any `VectorCollection` instances cached for the deleted name have their `UsearchIndex` closed before the file is unlinked, so a stale mmap view cannot race the unlink.
153
+
154
+ ### Changed
155
+
156
+ - **`upsert_fts_rows` / `delete_fts_rows` are now `_upsert_fts_rows` / `_delete_fts_rows`** (private). The FTS shadow table must be updated inside the same transaction as the main table or it can desync on crash; the rename signals the contract.
157
+ - **`get_legacy_vectors`, `drop_legacy_vec_table`** now validate the supplied table name via `_validate_table_name` before interpolating into SQL.
158
+
159
+ ### Added
160
+
161
+ - **Declared `python-dotenv` dependency** — `simplevecdb.config` already imported and called `load_dotenv` at package import; the missing dependency would `ImportError` on a clean install of the base package without optional extras.
162
+
163
+ ### Fixed (correctness & quality)
164
+
165
+ - **RRF deduplication keys by document ID, not text** — `hybrid_search` previously deduped by `doc.page_content`, silently merging two distinct documents that happened to share text into one inflated-score result.
166
+ - **NaN/Inf guard at insert** — `add_texts` and `add_texts_streaming` reject non-finite vectors instead of feeding them to HNSW, which would produce undefined neighbours and could corrupt the graph.
167
+ - **`normalize_l2` handles subnormals** — replaced the exact `norm == 0` compare with a `< 1e-12` check (matching the existing usearch_index guard); subnormal floats no longer produce wildly large normalized vectors.
168
+ - **Silhouette score samples on large collections** — `silhouette_score` is O(n²); now caps the evaluation sample at `SILHOUETTE_MAX_SAMPLE = 10_000`. Large collections no longer OOM.
169
+ - **MMR maintains the selected matrix incrementally** — replaced per-iteration `np.stack(selected_embs)` with `np.vstack` of a running matrix. O(k²·d) wasted allocations dropped to O(k·d).
170
+ - **`_parse_bool_env` treats `KEY=` as unset** — empty strings now fall through to the default; previously they were truthy because `"".strip()` is not in the falsey set.
171
+ - **LangChain async methods use `asyncio.to_thread`** — `aadd_texts` / `asimilarity_search` / `amax_marginal_relevance_search` no longer block the event loop.
172
+ - **LlamaIndex `delete()` survives a process restart** — node IDs are persisted into document metadata under `_simplevecdb_node_id`; `delete()` falls back to a metadata query when the in-memory `_id_map` is empty.
173
+ - **LlamaIndex query results carry stable node IDs** — replaced `str(hash(page_content))` (process-randomized, collision-prone) with the persisted `_simplevecdb_node_id`.
174
+ - **`AsyncVectorDB.collection` accepts `store_embeddings`** — async callers can now enable embedding storage (required for `rebuild_index()`); previously they had no way to set it.
175
+
176
+ ### Security
177
+
178
+ - **API key comparison uses `hmac.compare_digest`** — the prior `token not in allowed_keys` short-circuit leaked key prefixes via response time.
179
+ - **SQLCipher PRAGMA key always uses the `x'hex'` form** — every key path now goes through `_normalize_key` first, eliminating string interpolation of user-supplied passphrase characters into a quoted PRAGMA argument.
180
+ - **`is_database_encrypted` rejects zero-byte files** — previously a missing/empty DB looked like an unencrypted DB because `sqlite3.connect` would create a fresh one.
181
+
182
+ ### Changed (tooling)
183
+
184
+ - **Ruff and mypy targets aligned with `requires-python>=3.10`** — both were `py312`, hiding 3.10/3.11 incompatibilities. Cleaned three resulting `F401` unused-import warnings (`signal` in models.py, `_batched` and `constants` re-imports).
185
+ - **Pre-commit version-sync hook** — `__init__.py` derives `__version__` dynamically via `importlib.metadata`, so `check_version_sync.py` was failing on every commit looking for a literal `__version__ = "x.y.z"` line that does not exist. The hook now validates only `pyproject.toml`'s version field. `bump_version.py` similarly stops trying to rewrite `__init__.py` and uses an anchored regex to update only the canonical version field.
186
+
187
+ ### Security (2.6.0 final)
188
+
189
+ - **Per-DB random PBKDF2 salt** — encrypted databases and index files now generate a random 16-byte salt at creation time, written to a `<resource>.salt` sidecar with mode `0o600`. The previous fixed `b"simplevecdb-sqlcipher-key"` salt let an attacker precompute one rainbow table that broke every simplevecdb installation with the same passphrase. Pre-2.6.0 encrypted resources keep working unchanged: when no sidecar exists, the loader falls back to the legacy fixed salt automatically.
190
+ - **HuggingFace `repo_id` allowlist + `trust_remote_code=False`** — the embeddings server validates model names against a strict regex (`namespace/name` with `[A-Za-z0-9_.-]` only) before passing them to `snapshot_download` / `SentenceTransformer`, blocking path traversal and local-filesystem inputs. `SentenceTransformer` is constructed with `trust_remote_code=False` so a malicious model card cannot trigger arbitrary downloaded Python on load.
191
+ - **CORS is opt-in** — the server no longer adds CORS middleware unless `EMBEDDING_SERVER_CORS_ORIGINS` is set. When the operator does set wildcard origins (`["*"]`), `allow_credentials` is forced off so the spec-violating wildcard-with-credentials combo can't be produced.
192
+
193
+ ### Migration helpers
194
+
195
+ - **`SimpleVecDBLlamaStore.migrate_node_id_metadata()`** — backfills `_simplevecdb_node_id` for documents inserted before 2.6.0. Pre-2.6.0 versions did not persist the LlamaIndex node_id into metadata, so `delete()` could not find the right row after a process restart. Idempotent — already-stamped rows are skipped.
196
+
197
+ ### Added (hygiene & polish)
198
+
199
+ - **`ClusterResult` and `ClusterTagCallback` exported from `simplevecdb`** — they were return/argument types of public methods but had no public import path; users had to reach into `simplevecdb.types`.
200
+ - **`NullHandler` attached to the package's root logger** at import time, per the Python logging HOWTO. Idempotent — duplicate calls do not stack handlers.
201
+ - **`SimpleVecDBLlamaStore.delete_nodes` raises `NotImplementedError`** when called with `filters`, instead of silently dropping the filter portion and pretending the deletion succeeded.
202
+ - **Recursive CTE depth bound as a parameter** in `get_descendants` / `get_ancestors`. The previous f-string interpolation was safe due to `int()` coercion, but it was one refactor away from an injection; binding the depth as a parameter removes that risk.
203
+ - **`Config.from_env()` documented** as returning the import-time-frozen instance; setting env vars after import does not refresh it.
204
+ - **`ModelRegistry(allow_unlisted=...)` defaults to `False`** to match the secure-by-default config setting; programmatic instantiations no longer get an open registry by accident.
205
+ - **`/v1/usage` returns aggregated totals when auth is disabled** instead of leaking the per-IP buckets to anyone who hits the endpoint.
206
+ - **Server validates `EMBEDDING_SERVER_MAX_REQUEST_ITEMS <= _MAX_ENCODE_BATCH` at startup** so an out-of-range env var fails fast at boot rather than per request.
207
+ - **`pyproject.toml` gains `[project.urls]`, `classifiers`, and `keywords`** for a useful PyPI listing.
208
+ - **`.bandit` documents the B104 skip** and warns that any future `0.0.0.0` binding requires removing the skip.
209
+ - **Encrypted file format now carries a 3-byte header** (`'SV' + version`) so future format changes are detectable. `decrypt_file` accepts both the new v1 format and the v0 (pre-2.6.0) format, so existing encrypted indexes still load without re-encryption.
210
+
211
+ ### Fixed (review pass 2)
212
+
213
+ - **NaN/Inf rejection no longer leaves orphan catalog rows** — `add_texts` and `_process_streaming_batch` now validate vectors *before* the SQLite insert. Previously the catalog row committed first and a non-finite vector then raised, leaving rows visible via `get_documents_by_ids` but unreachable through similarity search.
214
+ - **`VectorCollection.__repr__` no longer issues SQL** — the previous `count()` call would raise `ProgrammingError` after `close()`, breaking debuggers and exception formatters that auto-stringify objects. The 2.6.0 fix only covered `VectorDB.__repr__`.
215
+ - **`EMBEDDING_SERVER_MAX_REQUEST_ITEMS` validation runs at module import** — the guard was previously inside `run_server()` and was bypassed under any non-CLI ASGI deployment (gunicorn, programmatic uvicorn).
216
+ - **LlamaIndex empty-`node_id` path is atomic** — `SimpleVecDBLlamaStore.add` now generates a UUID for nodes that arrive without a `node_id` and stamps it into metadata *before* the row insert, so the metadata commit is in the same SQLite transaction as the catalog row. Previously a separate `UPDATE` followed `add_texts`; a crash in the gap left rows un-stampable and cross-restart `delete()` silently no-op'd.
217
+ - **Catalog read paths serialize on `self._lock`** — `get_documents_by_ids`, `get_embeddings_by_ids`, `get_documents_and_embeddings_by_ids`, `find_ids_by_texts`, `find_ids_by_filter`, `keyword_search`, `count`, `get_all_docs_with_text`, `check_legacy_sqlite_vec`, `get_legacy_vectors`, `get_children`, `get_parent`, `get_descendants`, `get_ancestors`, `load_cluster_state`, `list_cluster_states`, and `VectorDB.list_collections` now acquire the connection-level lock around `conn.execute`. `sqlite3.Connection` is not safe for concurrent statement execution from multiple threads even under WAL.
218
+ - **`rebuild_index` is fully serialized** — the entire fetch + build + swap now runs inside `with self._lock:` so concurrent `add` / `delete` cannot mutate the catalog mid-rebuild and produce a stale snapshot.
219
+ - **`_ensure_cluster_table` double-checked under lock** — the `_cluster_table_ready` flag is now re-checked inside the lock and set inside the `with` block. Concurrent first-callers no longer both run the DDL.
220
+ - **`utils.file_lock` opens via `os.open(O_CREAT | O_RDWR, 0o600)`** — no truncation of stale lock files from a crashed prior run, restricted permissions on the lock sentinel.
221
+
8
222
  ## [2.5.0] - 2026-04-07
9
223
 
10
224
  ### Added