simplevecdb 2.6.0__tar.gz → 2.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. simplevecdb-2.6.2/.github/FUNDING.yml +5 -0
  2. simplevecdb-2.6.2/.gitignore +74 -0
  3. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/CHANGELOG.md +209 -0
  4. simplevecdb-2.6.2/PKG-INFO +377 -0
  5. simplevecdb-2.6.2/README.md +330 -0
  6. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/CHANGELOG.md +209 -0
  7. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/ENV_SETUP.md +1 -0
  8. simplevecdb-2.6.2/docs/Features.md +207 -0
  9. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/core.md +0 -1
  10. simplevecdb-2.6.2/docs/examples.md +286 -0
  11. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/index.md +9 -9
  12. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/examples/rag/langchain_rag.ipynb +10 -7
  13. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/examples/rag/llama_rag.ipynb +10 -4
  14. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/examples/rag/ollama_rag.ipynb +21 -16
  15. simplevecdb-2.6.2/lefthook.yml +64 -0
  16. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/mkdocs.yml +1 -0
  17. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/pyproject.toml +1 -2
  18. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/scripts/check_version_sync.py +2 -6
  19. simplevecdb-2.6.2/scripts/exercise_async_collection.py +358 -0
  20. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/scripts/track_metrics.py +25 -16
  21. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/__init__.py +1 -2
  22. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/async_core.py +328 -198
  23. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/config.py +31 -9
  24. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/constants.py +28 -0
  25. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/core.py +1103 -313
  26. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/embeddings/models.py +1 -0
  27. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/embeddings/server.py +90 -1
  28. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/encryption.py +28 -9
  29. simplevecdb-2.6.2/src/simplevecdb/engine/catalog.py +2342 -0
  30. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/engine/clustering.py +19 -2
  31. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/engine/search.py +98 -18
  32. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/engine/usearch_index.py +21 -13
  33. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/integrations/langchain.py +32 -3
  34. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/integrations/llamaindex.py +47 -9
  35. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/logging.py +3 -7
  36. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/types.py +60 -35
  37. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/utils.py +210 -31
  38. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/integration/test_rag.py +11 -9
  39. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/integration/test_server.py +3 -1
  40. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_core_additional_coverage.py +6 -3
  41. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_filters.py +5 -4
  42. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_missing_coverage.py +4 -184
  43. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_v25_correctness.py +3 -1
  44. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_v25_features.py +9 -3
  45. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_v25_robustness.py +28 -14
  46. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/embeddings/test_repo_id_validation.py +6 -4
  47. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/embeddings/test_server.py +9 -3
  48. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/embeddings/test_v25_enhancements.py +4 -4
  49. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/engine/test_v26_quantization_clustering.py +7 -1
  50. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/integrations/test_langchain_coverage.py +3 -1
  51. simplevecdb-2.6.2/tests/unit/integrations/test_llamaindex_filters_tier1.py +70 -0
  52. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/integrations/test_llamaindex_v26.py +1 -3
  53. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_async_coverage.py +3 -1
  54. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_catalog_coverage.py +2 -62
  55. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_clustering.py +127 -0
  56. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_core.py +2 -39
  57. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_encryption_coverage.py +23 -7
  58. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_encryption_v1_format.py +1 -3
  59. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_error_handling.py +23 -58
  60. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_hierarchy.py +28 -0
  61. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_multi_collection.py +14 -11
  62. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_search.py +27 -18
  63. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_search_missing_coverage.py +4 -4
  64. simplevecdb-2.6.2/tests/unit/test_tier1_fixes.py +241 -0
  65. simplevecdb-2.6.2/tests/unit/test_tier2_fixes.py +117 -0
  66. simplevecdb-2.6.2/tests/unit/test_tier3_fixes.py +65 -0
  67. simplevecdb-2.6.2/tests/unit/test_tier4_fixes.py +51 -0
  68. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_usearch_index_missing_coverage.py +11 -5
  69. simplevecdb-2.6.2/tests/unit/test_v26_1_features.py +655 -0
  70. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_v26_encryption_review_pass_3.py +6 -14
  71. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_v26_misc.py +3 -4
  72. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_v26_review_pass_3.py +3 -9
  73. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_v26_review_pass_4.py +3 -9
  74. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/uv.lock +1 -15
  75. simplevecdb-2.6.0/.github/FUNDING.yml +0 -6
  76. simplevecdb-2.6.0/.gitignore +0 -38
  77. simplevecdb-2.6.0/PKG-INFO +0 -546
  78. simplevecdb-2.6.0/README.md +0 -498
  79. simplevecdb-2.6.0/docs/LICENSE +0 -0
  80. simplevecdb-2.6.0/docs/examples.md +0 -356
  81. simplevecdb-2.6.0/lefthook.yml +0 -39
  82. simplevecdb-2.6.0/src/simplevecdb/engine/catalog.py +0 -1123
  83. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.bandit +0 -0
  84. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.env.example +0 -0
  85. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  86. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  87. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  88. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.github/dependabot.yml +0 -0
  89. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.github/workflows/ci.yml +0 -0
  90. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.github/workflows/publish.yml +0 -0
  91. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.github/workflows/security.yml +0 -0
  92. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.github/workflows/update-sponsors.yml +0 -0
  93. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/.python-version +0 -0
  94. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/CODE_OF_CONDUCT.md +0 -0
  95. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/CONTRIBUTING.md +0 -0
  96. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/LICENSE +0 -0
  97. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/SECURITY.md +0 -0
  98. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/CONTRIBUTING.md +0 -0
  99. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/async.md +0 -0
  100. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/config.md +0 -0
  101. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/embeddings.md +0 -0
  102. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/encryption.md +0 -0
  103. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/engine/catalog.md +0 -0
  104. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/engine/quantization.md +0 -0
  105. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/engine/search.md +0 -0
  106. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/integrations.md +0 -0
  107. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/api/types.md +0 -0
  108. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/benchmarks.md +0 -0
  109. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/docs/guides/clustering.md +0 -0
  110. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/examples/auto_embed.py +0 -0
  111. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/examples/backend_benchmark.py +0 -0
  112. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/examples/embeddings/perf_benchmark.py +0 -0
  113. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/examples/quant_benchmark.py +0 -0
  114. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/examples/smoke_test.py +0 -0
  115. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/scripts/bump_version.py +0 -0
  116. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/embeddings/__init__.py +0 -0
  117. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/engine/__init__.py +0 -0
  118. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/engine/quantization.py +0 -0
  119. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/src/simplevecdb/integrations/__init__.py +0 -0
  120. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/conftest.py +0 -0
  121. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/integration/test_langchain.py +0 -0
  122. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/integration/test_llamaindex.py +0 -0
  123. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/integration/test_v21_features.py +0 -0
  124. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/perf/test_batch_detection.py +0 -0
  125. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/perf/test_performance.py +0 -0
  126. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/__init__.py +0 -0
  127. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_batch_detection.py +0 -0
  128. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_factory_methods.py +0 -0
  129. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_initialization.py +0 -0
  130. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_quantization.py +0 -0
  131. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_similarity_search.py +0 -0
  132. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/core/test_v26_safety.py +0 -0
  133. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/embeddings/__init__.py +0 -0
  134. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/embeddings/test_models.py +0 -0
  135. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/embeddings/test_server_coverage.py +0 -0
  136. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/integrations/__init__.py +0 -0
  137. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/integrations/test_llamaindex_coverage.py +0 -0
  138. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/integrations/test_llamaindex_review_pass_3.py +0 -0
  139. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_async.py +0 -0
  140. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_async_v26.py +0 -0
  141. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_config.py +0 -0
  142. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_cross_collection_search.py +0 -0
  143. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_encryption.py +0 -0
  144. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_encryption_salt.py +0 -0
  145. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_search_coverage.py +0 -0
  146. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_streaming.py +0 -0
  147. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_types.py +0 -0
  148. {simplevecdb-2.6.0 → simplevecdb-2.6.2}/tests/unit/test_utils.py +0 -0
@@ -0,0 +1,5 @@
1
+ github: [coderdayton]
2
+ custom:
3
+ [
4
+ "https://ko-fi.com/xbbvii",
5
+ ]
@@ -0,0 +1,74 @@
1
+ # Python / uv
2
+ .venv/
3
+ __pycache__/
4
+ *.py[cod]
5
+ *.egg-info/
6
+ .eggs/
7
+ build/
8
+ dist/
9
+ .tox/
10
+ .nox/
11
+
12
+ # Tooling caches
13
+ .mypy_cache/
14
+ .pytest_cache/
15
+ .ruff_cache/
16
+ .cache/
17
+ .hypothesis/
18
+ cython_debug/
19
+
20
+ # Coverage
21
+ .coverage
22
+ .coverage.*
23
+ coverage.xml
24
+ *.cover
25
+ htmlcov/
26
+
27
+ # Jupyter
28
+ .ipynb_checkpoints/
29
+
30
+ # Environment / secrets
31
+ .env
32
+ .envrc
33
+ .direnv/
34
+
35
+ # Databases (SimpleVecDB writes WAL/SHM sidecars)
36
+ *.db
37
+ *.db-journal
38
+ *.db-shm
39
+ *.db-wal
40
+ *.sqlite
41
+ *.sqlite3
42
+
43
+ # Editor / IDE
44
+ .vscode/
45
+ .idea/
46
+ .history/
47
+ *.iml
48
+ *.swp
49
+ *.swo
50
+ *~
51
+
52
+ # OS
53
+ .DS_Store
54
+ Thumbs.db
55
+ desktop.ini
56
+
57
+ # Docs build
58
+ site/
59
+
60
+ # Agentic CLI tools (per-developer state)
61
+ .opencode/
62
+ opencode.json
63
+ .claude/
64
+ .codex
65
+
66
+ # Project-specific scratch
67
+ simplevecdb_plan.md
68
+ AGENTS.md
69
+ NEXT_UPDATES.md
70
+ pro_pack/
71
+
72
+
73
+ # Local notes
74
+ IMPORTANT.md
@@ -5,6 +5,215 @@ All notable changes to SimpleVecDB will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.6.2] - 2026-06-06
9
+
10
+ ### Correctness and contract fixes
11
+
12
+ Hardening of the index-rebuild, search, clustering, and integration layers
13
+ surfaced by a code review. Intentional behavior changes are noted under
14
+ “Changed”.
15
+
16
+ #### Fixed
17
+
18
+ - **`rebuild_index` no longer bricks a collection on failure** — if building or
19
+ swapping the new HNSW index raises after the live index is closed, the
20
+ collection re-opens the intact on-disk index instead of holding a closed one.
21
+ - **Catalog write lock released on connection error** — a raising
22
+ `connection.__enter__` no longer leaks the catalog lock (which could deadlock
23
+ the database).
24
+ - **Max-Marginal-Relevance respects the distance metric** — MMR on `l2`
25
+ collections used a cosine-specific relevance formula that swamped the
26
+ diversity term; it now uses a bounded, metric-appropriate relevance.
27
+ - **`similarity_search_batch` fills `k` under filters and accepts text queries**
28
+ — large filtered batches no longer silently under-deliver, and a text query in
29
+ a large batch behaves the same as in a small one.
30
+ - **Clustering handles impossible `n_clusters`** — `ClusterEngine.cluster_vectors`
31
+ raises a clear error when `n_clusters` exceeds the number of vectors;
32
+ `Collection.cluster()` caps `n_clusters` to the number of vectors actually
33
+ clustered (the sample when `sample_size` is set, fixing a latent error when
34
+ `n_clusters > sample_size`).
35
+ - **Metadata filter keys match literally** — a filter key containing a dot
36
+ (e.g. `{"a.b": x}`) now matches the literal top-level key `a.b` instead of the
37
+ nested JSON path `a → b`, consistent with the Python filter path. Keys
38
+ containing a double-quote are rejected.
39
+ - **BIT-quantized vector retrieval unpacks correctly** — `UsearchIndex.get()`
40
+ (used by the MMR fallback) returned packed bytes for BIT indexes instead of
41
+ the unpacked ±1 float vectors; it now unpacks them.
42
+ - **`rebuild_index` no longer blocks the database during the HNSW build** — the
43
+ expensive build runs without the shared lock (held only to snapshot and swap);
44
+ writes that land during the build are folded into the new index before the
45
+ swap.
46
+ - **Embedding server caps request body size** — an ASGI middleware rejects
47
+ request bodies larger than the server's own accept limits before they are
48
+ buffered/parsed, closing an unauthenticated memory-exhaustion vector (only
49
+ relevant with the `[server]` extra exposed on a network). A missing
50
+ encryption salt sidecar now logs a warning instead of silently falling back
51
+ to the shared legacy salt.
52
+ - **Robustness pass** — malformed FTS5 keyword queries raise `ValueError` instead
53
+ of a raw SQLite error; the cluster-state table is created eagerly so a
54
+ rolled-back first `save_cluster` cannot desync it; a non-integer
55
+ `EMBEDDING_BATCH_SIZE`/`EMBEDDING_SERVER_MAX_REQUEST_ITEMS` env value warns and
56
+ falls back instead of crashing import; `vacuum()` holds the DB lock; a failed
57
+ index add after the catalog commit is logged (divergence visibility); hybrid
58
+ search applies the Python metadata filter on the keyword side too (SQL/Python
59
+ parity); `logging.configure_logging` swaps handlers atomically.
60
+ - **LangChain `asimilarity_search_with_score`** offloads to a thread instead of
61
+ blocking the event loop.
62
+
63
+ #### Changed
64
+
65
+ - **`AsyncVectorCollection.increment_metadata` now returns `int`** (1 if the row
66
+ existed and was updated, 0 otherwise), matching the synchronous API; it
67
+ previously discarded the value and returned `None`.
68
+ - **LlamaIndex metadata filters fail loudly on unsupported shapes** — the
69
+ `SimpleVecDBLlamaStore` adapter now maps comparison operators
70
+ (`$gt/$gte/$lt/$lte/$ne/$in/$nin`) instead of silently treating them as
71
+ equality, and raises `NotImplementedError` for `OR`/`NOT` conditions and
72
+ unsupported operators rather than returning wrong results.
73
+ - **LangChain relevance scoring now works** — `SimpleVecDBVectorStore` implements
74
+ `_select_relevance_score_fn`, so `similarity_search_with_relevance_scores` and
75
+ `as_retriever(search_type="similarity_score_threshold")` return metric-aware
76
+ `[0, 1]` relevance (higher = better). `similarity_search_with_score` still
77
+ returns the raw distance (FAISS/Chroma convention), now documented as such.
78
+
79
+ ### Clustering and hierarchy fixes
80
+
81
+ Internal correctness and performance work on the clustering and hierarchy
82
+ layers. No public API changes; existing databases are unaffected.
83
+
84
+ #### Fixed
85
+
86
+ - **`load_cluster` survives empty k-means clusters** — when k-means leaves a
87
+ requested cluster empty (common with duplicate vectors or `n_clusters` near
88
+ the number of distinct points), the stored `n_clusters` is smaller than the
89
+ number of centroid rows. The centroid reshape now derives its row count from
90
+ the stored buffer rather than `n_clusters`, which previously raised
91
+ `ValueError` on load.
92
+ - **`assign_to_cluster` matches metadata keys literally** — a `metadata_key`
93
+ containing `.` or `[` is now matched as a literal top-level key (via
94
+ `json_each`) instead of being misread as a nested JSON path, which had caused
95
+ every already-assigned document to be re-assigned on each call.
96
+ - **`cluster(algorithm="hdbscan", sample_size=…)` raises instead of silently
97
+ dropping documents** — HDBSCAN produces no centroids, so out-of-sample
98
+ documents cannot be assigned. The combination now raises a clear `ValueError`
99
+ rather than clustering only the sample.
100
+
101
+ #### Performance
102
+
103
+ - **BLAS-backed out-of-sample centroid assignment** — nearest-centroid
104
+ assignment uses the `‖c‖² − 2·x·c` expansion (a single matmul) instead of
105
+ materialising the dense `(n_vectors, n_centroids, dim)` broadcast temporary
106
+ that could exhaust memory on large collections.
107
+ - **Unassigned-id lookup pushed into SQLite** — `assign_to_cluster(doc_ids=None)`
108
+ finds documents lacking the cluster key with one `json_each` query instead of
109
+ loading and JSON-parsing every row's text and metadata.
110
+ - **Bounded ancestor-walk for cycle detection** — `set_parent` detects
111
+ parent/child cycles by walking the ancestor chain with a depth-bounded
112
+ recursive CTE instead of materialising the entire descendant subtree.
113
+
114
+ ## [2.6.1] - 2026-05-10
115
+
116
+ ### Storage, mutation, and eventing improvements
117
+
118
+ This release closes ten long-standing gaps in the catalog layer with a coherent
119
+ set of additive primitives. No public API breaks apart from the removals listed under “Removed”; existing 2.6.0 databases
120
+ upgrade transparently (the new tables are created on first open).
121
+
122
+ #### New features
123
+
124
+ - **Native vector update via pending buffer** — `collection.update_embedding(id, vector)`
125
+ writes a row to a per-collection `_pending_vectors` overlay inside one SQL
126
+ transaction; the new vector becomes visible to reads immediately and is
127
+ promoted to the HNSW index on `collection.pending.flush()`. Removes the
128
+ HNSW remove+re-add churn previously required for in-place updates.
129
+ - **Bulk vector math** — `collection.pending.update_many([(id, vec), …])` and
130
+ `collection.pending.blend_toward(ids, centroid, alpha)` for batched edits.
131
+ - **Atomic transaction boundary** — `with db.transaction() as tx: …` and
132
+ `with collection.tx(): …` wrap a SAVEPOINT around catalog writes
133
+ (metadata, counters, edges, events, TTL, and `update_embedding`'s
134
+ pending overlay) so a raised exception rolls all SQL writes back.
135
+ Coarse vector mutations (`add_texts`, `delete`) are NOT rolled back —
136
+ use `update_embedding` + `pending.flush()` for vector changes that
137
+ must be commit-gated. Nested contexts share a single savepoint stack
138
+ via the new `_TxState` helper.
139
+ - **Weighted directed edges** — new `collection.edges` namespace with
140
+ `add_edge / get_edges / update_edge / delete_edge / prune` over a
141
+ per-collection `_edges` table. Numeric columns (`weight`, `bonus`, `hits`,
142
+ `last_touch`) are addressable by the new range-filter grammar; deltas
143
+ (`dweight=+0.02, dhits=+1`) compile to a single atomic SQL UPDATE.
144
+ - **Atomic counter increments** — `collection.increment_metadata(id, {"hits": 1, "drift": 0.02})`
145
+ applies a dict of numeric deltas to JSON metadata in one statement using
146
+ chained `json_set(... json_extract + ?)` calls. WAL-atomic; safe under
147
+ concurrent writers.
148
+ - **Mongo-style range filters** — `filter={"score": {"$gt": 0.5, "$lte": 0.9}}`
149
+ on `similarity_search`, `keyword_search`, `hybrid_search`, `edges.get_edges`,
150
+ and `events.read`. Supported operators: `$eq $ne $gt $gte $lt $lte $in $nin
151
+ $exists $between`. Tuple shorthand (`("range", lo, hi)`, `(">", x)`) is
152
+ normalised into the operator-dict form.
153
+ - **Append-only change feed** — every mutating method now appends one row to
154
+ a per-collection `_events` table (kind, doc_id, payload, monotonic seq).
155
+ `collection.events.read(since=, kind=, limit=)`,
156
+ `collection.events.subscribe(since=, poll_interval=)`, and
157
+ `collection.events.prune(before_seq=)` expose the feed; cross-process
158
+ visibility comes from the existing WAL mode.
159
+ - **TTL / expiry hooks** — `collection.ttl.set(id, seconds=…, on_expire="delete"|"callback")`,
160
+ `collection.ttl.clear(id)`, and `collection.ttl.sweep()` over a
161
+ `_ttl` table; `start_background(interval=…)` runs the sweep in a daemon
162
+ thread (off by default).
163
+ - **Incremental rebuild scheduler** — `collection.maintenance.rebuild_if_needed(max_pending=, max_deleted=)`
164
+ triggers a full `rebuild_index()` only when the configured pending /
165
+ tombstone / wall-time thresholds are crossed.
166
+ - **Multi-process write safety** — added `PRAGMA busy_timeout=5000` and
167
+ `PRAGMA foreign_keys=ON` at every connection-open site (encrypted and
168
+ unencrypted). The native 5 s wait window reduces `DatabaseLockedError`
169
+ pressure under contention; foreign keys cascade-delete pending /
170
+ edges / TTL rows when a doc is deleted. The events table is
171
+ intentionally FK-less so the audit trail survives deletions.
172
+ - **Async wrappers** — `AsyncVectorCollection` gains async equivalents of the
173
+ new methods (`update_embedding`, `flush_pending`, `increment_metadata`,
174
+ `add_edge`, `update_edge`, `delete_edge`, `get_edges`, `set_ttl`,
175
+ `clear_ttl`, `sweep_ttl`, `read_events`, `last_event_seq`,
176
+ `rebuild_if_needed`).
177
+
178
+ #### New types & constants
179
+
180
+ - `simplevecdb.types`: `Edge`, `Event`, `TTLEntry` frozen dataclasses.
181
+ - `simplevecdb.constants`: `PENDING_FLUSH_DEFAULT_BATCH=1000`,
182
+ `EVENTS_POLL_INTERVAL_S=0.1`, `EVENTS_RETENTION_LIMIT=100_000`,
183
+ `TTL_SWEEP_DEFAULT_INTERVAL_S=60.0`, `REBUILD_PENDING_THRESHOLD=5_000`,
184
+ `REBUILD_TOMBSTONE_THRESHOLD=5_000`, `REBUILD_MIN_INTERVAL_S=3600.0`,
185
+ `SQLITE_BUSY_TIMEOUT_MS=5000`.
186
+
187
+ #### Test coverage
188
+
189
+ - `tests/unit/test_v26_1_features.py` — 25 tests covering the new
190
+ primitives end-to-end: `update_embedding` + pending buffer + flush; edges
191
+ CRUD with atomic deltas, range filtering, and prune; `increment_metadata`
192
+ under 800-thread contention (exact total preserved); transaction rollback
193
+ and commit semantics; Mongo-style and tuple-shorthand range filters in
194
+ `similarity_search`; events append on every mutation; TTL sweep with
195
+ `delete` and `callback` paths; threshold-driven rebuild scheduler.
196
+
197
+ #### Removed
198
+
199
+ - **`sqlite-vec` dependency** dropped from `pyproject.toml`. The package was
200
+ never imported and the v1.x → v2.0 auto-migration code path could not have
201
+ worked without explicitly loading the extension at connection time.
202
+ - **`MigrationRequiredError`**, **`VectorDB.check_migration()`**, and the
203
+ **`auto_migrate=`** constructor flag have been removed. Databases written
204
+ by `simplevecdb < 2.0.0` (sqlite-vec backend) are no longer auto-migrated
205
+ on open. To upgrade a v1.x database, dump the rows with a v1.x install and
206
+ re-ingest them through the v2 API; or stay on the last release that shipped
207
+ the migration path (v2.6.0 or earlier).
208
+ - The catalog helpers `check_legacy_sqlite_vec`, `get_legacy_vectors`, and
209
+ `drop_legacy_vec_table` are gone alongside the migration entry point.
210
+
211
+ #### Out of scope
212
+
213
+ - No external pub/sub for events — polling only.
214
+ - No multi-master writer support; single-writer + many readers remains the
215
+ recommended topology.
216
+
8
217
  ## [2.6.0] - 2026-05-06
9
218
 
10
219
  ### Review pass 3 — final correctness/security pass before tag
@@ -0,0 +1,377 @@
1
+ Metadata-Version: 2.4
2
+ Name: simplevecdb
3
+ Version: 2.6.2
4
+ Summary: Dead-simple local vector database powered by usearch HNSW.
5
+ Project-URL: Homepage, https://github.com/CoderDayton/simplevecdb
6
+ Project-URL: Repository, https://github.com/CoderDayton/simplevecdb
7
+ Project-URL: Issues, https://github.com/CoderDayton/simplevecdb/issues
8
+ Project-URL: Changelog, https://github.com/CoderDayton/simplevecdb/blob/main/CHANGELOG.md
9
+ Author-email: Dayton Dunbar <coderdayton14@gmail.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: embeddings,hnsw,langchain,llamaindex,rag,similarity-search,sqlite,usearch,vector-database,vectordb
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Database
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
+ Classifier: Typing :: Typed
26
+ Requires-Python: >=3.10
27
+ Requires-Dist: cryptography>=41.0
28
+ Requires-Dist: hdbscan>=0.8.33
29
+ Requires-Dist: numpy>=1.24
30
+ Requires-Dist: python-dotenv>=1.0
31
+ Requires-Dist: scikit-learn>=1.3.0
32
+ Requires-Dist: sqlcipher3-binary>=0.5.0
33
+ Requires-Dist: usearch>=2.16.3
34
+ Provides-Extra: examples
35
+ Requires-Dist: ollama; extra == 'examples'
36
+ Provides-Extra: integrations
37
+ Requires-Dist: langchain-core>=1.0.7; extra == 'integrations'
38
+ Requires-Dist: langchain-openai>=1.0.3; extra == 'integrations'
39
+ Requires-Dist: llama-index-llms-ollama>=0.9.0; extra == 'integrations'
40
+ Requires-Dist: llama-index-llms-openai-like>=0.5.3; extra == 'integrations'
41
+ Requires-Dist: llama-index>=0.14.8; extra == 'integrations'
42
+ Provides-Extra: server
43
+ Requires-Dist: fastapi>=0.115; extra == 'server'
44
+ Requires-Dist: sentence-transformers>=5.0; extra == 'server'
45
+ Requires-Dist: uvicorn[standard]>=0.30; extra == 'server'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # SimpleVecDB
49
+
50
+ [![CI](https://github.com/coderdayton/simplevecdb/actions/workflows/ci.yml/badge.svg)](https://github.com/coderdayton/simplevecdb/actions)
51
+ [![PyPI](https://img.shields.io/pypi/v/simplevecdb?color=blue)](https://pypi.org/project/simplevecdb/)
52
+ [![License: MIT](https://img.shields.io/github/license/coderdayton/simplevecdb)](LICENSE)
53
+ [![GitHub Stars](https://img.shields.io/github/stars/coderdayton/simplevecdb?style=social)](https://github.com/coderdayton/simplevecdb)
54
+
55
+ <a href='https://ko-fi.com/U7U01WTJF9' target='_blank'><img height='36' style='border:0px;height:36px;' src='https://storage.ko-fi.com/cdn/kofi3.png?v=6' border='0' alt='Buy Me a Coffee at ko-fi.com' /></a>
56
+
57
+ **A local-first, embedded vector database backed by SQLite and usearch.**
58
+
59
+ SimpleVecDB pairs **Chroma-like ergonomics** with a **file-based** store — a SQLite database for metadata and text alongside a `usearch` HNSW index per collection. It provides high-performance vector search, quantization, and hybrid retrieval with no separate services to run. It fits local RAG pipelines, offline agents, and any application that needs production-grade vector search without the operational overhead of a hosted database.
60
+
61
+ ## Why SimpleVecDB?
62
+
63
+ - **Zero Infrastructure** — Local files on disk: a SQLite database plus a `usearch` index. No Docker, no Redis, no external services.
64
+ - **High Performance** — usearch HNSW indexing with adaptive search: brute-force under 10k vectors (perfect recall), HNSW above that.
65
+ - **Portable** — Runs anywhere SQLite runs: Linux, macOS, Windows, and WASM.
66
+ - **Async Support** — A complete async/await surface with optional executor injection for thread-safe ONNX/usearch sharing.
67
+ - **Integrations Included** — Optional FastAPI embeddings server and LangChain/LlamaIndex adapters via the `[integrations]` extra.
68
+ - **Production Ready** — Hybrid search (BM25 + vector), metadata filtering, multi-collection support, and automatic hardware acceleration.
69
+
70
+ ### When to Choose SimpleVecDB
71
+
72
+ | Use Case | SimpleVecDB | Cloud Vector DB |
73
+ | :----------------------------- | :-------------------- | :----------------------- |
74
+ | **Local RAG applications** | ✅ Perfect fit | ❌ Overkill + latency |
75
+ | **Offline-first agents** | ✅ No internet needed | ❌ Requires connectivity |
76
+ | **Prototyping & MVPs** | ✅ Zero config | ⚠️ Setup overhead |
77
+ | **Multi-tenant SaaS at scale** | ⚠️ Consider sharding | ✅ Built for this |
78
+ | **Budget-conscious projects** | ✅ $0/month | ❌ $50-500+/month |
79
+
80
+ ## Prerequisites
81
+
82
+ **System Requirements:**
83
+
84
+ - Python 3.10+
85
+ - SQLite 3.35+ with FTS5 support (included in Python 3.8+ standard library)
86
+ - 50MB+ disk space for core library, 500MB+ with `[server]` extras
87
+
88
+ **Optional for GPU Acceleration:**
89
+
90
+ - CUDA 11.8+ for NVIDIA GPUs
91
+ - Metal Performance Shaders (MPS) for Apple Silicon
92
+
93
+ > **Note:** If you use a custom-compiled SQLite, make sure it was built with `-DSQLITE_ENABLE_FTS5` so that full-text search is available.
94
+
95
+ ## Installation
96
+
97
+ ```bash
98
+ # Standard installation (includes clustering, encryption)
99
+ pip install simplevecdb
100
+
101
+ # With LangChain & LlamaIndex integrations
102
+ pip install "simplevecdb[integrations]"
103
+
104
+ # With local embeddings server (adds 500MB+ models)
105
+ pip install "simplevecdb[server]"
106
+ ```
107
+
108
+ **What's included by default:**
109
+ - Vector search with HNSW indexing
110
+ - Clustering (K-means, MiniBatch K-means, HDBSCAN)
111
+ - Encryption (SQLCipher AES-256)
112
+ - Async support
113
+
114
+ **Verify Installation:**
115
+
116
+ ```bash
117
+ python -c "import simplevecdb; print(simplevecdb.__version__)"
118
+ ```
119
+
120
+ ## Quickstart
121
+
122
+ SimpleVecDB is just a storage and search layer — it doesn't ship an LLM
123
+ and won't generate embeddings for you. Bring whichever embedding source
124
+ you already use; three common options are shown below.
125
+
126
+ ### Option 1: OpenAI embeddings
127
+
128
+ ```python
129
+ from simplevecdb import VectorDB
130
+ from openai import OpenAI
131
+
132
+ client = OpenAI()
133
+ db = VectorDB("notes.db")
134
+ notes = db.collection("personal")
135
+
136
+ def embed(text: str) -> list[float]:
137
+ return (
138
+ client.embeddings
139
+ .create(model="text-embedding-3-small", input=text)
140
+ .data[0].embedding
141
+ )
142
+
143
+ entries = [
144
+ ("Cherry MX silent reds bottom out around 45g — quieter than browns", "keyboards"),
145
+ ("Sourdough hydration sweet spot is ~75% with this flour", "baking"),
146
+ ("EXPLAIN ANALYZE showed seq scan; ANALYZE on the table fixed it", "work"),
147
+ ("Passport renewal took 3 weeks, not the advertised 6–8", "admin"),
148
+ ]
149
+
150
+ notes.add_texts(
151
+ texts=[t for t, _ in entries],
152
+ embeddings=[embed(t) for t, _ in entries],
153
+ metadatas=[{"tag": tag} for _, tag in entries],
154
+ )
155
+
156
+ hits = notes.similarity_search(embed("how loud are silent reds"), k=2)
157
+ for doc, score in hits:
158
+ print(f"{score:.3f} {doc.page_content}")
159
+
160
+ work = notes.similarity_search(
161
+ embed("query plan slow"),
162
+ k=5,
163
+ filter={"tag": "work"},
164
+ )
165
+ ```
166
+
167
+ ### Option 2: Fully local (no network, no API key)
168
+
169
+ ```bash
170
+ pip install "simplevecdb[server]"
171
+ ```
172
+
173
+ ```python
174
+ from simplevecdb import VectorDB
175
+ from simplevecdb.embeddings.models import embed_texts
176
+
177
+ db = VectorDB("notes.db")
178
+ notes = db.collection("personal")
179
+
180
+ texts = [
181
+ "Cherry MX silent reds bottom out around 45g",
182
+ "Sourdough hydration sweet spot is ~75% with this flour",
183
+ "EXPLAIN ANALYZE showed seq scan; ANALYZE on the table fixed it",
184
+ ]
185
+ notes.add_texts(texts=texts, embeddings=embed_texts(texts))
186
+
187
+ vec = notes.similarity_search(embed_texts(["quieter switches"])[0], k=2)
188
+ mixed = notes.hybrid_search("postgres slow query", k=3)
189
+ ```
190
+
191
+ If you'd rather hit an HTTP endpoint than import the embedding models
192
+ directly, the bundled server exposes the same interface as OpenAI's
193
+ embeddings API:
194
+
195
+ ```bash
196
+ simplevecdb-server --port 8000 # default model, auto warm-up
197
+ simplevecdb-server --host 0.0.0.0 --port 9000
198
+ simplevecdb-server --no-warmup # skip the model preload
199
+ simplevecdb-server --help
200
+ ```
201
+
202
+ Server tuning (model registry, rate limits, API keys, CORS, CUDA) lives
203
+ in the [Setup Guide](https://coderdayton.github.io/SimpleVecDB/ENV_SETUP).
204
+
205
+ ### Option 3: LangChain or LlamaIndex
206
+
207
+ Already using one of the major RAG frameworks? Use SimpleVecDB as the
208
+ vector store:
209
+
210
+ ```bash
211
+ pip install "simplevecdb[integrations]"
212
+ ```
213
+
214
+ ```python
215
+ from simplevecdb.integrations.langchain import SimpleVecDBVectorStore
216
+ from langchain_openai import OpenAIEmbeddings
217
+
218
+ store = SimpleVecDBVectorStore(
219
+ db_path="notes.db",
220
+ embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
221
+ )
222
+
223
+ store.add_texts([
224
+ "Cherry MX silent reds bottom out around 45g",
225
+ "EXPLAIN ANALYZE showed seq scan; ANALYZE on the table fixed it",
226
+ ])
227
+ store.similarity_search("quieter switches", k=1)
228
+ store.hybrid_search("postgres performance", k=3)
229
+ ```
230
+
231
+ The LlamaIndex adapter follows the same shape:
232
+
233
+ ```python
234
+ from simplevecdb.integrations.llamaindex import SimpleVecDBLlamaStore
235
+ from llama_index.embeddings.openai import OpenAIEmbedding
236
+
237
+ store = SimpleVecDBLlamaStore(
238
+ db_path="notes.db",
239
+ embedding=OpenAIEmbedding(model="text-embedding-3-small"),
240
+ )
241
+ ```
242
+
243
+ End-to-end notebooks (including a fully local Ollama RAG) live in the
244
+ [examples gallery](https://coderdayton.github.io/SimpleVecDB/examples/).
245
+
246
+ ## Feature Highlights
247
+
248
+ A few of the things SimpleVecDB does well — see
249
+ [`docs/Features.md`](docs/Features.md) for the comprehensive list.
250
+
251
+ - **Vector + keyword + hybrid search** — cosine / L2 similarity, BM25
252
+ via SQLite FTS5, and Reciprocal Rank Fusion in one collection.
253
+ - **Adaptive HNSW** — brute-force for <10k vectors (perfect recall),
254
+ `usearch` HNSW above that. Override per query with `exact=True/False`.
255
+ - **Quantization** — `FLOAT32`, `FLOAT16`, `INT8`, `BIT` for 1×–32×
256
+ compression.
257
+ - **Multi-collection + cross-collection search** — isolated namespaces in
258
+ one database, with merged ranked search across them.
259
+ - **Mongo-style filters** — `$eq $ne $gt $gte $lt $lte $in $nin $exists
260
+ $between` on metadata, edges, and events.
261
+ - **Memory primitives (v2.6.1)** — pending-vector buffer with atomic
262
+ flush, weighted directed edges, append-only event feed, TTL with
263
+ delete/callback sweep, and a threshold-driven rebuild scheduler.
264
+ - **Atomic counters & transactions (v2.6.1)** — `increment_metadata` for
265
+ JSON deltas in one statement; SAVEPOINT-backed `db.transaction()` /
266
+ `collection.tx()` rolling all catalog writes back on error.
267
+ - **Async, encryption, clustering, hierarchies** — full async surface
268
+ (with executor injection), SQLCipher AES-256, K-means / MiniBatch
269
+ K-means / HDBSCAN, parent/child relationships.
270
+ - **Framework integrations** — drop-in `LangChain` and `LlamaIndex`
271
+ adapters via the `[integrations]` extra; optional FastAPI embeddings
272
+ server via `[server]`.
273
+
274
+ For full method-level coverage, see [the Features doc](docs/Features.md)
275
+ or the [API reference](https://coderdayton.github.io/SimpleVecDB/api/core).
276
+
277
+
278
+ ## Performance Benchmarks
279
+
280
+ **10,000 vectors, 384 dimensions, k=10 search** — [Full benchmarks →](https://coderdayton.github.io/SimpleVecDB/benchmarks)
281
+
282
+ | Quantization | Storage | Query Time | Compression |
283
+ | :----------- | :------- | :--------- | :---------- |
284
+ | FLOAT32 | 36.0 MB | 0.20 ms | 1x |
285
+ | FLOAT16 | 28.7 MB | 0.20 ms | 2x |
286
+ | INT8 | 25.0 MB | 0.16 ms | 4x |
287
+ | BIT | 21.8 MB | 0.08 ms | 32x |
288
+
289
+ **Key highlights:**
290
+ - **3-34x faster** than brute-force for collections >10k vectors
291
+ - **Adaptive search**: perfect recall for small collections, HNSW for large
292
+ - **FLOAT16 recommended**: best balance of speed, memory, and precision
293
+
294
+ ## Documentation
295
+
296
+ - **[Features](docs/Features.md)** — Comprehensive list of every capability, grouped by area
297
+ - **[Setup Guide](https://coderdayton.github.io/SimpleVecDB/ENV_SETUP)** — Environment variables, server configuration, authentication
298
+ - **[API Reference](https://coderdayton.github.io/SimpleVecDB/api/core)** — Complete class/method documentation with type signatures
299
+ - **[Benchmarks](https://coderdayton.github.io/SimpleVecDB/benchmarks)** — Quantization strategies, batch sizes, hardware optimization
300
+ - **[Integration Examples](https://coderdayton.github.io/SimpleVecDB/examples)** — RAG notebooks, Ollama workflows, production patterns
301
+ - **[Contributing Guide](CONTRIBUTING.md)** — Development setup, testing, PR guidelines
302
+
303
+ ## Troubleshooting
304
+
305
+ **Import Error: `sqlite3.OperationalError: no such module: fts5`**
306
+
307
+ ```bash
308
+ # Your Python's SQLite was compiled without FTS5
309
+ # Solution: Install Python from python.org (includes FTS5) or compile SQLite with:
310
+ # -DSQLITE_ENABLE_FTS5
311
+ ```
312
+
313
+ **Dimension Mismatch Error**
314
+
315
+ ```python
316
+ # Ensure all vectors in a collection have identical dimensions
317
+ collection = db.collection("docs", dim=384) # Explicit dimension
318
+ ```
319
+
320
+ **CUDA Not Detected (GPU Available)**
321
+
322
+ ```bash
323
+ # Verify CUDA installation
324
+ python -c "import torch; print(torch.cuda.is_available())"
325
+
326
+ # Reinstall PyTorch with CUDA support
327
+ pip install torch --index-url https://download.pytorch.org/whl/cu118
328
+ ```
329
+
330
+ **Slow Queries on Large Datasets**
331
+
332
+ - Enable quantization: `collection = db.collection("docs", quantization=Quantization.INT8)`
333
+ - For >10k vectors, HNSW is automatic; tune with `rebuild_index(connectivity=32)`
334
+ - Use `exact=False` to force HNSW even on smaller collections
335
+ - Use metadata filtering to reduce search space
336
+
337
+ ## Roadmap
338
+
339
+ What's on the near-term radar:
340
+
341
+ - [ ] Incremental clustering (online learning)
342
+ - [ ] Cluster visualization exports
343
+
344
+ For shipped capabilities, see [`docs/Features.md`](docs/Features.md) and the
345
+ release-by-release [Changelog](CHANGELOG.md). Vote on these or propose new
346
+ ideas in [GitHub Discussions](https://github.com/coderdayton/simplevecdb/discussions).
347
+
348
+ ## Contributing
349
+
350
+ Contributions are welcome — bug fixes, documentation improvements, and new feature proposals alike:
351
+
352
+ 1. Read [CONTRIBUTING.md](CONTRIBUTING.md) for development setup
353
+ 2. Check existing [Issues](https://github.com/coderdayton/simplevecdb/issues) and [Discussions](https://github.com/coderdayton/simplevecdb/discussions)
354
+ 3. Open a PR with a clear description and tests
355
+
356
+ ## Community & Support
357
+
358
+ **Get Help:**
359
+
360
+ - [GitHub Discussions](https://github.com/coderdayton/simplevecdb/discussions) — Q&A and feature requests
361
+ - [GitHub Issues](https://github.com/coderdayton/simplevecdb/issues) — Bug reports
362
+
363
+ **Stay Updated:**
364
+
365
+ - [GitHub Releases](https://github.com/coderdayton/simplevecdb/releases) — Changelog and updates
366
+ - [Examples Gallery](https://coderdayton.github.io/SimpleVecDB/examples/) — Community-contributed notebooks
367
+
368
+ ## Support the Project
369
+
370
+ - **[Sponsor on Ko-fi](https://ko-fi.com/xbbvii)** — one-time donations
371
+ - **Star the repository** — helps with visibility
372
+ - **[Report issues](https://github.com/coderdayton/simplevecdb/issues)** — bug reports and feedback
373
+ - **[Contribute](CONTRIBUTING.md)** — development setup and guidelines
374
+
375
+ ## License
376
+
377
+ [MIT License](LICENSE) — Free for personal and commercial use.