simplevecdb 2.6.1__tar.gz → 2.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.gitignore +4 -0
  2. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/CHANGELOG.md +106 -0
  3. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/PKG-INFO +18 -18
  4. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/README.md +17 -17
  5. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/CHANGELOG.md +106 -0
  6. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/ENV_SETUP.md +1 -0
  7. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/Features.md +3 -2
  8. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/index.md +9 -9
  9. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/pyproject.toml +1 -1
  10. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/async_core.py +6 -3
  11. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/config.py +31 -9
  12. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/core.py +171 -91
  13. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/embeddings/server.py +87 -0
  14. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/encryption.py +19 -3
  15. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/engine/catalog.py +84 -9
  16. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/engine/clustering.py +19 -2
  17. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/engine/search.py +24 -5
  18. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/engine/usearch_index.py +16 -3
  19. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/integrations/langchain.py +32 -3
  20. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/integrations/llamaindex.py +46 -6
  21. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/logging.py +3 -4
  22. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/utils.py +5 -0
  23. simplevecdb-2.6.2/tests/unit/integrations/test_llamaindex_filters_tier1.py +70 -0
  24. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_clustering.py +127 -0
  25. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_hierarchy.py +28 -0
  26. simplevecdb-2.6.2/tests/unit/test_tier1_fixes.py +241 -0
  27. simplevecdb-2.6.2/tests/unit/test_tier2_fixes.py +117 -0
  28. simplevecdb-2.6.2/tests/unit/test_tier3_fixes.py +65 -0
  29. simplevecdb-2.6.2/tests/unit/test_tier4_fixes.py +51 -0
  30. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/uv.lock +1 -1
  31. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.bandit +0 -0
  32. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.env.example +0 -0
  33. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/FUNDING.yml +0 -0
  34. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  35. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  36. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  37. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/dependabot.yml +0 -0
  38. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/workflows/ci.yml +0 -0
  39. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/workflows/publish.yml +0 -0
  40. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/workflows/security.yml +0 -0
  41. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.github/workflows/update-sponsors.yml +0 -0
  42. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/.python-version +0 -0
  43. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/CODE_OF_CONDUCT.md +0 -0
  44. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/CONTRIBUTING.md +0 -0
  45. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/LICENSE +0 -0
  46. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/SECURITY.md +0 -0
  47. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/CONTRIBUTING.md +0 -0
  48. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/async.md +0 -0
  49. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/config.md +0 -0
  50. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/core.md +0 -0
  51. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/embeddings.md +0 -0
  52. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/encryption.md +0 -0
  53. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/engine/catalog.md +0 -0
  54. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/engine/quantization.md +0 -0
  55. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/engine/search.md +0 -0
  56. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/integrations.md +0 -0
  57. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/api/types.md +0 -0
  58. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/benchmarks.md +0 -0
  59. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/examples.md +0 -0
  60. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/docs/guides/clustering.md +0 -0
  61. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/examples/auto_embed.py +0 -0
  62. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/examples/backend_benchmark.py +0 -0
  63. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/examples/embeddings/perf_benchmark.py +0 -0
  64. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/examples/quant_benchmark.py +0 -0
  65. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/examples/rag/langchain_rag.ipynb +0 -0
  66. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/examples/rag/llama_rag.ipynb +0 -0
  67. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/examples/rag/ollama_rag.ipynb +0 -0
  68. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/examples/smoke_test.py +0 -0
  69. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/lefthook.yml +0 -0
  70. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/mkdocs.yml +0 -0
  71. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/scripts/bump_version.py +0 -0
  72. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/scripts/check_version_sync.py +0 -0
  73. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/scripts/exercise_async_collection.py +0 -0
  74. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/scripts/track_metrics.py +0 -0
  75. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/__init__.py +0 -0
  76. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/constants.py +0 -0
  77. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/embeddings/__init__.py +0 -0
  78. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/embeddings/models.py +0 -0
  79. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/engine/__init__.py +0 -0
  80. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/engine/quantization.py +0 -0
  81. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/integrations/__init__.py +0 -0
  82. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/src/simplevecdb/types.py +0 -0
  83. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/conftest.py +0 -0
  84. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/integration/test_langchain.py +0 -0
  85. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/integration/test_llamaindex.py +0 -0
  86. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/integration/test_rag.py +0 -0
  87. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/integration/test_server.py +0 -0
  88. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/integration/test_v21_features.py +0 -0
  89. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/perf/test_batch_detection.py +0 -0
  90. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/perf/test_performance.py +0 -0
  91. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/__init__.py +0 -0
  92. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_batch_detection.py +0 -0
  93. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_core_additional_coverage.py +0 -0
  94. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_factory_methods.py +0 -0
  95. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_filters.py +0 -0
  96. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_initialization.py +0 -0
  97. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_missing_coverage.py +0 -0
  98. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_quantization.py +0 -0
  99. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_similarity_search.py +0 -0
  100. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_v25_correctness.py +0 -0
  101. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_v25_features.py +0 -0
  102. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_v25_robustness.py +0 -0
  103. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/core/test_v26_safety.py +0 -0
  104. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/embeddings/__init__.py +0 -0
  105. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/embeddings/test_models.py +0 -0
  106. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/embeddings/test_repo_id_validation.py +0 -0
  107. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/embeddings/test_server.py +0 -0
  108. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/embeddings/test_server_coverage.py +0 -0
  109. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/embeddings/test_v25_enhancements.py +0 -0
  110. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/engine/test_v26_quantization_clustering.py +0 -0
  111. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/integrations/__init__.py +0 -0
  112. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/integrations/test_langchain_coverage.py +0 -0
  113. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/integrations/test_llamaindex_coverage.py +0 -0
  114. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/integrations/test_llamaindex_review_pass_3.py +0 -0
  115. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/integrations/test_llamaindex_v26.py +0 -0
  116. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_async.py +0 -0
  117. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_async_coverage.py +0 -0
  118. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_async_v26.py +0 -0
  119. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_catalog_coverage.py +0 -0
  120. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_config.py +0 -0
  121. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_core.py +0 -0
  122. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_cross_collection_search.py +0 -0
  123. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_encryption.py +0 -0
  124. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_encryption_coverage.py +0 -0
  125. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_encryption_salt.py +0 -0
  126. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_encryption_v1_format.py +0 -0
  127. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_error_handling.py +0 -0
  128. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_multi_collection.py +0 -0
  129. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_search.py +0 -0
  130. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_search_coverage.py +0 -0
  131. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_search_missing_coverage.py +0 -0
  132. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_streaming.py +0 -0
  133. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_types.py +0 -0
  134. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_usearch_index_missing_coverage.py +0 -0
  135. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_utils.py +0 -0
  136. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_v26_1_features.py +0 -0
  137. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_v26_encryption_review_pass_3.py +0 -0
  138. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_v26_misc.py +0 -0
  139. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_v26_review_pass_3.py +0 -0
  140. {simplevecdb-2.6.1 → simplevecdb-2.6.2}/tests/unit/test_v26_review_pass_4.py +0 -0
@@ -68,3 +68,7 @@ simplevecdb_plan.md
68
68
  AGENTS.md
69
69
  NEXT_UPDATES.md
70
70
  pro_pack/
71
+
72
+
73
+ # Local notes
74
+ IMPORTANT.md
@@ -5,6 +5,112 @@ All notable changes to SimpleVecDB will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.6.2] - 2026-06-06
9
+
10
+ ### Correctness and contract fixes
11
+
12
+ Hardening of the index-rebuild, search, clustering, and integration layers
13
+ surfaced by a code review. Three intentional behavior changes are noted under
14
+ “Changed”.
15
+
16
+ #### Fixed
17
+
18
+ - **`rebuild_index` no longer bricks a collection on failure** — if building or
19
+ swapping the new HNSW index raises after the live index is closed, the
20
+ collection re-opens the intact on-disk index instead of holding a closed one.
21
+ - **Catalog write lock released on connection error** — a raising
22
+ `connection.__enter__` no longer leaks the catalog lock (which could deadlock
23
+ the database).
24
+ - **Max-Marginal-Relevance respects the distance metric** — MMR on `l2`
25
+ collections used a cosine-specific relevance formula that swamped the
26
+ diversity term; it now uses a bounded, metric-appropriate relevance.
27
+ - **`similarity_search_batch` fills `k` under filters and accepts text queries**
28
+ — large filtered batches no longer silently under-deliver, and a text query in
29
+ a large batch behaves the same as in a small one.
30
+ - **Clustering handles impossible `n_clusters`** — `ClusterEngine.cluster_vectors`
31
+ raises a clear error when `n_clusters` exceeds the number of vectors;
32
+ `Collection.cluster()` caps `n_clusters` to the number of vectors actually
33
+ clustered (the sample when `sample_size` is set, fixing a latent error when
34
+ `n_clusters > sample_size`).
35
+ - **Metadata filter keys match literally** — a filter key containing a dot
36
+ (e.g. `{"a.b": x}`) now matches the literal top-level key `a.b` instead of the
37
+ nested JSON path `a → b`, consistent with the Python filter path. Keys
38
+ containing a double-quote are rejected.
39
+ - **BIT-quantized vector retrieval unpacks correctly** — `UsearchIndex.get()`
40
+ (used by the MMR fallback) returned packed bytes for BIT indexes instead of
41
+ the unpacked ±1 float vectors; it now unpacks them.
42
+ - **`rebuild_index` no longer blocks the database during the HNSW build** — the
43
+ expensive build runs without the shared lock (held only to snapshot and swap);
44
+ writes that land during the build are folded into the new index before the
45
+ swap.
46
+ - **Embedding server caps request body size** — an ASGI middleware rejects
47
+ request bodies larger than the server's own accept limits before they are
48
+ buffered/parsed, closing an unauthenticated memory-exhaustion vector (only
49
+ relevant with the `[server]` extra exposed on a network). A missing
50
+ encryption salt sidecar now logs a warning instead of silently falling back
51
+ to the shared legacy salt.
52
+ - **Robustness pass** — malformed FTS5 keyword queries raise `ValueError` instead
53
+ of a raw SQLite error; the cluster-state table is created eagerly so a
54
+ rolled-back first `save_cluster` cannot desync it; a non-integer
55
+ `EMBEDDING_BATCH_SIZE`/`EMBEDDING_SERVER_MAX_REQUEST_ITEMS` env value warns and
56
+ falls back instead of crashing import; `vacuum()` holds the DB lock; a failed
57
+ index add after the catalog commit is logged (divergence visibility); hybrid
58
+ search applies the Python metadata filter on the keyword side too (SQL/Python
59
+ parity); `logging.configure_logging` swaps handlers atomically.
60
+ - **LangChain `asimilarity_search_with_score`** offloads to a thread instead of
61
+ blocking the event loop.
62
+
63
+ #### Changed
64
+
65
+ - **`AsyncVectorCollection.increment_metadata` now returns `int`** (1 if the row
66
+ existed and was updated, 0 otherwise), matching the synchronous API; it
67
+ previously discarded the value and returned `None`.
68
+ - **LlamaIndex metadata filters fail loudly on unsupported shapes** — the
69
+ `SimpleVecDBLlamaStore` adapter now maps comparison operators
70
+ (`$gt/$gte/$lt/$lte/$ne/$in/$nin`) instead of silently treating them as
71
+ equality, and raises `NotImplementedError` for `OR`/`NOT` conditions and
72
+ unsupported operators rather than returning wrong results.
73
+ - **LangChain relevance scoring now works** — `SimpleVecDBVectorStore` implements
74
+ `_select_relevance_score_fn`, so `similarity_search_with_relevance_scores` and
75
+ `as_retriever(search_type="similarity_score_threshold")` return metric-aware
76
+ `[0, 1]` relevance (higher = better). `similarity_search_with_score` still
77
+ returns the raw distance (FAISS/Chroma convention), now documented as such.
78
+
79
+ ### Clustering and hierarchy fixes
80
+
81
+ Internal correctness and performance work on the clustering and hierarchy
82
+ layers. No public API changes; existing databases are unaffected.
83
+
84
+ #### Fixed
85
+
86
+ - **`load_cluster` survives empty k-means clusters** — when k-means leaves a
87
+ requested cluster empty (common with duplicate vectors or `n_clusters` near
88
+ the number of distinct points), the stored `n_clusters` is smaller than the
89
+ number of centroid rows. The centroid reshape now derives its row count from
90
+ the stored buffer rather than `n_clusters`, which previously raised
91
+ `ValueError` on load.
92
+ - **`assign_to_cluster` matches metadata keys literally** — a `metadata_key`
93
+ containing `.` or `[` is now matched as a literal top-level key (via
94
+ `json_each`) instead of being misread as a nested JSON path, which had caused
95
+ every already-assigned document to be re-assigned on each call.
96
+ - **`cluster(algorithm="hdbscan", sample_size=…)` raises instead of silently
97
+ dropping documents** — HDBSCAN produces no centroids, so out-of-sample
98
+ documents cannot be assigned. The combination now raises a clear `ValueError`
99
+ rather than clustering only the sample.
100
+
101
+ #### Performance
102
+
103
+ - **BLAS-backed out-of-sample centroid assignment** — nearest-centroid
104
+ assignment uses the `‖c‖² − 2·x·c` expansion (a single matmul) instead of
105
+ materialising the dense `(n_vectors, n_centroids, dim)` broadcast temporary
106
+ that could exhaust memory on large collections.
107
+ - **Unassigned-id lookup pushed into SQLite** — `assign_to_cluster(doc_ids=None)`
108
+ finds documents lacking the cluster key with one `json_each` query instead of
109
+ loading and JSON-parsing every row's text and metadata.
110
+ - **Bounded ancestor-walk for cycle detection** — `set_parent` detects
111
+ parent/child cycles by walking the ancestor chain with a depth-bounded
112
+ recursive CTE instead of materialising the entire descendant subtree.
113
+
8
114
  ## [2.6.1] - 2026-05-10
9
115
 
10
116
  ### Storage, mutation, and eventing improvements
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: simplevecdb
3
- Version: 2.6.1
3
+ Version: 2.6.2
4
4
  Summary: Dead-simple local vector database powered by usearch HNSW.
5
5
  Project-URL: Homepage, https://github.com/CoderDayton/simplevecdb
6
6
  Project-URL: Repository, https://github.com/CoderDayton/simplevecdb
@@ -54,17 +54,17 @@ Description-Content-Type: text/markdown
54
54
 
55
55
  <a href='https://ko-fi.com/U7U01WTJF9' target='_blank'><img height='36' style='border:0px;height:36px;' src='https://storage.ko-fi.com/cdn/kofi3.png?v=6' border='0' alt='Buy Me a Coffee at ko-fi.com' /></a>
56
56
 
57
- **The dead-simple, local-first vector database.**
57
+ **A local-first, embedded vector database backed by SQLite and usearch.**
58
58
 
59
- SimpleVecDB brings **Chroma-like simplicity** to a single **SQLite file**. Built on `usearch` HNSW indexing, it offers high-performance vector search, quantization, and zero infrastructure headaches. Perfect for local RAG, offline agents, and indie hackers who need production-grade vector search without the operational overhead.
59
+ SimpleVecDB pairs **Chroma-like ergonomics** with a **file-based** store — a SQLite database for metadata and text alongside a `usearch` HNSW index per collection. It provides high-performance vector search, quantization, and hybrid retrieval with no separate services to run. It fits local RAG pipelines, offline agents, and any application that needs production-grade vector search without the operational overhead of a hosted database.
60
60
 
61
61
  ## Why SimpleVecDB?
62
62
 
63
- - **Zero Infrastructure** — Just a `.db` file. No Docker, no Redis, no cloud bills.
64
- - **Blazing Fast** — 10-100x faster search via usearch HNSW. Adaptive: brute-force for <10k vectors (perfect recall), HNSW for larger collections.
65
- - **Truly Portable** — Runs anywhere SQLite runs: Linux, macOS, Windows, even WASM.
66
- - **Async Ready** — Full async/await support with optional executor injection for thread-safe ONNX/usearch sharing.
67
- - **Batteries Included** — Optional FastAPI embeddings server + LangChain/LlamaIndex integrations via `[integrations]` extra.
63
+ - **Zero Infrastructure** — Local files on disk: a SQLite database plus a `usearch` index. No Docker, no Redis, no external services.
64
+ - **High Performance** — usearch HNSW indexing with adaptive search: brute-force under 10k vectors (perfect recall), HNSW above that.
65
+ - **Portable** — Runs anywhere SQLite runs: Linux, macOS, Windows, and WASM.
66
+ - **Async Support** — A complete async/await surface with optional executor injection for thread-safe ONNX/usearch sharing.
67
+ - **Integrations Included** — Optional FastAPI embeddings server and LangChain/LlamaIndex adapters via the `[integrations]` extra.
68
68
  - **Production Ready** — Hybrid search (BM25 + vector), metadata filtering, multi-collection support, and automatic hardware acceleration.
69
69
 
70
70
  ### When to Choose SimpleVecDB
@@ -114,7 +114,7 @@ pip install "simplevecdb[server]"
114
114
  **Verify Installation:**
115
115
 
116
116
  ```bash
117
- python -c "from simplevecdb import VectorDB; print('SimpleVecDB installed successfully!')"
117
+ python -c "import simplevecdb; print(simplevecdb.__version__)"
118
118
  ```
119
119
 
120
120
  ## Quickstart
@@ -204,8 +204,8 @@ in the [Setup Guide](ENV_SETUP.md).
204
204
 
205
205
  ### Option 3: LangChain or LlamaIndex
206
206
 
207
- Already wired into one of the big RAG frameworks? Drop SimpleVecDB in
208
- as the vector store:
207
+ Already using one of the major RAG frameworks? Use SimpleVecDB as the
208
+ vector store:
209
209
 
210
210
  ```bash
211
211
  pip install "simplevecdb[integrations]"
@@ -255,7 +255,7 @@ A few of the things SimpleVecDB does well — see
255
255
  - **Quantization** — `FLOAT32`, `FLOAT16`, `INT8`, `BIT` for 1×–32×
256
256
  compression.
257
257
  - **Multi-collection + cross-collection search** — isolated namespaces in
258
- one `.db` file, with merged ranked search across them.
258
+ one database, with merged ranked search across them.
259
259
  - **Mongo-style filters** — `$eq $ne $gt $gte $lt $lte $in $nin $exists
260
260
  $between` on metadata, edges, and events.
261
261
  - **Memory primitives (v2.6.1)** — pending-vector buffer with atomic
@@ -347,7 +347,7 @@ ideas in [GitHub Discussions](https://github.com/coderdayton/simplevecdb/discuss
347
347
 
348
348
  ## Contributing
349
349
 
350
- Contributions are welcome! Whether you're fixing bugs, improving documentation, or proposing new features:
350
+ Contributions are welcome — bug fixes, documentation improvements, and new feature proposals alike:
351
351
 
352
352
  1. Read [CONTRIBUTING.md](CONTRIBUTING.md) for development setup
353
353
  2. Check existing [Issues](https://github.com/coderdayton/simplevecdb/issues) and [Discussions](https://github.com/coderdayton/simplevecdb/discussions)
@@ -365,12 +365,12 @@ Contributions are welcome! Whether you're fixing bugs, improving documentation,
365
365
  - [GitHub Releases](https://github.com/coderdayton/simplevecdb/releases) — Changelog and updates
366
366
  - [Examples Gallery](https://coderdayton.github.io/SimpleVecDB/examples/) — Community-contributed notebooks
367
367
 
368
- ## Other Ways to Support
368
+ ## Support the Project
369
369
 
370
- - **[Buy me a coffee](https://ko-fi.com/xbbvii)** - One-time donation
371
- - **Star the repo** - Helps with visibility
372
- - 🐛 **Report bugs** - Improve the project for everyone
373
- - 📝 **Contribute** - See [CONTRIBUTING.md](CONTRIBUTING.md)
370
+ - **[Sponsor on Ko-fi](https://ko-fi.com/xbbvii)** — one-time donations
371
+ - **Star the repository** — helps with visibility
372
+ - **[Report issues](https://github.com/coderdayton/simplevecdb/issues)** — bug reports and feedback
373
+ - **[Contribute](CONTRIBUTING.md)** — development setup and guidelines
374
374
 
375
375
  ## License
376
376
 
@@ -7,17 +7,17 @@
7
7
 
8
8
  <a href='https://ko-fi.com/U7U01WTJF9' target='_blank'><img height='36' style='border:0px;height:36px;' src='https://storage.ko-fi.com/cdn/kofi3.png?v=6' border='0' alt='Buy Me a Coffee at ko-fi.com' /></a>
9
9
 
10
- **The dead-simple, local-first vector database.**
10
+ **A local-first, embedded vector database backed by SQLite and usearch.**
11
11
 
12
- SimpleVecDB brings **Chroma-like simplicity** to a single **SQLite file**. Built on `usearch` HNSW indexing, it offers high-performance vector search, quantization, and zero infrastructure headaches. Perfect for local RAG, offline agents, and indie hackers who need production-grade vector search without the operational overhead.
12
+ SimpleVecDB pairs **Chroma-like ergonomics** with a **file-based** store — a SQLite database for metadata and text alongside a `usearch` HNSW index per collection. It provides high-performance vector search, quantization, and hybrid retrieval with no separate services to run. It fits local RAG pipelines, offline agents, and any application that needs production-grade vector search without the operational overhead of a hosted database.
13
13
 
14
14
  ## Why SimpleVecDB?
15
15
 
16
- - **Zero Infrastructure** — Just a `.db` file. No Docker, no Redis, no cloud bills.
17
- - **Blazing Fast** — 10-100x faster search via usearch HNSW. Adaptive: brute-force for <10k vectors (perfect recall), HNSW for larger collections.
18
- - **Truly Portable** — Runs anywhere SQLite runs: Linux, macOS, Windows, even WASM.
19
- - **Async Ready** — Full async/await support with optional executor injection for thread-safe ONNX/usearch sharing.
20
- - **Batteries Included** — Optional FastAPI embeddings server + LangChain/LlamaIndex integrations via `[integrations]` extra.
16
+ - **Zero Infrastructure** — Local files on disk: a SQLite database plus a `usearch` index. No Docker, no Redis, no external services.
17
+ - **High Performance** — usearch HNSW indexing with adaptive search: brute-force under 10k vectors (perfect recall), HNSW above that.
18
+ - **Portable** — Runs anywhere SQLite runs: Linux, macOS, Windows, and WASM.
19
+ - **Async Support** — A complete async/await surface with optional executor injection for thread-safe ONNX/usearch sharing.
20
+ - **Integrations Included** — Optional FastAPI embeddings server and LangChain/LlamaIndex adapters via the `[integrations]` extra.
21
21
  - **Production Ready** — Hybrid search (BM25 + vector), metadata filtering, multi-collection support, and automatic hardware acceleration.
22
22
 
23
23
  ### When to Choose SimpleVecDB
@@ -67,7 +67,7 @@ pip install "simplevecdb[server]"
67
67
  **Verify Installation:**
68
68
 
69
69
  ```bash
70
- python -c "from simplevecdb import VectorDB; print('SimpleVecDB installed successfully!')"
70
+ python -c "import simplevecdb; print(simplevecdb.__version__)"
71
71
  ```
72
72
 
73
73
  ## Quickstart
@@ -157,8 +157,8 @@ in the [Setup Guide](ENV_SETUP.md).
157
157
 
158
158
  ### Option 3: LangChain or LlamaIndex
159
159
 
160
- Already wired into one of the big RAG frameworks? Drop SimpleVecDB in
161
- as the vector store:
160
+ Already using one of the major RAG frameworks? Use SimpleVecDB as the
161
+ vector store:
162
162
 
163
163
  ```bash
164
164
  pip install "simplevecdb[integrations]"
@@ -208,7 +208,7 @@ A few of the things SimpleVecDB does well — see
208
208
  - **Quantization** — `FLOAT32`, `FLOAT16`, `INT8`, `BIT` for 1×–32×
209
209
  compression.
210
210
  - **Multi-collection + cross-collection search** — isolated namespaces in
211
- one `.db` file, with merged ranked search across them.
211
+ one database, with merged ranked search across them.
212
212
  - **Mongo-style filters** — `$eq $ne $gt $gte $lt $lte $in $nin $exists
213
213
  $between` on metadata, edges, and events.
214
214
  - **Memory primitives (v2.6.1)** — pending-vector buffer with atomic
@@ -300,7 +300,7 @@ ideas in [GitHub Discussions](https://github.com/coderdayton/simplevecdb/discuss
300
300
 
301
301
  ## Contributing
302
302
 
303
- Contributions are welcome! Whether you're fixing bugs, improving documentation, or proposing new features:
303
+ Contributions are welcome — bug fixes, documentation improvements, and new feature proposals alike:
304
304
 
305
305
  1. Read [CONTRIBUTING.md](CONTRIBUTING.md) for development setup
306
306
  2. Check existing [Issues](https://github.com/coderdayton/simplevecdb/issues) and [Discussions](https://github.com/coderdayton/simplevecdb/discussions)
@@ -318,12 +318,12 @@ Contributions are welcome! Whether you're fixing bugs, improving documentation,
318
318
  - [GitHub Releases](https://github.com/coderdayton/simplevecdb/releases) — Changelog and updates
319
319
  - [Examples Gallery](https://coderdayton.github.io/SimpleVecDB/examples/) — Community-contributed notebooks
320
320
 
321
- ## Other Ways to Support
321
+ ## Support the Project
322
322
 
323
- - **[Buy me a coffee](https://ko-fi.com/xbbvii)** - One-time donation
324
- - **Star the repo** - Helps with visibility
325
- - 🐛 **Report bugs** - Improve the project for everyone
326
- - 📝 **Contribute** - See [CONTRIBUTING.md](CONTRIBUTING.md)
323
+ - **[Sponsor on Ko-fi](https://ko-fi.com/xbbvii)** — one-time donations
324
+ - **Star the repository** — helps with visibility
325
+ - **[Report issues](https://github.com/coderdayton/simplevecdb/issues)** — bug reports and feedback
326
+ - **[Contribute](CONTRIBUTING.md)** — development setup and guidelines
327
327
 
328
328
  ## License
329
329
 
@@ -5,6 +5,112 @@ All notable changes to SimpleVecDB will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.6.2] - 2026-06-06
9
+
10
+ ### Correctness and contract fixes
11
+
12
+ Hardening of the index-rebuild, search, clustering, and integration layers
13
+ surfaced by a code review. The intentional behavior changes are noted under
14
+ “Changed”.
15
+
16
+ #### Fixed
17
+
18
+ - **`rebuild_index` no longer bricks a collection on failure** — if building or
19
+ swapping the new HNSW index raises after the live index is closed, the
20
+ collection re-opens the intact on-disk index instead of holding a closed one.
21
+ - **Catalog write lock released on connection error** — a raising
22
+ `connection.__enter__` no longer leaks the catalog lock (which could deadlock
23
+ the database).
24
+ - **Max-Marginal-Relevance respects the distance metric** — MMR on `l2`
25
+ collections used a cosine-specific relevance formula that swamped the
26
+ diversity term; it now uses a bounded, metric-appropriate relevance.
27
+ - **`similarity_search_batch` fills `k` under filters and accepts text queries**
28
+ — large filtered batches no longer silently under-deliver, and a text query in
29
+ a large batch behaves the same as in a small one.
30
+ - **Clustering handles impossible `n_clusters`** — `ClusterEngine.cluster_vectors`
31
+ raises a clear error when `n_clusters` exceeds the number of vectors;
32
+ `Collection.cluster()` caps `n_clusters` to the number of vectors actually
33
+ clustered (the sample when `sample_size` is set, fixing a latent error when
34
+ `n_clusters > sample_size`).
35
+ - **Metadata filter keys match literally** — a filter key containing a dot
36
+ (e.g. `{"a.b": x}`) now matches the literal top-level key `a.b` instead of the
37
+ nested JSON path `a → b`, consistent with the Python filter path. Keys
38
+ containing a double-quote are rejected.
39
+ - **BIT-quantized vector retrieval unpacks correctly** — `UsearchIndex.get()`
40
+ (used by the MMR fallback) returned packed bytes for BIT indexes instead of
41
+ the unpacked ±1 float vectors; it now unpacks them.
42
+ - **`rebuild_index` no longer blocks the database during the HNSW build** — the
43
+ expensive build runs without the shared lock (held only to snapshot and swap);
44
+ writes that land during the build are folded into the new index before the
45
+ swap.
46
+ - **Embedding server caps request body size** — an ASGI middleware rejects
47
+ request bodies larger than the server's own accept limits before they are
48
+ buffered/parsed, closing an unauthenticated memory-exhaustion vector (only
49
+ relevant with the `[server]` extra exposed on a network). A missing
50
+ encryption salt sidecar now logs a warning instead of silently falling back
51
+ to the shared legacy salt.
52
+ - **Robustness pass** — malformed FTS5 keyword queries raise `ValueError` instead
53
+ of a raw SQLite error; the cluster-state table is created eagerly so a
54
+ rolled-back first `save_cluster` cannot desync it; a non-integer
55
+ `EMBEDDING_BATCH_SIZE`/`EMBEDDING_SERVER_MAX_REQUEST_ITEMS` env value warns and
56
+ falls back instead of crashing import; `vacuum()` holds the DB lock; a failed
57
+ index add after the catalog commit is logged (divergence visibility); hybrid
58
+ search applies the Python metadata filter on the keyword side too (SQL/Python
59
+ parity); `logging.configure_logging` swaps handlers atomically.
60
+ - **LangChain `asimilarity_search_with_score`** offloads to a thread instead of
61
+ blocking the event loop.
62
+
63
+ #### Changed
64
+
65
+ - **`AsyncVectorCollection.increment_metadata` now returns `int`** (1 if the row
66
+ existed and was updated, 0 otherwise), matching the synchronous API; it
67
+ previously discarded the value and returned `None`.
68
+ - **LlamaIndex metadata filters fail loudly on unsupported shapes** — the
69
+ `SimpleVecDBLlamaStore` adapter now maps comparison operators
70
+ (`$gt/$gte/$lt/$lte/$ne/$in/$nin`) instead of silently treating them as
71
+ equality, and raises `NotImplementedError` for `OR`/`NOT` conditions and
72
+ unsupported operators rather than returning wrong results.
73
+ - **LangChain relevance scoring now works** — `SimpleVecDBVectorStore` implements
74
+ `_select_relevance_score_fn`, so `similarity_search_with_relevance_scores` and
75
+ `as_retriever(search_type="similarity_score_threshold")` return metric-aware
76
+ `[0, 1]` relevance (higher = better). `similarity_search_with_score` still
77
+ returns the raw distance (FAISS/Chroma convention), now documented as such.
78
+
79
+ ### Clustering and hierarchy fixes
80
+
81
+ Internal correctness and performance work on the clustering and hierarchy
82
+ layers. No public API changes; existing databases are unaffected.
83
+
84
+ #### Fixed
85
+
86
+ - **`load_cluster` survives empty k-means clusters** — when k-means leaves a
87
+ requested cluster empty (common with duplicate vectors or `n_clusters` near
88
+ the number of distinct points), the stored `n_clusters` is smaller than the
89
+ number of centroid rows. The centroid reshape now derives its row count from
90
+ the stored buffer rather than `n_clusters`, which previously raised
91
+ `ValueError` on load.
92
+ - **`assign_to_cluster` matches metadata keys literally** — a `metadata_key`
93
+ containing `.` or `[` is now matched as a literal top-level key (via
94
+ `json_each`) instead of being misread as a nested JSON path, which had caused
95
+ every already-assigned document to be re-assigned on each call.
96
+ - **`cluster(algorithm="hdbscan", sample_size=…)` raises instead of silently
97
+ dropping documents** — HDBSCAN produces no centroids, so out-of-sample
98
+ documents cannot be assigned. The combination now raises a clear `ValueError`
99
+ rather than clustering only the sample.
100
+
101
+ #### Performance
102
+
103
+ - **BLAS-backed out-of-sample centroid assignment** — nearest-centroid
104
+ assignment uses the `‖c‖² − 2·x·c` expansion (a single matmul) instead of
105
+ materialising the dense `(n_vectors, n_centroids, dim)` broadcast temporary
106
+ that could exhaust memory on large collections.
107
+ - **Unassigned-id lookup pushed into SQLite** — `assign_to_cluster(doc_ids=None)`
108
+ finds documents lacking the cluster key with one `json_each` query instead of
109
+ loading and JSON-parsing every row's text and metadata.
110
+ - **Bounded ancestor-walk for cycle detection** — `set_parent` detects
111
+ parent/child cycles by walking the ancestor chain with a depth-bounded
112
+ recursive CTE instead of materialising the entire descendant subtree.
113
+
8
114
  ## [2.6.1] - 2026-05-10
9
115
 
10
116
  ### Storage, mutation, and eventing improvements
@@ -53,6 +53,7 @@ Configuration for `simplevecdb-server`.
53
53
  | `SERVER_HOST` | Host to bind the server to. | `0.0.0.0` |
54
54
  | `SERVER_PORT` | Port to bind the server to. | `53287` (Code default) / `8000` (Example) |
55
55
  | `EMBEDDING_SERVER_MAX_REQUEST_ITEMS` | Max number of prompts allowed per `/v1/embeddings` request (protects latency). | `max(32, EMBEDDING_BATCH_SIZE)` |
56
+ | `EMBEDDING_SERVER_MAX_BODY_BYTES` | Max raw request body size in bytes; larger bodies are rejected (413) before being buffered/parsed, preventing memory exhaustion. | _Derived from the request-item and text-length limits (min 1 MiB)_ |
56
57
  | `EMBEDDING_SERVER_API_KEYS` | Comma-separated API keys to require `Authorization: Bearer`/`X-API-Key`. | _Disabled (unauthenticated)_ |
57
58
 
58
59
  When `EMBEDDING_SERVER_API_KEYS` is set, SimpleVecDB also tracks request counts and token usage per key. Call `GET /v1/usage` with the same key to retrieve your stats.
@@ -6,8 +6,9 @@ release-by-release detail, see the [Changelog](CHANGELOG.md).
6
6
 
7
7
  ## Storage & schema
8
8
 
9
- - **Single-file SQLite** — one `.db` file (or `:memory:`) holds everything:
10
- documents, vectors, FTS5 index, edges, events, TTL, clusters.
9
+ - **File-based storage** — a `.db` file (or `:memory:`) holds documents, the
10
+ FTS5 index, edges, events, TTL, and clusters; vectors live in a per-collection
11
+ `.usearch` HNSW index file alongside it.
11
12
  - **Multi-collection** — isolated namespaces per database via
12
13
  `db.collection("name")`. Each collection has its own quantization,
13
14
  distance metric, and (optional) embedding storage.
@@ -5,17 +5,17 @@
5
5
  [![License: MIT](https://img.shields.io/github/license/coderdayton/simplevecdb)](LICENSE)
6
6
  [![GitHub Stars](https://img.shields.io/github/stars/coderdayton/simplevecdb?style=social)](https://github.com/coderdayton/simplevecdb)
7
7
 
8
- **The dead-simple, local-first vector database.**
8
+ **A local-first, embedded vector database backed by SQLite and usearch.**
9
9
 
10
- SimpleVecDB brings **Chroma-like simplicity** to a single **SQLite file**. Built on **usearch HNSW** (v2.0+), it offers 10-100x faster vector search, quantization, and zero infrastructure headaches. Perfect for local RAG, offline agents, and indie hackers who need production-grade vector search without the operational overhead.
10
+ SimpleVecDB pairs **Chroma-like ergonomics** with a **file-based** store — a SQLite database for metadata and text alongside a `usearch` HNSW index per collection. It provides high-performance vector search, quantization, and hybrid retrieval with no separate services to run. It fits local RAG pipelines, offline agents, and any application that needs production-grade vector search without the operational overhead of a hosted database.
11
11
 
12
12
  ## Why SimpleVecDB?
13
13
 
14
- - **Zero Infrastructure** — Just a `.db` file. No Docker, no Redis, no cloud bills.
15
- - **Blazing Fast** — 10-100x faster with HNSW indexing, sub-millisecond queries on 100k+ vectors.
16
- - **Truly Portable** — Runs anywhere Python runs: Linux, macOS, Windows.
17
- - **Async Ready** — Full async/await support for web servers and concurrent workloads.
18
- - **Batteries Included** — Optional FastAPI embeddings server + LangChain/LlamaIndex integrations.
14
+ - **Zero Infrastructure** — Local files on disk: a SQLite database plus a `usearch` index. No Docker, no Redis, no external services.
15
+ - **High Performance** — usearch HNSW indexing with adaptive search: brute-force under 10k vectors (perfect recall), HNSW above that.
16
+ - **Portable** — Runs anywhere SQLite runs: Linux, macOS, Windows, and WASM.
17
+ - **Async Support** — A complete async/await surface with optional executor injection for thread-safe ONNX/usearch sharing.
18
+ - **Integrations Included** — Optional FastAPI embeddings server and LangChain/LlamaIndex adapters via the `[integrations]` extra.
19
19
  - **Production Ready** — Hybrid search (BM25 + vector), metadata filtering, multi-collection support, and automatic hardware acceleration.
20
20
 
21
21
  ### When to Choose SimpleVecDB
@@ -179,7 +179,7 @@ See **[Examples](examples.md)** for complete RAG workflows with Ollama.
179
179
 
180
180
  ### Multi-Collection Support
181
181
 
182
- Organize vectors by domain within a single database file:
182
+ Organize vectors by domain within a single database:
183
183
 
184
184
  ```python
185
185
  from simplevecdb import VectorDB, Quantization
@@ -272,7 +272,7 @@ See [Clustering Guide](guides/clustering.md) for algorithms, metrics, and use ca
272
272
 
273
273
  | Feature | Status | Description |
274
274
  | :------------------------ | :----- | :--------------------------------------------------------- |
275
- | **Single-File Storage** | ✅ | SQLite `.db` file + `.usearch` index files |
275
+ | **File-Based Storage** | ✅ | SQLite `.db` file + `.usearch` index files |
276
276
  | **Multi-Collection** | ✅ | Isolated namespaces per database |
277
277
  | **HNSW Indexing** | ✅ | 10-100x faster approximate nearest neighbor (usearch) |
278
278
  | **Vector Search** | ✅ | Cosine, Euclidean, Inner Product metrics |
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "simplevecdb"
3
- version = "2.6.1"
3
+ version = "2.6.2"
4
4
  description = "Dead-simple local vector database powered by usearch HNSW."
5
5
  authors = [{ name = "Dayton Dunbar", email = "coderdayton14@gmail.com" }]
6
6
  license = { text = "MIT" }
@@ -301,9 +301,12 @@ class AsyncVectorCollection:
301
301
  self,
302
302
  doc_id: int,
303
303
  deltas: dict[str, int | float],
304
- ) -> None:
305
- """Atomically apply numeric deltas to JSON metadata counters."""
306
- await self._run(self._collection.increment_metadata, doc_id, deltas)
304
+ ) -> int:
305
+ """Atomically apply numeric deltas to JSON metadata counters.
306
+
307
+ Returns 1 if the row existed and was updated, 0 otherwise.
308
+ """
309
+ return await self._run(self._collection.increment_metadata, doc_id, deltas)
307
310
 
308
311
  async def add_edge(
309
312
  self,
@@ -76,19 +76,41 @@ class Config:
76
76
  EMBEDDING_MODEL_REGISTRY_LOCKED: bool = _parse_bool_env(
77
77
  os.getenv("EMBEDDING_MODEL_REGISTRY_LOCKED"), True
78
78
  )
79
- # Auto-detect optimal batch size if not explicitly set
79
+ # Auto-detect optimal batch size if not explicitly set. Tolerate a malformed
80
+ # env value (e.g. "auto") instead of crashing every import of the package.
80
81
  _batch_size_env = os.getenv("EMBEDDING_BATCH_SIZE")
81
- EMBEDDING_BATCH_SIZE: int = (
82
- int(_batch_size_env)
83
- if _batch_size_env is not None
84
- else get_optimal_batch_size()
85
- )
82
+ try:
83
+ EMBEDDING_BATCH_SIZE: int = (
84
+ int(_batch_size_env)
85
+ if _batch_size_env is not None
86
+ else get_optimal_batch_size()
87
+ )
88
+ except ValueError:
89
+ import warnings as _warnings
90
+
91
+ _warnings.warn(
92
+ f"Invalid EMBEDDING_BATCH_SIZE={_batch_size_env!r}; using auto-detected size.",
93
+ stacklevel=2,
94
+ )
95
+ EMBEDDING_BATCH_SIZE = get_optimal_batch_size()
86
96
  _request_limit_env = os.getenv("EMBEDDING_SERVER_MAX_REQUEST_ITEMS") or os.getenv(
87
97
  "EMBEDDING_SERVER_MAX_BATCH"
88
98
  )
89
- EMBEDDING_SERVER_MAX_REQUEST_ITEMS: int = (
90
- int(_request_limit_env) if _request_limit_env else max(32, EMBEDDING_BATCH_SIZE)
91
- )
99
+ try:
100
+ EMBEDDING_SERVER_MAX_REQUEST_ITEMS: int = (
101
+ int(_request_limit_env)
102
+ if _request_limit_env
103
+ else max(32, EMBEDDING_BATCH_SIZE)
104
+ )
105
+ except ValueError:
106
+ import warnings as _warnings
107
+
108
+ _warnings.warn(
109
+ f"Invalid EMBEDDING_SERVER_MAX_REQUEST_ITEMS={_request_limit_env!r}; "
110
+ "using a default.",
111
+ stacklevel=2,
112
+ )
113
+ EMBEDDING_SERVER_MAX_REQUEST_ITEMS = max(32, EMBEDDING_BATCH_SIZE)
92
114
  EMBEDDING_SERVER_API_KEYS: set[str] = _parse_api_keys(
93
115
  os.getenv("EMBEDDING_SERVER_API_KEYS")
94
116
  )