simplevecdb 2.0.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. simplevecdb-2.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
  2. simplevecdb-2.1.0/.github/ISSUE_TEMPLATE/config.yml +8 -0
  3. simplevecdb-2.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +58 -0
  4. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.gitignore +4 -0
  5. {simplevecdb-2.0.0/docs → simplevecdb-2.1.0}/CHANGELOG.md +43 -0
  6. simplevecdb-2.0.0/README.md → simplevecdb-2.1.0/PKG-INFO +91 -4
  7. simplevecdb-2.0.0/PKG-INFO → simplevecdb-2.1.0/README.md +68 -24
  8. simplevecdb-2.1.0/docs/api/core.md +147 -0
  9. simplevecdb-2.1.0/docs/api/encryption.md +153 -0
  10. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/examples.md +122 -0
  11. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/index.md +46 -4
  12. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/mkdocs.yml +1 -0
  13. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/pyproject.toml +7 -1
  14. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/__init__.py +14 -2
  15. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/core.py +384 -9
  16. simplevecdb-2.1.0/src/simplevecdb/encryption.py +429 -0
  17. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/catalog.py +219 -8
  18. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/types.py +15 -0
  19. simplevecdb-2.1.0/tests/integration/test_v21_features.py +283 -0
  20. simplevecdb-2.1.0/tests/unit/core/test_core_additional_coverage.py +233 -0
  21. simplevecdb-2.1.0/tests/unit/test_encryption.py +381 -0
  22. simplevecdb-2.1.0/tests/unit/test_hierarchy.py +431 -0
  23. simplevecdb-2.1.0/tests/unit/test_streaming.py +294 -0
  24. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/uv.lock +89 -2
  25. simplevecdb-2.0.0/docs/api/core.md +0 -79
  26. simplevecdb-2.0.0/tests/unit/core/test_core_additional_coverage.py +0 -97
  27. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.bandit +0 -0
  28. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.env.example +0 -0
  29. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/FUNDING.yml +0 -0
  30. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/dependabot.yml +0 -0
  31. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/workflows/ci.yml +0 -0
  32. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/workflows/publish.yml +0 -0
  33. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/workflows/security.yml +0 -0
  34. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/workflows/update-sponsors.yml +0 -0
  35. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.pre-commit-config.yaml +0 -0
  36. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.python-version +0 -0
  37. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/CODE_OF_CONDUCT.md +0 -0
  38. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/CONTRIBUTING.md +0 -0
  39. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/LICENSE +0 -0
  40. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/SECURITY.md +0 -0
  41. {simplevecdb-2.0.0 → simplevecdb-2.1.0/docs}/CHANGELOG.md +0 -0
  42. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/CONTRIBUTING.md +0 -0
  43. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/ENV_SETUP.md +0 -0
  44. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/LICENSE +0 -0
  45. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/async.md +0 -0
  46. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/config.md +0 -0
  47. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/embeddings.md +0 -0
  48. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/engine/catalog.md +0 -0
  49. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/engine/quantization.md +0 -0
  50. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/engine/search.md +0 -0
  51. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/integrations.md +0 -0
  52. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/benchmarks.md +0 -0
  53. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/auto_embed.py +0 -0
  54. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/backend_benchmark.py +0 -0
  55. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/embeddings/perf_benchmark.py +0 -0
  56. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/quant_benchmark.py +0 -0
  57. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/rag/langchain_rag.ipynb +0 -0
  58. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/rag/llama_rag.ipynb +0 -0
  59. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/rag/ollama_rag.ipynb +0 -0
  60. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/smoke_test.py +0 -0
  61. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/async_core.py +0 -0
  62. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/config.py +0 -0
  63. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/constants.py +0 -0
  64. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/embeddings/__init__.py +0 -0
  65. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/embeddings/models.py +0 -0
  66. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/embeddings/server.py +0 -0
  67. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/__init__.py +0 -0
  68. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/quantization.py +0 -0
  69. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/search.py +0 -0
  70. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/usearch_index.py +0 -0
  71. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/integrations/__init__.py +0 -0
  72. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/integrations/langchain.py +0 -0
  73. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/integrations/llamaindex.py +0 -0
  74. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/logging.py +0 -0
  75. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/utils.py +0 -0
  76. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/conftest.py +0 -0
  77. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/integration/test_langchain.py +0 -0
  78. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/integration/test_llamaindex.py +0 -0
  79. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/integration/test_rag.py +0 -0
  80. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/integration/test_server.py +0 -0
  81. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/perf/test_batch_detection.py +0 -0
  82. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/perf/test_performance.py +0 -0
  83. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/__init__.py +0 -0
  84. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_batch_detection.py +0 -0
  85. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_factory_methods.py +0 -0
  86. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_filters.py +0 -0
  87. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_initialization.py +0 -0
  88. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_quantization.py +0 -0
  89. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_similarity_search.py +0 -0
  90. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/embeddings/__init__.py +0 -0
  91. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/embeddings/test_models.py +0 -0
  92. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/embeddings/test_server.py +0 -0
  93. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/integrations/__init__.py +0 -0
  94. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/integrations/test_langchain_coverage.py +0 -0
  95. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/integrations/test_llamaindex_coverage.py +0 -0
  96. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_async.py +0 -0
  97. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_config.py +0 -0
  98. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_core.py +0 -0
  99. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_error_handling.py +0 -0
  100. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_multi_collection.py +0 -0
  101. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_search.py +0 -0
  102. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_search_coverage.py +0 -0
  103. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_types.py +0 -0
  104. {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_utils.py +0 -0
@@ -0,0 +1,82 @@
1
+ name: Bug Report
2
+ description: Report a bug in SimpleVecDB
3
+ labels: ["bug"]
4
+ body:
5
+ - type: markdown
6
+ attributes:
7
+ value: |
8
+ Thanks for taking the time to report a bug. Please fill out the sections below.
9
+
10
+ - type: textarea
11
+ id: description
12
+ attributes:
13
+ label: Describe the bug
14
+ description: A clear and concise description of what the bug is.
15
+ validations:
16
+ required: true
17
+
18
+ - type: textarea
19
+ id: reproduction
20
+ attributes:
21
+ label: Reproduction steps
22
+ description: Minimal code to reproduce the issue.
23
+ placeholder: |
24
+ ```python
25
+ from simplevecdb import VectorDB
26
+ db = VectorDB(":memory:")
27
+ # ... code that triggers bug
28
+ ```
29
+ validations:
30
+ required: true
31
+
32
+ - type: textarea
33
+ id: expected
34
+ attributes:
35
+ label: Expected behavior
36
+ description: What you expected to happen.
37
+ validations:
38
+ required: true
39
+
40
+ - type: textarea
41
+ id: actual
42
+ attributes:
43
+ label: Actual behavior
44
+ description: What actually happened. Include error messages if applicable.
45
+ validations:
46
+ required: true
47
+
48
+ - type: input
49
+ id: version
50
+ attributes:
51
+ label: SimpleVecDB version
52
+ placeholder: "2.0.0"
53
+ validations:
54
+ required: true
55
+
56
+ - type: input
57
+ id: python-version
58
+ attributes:
59
+ label: Python version
60
+ placeholder: "3.11"
61
+ validations:
62
+ required: true
63
+
64
+ - type: dropdown
65
+ id: os
66
+ attributes:
67
+ label: Operating System
68
+ options:
69
+ - Linux
70
+ - macOS
71
+ - Windows
72
+ - Other
73
+ validations:
74
+ required: true
75
+
76
+ - type: textarea
77
+ id: additional
78
+ attributes:
79
+ label: Additional context
80
+ description: Any other context about the problem (logs, screenshots, etc.)
81
+ validations:
82
+ required: false
@@ -0,0 +1,8 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Documentation
4
+ url: https://simplevecdb.dev
5
+ about: Check the docs before opening an issue
6
+ - name: Discussions
7
+ url: https://github.com/coderdayton/simplevecdb/discussions
8
+ about: Ask questions and share ideas
@@ -0,0 +1,58 @@
1
+ name: Feature Request
2
+ description: Suggest a new feature or enhancement
3
+ labels: ["enhancement"]
4
+ body:
5
+ - type: markdown
6
+ attributes:
7
+ value: |
8
+ Thanks for suggesting a feature! Please describe what you'd like to see.
9
+
10
+ - type: textarea
11
+ id: problem
12
+ attributes:
13
+ label: Problem or motivation
14
+ description: What problem does this feature solve? Why do you need it?
15
+ placeholder: "I'm trying to do X but currently have to..."
16
+ validations:
17
+ required: true
18
+
19
+ - type: textarea
20
+ id: solution
21
+ attributes:
22
+ label: Proposed solution
23
+ description: How would you like this to work?
24
+ placeholder: |
25
+ ```python
26
+ # Example API usage
27
+ db = VectorDB("my.db")
28
+ db.new_feature(...)
29
+ ```
30
+ validations:
31
+ required: true
32
+
33
+ - type: textarea
34
+ id: alternatives
35
+ attributes:
36
+ label: Alternatives considered
37
+ description: Any alternative solutions or workarounds you've tried.
38
+ validations:
39
+ required: false
40
+
41
+ - type: dropdown
42
+ id: scope
43
+ attributes:
44
+ label: Scope
45
+ description: How big is this change?
46
+ options:
47
+ - Small (docs, minor tweak)
48
+ - Medium (new method, config option)
49
+ - Large (new module, breaking change)
50
+ validations:
51
+ required: true
52
+
53
+ - type: checkboxes
54
+ id: contribution
55
+ attributes:
56
+ label: Contribution
57
+ options:
58
+ - label: I'm willing to submit a PR for this feature
@@ -21,6 +21,10 @@ build/
21
21
  *.db
22
22
  *.sqlite
23
23
 
24
+ # OpenCode
25
+ .opencode/
26
+ opencode.json
27
+
24
28
  # Project specific
25
29
  simplevecdb_plan.md
26
30
  AGENTS.md
@@ -5,6 +5,49 @@ All notable changes to SimpleVecDB will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.1.0] - 2026-01-01
9
+
10
+ ### Added
11
+
12
+ - **SQLCipher Encryption Support** - Full at-rest encryption for sensitive data:
13
+ - `VectorDB(path, encryption_key="...")` enables AES-256 page-level database encryption
14
+ - Uses SQLCipher for transparent SQLite encryption (PRAGMA key)
15
+ - Usearch index files encrypted with AES-256-GCM (`.usearch.enc`)
16
+ - Zero performance overhead during search (decrypt on load, encrypt on save only)
17
+ - Key derivation: PBKDF2-SHA256 with 480,000 iterations for passphrases
18
+ - Install with `pip install simplevecdb[encryption]`
19
+
20
+ - **New encryption module** (`simplevecdb.encryption`):
21
+ - `create_encrypted_connection()` - SQLCipher connection factory
22
+ - `is_database_encrypted()` - Check if a database file is encrypted
23
+ - `encrypt_index_file()` / `decrypt_index_file()` - Index file encryption
24
+ - `EncryptionError` / `EncryptionUnavailableError` - New exception types
25
+
26
+ - **Streaming Insert API** - Memory-efficient large-scale ingestion:
27
+ - `collection.add_texts_streaming(iterable)` - Process from any iterator/generator
28
+ - Configurable `batch_size` parameter (default: config.EMBEDDING_BATCH_SIZE)
29
+ - Yields `StreamingProgress` after each batch for monitoring
30
+ - Optional `on_progress` callback for custom logging/UI updates
31
+ - New types: `StreamingProgress`, `ProgressCallback`
32
+
33
+ - **Hierarchical Document Relationships** - Parent/child document structure:
34
+ - `parent_ids` parameter in `add_texts()` to link documents
35
+ - `get_children(doc_id)` - Get direct child documents
36
+ - `get_parent(doc_id)` - Get parent document
37
+ - `get_descendants(doc_id, max_depth)` - Recursive children traversal
38
+ - `get_ancestors(doc_id, max_depth)` - Path to root
39
+ - `set_parent(doc_id, parent_id)` - Update relationships
40
+ - Uses SQLite recursive CTE for efficient traversal
41
+ - Auto-migrates existing databases (adds `parent_id` column)
42
+
43
+ ### Changed
44
+
45
+ - `check_migration()` now gracefully handles encrypted databases (returns `needs_migration=False`)
46
+
47
+ ### Dependencies
48
+
49
+ - New optional dependency group `[encryption]`: `sqlcipher3-binary>=0.5.0`, `cryptography>=41.0`
50
+
8
51
  ## [2.0.0] - 2025-12-23
9
52
 
10
53
  ### Breaking Changes
@@ -1,3 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: simplevecdb
3
+ Version: 2.1.0
4
+ Summary: Dead-simple local vector database powered by usearch HNSW.
5
+ Author-email: Dayton Dunbar <coderdayton14@gmail.com>
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: numpy>=2.0
10
+ Requires-Dist: psutil>=5.9.0
11
+ Requires-Dist: python-dotenv>=1.2.1
12
+ Requires-Dist: usearch>=2.12
13
+ Provides-Extra: encryption
14
+ Requires-Dist: cryptography>=41.0; extra == 'encryption'
15
+ Requires-Dist: sqlcipher3-binary>=0.5.0; extra == 'encryption'
16
+ Provides-Extra: examples
17
+ Requires-Dist: ollama; extra == 'examples'
18
+ Provides-Extra: server
19
+ Requires-Dist: fastapi>=0.115; extra == 'server'
20
+ Requires-Dist: sentence-transformers>=5.0; extra == 'server'
21
+ Requires-Dist: uvicorn[standard]>=0.30; extra == 'server'
22
+ Description-Content-Type: text/markdown
23
+
1
24
  # SimpleVecDB
2
25
 
3
26
  [![CI](https://github.com/coderdayton/simplevecdb/actions/workflows/ci.yml/badge.svg)](https://github.com/coderdayton/simplevecdb/actions)
@@ -51,6 +74,9 @@ pip install simplevecdb
51
74
 
52
75
  # With local embeddings server + HuggingFace models (500MB+)
53
76
  pip install "simplevecdb[server]"
77
+
78
+ # With encryption support (SQLCipher)
79
+ pip install "simplevecdb[encryption]"
54
80
  ```
55
81
 
56
82
  **Verify Installation:**
@@ -222,6 +248,63 @@ results = collection.similarity_search(
222
248
 
223
249
  > **Tip:** LangChain and LlamaIndex integrations support all search methods.
224
250
 
251
+ ### Encryption (v2.1+)
252
+
253
+ Protect sensitive data with AES-256 at-rest encryption:
254
+
255
+ ```bash
256
+ pip install "simplevecdb[encryption]"
257
+ ```
258
+
259
+ ```python
260
+ from simplevecdb import VectorDB
261
+
262
+ # Create encrypted database
263
+ db = VectorDB("secure.db", encryption_key="your-secret-key")
264
+ collection = db.collection("confidential")
265
+
266
+ collection.add_texts(["sensitive data"], embeddings=[[0.1]*384])
267
+ db.close()
268
+
269
+ # Reopen requires same key
270
+ db = VectorDB("secure.db", encryption_key="your-secret-key")
271
+ ```
272
+
273
+ ### Streaming Insert (v2.1+)
274
+
275
+ Memory-efficient ingestion for large datasets:
276
+
277
+ ```python
278
+ def load_documents():
279
+ for line in open("large_file.jsonl"):
280
+ doc = json.loads(line)
281
+ yield (doc["text"], doc.get("metadata"), doc.get("embedding"))
282
+
283
+ for progress in collection.add_texts_streaming(load_documents(), batch_size=1000):
284
+ print(f"Processed {progress['docs_processed']} documents")
285
+ ```
286
+
287
+ ### Document Hierarchies (v2.1+)
288
+
289
+ Organize documents in parent-child relationships:
290
+
291
+ ```python
292
+ # Add parent document
293
+ parent_ids = collection.add_texts(["Main document"], embeddings=[[0.1]*384])
294
+
295
+ # Add children
296
+ child_ids = collection.add_texts(
297
+ ["Chunk 1", "Chunk 2"],
298
+ embeddings=[[0.11]*384, [0.12]*384],
299
+ parent_ids=[parent_ids[0], parent_ids[0]]
300
+ )
301
+
302
+ # Navigate hierarchy
303
+ children = collection.get_children(parent_ids[0])
304
+ parent = collection.get_parent(child_ids[0])
305
+ descendants = collection.get_descendants(parent_ids[0])
306
+ ```
307
+
225
308
  ## Feature Matrix
226
309
 
227
310
  | Feature | Status | Description |
@@ -238,7 +321,9 @@ results = collection.similarity_search(
238
321
  | **Framework Integration** | ✅ | LangChain \& LlamaIndex adapters |
239
322
  | **Hardware Acceleration** | ✅ | Auto-detects CUDA/MPS/CPU + SIMD via usearch |
240
323
  | **Local Embeddings** | ✅ | HuggingFace models via `[server]` extras |
241
- | **Built-in Encryption** | 🔜 | SQLCipher integration for at-rest encryption |
324
+ | **Built-in Encryption** | ✅ | SQLCipher AES-256 at-rest encryption via `[encryption]` extras |
325
+ | **Streaming Insert** | ✅ | Memory-efficient large-scale ingestion with progress callbacks |
326
+ | **Document Hierarchies** | ✅ | Parent/child relationships for chunked docs |
242
327
 
243
328
  ## Performance Benchmarks
244
329
 
@@ -304,9 +389,11 @@ pip install torch --index-url https://download.pytorch.org/whl/cu118
304
389
  - [x] Multi-collection support
305
390
  - [x] HNSW indexing (usearch backend)
306
391
  - [x] Adaptive search (brute-force/HNSW)
307
- - [ ] SQLCipher encryption (at-rest data protection)
308
- - [ ] Streaming insert API for large-scale ingestion
309
- - [ ] Graph-based metadata relationships
392
+ - [x] SQLCipher encryption (at-rest data protection)
393
+ - [x] Streaming insert API for large-scale ingestion
394
+ - [x] Hierarchical document relationships (parent/child)
395
+ - [ ] Cross-collection search
396
+ - [ ] Vector clustering and auto-tagging
310
397
 
311
398
  Vote on features or propose new ones in [GitHub Discussions](https://github.com/coderdayton/simplevecdb/discussions).
312
399
 
@@ -1,23 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: simplevecdb
3
- Version: 2.0.0
4
- Summary: Dead-simple local vector database powered by usearch HNSW.
5
- Author-email: Dayton Dunbar <coderdayton14@gmail.com>
6
- License: MIT
7
- License-File: LICENSE
8
- Requires-Python: >=3.10
9
- Requires-Dist: numpy>=2.0
10
- Requires-Dist: psutil>=5.9.0
11
- Requires-Dist: python-dotenv>=1.2.1
12
- Requires-Dist: usearch>=2.12
13
- Provides-Extra: examples
14
- Requires-Dist: ollama; extra == 'examples'
15
- Provides-Extra: server
16
- Requires-Dist: fastapi>=0.115; extra == 'server'
17
- Requires-Dist: sentence-transformers>=5.0; extra == 'server'
18
- Requires-Dist: uvicorn[standard]>=0.30; extra == 'server'
19
- Description-Content-Type: text/markdown
20
-
21
1
  # SimpleVecDB
22
2
 
23
3
  [![CI](https://github.com/coderdayton/simplevecdb/actions/workflows/ci.yml/badge.svg)](https://github.com/coderdayton/simplevecdb/actions)
@@ -71,6 +51,9 @@ pip install simplevecdb
71
51
 
72
52
  # With local embeddings server + HuggingFace models (500MB+)
73
53
  pip install "simplevecdb[server]"
54
+
55
+ # With encryption support (SQLCipher)
56
+ pip install "simplevecdb[encryption]"
74
57
  ```
75
58
 
76
59
  **Verify Installation:**
@@ -242,6 +225,63 @@ results = collection.similarity_search(
242
225
 
243
226
  > **Tip:** LangChain and LlamaIndex integrations support all search methods.
244
227
 
228
+ ### Encryption (v2.1+)
229
+
230
+ Protect sensitive data with AES-256 at-rest encryption:
231
+
232
+ ```bash
233
+ pip install "simplevecdb[encryption]"
234
+ ```
235
+
236
+ ```python
237
+ from simplevecdb import VectorDB
238
+
239
+ # Create encrypted database
240
+ db = VectorDB("secure.db", encryption_key="your-secret-key")
241
+ collection = db.collection("confidential")
242
+
243
+ collection.add_texts(["sensitive data"], embeddings=[[0.1]*384])
244
+ db.close()
245
+
246
+ # Reopen requires same key
247
+ db = VectorDB("secure.db", encryption_key="your-secret-key")
248
+ ```
249
+
250
+ ### Streaming Insert (v2.1+)
251
+
252
+ Memory-efficient ingestion for large datasets:
253
+
254
+ ```python
255
+ def load_documents():
256
+ for line in open("large_file.jsonl"):
257
+ doc = json.loads(line)
258
+ yield (doc["text"], doc.get("metadata"), doc.get("embedding"))
259
+
260
+ for progress in collection.add_texts_streaming(load_documents(), batch_size=1000):
261
+ print(f"Processed {progress['docs_processed']} documents")
262
+ ```
263
+
264
+ ### Document Hierarchies (v2.1+)
265
+
266
+ Organize documents in parent-child relationships:
267
+
268
+ ```python
269
+ # Add parent document
270
+ parent_ids = collection.add_texts(["Main document"], embeddings=[[0.1]*384])
271
+
272
+ # Add children
273
+ child_ids = collection.add_texts(
274
+ ["Chunk 1", "Chunk 2"],
275
+ embeddings=[[0.11]*384, [0.12]*384],
276
+ parent_ids=[parent_ids[0], parent_ids[0]]
277
+ )
278
+
279
+ # Navigate hierarchy
280
+ children = collection.get_children(parent_ids[0])
281
+ parent = collection.get_parent(child_ids[0])
282
+ descendants = collection.get_descendants(parent_ids[0])
283
+ ```
284
+
245
285
  ## Feature Matrix
246
286
 
247
287
  | Feature | Status | Description |
@@ -258,7 +298,9 @@ results = collection.similarity_search(
258
298
  | **Framework Integration** | ✅ | LangChain \& LlamaIndex adapters |
259
299
  | **Hardware Acceleration** | ✅ | Auto-detects CUDA/MPS/CPU + SIMD via usearch |
260
300
  | **Local Embeddings** | ✅ | HuggingFace models via `[server]` extras |
261
- | **Built-in Encryption** | 🔜 | SQLCipher integration for at-rest encryption |
301
+ | **Built-in Encryption** | ✅ | SQLCipher AES-256 at-rest encryption via `[encryption]` extras |
302
+ | **Streaming Insert** | ✅ | Memory-efficient large-scale ingestion with progress callbacks |
303
+ | **Document Hierarchies** | ✅ | Parent/child relationships for chunked docs |
262
304
 
263
305
  ## Performance Benchmarks
264
306
 
@@ -324,9 +366,11 @@ pip install torch --index-url https://download.pytorch.org/whl/cu118
324
366
  - [x] Multi-collection support
325
367
  - [x] HNSW indexing (usearch backend)
326
368
  - [x] Adaptive search (brute-force/HNSW)
327
- - [ ] SQLCipher encryption (at-rest data protection)
328
- - [ ] Streaming insert API for large-scale ingestion
329
- - [ ] Graph-based metadata relationships
369
+ - [x] SQLCipher encryption (at-rest data protection)
370
+ - [x] Streaming insert API for large-scale ingestion
371
+ - [x] Hierarchical document relationships (parent/child)
372
+ - [ ] Cross-collection search
373
+ - [ ] Vector clustering and auto-tagging
330
374
 
331
375
  Vote on features or propose new ones in [GitHub Discussions](https://github.com/coderdayton/simplevecdb/discussions).
332
376
 
@@ -0,0 +1,147 @@
1
+ # Core API
2
+
3
+ ## VectorDB
4
+
5
+ The main database class for managing vector collections.
6
+
7
+ ::: simplevecdb.core.VectorDB
8
+ options:
9
+ members:
10
+ - collection
11
+ - vacuum
12
+ - close
13
+ - check_migration
14
+
15
+ ## VectorCollection
16
+
17
+ A named collection of vectors within a database.
18
+
19
+ ::: simplevecdb.core.VectorCollection
20
+ options:
21
+ members:
22
+ - add_texts
23
+ - add_texts_streaming
24
+ - similarity_search
25
+ - similarity_search_batch
26
+ - keyword_search
27
+ - hybrid_search
28
+ - max_marginal_relevance_search
29
+ - delete_by_ids
30
+ - remove_texts
31
+ - rebuild_index
32
+ - get_children
33
+ - get_parent
34
+ - get_descendants
35
+ - get_ancestors
36
+ - set_parent
37
+
38
+ ## Quick Reference
39
+
40
+ ### Search Methods
41
+
42
+ | Method | Description | Use Case |
43
+ |--------|-------------|----------|
44
+ | `similarity_search()` | Vector similarity search | Single query, best match |
45
+ | `similarity_search_batch()` | Batch vector search | Multiple queries, ~10x throughput |
46
+ | `keyword_search()` | BM25 full-text search | Keyword matching |
47
+ | `hybrid_search()` | BM25 + vector fusion | Best of both worlds |
48
+ | `max_marginal_relevance_search()` | Diversity-aware search | Avoid redundant results |
49
+
50
+ ### Search Parameters
51
+
52
+ ```python
53
+ # Adaptive search (default) - auto-selects brute-force or HNSW
54
+ results = collection.similarity_search(query, k=10)
55
+
56
+ # Force exact brute-force search (perfect recall)
57
+ results = collection.similarity_search(query, k=10, exact=True)
58
+
59
+ # Force HNSW approximate search (faster)
60
+ results = collection.similarity_search(query, k=10, exact=False)
61
+
62
+ # Parallel search with explicit thread count
63
+ results = collection.similarity_search(query, k=10, threads=4)
64
+
65
+ # Batch search for multiple queries
66
+ results = collection.similarity_search_batch(queries, k=10)
67
+ ```
68
+
69
+ ### Quantization Options
70
+
71
+ ```python
72
+ from simplevecdb import Quantization
73
+
74
+ # Full precision (default)
75
+ collection = db.collection("docs", quantization=Quantization.FLOAT)
76
+
77
+ # Half precision - 2x memory savings, 1.5x faster
78
+ collection = db.collection("docs", quantization=Quantization.FLOAT16)
79
+
80
+ # 8-bit quantization - 4x memory savings
81
+ collection = db.collection("docs", quantization=Quantization.INT8)
82
+
83
+ # 1-bit quantization - 32x memory savings
84
+ collection = db.collection("docs", quantization=Quantization.BIT)
85
+ ```
86
+
87
+ ### Streaming Insert
88
+
89
+ For large-scale ingestion without memory pressure:
90
+
91
+ ```python
92
+ # From generator/iterator
93
+ def load_documents():
94
+ for line in open("large_file.jsonl"):
95
+ doc = json.loads(line)
96
+ yield (doc["text"], doc.get("metadata"), doc.get("embedding"))
97
+
98
+ for progress in collection.add_texts_streaming(load_documents()):
99
+ print(f"Batch {progress['batch_num']}: {progress['docs_processed']} total")
100
+
101
+ # With progress callback
102
+ def log_progress(p):
103
+ print(f"{p['docs_processed']} docs, batch {p['batch_num']}")
104
+
105
+ list(collection.add_texts_streaming(items, batch_size=500, on_progress=log_progress))
106
+ ```
107
+
108
+ ### Hierarchical Relationships
109
+
110
+ Organize documents in parent-child hierarchies for chunked documents, threaded conversations, or nested content:
111
+
112
+ ```python
113
+ # Add documents with parent relationships
114
+ parent_ids = collection.add_texts(["Main document"], metadatas=[{"type": "parent"}])
115
+ parent_id = parent_ids[0]
116
+
117
+ # Add children referencing the parent
118
+ child_ids = collection.add_texts(
119
+ ["Chunk 1", "Chunk 2", "Chunk 3"],
120
+ parent_ids=[parent_id, parent_id, parent_id]
121
+ )
122
+
123
+ # Navigate the hierarchy
124
+ children = collection.get_children(parent_id) # Direct children
125
+ parent = collection.get_parent(child_ids[0]) # Get parent document
126
+ descendants = collection.get_descendants(parent_id) # All nested children
127
+ ancestors = collection.get_ancestors(child_ids[0]) # Path to root
128
+
129
+ # Reparent or orphan documents
130
+ collection.set_parent(child_ids[0], new_parent_id) # Move to new parent
131
+ collection.set_parent(child_ids[0], None) # Make root document
132
+
133
+ # Search within a subtree
134
+ results = collection.similarity_search(
135
+ query_embedding,
136
+ k=5,
137
+ filter={"parent_id": parent_id} # Only search children
138
+ )
139
+ ```
140
+
141
+ | Method | Description |
142
+ |--------|-------------|
143
+ | `get_children(doc_id)` | Direct children of a document |
144
+ | `get_parent(doc_id)` | Parent document (or None if root) |
145
+ | `get_descendants(doc_id, max_depth)` | All nested children recursively |
146
+ | `get_ancestors(doc_id, max_depth)` | Path from document to root |
147
+ | `set_parent(doc_id, parent_id)` | Move document to new parent (or None to orphan) |