simplevecdb 2.0.0__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- simplevecdb-2.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- simplevecdb-2.1.0/.github/ISSUE_TEMPLATE/config.yml +8 -0
- simplevecdb-2.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +58 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.gitignore +4 -0
- {simplevecdb-2.0.0/docs → simplevecdb-2.1.0}/CHANGELOG.md +43 -0
- simplevecdb-2.0.0/README.md → simplevecdb-2.1.0/PKG-INFO +91 -4
- simplevecdb-2.0.0/PKG-INFO → simplevecdb-2.1.0/README.md +68 -24
- simplevecdb-2.1.0/docs/api/core.md +147 -0
- simplevecdb-2.1.0/docs/api/encryption.md +153 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/examples.md +122 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/index.md +46 -4
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/mkdocs.yml +1 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/pyproject.toml +7 -1
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/__init__.py +14 -2
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/core.py +384 -9
- simplevecdb-2.1.0/src/simplevecdb/encryption.py +429 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/catalog.py +219 -8
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/types.py +15 -0
- simplevecdb-2.1.0/tests/integration/test_v21_features.py +283 -0
- simplevecdb-2.1.0/tests/unit/core/test_core_additional_coverage.py +233 -0
- simplevecdb-2.1.0/tests/unit/test_encryption.py +381 -0
- simplevecdb-2.1.0/tests/unit/test_hierarchy.py +431 -0
- simplevecdb-2.1.0/tests/unit/test_streaming.py +294 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/uv.lock +89 -2
- simplevecdb-2.0.0/docs/api/core.md +0 -79
- simplevecdb-2.0.0/tests/unit/core/test_core_additional_coverage.py +0 -97
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.bandit +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.env.example +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/FUNDING.yml +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/dependabot.yml +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/workflows/ci.yml +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/workflows/publish.yml +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/workflows/security.yml +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.github/workflows/update-sponsors.yml +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.pre-commit-config.yaml +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/.python-version +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/CODE_OF_CONDUCT.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/CONTRIBUTING.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/LICENSE +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/SECURITY.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0/docs}/CHANGELOG.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/CONTRIBUTING.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/ENV_SETUP.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/LICENSE +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/async.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/config.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/embeddings.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/engine/catalog.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/engine/quantization.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/engine/search.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/api/integrations.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/docs/benchmarks.md +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/auto_embed.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/backend_benchmark.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/embeddings/perf_benchmark.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/quant_benchmark.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/rag/langchain_rag.ipynb +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/rag/llama_rag.ipynb +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/rag/ollama_rag.ipynb +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/examples/smoke_test.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/async_core.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/config.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/constants.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/embeddings/__init__.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/embeddings/models.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/embeddings/server.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/__init__.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/quantization.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/search.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/engine/usearch_index.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/integrations/__init__.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/integrations/langchain.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/integrations/llamaindex.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/logging.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/src/simplevecdb/utils.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/conftest.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/integration/test_langchain.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/integration/test_llamaindex.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/integration/test_rag.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/integration/test_server.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/perf/test_batch_detection.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/perf/test_performance.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/__init__.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_batch_detection.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_factory_methods.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_filters.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_initialization.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_quantization.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/core/test_similarity_search.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/embeddings/__init__.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/embeddings/test_models.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/embeddings/test_server.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/integrations/__init__.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/integrations/test_langchain_coverage.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/integrations/test_llamaindex_coverage.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_async.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_config.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_core.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_error_handling.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_multi_collection.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_search.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_search_coverage.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_types.py +0 -0
- {simplevecdb-2.0.0 → simplevecdb-2.1.0}/tests/unit/test_utils.py +0 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
name: Bug Report
|
|
2
|
+
description: Report a bug in SimpleVecDB
|
|
3
|
+
labels: ["bug"]
|
|
4
|
+
body:
|
|
5
|
+
- type: markdown
|
|
6
|
+
attributes:
|
|
7
|
+
value: |
|
|
8
|
+
Thanks for taking the time to report a bug. Please fill out the sections below.
|
|
9
|
+
|
|
10
|
+
- type: textarea
|
|
11
|
+
id: description
|
|
12
|
+
attributes:
|
|
13
|
+
label: Describe the bug
|
|
14
|
+
description: A clear and concise description of what the bug is.
|
|
15
|
+
validations:
|
|
16
|
+
required: true
|
|
17
|
+
|
|
18
|
+
- type: textarea
|
|
19
|
+
id: reproduction
|
|
20
|
+
attributes:
|
|
21
|
+
label: Reproduction steps
|
|
22
|
+
description: Minimal code to reproduce the issue.
|
|
23
|
+
placeholder: |
|
|
24
|
+
```python
|
|
25
|
+
from simplevecdb import VectorDB
|
|
26
|
+
db = VectorDB(":memory:")
|
|
27
|
+
# ... code that triggers bug
|
|
28
|
+
```
|
|
29
|
+
validations:
|
|
30
|
+
required: true
|
|
31
|
+
|
|
32
|
+
- type: textarea
|
|
33
|
+
id: expected
|
|
34
|
+
attributes:
|
|
35
|
+
label: Expected behavior
|
|
36
|
+
description: What you expected to happen.
|
|
37
|
+
validations:
|
|
38
|
+
required: true
|
|
39
|
+
|
|
40
|
+
- type: textarea
|
|
41
|
+
id: actual
|
|
42
|
+
attributes:
|
|
43
|
+
label: Actual behavior
|
|
44
|
+
description: What actually happened. Include error messages if applicable.
|
|
45
|
+
validations:
|
|
46
|
+
required: true
|
|
47
|
+
|
|
48
|
+
- type: input
|
|
49
|
+
id: version
|
|
50
|
+
attributes:
|
|
51
|
+
label: SimpleVecDB version
|
|
52
|
+
placeholder: "2.1.0"
|
|
53
|
+
validations:
|
|
54
|
+
required: true
|
|
55
|
+
|
|
56
|
+
- type: input
|
|
57
|
+
id: python-version
|
|
58
|
+
attributes:
|
|
59
|
+
label: Python version
|
|
60
|
+
placeholder: "3.11"
|
|
61
|
+
validations:
|
|
62
|
+
required: true
|
|
63
|
+
|
|
64
|
+
- type: dropdown
|
|
65
|
+
id: os
|
|
66
|
+
attributes:
|
|
67
|
+
label: Operating System
|
|
68
|
+
options:
|
|
69
|
+
- Linux
|
|
70
|
+
- macOS
|
|
71
|
+
- Windows
|
|
72
|
+
- Other
|
|
73
|
+
validations:
|
|
74
|
+
required: true
|
|
75
|
+
|
|
76
|
+
- type: textarea
|
|
77
|
+
id: additional
|
|
78
|
+
attributes:
|
|
79
|
+
label: Additional context
|
|
80
|
+
description: Any other context about the problem (logs, screenshots, etc.)
|
|
81
|
+
validations:
|
|
82
|
+
required: false
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
blank_issues_enabled: false
|
|
2
|
+
contact_links:
|
|
3
|
+
- name: Documentation
|
|
4
|
+
url: https://simplevecdb.dev
|
|
5
|
+
about: Check the docs before opening an issue
|
|
6
|
+
- name: Discussions
|
|
7
|
+
url: https://github.com/coderdayton/simplevecdb/discussions
|
|
8
|
+
about: Ask questions and share ideas
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
name: Feature Request
|
|
2
|
+
description: Suggest a new feature or enhancement
|
|
3
|
+
labels: ["enhancement"]
|
|
4
|
+
body:
|
|
5
|
+
- type: markdown
|
|
6
|
+
attributes:
|
|
7
|
+
value: |
|
|
8
|
+
Thanks for suggesting a feature! Please describe what you'd like to see.
|
|
9
|
+
|
|
10
|
+
- type: textarea
|
|
11
|
+
id: problem
|
|
12
|
+
attributes:
|
|
13
|
+
label: Problem or motivation
|
|
14
|
+
description: What problem does this feature solve? Why do you need it?
|
|
15
|
+
placeholder: "I'm trying to do X but currently have to..."
|
|
16
|
+
validations:
|
|
17
|
+
required: true
|
|
18
|
+
|
|
19
|
+
- type: textarea
|
|
20
|
+
id: solution
|
|
21
|
+
attributes:
|
|
22
|
+
label: Proposed solution
|
|
23
|
+
description: How would you like this to work?
|
|
24
|
+
placeholder: |
|
|
25
|
+
```python
|
|
26
|
+
# Example API usage
|
|
27
|
+
db = VectorDB("my.db")
|
|
28
|
+
db.new_feature(...)
|
|
29
|
+
```
|
|
30
|
+
validations:
|
|
31
|
+
required: true
|
|
32
|
+
|
|
33
|
+
- type: textarea
|
|
34
|
+
id: alternatives
|
|
35
|
+
attributes:
|
|
36
|
+
label: Alternatives considered
|
|
37
|
+
description: Any alternative solutions or workarounds you've tried.
|
|
38
|
+
validations:
|
|
39
|
+
required: false
|
|
40
|
+
|
|
41
|
+
- type: dropdown
|
|
42
|
+
id: scope
|
|
43
|
+
attributes:
|
|
44
|
+
label: Scope
|
|
45
|
+
description: How big is this change?
|
|
46
|
+
options:
|
|
47
|
+
- Small (docs, minor tweak)
|
|
48
|
+
- Medium (new method, config option)
|
|
49
|
+
- Large (new module, breaking change)
|
|
50
|
+
validations:
|
|
51
|
+
required: true
|
|
52
|
+
|
|
53
|
+
- type: checkboxes
|
|
54
|
+
id: contribution
|
|
55
|
+
attributes:
|
|
56
|
+
label: Contribution
|
|
57
|
+
options:
|
|
58
|
+
- label: I'm willing to submit a PR for this feature
|
|
@@ -5,6 +5,49 @@ All notable changes to SimpleVecDB will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [2.1.0] - 2026-01-01
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **SQLCipher Encryption Support** - Full at-rest encryption for sensitive data:
|
|
13
|
+
- `VectorDB(path, encryption_key="...")` enables AES-256 page-level database encryption
|
|
14
|
+
- Uses SQLCipher for transparent SQLite encryption (PRAGMA key)
|
|
15
|
+
- Usearch index files encrypted with AES-256-GCM (`.usearch.enc`)
|
|
16
|
+
- Zero performance overhead during search (decrypt on load, encrypt on save only)
|
|
17
|
+
- Key derivation: PBKDF2-SHA256 with 480,000 iterations for passphrases
|
|
18
|
+
- Install with `pip install simplevecdb[encryption]`
|
|
19
|
+
|
|
20
|
+
- **New encryption module** (`simplevecdb.encryption`):
|
|
21
|
+
- `create_encrypted_connection()` - SQLCipher connection factory
|
|
22
|
+
- `is_database_encrypted()` - Check if a database file is encrypted
|
|
23
|
+
- `encrypt_index_file()` / `decrypt_index_file()` - Index file encryption
|
|
24
|
+
- `EncryptionError` / `EncryptionUnavailableError` - New exception types
|
|
25
|
+
|
|
26
|
+
- **Streaming Insert API** - Memory-efficient large-scale ingestion:
|
|
27
|
+
- `collection.add_texts_streaming(iterable)` - Process from any iterator/generator
|
|
28
|
+
- Configurable `batch_size` parameter (default: config.EMBEDDING_BATCH_SIZE)
|
|
29
|
+
- Yields `StreamingProgress` after each batch for monitoring
|
|
30
|
+
- Optional `on_progress` callback for custom logging/UI updates
|
|
31
|
+
- New types: `StreamingProgress`, `ProgressCallback`
|
|
32
|
+
|
|
33
|
+
- **Hierarchical Document Relationships** - Parent/child document structure:
|
|
34
|
+
- `parent_ids` parameter in `add_texts()` to link documents
|
|
35
|
+
- `get_children(doc_id)` - Get direct child documents
|
|
36
|
+
- `get_parent(doc_id)` - Get parent document
|
|
37
|
+
- `get_descendants(doc_id, max_depth)` - Recursive children traversal
|
|
38
|
+
- `get_ancestors(doc_id, max_depth)` - Path to root
|
|
39
|
+
- `set_parent(doc_id, parent_id)` - Update relationships
|
|
40
|
+
- Uses SQLite recursive CTE for efficient traversal
|
|
41
|
+
- Auto-migrates existing databases (adds `parent_id` column)
|
|
42
|
+
|
|
43
|
+
### Changed
|
|
44
|
+
|
|
45
|
+
- `check_migration()` now gracefully handles encrypted databases (returns `needs_migration=False`)
|
|
46
|
+
|
|
47
|
+
### Dependencies
|
|
48
|
+
|
|
49
|
+
- New optional dependency group `[encryption]`: `sqlcipher3-binary>=0.5.0`, `cryptography>=41.0`
|
|
50
|
+
|
|
8
51
|
## [2.0.0] - 2025-12-23
|
|
9
52
|
|
|
10
53
|
### Breaking Changes
|
|
@@ -1,3 +1,26 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: simplevecdb
|
|
3
|
+
Version: 2.1.0
|
|
4
|
+
Summary: Dead-simple local vector database powered by usearch HNSW.
|
|
5
|
+
Author-email: Dayton Dunbar <coderdayton14@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: numpy>=2.0
|
|
10
|
+
Requires-Dist: psutil>=5.9.0
|
|
11
|
+
Requires-Dist: python-dotenv>=1.2.1
|
|
12
|
+
Requires-Dist: usearch>=2.12
|
|
13
|
+
Provides-Extra: encryption
|
|
14
|
+
Requires-Dist: cryptography>=41.0; extra == 'encryption'
|
|
15
|
+
Requires-Dist: sqlcipher3-binary>=0.5.0; extra == 'encryption'
|
|
16
|
+
Provides-Extra: examples
|
|
17
|
+
Requires-Dist: ollama; extra == 'examples'
|
|
18
|
+
Provides-Extra: server
|
|
19
|
+
Requires-Dist: fastapi>=0.115; extra == 'server'
|
|
20
|
+
Requires-Dist: sentence-transformers>=5.0; extra == 'server'
|
|
21
|
+
Requires-Dist: uvicorn[standard]>=0.30; extra == 'server'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
1
24
|
# SimpleVecDB
|
|
2
25
|
|
|
3
26
|
[CI](https://github.com/coderdayton/simplevecdb/actions)
|
|
@@ -51,6 +74,9 @@ pip install simplevecdb
|
|
|
51
74
|
|
|
52
75
|
# With local embeddings server + HuggingFace models (500MB+)
|
|
53
76
|
pip install "simplevecdb[server]"
|
|
77
|
+
|
|
78
|
+
# With encryption support (SQLCipher)
|
|
79
|
+
pip install "simplevecdb[encryption]"
|
|
54
80
|
```
|
|
55
81
|
|
|
56
82
|
**Verify Installation:**
|
|
@@ -222,6 +248,63 @@ results = collection.similarity_search(
|
|
|
222
248
|
|
|
223
249
|
> **Tip:** LangChain and LlamaIndex integrations support all search methods.
|
|
224
250
|
|
|
251
|
+
### Encryption (v2.1+)
|
|
252
|
+
|
|
253
|
+
Protect sensitive data with AES-256 at-rest encryption:
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
pip install "simplevecdb[encryption]"
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
from simplevecdb import VectorDB
|
|
261
|
+
|
|
262
|
+
# Create encrypted database
|
|
263
|
+
db = VectorDB("secure.db", encryption_key="your-secret-key")
|
|
264
|
+
collection = db.collection("confidential")
|
|
265
|
+
|
|
266
|
+
collection.add_texts(["sensitive data"], embeddings=[[0.1]*384])
|
|
267
|
+
db.close()
|
|
268
|
+
|
|
269
|
+
# Reopen requires same key
|
|
270
|
+
db = VectorDB("secure.db", encryption_key="your-secret-key")
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### Streaming Insert (v2.1+)
|
|
274
|
+
|
|
275
|
+
Memory-efficient ingestion for large datasets:
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
def load_documents():
|
|
279
|
+
for line in open("large_file.jsonl"):
|
|
280
|
+
doc = json.loads(line)
|
|
281
|
+
yield (doc["text"], doc.get("metadata"), doc.get("embedding"))
|
|
282
|
+
|
|
283
|
+
for progress in collection.add_texts_streaming(load_documents(), batch_size=1000):
|
|
284
|
+
print(f"Processed {progress['docs_processed']} documents")
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Document Hierarchies (v2.1+)
|
|
288
|
+
|
|
289
|
+
Organize documents in parent-child relationships:
|
|
290
|
+
|
|
291
|
+
```python
|
|
292
|
+
# Add parent document
|
|
293
|
+
parent_ids = collection.add_texts(["Main document"], embeddings=[[0.1]*384])
|
|
294
|
+
|
|
295
|
+
# Add children
|
|
296
|
+
child_ids = collection.add_texts(
|
|
297
|
+
["Chunk 1", "Chunk 2"],
|
|
298
|
+
embeddings=[[0.11]*384, [0.12]*384],
|
|
299
|
+
parent_ids=[parent_ids[0], parent_ids[0]]
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
# Navigate hierarchy
|
|
303
|
+
children = collection.get_children(parent_ids[0])
|
|
304
|
+
parent = collection.get_parent(child_ids[0])
|
|
305
|
+
descendants = collection.get_descendants(parent_ids[0])
|
|
306
|
+
```
|
|
307
|
+
|
|
225
308
|
## Feature Matrix
|
|
226
309
|
|
|
227
310
|
| Feature | Status | Description |
|
|
@@ -238,7 +321,9 @@ results = collection.similarity_search(
|
|
|
238
321
|
| **Framework Integration** | ✅ | LangChain \& LlamaIndex adapters |
|
|
239
322
|
| **Hardware Acceleration** | ✅ | Auto-detects CUDA/MPS/CPU + SIMD via usearch |
|
|
240
323
|
| **Local Embeddings** | ✅ | HuggingFace models via `[server]` extras |
|
|
241
|
-
| **Built-in Encryption** |
|
|
324
|
+
| **Built-in Encryption** | ✅ | SQLCipher AES-256 at-rest encryption via `[encryption]` extras |
|
|
325
|
+
| **Streaming Insert** | ✅ | Memory-efficient large-scale ingestion with progress callbacks |
|
|
326
|
+
| **Document Hierarchies** | ✅ | Parent/child relationships for chunked docs |
|
|
242
327
|
|
|
243
328
|
## Performance Benchmarks
|
|
244
329
|
|
|
@@ -304,9 +389,11 @@ pip install torch --index-url https://download.pytorch.org/whl/cu118
|
|
|
304
389
|
- [x] Multi-collection support
|
|
305
390
|
- [x] HNSW indexing (usearch backend)
|
|
306
391
|
- [x] Adaptive search (brute-force/HNSW)
|
|
307
|
-
- [
|
|
308
|
-
- [
|
|
309
|
-
- [
|
|
392
|
+
- [x] SQLCipher encryption (at-rest data protection)
|
|
393
|
+
- [x] Streaming insert API for large-scale ingestion
|
|
394
|
+
- [x] Hierarchical document relationships (parent/child)
|
|
395
|
+
- [ ] Cross-collection search
|
|
396
|
+
- [ ] Vector clustering and auto-tagging
|
|
310
397
|
|
|
311
398
|
Vote on features or propose new ones in [GitHub Discussions](https://github.com/coderdayton/simplevecdb/discussions).
|
|
312
399
|
|
|
@@ -1,23 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: simplevecdb
|
|
3
|
-
Version: 2.0.0
|
|
4
|
-
Summary: Dead-simple local vector database powered by usearch HNSW.
|
|
5
|
-
Author-email: Dayton Dunbar <coderdayton14@gmail.com>
|
|
6
|
-
License: MIT
|
|
7
|
-
License-File: LICENSE
|
|
8
|
-
Requires-Python: >=3.10
|
|
9
|
-
Requires-Dist: numpy>=2.0
|
|
10
|
-
Requires-Dist: psutil>=5.9.0
|
|
11
|
-
Requires-Dist: python-dotenv>=1.2.1
|
|
12
|
-
Requires-Dist: usearch>=2.12
|
|
13
|
-
Provides-Extra: examples
|
|
14
|
-
Requires-Dist: ollama; extra == 'examples'
|
|
15
|
-
Provides-Extra: server
|
|
16
|
-
Requires-Dist: fastapi>=0.115; extra == 'server'
|
|
17
|
-
Requires-Dist: sentence-transformers>=5.0; extra == 'server'
|
|
18
|
-
Requires-Dist: uvicorn[standard]>=0.30; extra == 'server'
|
|
19
|
-
Description-Content-Type: text/markdown
|
|
20
|
-
|
|
21
1
|
# SimpleVecDB
|
|
22
2
|
|
|
23
3
|
[CI](https://github.com/coderdayton/simplevecdb/actions)
|
|
@@ -71,6 +51,9 @@ pip install simplevecdb
|
|
|
71
51
|
|
|
72
52
|
# With local embeddings server + HuggingFace models (500MB+)
|
|
73
53
|
pip install "simplevecdb[server]"
|
|
54
|
+
|
|
55
|
+
# With encryption support (SQLCipher)
|
|
56
|
+
pip install "simplevecdb[encryption]"
|
|
74
57
|
```
|
|
75
58
|
|
|
76
59
|
**Verify Installation:**
|
|
@@ -242,6 +225,63 @@ results = collection.similarity_search(
|
|
|
242
225
|
|
|
243
226
|
> **Tip:** LangChain and LlamaIndex integrations support all search methods.
|
|
244
227
|
|
|
228
|
+
### Encryption (v2.1+)
|
|
229
|
+
|
|
230
|
+
Protect sensitive data with AES-256 at-rest encryption:
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
pip install "simplevecdb[encryption]"
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
from simplevecdb import VectorDB
|
|
238
|
+
|
|
239
|
+
# Create encrypted database
|
|
240
|
+
db = VectorDB("secure.db", encryption_key="your-secret-key")
|
|
241
|
+
collection = db.collection("confidential")
|
|
242
|
+
|
|
243
|
+
collection.add_texts(["sensitive data"], embeddings=[[0.1]*384])
|
|
244
|
+
db.close()
|
|
245
|
+
|
|
246
|
+
# Reopen requires same key
|
|
247
|
+
db = VectorDB("secure.db", encryption_key="your-secret-key")
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Streaming Insert (v2.1+)
|
|
251
|
+
|
|
252
|
+
Memory-efficient ingestion for large datasets:
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
def load_documents():
|
|
256
|
+
for line in open("large_file.jsonl"):
|
|
257
|
+
doc = json.loads(line)
|
|
258
|
+
yield (doc["text"], doc.get("metadata"), doc.get("embedding"))
|
|
259
|
+
|
|
260
|
+
for progress in collection.add_texts_streaming(load_documents(), batch_size=1000):
|
|
261
|
+
print(f"Processed {progress['docs_processed']} documents")
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### Document Hierarchies (v2.1+)
|
|
265
|
+
|
|
266
|
+
Organize documents in parent-child relationships:
|
|
267
|
+
|
|
268
|
+
```python
|
|
269
|
+
# Add parent document
|
|
270
|
+
parent_ids = collection.add_texts(["Main document"], embeddings=[[0.1]*384])
|
|
271
|
+
|
|
272
|
+
# Add children
|
|
273
|
+
child_ids = collection.add_texts(
|
|
274
|
+
["Chunk 1", "Chunk 2"],
|
|
275
|
+
embeddings=[[0.11]*384, [0.12]*384],
|
|
276
|
+
parent_ids=[parent_ids[0], parent_ids[0]]
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
# Navigate hierarchy
|
|
280
|
+
children = collection.get_children(parent_ids[0])
|
|
281
|
+
parent = collection.get_parent(child_ids[0])
|
|
282
|
+
descendants = collection.get_descendants(parent_ids[0])
|
|
283
|
+
```
|
|
284
|
+
|
|
245
285
|
## Feature Matrix
|
|
246
286
|
|
|
247
287
|
| Feature | Status | Description |
|
|
@@ -258,7 +298,9 @@ results = collection.similarity_search(
|
|
|
258
298
|
| **Framework Integration** | ✅ | LangChain \& LlamaIndex adapters |
|
|
259
299
|
| **Hardware Acceleration** | ✅ | Auto-detects CUDA/MPS/CPU + SIMD via usearch |
|
|
260
300
|
| **Local Embeddings** | ✅ | HuggingFace models via `[server]` extras |
|
|
261
|
-
| **Built-in Encryption** |
|
|
301
|
+
| **Built-in Encryption** | ✅ | SQLCipher AES-256 at-rest encryption via `[encryption]` extras |
|
|
302
|
+
| **Streaming Insert** | ✅ | Memory-efficient large-scale ingestion with progress callbacks |
|
|
303
|
+
| **Document Hierarchies** | ✅ | Parent/child relationships for chunked docs |
|
|
262
304
|
|
|
263
305
|
## Performance Benchmarks
|
|
264
306
|
|
|
@@ -324,9 +366,11 @@ pip install torch --index-url https://download.pytorch.org/whl/cu118
|
|
|
324
366
|
- [x] Multi-collection support
|
|
325
367
|
- [x] HNSW indexing (usearch backend)
|
|
326
368
|
- [x] Adaptive search (brute-force/HNSW)
|
|
327
|
-
- [
|
|
328
|
-
- [
|
|
329
|
-
- [
|
|
369
|
+
- [x] SQLCipher encryption (at-rest data protection)
|
|
370
|
+
- [x] Streaming insert API for large-scale ingestion
|
|
371
|
+
- [x] Hierarchical document relationships (parent/child)
|
|
372
|
+
- [ ] Cross-collection search
|
|
373
|
+
- [ ] Vector clustering and auto-tagging
|
|
330
374
|
|
|
331
375
|
Vote on features or propose new ones in [GitHub Discussions](https://github.com/coderdayton/simplevecdb/discussions).
|
|
332
376
|
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Core API
|
|
2
|
+
|
|
3
|
+
## VectorDB
|
|
4
|
+
|
|
5
|
+
The main database class for managing vector collections.
|
|
6
|
+
|
|
7
|
+
::: simplevecdb.core.VectorDB
|
|
8
|
+
options:
|
|
9
|
+
members:
|
|
10
|
+
- collection
|
|
11
|
+
- vacuum
|
|
12
|
+
- close
|
|
13
|
+
- check_migration
|
|
14
|
+
|
|
15
|
+
## VectorCollection
|
|
16
|
+
|
|
17
|
+
A named collection of vectors within a database.
|
|
18
|
+
|
|
19
|
+
::: simplevecdb.core.VectorCollection
|
|
20
|
+
options:
|
|
21
|
+
members:
|
|
22
|
+
- add_texts
|
|
23
|
+
- add_texts_streaming
|
|
24
|
+
- similarity_search
|
|
25
|
+
- similarity_search_batch
|
|
26
|
+
- keyword_search
|
|
27
|
+
- hybrid_search
|
|
28
|
+
- max_marginal_relevance_search
|
|
29
|
+
- delete_by_ids
|
|
30
|
+
- remove_texts
|
|
31
|
+
- rebuild_index
|
|
32
|
+
- get_children
|
|
33
|
+
- get_parent
|
|
34
|
+
- get_descendants
|
|
35
|
+
- get_ancestors
|
|
36
|
+
- set_parent
|
|
37
|
+
|
|
38
|
+
## Quick Reference
|
|
39
|
+
|
|
40
|
+
### Search Methods
|
|
41
|
+
|
|
42
|
+
| Method | Description | Use Case |
|
|
43
|
+
|--------|-------------|----------|
|
|
44
|
+
| `similarity_search()` | Vector similarity search | Single query, best match |
|
|
45
|
+
| `similarity_search_batch()` | Batch vector search | Multiple queries, ~10x throughput |
|
|
46
|
+
| `keyword_search()` | BM25 full-text search | Keyword matching |
|
|
47
|
+
| `hybrid_search()` | BM25 + vector fusion | Best of both worlds |
|
|
48
|
+
| `max_marginal_relevance_search()` | Diversity-aware search | Avoid redundant results |
|
|
49
|
+
|
|
50
|
+
### Search Parameters
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
# Adaptive search (default) - auto-selects brute-force or HNSW
|
|
54
|
+
results = collection.similarity_search(query, k=10)
|
|
55
|
+
|
|
56
|
+
# Force exact brute-force search (perfect recall)
|
|
57
|
+
results = collection.similarity_search(query, k=10, exact=True)
|
|
58
|
+
|
|
59
|
+
# Force HNSW approximate search (faster)
|
|
60
|
+
results = collection.similarity_search(query, k=10, exact=False)
|
|
61
|
+
|
|
62
|
+
# Parallel search with explicit thread count
|
|
63
|
+
results = collection.similarity_search(query, k=10, threads=4)
|
|
64
|
+
|
|
65
|
+
# Batch search for multiple queries
|
|
66
|
+
results = collection.similarity_search_batch(queries, k=10)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Quantization Options
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from simplevecdb import Quantization
|
|
73
|
+
|
|
74
|
+
# Full precision (default)
|
|
75
|
+
collection = db.collection("docs", quantization=Quantization.FLOAT)
|
|
76
|
+
|
|
77
|
+
# Half precision - 2x memory savings, 1.5x faster
|
|
78
|
+
collection = db.collection("docs", quantization=Quantization.FLOAT16)
|
|
79
|
+
|
|
80
|
+
# 8-bit quantization - 4x memory savings
|
|
81
|
+
collection = db.collection("docs", quantization=Quantization.INT8)
|
|
82
|
+
|
|
83
|
+
# 1-bit quantization - 32x memory savings
|
|
84
|
+
collection = db.collection("docs", quantization=Quantization.BIT)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Streaming Insert
|
|
88
|
+
|
|
89
|
+
For large-scale ingestion without memory pressure:
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
# From generator/iterator
|
|
93
|
+
def load_documents():
|
|
94
|
+
for line in open("large_file.jsonl"):
|
|
95
|
+
doc = json.loads(line)
|
|
96
|
+
yield (doc["text"], doc.get("metadata"), doc.get("embedding"))
|
|
97
|
+
|
|
98
|
+
for progress in collection.add_texts_streaming(load_documents()):
|
|
99
|
+
print(f"Batch {progress['batch_num']}: {progress['docs_processed']} total")
|
|
100
|
+
|
|
101
|
+
# With progress callback
|
|
102
|
+
def log_progress(p):
|
|
103
|
+
print(f"{p['docs_processed']} docs, batch {p['batch_num']}")
|
|
104
|
+
|
|
105
|
+
list(collection.add_texts_streaming(items, batch_size=500, on_progress=log_progress))
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Hierarchical Relationships
|
|
109
|
+
|
|
110
|
+
Organize documents in parent-child hierarchies for chunked documents, threaded conversations, or nested content:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
# Add documents with parent relationships
|
|
114
|
+
parent_ids = collection.add_texts(["Main document"], metadatas=[{"type": "parent"}])
|
|
115
|
+
parent_id = parent_ids[0]
|
|
116
|
+
|
|
117
|
+
# Add children referencing the parent
|
|
118
|
+
child_ids = collection.add_texts(
|
|
119
|
+
["Chunk 1", "Chunk 2", "Chunk 3"],
|
|
120
|
+
parent_ids=[parent_id, parent_id, parent_id]
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
# Navigate the hierarchy
|
|
124
|
+
children = collection.get_children(parent_id) # Direct children
|
|
125
|
+
parent = collection.get_parent(child_ids[0]) # Get parent document
|
|
126
|
+
descendants = collection.get_descendants(parent_id) # All nested children
|
|
127
|
+
ancestors = collection.get_ancestors(child_ids[0]) # Path to root
|
|
128
|
+
|
|
129
|
+
# Reparent or orphan documents
|
|
130
|
+
collection.set_parent(child_ids[0], new_parent_id) # Move to new parent
|
|
131
|
+
collection.set_parent(child_ids[0], None) # Make root document
|
|
132
|
+
|
|
133
|
+
# Search only among the direct children of a parent
|
|
134
|
+
results = collection.similarity_search(
|
|
135
|
+
query_embedding,
|
|
136
|
+
k=5,
|
|
137
|
+
filter={"parent_id": parent_id} # Only search children
|
|
138
|
+
)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
| Method | Description |
|
|
142
|
+
|--------|-------------|
|
|
143
|
+
| `get_children(doc_id)` | Direct children of a document |
|
|
144
|
+
| `get_parent(doc_id)` | Parent document (or None if root) |
|
|
145
|
+
| `get_descendants(doc_id, max_depth)` | All nested children recursively |
|
|
146
|
+
| `get_ancestors(doc_id, max_depth)` | Path from document to root |
|
|
147
|
+
| `set_parent(doc_id, parent_id)` | Move document to new parent (or None to orphan) |
|