simplevecdb 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- simplevecdb-1.0.0/.env.example +32 -0
- simplevecdb-1.0.0/.github/FUNDING.yml +6 -0
- simplevecdb-1.0.0/.github/workflows/ci.yml +34 -0
- simplevecdb-1.0.0/.github/workflows/update-sponsors.yml +27 -0
- simplevecdb-1.0.0/.gitignore +31 -0
- simplevecdb-1.0.0/.pre-commit-config.yaml +23 -0
- simplevecdb-1.0.0/.python-version +1 -0
- simplevecdb-1.0.0/CHANGELOG.md +124 -0
- simplevecdb-1.0.0/CONTRIBUTING.md +174 -0
- simplevecdb-1.0.0/LICENSE +21 -0
- simplevecdb-1.0.0/PKG-INFO +453 -0
- simplevecdb-1.0.0/README.md +433 -0
- simplevecdb-1.0.0/docs/CHANGELOG.md +124 -0
- simplevecdb-1.0.0/docs/CONTRIBUTING.md +174 -0
- simplevecdb-1.0.0/docs/ENV_SETUP.md +78 -0
- simplevecdb-1.0.0/docs/LICENSE +1 -0
- simplevecdb-1.0.0/docs/api/config.md +3 -0
- simplevecdb-1.0.0/docs/api/core.md +5 -0
- simplevecdb-1.0.0/docs/api/embeddings.md +5 -0
- simplevecdb-1.0.0/docs/api/integrations.md +9 -0
- simplevecdb-1.0.0/docs/benchmarks.md +36 -0
- simplevecdb-1.0.0/docs/examples.md +31 -0
- simplevecdb-1.0.0/docs/index.md +433 -0
- simplevecdb-1.0.0/examples/auto_embed.py +8 -0
- simplevecdb-1.0.0/examples/embeddings/perf_benchmark.py +172 -0
- simplevecdb-1.0.0/examples/quant_benchmark.py +42 -0
- simplevecdb-1.0.0/examples/rag/langchain_rag.ipynb +234 -0
- simplevecdb-1.0.0/examples/rag/llama_rag.ipynb +277 -0
- simplevecdb-1.0.0/examples/rag/ollama_rag.ipynb +505 -0
- simplevecdb-1.0.0/examples/smoke_test.py +30 -0
- simplevecdb-1.0.0/mkdocs.yml +77 -0
- simplevecdb-1.0.0/pyproject.toml +69 -0
- simplevecdb-1.0.0/src/simplevecdb/__init__.py +20 -0
- simplevecdb-1.0.0/src/simplevecdb/config.py +105 -0
- simplevecdb-1.0.0/src/simplevecdb/core.py +918 -0
- simplevecdb-1.0.0/src/simplevecdb/embeddings/__init__.py +0 -0
- simplevecdb-1.0.0/src/simplevecdb/embeddings/models.py +104 -0
- simplevecdb-1.0.0/src/simplevecdb/embeddings/server.py +276 -0
- simplevecdb-1.0.0/src/simplevecdb/integrations/__init__.py +9 -0
- simplevecdb-1.0.0/src/simplevecdb/integrations/langchain.py +255 -0
- simplevecdb-1.0.0/src/simplevecdb/integrations/llamaindex.py +220 -0
- simplevecdb-1.0.0/src/simplevecdb/types.py +28 -0
- simplevecdb-1.0.0/src/simplevecdb/utils.py +19 -0
- simplevecdb-1.0.0/tests/conftest.py +47 -0
- simplevecdb-1.0.0/tests/integration/test_langchain.py +142 -0
- simplevecdb-1.0.0/tests/integration/test_llamaindex.py +139 -0
- simplevecdb-1.0.0/tests/integration/test_rag.py +78 -0
- simplevecdb-1.0.0/tests/integration/test_server.py +96 -0
- simplevecdb-1.0.0/tests/perf/test_batch_detection.py +121 -0
- simplevecdb-1.0.0/tests/perf/test_performance.py +58 -0
- simplevecdb-1.0.0/tests/unit/core/__init__.py +1 -0
- simplevecdb-1.0.0/tests/unit/core/test_batch_detection.py +274 -0
- simplevecdb-1.0.0/tests/unit/core/test_brute_force.py +142 -0
- simplevecdb-1.0.0/tests/unit/core/test_core_additional_coverage.py +173 -0
- simplevecdb-1.0.0/tests/unit/core/test_factory_methods.py +42 -0
- simplevecdb-1.0.0/tests/unit/core/test_filters.py +60 -0
- simplevecdb-1.0.0/tests/unit/core/test_initialization.py +98 -0
- simplevecdb-1.0.0/tests/unit/core/test_quantization.py +47 -0
- simplevecdb-1.0.0/tests/unit/core/test_similarity_search.py +47 -0
- simplevecdb-1.0.0/tests/unit/embeddings/__init__.py +1 -0
- simplevecdb-1.0.0/tests/unit/embeddings/test_models.py +175 -0
- simplevecdb-1.0.0/tests/unit/embeddings/test_server.py +172 -0
- simplevecdb-1.0.0/tests/unit/integrations/__init__.py +1 -0
- simplevecdb-1.0.0/tests/unit/integrations/test_langchain_coverage.py +72 -0
- simplevecdb-1.0.0/tests/unit/integrations/test_llamaindex_coverage.py +100 -0
- simplevecdb-1.0.0/tests/unit/test_config.py +109 -0
- simplevecdb-1.0.0/tests/unit/test_core.py +443 -0
- simplevecdb-1.0.0/tests/unit/test_multi_collection.py +67 -0
- simplevecdb-1.0.0/tests/unit/test_search.py +198 -0
- simplevecdb-1.0.0/tests/unit/test_types.py +16 -0
- simplevecdb-1.0.0/tests/unit/test_utils.py +40 -0
- simplevecdb-1.0.0/tinyvecdb_plan.md +212 -0
- simplevecdb-1.0.0/uv.lock +5883 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# SimpleVecDB Configuration
|
|
2
|
+
|
|
3
|
+
# Embedding Model
|
|
4
|
+
# Options: Any HuggingFace model ID compatible with SentenceTransformers
|
|
5
|
+
|
|
6
|
+
# Default: Snowflake/snowflake-arctic-embed-xs (384-dim, best balance, fast)
|
|
7
|
+
# Alternative: TaylorAI/bge-micro-v2 (384-dim, tiny, fast)
|
|
8
|
+
|
|
9
|
+
# Note: All models converted to ONNX format for performance.
|
|
10
|
+
|
|
11
|
+
EMBEDDING_MODEL=Snowflake/snowflake-arctic-embed-xs
|
|
12
|
+
EMBEDDING_CACHE_DIR=~/.cache/simplevecdb # Model cache directory
|
|
13
|
+
# Optional: alias registry for allowed models (alias=repo_id,comma-separated)
|
|
14
|
+
# EMBEDDING_MODEL_REGISTRY=default=Snowflake/snowflake-arctic-embed-xs,local-bge=TaylorAI/bge-micro-v2
|
|
15
|
+
# Set to 0 to allow arbitrary repo IDs (default is locked)
|
|
16
|
+
# EMBEDDING_MODEL_REGISTRY_LOCKED=1
|
|
17
|
+
|
|
18
|
+
# Batch size for embedding inference (optional - auto-detected if not set)
|
|
19
|
+
# Auto-detection considers: GPU VRAM, Apple Silicon, CPU cores, architecture
|
|
20
|
+
# Override only if you need specific batch size for your use case
|
|
21
|
+
# EMBEDDING_BATCH_SIZE=256
|
|
22
|
+
|
|
23
|
+
# Embedding server controls
|
|
24
|
+
# EMBEDDING_SERVER_MAX_REQUEST_ITEMS=128 # Max prompts per /v1/embeddings call
|
|
25
|
+
# EMBEDDING_SERVER_API_KEYS=local-dev-token
|
|
26
|
+
|
|
27
|
+
# Database
|
|
28
|
+
DATABASE_PATH=./data/simplevecdb.db
|
|
29
|
+
|
|
30
|
+
# Server Configuration (0.0.0.0 listens on all network interfaces; use 127.0.0.1 for local-only access)
|
|
31
|
+
SERVER_HOST=0.0.0.0
|
|
32
|
+
SERVER_PORT=53287
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Install uv
|
|
19
|
+
uses: astral-sh/setup-uv@v3
|
|
20
|
+
with:
|
|
21
|
+
enable-cache: true
|
|
22
|
+
|
|
23
|
+
- name: Set up Python
|
|
24
|
+
run: uv python install ${{ matrix.python-version }}
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: uv sync --all-extras --dev
|
|
28
|
+
|
|
29
|
+
- name: Test with pytest
|
|
30
|
+
run: uv run pytest tests/ -vv --cov=src/simplevecdb
|
|
31
|
+
|
|
32
|
+
- name: Check coverage
|
|
33
|
+
run: |
|
|
34
|
+
uv run coverage report -m --fail-under=90
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: Update Sponsors
|
|
2
|
+
on:
|
|
3
|
+
schedule:
|
|
4
|
+
- cron: "0 0 * * *" # daily
|
|
5
|
+
workflow_dispatch:
|
|
6
|
+
permissions:
|
|
7
|
+
contents: write
|
|
8
|
+
jobs:
|
|
9
|
+
deploy:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- name: Checkout 🛎️
|
|
13
|
+
uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Update Sponsors ❤️
|
|
16
|
+
uses: JamesIves/github-sponsors-readme-action@v1
|
|
17
|
+
with:
|
|
18
|
+
file: README.md
|
|
19
|
+
token: ${{ secrets.GH_PAT }}
|
|
20
|
+
|
|
21
|
+
- name: Commit changes 📝
|
|
22
|
+
run: |
|
|
23
|
+
git config --global user.name "github-actions[bot]"
|
|
24
|
+
git config --global user.email "github-actions[bot]@users.noreply.github.com"
|
|
25
|
+
git add README.md
|
|
26
|
+
git commit -m "Update sponsors list" || echo "No changes to commit"
|
|
27
|
+
git push origin HEAD:main
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Python / uv
|
|
2
|
+
.venv/
|
|
3
|
+
__pycache__/
|
|
4
|
+
*.pyc
|
|
5
|
+
.env
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
*.egg-info/
|
|
9
|
+
.mypy_cache/
|
|
10
|
+
.pytest_cache/
|
|
11
|
+
.ruff_cache/
|
|
12
|
+
|
|
13
|
+
# IDE
|
|
14
|
+
.vscode/
|
|
15
|
+
.idea/
|
|
16
|
+
|
|
17
|
+
# Jupyter
|
|
18
|
+
.ipynb_checkpoints
|
|
19
|
+
|
|
20
|
+
# Databases
|
|
21
|
+
*.db
|
|
22
|
+
*.sqlite
|
|
23
|
+
|
|
24
|
+
# Project specific
|
|
25
|
+
simplevecdb_plan.md
|
|
26
|
+
AGENTS.md
|
|
27
|
+
htmlcov/
|
|
28
|
+
site/
|
|
29
|
+
scripts
|
|
30
|
+
.coverage
|
|
31
|
+
README_OLD.md
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: local
|
|
3
|
+
hooks:
|
|
4
|
+
- id: ruff
|
|
5
|
+
name: ruff
|
|
6
|
+
entry: uv run ruff check . --fix
|
|
7
|
+
language: system
|
|
8
|
+
types: [python]
|
|
9
|
+
pass_filenames: false
|
|
10
|
+
|
|
11
|
+
- id: mypy
|
|
12
|
+
name: mypy
|
|
13
|
+
entry: uv run mypy .
|
|
14
|
+
language: system
|
|
15
|
+
types: [python]
|
|
16
|
+
pass_filenames: false
|
|
17
|
+
|
|
18
|
+
- id: pytest-cov
|
|
19
|
+
name: pytest coverage
|
|
20
|
+
entry: uv run pytest tests/ -vv --cov=src/simplevecdb
|
|
21
|
+
language: system
|
|
22
|
+
types: [python]
|
|
23
|
+
pass_filenames: false
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.10
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to SimpleVecDB will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [1.0.0] - 2025-11-23
|
|
9
|
+
|
|
10
|
+
### 🎉 Initial Release
|
|
11
|
+
|
|
12
|
+
SimpleVecDB's first stable release brings production-ready local vector search to a single SQLite file.
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
|
|
16
|
+
#### Core Features
|
|
17
|
+
|
|
18
|
+
- **Multi-collection catalog system**: Organize documents in named collections within a single database
|
|
19
|
+
- **Vector search**: Cosine, L2 (Euclidean), and L1 (Manhattan) distance metrics
|
|
20
|
+
- **Quantization**: FLOAT32, INT8 (4x compression), and BIT (32x compression) support
|
|
21
|
+
- **Metadata filtering**: JSON-based filtering with SQL `WHERE` clauses
|
|
22
|
+
- **Batch processing**: Automatic batching for efficient bulk operations
|
|
23
|
+
- **Persistence**: Single `.db` file with WAL mode for concurrent reads
|
|
24
|
+
|
|
25
|
+
#### Hybrid Search
|
|
26
|
+
|
|
27
|
+
- **BM25 keyword search**: Full-text search using SQLite FTS5
|
|
28
|
+
- **Hybrid search**: Reciprocal Rank Fusion combining BM25 + vector similarity
|
|
29
|
+
- **Query vector reuse**: Pass pre-computed embeddings to avoid redundant embedding calls
|
|
30
|
+
- **Metadata filtering**: Works across all search modes (vector, keyword, hybrid)
|
|
31
|
+
|
|
32
|
+
#### Embeddings Server
|
|
33
|
+
|
|
34
|
+
- **OpenAI-compatible API**: `/v1/embeddings` endpoint for local embedding generation
|
|
35
|
+
- **Model registry**: Configure allowed models or allow arbitrary HuggingFace repos
|
|
36
|
+
- **Request limits**: Configurable max batch size per request
|
|
37
|
+
- **API key authentication**: Optional Bearer token / X-API-Key authentication
|
|
38
|
+
- **Usage tracking**: Per-key request and token metrics via `/v1/usage`
|
|
39
|
+
- **Model listing**: `/v1/models` endpoint for registry inspection
|
|
40
|
+
- **ONNX optimization**: Quantized ONNX runtime for fast CPU inference
|
|
41
|
+
|
|
42
|
+
#### Hardware Optimization
|
|
43
|
+
|
|
44
|
+
- **Auto-detection**: Automatically detects CUDA GPUs, Apple Silicon (MPS), ROCm, and CPU
|
|
45
|
+
- **Adaptive batching**: Optimal batch sizes based on:
|
|
46
|
+
- NVIDIA GPUs: 64-512 (scaled by VRAM 4GB-24GB+)
|
|
47
|
+
- AMD GPUs: 256 (ROCm)
|
|
48
|
+
- Apple Silicon: 32-128 (M1/M2 vs M3/M4, base vs Max/Ultra)
|
|
49
|
+
- ARM CPUs: 4-16 (mobile, Raspberry Pi, servers)
|
|
50
|
+
- x86 CPUs: 8-64 (scaled by core count)
|
|
51
|
+
- **Manual override**: `EMBEDDING_BATCH_SIZE` environment variable
|
|
52
|
+
|
|
53
|
+
#### Integrations
|
|
54
|
+
|
|
55
|
+
- **LangChain**: `SimpleVecDBVectorStore` with async support and MMR
|
|
56
|
+
- `similarity_search`, `similarity_search_with_score`
|
|
57
|
+
- `max_marginal_relevance_search`
|
|
58
|
+
- `keyword_search`, `hybrid_search`
|
|
59
|
+
- `add_texts`, `add_documents`, `delete`
|
|
60
|
+
- **LlamaIndex**: `SimpleVecDBLlamaStore` with query mode support
|
|
61
|
+
- `VectorStoreQueryMode.DEFAULT` (dense vector)
|
|
62
|
+
- `VectorStoreQueryMode.SPARSE` / `TEXT_SEARCH` (BM25)
|
|
63
|
+
- `VectorStoreQueryMode.HYBRID` / `SEMANTIC_HYBRID` (fusion)
|
|
64
|
+
- Metadata filtering across all modes
|
|
65
|
+
|
|
66
|
+
#### Examples & Documentation
|
|
67
|
+
|
|
68
|
+
- **RAG notebooks**: LangChain, LlamaIndex, and Ollama integration examples
|
|
69
|
+
- **Performance benchmarks**: Insertion speed, query latency, storage efficiency
|
|
70
|
+
- **API documentation**: Full class and method reference via MkDocs
|
|
71
|
+
- **Setup guide**: Environment variables and configuration options
|
|
72
|
+
- **Contributing guide**: Development setup and testing instructions
|
|
73
|
+
|
|
74
|
+
### Configuration
|
|
75
|
+
|
|
76
|
+
- `EMBEDDING_MODEL`: HuggingFace model ID (default: `Snowflake/snowflake-arctic-embed-xs`)
|
|
77
|
+
- `EMBEDDING_CACHE_DIR`: Model cache directory (default: `~/.cache/simplevecdb`)
|
|
78
|
+
- `EMBEDDING_MODEL_REGISTRY`: Comma-separated `alias=repo_id` entries
|
|
79
|
+
- `EMBEDDING_MODEL_REGISTRY_LOCKED`: Enforce registry allowlist (default: `1`)
|
|
80
|
+
- `EMBEDDING_BATCH_SIZE`: Inference batch size (auto-detected if not set)
|
|
81
|
+
- `EMBEDDING_SERVER_MAX_REQUEST_ITEMS`: Max prompts per `/v1/embeddings` call
|
|
82
|
+
- `EMBEDDING_SERVER_API_KEYS`: Comma-separated API keys for authentication
|
|
83
|
+
- `DATABASE_PATH`: SQLite database path (default: `:memory:`)
|
|
84
|
+
- `SERVER_HOST`: Embeddings server host (default: `0.0.0.0`)
|
|
85
|
+
- `SERVER_PORT`: Embeddings server port (default: `8000`)
|
|
86
|
+
|
|
87
|
+
### Performance
|
|
88
|
+
|
|
89
|
+
Benchmarks on i9-13900K & RTX 4090 with 10k vectors (384-dim):
|
|
90
|
+
|
|
91
|
+
| Quantization | Storage | Insert Speed | Query Time (k=10) |
|
|
92
|
+
| ------------ | -------- | ------------ | ----------------- |
|
|
93
|
+
| FLOAT32 | 15.50 MB | 15,585 vec/s | 3.55 ms |
|
|
94
|
+
| INT8 | 4.23 MB | 27,893 vec/s | 3.93 ms |
|
|
95
|
+
| BIT | 0.95 MB | 32,321 vec/s | 0.27 ms |
|
|
96
|
+
|
|
97
|
+
### Testing
|
|
98
|
+
|
|
99
|
+
- 177 unit and integration tests
|
|
100
|
+
- 97% code coverage
|
|
101
|
+
- Type-safe (mypy strict mode)
|
|
102
|
+
- CI/CD on Python 3.10, 3.11, 3.12, 3.13
|
|
103
|
+
|
|
104
|
+
### Dependencies
|
|
105
|
+
|
|
106
|
+
- Core: `sqlite-vec>=0.1.6`, `numpy>=2.0`, `python-dotenv>=1.2.1`, `psutil>=5.9.0`
|
|
107
|
+
- Server extras: `fastapi>=0.115`, `uvicorn[standard]>=0.30`, `sentence-transformers[onnx]==3.3.1`
|
|
108
|
+
|
|
109
|
+
### Notes
|
|
110
|
+
|
|
111
|
+
- Requires SQLite builds with FTS5 enabled for keyword/hybrid search (bundled with Python 3.10+)
|
|
112
|
+
- Works on Linux, macOS, Windows, and WASM environments
|
|
113
|
+
- No external services or servers required for core functionality; only Python and the core dependencies listed above
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Links
|
|
118
|
+
|
|
119
|
+
- **GitHub**: https://github.com/coderdayton/simplevecdb
|
|
120
|
+
- **PyPI**: https://pypi.org/project/simplevecdb/
|
|
121
|
+
- **Documentation**: https://coderdayton.github.io/simplevecdb/
|
|
122
|
+
- **License**: MIT
|
|
123
|
+
|
|
124
|
+
[1.0.0]: https://github.com/coderdayton/simplevecdb/releases/tag/v1.0.0
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# Contributing to SimpleVecDB
|
|
2
|
+
|
|
3
|
+
Thanks for considering a contribution to SimpleVecDB; your help steadily improves this local-first vector database.
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
### Prerequisites
|
|
8
|
+
|
|
9
|
+
- Python 3.10+
|
|
10
|
+
- `uv` (recommended) or `pip`
|
|
11
|
+
- Git
|
|
12
|
+
|
|
13
|
+
### Local Setup
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
git clone https://github.com/coderdayton/simplevecdb.git
|
|
17
|
+
cd simplevecdb
|
|
18
|
+
|
|
19
|
+
# Install dependencies with development tools
|
|
20
|
+
uv sync
|
|
21
|
+
|
|
22
|
+
# Or with pip
|
|
23
|
+
pip install -e ".[dev]"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Project Structure
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
simplevecdb/
|
|
30
|
+
├── src/simplevecdb/
|
|
31
|
+
│ ├── core.py # Main VectorDB class
|
|
32
|
+
│ ├── types.py # Document, DistanceStrategy types
|
|
33
|
+
│ ├── config.py # Configuration management
|
|
34
|
+
│ ├── embeddings/
|
|
35
|
+
│ │ ├── models.py # Local embedding models
|
|
36
|
+
│ │ └── server.py # FastAPI embedding server
|
|
37
|
+
│ └── integrations/
|
|
38
|
+
│ ├── langchain.py # LangChain VectorStore wrapper
|
|
39
|
+
│ └── llamaindex.py # LlamaIndex VectorStore wrapper
|
|
40
|
+
├── tests/ # Unit, integration and performance tests
|
|
41
|
+
├── examples/ # RAG notebooks, demos
|
|
42
|
+
└── docs/ # Documentation
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Development Workflow
|
|
46
|
+
|
|
47
|
+
### Running Tests
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# All tests
|
|
51
|
+
pytest
|
|
52
|
+
|
|
53
|
+
# With coverage
|
|
54
|
+
pytest --cov=simplevecdb
|
|
55
|
+
|
|
56
|
+
# Specific test file
|
|
57
|
+
pytest tests/unit/test_search.py
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Code Style
|
|
61
|
+
|
|
62
|
+
- Follow PEP 8 standards
|
|
63
|
+
- Use type hints wherever possible (Python 3.10+ syntax: `list[str]` instead of `List[str]`)
|
|
64
|
+
- Lint and format your code before committing (e.g. `ruff` for linting; `ruff format` or `black` for formatting)
|
|
65
|
+
|
|
66
|
+
### Making Changes
|
|
67
|
+
|
|
68
|
+
1. **Create a feature branch**
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
git checkout -b feat/your-feature-name
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
2. **Make your changes** and commit with clear messages
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
git commit -m "feat: add cool feature" # or fix:, docs:, etc.
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
3. **Add/update tests** for any new functionality
|
|
81
|
+
|
|
82
|
+
4. **Run tests locally** to ensure nothing breaks
|
|
83
|
+
|
|
84
|
+
5. **Submit a pull request** with a clear description
|
|
85
|
+
|
|
86
|
+
## Areas for Contribution
|
|
87
|
+
|
|
88
|
+
### High Priority
|
|
89
|
+
|
|
90
|
+
- **HNSW indexing**: Faster approximate nearest neighbor search (waiting on sqlite-vec)
|
|
91
|
+
- **Advanced Metadata filtering**: Complex WHERE clause support (OR, nested queries)
|
|
92
|
+
- **Documentation**: Docstrings, guides, API docs
|
|
93
|
+
|
|
94
|
+
### Medium Priority
|
|
95
|
+
|
|
96
|
+
- **Custom Quantization**: Support for custom quantization tables/centroids
|
|
97
|
+
- **Performance benchmarks**: Add more comprehensive benchmarks (1M+ vectors)
|
|
98
|
+
- **Integration tests**: Expand test coverage for LangChain/LlamaIndex
|
|
99
|
+
|
|
100
|
+
### Lower Priority
|
|
101
|
+
|
|
102
|
+
- **GUI**: Desktop app (Tauri-based)
|
|
103
|
+
- **Encryption**: SQLCipher integration
|
|
104
|
+
- **Analytics**: Query performance monitoring
|
|
105
|
+
|
|
106
|
+
## Testing Guidelines
|
|
107
|
+
|
|
108
|
+
- Write tests for all new features
|
|
109
|
+
- Ensure tests pass locally before submitting PR
|
|
110
|
+
- Aim for at least 90% code coverage (CI fails the build below 90%)
|
|
111
|
+
- Test edge cases (empty vectors, large datasets, etc.)
|
|
112
|
+
|
|
113
|
+
Example test structure:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
def test_similarity_search_with_k():
|
|
117
|
+
db = VectorDB(":memory:")
|
|
118
|
+
collection = db.collection("default")
|
|
119
|
+
collection.add_texts(["doc1", "doc2", "doc3"])
|
|
120
|
+
results = collection.similarity_search("query", k=2)
|
|
121
|
+
assert len(results) == 2
|
|
122
|
+
assert all(isinstance(score, float) for _, score in results)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Documentation
|
|
126
|
+
|
|
127
|
+
- Update docstrings for any API changes
|
|
128
|
+
- Add examples in the `examples/` directory for new features
|
|
129
|
+
- Update README.md if adding major features
|
|
130
|
+
- Use type hints to make APIs self-documenting
|
|
131
|
+
|
|
132
|
+
## Performance Considerations
|
|
133
|
+
|
|
134
|
+
- SimpleVecDB prioritizes simplicity over maximum performance
|
|
135
|
+
- Benchmark large-scale operations (10k+ vectors)
|
|
136
|
+
- Use NumPy efficiently for vector operations
|
|
137
|
+
- Minimize database round-trips
|
|
138
|
+
|
|
139
|
+
## Debugging
|
|
140
|
+
|
|
141
|
+
Enable verbose logging:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
import logging
|
|
145
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Run the embedding server locally for testing:
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
simplevecdb-server
|
|
152
|
+
# Server runs at http://localhost:53287 by default
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Submitting a Pull Request
|
|
156
|
+
|
|
157
|
+
1. Ensure all tests pass: `pytest`
|
|
158
|
+
2. Keep commits clean and focused
|
|
159
|
+
3. Write a clear PR description explaining:
|
|
160
|
+
- What problem does it solve?
|
|
161
|
+
- How does it work?
|
|
162
|
+
- Any breaking changes?
|
|
163
|
+
4. Link any related issues
|
|
164
|
+
5. Be patient — we'll review as soon as we can!
|
|
165
|
+
|
|
166
|
+
## Questions?
|
|
167
|
+
|
|
168
|
+
- Open a GitHub issue for bugs or feature requests
|
|
169
|
+
- Reach out to [@coderdayton](https://github.com/coderdayton) on GitHub
|
|
170
|
+
- Check existing issues before filing a duplicate
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
**Thank you for contributing!** Every bit helps make SimpleVecDB better for everyone. 🚀
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Dayton Dunbar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|