patchvec 0.5.8.1__tar.gz → 0.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {patchvec-0.5.8.1 → patchvec-0.5.9}/ABOUT.md +10 -7
- {patchvec-0.5.8.1/patchvec.egg-info → patchvec-0.5.9}/PKG-INFO +14 -12
- {patchvec-0.5.8.1 → patchvec-0.5.9}/README.md +63 -42
- {patchvec-0.5.8.1 → patchvec-0.5.9}/config.yml.example +51 -45
- {patchvec-0.5.8.1 → patchvec-0.5.9/patchvec.egg-info}/PKG-INFO +14 -12
- {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/SOURCES.txt +35 -15
- {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/requires.txt +2 -4
- patchvec-0.5.9/pave/assets/config.yml.example +224 -0
- patchvec-0.5.9/pave/assets/tenants.yml.example +19 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/auth.py +5 -5
- patchvec-0.5.9/pave/backends/__init__.py +13 -0
- patchvec-0.5.9/pave/backends/base.py +29 -0
- patchvec-0.5.9/pave/backends/faiss.py +153 -0
- patchvec-0.5.9/pave/backends/qdrant.py +46 -0
- patchvec-0.5.9/pave/cli.py +337 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/config.py +120 -23
- patchvec-0.5.9/pave/embedders/__init__.py +7 -0
- patchvec-0.5.9/pave/embedders/base.py +21 -0
- patchvec-0.5.9/pave/embedders/factory.py +22 -0
- patchvec-0.5.9/pave/embedders/openai.py +47 -0
- patchvec-0.5.9/pave/embedders/sbert.py +69 -0
- patchvec-0.5.9/pave/filters.py +164 -0
- patchvec-0.5.9/pave/main.py +339 -0
- patchvec-0.5.9/pave/metadb.py +603 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/metrics.py +2 -2
- patchvec-0.5.9/pave/routes/__init__.py +16 -0
- patchvec-0.5.9/pave/routes/admin.py +131 -0
- patchvec-0.5.9/pave/routes/collections.py +123 -0
- patchvec-0.5.9/pave/routes/documents.py +164 -0
- patchvec-0.5.9/pave/routes/health.py +107 -0
- patchvec-0.5.9/pave/routes/search.py +177 -0
- patchvec-0.5.9/pave/runtime_paths.py +89 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/service.py +70 -226
- patchvec-0.5.9/pave/stores/__init__.py +6 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/stores/base.py +19 -7
- patchvec-0.5.9/pave/stores/local.py +807 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/setup.py +6 -8
- patchvec-0.5.9/tests/test_bench_search_latency.py +56 -0
- patchvec-0.5.9/tests/test_bench_stress.py +56 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_cli.py +47 -5
- patchvec-0.5.9/tests/test_concurrent_upsert.py +62 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_config_runtime.py +22 -8
- patchvec-0.5.9/tests/test_config_tenants.py +246 -0
- patchvec-0.5.9/tests/test_csv_ingest.py +141 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_csv_ingest_api.py +14 -0
- patchvec-0.5.9/tests/test_data_export.py +173 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_delete_document.py +4 -3
- patchvec-0.5.9/tests/test_docid_default.py +49 -0
- patchvec-0.5.9/tests/test_faiss_backend.py +86 -0
- patchvec-0.5.8.1/tests/test_txtai_store.py → patchvec-0.5.9/tests/test_faiss_store.py +40 -14
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_health.py +1 -1
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_list_collections.py +12 -14
- patchvec-0.5.9/tests/test_main_args.py +28 -0
- patchvec-0.5.9/tests/test_meta_store.py +490 -0
- patchvec-0.5.9/tests/test_metadata_split.py +89 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_metrics.py +10 -10
- patchvec-0.5.9/tests/test_relevance.py +297 -0
- patchvec-0.5.9/tests/test_runtime_paths.py +83 -0
- patchvec-0.5.9/tests/test_sbert_embedder.py +243 -0
- patchvec-0.5.8.1/tests/test_txtai_store_cache_race.py → patchvec-0.5.9/tests/test_store_cache_race.py +4 -2
- patchvec-0.5.8.1/tests/test_txtai_store_catalog_metrics.py → patchvec-0.5.9/tests/test_store_catalog_metrics.py +4 -3
- patchvec-0.5.9/tests/test_store_close_race.py +171 -0
- patchvec-0.5.8.1/tests/test_txtai_store_filters.py → patchvec-0.5.9/tests/test_store_filters.py +6 -43
- patchvec-0.5.9/tests/test_store_meta_fetch_scope.py +152 -0
- patchvec-0.5.9/tests/test_store_sanitized_filter_parity.py +171 -0
- patchvec-0.5.8.1/pave/cli.py +0 -211
- patchvec-0.5.8.1/pave/embedders/__init__.py +0 -4
- patchvec-0.5.8.1/pave/embedders/base.py +0 -12
- patchvec-0.5.8.1/pave/embedders/factory.py +0 -21
- patchvec-0.5.8.1/pave/embedders/openai_emb.py +0 -30
- patchvec-0.5.8.1/pave/embedders/sbert_emb.py +0 -24
- patchvec-0.5.8.1/pave/embedders/txtai_emb.py +0 -58
- patchvec-0.5.8.1/pave/main.py +0 -864
- patchvec-0.5.8.1/pave/meta_store.py +0 -320
- patchvec-0.5.8.1/pave/stores/__init__.py +0 -4
- patchvec-0.5.8.1/pave/stores/factory.py +0 -18
- patchvec-0.5.8.1/pave/stores/qdrant_store.py +0 -38
- patchvec-0.5.8.1/pave/stores/txtai_store.py +0 -945
- patchvec-0.5.8.1/tests/test_csv_ingest.py +0 -81
- patchvec-0.5.8.1/tests/test_data_export.py +0 -91
- patchvec-0.5.8.1/tests/test_docid_default.py +0 -41
- patchvec-0.5.8.1/tests/test_meta_store.py +0 -91
- patchvec-0.5.8.1/tests/test_txtai_concurrent_upsert.py +0 -77
- patchvec-0.5.8.1/tests/test_txtai_store_close_race.py +0 -79
- patchvec-0.5.8.1/tests/test_txtai_store_meta_fetch_scope.py +0 -59
- patchvec-0.5.8.1/tests/test_txtai_store_sql_safety.py +0 -116
- {patchvec-0.5.8.1 → patchvec-0.5.9}/LICENSE +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/MANIFEST.in +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/dependency_links.txt +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/entry_points.txt +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/top_level.txt +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/__init__.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/assets/__init__.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/assets/patchvec_icon_192.png +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/assets/ui.html +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/log.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/preprocess.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/schemas.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/ui.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/requirements-cpu.txt +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/setup.cfg +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_admin_tenants.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_auth.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_auth_api.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_collection_rename.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_collections.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_ingest_errors.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_ingest_size_limit.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_lazy_app.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_log.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_request_id.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_search_errors.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_tenant_rate_limit.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_ui.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_upload_search_csv.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_upload_search_pdf.py +0 -0
- {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_upload_search_txt.py +0 -0
|
@@ -12,7 +12,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
|
12
12
|
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
13
13
|
- REST and CLI entry points
|
|
14
14
|
- Health/metrics endpoints + Prometheus exporter
|
|
15
|
-
- Pluggable embeddings and stores; default
|
|
15
|
+
- Pluggable embeddings and stores; default stack is local FAISS + SBERT
|
|
16
16
|
|
|
17
17
|
## Requirements
|
|
18
18
|
- Python 3.10–3.14
|
|
@@ -47,27 +47,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
|
47
47
|
```
|
|
48
48
|
|
|
49
49
|
## Minimal config (optional)
|
|
50
|
-
By default PatchVec runs with sensible local defaults.
|
|
51
|
-
|
|
50
|
+
By default PatchVec runs with sensible local defaults. For a user install,
|
|
51
|
+
customize `~/patchvec/config.yml`:
|
|
52
52
|
```yaml
|
|
53
53
|
vector_store:
|
|
54
|
-
type:
|
|
54
|
+
type: faiss
|
|
55
55
|
embedder:
|
|
56
|
-
type:
|
|
56
|
+
type: sbert
|
|
57
57
|
auth:
|
|
58
58
|
mode: static
|
|
59
59
|
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
60
60
|
```
|
|
61
61
|
Then export:
|
|
62
62
|
```bash
|
|
63
|
-
export PATCHVEC_CONFIG=./config.yml
|
|
64
63
|
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
65
64
|
```
|
|
65
|
+
If you keep the file elsewhere, point the runtime at it explicitly:
|
|
66
|
+
```bash
|
|
67
|
+
export PATCHVEC_CONFIG=/path/to/config.yml
|
|
68
|
+
```
|
|
66
69
|
|
|
67
70
|
## CLI example
|
|
68
71
|
```bash
|
|
69
72
|
pavecli create-collection demo books
|
|
70
|
-
pavecli
|
|
73
|
+
pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
|
|
71
74
|
--metadata='{"lang":"en"}'
|
|
72
75
|
pavecli search demo books "captain nemo" -k 5
|
|
73
76
|
```
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: patchvec
|
|
3
|
-
Version: 0.5.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.5.9
|
|
4
|
+
Summary: PaveDB — A lightweight, pluggable vector search microservice.
|
|
5
5
|
Author: Rodrigo Rodrigues da Silva
|
|
6
6
|
Author-email: rodrigo@flowlexi.com
|
|
7
7
|
License: AGPL-3.0-or-later
|
|
@@ -24,7 +24,6 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: fastapi>=0.115.0
|
|
26
26
|
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
27
|
-
Requires-Dist: txtai>=6.3.0
|
|
28
27
|
Requires-Dist: pydantic>=2.8.2
|
|
29
28
|
Requires-Dist: python-multipart>=0.0.9
|
|
30
29
|
Requires-Dist: pypdf>=5.0.0
|
|
@@ -32,14 +31,14 @@ Requires-Dist: pyyaml>=6.0.2
|
|
|
32
31
|
Requires-Dist: python-dotenv>=1.0.1
|
|
33
32
|
Requires-Dist: faiss-cpu>=1.7.1
|
|
34
33
|
Requires-Dist: torch>=2.10.0
|
|
34
|
+
Requires-Dist: sentence-transformers>=2.7.0
|
|
35
35
|
Provides-Extra: cpu
|
|
36
|
-
Provides-Extra: sbert
|
|
37
|
-
Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
|
|
38
36
|
Provides-Extra: openai
|
|
39
37
|
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
40
38
|
Provides-Extra: test
|
|
41
39
|
Requires-Dist: pytest; extra == "test"
|
|
42
40
|
Requires-Dist: httpx; extra == "test"
|
|
41
|
+
Requires-Dist: datasets>=3.5.0; extra == "test"
|
|
43
42
|
Dynamic: author
|
|
44
43
|
Dynamic: author-email
|
|
45
44
|
Dynamic: classifier
|
|
@@ -67,7 +66,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
|
67
66
|
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
68
67
|
- REST and CLI entry points
|
|
69
68
|
- Health/metrics endpoints + Prometheus exporter
|
|
70
|
-
- Pluggable embeddings and stores; default
|
|
69
|
+
- Pluggable embeddings and stores; default stack is local FAISS + SBERT
|
|
71
70
|
|
|
72
71
|
## Requirements
|
|
73
72
|
- Python 3.10–3.14
|
|
@@ -102,27 +101,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
|
102
101
|
```
|
|
103
102
|
|
|
104
103
|
## Minimal config (optional)
|
|
105
|
-
By default PatchVec runs with sensible local defaults.
|
|
106
|
-
|
|
104
|
+
By default PatchVec runs with sensible local defaults. For a user install,
|
|
105
|
+
customize `~/patchvec/config.yml`:
|
|
107
106
|
```yaml
|
|
108
107
|
vector_store:
|
|
109
|
-
type:
|
|
108
|
+
type: faiss
|
|
110
109
|
embedder:
|
|
111
|
-
type:
|
|
110
|
+
type: sbert
|
|
112
111
|
auth:
|
|
113
112
|
mode: static
|
|
114
113
|
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
115
114
|
```
|
|
116
115
|
Then export:
|
|
117
116
|
```bash
|
|
118
|
-
export PATCHVEC_CONFIG=./config.yml
|
|
119
117
|
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
120
118
|
```
|
|
119
|
+
If you keep the file elsewhere, point the runtime at it explicitly:
|
|
120
|
+
```bash
|
|
121
|
+
export PATCHVEC_CONFIG=/path/to/config.yml
|
|
122
|
+
```
|
|
121
123
|
|
|
122
124
|
## CLI example
|
|
123
125
|
```bash
|
|
124
126
|
pavecli create-collection demo books
|
|
125
|
-
pavecli
|
|
127
|
+
pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
|
|
126
128
|
--metadata='{"lang":"en"}'
|
|
127
129
|
pavecli search demo books "captain nemo" -k 5
|
|
128
130
|
```
|
|
@@ -1,32 +1,38 @@
|
|
|
1
1
|
<!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com> -->
|
|
2
2
|
<!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
|
|
3
3
|
|
|
4
|
-
#
|
|
4
|
+
# 🛣️ PatchVec — Vector search you can understand & deploy within minutes
|
|
5
5
|
|
|
6
|
-
PatchVec is a single-process vector search engine
|
|
7
|
-
documents, chunks and embeds them, and gives you semantic search with
|
|
8
|
-
full provenance — document id, page, character offset, and the exact
|
|
9
|
-
snippet that matched. No cluster, no managed service, no
|
|
10
|
-
opaque pipelines.
|
|
6
|
+
PatchVec is a single-process vector search engine for AI applications.
|
|
11
7
|
|
|
12
|
-
|
|
8
|
+
It ingests your documents, chunks and embeds them, and gives you semantic search with
|
|
9
|
+
full provenance — document id, page, character offset, and the exact snippet that
|
|
10
|
+
matched.
|
|
11
|
+
|
|
12
|
+
Built for developers shipping **RAG (Retrieval-Augmented Generation)** systems, PatchVec
|
|
13
|
+
provides a straightforward service for **vector search, embeddings pipelines, and
|
|
14
|
+
semantic retrieval**. It runs as a **REST service, a CLI tool, or an embedded library**,
|
|
15
|
+
so you can ship your first version quickly and keep growing with the same codebase as
|
|
16
|
+
your application scales. No cluster. No opaque pipelines.
|
|
17
|
+
|
|
18
|
+
Drop a file in. Search it. See exactly what came back — and why. Minutes after your
|
|
19
|
+
first commit.
|
|
13
20
|
|
|
14
21
|
## ⚙️ Why PatchVec
|
|
15
22
|
|
|
16
|
-
- **Ingest files, not embeddings** — hand it a PDF, CSV, or TXT
|
|
17
|
-
PatchVec chunks, embeds, and indexes it. No preprocessing pipeline
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
## 🧭 Workflows
|
|
23
|
+
- **Ingest files, not embeddings** — hand it a PDF, CSV, or TXT (more formats to come)
|
|
24
|
+
and PatchVec chunks, embeds, and indexes it. No preprocessing pipeline to build.
|
|
25
|
+
- **Full provenance on every hit** — every search result traces back to a document,
|
|
26
|
+
page, and character offset. Latency and request traceability are built into every
|
|
27
|
+
response.
|
|
28
|
+
- **Multi-tenant by default** — tenant/collection namespacing is built in, not bolted
|
|
29
|
+
on (and transparent when you just don't need it).
|
|
30
|
+
- **REST, CLI, or embed it** — run as an HTTP service, script via the CLI, or import the
|
|
31
|
+
library directly in your Python app.
|
|
32
|
+
- **Pluggable embeddings** (soon) — swap models per collection; wire in local or hosted
|
|
33
|
+
embedding backends.
|
|
34
|
+
|
|
35
|
+
## 🧭 How to
|
|
30
36
|
|
|
31
37
|
### 🐳 Docker workflow (prebuilt images)
|
|
32
38
|
|
|
@@ -68,8 +74,8 @@ docker rm -f patchvec
|
|
|
68
74
|
|
|
69
75
|
### 🐍 PyPI workflow
|
|
70
76
|
|
|
71
|
-
Install Patchvec from PyPI inside an isolated virtual environment
|
|
72
|
-
|
|
77
|
+
Install PatchVec from PyPI inside an isolated virtual environment. You can run it
|
|
78
|
+
purely from env vars, or later point it at an explicit config file.
|
|
73
79
|
|
|
74
80
|
**Requires Python 3.10–3.14.**
|
|
75
81
|
|
|
@@ -80,17 +86,13 @@ source .venv-pv/bin/activate
|
|
|
80
86
|
python -m pip install --upgrade pip
|
|
81
87
|
pip install "patchvec[cpu]"
|
|
82
88
|
|
|
83
|
-
#
|
|
84
|
-
|
|
85
|
-
curl -LO https://raw.githubusercontent.com/rodrigopitanga/patchvec/main/tenants.yml.example
|
|
86
|
-
cp config.yml.example config.yml
|
|
87
|
-
cp tenants.yml.example tenants.yml
|
|
89
|
+
# create the default instance under ~/patchvec
|
|
90
|
+
pavecli init
|
|
88
91
|
|
|
89
92
|
# sample demo corpus
|
|
90
93
|
curl -LO https://raw.githubusercontent.com/rodrigopitanga/patchvec/main/demo/20k_leagues.txt
|
|
91
94
|
|
|
92
|
-
#
|
|
93
|
-
export PATCHVEC_CONFIG="$HOME/pv/config.yml"
|
|
95
|
+
# set an admin key for the generated config
|
|
94
96
|
export PATCHVEC_GLOBAL_KEY=super-sekret
|
|
95
97
|
|
|
96
98
|
# option A: run the service (stays up until you stop it)
|
|
@@ -113,6 +115,14 @@ pavecli search demo books "captain nemo" -k 3
|
|
|
113
115
|
|
|
114
116
|
Deactivate the virtual environment with `deactivate` when finished.
|
|
115
117
|
|
|
118
|
+
By default, a non-dev runtime reads `~/patchvec/config.yml` if present, keeps
|
|
119
|
+
tenant sidecar loading disabled unless `auth.tenants_file` is configured, and
|
|
120
|
+
stores data in `~/patchvec/data`. You can override any of that with the
|
|
121
|
+
`PATCHVEC_*` environment scheme or by pointing `PATCHVEC_CONFIG` at an explicit
|
|
122
|
+
config file. For alternate instances, use `pavecli init /path/to/instance` and
|
|
123
|
+
then point commands at that root with `pavesrv --home=/path/to/instance` or
|
|
124
|
+
`pavecli <command> ... --home /path/to/instance`.
|
|
125
|
+
|
|
116
126
|
### 🌐 REST API and Web UI usage
|
|
117
127
|
|
|
118
128
|
When the server is running (either via Docker or `pavesrv`), the API listens on
|
|
@@ -168,9 +178,8 @@ The Swagger UI is available at `http://localhost:8086/`.
|
|
|
168
178
|
|
|
169
179
|
Health and metrics endpoints are available at `/health` and `/metrics`.
|
|
170
180
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
`PATCHVEC_AUTH__MODE`, etc.), which precedes conf files.
|
|
181
|
+
Runtime options are also accepted via the `PATCHVEC_*` environment variable scheme
|
|
182
|
+
(`PATCHVEC_SERVER__PORT`, `PATCHVEC_AUTH__MODE`, etc.), which takes precedence over config files.
|
|
174
183
|
|
|
175
184
|
### 🔁 Live data updates
|
|
176
185
|
|
|
@@ -178,7 +187,7 @@ Patchvec supports live data refresh without restarting the server. Re-ingest the
|
|
|
178
187
|
`docid` to *replace* vector content (filename doesn't matter - metadata will change
|
|
179
188
|
though), or explicitly delete the document and then ingest it again.
|
|
180
189
|
|
|
181
|
-
|
|
190
|
+
Re-ingest to replace (CLI path example):
|
|
182
191
|
|
|
183
192
|
```bash
|
|
184
193
|
# initial ingest
|
|
@@ -188,11 +197,11 @@ pavecli ingest demo books 20k_leagues.txt --docid=verne-20k
|
|
|
188
197
|
cp 20k_leagues.txt 20k_leagues_v2.txt
|
|
189
198
|
echo "THE END" >> 20k_leagues_v2.txt
|
|
190
199
|
|
|
191
|
-
# re-ingest with the same docid to replace the indexed content
|
|
200
|
+
# re-ingest with the *same docid* to replace the indexed content
|
|
192
201
|
pavecli ingest demo books 20k_leagues_v2.txt --docid=verne-20k
|
|
193
202
|
```
|
|
194
203
|
|
|
195
|
-
|
|
204
|
+
Delete by ID then ingest (REST path example):
|
|
196
205
|
|
|
197
206
|
```bash
|
|
198
207
|
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
@@ -208,9 +217,10 @@ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
|
208
217
|
|
|
209
218
|
### 🛠️ Developer workflow
|
|
210
219
|
|
|
211
|
-
Building from source relies on `Makefile` shortcuts (`make install-dev`,
|
|
212
|
-
`
|
|
213
|
-
|
|
220
|
+
Building from source relies on `Makefile` shortcuts (`make install-dev`, `make serve`,
|
|
221
|
+
`make test`, `make check`, etc.).
|
|
222
|
+
|
|
223
|
+
The full contributor workflow, target reference, and coding style live in
|
|
214
224
|
[CONTRIBUTING.md](CONTRIBUTING.md). Performance benchmarks are documented in
|
|
215
225
|
[README-benchmarks.md](README-benchmarks.md).
|
|
216
226
|
|
|
@@ -229,9 +239,20 @@ See `config.yml.example` for the full logging configuration.
|
|
|
229
239
|
|
|
230
240
|
## 🗺️ Roadmap
|
|
231
241
|
|
|
232
|
-
Short
|
|
233
|
-
issue titled `claim: <task ID>`, and
|
|
242
|
+
Short/mid-term tasks and long-term plans are all tracked in
|
|
243
|
+
[`ROADMAP.md`](ROADMAP.md). Pick one, open an issue titled `claim: <task ID>`, and
|
|
244
|
+
ship a patch. If you find a bug, file it under the *Issues* tab.
|
|
234
245
|
|
|
235
246
|
## 📜 License
|
|
236
247
|
|
|
237
|
-
|
|
248
|
+
PatchVec is free software: you can use it, copy it, redistribute it and/or modify it
|
|
249
|
+
free of charge under the terms of the GNU Affero General Public License as published by
|
|
250
|
+
the Free Software Foundation, either version 3 of the License, or (at your option) any
|
|
251
|
+
later version.
|
|
252
|
+
|
|
253
|
+
PatchVec is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
254
|
+
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
255
|
+
PURPOSE. See the GNU Affero General Public License for more details.
|
|
256
|
+
|
|
257
|
+
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
258
|
+
Copyright (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com>
|
|
@@ -2,31 +2,50 @@
|
|
|
2
2
|
# Copy to config.yml and adjust.
|
|
3
3
|
# Secrets (API keys) should live in a separate untracked file — see tenants.yml.example.
|
|
4
4
|
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
5
|
+
# Default user-install paths:
|
|
6
|
+
# config: ~/pavedb/config.yml
|
|
7
|
+
# tenants: ~/pavedb/tenants.yml
|
|
8
|
+
# data: ~/pavedb/data
|
|
7
9
|
#
|
|
8
|
-
#
|
|
10
|
+
# Distro-like install example:
|
|
11
|
+
# config: /etc/pavedb/config.yml
|
|
12
|
+
# tenants: /var/pavedb/tenants.yml
|
|
13
|
+
# data: /var/pavedb/data
|
|
14
|
+
#
|
|
15
|
+
# Config file location override:
|
|
16
|
+
# PAVEDB_CONFIG=/etc/pavedb/config.yml
|
|
17
|
+
#
|
|
18
|
+
# In Docker/systemd deployments always set PAVEDB_CONFIG explicitly — the
|
|
9
19
|
# default path expands ~ relative to the process user, which may not be what
|
|
10
20
|
# you expect inside a container. Example compose snippet:
|
|
11
21
|
#
|
|
12
22
|
# environment:
|
|
13
|
-
#
|
|
23
|
+
# PAVEDB_CONFIG: /etc/pavedb/config.yml
|
|
14
24
|
# volumes:
|
|
15
|
-
# - ./config.yml:/etc/
|
|
25
|
+
# - ./config.yml:/etc/pavedb/config.yml:ro
|
|
16
26
|
#
|
|
17
27
|
# All keys can also be overridden inline via environment variables:
|
|
18
|
-
#
|
|
19
|
-
#
|
|
28
|
+
# PAVEDB_<KEY>=value (top-level, e.g. PAVEDB_DATA_DIR)
|
|
29
|
+
# PAVEDB_<SECTION>__<KEY>=val (nested, e.g. PAVEDB_LOG__LEVEL=debug)
|
|
30
|
+
# Legacy PATCHVEC_* vars still work in v0.5.9 but will be removed in v0.6.
|
|
31
|
+
#
|
|
32
|
+
# `auth.tenants_file` is optional. If unset, its default is `None` and no
|
|
33
|
+
# tenants sidecar file is loaded.
|
|
34
|
+
# If set, PaveDB loads that sidecar first. Then inline tenant config is
|
|
35
|
+
# applied with precedence:
|
|
36
|
+
# env vars > config.yml > tenants.yml > defaults
|
|
37
|
+
# Example: define tenant "acme" entirely from env:
|
|
38
|
+
# PAVEDB_AUTH__API_KEYS__acme=change-me
|
|
39
|
+
# PAVEDB_TENANTS__acme__MAX_CONCURRENT=5
|
|
20
40
|
|
|
21
41
|
# ---------------------------------------------------------------------------
|
|
22
42
|
# Storage
|
|
23
43
|
# ---------------------------------------------------------------------------
|
|
24
44
|
|
|
25
45
|
# Data directory — ~ is expanded at startup.
|
|
26
|
-
# Default (library/dev): ~/
|
|
27
|
-
# For
|
|
28
|
-
|
|
29
|
-
data_dir: ~/patchvec/data
|
|
46
|
+
# Default (library/dev): ~/pavedb/data
|
|
47
|
+
# For distro-like installs use an absolute path, e.g. /var/pavedb/data.
|
|
48
|
+
data_dir: ~/pavedb/data
|
|
30
49
|
|
|
31
50
|
# ---------------------------------------------------------------------------
|
|
32
51
|
# Common collection
|
|
@@ -55,11 +74,13 @@ auth:
|
|
|
55
74
|
|
|
56
75
|
# Global admin key — grants access to all tenants and admin routes.
|
|
57
76
|
# Always read from the environment; never hardcode in committed files.
|
|
58
|
-
global_key: ${
|
|
77
|
+
global_key: ${PAVEDB_GLOBAL_KEY}
|
|
59
78
|
|
|
60
|
-
# External tenant→key mapping file
|
|
61
|
-
#
|
|
62
|
-
|
|
79
|
+
# External tenant→key mapping file.
|
|
80
|
+
# Keep the same key paths there (`auth.api_keys`, `tenants.*`).
|
|
81
|
+
# User install default: ~/pavedb/tenants.yml
|
|
82
|
+
# Distro-like install: /var/pavedb/tenants.yml
|
|
83
|
+
# tenants_file: ~/pavedb/tenants.yml
|
|
63
84
|
|
|
64
85
|
# Inline tenant→key mapping (fallback; keep empty in the repo).
|
|
65
86
|
api_keys: {}
|
|
@@ -69,51 +90,36 @@ auth:
|
|
|
69
90
|
# ---------------------------------------------------------------------------
|
|
70
91
|
|
|
71
92
|
vector_store:
|
|
72
|
-
#
|
|
73
|
-
|
|
74
|
-
type: default
|
|
93
|
+
# faiss — local FAISS index (built-in, no extra services required).
|
|
94
|
+
type: faiss
|
|
75
95
|
|
|
76
|
-
# Options for type=
|
|
77
|
-
|
|
96
|
+
# Options for type=faiss.
|
|
97
|
+
faiss:
|
|
78
98
|
embed_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# Options for type=qdrant.
|
|
82
|
-
# NOTE: QdrantStore is currently a stub — all methods raise NotImplementedError.
|
|
83
|
-
# Contributions welcome! See pave/stores/qdrant_store.py.
|
|
84
|
-
qdrant:
|
|
85
|
-
url: http://localhost:6333
|
|
86
|
-
api_key: ${PATCHVEC_QDRANT_API_KEY}
|
|
87
|
-
prefer_payload_filters: true
|
|
88
|
-
collection_prefix: patchvec_
|
|
99
|
+
max_query_chars: 4000
|
|
89
100
|
|
|
90
101
|
# ---------------------------------------------------------------------------
|
|
91
102
|
# Embedder
|
|
92
103
|
# ---------------------------------------------------------------------------
|
|
93
104
|
# Controls how text is converted to vectors before indexing and querying.
|
|
94
105
|
# The embedder is shared across all collections (per-collection config: v0.6).
|
|
95
|
-
#
|
|
96
|
-
# NOTE: the sbert and openai embedder types are currently stubs — the factory
|
|
97
|
-
# exists but is not wired into the default store. Only type=default (txtai) is
|
|
98
|
-
# fully functional today. Per-collection embedder selection lands in v0.6.
|
|
99
|
-
# Contributions welcome! See pave/embedders/ and pave/stores/factory.py.
|
|
100
106
|
|
|
101
107
|
embedder:
|
|
102
|
-
#
|
|
103
|
-
#
|
|
104
|
-
|
|
105
|
-
type: default
|
|
106
|
-
|
|
107
|
-
txtai:
|
|
108
|
-
path: sentence-transformers/paraphrase-MiniLM-L3-v2
|
|
108
|
+
# sbert — direct sentence-transformers (recommended local default).
|
|
109
|
+
# openai — OpenAI embeddings API (requires API key, adds latency).
|
|
110
|
+
type: sbert
|
|
109
111
|
|
|
112
|
+
# Used by type=sbert.
|
|
110
113
|
sbert:
|
|
111
114
|
model: sentence-transformers/all-MiniLM-L6-v2
|
|
112
115
|
batch_size: 64
|
|
113
|
-
device: auto # cpu | cuda |
|
|
116
|
+
device: auto # cpu | cuda | mps | auto
|
|
114
117
|
|
|
118
|
+
# Used by type=openai.
|
|
115
119
|
openai:
|
|
116
|
-
|
|
120
|
+
model: text-embedding-3-small
|
|
121
|
+
batch_size: 256
|
|
122
|
+
api_key: ${PAVEDB_OPENAI_API_KEY}
|
|
117
123
|
dim: 1536
|
|
118
124
|
|
|
119
125
|
# ---------------------------------------------------------------------------
|
|
@@ -194,7 +200,7 @@ server:
|
|
|
194
200
|
|
|
195
201
|
log:
|
|
196
202
|
# Dev log level (stderr). DEBUG | INFO | WARNING | ERROR — default INFO.
|
|
197
|
-
# Overridden by
|
|
203
|
+
# Overridden by PAVEDB_LOG__LEVEL env var (e.g. in Makefile: debug).
|
|
198
204
|
# Per-namespace overrides: log.debug / log.watch / log.quiet (list of loggers).
|
|
199
205
|
level: INFO
|
|
200
206
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: patchvec
|
|
3
|
-
Version: 0.5.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.5.9
|
|
4
|
+
Summary: PaveDB — A lightweight, pluggable vector search microservice.
|
|
5
5
|
Author: Rodrigo Rodrigues da Silva
|
|
6
6
|
Author-email: rodrigo@flowlexi.com
|
|
7
7
|
License: AGPL-3.0-or-later
|
|
@@ -24,7 +24,6 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: fastapi>=0.115.0
|
|
26
26
|
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
27
|
-
Requires-Dist: txtai>=6.3.0
|
|
28
27
|
Requires-Dist: pydantic>=2.8.2
|
|
29
28
|
Requires-Dist: python-multipart>=0.0.9
|
|
30
29
|
Requires-Dist: pypdf>=5.0.0
|
|
@@ -32,14 +31,14 @@ Requires-Dist: pyyaml>=6.0.2
|
|
|
32
31
|
Requires-Dist: python-dotenv>=1.0.1
|
|
33
32
|
Requires-Dist: faiss-cpu>=1.7.1
|
|
34
33
|
Requires-Dist: torch>=2.10.0
|
|
34
|
+
Requires-Dist: sentence-transformers>=2.7.0
|
|
35
35
|
Provides-Extra: cpu
|
|
36
|
-
Provides-Extra: sbert
|
|
37
|
-
Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
|
|
38
36
|
Provides-Extra: openai
|
|
39
37
|
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
40
38
|
Provides-Extra: test
|
|
41
39
|
Requires-Dist: pytest; extra == "test"
|
|
42
40
|
Requires-Dist: httpx; extra == "test"
|
|
41
|
+
Requires-Dist: datasets>=3.5.0; extra == "test"
|
|
43
42
|
Dynamic: author
|
|
44
43
|
Dynamic: author-email
|
|
45
44
|
Dynamic: classifier
|
|
@@ -67,7 +66,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
|
67
66
|
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
68
67
|
- REST and CLI entry points
|
|
69
68
|
- Health/metrics endpoints + Prometheus exporter
|
|
70
|
-
- Pluggable embeddings and stores; default
|
|
69
|
+
- Pluggable embeddings and stores; default stack is local FAISS + SBERT
|
|
71
70
|
|
|
72
71
|
## Requirements
|
|
73
72
|
- Python 3.10–3.14
|
|
@@ -102,27 +101,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
|
102
101
|
```
|
|
103
102
|
|
|
104
103
|
## Minimal config (optional)
|
|
105
|
-
By default PatchVec runs with sensible local defaults.
|
|
106
|
-
|
|
104
|
+
By default PatchVec runs with sensible local defaults. For a user install,
|
|
105
|
+
customize `~/patchvec/config.yml`:
|
|
107
106
|
```yaml
|
|
108
107
|
vector_store:
|
|
109
|
-
type:
|
|
108
|
+
type: faiss
|
|
110
109
|
embedder:
|
|
111
|
-
type:
|
|
110
|
+
type: sbert
|
|
112
111
|
auth:
|
|
113
112
|
mode: static
|
|
114
113
|
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
115
114
|
```
|
|
116
115
|
Then export:
|
|
117
116
|
```bash
|
|
118
|
-
export PATCHVEC_CONFIG=./config.yml
|
|
119
117
|
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
120
118
|
```
|
|
119
|
+
If you keep the file elsewhere, point the runtime at it explicitly:
|
|
120
|
+
```bash
|
|
121
|
+
export PATCHVEC_CONFIG=/path/to/config.yml
|
|
122
|
+
```
|
|
121
123
|
|
|
122
124
|
## CLI example
|
|
123
125
|
```bash
|
|
124
126
|
pavecli create-collection demo books
|
|
125
|
-
pavecli
|
|
127
|
+
pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
|
|
126
128
|
--metadata='{"lang":"en"}'
|
|
127
129
|
pavecli search demo books "captain nemo" -k 5
|
|
128
130
|
```
|
|
@@ -15,59 +15,79 @@ pave/__init__.py
|
|
|
15
15
|
pave/auth.py
|
|
16
16
|
pave/cli.py
|
|
17
17
|
pave/config.py
|
|
18
|
+
pave/filters.py
|
|
18
19
|
pave/log.py
|
|
19
20
|
pave/main.py
|
|
20
|
-
pave/
|
|
21
|
+
pave/metadb.py
|
|
21
22
|
pave/metrics.py
|
|
22
23
|
pave/preprocess.py
|
|
24
|
+
pave/runtime_paths.py
|
|
23
25
|
pave/schemas.py
|
|
24
26
|
pave/service.py
|
|
25
27
|
pave/ui.py
|
|
26
28
|
pave/assets/__init__.py
|
|
29
|
+
pave/assets/config.yml.example
|
|
27
30
|
pave/assets/patchvec_icon_192.png
|
|
31
|
+
pave/assets/tenants.yml.example
|
|
28
32
|
pave/assets/ui.html
|
|
33
|
+
pave/backends/__init__.py
|
|
34
|
+
pave/backends/base.py
|
|
35
|
+
pave/backends/faiss.py
|
|
36
|
+
pave/backends/qdrant.py
|
|
29
37
|
pave/embedders/__init__.py
|
|
30
38
|
pave/embedders/base.py
|
|
31
39
|
pave/embedders/factory.py
|
|
32
|
-
pave/embedders/
|
|
33
|
-
pave/embedders/
|
|
34
|
-
pave/
|
|
40
|
+
pave/embedders/openai.py
|
|
41
|
+
pave/embedders/sbert.py
|
|
42
|
+
pave/routes/__init__.py
|
|
43
|
+
pave/routes/admin.py
|
|
44
|
+
pave/routes/collections.py
|
|
45
|
+
pave/routes/documents.py
|
|
46
|
+
pave/routes/health.py
|
|
47
|
+
pave/routes/search.py
|
|
35
48
|
pave/stores/__init__.py
|
|
36
49
|
pave/stores/base.py
|
|
37
|
-
pave/stores/
|
|
38
|
-
pave/stores/qdrant_store.py
|
|
39
|
-
pave/stores/txtai_store.py
|
|
50
|
+
pave/stores/local.py
|
|
40
51
|
tests/test_admin_tenants.py
|
|
41
52
|
tests/test_auth.py
|
|
42
53
|
tests/test_auth_api.py
|
|
54
|
+
tests/test_bench_search_latency.py
|
|
55
|
+
tests/test_bench_stress.py
|
|
43
56
|
tests/test_cli.py
|
|
44
57
|
tests/test_collection_rename.py
|
|
45
58
|
tests/test_collections.py
|
|
59
|
+
tests/test_concurrent_upsert.py
|
|
46
60
|
tests/test_config_runtime.py
|
|
61
|
+
tests/test_config_tenants.py
|
|
47
62
|
tests/test_csv_ingest.py
|
|
48
63
|
tests/test_csv_ingest_api.py
|
|
49
64
|
tests/test_data_export.py
|
|
50
65
|
tests/test_delete_document.py
|
|
51
66
|
tests/test_docid_default.py
|
|
67
|
+
tests/test_faiss_backend.py
|
|
68
|
+
tests/test_faiss_store.py
|
|
52
69
|
tests/test_health.py
|
|
53
70
|
tests/test_ingest_errors.py
|
|
54
71
|
tests/test_ingest_size_limit.py
|
|
55
72
|
tests/test_lazy_app.py
|
|
56
73
|
tests/test_list_collections.py
|
|
57
74
|
tests/test_log.py
|
|
75
|
+
tests/test_main_args.py
|
|
58
76
|
tests/test_meta_store.py
|
|
77
|
+
tests/test_metadata_split.py
|
|
59
78
|
tests/test_metrics.py
|
|
79
|
+
tests/test_relevance.py
|
|
60
80
|
tests/test_request_id.py
|
|
81
|
+
tests/test_runtime_paths.py
|
|
82
|
+
tests/test_sbert_embedder.py
|
|
61
83
|
tests/test_search_errors.py
|
|
84
|
+
tests/test_store_cache_race.py
|
|
85
|
+
tests/test_store_catalog_metrics.py
|
|
86
|
+
tests/test_store_close_race.py
|
|
87
|
+
tests/test_store_filters.py
|
|
88
|
+
tests/test_store_meta_fetch_scope.py
|
|
89
|
+
tests/test_store_sanitized_filter_parity.py
|
|
62
90
|
tests/test_tenant_rate_limit.py
|
|
63
|
-
tests/test_txtai_concurrent_upsert.py
|
|
64
|
-
tests/test_txtai_store.py
|
|
65
|
-
tests/test_txtai_store_cache_race.py
|
|
66
|
-
tests/test_txtai_store_catalog_metrics.py
|
|
67
|
-
tests/test_txtai_store_close_race.py
|
|
68
|
-
tests/test_txtai_store_filters.py
|
|
69
|
-
tests/test_txtai_store_meta_fetch_scope.py
|
|
70
|
-
tests/test_txtai_store_sql_safety.py
|
|
71
91
|
tests/test_ui.py
|
|
72
92
|
tests/test_upload_search_csv.py
|
|
73
93
|
tests/test_upload_search_pdf.py
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
fastapi>=0.115.0
|
|
2
2
|
uvicorn[standard]>=0.30.6
|
|
3
|
-
txtai>=6.3.0
|
|
4
3
|
pydantic>=2.8.2
|
|
5
4
|
python-multipart>=0.0.9
|
|
6
5
|
pypdf>=5.0.0
|
|
@@ -8,15 +7,14 @@ pyyaml>=6.0.2
|
|
|
8
7
|
python-dotenv>=1.0.1
|
|
9
8
|
faiss-cpu>=1.7.1
|
|
10
9
|
torch>=2.10.0
|
|
10
|
+
sentence-transformers>=2.7.0
|
|
11
11
|
|
|
12
12
|
[cpu]
|
|
13
13
|
|
|
14
14
|
[openai]
|
|
15
15
|
openai>=1.0.0
|
|
16
16
|
|
|
17
|
-
[sbert]
|
|
18
|
-
sentence-transformers>=2.7.0
|
|
19
|
-
|
|
20
17
|
[test]
|
|
21
18
|
pytest
|
|
22
19
|
httpx
|
|
20
|
+
datasets>=3.5.0
|