patchvec 0.5.8__tar.gz → 0.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {patchvec-0.5.8 → patchvec-0.5.9}/ABOUT.md +10 -7
- {patchvec-0.5.8/patchvec.egg-info → patchvec-0.5.9}/PKG-INFO +14 -12
- patchvec-0.5.9/README.md +258 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/config.yml.example +51 -45
- {patchvec-0.5.8 → patchvec-0.5.9/patchvec.egg-info}/PKG-INFO +14 -12
- {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/SOURCES.txt +35 -15
- {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/requires.txt +2 -4
- patchvec-0.5.9/pave/assets/config.yml.example +224 -0
- patchvec-0.5.9/pave/assets/tenants.yml.example +19 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/auth.py +5 -5
- patchvec-0.5.9/pave/backends/__init__.py +13 -0
- patchvec-0.5.9/pave/backends/base.py +29 -0
- patchvec-0.5.9/pave/backends/faiss.py +153 -0
- patchvec-0.5.9/pave/backends/qdrant.py +46 -0
- patchvec-0.5.9/pave/cli.py +337 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/config.py +120 -23
- patchvec-0.5.9/pave/embedders/__init__.py +7 -0
- patchvec-0.5.9/pave/embedders/base.py +21 -0
- patchvec-0.5.9/pave/embedders/factory.py +22 -0
- patchvec-0.5.9/pave/embedders/openai.py +47 -0
- patchvec-0.5.9/pave/embedders/sbert.py +69 -0
- patchvec-0.5.9/pave/filters.py +164 -0
- patchvec-0.5.9/pave/main.py +339 -0
- patchvec-0.5.9/pave/metadb.py +603 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/metrics.py +2 -2
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/preprocess.py +2 -1
- patchvec-0.5.9/pave/routes/__init__.py +16 -0
- patchvec-0.5.9/pave/routes/admin.py +131 -0
- patchvec-0.5.9/pave/routes/collections.py +123 -0
- patchvec-0.5.9/pave/routes/documents.py +164 -0
- patchvec-0.5.9/pave/routes/health.py +107 -0
- patchvec-0.5.9/pave/routes/search.py +177 -0
- patchvec-0.5.9/pave/runtime_paths.py +89 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/service.py +78 -232
- patchvec-0.5.9/pave/stores/__init__.py +6 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/stores/base.py +22 -8
- patchvec-0.5.9/pave/stores/local.py +807 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/setup.py +6 -8
- patchvec-0.5.9/tests/test_bench_search_latency.py +56 -0
- patchvec-0.5.9/tests/test_bench_stress.py +56 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_cli.py +78 -5
- patchvec-0.5.9/tests/test_concurrent_upsert.py +62 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_config_runtime.py +22 -8
- patchvec-0.5.9/tests/test_config_tenants.py +246 -0
- patchvec-0.5.9/tests/test_csv_ingest.py +141 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_csv_ingest_api.py +14 -0
- patchvec-0.5.9/tests/test_data_export.py +173 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_delete_document.py +4 -3
- patchvec-0.5.9/tests/test_docid_default.py +49 -0
- patchvec-0.5.9/tests/test_faiss_backend.py +86 -0
- patchvec-0.5.8/tests/test_txtai_store.py → patchvec-0.5.9/tests/test_faiss_store.py +68 -12
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_health.py +1 -1
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_list_collections.py +12 -14
- patchvec-0.5.9/tests/test_main_args.py +28 -0
- patchvec-0.5.9/tests/test_meta_store.py +490 -0
- patchvec-0.5.9/tests/test_metadata_split.py +89 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_metrics.py +10 -10
- patchvec-0.5.9/tests/test_relevance.py +297 -0
- patchvec-0.5.9/tests/test_runtime_paths.py +83 -0
- patchvec-0.5.9/tests/test_sbert_embedder.py +243 -0
- patchvec-0.5.8/tests/test_txtai_store_cache_race.py → patchvec-0.5.9/tests/test_store_cache_race.py +4 -2
- patchvec-0.5.8/tests/test_txtai_store_catalog_metrics.py → patchvec-0.5.9/tests/test_store_catalog_metrics.py +4 -3
- patchvec-0.5.9/tests/test_store_close_race.py +171 -0
- patchvec-0.5.8/tests/test_txtai_store_filters.py → patchvec-0.5.9/tests/test_store_filters.py +6 -43
- patchvec-0.5.9/tests/test_store_meta_fetch_scope.py +152 -0
- patchvec-0.5.9/tests/test_store_sanitized_filter_parity.py +171 -0
- patchvec-0.5.8/README.md +0 -211
- patchvec-0.5.8/pave/cli.py +0 -211
- patchvec-0.5.8/pave/embedders/__init__.py +0 -4
- patchvec-0.5.8/pave/embedders/base.py +0 -12
- patchvec-0.5.8/pave/embedders/factory.py +0 -21
- patchvec-0.5.8/pave/embedders/openai_emb.py +0 -30
- patchvec-0.5.8/pave/embedders/sbert_emb.py +0 -24
- patchvec-0.5.8/pave/embedders/txtai_emb.py +0 -58
- patchvec-0.5.8/pave/main.py +0 -864
- patchvec-0.5.8/pave/meta_store.py +0 -320
- patchvec-0.5.8/pave/stores/__init__.py +0 -4
- patchvec-0.5.8/pave/stores/factory.py +0 -18
- patchvec-0.5.8/pave/stores/qdrant_store.py +0 -37
- patchvec-0.5.8/pave/stores/txtai_store.py +0 -950
- patchvec-0.5.8/tests/test_csv_ingest.py +0 -81
- patchvec-0.5.8/tests/test_data_export.py +0 -91
- patchvec-0.5.8/tests/test_docid_default.py +0 -41
- patchvec-0.5.8/tests/test_meta_store.py +0 -91
- patchvec-0.5.8/tests/test_txtai_concurrent_upsert.py +0 -77
- patchvec-0.5.8/tests/test_txtai_store_close_race.py +0 -79
- patchvec-0.5.8/tests/test_txtai_store_meta_fetch_scope.py +0 -59
- patchvec-0.5.8/tests/test_txtai_store_sql_safety.py +0 -116
- {patchvec-0.5.8 → patchvec-0.5.9}/LICENSE +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/MANIFEST.in +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/dependency_links.txt +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/entry_points.txt +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/top_level.txt +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/__init__.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/assets/__init__.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/assets/patchvec_icon_192.png +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/assets/ui.html +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/log.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/schemas.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/pave/ui.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/requirements-cpu.txt +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/setup.cfg +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_admin_tenants.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_auth.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_auth_api.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_collection_rename.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_collections.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_ingest_errors.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_ingest_size_limit.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_lazy_app.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_log.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_request_id.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_search_errors.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_tenant_rate_limit.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_ui.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_upload_search_csv.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_upload_search_pdf.py +0 -0
- {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_upload_search_txt.py +0 -0
|
@@ -12,7 +12,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
|
12
12
|
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
13
13
|
- REST and CLI entry points
|
|
14
14
|
- Health/metrics endpoints + Prometheus exporter
|
|
15
|
-
- Pluggable embeddings and stores; default
|
|
15
|
+
- Pluggable embeddings and stores; default stack is local FAISS + SBERT
|
|
16
16
|
|
|
17
17
|
## Requirements
|
|
18
18
|
- Python 3.10–3.14
|
|
@@ -47,27 +47,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
|
47
47
|
```
|
|
48
48
|
|
|
49
49
|
## Minimal config (optional)
|
|
50
|
-
By default PatchVec runs with sensible local defaults.
|
|
51
|
-
|
|
50
|
+
By default PatchVec runs with sensible local defaults. For a user install,
|
|
51
|
+
customize `~/patchvec/config.yml`:
|
|
52
52
|
```yaml
|
|
53
53
|
vector_store:
|
|
54
|
-
type:
|
|
54
|
+
type: faiss
|
|
55
55
|
embedder:
|
|
56
|
-
type:
|
|
56
|
+
type: sbert
|
|
57
57
|
auth:
|
|
58
58
|
mode: static
|
|
59
59
|
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
60
60
|
```
|
|
61
61
|
Then export:
|
|
62
62
|
```bash
|
|
63
|
-
export PATCHVEC_CONFIG=./config.yml
|
|
64
63
|
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
65
64
|
```
|
|
65
|
+
If you keep the file elsewhere, point the runtime at it explicitly:
|
|
66
|
+
```bash
|
|
67
|
+
export PATCHVEC_CONFIG=/path/to/config.yml
|
|
68
|
+
```
|
|
66
69
|
|
|
67
70
|
## CLI example
|
|
68
71
|
```bash
|
|
69
72
|
pavecli create-collection demo books
|
|
70
|
-
pavecli
|
|
73
|
+
pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
|
|
71
74
|
--metadata='{"lang":"en"}'
|
|
72
75
|
pavecli search demo books "captain nemo" -k 5
|
|
73
76
|
```
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: patchvec
|
|
3
|
-
Version: 0.5.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.5.9
|
|
4
|
+
Summary: PaveDB — A lightweight, pluggable vector search microservice.
|
|
5
5
|
Author: Rodrigo Rodrigues da Silva
|
|
6
6
|
Author-email: rodrigo@flowlexi.com
|
|
7
7
|
License: AGPL-3.0-or-later
|
|
@@ -24,7 +24,6 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: fastapi>=0.115.0
|
|
26
26
|
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
27
|
-
Requires-Dist: txtai>=6.3.0
|
|
28
27
|
Requires-Dist: pydantic>=2.8.2
|
|
29
28
|
Requires-Dist: python-multipart>=0.0.9
|
|
30
29
|
Requires-Dist: pypdf>=5.0.0
|
|
@@ -32,14 +31,14 @@ Requires-Dist: pyyaml>=6.0.2
|
|
|
32
31
|
Requires-Dist: python-dotenv>=1.0.1
|
|
33
32
|
Requires-Dist: faiss-cpu>=1.7.1
|
|
34
33
|
Requires-Dist: torch>=2.10.0
|
|
34
|
+
Requires-Dist: sentence-transformers>=2.7.0
|
|
35
35
|
Provides-Extra: cpu
|
|
36
|
-
Provides-Extra: sbert
|
|
37
|
-
Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
|
|
38
36
|
Provides-Extra: openai
|
|
39
37
|
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
40
38
|
Provides-Extra: test
|
|
41
39
|
Requires-Dist: pytest; extra == "test"
|
|
42
40
|
Requires-Dist: httpx; extra == "test"
|
|
41
|
+
Requires-Dist: datasets>=3.5.0; extra == "test"
|
|
43
42
|
Dynamic: author
|
|
44
43
|
Dynamic: author-email
|
|
45
44
|
Dynamic: classifier
|
|
@@ -67,7 +66,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
|
67
66
|
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
68
67
|
- REST and CLI entry points
|
|
69
68
|
- Health/metrics endpoints + Prometheus exporter
|
|
70
|
-
- Pluggable embeddings and stores; default
|
|
69
|
+
- Pluggable embeddings and stores; default stack is local FAISS + SBERT
|
|
71
70
|
|
|
72
71
|
## Requirements
|
|
73
72
|
- Python 3.10–3.14
|
|
@@ -102,27 +101,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
|
102
101
|
```
|
|
103
102
|
|
|
104
103
|
## Minimal config (optional)
|
|
105
|
-
By default PatchVec runs with sensible local defaults.
|
|
106
|
-
|
|
104
|
+
By default PatchVec runs with sensible local defaults. For a user install,
|
|
105
|
+
customize `~/patchvec/config.yml`:
|
|
107
106
|
```yaml
|
|
108
107
|
vector_store:
|
|
109
|
-
type:
|
|
108
|
+
type: faiss
|
|
110
109
|
embedder:
|
|
111
|
-
type:
|
|
110
|
+
type: sbert
|
|
112
111
|
auth:
|
|
113
112
|
mode: static
|
|
114
113
|
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
115
114
|
```
|
|
116
115
|
Then export:
|
|
117
116
|
```bash
|
|
118
|
-
export PATCHVEC_CONFIG=./config.yml
|
|
119
117
|
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
120
118
|
```
|
|
119
|
+
If you keep the file elsewhere, point the runtime at it explicitly:
|
|
120
|
+
```bash
|
|
121
|
+
export PATCHVEC_CONFIG=/path/to/config.yml
|
|
122
|
+
```
|
|
121
123
|
|
|
122
124
|
## CLI example
|
|
123
125
|
```bash
|
|
124
126
|
pavecli create-collection demo books
|
|
125
|
-
pavecli
|
|
127
|
+
pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
|
|
126
128
|
--metadata='{"lang":"en"}'
|
|
127
129
|
pavecli search demo books "captain nemo" -k 5
|
|
128
130
|
```
|
patchvec-0.5.9/README.md
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
<!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com> -->
|
|
2
|
+
<!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
|
|
3
|
+
|
|
4
|
+
# 🛣️ PatchVec — Vector search you can understand & deploy within minutes
|
|
5
|
+
|
|
6
|
+
PatchVec is a single-process vector search engine for AI applications.
|
|
7
|
+
|
|
8
|
+
It ingests your documents, chunks and embeds them, and gives you semantic search with
|
|
9
|
+
full provenance — document id, page, character offset, and the exact snippet that
|
|
10
|
+
matched.
|
|
11
|
+
|
|
12
|
+
Built for developers shipping **RAG (Retrieval-Augmented Generation)** systems, PatchVec
|
|
13
|
+
provides a straightforward service for **vector search, embeddings pipelines, and
|
|
14
|
+
semantic retrieval**. It runs as a **REST service, a CLI tool, or an embedded library**,
|
|
15
|
+
so you can ship your first version quickly and keep growing with the same codebase as
|
|
16
|
+
your application scales. No cluster. No opaque pipelines.
|
|
17
|
+
|
|
18
|
+
Drop a file in. Search it. See exactly what came back — and why. Minutes after your
|
|
19
|
+
first commit.
|
|
20
|
+
|
|
21
|
+
## ⚙️ Why PatchVec
|
|
22
|
+
|
|
23
|
+
- **Ingest files, not embeddings** — hand it a PDF, CSV, or TXT (more formats to come)
|
|
24
|
+
and PatchVec chunks, embeds, and indexes it. No preprocessing pipeline to build.
|
|
25
|
+
- **Full provenance on every hit** — every search result traces back to a document,
|
|
26
|
+
page, and character offset. Latency and request traceability are built into every
|
|
27
|
+
response.
|
|
28
|
+
- **Multi-tenant by default** — tenant/collection namespacing is built in, not bolted
|
|
29
|
+
on (and transparent when you just don't need it).
|
|
30
|
+
- **REST, CLI, or embed it** — run as an HTTP service, script via the CLI, or import the
|
|
31
|
+
library directly in your Python app.
|
|
32
|
+
- **Pluggable embeddings** (soon) — swap models per collection; wire in local or hosted
|
|
33
|
+
embedding backends.
|
|
34
|
+
|
|
35
|
+
## 🧭 How to
|
|
36
|
+
|
|
37
|
+
### 🐳 Docker workflow (prebuilt images)
|
|
38
|
+
|
|
39
|
+
Pull the image that fits your hardware from the
|
|
40
|
+
[Flowlexi Container Registry](https://gitlab.com/flowlexi/patchvec/container_registry)
|
|
41
|
+
on GitLab (CUDA builds publish as `latest-gpu`, CPU-only as
|
|
42
|
+
`latest-cpu`).
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
docker pull registry.gitlab.com/flowlexi/patchvec/patchvec:latest-gpu
|
|
46
|
+
docker pull registry.gitlab.com/flowlexi/patchvec/patchvec:latest-cpu
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Run the service by choosing the tag you need and mapping the API port locally:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
docker run -d --name patchvec \
|
|
53
|
+
-p 8086:8086 \
|
|
54
|
+
registry.gitlab.com/flowlexi/patchvec/patchvec:latest-cpu
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Use the bundled CLI inside the container to create a tenant/collection, ingest a demo
|
|
58
|
+
document, and query it:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
docker exec patchvec pavecli create-collection demo books
|
|
62
|
+
docker exec patchvec pavecli ingest demo books /app/demo/20k_leagues.txt \
|
|
63
|
+
--docid=verne-20k --metadata='{"lang":"en"}'
|
|
64
|
+
docker exec patchvec pavecli search demo books "captain nemo" -k 3
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
See below for REST and UI.
|
|
68
|
+
|
|
69
|
+
Stop the container when you are done:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
docker rm -f patchvec
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### 🐍 PyPI workflow
|
|
76
|
+
|
|
77
|
+
Install PatchVec from PyPI inside an isolated virtual environment. You can run it
|
|
78
|
+
purely from env vars, or later point it at an explicit config file.
|
|
79
|
+
|
|
80
|
+
**Requires Python 3.10–3.14.**
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
mkdir -p ~/pv && cd ~/pv # or wherever
|
|
84
|
+
python -m venv .venv-pv
|
|
85
|
+
source .venv-pv/bin/activate
|
|
86
|
+
python -m pip install --upgrade pip
|
|
87
|
+
pip install "patchvec[cpu]"
|
|
88
|
+
|
|
89
|
+
# create the default instance under ~/patchvec
|
|
90
|
+
pavecli init
|
|
91
|
+
|
|
92
|
+
# sample demo corpus
|
|
93
|
+
curl -LO https://raw.githubusercontent.com/rodrigopitanga/patchvec/main/demo/20k_leagues.txt
|
|
94
|
+
|
|
95
|
+
# set an admin key for the generated config
|
|
96
|
+
export PATCHVEC_GLOBAL_KEY=super-sekret
|
|
97
|
+
|
|
98
|
+
# option A: run the service (stays up until you stop it)
|
|
99
|
+
pavesrv
|
|
100
|
+
|
|
101
|
+
# option B: operate entirely via the CLI (no server needed)
|
|
102
|
+
pavecli create-collection demo books
|
|
103
|
+
pavecli ingest demo books 20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
|
|
104
|
+
pavecli search demo books "captain nemo" -k 3
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
> **CPU-only deployments:** The command above pulls the default PyTorch wheel
|
|
108
|
+
> from PyPI, which includes CUDA support (~2 GB). For a leaner, CPU-only torch
|
|
109
|
+
> install, point pip at the PyTorch CPU index:
|
|
110
|
+
> ```bash
|
|
111
|
+
> pip install "patchvec[cpu]" \
|
|
112
|
+
> --index-url https://download.pytorch.org/whl/cpu \
|
|
113
|
+
> --extra-index-url https://pypi.org/simple
|
|
114
|
+
> ```
|
|
115
|
+
|
|
116
|
+
Deactivate the virtual environment with `deactivate` when finished.
|
|
117
|
+
|
|
118
|
+
By default, a non-dev runtime reads `~/patchvec/config.yml` if present, keeps
|
|
119
|
+
tenant sidecar loading disabled unless `auth.tenants_file` is configured, and
|
|
120
|
+
stores data in `~/patchvec/data`. You can override any of that with the
|
|
121
|
+
`PATCHVEC_*` environment scheme or by pointing `PATCHVEC_CONFIG` at an explicit
|
|
122
|
+
config file. For alternate instances, use `pavecli init /path/to/instance` and
|
|
123
|
+
then point commands at that root with `pavesrv --home=/path/to/instance` or
|
|
124
|
+
`pavecli <command> ... --home /path/to/instance`.
|
|
125
|
+
|
|
126
|
+
### 🌐 REST API and Web UI usage
|
|
127
|
+
|
|
128
|
+
When the server is running (either via Docker or `pavesrv`), the API listens on
|
|
129
|
+
`http://localhost:8086`. The following `curl` commands mirror the CLI sequence
|
|
130
|
+
above—adjust the file path to wherever you stored the corpus
|
|
131
|
+
(`/app/demo/20k_leagues.txt` in Docker, `~/pv/20k_leagues.txt` for PyPI installs) and
|
|
132
|
+
reuse the bearer token exported earlier:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
# create collection
|
|
136
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
137
|
+
-X POST http://localhost:8086/collections/demo/books
|
|
138
|
+
|
|
139
|
+
# ingest document
|
|
140
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
141
|
+
-X POST http://localhost:8086/collections/demo/books/documents \
|
|
142
|
+
-F "file=@20k_leagues.txt" \
|
|
143
|
+
-F 'metadata={"lang":"en"}'
|
|
144
|
+
|
|
145
|
+
# run search
|
|
146
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
147
|
+
"http://localhost:8086/collections/demo/books/search?q=captain+nemo&k=3"
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Every hit comes back with provenance you can trace, plus latency
|
|
151
|
+
and request id for observability:
|
|
152
|
+
|
|
153
|
+
```json
|
|
154
|
+
{
|
|
155
|
+
"matches": [
|
|
156
|
+
{
|
|
157
|
+
"id": "verne-20k::chunk_42",
|
|
158
|
+
"score": 0.82,
|
|
159
|
+
"text": "Captain Nemo conducted me to the central staircase ...",
|
|
160
|
+
"tenant": "demo",
|
|
161
|
+
"collection": "books",
|
|
162
|
+
"match_reason": "semantic",
|
|
163
|
+
"meta": {
|
|
164
|
+
"docid": "verne-20k",
|
|
165
|
+
"filename": "20k_leagues.txt",
|
|
166
|
+
"offset": 21000,
|
|
167
|
+
"lang": "en",
|
|
168
|
+
"ingested_at": "2026-03-07T12:00:00Z"
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
],
|
|
172
|
+
"latency_ms": 12.4,
|
|
173
|
+
"request_id": "req-5f3a-b812"
|
|
174
|
+
}
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
The Swagger UI is available at `http://localhost:8086/`.
|
|
178
|
+
|
|
179
|
+
Health and metrics endpoints are available at `/health` and `/metrics`.
|
|
180
|
+
|
|
181
|
+
Runtime options are also accepted via the `PATCHVEC_*` environment variable scheme
|
|
182
|
+
(`PATCHVEC_SERVER__PORT`, `PATCHVEC_AUTH__MODE`, etc.), which take precedence over config files.
|
|
183
|
+
|
|
184
|
+
### 🔁 Live data updates
|
|
185
|
+
|
|
186
|
+
PatchVec supports live data refresh without restarting the server. Re-ingest the same
|
|
187
|
+
`docid` to *replace* vector content (filename doesn't matter — metadata will change
|
|
188
|
+
though), or explicitly delete the document and then ingest it again.
|
|
189
|
+
|
|
190
|
+
Re-ingest to replace (CLI path example):
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
# initial ingest
|
|
194
|
+
pavecli ingest demo books 20k_leagues.txt --docid=verne-20k
|
|
195
|
+
|
|
196
|
+
# modify the content (filename can change — docid is what matters)
|
|
197
|
+
cp 20k_leagues.txt 20k_leagues_v2.txt
|
|
198
|
+
echo "THE END" >> 20k_leagues_v2.txt
|
|
199
|
+
|
|
200
|
+
# re-ingest with the *same docid* to replace the indexed content
|
|
201
|
+
pavecli ingest demo books 20k_leagues_v2.txt --docid=verne-20k
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Delete by ID then ingest (REST path example):
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
208
|
+
-X DELETE http://localhost:8086/collections/demo/books/documents/verne-20k
|
|
209
|
+
|
|
210
|
+
# make changes
|
|
211
|
+
|
|
212
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
213
|
+
-X POST http://localhost:8086/collections/demo/books/documents \
|
|
214
|
+
-F "file=@demo/20k_leagues.txt" \
|
|
215
|
+
-F 'docid=verne-20k'
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### 🛠️ Developer workflow
|
|
219
|
+
|
|
220
|
+
Building from source relies on `Makefile` shortcuts (`make install-dev`, `make serve`,
|
|
221
|
+
`make test`, `make check`, etc.).
|
|
222
|
+
|
|
223
|
+
The full contributor workflow, target reference, and coding style live in
|
|
224
|
+
[CONTRIBUTING.md](CONTRIBUTING.md). Performance benchmarks are documented in
|
|
225
|
+
[README-benchmarks.md](README-benchmarks.md).
|
|
226
|
+
|
|
227
|
+
## Logging
|
|
228
|
+
|
|
229
|
+
PatchVec writes human-readable logs to stderr and optionally emits
|
|
230
|
+
structured JSON lines (one per search/ingest/delete) for production
|
|
231
|
+
observability. Enable the ops stream in `config.yml`:
|
|
232
|
+
|
|
233
|
+
```yaml
|
|
234
|
+
log:
|
|
235
|
+
ops_log: stdout # null (off) | stdout | /path/to/ops.jsonl
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
See `config.yml.example` for the full logging configuration.
|
|
239
|
+
|
|
240
|
+
## 🗺️ Roadmap
|
|
241
|
+
|
|
242
|
+
Short/mid-term tasks and long-term plans are all tracked in
|
|
243
|
+
[`ROADMAP.md`](ROADMAP.md). Pick one, open an issue titled `claim: <task ID>`, and
|
|
244
|
+
ship a patch. If you find a bug, file it under the *Issues* tab.
|
|
245
|
+
|
|
246
|
+
## 📜 License
|
|
247
|
+
|
|
248
|
+
PatchVec is free software: you can use it, copy it, redistribute it and/or modify it
|
|
249
|
+
free of charge under the terms of the GNU Affero General Public License as published by
|
|
250
|
+
the Free Software Foundation, either version 3 of the License, or (at your option) any
|
|
251
|
+
later version.
|
|
252
|
+
|
|
253
|
+
PatchVec is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
254
|
+
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
255
|
+
PURPOSE. See the GNU Affero General Public License for more details.
|
|
256
|
+
|
|
257
|
+
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
258
|
+
Copyright (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com>
|
|
@@ -2,31 +2,50 @@
|
|
|
2
2
|
# Copy to config.yml and adjust.
|
|
3
3
|
# Secrets (API keys) should live in a separate untracked file — see tenants.yml.example.
|
|
4
4
|
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
5
|
+
# Default user-install paths:
|
|
6
|
+
# config: ~/pavedb/config.yml
|
|
7
|
+
# tenants: ~/pavedb/tenants.yml
|
|
8
|
+
# data: ~/pavedb/data
|
|
7
9
|
#
|
|
8
|
-
#
|
|
10
|
+
# Distro-like install example:
|
|
11
|
+
# config: /etc/pavedb/config.yml
|
|
12
|
+
# tenants: /var/pavedb/tenants.yml
|
|
13
|
+
# data: /var/pavedb/data
|
|
14
|
+
#
|
|
15
|
+
# Config file location override:
|
|
16
|
+
# PAVEDB_CONFIG=/etc/pavedb/config.yml
|
|
17
|
+
#
|
|
18
|
+
# In Docker/systemd deployments always set PAVEDB_CONFIG explicitly — the
|
|
9
19
|
# default path expands ~ relative to the process user, which may not be what
|
|
10
20
|
# you expect inside a container. Example compose snippet:
|
|
11
21
|
#
|
|
12
22
|
# environment:
|
|
13
|
-
#
|
|
23
|
+
# PAVEDB_CONFIG: /etc/pavedb/config.yml
|
|
14
24
|
# volumes:
|
|
15
|
-
# - ./config.yml:/etc/
|
|
25
|
+
# - ./config.yml:/etc/pavedb/config.yml:ro
|
|
16
26
|
#
|
|
17
27
|
# All keys can also be overridden inline via environment variables:
|
|
18
|
-
#
|
|
19
|
-
#
|
|
28
|
+
# PAVEDB_<KEY>=value (top-level, e.g. PAVEDB_DATA_DIR)
|
|
29
|
+
# PAVEDB_<SECTION>__<KEY>=val (nested, e.g. PAVEDB_LOG__LEVEL=debug)
|
|
30
|
+
# Legacy PATCHVEC_* vars still work in v0.5.9 but will be removed in v0.6.
|
|
31
|
+
#
|
|
32
|
+
# `auth.tenants_file` is optional. If unset, its default is `None` and no
|
|
33
|
+
# tenants sidecar file is loaded.
|
|
34
|
+
# If set, PaveDB loads that sidecar first. Then inline tenant config is
|
|
35
|
+
# applied with precedence:
|
|
36
|
+
# env vars > config.yml > tenants.yml > defaults
|
|
37
|
+
# Example: define tenant "acme" entirely from env:
|
|
38
|
+
# PAVEDB_AUTH__API_KEYS__acme=change-me
|
|
39
|
+
# PAVEDB_TENANTS__acme__MAX_CONCURRENT=5
|
|
20
40
|
|
|
21
41
|
# ---------------------------------------------------------------------------
|
|
22
42
|
# Storage
|
|
23
43
|
# ---------------------------------------------------------------------------
|
|
24
44
|
|
|
25
45
|
# Data directory — ~ is expanded at startup.
|
|
26
|
-
# Default (library/dev): ~/
|
|
27
|
-
# For
|
|
28
|
-
|
|
29
|
-
data_dir: ~/patchvec/data
|
|
46
|
+
# Default (library/dev): ~/pavedb/data
|
|
47
|
+
# For distro-like installs use an absolute path, e.g. /var/pavedb/data.
|
|
48
|
+
data_dir: ~/pavedb/data
|
|
30
49
|
|
|
31
50
|
# ---------------------------------------------------------------------------
|
|
32
51
|
# Common collection
|
|
@@ -55,11 +74,13 @@ auth:
|
|
|
55
74
|
|
|
56
75
|
# Global admin key — grants access to all tenants and admin routes.
|
|
57
76
|
# Always read from the environment; never hardcode in committed files.
|
|
58
|
-
global_key: ${
|
|
77
|
+
global_key: ${PAVEDB_GLOBAL_KEY}
|
|
59
78
|
|
|
60
|
-
# External tenant→key mapping file
|
|
61
|
-
#
|
|
62
|
-
|
|
79
|
+
# External tenant→key mapping file.
|
|
80
|
+
# Keep the same key paths there (`auth.api_keys`, `tenants.*`).
|
|
81
|
+
# User install default: ~/pavedb/tenants.yml
|
|
82
|
+
# Distro-like install: /var/pavedb/tenants.yml
|
|
83
|
+
# tenants_file: ~/pavedb/tenants.yml
|
|
63
84
|
|
|
64
85
|
# Inline tenant→key mapping (fallback; keep empty in the repo).
|
|
65
86
|
api_keys: {}
|
|
@@ -69,51 +90,36 @@ auth:
|
|
|
69
90
|
# ---------------------------------------------------------------------------
|
|
70
91
|
|
|
71
92
|
vector_store:
|
|
72
|
-
#
|
|
73
|
-
|
|
74
|
-
type: default
|
|
93
|
+
# faiss — local FAISS index (built-in, no extra services required).
|
|
94
|
+
type: faiss
|
|
75
95
|
|
|
76
|
-
# Options for type=
|
|
77
|
-
|
|
96
|
+
# Options for type=faiss.
|
|
97
|
+
faiss:
|
|
78
98
|
embed_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# Options for type=qdrant.
|
|
82
|
-
# NOTE: QdrantStore is currently a stub — all methods raise NotImplementedError.
|
|
83
|
-
# Contributions welcome! See pave/stores/qdrant_store.py.
|
|
84
|
-
qdrant:
|
|
85
|
-
url: http://localhost:6333
|
|
86
|
-
api_key: ${PATCHVEC_QDRANT_API_KEY}
|
|
87
|
-
prefer_payload_filters: true
|
|
88
|
-
collection_prefix: patchvec_
|
|
99
|
+
max_query_chars: 4000
|
|
89
100
|
|
|
90
101
|
# ---------------------------------------------------------------------------
|
|
91
102
|
# Embedder
|
|
92
103
|
# ---------------------------------------------------------------------------
|
|
93
104
|
# Controls how text is converted to vectors before indexing and querying.
|
|
94
105
|
# The embedder is shared across all collections (per-collection config: v0.6).
|
|
95
|
-
#
|
|
96
|
-
# NOTE: the sbert and openai embedder types are currently stubs — the factory
|
|
97
|
-
# exists but is not wired into the default store. Only type=default (txtai) is
|
|
98
|
-
# fully functional today. Per-collection embedder selection lands in v0.6.
|
|
99
|
-
# Contributions welcome! See pave/embedders/ and pave/stores/factory.py.
|
|
100
106
|
|
|
101
107
|
embedder:
|
|
102
|
-
#
|
|
103
|
-
#
|
|
104
|
-
|
|
105
|
-
type: default
|
|
106
|
-
|
|
107
|
-
txtai:
|
|
108
|
-
path: sentence-transformers/paraphrase-MiniLM-L3-v2
|
|
108
|
+
# sbert — direct sentence-transformers (recommended local default).
|
|
109
|
+
# openai — OpenAI embeddings API (requires API key, adds latency).
|
|
110
|
+
type: sbert
|
|
109
111
|
|
|
112
|
+
# Used by type=sbert.
|
|
110
113
|
sbert:
|
|
111
114
|
model: sentence-transformers/all-MiniLM-L6-v2
|
|
112
115
|
batch_size: 64
|
|
113
|
-
device: auto # cpu | cuda |
|
|
116
|
+
device: auto # cpu | cuda | mps | auto
|
|
114
117
|
|
|
118
|
+
# Used by type=openai.
|
|
115
119
|
openai:
|
|
116
|
-
|
|
120
|
+
model: text-embedding-3-small
|
|
121
|
+
batch_size: 256
|
|
122
|
+
api_key: ${PAVEDB_OPENAI_API_KEY}
|
|
117
123
|
dim: 1536
|
|
118
124
|
|
|
119
125
|
# ---------------------------------------------------------------------------
|
|
@@ -194,7 +200,7 @@ server:
|
|
|
194
200
|
|
|
195
201
|
log:
|
|
196
202
|
# Dev log level (stderr). DEBUG | INFO | WARNING | ERROR — default INFO.
|
|
197
|
-
# Overridden by
|
|
203
|
+
# Overridden by PAVEDB_LOG__LEVEL env var (e.g. in Makefile: debug).
|
|
198
204
|
# Per-namespace overrides: log.debug / log.watch / log.quiet (list of loggers).
|
|
199
205
|
level: INFO
|
|
200
206
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: patchvec
|
|
3
|
-
Version: 0.5.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.5.9
|
|
4
|
+
Summary: PaveDB — A lightweight, pluggable vector search microservice.
|
|
5
5
|
Author: Rodrigo Rodrigues da Silva
|
|
6
6
|
Author-email: rodrigo@flowlexi.com
|
|
7
7
|
License: AGPL-3.0-or-later
|
|
@@ -24,7 +24,6 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: fastapi>=0.115.0
|
|
26
26
|
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
27
|
-
Requires-Dist: txtai>=6.3.0
|
|
28
27
|
Requires-Dist: pydantic>=2.8.2
|
|
29
28
|
Requires-Dist: python-multipart>=0.0.9
|
|
30
29
|
Requires-Dist: pypdf>=5.0.0
|
|
@@ -32,14 +31,14 @@ Requires-Dist: pyyaml>=6.0.2
|
|
|
32
31
|
Requires-Dist: python-dotenv>=1.0.1
|
|
33
32
|
Requires-Dist: faiss-cpu>=1.7.1
|
|
34
33
|
Requires-Dist: torch>=2.10.0
|
|
34
|
+
Requires-Dist: sentence-transformers>=2.7.0
|
|
35
35
|
Provides-Extra: cpu
|
|
36
|
-
Provides-Extra: sbert
|
|
37
|
-
Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
|
|
38
36
|
Provides-Extra: openai
|
|
39
37
|
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
40
38
|
Provides-Extra: test
|
|
41
39
|
Requires-Dist: pytest; extra == "test"
|
|
42
40
|
Requires-Dist: httpx; extra == "test"
|
|
41
|
+
Requires-Dist: datasets>=3.5.0; extra == "test"
|
|
43
42
|
Dynamic: author
|
|
44
43
|
Dynamic: author-email
|
|
45
44
|
Dynamic: classifier
|
|
@@ -67,7 +66,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
|
67
66
|
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
68
67
|
- REST and CLI entry points
|
|
69
68
|
- Health/metrics endpoints + Prometheus exporter
|
|
70
|
-
- Pluggable embeddings and stores; default
|
|
69
|
+
- Pluggable embeddings and stores; default stack is local FAISS + SBERT
|
|
71
70
|
|
|
72
71
|
## Requirements
|
|
73
72
|
- Python 3.10–3.14
|
|
@@ -102,27 +101,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
|
102
101
|
```
|
|
103
102
|
|
|
104
103
|
## Minimal config (optional)
|
|
105
|
-
By default PatchVec runs with sensible local defaults.
|
|
106
|
-
|
|
104
|
+
By default PatchVec runs with sensible local defaults. For a user install,
|
|
105
|
+
customize `~/patchvec/config.yml`:
|
|
107
106
|
```yaml
|
|
108
107
|
vector_store:
|
|
109
|
-
type:
|
|
108
|
+
type: faiss
|
|
110
109
|
embedder:
|
|
111
|
-
type:
|
|
110
|
+
type: sbert
|
|
112
111
|
auth:
|
|
113
112
|
mode: static
|
|
114
113
|
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
115
114
|
```
|
|
116
115
|
Then export:
|
|
117
116
|
```bash
|
|
118
|
-
export PATCHVEC_CONFIG=./config.yml
|
|
119
117
|
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
120
118
|
```
|
|
119
|
+
If you keep the file elsewhere, point the runtime at it explicitly:
|
|
120
|
+
```bash
|
|
121
|
+
export PATCHVEC_CONFIG=/path/to/config.yml
|
|
122
|
+
```
|
|
121
123
|
|
|
122
124
|
## CLI example
|
|
123
125
|
```bash
|
|
124
126
|
pavecli create-collection demo books
|
|
125
|
-
pavecli
|
|
127
|
+
pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
|
|
126
128
|
--metadata='{"lang":"en"}'
|
|
127
129
|
pavecli search demo books "captain nemo" -k 5
|
|
128
130
|
```
|