patchvec 0.5.6__tar.gz → 0.5.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchvec-0.5.7/ABOUT.md +98 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/MANIFEST.in +0 -3
- patchvec-0.5.7/PKG-INFO +153 -0
- patchvec-0.5.7/README.md +162 -0
- patchvec-0.5.7/patchvec.egg-info/PKG-INFO +153 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/SOURCES.txt +5 -4
- {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/requires.txt +13 -2
- patchvec-0.5.7/pave/assets/__init__.py +4 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/assets/ui.html +4 -1
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/auth.py +5 -5
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/cli.py +60 -1
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/config.py +98 -40
- patchvec-0.5.7/pave/embedders/__init__.py +4 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/txtai_emb.py +4 -4
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/main.py +145 -23
- patchvec-0.5.7/pave/metrics.py +202 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/preprocess.py +9 -8
- patchvec-0.5.7/pave/service.py +331 -0
- patchvec-0.5.7/pave/stores/__init__.py +4 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/stores/base.py +13 -4
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/stores/qdrant_store.py +4 -3
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/stores/txtai_store.py +262 -105
- patchvec-0.5.7/requirements-cpu.txt +9 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/setup.py +24 -11
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_cli.py +39 -1
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_csv_ingest.py +1 -1
- patchvec-0.5.7/tests/test_data_export.py +91 -0
- patchvec-0.5.7/tests/test_delete_document.py +109 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_docid_default.py +1 -1
- patchvec-0.5.7/tests/test_metrics.py +168 -0
- patchvec-0.5.7/tests/test_request_id.py +93 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_txtai_concurrent_upsert.py +2 -1
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_txtai_store.py +12 -0
- patchvec-0.5.7/tests/test_txtai_store_filters.py +237 -0
- patchvec-0.5.7/tests/test_txtai_store_sql_safety.py +116 -0
- patchvec-0.5.6/ABOUT.md +0 -69
- patchvec-0.5.6/PKG-INFO +0 -115
- patchvec-0.5.6/README.md +0 -87
- patchvec-0.5.6/patchvec.egg-info/PKG-INFO +0 -115
- patchvec-0.5.6/pave/embedders/__init__.py +0 -1
- patchvec-0.5.6/pave/metrics.py +0 -52
- patchvec-0.5.6/pave/service.py +0 -92
- patchvec-0.5.6/pave/stores/__init__.py +0 -1
- patchvec-0.5.6/requirements-base.txt +0 -1
- patchvec-0.5.6/requirements-cpu.txt +0 -6
- patchvec-0.5.6/requirements-test.txt +0 -5
- patchvec-0.5.6/requirements.txt +0 -11
- patchvec-0.5.6/scripts/release.sh +0 -89
- patchvec-0.5.6/tests/test_metrics.py +0 -35
- patchvec-0.5.6/tests/test_txtai_store_filters.py +0 -112
- {patchvec-0.5.6 → patchvec-0.5.7}/LICENSE +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/config.yml.example +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/dependency_links.txt +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/entry_points.txt +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/top_level.txt +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/__init__.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/assets/patchvec_icon_192.png +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/base.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/factory.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/openai_emb.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/sbert_emb.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/stores/factory.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/pave/ui.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/setup.cfg +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_auth.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_collections.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_config_runtime.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_health.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_ui.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_upload_search_csv.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_upload_search_pdf.py +0 -0
- {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_upload_search_txt.py +0 -0
patchvec-0.5.7/ABOUT.md
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
<!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
|
|
2
|
+
<!-- SPDX-License-Identifier: GPL-3.0-or-later -->
|
|
3
|
+
|
|
4
|
+
# PatchVec — A lightweight, pluggable vector search microservice.
|
|
5
|
+
|
|
6
|
+
Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
7
|
+
|
|
8
|
+
## Highlights
|
|
9
|
+
- Multi-tenant collections: `/collections/{tenant}/{name}`
|
|
10
|
+
- Upload and search TXT, CSV, and PDF
|
|
11
|
+
- Deterministic provenance: every hit returns doc id, page, offset, snippet
|
|
12
|
+
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
13
|
+
- REST and CLI entry points
|
|
14
|
+
- Health/metrics endpoints + Prometheus exporter
|
|
15
|
+
- Pluggable embeddings and stores; default backend is local
|
|
16
|
+
|
|
17
|
+
## Requirements
|
|
18
|
+
- Python 3.10–3.14
|
|
19
|
+
|
|
20
|
+
## Install (PyPI)
|
|
21
|
+
```bash
|
|
22
|
+
python -m venv .venv
|
|
23
|
+
source .venv/bin/activate
|
|
24
|
+
pip install patchvec
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
CPU-only deployments can use the PyTorch CPU wheel index:
|
|
28
|
+
```bash
|
|
29
|
+
pip install "patchvec[cpu]" \
|
|
30
|
+
--index-url https://download.pytorch.org/whl/cpu \
|
|
31
|
+
--extra-index-url https://pypi.org/simple
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quickstart
|
|
35
|
+
```bash
|
|
36
|
+
# Start the server (installed entry point)
|
|
37
|
+
pavesrv
|
|
38
|
+
|
|
39
|
+
# Or run with uvicorn manually if you prefer:
|
|
40
|
+
uvicorn pave.main:app --host 0.0.0.0 --port 8086
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Auth defaults to `none` only for dev. For production, set static auth:
|
|
44
|
+
```bash
|
|
45
|
+
export PATCHVEC_AUTH__MODE=static
|
|
46
|
+
export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Minimal config (optional)
|
|
50
|
+
By default PatchVec runs with sensible local defaults. To customize, create `config.yml`:
|
|
51
|
+
```yaml
|
|
52
|
+
vector_store:
|
|
53
|
+
type: default
|
|
54
|
+
embedder:
|
|
55
|
+
type: default
|
|
56
|
+
auth:
|
|
57
|
+
mode: static
|
|
58
|
+
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
59
|
+
```
|
|
60
|
+
Then export:
|
|
61
|
+
```bash
|
|
62
|
+
export PATCHVEC_CONFIG=./config.yml
|
|
63
|
+
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## CLI example
|
|
67
|
+
```bash
|
|
68
|
+
pavecli create-collection demo books
|
|
69
|
+
pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
|
|
70
|
+
pavecli search demo books "captain nemo" -k 5
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## REST example
|
|
74
|
+
```bash
|
|
75
|
+
# Create a collection
|
|
76
|
+
curl -X POST http://localhost:8086/collections/demo/books \
|
|
77
|
+
-H "Authorization: Bearer your-secret"
|
|
78
|
+
|
|
79
|
+
# Upload a TXT document
|
|
80
|
+
curl -X POST http://localhost:8086/collections/demo/books/documents \
|
|
81
|
+
-H "Authorization: Bearer your-secret" \
|
|
82
|
+
-F "file=@demo/20k_leagues.txt" -F "docid=verne-20k" \
|
|
83
|
+
-F 'metadata={"lang":"en"}'
|
|
84
|
+
|
|
85
|
+
# Search (GET, no filters)
|
|
86
|
+
curl -G --data-urlencode "q=hello" \
|
|
87
|
+
-H "Authorization: Bearer your-secret" \
|
|
88
|
+
http://localhost:8086/collections/demo/books/search
|
|
89
|
+
|
|
90
|
+
# Search (POST, with filters)
|
|
91
|
+
curl -X POST http://localhost:8086/collections/demo/books/search \
|
|
92
|
+
-H "Authorization: Bearer your-secret" \
|
|
93
|
+
-H "Content-Type: application/json" \
|
|
94
|
+
-d '{"q":"captain nemo","k":5,"filters":{"docid":"verne-20k"}}'
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## License
|
|
98
|
+
GPL-3.0-or-later — (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
|
|
@@ -2,10 +2,7 @@ include ABOUT.md
|
|
|
2
2
|
include README.md
|
|
3
3
|
include LICENSE
|
|
4
4
|
include config.yml.example
|
|
5
|
-
include requirements.txt
|
|
6
5
|
include requirements-cpu.txt
|
|
7
|
-
include requirements-base.txt
|
|
8
|
-
include requirements-test.txt
|
|
9
6
|
recursive-include pave *.py
|
|
10
7
|
recursive-include scripts *.sh
|
|
11
8
|
recursive-include pave/assets *
|
patchvec-0.5.7/PKG-INFO
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: patchvec
|
|
3
|
+
Version: 0.5.7
|
|
4
|
+
Summary: Patchvec — A lightweight, pluggable vector search microservice.
|
|
5
|
+
Author: Rodrigo Rodrigues da Silva
|
|
6
|
+
Author-email: rodrigopitanga@posteo.net
|
|
7
|
+
License: GPL-3.0-or-later
|
|
8
|
+
Project-URL: Homepage, https://github.com/rodrigopitanga/patchvec/?tab=readme-ov-file#readme-ov-file
|
|
9
|
+
Project-URL: Source, https://github.com/rodrigopitanga/patchvec
|
|
10
|
+
Project-URL: Tracker, https://github.com/rodrigopitanga/patchvec/issues
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Framework :: FastAPI
|
|
20
|
+
Classifier: Topic :: Database
|
|
21
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
22
|
+
Requires-Python: >=3.10,<3.15
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: fastapi>=0.115.0
|
|
26
|
+
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
27
|
+
Requires-Dist: txtai>=6.3.0
|
|
28
|
+
Requires-Dist: pydantic>=2.8.2
|
|
29
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
30
|
+
Requires-Dist: pypdf>=5.0.0
|
|
31
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
32
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
33
|
+
Requires-Dist: faiss-cpu>=1.7.1
|
|
34
|
+
Requires-Dist: torch>=2.10.0
|
|
35
|
+
Provides-Extra: cpu
|
|
36
|
+
Provides-Extra: sbert
|
|
37
|
+
Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
|
|
38
|
+
Provides-Extra: openai
|
|
39
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
40
|
+
Provides-Extra: test
|
|
41
|
+
Requires-Dist: pytest; extra == "test"
|
|
42
|
+
Requires-Dist: httpx; extra == "test"
|
|
43
|
+
Dynamic: author
|
|
44
|
+
Dynamic: author-email
|
|
45
|
+
Dynamic: classifier
|
|
46
|
+
Dynamic: description
|
|
47
|
+
Dynamic: description-content-type
|
|
48
|
+
Dynamic: license
|
|
49
|
+
Dynamic: license-file
|
|
50
|
+
Dynamic: project-url
|
|
51
|
+
Dynamic: provides-extra
|
|
52
|
+
Dynamic: requires-dist
|
|
53
|
+
Dynamic: requires-python
|
|
54
|
+
Dynamic: summary
|
|
55
|
+
|
|
56
|
+
<!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
|
|
57
|
+
<!-- SPDX-License-Identifier: GPL-3.0-or-later -->
|
|
58
|
+
|
|
59
|
+
# PatchVec — A lightweight, pluggable vector search microservice.
|
|
60
|
+
|
|
61
|
+
Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
62
|
+
|
|
63
|
+
## Highlights
|
|
64
|
+
- Multi-tenant collections: `/collections/{tenant}/{name}`
|
|
65
|
+
- Upload and search TXT, CSV, and PDF
|
|
66
|
+
- Deterministic provenance: every hit returns doc id, page, offset, snippet
|
|
67
|
+
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
68
|
+
- REST and CLI entry points
|
|
69
|
+
- Health/metrics endpoints + Prometheus exporter
|
|
70
|
+
- Pluggable embeddings and stores; default backend is local
|
|
71
|
+
|
|
72
|
+
## Requirements
|
|
73
|
+
- Python 3.10–3.14
|
|
74
|
+
|
|
75
|
+
## Install (PyPI)
|
|
76
|
+
```bash
|
|
77
|
+
python -m venv .venv
|
|
78
|
+
source .venv/bin/activate
|
|
79
|
+
pip install patchvec
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
CPU-only deployments can use the PyTorch CPU wheel index:
|
|
83
|
+
```bash
|
|
84
|
+
pip install "patchvec[cpu]" \
|
|
85
|
+
--index-url https://download.pytorch.org/whl/cpu \
|
|
86
|
+
--extra-index-url https://pypi.org/simple
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Quickstart
|
|
90
|
+
```bash
|
|
91
|
+
# Start the server (installed entry point)
|
|
92
|
+
pavesrv
|
|
93
|
+
|
|
94
|
+
# Or run with uvicorn manually if you prefer:
|
|
95
|
+
uvicorn pave.main:app --host 0.0.0.0 --port 8086
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Auth defaults to `none` only for dev. For production, set static auth:
|
|
99
|
+
```bash
|
|
100
|
+
export PATCHVEC_AUTH__MODE=static
|
|
101
|
+
export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Minimal config (optional)
|
|
105
|
+
By default PatchVec runs with sensible local defaults. To customize, create `config.yml`:
|
|
106
|
+
```yaml
|
|
107
|
+
vector_store:
|
|
108
|
+
type: default
|
|
109
|
+
embedder:
|
|
110
|
+
type: default
|
|
111
|
+
auth:
|
|
112
|
+
mode: static
|
|
113
|
+
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
114
|
+
```
|
|
115
|
+
Then export:
|
|
116
|
+
```bash
|
|
117
|
+
export PATCHVEC_CONFIG=./config.yml
|
|
118
|
+
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## CLI example
|
|
122
|
+
```bash
|
|
123
|
+
pavecli create-collection demo books
|
|
124
|
+
pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
|
|
125
|
+
pavecli search demo books "captain nemo" -k 5
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## REST example
|
|
129
|
+
```bash
|
|
130
|
+
# Create a collection
|
|
131
|
+
curl -X POST http://localhost:8086/collections/demo/books \
|
|
132
|
+
-H "Authorization: Bearer your-secret"
|
|
133
|
+
|
|
134
|
+
# Upload a TXT document
|
|
135
|
+
curl -X POST http://localhost:8086/collections/demo/books/documents \
|
|
136
|
+
-H "Authorization: Bearer your-secret" \
|
|
137
|
+
-F "file=@demo/20k_leagues.txt" -F "docid=verne-20k" \
|
|
138
|
+
-F 'metadata={"lang":"en"}'
|
|
139
|
+
|
|
140
|
+
# Search (GET, no filters)
|
|
141
|
+
curl -G --data-urlencode "q=hello" \
|
|
142
|
+
-H "Authorization: Bearer your-secret" \
|
|
143
|
+
http://localhost:8086/collections/demo/books/search
|
|
144
|
+
|
|
145
|
+
# Search (POST, with filters)
|
|
146
|
+
curl -X POST http://localhost:8086/collections/demo/books/search \
|
|
147
|
+
-H "Authorization: Bearer your-secret" \
|
|
148
|
+
-H "Content-Type: application/json" \
|
|
149
|
+
-d '{"q":"captain nemo","k":5,"filters":{"docid":"verne-20k"}}'
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## License
|
|
153
|
+
GPL-3.0-or-later — (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
|
patchvec-0.5.7/README.md
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
<!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
|
|
2
|
+
<!-- SPDX-License-Identifier: GPL-3.0-or-later -->
|
|
3
|
+
|
|
4
|
+
# 🍰 PatchVec — Lightweight, Pluggable Vector Search Microservice
|
|
5
|
+
|
|
6
|
+
Patchvec is a compact vector store built for people who want provenance and fast iteration on RAG plumbing. No black boxes, no hidden pipelines: every chunk records document id, page, and byte offsets, and you can swap embeddings or storage backends per collection.
|
|
7
|
+
|
|
8
|
+
## ⚙️ Core capabilities
|
|
9
|
+
|
|
10
|
+
- **Docker images** — prebuilt CPU/GPU images published to the GitLab Container Registry.
|
|
11
|
+
- **Tenants and collections** — isolation by tenant with per-collection configuration.
|
|
12
|
+
- **Pluggable embeddings** — choose the embedding adapter per collection; wire in local or hosted models.
|
|
13
|
+
- **REST and CLI** — production use over HTTP, quick experiments with the bundled CLI.
|
|
14
|
+
- **Deterministic provenance** — every hit returns doc id, page, offset, and snippet for traceability.
|
|
15
|
+
|
|
16
|
+
## 🧭 Workflows
|
|
17
|
+
|
|
18
|
+
### 🐳 Docker workflow (prebuilt images)
|
|
19
|
+
|
|
20
|
+
Pull the image that fits your hardware from the [Flowlexi](https://gitlab.com/flowlexi) Container Registry on GitLab (CUDA builds publish as `latest-gpu`, CPU-only as `latest-cpu`).
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
docker pull registry.gitlab.com/flowlexi/patchvec/patchvec:latest-gpu
|
|
24
|
+
docker pull registry.gitlab.com/flowlexi/patchvec/patchvec:latest-cpu
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Run the service by choosing the tag you need and mapping the API port locally:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
docker run -d --name patchvec \
|
|
31
|
+
-p 8086:8086 \
|
|
32
|
+
registry.gitlab.com/flowlexi/patchvec/patchvec:latest-cpu
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Use the bundled CLI inside the container to create a tenant/collection, ingest a demo document, and query it:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
docker exec patchvec pavecli create-collection demo books
|
|
39
|
+
docker exec patchvec pavecli upload demo books /app/demo/20k_leagues.txt \
|
|
40
|
+
--docid=verne-20k --metadata='{"lang":"en"}'
|
|
41
|
+
docker exec patchvec pavecli search demo books "captain nemo" -k 3
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
See below for REST and UI.
|
|
45
|
+
|
|
46
|
+
Stop the container when you are done:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
docker rm -f patchvec
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### 🐍 PyPI workflow
|
|
53
|
+
|
|
54
|
+
Install Patchvec from PyPI inside an isolated virtual environment and point it at a local configuration directory.
|
|
55
|
+
|
|
56
|
+
**Requires Python 3.10–3.14.**
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
mkdir -p ~/pv && cd ~/pv #or wherever
|
|
60
|
+
python -m venv .venv-pv
|
|
61
|
+
source .venv-pv/bin/activate
|
|
62
|
+
python -m pip install --upgrade pip
|
|
63
|
+
pip install "patchvec[cpu]"
|
|
64
|
+
|
|
65
|
+
# grab the default configs
|
|
66
|
+
curl -LO https://raw.githubusercontent.com/patchvec/patchvec/main/config.yml.example
|
|
67
|
+
curl -LO https://raw.githubusercontent.com/patchvec/patchvec/main/tenants.yml.example
|
|
68
|
+
cp config.yml.example config.yml
|
|
69
|
+
cp tenants.yml.example tenants.yml
|
|
70
|
+
|
|
71
|
+
# sample demo corpus
|
|
72
|
+
curl -LO https://raw.githubusercontent.com/patchvec/patchvec/main/demo/20k_leagues.txt
|
|
73
|
+
|
|
74
|
+
# point Patchvec at the config file and set a local admin key
|
|
75
|
+
export PATCHVEC_CONFIG="$HOME/pv/config.yml"
|
|
76
|
+
export PATCHVEC_GLOBAL_KEY=super-sekret
|
|
77
|
+
|
|
78
|
+
# option A: run the service (stays up until you stop it)
|
|
79
|
+
pavesrv
|
|
80
|
+
|
|
81
|
+
# option B: operate entirely via the CLI (no server needed)
|
|
82
|
+
pavecli create-collection demo books
|
|
83
|
+
pavecli upload demo books 20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
|
|
84
|
+
pavecli search demo books "captain nemo" -k 3
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
> **CPU-only deployments:** The command above pulls the default PyTorch wheel
|
|
88
|
+
> from PyPI, which includes CUDA support (~2 GB). For a leaner, CPU-only torch
|
|
89
|
+
> install, point pip at the PyTorch CPU index:
|
|
90
|
+
>
|
|
91
|
+
> ```bash
|
|
92
|
+
> pip install "patchvec[cpu]" \
|
|
93
|
+
> --index-url https://download.pytorch.org/whl/cpu \
|
|
94
|
+
> --extra-index-url https://pypi.org/simple
|
|
95
|
+
> ```
|
|
96
|
+
|
|
97
|
+
Deactivate the virtual environment with `deactivate` when finished.
|
|
98
|
+
|
|
99
|
+
### 🌐 REST API and Web UI usage
|
|
100
|
+
|
|
101
|
+
When the server is running (either via Docker or `pavesrv`), the API listens on `http://localhost:8086`. The following `curl` commands mirror the CLI sequence above—adjust the file path to wherever you stored the corpus (`/app/demo/20k_leagues.txt` in Docker, `~/pv/20k_leagues.txt` for PyPI installs) and reuse the bearer token exported earlier:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# create collection
|
|
105
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
106
|
+
-X POST http://localhost:8086/collections/demo/books
|
|
107
|
+
|
|
108
|
+
# ingest document
|
|
109
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
110
|
+
-X POST http://localhost:8086/collections/demo/books/documents \
|
|
111
|
+
-F "file=@20k_leagues.txt" \
|
|
112
|
+
-F 'metadata={"lang":"en"}'
|
|
113
|
+
|
|
114
|
+
# run search
|
|
115
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
116
|
+
"http://localhost:8086/collections/demo/books/search?q=captain+nemo&k=3"
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
There is a simple Swagger UI available at the root of the server. Just point your browser to `http://localhost:8086/`.
|
|
120
|
+
|
|
121
|
+
Health and metrics endpoints are available at `/health` and `/metrics`.
|
|
122
|
+
|
|
123
|
+
Configuration files copied in either workflow can be customised. Runtime options are also accepted via the `PATCHVEC_*` environment variable scheme (`PATCHVEC_SERVER__PORT`, `PATCHVEC_AUTH__MODE`, etc.), which takes precedence over the configuration files.
|
|
124
|
+
|
|
125
|
+
### 🔁 Live data updates
|
|
126
|
+
|
|
127
|
+
Patchvec supports live data refresh without restarting the server. Re-upload the same `docid` to *replace* vector content (the filename doesn't matter, although the metadata will change), or explicitly delete the document and then ingest it again.
|
|
128
|
+
|
|
129
|
+
CLI (re-upload to replace):
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k
|
|
133
|
+
cp demo/20k_leagues.txt demo/20k_leagues_mod.txt
|
|
134
|
+
echo "THE END" >> demo/20k_leagues_mod.txt
|
|
135
|
+
pavecli upload demo books demo/20k_leagues_mod.txt --docid=verne-20k
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
REST (delete then ingest):
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
142
|
+
-X DELETE http://localhost:8086/collections/demo/books/documents/verne-20k
|
|
143
|
+
|
|
144
|
+
# make changes
|
|
145
|
+
|
|
146
|
+
curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
|
|
147
|
+
-X POST http://localhost:8086/collections/demo/books/documents \
|
|
148
|
+
-F "file=@demo/20k_leagues.txt" \
|
|
149
|
+
-F 'docid=verne-20k'
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### 🛠️ Developer workflow
|
|
153
|
+
|
|
154
|
+
Building from source relies on the `Makefile` shortcuts (`make install-dev`, `USE_CPU=1 make serve`, `make test`, etc.). The full contributor workflow, target reference, and task claiming rules live in [CONTRIBUTING.md](CONTRIBUTING.md). Performance benchmarks are documented in [README-benchmarks.md](README-benchmarks.md).
|
|
155
|
+
|
|
156
|
+
## 🗺️ Roadmap
|
|
157
|
+
|
|
158
|
+
Short & mid-term chores are tracked in [`ROADMAP.md`](ROADMAP.md). Pick one, open an issue titled `claim: <task>`, and ship a patch.
|
|
159
|
+
|
|
160
|
+
## 📜 License
|
|
161
|
+
|
|
162
|
+
GPL-3.0-or-later — (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: patchvec
|
|
3
|
+
Version: 0.5.7
|
|
4
|
+
Summary: Patchvec — A lightweight, pluggable vector search microservice.
|
|
5
|
+
Author: Rodrigo Rodrigues da Silva
|
|
6
|
+
Author-email: rodrigopitanga@posteo.net
|
|
7
|
+
License: GPL-3.0-or-later
|
|
8
|
+
Project-URL: Homepage, https://github.com/rodrigopitanga/patchvec/?tab=readme-ov-file#readme-ov-file
|
|
9
|
+
Project-URL: Source, https://github.com/rodrigopitanga/patchvec
|
|
10
|
+
Project-URL: Tracker, https://github.com/rodrigopitanga/patchvec/issues
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Framework :: FastAPI
|
|
20
|
+
Classifier: Topic :: Database
|
|
21
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
22
|
+
Requires-Python: >=3.10,<3.15
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: fastapi>=0.115.0
|
|
26
|
+
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
27
|
+
Requires-Dist: txtai>=6.3.0
|
|
28
|
+
Requires-Dist: pydantic>=2.8.2
|
|
29
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
30
|
+
Requires-Dist: pypdf>=5.0.0
|
|
31
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
32
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
33
|
+
Requires-Dist: faiss-cpu>=1.7.1
|
|
34
|
+
Requires-Dist: torch>=2.10.0
|
|
35
|
+
Provides-Extra: cpu
|
|
36
|
+
Provides-Extra: sbert
|
|
37
|
+
Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
|
|
38
|
+
Provides-Extra: openai
|
|
39
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
40
|
+
Provides-Extra: test
|
|
41
|
+
Requires-Dist: pytest; extra == "test"
|
|
42
|
+
Requires-Dist: httpx; extra == "test"
|
|
43
|
+
Dynamic: author
|
|
44
|
+
Dynamic: author-email
|
|
45
|
+
Dynamic: classifier
|
|
46
|
+
Dynamic: description
|
|
47
|
+
Dynamic: description-content-type
|
|
48
|
+
Dynamic: license
|
|
49
|
+
Dynamic: license-file
|
|
50
|
+
Dynamic: project-url
|
|
51
|
+
Dynamic: provides-extra
|
|
52
|
+
Dynamic: requires-dist
|
|
53
|
+
Dynamic: requires-python
|
|
54
|
+
Dynamic: summary
|
|
55
|
+
|
|
56
|
+
<!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
|
|
57
|
+
<!-- SPDX-License-Identifier: GPL-3.0-or-later -->
|
|
58
|
+
|
|
59
|
+
# PatchVec — A lightweight, pluggable vector search microservice.
|
|
60
|
+
|
|
61
|
+
Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
62
|
+
|
|
63
|
+
## Highlights
|
|
64
|
+
- Multi-tenant collections: `/collections/{tenant}/{name}`
|
|
65
|
+
- Upload and search TXT, CSV, and PDF
|
|
66
|
+
- Deterministic provenance: every hit returns doc id, page, offset, snippet
|
|
67
|
+
- Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
|
|
68
|
+
- REST and CLI entry points
|
|
69
|
+
- Health/metrics endpoints + Prometheus exporter
|
|
70
|
+
- Pluggable embeddings and stores; default backend is local
|
|
71
|
+
|
|
72
|
+
## Requirements
|
|
73
|
+
- Python 3.10–3.14
|
|
74
|
+
|
|
75
|
+
## Install (PyPI)
|
|
76
|
+
```bash
|
|
77
|
+
python -m venv .venv
|
|
78
|
+
source .venv/bin/activate
|
|
79
|
+
pip install patchvec
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
CPU-only deployments can use the PyTorch CPU wheel index:
|
|
83
|
+
```bash
|
|
84
|
+
pip install "patchvec[cpu]" \
|
|
85
|
+
--index-url https://download.pytorch.org/whl/cpu \
|
|
86
|
+
--extra-index-url https://pypi.org/simple
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Quickstart
|
|
90
|
+
```bash
|
|
91
|
+
# Start the server (installed entry point)
|
|
92
|
+
pavesrv
|
|
93
|
+
|
|
94
|
+
# Or run with uvicorn manually if you prefer:
|
|
95
|
+
uvicorn pave.main:app --host 0.0.0.0 --port 8086
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Auth defaults to `none` only for dev. For production, set static auth:
|
|
99
|
+
```bash
|
|
100
|
+
export PATCHVEC_AUTH__MODE=static
|
|
101
|
+
export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Minimal config (optional)
|
|
105
|
+
By default PatchVec runs with sensible local defaults. To customize, create `config.yml`:
|
|
106
|
+
```yaml
|
|
107
|
+
vector_store:
|
|
108
|
+
type: default
|
|
109
|
+
embedder:
|
|
110
|
+
type: default
|
|
111
|
+
auth:
|
|
112
|
+
mode: static
|
|
113
|
+
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
114
|
+
```
|
|
115
|
+
Then export:
|
|
116
|
+
```bash
|
|
117
|
+
export PATCHVEC_CONFIG=./config.yml
|
|
118
|
+
export PATCHVEC_GLOBAL_KEY="your-secret"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## CLI example
|
|
122
|
+
```bash
|
|
123
|
+
pavecli create-collection demo books
|
|
124
|
+
pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
|
|
125
|
+
pavecli search demo books "captain nemo" -k 5
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## REST example
|
|
129
|
+
```bash
|
|
130
|
+
# Create a collection
|
|
131
|
+
curl -X POST http://localhost:8086/collections/demo/books \
|
|
132
|
+
-H "Authorization: Bearer your-secret"
|
|
133
|
+
|
|
134
|
+
# Upload a TXT document
|
|
135
|
+
curl -X POST http://localhost:8086/collections/demo/books/documents \
|
|
136
|
+
-H "Authorization: Bearer your-secret" \
|
|
137
|
+
-F "file=@demo/20k_leagues.txt" -F "docid=verne-20k" \
|
|
138
|
+
-F 'metadata={"lang":"en"}'
|
|
139
|
+
|
|
140
|
+
# Search (GET, no filters)
|
|
141
|
+
curl -G --data-urlencode "q=hello" \
|
|
142
|
+
-H "Authorization: Bearer your-secret" \
|
|
143
|
+
http://localhost:8086/collections/demo/books/search
|
|
144
|
+
|
|
145
|
+
# Search (POST, with filters)
|
|
146
|
+
curl -X POST http://localhost:8086/collections/demo/books/search \
|
|
147
|
+
-H "Authorization: Bearer your-secret" \
|
|
148
|
+
-H "Content-Type: application/json" \
|
|
149
|
+
-d '{"q":"captain nemo","k":5,"filters":{"docid":"verne-20k"}}'
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## License
|
|
153
|
+
GPL-3.0-or-later — (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
|
|
@@ -3,10 +3,7 @@ LICENSE
|
|
|
3
3
|
MANIFEST.in
|
|
4
4
|
README.md
|
|
5
5
|
config.yml.example
|
|
6
|
-
requirements-base.txt
|
|
7
6
|
requirements-cpu.txt
|
|
8
|
-
requirements-test.txt
|
|
9
|
-
requirements.txt
|
|
10
7
|
setup.py
|
|
11
8
|
patchvec.egg-info/PKG-INFO
|
|
12
9
|
patchvec.egg-info/SOURCES.txt
|
|
@@ -23,6 +20,7 @@ pave/metrics.py
|
|
|
23
20
|
pave/preprocess.py
|
|
24
21
|
pave/service.py
|
|
25
22
|
pave/ui.py
|
|
23
|
+
pave/assets/__init__.py
|
|
26
24
|
pave/assets/patchvec_icon_192.png
|
|
27
25
|
pave/assets/ui.html
|
|
28
26
|
pave/embedders/__init__.py
|
|
@@ -36,18 +34,21 @@ pave/stores/base.py
|
|
|
36
34
|
pave/stores/factory.py
|
|
37
35
|
pave/stores/qdrant_store.py
|
|
38
36
|
pave/stores/txtai_store.py
|
|
39
|
-
scripts/release.sh
|
|
40
37
|
tests/test_auth.py
|
|
41
38
|
tests/test_cli.py
|
|
42
39
|
tests/test_collections.py
|
|
43
40
|
tests/test_config_runtime.py
|
|
44
41
|
tests/test_csv_ingest.py
|
|
42
|
+
tests/test_data_export.py
|
|
43
|
+
tests/test_delete_document.py
|
|
45
44
|
tests/test_docid_default.py
|
|
46
45
|
tests/test_health.py
|
|
47
46
|
tests/test_metrics.py
|
|
47
|
+
tests/test_request_id.py
|
|
48
48
|
tests/test_txtai_concurrent_upsert.py
|
|
49
49
|
tests/test_txtai_store.py
|
|
50
50
|
tests/test_txtai_store_filters.py
|
|
51
|
+
tests/test_txtai_store_sql_safety.py
|
|
51
52
|
tests/test_ui.py
|
|
52
53
|
tests/test_upload_search_csv.py
|
|
53
54
|
tests/test_upload_search_pdf.py
|
|
@@ -6,6 +6,17 @@ python-multipart>=0.0.9
|
|
|
6
6
|
pypdf>=5.0.0
|
|
7
7
|
pyyaml>=6.0.2
|
|
8
8
|
python-dotenv>=1.0.1
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
faiss-cpu>=1.7.1
|
|
10
|
+
torch>=2.10.0
|
|
11
|
+
|
|
12
|
+
[cpu]
|
|
13
|
+
|
|
14
|
+
[openai]
|
|
11
15
|
openai>=1.0.0
|
|
16
|
+
|
|
17
|
+
[sbert]
|
|
18
|
+
sentence-transformers>=2.7.0
|
|
19
|
+
|
|
20
|
+
[test]
|
|
21
|
+
pytest
|
|
22
|
+
httpx
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
<!doctype html>
|
|
2
|
+
<!-- (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
|
|
3
|
+
<!-- SPDX-License-Identifier: GPL-3.0-or-later -->
|
|
4
|
+
|
|
2
5
|
<html lang="en">
|
|
3
6
|
<head>
|
|
4
7
|
<meta charset="utf-8" />
|
|
@@ -63,7 +66,7 @@
|
|
|
63
66
|
</div>
|
|
64
67
|
|
|
65
68
|
<div class="footer">
|
|
66
|
-
© 2025 <a href="https://flowlexi.com" target="_blank" rel="noopener">Flowlexi</a> •
|
|
69
|
+
© 2025, 2026 <a href="https://flowlexi.com" target="_blank" rel="noopener">Flowlexi</a> •
|
|
67
70
|
<a href="__REPO_URL__" target="_blank" rel="noopener">🍰 patchvec __VERSION__</a> •
|
|
68
71
|
<a href="__LICENSE_URL__" target="_blank" rel="noopener">__LICENSE_NAME__</a>
|
|
69
72
|
</div>
|