patchvec 0.5.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchvec-0.5.6/ABOUT.md +69 -0
- patchvec-0.5.6/LICENSE +9 -0
- patchvec-0.5.6/MANIFEST.in +12 -0
- patchvec-0.5.6/PKG-INFO +115 -0
- patchvec-0.5.6/README.md +87 -0
- patchvec-0.5.6/config.yml.example +69 -0
- patchvec-0.5.6/patchvec.egg-info/PKG-INFO +115 -0
- patchvec-0.5.6/patchvec.egg-info/SOURCES.txt +54 -0
- patchvec-0.5.6/patchvec.egg-info/dependency_links.txt +1 -0
- patchvec-0.5.6/patchvec.egg-info/entry_points.txt +3 -0
- patchvec-0.5.6/patchvec.egg-info/requires.txt +11 -0
- patchvec-0.5.6/patchvec.egg-info/top_level.txt +1 -0
- patchvec-0.5.6/pave/__init__.py +6 -0
- patchvec-0.5.6/pave/assets/patchvec_icon_192.png +0 -0
- patchvec-0.5.6/pave/assets/ui.html +125 -0
- patchvec-0.5.6/pave/auth.py +108 -0
- patchvec-0.5.6/pave/cli.py +97 -0
- patchvec-0.5.6/pave/config.py +240 -0
- patchvec-0.5.6/pave/embedders/__init__.py +1 -0
- patchvec-0.5.6/pave/embedders/base.py +12 -0
- patchvec-0.5.6/pave/embedders/factory.py +21 -0
- patchvec-0.5.6/pave/embedders/openai_emb.py +30 -0
- patchvec-0.5.6/pave/embedders/sbert_emb.py +24 -0
- patchvec-0.5.6/pave/embedders/txtai_emb.py +58 -0
- patchvec-0.5.6/pave/main.py +303 -0
- patchvec-0.5.6/pave/metrics.py +52 -0
- patchvec-0.5.6/pave/preprocess.py +151 -0
- patchvec-0.5.6/pave/service.py +92 -0
- patchvec-0.5.6/pave/stores/__init__.py +1 -0
- patchvec-0.5.6/pave/stores/base.py +33 -0
- patchvec-0.5.6/pave/stores/factory.py +18 -0
- patchvec-0.5.6/pave/stores/qdrant_store.py +26 -0
- patchvec-0.5.6/pave/stores/txtai_store.py +445 -0
- patchvec-0.5.6/pave/ui.py +175 -0
- patchvec-0.5.6/requirements-base.txt +1 -0
- patchvec-0.5.6/requirements-cpu.txt +6 -0
- patchvec-0.5.6/requirements-test.txt +5 -0
- patchvec-0.5.6/requirements.txt +11 -0
- patchvec-0.5.6/scripts/release.sh +89 -0
- patchvec-0.5.6/setup.cfg +4 -0
- patchvec-0.5.6/setup.py +67 -0
- patchvec-0.5.6/tests/test_auth.py +79 -0
- patchvec-0.5.6/tests/test_cli.py +45 -0
- patchvec-0.5.6/tests/test_collections.py +8 -0
- patchvec-0.5.6/tests/test_config_runtime.py +41 -0
- patchvec-0.5.6/tests/test_csv_ingest.py +75 -0
- patchvec-0.5.6/tests/test_docid_default.py +41 -0
- patchvec-0.5.6/tests/test_health.py +32 -0
- patchvec-0.5.6/tests/test_metrics.py +35 -0
- patchvec-0.5.6/tests/test_txtai_concurrent_upsert.py +76 -0
- patchvec-0.5.6/tests/test_txtai_store.py +126 -0
- patchvec-0.5.6/tests/test_txtai_store_filters.py +112 -0
- patchvec-0.5.6/tests/test_ui.py +33 -0
- patchvec-0.5.6/tests/test_upload_search_csv.py +36 -0
- patchvec-0.5.6/tests/test_upload_search_pdf.py +37 -0
- patchvec-0.5.6/tests/test_upload_search_txt.py +119 -0
patchvec-0.5.6/ABOUT.md
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# PatchVec — A lightweight, pluggable vector search microservice.
|
|
2
|
+
|
|
3
|
+
Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
4
|
+
|
|
5
|
+
## Highlights
|
|
6
|
+
- Multi-tenant collections: `/collections/{tenant}/{name}`
|
|
7
|
+
- Upload and search **TXT**, **CSV**, and **PDF**
|
|
8
|
+
- Chunking per format:
|
|
9
|
+
- PDF → 1 chunk/page
|
|
10
|
+
- TXT → configurable chunk size + overlap
|
|
11
|
+
- CSV → 1 chunk/row
|
|
12
|
+
- Metadata filters on POST search (`{"filters": {"docid": "DOC-1"}}`)
|
|
13
|
+
- Health, metrics, and Prometheus endpoints
|
|
14
|
+
- Configurable auth modes: `none` or `static` (Bearer)
|
|
15
|
+
- Default backends are local (vendor-neutral "default"); embedders & stores are pluggable
|
|
16
|
+
|
|
17
|
+
## Requirements
|
|
18
|
+
- Python 3.10+
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
```bash
|
|
22
|
+
python -m venv .venv
|
|
23
|
+
source .venv/bin/activate
|
|
24
|
+
pip install patchvec
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
> CPU-only by default. If you have a CUDA setup and want GPU-accelerated deps, install the GPU-enabled packages in your environment before running PatchVec.
|
|
28
|
+
|
|
29
|
+
## Quickstart
|
|
30
|
+
```bash
|
|
31
|
+
# Start the server (installed entry point)
|
|
32
|
+
pavesrv
|
|
33
|
+
|
|
34
|
+
# Or run with uvicorn manually if you prefer:
|
|
35
|
+
uvicorn pave.main:app --host 0.0.0.0 --port 8080
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Minimal config (optional)
|
|
39
|
+
By default PatchVec runs with sensible local defaults. To customize, create `config.yml` and set:
|
|
40
|
+
```yaml
|
|
41
|
+
vector_store:
|
|
42
|
+
type: default
|
|
43
|
+
embedder:
|
|
44
|
+
type: default
|
|
45
|
+
auth:
|
|
46
|
+
mode: none # or 'static' with per-tenant Bearer keys
|
|
47
|
+
```
|
|
48
|
+
Then export:
|
|
49
|
+
```bash
|
|
50
|
+
export PATCHVEC_CONFIG=./config.yml
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## REST example
|
|
54
|
+
```bash
|
|
55
|
+
# Create a collection
|
|
56
|
+
curl -X POST http://localhost:8080/collections/acme/docs
|
|
57
|
+
|
|
58
|
+
# Upload a TXT document
|
|
59
|
+
curl -X POST http://localhost:8080/collections/acme/docs/documents -F "file=@sample.txt" -F "docid=DOC1"
|
|
60
|
+
|
|
61
|
+
# Search (GET, no filters)
|
|
62
|
+
curl -G --data-urlencode "q=hello" http://localhost:8080/collections/acme/docs/search
|
|
63
|
+
|
|
64
|
+
# Search (POST, with filters)
|
|
65
|
+
curl -X POST http://localhost:8080/collections/acme/docs/search -H "Content-Type: application/json" -d '{"q":"hello","k":5,"filters":{"docid":"DOC1"}}'
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
|
patchvec-0.5.6/LICENSE
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 29 June 2007
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
|
|
5
|
+
|
|
6
|
+
Everyone is permitted to copy and distribute verbatim copies
|
|
7
|
+
of this license document, but changing it is not allowed.
|
|
8
|
+
|
|
9
|
+
Full text: https://www.gnu.org/licenses/gpl-3.0.txt
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
include ABOUT.md
|
|
2
|
+
include README.md
|
|
3
|
+
include LICENSE
|
|
4
|
+
include config.yml.example
|
|
5
|
+
include requirements.txt
|
|
6
|
+
include requirements-cpu.txt
|
|
7
|
+
include requirements-base.txt
|
|
8
|
+
include requirements-test.txt
|
|
9
|
+
recursive-include pave *.py
|
|
10
|
+
recursive-include scripts *.sh
|
|
11
|
+
recursive-include pave/assets *
|
|
12
|
+
global-exclude __pycache__ *.py[cod] *.so
|
patchvec-0.5.6/PKG-INFO
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: patchvec
|
|
3
|
+
Version: 0.5.6
|
|
4
|
+
Summary: Patchvec — A lightweight, pluggable vector search microservice.
|
|
5
|
+
Author: Rodrigo Rodrigues da Silva
|
|
6
|
+
Author-email: rodrigopitanga@posteo.net
|
|
7
|
+
License: GPL-3.0-or-later
|
|
8
|
+
Project-URL: Homepage, https://gitlab.com/flowlexi/patchvec
|
|
9
|
+
Project-URL: Source, https://gitlab.com/flowlexi/patchvec
|
|
10
|
+
Project-URL: Tracker, https://gitlab.com/flowlexi/patchvec/issues
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Framework :: FastAPI
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: fastapi>=0.115.0
|
|
25
|
+
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
26
|
+
Requires-Dist: txtai>=6.3.0
|
|
27
|
+
Requires-Dist: pydantic>=2.8.2
|
|
28
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
29
|
+
Requires-Dist: pypdf>=5.0.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
31
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
32
|
+
Requires-Dist: qdrant-client>=1.9.2
|
|
33
|
+
Requires-Dist: sentence-transformers>=2.7.0
|
|
34
|
+
Requires-Dist: openai>=1.0.0
|
|
35
|
+
Dynamic: author
|
|
36
|
+
Dynamic: author-email
|
|
37
|
+
Dynamic: classifier
|
|
38
|
+
Dynamic: description
|
|
39
|
+
Dynamic: description-content-type
|
|
40
|
+
Dynamic: license
|
|
41
|
+
Dynamic: license-file
|
|
42
|
+
Dynamic: project-url
|
|
43
|
+
Dynamic: requires-dist
|
|
44
|
+
Dynamic: requires-python
|
|
45
|
+
Dynamic: summary
|
|
46
|
+
|
|
47
|
+
# PatchVec — A lightweight, pluggable vector search microservice.
|
|
48
|
+
|
|
49
|
+
Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
50
|
+
|
|
51
|
+
## Highlights
|
|
52
|
+
- Multi-tenant collections: `/collections/{tenant}/{name}`
|
|
53
|
+
- Upload and search **TXT**, **CSV**, and **PDF**
|
|
54
|
+
- Chunking per format:
|
|
55
|
+
- PDF → 1 chunk/page
|
|
56
|
+
- TXT → configurable chunk size + overlap
|
|
57
|
+
- CSV → 1 chunk/row
|
|
58
|
+
- Metadata filters on POST search (`{"filters": {"docid": "DOC-1"}}`)
|
|
59
|
+
- Health, metrics, and Prometheus endpoints
|
|
60
|
+
- Configurable auth modes: `none` or `static` (Bearer)
|
|
61
|
+
- Default backends are local (vendor-neutral "default"); embedders & stores are pluggable
|
|
62
|
+
|
|
63
|
+
## Requirements
|
|
64
|
+
- Python 3.10+
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
```bash
|
|
68
|
+
python -m venv .venv
|
|
69
|
+
source .venv/bin/activate
|
|
70
|
+
pip install patchvec
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
> CPU-only by default. If you have a CUDA setup and want GPU-accelerated deps, install the GPU-enabled packages in your environment before running PatchVec.
|
|
74
|
+
|
|
75
|
+
## Quickstart
|
|
76
|
+
```bash
|
|
77
|
+
# Start the server (installed entry point)
|
|
78
|
+
pavesrv
|
|
79
|
+
|
|
80
|
+
# Or run with uvicorn manually if you prefer:
|
|
81
|
+
uvicorn pave.main:app --host 0.0.0.0 --port 8080
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Minimal config (optional)
|
|
85
|
+
By default PatchVec runs with sensible local defaults. To customize, create `config.yml` and set:
|
|
86
|
+
```yaml
|
|
87
|
+
vector_store:
|
|
88
|
+
type: default
|
|
89
|
+
embedder:
|
|
90
|
+
type: default
|
|
91
|
+
auth:
|
|
92
|
+
mode: none # or 'static' with per-tenant Bearer keys
|
|
93
|
+
```
|
|
94
|
+
Then export:
|
|
95
|
+
```bash
|
|
96
|
+
export PATCHVEC_CONFIG=./config.yml
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## REST example
|
|
100
|
+
```bash
|
|
101
|
+
# Create a collection
|
|
102
|
+
curl -X POST http://localhost:8080/collections/acme/docs
|
|
103
|
+
|
|
104
|
+
# Upload a TXT document
|
|
105
|
+
curl -X POST http://localhost:8080/collections/acme/docs/documents -F "file=@sample.txt" -F "docid=DOC1"
|
|
106
|
+
|
|
107
|
+
# Search (GET, no filters)
|
|
108
|
+
curl -G --data-urlencode "q=hello" http://localhost:8080/collections/acme/docs/search
|
|
109
|
+
|
|
110
|
+
# Search (POST, with filters)
|
|
111
|
+
curl -X POST http://localhost:8080/collections/acme/docs/search -H "Content-Type: application/json" -d '{"q":"hello","k":5,"filters":{"docid":"DOC1"}}'
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
|
patchvec-0.5.6/README.md
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# 🍰 PatchVec — Lightweight, Pluggable Vector Search Microservice
|
|
2
|
+
|
|
3
|
+
Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 🚀 Quickstart
|
|
8
|
+
|
|
9
|
+
### 🖥️ CPU-only Dev (default)
|
|
10
|
+
```bash
|
|
11
|
+
python -m venv .venv
|
|
12
|
+
source .venv/bin/activate
|
|
13
|
+
python -m pip install --upgrade pip
|
|
14
|
+
pip install -r requirements-cpu.txt
|
|
15
|
+
./pavesrv.sh
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
### 📦 Install from PyPI
|
|
19
|
+
```bash
|
|
20
|
+
python -m venv .venv
|
|
21
|
+
source .venv/bin/activate
|
|
22
|
+
python -m pip install --upgrade pip
|
|
23
|
+
pip install patchvec # CPU-only by default; for CUDA, install the GPU-enabled deps in your environment first
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### ▶️ Run the Server
|
|
27
|
+
From source:
|
|
28
|
+
```bash
|
|
29
|
+
./pavesrv.sh
|
|
30
|
+
```
|
|
31
|
+
From PyPI:
|
|
32
|
+
```bash
|
|
33
|
+
pavesrv
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### ⚙️ Minimal Config
|
|
37
|
+
For production (static auth), set env vars (do not commit secrets):
|
|
38
|
+
```env
|
|
39
|
+
PATCHVEC_AUTH__MODE=static
|
|
40
|
+
PATCHVEC_AUTH__GLOBAL_KEY=sekret-passwod
|
|
41
|
+
```
|
|
42
|
+
(Optional: copy `config.yml.example` to an untracked `config.yml` and tweak as needed)
|
|
43
|
+
(Tip: use an untracked `tenants.yml` and point `auth.tenants_file` to it in `config.yml`.)
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## 🔧 Overriding Server Settings (uvicorn)
|
|
48
|
+
You can override a few server knobs via environment variables:
|
|
49
|
+
```bash
|
|
50
|
+
HOST=127.0.0.1 PORT=9000 RELOAD=1 WORKERS=4 LOG_LEVEL=debug pavesrv
|
|
51
|
+
```
|
|
52
|
+
> Note: Full configuration uses the `PATCHVEC_...` env scheme (e.g., `PATCHVEC_SERVER__PORT=9000`).
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## 🌐 REST API Examples
|
|
57
|
+
|
|
58
|
+
**Create a collection**
|
|
59
|
+
```bash
|
|
60
|
+
curl -X POST "http://localhost:8086/collections/acme/invoices"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Upload a TXT/PDF/CSV document**
|
|
64
|
+
```bash
|
|
65
|
+
curl -X POST "http://localhost:8086/collections/acme/invoices/documents" -F "file=@sample.txt" -F "docid=DOC-1" -F 'metadata={"lang":"pt"}'
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Search (GET, no filters)**
|
|
69
|
+
```bash
|
|
70
|
+
curl "http://localhost:8086/collections/acme/invoices/search?q=garantia&k=5"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**Search (POST with filters)**
|
|
74
|
+
```bash
|
|
75
|
+
curl -X POST "http://localhost:8086/collections/acme/invoices/search" -H "Content-Type: application/json" -d '{"q": "garantia", "k": 5, "filters": {"docid": "DOC-1"}}'
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Health / Metrics**
|
|
79
|
+
```bash
|
|
80
|
+
curl "http://localhost:8086/health"
|
|
81
|
+
curl "http://localhost:8086/metrics"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## 📜 License
|
|
87
|
+
GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Patchvec sample configuration (commit this file as config.yml.example)
|
|
2
|
+
# Copy to config.yml and adjust. Secrets should live in a separate, untracked file (see tenants.yml.example).
|
|
3
|
+
|
|
4
|
+
data_dir: ./data
|
|
5
|
+
|
|
6
|
+
# Optional "common" collection included in searches when enabled
|
|
7
|
+
common_enabled: true
|
|
8
|
+
common_tenant: global
|
|
9
|
+
common_collection: common
|
|
10
|
+
|
|
11
|
+
# Authentication
|
|
12
|
+
auth:
|
|
13
|
+
# Modes: none | static
|
|
14
|
+
mode: static
|
|
15
|
+
# Default admin user when auth=none
|
|
16
|
+
default_access_tenant: public
|
|
17
|
+
# Global admin key (read from env; do NOT hardcode it in committed files)
|
|
18
|
+
global_key: ${PATCHVEC_GLOBAL_KEY}
|
|
19
|
+
# External tenants→keys mapping file (untracked). If present, it
|
|
20
|
+
# will be merged and override duplicates.
|
|
21
|
+
tenants_file: ./tenants.yml
|
|
22
|
+
# Inline mapping (fallback; keep empty in repo)
|
|
23
|
+
api_keys: {}
|
|
24
|
+
|
|
25
|
+
# Vector store selection
|
|
26
|
+
vector_store:
|
|
27
|
+
type: default # default | qdrant
|
|
28
|
+
txtai:
|
|
29
|
+
embed_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
|
30
|
+
backend: faiss
|
|
31
|
+
qdrant:
|
|
32
|
+
url: http://localhost:6333
|
|
33
|
+
api_key: ${PATCHVEC_QDRANT_API_KEY}
|
|
34
|
+
prefer_payload_filters: true
|
|
35
|
+
collection_prefix: patchvec_
|
|
36
|
+
|
|
37
|
+
# Embedder selection
|
|
38
|
+
embedder:
|
|
39
|
+
type: default # default | sbert | openai
|
|
40
|
+
# default (txtai-backed)
|
|
41
|
+
txtai:
|
|
42
|
+
path: sentence-transformers/paraphrase-MiniLM-L3-v2
|
|
43
|
+
sbert:
|
|
44
|
+
model: sentence-transformers/all-MiniLM-L6-v2
|
|
45
|
+
batch_size: 64
|
|
46
|
+
device: auto # cpu | cuda | auto
|
|
47
|
+
openai:
|
|
48
|
+
api_key: ${PATCHVEC_OPENAI_API_KEY}
|
|
49
|
+
dim: 1536
|
|
50
|
+
|
|
51
|
+
# Text preprocessing
|
|
52
|
+
preprocess:
|
|
53
|
+
txt_chunk_size: 1000
|
|
54
|
+
txt_chunk_overlap: 200
|
|
55
|
+
|
|
56
|
+
# Optional server defaults
|
|
57
|
+
# (env HOST/PORT/RELOAD/WORKERS/LOG_LEVEL always override)
|
|
58
|
+
server:
|
|
59
|
+
host: 0.0.0.0
|
|
60
|
+
port: 8086
|
|
61
|
+
reload: false
|
|
62
|
+
workers: 1
|
|
63
|
+
log_level: info
|
|
64
|
+
|
|
65
|
+
# Optional instance customization
|
|
66
|
+
instance:
|
|
67
|
+
name: PatchVec
|
|
68
|
+
desc: Vector Search Microservice (pluggable, functional)
|
|
69
|
+
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: patchvec
|
|
3
|
+
Version: 0.5.6
|
|
4
|
+
Summary: Patchvec — A lightweight, pluggable vector search microservice.
|
|
5
|
+
Author: Rodrigo Rodrigues da Silva
|
|
6
|
+
Author-email: rodrigopitanga@posteo.net
|
|
7
|
+
License: GPL-3.0-or-later
|
|
8
|
+
Project-URL: Homepage, https://gitlab.com/flowlexi/patchvec
|
|
9
|
+
Project-URL: Source, https://gitlab.com/flowlexi/patchvec
|
|
10
|
+
Project-URL: Tracker, https://gitlab.com/flowlexi/patchvec/issues
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Framework :: FastAPI
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: fastapi>=0.115.0
|
|
25
|
+
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
26
|
+
Requires-Dist: txtai>=6.3.0
|
|
27
|
+
Requires-Dist: pydantic>=2.8.2
|
|
28
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
29
|
+
Requires-Dist: pypdf>=5.0.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
31
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
32
|
+
Requires-Dist: qdrant-client>=1.9.2
|
|
33
|
+
Requires-Dist: sentence-transformers>=2.7.0
|
|
34
|
+
Requires-Dist: openai>=1.0.0
|
|
35
|
+
Dynamic: author
|
|
36
|
+
Dynamic: author-email
|
|
37
|
+
Dynamic: classifier
|
|
38
|
+
Dynamic: description
|
|
39
|
+
Dynamic: description-content-type
|
|
40
|
+
Dynamic: license
|
|
41
|
+
Dynamic: license-file
|
|
42
|
+
Dynamic: project-url
|
|
43
|
+
Dynamic: requires-dist
|
|
44
|
+
Dynamic: requires-python
|
|
45
|
+
Dynamic: summary
|
|
46
|
+
|
|
47
|
+
# PatchVec — A lightweight, pluggable vector search microservice.
|
|
48
|
+
|
|
49
|
+
Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
50
|
+
|
|
51
|
+
## Highlights
|
|
52
|
+
- Multi-tenant collections: `/collections/{tenant}/{name}`
|
|
53
|
+
- Upload and search **TXT**, **CSV**, and **PDF**
|
|
54
|
+
- Chunking per format:
|
|
55
|
+
- PDF → 1 chunk/page
|
|
56
|
+
- TXT → configurable chunk size + overlap
|
|
57
|
+
- CSV → 1 chunk/row
|
|
58
|
+
- Metadata filters on POST search (`{"filters": {"docid": "DOC-1"}}`)
|
|
59
|
+
- Health, metrics, and Prometheus endpoints
|
|
60
|
+
- Configurable auth modes: `none` or `static` (Bearer)
|
|
61
|
+
- Default backends are local (vendor-neutral "default"); embedders & stores are pluggable
|
|
62
|
+
|
|
63
|
+
## Requirements
|
|
64
|
+
- Python 3.10+
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
```bash
|
|
68
|
+
python -m venv .venv
|
|
69
|
+
source .venv/bin/activate
|
|
70
|
+
pip install patchvec
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
> CPU-only by default. If you have a CUDA setup and want GPU-accelerated deps, install the GPU-enabled packages in your environment before running PatchVec.
|
|
74
|
+
|
|
75
|
+
## Quickstart
|
|
76
|
+
```bash
|
|
77
|
+
# Start the server (installed entry point)
|
|
78
|
+
pavesrv
|
|
79
|
+
|
|
80
|
+
# Or run with uvicorn manually if you prefer:
|
|
81
|
+
uvicorn pave.main:app --host 0.0.0.0 --port 8080
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Minimal config (optional)
|
|
85
|
+
By default PatchVec runs with sensible local defaults. To customize, create `config.yml` and set:
|
|
86
|
+
```yaml
|
|
87
|
+
vector_store:
|
|
88
|
+
type: default
|
|
89
|
+
embedder:
|
|
90
|
+
type: default
|
|
91
|
+
auth:
|
|
92
|
+
mode: none # or 'static' with per-tenant Bearer keys
|
|
93
|
+
```
|
|
94
|
+
Then export:
|
|
95
|
+
```bash
|
|
96
|
+
export PATCHVEC_CONFIG=./config.yml
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## REST example
|
|
100
|
+
```bash
|
|
101
|
+
# Create a collection
|
|
102
|
+
curl -X POST http://localhost:8080/collections/acme/docs
|
|
103
|
+
|
|
104
|
+
# Upload a TXT document
|
|
105
|
+
curl -X POST http://localhost:8080/collections/acme/docs/documents -F "file=@sample.txt" -F "docid=DOC1"
|
|
106
|
+
|
|
107
|
+
# Search (GET, no filters)
|
|
108
|
+
curl -G --data-urlencode "q=hello" http://localhost:8080/collections/acme/docs/search
|
|
109
|
+
|
|
110
|
+
# Search (POST, with filters)
|
|
111
|
+
curl -X POST http://localhost:8080/collections/acme/docs/search -H "Content-Type: application/json" -d '{"q":"hello","k":5,"filters":{"docid":"DOC1"}}'
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
ABOUT.md
|
|
2
|
+
LICENSE
|
|
3
|
+
MANIFEST.in
|
|
4
|
+
README.md
|
|
5
|
+
config.yml.example
|
|
6
|
+
requirements-base.txt
|
|
7
|
+
requirements-cpu.txt
|
|
8
|
+
requirements-test.txt
|
|
9
|
+
requirements.txt
|
|
10
|
+
setup.py
|
|
11
|
+
patchvec.egg-info/PKG-INFO
|
|
12
|
+
patchvec.egg-info/SOURCES.txt
|
|
13
|
+
patchvec.egg-info/dependency_links.txt
|
|
14
|
+
patchvec.egg-info/entry_points.txt
|
|
15
|
+
patchvec.egg-info/requires.txt
|
|
16
|
+
patchvec.egg-info/top_level.txt
|
|
17
|
+
pave/__init__.py
|
|
18
|
+
pave/auth.py
|
|
19
|
+
pave/cli.py
|
|
20
|
+
pave/config.py
|
|
21
|
+
pave/main.py
|
|
22
|
+
pave/metrics.py
|
|
23
|
+
pave/preprocess.py
|
|
24
|
+
pave/service.py
|
|
25
|
+
pave/ui.py
|
|
26
|
+
pave/assets/patchvec_icon_192.png
|
|
27
|
+
pave/assets/ui.html
|
|
28
|
+
pave/embedders/__init__.py
|
|
29
|
+
pave/embedders/base.py
|
|
30
|
+
pave/embedders/factory.py
|
|
31
|
+
pave/embedders/openai_emb.py
|
|
32
|
+
pave/embedders/sbert_emb.py
|
|
33
|
+
pave/embedders/txtai_emb.py
|
|
34
|
+
pave/stores/__init__.py
|
|
35
|
+
pave/stores/base.py
|
|
36
|
+
pave/stores/factory.py
|
|
37
|
+
pave/stores/qdrant_store.py
|
|
38
|
+
pave/stores/txtai_store.py
|
|
39
|
+
scripts/release.sh
|
|
40
|
+
tests/test_auth.py
|
|
41
|
+
tests/test_cli.py
|
|
42
|
+
tests/test_collections.py
|
|
43
|
+
tests/test_config_runtime.py
|
|
44
|
+
tests/test_csv_ingest.py
|
|
45
|
+
tests/test_docid_default.py
|
|
46
|
+
tests/test_health.py
|
|
47
|
+
tests/test_metrics.py
|
|
48
|
+
tests/test_txtai_concurrent_upsert.py
|
|
49
|
+
tests/test_txtai_store.py
|
|
50
|
+
tests/test_txtai_store_filters.py
|
|
51
|
+
tests/test_ui.py
|
|
52
|
+
tests/test_upload_search_csv.py
|
|
53
|
+
tests/test_upload_search_pdf.py
|
|
54
|
+
tests/test_upload_search_txt.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pave
|
|
Binary file
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
<!doctype html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="utf-8" />
|
|
5
|
+
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
|
6
|
+
<!-- robust favicon -->
|
|
7
|
+
<link rel="icon" type="image/png" sizes="32x32" href="/assets/patchvec_icon_192.png" />
|
|
8
|
+
<link rel="icon" type="image/png" sizes="192x192" href="/assets/patchvec_icon_192.png" />
|
|
9
|
+
<link rel="icon" href="/favicon.ico" />
|
|
10
|
+
<title>__INST_NAME__ • Search</title>
|
|
11
|
+
<style>
|
|
12
|
+
/* palette */
|
|
13
|
+
:root{ --bg:#f6e6d9; --panel:#fffaf6; --text:#2b1e11; --muted:#6b5b53; --accent:#c9463d; --border:#ead7c7; --link:#0f2e4d; --link-accent:#14b8a6; }
|
|
14
|
+
@media (prefers-color-scheme: dark){
|
|
15
|
+
:root{ --bg:#1a1410; --panel:#221a14; --text:#f1e9e4; --muted:#b7a9a1; --accent:#ef6b62; --border:#3a2c22; --link:#9dd9d3; --link-accent:#34d399; }
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
*{ box-sizing:border-box }
|
|
19
|
+
html,body{ height:100% }
|
|
20
|
+
body{
|
|
21
|
+
min-height:100vh; display:flex; flex-direction:column; overflow:hidden;
|
|
22
|
+
margin:0; font:16px/1.45 system-ui,Segoe UI,Roboto,Inter,Arial; background:var(--bg); color:var(--text);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
.bar{
|
|
26
|
+
flex:0 0 auto; display:flex; gap:10px; align-items:center;
|
|
27
|
+
padding:12px; border-bottom:1px solid var(--border); background:var(--panel);
|
|
28
|
+
}
|
|
29
|
+
.left{ display:flex; gap:8px; align-items:center }
|
|
30
|
+
.right{ margin-left:auto; color:var(--muted); white-space:nowrap; overflow:hidden; text-overflow:ellipsis }
|
|
31
|
+
|
|
32
|
+
.tab{
|
|
33
|
+
padding:8px 12px; border-radius:10px; border:1px solid var(--border);
|
|
34
|
+
cursor:pointer; background:transparent; color:var(--text);
|
|
35
|
+
}
|
|
36
|
+
.tab.active{ background:var(--link-accent); color:#041318; border-color:var(--link-accent) }
|
|
37
|
+
|
|
38
|
+
.frames{ flex:1 1 auto; min-height:0; background:var(--bg) }
|
|
39
|
+
.frame{ display:none; width:100%; height:100%; border:0; background:var(--bg) }
|
|
40
|
+
.frame.active{ display:block }
|
|
41
|
+
|
|
42
|
+
.footer{
|
|
43
|
+
flex:0 0 auto; position:sticky; bottom:0;
|
|
44
|
+
display:flex; gap:12px; align-items:center; justify-content:center;
|
|
45
|
+
padding:10px; color:var(--muted); border-top:1px solid var(--border); background:var(--panel);
|
|
46
|
+
}
|
|
47
|
+
.footer a{ color:var(--link); text-decoration:none }
|
|
48
|
+
.footer a:hover{ color:var(--link-accent) }
|
|
49
|
+
</style>
|
|
50
|
+
</head>
|
|
51
|
+
<body>
|
|
52
|
+
<div class="bar">
|
|
53
|
+
<div class="left">
|
|
54
|
+
<button class="tab active" data-target="search" data-title="__INST_NAME__ • Search">Search</button>
|
|
55
|
+
<button class="tab" data-target="ingest" data-title="__INST_NAME__ • Ingest">Ingest</button>
|
|
56
|
+
</div>
|
|
57
|
+
<div class="right"><strong>__INST_NAME__</strong> — <small>__INST_DESC__</small></div>
|
|
58
|
+
</div>
|
|
59
|
+
|
|
60
|
+
<div class="frames">
|
|
61
|
+
<iframe id="search" class="frame active" src="/ui/search" title="Search"></iframe>
|
|
62
|
+
<iframe id="ingest" class="frame" src="/ui/ingest" title="Ingest"></iframe>
|
|
63
|
+
</div>
|
|
64
|
+
|
|
65
|
+
<div class="footer">
|
|
66
|
+
© 2025 <a href="https://flowlexi.com" target="_blank" rel="noopener">Flowlexi</a> •
|
|
67
|
+
<a href="__REPO_URL__" target="_blank" rel="noopener">🍰 patchvec __VERSION__</a> •
|
|
68
|
+
<a href="__LICENSE_URL__" target="_blank" rel="noopener">__LICENSE_NAME__</a>
|
|
69
|
+
</div>
|
|
70
|
+
<script>
|
|
71
|
+
/**
 * Find the element currently matching `.frame.active`, i.e. the iframe
 * that is being displayed.
 *
 * @returns {Element|null} The matching element, or null when none matches.
 */
function activeFrame() {
  const visibleFrame = document.querySelector('.frame.active');
  return visibleFrame;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Strip the Swagger UI chrome from the page loaded inside an iframe.
 *
 * Reads the iframe's document and, once an element matching `.swagger-ui`
 * exists, appends a single `<style id="pv-clean">` element to its head. The
 * stylesheet hides the top bar, download-url wrapper, info and servers
 * sections, tightens the wrapper spacing and makes the background transparent.
 * If the style element is already present nothing is added, so the function
 * can be called repeatedly (on every tab click and every iframe `load`).
 *
 * While `.swagger-ui` is missing the function re-schedules itself every 80 ms.
 * The number of re-schedules is bounded so that a frame that never renders
 * Swagger UI does not keep a timer chain alive forever.
 *
 * @param {HTMLIFrameElement} frame - Iframe whose document should be cleaned.
 *   A missing frame, window or document makes the call a no-op.
 * @param {number} [retriesLeft=250] - Remaining 80 ms polling attempts
 *   (250 attempts is roughly 20 seconds). Callers normally omit this.
 * @returns {void}
 */
function scrubSwagger(frame, retriesLeft = 250){
  const win = frame?.contentWindow;
  const doc = win?.document;
  if(!doc) return;

  if(!doc.querySelector('.swagger-ui')){
    // Swagger UI has not rendered yet: poll again, but give up once the
    // retry budget is spent instead of polling indefinitely.
    if(retriesLeft > 0){
      setTimeout(()=>scrubSwagger(frame, retriesLeft - 1), 80);
    }
    return;
  }

  // CSS: hide the top bar and the info sections
  const STYLE_ID = 'pv-clean';
  if(!doc.getElementById(STYLE_ID)){
    const s = doc.createElement('style');
    s.id = STYLE_ID;
    // The former empty `height:;` declaration was invalid CSS and has been dropped.
    s.textContent = `
#operations-tag-default,
.swagger-ui .topbar,
.swagger-ui .download-url-wrapper,
.swagger-ui .information-container,
.swagger-ui .info,
.swagger-ui .servers { display:none !important; }
.swagger-ui .wrapper { margin:0 !important; padding:0 0 8px !important; }
html, body { background:transparent !important; }
/* auth icon */
.swagger-ui .auth-wrapper .authorize:hover { border-color: rgba(20,184,166,.8); }
.swagger-ui .btn.authorize { background: white }
.swagger-ui .scheme-container { width:100%; margin: 8px 0 0 0; padding: 0 0; background:transparent; border-color: transparent; box-shadow:0 0 0 0 rgba(0,0,0,0)}
`;
    doc.head.appendChild(s);
  }
}
|
|
104
|
+
|
|
105
|
+
// Tab bar wiring: each `.tab` button shows the iframe named by its
// `data-target` attribute and hides the others.
const tabs = document.querySelectorAll('.tab');
const frames = document.querySelectorAll('.frame');

for (const tab of tabs) {
  tab.addEventListener('click', () => {
    // Clear the current selection on every tab and frame first.
    for (const otherTab of tabs) {
      otherTab.classList.remove('active');
    }
    for (const otherFrame of frames) {
      otherFrame.classList.remove('active');
    }

    // Activate the clicked tab together with the frame it points at.
    tab.classList.add('active');
    const fr = document.getElementById(tab.dataset.target);
    fr.classList.add('active');

    // Keep the current title when the tab does not provide one.
    const nextTitle = tab.dataset.title || document.title;
    document.title = nextTitle;

    scrubSwagger(fr);
  });
}

// Re-apply the Swagger clean-up each time a frame finishes loading.
for (const fr of frames) {
  fr.addEventListener('load', () => {
    scrubSwagger(fr);
  });
}
|
|
123
|
+
</script>
|
|
124
|
+
</body>
|
|
125
|
+
</html>
|