patchvec 0.5.6__tar.gz → 0.5.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. patchvec-0.5.7/ABOUT.md +98 -0
  2. {patchvec-0.5.6 → patchvec-0.5.7}/MANIFEST.in +0 -3
  3. patchvec-0.5.7/PKG-INFO +153 -0
  4. patchvec-0.5.7/README.md +162 -0
  5. patchvec-0.5.7/patchvec.egg-info/PKG-INFO +153 -0
  6. {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/SOURCES.txt +5 -4
  7. {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/requires.txt +13 -2
  8. patchvec-0.5.7/pave/assets/__init__.py +4 -0
  9. {patchvec-0.5.6 → patchvec-0.5.7}/pave/assets/ui.html +4 -1
  10. {patchvec-0.5.6 → patchvec-0.5.7}/pave/auth.py +5 -5
  11. {patchvec-0.5.6 → patchvec-0.5.7}/pave/cli.py +60 -1
  12. {patchvec-0.5.6 → patchvec-0.5.7}/pave/config.py +98 -40
  13. patchvec-0.5.7/pave/embedders/__init__.py +4 -0
  14. {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/txtai_emb.py +4 -4
  15. {patchvec-0.5.6 → patchvec-0.5.7}/pave/main.py +145 -23
  16. patchvec-0.5.7/pave/metrics.py +202 -0
  17. {patchvec-0.5.6 → patchvec-0.5.7}/pave/preprocess.py +9 -8
  18. patchvec-0.5.7/pave/service.py +331 -0
  19. patchvec-0.5.7/pave/stores/__init__.py +4 -0
  20. {patchvec-0.5.6 → patchvec-0.5.7}/pave/stores/base.py +13 -4
  21. {patchvec-0.5.6 → patchvec-0.5.7}/pave/stores/qdrant_store.py +4 -3
  22. {patchvec-0.5.6 → patchvec-0.5.7}/pave/stores/txtai_store.py +262 -105
  23. patchvec-0.5.7/requirements-cpu.txt +9 -0
  24. {patchvec-0.5.6 → patchvec-0.5.7}/setup.py +24 -11
  25. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_cli.py +39 -1
  26. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_csv_ingest.py +1 -1
  27. patchvec-0.5.7/tests/test_data_export.py +91 -0
  28. patchvec-0.5.7/tests/test_delete_document.py +109 -0
  29. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_docid_default.py +1 -1
  30. patchvec-0.5.7/tests/test_metrics.py +168 -0
  31. patchvec-0.5.7/tests/test_request_id.py +93 -0
  32. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_txtai_concurrent_upsert.py +2 -1
  33. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_txtai_store.py +12 -0
  34. patchvec-0.5.7/tests/test_txtai_store_filters.py +237 -0
  35. patchvec-0.5.7/tests/test_txtai_store_sql_safety.py +116 -0
  36. patchvec-0.5.6/ABOUT.md +0 -69
  37. patchvec-0.5.6/PKG-INFO +0 -115
  38. patchvec-0.5.6/README.md +0 -87
  39. patchvec-0.5.6/patchvec.egg-info/PKG-INFO +0 -115
  40. patchvec-0.5.6/pave/embedders/__init__.py +0 -1
  41. patchvec-0.5.6/pave/metrics.py +0 -52
  42. patchvec-0.5.6/pave/service.py +0 -92
  43. patchvec-0.5.6/pave/stores/__init__.py +0 -1
  44. patchvec-0.5.6/requirements-base.txt +0 -1
  45. patchvec-0.5.6/requirements-cpu.txt +0 -6
  46. patchvec-0.5.6/requirements-test.txt +0 -5
  47. patchvec-0.5.6/requirements.txt +0 -11
  48. patchvec-0.5.6/scripts/release.sh +0 -89
  49. patchvec-0.5.6/tests/test_metrics.py +0 -35
  50. patchvec-0.5.6/tests/test_txtai_store_filters.py +0 -112
  51. {patchvec-0.5.6 → patchvec-0.5.7}/LICENSE +0 -0
  52. {patchvec-0.5.6 → patchvec-0.5.7}/config.yml.example +0 -0
  53. {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/dependency_links.txt +0 -0
  54. {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/entry_points.txt +0 -0
  55. {patchvec-0.5.6 → patchvec-0.5.7}/patchvec.egg-info/top_level.txt +0 -0
  56. {patchvec-0.5.6 → patchvec-0.5.7}/pave/__init__.py +0 -0
  57. {patchvec-0.5.6 → patchvec-0.5.7}/pave/assets/patchvec_icon_192.png +0 -0
  58. {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/base.py +0 -0
  59. {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/factory.py +0 -0
  60. {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/openai_emb.py +0 -0
  61. {patchvec-0.5.6 → patchvec-0.5.7}/pave/embedders/sbert_emb.py +0 -0
  62. {patchvec-0.5.6 → patchvec-0.5.7}/pave/stores/factory.py +0 -0
  63. {patchvec-0.5.6 → patchvec-0.5.7}/pave/ui.py +0 -0
  64. {patchvec-0.5.6 → patchvec-0.5.7}/setup.cfg +0 -0
  65. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_auth.py +0 -0
  66. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_collections.py +0 -0
  67. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_config_runtime.py +0 -0
  68. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_health.py +0 -0
  69. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_ui.py +0 -0
  70. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_upload_search_csv.py +0 -0
  71. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_upload_search_pdf.py +0 -0
  72. {patchvec-0.5.6 → patchvec-0.5.7}/tests/test_upload_search_txt.py +0 -0
@@ -0,0 +1,98 @@
1
+ <!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
2
+ <!-- SPDX-License-Identifier: GPL-3.0-or-later -->
3
+
4
+ # PatchVec — A lightweight, pluggable vector search microservice.
5
+
6
+ Upload → chunk → index (with metadata) → search via REST and CLI.
7
+
8
+ ## Highlights
9
+ - Multi-tenant collections: `/collections/{tenant}/{name}`
10
+ - Upload and search TXT, CSV, and PDF
11
+ - Deterministic provenance: every hit returns doc id, page, offset, snippet
12
+ - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
13
+ - REST and CLI entry points
14
+ - Health/metrics endpoints + Prometheus exporter
15
+ - Pluggable embeddings and stores; default backend is local
16
+
17
+ ## Requirements
18
+ - Python 3.10–3.14
19
+
20
+ ## Install (PyPI)
21
+ ```bash
22
+ python -m venv .venv
23
+ source .venv/bin/activate
24
+ pip install patchvec
25
+ ```
26
+
27
+ CPU-only deployments can use the PyTorch CPU wheel index:
28
+ ```bash
29
+ pip install "patchvec[cpu]" \
30
+ --index-url https://download.pytorch.org/whl/cpu \
31
+ --extra-index-url https://pypi.org/simple
32
+ ```
33
+
34
+ ## Quickstart
35
+ ```bash
36
+ # Start the server (installed entry point)
37
+ pavesrv
38
+
39
+ # Or run with uvicorn manually if you prefer:
40
+ uvicorn pave.main:app --host 0.0.0.0 --port 8086
41
+ ```
42
+
43
+ Auth defaults to `none`, which is intended for development only. For production, set static auth:
44
+ ```bash
45
+ export PATCHVEC_AUTH__MODE=static
46
+ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
47
+ ```
48
+
49
+ ## Minimal config (optional)
50
+ By default PatchVec runs with sensible local defaults. To customize, create `config.yml`:
51
+ ```yaml
52
+ vector_store:
53
+ type: default
54
+ embedder:
55
+ type: default
56
+ auth:
57
+ mode: static
58
+ global_key: ${PATCHVEC_GLOBAL_KEY}
59
+ ```
60
+ Then export:
61
+ ```bash
62
+ export PATCHVEC_CONFIG=./config.yml
63
+ export PATCHVEC_GLOBAL_KEY="your-secret"
64
+ ```
65
+
66
+ ## CLI example
67
+ ```bash
68
+ pavecli create-collection demo books
69
+ pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
70
+ pavecli search demo books "captain nemo" -k 5
71
+ ```
72
+
73
+ ## REST example
74
+ ```bash
75
+ # Create a collection
76
+ curl -X POST http://localhost:8086/collections/demo/books \
77
+ -H "Authorization: Bearer your-secret"
78
+
79
+ # Upload a TXT document
80
+ curl -X POST http://localhost:8086/collections/demo/books/documents \
81
+ -H "Authorization: Bearer your-secret" \
82
+ -F "file=@demo/20k_leagues.txt" -F "docid=verne-20k" \
83
+ -F 'metadata={"lang":"en"}'
84
+
85
+ # Search (GET, no filters)
86
+ curl -G --data-urlencode "q=hello" \
87
+ -H "Authorization: Bearer your-secret" \
88
+ http://localhost:8086/collections/demo/books/search
89
+
90
+ # Search (POST, with filters)
91
+ curl -X POST http://localhost:8086/collections/demo/books/search \
92
+ -H "Authorization: Bearer your-secret" \
93
+ -H "Content-Type: application/json" \
94
+ -d '{"q":"captain nemo","k":5,"filters":{"docid":"verne-20k"}}'
95
+ ```
96
+
97
+ ## License
98
+ GPL-3.0-or-later — (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
@@ -2,10 +2,7 @@ include ABOUT.md
2
2
  include README.md
3
3
  include LICENSE
4
4
  include config.yml.example
5
- include requirements.txt
6
5
  include requirements-cpu.txt
7
- include requirements-base.txt
8
- include requirements-test.txt
9
6
  recursive-include pave *.py
10
7
  recursive-include scripts *.sh
11
8
  recursive-include pave/assets *
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: patchvec
3
+ Version: 0.5.7
4
+ Summary: Patchvec — A lightweight, pluggable vector search microservice.
5
+ Author: Rodrigo Rodrigues da Silva
6
+ Author-email: rodrigopitanga@posteo.net
7
+ License: GPL-3.0-or-later
8
+ Project-URL: Homepage, https://github.com/rodrigopitanga/patchvec/?tab=readme-ov-file#readme-ov-file
9
+ Project-URL: Source, https://github.com/rodrigopitanga/patchvec
10
+ Project-URL: Tracker, https://github.com/rodrigopitanga/patchvec/issues
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Framework :: FastAPI
20
+ Classifier: Topic :: Database
21
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
22
+ Requires-Python: >=3.10,<3.15
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: fastapi>=0.115.0
26
+ Requires-Dist: uvicorn[standard]>=0.30.6
27
+ Requires-Dist: txtai>=6.3.0
28
+ Requires-Dist: pydantic>=2.8.2
29
+ Requires-Dist: python-multipart>=0.0.9
30
+ Requires-Dist: pypdf>=5.0.0
31
+ Requires-Dist: pyyaml>=6.0.2
32
+ Requires-Dist: python-dotenv>=1.0.1
33
+ Requires-Dist: faiss-cpu>=1.7.1
34
+ Requires-Dist: torch>=2.10.0
35
+ Provides-Extra: cpu
36
+ Provides-Extra: sbert
37
+ Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
38
+ Provides-Extra: openai
39
+ Requires-Dist: openai>=1.0.0; extra == "openai"
40
+ Provides-Extra: test
41
+ Requires-Dist: pytest; extra == "test"
42
+ Requires-Dist: httpx; extra == "test"
43
+ Dynamic: author
44
+ Dynamic: author-email
45
+ Dynamic: classifier
46
+ Dynamic: description
47
+ Dynamic: description-content-type
48
+ Dynamic: license
49
+ Dynamic: license-file
50
+ Dynamic: project-url
51
+ Dynamic: provides-extra
52
+ Dynamic: requires-dist
53
+ Dynamic: requires-python
54
+ Dynamic: summary
55
+
56
+ <!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
57
+ <!-- SPDX-License-Identifier: GPL-3.0-or-later -->
58
+
59
+ # PatchVec — A lightweight, pluggable vector search microservice.
60
+
61
+ Upload → chunk → index (with metadata) → search via REST and CLI.
62
+
63
+ ## Highlights
64
+ - Multi-tenant collections: `/collections/{tenant}/{name}`
65
+ - Upload and search TXT, CSV, and PDF
66
+ - Deterministic provenance: every hit returns doc id, page, offset, snippet
67
+ - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
68
+ - REST and CLI entry points
69
+ - Health/metrics endpoints + Prometheus exporter
70
+ - Pluggable embeddings and stores; default backend is local
71
+
72
+ ## Requirements
73
+ - Python 3.10–3.14
74
+
75
+ ## Install (PyPI)
76
+ ```bash
77
+ python -m venv .venv
78
+ source .venv/bin/activate
79
+ pip install patchvec
80
+ ```
81
+
82
+ CPU-only deployments can use the PyTorch CPU wheel index:
83
+ ```bash
84
+ pip install "patchvec[cpu]" \
85
+ --index-url https://download.pytorch.org/whl/cpu \
86
+ --extra-index-url https://pypi.org/simple
87
+ ```
88
+
89
+ ## Quickstart
90
+ ```bash
91
+ # Start the server (installed entry point)
92
+ pavesrv
93
+
94
+ # Or run with uvicorn manually if you prefer:
95
+ uvicorn pave.main:app --host 0.0.0.0 --port 8086
96
+ ```
97
+
98
+ Auth defaults to `none`, which is intended for development only. For production, set static auth:
99
+ ```bash
100
+ export PATCHVEC_AUTH__MODE=static
101
+ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
102
+ ```
103
+
104
+ ## Minimal config (optional)
105
+ By default PatchVec runs with sensible local defaults. To customize, create `config.yml`:
106
+ ```yaml
107
+ vector_store:
108
+ type: default
109
+ embedder:
110
+ type: default
111
+ auth:
112
+ mode: static
113
+ global_key: ${PATCHVEC_GLOBAL_KEY}
114
+ ```
115
+ Then export:
116
+ ```bash
117
+ export PATCHVEC_CONFIG=./config.yml
118
+ export PATCHVEC_GLOBAL_KEY="your-secret"
119
+ ```
120
+
121
+ ## CLI example
122
+ ```bash
123
+ pavecli create-collection demo books
124
+ pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
125
+ pavecli search demo books "captain nemo" -k 5
126
+ ```
127
+
128
+ ## REST example
129
+ ```bash
130
+ # Create a collection
131
+ curl -X POST http://localhost:8086/collections/demo/books \
132
+ -H "Authorization: Bearer your-secret"
133
+
134
+ # Upload a TXT document
135
+ curl -X POST http://localhost:8086/collections/demo/books/documents \
136
+ -H "Authorization: Bearer your-secret" \
137
+ -F "file=@demo/20k_leagues.txt" -F "docid=verne-20k" \
138
+ -F 'metadata={"lang":"en"}'
139
+
140
+ # Search (GET, no filters)
141
+ curl -G --data-urlencode "q=hello" \
142
+ -H "Authorization: Bearer your-secret" \
143
+ http://localhost:8086/collections/demo/books/search
144
+
145
+ # Search (POST, with filters)
146
+ curl -X POST http://localhost:8086/collections/demo/books/search \
147
+ -H "Authorization: Bearer your-secret" \
148
+ -H "Content-Type: application/json" \
149
+ -d '{"q":"captain nemo","k":5,"filters":{"docid":"verne-20k"}}'
150
+ ```
151
+
152
+ ## License
153
+ GPL-3.0-or-later — (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
@@ -0,0 +1,162 @@
1
+ <!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
2
+ <!-- SPDX-License-Identifier: GPL-3.0-or-later -->
3
+
4
+ # 🍰 PatchVec — Lightweight, Pluggable Vector Search Microservice
5
+
6
+ Patchvec is a compact vector store built for people who want provenance and fast iteration on RAG plumbing. No black boxes, no hidden pipelines: every chunk records document id, page, and byte offsets, and you can swap embeddings or storage backends per collection.
7
+
8
+ ## ⚙️ Core capabilities
9
+
10
+ - **Docker images** — prebuilt CPU/GPU images published to the GitLab Container Registry.
11
+ - **Tenants and collections** — isolation by tenant with per-collection configuration.
12
+ - **Pluggable embeddings** — choose the embedding adapter per collection; wire in local or hosted models.
13
+ - **REST and CLI** — production use over HTTP, quick experiments with the bundled CLI.
14
+ - **Deterministic provenance** — every hit returns doc id, page, offset, and snippet for traceability.
15
+
16
+ ## 🧭 Workflows
17
+
18
+ ### 🐳 Docker workflow (prebuilt images)
19
+
20
+ Pull the image that fits your hardware from the [Flowlexi](https://gitlab.com/flowlexi) Container Registry on GitLab (CUDA builds publish as `latest-gpu`, CPU-only as `latest-cpu`).
21
+
22
+ ```bash
23
+ docker pull registry.gitlab.com/flowlexi/patchvec/patchvec:latest-gpu
24
+ docker pull registry.gitlab.com/flowlexi/patchvec/patchvec:latest-cpu
25
+ ```
26
+
27
+ Run the service by choosing the tag you need and mapping the API port locally:
28
+
29
+ ```bash
30
+ docker run -d --name patchvec \
31
+ -p 8086:8086 \
32
+ registry.gitlab.com/flowlexi/patchvec/patchvec:latest-cpu
33
+ ```
34
+
35
+ Use the bundled CLI inside the container to create a tenant/collection, ingest a demo document, and query it:
36
+
37
+ ```bash
38
+ docker exec patchvec pavecli create-collection demo books
39
+ docker exec patchvec pavecli upload demo books /app/demo/20k_leagues.txt \
40
+ --docid=verne-20k --metadata='{"lang":"en"}'
41
+ docker exec patchvec pavecli search demo books "captain nemo" -k 3
42
+ ```
43
+
44
+ See below for REST and UI.
45
+
46
+ Stop the container when you are done:
47
+
48
+ ```bash
49
+ docker rm -f patchvec
50
+ ```
51
+
52
+ ### 🐍 PyPI workflow
53
+
54
+ Install Patchvec from PyPI inside an isolated virtual environment and point it at a local configuration directory.
55
+
56
+ **Requires Python 3.10–3.14.**
57
+
58
+ ```bash
59
+ mkdir -p ~/pv && cd ~/pv #or wherever
60
+ python -m venv .venv-pv
61
+ source .venv-pv/bin/activate
62
+ python -m pip install --upgrade pip
63
+ pip install "patchvec[cpu]"
64
+
65
+ # grab the default configs
66
+ curl -LO https://raw.githubusercontent.com/patchvec/patchvec/main/config.yml.example
67
+ curl -LO https://raw.githubusercontent.com/patchvec/patchvec/main/tenants.yml.example
68
+ cp config.yml.example config.yml
69
+ cp tenants.yml.example tenants.yml
70
+
71
+ # sample demo corpus
72
+ curl -LO https://raw.githubusercontent.com/patchvec/patchvec/main/demo/20k_leagues.txt
73
+
74
+ # point Patchvec at the config file and set a local admin key
75
+ export PATCHVEC_CONFIG="$HOME/pv/config.yml"
76
+ export PATCHVEC_GLOBAL_KEY=super-sekret
77
+
78
+ # option A: run the service (stays up until you stop it)
79
+ pavesrv
80
+
81
+ # option B: operate entirely via the CLI (no server needed)
82
+ pavecli create-collection demo books
83
+ pavecli upload demo books 20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
84
+ pavecli search demo books "captain nemo" -k 3
85
+ ```
86
+
87
+ > **CPU-only deployments:** The command above pulls the default PyTorch wheel
88
+ > from PyPI, which includes CUDA support (~2 GB). For a leaner, CPU-only torch
89
+ > install, point pip at the PyTorch CPU index:
90
+ >
91
+ > ```bash
92
+ > pip install "patchvec[cpu]" \
93
+ > --index-url https://download.pytorch.org/whl/cpu \
94
+ > --extra-index-url https://pypi.org/simple
95
+ > ```
96
+
97
+ Deactivate the virtual environment with `deactivate` when finished.
98
+
99
+ ### 🌐 REST API and Web UI usage
100
+
101
+ When the server is running (either via Docker or `pavesrv`), the API listens on `http://localhost:8086`. The following `curl` commands mirror the CLI sequence above—adjust the file path to wherever you stored the corpus (`/app/demo/20k_leagues.txt` in Docker, `~/pv/20k_leagues.txt` for PyPI installs) and reuse the bearer token exported earlier:
102
+
103
+ ```bash
104
+ # create collection
105
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
106
+ -X POST http://localhost:8086/collections/demo/books
107
+
108
+ # ingest document
109
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
110
+ -X POST http://localhost:8086/collections/demo/books/documents \
111
+ -F "file=@20k_leagues.txt" \
112
+ -F 'metadata={"lang":"en"}'
113
+
114
+ # run search
115
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
116
+ "http://localhost:8086/collections/demo/books/search?q=captain+nemo&k=3"
117
+ ```
118
+
119
+ There is a simple Swagger UI available at the root of the server. Just point your browser to `http://localhost:8086/`
120
+
121
+ Health and metrics endpoints are available at `/health` and `/metrics`.
122
+
123
+ Configuration files copied in either workflow can be customised. Runtime options are also accepted via the `PATCHVEC_*` environment variable scheme (`PATCHVEC_SERVER__PORT`, `PATCHVEC_AUTH__MODE`, etc.), which takes precedence over the configuration files.
124
+
125
+ ### 🔁 Live data updates
126
+
127
+ Patchvec supports live data refresh without restarting the server. Re-upload a document with the same `docid` to *replace* its vector content (the filename doesn't matter, though the metadata will change), or explicitly delete the document and then ingest it again.
128
+
129
+ CLI (re-upload to replace):
130
+
131
+ ```bash
132
+ pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k
133
+ cp demo/20k_leagues.txt demo/20k_leagues_mod.txt
134
+ echo "THE END" >> demo/20k_leagues_mod.txt
135
+ pavecli upload demo books demo/20k_leagues_mod.txt --docid=verne-20k
136
+ ```
137
+
138
+ REST (delete then ingest):
139
+
140
+ ```bash
141
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
142
+ -X DELETE http://localhost:8086/collections/demo/books/documents/verne-20k
143
+
144
+ # make changes
145
+
146
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
147
+ -X POST http://localhost:8086/collections/demo/books/documents \
148
+ -F "file=@demo/20k_leagues.txt" \
149
+ -F 'docid=verne-20k'
150
+ ```
151
+
152
+ ### 🛠️ Developer workflow
153
+
154
+ Building from source relies on the `Makefile` shortcuts (`make install-dev`, `USE_CPU=1 make serve`, `make test`, etc.). The full contributor workflow, target reference, and task claiming rules live in [CONTRIBUTING.md](CONTRIBUTING.md). Performance benchmarks are documented in [README-benchmarks.md](README-benchmarks.md).
155
+
156
+ ## 🗺️ Roadmap
157
+
158
+ Short & mid-term chores are tracked in [`ROADMAP.md`](ROADMAP.md). Pick one, open an issue titled `claim: <task>`, and ship a patch.
159
+
160
+ ## 📜 License
161
+
162
+ GPL-3.0-or-later — (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: patchvec
3
+ Version: 0.5.7
4
+ Summary: Patchvec — A lightweight, pluggable vector search microservice.
5
+ Author: Rodrigo Rodrigues da Silva
6
+ Author-email: rodrigopitanga@posteo.net
7
+ License: GPL-3.0-or-later
8
+ Project-URL: Homepage, https://github.com/rodrigopitanga/patchvec/?tab=readme-ov-file#readme-ov-file
9
+ Project-URL: Source, https://github.com/rodrigopitanga/patchvec
10
+ Project-URL: Tracker, https://github.com/rodrigopitanga/patchvec/issues
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Framework :: FastAPI
20
+ Classifier: Topic :: Database
21
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
22
+ Requires-Python: >=3.10,<3.15
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: fastapi>=0.115.0
26
+ Requires-Dist: uvicorn[standard]>=0.30.6
27
+ Requires-Dist: txtai>=6.3.0
28
+ Requires-Dist: pydantic>=2.8.2
29
+ Requires-Dist: python-multipart>=0.0.9
30
+ Requires-Dist: pypdf>=5.0.0
31
+ Requires-Dist: pyyaml>=6.0.2
32
+ Requires-Dist: python-dotenv>=1.0.1
33
+ Requires-Dist: faiss-cpu>=1.7.1
34
+ Requires-Dist: torch>=2.10.0
35
+ Provides-Extra: cpu
36
+ Provides-Extra: sbert
37
+ Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
38
+ Provides-Extra: openai
39
+ Requires-Dist: openai>=1.0.0; extra == "openai"
40
+ Provides-Extra: test
41
+ Requires-Dist: pytest; extra == "test"
42
+ Requires-Dist: httpx; extra == "test"
43
+ Dynamic: author
44
+ Dynamic: author-email
45
+ Dynamic: classifier
46
+ Dynamic: description
47
+ Dynamic: description-content-type
48
+ Dynamic: license
49
+ Dynamic: license-file
50
+ Dynamic: project-url
51
+ Dynamic: provides-extra
52
+ Dynamic: requires-dist
53
+ Dynamic: requires-python
54
+ Dynamic: summary
55
+
56
+ <!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
57
+ <!-- SPDX-License-Identifier: GPL-3.0-or-later -->
58
+
59
+ # PatchVec — A lightweight, pluggable vector search microservice.
60
+
61
+ Upload → chunk → index (with metadata) → search via REST and CLI.
62
+
63
+ ## Highlights
64
+ - Multi-tenant collections: `/collections/{tenant}/{name}`
65
+ - Upload and search TXT, CSV, and PDF
66
+ - Deterministic provenance: every hit returns doc id, page, offset, snippet
67
+ - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
68
+ - REST and CLI entry points
69
+ - Health/metrics endpoints + Prometheus exporter
70
+ - Pluggable embeddings and stores; default backend is local
71
+
72
+ ## Requirements
73
+ - Python 3.10–3.14
74
+
75
+ ## Install (PyPI)
76
+ ```bash
77
+ python -m venv .venv
78
+ source .venv/bin/activate
79
+ pip install patchvec
80
+ ```
81
+
82
+ CPU-only deployments can use the PyTorch CPU wheel index:
83
+ ```bash
84
+ pip install "patchvec[cpu]" \
85
+ --index-url https://download.pytorch.org/whl/cpu \
86
+ --extra-index-url https://pypi.org/simple
87
+ ```
88
+
89
+ ## Quickstart
90
+ ```bash
91
+ # Start the server (installed entry point)
92
+ pavesrv
93
+
94
+ # Or run with uvicorn manually if you prefer:
95
+ uvicorn pave.main:app --host 0.0.0.0 --port 8086
96
+ ```
97
+
98
+ Auth defaults to `none`, which is intended for development only. For production, set static auth:
99
+ ```bash
100
+ export PATCHVEC_AUTH__MODE=static
101
+ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
102
+ ```
103
+
104
+ ## Minimal config (optional)
105
+ By default PatchVec runs with sensible local defaults. To customize, create `config.yml`:
106
+ ```yaml
107
+ vector_store:
108
+ type: default
109
+ embedder:
110
+ type: default
111
+ auth:
112
+ mode: static
113
+ global_key: ${PATCHVEC_GLOBAL_KEY}
114
+ ```
115
+ Then export:
116
+ ```bash
117
+ export PATCHVEC_CONFIG=./config.yml
118
+ export PATCHVEC_GLOBAL_KEY="your-secret"
119
+ ```
120
+
121
+ ## CLI example
122
+ ```bash
123
+ pavecli create-collection demo books
124
+ pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
125
+ pavecli search demo books "captain nemo" -k 5
126
+ ```
127
+
128
+ ## REST example
129
+ ```bash
130
+ # Create a collection
131
+ curl -X POST http://localhost:8086/collections/demo/books \
132
+ -H "Authorization: Bearer your-secret"
133
+
134
+ # Upload a TXT document
135
+ curl -X POST http://localhost:8086/collections/demo/books/documents \
136
+ -H "Authorization: Bearer your-secret" \
137
+ -F "file=@demo/20k_leagues.txt" -F "docid=verne-20k" \
138
+ -F 'metadata={"lang":"en"}'
139
+
140
+ # Search (GET, no filters)
141
+ curl -G --data-urlencode "q=hello" \
142
+ -H "Authorization: Bearer your-secret" \
143
+ http://localhost:8086/collections/demo/books/search
144
+
145
+ # Search (POST, with filters)
146
+ curl -X POST http://localhost:8086/collections/demo/books/search \
147
+ -H "Authorization: Bearer your-secret" \
148
+ -H "Content-Type: application/json" \
149
+ -d '{"q":"captain nemo","k":5,"filters":{"docid":"verne-20k"}}'
150
+ ```
151
+
152
+ ## License
153
+ GPL-3.0-or-later — (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
@@ -3,10 +3,7 @@ LICENSE
3
3
  MANIFEST.in
4
4
  README.md
5
5
  config.yml.example
6
- requirements-base.txt
7
6
  requirements-cpu.txt
8
- requirements-test.txt
9
- requirements.txt
10
7
  setup.py
11
8
  patchvec.egg-info/PKG-INFO
12
9
  patchvec.egg-info/SOURCES.txt
@@ -23,6 +20,7 @@ pave/metrics.py
23
20
  pave/preprocess.py
24
21
  pave/service.py
25
22
  pave/ui.py
23
+ pave/assets/__init__.py
26
24
  pave/assets/patchvec_icon_192.png
27
25
  pave/assets/ui.html
28
26
  pave/embedders/__init__.py
@@ -36,18 +34,21 @@ pave/stores/base.py
36
34
  pave/stores/factory.py
37
35
  pave/stores/qdrant_store.py
38
36
  pave/stores/txtai_store.py
39
- scripts/release.sh
40
37
  tests/test_auth.py
41
38
  tests/test_cli.py
42
39
  tests/test_collections.py
43
40
  tests/test_config_runtime.py
44
41
  tests/test_csv_ingest.py
42
+ tests/test_data_export.py
43
+ tests/test_delete_document.py
45
44
  tests/test_docid_default.py
46
45
  tests/test_health.py
47
46
  tests/test_metrics.py
47
+ tests/test_request_id.py
48
48
  tests/test_txtai_concurrent_upsert.py
49
49
  tests/test_txtai_store.py
50
50
  tests/test_txtai_store_filters.py
51
+ tests/test_txtai_store_sql_safety.py
51
52
  tests/test_ui.py
52
53
  tests/test_upload_search_csv.py
53
54
  tests/test_upload_search_pdf.py
@@ -6,6 +6,17 @@ python-multipart>=0.0.9
6
6
  pypdf>=5.0.0
7
7
  pyyaml>=6.0.2
8
8
  python-dotenv>=1.0.1
9
- qdrant-client>=1.9.2
10
- sentence-transformers>=2.7.0
9
+ faiss-cpu>=1.7.1
10
+ torch>=2.10.0
11
+
12
+ [cpu]
13
+
14
+ [openai]
11
15
  openai>=1.0.0
16
+
17
+ [sbert]
18
+ sentence-transformers>=2.7.0
19
+
20
+ [test]
21
+ pytest
22
+ httpx
@@ -0,0 +1,4 @@
1
+ # (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ # pkg
@@ -1,4 +1,7 @@
1
1
  <!doctype html>
2
+ <!-- (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net> -->
3
+ <!-- SPDX-License-Identifier: GPL-3.0-or-later -->
4
+
2
5
  <html lang="en">
3
6
  <head>
4
7
  <meta charset="utf-8" />
@@ -63,7 +66,7 @@
63
66
  </div>
64
67
 
65
68
  <div class="footer">
66
- © 2025 <a href="https://flowlexi.com" target="_blank" rel="noopener">Flowlexi</a> •
69
+ © 2025, 2026 <a href="https://flowlexi.com" target="_blank" rel="noopener">Flowlexi</a> •
67
70
  <a href="__REPO_URL__" target="_blank" rel="noopener">🍰 patchvec __VERSION__</a> •
68
71
  <a href="__LICENSE_URL__" target="_blank" rel="noopener">__LICENSE_NAME__</a>
69
72
  </div>