patchvec 0.5.8.1__tar.gz → 0.5.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. {patchvec-0.5.8.1 → patchvec-0.5.9}/ABOUT.md +10 -7
  2. {patchvec-0.5.8.1/patchvec.egg-info → patchvec-0.5.9}/PKG-INFO +14 -12
  3. {patchvec-0.5.8.1 → patchvec-0.5.9}/README.md +63 -42
  4. {patchvec-0.5.8.1 → patchvec-0.5.9}/config.yml.example +51 -45
  5. {patchvec-0.5.8.1 → patchvec-0.5.9/patchvec.egg-info}/PKG-INFO +14 -12
  6. {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/SOURCES.txt +35 -15
  7. {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/requires.txt +2 -4
  8. patchvec-0.5.9/pave/assets/config.yml.example +224 -0
  9. patchvec-0.5.9/pave/assets/tenants.yml.example +19 -0
  10. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/auth.py +5 -5
  11. patchvec-0.5.9/pave/backends/__init__.py +13 -0
  12. patchvec-0.5.9/pave/backends/base.py +29 -0
  13. patchvec-0.5.9/pave/backends/faiss.py +153 -0
  14. patchvec-0.5.9/pave/backends/qdrant.py +46 -0
  15. patchvec-0.5.9/pave/cli.py +337 -0
  16. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/config.py +120 -23
  17. patchvec-0.5.9/pave/embedders/__init__.py +7 -0
  18. patchvec-0.5.9/pave/embedders/base.py +21 -0
  19. patchvec-0.5.9/pave/embedders/factory.py +22 -0
  20. patchvec-0.5.9/pave/embedders/openai.py +47 -0
  21. patchvec-0.5.9/pave/embedders/sbert.py +69 -0
  22. patchvec-0.5.9/pave/filters.py +164 -0
  23. patchvec-0.5.9/pave/main.py +339 -0
  24. patchvec-0.5.9/pave/metadb.py +603 -0
  25. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/metrics.py +2 -2
  26. patchvec-0.5.9/pave/routes/__init__.py +16 -0
  27. patchvec-0.5.9/pave/routes/admin.py +131 -0
  28. patchvec-0.5.9/pave/routes/collections.py +123 -0
  29. patchvec-0.5.9/pave/routes/documents.py +164 -0
  30. patchvec-0.5.9/pave/routes/health.py +107 -0
  31. patchvec-0.5.9/pave/routes/search.py +177 -0
  32. patchvec-0.5.9/pave/runtime_paths.py +89 -0
  33. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/service.py +70 -226
  34. patchvec-0.5.9/pave/stores/__init__.py +6 -0
  35. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/stores/base.py +19 -7
  36. patchvec-0.5.9/pave/stores/local.py +807 -0
  37. {patchvec-0.5.8.1 → patchvec-0.5.9}/setup.py +6 -8
  38. patchvec-0.5.9/tests/test_bench_search_latency.py +56 -0
  39. patchvec-0.5.9/tests/test_bench_stress.py +56 -0
  40. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_cli.py +47 -5
  41. patchvec-0.5.9/tests/test_concurrent_upsert.py +62 -0
  42. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_config_runtime.py +22 -8
  43. patchvec-0.5.9/tests/test_config_tenants.py +246 -0
  44. patchvec-0.5.9/tests/test_csv_ingest.py +141 -0
  45. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_csv_ingest_api.py +14 -0
  46. patchvec-0.5.9/tests/test_data_export.py +173 -0
  47. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_delete_document.py +4 -3
  48. patchvec-0.5.9/tests/test_docid_default.py +49 -0
  49. patchvec-0.5.9/tests/test_faiss_backend.py +86 -0
  50. patchvec-0.5.8.1/tests/test_txtai_store.py → patchvec-0.5.9/tests/test_faiss_store.py +40 -14
  51. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_health.py +1 -1
  52. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_list_collections.py +12 -14
  53. patchvec-0.5.9/tests/test_main_args.py +28 -0
  54. patchvec-0.5.9/tests/test_meta_store.py +490 -0
  55. patchvec-0.5.9/tests/test_metadata_split.py +89 -0
  56. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_metrics.py +10 -10
  57. patchvec-0.5.9/tests/test_relevance.py +297 -0
  58. patchvec-0.5.9/tests/test_runtime_paths.py +83 -0
  59. patchvec-0.5.9/tests/test_sbert_embedder.py +243 -0
  60. patchvec-0.5.8.1/tests/test_txtai_store_cache_race.py → patchvec-0.5.9/tests/test_store_cache_race.py +4 -2
  61. patchvec-0.5.8.1/tests/test_txtai_store_catalog_metrics.py → patchvec-0.5.9/tests/test_store_catalog_metrics.py +4 -3
  62. patchvec-0.5.9/tests/test_store_close_race.py +171 -0
  63. patchvec-0.5.8.1/tests/test_txtai_store_filters.py → patchvec-0.5.9/tests/test_store_filters.py +6 -43
  64. patchvec-0.5.9/tests/test_store_meta_fetch_scope.py +152 -0
  65. patchvec-0.5.9/tests/test_store_sanitized_filter_parity.py +171 -0
  66. patchvec-0.5.8.1/pave/cli.py +0 -211
  67. patchvec-0.5.8.1/pave/embedders/__init__.py +0 -4
  68. patchvec-0.5.8.1/pave/embedders/base.py +0 -12
  69. patchvec-0.5.8.1/pave/embedders/factory.py +0 -21
  70. patchvec-0.5.8.1/pave/embedders/openai_emb.py +0 -30
  71. patchvec-0.5.8.1/pave/embedders/sbert_emb.py +0 -24
  72. patchvec-0.5.8.1/pave/embedders/txtai_emb.py +0 -58
  73. patchvec-0.5.8.1/pave/main.py +0 -864
  74. patchvec-0.5.8.1/pave/meta_store.py +0 -320
  75. patchvec-0.5.8.1/pave/stores/__init__.py +0 -4
  76. patchvec-0.5.8.1/pave/stores/factory.py +0 -18
  77. patchvec-0.5.8.1/pave/stores/qdrant_store.py +0 -38
  78. patchvec-0.5.8.1/pave/stores/txtai_store.py +0 -945
  79. patchvec-0.5.8.1/tests/test_csv_ingest.py +0 -81
  80. patchvec-0.5.8.1/tests/test_data_export.py +0 -91
  81. patchvec-0.5.8.1/tests/test_docid_default.py +0 -41
  82. patchvec-0.5.8.1/tests/test_meta_store.py +0 -91
  83. patchvec-0.5.8.1/tests/test_txtai_concurrent_upsert.py +0 -77
  84. patchvec-0.5.8.1/tests/test_txtai_store_close_race.py +0 -79
  85. patchvec-0.5.8.1/tests/test_txtai_store_meta_fetch_scope.py +0 -59
  86. patchvec-0.5.8.1/tests/test_txtai_store_sql_safety.py +0 -116
  87. {patchvec-0.5.8.1 → patchvec-0.5.9}/LICENSE +0 -0
  88. {patchvec-0.5.8.1 → patchvec-0.5.9}/MANIFEST.in +0 -0
  89. {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/dependency_links.txt +0 -0
  90. {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/entry_points.txt +0 -0
  91. {patchvec-0.5.8.1 → patchvec-0.5.9}/patchvec.egg-info/top_level.txt +0 -0
  92. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/__init__.py +0 -0
  93. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/assets/__init__.py +0 -0
  94. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/assets/patchvec_icon_192.png +0 -0
  95. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/assets/ui.html +0 -0
  96. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/log.py +0 -0
  97. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/preprocess.py +0 -0
  98. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/schemas.py +0 -0
  99. {patchvec-0.5.8.1 → patchvec-0.5.9}/pave/ui.py +0 -0
  100. {patchvec-0.5.8.1 → patchvec-0.5.9}/requirements-cpu.txt +0 -0
  101. {patchvec-0.5.8.1 → patchvec-0.5.9}/setup.cfg +0 -0
  102. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_admin_tenants.py +0 -0
  103. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_auth.py +0 -0
  104. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_auth_api.py +0 -0
  105. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_collection_rename.py +0 -0
  106. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_collections.py +0 -0
  107. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_ingest_errors.py +0 -0
  108. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_ingest_size_limit.py +0 -0
  109. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_lazy_app.py +0 -0
  110. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_log.py +0 -0
  111. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_request_id.py +0 -0
  112. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_search_errors.py +0 -0
  113. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_tenant_rate_limit.py +0 -0
  114. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_ui.py +0 -0
  115. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_upload_search_csv.py +0 -0
  116. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_upload_search_pdf.py +0 -0
  117. {patchvec-0.5.8.1 → patchvec-0.5.9}/tests/test_upload_search_txt.py +0 -0
@@ -12,7 +12,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
12
12
  - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
13
13
  - REST and CLI entry points
14
14
  - Health/metrics endpoints + Prometheus exporter
15
- - Pluggable embeddings and stores; default backend is local
15
+ - Pluggable embeddings and stores; default stack is local FAISS + SBERT
16
16
 
17
17
  ## Requirements
18
18
  - Python 3.10–3.14
@@ -47,27 +47,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
47
47
  ```
48
48
 
49
49
  ## Minimal config (optional)
50
- By default PatchVec runs with sensible local defaults. To customize, create
51
- `config.yml`:
50
+ By default PatchVec runs with sensible local defaults. For a user install,
51
+ customize `~/patchvec/config.yml`:
52
52
  ```yaml
53
53
  vector_store:
54
- type: default
54
+ type: faiss
55
55
  embedder:
56
- type: default
56
+ type: sbert
57
57
  auth:
58
58
  mode: static
59
59
  global_key: ${PATCHVEC_GLOBAL_KEY}
60
60
  ```
61
61
  Then export:
62
62
  ```bash
63
- export PATCHVEC_CONFIG=./config.yml
64
63
  export PATCHVEC_GLOBAL_KEY="your-secret"
65
64
  ```
65
+ If you keep the file elsewhere, point the runtime at it explicitly:
66
+ ```bash
67
+ export PATCHVEC_CONFIG=/path/to/config.yml
68
+ ```
66
69
 
67
70
  ## CLI example
68
71
  ```bash
69
72
  pavecli create-collection demo books
70
- pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k \
73
+ pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
71
74
  --metadata='{"lang":"en"}'
72
75
  pavecli search demo books "captain nemo" -k 5
73
76
  ```
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patchvec
3
- Version: 0.5.8.1
4
- Summary: Patchvec — A lightweight, pluggable vector search microservice.
3
+ Version: 0.5.9
4
+ Summary: PaveDB — A lightweight, pluggable vector search microservice.
5
5
  Author: Rodrigo Rodrigues da Silva
6
6
  Author-email: rodrigo@flowlexi.com
7
7
  License: AGPL-3.0-or-later
@@ -24,7 +24,6 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: fastapi>=0.115.0
26
26
  Requires-Dist: uvicorn[standard]>=0.30.6
27
- Requires-Dist: txtai>=6.3.0
28
27
  Requires-Dist: pydantic>=2.8.2
29
28
  Requires-Dist: python-multipart>=0.0.9
30
29
  Requires-Dist: pypdf>=5.0.0
@@ -32,14 +31,14 @@ Requires-Dist: pyyaml>=6.0.2
32
31
  Requires-Dist: python-dotenv>=1.0.1
33
32
  Requires-Dist: faiss-cpu>=1.7.1
34
33
  Requires-Dist: torch>=2.10.0
34
+ Requires-Dist: sentence-transformers>=2.7.0
35
35
  Provides-Extra: cpu
36
- Provides-Extra: sbert
37
- Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
38
36
  Provides-Extra: openai
39
37
  Requires-Dist: openai>=1.0.0; extra == "openai"
40
38
  Provides-Extra: test
41
39
  Requires-Dist: pytest; extra == "test"
42
40
  Requires-Dist: httpx; extra == "test"
41
+ Requires-Dist: datasets>=3.5.0; extra == "test"
43
42
  Dynamic: author
44
43
  Dynamic: author-email
45
44
  Dynamic: classifier
@@ -67,7 +66,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
67
66
  - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
68
67
  - REST and CLI entry points
69
68
  - Health/metrics endpoints + Prometheus exporter
70
- - Pluggable embeddings and stores; default backend is local
69
+ - Pluggable embeddings and stores; default stack is local FAISS + SBERT
71
70
 
72
71
  ## Requirements
73
72
  - Python 3.10–3.14
@@ -102,27 +101,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
102
101
  ```
103
102
 
104
103
  ## Minimal config (optional)
105
- By default PatchVec runs with sensible local defaults. To customize, create
106
- `config.yml`:
104
+ By default PatchVec runs with sensible local defaults. For a user install,
105
+ customize `~/patchvec/config.yml`:
107
106
  ```yaml
108
107
  vector_store:
109
- type: default
108
+ type: faiss
110
109
  embedder:
111
- type: default
110
+ type: sbert
112
111
  auth:
113
112
  mode: static
114
113
  global_key: ${PATCHVEC_GLOBAL_KEY}
115
114
  ```
116
115
  Then export:
117
116
  ```bash
118
- export PATCHVEC_CONFIG=./config.yml
119
117
  export PATCHVEC_GLOBAL_KEY="your-secret"
120
118
  ```
119
+ If you keep the file elsewhere, point the runtime at it explicitly:
120
+ ```bash
121
+ export PATCHVEC_CONFIG=/path/to/config.yml
122
+ ```
121
123
 
122
124
  ## CLI example
123
125
  ```bash
124
126
  pavecli create-collection demo books
125
- pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k \
127
+ pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
126
128
  --metadata='{"lang":"en"}'
127
129
  pavecli search demo books "captain nemo" -k 5
128
130
  ```
@@ -1,32 +1,38 @@
1
1
  <!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com> -->
2
2
  <!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
3
3
 
4
- # 🍰 PatchVec — Vector Search You Can Understand
4
+ # 🛣️ PatchVec — Vector search you can understand & deploy within minutes
5
5
 
6
- PatchVec is a single-process vector search engine that ingests your
7
- documents, chunks and embeds them, and gives you semantic search with
8
- full provenance — document id, page, character offset, and the exact
9
- snippet that matched. No cluster, no managed service, no
10
- opaque pipelines.
6
+ PatchVec is a single-process vector search engine for AI applications.
11
7
 
12
- Drop a file in, search it, see exactly what came back and why.
8
+ It ingests your documents, chunks and embeds them, and gives you semantic search with
9
+ full provenance — document id, page, character offset, and the exact snippet that
10
+ matched.
11
+
12
+ Built for developers shipping **RAG (Retrieval-Augmented Generation)** systems, PatchVec
13
+ provides a straightforward service for **vector search, embeddings pipelines, and
14
+ semantic retrieval**. It runs as a **REST service, a CLI tool, or an embedded library**,
15
+ so you can ship your first version quickly and keep growing with the same codebase as
16
+ your application scales. No cluster. No opaque pipelines.
17
+
18
+ Drop a file in. Search it. See exactly what came back — and why. Minutes after your
19
+ first commit.
13
20
 
14
21
  ## ⚙️ Why PatchVec
15
22
 
16
- - **Ingest files, not embeddings** — hand it a PDF, CSV, or TXT and
17
- PatchVec chunks, embeds, and indexes it. No preprocessing pipeline
18
- to build.
19
- - **Full provenance on every hit** every search result traces back
20
- to a document, page, and character offset. Latency and request
21
- traceability are built into every response.
22
- - **Multi-tenant by default** tenant/collection namespacing is
23
- built in, not bolted on.
24
- - **REST, CLI, or embed it** — run as an HTTP service, script via
25
- the CLI, or import the library directly in your Python app.
26
- - **Pluggable embeddings** — swap models per collection; wire in
27
- local or hosted embedding backends.
28
-
29
- ## 🧭 Workflows
23
+ - **Ingest files, not embeddings** — hand it a PDF, CSV, or TXT (more formats to come)
24
+ and PatchVec chunks, embeds, and indexes it. No preprocessing pipeline to build.
25
+ - **Full provenance on every hit** — every search result traces back to a document,
26
+ page, and character offset. Latency and request traceability are built into every
27
+ response.
28
+ - **Multi-tenant by default** — tenant/collection namespacing is built in, not bolted
29
+ on (and transparent when you just don't need it).
30
+ - **REST, CLI, or embed it** — run as an HTTP service, script via the CLI, or import the
31
+ library directly in your Python app.
32
+ - **Pluggable embeddings** (soon) — swap models per collection; wire in local or hosted
33
+ embedding backends.
34
+
35
+ ## 🧭 How to
30
36
 
31
37
  ### 🐳 Docker workflow (prebuilt images)
32
38
 
@@ -68,8 +74,8 @@ docker rm -f patchvec
68
74
 
69
75
  ### 🐍 PyPI workflow
70
76
 
71
- Install Patchvec from PyPI inside an isolated virtual environment and point it at a
72
- local configuration directory.
77
+ Install Patchvec from PyPI inside an isolated virtual environment. You can run it
78
+ purely from env vars, or later point it at an explicit config file.
73
79
 
74
80
  **Requires Python 3.10–3.14.**
75
81
 
@@ -80,17 +86,13 @@ source .venv-pv/bin/activate
80
86
  python -m pip install --upgrade pip
81
87
  pip install "patchvec[cpu]"
82
88
 
83
- # grab the default configs
84
- curl -LO https://raw.githubusercontent.com/rodrigopitanga/patchvec/main/config.yml.example
85
- curl -LO https://raw.githubusercontent.com/rodrigopitanga/patchvec/main/tenants.yml.example
86
- cp config.yml.example config.yml
87
- cp tenants.yml.example tenants.yml
89
+ # create the default instance under ~/patchvec
90
+ pavecli init
88
91
 
89
92
  # sample demo corpus
90
93
  curl -LO https://raw.githubusercontent.com/rodrigopitanga/patchvec/main/demo/20k_leagues.txt
91
94
 
92
- # point Patchvec at the config directory and set a local admin key
93
- export PATCHVEC_CONFIG="$HOME/pv/config.yml"
95
+ # set an admin key for the generated config
94
96
  export PATCHVEC_GLOBAL_KEY=super-sekret
95
97
 
96
98
  # option A: run the service (stays up until you stop it)
@@ -113,6 +115,14 @@ pavecli search demo books "captain nemo" -k 3
113
115
 
114
116
  Deactivate the virtual environment with `deactivate` when finished.
115
117
 
118
+ By default, a non-dev runtime reads `~/patchvec/config.yml` if present, keeps
119
+ tenant sidecar loading disabled unless `auth.tenants_file` is configured, and
120
+ stores data in `~/patchvec/data`. You can override any of that with the
121
+ `PATCHVEC_*` environment scheme or by pointing `PATCHVEC_CONFIG` at an explicit
122
+ config file. For alternate instances, use `pavecli init /path/to/instance` and
123
+ then point commands at that root with `pavesrv --home=/path/to/instance` or
124
+ `pavecli <command> ... --home /path/to/instance`.
125
+
116
126
  ### 🌐 REST API and Web UI usage
117
127
 
118
128
  When the server is running (either via Docker or `pavesrv`), the API listens on
@@ -168,9 +178,8 @@ The Swagger UI is available at `http://localhost:8086/`.
168
178
 
169
179
  Health and metrics endpoints are available at `/health` and `/metrics`.
170
180
 
171
- Configuration files copied in either workflow can be customised. Runtime options are
172
- also accepted via the `PATCHVEC_*` environment variable scheme (`PATCHVEC_SERVER__PORT`,
173
- `PATCHVEC_AUTH__MODE`, etc.), which precedes conf files.
181
+ Runtime options are also accepted via the `PATCHVEC_*` environment variable scheme
182
+ (`PATCHVEC_SERVER__PORT`, `PATCHVEC_AUTH__MODE`, etc.), which takes precedence over config files.
174
183
 
175
184
  ### 🔁 Live data updates
176
185
 
@@ -178,7 +187,7 @@ Patchvec supports live data refresh without restarting the server. Re-ingest the
178
187
  `docid` to *replace* vector content (filename doesn't matter - metadata will change
179
188
  though), or explicitly delete the document and then ingest it again.
180
189
 
181
- CLI (re-ingest to replace):
190
+ Re-ingest to replace (CLI path example):
182
191
 
183
192
  ```bash
184
193
  # initial ingest
@@ -188,11 +197,11 @@ pavecli ingest demo books 20k_leagues.txt --docid=verne-20k
188
197
  cp 20k_leagues.txt 20k_leagues_v2.txt
189
198
  echo "THE END" >> 20k_leagues_v2.txt
190
199
 
191
- # re-ingest with the same docid to replace the indexed content
200
+ # re-ingest with the *same docid* to replace the indexed content
192
201
  pavecli ingest demo books 20k_leagues_v2.txt --docid=verne-20k
193
202
  ```
194
203
 
195
- REST (delete then ingest):
204
+ Delete by ID then ingest (REST path example):
196
205
 
197
206
  ```bash
198
207
  curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
@@ -208,9 +217,10 @@ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
208
217
 
209
218
  ### 🛠️ Developer workflow
210
219
 
211
- Building from source relies on `Makefile` shortcuts (`make install-dev`,
212
- `USE_CPU=1 make serve`, `make test`, `make check`, etc.).
213
- The full contributor workflow, target reference, and task claiming rules live in
220
+ Building from source relies on `Makefile` shortcuts (`make install-dev`, `make serve`,
221
+ `make test`, `make check`, etc.).
222
+
223
+ The full contributor workflow, target reference, and coding style live in
214
224
  [CONTRIBUTING.md](CONTRIBUTING.md). Performance benchmarks are documented in
215
225
  [README-benchmarks.md](README-benchmarks.md).
216
226
 
@@ -229,9 +239,20 @@ See `config.yml.example` for the full logging configuration.
229
239
 
230
240
  ## 🗺️ Roadmap
231
241
 
232
- Short & mid-term chores are tracked in [`ROADMAP.md`](ROADMAP.md). Pick one, open an
233
- issue titled `claim: <task ID>`, and ship a patch.
242
+ Short/mid-term tasks and long-term plans are all tracked in
243
+ [`ROADMAP.md`](ROADMAP.md). Pick one, open an issue titled `claim: <task ID>`, and
244
+ ship a patch. If you find a bug, file it under the *Issues* tab.
234
245
 
235
246
  ## 📜 License
236
247
 
237
- AGPL-3.0-or-later (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com>
248
+ PatchVec is free software: you can use it, copy it, redistribute it and/or modify it
249
+ free of charge under the terms of the GNU Affero General Public License as published by
250
+ the Free Software Foundation, either version 3 of the License, or (at your option) any
251
+ later version.
252
+
253
+ PatchVec is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
254
+ without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
255
+ PURPOSE. See the GNU Affero General Public License for more details.
256
+
257
+ SPDX-License-Identifier: AGPL-3.0-or-later
258
+ Copyright (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com>
@@ -2,31 +2,50 @@
2
2
  # Copy to config.yml and adjust.
3
3
  # Secrets (API keys) should live in a separate untracked file — see tenants.yml.example.
4
4
  #
5
- # Config file location (default: ~/patchvec/config.yml):
6
- # PATCHVEC_CONFIG=/etc/patchvec/config.yml
5
+ # Default user-install paths:
6
+ # config: ~/pavedb/config.yml
7
+ # tenants: ~/pavedb/tenants.yml
8
+ # data: ~/pavedb/data
7
9
  #
8
- # In Docker/systemd deployments always set PATCHVEC_CONFIG explicitly — the
10
+ # Distro-like install example:
11
+ # config: /etc/pavedb/config.yml
12
+ # tenants: /var/pavedb/tenants.yml
13
+ # data: /var/pavedb/data
14
+ #
15
+ # Config file location override:
16
+ # PAVEDB_CONFIG=/etc/pavedb/config.yml
17
+ #
18
+ # In Docker/systemd deployments always set PAVEDB_CONFIG explicitly — the
9
19
  # default path expands ~ relative to the process user, which may not be what
10
20
  # you expect inside a container. Example compose snippet:
11
21
  #
12
22
  # environment:
13
- # PATCHVEC_CONFIG: /etc/patchvec/config.yml
23
+ # PAVEDB_CONFIG: /etc/pavedb/config.yml
14
24
  # volumes:
15
- # - ./config.yml:/etc/patchvec/config.yml:ro
25
+ # - ./config.yml:/etc/pavedb/config.yml:ro
16
26
  #
17
27
  # All keys can also be overridden inline via environment variables:
18
- # PATCHVEC_<KEY>=value (top-level, e.g. PATCHVEC_DATA_DIR)
19
- # PATCHVEC_<SECTION>__<KEY>=val (nested, e.g. PATCHVEC_LOG__LEVEL=debug)
28
+ # PAVEDB_<KEY>=value (top-level, e.g. PAVEDB_DATA_DIR)
29
+ # PAVEDB_<SECTION>__<KEY>=val (nested, e.g. PAVEDB_LOG__LEVEL=debug)
30
+ # Legacy PATCHVEC_* vars still work in v0.5.9 but will be removed in v0.6.
31
+ #
32
+ # `auth.tenants_file` is optional. If unset, its default is `None` and no
33
+ # tenants sidecar file is loaded.
34
+ # If set, PaveDB loads that sidecar first. Then inline tenant config is
35
+ # applied with precedence:
36
+ # env vars > config.yml > tenants.yml > defaults
37
+ # Example: define tenant "acme" entirely from env:
38
+ # PAVEDB_AUTH__API_KEYS__acme=change-me
39
+ # PAVEDB_TENANTS__acme__MAX_CONCURRENT=5
20
40
 
21
41
  # ---------------------------------------------------------------------------
22
42
  # Storage
23
43
  # ---------------------------------------------------------------------------
24
44
 
25
45
  # Data directory — ~ is expanded at startup.
26
- # Default (library/dev): ~/patchvec/data
27
- # For Docker/systemd use an absolute path:
28
- # mkdir -p /var/lib/patchvec/data && chown patchvec:patchvec /var/lib/patchvec/data
29
- data_dir: ~/patchvec/data
46
+ # Default (library/dev): ~/pavedb/data
47
+ # For distro-like installs use an absolute path, e.g. /var/pavedb/data.
48
+ data_dir: ~/pavedb/data
30
49
 
31
50
  # ---------------------------------------------------------------------------
32
51
  # Common collection
@@ -55,11 +74,13 @@ auth:
55
74
 
56
75
  # Global admin key — grants access to all tenants and admin routes.
57
76
  # Always read from the environment; never hardcode in committed files.
58
- global_key: ${PATCHVEC_GLOBAL_KEY}
77
+ global_key: ${PAVEDB_GLOBAL_KEY}
59
78
 
60
- # External tenant→key mapping file (untracked, merged at startup).
61
- # Keys in this file override any inline api_keys entries with the same name.
62
- tenants_file: ./tenants.yml
79
+ # External tenant→key mapping file.
80
+ # Keep the same key paths there (`auth.api_keys`, `tenants.*`).
81
+ # User install default: ~/pavedb/tenants.yml
82
+ # Distro-like install: /var/pavedb/tenants.yml
83
+ # tenants_file: ~/pavedb/tenants.yml
63
84
 
64
85
  # Inline tenant→key mapping (fallback; keep empty in the repo).
65
86
  api_keys: {}
@@ -69,51 +90,36 @@ auth:
69
90
  # ---------------------------------------------------------------------------
70
91
 
71
92
  vector_store:
72
- # default txtai + FAISS (built-in, no extra services required).
73
- # qdrant — Qdrant server (requires a running Qdrant instance).
74
- type: default
93
+ # faiss — local FAISS index (built-in, no extra services required).
94
+ type: faiss
75
95
 
76
- # Options for type=default (txtai-backed FAISS store).
77
- txtai:
96
+ # Options for type=faiss.
97
+ faiss:
78
98
  embed_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
79
- backend: faiss
80
-
81
- # Options for type=qdrant.
82
- # NOTE: QdrantStore is currently a stub — all methods raise NotImplementedError.
83
- # Contributions welcome! See pave/stores/qdrant_store.py.
84
- qdrant:
85
- url: http://localhost:6333
86
- api_key: ${PATCHVEC_QDRANT_API_KEY}
87
- prefer_payload_filters: true
88
- collection_prefix: patchvec_
99
+ max_query_chars: 4000
89
100
 
90
101
  # ---------------------------------------------------------------------------
91
102
  # Embedder
92
103
  # ---------------------------------------------------------------------------
93
104
  # Controls how text is converted to vectors before indexing and querying.
94
105
  # The embedder is shared across all collections (per-collection config: v0.6).
95
- #
96
- # NOTE: the sbert and openai embedder types are currently stubs — the factory
97
- # exists but is not wired into the default store. Only type=default (txtai) is
98
- # fully functional today. Per-collection embedder selection lands in v0.6.
99
- # Contributions welcome! See pave/embedders/ and pave/stores/factory.py.
100
106
 
101
107
  embedder:
102
- # default txtai-managed sentence-transformers model (recommended).
103
- # sbert direct sentence-transformers, more control over batching/device.
104
- # openai — OpenAI text-embedding-ada-002 (requires API key, adds latency).
105
- type: default
106
-
107
- txtai:
108
- path: sentence-transformers/paraphrase-MiniLM-L3-v2
108
+ # sbert — direct sentence-transformers (recommended local default).
109
+ # openai — OpenAI embeddings API (requires API key, adds latency).
110
+ type: sbert
109
111
 
112
+ # Used by type=sbert.
110
113
  sbert:
111
114
  model: sentence-transformers/all-MiniLM-L6-v2
112
115
  batch_size: 64
113
- device: auto # cpu | cuda | auto (auto detects CUDA at runtime)
116
+ device: auto # cpu | cuda | mps | auto
114
117
 
118
+ # Used by type=openai.
115
119
  openai:
116
- api_key: ${PATCHVEC_OPENAI_API_KEY}
120
+ model: text-embedding-3-small
121
+ batch_size: 256
122
+ api_key: ${PAVEDB_OPENAI_API_KEY}
117
123
  dim: 1536
118
124
 
119
125
  # ---------------------------------------------------------------------------
@@ -194,7 +200,7 @@ server:
194
200
 
195
201
  log:
196
202
  # Dev log level (stderr). DEBUG | INFO | WARNING | ERROR — default INFO.
197
- # Overridden by PATCHVEC_LOG__LEVEL env var (e.g. in Makefile: debug).
203
+ # Overridden by PAVEDB_LOG__LEVEL env var (e.g. in Makefile: debug).
198
204
  # Per-namespace overrides: log.debug / log.watch / log.quiet (list of loggers).
199
205
  level: INFO
200
206
 
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patchvec
3
- Version: 0.5.8.1
4
- Summary: Patchvec — A lightweight, pluggable vector search microservice.
3
+ Version: 0.5.9
4
+ Summary: PaveDB — A lightweight, pluggable vector search microservice.
5
5
  Author: Rodrigo Rodrigues da Silva
6
6
  Author-email: rodrigo@flowlexi.com
7
7
  License: AGPL-3.0-or-later
@@ -24,7 +24,6 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: fastapi>=0.115.0
26
26
  Requires-Dist: uvicorn[standard]>=0.30.6
27
- Requires-Dist: txtai>=6.3.0
28
27
  Requires-Dist: pydantic>=2.8.2
29
28
  Requires-Dist: python-multipart>=0.0.9
30
29
  Requires-Dist: pypdf>=5.0.0
@@ -32,14 +31,14 @@ Requires-Dist: pyyaml>=6.0.2
32
31
  Requires-Dist: python-dotenv>=1.0.1
33
32
  Requires-Dist: faiss-cpu>=1.7.1
34
33
  Requires-Dist: torch>=2.10.0
34
+ Requires-Dist: sentence-transformers>=2.7.0
35
35
  Provides-Extra: cpu
36
- Provides-Extra: sbert
37
- Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
38
36
  Provides-Extra: openai
39
37
  Requires-Dist: openai>=1.0.0; extra == "openai"
40
38
  Provides-Extra: test
41
39
  Requires-Dist: pytest; extra == "test"
42
40
  Requires-Dist: httpx; extra == "test"
41
+ Requires-Dist: datasets>=3.5.0; extra == "test"
43
42
  Dynamic: author
44
43
  Dynamic: author-email
45
44
  Dynamic: classifier
@@ -67,7 +66,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
67
66
  - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
68
67
  - REST and CLI entry points
69
68
  - Health/metrics endpoints + Prometheus exporter
70
- - Pluggable embeddings and stores; default backend is local
69
+ - Pluggable embeddings and stores; default stack is local FAISS + SBERT
71
70
 
72
71
  ## Requirements
73
72
  - Python 3.10–3.14
@@ -102,27 +101,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
102
101
  ```
103
102
 
104
103
  ## Minimal config (optional)
105
- By default PatchVec runs with sensible local defaults. To customize, create
106
- `config.yml`:
104
+ By default PatchVec runs with sensible local defaults. For a user install,
105
+ customize `~/patchvec/config.yml`:
107
106
  ```yaml
108
107
  vector_store:
109
- type: default
108
+ type: faiss
110
109
  embedder:
111
- type: default
110
+ type: sbert
112
111
  auth:
113
112
  mode: static
114
113
  global_key: ${PATCHVEC_GLOBAL_KEY}
115
114
  ```
116
115
  Then export:
117
116
  ```bash
118
- export PATCHVEC_CONFIG=./config.yml
119
117
  export PATCHVEC_GLOBAL_KEY="your-secret"
120
118
  ```
119
+ If you keep the file elsewhere, point the runtime at it explicitly:
120
+ ```bash
121
+ export PATCHVEC_CONFIG=/path/to/config.yml
122
+ ```
121
123
 
122
124
  ## CLI example
123
125
  ```bash
124
126
  pavecli create-collection demo books
125
- pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k \
127
+ pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
126
128
  --metadata='{"lang":"en"}'
127
129
  pavecli search demo books "captain nemo" -k 5
128
130
  ```
@@ -15,59 +15,79 @@ pave/__init__.py
15
15
  pave/auth.py
16
16
  pave/cli.py
17
17
  pave/config.py
18
+ pave/filters.py
18
19
  pave/log.py
19
20
  pave/main.py
20
- pave/meta_store.py
21
+ pave/metadb.py
21
22
  pave/metrics.py
22
23
  pave/preprocess.py
24
+ pave/runtime_paths.py
23
25
  pave/schemas.py
24
26
  pave/service.py
25
27
  pave/ui.py
26
28
  pave/assets/__init__.py
29
+ pave/assets/config.yml.example
27
30
  pave/assets/patchvec_icon_192.png
31
+ pave/assets/tenants.yml.example
28
32
  pave/assets/ui.html
33
+ pave/backends/__init__.py
34
+ pave/backends/base.py
35
+ pave/backends/faiss.py
36
+ pave/backends/qdrant.py
29
37
  pave/embedders/__init__.py
30
38
  pave/embedders/base.py
31
39
  pave/embedders/factory.py
32
- pave/embedders/openai_emb.py
33
- pave/embedders/sbert_emb.py
34
- pave/embedders/txtai_emb.py
40
+ pave/embedders/openai.py
41
+ pave/embedders/sbert.py
42
+ pave/routes/__init__.py
43
+ pave/routes/admin.py
44
+ pave/routes/collections.py
45
+ pave/routes/documents.py
46
+ pave/routes/health.py
47
+ pave/routes/search.py
35
48
  pave/stores/__init__.py
36
49
  pave/stores/base.py
37
- pave/stores/factory.py
38
- pave/stores/qdrant_store.py
39
- pave/stores/txtai_store.py
50
+ pave/stores/local.py
40
51
  tests/test_admin_tenants.py
41
52
  tests/test_auth.py
42
53
  tests/test_auth_api.py
54
+ tests/test_bench_search_latency.py
55
+ tests/test_bench_stress.py
43
56
  tests/test_cli.py
44
57
  tests/test_collection_rename.py
45
58
  tests/test_collections.py
59
+ tests/test_concurrent_upsert.py
46
60
  tests/test_config_runtime.py
61
+ tests/test_config_tenants.py
47
62
  tests/test_csv_ingest.py
48
63
  tests/test_csv_ingest_api.py
49
64
  tests/test_data_export.py
50
65
  tests/test_delete_document.py
51
66
  tests/test_docid_default.py
67
+ tests/test_faiss_backend.py
68
+ tests/test_faiss_store.py
52
69
  tests/test_health.py
53
70
  tests/test_ingest_errors.py
54
71
  tests/test_ingest_size_limit.py
55
72
  tests/test_lazy_app.py
56
73
  tests/test_list_collections.py
57
74
  tests/test_log.py
75
+ tests/test_main_args.py
58
76
  tests/test_meta_store.py
77
+ tests/test_metadata_split.py
59
78
  tests/test_metrics.py
79
+ tests/test_relevance.py
60
80
  tests/test_request_id.py
81
+ tests/test_runtime_paths.py
82
+ tests/test_sbert_embedder.py
61
83
  tests/test_search_errors.py
84
+ tests/test_store_cache_race.py
85
+ tests/test_store_catalog_metrics.py
86
+ tests/test_store_close_race.py
87
+ tests/test_store_filters.py
88
+ tests/test_store_meta_fetch_scope.py
89
+ tests/test_store_sanitized_filter_parity.py
62
90
  tests/test_tenant_rate_limit.py
63
- tests/test_txtai_concurrent_upsert.py
64
- tests/test_txtai_store.py
65
- tests/test_txtai_store_cache_race.py
66
- tests/test_txtai_store_catalog_metrics.py
67
- tests/test_txtai_store_close_race.py
68
- tests/test_txtai_store_filters.py
69
- tests/test_txtai_store_meta_fetch_scope.py
70
- tests/test_txtai_store_sql_safety.py
71
91
  tests/test_ui.py
72
92
  tests/test_upload_search_csv.py
73
93
  tests/test_upload_search_pdf.py
@@ -1,6 +1,5 @@
1
1
  fastapi>=0.115.0
2
2
  uvicorn[standard]>=0.30.6
3
- txtai>=6.3.0
4
3
  pydantic>=2.8.2
5
4
  python-multipart>=0.0.9
6
5
  pypdf>=5.0.0
@@ -8,15 +7,14 @@ pyyaml>=6.0.2
8
7
  python-dotenv>=1.0.1
9
8
  faiss-cpu>=1.7.1
10
9
  torch>=2.10.0
10
+ sentence-transformers>=2.7.0
11
11
 
12
12
  [cpu]
13
13
 
14
14
  [openai]
15
15
  openai>=1.0.0
16
16
 
17
- [sbert]
18
- sentence-transformers>=2.7.0
19
-
20
17
  [test]
21
18
  pytest
22
19
  httpx
20
+ datasets>=3.5.0