patchvec 0.5.8__tar.gz → 0.5.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. {patchvec-0.5.8 → patchvec-0.5.9}/ABOUT.md +10 -7
  2. {patchvec-0.5.8/patchvec.egg-info → patchvec-0.5.9}/PKG-INFO +14 -12
  3. patchvec-0.5.9/README.md +258 -0
  4. {patchvec-0.5.8 → patchvec-0.5.9}/config.yml.example +51 -45
  5. {patchvec-0.5.8 → patchvec-0.5.9/patchvec.egg-info}/PKG-INFO +14 -12
  6. {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/SOURCES.txt +35 -15
  7. {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/requires.txt +2 -4
  8. patchvec-0.5.9/pave/assets/config.yml.example +224 -0
  9. patchvec-0.5.9/pave/assets/tenants.yml.example +19 -0
  10. {patchvec-0.5.8 → patchvec-0.5.9}/pave/auth.py +5 -5
  11. patchvec-0.5.9/pave/backends/__init__.py +13 -0
  12. patchvec-0.5.9/pave/backends/base.py +29 -0
  13. patchvec-0.5.9/pave/backends/faiss.py +153 -0
  14. patchvec-0.5.9/pave/backends/qdrant.py +46 -0
  15. patchvec-0.5.9/pave/cli.py +337 -0
  16. {patchvec-0.5.8 → patchvec-0.5.9}/pave/config.py +120 -23
  17. patchvec-0.5.9/pave/embedders/__init__.py +7 -0
  18. patchvec-0.5.9/pave/embedders/base.py +21 -0
  19. patchvec-0.5.9/pave/embedders/factory.py +22 -0
  20. patchvec-0.5.9/pave/embedders/openai.py +47 -0
  21. patchvec-0.5.9/pave/embedders/sbert.py +69 -0
  22. patchvec-0.5.9/pave/filters.py +164 -0
  23. patchvec-0.5.9/pave/main.py +339 -0
  24. patchvec-0.5.9/pave/metadb.py +603 -0
  25. {patchvec-0.5.8 → patchvec-0.5.9}/pave/metrics.py +2 -2
  26. {patchvec-0.5.8 → patchvec-0.5.9}/pave/preprocess.py +2 -1
  27. patchvec-0.5.9/pave/routes/__init__.py +16 -0
  28. patchvec-0.5.9/pave/routes/admin.py +131 -0
  29. patchvec-0.5.9/pave/routes/collections.py +123 -0
  30. patchvec-0.5.9/pave/routes/documents.py +164 -0
  31. patchvec-0.5.9/pave/routes/health.py +107 -0
  32. patchvec-0.5.9/pave/routes/search.py +177 -0
  33. patchvec-0.5.9/pave/runtime_paths.py +89 -0
  34. {patchvec-0.5.8 → patchvec-0.5.9}/pave/service.py +78 -232
  35. patchvec-0.5.9/pave/stores/__init__.py +6 -0
  36. {patchvec-0.5.8 → patchvec-0.5.9}/pave/stores/base.py +22 -8
  37. patchvec-0.5.9/pave/stores/local.py +807 -0
  38. {patchvec-0.5.8 → patchvec-0.5.9}/setup.py +6 -8
  39. patchvec-0.5.9/tests/test_bench_search_latency.py +56 -0
  40. patchvec-0.5.9/tests/test_bench_stress.py +56 -0
  41. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_cli.py +78 -5
  42. patchvec-0.5.9/tests/test_concurrent_upsert.py +62 -0
  43. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_config_runtime.py +22 -8
  44. patchvec-0.5.9/tests/test_config_tenants.py +246 -0
  45. patchvec-0.5.9/tests/test_csv_ingest.py +141 -0
  46. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_csv_ingest_api.py +14 -0
  47. patchvec-0.5.9/tests/test_data_export.py +173 -0
  48. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_delete_document.py +4 -3
  49. patchvec-0.5.9/tests/test_docid_default.py +49 -0
  50. patchvec-0.5.9/tests/test_faiss_backend.py +86 -0
  51. patchvec-0.5.8/tests/test_txtai_store.py → patchvec-0.5.9/tests/test_faiss_store.py +68 -12
  52. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_health.py +1 -1
  53. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_list_collections.py +12 -14
  54. patchvec-0.5.9/tests/test_main_args.py +28 -0
  55. patchvec-0.5.9/tests/test_meta_store.py +490 -0
  56. patchvec-0.5.9/tests/test_metadata_split.py +89 -0
  57. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_metrics.py +10 -10
  58. patchvec-0.5.9/tests/test_relevance.py +297 -0
  59. patchvec-0.5.9/tests/test_runtime_paths.py +83 -0
  60. patchvec-0.5.9/tests/test_sbert_embedder.py +243 -0
  61. patchvec-0.5.8/tests/test_txtai_store_cache_race.py → patchvec-0.5.9/tests/test_store_cache_race.py +4 -2
  62. patchvec-0.5.8/tests/test_txtai_store_catalog_metrics.py → patchvec-0.5.9/tests/test_store_catalog_metrics.py +4 -3
  63. patchvec-0.5.9/tests/test_store_close_race.py +171 -0
  64. patchvec-0.5.8/tests/test_txtai_store_filters.py → patchvec-0.5.9/tests/test_store_filters.py +6 -43
  65. patchvec-0.5.9/tests/test_store_meta_fetch_scope.py +152 -0
  66. patchvec-0.5.9/tests/test_store_sanitized_filter_parity.py +171 -0
  67. patchvec-0.5.8/README.md +0 -211
  68. patchvec-0.5.8/pave/cli.py +0 -211
  69. patchvec-0.5.8/pave/embedders/__init__.py +0 -4
  70. patchvec-0.5.8/pave/embedders/base.py +0 -12
  71. patchvec-0.5.8/pave/embedders/factory.py +0 -21
  72. patchvec-0.5.8/pave/embedders/openai_emb.py +0 -30
  73. patchvec-0.5.8/pave/embedders/sbert_emb.py +0 -24
  74. patchvec-0.5.8/pave/embedders/txtai_emb.py +0 -58
  75. patchvec-0.5.8/pave/main.py +0 -864
  76. patchvec-0.5.8/pave/meta_store.py +0 -320
  77. patchvec-0.5.8/pave/stores/__init__.py +0 -4
  78. patchvec-0.5.8/pave/stores/factory.py +0 -18
  79. patchvec-0.5.8/pave/stores/qdrant_store.py +0 -37
  80. patchvec-0.5.8/pave/stores/txtai_store.py +0 -950
  81. patchvec-0.5.8/tests/test_csv_ingest.py +0 -81
  82. patchvec-0.5.8/tests/test_data_export.py +0 -91
  83. patchvec-0.5.8/tests/test_docid_default.py +0 -41
  84. patchvec-0.5.8/tests/test_meta_store.py +0 -91
  85. patchvec-0.5.8/tests/test_txtai_concurrent_upsert.py +0 -77
  86. patchvec-0.5.8/tests/test_txtai_store_close_race.py +0 -79
  87. patchvec-0.5.8/tests/test_txtai_store_meta_fetch_scope.py +0 -59
  88. patchvec-0.5.8/tests/test_txtai_store_sql_safety.py +0 -116
  89. {patchvec-0.5.8 → patchvec-0.5.9}/LICENSE +0 -0
  90. {patchvec-0.5.8 → patchvec-0.5.9}/MANIFEST.in +0 -0
  91. {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/dependency_links.txt +0 -0
  92. {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/entry_points.txt +0 -0
  93. {patchvec-0.5.8 → patchvec-0.5.9}/patchvec.egg-info/top_level.txt +0 -0
  94. {patchvec-0.5.8 → patchvec-0.5.9}/pave/__init__.py +0 -0
  95. {patchvec-0.5.8 → patchvec-0.5.9}/pave/assets/__init__.py +0 -0
  96. {patchvec-0.5.8 → patchvec-0.5.9}/pave/assets/patchvec_icon_192.png +0 -0
  97. {patchvec-0.5.8 → patchvec-0.5.9}/pave/assets/ui.html +0 -0
  98. {patchvec-0.5.8 → patchvec-0.5.9}/pave/log.py +0 -0
  99. {patchvec-0.5.8 → patchvec-0.5.9}/pave/schemas.py +0 -0
  100. {patchvec-0.5.8 → patchvec-0.5.9}/pave/ui.py +0 -0
  101. {patchvec-0.5.8 → patchvec-0.5.9}/requirements-cpu.txt +0 -0
  102. {patchvec-0.5.8 → patchvec-0.5.9}/setup.cfg +0 -0
  103. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_admin_tenants.py +0 -0
  104. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_auth.py +0 -0
  105. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_auth_api.py +0 -0
  106. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_collection_rename.py +0 -0
  107. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_collections.py +0 -0
  108. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_ingest_errors.py +0 -0
  109. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_ingest_size_limit.py +0 -0
  110. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_lazy_app.py +0 -0
  111. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_log.py +0 -0
  112. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_request_id.py +0 -0
  113. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_search_errors.py +0 -0
  114. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_tenant_rate_limit.py +0 -0
  115. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_ui.py +0 -0
  116. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_upload_search_csv.py +0 -0
  117. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_upload_search_pdf.py +0 -0
  118. {patchvec-0.5.8 → patchvec-0.5.9}/tests/test_upload_search_txt.py +0 -0
@@ -12,7 +12,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
12
12
  - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
13
13
  - REST and CLI entry points
14
14
  - Health/metrics endpoints + Prometheus exporter
15
- - Pluggable embeddings and stores; default backend is local
15
+ - Pluggable embeddings and stores; default stack is local FAISS + SBERT
16
16
 
17
17
  ## Requirements
18
18
  - Python 3.10–3.14
@@ -47,27 +47,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
47
47
  ```
48
48
 
49
49
  ## Minimal config (optional)
50
- By default PatchVec runs with sensible local defaults. To customize, create
51
- `config.yml`:
50
+ By default PatchVec runs with sensible local defaults. For a user install,
51
+ customize `~/patchvec/config.yml`:
52
52
  ```yaml
53
53
  vector_store:
54
- type: default
54
+ type: faiss
55
55
  embedder:
56
- type: default
56
+ type: sbert
57
57
  auth:
58
58
  mode: static
59
59
  global_key: ${PATCHVEC_GLOBAL_KEY}
60
60
  ```
61
61
  Then export:
62
62
  ```bash
63
- export PATCHVEC_CONFIG=./config.yml
64
63
  export PATCHVEC_GLOBAL_KEY="your-secret"
65
64
  ```
65
+ If you keep the file elsewhere, point the runtime at it explicitly:
66
+ ```bash
67
+ export PATCHVEC_CONFIG=/path/to/config.yml
68
+ ```
66
69
 
67
70
  ## CLI example
68
71
  ```bash
69
72
  pavecli create-collection demo books
70
- pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k \
73
+ pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
71
74
  --metadata='{"lang":"en"}'
72
75
  pavecli search demo books "captain nemo" -k 5
73
76
  ```
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patchvec
3
- Version: 0.5.8
4
- Summary: Patchvec — A lightweight, pluggable vector search microservice.
3
+ Version: 0.5.9
4
+ Summary: PaveDB — A lightweight, pluggable vector search microservice.
5
5
  Author: Rodrigo Rodrigues da Silva
6
6
  Author-email: rodrigo@flowlexi.com
7
7
  License: AGPL-3.0-or-later
@@ -24,7 +24,6 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: fastapi>=0.115.0
26
26
  Requires-Dist: uvicorn[standard]>=0.30.6
27
- Requires-Dist: txtai>=6.3.0
28
27
  Requires-Dist: pydantic>=2.8.2
29
28
  Requires-Dist: python-multipart>=0.0.9
30
29
  Requires-Dist: pypdf>=5.0.0
@@ -32,14 +31,14 @@ Requires-Dist: pyyaml>=6.0.2
32
31
  Requires-Dist: python-dotenv>=1.0.1
33
32
  Requires-Dist: faiss-cpu>=1.7.1
34
33
  Requires-Dist: torch>=2.10.0
34
+ Requires-Dist: sentence-transformers>=2.7.0
35
35
  Provides-Extra: cpu
36
- Provides-Extra: sbert
37
- Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
38
36
  Provides-Extra: openai
39
37
  Requires-Dist: openai>=1.0.0; extra == "openai"
40
38
  Provides-Extra: test
41
39
  Requires-Dist: pytest; extra == "test"
42
40
  Requires-Dist: httpx; extra == "test"
41
+ Requires-Dist: datasets>=3.5.0; extra == "test"
43
42
  Dynamic: author
44
43
  Dynamic: author-email
45
44
  Dynamic: classifier
@@ -67,7 +66,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
67
66
  - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
68
67
  - REST and CLI entry points
69
68
  - Health/metrics endpoints + Prometheus exporter
70
- - Pluggable embeddings and stores; default backend is local
69
+ - Pluggable embeddings and stores; default stack is local FAISS + SBERT
71
70
 
72
71
  ## Requirements
73
72
  - Python 3.10–3.14
@@ -102,27 +101,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
102
101
  ```
103
102
 
104
103
  ## Minimal config (optional)
105
- By default PatchVec runs with sensible local defaults. To customize, create
106
- `config.yml`:
104
+ By default PatchVec runs with sensible local defaults. For a user install,
105
+ customize `~/patchvec/config.yml`:
107
106
  ```yaml
108
107
  vector_store:
109
- type: default
108
+ type: faiss
110
109
  embedder:
111
- type: default
110
+ type: sbert
112
111
  auth:
113
112
  mode: static
114
113
  global_key: ${PATCHVEC_GLOBAL_KEY}
115
114
  ```
116
115
  Then export:
117
116
  ```bash
118
- export PATCHVEC_CONFIG=./config.yml
119
117
  export PATCHVEC_GLOBAL_KEY="your-secret"
120
118
  ```
119
+ If you keep the file elsewhere, point the runtime at it explicitly:
120
+ ```bash
121
+ export PATCHVEC_CONFIG=/path/to/config.yml
122
+ ```
121
123
 
122
124
  ## CLI example
123
125
  ```bash
124
126
  pavecli create-collection demo books
125
- pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k \
127
+ pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
126
128
  --metadata='{"lang":"en"}'
127
129
  pavecli search demo books "captain nemo" -k 5
128
130
  ```
@@ -0,0 +1,258 @@
1
+ <!-- (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com> -->
2
+ <!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
3
+
4
+ # 🛣️ PatchVec — Vector search you can understand & deploy within minutes
5
+
6
+ PatchVec is a single-process vector search engine for AI applications.
7
+
8
+ It ingests your documents, chunks and embeds them, and gives you semantic search with
9
+ full provenance — document id, page, character offset, and the exact snippet that
10
+ matched.
11
+
12
+ Built for developers shipping **RAG (Retrieval-Augmented Generation)** systems, PatchVec
13
+ provides a straightforward service for **vector search, embeddings pipelines, and
14
+ semantic retrieval**. It runs as a **REST service, a CLI tool, or an embedded library**,
15
+ so you can ship your first version quickly and keep growing with the same codebase as
16
+ your application scales. No cluster. No opaque pipelines.
17
+
18
+ Drop a file in. Search it. See exactly what came back — and why. Minutes after your
19
+ first commit.
20
+
21
+ ## ⚙️ Why PatchVec
22
+
23
+ - **Ingest files, not embeddings** — hand it a PDF, CSV, or TXT (more formats to come)
24
+ and PatchVec chunks, embeds, and indexes it. No preprocessing pipeline to build.
25
+ - **Full provenance on every hit** — every search result traces back to a document,
26
+ page, and character offset. Latency and request traceability are built into every
27
+ response.
28
+ - **Multi-tenant by default** — tenant/collection namespacing is built in, not bolted
29
+ on (and transparent when you just don't need it).
30
+ - **REST, CLI, or embed it** — run as an HTTP service, script via the CLI, or import the
31
+ library directly in your Python app.
32
+ - **Pluggable embeddings** (soon) — swap models per collection; wire in local or hosted
33
+ embedding backends.
34
+
35
+ ## 🧭 How to
36
+
37
+ ### 🐳 Docker workflow (prebuilt images)
38
+
39
+ Pull the image that fits your hardware from the
40
+ [Flowlexi Container Registry](https://gitlab.com/flowlexi/patchvec/container_registry)
41
+ on GitLab (CUDA builds publish as `latest-gpu`, CPU-only as
42
+ `latest-cpu`).
43
+
44
+ ```bash
45
+ docker pull registry.gitlab.com/flowlexi/patchvec/patchvec:latest-gpu
46
+ docker pull registry.gitlab.com/flowlexi/patchvec/patchvec:latest-cpu
47
+ ```
48
+
49
+ Run the service by choosing the tag you need and mapping the API port locally:
50
+
51
+ ```bash
52
+ docker run -d --name patchvec \
53
+ -p 8086:8086 \
54
+ registry.gitlab.com/flowlexi/patchvec/patchvec:latest-cpu
55
+ ```
56
+
57
+ Use the bundled CLI inside the container to create a tenant/collection, ingest a demo
58
+ document, and query it:
59
+
60
+ ```bash
61
+ docker exec patchvec pavecli create-collection demo books
62
+ docker exec patchvec pavecli ingest demo books /app/demo/20k_leagues.txt \
63
+ --docid=verne-20k --metadata='{"lang":"en"}'
64
+ docker exec patchvec pavecli search demo books "captain nemo" -k 3
65
+ ```
66
+
67
+ See below for REST and UI.
68
+
69
+ Stop the container when you are done:
70
+
71
+ ```bash
72
+ docker rm -f patchvec
73
+ ```
74
+
75
+ ### 🐍 PyPI workflow
76
+
77
+ Install PatchVec from PyPI inside an isolated virtual environment. You can run it
78
+ purely from env vars, or later point it at an explicit config file.
79
+
80
+ **Requires Python 3.10–3.14.**
81
+
82
+ ```bash
83
+ mkdir -p ~/pv && cd ~/pv # or wherever
84
+ python -m venv .venv-pv
85
+ source .venv-pv/bin/activate
86
+ python -m pip install --upgrade pip
87
+ pip install "patchvec[cpu]"
88
+
89
+ # create the default instance under ~/patchvec
90
+ pavecli init
91
+
92
+ # sample demo corpus
93
+ curl -LO https://raw.githubusercontent.com/rodrigopitanga/patchvec/main/demo/20k_leagues.txt
94
+
95
+ # set an admin key for the generated config
96
+ export PATCHVEC_GLOBAL_KEY=super-sekret
97
+
98
+ # option A: run the service (stays up until you stop it)
99
+ pavesrv
100
+
101
+ # option B: operate entirely via the CLI (no server needed)
102
+ pavecli create-collection demo books
103
+ pavecli ingest demo books 20k_leagues.txt --docid=verne-20k --metadata='{"lang":"en"}'
104
+ pavecli search demo books "captain nemo" -k 3
105
+ ```
106
+
107
+ > **CPU-only deployments:** The command above pulls the default PyTorch wheel
108
+ > from PyPI, which includes CUDA support (~2 GB). For a leaner, CPU-only torch
109
+ > install, point pip at the PyTorch CPU index:
110
+ > ```bash
111
+ > pip install "patchvec[cpu]" \
112
+ > --index-url https://download.pytorch.org/whl/cpu \
113
+ > --extra-index-url https://pypi.org/simple
114
+ > ```
115
+
116
+ Deactivate the virtual environment with `deactivate` when finished.
117
+
118
+ By default, a non-dev runtime reads `~/patchvec/config.yml` if present, keeps
119
+ tenant sidecar loading disabled unless `auth.tenants_file` is configured, and
120
+ stores data in `~/patchvec/data`. You can override any of that with the
121
+ `PATCHVEC_*` environment scheme or by pointing `PATCHVEC_CONFIG` at an explicit
122
+ config file. For alternate instances, use `pavecli init /path/to/instance` and
123
+ then point commands at that root with `pavesrv --home=/path/to/instance` or
124
+ `pavecli <command> ... --home /path/to/instance`.
125
+
126
+ ### 🌐 REST API and Web UI usage
127
+
128
+ When the server is running (either via Docker or `pavesrv`), the API listens on
129
+ `http://localhost:8086`. The following `curl` commands mirror the CLI sequence
130
+ above—adjust the file path to wherever you stored the corpus
131
+ (`/app/demo/20k_leagues.txt` in Docker, `~/pv/20k_leagues.txt` for PyPI installs) and
132
+ reuse the bearer token exported earlier:
133
+
134
+ ```bash
135
+ # create collection
136
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
137
+ -X POST http://localhost:8086/collections/demo/books
138
+
139
+ # ingest document
140
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
141
+ -X POST http://localhost:8086/collections/demo/books/documents \
142
+ -F "file=@20k_leagues.txt" \
143
+ -F 'metadata={"lang":"en"}'
144
+
145
+ # run search
146
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
147
+ "http://localhost:8086/collections/demo/books/search?q=captain+nemo&k=3"
148
+ ```
149
+
150
+ Every hit comes back with provenance you can trace, plus latency
151
+ and request id for observability:
152
+
153
+ ```json
154
+ {
155
+ "matches": [
156
+ {
157
+ "id": "verne-20k::chunk_42",
158
+ "score": 0.82,
159
+ "text": "Captain Nemo conducted me to the central staircase ...",
160
+ "tenant": "demo",
161
+ "collection": "books",
162
+ "match_reason": "semantic",
163
+ "meta": {
164
+ "docid": "verne-20k",
165
+ "filename": "20k_leagues.txt",
166
+ "offset": 21000,
167
+ "lang": "en",
168
+ "ingested_at": "2026-03-07T12:00:00Z"
169
+ }
170
+ }
171
+ ],
172
+ "latency_ms": 12.4,
173
+ "request_id": "req-5f3a-b812"
174
+ }
175
+ ```
176
+
177
+ The Swagger UI is available at `http://localhost:8086/`.
178
+
179
+ Health and metrics endpoints are available at `/health` and `/metrics`.
180
+
181
+ Runtime options are also accepted via the `PATCHVEC_*` environment variable scheme
182
+ (`PATCHVEC_SERVER__PORT`, `PATCHVEC_AUTH__MODE`, etc.), which takes precedence over config files.
183
+
184
+ ### 🔁 Live data updates
185
+
186
+ PatchVec supports live data refresh without restarting the server. Re-ingest the same
187
+ `docid` to *replace* vector content (the filename doesn't matter, although the stored
188
+ metadata will change), or explicitly delete the document and then ingest it again.
189
+
190
+ Re-ingest to replace (CLI path example):
191
+
192
+ ```bash
193
+ # initial ingest
194
+ pavecli ingest demo books 20k_leagues.txt --docid=verne-20k
195
+
196
+ # modify the content (filename can change — docid is what matters)
197
+ cp 20k_leagues.txt 20k_leagues_v2.txt
198
+ echo "THE END" >> 20k_leagues_v2.txt
199
+
200
+ # re-ingest with the *same docid* to replace the indexed content
201
+ pavecli ingest demo books 20k_leagues_v2.txt --docid=verne-20k
202
+ ```
203
+
204
+ Delete by ID then ingest (REST path example):
205
+
206
+ ```bash
207
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
208
+ -X DELETE http://localhost:8086/collections/demo/books/documents/verne-20k
209
+
210
+ # make changes
211
+
212
+ curl -H "Authorization: Bearer $PATCHVEC_GLOBAL_KEY" \
213
+ -X POST http://localhost:8086/collections/demo/books/documents \
214
+ -F "file=@demo/20k_leagues.txt" \
215
+ -F 'docid=verne-20k'
216
+ ```
217
+
218
+ ### 🛠️ Developer workflow
219
+
220
+ Building from source relies on `Makefile` shortcuts (`make install-dev`, `make serve`,
221
+ `make test`, `make check`, etc.).
222
+
223
+ The full contributor workflow, target reference, and coding style live in
224
+ [CONTRIBUTING.md](CONTRIBUTING.md). Performance benchmarks are documented in
225
+ [README-benchmarks.md](README-benchmarks.md).
226
+
227
+ ## Logging
228
+
229
+ PatchVec writes human-readable logs to stderr and optionally emits
230
+ structured JSON lines (one per search/ingest/delete) for production
231
+ observability. Enable the ops stream in `config.yml`:
232
+
233
+ ```yaml
234
+ log:
235
+ ops_log: stdout # null (off) | stdout | /path/to/ops.jsonl
236
+ ```
237
+
238
+ See `config.yml.example` for the full logging configuration.
239
+
240
+ ## 🗺️ Roadmap
241
+
242
+ Short/mid-term tasks and long-term plans are all tracked in
243
+ [`ROADMAP.md`](ROADMAP.md). Pick one, open an issue titled `claim: <task ID>`, and
244
+ ship a patch. If you find a bug, file it under the *Issues* tab.
245
+
246
+ ## 📜 License
247
+
248
+ PatchVec is free software: you can use it, copy it, redistribute it and/or modify it
249
+ free of charge under the terms of the GNU Affero General Public License as published by
250
+ the Free Software Foundation, either version 3 of the License, or (at your option) any
251
+ later version.
252
+
253
+ PatchVec is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
254
+ without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
255
+ PURPOSE. See the GNU Affero General Public License for more details.
256
+
257
+ SPDX-License-Identifier: AGPL-3.0-or-later
258
+ Copyright (C) 2025, 2026 Rodrigo Rodrigues da Silva <rodrigo@flowlexi.com>
@@ -2,31 +2,50 @@
2
2
  # Copy to config.yml and adjust.
3
3
  # Secrets (API keys) should live in a separate untracked file — see tenants.yml.example.
4
4
  #
5
- # Config file location (default: ~/patchvec/config.yml):
6
- # PATCHVEC_CONFIG=/etc/patchvec/config.yml
5
+ # Default user-install paths:
6
+ # config: ~/pavedb/config.yml
7
+ # tenants: ~/pavedb/tenants.yml
8
+ # data: ~/pavedb/data
7
9
  #
8
- # In Docker/systemd deployments always set PATCHVEC_CONFIG explicitly — the
10
+ # Distro-like install example:
11
+ # config: /etc/pavedb/config.yml
12
+ # tenants: /var/pavedb/tenants.yml
13
+ # data: /var/pavedb/data
14
+ #
15
+ # Config file location override:
16
+ # PAVEDB_CONFIG=/etc/pavedb/config.yml
17
+ #
18
+ # In Docker/systemd deployments always set PAVEDB_CONFIG explicitly — the
9
19
  # default path expands ~ relative to the process user, which may not be what
10
20
  # you expect inside a container. Example compose snippet:
11
21
  #
12
22
  # environment:
13
- # PATCHVEC_CONFIG: /etc/patchvec/config.yml
23
+ # PAVEDB_CONFIG: /etc/pavedb/config.yml
14
24
  # volumes:
15
- # - ./config.yml:/etc/patchvec/config.yml:ro
25
+ # - ./config.yml:/etc/pavedb/config.yml:ro
16
26
  #
17
27
  # All keys can also be overridden inline via environment variables:
18
- # PATCHVEC_<KEY>=value (top-level, e.g. PATCHVEC_DATA_DIR)
19
- # PATCHVEC_<SECTION>__<KEY>=val (nested, e.g. PATCHVEC_LOG__LEVEL=debug)
28
+ # PAVEDB_<KEY>=value (top-level, e.g. PAVEDB_DATA_DIR)
29
+ # PAVEDB_<SECTION>__<KEY>=val (nested, e.g. PAVEDB_LOG__LEVEL=debug)
30
+ # Legacy PATCHVEC_* vars still work in v0.5.9 but will be removed in v0.6.
31
+ #
32
+ # `auth.tenants_file` is optional. If unset, its default is `None` and no
33
+ # tenants sidecar file is loaded.
34
+ # If set, PaveDB loads that sidecar first. Then inline tenant config is
35
+ # applied with precedence:
36
+ # env vars > config.yml > tenants.yml > defaults
37
+ # Example: define tenant "acme" entirely from env:
38
+ # PAVEDB_AUTH__API_KEYS__acme=change-me
39
+ # PAVEDB_TENANTS__acme__MAX_CONCURRENT=5
20
40
 
21
41
  # ---------------------------------------------------------------------------
22
42
  # Storage
23
43
  # ---------------------------------------------------------------------------
24
44
 
25
45
  # Data directory — ~ is expanded at startup.
26
- # Default (library/dev): ~/patchvec/data
27
- # For Docker/systemd use an absolute path:
28
- # mkdir -p /var/lib/patchvec/data && chown patchvec:patchvec /var/lib/patchvec/data
29
- data_dir: ~/patchvec/data
46
+ # Default (library/dev): ~/pavedb/data
47
+ # For distro-like installs use an absolute path, e.g. /var/pavedb/data.
48
+ data_dir: ~/pavedb/data
30
49
 
31
50
  # ---------------------------------------------------------------------------
32
51
  # Common collection
@@ -55,11 +74,13 @@ auth:
55
74
 
56
75
  # Global admin key — grants access to all tenants and admin routes.
57
76
  # Always read from the environment; never hardcode in committed files.
58
- global_key: ${PATCHVEC_GLOBAL_KEY}
77
+ global_key: ${PAVEDB_GLOBAL_KEY}
59
78
 
60
- # External tenant→key mapping file (untracked, merged at startup).
61
- # Keys in this file override any inline api_keys entries with the same name.
62
- tenants_file: ./tenants.yml
79
+ # External tenant→key mapping file.
80
+ # Keep the same key paths there (`auth.api_keys`, `tenants.*`).
81
+ # User install default: ~/pavedb/tenants.yml
82
+ # Distro-like install: /var/pavedb/tenants.yml
83
+ # tenants_file: ~/pavedb/tenants.yml
63
84
 
64
85
  # Inline tenant→key mapping (fallback; keep empty in the repo).
65
86
  api_keys: {}
@@ -69,51 +90,36 @@ auth:
69
90
  # ---------------------------------------------------------------------------
70
91
 
71
92
  vector_store:
72
- # default txtai + FAISS (built-in, no extra services required).
73
- # qdrant — Qdrant server (requires a running Qdrant instance).
74
- type: default
93
+ # faiss local FAISS index (built-in, no extra services required).
94
+ type: faiss
75
95
 
76
- # Options for type=default (txtai-backed FAISS store).
77
- txtai:
96
+ # Options for type=faiss.
97
+ faiss:
78
98
  embed_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
79
- backend: faiss
80
-
81
- # Options for type=qdrant.
82
- # NOTE: QdrantStore is currently a stub — all methods raise NotImplementedError.
83
- # Contributions welcome! See pave/stores/qdrant_store.py.
84
- qdrant:
85
- url: http://localhost:6333
86
- api_key: ${PATCHVEC_QDRANT_API_KEY}
87
- prefer_payload_filters: true
88
- collection_prefix: patchvec_
99
+ max_query_chars: 4000
89
100
 
90
101
  # ---------------------------------------------------------------------------
91
102
  # Embedder
92
103
  # ---------------------------------------------------------------------------
93
104
  # Controls how text is converted to vectors before indexing and querying.
94
105
  # The embedder is shared across all collections (per-collection config: v0.6).
95
- #
96
- # NOTE: the sbert and openai embedder types are currently stubs — the factory
97
- # exists but is not wired into the default store. Only type=default (txtai) is
98
- # fully functional today. Per-collection embedder selection lands in v0.6.
99
- # Contributions welcome! See pave/embedders/ and pave/stores/factory.py.
100
106
 
101
107
  embedder:
102
- # default txtai-managed sentence-transformers model (recommended).
103
- # sbert direct sentence-transformers, more control over batching/device.
104
- # openai — OpenAI text-embedding-ada-002 (requires API key, adds latency).
105
- type: default
106
-
107
- txtai:
108
- path: sentence-transformers/paraphrase-MiniLM-L3-v2
108
+ # sbert direct sentence-transformers (recommended local default).
109
+ # openai OpenAI embeddings API (requires API key, adds latency).
110
+ type: sbert
109
111
 
112
+ # Used by type=sbert.
110
113
  sbert:
111
114
  model: sentence-transformers/all-MiniLM-L6-v2
112
115
  batch_size: 64
113
- device: auto # cpu | cuda | auto (auto detects CUDA at runtime)
116
+ device: auto # cpu | cuda | mps | auto
114
117
 
118
+ # Used by type=openai.
115
119
  openai:
116
- api_key: ${PATCHVEC_OPENAI_API_KEY}
120
+ model: text-embedding-3-small
121
+ batch_size: 256
122
+ api_key: ${PAVEDB_OPENAI_API_KEY}
117
123
  dim: 1536
118
124
 
119
125
  # ---------------------------------------------------------------------------
@@ -194,7 +200,7 @@ server:
194
200
 
195
201
  log:
196
202
  # Dev log level (stderr). DEBUG | INFO | WARNING | ERROR — default INFO.
197
- # Overridden by PATCHVEC_LOG__LEVEL env var (e.g. in Makefile: debug).
203
+ # Overridden by PAVEDB_LOG__LEVEL env var (e.g. in Makefile: debug).
198
204
  # Per-namespace overrides: log.debug / log.watch / log.quiet (list of loggers).
199
205
  level: INFO
200
206
 
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patchvec
3
- Version: 0.5.8
4
- Summary: Patchvec — A lightweight, pluggable vector search microservice.
3
+ Version: 0.5.9
4
+ Summary: PaveDB — A lightweight, pluggable vector search microservice.
5
5
  Author: Rodrigo Rodrigues da Silva
6
6
  Author-email: rodrigo@flowlexi.com
7
7
  License: AGPL-3.0-or-later
@@ -24,7 +24,6 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: fastapi>=0.115.0
26
26
  Requires-Dist: uvicorn[standard]>=0.30.6
27
- Requires-Dist: txtai>=6.3.0
28
27
  Requires-Dist: pydantic>=2.8.2
29
28
  Requires-Dist: python-multipart>=0.0.9
30
29
  Requires-Dist: pypdf>=5.0.0
@@ -32,14 +31,14 @@ Requires-Dist: pyyaml>=6.0.2
32
31
  Requires-Dist: python-dotenv>=1.0.1
33
32
  Requires-Dist: faiss-cpu>=1.7.1
34
33
  Requires-Dist: torch>=2.10.0
34
+ Requires-Dist: sentence-transformers>=2.7.0
35
35
  Provides-Extra: cpu
36
- Provides-Extra: sbert
37
- Requires-Dist: sentence-transformers>=2.7.0; extra == "sbert"
38
36
  Provides-Extra: openai
39
37
  Requires-Dist: openai>=1.0.0; extra == "openai"
40
38
  Provides-Extra: test
41
39
  Requires-Dist: pytest; extra == "test"
42
40
  Requires-Dist: httpx; extra == "test"
41
+ Requires-Dist: datasets>=3.5.0; extra == "test"
43
42
  Dynamic: author
44
43
  Dynamic: author-email
45
44
  Dynamic: classifier
@@ -67,7 +66,7 @@ Upload → chunk → index (with metadata) → search via REST and CLI.
67
66
  - Metadata filters on search (`{"filters": {"docid": "DOC-1"}}`)
68
67
  - REST and CLI entry points
69
68
  - Health/metrics endpoints + Prometheus exporter
70
- - Pluggable embeddings and stores; default backend is local
69
+ - Pluggable embeddings and stores; default stack is local FAISS + SBERT
71
70
 
72
71
  ## Requirements
73
72
  - Python 3.10–3.14
@@ -102,27 +101,30 @@ export PATCHVEC_AUTH__GLOBAL_KEY="your-secret"
102
101
  ```
103
102
 
104
103
  ## Minimal config (optional)
105
- By default PatchVec runs with sensible local defaults. To customize, create
106
- `config.yml`:
104
+ By default PatchVec runs with sensible local defaults. For a user install,
105
+ customize `~/patchvec/config.yml`:
107
106
  ```yaml
108
107
  vector_store:
109
- type: default
108
+ type: faiss
110
109
  embedder:
111
- type: default
110
+ type: sbert
112
111
  auth:
113
112
  mode: static
114
113
  global_key: ${PATCHVEC_GLOBAL_KEY}
115
114
  ```
116
115
  Then export:
117
116
  ```bash
118
- export PATCHVEC_CONFIG=./config.yml
119
117
  export PATCHVEC_GLOBAL_KEY="your-secret"
120
118
  ```
119
+ If you keep the file elsewhere, point the runtime at it explicitly:
120
+ ```bash
121
+ export PATCHVEC_CONFIG=/path/to/config.yml
122
+ ```
121
123
 
122
124
  ## CLI example
123
125
  ```bash
124
126
  pavecli create-collection demo books
125
- pavecli upload demo books demo/20k_leagues.txt --docid=verne-20k \
127
+ pavecli ingest demo books demo/20k_leagues.txt --docid=verne-20k \
126
128
  --metadata='{"lang":"en"}'
127
129
  pavecli search demo books "captain nemo" -k 5
128
130
  ```