patchvec 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchvec-0.5.6.dist-info/METADATA +115 -0
- patchvec-0.5.6.dist-info/RECORD +28 -0
- patchvec-0.5.6.dist-info/WHEEL +5 -0
- patchvec-0.5.6.dist-info/entry_points.txt +3 -0
- patchvec-0.5.6.dist-info/licenses/LICENSE +9 -0
- patchvec-0.5.6.dist-info/top_level.txt +1 -0
- pave/__init__.py +6 -0
- pave/assets/patchvec_icon_192.png +0 -0
- pave/assets/ui.html +125 -0
- pave/auth.py +108 -0
- pave/cli.py +97 -0
- pave/config.py +240 -0
- pave/embedders/__init__.py +1 -0
- pave/embedders/base.py +12 -0
- pave/embedders/factory.py +21 -0
- pave/embedders/openai_emb.py +30 -0
- pave/embedders/sbert_emb.py +24 -0
- pave/embedders/txtai_emb.py +58 -0
- pave/main.py +303 -0
- pave/metrics.py +52 -0
- pave/preprocess.py +151 -0
- pave/service.py +92 -0
- pave/stores/__init__.py +1 -0
- pave/stores/base.py +33 -0
- pave/stores/factory.py +18 -0
- pave/stores/qdrant_store.py +26 -0
- pave/stores/txtai_store.py +445 -0
- pave/ui.py +175 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: patchvec
|
|
3
|
+
Version: 0.5.6
|
|
4
|
+
Summary: Patchvec — A lightweight, pluggable vector search microservice.
|
|
5
|
+
Author: Rodrigo Rodrigues da Silva
|
|
6
|
+
Author-email: rodrigopitanga@posteo.net
|
|
7
|
+
License: GPL-3.0-or-later
|
|
8
|
+
Project-URL: Homepage, https://gitlab.com/flowlexi/patchvec
|
|
9
|
+
Project-URL: Source, https://gitlab.com/flowlexi/patchvec
|
|
10
|
+
Project-URL: Tracker, https://gitlab.com/flowlexi/patchvec/issues
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Framework :: FastAPI
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: fastapi>=0.115.0
|
|
25
|
+
Requires-Dist: uvicorn[standard]>=0.30.6
|
|
26
|
+
Requires-Dist: txtai>=6.3.0
|
|
27
|
+
Requires-Dist: pydantic>=2.8.2
|
|
28
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
29
|
+
Requires-Dist: pypdf>=5.0.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
31
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
32
|
+
Requires-Dist: qdrant-client>=1.9.2
|
|
33
|
+
Requires-Dist: sentence-transformers>=2.7.0
|
|
34
|
+
Requires-Dist: openai>=1.0.0
|
|
35
|
+
Dynamic: author
|
|
36
|
+
Dynamic: author-email
|
|
37
|
+
Dynamic: classifier
|
|
38
|
+
Dynamic: description
|
|
39
|
+
Dynamic: description-content-type
|
|
40
|
+
Dynamic: license
|
|
41
|
+
Dynamic: license-file
|
|
42
|
+
Dynamic: project-url
|
|
43
|
+
Dynamic: requires-dist
|
|
44
|
+
Dynamic: requires-python
|
|
45
|
+
Dynamic: summary
|
|
46
|
+
|
|
47
|
+
# PatchVec — A lightweight, pluggable vector search microservice.
|
|
48
|
+
|
|
49
|
+
Upload → chunk → index (with metadata) → search via REST and CLI.
|
|
50
|
+
|
|
51
|
+
## Highlights
|
|
52
|
+
- Multi-tenant collections: `/collections/{tenant}/{name}`
|
|
53
|
+
- Upload and search **TXT**, **CSV**, and **PDF**
|
|
54
|
+
- Chunking per format:
|
|
55
|
+
- PDF → 1 chunk/page
|
|
56
|
+
- TXT → configurable chunk size + overlap
|
|
57
|
+
- CSV → 1 chunk/row
|
|
58
|
+
- Metadata filters on POST search (`{"filters": {"docid": "DOC-1"}}`)
|
|
59
|
+
- Health, metrics, and Prometheus endpoints
|
|
60
|
+
- Configurable auth modes: `none` or `static` (Bearer)
|
|
61
|
+
- Default backends are local (vendor-neutral "default"); embedders & stores are pluggable
|
|
62
|
+
|
|
63
|
+
## Requirements
|
|
64
|
+
- Python 3.10+
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
```bash
|
|
68
|
+
python -m venv .venv
|
|
69
|
+
source .venv/bin/activate
|
|
70
|
+
pip install patchvec
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
> CPU-only by default. If you have a CUDA setup and want GPU-accelerated deps, install the GPU-enabled packages in your environment before running PatchVec.
|
|
74
|
+
|
|
75
|
+
## Quickstart
|
|
76
|
+
```bash
|
|
77
|
+
# Start the server (installed entry point)
|
|
78
|
+
pavesrv
|
|
79
|
+
|
|
80
|
+
# Or run with uvicorn manually if you prefer:
|
|
81
|
+
uvicorn pave.main:app --host 0.0.0.0 --port 8080
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Minimal config (optional)
|
|
85
|
+
By default PatchVec runs with sensible local defaults. To customize, create `config.yml` and set:
|
|
86
|
+
```yaml
|
|
87
|
+
vector_store:
|
|
88
|
+
type: default
|
|
89
|
+
embedder:
|
|
90
|
+
type: default
|
|
91
|
+
auth:
|
|
92
|
+
mode: none # dev only (requires PATCHVEC_DEV=1); or 'static' with per-tenant Bearer keys
|
|
93
|
+
```
|
|
94
|
+
Then export:
|
|
95
|
+
```bash
|
|
96
|
+
export PATCHVEC_CONFIG=./config.yml
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## REST example
|
|
100
|
+
```bash
|
|
101
|
+
# Create a collection
|
|
102
|
+
curl -X POST http://localhost:8080/collections/acme/docs
|
|
103
|
+
|
|
104
|
+
# Upload a TXT document
|
|
105
|
+
curl -X POST http://localhost:8080/collections/acme/docs/documents -F "file=@sample.txt" -F "docid=DOC1"
|
|
106
|
+
|
|
107
|
+
# Search (GET, no filters)
|
|
108
|
+
curl -G --data-urlencode "q=hello" http://localhost:8080/collections/acme/docs/search
|
|
109
|
+
|
|
110
|
+
# Search (POST, with filters)
|
|
111
|
+
curl -X POST http://localhost:8080/collections/acme/docs/search -H "Content-Type: application/json" -d '{"q":"hello","k":5,"filters":{"docid":"DOC1"}}'
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
patchvec-0.5.6.dist-info/licenses/LICENSE,sha256=pZN4UctsMWb-i8NGK_cawTQZ_uIYTR98GPAPzhRJXOM,299
|
|
2
|
+
pave/__init__.py,sha256=49d6bp1SBYCw7mk4wu_qO9mASeZVqYqsbuzKmxAtT7s,223
|
|
3
|
+
pave/auth.py,sha256=PVKA243OKvs0jaob-DJk6EMRzlBR9Qc2lNedlFUfjG8,3674
|
|
4
|
+
pave/cli.py,sha256=pLqQcA9awbvxGLxCzAcOcj1sdkPm2g3eNA7TXdngnpk,3649
|
|
5
|
+
pave/config.py,sha256=j6veVNb-tUzo_TnbzYpTR1Q2TDxmqHbMs152J4DFnFU,7876
|
|
6
|
+
pave/main.py,sha256=siufImb2qdxZvMJ4PHsbIKFDDskWqOzasFnjMph_g28,9736
|
|
7
|
+
pave/metrics.py,sha256=qqhvLCCzY2sgmjgsnNBnRxoZ5xTe9RW5DvoGMQiQAZg,1588
|
|
8
|
+
pave/preprocess.py,sha256=-llfKyWJ6aOPQ-y7XFmoJwF_9r34MnyqVbCghd8J-Vc,5392
|
|
9
|
+
pave/service.py,sha256=x1wu1yQd99Lmt4JvdvvKxQNhZgzkQ4u80p8P6oAxP_Y,3602
|
|
10
|
+
pave/ui.py,sha256=GQ0iRcCAP9UMdT_M0m7JDQhawrqaop6I-opivGMR0hE,6345
|
|
11
|
+
pave/assets/patchvec_icon_192.png,sha256=kUkXir9MtwxWTd_9hPTdycDwdRCimjprJhmaIP6R-XM,9733
|
|
12
|
+
pave/assets/ui.html,sha256=spFtzCDT84tmJfCA1WdpVY9xKKQ4oXJxjuOiV4ghWJo,4943
|
|
13
|
+
pave/embedders/__init__.py,sha256=Y6PqISvtEzZ5xLK2nL0jsQ256X8ffVn39Md05OS0SDk,6
|
|
14
|
+
pave/embedders/base.py,sha256=bm3mTOEt_sN-V96QmIKSW882LhIxsAyR0NMCaVvY7CM,364
|
|
15
|
+
pave/embedders/factory.py,sha256=-cxlFpDxWqbODEZ4vcZArFAdvoAdc4WATEePk7whCEo,803
|
|
16
|
+
pave/embedders/openai_emb.py,sha256=EDEULYNbB4uuH7axsLCSLqmuNmQ43jEUWYS_mcnBlR8,1135
|
|
17
|
+
pave/embedders/sbert_emb.py,sha256=cnposbN1f2SYR31aR06PUPI_ZGaQ0qCA9dFtxaCLf_M,968
|
|
18
|
+
pave/embedders/txtai_emb.py,sha256=00VTIil1SI5hT0-4GfPYYnT-EtKgBjoaZ57dSCeXjUU,2233
|
|
19
|
+
pave/stores/__init__.py,sha256=Y6PqISvtEzZ5xLK2nL0jsQ256X8ffVn39Md05OS0SDk,6
|
|
20
|
+
pave/stores/base.py,sha256=zxDEN6cpWLIPmYeo1GjXx2hoBB-xZuZR7Bd-FI8J9kQ,1111
|
|
21
|
+
pave/stores/factory.py,sha256=LqIwkwXz9rOQGTPfKu9VonxqoXUWhWSk0OEIiCu3FRs,634
|
|
22
|
+
pave/stores/qdrant_store.py,sha256=18tTn5V4NDDKAtjjgsPP7GKiLhZmxhrs2APUKs8-9os,1159
|
|
23
|
+
pave/stores/txtai_store.py,sha256=2HKgvGNHYsuQtbOjHkgYoF-Zde-LuLMV6lqVMFLSsJo,16227
|
|
24
|
+
patchvec-0.5.6.dist-info/METADATA,sha256=lgLS6YG89YYa-Ei-G364BdQv3jMPtV96L3kTDzMuRkg,3643
|
|
25
|
+
patchvec-0.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
26
|
+
patchvec-0.5.6.dist-info/entry_points.txt,sha256=L6cnN4Byk5eDON2d4ipquuLowUjo7-5wpxLb-kThytQ,75
|
|
27
|
+
patchvec-0.5.6.dist-info/top_level.txt,sha256=AbaAN6M4jryKL79DrYPnt49TpE2sjp97qYmm31RD_-U,5
|
|
28
|
+
patchvec-0.5.6.dist-info/RECORD,,
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 29 June 2007
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
|
|
5
|
+
|
|
6
|
+
Everyone is permitted to copy and distribute verbatim copies
|
|
7
|
+
of this license document, but changing it is not allowed.
|
|
8
|
+
|
|
9
|
+
Full text: https://www.gnu.org/licenses/gpl-3.0.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pave
|
pave/__init__.py
ADDED
|
Binary file
|
pave/assets/ui.html
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
<!doctype html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="utf-8" />
|
|
5
|
+
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
|
6
|
+
<!-- favicon robusto -->
|
|
7
|
+
<link rel="icon" type="image/png" sizes="32x32" href="/assets/patchvec_icon_192.png" />
|
|
8
|
+
<link rel="icon" type="image/png" sizes="192x192" href="/assets/patchvec_icon_192.png" />
|
|
9
|
+
<link rel="icon" href="/favicon.ico" />
|
|
10
|
+
<title>__INST_NAME__ • Search</title>
|
|
11
|
+
<style>
|
|
12
|
+
/* palette */
|
|
13
|
+
:root{ --bg:#f6e6d9; --panel:#fffaf6; --text:#2b1e11; --muted:#6b5b53; --accent:#c9463d; --border:#ead7c7; --link:#0f2e4d; --link-accent:#14b8a6; }
|
|
14
|
+
@media (prefers-color-scheme: dark){
|
|
15
|
+
:root{ --bg:#1a1410; --panel:#221a14; --text:#f1e9e4; --muted:#b7a9a1; --accent:#ef6b62; --border:#3a2c22; --link:#9dd9d3; --link-accent:#34d399; }
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
*{ box-sizing:border-box }
|
|
19
|
+
html,body{ height:100% }
|
|
20
|
+
body{
|
|
21
|
+
min-height:100vh; display:flex; flex-direction:column; overflow:hidden;
|
|
22
|
+
margin:0; font:16px/1.45 system-ui,Segoe UI,Roboto,Inter,Arial; background:var(--bg); color:var(--text);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
.bar{
|
|
26
|
+
flex:0 0 auto; display:flex; gap:10px; align-items:center;
|
|
27
|
+
padding:12px; border-bottom:1px solid var(--border); background:var(--panel);
|
|
28
|
+
}
|
|
29
|
+
.left{ display:flex; gap:8px; align-items:center }
|
|
30
|
+
.right{ margin-left:auto; color:var(--muted); white-space:nowrap; overflow:hidden; text-overflow:ellipsis }
|
|
31
|
+
|
|
32
|
+
.tab{
|
|
33
|
+
padding:8px 12px; border-radius:10px; border:1px solid var(--border);
|
|
34
|
+
cursor:pointer; background:transparent; color:var(--text);
|
|
35
|
+
}
|
|
36
|
+
.tab.active{ background:var(--link-accent); color:#041318; border-color:var(--link-accent) }
|
|
37
|
+
|
|
38
|
+
.frames{ flex:1 1 auto; min-height:0; background:var(--bg) }
|
|
39
|
+
.frame{ display:none; width:100%; height:100%; border:0; background:var(--bg) }
|
|
40
|
+
.frame.active{ display:block }
|
|
41
|
+
|
|
42
|
+
.footer{
|
|
43
|
+
flex:0 0 auto; position:sticky; bottom:0;
|
|
44
|
+
display:flex; gap:12px; align-items:center; justify-content:center;
|
|
45
|
+
padding:10px; color:var(--muted); border-top:1px solid var(--border); background:var(--panel);
|
|
46
|
+
}
|
|
47
|
+
.footer a{ color:var(--link); text-decoration:none }
|
|
48
|
+
.footer a:hover{ color:var(--link-accent) }
|
|
49
|
+
</style>
|
|
50
|
+
</head>
|
|
51
|
+
<body>
|
|
52
|
+
<div class="bar">
|
|
53
|
+
<div class="left">
|
|
54
|
+
<button class="tab active" data-target="search" data-title="__INST_NAME__ • Search">Search</button>
|
|
55
|
+
<button class="tab" data-target="ingest" data-title="__INST_NAME__ • Ingest">Ingest</button>
|
|
56
|
+
</div>
|
|
57
|
+
<div class="right"><strong>__INST_NAME__</strong> — <small>__INST_DESC__</small></div>
|
|
58
|
+
</div>
|
|
59
|
+
|
|
60
|
+
<div class="frames">
|
|
61
|
+
<iframe id="search" class="frame active" src="/ui/search" title="Search"></iframe>
|
|
62
|
+
<iframe id="ingest" class="frame" src="/ui/ingest" title="Ingest"></iframe>
|
|
63
|
+
</div>
|
|
64
|
+
|
|
65
|
+
<div class="footer">
|
|
66
|
+
© 2025 <a href="https://flowlexi.com" target="_blank" rel="noopener">Flowlexi</a> •
|
|
67
|
+
<a href="__REPO_URL__" target="_blank" rel="noopener">🍰 patchvec __VERSION__</a> •
|
|
68
|
+
<a href="__LICENSE_URL__" target="_blank" rel="noopener">__LICENSE_NAME__</a>
|
|
69
|
+
</div>
|
|
70
|
+
<script>
|
|
71
|
+
// Returns the iframe that is currently visible (the one carrying `.active`).
function activeFrame(){ return document.querySelector('.frame.active'); }

// Polling budget for scrubSwagger: 125 attempts x 80 ms is roughly 10 s.
// Without a bound, a frame that never renders Swagger UI would be polled
// forever, and every tab click / load event would start another chain.
const SCRUB_MAX_ATTEMPTS = 125;
const SCRUB_POLL_MS = 80;

/**
 * Injects a stylesheet into an embedded Swagger UI page that hides its
 * top bar / info sections and makes the background transparent.
 *
 * Polls until the `.swagger-ui` root exists in the frame's document, giving
 * up after SCRUB_MAX_ATTEMPTS polls. The injection is idempotent: the style
 * element is keyed by id and only added once per document.
 *
 * @param {HTMLIFrameElement} frame  iframe hosting the Swagger UI page
 * @param {number} [attempt=0]       internal poll counter; callers omit it
 */
function scrubSwagger(frame, attempt = 0){
  const win = frame?.contentWindow;
  const doc = win?.document;
  if(!doc) return;
  if(!doc.querySelector('.swagger-ui')){
    // Swagger UI has not rendered yet: retry, but only within the budget.
    if(attempt < SCRUB_MAX_ATTEMPTS){
      setTimeout(()=>scrubSwagger(frame, attempt + 1), SCRUB_POLL_MS);
    }
    return;
  }

  // CSS: hide the top bar / info sections
  const STYLE_ID = 'pv-clean';
  if(!doc.getElementById(STYLE_ID)){
    const s = doc.createElement('style');
    s.id = STYLE_ID;
    // The empty `height:;` declaration of the original rule was dropped:
    // it is invalid CSS and browsers ignore it.
    s.textContent = `
#operations-tag-default,
.swagger-ui .topbar,
.swagger-ui .download-url-wrapper,
.swagger-ui .information-container,
.swagger-ui .info,
.swagger-ui .servers { display:none !important; }
.swagger-ui .wrapper { margin:0 !important; padding:0 0 8px !important; }
html, body { background:transparent !important; }
/* auth icon */
.swagger-ui .auth-wrapper .authorize:hover { border-color: rgba(20,184,166,.8); }
.swagger-ui .btn.authorize { background: white }
.swagger-ui .scheme-container { width:100%; margin: 8px 0 0 0; padding: 0 0; background:transparent; border-color: transparent; box-shadow:0 0 0 0 rgba(0,0,0,0)}
`;
    doc.head.appendChild(s);
  }
}

const tabs = document.querySelectorAll('.tab');
const frames = document.querySelectorAll('.frame');

// Tab buttons: show the iframe named by data-target and update the page title.
tabs.forEach(function(tab){
  tab.addEventListener('click', function(){
    // Resolve the target first so a tab with a missing/unknown data-target
    // leaves the current view intact instead of blanking every frame.
    const fr = document.getElementById(tab.dataset.target);
    if(!fr) return;
    tabs.forEach(t=>t.classList.remove('active'));
    frames.forEach(f=>f.classList.remove('active'));
    tab.classList.add('active');
    fr.classList.add('active');
    document.title = tab.dataset.title || document.title;
    scrubSwagger(fr);
  });
});

// Re-apply the cleanup whenever a frame (re)loads its document.
frames.forEach(function(fr){
  fr.addEventListener('load', function(){ scrubSwagger(fr); });
});
|
|
123
|
+
</script>
|
|
124
|
+
</body>
|
|
125
|
+
</html>
|
pave/auth.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
# pave/auth.py
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Optional, Dict, Tuple
|
|
9
|
+
from fastapi import HTTPException, Depends, Security
|
|
10
|
+
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
|
11
|
+
from . import config as cfg
|
|
12
|
+
|
|
13
|
+
bearer = HTTPBearer(auto_error=False)
|
|
14
|
+
|
|
15
|
+
@dataclass
class AuthContext:
    """Identity resolved for a request by ``auth_ctx``."""
    # Tenant the caller is bound to. ``auth_ctx`` sets it to the matching
    # ``auth.api_keys`` entry for a tenant key and to None for the global key.
    tenant: Optional[str]
    # True for the global key and for open (auth.mode=none) access;
    # ``authorize_tenant`` lets admin contexts through for any tenant.
    is_admin: bool
|
|
19
|
+
|
|
20
|
+
def _raise_401():
    """Abort the request with HTTP 401 and a Bearer ``invalid_token`` challenge.

    Always raises ``HTTPException``; it never returns.
    """
    challenge = {"WWW-Authenticate": 'Bearer realm="patchvec", error="invalid_token"'}
    raise HTTPException(
        status_code=401,
        detail="missing or invalid authorization header",
        headers=challenge,
    )
|
|
26
|
+
|
|
27
|
+
def _raise_403():
    """Abort the request with HTTP 403 and a Bearer ``insufficient_scope`` challenge.

    Always raises ``HTTPException``; it never returns.
    """
    challenge = {"WWW-Authenticate": 'Bearer realm="patchvec", error="insufficient_scope"'}
    raise HTTPException(
        status_code=403,
        detail="forbidden",
        headers=challenge,
    )
|
|
33
|
+
|
|
34
|
+
def auth_ctx(credentials: HTTPAuthorizationCredentials | None = Security(bearer)) -> AuthContext:
    """Resolve the caller's ``AuthContext`` from the request's bearer credentials.

    Behaviour depends on the ``auth.mode`` configuration value:

    * ``none``   -- open access: every caller is an admin, bound to
      ``auth.default_access_tenant`` (``None`` when that key is unset).
    * ``static`` -- the bearer token is compared against ``auth.global_key``
      (admin, no tenant) and then against each ``auth.api_keys`` entry
      (non-admin, bound to that tenant).

    Raises:
        HTTPException: 401 when the bearer token is missing, 403 when it
            matches no configured key, 500 for an unknown ``auth.mode``.
    """
    # Function-scope import so the module's import block stays untouched.
    import hmac

    def _same_key(candidate: str, expected) -> bool:
        # Constant-time comparison: plain ``==`` returns at the first
        # mismatching character and can leak key prefixes through timing.
        # Both sides are encoded because compare_digest rejects non-ASCII str.
        return hmac.compare_digest(
            candidate.encode("utf-8"), str(expected).encode("utf-8")
        )

    # read from CFG.get so tests and env overrides work
    mode = str(cfg.CFG.get("auth.mode", "none")).strip().lower()

    if mode == "none":
        # open mode (dev): treat as admin
        default_tenant = cfg.CFG.get("auth.default_access_tenant", None)
        # Keep an unset tenant as None; str(None) would yield the string "None".
        tenant = None if default_tenant is None else str(default_tenant)
        return AuthContext(tenant=tenant, is_admin=True)

    if mode == "static":
        token = None
        # Auth scheme names are case-insensitive, so accept "bearer" as well.
        if credentials and (credentials.scheme or "").lower() == "bearer":
            token = credentials.credentials.strip()
        if not token:
            _raise_401()

        # global key
        global_key = cfg.CFG.get("auth.global_key")
        if global_key and _same_key(token, global_key):
            return AuthContext(tenant=None, is_admin=True)

        # per-tenant keys
        api_keys: Dict[str, str] = cfg.CFG.get("auth.api_keys", {}) or {}
        for t, expected in api_keys.items():
            # Skip null entries: str(None) would otherwise match the token "None".
            if expected is not None and _same_key(token, expected):
                return AuthContext(tenant=t, is_admin=False)

        _raise_403()

    raise HTTPException(status_code=500, detail=f"unknown auth mode: {mode}")
|
|
61
|
+
|
|
62
|
+
def authorize_tenant(tenant: str, ctx: AuthContext = Depends(auth_ctx)) -> AuthContext:
    """Return ``ctx`` if it may act on ``tenant``; otherwise answer 403.

    Admin contexts pass for any tenant; other contexts must be bound to
    exactly the requested tenant.
    """
    if not ctx.is_admin and ctx.tenant != tenant:
        # _raise_403 always raises, so execution never falls through here.
        _raise_403()
    return ctx
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# --- Startup security policy -------------------------------------------------
|
|
69
|
+
|
|
70
|
+
def _is_dev(cfg) -> bool:
|
|
71
|
+
# check dev flag (CFG or PATCHVEC_DEV)
|
|
72
|
+
return bool(cfg.get("dev", False)) or str(cfg.get("PATCHVEC_DEV", "0")) == "1"
|
|
73
|
+
|
|
74
|
+
def enforce_policy(cfg) -> None:
    """
    Fail fast if auth is not configured in prod.
    Allow auth=none only in dev mode, force loopback bind.

    Args:
        cfg: configuration object exposing ``get(key, default)``; the loopback
            override additionally writes to its ``_data`` mapping.

    Raises:
        RuntimeError: ``auth.mode=none`` outside dev mode, ``auth.mode=static``
            without ``auth.global_key`` or ``auth.api_keys``, or an
            ``auth.mode`` that is neither ``none`` nor ``static``.
    """
    # Function-scope import so the module's import block stays untouched.
    import logging

    loopback = ("127.0.0.1", "localhost")
    mode = str(cfg.get("auth.mode", "none")).strip().lower()
    dev = _is_dev(cfg)

    if mode == "none":
        if not dev:
            raise RuntimeError(
                "auth.mode=none not allowed in production. "
                "Set auth.mode=static with a key or run with PATCHVEC_DEV=1 for dev."
            )
        host = str(cfg.get("server.host", "127.0.0.1")).strip()
        if host not in loopback:
            # enforce loopback in dev
            override_error = None
            try:
                cfg._data["server.host"] = "127.0.0.1"
            except Exception as exc:
                # Still best-effort (startup is not aborted), but no longer silent.
                override_error = exc
            # Verify the override took effect: an unauthenticated server on a
            # non-loopback address must at least be visible in the logs.
            effective = str(cfg.get("server.host", "127.0.0.1")).strip()
            if effective not in loopback:
                logging.getLogger(__name__).warning(
                    "auth.mode=none: could not force loopback bind, "
                    "server.host is still %r (%s)",
                    effective,
                    override_error or "override had no effect",
                )
        return

    if mode == "static":
        has_global = bool(cfg.get("auth.global_key"))
        has_map = bool(cfg.get("auth.api_keys"))
        if not (has_global or has_map):
            raise RuntimeError(
                "auth.mode=static requires global_key or api_keys"
            )
        return

    # Any other mode makes auth_ctx answer every request with a 500, so
    # reject it at startup instead.
    raise RuntimeError(f"unknown auth.mode: {mode!r} (expected 'none' or 'static')")
|
|
103
|
+
|
|
104
|
+
def resolve_bind(cfg) -> Tuple[str, int]:
    """Return the ``(host, port)`` pair to bind, read from ``cfg``.

    ``server.host`` defaults to ``127.0.0.1`` and ``server.port`` to ``8086``.
    The host is coerced with ``str`` and the port with ``int``. Call this
    after policy enforcement so that any host override is already in place.
    """
    return (
        str(cfg.get("server.host", "127.0.0.1")),
        int(cfg.get("server.port", 8086)),
    )
|
pave/cli.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
import argparse, json, uuid, pathlib
|
|
6
|
+
from pave.stores.factory import get_store
|
|
7
|
+
from pave.service import (
|
|
8
|
+
create_collection as svc_create_collection,
|
|
9
|
+
delete_collection as svc_delete_collection,
|
|
10
|
+
ingest_document as svc_ingest_document,
|
|
11
|
+
do_search as svc_do_search,
|
|
12
|
+
)
|
|
13
|
+
from pave.config import get_cfg, reload_cfg
|
|
14
|
+
|
|
15
|
+
# Module-level store shared by every cmd_* handler below; it is built from the
# active configuration as soon as this module is imported.
store = get_store(get_cfg())
|
|
16
|
+
|
|
17
|
+
def _read(path: str) -> bytes:
|
|
18
|
+
return pathlib.Path(path).read_bytes()
|
|
19
|
+
|
|
20
|
+
def cmd_create(args):
    """Create the collection ``args.tenant``/``args.collection`` and print the result as JSON."""
    result = svc_create_collection(store, args.tenant, args.collection)
    payload = json.dumps(result, ensure_ascii=False)
    print(payload)
|
|
23
|
+
|
|
24
|
+
def cmd_upload(args):
    """Ingest the file at ``args.file`` into a collection and print the result as JSON.

    Reads, from the parsed CLI namespace:
        tenant, collection: target collection.
        file: path of the document; also forwarded as the file name.
        docid: optional document id; ``None`` is forwarded when it is omitted.
        metadata: optional JSON string parsed into the metadata argument.
        csv_has_header, csv_meta_cols, csv_include_cols: optional CSV controls,
            forwarded as ``csv_options`` only when at least one is given.

    The original version generated a UUID when ``--docid`` was missing but
    never used it (the call already passed ``None`` in that case); that dead
    computation is removed here without changing what is forwarded.
    """
    meta = json.loads(args.metadata) if args.metadata else {}
    content = _read(args.file)

    # CSV controls (optional)
    csv_opts = None
    if args.csv_has_header or args.csv_meta_cols or args.csv_include_cols:
        csv_opts = {
            "has_header": args.csv_has_header or "auto",   # "auto" | "yes" | "no"
            "meta_cols": args.csv_meta_cols or "",         # "name1,name2" or "1,3"
            "include_cols": args.csv_include_cols or "",   # "nameA,2,5"
        }

    out = svc_ingest_document(
        store, args.tenant, args.collection, args.file, content,
        args.docid or None, meta, csv_options=csv_opts
    )
    print(json.dumps(out, ensure_ascii=False))
|
|
43
|
+
|
|
44
|
+
def cmd_search(args):
    """Search a collection for ``args.query`` and print the result as JSON.

    ``args.filters``, when given, is a JSON string parsed and forwarded as the
    ``filters`` keyword; otherwise ``None`` is forwarded. ``args.k`` is passed
    through unchanged.
    """
    filters = None
    if args.filters:
        filters = json.loads(args.filters)
    hits = svc_do_search(
        store, args.tenant, args.collection, args.query, args.k, filters=filters
    )
    print(json.dumps(hits, ensure_ascii=False))
|
|
48
|
+
|
|
49
|
+
def cmd_delete(args):
    """Delete the collection ``args.tenant``/``args.collection`` and print the result as JSON."""
    result = svc_delete_collection(store, args.tenant, args.collection)
    payload = json.dumps(result, ensure_ascii=False)
    print(payload)
|
|
52
|
+
|
|
53
|
+
def main_cli(argv=None):
    """Parse ``argv`` for the ``pavecli`` program and run the chosen sub-command.

    Sub-commands: ``create-collection``, ``upload``, ``search`` and
    ``delete-collection``; each takes ``tenant`` and ``collection`` as its
    first two positionals. Returns whatever the selected handler returns.
    ``argv=None`` lets argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(prog="pavecli")
    commands = parser.add_subparsers(dest="cmd", required=True)
    # NOTE: loading an alternative config file from a CLI option is not wired up.

    def _command(name, handler):
        # Every sub-command starts with the same two positionals.
        sp = commands.add_parser(name)
        sp.add_argument("tenant")
        sp.add_argument("collection")
        sp.set_defaults(func=handler)
        return sp

    _command("create-collection", cmd_create)

    upload = _command("upload", cmd_upload)
    upload.add_argument("file")
    upload.add_argument("--docid")
    upload.add_argument("--metadata")
    # --- CSV controls ---
    upload.add_argument(
        "--csv-has-header",
        choices=["auto", "yes", "no"],
        help="CSV header handling: auto (sniff), yes, or no",
    )
    upload.add_argument(
        "--csv-meta-cols",
        help="CSV columns to store as metadata only (exclude from text). Names or 1-based indices, comma-separated",
    )
    upload.add_argument(
        "--csv-include-cols",
        help="CSV columns to include in indexed text. Names or 1-based indices, comma-separated. Defaults to all non-meta columns",
    )

    search = _command("search", cmd_search)
    search.add_argument("query")
    search.add_argument("-k", type=int, default=5)
    search.add_argument("--filters", help='JSON object, e.g. {"docid":"DOC-1"}')

    _command("delete-collection", cmd_delete)

    parsed = parser.parse_args(argv)
    return parsed.func(parsed)
|
|
95
|
+
|
|
96
|
+
if __name__ == "__main__":
|
|
97
|
+
raise SystemExit(main_cli())
|