patchvec 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.4
2
+ Name: patchvec
3
+ Version: 0.5.6
4
+ Summary: Patchvec — A lightweight, pluggable vector search microservice.
5
+ Author: Rodrigo Rodrigues da Silva
6
+ Author-email: rodrigopitanga@posteo.net
7
+ License: GPL-3.0-or-later
8
+ Project-URL: Homepage, https://gitlab.com/flowlexi/patchvec
9
+ Project-URL: Source, https://gitlab.com/flowlexi/patchvec
10
+ Project-URL: Tracker, https://gitlab.com/flowlexi/patchvec/issues
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Framework :: FastAPI
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: fastapi>=0.115.0
25
+ Requires-Dist: uvicorn[standard]>=0.30.6
26
+ Requires-Dist: txtai>=6.3.0
27
+ Requires-Dist: pydantic>=2.8.2
28
+ Requires-Dist: python-multipart>=0.0.9
29
+ Requires-Dist: pypdf>=5.0.0
30
+ Requires-Dist: pyyaml>=6.0.2
31
+ Requires-Dist: python-dotenv>=1.0.1
32
+ Requires-Dist: qdrant-client>=1.9.2
33
+ Requires-Dist: sentence-transformers>=2.7.0
34
+ Requires-Dist: openai>=1.0.0
35
+ Dynamic: author
36
+ Dynamic: author-email
37
+ Dynamic: classifier
38
+ Dynamic: description
39
+ Dynamic: description-content-type
40
+ Dynamic: license
41
+ Dynamic: license-file
42
+ Dynamic: project-url
43
+ Dynamic: requires-dist
44
+ Dynamic: requires-python
45
+ Dynamic: summary
46
+
47
+ # PatchVec — A lightweight, pluggable vector search microservice.
48
+
49
+ Upload → chunk → index (with metadata) → search via REST and CLI.
50
+
51
+ ## Highlights
52
+ - Multi-tenant collections: `/collections/{tenant}/{name}`
53
+ - Upload and search **TXT**, **CSV**, and **PDF**
54
+ - Chunking per format:
55
+ - PDF → 1 chunk/page
56
+ - TXT → configurable chunk size + overlap
57
+ - CSV → 1 chunk/row
58
+ - Metadata filters on POST search (`{"filters": {"docid": "DOC-1"}}`)
59
+ - Health, metrics, and Prometheus endpoints
60
+ - Configurable auth modes: `none` or `static` (Bearer)
61
+ - Default backends are local (vendor-neutral "default"); embedders & stores are pluggable
62
+
63
+ ## Requirements
64
+ - Python 3.10+
65
+
66
+ ## Install
67
+ ```bash
68
+ python -m venv .venv
69
+ source .venv/bin/activate
70
+ pip install patchvec
71
+ ```
72
+
73
+ > CPU-only by default. If you have a CUDA setup and want GPU-accelerated deps, install the GPU-enabled packages in your environment before running PatchVec.
74
+
75
+ ## Quickstart
76
+ ```bash
77
+ # Start the server (installed entry point)
78
+ pavesrv
79
+
80
+ # Or run with uvicorn manually if you prefer:
81
+ uvicorn pave.main:app --host 0.0.0.0 --port 8080
82
+ ```
83
+
84
+ ## Minimal config (optional)
85
+ By default PatchVec runs with sensible local defaults. To customize, create `config.yml` and set:
86
+ ```yaml
87
+ vector_store:
88
+ type: default
89
+ embedder:
90
+ type: default
91
+ auth:
92
+ mode: none # or 'static' with per-tenant Bearer keys
93
+ ```
94
+ Then export:
95
+ ```bash
96
+ export PATCHVEC_CONFIG=./config.yml
97
+ ```
98
+
99
+ ## REST example
100
+ ```bash
101
+ # Create a collection
102
+ curl -X POST http://localhost:8080/collections/acme/docs
103
+
104
+ # Upload a TXT document
105
+ curl -X POST http://localhost:8080/collections/acme/docs/documents -F "file=@sample.txt" -F "docid=DOC1"
106
+
107
+ # Search (GET, no filters)
108
+ curl -G --data-urlencode "q=hello" http://localhost:8080/collections/acme/docs/search
109
+
110
+ # Search (POST, with filters)
111
+ curl -X POST http://localhost:8080/collections/acme/docs/search -H "Content-Type: application/json" -d '{"q":"hello","k":5,"filters":{"docid":"DOC1"}}'
112
+ ```
113
+
114
+ ## License
115
+ GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
@@ -0,0 +1,28 @@
1
+ patchvec-0.5.6.dist-info/licenses/LICENSE,sha256=pZN4UctsMWb-i8NGK_cawTQZ_uIYTR98GPAPzhRJXOM,299
2
+ pave/__init__.py,sha256=49d6bp1SBYCw7mk4wu_qO9mASeZVqYqsbuzKmxAtT7s,223
3
+ pave/auth.py,sha256=PVKA243OKvs0jaob-DJk6EMRzlBR9Qc2lNedlFUfjG8,3674
4
+ pave/cli.py,sha256=pLqQcA9awbvxGLxCzAcOcj1sdkPm2g3eNA7TXdngnpk,3649
5
+ pave/config.py,sha256=j6veVNb-tUzo_TnbzYpTR1Q2TDxmqHbMs152J4DFnFU,7876
6
+ pave/main.py,sha256=siufImb2qdxZvMJ4PHsbIKFDDskWqOzasFnjMph_g28,9736
7
+ pave/metrics.py,sha256=qqhvLCCzY2sgmjgsnNBnRxoZ5xTe9RW5DvoGMQiQAZg,1588
8
+ pave/preprocess.py,sha256=-llfKyWJ6aOPQ-y7XFmoJwF_9r34MnyqVbCghd8J-Vc,5392
9
+ pave/service.py,sha256=x1wu1yQd99Lmt4JvdvvKxQNhZgzkQ4u80p8P6oAxP_Y,3602
10
+ pave/ui.py,sha256=GQ0iRcCAP9UMdT_M0m7JDQhawrqaop6I-opivGMR0hE,6345
11
+ pave/assets/patchvec_icon_192.png,sha256=kUkXir9MtwxWTd_9hPTdycDwdRCimjprJhmaIP6R-XM,9733
12
+ pave/assets/ui.html,sha256=spFtzCDT84tmJfCA1WdpVY9xKKQ4oXJxjuOiV4ghWJo,4943
13
+ pave/embedders/__init__.py,sha256=Y6PqISvtEzZ5xLK2nL0jsQ256X8ffVn39Md05OS0SDk,6
14
+ pave/embedders/base.py,sha256=bm3mTOEt_sN-V96QmIKSW882LhIxsAyR0NMCaVvY7CM,364
15
+ pave/embedders/factory.py,sha256=-cxlFpDxWqbODEZ4vcZArFAdvoAdc4WATEePk7whCEo,803
16
+ pave/embedders/openai_emb.py,sha256=EDEULYNbB4uuH7axsLCSLqmuNmQ43jEUWYS_mcnBlR8,1135
17
+ pave/embedders/sbert_emb.py,sha256=cnposbN1f2SYR31aR06PUPI_ZGaQ0qCA9dFtxaCLf_M,968
18
+ pave/embedders/txtai_emb.py,sha256=00VTIil1SI5hT0-4GfPYYnT-EtKgBjoaZ57dSCeXjUU,2233
19
+ pave/stores/__init__.py,sha256=Y6PqISvtEzZ5xLK2nL0jsQ256X8ffVn39Md05OS0SDk,6
20
+ pave/stores/base.py,sha256=zxDEN6cpWLIPmYeo1GjXx2hoBB-xZuZR7Bd-FI8J9kQ,1111
21
+ pave/stores/factory.py,sha256=LqIwkwXz9rOQGTPfKu9VonxqoXUWhWSk0OEIiCu3FRs,634
22
+ pave/stores/qdrant_store.py,sha256=18tTn5V4NDDKAtjjgsPP7GKiLhZmxhrs2APUKs8-9os,1159
23
+ pave/stores/txtai_store.py,sha256=2HKgvGNHYsuQtbOjHkgYoF-Zde-LuLMV6lqVMFLSsJo,16227
24
+ patchvec-0.5.6.dist-info/METADATA,sha256=lgLS6YG89YYa-Ei-G364BdQv3jMPtV96L3kTDzMuRkg,3643
25
+ patchvec-0.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
+ patchvec-0.5.6.dist-info/entry_points.txt,sha256=L6cnN4Byk5eDON2d4ipquuLowUjo7-5wpxLb-kThytQ,75
27
+ patchvec-0.5.6.dist-info/top_level.txt,sha256=AbaAN6M4jryKL79DrYPnt49TpE2sjp97qYmm31RD_-U,5
28
+ patchvec-0.5.6.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ pavecli = pave.cli:main_cli
3
+ pavesrv = pave.main:main_srv
@@ -0,0 +1,9 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
5
+
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Full text: https://www.gnu.org/licenses/gpl-3.0.txt
@@ -0,0 +1 @@
1
+ pave
pave/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+ __all__ = [
4
+ "config", "auth", "preprocess", "metrics", "service", "main", "cli",
5
+ "embedders", "stores"
6
+ ]
Binary file
pave/assets/ui.html ADDED
@@ -0,0 +1,125 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <!-- robust favicon -->
7
+ <link rel="icon" type="image/png" sizes="32x32" href="/assets/patchvec_icon_192.png" />
8
+ <link rel="icon" type="image/png" sizes="192x192" href="/assets/patchvec_icon_192.png" />
9
+ <link rel="icon" href="/favicon.ico" />
10
+ <title>__INST_NAME__ • Search</title>
11
+ <style>
12
+ /* palette */
13
+ :root{ --bg:#f6e6d9; --panel:#fffaf6; --text:#2b1e11; --muted:#6b5b53; --accent:#c9463d; --border:#ead7c7; --link:#0f2e4d; --link-accent:#14b8a6; }
14
+ @media (prefers-color-scheme: dark){
15
+ :root{ --bg:#1a1410; --panel:#221a14; --text:#f1e9e4; --muted:#b7a9a1; --accent:#ef6b62; --border:#3a2c22; --link:#9dd9d3; --link-accent:#34d399; }
16
+ }
17
+
18
+ *{ box-sizing:border-box }
19
+ html,body{ height:100% }
20
+ body{
21
+ min-height:100vh; display:flex; flex-direction:column; overflow:hidden;
22
+ margin:0; font:16px/1.45 system-ui,Segoe UI,Roboto,Inter,Arial; background:var(--bg); color:var(--text);
23
+ }
24
+
25
+ .bar{
26
+ flex:0 0 auto; display:flex; gap:10px; align-items:center;
27
+ padding:12px; border-bottom:1px solid var(--border); background:var(--panel);
28
+ }
29
+ .left{ display:flex; gap:8px; align-items:center }
30
+ .right{ margin-left:auto; color:var(--muted); white-space:nowrap; overflow:hidden; text-overflow:ellipsis }
31
+
32
+ .tab{
33
+ padding:8px 12px; border-radius:10px; border:1px solid var(--border);
34
+ cursor:pointer; background:transparent; color:var(--text);
35
+ }
36
+ .tab.active{ background:var(--link-accent); color:#041318; border-color:var(--link-accent) }
37
+
38
+ .frames{ flex:1 1 auto; min-height:0; background:var(--bg) }
39
+ .frame{ display:none; width:100%; height:100%; border:0; background:var(--bg) }
40
+ .frame.active{ display:block }
41
+
42
+ .footer{
43
+ flex:0 0 auto; position:sticky; bottom:0;
44
+ display:flex; gap:12px; align-items:center; justify-content:center;
45
+ padding:10px; color:var(--muted); border-top:1px solid var(--border); background:var(--panel);
46
+ }
47
+ .footer a{ color:var(--link); text-decoration:none }
48
+ .footer a:hover{ color:var(--link-accent) }
49
+ </style>
50
+ </head>
51
+ <body>
52
+ <div class="bar">
53
+ <div class="left">
54
+ <button class="tab active" data-target="search" data-title="__INST_NAME__ • Search">Search</button>
55
+ <button class="tab" data-target="ingest" data-title="__INST_NAME__ • Ingest">Ingest</button>
56
+ </div>
57
+ <div class="right"><strong>__INST_NAME__</strong> — <small>__INST_DESC__</small></div>
58
+ </div>
59
+
60
+ <div class="frames">
61
+ <iframe id="search" class="frame active" src="/ui/search" title="Search"></iframe>
62
+ <iframe id="ingest" class="frame" src="/ui/ingest" title="Ingest"></iframe>
63
+ </div>
64
+
65
+ <div class="footer">
66
+ © 2025 <a href="https://flowlexi.com" target="_blank" rel="noopener">Flowlexi</a> •
67
+ <a href="__REPO_URL__" target="_blank" rel="noopener">🍰 patchvec __VERSION__</a> •
68
+ <a href="__LICENSE_URL__" target="_blank" rel="noopener">__LICENSE_NAME__</a>
69
+ </div>
70
+ <script>
71
function activeFrame(){
  // Return the iframe currently carrying the "active" class, or null when
  // no frame is marked active.
  const current = document.querySelector('.frame.active');
  return current;
}
72
+
73
function scrubSwagger(frame, attempt = 0){
  // Inject a one-off stylesheet into the iframe's document that hides the
  // Swagger UI top bar / info sections and makes the background transparent.
  //
  // frame   - the <iframe> element to clean up (may be null/undefined).
  // attempt - internal retry counter; callers normally omit it.
  const RETRY_DELAY_MS = 80;
  const MAX_ATTEMPTS = 375; // ~30 s of polling, then give up

  const win = frame?.contentWindow;
  const doc = win?.document;
  if(!doc) return;

  if(!doc.querySelector('.swagger-ui')){
    // Swagger UI has not rendered yet: poll again, but not forever. Without
    // a cap, a frame that never shows Swagger UI keeps a timer firing for
    // the lifetime of the page, and each tab click adds another chain.
    if(attempt < MAX_ATTEMPTS){
      setTimeout(()=>scrubSwagger(frame, attempt + 1), RETRY_DELAY_MS);
    }
    return;
  }

  // CSS: hide the top bar and info sections
  const STYLE_ID = 'pv-clean';
  if(!doc.getElementById(STYLE_ID)){
    // The id guard keeps repeated calls from stacking duplicate <style> tags.
    const s = doc.createElement('style');
    s.id = STYLE_ID;
    // The empty "height:;" declaration from the original rule was dropped:
    // it is invalid CSS and browsers ignore it anyway.
    s.textContent = `
      #operations-tag-default,
      .swagger-ui .topbar,
      .swagger-ui .download-url-wrapper,
      .swagger-ui .information-container,
      .swagger-ui .info,
      .swagger-ui .servers { display:none !important; }
      .swagger-ui .wrapper { margin:0 !important; padding:0 0 8px !important; }
      html, body { background:transparent !important; }
      /* auth icon */
      .swagger-ui .auth-wrapper .authorize:hover { border-color: rgba(20,184,166,.8); }
      .swagger-ui .btn.authorize { background: white }
      .swagger-ui .scheme-container { width:100%; margin: 8px 0 0 0; padding: 0 0; background:transparent; border-color: transparent; box-shadow:0 0 0 0 rgba(0,0,0,0)}
    `;
    doc.head.appendChild(s);
  }
}
104
+
105
// Tab buttons and the iframes they switch between.
const tabs = document.querySelectorAll('.tab');
const frames = document.querySelectorAll('.frame');

// Clicking a tab: clear every "active" marker, activate the clicked tab and
// the iframe named by its data-target, update the page title, then clean up
// the newly shown frame.
for (const tab of tabs) {
  tab.addEventListener('click', () => {
    for (const other of tabs) { other.classList.remove('active'); }
    for (const frame of frames) { frame.classList.remove('active'); }
    tab.classList.add('active');
    const shown = document.getElementById(tab.dataset.target);
    shown.classList.add('active');
    document.title = tab.dataset.title || document.title;
    scrubSwagger(shown);
  });
}

// Clean up each iframe whenever it (re)loads.
for (const frame of frames) {
  frame.addEventListener('load', () => { scrubSwagger(frame); });
}
123
+ </script>
124
+ </body>
125
+ </html>
pave/auth.py ADDED
@@ -0,0 +1,108 @@
1
+ # (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ # pave/auth.py
5
+
6
+ from __future__ import annotations
7
+ from dataclasses import dataclass
8
+ from typing import Optional, Dict, Tuple
9
+ from fastapi import HTTPException, Depends, Security
10
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
11
+ from . import config as cfg
12
+
13
+ bearer = HTTPBearer(auto_error=False)
14
+
15
@dataclass
class AuthContext:
    """Result of authenticating a request.

    ``tenant`` is the tenant bound to the presented credentials (``None``
    when no tenant is bound); ``is_admin`` marks callers that
    ``authorize_tenant`` lets through for any tenant.
    """

    # Tenant associated with the caller, if any.
    tenant: Optional[str]
    # True when the caller is not restricted to a single tenant.
    is_admin: bool
19
+
20
def _raise_401():
    """Abort the request with HTTP 401 and a Bearer ``invalid_token`` challenge."""
    challenge = {"WWW-Authenticate": 'Bearer realm="patchvec", error="invalid_token"'}
    error = HTTPException(
        status_code=401,
        detail="missing or invalid authorization header",
        headers=challenge,
    )
    raise error
26
+
27
def _raise_403():
    """Abort the request with HTTP 403 and a Bearer ``insufficient_scope`` challenge."""
    challenge = {"WWW-Authenticate": 'Bearer realm="patchvec", error="insufficient_scope"'}
    error = HTTPException(
        status_code=403,
        detail="forbidden",
        headers=challenge,
    )
    raise error
33
+
34
def auth_ctx(credentials: HTTPAuthorizationCredentials | None = Security(bearer)) -> AuthContext:
    """Resolve the caller's ``AuthContext`` from the request's Bearer credentials.

    Behaviour depends on ``auth.mode``, read from ``cfg.CFG`` on every call
    so tests and env overrides take effect:

    * ``none``   -- open (dev) mode: every caller is treated as admin and
      ``tenant`` is ``auth.default_access_tenant`` (``None`` when unset).
    * ``static`` -- the Bearer token is checked against ``auth.global_key``
      (admin, no tenant) and then against each ``auth.api_keys`` entry
      (tenant-scoped, non-admin).

    Args:
        credentials: Parsed ``Authorization`` header, or ``None`` when the
            header is absent (``bearer`` is created with ``auto_error=False``).

    Returns:
        The ``AuthContext`` describing the caller.

    Raises:
        HTTPException: 401 when the token is missing/empty or the scheme is
            not Bearer; 403 when the token matches no configured key; 500
            when ``auth.mode`` is not a known mode.
    """
    import hmac  # function-scope import: only this function needs it

    def _matches(candidate: str, expected: object) -> bool:
        # Constant-time comparison so response timing does not reveal how much
        # of a key was guessed. Bytes are compared because compare_digest()
        # rejects non-ASCII str arguments.
        return hmac.compare_digest(
            candidate.encode("utf-8"), str(expected).encode("utf-8")
        )

    mode = str(cfg.CFG.get("auth.mode", "none")).strip().lower()

    if mode == "none":
        # Open mode (dev): treat as admin. An unset tenant stays None rather
        # than becoming the literal string "None".
        default_tenant = cfg.CFG.get("auth.default_access_tenant", None)
        tenant = None if default_tenant is None else str(default_tenant)
        return AuthContext(tenant=tenant, is_admin=True)

    if mode == "static":
        token = None
        # Auth scheme names are case-insensitive and HTTPBearer accepts any
        # casing, so "bearer <key>" must not be rejected here.
        if credentials and (credentials.scheme or "").lower() == "bearer":
            token = credentials.credentials.strip()
        if not token:
            _raise_401()

        # Global key: admin access, not bound to a tenant.
        global_key = cfg.CFG.get("auth.global_key")
        if global_key and _matches(token, global_key):
            return AuthContext(tenant=None, is_admin=True)

        # Per-tenant keys: the first matching entry decides the tenant.
        api_keys: Dict[str, str] = cfg.CFG.get("auth.api_keys", {}) or {}
        for tenant_name, expected in api_keys.items():
            if _matches(token, expected):
                return AuthContext(tenant=tenant_name, is_admin=False)

        _raise_403()

    raise HTTPException(status_code=500, detail=f"unknown auth mode: {mode}")
61
+
62
def authorize_tenant(tenant: str, ctx: AuthContext = Depends(auth_ctx)) -> AuthContext:
    """Return *ctx* if the caller may act on *tenant*, otherwise raise HTTP 403.

    Access is granted to admins and to callers whose own tenant equals
    *tenant*.
    """
    allowed = ctx.is_admin or ctx.tenant == tenant
    if not allowed:
        _raise_403()
    return ctx
66
+
67
+
68
+ # --- Startup security policy -------------------------------------------------
69
+
70
+ def _is_dev(cfg) -> bool:
71
+ # check dev flag (CFG or PATCHVEC_DEV)
72
+ return bool(cfg.get("dev", False)) or str(cfg.get("PATCHVEC_DEV", "0")) == "1"
73
+
74
def enforce_policy(cfg) -> None:
    """
    Fail fast if auth is not configured in prod.
    Allow auth=none only in dev mode, force loopback bind.

    Args:
        cfg: Configuration object exposing ``get(key, default)``.

    Raises:
        RuntimeError: if ``auth.mode`` is ``none`` outside dev mode, or if
            ``auth.mode`` is ``static`` with neither ``auth.global_key`` nor
            ``auth.api_keys`` set.
    """
    import logging  # function-scope import: only this function needs it

    mode = str(cfg.get("auth.mode", "none")).strip().lower()

    if mode == "none":
        if not _is_dev(cfg):
            raise RuntimeError(
                "auth.mode=none not allowed in production. "
                "Set auth.mode=static with a key or run with PATCHVEC_DEV=1 for dev."
            )
        host = str(cfg.get("server.host", "127.0.0.1")).strip()
        if host not in ("127.0.0.1", "localhost"):
            # Enforce loopback in dev. The override stays best-effort, but its
            # outcome is logged: silently failing here would leave an
            # unauthenticated server on a non-loopback address with no trace.
            # NOTE(review): relies on the config object's private ``_data``
            # mapping accepting a dotted key -- confirm against pave/config.py.
            log = logging.getLogger(__name__)
            try:
                cfg._data["server.host"] = "127.0.0.1"
            except Exception:
                log.warning(
                    "auth.mode=none: could not override server.host=%r with 127.0.0.1",
                    host,
                    exc_info=True,
                )
            else:
                log.warning(
                    "auth.mode=none: overriding server.host=%r with 127.0.0.1",
                    host,
                )

    if mode == "static":
        has_global = bool(cfg.get("auth.global_key"))
        has_map = bool(cfg.get("auth.api_keys"))
        if not (has_global or has_map):
            raise RuntimeError(
                "auth.mode=static requires global_key or api_keys"
            )
103
+
104
def resolve_bind(cfg) -> Tuple[str, int]:
    """Return the ``(host, port)`` pair to bind to.

    Reads ``server.host`` (default ``"127.0.0.1"``) and ``server.port``
    (default ``8086``) from *cfg*; intended to be called after policy
    enforcement.
    """
    return (
        str(cfg.get("server.host", "127.0.0.1")),
        int(cfg.get("server.port", 8086)),
    )
pave/cli.py ADDED
@@ -0,0 +1,97 @@
1
+ # (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ from __future__ import annotations
5
+ import argparse, json, uuid, pathlib
6
+ from pave.stores.factory import get_store
7
+ from pave.service import (
8
+ create_collection as svc_create_collection,
9
+ delete_collection as svc_delete_collection,
10
+ ingest_document as svc_ingest_document,
11
+ do_search as svc_do_search,
12
+ )
13
+ from pave.config import get_cfg, reload_cfg
14
+
15
+ store = get_store(get_cfg())
16
+
17
+ def _read(path: str) -> bytes:
18
+ return pathlib.Path(path).read_bytes()
19
+
20
def cmd_create(args):
    """Handle ``create-collection``: create the collection and print the JSON result."""
    result = svc_create_collection(store, args.tenant, args.collection)
    rendered = json.dumps(result, ensure_ascii=False)
    print(rendered)
23
+
24
def cmd_upload(args):
    """Handle ``upload``: ingest ``args.file`` into a collection and print the JSON result.

    Args:
        args: Parsed CLI namespace with ``tenant``, ``collection``, ``file``,
            ``docid``, ``metadata`` (JSON text) and the ``csv_*`` options.

    The document id passed to the service is ``args.docid`` when given and
    ``None`` otherwise (presumably the service then assigns one -- confirm in
    pave/service.py). The previous code also generated a UUID here, but never
    passed it on; that dead work has been removed.
    """
    meta = json.loads(args.metadata) if args.metadata else {}
    content = _read(args.file)

    # CSV controls (optional): only built when at least one flag was supplied.
    csv_opts = None
    if args.csv_has_header or args.csv_meta_cols or args.csv_include_cols:
        csv_opts = {
            "has_header": args.csv_has_header or "auto",  # "auto" | "yes" | "no"
            "meta_cols": args.csv_meta_cols or "",        # "name1,name2" or "1,3"
            "include_cols": args.csv_include_cols or "",  # "nameA,2,5"
        }

    out = svc_ingest_document(
        store, args.tenant, args.collection, args.file, content,
        args.docid or None, meta, csv_options=csv_opts,
    )
    print(json.dumps(out, ensure_ascii=False))
43
+
44
def cmd_search(args):
    """Handle ``search``: run the query and print the JSON result.

    ``args.filters`` is parsed as JSON when present; otherwise no filters are
    passed.
    """
    if args.filters:
        filters = json.loads(args.filters)
    else:
        filters = None
    hits = svc_do_search(
        store, args.tenant, args.collection, args.query, args.k, filters=filters
    )
    print(json.dumps(hits, ensure_ascii=False))
48
+
49
def cmd_delete(args):
    """Handle ``delete-collection``: delete the collection and print the JSON result."""
    result = svc_delete_collection(store, args.tenant, args.collection)
    rendered = json.dumps(result, ensure_ascii=False)
    print(rendered)
52
+
53
def main_cli(argv=None):
    """Build the ``pavecli`` parser, parse *argv*, and run the chosen sub-command.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use the process
            command line.

    Returns:
        Whatever the selected ``cmd_*`` handler returns.
    """
    parser = argparse.ArgumentParser(prog="pavecli")
    commands = parser.add_subparsers(dest="cmd", required=True)
    # NOTE: no --config option is defined; the module-level store is built
    # from get_cfg() when this module is imported.

    def _command(name, handler, *positionals):
        # Register sub-command *name* with its positional arguments and handler.
        sub_parser = commands.add_parser(name)
        for positional in positionals:
            sub_parser.add_argument(positional)
        sub_parser.set_defaults(func=handler)
        return sub_parser

    _command("create-collection", cmd_create, "tenant", "collection")

    upload = _command("upload", cmd_upload, "tenant", "collection", "file")
    upload.add_argument("--docid")
    upload.add_argument("--metadata")
    # --- CSV controls ---
    upload.add_argument(
        "--csv-has-header",
        choices=["auto", "yes", "no"],
        help="CSV header handling: auto (sniff), yes, or no",
    )
    upload.add_argument(
        "--csv-meta-cols",
        help="CSV columns to store as metadata only (exclude from text). Names or 1-based indices, comma-separated",
    )
    upload.add_argument(
        "--csv-include-cols",
        help="CSV columns to include in indexed text. Names or 1-based indices, comma-separated. Defaults to all non-meta columns",
    )

    search = _command("search", cmd_search, "tenant", "collection", "query")
    search.add_argument("-k", type=int, default=5)
    search.add_argument("--filters", help='JSON object, e.g. {"docid":"DOC-1"}')

    _command("delete-collection", cmd_delete, "tenant", "collection")

    parsed = parser.parse_args(argv)
    return parsed.func(parsed)
95
+
96
+ if __name__ == "__main__":
97
+ raise SystemExit(main_cli())