patchvec 0.5.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. patchvec-0.5.6/ABOUT.md +69 -0
  2. patchvec-0.5.6/LICENSE +9 -0
  3. patchvec-0.5.6/MANIFEST.in +12 -0
  4. patchvec-0.5.6/PKG-INFO +115 -0
  5. patchvec-0.5.6/README.md +87 -0
  6. patchvec-0.5.6/config.yml.example +69 -0
  7. patchvec-0.5.6/patchvec.egg-info/PKG-INFO +115 -0
  8. patchvec-0.5.6/patchvec.egg-info/SOURCES.txt +54 -0
  9. patchvec-0.5.6/patchvec.egg-info/dependency_links.txt +1 -0
  10. patchvec-0.5.6/patchvec.egg-info/entry_points.txt +3 -0
  11. patchvec-0.5.6/patchvec.egg-info/requires.txt +11 -0
  12. patchvec-0.5.6/patchvec.egg-info/top_level.txt +1 -0
  13. patchvec-0.5.6/pave/__init__.py +6 -0
  14. patchvec-0.5.6/pave/assets/patchvec_icon_192.png +0 -0
  15. patchvec-0.5.6/pave/assets/ui.html +125 -0
  16. patchvec-0.5.6/pave/auth.py +108 -0
  17. patchvec-0.5.6/pave/cli.py +97 -0
  18. patchvec-0.5.6/pave/config.py +240 -0
  19. patchvec-0.5.6/pave/embedders/__init__.py +1 -0
  20. patchvec-0.5.6/pave/embedders/base.py +12 -0
  21. patchvec-0.5.6/pave/embedders/factory.py +21 -0
  22. patchvec-0.5.6/pave/embedders/openai_emb.py +30 -0
  23. patchvec-0.5.6/pave/embedders/sbert_emb.py +24 -0
  24. patchvec-0.5.6/pave/embedders/txtai_emb.py +58 -0
  25. patchvec-0.5.6/pave/main.py +303 -0
  26. patchvec-0.5.6/pave/metrics.py +52 -0
  27. patchvec-0.5.6/pave/preprocess.py +151 -0
  28. patchvec-0.5.6/pave/service.py +92 -0
  29. patchvec-0.5.6/pave/stores/__init__.py +1 -0
  30. patchvec-0.5.6/pave/stores/base.py +33 -0
  31. patchvec-0.5.6/pave/stores/factory.py +18 -0
  32. patchvec-0.5.6/pave/stores/qdrant_store.py +26 -0
  33. patchvec-0.5.6/pave/stores/txtai_store.py +445 -0
  34. patchvec-0.5.6/pave/ui.py +175 -0
  35. patchvec-0.5.6/requirements-base.txt +1 -0
  36. patchvec-0.5.6/requirements-cpu.txt +6 -0
  37. patchvec-0.5.6/requirements-test.txt +5 -0
  38. patchvec-0.5.6/requirements.txt +11 -0
  39. patchvec-0.5.6/scripts/release.sh +89 -0
  40. patchvec-0.5.6/setup.cfg +4 -0
  41. patchvec-0.5.6/setup.py +67 -0
  42. patchvec-0.5.6/tests/test_auth.py +79 -0
  43. patchvec-0.5.6/tests/test_cli.py +45 -0
  44. patchvec-0.5.6/tests/test_collections.py +8 -0
  45. patchvec-0.5.6/tests/test_config_runtime.py +41 -0
  46. patchvec-0.5.6/tests/test_csv_ingest.py +75 -0
  47. patchvec-0.5.6/tests/test_docid_default.py +41 -0
  48. patchvec-0.5.6/tests/test_health.py +32 -0
  49. patchvec-0.5.6/tests/test_metrics.py +35 -0
  50. patchvec-0.5.6/tests/test_txtai_concurrent_upsert.py +76 -0
  51. patchvec-0.5.6/tests/test_txtai_store.py +126 -0
  52. patchvec-0.5.6/tests/test_txtai_store_filters.py +112 -0
  53. patchvec-0.5.6/tests/test_ui.py +33 -0
  54. patchvec-0.5.6/tests/test_upload_search_csv.py +36 -0
  55. patchvec-0.5.6/tests/test_upload_search_pdf.py +37 -0
  56. patchvec-0.5.6/tests/test_upload_search_txt.py +119 -0
@@ -0,0 +1,69 @@
1
+ # PatchVec — A lightweight, pluggable vector search microservice.
2
+
3
+ Upload → chunk → index (with metadata) → search via REST and CLI.
4
+
5
+ ## Highlights
6
+ - Multi-tenant collections: `/collections/{tenant}/{name}`
7
+ - Upload and search **TXT**, **CSV**, and **PDF**
8
+ - Chunking per format:
9
+ - PDF → 1 chunk/page
10
+ - TXT → configurable chunk size + overlap
11
+ - CSV → 1 chunk/row
12
+ - Metadata filters on POST search (`{"filters": {"docid": "DOC-1"}}`)
13
+ - Health, metrics, and Prometheus endpoints
14
+ - Configurable auth modes: `none` or `static` (Bearer)
15
+ - Default backends are local (vendor-neutral "default"); embedders & stores are pluggable
16
+
17
+ ## Requirements
18
+ - Python 3.10+
19
+
20
+ ## Install
21
+ ```bash
22
+ python -m venv .venv
23
+ source .venv/bin/activate
24
+ pip install patchvec
25
+ ```
26
+
27
+ > CPU-only by default. If you have a CUDA setup and want GPU-accelerated deps, install the GPU-enabled packages in your environment before running PatchVec.
28
+
29
+ ## Quickstart
30
+ ```bash
31
+ # Start the server (installed entry point)
32
+ pavesrv
33
+
34
+ # Or run with uvicorn manually if you prefer:
35
+ uvicorn pave.main:app --host 0.0.0.0 --port 8080
36
+ ```
37
+
38
+ ## Minimal config (optional)
39
+ By default PatchVec runs with sensible local defaults. To customize, create `config.yml` and set:
40
+ ```yaml
41
+ vector_store:
42
+ type: default
43
+ embedder:
44
+ type: default
45
+ auth:
46
+ mode: none # or 'static' with per-tenant Bearer keys
47
+ ```
48
+ Then export:
49
+ ```bash
50
+ export PATCHVEC_CONFIG=./config.yml
51
+ ```
52
+
53
+ ## REST example
54
+ ```bash
55
+ # Create a collection
56
+ curl -X POST http://localhost:8080/collections/acme/docs
57
+
58
+ # Upload a TXT document
59
+ curl -X POST http://localhost:8080/collections/acme/docs/documents -F "file=@sample.txt" -F "docid=DOC1"
60
+
61
+ # Search (GET, no filters)
62
+ curl -G --data-urlencode "q=hello" http://localhost:8080/collections/acme/docs/search
63
+
64
+ # Search (POST, with filters)
65
+ curl -X POST http://localhost:8080/collections/acme/docs/search -H "Content-Type: application/json" -d '{"q":"hello","k":5,"filters":{"docid":"DOC1"}}'
66
+ ```
67
+
68
+ ## License
69
+ GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
patchvec-0.5.6/LICENSE ADDED
@@ -0,0 +1,9 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
5
+
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Full text: https://www.gnu.org/licenses/gpl-3.0.txt
@@ -0,0 +1,12 @@
1
+ include ABOUT.md
2
+ include README.md
3
+ include LICENSE
4
+ include config.yml.example
5
+ include requirements.txt
6
+ include requirements-cpu.txt
7
+ include requirements-base.txt
8
+ include requirements-test.txt
9
+ recursive-include pave *.py
10
+ recursive-include scripts *.sh
11
+ recursive-include pave/assets *
12
+ global-exclude __pycache__ *.py[cod] *.so
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.4
2
+ Name: patchvec
3
+ Version: 0.5.6
4
+ Summary: Patchvec — A lightweight, pluggable vector search microservice.
5
+ Author: Rodrigo Rodrigues da Silva
6
+ Author-email: rodrigopitanga@posteo.net
7
+ License: GPL-3.0-or-later
8
+ Project-URL: Homepage, https://gitlab.com/flowlexi/patchvec
9
+ Project-URL: Source, https://gitlab.com/flowlexi/patchvec
10
+ Project-URL: Tracker, https://gitlab.com/flowlexi/patchvec/issues
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Framework :: FastAPI
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: fastapi>=0.115.0
25
+ Requires-Dist: uvicorn[standard]>=0.30.6
26
+ Requires-Dist: txtai>=6.3.0
27
+ Requires-Dist: pydantic>=2.8.2
28
+ Requires-Dist: python-multipart>=0.0.9
29
+ Requires-Dist: pypdf>=5.0.0
30
+ Requires-Dist: pyyaml>=6.0.2
31
+ Requires-Dist: python-dotenv>=1.0.1
32
+ Requires-Dist: qdrant-client>=1.9.2
33
+ Requires-Dist: sentence-transformers>=2.7.0
34
+ Requires-Dist: openai>=1.0.0
35
+ Dynamic: author
36
+ Dynamic: author-email
37
+ Dynamic: classifier
38
+ Dynamic: description
39
+ Dynamic: description-content-type
40
+ Dynamic: license
41
+ Dynamic: license-file
42
+ Dynamic: project-url
43
+ Dynamic: requires-dist
44
+ Dynamic: requires-python
45
+ Dynamic: summary
46
+
47
+ # PatchVec — A lightweight, pluggable vector search microservice.
48
+
49
+ Upload → chunk → index (with metadata) → search via REST and CLI.
50
+
51
+ ## Highlights
52
+ - Multi-tenant collections: `/collections/{tenant}/{name}`
53
+ - Upload and search **TXT**, **CSV**, and **PDF**
54
+ - Chunking per format:
55
+ - PDF → 1 chunk/page
56
+ - TXT → configurable chunk size + overlap
57
+ - CSV → 1 chunk/row
58
+ - Metadata filters on POST search (`{"filters": {"docid": "DOC-1"}}`)
59
+ - Health, metrics, and Prometheus endpoints
60
+ - Configurable auth modes: `none` or `static` (Bearer)
61
+ - Default backends are local (vendor-neutral "default"); embedders & stores are pluggable
62
+
63
+ ## Requirements
64
+ - Python 3.10+
65
+
66
+ ## Install
67
+ ```bash
68
+ python -m venv .venv
69
+ source .venv/bin/activate
70
+ pip install patchvec
71
+ ```
72
+
73
+ > CPU-only by default. If you have a CUDA setup and want GPU-accelerated deps, install the GPU-enabled packages in your environment before running PatchVec.
74
+
75
+ ## Quickstart
76
+ ```bash
77
+ # Start the server (installed entry point)
78
+ pavesrv
79
+
80
+ # Or run with uvicorn manually if you prefer:
81
+ uvicorn pave.main:app --host 0.0.0.0 --port 8080
82
+ ```
83
+
84
+ ## Minimal config (optional)
85
+ By default PatchVec runs with sensible local defaults. To customize, create `config.yml` and set:
86
+ ```yaml
87
+ vector_store:
88
+ type: default
89
+ embedder:
90
+ type: default
91
+ auth:
92
+ mode: none # or 'static' with per-tenant Bearer keys
93
+ ```
94
+ Then export:
95
+ ```bash
96
+ export PATCHVEC_CONFIG=./config.yml
97
+ ```
98
+
99
+ ## REST example
100
+ ```bash
101
+ # Create a collection
102
+ curl -X POST http://localhost:8080/collections/acme/docs
103
+
104
+ # Upload a TXT document
105
+ curl -X POST http://localhost:8080/collections/acme/docs/documents -F "file=@sample.txt" -F "docid=DOC1"
106
+
107
+ # Search (GET, no filters)
108
+ curl -G --data-urlencode "q=hello" http://localhost:8080/collections/acme/docs/search
109
+
110
+ # Search (POST, with filters)
111
+ curl -X POST http://localhost:8080/collections/acme/docs/search -H "Content-Type: application/json" -d '{"q":"hello","k":5,"filters":{"docid":"DOC1"}}'
112
+ ```
113
+
114
+ ## License
115
+ GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
@@ -0,0 +1,87 @@
1
+ # 🍰 PatchVec — Lightweight, Pluggable Vector Search Microservice
2
+
3
+ Upload → chunk → index (with metadata) → search via REST and CLI.
4
+
5
+ ---
6
+
7
+ ## 🚀 Quickstart
8
+
9
+ ### 🖥️ CPU-only Dev (default)
10
+ ```bash
11
+ python -m venv .venv
12
+ source .venv/bin/activate
13
+ python -m pip install --upgrade pip
14
+ pip install -r requirements-cpu.txt
15
+ ./pavesrv.sh
16
+ ```
17
+
18
+ ### 📦 Install from PyPI
19
+ ```bash
20
+ python -m venv .venv
21
+ source .venv/bin/activate
22
+ python -m pip install --upgrade pip
23
+ pip install patchvec[cpu] # or patchvec[gpu] for CUDA
24
+ ```
25
+
26
+ ### ▶️ Run the Server
27
+ From source:
28
+ ```bash
29
+ ./pavesrv.sh
30
+ ```
31
+ From PyPI:
32
+ ```bash
33
+ pavesrv
34
+ ```
35
+
36
+ ### ⚙️ Minimal Config
37
+ For production (static auth), set env vars (do not commit secrets):
38
+ ```env
39
+ PATCHVEC_AUTH__MODE=static
40
+ PATCHVEC_AUTH__GLOBAL_KEY=sekret-passwod
41
+ ```
42
+ (Optional: copy `config.yml.example` to an untracked `config.yml` and tweak as needed)
43
+ (Tip: use an untracked `tenants.yml` and point `auth.tenants_file` to it in `config.yml`.)
44
+
45
+ ---
46
+
47
+ ## 🔧 Overriding Server Settings (uvicorn)
48
+ You can override a few server knobs via environment variables:
49
+ ```bash
50
+ HOST=127.0.0.1 PORT=9000 RELOAD=1 WORKERS=4 LOG_LEVEL=debug pavesrv
51
+ ```
52
+ > Note: Full configuration uses the `PATCHVEC_...` env scheme (e.g., `PATCHVEC_SERVER__PORT=9000`).
53
+
54
+ ---
55
+
56
+ ## 🌐 REST API Examples
57
+
58
+ **Create a collection**
59
+ ```bash
60
+ curl -X POST "http://localhost:8086/collections/acme/invoices"
61
+ ```
62
+
63
+ **Upload a TXT/PDF/CSV document**
64
+ ```bash
65
+ curl -X POST "http://localhost:8086/collections/acme/invoices/documents" -F "file=@sample.txt" -F "docid=DOC-1" -F 'metadata={"lang":"pt"}'
66
+ ```
67
+
68
+ **Search (GET, no filters)**
69
+ ```bash
70
+ curl "http://localhost:8086/collections/acme/invoices/search?q=garantia&k=5"
71
+ ```
72
+
73
+ **Search (POST with filters)**
74
+ ```bash
75
+ curl -X POST "http://localhost:8086/collections/acme/invoices/search" -H "Content-Type: application/json" -d '{"q": "garantia", "k": 5, "filters": {"docid": "DOC-1"}}'
76
+ ```
77
+
78
+ **Health / Metrics**
79
+ ```bash
80
+ curl "http://localhost:8086/health"
81
+ curl "http://localhost:8086/metrics"
82
+ ```
83
+
84
+ ---
85
+
86
+ ## 📜 License
87
+ GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
@@ -0,0 +1,69 @@
1
+ # Patchvec sample configuration (commit this file as config.yml.example)
2
+ # Copy to config.yml and adjust. Secrets should live in a separate, untracked file (see tenants.yml.example).
3
+
4
+ data_dir: ./data
5
+
6
+ # Optional "common" collection included in searches when enabled
7
+ common_enabled: true
8
+ common_tenant: global
9
+ common_collection: common
10
+
11
+ # Authentication
12
+ auth:
13
+ # Modes: none | static
14
+ mode: static
15
+ # Default admin user when auth=none
16
+ default_access_tenant: public
17
+ # Global admin key (read from env; do NOT hardcode it in committed files)
18
+ global_key: ${PATCHVEC_GLOBAL_KEY}
19
+ # External tenants→keys mapping file (untracked). If present, it
20
+ # will be merged and override duplicates.
21
+ tenants_file: ./tenants.yml
22
+ # Inline mapping (fallback; keep empty in repo)
23
+ api_keys: {}
24
+
25
+ # Vector store selection
26
+ vector_store:
27
+ type: default # default | qdrant
28
+ txtai:
29
+ embed_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
30
+ backend: faiss
31
+ qdrant:
32
+ url: http://localhost:6333
33
+ api_key: ${PATCHVEC_QDRANT_API_KEY}
34
+ prefer_payload_filters: true
35
+ collection_prefix: patchvec_
36
+
37
+ # Embedder selection
38
+ embedder:
39
+ type: default # default | sbert | openai
40
+ # default (txtai-backed)
41
+ txtai:
42
+ path: sentence-transformers/paraphrase-MiniLM-L3-v2
43
+ sbert:
44
+ model: sentence-transformers/all-MiniLM-L6-v2
45
+ batch_size: 64
46
+ device: auto # cpu | cuda | auto
47
+ openai:
48
+ api_key: ${PATCHVEC_OPENAI_API_KEY}
49
+ dim: 1536
50
+
51
+ # Text preprocessing
52
+ preprocess:
53
+ txt_chunk_size: 1000
54
+ txt_chunk_overlap: 200
55
+
56
+ # Optional server defaults
57
+ # (env HOST/PORT/RELOAD/WORKERS/LOG_LEVEL always override)
58
+ server:
59
+ host: 0.0.0.0
60
+ port: 8086
61
+ reload: false
62
+ workers: 1
63
+ log_level: info
64
+
65
+ # Optional instance customization
66
+ instance:
67
+ name: PatchVec
68
+ desc: Vector Search Microservice (pluggable, functional)
69
+
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.4
2
+ Name: patchvec
3
+ Version: 0.5.6
4
+ Summary: Patchvec — A lightweight, pluggable vector search microservice.
5
+ Author: Rodrigo Rodrigues da Silva
6
+ Author-email: rodrigopitanga@posteo.net
7
+ License: GPL-3.0-or-later
8
+ Project-URL: Homepage, https://gitlab.com/flowlexi/patchvec
9
+ Project-URL: Source, https://gitlab.com/flowlexi/patchvec
10
+ Project-URL: Tracker, https://gitlab.com/flowlexi/patchvec/issues
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Framework :: FastAPI
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: fastapi>=0.115.0
25
+ Requires-Dist: uvicorn[standard]>=0.30.6
26
+ Requires-Dist: txtai>=6.3.0
27
+ Requires-Dist: pydantic>=2.8.2
28
+ Requires-Dist: python-multipart>=0.0.9
29
+ Requires-Dist: pypdf>=5.0.0
30
+ Requires-Dist: pyyaml>=6.0.2
31
+ Requires-Dist: python-dotenv>=1.0.1
32
+ Requires-Dist: qdrant-client>=1.9.2
33
+ Requires-Dist: sentence-transformers>=2.7.0
34
+ Requires-Dist: openai>=1.0.0
35
+ Dynamic: author
36
+ Dynamic: author-email
37
+ Dynamic: classifier
38
+ Dynamic: description
39
+ Dynamic: description-content-type
40
+ Dynamic: license
41
+ Dynamic: license-file
42
+ Dynamic: project-url
43
+ Dynamic: requires-dist
44
+ Dynamic: requires-python
45
+ Dynamic: summary
46
+
47
+ # PatchVec — A lightweight, pluggable vector search microservice.
48
+
49
+ Upload → chunk → index (with metadata) → search via REST and CLI.
50
+
51
+ ## Highlights
52
+ - Multi-tenant collections: `/collections/{tenant}/{name}`
53
+ - Upload and search **TXT**, **CSV**, and **PDF**
54
+ - Chunking per format:
55
+ - PDF → 1 chunk/page
56
+ - TXT → configurable chunk size + overlap
57
+ - CSV → 1 chunk/row
58
+ - Metadata filters on POST search (`{"filters": {"docid": "DOC-1"}}`)
59
+ - Health, metrics, and Prometheus endpoints
60
+ - Configurable auth modes: `none` or `static` (Bearer)
61
+ - Default backends are local (vendor-neutral "default"); embedders & stores are pluggable
62
+
63
+ ## Requirements
64
+ - Python 3.10+
65
+
66
+ ## Install
67
+ ```bash
68
+ python -m venv .venv
69
+ source .venv/bin/activate
70
+ pip install patchvec
71
+ ```
72
+
73
+ > CPU-only by default. If you have a CUDA setup and want GPU-accelerated deps, install the GPU-enabled packages in your environment before running PatchVec.
74
+
75
+ ## Quickstart
76
+ ```bash
77
+ # Start the server (installed entry point)
78
+ pavesrv
79
+
80
+ # Or run with uvicorn manually if you prefer:
81
+ uvicorn pave.main:app --host 0.0.0.0 --port 8080
82
+ ```
83
+
84
+ ## Minimal config (optional)
85
+ By default PatchVec runs with sensible local defaults. To customize, create `config.yml` and set:
86
+ ```yaml
87
+ vector_store:
88
+ type: default
89
+ embedder:
90
+ type: default
91
+ auth:
92
+ mode: none # or 'static' with per-tenant Bearer keys
93
+ ```
94
+ Then export:
95
+ ```bash
96
+ export PATCHVEC_CONFIG=./config.yml
97
+ ```
98
+
99
+ ## REST example
100
+ ```bash
101
+ # Create a collection
102
+ curl -X POST http://localhost:8080/collections/acme/docs
103
+
104
+ # Upload a TXT document
105
+ curl -X POST http://localhost:8080/collections/acme/docs/documents -F "file=@sample.txt" -F "docid=DOC1"
106
+
107
+ # Search (GET, no filters)
108
+ curl -G --data-urlencode "q=hello" http://localhost:8080/collections/acme/docs/search
109
+
110
+ # Search (POST, with filters)
111
+ curl -X POST http://localhost:8080/collections/acme/docs/search -H "Content-Type: application/json" -d '{"q":"hello","k":5,"filters":{"docid":"DOC1"}}'
112
+ ```
113
+
114
+ ## License
115
+ GPL-3.0-or-later — (C) 2025 Rodrigo Rodrigues da Silva
@@ -0,0 +1,54 @@
1
+ ABOUT.md
2
+ LICENSE
3
+ MANIFEST.in
4
+ README.md
5
+ config.yml.example
6
+ requirements-base.txt
7
+ requirements-cpu.txt
8
+ requirements-test.txt
9
+ requirements.txt
10
+ setup.py
11
+ patchvec.egg-info/PKG-INFO
12
+ patchvec.egg-info/SOURCES.txt
13
+ patchvec.egg-info/dependency_links.txt
14
+ patchvec.egg-info/entry_points.txt
15
+ patchvec.egg-info/requires.txt
16
+ patchvec.egg-info/top_level.txt
17
+ pave/__init__.py
18
+ pave/auth.py
19
+ pave/cli.py
20
+ pave/config.py
21
+ pave/main.py
22
+ pave/metrics.py
23
+ pave/preprocess.py
24
+ pave/service.py
25
+ pave/ui.py
26
+ pave/assets/patchvec_icon_192.png
27
+ pave/assets/ui.html
28
+ pave/embedders/__init__.py
29
+ pave/embedders/base.py
30
+ pave/embedders/factory.py
31
+ pave/embedders/openai_emb.py
32
+ pave/embedders/sbert_emb.py
33
+ pave/embedders/txtai_emb.py
34
+ pave/stores/__init__.py
35
+ pave/stores/base.py
36
+ pave/stores/factory.py
37
+ pave/stores/qdrant_store.py
38
+ pave/stores/txtai_store.py
39
+ scripts/release.sh
40
+ tests/test_auth.py
41
+ tests/test_cli.py
42
+ tests/test_collections.py
43
+ tests/test_config_runtime.py
44
+ tests/test_csv_ingest.py
45
+ tests/test_docid_default.py
46
+ tests/test_health.py
47
+ tests/test_metrics.py
48
+ tests/test_txtai_concurrent_upsert.py
49
+ tests/test_txtai_store.py
50
+ tests/test_txtai_store_filters.py
51
+ tests/test_ui.py
52
+ tests/test_upload_search_csv.py
53
+ tests/test_upload_search_pdf.py
54
+ tests/test_upload_search_txt.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ pavecli = pave.cli:main_cli
3
+ pavesrv = pave.main:main_srv
@@ -0,0 +1,11 @@
1
+ fastapi>=0.115.0
2
+ uvicorn[standard]>=0.30.6
3
+ txtai>=6.3.0
4
+ pydantic>=2.8.2
5
+ python-multipart>=0.0.9
6
+ pypdf>=5.0.0
7
+ pyyaml>=6.0.2
8
+ python-dotenv>=1.0.1
9
+ qdrant-client>=1.9.2
10
+ sentence-transformers>=2.7.0
11
+ openai>=1.0.0
@@ -0,0 +1 @@
1
+ pave
@@ -0,0 +1,6 @@
1
+ # (C) 2025 Rodrigo Rodrigues da Silva <rodrigopitanga@posteo.net>
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+ __all__ = [
4
+ "config", "auth", "preprocess", "metrics", "service", "main", "cli",
5
+ "embedders", "stores"
6
+ ]
@@ -0,0 +1,125 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <!-- robust favicon: PNG icons plus /favicon.ico fallback -->
7
+ <link rel="icon" type="image/png" sizes="32x32" href="/assets/patchvec_icon_192.png" />
8
+ <link rel="icon" type="image/png" sizes="192x192" href="/assets/patchvec_icon_192.png" />
9
+ <link rel="icon" href="/favicon.ico" />
10
+ <title>__INST_NAME__ • Search</title>
11
+ <style>
12
+ /* color palette */
13
+ :root{ --bg:#f6e6d9; --panel:#fffaf6; --text:#2b1e11; --muted:#6b5b53; --accent:#c9463d; --border:#ead7c7; --link:#0f2e4d; --link-accent:#14b8a6; }
14
+ @media (prefers-color-scheme: dark){
15
+ :root{ --bg:#1a1410; --panel:#221a14; --text:#f1e9e4; --muted:#b7a9a1; --accent:#ef6b62; --border:#3a2c22; --link:#9dd9d3; --link-accent:#34d399; }
16
+ }
17
+
18
+ *{ box-sizing:border-box }
19
+ html,body{ height:100% }
20
+ body{
21
+ min-height:100vh; display:flex; flex-direction:column; overflow:hidden;
22
+ margin:0; font:16px/1.45 system-ui,Segoe UI,Roboto,Inter,Arial; background:var(--bg); color:var(--text);
23
+ }
24
+
25
+ .bar{
26
+ flex:0 0 auto; display:flex; gap:10px; align-items:center;
27
+ padding:12px; border-bottom:1px solid var(--border); background:var(--panel);
28
+ }
29
+ .left{ display:flex; gap:8px; align-items:center }
30
+ .right{ margin-left:auto; color:var(--muted); white-space:nowrap; overflow:hidden; text-overflow:ellipsis }
31
+
32
+ .tab{
33
+ padding:8px 12px; border-radius:10px; border:1px solid var(--border);
34
+ cursor:pointer; background:transparent; color:var(--text);
35
+ }
36
+ .tab.active{ background:var(--link-accent); color:#041318; border-color:var(--link-accent) }
37
+
38
+ .frames{ flex:1 1 auto; min-height:0; background:var(--bg) }
39
+ .frame{ display:none; width:100%; height:100%; border:0; background:var(--bg) }
40
+ .frame.active{ display:block }
41
+
42
+ .footer{
43
+ flex:0 0 auto; position:sticky; bottom:0;
44
+ display:flex; gap:12px; align-items:center; justify-content:center;
45
+ padding:10px; color:var(--muted); border-top:1px solid var(--border); background:var(--panel);
46
+ }
47
+ .footer a{ color:var(--link); text-decoration:none }
48
+ .footer a:hover{ color:var(--link-accent) }
49
+ </style>
50
+ </head>
51
+ <body>
52
+ <div class="bar">
53
+ <div class="left">
54
+ <button class="tab active" data-target="search" data-title="__INST_NAME__ • Search">Search</button>
55
+ <button class="tab" data-target="ingest" data-title="__INST_NAME__ • Ingest">Ingest</button>
56
+ </div>
57
+ <div class="right"><strong>__INST_NAME__</strong> — <small>__INST_DESC__</small></div>
58
+ </div>
59
+
60
+ <div class="frames">
61
+ <iframe id="search" class="frame active" src="/ui/search" title="Search"></iframe>
62
+ <iframe id="ingest" class="frame" src="/ui/ingest" title="Ingest"></iframe>
63
+ </div>
64
+
65
+ <div class="footer">
66
+ © 2025 <a href="https://flowlexi.com" target="_blank" rel="noopener">Flowlexi</a> •
67
+ <a href="__REPO_URL__" target="_blank" rel="noopener">🍰 patchvec __VERSION__</a> •
68
+ <a href="__LICENSE_URL__" target="_blank" rel="noopener">__LICENSE_NAME__</a>
69
+ </div>
70
+ <script>
71
+ function activeFrame(){ return document.querySelector('.frame.active'); } // Returns the first element carrying both the "frame" and "active" classes (the visible iframe), or null if there is none; not called elsewhere in this script.
72
+
73
+ function scrubSwagger(frame){
74
+ const win = frame?.contentWindow;
75
+ const doc = win?.document;
76
+ if(!doc) return;
77
+ if(!doc.querySelector('.swagger-ui')){
78
+ setTimeout(()=>scrubSwagger(frame), 80);
79
+ return;
80
+ }
81
+
82
+ // CSS: remove topo/infos
83
+ const STYLE_ID = 'pv-clean';
84
+ if(!doc.getElementById(STYLE_ID)){
85
+ const s = doc.createElement('style');
86
+ s.id = STYLE_ID;
87
+ s.textContent = `
88
+ #operations-tag-default,
89
+ .swagger-ui .topbar,
90
+ .swagger-ui .download-url-wrapper,
91
+ .swagger-ui .information-container,
92
+ .swagger-ui .info,
93
+ .swagger-ui .servers { display:none !important; }
94
+ .swagger-ui .wrapper { margin:0 !important; padding:0 0 8px !important; }
95
+ html, body { background:transparent !important; }
96
+ /* auth icon */
97
+ .swagger-ui .auth-wrapper .authorize:hover { border-color: rgba(20,184,166,.8); }
98
+ .swagger-ui .btn.authorize { background: white }
99
+ .swagger-ui .scheme-container { width:100%; height:; margin: 8px 0 0 0; padding: 0 0; background:transparent; border-color: transparent; box-shadow:0 0 0 0 rgba(0,0,0,0)}
100
+ `;
101
+ doc.head.appendChild(s);
102
+ }
103
+ }
104
+
105
+ const tabs = document.querySelectorAll('.tab'); // tab buttons in the top bar
106
+ const frames = document.querySelectorAll('.frame'); // the iframes, one per tab
107
+
108
+ tabs.forEach(function(tab){ // clicking a tab makes it and its target iframe the only "active" ones
109
+ tab.addEventListener('click', function(){
110
+ tabs.forEach(t=>t.classList.remove('active'));
111
+ frames.forEach(f=>f.classList.remove('active'));
112
+ tab.classList.add('active');
113
+ const fr = document.getElementById(tab.dataset.target); // data-target holds the id of the iframe this tab controls
114
+ fr.classList.add('active');
115
+ document.title = tab.dataset.title || document.title; // keep the current title when the tab has no data-title
116
+ scrubSwagger(fr); // re-apply the Swagger UI cleanup to the frame just shown
117
+ });
118
+ });
119
+
120
+ frames.forEach(function(fr){
121
+ fr.addEventListener('load', function(){ scrubSwagger(fr); }); // scrub each frame whenever its document finishes loading
122
+ });
123
+ </script>
124
+ </body>
125
+ </html>