getbased-rag 0.7.3__tar.gz → 0.7.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {getbased_rag-0.7.3/src/getbased_rag.egg-info → getbased_rag-0.7.4}/PKG-INFO +27 -18
  2. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/README.md +23 -14
  3. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/pyproject.toml +4 -4
  4. {getbased_rag-0.7.3 → getbased_rag-0.7.4/src/getbased_rag.egg-info}/PKG-INFO +27 -18
  5. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/getbased_rag.egg-info/requires.txt +3 -3
  6. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/api_key.py +7 -4
  7. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/cli.py +4 -3
  8. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/embedder.py +12 -12
  9. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/registry.py +4 -6
  10. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/server.py +33 -9
  11. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/store.py +0 -1
  12. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/tests/test_onnx_embedder.py +0 -2
  13. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/LICENSE +0 -0
  14. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/setup.cfg +0 -0
  15. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/getbased_rag.egg-info/SOURCES.txt +0 -0
  16. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/getbased_rag.egg-info/dependency_links.txt +0 -0
  17. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/getbased_rag.egg-info/entry_points.txt +0 -0
  18. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/getbased_rag.egg-info/top_level.txt +0 -0
  19. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/__init__.py +0 -0
  20. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/config.py +0 -0
  21. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/src/lens/ingest.py +0 -0
  22. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/tests/test_env_loader.py +0 -0
  23. {getbased_rag-0.7.3 → getbased_rag-0.7.4}/tests/test_server.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: getbased-rag
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: getbased-rag — standalone RAG knowledge server (formerly the Electron-bundled Lens)
5
5
  License-Expression: AGPL-3.0-or-later
6
6
  Requires-Python: >=3.10
@@ -14,13 +14,13 @@ Requires-Dist: typer>=0.12
14
14
  Requires-Dist: pydantic>=2.0
15
15
  Requires-Dist: pyyaml>=6.0
16
16
  Requires-Dist: rich>=13.0
17
- Requires-Dist: python-multipart>=0.0.6
17
+ Requires-Dist: python-multipart>=0.0.31
18
18
  Provides-Extra: pdf
19
- Requires-Dist: pypdf>=3.9; extra == "pdf"
19
+ Requires-Dist: pypdf>=6.13.3; extra == "pdf"
20
20
  Provides-Extra: docx
21
21
  Requires-Dist: python-docx>=1.0; extra == "docx"
22
22
  Provides-Extra: full
23
- Requires-Dist: pypdf>=3.9; extra == "full"
23
+ Requires-Dist: pypdf>=6.13.3; extra == "full"
24
24
  Requires-Dist: python-docx>=1.0; extra == "full"
25
25
  Requires-Dist: onnxruntime>=1.17; extra == "full"
26
26
  Requires-Dist: huggingface-hub>=0.20; extra == "full"
@@ -32,9 +32,9 @@ Dynamic: license-file
32
32
 
33
33
  # getbased-rag
34
34
 
35
- > **Installing for the first time?** The [getbased-agent-stack](https://github.com/elkimek/getbased-agents/tree/main/packages/stack) meta-package bundles this server with the MCP that Claude Code / Hermes / OpenClaw talk to, plus [getbased-dashboard](https://github.com/elkimek/getbased-agents/tree/main/packages/dashboard) for a browser UI. One command and you're up.
35
+ > **Installing for the first time?** The [getbased-agent-stack](https://github.com/elkimek/getbased-agents/tree/main/packages/stack) meta-package bundles this server with the MCP adapter and [getbased-dashboard](https://github.com/elkimek/getbased-agents/tree/main/packages/dashboard). Use it unless you only want the knowledge server.
36
36
 
37
- A standalone RAG knowledge server — the backend that used to ship inside the getbased Electron desktop app, now just Python. Point any client (the getbased PWA's *External server* lens backend, the dashboard, or your own) at it.
37
+ A standalone local knowledge server for getbased. Point the getbased PWA's **External server** Knowledge Base backend, the dashboard, or your own compatible client at it.
38
38
 
39
39
  - **Stack**: FastAPI + Uvicorn · Qdrant (embedded local mode) · sentence-transformers / ONNX Runtime
40
40
  - **Default port**: 8322, loopback only
@@ -69,7 +69,7 @@ lens serve
69
69
 
70
70
  First start auto-generates an API key at the data dir (see below), prints the bind address, and lazy-loads the embedding model on the first query (~90 MB download for MiniLM).
71
71
 
72
- Copy the API key out when you need to configure a client:
72
+ Create the API key file and show its path when you need to configure a client:
73
73
 
74
74
  ```bash
75
75
  lens key
@@ -78,8 +78,11 @@ lens key
78
78
  Smoke test:
79
79
 
80
80
  ```bash
81
+ key_file="$(lens key | sed 's/^file: //')"
82
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
83
+
81
84
  curl -s http://127.0.0.1:8322/health
82
- curl -s -H "Authorization: Bearer $(lens key)" http://127.0.0.1:8322/info | jq
85
+ curl -s -H "Authorization: Bearer ${LENS_API_KEY}" http://127.0.0.1:8322/info | jq
83
86
  ```
84
87
 
85
88
  Ingest a file or directory from the CLI:
@@ -92,7 +95,10 @@ lens stats
92
95
  Or over HTTP (what the dashboard + PWA use):
93
96
 
94
97
  ```bash
95
- curl -H "Authorization: Bearer $(lens key)" \
98
+ key_file="$(lens key | sed 's/^file: //')"
99
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
100
+
101
+ curl -H "Authorization: Bearer ${LENS_API_KEY}" \
96
102
  -F "files=@paper.pdf" -F "files=@notes.md" \
97
103
  http://127.0.0.1:8322/ingest
98
104
  ```
@@ -104,7 +110,10 @@ curl -H "Authorization: Bearer $(lens key)" \
104
110
  Every library is pinned to one embedding model at creation time — Qdrant collections are dimension-locked, so you can't swap models on an existing library without re-ingesting. Call `GET /models` for the curated list (MiniLM-L6-v2 · BGE-small/base/large-en · BGE-M3) with dims and download sizes, then pass `embedding_model` on create:
105
111
 
106
112
  ```bash
107
- curl -H "Authorization: Bearer $(lens key)" \
113
+ key_file="$(lens key | sed 's/^file: //')"
114
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
115
+
116
+ curl -H "Authorization: Bearer ${LENS_API_KEY}" \
108
117
  -H "Content-Type: application/json" \
109
118
  -d '{"name":"Research","embedding_model":"BAAI/bge-m3"}' \
110
119
  http://127.0.0.1:8322/libraries
@@ -127,18 +136,18 @@ The dashboard uses the streaming path for its bottom-right pill (chunks/sec rate
127
136
 
128
137
  ## Wiring into the getbased PWA
129
138
 
130
- In the PWA: **Settings → AI → Knowledge Base → External server**
139
+ In the PWA: open **Knowledge Base**, choose **External server**, then enter:
131
140
 
132
141
  | Field | Value |
133
142
  |---|---|
134
143
  | URL | `http://127.0.0.1:8322` |
135
- | API key | output of `lens key` |
144
+ | API key | value in the file printed by `lens key` |
136
145
 
137
146
  Click **Save**, then **Test connection**. `rag_ready: false` is expected before you ingest anything.
138
147
 
139
148
  ### Agent access (Claude Code, Hermes, OpenClaw, etc.)
140
149
 
141
- Pair this server with [getbased-mcp](https://github.com/elkimek/getbased-agents/tree/main/packages/mcp) to expose `knowledge_search`, `knowledge_list_libraries`, `knowledge_activate_library`, and `knowledge_stats` as MCP tools. Typical setup: run both the lens server and getbased-mcp on the same VM, point MCP's `LENS_URL` at `http://localhost:8322`.
150
+ Pair this server with [getbased-mcp](https://github.com/elkimek/getbased-agents/tree/main/packages/mcp) to expose `knowledge_search`, `knowledge_list_libraries`, `knowledge_activate_library`, and `knowledge_stats` as MCP tools. Typical setup: run both the knowledge server and getbased-mcp on the same machine, then point MCP's `LENS_URL` at `http://localhost:8322`.
142
151
 
143
152
  ### Browser UI
144
153
 
@@ -183,7 +192,7 @@ LENS_ONNX_PROVIDER=cuda lens serve
183
192
 
184
193
  ## HTTP API
185
194
 
186
- All endpoints except `/`, `/health` require `Authorization: Bearer <key>`.
195
+ All endpoints except `/`, `/health` require `Authorization: Bearer ${LENS_API_KEY}`.
187
196
 
188
197
  | Method | Path | Purpose |
189
198
  |---|---|---|
@@ -206,7 +215,7 @@ All endpoints except `/`, `/health` require `Authorization: Bearer <key>`.
206
215
  ## Security notes
207
216
 
208
217
  - Default bind is `127.0.0.1` — queries never leak to the LAN unless you explicitly set `LENS_HOST=0.0.0.0`.
209
- - The API key file is mode `0600` and never exposed over HTTP. Use `lens key` locally to read it.
218
+ - The API key file is mode `0600`; `lens key` shows the file path instead of printing the secret.
210
219
  - Bearer comparison uses `secrets.compare_digest` — constant-time, no timing-leak class of bug.
211
220
  - Upload paths are basename-sanitised server-side (so `../../etc/passwd` can't escape the ingest temp dir).
212
221
  - Zip uploads are zip-slip-guarded — each archive entry must resolve inside its own per-zip subdirectory AND inside the overall ingest root.
@@ -222,8 +231,8 @@ lens ingest <path> Index files into the active library
222
231
  lens stats List indexed sources + chunk counts
223
232
  lens delete <source> Drop chunks belonging to one source
224
233
  lens clear Wipe the active library
225
- lens info Show config + API key
226
- lens key Print the API key (creates one if missing)
234
+ lens info Show config + masked API-key fingerprint
235
+ lens key Show API key file path
227
236
  ```
228
237
 
229
238
  ---
@@ -236,4 +245,4 @@ AGPL-3.0-or-later.
236
245
 
237
246
  ## Lineage
238
247
 
239
- This repo is the Python portion lifted out of [getbased](https://github.com/elkimek/getbased) after the Electron desktop app was retired. The PWA's `external-server` lens backend speaks this same HTTP contract unchanged.
248
+ This repo is the Python knowledge-server portion lifted out of [getbased](https://github.com/elkimek/get-based) after the Electron desktop app was retired. The PWA's external-server Knowledge Base backend speaks this same HTTP contract.
@@ -1,8 +1,8 @@
1
1
  # getbased-rag
2
2
 
3
- > **Installing for the first time?** The [getbased-agent-stack](https://github.com/elkimek/getbased-agents/tree/main/packages/stack) meta-package bundles this server with the MCP that Claude Code / Hermes / OpenClaw talk to, plus [getbased-dashboard](https://github.com/elkimek/getbased-agents/tree/main/packages/dashboard) for a browser UI. One command and you're up.
3
+ > **Installing for the first time?** The [getbased-agent-stack](https://github.com/elkimek/getbased-agents/tree/main/packages/stack) meta-package bundles this server with the MCP adapter and [getbased-dashboard](https://github.com/elkimek/getbased-agents/tree/main/packages/dashboard). Use it unless you only want the knowledge server.
4
4
 
5
- A standalone RAG knowledge server — the backend that used to ship inside the getbased Electron desktop app, now just Python. Point any client (the getbased PWA's *External server* lens backend, the dashboard, or your own) at it.
5
+ A standalone local knowledge server for getbased. Point the getbased PWA's **External server** Knowledge Base backend, the dashboard, or your own compatible client at it.
6
6
 
7
7
  - **Stack**: FastAPI + Uvicorn · Qdrant (embedded local mode) · sentence-transformers / ONNX Runtime
8
8
  - **Default port**: 8322, loopback only
@@ -37,7 +37,7 @@ lens serve
37
37
 
38
38
  First start auto-generates an API key at the data dir (see below), prints the bind address, and lazy-loads the embedding model on the first query (~90 MB download for MiniLM).
39
39
 
40
- Copy the API key out when you need to configure a client:
40
+ Create the API key file and show its path when you need to configure a client:
41
41
 
42
42
  ```bash
43
43
  lens key
@@ -46,8 +46,11 @@ lens key
46
46
  Smoke test:
47
47
 
48
48
  ```bash
49
+ key_file="$(lens key | sed 's/^file: //')"
50
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
51
+
49
52
  curl -s http://127.0.0.1:8322/health
50
- curl -s -H "Authorization: Bearer $(lens key)" http://127.0.0.1:8322/info | jq
53
+ curl -s -H "Authorization: Bearer ${LENS_API_KEY}" http://127.0.0.1:8322/info | jq
51
54
  ```
52
55
 
53
56
  Ingest a file or directory from the CLI:
@@ -60,7 +63,10 @@ lens stats
60
63
  Or over HTTP (what the dashboard + PWA use):
61
64
 
62
65
  ```bash
63
- curl -H "Authorization: Bearer $(lens key)" \
66
+ key_file="$(lens key | sed 's/^file: //')"
67
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
68
+
69
+ curl -H "Authorization: Bearer ${LENS_API_KEY}" \
64
70
  -F "files=@paper.pdf" -F "files=@notes.md" \
65
71
  http://127.0.0.1:8322/ingest
66
72
  ```
@@ -72,7 +78,10 @@ curl -H "Authorization: Bearer $(lens key)" \
72
78
  Every library is pinned to one embedding model at creation time — Qdrant collections are dimension-locked, so you can't swap models on an existing library without re-ingesting. Call `GET /models` for the curated list (MiniLM-L6-v2 · BGE-small/base/large-en · BGE-M3) with dims and download sizes, then pass `embedding_model` on create:
73
79
 
74
80
  ```bash
75
- curl -H "Authorization: Bearer $(lens key)" \
81
+ key_file="$(lens key | sed 's/^file: //')"
82
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
83
+
84
+ curl -H "Authorization: Bearer ${LENS_API_KEY}" \
76
85
  -H "Content-Type: application/json" \
77
86
  -d '{"name":"Research","embedding_model":"BAAI/bge-m3"}' \
78
87
  http://127.0.0.1:8322/libraries
@@ -95,18 +104,18 @@ The dashboard uses the streaming path for its bottom-right pill (chunks/sec rate
95
104
 
96
105
  ## Wiring into the getbased PWA
97
106
 
98
- In the PWA: **Settings → AI → Knowledge Base → External server**
107
+ In the PWA: open **Knowledge Base**, choose **External server**, then enter:
99
108
 
100
109
  | Field | Value |
101
110
  |---|---|
102
111
  | URL | `http://127.0.0.1:8322` |
103
- | API key | output of `lens key` |
112
+ | API key | value in the file printed by `lens key` |
104
113
 
105
114
  Click **Save**, then **Test connection**. `rag_ready: false` is expected before you ingest anything.
106
115
 
107
116
  ### Agent access (Claude Code, Hermes, OpenClaw, etc.)
108
117
 
109
- Pair this server with [getbased-mcp](https://github.com/elkimek/getbased-agents/tree/main/packages/mcp) to expose `knowledge_search`, `knowledge_list_libraries`, `knowledge_activate_library`, and `knowledge_stats` as MCP tools. Typical setup: run both the lens server and getbased-mcp on the same VM, point MCP's `LENS_URL` at `http://localhost:8322`.
118
+ Pair this server with [getbased-mcp](https://github.com/elkimek/getbased-agents/tree/main/packages/mcp) to expose `knowledge_search`, `knowledge_list_libraries`, `knowledge_activate_library`, and `knowledge_stats` as MCP tools. Typical setup: run both the knowledge server and getbased-mcp on the same machine, then point MCP's `LENS_URL` at `http://localhost:8322`.
110
119
 
111
120
  ### Browser UI
112
121
 
@@ -151,7 +160,7 @@ LENS_ONNX_PROVIDER=cuda lens serve
151
160
 
152
161
  ## HTTP API
153
162
 
154
- All endpoints except `/`, `/health` require `Authorization: Bearer <key>`.
163
+ All endpoints except `/`, `/health` require `Authorization: Bearer ${LENS_API_KEY}`.
155
164
 
156
165
  | Method | Path | Purpose |
157
166
  |---|---|---|
@@ -174,7 +183,7 @@ All endpoints except `/`, `/health` require `Authorization: Bearer <key>`.
174
183
  ## Security notes
175
184
 
176
185
  - Default bind is `127.0.0.1` — queries never leak to the LAN unless you explicitly set `LENS_HOST=0.0.0.0`.
177
- - The API key file is mode `0600` and never exposed over HTTP. Use `lens key` locally to read it.
186
+ - The API key file is mode `0600`; `lens key` shows the file path instead of printing the secret.
178
187
  - Bearer comparison uses `secrets.compare_digest` — constant-time, no timing-leak class of bug.
179
188
  - Upload paths are basename-sanitised server-side (so `../../etc/passwd` can't escape the ingest temp dir).
180
189
  - Zip uploads are zip-slip-guarded — each archive entry must resolve inside its own per-zip subdirectory AND inside the overall ingest root.
@@ -190,8 +199,8 @@ lens ingest <path> Index files into the active library
190
199
  lens stats List indexed sources + chunk counts
191
200
  lens delete <source> Drop chunks belonging to one source
192
201
  lens clear Wipe the active library
193
- lens info Show config + API key
194
- lens key Print the API key (creates one if missing)
202
+ lens info Show config + masked API-key fingerprint
203
+ lens key Show API key file path
195
204
  ```
196
205
 
197
206
  ---
@@ -204,4 +213,4 @@ AGPL-3.0-or-later.
204
213
 
205
214
  ## Lineage
206
215
 
207
- This repo is the Python portion lifted out of [getbased](https://github.com/elkimek/getbased) after the Electron desktop app was retired. The PWA's `external-server` lens backend speaks this same HTTP contract unchanged.
216
+ This repo is the Python knowledge-server portion lifted out of [getbased](https://github.com/elkimek/get-based) after the Electron desktop app was retired. The PWA's external-server Knowledge Base backend speaks this same HTTP contract.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "getbased-rag"
7
- version = "0.7.3"
7
+ version = "0.7.4"
8
8
  description = "getbased-rag — standalone RAG knowledge server (formerly the Electron-bundled Lens)"
9
9
  readme = "README.md"
10
10
  license = "AGPL-3.0-or-later"
@@ -18,11 +18,11 @@ dependencies = [
18
18
  "pydantic>=2.0",
19
19
  "pyyaml>=6.0",
20
20
  "rich>=13.0",
21
- "python-multipart>=0.0.6",
21
+ "python-multipart>=0.0.31",
22
22
  ]
23
23
 
24
24
  [project.optional-dependencies]
25
- pdf = ["pypdf>=3.9"]
25
+ pdf = ["pypdf>=6.13.3"]
26
26
  docx = ["python-docx>=1.0"]
27
27
  # ONNX acceleration. `full` now includes it by default — the install
28
28
  # is small (~200 MB vs the old ~2 GB) because we load pre-exported
@@ -36,7 +36,7 @@ docx = ["python-docx>=1.0"]
36
36
  # already in core gets you the default LocalEmbedder path — ~2-3x
37
37
  # slower than ONNX but zero extra config.
38
38
  full = [
39
- "pypdf>=3.9",
39
+ "pypdf>=6.13.3",
40
40
  "python-docx>=1.0",
41
41
  "onnxruntime>=1.17",
42
42
  "huggingface-hub>=0.20",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: getbased-rag
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: getbased-rag — standalone RAG knowledge server (formerly the Electron-bundled Lens)
5
5
  License-Expression: AGPL-3.0-or-later
6
6
  Requires-Python: >=3.10
@@ -14,13 +14,13 @@ Requires-Dist: typer>=0.12
14
14
  Requires-Dist: pydantic>=2.0
15
15
  Requires-Dist: pyyaml>=6.0
16
16
  Requires-Dist: rich>=13.0
17
- Requires-Dist: python-multipart>=0.0.6
17
+ Requires-Dist: python-multipart>=0.0.31
18
18
  Provides-Extra: pdf
19
- Requires-Dist: pypdf>=3.9; extra == "pdf"
19
+ Requires-Dist: pypdf>=6.13.3; extra == "pdf"
20
20
  Provides-Extra: docx
21
21
  Requires-Dist: python-docx>=1.0; extra == "docx"
22
22
  Provides-Extra: full
23
- Requires-Dist: pypdf>=3.9; extra == "full"
23
+ Requires-Dist: pypdf>=6.13.3; extra == "full"
24
24
  Requires-Dist: python-docx>=1.0; extra == "full"
25
25
  Requires-Dist: onnxruntime>=1.17; extra == "full"
26
26
  Requires-Dist: huggingface-hub>=0.20; extra == "full"
@@ -32,9 +32,9 @@ Dynamic: license-file
32
32
 
33
33
  # getbased-rag
34
34
 
35
- > **Installing for the first time?** The [getbased-agent-stack](https://github.com/elkimek/getbased-agents/tree/main/packages/stack) meta-package bundles this server with the MCP that Claude Code / Hermes / OpenClaw talk to, plus [getbased-dashboard](https://github.com/elkimek/getbased-agents/tree/main/packages/dashboard) for a browser UI. One command and you're up.
35
+ > **Installing for the first time?** The [getbased-agent-stack](https://github.com/elkimek/getbased-agents/tree/main/packages/stack) meta-package bundles this server with the MCP adapter and [getbased-dashboard](https://github.com/elkimek/getbased-agents/tree/main/packages/dashboard). Use it unless you only want the knowledge server.
36
36
 
37
- A standalone RAG knowledge server — the backend that used to ship inside the getbased Electron desktop app, now just Python. Point any client (the getbased PWA's *External server* lens backend, the dashboard, or your own) at it.
37
+ A standalone local knowledge server for getbased. Point the getbased PWA's **External server** Knowledge Base backend, the dashboard, or your own compatible client at it.
38
38
 
39
39
  - **Stack**: FastAPI + Uvicorn · Qdrant (embedded local mode) · sentence-transformers / ONNX Runtime
40
40
  - **Default port**: 8322, loopback only
@@ -69,7 +69,7 @@ lens serve
69
69
 
70
70
  First start auto-generates an API key at the data dir (see below), prints the bind address, and lazy-loads the embedding model on the first query (~90 MB download for MiniLM).
71
71
 
72
- Copy the API key out when you need to configure a client:
72
+ Create the API key file and show its path when you need to configure a client:
73
73
 
74
74
  ```bash
75
75
  lens key
@@ -78,8 +78,11 @@ lens key
78
78
  Smoke test:
79
79
 
80
80
  ```bash
81
+ key_file="$(lens key | sed 's/^file: //')"
82
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
83
+
81
84
  curl -s http://127.0.0.1:8322/health
82
- curl -s -H "Authorization: Bearer $(lens key)" http://127.0.0.1:8322/info | jq
85
+ curl -s -H "Authorization: Bearer ${LENS_API_KEY}" http://127.0.0.1:8322/info | jq
83
86
  ```
84
87
 
85
88
  Ingest a file or directory from the CLI:
@@ -92,7 +95,10 @@ lens stats
92
95
  Or over HTTP (what the dashboard + PWA use):
93
96
 
94
97
  ```bash
95
- curl -H "Authorization: Bearer $(lens key)" \
98
+ key_file="$(lens key | sed 's/^file: //')"
99
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
100
+
101
+ curl -H "Authorization: Bearer ${LENS_API_KEY}" \
96
102
  -F "files=@paper.pdf" -F "files=@notes.md" \
97
103
  http://127.0.0.1:8322/ingest
98
104
  ```
@@ -104,7 +110,10 @@ curl -H "Authorization: Bearer $(lens key)" \
104
110
  Every library is pinned to one embedding model at creation time — Qdrant collections are dimension-locked, so you can't swap models on an existing library without re-ingesting. Call `GET /models` for the curated list (MiniLM-L6-v2 · BGE-small/base/large-en · BGE-M3) with dims and download sizes, then pass `embedding_model` on create:
105
111
 
106
112
  ```bash
107
- curl -H "Authorization: Bearer $(lens key)" \
113
+ key_file="$(lens key | sed 's/^file: //')"
114
+ LENS_API_KEY="$(tr -d '\n' < "$key_file")"
115
+
116
+ curl -H "Authorization: Bearer ${LENS_API_KEY}" \
108
117
  -H "Content-Type: application/json" \
109
118
  -d '{"name":"Research","embedding_model":"BAAI/bge-m3"}' \
110
119
  http://127.0.0.1:8322/libraries
@@ -127,18 +136,18 @@ The dashboard uses the streaming path for its bottom-right pill (chunks/sec rate
127
136
 
128
137
  ## Wiring into the getbased PWA
129
138
 
130
- In the PWA: **Settings → AI → Knowledge Base → External server**
139
+ In the PWA: open **Knowledge Base**, choose **External server**, then enter:
131
140
 
132
141
  | Field | Value |
133
142
  |---|---|
134
143
  | URL | `http://127.0.0.1:8322` |
135
- | API key | output of `lens key` |
144
+ | API key | value in the file printed by `lens key` |
136
145
 
137
146
  Click **Save**, then **Test connection**. `rag_ready: false` is expected before you ingest anything.
138
147
 
139
148
  ### Agent access (Claude Code, Hermes, OpenClaw, etc.)
140
149
 
141
- Pair this server with [getbased-mcp](https://github.com/elkimek/getbased-agents/tree/main/packages/mcp) to expose `knowledge_search`, `knowledge_list_libraries`, `knowledge_activate_library`, and `knowledge_stats` as MCP tools. Typical setup: run both the lens server and getbased-mcp on the same VM, point MCP's `LENS_URL` at `http://localhost:8322`.
150
+ Pair this server with [getbased-mcp](https://github.com/elkimek/getbased-agents/tree/main/packages/mcp) to expose `knowledge_search`, `knowledge_list_libraries`, `knowledge_activate_library`, and `knowledge_stats` as MCP tools. Typical setup: run both the knowledge server and getbased-mcp on the same machine, then point MCP's `LENS_URL` at `http://localhost:8322`.
142
151
 
143
152
  ### Browser UI
144
153
 
@@ -183,7 +192,7 @@ LENS_ONNX_PROVIDER=cuda lens serve
183
192
 
184
193
  ## HTTP API
185
194
 
186
- All endpoints except `/`, `/health` require `Authorization: Bearer <key>`.
195
+ All endpoints except `/`, `/health` require `Authorization: Bearer ${LENS_API_KEY}`.
187
196
 
188
197
  | Method | Path | Purpose |
189
198
  |---|---|---|
@@ -206,7 +215,7 @@ All endpoints except `/`, `/health` require `Authorization: Bearer <key>`.
206
215
  ## Security notes
207
216
 
208
217
  - Default bind is `127.0.0.1` — queries never leak to the LAN unless you explicitly set `LENS_HOST=0.0.0.0`.
209
- - The API key file is mode `0600` and never exposed over HTTP. Use `lens key` locally to read it.
218
+ - The API key file is mode `0600`; `lens key` shows the file path instead of printing the secret.
210
219
  - Bearer comparison uses `secrets.compare_digest` — constant-time, no timing-leak class of bug.
211
220
  - Upload paths are basename-sanitised server-side (so `../../etc/passwd` can't escape the ingest temp dir).
212
221
  - Zip uploads are zip-slip-guarded — each archive entry must resolve inside its own per-zip subdirectory AND inside the overall ingest root.
@@ -222,8 +231,8 @@ lens ingest <path> Index files into the active library
222
231
  lens stats List indexed sources + chunk counts
223
232
  lens delete <source> Drop chunks belonging to one source
224
233
  lens clear Wipe the active library
225
- lens info Show config + API key
226
- lens key Print the API key (creates one if missing)
234
+ lens info Show config + masked API-key fingerprint
235
+ lens key Show API key file path
227
236
  ```
228
237
 
229
238
  ---
@@ -236,4 +245,4 @@ AGPL-3.0-or-later.
236
245
 
237
246
  ## Lineage
238
247
 
239
- This repo is the Python portion lifted out of [getbased](https://github.com/elkimek/getbased) after the Electron desktop app was retired. The PWA's `external-server` lens backend speaks this same HTTP contract unchanged.
248
+ This repo is the Python knowledge-server portion lifted out of [getbased](https://github.com/elkimek/get-based) after the Electron desktop app was retired. The PWA's external-server Knowledge Base backend speaks this same HTTP contract.
@@ -6,20 +6,20 @@ typer>=0.12
6
6
  pydantic>=2.0
7
7
  pyyaml>=6.0
8
8
  rich>=13.0
9
- python-multipart>=0.0.6
9
+ python-multipart>=0.0.31
10
10
 
11
11
  [docx]
12
12
  python-docx>=1.0
13
13
 
14
14
  [full]
15
- pypdf>=3.9
15
+ pypdf>=6.13.3
16
16
  python-docx>=1.0
17
17
  onnxruntime>=1.17
18
18
  huggingface-hub>=0.20
19
19
  tokenizers>=0.15
20
20
 
21
21
  [pdf]
22
- pypdf>=3.9
22
+ pypdf>=6.13.3
23
23
 
24
24
  [test]
25
25
  pytest>=8.0
@@ -9,8 +9,11 @@ from __future__ import annotations
9
9
 
10
10
  import os
11
11
  import secrets
12
+ import logging
12
13
  from pathlib import Path
13
14
 
15
+ log = logging.getLogger("lens.api_key")
16
+
14
17
 
15
18
  def get_or_create_api_key(key_file: Path) -> str:
16
19
  """Read the API key from disk; generate + write one if missing.
@@ -24,8 +27,8 @@ def get_or_create_api_key(key_file: Path) -> str:
24
27
  key = key_file.read_text().strip()
25
28
  if key:
26
29
  return key
27
- except OSError:
28
- pass
30
+ except OSError as e:
31
+ log.debug("Unable to read existing Lens API key file %s: %s", key_file, e)
29
32
 
30
33
  key_file.parent.mkdir(parents=True, exist_ok=True)
31
34
  key = secrets.token_urlsafe(32)
@@ -50,6 +53,6 @@ def load_api_key(key_file: Path) -> str | None:
50
53
  if key_file.exists():
51
54
  key = key_file.read_text().strip()
52
55
  return key if key else None
53
- except OSError:
54
- pass
56
+ except OSError as e:
57
+ log.debug("Unable to read Lens API key file %s: %s", key_file, e)
55
58
  return None
@@ -3,7 +3,7 @@
3
3
  lens serve Start the HTTP server (default if no command)
4
4
  lens ingest <path> Index files into the local store
5
5
  lens info Show config + key + status
6
- lens key Print the API key (creates one if missing)
6
+ lens key Show API key file path
7
7
 
8
8
  Configuration comes from environment variables — see config.py for the full list.
9
9
  The Tauri desktop wrapper sets LENS_HOST, LENS_PORT, LENS_DATA_DIR,
@@ -207,9 +207,10 @@ def info():
207
207
 
208
208
  @app.command()
209
209
  def key():
210
- """Print the API key (generates one on first invocation)."""
210
+ """Show the API key file path, creating the key if missing."""
211
211
  config = LensConfig.from_env()
212
- print(get_or_create_api_key(config.api_key_file))
212
+ get_or_create_api_key(config.api_key_file)
213
+ print(f"file: {config.api_key_file}")
213
214
 
214
215
 
215
216
  def main():
@@ -50,12 +50,12 @@ class Embedder(ABC):
50
50
  @abstractmethod
51
51
  def encode(self, texts: list[str]) -> list[list[float]]:
52
52
  """Encode a batch of texts into normalized vectors."""
53
- ...
53
+ raise NotImplementedError
54
54
 
55
55
  @abstractmethod
56
56
  def dimension(self) -> int:
57
57
  """Return the embedding dimensionality."""
58
- ...
58
+ raise NotImplementedError
59
59
 
60
60
  def info(self) -> dict:
61
61
  """Return a small dict describing this backend — which engine,
@@ -237,8 +237,8 @@ class OnnxEmbedder(Embedder):
237
237
  pad_token = raw
238
238
  elif isinstance(raw, dict):
239
239
  pad_token = raw.get("content")
240
- except (ValueError, _json.JSONDecodeError):
241
- pass
240
+ except (ValueError, _json.JSONDecodeError) as e:
241
+ log.debug("Unable to read pad token config from %s: %s", cfg_path, e)
242
242
 
243
243
  # Fallback: probe the tokenizer for any of the usual suspects.
244
244
  if not pad_token:
@@ -435,8 +435,8 @@ class OnnxEmbedder(Embedder):
435
435
  # cap"; treat anything absurd as 512 fallback.
436
436
  if 32 <= ml <= 16384:
437
437
  return ml
438
- except (ValueError, _json.JSONDecodeError):
439
- pass
438
+ except (ValueError, _json.JSONDecodeError) as e:
439
+ log.debug("Unable to read max-length config from %s: %s", cfg, e)
440
440
  return 8192 if "bge-m3" in self._model_name.lower() else 512
441
441
 
442
442
  def _detect_dimension(self) -> int:
@@ -506,8 +506,8 @@ class OnnxEmbedder(Embedder):
506
506
  providers = self._session.get_providers()
507
507
  if providers:
508
508
  active = providers[0]
509
- except Exception:
510
- pass
509
+ except Exception as e: # noqa: BLE001
510
+ log.debug("Unable to inspect active ONNX providers: %s", e)
511
511
  return {
512
512
  "engine": "onnx",
513
513
  "model": self._model_name,
@@ -577,8 +577,8 @@ class LocalEmbedder(Embedder):
577
577
  device = "cuda"
578
578
  elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
579
579
  device = "mps"
580
- except Exception:
581
- pass
580
+ except Exception as e: # noqa: BLE001
581
+ log.debug("Unable to inspect torch device availability: %s", e)
582
582
  return {
583
583
  "engine": "pytorch",
584
584
  "model": self._model_name,
@@ -638,8 +638,8 @@ class CloudInferenceEmbedder(Embedder):
638
638
  host = ""
639
639
  try:
640
640
  host = urlparse(self._url).hostname or ""
641
- except Exception:
642
- pass
641
+ except Exception as e: # noqa: BLE001
642
+ log.debug("Unable to parse Qdrant Cloud host from %s: %s", self._url, e)
643
643
  return {
644
644
  "engine": "qdrant-cloud",
645
645
  "model": self._model_name,
@@ -23,8 +23,6 @@ import os
23
23
  import tempfile
24
24
  import time
25
25
  import uuid
26
- from pathlib import Path
27
- from typing import Optional
28
26
 
29
27
  from .config import LensConfig
30
28
 
@@ -90,14 +88,14 @@ class Registry:
90
88
  f.flush()
91
89
  try:
92
90
  os.fsync(f.fileno())
93
- except OSError:
94
- pass
91
+ except OSError as e:
92
+ log.debug("Unable to fsync libraries registry temp file: %s", e)
95
93
  os.replace(tmp_name, self._path)
96
94
  except Exception:
97
95
  try:
98
96
  os.unlink(tmp_name)
99
- except OSError:
100
- pass
97
+ except OSError as e:
98
+ log.debug("Unable to remove temporary libraries registry %s: %s", tmp_name, e)
101
99
  raise
102
100
 
103
101
  # ── Public surface ────────────────────────────────────────────────
@@ -74,6 +74,10 @@ class LibraryRenameRequest(BaseModel):
74
74
  name: str = Field(..., min_length=1, max_length=120)
75
75
 
76
76
 
77
+ class SourceDeleteRequest(BaseModel):
78
+ source: str = Field(..., min_length=1, max_length=4096)
79
+
80
+
77
81
  def create_app(config: LensConfig) -> FastAPI:
78
82
  """Build the FastAPI app with config-driven dependencies."""
79
83
  config.ensure_dirs()
@@ -282,13 +286,8 @@ def create_app(config: LensConfig) -> FastAPI:
282
286
  log.exception("Stats failed")
283
287
  raise HTTPException(500, "Stats failed — see server logs")
284
288
 
285
- @app.delete("/sources/{source:path}")
286
- async def delete_source_endpoint(
287
- source: str,
288
- authorization: Optional[str] = Header(default=None),
289
- ):
289
+ def _delete_source(source: str) -> dict:
290
290
  """Delete every chunk for a given source in the active library."""
291
- require_auth(authorization)
292
291
  store = active_store()
293
292
  try:
294
293
  deleted = store.delete_by_source(source)
@@ -297,6 +296,27 @@ def create_app(config: LensConfig) -> FastAPI:
297
296
  log.exception("Delete failed")
298
297
  raise HTTPException(500, "Delete failed — see server logs")
299
298
 
299
+ @app.post("/sources/delete")
300
+ async def delete_source_body_endpoint(
301
+ req: SourceDeleteRequest,
302
+ authorization: Optional[str] = Header(default=None),
303
+ ):
304
+ """Delete a source via a fixed upstream path and JSON body.
305
+
306
+ Dashboard uses this route so browser-provided source names never become
307
+ part of the dashboard→rag request URL.
308
+ """
309
+ require_auth(authorization)
310
+ return _delete_source(req.source)
311
+
312
+ @app.delete("/sources/{source:path}")
313
+ async def delete_source_endpoint(
314
+ source: str,
315
+ authorization: Optional[str] = Header(default=None),
316
+ ):
317
+ require_auth(authorization)
318
+ return _delete_source(source)
319
+
300
320
  @app.delete("/sources")
301
321
  async def clear_endpoint(authorization: Optional[str] = Header(default=None)):
302
322
  """Drop the active library's collection contents."""
@@ -668,7 +688,7 @@ def create_app(config: LensConfig) -> FastAPI:
668
688
  break
669
689
  await _asyncio.sleep(1.0)
670
690
  except _asyncio.CancelledError:
671
- pass
691
+ log.debug("Disconnect watcher cancelled")
672
692
 
673
693
  async def _ndjson_gen():
674
694
  ingest_task = _asyncio.create_task(_run_ingest())
@@ -688,8 +708,12 @@ def create_app(config: LensConfig) -> FastAPI:
688
708
  if not ingest_task.done():
689
709
  try:
690
710
  await _asyncio.wait_for(ingest_task, timeout=5.0)
691
- except (_asyncio.CancelledError, _asyncio.TimeoutError, Exception):
692
- pass
711
+ except _asyncio.CancelledError:
712
+ log.debug("Cancelled while waiting for ingest task cleanup")
713
+ except _asyncio.TimeoutError:
714
+ log.debug("Timed out waiting for ingest task cleanup")
715
+ except Exception as e: # noqa: BLE001
716
+ log.debug("Ingest task cleanup failed: %s", e)
693
717
  _cleanup_tmpdir()
694
718
 
695
719
  return StreamingResponse(
@@ -3,7 +3,6 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
- from pathlib import Path
7
6
  from typing import Optional
8
7
 
9
8
  from .config import LensConfig
@@ -10,10 +10,8 @@ these silently corrupts embeddings for someone's knowledge base.
10
10
  from __future__ import annotations
11
11
 
12
12
  import json
13
- import os
14
13
  from pathlib import Path
15
14
 
16
- import numpy as np
17
15
  import pytest
18
16
 
19
17
  from lens.embedder import OnnxEmbedder, _ONNX_REPO_MAP
File without changes
File without changes