okb 1.1.0a0__tar.gz → 1.1.2__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {okb-1.1.0a0 → okb-1.1.2}/PKG-INFO +91 -8
  2. {okb-1.1.0a0 → okb-1.1.2}/README.md +90 -7
  3. {okb-1.1.0a0 → okb-1.1.2}/okb/cli.py +1083 -16
  4. {okb-1.1.0a0 → okb-1.1.2}/okb/config.py +122 -4
  5. {okb-1.1.0a0 → okb-1.1.2}/okb/http_server.py +356 -91
  6. okb-1.1.2/okb/llm/analyze.py +524 -0
  7. okb-1.1.2/okb/llm/consolidate.py +685 -0
  8. okb-1.1.2/okb/llm/enrich.py +723 -0
  9. okb-1.1.2/okb/llm/extractors/__init__.py +13 -0
  10. okb-1.1.2/okb/llm/extractors/base.py +44 -0
  11. okb-1.1.2/okb/llm/extractors/cross_doc.py +478 -0
  12. okb-1.1.2/okb/llm/extractors/dedup.py +499 -0
  13. okb-1.1.2/okb/llm/extractors/entity.py +369 -0
  14. okb-1.1.2/okb/llm/extractors/todo.py +149 -0
  15. {okb-1.1.0a0 → okb-1.1.2}/okb/llm/providers.py +9 -6
  16. {okb-1.1.0a0 → okb-1.1.2}/okb/mcp_server.py +1036 -12
  17. okb-1.1.2/okb/migrations/0008.enrichment.sql +46 -0
  18. okb-1.1.2/okb/migrations/0009.entity-consolidation.sql +120 -0
  19. okb-1.1.2/okb/migrations/0010.token-id.sql +7 -0
  20. {okb-1.1.0a0 → okb-1.1.2}/okb/modal_llm.py +26 -8
  21. {okb-1.1.0a0 → okb-1.1.2}/okb/plugins/sources/github.py +5 -5
  22. {okb-1.1.0a0 → okb-1.1.2}/okb/tokens.py +25 -3
  23. {okb-1.1.0a0 → okb-1.1.2}/pyproject.toml +1 -1
  24. {okb-1.1.0a0 → okb-1.1.2}/okb/__init__.py +0 -0
  25. {okb-1.1.0a0 → okb-1.1.2}/okb/data/init.sql +0 -0
  26. {okb-1.1.0a0 → okb-1.1.2}/okb/ingest.py +0 -0
  27. {okb-1.1.0a0 → okb-1.1.2}/okb/llm/__init__.py +0 -0
  28. {okb-1.1.0a0 → okb-1.1.2}/okb/llm/base.py +0 -0
  29. {okb-1.1.0a0 → okb-1.1.2}/okb/llm/cache.py +0 -0
  30. {okb-1.1.0a0 → okb-1.1.2}/okb/llm/filter.py +0 -0
  31. {okb-1.1.0a0 → okb-1.1.2}/okb/local_embedder.py +0 -0
  32. {okb-1.1.0a0 → okb-1.1.2}/okb/migrate.py +0 -0
  33. {okb-1.1.0a0 → okb-1.1.2}/okb/migrations/0001.initial-schema.sql +0 -0
  34. {okb-1.1.0a0 → okb-1.1.2}/okb/migrations/0002.sync-state.sql +0 -0
  35. {okb-1.1.0a0 → okb-1.1.2}/okb/migrations/0003.structured-fields.sql +0 -0
  36. {okb-1.1.0a0 → okb-1.1.2}/okb/migrations/0004.tokens.sql +0 -0
  37. {okb-1.1.0a0 → okb-1.1.2}/okb/migrations/0005.database-metadata.sql +0 -0
  38. {okb-1.1.0a0 → okb-1.1.2}/okb/migrations/0006.llm-cache.sql +0 -0
  39. {okb-1.1.0a0 → okb-1.1.2}/okb/modal_embedder.py +0 -0
  40. {okb-1.1.0a0 → okb-1.1.2}/okb/plugins/__init__.py +0 -0
  41. {okb-1.1.0a0 → okb-1.1.2}/okb/plugins/base.py +0 -0
  42. {okb-1.1.0a0 → okb-1.1.2}/okb/plugins/registry.py +0 -0
  43. {okb-1.1.0a0 → okb-1.1.2}/okb/plugins/sources/__init__.py +0 -0
  44. {okb-1.1.0a0 → okb-1.1.2}/okb/plugins/sources/dropbox_paper.py +0 -0
  45. {okb-1.1.0a0 → okb-1.1.2}/okb/plugins/sources/todoist.py +0 -0
  46. {okb-1.1.0a0 → okb-1.1.2}/okb/rescan.py +0 -0
  47. {okb-1.1.0a0 → okb-1.1.2}/okb/scripts/__init__.py +0 -0
  48. {okb-1.1.0a0 → okb-1.1.2}/okb/scripts/watch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: okb
3
- Version: 1.1.0a0
3
+ Version: 1.1.2
4
4
  Summary: Personal knowledge base with semantic search for LLMs
5
5
  Requires-Python: >=3.11
6
6
  Classifier: Programming Language :: Python :: 3
@@ -85,6 +85,8 @@ okb ingest ~/notes ~/docs
85
85
  | `okb db start` | Start pgvector database container |
86
86
  | `okb db stop` | Stop database container |
87
87
  | `okb db status` | Show database status |
88
+ | `okb db migrate [name]` | Apply pending migrations (optionally for a specific database) |
89
+ | `okb db list` | List configured databases |
88
90
  | `okb db destroy` | Remove container and volume (destructive) |
89
91
  | `okb ingest <paths>` | Ingest documents into knowledge base |
90
92
  | `okb ingest <paths> --local` | Ingest using local GPU/CPU embedding (no Modal) |
@@ -93,10 +95,11 @@ okb ingest ~/notes ~/docs
93
95
  | `okb watch <paths>` | Watch directories for changes |
94
96
  | `okb config init` | Create default config file |
95
97
  | `okb config show` | Show current configuration |
98
+ | `okb config path` | Print config file path |
96
99
  | `okb modal deploy` | Deploy GPU embedder to Modal |
97
100
  | `okb token create` | Create API token for HTTP server |
98
101
  | `okb token list` | List tokens for a database |
99
- | `okb token revoke` | Revoke an API token |
102
+ | `okb token revoke [TOKEN] --id <n>` | Revoke token by full value or ID |
100
103
  | `okb sync list` | List available API sources (plugins) |
101
104
  | `okb sync list-projects <source>` | List projects from source (for config) |
102
105
  | `okb sync run <sources>` | Sync data from external APIs |
@@ -108,6 +111,18 @@ okb ingest ~/notes ~/docs
108
111
  | `okb llm status` | Show LLM config and connectivity |
109
112
  | `okb llm deploy` | Deploy Modal LLM for open model inference |
110
113
  | `okb llm clear-cache` | Clear LLM response cache |
114
+ | `okb enrich run` | Extract TODOs and entities from documents |
115
+ | `okb enrich run --dry-run` | Show what would be enriched |
116
+ | `okb enrich pending` | List entities awaiting review |
117
+ | `okb enrich approve <id>` | Approve a pending entity |
118
+ | `okb enrich reject <id>` | Reject a pending entity |
119
+ | `okb enrich analyze` | Analyze database and update description/topics |
120
+ | `okb enrich consolidate` | Run entity consolidation (duplicates, clusters) |
121
+ | `okb enrich merge-proposals` | List pending merge proposals |
122
+ | `okb enrich approve-merge <id>` | Approve an entity merge |
123
+ | `okb enrich reject-merge <id>` | Reject an entity merge |
124
+ | `okb enrich clusters` | List topic clusters |
125
+ | `okb enrich relationships` | List entity relationships |
111
126
 
112
127
 
113
128
  ## Configuration
@@ -142,7 +157,7 @@ chunking:
142
157
  Use `--db <name>` to target a specific database with any command.
143
158
 
144
159
  Environment variables override config file settings:
145
- - `KB_DATABASE_URL` - Database connection string
160
+ - `OKB_DATABASE_URL` - Database connection string
146
161
  - `OKB_DOCKER_PORT` - Docker port mapping
147
162
  - `OKB_CONTAINER_NAME` - Docker container name
148
163
 
@@ -163,7 +178,7 @@ Merge: scalars replace, lists extend, dicts deep-merge.
163
178
 
164
179
  ### LLM Integration (Optional)
165
180
 
166
- Enable LLM-based document classification and filtering:
181
+ Enable LLM-based document classification, filtering, and enrichment:
167
182
 
168
183
  ```yaml
169
184
  llm:
@@ -179,11 +194,25 @@ llm:
179
194
  | `claude` | `export ANTHROPIC_API_KEY=...` | ~$0.25/1M tokens |
180
195
  | `modal` | `okb llm deploy` | ~$0.02/min GPU |
181
196
 
182
- For Modal (no API key needed):
197
+ **Modal LLM Setup** (no API key needed, runs on Modal's GPUs):
198
+
183
199
  ```yaml
184
200
  llm:
185
201
  provider: modal
186
- model: meta-llama/Llama-3.2-3B-Instruct
202
+ model: microsoft/Phi-3-mini-4k-instruct # Recommended: no gating
203
+ ```
204
+
205
+ Non-gated models (work immediately):
206
+ - `microsoft/Phi-3-mini-4k-instruct` - Good quality, 4K context
207
+ - `Qwen/Qwen2-1.5B-Instruct` - Smaller/faster
208
+
209
+ Gated models (require HuggingFace approval + token):
210
+ - `meta-llama/Llama-3.2-3B-Instruct` - Requires accepting license at HuggingFace
211
+ - Setup: `modal secret create huggingface HF_TOKEN=hf_...`
212
+
213
+ Deploy after configuring:
214
+ ```bash
215
+ okb llm deploy
187
216
  ```
188
217
 
189
218
  **Pre-ingest filtering** - skip low-value content during sync:
@@ -197,6 +226,36 @@ plugins:
197
226
  action_on_skip: discard # or "archive"
198
227
  ```
199
228
 
229
+ ### Document Enrichment
230
+
231
+ Extract TODOs and entities (people, projects, technologies) from documents using an LLM:
232
+
233
+ ```bash
234
+ okb enrich run # Enrich un-enriched documents
235
+ okb enrich run --dry-run # Preview what would be enriched
236
+ okb enrich run --source-type markdown # Only markdown files
237
+ okb enrich run --query "meeting" # Filter by semantic search
238
+ ```
239
+
240
+ Entities are created as pending suggestions for review:
241
+ ```bash
242
+ okb enrich pending # List pending entities
243
+ okb enrich approve <id> # Approve → creates entity document
244
+ okb enrich reject <id> # Reject → hidden from future suggestions
245
+ ```
246
+
247
+ Configure enrichment behavior:
248
+ ```yaml
249
+ enrichment:
250
+ enabled: true
251
+ extract_todos: true
252
+ extract_entities: true
253
+ auto_create_todos: true # TODOs created immediately
254
+ auto_create_entities: false # Entities go to pending review
255
+ min_confidence_todo: 0.7
256
+ min_confidence_entity: 0.8
257
+ ```
258
+
200
259
  CLI commands:
201
260
  ```bash
202
261
  okb llm status # Show config and connectivity
@@ -234,14 +293,20 @@ okb token create --db default -d "Claude Code"
234
293
  okb serve --http --host 0.0.0.0 --port 8080
235
294
  ```
236
295
 
237
- Then configure Claude Code to connect via SSE:
296
+ The server uses the MCP Streamable HTTP transport (RFC 9728 compliant):
297
+ - `POST /mcp` - Send JSON-RPC messages, receive SSE response
298
+ - `GET /mcp` - Establish SSE connection for server notifications
299
+ - `DELETE /mcp` - Terminate session
300
+ - `/sse` is an alias for `/mcp` for backward compatibility
301
+
302
+ Configure your MCP client to connect:
238
303
 
239
304
  ```json
240
305
  {
241
306
  "mcpServers": {
242
307
  "knowledge-base": {
243
308
  "type": "sse",
244
- "url": "http://localhost:8080/sse",
309
+ "url": "http://localhost:8080/mcp",
245
310
  "headers": {
246
311
  "Authorization": "Bearer okb_default_rw_a1b2c3d4e5f6g7h8"
247
312
  }
@@ -269,6 +334,20 @@ Then configure Claude Code to connect via SSE:
269
334
  | `add_todo` | Create a TODO item in the knowledge base |
270
335
  | `trigger_sync` | Sync API sources (Todoist, GitHub, Dropbox Paper) |
271
336
  | `trigger_rescan` | Check indexed files for changes and re-ingest |
337
+ | `list_sync_sources` | List available API sync sources with status |
338
+ | `enrich_document` | Run LLM enrichment to extract TODOs/entities |
339
+ | `list_pending_entities` | List entities awaiting review |
340
+ | `approve_entity` | Approve a pending entity |
341
+ | `reject_entity` | Reject a pending entity |
342
+ | `analyze_knowledge_base` | Analyze content and generate description/topics |
343
+ | `find_entity_duplicates` | Find potential duplicate entities |
344
+ | `merge_entities` | Merge duplicate entities |
345
+ | `list_pending_merges` | List pending merge proposals |
346
+ | `approve_merge` | Approve a merge proposal |
347
+ | `reject_merge` | Reject a merge proposal |
348
+ | `get_topic_clusters` | Get topic clusters from consolidation |
349
+ | `get_entity_relationships` | Get relationships between entities |
350
+ | `run_consolidation` | Run full entity consolidation pipeline |
272
351
 
273
352
  ## Contextual Chunking
274
353
 
@@ -291,6 +370,10 @@ project: student-app
291
370
  category: backend
292
371
  ---
293
372
 
373
+ # Your Document Title
374
+
375
+ Content here...
376
+ ```
294
377
 
295
378
  ## Plugin System
296
379
 
@@ -36,6 +36,8 @@ okb ingest ~/notes ~/docs
36
36
  | `okb db start` | Start pgvector database container |
37
37
  | `okb db stop` | Stop database container |
38
38
  | `okb db status` | Show database status |
39
+ | `okb db migrate [name]` | Apply pending migrations (optionally for a specific database) |
40
+ | `okb db list` | List configured databases |
39
41
  | `okb db destroy` | Remove container and volume (destructive) |
40
42
  | `okb ingest <paths>` | Ingest documents into knowledge base |
41
43
  | `okb ingest <paths> --local` | Ingest using local GPU/CPU embedding (no Modal) |
@@ -44,10 +46,11 @@ okb ingest ~/notes ~/docs
44
46
  | `okb watch <paths>` | Watch directories for changes |
45
47
  | `okb config init` | Create default config file |
46
48
  | `okb config show` | Show current configuration |
49
+ | `okb config path` | Print config file path |
47
50
  | `okb modal deploy` | Deploy GPU embedder to Modal |
48
51
  | `okb token create` | Create API token for HTTP server |
49
52
  | `okb token list` | List tokens for a database |
50
- | `okb token revoke` | Revoke an API token |
53
+ | `okb token revoke [TOKEN] --id <n>` | Revoke token by full value or ID |
51
54
  | `okb sync list` | List available API sources (plugins) |
52
55
  | `okb sync list-projects <source>` | List projects from source (for config) |
53
56
  | `okb sync run <sources>` | Sync data from external APIs |
@@ -59,6 +62,18 @@ okb ingest ~/notes ~/docs
59
62
  | `okb llm status` | Show LLM config and connectivity |
60
63
  | `okb llm deploy` | Deploy Modal LLM for open model inference |
61
64
  | `okb llm clear-cache` | Clear LLM response cache |
65
+ | `okb enrich run` | Extract TODOs and entities from documents |
66
+ | `okb enrich run --dry-run` | Show what would be enriched |
67
+ | `okb enrich pending` | List entities awaiting review |
68
+ | `okb enrich approve <id>` | Approve a pending entity |
69
+ | `okb enrich reject <id>` | Reject a pending entity |
70
+ | `okb enrich analyze` | Analyze database and update description/topics |
71
+ | `okb enrich consolidate` | Run entity consolidation (duplicates, clusters) |
72
+ | `okb enrich merge-proposals` | List pending merge proposals |
73
+ | `okb enrich approve-merge <id>` | Approve an entity merge |
74
+ | `okb enrich reject-merge <id>` | Reject an entity merge |
75
+ | `okb enrich clusters` | List topic clusters |
76
+ | `okb enrich relationships` | List entity relationships |
62
77
 
63
78
 
64
79
  ## Configuration
@@ -93,7 +108,7 @@ chunking:
93
108
  Use `--db <name>` to target a specific database with any command.
94
109
 
95
110
  Environment variables override config file settings:
96
- - `KB_DATABASE_URL` - Database connection string
111
+ - `OKB_DATABASE_URL` - Database connection string
97
112
  - `OKB_DOCKER_PORT` - Docker port mapping
98
113
  - `OKB_CONTAINER_NAME` - Docker container name
99
114
 
@@ -114,7 +129,7 @@ Merge: scalars replace, lists extend, dicts deep-merge.
114
129
 
115
130
  ### LLM Integration (Optional)
116
131
 
117
- Enable LLM-based document classification and filtering:
132
+ Enable LLM-based document classification, filtering, and enrichment:
118
133
 
119
134
  ```yaml
120
135
  llm:
@@ -130,11 +145,25 @@ llm:
130
145
  | `claude` | `export ANTHROPIC_API_KEY=...` | ~$0.25/1M tokens |
131
146
  | `modal` | `okb llm deploy` | ~$0.02/min GPU |
132
147
 
133
- For Modal (no API key needed):
148
+ **Modal LLM Setup** (no API key needed, runs on Modal's GPUs):
149
+
134
150
  ```yaml
135
151
  llm:
136
152
  provider: modal
137
- model: meta-llama/Llama-3.2-3B-Instruct
153
+ model: microsoft/Phi-3-mini-4k-instruct # Recommended: no gating
154
+ ```
155
+
156
+ Non-gated models (work immediately):
157
+ - `microsoft/Phi-3-mini-4k-instruct` - Good quality, 4K context
158
+ - `Qwen/Qwen2-1.5B-Instruct` - Smaller/faster
159
+
160
+ Gated models (require HuggingFace approval + token):
161
+ - `meta-llama/Llama-3.2-3B-Instruct` - Requires accepting license at HuggingFace
162
+ - Setup: `modal secret create huggingface HF_TOKEN=hf_...`
163
+
164
+ Deploy after configuring:
165
+ ```bash
166
+ okb llm deploy
138
167
  ```
139
168
 
140
169
  **Pre-ingest filtering** - skip low-value content during sync:
@@ -148,6 +177,36 @@ plugins:
148
177
  action_on_skip: discard # or "archive"
149
178
  ```
150
179
 
180
+ ### Document Enrichment
181
+
182
+ Extract TODOs and entities (people, projects, technologies) from documents using an LLM:
183
+
184
+ ```bash
185
+ okb enrich run # Enrich un-enriched documents
186
+ okb enrich run --dry-run # Preview what would be enriched
187
+ okb enrich run --source-type markdown # Only markdown files
188
+ okb enrich run --query "meeting" # Filter by semantic search
189
+ ```
190
+
191
+ Entities are created as pending suggestions for review:
192
+ ```bash
193
+ okb enrich pending # List pending entities
194
+ okb enrich approve <id> # Approve → creates entity document
195
+ okb enrich reject <id> # Reject → hidden from future suggestions
196
+ ```
197
+
198
+ Configure enrichment behavior:
199
+ ```yaml
200
+ enrichment:
201
+ enabled: true
202
+ extract_todos: true
203
+ extract_entities: true
204
+ auto_create_todos: true # TODOs created immediately
205
+ auto_create_entities: false # Entities go to pending review
206
+ min_confidence_todo: 0.7
207
+ min_confidence_entity: 0.8
208
+ ```
209
+
151
210
  CLI commands:
152
211
  ```bash
153
212
  okb llm status # Show config and connectivity
@@ -185,14 +244,20 @@ okb token create --db default -d "Claude Code"
185
244
  okb serve --http --host 0.0.0.0 --port 8080
186
245
  ```
187
246
 
188
- Then configure Claude Code to connect via SSE:
247
+ The server uses the MCP Streamable HTTP transport (RFC 9728 compliant):
248
+ - `POST /mcp` - Send JSON-RPC messages, receive SSE response
249
+ - `GET /mcp` - Establish SSE connection for server notifications
250
+ - `DELETE /mcp` - Terminate session
251
+ - `/sse` is an alias for `/mcp` for backward compatibility
252
+
253
+ Configure your MCP client to connect:
189
254
 
190
255
  ```json
191
256
  {
192
257
  "mcpServers": {
193
258
  "knowledge-base": {
194
259
  "type": "sse",
195
- "url": "http://localhost:8080/sse",
260
+ "url": "http://localhost:8080/mcp",
196
261
  "headers": {
197
262
  "Authorization": "Bearer okb_default_rw_a1b2c3d4e5f6g7h8"
198
263
  }
@@ -220,6 +285,20 @@ Then configure Claude Code to connect via SSE:
220
285
  | `add_todo` | Create a TODO item in the knowledge base |
221
286
  | `trigger_sync` | Sync API sources (Todoist, GitHub, Dropbox Paper) |
222
287
  | `trigger_rescan` | Check indexed files for changes and re-ingest |
288
+ | `list_sync_sources` | List available API sync sources with status |
289
+ | `enrich_document` | Run LLM enrichment to extract TODOs/entities |
290
+ | `list_pending_entities` | List entities awaiting review |
291
+ | `approve_entity` | Approve a pending entity |
292
+ | `reject_entity` | Reject a pending entity |
293
+ | `analyze_knowledge_base` | Analyze content and generate description/topics |
294
+ | `find_entity_duplicates` | Find potential duplicate entities |
295
+ | `merge_entities` | Merge duplicate entities |
296
+ | `list_pending_merges` | List pending merge proposals |
297
+ | `approve_merge` | Approve a merge proposal |
298
+ | `reject_merge` | Reject a merge proposal |
299
+ | `get_topic_clusters` | Get topic clusters from consolidation |
300
+ | `get_entity_relationships` | Get relationships between entities |
301
+ | `run_consolidation` | Run full entity consolidation pipeline |
223
302
 
224
303
  ## Contextual Chunking
225
304
 
@@ -242,6 +321,10 @@ project: student-app
242
321
  category: backend
243
322
  ---
244
323
 
324
+ # Your Document Title
325
+
326
+ Content here...
327
+ ```
245
328
 
246
329
  ## Plugin System
247
330