okb 1.1.0__tar.gz → 1.1.0a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {okb-1.1.0 → okb-1.1.0a0}/PKG-INFO +6 -83
- {okb-1.1.0 → okb-1.1.0a0}/README.md +5 -82
- {okb-1.1.0 → okb-1.1.0a0}/okb/cli.py +16 -1083
- {okb-1.1.0 → okb-1.1.0a0}/okb/config.py +4 -122
- {okb-1.1.0 → okb-1.1.0a0}/okb/http_server.py +2 -163
- {okb-1.1.0 → okb-1.1.0a0}/okb/llm/providers.py +6 -9
- {okb-1.1.0 → okb-1.1.0a0}/okb/mcp_server.py +12 -1036
- {okb-1.1.0 → okb-1.1.0a0}/okb/modal_llm.py +8 -26
- {okb-1.1.0 → okb-1.1.0a0}/okb/plugins/sources/github.py +5 -5
- {okb-1.1.0 → okb-1.1.0a0}/okb/tokens.py +3 -25
- {okb-1.1.0 → okb-1.1.0a0}/pyproject.toml +1 -1
- okb-1.1.0/okb/llm/analyze.py +0 -524
- okb-1.1.0/okb/llm/consolidate.py +0 -685
- okb-1.1.0/okb/llm/enrich.py +0 -723
- okb-1.1.0/okb/llm/extractors/__init__.py +0 -13
- okb-1.1.0/okb/llm/extractors/base.py +0 -44
- okb-1.1.0/okb/llm/extractors/cross_doc.py +0 -478
- okb-1.1.0/okb/llm/extractors/dedup.py +0 -499
- okb-1.1.0/okb/llm/extractors/entity.py +0 -369
- okb-1.1.0/okb/llm/extractors/todo.py +0 -149
- okb-1.1.0/okb/migrations/0008.enrichment.sql +0 -46
- okb-1.1.0/okb/migrations/0009.entity-consolidation.sql +0 -120
- okb-1.1.0/okb/migrations/0010.token-id.sql +0 -7
- {okb-1.1.0 → okb-1.1.0a0}/okb/__init__.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/data/init.sql +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/ingest.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/llm/__init__.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/llm/base.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/llm/cache.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/llm/filter.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/local_embedder.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/migrate.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/migrations/0001.initial-schema.sql +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/migrations/0002.sync-state.sql +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/migrations/0003.structured-fields.sql +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/migrations/0004.tokens.sql +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/migrations/0005.database-metadata.sql +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/migrations/0006.llm-cache.sql +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/modal_embedder.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/plugins/__init__.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/plugins/base.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/plugins/registry.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/plugins/sources/__init__.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/plugins/sources/dropbox_paper.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/plugins/sources/todoist.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/rescan.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/scripts/__init__.py +0 -0
- {okb-1.1.0 → okb-1.1.0a0}/okb/scripts/watch.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: okb
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.0a0
|
|
4
4
|
Summary: Personal knowledge base with semantic search for LLMs
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -85,8 +85,6 @@ okb ingest ~/notes ~/docs
|
|
|
85
85
|
| `okb db start` | Start pgvector database container |
|
|
86
86
|
| `okb db stop` | Stop database container |
|
|
87
87
|
| `okb db status` | Show database status |
|
|
88
|
-
| `okb db migrate [name]` | Apply pending migrations (optionally for specific db) |
|
|
89
|
-
| `okb db list` | List configured databases |
|
|
90
88
|
| `okb db destroy` | Remove container and volume (destructive) |
|
|
91
89
|
| `okb ingest <paths>` | Ingest documents into knowledge base |
|
|
92
90
|
| `okb ingest <paths> --local` | Ingest using local GPU/CPU embedding (no Modal) |
|
|
@@ -95,11 +93,10 @@ okb ingest ~/notes ~/docs
|
|
|
95
93
|
| `okb watch <paths>` | Watch directories for changes |
|
|
96
94
|
| `okb config init` | Create default config file |
|
|
97
95
|
| `okb config show` | Show current configuration |
|
|
98
|
-
| `okb config path` | Print config file path |
|
|
99
96
|
| `okb modal deploy` | Deploy GPU embedder to Modal |
|
|
100
97
|
| `okb token create` | Create API token for HTTP server |
|
|
101
98
|
| `okb token list` | List tokens for a database |
|
|
102
|
-
| `okb token revoke
|
|
99
|
+
| `okb token revoke` | Revoke an API token |
|
|
103
100
|
| `okb sync list` | List available API sources (plugins) |
|
|
104
101
|
| `okb sync list-projects <source>` | List projects from source (for config) |
|
|
105
102
|
| `okb sync run <sources>` | Sync data from external APIs |
|
|
@@ -111,18 +108,6 @@ okb ingest ~/notes ~/docs
|
|
|
111
108
|
| `okb llm status` | Show LLM config and connectivity |
|
|
112
109
|
| `okb llm deploy` | Deploy Modal LLM for open model inference |
|
|
113
110
|
| `okb llm clear-cache` | Clear LLM response cache |
|
|
114
|
-
| `okb enrich run` | Extract TODOs and entities from documents |
|
|
115
|
-
| `okb enrich run --dry-run` | Show what would be enriched |
|
|
116
|
-
| `okb enrich pending` | List entities awaiting review |
|
|
117
|
-
| `okb enrich approve <id>` | Approve a pending entity |
|
|
118
|
-
| `okb enrich reject <id>` | Reject a pending entity |
|
|
119
|
-
| `okb enrich analyze` | Analyze database and update description/topics |
|
|
120
|
-
| `okb enrich consolidate` | Run entity consolidation (duplicates, clusters) |
|
|
121
|
-
| `okb enrich merge-proposals` | List pending merge proposals |
|
|
122
|
-
| `okb enrich approve-merge <id>` | Approve an entity merge |
|
|
123
|
-
| `okb enrich reject-merge <id>` | Reject an entity merge |
|
|
124
|
-
| `okb enrich clusters` | List topic clusters |
|
|
125
|
-
| `okb enrich relationships` | List entity relationships |
|
|
126
111
|
|
|
127
112
|
|
|
128
113
|
## Configuration
|
|
@@ -157,7 +142,7 @@ chunking:
|
|
|
157
142
|
Use `--db <name>` to target a specific database with any command.
|
|
158
143
|
|
|
159
144
|
Environment variables override config file settings:
|
|
160
|
-
- `
|
|
145
|
+
- `KB_DATABASE_URL` - Database connection string
|
|
161
146
|
- `OKB_DOCKER_PORT` - Docker port mapping
|
|
162
147
|
- `OKB_CONTAINER_NAME` - Docker container name
|
|
163
148
|
|
|
@@ -178,7 +163,7 @@ Merge: scalars replace, lists extend, dicts deep-merge.
|
|
|
178
163
|
|
|
179
164
|
### LLM Integration (Optional)
|
|
180
165
|
|
|
181
|
-
Enable LLM-based document classification
|
|
166
|
+
Enable LLM-based document classification and filtering:
|
|
182
167
|
|
|
183
168
|
```yaml
|
|
184
169
|
llm:
|
|
@@ -194,25 +179,11 @@ llm:
|
|
|
194
179
|
| `claude` | `export ANTHROPIC_API_KEY=...` | ~$0.25/1M tokens |
|
|
195
180
|
| `modal` | `okb llm deploy` | ~$0.02/min GPU |
|
|
196
181
|
|
|
197
|
-
|
|
198
|
-
|
|
182
|
+
For Modal (no API key needed):
|
|
199
183
|
```yaml
|
|
200
184
|
llm:
|
|
201
185
|
provider: modal
|
|
202
|
-
model:
|
|
203
|
-
```
|
|
204
|
-
|
|
205
|
-
Non-gated models (work immediately):
|
|
206
|
-
- `microsoft/Phi-3-mini-4k-instruct` - Good quality, 4K context
|
|
207
|
-
- `Qwen/Qwen2-1.5B-Instruct` - Smaller/faster
|
|
208
|
-
|
|
209
|
-
Gated models (require HuggingFace approval + token):
|
|
210
|
-
- `meta-llama/Llama-3.2-3B-Instruct` - Requires accepting license at HuggingFace
|
|
211
|
-
- Setup: `modal secret create huggingface HF_TOKEN=hf_...`
|
|
212
|
-
|
|
213
|
-
Deploy after configuring:
|
|
214
|
-
```bash
|
|
215
|
-
okb llm deploy
|
|
186
|
+
model: meta-llama/Llama-3.2-3B-Instruct
|
|
216
187
|
```
|
|
217
188
|
|
|
218
189
|
**Pre-ingest filtering** - skip low-value content during sync:
|
|
@@ -226,36 +197,6 @@ plugins:
|
|
|
226
197
|
action_on_skip: discard # or "archive"
|
|
227
198
|
```
|
|
228
199
|
|
|
229
|
-
### Document Enrichment
|
|
230
|
-
|
|
231
|
-
Extract TODOs and entities (people, projects, technologies) from documents using LLM:
|
|
232
|
-
|
|
233
|
-
```bash
|
|
234
|
-
okb enrich run # Enrich un-enriched documents
|
|
235
|
-
okb enrich run --dry-run # Preview what would be enriched
|
|
236
|
-
okb enrich run --source-type markdown # Only markdown files
|
|
237
|
-
okb enrich run --query "meeting" # Filter by semantic search
|
|
238
|
-
```
|
|
239
|
-
|
|
240
|
-
Entities are created as pending suggestions for review:
|
|
241
|
-
```bash
|
|
242
|
-
okb enrich pending # List pending entities
|
|
243
|
-
okb enrich approve <id> # Approve → creates entity document
|
|
244
|
-
okb enrich reject <id> # Reject → hidden from future suggestions
|
|
245
|
-
```
|
|
246
|
-
|
|
247
|
-
Configure enrichment behavior:
|
|
248
|
-
```yaml
|
|
249
|
-
enrichment:
|
|
250
|
-
enabled: true
|
|
251
|
-
extract_todos: true
|
|
252
|
-
extract_entities: true
|
|
253
|
-
auto_create_todos: true # TODOs created immediately
|
|
254
|
-
auto_create_entities: false # Entities go to pending review
|
|
255
|
-
min_confidence_todo: 0.7
|
|
256
|
-
min_confidence_entity: 0.8
|
|
257
|
-
```
|
|
258
|
-
|
|
259
200
|
CLI commands:
|
|
260
201
|
```bash
|
|
261
202
|
okb llm status # Show config and connectivity
|
|
@@ -328,20 +269,6 @@ Then configure Claude Code to connect via SSE:
|
|
|
328
269
|
| `add_todo` | Create a TODO item in the knowledge base |
|
|
329
270
|
| `trigger_sync` | Sync API sources (Todoist, GitHub, Dropbox Paper) |
|
|
330
271
|
| `trigger_rescan` | Check indexed files for changes and re-ingest |
|
|
331
|
-
| `list_sync_sources` | List available API sync sources with status |
|
|
332
|
-
| `enrich_document` | Run LLM enrichment to extract TODOs/entities |
|
|
333
|
-
| `list_pending_entities` | List entities awaiting review |
|
|
334
|
-
| `approve_entity` | Approve a pending entity |
|
|
335
|
-
| `reject_entity` | Reject a pending entity |
|
|
336
|
-
| `analyze_knowledge_base` | Analyze content and generate description/topics |
|
|
337
|
-
| `find_entity_duplicates` | Find potential duplicate entities |
|
|
338
|
-
| `merge_entities` | Merge duplicate entities |
|
|
339
|
-
| `list_pending_merges` | List pending merge proposals |
|
|
340
|
-
| `approve_merge` | Approve a merge proposal |
|
|
341
|
-
| `reject_merge` | Reject a merge proposal |
|
|
342
|
-
| `get_topic_clusters` | Get topic clusters from consolidation |
|
|
343
|
-
| `get_entity_relationships` | Get relationships between entities |
|
|
344
|
-
| `run_consolidation` | Run full entity consolidation pipeline |
|
|
345
272
|
|
|
346
273
|
## Contextual Chunking
|
|
347
274
|
|
|
@@ -364,10 +291,6 @@ project: student-app
|
|
|
364
291
|
category: backend
|
|
365
292
|
---
|
|
366
293
|
|
|
367
|
-
# Your Document Title
|
|
368
|
-
|
|
369
|
-
Content here...
|
|
370
|
-
```
|
|
371
294
|
|
|
372
295
|
## Plugin System
|
|
373
296
|
|
|
@@ -36,8 +36,6 @@ okb ingest ~/notes ~/docs
|
|
|
36
36
|
| `okb db start` | Start pgvector database container |
|
|
37
37
|
| `okb db stop` | Stop database container |
|
|
38
38
|
| `okb db status` | Show database status |
|
|
39
|
-
| `okb db migrate [name]` | Apply pending migrations (optionally for specific db) |
|
|
40
|
-
| `okb db list` | List configured databases |
|
|
41
39
|
| `okb db destroy` | Remove container and volume (destructive) |
|
|
42
40
|
| `okb ingest <paths>` | Ingest documents into knowledge base |
|
|
43
41
|
| `okb ingest <paths> --local` | Ingest using local GPU/CPU embedding (no Modal) |
|
|
@@ -46,11 +44,10 @@ okb ingest ~/notes ~/docs
|
|
|
46
44
|
| `okb watch <paths>` | Watch directories for changes |
|
|
47
45
|
| `okb config init` | Create default config file |
|
|
48
46
|
| `okb config show` | Show current configuration |
|
|
49
|
-
| `okb config path` | Print config file path |
|
|
50
47
|
| `okb modal deploy` | Deploy GPU embedder to Modal |
|
|
51
48
|
| `okb token create` | Create API token for HTTP server |
|
|
52
49
|
| `okb token list` | List tokens for a database |
|
|
53
|
-
| `okb token revoke
|
|
50
|
+
| `okb token revoke` | Revoke an API token |
|
|
54
51
|
| `okb sync list` | List available API sources (plugins) |
|
|
55
52
|
| `okb sync list-projects <source>` | List projects from source (for config) |
|
|
56
53
|
| `okb sync run <sources>` | Sync data from external APIs |
|
|
@@ -62,18 +59,6 @@ okb ingest ~/notes ~/docs
|
|
|
62
59
|
| `okb llm status` | Show LLM config and connectivity |
|
|
63
60
|
| `okb llm deploy` | Deploy Modal LLM for open model inference |
|
|
64
61
|
| `okb llm clear-cache` | Clear LLM response cache |
|
|
65
|
-
| `okb enrich run` | Extract TODOs and entities from documents |
|
|
66
|
-
| `okb enrich run --dry-run` | Show what would be enriched |
|
|
67
|
-
| `okb enrich pending` | List entities awaiting review |
|
|
68
|
-
| `okb enrich approve <id>` | Approve a pending entity |
|
|
69
|
-
| `okb enrich reject <id>` | Reject a pending entity |
|
|
70
|
-
| `okb enrich analyze` | Analyze database and update description/topics |
|
|
71
|
-
| `okb enrich consolidate` | Run entity consolidation (duplicates, clusters) |
|
|
72
|
-
| `okb enrich merge-proposals` | List pending merge proposals |
|
|
73
|
-
| `okb enrich approve-merge <id>` | Approve an entity merge |
|
|
74
|
-
| `okb enrich reject-merge <id>` | Reject an entity merge |
|
|
75
|
-
| `okb enrich clusters` | List topic clusters |
|
|
76
|
-
| `okb enrich relationships` | List entity relationships |
|
|
77
62
|
|
|
78
63
|
|
|
79
64
|
## Configuration
|
|
@@ -108,7 +93,7 @@ chunking:
|
|
|
108
93
|
Use `--db <name>` to target a specific database with any command.
|
|
109
94
|
|
|
110
95
|
Environment variables override config file settings:
|
|
111
|
-
- `
|
|
96
|
+
- `KB_DATABASE_URL` - Database connection string
|
|
112
97
|
- `OKB_DOCKER_PORT` - Docker port mapping
|
|
113
98
|
- `OKB_CONTAINER_NAME` - Docker container name
|
|
114
99
|
|
|
@@ -129,7 +114,7 @@ Merge: scalars replace, lists extend, dicts deep-merge.
|
|
|
129
114
|
|
|
130
115
|
### LLM Integration (Optional)
|
|
131
116
|
|
|
132
|
-
Enable LLM-based document classification
|
|
117
|
+
Enable LLM-based document classification and filtering:
|
|
133
118
|
|
|
134
119
|
```yaml
|
|
135
120
|
llm:
|
|
@@ -145,25 +130,11 @@ llm:
|
|
|
145
130
|
| `claude` | `export ANTHROPIC_API_KEY=...` | ~$0.25/1M tokens |
|
|
146
131
|
| `modal` | `okb llm deploy` | ~$0.02/min GPU |
|
|
147
132
|
|
|
148
|
-
|
|
149
|
-
|
|
133
|
+
For Modal (no API key needed):
|
|
150
134
|
```yaml
|
|
151
135
|
llm:
|
|
152
136
|
provider: modal
|
|
153
|
-
model:
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
Non-gated models (work immediately):
|
|
157
|
-
- `microsoft/Phi-3-mini-4k-instruct` - Good quality, 4K context
|
|
158
|
-
- `Qwen/Qwen2-1.5B-Instruct` - Smaller/faster
|
|
159
|
-
|
|
160
|
-
Gated models (require HuggingFace approval + token):
|
|
161
|
-
- `meta-llama/Llama-3.2-3B-Instruct` - Requires accepting license at HuggingFace
|
|
162
|
-
- Setup: `modal secret create huggingface HF_TOKEN=hf_...`
|
|
163
|
-
|
|
164
|
-
Deploy after configuring:
|
|
165
|
-
```bash
|
|
166
|
-
okb llm deploy
|
|
137
|
+
model: meta-llama/Llama-3.2-3B-Instruct
|
|
167
138
|
```
|
|
168
139
|
|
|
169
140
|
**Pre-ingest filtering** - skip low-value content during sync:
|
|
@@ -177,36 +148,6 @@ plugins:
|
|
|
177
148
|
action_on_skip: discard # or "archive"
|
|
178
149
|
```
|
|
179
150
|
|
|
180
|
-
### Document Enrichment
|
|
181
|
-
|
|
182
|
-
Extract TODOs and entities (people, projects, technologies) from documents using LLM:
|
|
183
|
-
|
|
184
|
-
```bash
|
|
185
|
-
okb enrich run # Enrich un-enriched documents
|
|
186
|
-
okb enrich run --dry-run # Preview what would be enriched
|
|
187
|
-
okb enrich run --source-type markdown # Only markdown files
|
|
188
|
-
okb enrich run --query "meeting" # Filter by semantic search
|
|
189
|
-
```
|
|
190
|
-
|
|
191
|
-
Entities are created as pending suggestions for review:
|
|
192
|
-
```bash
|
|
193
|
-
okb enrich pending # List pending entities
|
|
194
|
-
okb enrich approve <id> # Approve → creates entity document
|
|
195
|
-
okb enrich reject <id> # Reject → hidden from future suggestions
|
|
196
|
-
```
|
|
197
|
-
|
|
198
|
-
Configure enrichment behavior:
|
|
199
|
-
```yaml
|
|
200
|
-
enrichment:
|
|
201
|
-
enabled: true
|
|
202
|
-
extract_todos: true
|
|
203
|
-
extract_entities: true
|
|
204
|
-
auto_create_todos: true # TODOs created immediately
|
|
205
|
-
auto_create_entities: false # Entities go to pending review
|
|
206
|
-
min_confidence_todo: 0.7
|
|
207
|
-
min_confidence_entity: 0.8
|
|
208
|
-
```
|
|
209
|
-
|
|
210
151
|
CLI commands:
|
|
211
152
|
```bash
|
|
212
153
|
okb llm status # Show config and connectivity
|
|
@@ -279,20 +220,6 @@ Then configure Claude Code to connect via SSE:
|
|
|
279
220
|
| `add_todo` | Create a TODO item in the knowledge base |
|
|
280
221
|
| `trigger_sync` | Sync API sources (Todoist, GitHub, Dropbox Paper) |
|
|
281
222
|
| `trigger_rescan` | Check indexed files for changes and re-ingest |
|
|
282
|
-
| `list_sync_sources` | List available API sync sources with status |
|
|
283
|
-
| `enrich_document` | Run LLM enrichment to extract TODOs/entities |
|
|
284
|
-
| `list_pending_entities` | List entities awaiting review |
|
|
285
|
-
| `approve_entity` | Approve a pending entity |
|
|
286
|
-
| `reject_entity` | Reject a pending entity |
|
|
287
|
-
| `analyze_knowledge_base` | Analyze content and generate description/topics |
|
|
288
|
-
| `find_entity_duplicates` | Find potential duplicate entities |
|
|
289
|
-
| `merge_entities` | Merge duplicate entities |
|
|
290
|
-
| `list_pending_merges` | List pending merge proposals |
|
|
291
|
-
| `approve_merge` | Approve a merge proposal |
|
|
292
|
-
| `reject_merge` | Reject a merge proposal |
|
|
293
|
-
| `get_topic_clusters` | Get topic clusters from consolidation |
|
|
294
|
-
| `get_entity_relationships` | Get relationships between entities |
|
|
295
|
-
| `run_consolidation` | Run full entity consolidation pipeline |
|
|
296
223
|
|
|
297
224
|
## Contextual Chunking
|
|
298
225
|
|
|
@@ -315,10 +242,6 @@ project: student-app
|
|
|
315
242
|
category: backend
|
|
316
243
|
---
|
|
317
244
|
|
|
318
|
-
# Your Document Title
|
|
319
|
-
|
|
320
|
-
Content here...
|
|
321
|
-
```
|
|
322
245
|
|
|
323
246
|
## Plugin System
|
|
324
247
|
|