okb 1.1.0a0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ -- LLM enrichment for document annotation (TODOs and entities)
2
+ -- depends: 0006.llm-cache
3
+
4
+ -- Track enrichment state on documents
5
+ ALTER TABLE documents ADD COLUMN IF NOT EXISTS enriched_at TIMESTAMPTZ;
6
+ ALTER TABLE documents ADD COLUMN IF NOT EXISTS enrichment_version INTEGER;
7
+
8
+ -- Index for "needs enrichment" queries
9
+ CREATE INDEX IF NOT EXISTS idx_documents_needs_enrichment
10
+ ON documents(enriched_at) WHERE enriched_at IS NULL;
11
+
12
+ -- Pending entity suggestions (before approval)
13
+ CREATE TABLE IF NOT EXISTS pending_entities (
14
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
15
+ source_document_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
16
+ entity_name TEXT NOT NULL,
17
+ entity_type TEXT NOT NULL, -- person, project, technology, concept, organization
18
+ aliases JSONB DEFAULT '[]',
19
+ description TEXT,
20
+ mentions JSONB DEFAULT '[]', -- Context snippets from source document
21
+ confidence REAL,
22
+ status TEXT DEFAULT 'pending', -- pending, approved, rejected
23
+ created_at TIMESTAMPTZ DEFAULT NOW(),
24
+ reviewed_at TIMESTAMPTZ
25
+ );
26
+
27
+ CREATE INDEX IF NOT EXISTS idx_pending_entities_status ON pending_entities(status);
28
+ CREATE INDEX IF NOT EXISTS idx_pending_entities_source ON pending_entities(source_document_id);
29
+ CREATE INDEX IF NOT EXISTS idx_pending_entities_type ON pending_entities(entity_type);
30
+
31
+ -- Entity references (links entity documents to source documents)
32
+ -- When an entity is approved, it becomes a document with source_path like okb://entity/person/john-smith
33
+ -- This table tracks which documents mention each entity
34
+ CREATE TABLE IF NOT EXISTS entity_refs (
35
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
36
+ entity_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
37
+ document_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
38
+ mention_text TEXT NOT NULL,
39
+ context TEXT, -- Surrounding text for context
40
+ confidence REAL,
41
+ created_at TIMESTAMPTZ DEFAULT NOW(),
42
+ UNIQUE(entity_id, document_id, mention_text)
43
+ );
44
+
45
+ CREATE INDEX IF NOT EXISTS idx_entity_refs_entity ON entity_refs(entity_id);
46
+ CREATE INDEX IF NOT EXISTS idx_entity_refs_document ON entity_refs(document_id);
@@ -0,0 +1,120 @@
1
+ -- Entity consolidation: deduplication, cross-doc detection, clustering, relationships
2
+ -- depends: 0008.enrichment
3
+
4
+ -- Canonical mappings: alias text -> entity document
5
+ -- Used for deduplication and alias resolution
6
+ CREATE TABLE IF NOT EXISTS entity_aliases (
7
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
8
+ alias_text TEXT NOT NULL,
9
+ entity_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
10
+ confidence REAL, -- How confident we are this alias belongs to entity
11
+ source TEXT DEFAULT 'manual', -- 'manual', 'merge', 'extraction'
12
+ created_at TIMESTAMPTZ DEFAULT NOW(),
13
+ UNIQUE(alias_text, entity_id)
14
+ );
15
+
16
+ CREATE INDEX IF NOT EXISTS idx_entity_aliases_text ON entity_aliases(LOWER(alias_text));
17
+ CREATE INDEX IF NOT EXISTS idx_entity_aliases_entity ON entity_aliases(entity_id);
18
+
19
+ -- Proposed entity merges awaiting user confirmation
20
+ CREATE TABLE IF NOT EXISTS pending_entity_merges (
21
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
22
+ canonical_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
23
+ duplicate_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
24
+ confidence REAL NOT NULL, -- How confident we are these are the same
25
+ reason TEXT, -- Why we think they're the same ("embedding_similarity", "alias_match", "llm")
26
+ detected_at TIMESTAMPTZ DEFAULT NOW(),
27
+ status TEXT DEFAULT 'pending', -- 'pending', 'approved', 'rejected'
28
+ reviewed_at TIMESTAMPTZ,
29
+ UNIQUE(canonical_id, duplicate_id)
30
+ );
31
+
32
+ CREATE INDEX IF NOT EXISTS idx_pending_merges_status ON pending_entity_merges(status);
33
+ CREATE INDEX IF NOT EXISTS idx_pending_merges_confidence ON pending_entity_merges(confidence DESC);
34
+
35
+ -- Entity-to-entity relationships
36
+ CREATE TABLE IF NOT EXISTS entity_relationships (
37
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
38
+ source_entity_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
39
+ target_entity_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
40
+ relationship_type TEXT NOT NULL, -- 'works_for', 'uses', 'belongs_to', 'related_to'
41
+ confidence REAL,
42
+ source TEXT DEFAULT 'extraction', -- 'extraction', 'manual'
43
+ context TEXT, -- Supporting context for the relationship
44
+ created_at TIMESTAMPTZ DEFAULT NOW(),
45
+ UNIQUE(source_entity_id, target_entity_id, relationship_type)
46
+ );
47
+
48
+ CREATE INDEX IF NOT EXISTS idx_entity_rel_source ON entity_relationships(source_entity_id);
49
+ CREATE INDEX IF NOT EXISTS idx_entity_rel_target ON entity_relationships(target_entity_id);
50
+ CREATE INDEX IF NOT EXISTS idx_entity_rel_type ON entity_relationships(relationship_type);
51
+
52
+ -- Topic clusters group related entities and documents
53
+ CREATE TABLE IF NOT EXISTS topic_clusters (
54
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
55
+ name TEXT NOT NULL,
56
+ description TEXT,
57
+ centroid vector(768), -- Cluster centroid embedding
58
+ member_count INTEGER DEFAULT 0,
59
+ created_at TIMESTAMPTZ DEFAULT NOW(),
60
+ updated_at TIMESTAMPTZ DEFAULT NOW()
61
+ );
62
+
63
+ CREATE INDEX IF NOT EXISTS idx_topic_clusters_centroid ON topic_clusters
64
+ USING hnsw (centroid vector_cosine_ops);
65
+
66
+ -- Cluster membership: entities and documents can belong to clusters
67
+ CREATE TABLE IF NOT EXISTS topic_cluster_members (
68
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
69
+ cluster_id UUID NOT NULL REFERENCES topic_clusters(id) ON DELETE CASCADE,
70
+ document_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
71
+ distance REAL, -- Distance from cluster centroid
72
+ is_entity BOOLEAN DEFAULT FALSE, -- True if document is an entity
73
+ added_at TIMESTAMPTZ DEFAULT NOW(),
74
+ UNIQUE(cluster_id, document_id)
75
+ );
76
+
77
+ CREATE INDEX IF NOT EXISTS idx_cluster_members_cluster ON topic_cluster_members(cluster_id);
78
+ CREATE INDEX IF NOT EXISTS idx_cluster_members_document ON topic_cluster_members(document_id);
79
+
80
+ -- Proposed cluster merges awaiting confirmation
81
+ CREATE TABLE IF NOT EXISTS pending_cluster_merges (
82
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
83
+ primary_cluster_id UUID NOT NULL REFERENCES topic_clusters(id) ON DELETE CASCADE,
84
+ secondary_cluster_id UUID NOT NULL REFERENCES topic_clusters(id) ON DELETE CASCADE,
85
+ similarity REAL NOT NULL, -- How similar the clusters are
86
+ status TEXT DEFAULT 'pending', -- 'pending', 'approved', 'rejected'
87
+ detected_at TIMESTAMPTZ DEFAULT NOW(),
88
+ reviewed_at TIMESTAMPTZ,
89
+ UNIQUE(primary_cluster_id, secondary_cluster_id)
90
+ );
91
+
92
+ CREATE INDEX IF NOT EXISTS idx_pending_cluster_merges_status ON pending_cluster_merges(status);
93
+
94
+ -- Cross-document entity candidates: detected mentions not yet extracted as entities
95
+ CREATE TABLE IF NOT EXISTS cross_doc_entity_candidates (
96
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
97
+ text TEXT NOT NULL, -- The mention text (normalized)
98
+ document_ids UUID[] NOT NULL, -- Array of document IDs containing this mention
99
+ document_count INTEGER NOT NULL, -- Number of documents (for quick filtering)
100
+ sample_contexts JSONB DEFAULT '[]', -- Sample text contexts where it appears
101
+ suggested_type TEXT, -- Suggested entity type
102
+ confidence REAL,
103
+ status TEXT DEFAULT 'pending', -- 'pending', 'approved', 'rejected', 'exists'
104
+ created_at TIMESTAMPTZ DEFAULT NOW(),
105
+ reviewed_at TIMESTAMPTZ,
106
+ UNIQUE(text)
107
+ );
108
+
109
+ CREATE INDEX IF NOT EXISTS idx_cross_doc_status ON cross_doc_entity_candidates(status);
110
+ CREATE INDEX IF NOT EXISTS idx_cross_doc_count ON cross_doc_entity_candidates(document_count DESC);
111
+
112
+ -- Track consolidation runs
113
+ CREATE TABLE IF NOT EXISTS consolidation_runs (
114
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
115
+ run_type TEXT NOT NULL, -- 'dedup', 'cross_doc', 'cluster', 'relationship', 'full'
116
+ started_at TIMESTAMPTZ DEFAULT NOW(),
117
+ completed_at TIMESTAMPTZ,
118
+ stats JSONB DEFAULT '{}', -- Run statistics
119
+ error TEXT -- Error message if failed
120
+ );
@@ -0,0 +1,7 @@
1
+ -- Add ID column to tokens for easier revocation
2
+ -- depends: 0009.entity-consolidation
3
+
4
+ ALTER TABLE tokens ADD COLUMN IF NOT EXISTS id SERIAL;
5
+
6
+ -- Create index for ID lookups
7
+ CREATE INDEX IF NOT EXISTS tokens_id_idx ON tokens(id);
okb/modal_llm.py CHANGED
@@ -2,20 +2,34 @@
2
2
  Modal-based GPU LLM service for document classification.
3
3
 
4
4
  Provides on-demand GPU access for LLM inference using open models.
5
- Uses Llama 3.2 3B by default - fast and efficient for classification tasks.
6
5
 
7
6
  Usage:
7
+ # Deploy with default model (Phi-3)
8
8
  modal deploy modal_llm.py
9
9
 
10
+ # Deploy with specific model
11
+ OKB_LLM_MODEL=meta-llama/Llama-3.2-3B-Instruct modal deploy modal_llm.py
12
+
10
13
  Then call from Python:
11
14
  llm = modal.Cls.from_name("knowledge-llm", "LLM")()
12
15
  response = llm.complete.remote("Classify this document", system="You are a classifier")
13
16
  """
14
17
 
18
+ import os
19
+
15
20
  import modal
16
21
 
17
22
  app = modal.App("knowledge-llm")
18
23
 
24
+ # Model is set via environment variable at deploy time
25
+ # Default to Phi-3 which doesn't require HuggingFace approval
26
+ DEFAULT_MODEL = "microsoft/Phi-3-mini-4k-instruct"
27
+ MODEL_ID = os.environ.get("OKB_LLM_MODEL", DEFAULT_MODEL)
28
+
29
+ # GPU type - L4 recommended for speed/cost balance
30
+ DEFAULT_GPU = "L4"
31
+ GPU_TYPE = os.environ.get("OKB_MODAL_GPU", DEFAULT_GPU)
32
+
19
33
  # Container image with transformers and torch
20
34
  llm_image = (
21
35
  modal.Image.debian_slim(python_version="3.11")
@@ -24,17 +38,19 @@ llm_image = (
24
38
  "torch>=2.0.0",
25
39
  "accelerate>=0.27.0",
26
40
  "bitsandbytes>=0.42.0", # For quantization
41
+ "hf_transfer", # Fast downloads
27
42
  )
28
- .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
43
+ .env({
44
+ "HF_HUB_ENABLE_HF_TRANSFER": "1",
45
+ "OKB_LLM_MODEL": MODEL_ID,
46
+ "OKB_MODAL_GPU": GPU_TYPE,
47
+ })
29
48
  )
30
49
 
31
- # Default model - Llama 3.2 3B is fast and good for classification
32
- DEFAULT_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
33
-
34
50
 
35
51
  @app.cls(
36
52
  image=llm_image,
37
- gpu="T4", # T4 is sufficient for 3B model with quantization
53
+ gpu=GPU_TYPE,
38
54
  timeout=300,
39
55
  scaledown_window=300, # Keep warm for 5 min
40
56
  retries=1,
@@ -42,14 +58,16 @@ DEFAULT_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
42
58
  class LLM:
43
59
  """GPU-accelerated LLM for document classification."""
44
60
 
45
- model_id: str = DEFAULT_MODEL
46
-
47
61
  @modal.enter()
48
62
  def load_model(self):
49
63
  """Load model once when container starts."""
64
+ import os
65
+
50
66
  import torch
51
67
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
52
68
 
69
+ # Read model from environment (set at deploy time)
70
+ self.model_id = os.environ.get("OKB_LLM_MODEL", "microsoft/Phi-3-mini-4k-instruct")
53
71
  print(f"Loading model: {self.model_id}")
54
72
 
55
73
  # Use 4-bit quantization for memory efficiency
@@ -95,11 +95,11 @@ class GitHubSource:
95
95
  token: ${GITHUB_TOKEN}
96
96
 
97
97
  Usage:
98
- lkb sync run github --repo owner/repo # README + docs/ (default)
99
- lkb sync run github --repo owner/repo --source # All source files
100
- lkb sync run github --repo owner/repo --issues # Include issues
101
- lkb sync run github --repo owner/repo --prs # Include PRs
102
- lkb sync run github --repo owner/repo --wiki # Include wiki
98
+ okb sync run github --repo owner/repo # README + docs/ (default)
99
+ okb sync run github --repo owner/repo --source # All source files
100
+ okb sync run github --repo owner/repo --issues # Include issues
101
+ okb sync run github --repo owner/repo --prs # Include PRs
102
+ okb sync run github --repo owner/repo --wiki # Include wiki
103
103
  """
104
104
 
105
105
  name = "github"
okb/tokens.py CHANGED
@@ -22,6 +22,7 @@ from psycopg.rows import dict_row
22
22
  class TokenInfo:
23
23
  """Information about a token."""
24
24
 
25
+ id: int
25
26
  token_hash: str
26
27
  database: str
27
28
  permissions: str # 'ro' or 'rw'
@@ -143,7 +144,7 @@ def list_tokens(db_url: str) -> list[TokenInfo]:
143
144
  with psycopg.connect(db_url, row_factory=dict_row) as conn:
144
145
  results = conn.execute(
145
146
  """
146
- SELECT token_hash, permissions, description, created_at, last_used_at
147
+ SELECT id, token_hash, permissions, description, created_at, last_used_at
147
148
  FROM tokens
148
149
  ORDER BY created_at DESC
149
150
  """
@@ -151,6 +152,7 @@ def list_tokens(db_url: str) -> list[TokenInfo]:
151
152
 
152
153
  return [
153
154
  TokenInfo(
155
+ id=r["id"],
154
156
  token_hash=r["token_hash"],
155
157
  database=db_name,
156
158
  permissions=r["permissions"],
@@ -171,7 +173,7 @@ def delete_token(db_url: str, token_or_prefix: str) -> bool:
171
173
 
172
174
  Args:
173
175
  db_url: Database connection URL
174
- token_or_prefix: Full token or token prefix (e.g., 'lkb_personal_ro')
176
+ token_or_prefix: Full token or token prefix (e.g., 'okb_personal_ro')
175
177
 
176
178
  Returns:
177
179
  True if token was deleted, False if not found
@@ -199,6 +201,25 @@ def delete_token(db_url: str, token_or_prefix: str) -> bool:
199
201
  return False
200
202
 
201
203
 
204
+ def delete_token_by_id(db_url: str, token_id: int) -> bool:
205
+ """Delete a token by its ID.
206
+
207
+ Args:
208
+ db_url: Database connection URL
209
+ token_id: Token ID from the tokens table
210
+
211
+ Returns:
212
+ True if token was deleted, False if not found
213
+ """
214
+ with psycopg.connect(db_url) as conn:
215
+ result = conn.execute(
216
+ "DELETE FROM tokens WHERE id = %s RETURNING id",
217
+ (token_id,),
218
+ ).fetchone()
219
+ conn.commit()
220
+ return result is not None
221
+
222
+
202
223
  def verify_token(token: str, get_db_url_fn) -> TokenInfo | None:
203
224
  """Verify a token and return its info if valid.
204
225
 
@@ -225,7 +246,7 @@ def verify_token(token: str, get_db_url_fn) -> TokenInfo | None:
225
246
  with psycopg.connect(db_url, row_factory=dict_row) as conn:
226
247
  result = conn.execute(
227
248
  """
228
- SELECT token_hash, permissions, description, created_at, last_used_at
249
+ SELECT id, token_hash, permissions, description, created_at, last_used_at
229
250
  FROM tokens
230
251
  WHERE token_hash = %s
231
252
  """,
@@ -243,6 +264,7 @@ def verify_token(token: str, get_db_url_fn) -> TokenInfo | None:
243
264
  conn.commit()
244
265
 
245
266
  return TokenInfo(
267
+ id=result["id"],
246
268
  token_hash=result["token_hash"],
247
269
  database=database,
248
270
  permissions=result["permissions"],
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: okb
3
- Version: 1.1.0a0
3
+ Version: 1.1.1
4
4
  Summary: Personal knowledge base with semantic search for LLMs
5
5
  Requires-Python: >=3.11
6
6
  Classifier: Programming Language :: Python :: 3
@@ -85,6 +85,8 @@ okb ingest ~/notes ~/docs
85
85
  | `okb db start` | Start pgvector database container |
86
86
  | `okb db stop` | Stop database container |
87
87
  | `okb db status` | Show database status |
88
+ | `okb db migrate [name]` | Apply pending migrations (optionally for a specific database) |
89
+ | `okb db list` | List configured databases |
88
90
  | `okb db destroy` | Remove container and volume (destructive) |
89
91
  | `okb ingest <paths>` | Ingest documents into knowledge base |
90
92
  | `okb ingest <paths> --local` | Ingest using local GPU/CPU embedding (no Modal) |
@@ -93,10 +95,11 @@ okb ingest ~/notes ~/docs
93
95
  | `okb watch <paths>` | Watch directories for changes |
94
96
  | `okb config init` | Create default config file |
95
97
  | `okb config show` | Show current configuration |
98
+ | `okb config path` | Print config file path |
96
99
  | `okb modal deploy` | Deploy GPU embedder to Modal |
97
100
  | `okb token create` | Create API token for HTTP server |
98
101
  | `okb token list` | List tokens for a database |
99
- | `okb token revoke` | Revoke an API token |
102
+ | `okb token revoke [TOKEN] --id <n>` | Revoke token by full value or ID |
100
103
  | `okb sync list` | List available API sources (plugins) |
101
104
  | `okb sync list-projects <source>` | List projects from source (for config) |
102
105
  | `okb sync run <sources>` | Sync data from external APIs |
@@ -108,6 +111,18 @@ okb ingest ~/notes ~/docs
108
111
  | `okb llm status` | Show LLM config and connectivity |
109
112
  | `okb llm deploy` | Deploy Modal LLM for open model inference |
110
113
  | `okb llm clear-cache` | Clear LLM response cache |
114
+ | `okb enrich run` | Extract TODOs and entities from documents |
115
+ | `okb enrich run --dry-run` | Show what would be enriched |
116
+ | `okb enrich pending` | List entities awaiting review |
117
+ | `okb enrich approve <id>` | Approve a pending entity |
118
+ | `okb enrich reject <id>` | Reject a pending entity |
119
+ | `okb enrich analyze` | Analyze database and update description/topics |
120
+ | `okb enrich consolidate` | Run entity consolidation (duplicates, clusters) |
121
+ | `okb enrich merge-proposals` | List pending merge proposals |
122
+ | `okb enrich approve-merge <id>` | Approve an entity merge |
123
+ | `okb enrich reject-merge <id>` | Reject an entity merge |
124
+ | `okb enrich clusters` | List topic clusters |
125
+ | `okb enrich relationships` | List entity relationships |
111
126
 
112
127
 
113
128
  ## Configuration
@@ -142,7 +157,7 @@ chunking:
142
157
  Use `--db <name>` to target a specific database with any command.
143
158
 
144
159
  Environment variables override config file settings:
145
- - `KB_DATABASE_URL` - Database connection string
160
+ - `OKB_DATABASE_URL` - Database connection string
146
161
  - `OKB_DOCKER_PORT` - Docker port mapping
147
162
  - `OKB_CONTAINER_NAME` - Docker container name
148
163
 
@@ -163,7 +178,7 @@ Merge: scalars replace, lists extend, dicts deep-merge.
163
178
 
164
179
  ### LLM Integration (Optional)
165
180
 
166
- Enable LLM-based document classification and filtering:
181
+ Enable LLM-based document classification, filtering, and enrichment:
167
182
 
168
183
  ```yaml
169
184
  llm:
@@ -179,11 +194,25 @@ llm:
179
194
  | `claude` | `export ANTHROPIC_API_KEY=...` | ~$0.25/1M tokens |
180
195
  | `modal` | `okb llm deploy` | ~$0.02/min GPU |
181
196
 
182
- For Modal (no API key needed):
197
+ **Modal LLM Setup** (no API key needed, runs on Modal's GPUs):
198
+
183
199
  ```yaml
184
200
  llm:
185
201
  provider: modal
186
- model: meta-llama/Llama-3.2-3B-Instruct
202
+ model: microsoft/Phi-3-mini-4k-instruct # Recommended: no gating
203
+ ```
204
+
205
+ Non-gated models (work immediately):
206
+ - `microsoft/Phi-3-mini-4k-instruct` - Good quality, 4K context
207
+ - `Qwen/Qwen2-1.5B-Instruct` - Smaller/faster
208
+
209
+ Gated models (require HuggingFace approval + token):
210
+ - `meta-llama/Llama-3.2-3B-Instruct` - Requires accepting license at HuggingFace
211
+ - Setup: `modal secret create huggingface HF_TOKEN=hf_...`
212
+
213
+ Deploy after configuring:
214
+ ```bash
215
+ okb llm deploy
187
216
  ```
188
217
 
189
218
  **Pre-ingest filtering** - skip low-value content during sync:
@@ -197,6 +226,36 @@ plugins:
197
226
  action_on_skip: discard # or "archive"
198
227
  ```
199
228
 
229
+ ### Document Enrichment
230
+
231
+ Extract TODOs and entities (people, projects, technologies) from documents using an LLM:
232
+
233
+ ```bash
234
+ okb enrich run # Enrich un-enriched documents
235
+ okb enrich run --dry-run # Preview what would be enriched
236
+ okb enrich run --source-type markdown # Only markdown files
237
+ okb enrich run --query "meeting" # Filter by semantic search
238
+ ```
239
+
240
+ Entities are created as pending suggestions for review:
241
+ ```bash
242
+ okb enrich pending # List pending entities
243
+ okb enrich approve <id> # Approve → creates entity document
244
+ okb enrich reject <id> # Reject → hidden from future suggestions
245
+ ```
246
+
247
+ Configure enrichment behavior:
248
+ ```yaml
249
+ enrichment:
250
+ enabled: true
251
+ extract_todos: true
252
+ extract_entities: true
253
+ auto_create_todos: true # TODOs created immediately
254
+ auto_create_entities: false # Entities go to pending review
255
+ min_confidence_todo: 0.7
256
+ min_confidence_entity: 0.8
257
+ ```
258
+
200
259
  CLI commands:
201
260
  ```bash
202
261
  okb llm status # Show config and connectivity
@@ -234,14 +293,20 @@ okb token create --db default -d "Claude Code"
234
293
  okb serve --http --host 0.0.0.0 --port 8080
235
294
  ```
236
295
 
237
- Then configure Claude Code to connect via SSE:
296
+ The server uses the MCP Streamable HTTP transport (RFC 9728 compliant):
297
+ - `POST /mcp` - Send JSON-RPC messages, receive SSE response
298
+ - `GET /mcp` - Establish SSE connection for server notifications
299
+ - `DELETE /mcp` - Terminate session
300
+ - `/sse` is an alias for `/mcp` for backward compatibility
301
+
302
+ Configure your MCP client to connect:
238
303
 
239
304
  ```json
240
305
  {
241
306
  "mcpServers": {
242
307
  "knowledge-base": {
243
308
  "type": "sse",
244
- "url": "http://localhost:8080/sse",
309
+ "url": "http://localhost:8080/mcp",
245
310
  "headers": {
246
311
  "Authorization": "Bearer okb_default_rw_a1b2c3d4e5f6g7h8"
247
312
  }
@@ -269,6 +334,20 @@ Then configure Claude Code to connect via SSE:
269
334
  | `add_todo` | Create a TODO item in the knowledge base |
270
335
  | `trigger_sync` | Sync API sources (Todoist, GitHub, Dropbox Paper) |
271
336
  | `trigger_rescan` | Check indexed files for changes and re-ingest |
337
+ | `list_sync_sources` | List available API sync sources with status |
338
+ | `enrich_document` | Run LLM enrichment to extract TODOs/entities |
339
+ | `list_pending_entities` | List entities awaiting review |
340
+ | `approve_entity` | Approve a pending entity |
341
+ | `reject_entity` | Reject a pending entity |
342
+ | `analyze_knowledge_base` | Analyze content and generate description/topics |
343
+ | `find_entity_duplicates` | Find potential duplicate entities |
344
+ | `merge_entities` | Merge duplicate entities |
345
+ | `list_pending_merges` | List pending merge proposals |
346
+ | `approve_merge` | Approve a merge proposal |
347
+ | `reject_merge` | Reject a merge proposal |
348
+ | `get_topic_clusters` | Get topic clusters from consolidation |
349
+ | `get_entity_relationships` | Get relationships between entities |
350
+ | `run_consolidation` | Run full entity consolidation pipeline |
272
351
 
273
352
  ## Contextual Chunking
274
353
 
@@ -291,6 +370,10 @@ project: student-app
291
370
  category: backend
292
371
  ---
293
372
 
373
+ # Your Document Title
374
+
375
+ Content here...
376
+ ```
294
377
 
295
378
  ## Plugin System
296
379
 
@@ -1,16 +1,25 @@
1
1
  okb/__init__.py,sha256=2yaWIYQbho7N2O2zwTn3ZH11b8b3SaoDVlxluVTqwy4,92
2
- okb/cli.py,sha256=y8Vr9Scy7PyAtgrCb2yIsN3kRvhwUvxpnpiF6RVV_MA,47735
3
- okb/config.py,sha256=DKmX2fgteGdh0QMsA-Immu-mZcvLjHWeB8HIf9rcM5o,22898
2
+ okb/cli.py,sha256=8v_SaXFOrJYrCPSr6JgIqqzFHHYpPlHvan4CLaYUDTs,88639
3
+ okb/config.py,sha256=vKDC6b6Tm3_XZzvn7nA9WlGCWzCT8vtV9AvLes02YW8,28562
4
4
  okb/data/init.sql,sha256=QpsicUN7PQ7d8zyOCRNChOu5XKdUVC3xySlRDPyKSN8,2728
5
- okb/http_server.py,sha256=jcpNWB1aGtcHE7h0U4gCxA4lZyqWHGgsiArv7DyPSZw,20595
5
+ okb/http_server.py,sha256=QdF7KDWrgvbEWXkm8nrBansKN7YbGE28yjEXC9bEXJc,30139
6
6
  okb/ingest.py,sha256=D5plxCC2tQXZenMNUa482dUDqsyuaq2APAQqaIgRAqU,54505
7
7
  okb/llm/__init__.py,sha256=4jelqgXvF-eEPyLCuAmcxagN0H923wI9pBJJZKv4r0E,2368
8
+ okb/llm/analyze.py,sha256=BKW308AtjWStZcZiMKaRqFmQsuTclp3Qp3W4nsdw4vk,18569
8
9
  okb/llm/base.py,sha256=gOm7zBiNdHrj7xxJfpb-4qZdYxWM0lA0vKfrBStO60E,2279
9
10
  okb/llm/cache.py,sha256=rxRPMNBtP336MSpGWA8F7rDZnF0O2RM3rEsNtoxS0Zk,6142
11
+ okb/llm/consolidate.py,sha256=TjXBWzzlJrX3_z0CysrFAxclp1K7XGgMb_SI5hdK7Y8,23178
12
+ okb/llm/enrich.py,sha256=Yc09xvXynuiCATK451J5sBQ9lkPPllio8vAPYOEVDFo,23133
13
+ okb/llm/extractors/__init__.py,sha256=8sHgtgtydF4CBFiIOfURYcyunVxK770qp_aMz4jxnFU,317
14
+ okb/llm/extractors/base.py,sha256=p_PIMdUoEw8-C5jG8TcbG73jegE5ovmIGbbbdDrRFVs,1250
15
+ okb/llm/extractors/cross_doc.py,sha256=nw1-nHSHKxGlL95rfHFkStdbzpy4H3eRclyUyn2YE1A,18114
16
+ okb/llm/extractors/dedup.py,sha256=TVDBhNX7rPmpipO2WckAhe_Oj1LmSRPce-EceZ6FdbM,16562
17
+ okb/llm/extractors/entity.py,sha256=woIF5AeXD2dmFDoiOVCPgMUVWQPYHOmg1KE-ZRB3o0E,9925
18
+ okb/llm/extractors/todo.py,sha256=LdAyioQ5fZ-KJC_W_-CAU8OlMvju3Oci7E7aueoTnpI,4345
10
19
  okb/llm/filter.py,sha256=y20bc3vHtp5gj7T7AhsJ45ZkAkBgztj6WPjsVAmvEeo,5447
11
- okb/llm/providers.py,sha256=AdVw9FFgv58-KJEfXv9JqWlkxBl-LcRWOao95CsjqWA,9718
20
+ okb/llm/providers.py,sha256=SpbEpJYSQHc43qyvBdv2IuUNKsf0_NfDmnooNM1tzHc,9838
12
21
  okb/local_embedder.py,sha256=zzjBUFp4IH2xsvKyKjKZyX9dJuE_3PDMHMwpyRYSISQ,2098
13
- okb/mcp_server.py,sha256=BnMxyGf524sK-8CYPyL3ZM_DEqWFsXpF7_66xj3-Ecs,59407
22
+ okb/mcp_server.py,sha256=IUEGo4xmo2HhvKHhgeADqvNxICk4dBTX-6BNSmi8fkY,98610
14
23
  okb/migrate.py,sha256=2faYL-SHiQCkGXpTUlBFMCj0B-6JYCHqZl9u6vOlui8,1693
15
24
  okb/migrations/0001.initial-schema.sql,sha256=0s5pj9Ad6f0u_mxODAM_-DbDI3aI37Wdu5XMPAzAIqw,2577
16
25
  okb/migrations/0002.sync-state.sql,sha256=w34dOA9sPg60NMS1aHvOhORff1k_Di9cO2ghwVQSPHU,696
@@ -18,20 +27,23 @@ okb/migrations/0003.structured-fields.sql,sha256=rPCSrdtotCoRpOfjHf1Ifx0pfizpYS9
18
27
  okb/migrations/0004.tokens.sql,sha256=VtcLfA1_SVVQLkEKZ-av_93Fg0ksVWLm1tlR7nJXoaQ,448
19
28
  okb/migrations/0005.database-metadata.sql,sha256=0X4LyuUUX34s3ph2C70FnBBau5HEBwR4xyY-hwPEX90,709
20
29
  okb/migrations/0006.llm-cache.sql,sha256=azjPpj00WH_8tx4JI8PJKZ1AOAJEhbkneVvYa3ZRZ1w,493
30
+ okb/migrations/0008.enrichment.sql,sha256=pkKtyuFgRiPhZ7YAyYr6yihT2R-Tx64hE18iqphqFAw,2160
31
+ okb/migrations/0009.entity-consolidation.sql,sha256=Q2TBHJqMa1fvAqaopa20KdJCIxqldDAeOPGC5YiuzxY,5800
32
+ okb/migrations/0010.token-id.sql,sha256=odFPvY3z63f6Zfx8j5qBj2LobvPafpT2hDi_MwBBf6Q,231
21
33
  okb/modal_embedder.py,sha256=V1cpGWrtEo1MGkrD9Nc-5wRmf9e7IwKPsQj7nVuLlyg,3469
22
- okb/modal_llm.py,sha256=4rYE3VZ_T09HXCgTIYFLu1s_C2FRC9y4dgMUGqJuO2M,5368
34
+ okb/modal_llm.py,sha256=NpCykbjlhUUdQNQ4NMLu-uEBFJbYwJDV2NdnwUZeG84,5903
23
35
  okb/plugins/__init__.py,sha256=50LNAH4bvfIw5CHT82sknGjdCldQ-4ds0wxo1zM9E2k,324
24
36
  okb/plugins/base.py,sha256=6TIN1UIItmuIsP4NDJhuRMH0ngKkQiGmtHTeYj1K8OU,3171
25
37
  okb/plugins/registry.py,sha256=fN7NfoOaRnMyXSWT2srd6vEr4riJjmncQFfberf0IE8,3741
26
38
  okb/plugins/sources/__init__.py,sha256=n58rAbcJC45JbofUY6IA526rSRjkYn4_tGjWma3TOUI,214
27
39
  okb/plugins/sources/dropbox_paper.py,sha256=Oi59NbJGQrwjE2Xhcinc2InKRc27Gdg7l8xVTbKLkI8,7493
28
- okb/plugins/sources/github.py,sha256=ozdTZPkU8h2-ZIx5o1FB58QBZ6P0eoVntluWL3vG87I,16309
40
+ okb/plugins/sources/github.py,sha256=YlwsY2hRKlGTyHL5L4vCC1TP-b6WJzEYbPDEXnB4y-I,16309
29
41
  okb/plugins/sources/todoist.py,sha256=B22tKYFZhuDhZHhpRdGWDGho9y7FBNgGlI1g2nf13-8,8849
30
42
  okb/rescan.py,sha256=dVdQEkVUjsrtOKAGZc0LC2uwcnkjB8hn2SOVWHnY-R8,8396
31
43
  okb/scripts/__init__.py,sha256=HPp8YCtIeo9XMOtOGCtntiwYr9eCxAJ1MF9Lo9WVzUA,53
32
44
  okb/scripts/watch.py,sha256=b8oGPTN3flNdNQJETeqQ1RNZ8U1LiKvHntLwvHRIviA,6354
33
- okb/tokens.py,sha256=JJ1C-mvtnT2O0cmjSu57PI9Nt53Sl9DqbmPuLnHlN6g,8043
34
- okb-1.1.0a0.dist-info/METADATA,sha256=IhNkQv-lucqtYIaXcNfxkKFkSD5Avo3Vy5buDbXHELo,10578
35
- okb-1.1.0a0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
36
- okb-1.1.0a0.dist-info/entry_points.txt,sha256=YX6b8BlV9sSAXrneoIm3dkXtRcgHhSzbDaOpJ0yCKRs,230
37
- okb-1.1.0a0.dist-info/RECORD,,
45
+ okb/tokens.py,sha256=3Of_PwNCTTexXC3d-EAiPjLdsbyk2F_dTeY30O3mqp8,8635
46
+ okb-1.1.1.dist-info/METADATA,sha256=n_hhat8q-7KJGyfcDhCqA_tqLLWKKgRIDgLpOpZ2IEo,14113
47
+ okb-1.1.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
48
+ okb-1.1.1.dist-info/entry_points.txt,sha256=YX6b8BlV9sSAXrneoIm3dkXtRcgHhSzbDaOpJ0yCKRs,230
49
+ okb-1.1.1.dist-info/RECORD,,
File without changes