okb 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
okb/config.py CHANGED
@@ -53,7 +53,7 @@ class DatabaseConfig:
53
53
 
54
54
  name: str
55
55
  url: str
56
- managed: bool = True # Whether lkb manages this (Docker) or external
56
+ managed: bool = True # Whether okb manages this (Docker) or external
57
57
  default: bool = False
58
58
  description: str | None = None # Human-readable description for LLM context
59
59
  topics: list[str] | None = None # Topic keywords to help LLM route queries
@@ -259,6 +259,7 @@ DEFAULTS = {
259
259
  "yarn.lock",
260
260
  "uv.lock",
261
261
  "Cargo.lock",
262
+ "poetry.lock",
262
263
  "*.pyc",
263
264
  "*.pyo",
264
265
  "*.tmp",
@@ -281,7 +282,7 @@ DEFAULTS = {
281
282
  },
282
283
  "llm": {
283
284
  # LLM provider configuration
284
- # provider: None = disabled, "claude" = Anthropic API
285
+ # provider: None = disabled, "claude" = Anthropic API, "modal" = Modal GPU
285
286
  "provider": None,
286
287
  "model": "claude-haiku-4-5-20251001",
287
288
  "timeout": 30,
@@ -289,6 +290,38 @@ DEFAULTS = {
289
290
  # Bedrock settings (when use_bedrock is True)
290
291
  "use_bedrock": False,
291
292
  "aws_region": "us-west-2",
293
+ # Modal settings (when provider is "modal")
294
+ "modal_gpu": "L4", # GPU type: T4, L4, A10G, A100, etc.
295
+ },
296
+ "enrichment": {
297
+ # LLM-based document enrichment
298
+ "enabled": True,
299
+ "version": 1, # Increment to force re-enrichment
300
+ # What to extract
301
+ "extract_todos": True,
302
+ "extract_entities": True,
303
+ # Auto-create behavior
304
+ "auto_create_todos": True, # TODOs created immediately
305
+ "auto_create_entities": False, # Entities go to pending_entities table
306
+ # Confidence thresholds
307
+ "min_confidence_todo": 0.7,
308
+ "min_confidence_entity": 0.8,
309
+ # Auto-enrich during ingest (per source type)
310
+ "auto_enrich": {
311
+ "markdown": True,
312
+ "org": True,
313
+ "text": True,
314
+ "code": False, # Skip code files
315
+ "web": False, # Skip web pages
316
+ "todoist-task": False, # Already structured
317
+ },
318
+ # Entity consolidation settings
319
+ "consolidation": {
320
+ "cross_doc_min_mentions": 3, # Min docs for cross-doc detection
321
+ "embedding_similarity_threshold": 0.85, # For duplicate detection
322
+ "auto_merge_threshold": 0.95, # Auto-approve above this
323
+ "min_cluster_size": 3, # Min entities per cluster
324
+ },
292
325
  },
293
326
  }
294
327
 
@@ -349,12 +382,30 @@ class Config:
349
382
  llm_cache_responses: bool = True
350
383
  llm_use_bedrock: bool = False
351
384
  llm_aws_region: str = "us-west-2"
385
+ llm_modal_gpu: str = "L4"
386
+
387
+ # Enrichment settings (loaded from config in __post_init__)
388
+ enrichment_enabled: bool = True
389
+ enrichment_version: int = 1
390
+ enrichment_extract_todos: bool = True
391
+ enrichment_extract_entities: bool = True
392
+ enrichment_auto_create_todos: bool = True
393
+ enrichment_auto_create_entities: bool = False
394
+ enrichment_min_confidence_todo: float = 0.7
395
+ enrichment_min_confidence_entity: float = 0.8
396
+ enrichment_auto_enrich: dict[str, bool] = field(default_factory=dict)
397
+
398
+ # Consolidation settings (loaded from config in __post_init__)
399
+ consolidation_cross_doc_min_mentions: int = 3
400
+ consolidation_embedding_similarity_threshold: float = 0.85
401
+ consolidation_auto_merge_threshold: float = 0.95
402
+ consolidation_min_cluster_size: int = 3
352
403
 
353
404
  def __post_init__(self):
354
405
  """Load configuration from file and environment."""
355
406
  file_config = load_config_file()
356
407
 
357
- # Load and merge local config overlay (.lkbconf.yaml)
408
+ # Load and merge local config overlay (.okbconf.yaml)
358
409
  local_path = find_local_config()
359
410
  local_default_db: str | None = None
360
411
  if local_path:
@@ -417,7 +468,7 @@ class Config:
417
468
  else:
418
469
  # Legacy: single database_url (env > file > default)
419
470
  legacy_url = os.environ.get(
420
- "KB_DATABASE_URL",
471
+ "OKB_DATABASE_URL",
421
472
  file_config.get("database_url", DEFAULTS["databases"]["default"]["url"]),
422
473
  )
423
474
  self.databases["default"] = DatabaseConfig(
@@ -535,6 +586,55 @@ class Config:
535
586
  )
536
587
  self.llm_use_bedrock = llm_cfg.get("use_bedrock", DEFAULTS["llm"]["use_bedrock"])
537
588
  self.llm_aws_region = llm_cfg.get("aws_region", DEFAULTS["llm"]["aws_region"])
589
+ self.llm_modal_gpu = os.environ.get(
590
+ "OKB_MODAL_GPU",
591
+ llm_cfg.get("modal_gpu", DEFAULTS["llm"]["modal_gpu"]),
592
+ )
593
+
594
+ # Enrichment settings
595
+ enrich_cfg = file_config.get("enrichment", {})
596
+ self.enrichment_enabled = enrich_cfg.get("enabled", DEFAULTS["enrichment"]["enabled"])
597
+ self.enrichment_version = enrich_cfg.get("version", DEFAULTS["enrichment"]["version"])
598
+ self.enrichment_extract_todos = enrich_cfg.get(
599
+ "extract_todos", DEFAULTS["enrichment"]["extract_todos"]
600
+ )
601
+ self.enrichment_extract_entities = enrich_cfg.get(
602
+ "extract_entities", DEFAULTS["enrichment"]["extract_entities"]
603
+ )
604
+ self.enrichment_auto_create_todos = enrich_cfg.get(
605
+ "auto_create_todos", DEFAULTS["enrichment"]["auto_create_todos"]
606
+ )
607
+ self.enrichment_auto_create_entities = enrich_cfg.get(
608
+ "auto_create_entities", DEFAULTS["enrichment"]["auto_create_entities"]
609
+ )
610
+ self.enrichment_min_confidence_todo = enrich_cfg.get(
611
+ "min_confidence_todo", DEFAULTS["enrichment"]["min_confidence_todo"]
612
+ )
613
+ self.enrichment_min_confidence_entity = enrich_cfg.get(
614
+ "min_confidence_entity", DEFAULTS["enrichment"]["min_confidence_entity"]
615
+ )
616
+ self.enrichment_auto_enrich = enrich_cfg.get(
617
+ "auto_enrich", DEFAULTS["enrichment"]["auto_enrich"]
618
+ )
619
+
620
+ # Consolidation settings
621
+ consolidation_cfg = enrich_cfg.get("consolidation", {})
622
+ self.consolidation_cross_doc_min_mentions = consolidation_cfg.get(
623
+ "cross_doc_min_mentions",
624
+ DEFAULTS["enrichment"]["consolidation"]["cross_doc_min_mentions"],
625
+ )
626
+ self.consolidation_embedding_similarity_threshold = consolidation_cfg.get(
627
+ "embedding_similarity_threshold",
628
+ DEFAULTS["enrichment"]["consolidation"]["embedding_similarity_threshold"],
629
+ )
630
+ self.consolidation_auto_merge_threshold = consolidation_cfg.get(
631
+ "auto_merge_threshold",
632
+ DEFAULTS["enrichment"]["consolidation"]["auto_merge_threshold"],
633
+ )
634
+ self.consolidation_min_cluster_size = consolidation_cfg.get(
635
+ "min_cluster_size",
636
+ DEFAULTS["enrichment"]["consolidation"]["min_cluster_size"],
637
+ )
538
638
 
539
639
  def get_database(self, name: str | None = None) -> DatabaseConfig:
540
640
  """Get database config by name, or default if None."""
@@ -648,6 +748,24 @@ class Config:
648
748
  "cache_responses": self.llm_cache_responses,
649
749
  "use_bedrock": self.llm_use_bedrock,
650
750
  "aws_region": self.llm_aws_region,
751
+ "modal_gpu": self.llm_modal_gpu,
752
+ },
753
+ "enrichment": {
754
+ "enabled": self.enrichment_enabled,
755
+ "version": self.enrichment_version,
756
+ "extract_todos": self.enrichment_extract_todos,
757
+ "extract_entities": self.enrichment_extract_entities,
758
+ "auto_create_todos": self.enrichment_auto_create_todos,
759
+ "auto_create_entities": self.enrichment_auto_create_entities,
760
+ "min_confidence_todo": self.enrichment_min_confidence_todo,
761
+ "min_confidence_entity": self.enrichment_min_confidence_entity,
762
+ "auto_enrich": self.enrichment_auto_enrich,
763
+ "consolidation": {
764
+ "cross_doc_min_mentions": self.consolidation_cross_doc_min_mentions,
765
+ "embedding_similarity_threshold": self.consolidation_embedding_similarity_threshold,
766
+ "auto_merge_threshold": self.consolidation_auto_merge_threshold,
767
+ "min_cluster_size": self.consolidation_min_cluster_size,
768
+ },
651
769
  },
652
770
  }
653
771
 
okb/http_server.py CHANGED
@@ -37,9 +37,15 @@ READ_ONLY_TOOLS = frozenset(
37
37
  "get_document",
38
38
  "list_sources",
39
39
  "list_projects",
40
+ "list_documents_by_project",
40
41
  "recent_documents",
41
42
  "get_actionable_items",
42
43
  "get_database_info",
44
+ "list_sync_sources",
45
+ "list_pending_entities",
46
+ "list_pending_merges",
47
+ "get_topic_clusters",
48
+ "get_entity_relationships",
43
49
  }
44
50
  )
45
51
 
@@ -49,6 +55,17 @@ WRITE_TOOLS = frozenset(
49
55
  "delete_knowledge",
50
56
  "set_database_description",
51
57
  "add_todo",
58
+ "trigger_sync",
59
+ "trigger_rescan",
60
+ "enrich_document",
61
+ "approve_entity",
62
+ "reject_entity",
63
+ "analyze_knowledge_base",
64
+ "find_entity_duplicates",
65
+ "merge_entities",
66
+ "approve_merge",
67
+ "reject_merge",
68
+ "run_consolidation",
52
69
  }
53
70
  )
54
71
 
@@ -206,6 +223,24 @@ class HTTPMCPServer:
206
223
  content=[TextContent(type="text", text=f"## Projects\n\n{project_list}")]
207
224
  )
208
225
 
226
+ elif name == "list_documents_by_project":
227
+ project = arguments["project"]
228
+ limit = arguments.get("limit", 100)
229
+ docs = kb.list_documents_by_project(project, limit)
230
+ if not docs:
231
+ return CallToolResult(
232
+ content=[
233
+ TextContent(
234
+ type="text", text=f"No documents found for project '{project}'."
235
+ )
236
+ ]
237
+ )
238
+ output = [f"## Documents in '{project}' ({len(docs)} documents)\n"]
239
+ for d in docs:
240
+ output.append(f"- **{d['title'] or d['source_path']}** ({d['source_type']})")
241
+ output.append(f" - `{d['source_path']}`")
242
+ return CallToolResult(content=[TextContent(type="text", text="\n".join(output))])
243
+
209
244
  elif name == "recent_documents":
210
245
  from .mcp_server import format_relative_time, get_document_date
211
246
 
@@ -263,13 +298,13 @@ class HTTPMCPServer:
263
298
  deleted = kb.delete_knowledge(arguments["source_path"])
264
299
  if deleted:
265
300
  return CallToolResult(
266
- content=[TextContent(type="text", text="Knowledge entry deleted.")]
301
+ content=[TextContent(type="text", text="Document deleted.")]
267
302
  )
268
303
  return CallToolResult(
269
304
  content=[
270
305
  TextContent(
271
306
  type="text",
272
- text="Could not delete. Entry not found or not a Claude-saved entry.",
307
+ text="Could not delete. Document not found.",
273
308
  )
274
309
  ]
275
310
  )
@@ -349,6 +384,177 @@ class HTTPMCPServer:
349
384
  content=[TextContent(type="text", text="No fields provided to update.")]
350
385
  )
351
386
 
387
+ elif name == "add_todo":
388
+ result = kb.save_todo(
389
+ title=arguments["title"],
390
+ content=arguments.get("content"),
391
+ due_date=arguments.get("due_date"),
392
+ priority=arguments.get("priority"),
393
+ project=arguments.get("project"),
394
+ tags=arguments.get("tags"),
395
+ )
396
+ parts = [
397
+ "TODO created:",
398
+ f"- Title: {result['title']}",
399
+ f"- Path: `{result['source_path']}`",
400
+ ]
401
+ if result.get("priority"):
402
+ parts.append(f"- Priority: P{result['priority']}")
403
+ if result.get("due_date"):
404
+ parts.append(f"- Due: {result['due_date']}")
405
+ return CallToolResult(content=[TextContent(type="text", text="\n".join(parts))])
406
+
407
+ elif name == "trigger_sync":
408
+ from .mcp_server import _run_sync
409
+
410
+ # Get the db_url from the knowledge base
411
+ result = _run_sync(
412
+ kb.db_url,
413
+ sources=arguments.get("sources", []),
414
+ sync_all=arguments.get("all", False),
415
+ full=arguments.get("full", False),
416
+ doc_ids=arguments.get("doc_ids"),
417
+ )
418
+ return CallToolResult(content=[TextContent(type="text", text=result)])
419
+
420
+ elif name == "trigger_rescan":
421
+ from .mcp_server import _run_rescan
422
+
423
+ result = _run_rescan(
424
+ kb.db_url,
425
+ dry_run=arguments.get("dry_run", False),
426
+ delete_missing=arguments.get("delete_missing", False),
427
+ )
428
+ return CallToolResult(content=[TextContent(type="text", text=result)])
429
+
430
+ elif name == "list_sync_sources":
431
+ from .mcp_server import _list_sync_sources
432
+
433
+ token_info = getattr(self.server, "_current_token_info", None)
434
+ db_name = token_info.database if token_info else config.get_database().name
435
+ result = _list_sync_sources(kb.db_url, db_name)
436
+ return CallToolResult(content=[TextContent(type="text", text=result)])
437
+
438
+ elif name == "enrich_document":
439
+ from .mcp_server import _enrich_document
440
+
441
+ result = _enrich_document(
442
+ kb.db_url,
443
+ source_path=arguments["source_path"],
444
+ extract_todos=arguments.get("extract_todos", True),
445
+ extract_entities=arguments.get("extract_entities", True),
446
+ auto_create_entities=arguments.get("auto_create_entities", False),
447
+ )
448
+ return CallToolResult(content=[TextContent(type="text", text=result)])
449
+
450
+ elif name == "list_pending_entities":
451
+ from .mcp_server import _list_pending_entities
452
+
453
+ result = _list_pending_entities(
454
+ kb.db_url,
455
+ entity_type=arguments.get("entity_type"),
456
+ limit=arguments.get("limit", 20),
457
+ )
458
+ return CallToolResult(content=[TextContent(type="text", text=result)])
459
+
460
+ elif name == "approve_entity":
461
+ from .mcp_server import _approve_entity
462
+
463
+ result = _approve_entity(kb.db_url, arguments["pending_id"])
464
+ return CallToolResult(content=[TextContent(type="text", text=result)])
465
+
466
+ elif name == "reject_entity":
467
+ from .mcp_server import _reject_entity
468
+
469
+ result = _reject_entity(kb.db_url, arguments["pending_id"])
470
+ return CallToolResult(content=[TextContent(type="text", text=result)])
471
+
472
+ elif name == "analyze_knowledge_base":
473
+ from .mcp_server import _analyze_knowledge_base
474
+
475
+ result = _analyze_knowledge_base(
476
+ kb.db_url,
477
+ project=arguments.get("project"),
478
+ sample_size=arguments.get("sample_size", 15),
479
+ auto_update=arguments.get("auto_update", True),
480
+ )
481
+ return CallToolResult(content=[TextContent(type="text", text=result)])
482
+
483
+ # Entity consolidation tools
484
+ elif name == "find_entity_duplicates":
485
+ from .mcp_server import _find_entity_duplicates
486
+
487
+ result = _find_entity_duplicates(
488
+ kb.db_url,
489
+ similarity_threshold=arguments.get("similarity_threshold", 0.85),
490
+ limit=arguments.get("limit", 50),
491
+ )
492
+ return CallToolResult(content=[TextContent(type="text", text=result)])
493
+
494
+ elif name == "merge_entities":
495
+ from .mcp_server import _merge_entities
496
+
497
+ result = _merge_entities(
498
+ kb.db_url,
499
+ canonical_path=arguments["canonical_path"],
500
+ duplicate_path=arguments["duplicate_path"],
501
+ )
502
+ return CallToolResult(content=[TextContent(type="text", text=result)])
503
+
504
+ elif name == "list_pending_merges":
505
+ from .mcp_server import _list_pending_merges
506
+
507
+ result = _list_pending_merges(
508
+ kb.db_url,
509
+ limit=arguments.get("limit", 50),
510
+ )
511
+ return CallToolResult(content=[TextContent(type="text", text=result)])
512
+
513
+ elif name == "approve_merge":
514
+ from .mcp_server import _approve_merge
515
+
516
+ result = _approve_merge(kb.db_url, arguments["merge_id"])
517
+ return CallToolResult(content=[TextContent(type="text", text=result)])
518
+
519
+ elif name == "reject_merge":
520
+ from .mcp_server import _reject_merge
521
+
522
+ result = _reject_merge(kb.db_url, arguments["merge_id"])
523
+ return CallToolResult(content=[TextContent(type="text", text=result)])
524
+
525
+ elif name == "get_topic_clusters":
526
+ from .mcp_server import _get_topic_clusters
527
+
528
+ result = _get_topic_clusters(
529
+ kb.db_url,
530
+ limit=arguments.get("limit", 20),
531
+ )
532
+ return CallToolResult(content=[TextContent(type="text", text=result)])
533
+
534
+ elif name == "get_entity_relationships":
535
+ from .mcp_server import _get_entity_relationships
536
+
537
+ result = _get_entity_relationships(
538
+ kb.db_url,
539
+ entity_name=arguments.get("entity_name"),
540
+ relationship_type=arguments.get("relationship_type"),
541
+ limit=arguments.get("limit", 50),
542
+ )
543
+ return CallToolResult(content=[TextContent(type="text", text=result)])
544
+
545
+ elif name == "run_consolidation":
546
+ from .mcp_server import _run_consolidation
547
+
548
+ result = _run_consolidation(
549
+ kb.db_url,
550
+ detect_duplicates=arguments.get("detect_duplicates", True),
551
+ detect_cross_doc=arguments.get("detect_cross_doc", True),
552
+ build_clusters=arguments.get("build_clusters", True),
553
+ extract_relationships=arguments.get("extract_relationships", True),
554
+ dry_run=arguments.get("dry_run", False),
555
+ )
556
+ return CallToolResult(content=[TextContent(type="text", text=result)])
557
+
352
558
  else:
353
559
  return CallToolResult(
354
560
  content=[TextContent(type="text", text=f"Unknown tool: {name}")]