okb-1.1.0a0-py3-none-any.whl → okb-1.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- okb/cli.py +1083 -16
- okb/config.py +122 -4
- okb/http_server.py +356 -91
- okb/llm/analyze.py +524 -0
- okb/llm/consolidate.py +685 -0
- okb/llm/enrich.py +723 -0
- okb/llm/extractors/__init__.py +13 -0
- okb/llm/extractors/base.py +44 -0
- okb/llm/extractors/cross_doc.py +478 -0
- okb/llm/extractors/dedup.py +499 -0
- okb/llm/extractors/entity.py +369 -0
- okb/llm/extractors/todo.py +149 -0
- okb/llm/providers.py +9 -6
- okb/mcp_server.py +1036 -12
- okb/migrations/0008.enrichment.sql +46 -0
- okb/migrations/0009.entity-consolidation.sql +120 -0
- okb/migrations/0010.token-id.sql +7 -0
- okb/modal_llm.py +26 -8
- okb/plugins/sources/github.py +5 -5
- okb/tokens.py +25 -3
- {okb-1.1.0a0.dist-info → okb-1.1.2.dist-info}/METADATA +91 -8
- {okb-1.1.0a0.dist-info → okb-1.1.2.dist-info}/RECORD +24 -12
- {okb-1.1.0a0.dist-info → okb-1.1.2.dist-info}/WHEEL +0 -0
- {okb-1.1.0a0.dist-info → okb-1.1.2.dist-info}/entry_points.txt +0 -0
okb/config.py
CHANGED
|
@@ -53,7 +53,7 @@ class DatabaseConfig:
|
|
|
53
53
|
|
|
54
54
|
name: str
|
|
55
55
|
url: str
|
|
56
|
-
managed: bool = True # Whether
|
|
56
|
+
managed: bool = True # Whether okb manages this (Docker) or external
|
|
57
57
|
default: bool = False
|
|
58
58
|
description: str | None = None # Human-readable description for LLM context
|
|
59
59
|
topics: list[str] | None = None # Topic keywords to help LLM route queries
|
|
@@ -259,6 +259,7 @@ DEFAULTS = {
|
|
|
259
259
|
"yarn.lock",
|
|
260
260
|
"uv.lock",
|
|
261
261
|
"Cargo.lock",
|
|
262
|
+
"poetry.lock",
|
|
262
263
|
"*.pyc",
|
|
263
264
|
"*.pyo",
|
|
264
265
|
"*.tmp",
|
|
@@ -281,7 +282,7 @@ DEFAULTS = {
|
|
|
281
282
|
},
|
|
282
283
|
"llm": {
|
|
283
284
|
# LLM provider configuration
|
|
284
|
-
# provider: None = disabled, "claude" = Anthropic API
|
|
285
|
+
# provider: None = disabled, "claude" = Anthropic API, "modal" = Modal GPU
|
|
285
286
|
"provider": None,
|
|
286
287
|
"model": "claude-haiku-4-5-20251001",
|
|
287
288
|
"timeout": 30,
|
|
@@ -289,6 +290,38 @@ DEFAULTS = {
|
|
|
289
290
|
# Bedrock settings (when use_bedrock is True)
|
|
290
291
|
"use_bedrock": False,
|
|
291
292
|
"aws_region": "us-west-2",
|
|
293
|
+
# Modal settings (when provider is "modal")
|
|
294
|
+
"modal_gpu": "L4", # GPU type: T4, L4, A10G, A100, etc.
|
|
295
|
+
},
|
|
296
|
+
"enrichment": {
|
|
297
|
+
# LLM-based document enrichment
|
|
298
|
+
"enabled": True,
|
|
299
|
+
"version": 1, # Increment to force re-enrichment
|
|
300
|
+
# What to extract
|
|
301
|
+
"extract_todos": True,
|
|
302
|
+
"extract_entities": True,
|
|
303
|
+
# Auto-create behavior
|
|
304
|
+
"auto_create_todos": True, # TODOs created immediately
|
|
305
|
+
"auto_create_entities": False, # Entities go to pending_entities table
|
|
306
|
+
# Confidence thresholds
|
|
307
|
+
"min_confidence_todo": 0.7,
|
|
308
|
+
"min_confidence_entity": 0.8,
|
|
309
|
+
# Auto-enrich during ingest (per source type)
|
|
310
|
+
"auto_enrich": {
|
|
311
|
+
"markdown": True,
|
|
312
|
+
"org": True,
|
|
313
|
+
"text": True,
|
|
314
|
+
"code": False, # Skip code files
|
|
315
|
+
"web": False, # Skip web pages
|
|
316
|
+
"todoist-task": False, # Already structured
|
|
317
|
+
},
|
|
318
|
+
# Entity consolidation settings
|
|
319
|
+
"consolidation": {
|
|
320
|
+
"cross_doc_min_mentions": 3, # Min docs for cross-doc detection
|
|
321
|
+
"embedding_similarity_threshold": 0.85, # For duplicate detection
|
|
322
|
+
"auto_merge_threshold": 0.95, # Auto-approve above this
|
|
323
|
+
"min_cluster_size": 3, # Min entities per cluster
|
|
324
|
+
},
|
|
292
325
|
},
|
|
293
326
|
}
|
|
294
327
|
|
|
@@ -349,12 +382,30 @@ class Config:
|
|
|
349
382
|
llm_cache_responses: bool = True
|
|
350
383
|
llm_use_bedrock: bool = False
|
|
351
384
|
llm_aws_region: str = "us-west-2"
|
|
385
|
+
llm_modal_gpu: str = "L4"
|
|
386
|
+
|
|
387
|
+
# Enrichment settings (loaded from config in __post_init__)
|
|
388
|
+
enrichment_enabled: bool = True
|
|
389
|
+
enrichment_version: int = 1
|
|
390
|
+
enrichment_extract_todos: bool = True
|
|
391
|
+
enrichment_extract_entities: bool = True
|
|
392
|
+
enrichment_auto_create_todos: bool = True
|
|
393
|
+
enrichment_auto_create_entities: bool = False
|
|
394
|
+
enrichment_min_confidence_todo: float = 0.7
|
|
395
|
+
enrichment_min_confidence_entity: float = 0.8
|
|
396
|
+
enrichment_auto_enrich: dict[str, bool] = field(default_factory=dict)
|
|
397
|
+
|
|
398
|
+
# Consolidation settings (loaded from config in __post_init__)
|
|
399
|
+
consolidation_cross_doc_min_mentions: int = 3
|
|
400
|
+
consolidation_embedding_similarity_threshold: float = 0.85
|
|
401
|
+
consolidation_auto_merge_threshold: float = 0.95
|
|
402
|
+
consolidation_min_cluster_size: int = 3
|
|
352
403
|
|
|
353
404
|
def __post_init__(self):
|
|
354
405
|
"""Load configuration from file and environment."""
|
|
355
406
|
file_config = load_config_file()
|
|
356
407
|
|
|
357
|
-
# Load and merge local config overlay (.
|
|
408
|
+
# Load and merge local config overlay (.okbconf.yaml)
|
|
358
409
|
local_path = find_local_config()
|
|
359
410
|
local_default_db: str | None = None
|
|
360
411
|
if local_path:
|
|
@@ -417,7 +468,7 @@ class Config:
|
|
|
417
468
|
else:
|
|
418
469
|
# Legacy: single database_url (env > file > default)
|
|
419
470
|
legacy_url = os.environ.get(
|
|
420
|
-
"
|
|
471
|
+
"OKB_DATABASE_URL",
|
|
421
472
|
file_config.get("database_url", DEFAULTS["databases"]["default"]["url"]),
|
|
422
473
|
)
|
|
423
474
|
self.databases["default"] = DatabaseConfig(
|
|
@@ -535,6 +586,55 @@ class Config:
|
|
|
535
586
|
)
|
|
536
587
|
self.llm_use_bedrock = llm_cfg.get("use_bedrock", DEFAULTS["llm"]["use_bedrock"])
|
|
537
588
|
self.llm_aws_region = llm_cfg.get("aws_region", DEFAULTS["llm"]["aws_region"])
|
|
589
|
+
self.llm_modal_gpu = os.environ.get(
|
|
590
|
+
"OKB_MODAL_GPU",
|
|
591
|
+
llm_cfg.get("modal_gpu", DEFAULTS["llm"]["modal_gpu"]),
|
|
592
|
+
)
|
|
593
|
+
|
|
594
|
+
# Enrichment settings
|
|
595
|
+
enrich_cfg = file_config.get("enrichment", {})
|
|
596
|
+
self.enrichment_enabled = enrich_cfg.get("enabled", DEFAULTS["enrichment"]["enabled"])
|
|
597
|
+
self.enrichment_version = enrich_cfg.get("version", DEFAULTS["enrichment"]["version"])
|
|
598
|
+
self.enrichment_extract_todos = enrich_cfg.get(
|
|
599
|
+
"extract_todos", DEFAULTS["enrichment"]["extract_todos"]
|
|
600
|
+
)
|
|
601
|
+
self.enrichment_extract_entities = enrich_cfg.get(
|
|
602
|
+
"extract_entities", DEFAULTS["enrichment"]["extract_entities"]
|
|
603
|
+
)
|
|
604
|
+
self.enrichment_auto_create_todos = enrich_cfg.get(
|
|
605
|
+
"auto_create_todos", DEFAULTS["enrichment"]["auto_create_todos"]
|
|
606
|
+
)
|
|
607
|
+
self.enrichment_auto_create_entities = enrich_cfg.get(
|
|
608
|
+
"auto_create_entities", DEFAULTS["enrichment"]["auto_create_entities"]
|
|
609
|
+
)
|
|
610
|
+
self.enrichment_min_confidence_todo = enrich_cfg.get(
|
|
611
|
+
"min_confidence_todo", DEFAULTS["enrichment"]["min_confidence_todo"]
|
|
612
|
+
)
|
|
613
|
+
self.enrichment_min_confidence_entity = enrich_cfg.get(
|
|
614
|
+
"min_confidence_entity", DEFAULTS["enrichment"]["min_confidence_entity"]
|
|
615
|
+
)
|
|
616
|
+
self.enrichment_auto_enrich = enrich_cfg.get(
|
|
617
|
+
"auto_enrich", DEFAULTS["enrichment"]["auto_enrich"]
|
|
618
|
+
)
|
|
619
|
+
|
|
620
|
+
# Consolidation settings
|
|
621
|
+
consolidation_cfg = enrich_cfg.get("consolidation", {})
|
|
622
|
+
self.consolidation_cross_doc_min_mentions = consolidation_cfg.get(
|
|
623
|
+
"cross_doc_min_mentions",
|
|
624
|
+
DEFAULTS["enrichment"]["consolidation"]["cross_doc_min_mentions"],
|
|
625
|
+
)
|
|
626
|
+
self.consolidation_embedding_similarity_threshold = consolidation_cfg.get(
|
|
627
|
+
"embedding_similarity_threshold",
|
|
628
|
+
DEFAULTS["enrichment"]["consolidation"]["embedding_similarity_threshold"],
|
|
629
|
+
)
|
|
630
|
+
self.consolidation_auto_merge_threshold = consolidation_cfg.get(
|
|
631
|
+
"auto_merge_threshold",
|
|
632
|
+
DEFAULTS["enrichment"]["consolidation"]["auto_merge_threshold"],
|
|
633
|
+
)
|
|
634
|
+
self.consolidation_min_cluster_size = consolidation_cfg.get(
|
|
635
|
+
"min_cluster_size",
|
|
636
|
+
DEFAULTS["enrichment"]["consolidation"]["min_cluster_size"],
|
|
637
|
+
)
|
|
538
638
|
|
|
539
639
|
def get_database(self, name: str | None = None) -> DatabaseConfig:
|
|
540
640
|
"""Get database config by name, or default if None."""
|
|
@@ -648,6 +748,24 @@ class Config:
|
|
|
648
748
|
"cache_responses": self.llm_cache_responses,
|
|
649
749
|
"use_bedrock": self.llm_use_bedrock,
|
|
650
750
|
"aws_region": self.llm_aws_region,
|
|
751
|
+
"modal_gpu": self.llm_modal_gpu,
|
|
752
|
+
},
|
|
753
|
+
"enrichment": {
|
|
754
|
+
"enabled": self.enrichment_enabled,
|
|
755
|
+
"version": self.enrichment_version,
|
|
756
|
+
"extract_todos": self.enrichment_extract_todos,
|
|
757
|
+
"extract_entities": self.enrichment_extract_entities,
|
|
758
|
+
"auto_create_todos": self.enrichment_auto_create_todos,
|
|
759
|
+
"auto_create_entities": self.enrichment_auto_create_entities,
|
|
760
|
+
"min_confidence_todo": self.enrichment_min_confidence_todo,
|
|
761
|
+
"min_confidence_entity": self.enrichment_min_confidence_entity,
|
|
762
|
+
"auto_enrich": self.enrichment_auto_enrich,
|
|
763
|
+
"consolidation": {
|
|
764
|
+
"cross_doc_min_mentions": self.consolidation_cross_doc_min_mentions,
|
|
765
|
+
"embedding_similarity_threshold": self.consolidation_embedding_similarity_threshold,
|
|
766
|
+
"auto_merge_threshold": self.consolidation_auto_merge_threshold,
|
|
767
|
+
"min_cluster_size": self.consolidation_min_cluster_size,
|
|
768
|
+
},
|
|
651
769
|
},
|
|
652
770
|
}
|
|
653
771
|
|