PyPI - okb - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.0a0__py3-none-any.whl - Mend

okb 1.1.0__py3-none-any.whl → 1.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

okb/cli.py +16 -1083
okb/config.py +4 -122
okb/http_server.py +2 -163
okb/llm/providers.py +6 -9
okb/mcp_server.py +12 -1036
okb/modal_llm.py +8 -26
okb/plugins/sources/github.py +5 -5
okb/tokens.py +3 -25
{okb-1.1.0.dist-info → okb-1.1.0a0.dist-info}/METADATA +6 -83
{okb-1.1.0.dist-info → okb-1.1.0a0.dist-info}/RECORD +12 -24
okb/llm/analyze.py +0 -524
okb/llm/consolidate.py +0 -685
okb/llm/enrich.py +0 -723
okb/llm/extractors/__init__.py +0 -13
okb/llm/extractors/base.py +0 -44
okb/llm/extractors/cross_doc.py +0 -478
okb/llm/extractors/dedup.py +0 -499
okb/llm/extractors/entity.py +0 -369
okb/llm/extractors/todo.py +0 -149
okb/migrations/0008.enrichment.sql +0 -46
okb/migrations/0009.entity-consolidation.sql +0 -120
okb/migrations/0010.token-id.sql +0 -7
{okb-1.1.0.dist-info → okb-1.1.0a0.dist-info}/WHEEL +0 -0
{okb-1.1.0.dist-info → okb-1.1.0a0.dist-info}/entry_points.txt +0 -0

okb/config.py CHANGED Viewed

@@ -53,7 +53,7 @@ class DatabaseConfig:
     name: str
     url: str
-    managed: bool = True  # Whether okb manages this (Docker) or external
+    managed: bool = True  # Whether lkb manages this (Docker) or external
     default: bool = False
     description: str | None = None  # Human-readable description for LLM context
     topics: list[str] | None = None  # Topic keywords to help LLM route queries
@@ -259,7 +259,6 @@ DEFAULTS = {
             "yarn.lock",
             "uv.lock",
             "Cargo.lock",
-            "poetry.lock",
             "*.pyc",
             "*.pyo",
             "*.tmp",
@@ -282,7 +281,7 @@ DEFAULTS = {
     },
     "llm": {
         # LLM provider configuration
-        # provider: None = disabled, "claude" = Anthropic API, "modal" = Modal GPU
+        # provider: None = disabled, "claude" = Anthropic API
         "provider": None,
         "model": "claude-haiku-4-5-20251001",
         "timeout": 30,
@@ -290,38 +289,6 @@ DEFAULTS = {
         # Bedrock settings (when use_bedrock is True)
         "use_bedrock": False,
         "aws_region": "us-west-2",
-        # Modal settings (when provider is "modal")
-        "modal_gpu": "L4",  # GPU type: T4, L4, A10G, A100, etc.
-    },
-    "enrichment": {
-        # LLM-based document enrichment
-        "enabled": True,
-        "version": 1,  # Increment to force re-enrichment
-        # What to extract
-        "extract_todos": True,
-        "extract_entities": True,
-        # Auto-create behavior
-        "auto_create_todos": True,      # TODOs created immediately
-        "auto_create_entities": False,  # Entities go to pending_entities table
-        # Confidence thresholds
-        "min_confidence_todo": 0.7,
-        "min_confidence_entity": 0.8,
-        # Auto-enrich during ingest (per source type)
-        "auto_enrich": {
-            "markdown": True,
-            "org": True,
-            "text": True,
-            "code": False,      # Skip code files
-            "web": False,       # Skip web pages
-            "todoist-task": False,  # Already structured
-        },
-        # Entity consolidation settings
-        "consolidation": {
-            "cross_doc_min_mentions": 3,       # Min docs for cross-doc detection
-            "embedding_similarity_threshold": 0.85,  # For duplicate detection
-            "auto_merge_threshold": 0.95,      # Auto-approve above this
-            "min_cluster_size": 3,             # Min entities per cluster
-        },
     },
 }
@@ -382,30 +349,12 @@ class Config:
     llm_cache_responses: bool = True
     llm_use_bedrock: bool = False
     llm_aws_region: str = "us-west-2"
-    llm_modal_gpu: str = "L4"
-    # Enrichment settings (loaded from config in __post_init__)
-    enrichment_enabled: bool = True
-    enrichment_version: int = 1
-    enrichment_extract_todos: bool = True
-    enrichment_extract_entities: bool = True
-    enrichment_auto_create_todos: bool = True
-    enrichment_auto_create_entities: bool = False
-    enrichment_min_confidence_todo: float = 0.7
-    enrichment_min_confidence_entity: float = 0.8
-    enrichment_auto_enrich: dict[str, bool] = field(default_factory=dict)
-    # Consolidation settings (loaded from config in __post_init__)
-    consolidation_cross_doc_min_mentions: int = 3
-    consolidation_embedding_similarity_threshold: float = 0.85
-    consolidation_auto_merge_threshold: float = 0.95
-    consolidation_min_cluster_size: int = 3
     def __post_init__(self):
         """Load configuration from file and environment."""
         file_config = load_config_file()
-        # Load and merge local config overlay (.okbconf.yaml)
+        # Load and merge local config overlay (.lkbconf.yaml)
         local_path = find_local_config()
         local_default_db: str | None = None
         if local_path:
@@ -468,7 +417,7 @@ class Config:
         else:
             # Legacy: single database_url (env > file > default)
             legacy_url = os.environ.get(
-                "OKB_DATABASE_URL",
+                "KB_DATABASE_URL",
                 file_config.get("database_url", DEFAULTS["databases"]["default"]["url"]),
             )
             self.databases["default"] = DatabaseConfig(
@@ -586,55 +535,6 @@ class Config:
         )
         self.llm_use_bedrock = llm_cfg.get("use_bedrock", DEFAULTS["llm"]["use_bedrock"])
         self.llm_aws_region = llm_cfg.get("aws_region", DEFAULTS["llm"]["aws_region"])
-        self.llm_modal_gpu = os.environ.get(
-            "OKB_MODAL_GPU",
-            llm_cfg.get("modal_gpu", DEFAULTS["llm"]["modal_gpu"]),
-        )
-        # Enrichment settings
-        enrich_cfg = file_config.get("enrichment", {})
-        self.enrichment_enabled = enrich_cfg.get("enabled", DEFAULTS["enrichment"]["enabled"])
-        self.enrichment_version = enrich_cfg.get("version", DEFAULTS["enrichment"]["version"])
-        self.enrichment_extract_todos = enrich_cfg.get(
-            "extract_todos", DEFAULTS["enrichment"]["extract_todos"]
-        )
-        self.enrichment_extract_entities = enrich_cfg.get(
-            "extract_entities", DEFAULTS["enrichment"]["extract_entities"]
-        )
-        self.enrichment_auto_create_todos = enrich_cfg.get(
-            "auto_create_todos", DEFAULTS["enrichment"]["auto_create_todos"]
-        )
-        self.enrichment_auto_create_entities = enrich_cfg.get(
-            "auto_create_entities", DEFAULTS["enrichment"]["auto_create_entities"]
-        )
-        self.enrichment_min_confidence_todo = enrich_cfg.get(
-            "min_confidence_todo", DEFAULTS["enrichment"]["min_confidence_todo"]
-        )
-        self.enrichment_min_confidence_entity = enrich_cfg.get(
-            "min_confidence_entity", DEFAULTS["enrichment"]["min_confidence_entity"]
-        )
-        self.enrichment_auto_enrich = enrich_cfg.get(
-            "auto_enrich", DEFAULTS["enrichment"]["auto_enrich"]
-        )
-        # Consolidation settings
-        consolidation_cfg = enrich_cfg.get("consolidation", {})
-        self.consolidation_cross_doc_min_mentions = consolidation_cfg.get(
-            "cross_doc_min_mentions",
-            DEFAULTS["enrichment"]["consolidation"]["cross_doc_min_mentions"],
-        )
-        self.consolidation_embedding_similarity_threshold = consolidation_cfg.get(
-            "embedding_similarity_threshold",
-            DEFAULTS["enrichment"]["consolidation"]["embedding_similarity_threshold"],
-        )
-        self.consolidation_auto_merge_threshold = consolidation_cfg.get(
-            "auto_merge_threshold",
-            DEFAULTS["enrichment"]["consolidation"]["auto_merge_threshold"],
-        )
-        self.consolidation_min_cluster_size = consolidation_cfg.get(
-            "min_cluster_size",
-            DEFAULTS["enrichment"]["consolidation"]["min_cluster_size"],
-        )
     def get_database(self, name: str | None = None) -> DatabaseConfig:
         """Get database config by name, or default if None."""
@@ -748,24 +648,6 @@ class Config:
                 "cache_responses": self.llm_cache_responses,
                 "use_bedrock": self.llm_use_bedrock,
                 "aws_region": self.llm_aws_region,
-                "modal_gpu": self.llm_modal_gpu,
-            },
-            "enrichment": {
-                "enabled": self.enrichment_enabled,
-                "version": self.enrichment_version,
-                "extract_todos": self.enrichment_extract_todos,
-                "extract_entities": self.enrichment_extract_entities,
-                "auto_create_todos": self.enrichment_auto_create_todos,
-                "auto_create_entities": self.enrichment_auto_create_entities,
-                "min_confidence_todo": self.enrichment_min_confidence_todo,
-                "min_confidence_entity": self.enrichment_min_confidence_entity,
-                "auto_enrich": self.enrichment_auto_enrich,
-                "consolidation": {
-                    "cross_doc_min_mentions": self.consolidation_cross_doc_min_mentions,
-                    "embedding_similarity_threshold": self.consolidation_embedding_similarity_threshold,
-                    "auto_merge_threshold": self.consolidation_auto_merge_threshold,
-                    "min_cluster_size": self.consolidation_min_cluster_size,
-                },
             },
         }

okb/http_server.py CHANGED Viewed

@@ -37,15 +37,9 @@ READ_ONLY_TOOLS = frozenset(
         "get_document",
         "list_sources",
         "list_projects",
-        "list_documents_by_project",
         "recent_documents",
         "get_actionable_items",
         "get_database_info",
-        "list_sync_sources",
-        "list_pending_entities",
-        "list_pending_merges",
-        "get_topic_clusters",
-        "get_entity_relationships",
     }
 )
@@ -57,15 +51,6 @@ WRITE_TOOLS = frozenset(
         "add_todo",
         "trigger_sync",
         "trigger_rescan",
-        "enrich_document",
-        "approve_entity",
-        "reject_entity",
-        "analyze_knowledge_base",
-        "find_entity_duplicates",
-        "merge_entities",
-        "approve_merge",
-        "reject_merge",
-        "run_consolidation",
     }
 )
@@ -223,24 +208,6 @@ class HTTPMCPServer:
                     content=[TextContent(type="text", text=f"## Projects\n\n{project_list}")]
                 )
-            elif name == "list_documents_by_project":
-                project = arguments["project"]
-                limit = arguments.get("limit", 100)
-                docs = kb.list_documents_by_project(project, limit)
-                if not docs:
-                    return CallToolResult(
-                        content=[
-                            TextContent(
-                                type="text", text=f"No documents found for project '{project}'."
-                            )
-                        ]
-                    )
-                output = [f"## Documents in '{project}' ({len(docs)} documents)\n"]
-                for d in docs:
-                    output.append(f"- **{d['title'] or d['source_path']}** ({d['source_type']})")
-                    output.append(f"  - `{d['source_path']}`")
-                return CallToolResult(content=[TextContent(type="text", text="\n".join(output))])
             elif name == "recent_documents":
                 from .mcp_server import format_relative_time, get_document_date
@@ -298,13 +265,13 @@ class HTTPMCPServer:
                 deleted = kb.delete_knowledge(arguments["source_path"])
                 if deleted:
                     return CallToolResult(
-                        content=[TextContent(type="text", text="Document deleted.")]
+                        content=[TextContent(type="text", text="Knowledge entry deleted.")]
                     )
                 return CallToolResult(
                     content=[
                         TextContent(
                             type="text",
-                            text="Could not delete. Document not found.",
+                            text="Could not delete. Entry not found or not a Claude-saved entry.",
                         )
                     ]
                 )
@@ -427,134 +394,6 @@ class HTTPMCPServer:
                 )
                 return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "list_sync_sources":
-                from .mcp_server import _list_sync_sources
-                token_info = getattr(self.server, "_current_token_info", None)
-                db_name = token_info.database if token_info else config.get_database().name
-                result = _list_sync_sources(kb.db_url, db_name)
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "enrich_document":
-                from .mcp_server import _enrich_document
-                result = _enrich_document(
-                    kb.db_url,
-                    source_path=arguments["source_path"],
-                    extract_todos=arguments.get("extract_todos", True),
-                    extract_entities=arguments.get("extract_entities", True),
-                    auto_create_entities=arguments.get("auto_create_entities", False),
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "list_pending_entities":
-                from .mcp_server import _list_pending_entities
-                result = _list_pending_entities(
-                    kb.db_url,
-                    entity_type=arguments.get("entity_type"),
-                    limit=arguments.get("limit", 20),
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "approve_entity":
-                from .mcp_server import _approve_entity
-                result = _approve_entity(kb.db_url, arguments["pending_id"])
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "reject_entity":
-                from .mcp_server import _reject_entity
-                result = _reject_entity(kb.db_url, arguments["pending_id"])
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "analyze_knowledge_base":
-                from .mcp_server import _analyze_knowledge_base
-                result = _analyze_knowledge_base(
-                    kb.db_url,
-                    project=arguments.get("project"),
-                    sample_size=arguments.get("sample_size", 15),
-                    auto_update=arguments.get("auto_update", True),
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            # Entity consolidation tools
-            elif name == "find_entity_duplicates":
-                from .mcp_server import _find_entity_duplicates
-                result = _find_entity_duplicates(
-                    kb.db_url,
-                    similarity_threshold=arguments.get("similarity_threshold", 0.85),
-                    limit=arguments.get("limit", 50),
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "merge_entities":
-                from .mcp_server import _merge_entities
-                result = _merge_entities(
-                    kb.db_url,
-                    canonical_path=arguments["canonical_path"],
-                    duplicate_path=arguments["duplicate_path"],
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "list_pending_merges":
-                from .mcp_server import _list_pending_merges
-                result = _list_pending_merges(
-                    kb.db_url,
-                    limit=arguments.get("limit", 50),
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "approve_merge":
-                from .mcp_server import _approve_merge
-                result = _approve_merge(kb.db_url, arguments["merge_id"])
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "reject_merge":
-                from .mcp_server import _reject_merge
-                result = _reject_merge(kb.db_url, arguments["merge_id"])
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "get_topic_clusters":
-                from .mcp_server import _get_topic_clusters
-                result = _get_topic_clusters(
-                    kb.db_url,
-                    limit=arguments.get("limit", 20),
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "get_entity_relationships":
-                from .mcp_server import _get_entity_relationships
-                result = _get_entity_relationships(
-                    kb.db_url,
-                    entity_name=arguments.get("entity_name"),
-                    relationship_type=arguments.get("relationship_type"),
-                    limit=arguments.get("limit", 50),
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
-            elif name == "run_consolidation":
-                from .mcp_server import _run_consolidation
-                result = _run_consolidation(
-                    kb.db_url,
-                    detect_duplicates=arguments.get("detect_duplicates", True),
-                    detect_cross_doc=arguments.get("detect_cross_doc", True),
-                    build_clusters=arguments.get("build_clusters", True),
-                    extract_relationships=arguments.get("extract_relationships", True),
-                    dry_run=arguments.get("dry_run", False),
-                )
-                return CallToolResult(content=[TextContent(type="text", text=result)])
             else:
                 return CallToolResult(
                     content=[TextContent(type="text", text=f"Unknown tool: {name}")]

okb/llm/providers.py CHANGED Viewed

@@ -165,13 +165,13 @@ class ClaudeProvider:
 class ModalProvider:
-    """Modal-based LLM provider using open models (Phi-3, Llama, Mistral, etc.).
+    """Modal-based LLM provider using open models (Llama, Mistral, etc.).
     Runs on Modal GPU infrastructure - no API key needed, pay per compute.
-    Requires deploying the Modal app first: `okb llm deploy`
+    Requires deploying the Modal app first: `modal deploy lkb/modal_llm.py`
     Config:
-        model: Model name (default: microsoft/Phi-3-mini-4k-instruct)
+        model: Model name (default: meta-llama/Llama-3.2-3B-Instruct)
         timeout: Request timeout in seconds (default: 60)
     """
@@ -179,7 +179,7 @@ class ModalProvider:
     def __init__(self) -> None:
         self._llm = None
-        self._model: str = "microsoft/Phi-3-mini-4k-instruct"
+        self._model: str = "meta-llama/Llama-3.2-3B-Instruct"
         self._timeout: int = 60
     def configure(self, config: dict) -> None:
@@ -202,7 +202,7 @@ class ModalProvider:
             self._llm = modal.Cls.from_name("knowledge-llm", "LLM")()
         except modal.exception.NotFoundError:
             raise RuntimeError(
-                "Modal LLM app not deployed. Deploy with: okb llm deploy"
+                "Modal LLM app not deployed. Deploy with: modal deploy lkb/modal_llm.py"
             )
     def complete(
@@ -244,12 +244,9 @@ class ModalProvider:
     def list_models(self) -> list[str]:
         """List recommended models for Modal."""
         return [
-            # Non-gated (work immediately)
-            "microsoft/Phi-3-mini-4k-instruct",
-            "Qwen/Qwen2-1.5B-Instruct",
-            # Gated (require HuggingFace approval + HF_TOKEN)
             "meta-llama/Llama-3.2-3B-Instruct",
             "meta-llama/Llama-3.2-1B-Instruct",
+            "mistralai/Mistral-7B-Instruct-v0.3",
         ]

okb 1.1.0__py3-none-any.whl → 1.1.0a0__py3-none-any.whl

okb 1.1.0__py3-none-any.whl → 1.1.0a0__py3-none-any.whl