okb 1.1.0__py3-none-any.whl → 1.1.0a0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
okb/config.py CHANGED
@@ -53,7 +53,7 @@ class DatabaseConfig:
53
53
 
54
54
  name: str
55
55
  url: str
56
- managed: bool = True # Whether okb manages this (Docker) or external
56
+ managed: bool = True # Whether lkb manages this (Docker) or external
57
57
  default: bool = False
58
58
  description: str | None = None # Human-readable description for LLM context
59
59
  topics: list[str] | None = None # Topic keywords to help LLM route queries
@@ -259,7 +259,6 @@ DEFAULTS = {
259
259
  "yarn.lock",
260
260
  "uv.lock",
261
261
  "Cargo.lock",
262
- "poetry.lock",
263
262
  "*.pyc",
264
263
  "*.pyo",
265
264
  "*.tmp",
@@ -282,7 +281,7 @@ DEFAULTS = {
282
281
  },
283
282
  "llm": {
284
283
  # LLM provider configuration
285
- # provider: None = disabled, "claude" = Anthropic API, "modal" = Modal GPU
284
+ # provider: None = disabled, "claude" = Anthropic API
286
285
  "provider": None,
287
286
  "model": "claude-haiku-4-5-20251001",
288
287
  "timeout": 30,
@@ -290,38 +289,6 @@ DEFAULTS = {
290
289
  # Bedrock settings (when use_bedrock is True)
291
290
  "use_bedrock": False,
292
291
  "aws_region": "us-west-2",
293
- # Modal settings (when provider is "modal")
294
- "modal_gpu": "L4", # GPU type: T4, L4, A10G, A100, etc.
295
- },
296
- "enrichment": {
297
- # LLM-based document enrichment
298
- "enabled": True,
299
- "version": 1, # Increment to force re-enrichment
300
- # What to extract
301
- "extract_todos": True,
302
- "extract_entities": True,
303
- # Auto-create behavior
304
- "auto_create_todos": True, # TODOs created immediately
305
- "auto_create_entities": False, # Entities go to pending_entities table
306
- # Confidence thresholds
307
- "min_confidence_todo": 0.7,
308
- "min_confidence_entity": 0.8,
309
- # Auto-enrich during ingest (per source type)
310
- "auto_enrich": {
311
- "markdown": True,
312
- "org": True,
313
- "text": True,
314
- "code": False, # Skip code files
315
- "web": False, # Skip web pages
316
- "todoist-task": False, # Already structured
317
- },
318
- # Entity consolidation settings
319
- "consolidation": {
320
- "cross_doc_min_mentions": 3, # Min docs for cross-doc detection
321
- "embedding_similarity_threshold": 0.85, # For duplicate detection
322
- "auto_merge_threshold": 0.95, # Auto-approve above this
323
- "min_cluster_size": 3, # Min entities per cluster
324
- },
325
292
  },
326
293
  }
327
294
 
@@ -382,30 +349,12 @@ class Config:
382
349
  llm_cache_responses: bool = True
383
350
  llm_use_bedrock: bool = False
384
351
  llm_aws_region: str = "us-west-2"
385
- llm_modal_gpu: str = "L4"
386
-
387
- # Enrichment settings (loaded from config in __post_init__)
388
- enrichment_enabled: bool = True
389
- enrichment_version: int = 1
390
- enrichment_extract_todos: bool = True
391
- enrichment_extract_entities: bool = True
392
- enrichment_auto_create_todos: bool = True
393
- enrichment_auto_create_entities: bool = False
394
- enrichment_min_confidence_todo: float = 0.7
395
- enrichment_min_confidence_entity: float = 0.8
396
- enrichment_auto_enrich: dict[str, bool] = field(default_factory=dict)
397
-
398
- # Consolidation settings (loaded from config in __post_init__)
399
- consolidation_cross_doc_min_mentions: int = 3
400
- consolidation_embedding_similarity_threshold: float = 0.85
401
- consolidation_auto_merge_threshold: float = 0.95
402
- consolidation_min_cluster_size: int = 3
403
352
 
404
353
  def __post_init__(self):
405
354
  """Load configuration from file and environment."""
406
355
  file_config = load_config_file()
407
356
 
408
- # Load and merge local config overlay (.okbconf.yaml)
357
+ # Load and merge local config overlay (.lkbconf.yaml)
409
358
  local_path = find_local_config()
410
359
  local_default_db: str | None = None
411
360
  if local_path:
@@ -468,7 +417,7 @@ class Config:
468
417
  else:
469
418
  # Legacy: single database_url (env > file > default)
470
419
  legacy_url = os.environ.get(
471
- "OKB_DATABASE_URL",
420
+ "KB_DATABASE_URL",
472
421
  file_config.get("database_url", DEFAULTS["databases"]["default"]["url"]),
473
422
  )
474
423
  self.databases["default"] = DatabaseConfig(
@@ -586,55 +535,6 @@ class Config:
586
535
  )
587
536
  self.llm_use_bedrock = llm_cfg.get("use_bedrock", DEFAULTS["llm"]["use_bedrock"])
588
537
  self.llm_aws_region = llm_cfg.get("aws_region", DEFAULTS["llm"]["aws_region"])
589
- self.llm_modal_gpu = os.environ.get(
590
- "OKB_MODAL_GPU",
591
- llm_cfg.get("modal_gpu", DEFAULTS["llm"]["modal_gpu"]),
592
- )
593
-
594
- # Enrichment settings
595
- enrich_cfg = file_config.get("enrichment", {})
596
- self.enrichment_enabled = enrich_cfg.get("enabled", DEFAULTS["enrichment"]["enabled"])
597
- self.enrichment_version = enrich_cfg.get("version", DEFAULTS["enrichment"]["version"])
598
- self.enrichment_extract_todos = enrich_cfg.get(
599
- "extract_todos", DEFAULTS["enrichment"]["extract_todos"]
600
- )
601
- self.enrichment_extract_entities = enrich_cfg.get(
602
- "extract_entities", DEFAULTS["enrichment"]["extract_entities"]
603
- )
604
- self.enrichment_auto_create_todos = enrich_cfg.get(
605
- "auto_create_todos", DEFAULTS["enrichment"]["auto_create_todos"]
606
- )
607
- self.enrichment_auto_create_entities = enrich_cfg.get(
608
- "auto_create_entities", DEFAULTS["enrichment"]["auto_create_entities"]
609
- )
610
- self.enrichment_min_confidence_todo = enrich_cfg.get(
611
- "min_confidence_todo", DEFAULTS["enrichment"]["min_confidence_todo"]
612
- )
613
- self.enrichment_min_confidence_entity = enrich_cfg.get(
614
- "min_confidence_entity", DEFAULTS["enrichment"]["min_confidence_entity"]
615
- )
616
- self.enrichment_auto_enrich = enrich_cfg.get(
617
- "auto_enrich", DEFAULTS["enrichment"]["auto_enrich"]
618
- )
619
-
620
- # Consolidation settings
621
- consolidation_cfg = enrich_cfg.get("consolidation", {})
622
- self.consolidation_cross_doc_min_mentions = consolidation_cfg.get(
623
- "cross_doc_min_mentions",
624
- DEFAULTS["enrichment"]["consolidation"]["cross_doc_min_mentions"],
625
- )
626
- self.consolidation_embedding_similarity_threshold = consolidation_cfg.get(
627
- "embedding_similarity_threshold",
628
- DEFAULTS["enrichment"]["consolidation"]["embedding_similarity_threshold"],
629
- )
630
- self.consolidation_auto_merge_threshold = consolidation_cfg.get(
631
- "auto_merge_threshold",
632
- DEFAULTS["enrichment"]["consolidation"]["auto_merge_threshold"],
633
- )
634
- self.consolidation_min_cluster_size = consolidation_cfg.get(
635
- "min_cluster_size",
636
- DEFAULTS["enrichment"]["consolidation"]["min_cluster_size"],
637
- )
638
538
 
639
539
  def get_database(self, name: str | None = None) -> DatabaseConfig:
640
540
  """Get database config by name, or default if None."""
@@ -748,24 +648,6 @@ class Config:
748
648
  "cache_responses": self.llm_cache_responses,
749
649
  "use_bedrock": self.llm_use_bedrock,
750
650
  "aws_region": self.llm_aws_region,
751
- "modal_gpu": self.llm_modal_gpu,
752
- },
753
- "enrichment": {
754
- "enabled": self.enrichment_enabled,
755
- "version": self.enrichment_version,
756
- "extract_todos": self.enrichment_extract_todos,
757
- "extract_entities": self.enrichment_extract_entities,
758
- "auto_create_todos": self.enrichment_auto_create_todos,
759
- "auto_create_entities": self.enrichment_auto_create_entities,
760
- "min_confidence_todo": self.enrichment_min_confidence_todo,
761
- "min_confidence_entity": self.enrichment_min_confidence_entity,
762
- "auto_enrich": self.enrichment_auto_enrich,
763
- "consolidation": {
764
- "cross_doc_min_mentions": self.consolidation_cross_doc_min_mentions,
765
- "embedding_similarity_threshold": self.consolidation_embedding_similarity_threshold,
766
- "auto_merge_threshold": self.consolidation_auto_merge_threshold,
767
- "min_cluster_size": self.consolidation_min_cluster_size,
768
- },
769
651
  },
770
652
  }
771
653
 
okb/http_server.py CHANGED
@@ -37,15 +37,9 @@ READ_ONLY_TOOLS = frozenset(
37
37
  "get_document",
38
38
  "list_sources",
39
39
  "list_projects",
40
- "list_documents_by_project",
41
40
  "recent_documents",
42
41
  "get_actionable_items",
43
42
  "get_database_info",
44
- "list_sync_sources",
45
- "list_pending_entities",
46
- "list_pending_merges",
47
- "get_topic_clusters",
48
- "get_entity_relationships",
49
43
  }
50
44
  )
51
45
 
@@ -57,15 +51,6 @@ WRITE_TOOLS = frozenset(
57
51
  "add_todo",
58
52
  "trigger_sync",
59
53
  "trigger_rescan",
60
- "enrich_document",
61
- "approve_entity",
62
- "reject_entity",
63
- "analyze_knowledge_base",
64
- "find_entity_duplicates",
65
- "merge_entities",
66
- "approve_merge",
67
- "reject_merge",
68
- "run_consolidation",
69
54
  }
70
55
  )
71
56
 
@@ -223,24 +208,6 @@ class HTTPMCPServer:
223
208
  content=[TextContent(type="text", text=f"## Projects\n\n{project_list}")]
224
209
  )
225
210
 
226
- elif name == "list_documents_by_project":
227
- project = arguments["project"]
228
- limit = arguments.get("limit", 100)
229
- docs = kb.list_documents_by_project(project, limit)
230
- if not docs:
231
- return CallToolResult(
232
- content=[
233
- TextContent(
234
- type="text", text=f"No documents found for project '{project}'."
235
- )
236
- ]
237
- )
238
- output = [f"## Documents in '{project}' ({len(docs)} documents)\n"]
239
- for d in docs:
240
- output.append(f"- **{d['title'] or d['source_path']}** ({d['source_type']})")
241
- output.append(f" - `{d['source_path']}`")
242
- return CallToolResult(content=[TextContent(type="text", text="\n".join(output))])
243
-
244
211
  elif name == "recent_documents":
245
212
  from .mcp_server import format_relative_time, get_document_date
246
213
 
@@ -298,13 +265,13 @@ class HTTPMCPServer:
298
265
  deleted = kb.delete_knowledge(arguments["source_path"])
299
266
  if deleted:
300
267
  return CallToolResult(
301
- content=[TextContent(type="text", text="Document deleted.")]
268
+ content=[TextContent(type="text", text="Knowledge entry deleted.")]
302
269
  )
303
270
  return CallToolResult(
304
271
  content=[
305
272
  TextContent(
306
273
  type="text",
307
- text="Could not delete. Document not found.",
274
+ text="Could not delete. Entry not found or not a Claude-saved entry.",
308
275
  )
309
276
  ]
310
277
  )
@@ -427,134 +394,6 @@ class HTTPMCPServer:
427
394
  )
428
395
  return CallToolResult(content=[TextContent(type="text", text=result)])
429
396
 
430
- elif name == "list_sync_sources":
431
- from .mcp_server import _list_sync_sources
432
-
433
- token_info = getattr(self.server, "_current_token_info", None)
434
- db_name = token_info.database if token_info else config.get_database().name
435
- result = _list_sync_sources(kb.db_url, db_name)
436
- return CallToolResult(content=[TextContent(type="text", text=result)])
437
-
438
- elif name == "enrich_document":
439
- from .mcp_server import _enrich_document
440
-
441
- result = _enrich_document(
442
- kb.db_url,
443
- source_path=arguments["source_path"],
444
- extract_todos=arguments.get("extract_todos", True),
445
- extract_entities=arguments.get("extract_entities", True),
446
- auto_create_entities=arguments.get("auto_create_entities", False),
447
- )
448
- return CallToolResult(content=[TextContent(type="text", text=result)])
449
-
450
- elif name == "list_pending_entities":
451
- from .mcp_server import _list_pending_entities
452
-
453
- result = _list_pending_entities(
454
- kb.db_url,
455
- entity_type=arguments.get("entity_type"),
456
- limit=arguments.get("limit", 20),
457
- )
458
- return CallToolResult(content=[TextContent(type="text", text=result)])
459
-
460
- elif name == "approve_entity":
461
- from .mcp_server import _approve_entity
462
-
463
- result = _approve_entity(kb.db_url, arguments["pending_id"])
464
- return CallToolResult(content=[TextContent(type="text", text=result)])
465
-
466
- elif name == "reject_entity":
467
- from .mcp_server import _reject_entity
468
-
469
- result = _reject_entity(kb.db_url, arguments["pending_id"])
470
- return CallToolResult(content=[TextContent(type="text", text=result)])
471
-
472
- elif name == "analyze_knowledge_base":
473
- from .mcp_server import _analyze_knowledge_base
474
-
475
- result = _analyze_knowledge_base(
476
- kb.db_url,
477
- project=arguments.get("project"),
478
- sample_size=arguments.get("sample_size", 15),
479
- auto_update=arguments.get("auto_update", True),
480
- )
481
- return CallToolResult(content=[TextContent(type="text", text=result)])
482
-
483
- # Entity consolidation tools
484
- elif name == "find_entity_duplicates":
485
- from .mcp_server import _find_entity_duplicates
486
-
487
- result = _find_entity_duplicates(
488
- kb.db_url,
489
- similarity_threshold=arguments.get("similarity_threshold", 0.85),
490
- limit=arguments.get("limit", 50),
491
- )
492
- return CallToolResult(content=[TextContent(type="text", text=result)])
493
-
494
- elif name == "merge_entities":
495
- from .mcp_server import _merge_entities
496
-
497
- result = _merge_entities(
498
- kb.db_url,
499
- canonical_path=arguments["canonical_path"],
500
- duplicate_path=arguments["duplicate_path"],
501
- )
502
- return CallToolResult(content=[TextContent(type="text", text=result)])
503
-
504
- elif name == "list_pending_merges":
505
- from .mcp_server import _list_pending_merges
506
-
507
- result = _list_pending_merges(
508
- kb.db_url,
509
- limit=arguments.get("limit", 50),
510
- )
511
- return CallToolResult(content=[TextContent(type="text", text=result)])
512
-
513
- elif name == "approve_merge":
514
- from .mcp_server import _approve_merge
515
-
516
- result = _approve_merge(kb.db_url, arguments["merge_id"])
517
- return CallToolResult(content=[TextContent(type="text", text=result)])
518
-
519
- elif name == "reject_merge":
520
- from .mcp_server import _reject_merge
521
-
522
- result = _reject_merge(kb.db_url, arguments["merge_id"])
523
- return CallToolResult(content=[TextContent(type="text", text=result)])
524
-
525
- elif name == "get_topic_clusters":
526
- from .mcp_server import _get_topic_clusters
527
-
528
- result = _get_topic_clusters(
529
- kb.db_url,
530
- limit=arguments.get("limit", 20),
531
- )
532
- return CallToolResult(content=[TextContent(type="text", text=result)])
533
-
534
- elif name == "get_entity_relationships":
535
- from .mcp_server import _get_entity_relationships
536
-
537
- result = _get_entity_relationships(
538
- kb.db_url,
539
- entity_name=arguments.get("entity_name"),
540
- relationship_type=arguments.get("relationship_type"),
541
- limit=arguments.get("limit", 50),
542
- )
543
- return CallToolResult(content=[TextContent(type="text", text=result)])
544
-
545
- elif name == "run_consolidation":
546
- from .mcp_server import _run_consolidation
547
-
548
- result = _run_consolidation(
549
- kb.db_url,
550
- detect_duplicates=arguments.get("detect_duplicates", True),
551
- detect_cross_doc=arguments.get("detect_cross_doc", True),
552
- build_clusters=arguments.get("build_clusters", True),
553
- extract_relationships=arguments.get("extract_relationships", True),
554
- dry_run=arguments.get("dry_run", False),
555
- )
556
- return CallToolResult(content=[TextContent(type="text", text=result)])
557
-
558
397
  else:
559
398
  return CallToolResult(
560
399
  content=[TextContent(type="text", text=f"Unknown tool: {name}")]
okb/llm/providers.py CHANGED
@@ -165,13 +165,13 @@ class ClaudeProvider:
165
165
 
166
166
 
167
167
  class ModalProvider:
168
- """Modal-based LLM provider using open models (Phi-3, Llama, Mistral, etc.).
168
+ """Modal-based LLM provider using open models (Llama, Mistral, etc.).
169
169
 
170
170
  Runs on Modal GPU infrastructure - no API key needed, pay per compute.
171
- Requires deploying the Modal app first: `okb llm deploy`
171
+ Requires deploying the Modal app first: `modal deploy lkb/modal_llm.py`
172
172
 
173
173
  Config:
174
- model: Model name (default: microsoft/Phi-3-mini-4k-instruct)
174
+ model: Model name (default: meta-llama/Llama-3.2-3B-Instruct)
175
175
  timeout: Request timeout in seconds (default: 60)
176
176
  """
177
177
 
@@ -179,7 +179,7 @@ class ModalProvider:
179
179
 
180
180
  def __init__(self) -> None:
181
181
  self._llm = None
182
- self._model: str = "microsoft/Phi-3-mini-4k-instruct"
182
+ self._model: str = "meta-llama/Llama-3.2-3B-Instruct"
183
183
  self._timeout: int = 60
184
184
 
185
185
  def configure(self, config: dict) -> None:
@@ -202,7 +202,7 @@ class ModalProvider:
202
202
  self._llm = modal.Cls.from_name("knowledge-llm", "LLM")()
203
203
  except modal.exception.NotFoundError:
204
204
  raise RuntimeError(
205
- "Modal LLM app not deployed. Deploy with: okb llm deploy"
205
+ "Modal LLM app not deployed. Deploy with: modal deploy lkb/modal_llm.py"
206
206
  )
207
207
 
208
208
  def complete(
@@ -244,12 +244,9 @@ class ModalProvider:
244
244
  def list_models(self) -> list[str]:
245
245
  """List recommended models for Modal."""
246
246
  return [
247
- # Non-gated (work immediately)
248
- "microsoft/Phi-3-mini-4k-instruct",
249
- "Qwen/Qwen2-1.5B-Instruct",
250
- # Gated (require HuggingFace approval + HF_TOKEN)
251
247
  "meta-llama/Llama-3.2-3B-Instruct",
252
248
  "meta-llama/Llama-3.2-1B-Instruct",
249
+ "mistralai/Mistral-7B-Instruct-v0.3",
253
250
  ]
254
251
 
255
252