okb 1.1.0__py3-none-any.whl → 1.1.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- okb/cli.py +16 -1083
- okb/config.py +4 -122
- okb/http_server.py +2 -163
- okb/llm/providers.py +6 -9
- okb/mcp_server.py +12 -1036
- okb/modal_llm.py +8 -26
- okb/plugins/sources/github.py +5 -5
- okb/tokens.py +3 -25
- {okb-1.1.0.dist-info → okb-1.1.0a0.dist-info}/METADATA +6 -83
- {okb-1.1.0.dist-info → okb-1.1.0a0.dist-info}/RECORD +12 -24
- okb/llm/analyze.py +0 -524
- okb/llm/consolidate.py +0 -685
- okb/llm/enrich.py +0 -723
- okb/llm/extractors/__init__.py +0 -13
- okb/llm/extractors/base.py +0 -44
- okb/llm/extractors/cross_doc.py +0 -478
- okb/llm/extractors/dedup.py +0 -499
- okb/llm/extractors/entity.py +0 -369
- okb/llm/extractors/todo.py +0 -149
- okb/migrations/0008.enrichment.sql +0 -46
- okb/migrations/0009.entity-consolidation.sql +0 -120
- okb/migrations/0010.token-id.sql +0 -7
- {okb-1.1.0.dist-info → okb-1.1.0a0.dist-info}/WHEEL +0 -0
- {okb-1.1.0.dist-info → okb-1.1.0a0.dist-info}/entry_points.txt +0 -0
okb/config.py
CHANGED
|
@@ -53,7 +53,7 @@ class DatabaseConfig:
|
|
|
53
53
|
|
|
54
54
|
name: str
|
|
55
55
|
url: str
|
|
56
|
-
managed: bool = True # Whether okb manages this (Docker) or external
|
|
56
|
+
managed: bool = True # Whether lkb manages this (Docker) or external
|
|
57
57
|
default: bool = False
|
|
58
58
|
description: str | None = None # Human-readable description for LLM context
|
|
59
59
|
topics: list[str] | None = None # Topic keywords to help LLM route queries
|
|
@@ -259,7 +259,6 @@ DEFAULTS = {
|
|
|
259
259
|
"yarn.lock",
|
|
260
260
|
"uv.lock",
|
|
261
261
|
"Cargo.lock",
|
|
262
|
-
"poetry.lock",
|
|
263
262
|
"*.pyc",
|
|
264
263
|
"*.pyo",
|
|
265
264
|
"*.tmp",
|
|
@@ -282,7 +281,7 @@ DEFAULTS = {
|
|
|
282
281
|
},
|
|
283
282
|
"llm": {
|
|
284
283
|
# LLM provider configuration
|
|
285
|
-
# provider: None = disabled, "claude" = Anthropic API
|
|
284
|
+
# provider: None = disabled, "claude" = Anthropic API
|
|
286
285
|
"provider": None,
|
|
287
286
|
"model": "claude-haiku-4-5-20251001",
|
|
288
287
|
"timeout": 30,
|
|
@@ -290,38 +289,6 @@ DEFAULTS = {
|
|
|
290
289
|
# Bedrock settings (when use_bedrock is True)
|
|
291
290
|
"use_bedrock": False,
|
|
292
291
|
"aws_region": "us-west-2",
|
|
293
|
-
# Modal settings (when provider is "modal")
|
|
294
|
-
"modal_gpu": "L4", # GPU type: T4, L4, A10G, A100, etc.
|
|
295
|
-
},
|
|
296
|
-
"enrichment": {
|
|
297
|
-
# LLM-based document enrichment
|
|
298
|
-
"enabled": True,
|
|
299
|
-
"version": 1, # Increment to force re-enrichment
|
|
300
|
-
# What to extract
|
|
301
|
-
"extract_todos": True,
|
|
302
|
-
"extract_entities": True,
|
|
303
|
-
# Auto-create behavior
|
|
304
|
-
"auto_create_todos": True, # TODOs created immediately
|
|
305
|
-
"auto_create_entities": False, # Entities go to pending_entities table
|
|
306
|
-
# Confidence thresholds
|
|
307
|
-
"min_confidence_todo": 0.7,
|
|
308
|
-
"min_confidence_entity": 0.8,
|
|
309
|
-
# Auto-enrich during ingest (per source type)
|
|
310
|
-
"auto_enrich": {
|
|
311
|
-
"markdown": True,
|
|
312
|
-
"org": True,
|
|
313
|
-
"text": True,
|
|
314
|
-
"code": False, # Skip code files
|
|
315
|
-
"web": False, # Skip web pages
|
|
316
|
-
"todoist-task": False, # Already structured
|
|
317
|
-
},
|
|
318
|
-
# Entity consolidation settings
|
|
319
|
-
"consolidation": {
|
|
320
|
-
"cross_doc_min_mentions": 3, # Min docs for cross-doc detection
|
|
321
|
-
"embedding_similarity_threshold": 0.85, # For duplicate detection
|
|
322
|
-
"auto_merge_threshold": 0.95, # Auto-approve above this
|
|
323
|
-
"min_cluster_size": 3, # Min entities per cluster
|
|
324
|
-
},
|
|
325
292
|
},
|
|
326
293
|
}
|
|
327
294
|
|
|
@@ -382,30 +349,12 @@ class Config:
|
|
|
382
349
|
llm_cache_responses: bool = True
|
|
383
350
|
llm_use_bedrock: bool = False
|
|
384
351
|
llm_aws_region: str = "us-west-2"
|
|
385
|
-
llm_modal_gpu: str = "L4"
|
|
386
|
-
|
|
387
|
-
# Enrichment settings (loaded from config in __post_init__)
|
|
388
|
-
enrichment_enabled: bool = True
|
|
389
|
-
enrichment_version: int = 1
|
|
390
|
-
enrichment_extract_todos: bool = True
|
|
391
|
-
enrichment_extract_entities: bool = True
|
|
392
|
-
enrichment_auto_create_todos: bool = True
|
|
393
|
-
enrichment_auto_create_entities: bool = False
|
|
394
|
-
enrichment_min_confidence_todo: float = 0.7
|
|
395
|
-
enrichment_min_confidence_entity: float = 0.8
|
|
396
|
-
enrichment_auto_enrich: dict[str, bool] = field(default_factory=dict)
|
|
397
|
-
|
|
398
|
-
# Consolidation settings (loaded from config in __post_init__)
|
|
399
|
-
consolidation_cross_doc_min_mentions: int = 3
|
|
400
|
-
consolidation_embedding_similarity_threshold: float = 0.85
|
|
401
|
-
consolidation_auto_merge_threshold: float = 0.95
|
|
402
|
-
consolidation_min_cluster_size: int = 3
|
|
403
352
|
|
|
404
353
|
def __post_init__(self):
|
|
405
354
|
"""Load configuration from file and environment."""
|
|
406
355
|
file_config = load_config_file()
|
|
407
356
|
|
|
408
|
-
# Load and merge local config overlay (.okbconf.yaml)
|
|
357
|
+
# Load and merge local config overlay (.lkbconf.yaml)
|
|
409
358
|
local_path = find_local_config()
|
|
410
359
|
local_default_db: str | None = None
|
|
411
360
|
if local_path:
|
|
@@ -468,7 +417,7 @@ class Config:
|
|
|
468
417
|
else:
|
|
469
418
|
# Legacy: single database_url (env > file > default)
|
|
470
419
|
legacy_url = os.environ.get(
|
|
471
|
-
"OKB_DATABASE_URL",
|
|
420
|
+
"KB_DATABASE_URL",
|
|
472
421
|
file_config.get("database_url", DEFAULTS["databases"]["default"]["url"]),
|
|
473
422
|
)
|
|
474
423
|
self.databases["default"] = DatabaseConfig(
|
|
@@ -586,55 +535,6 @@ class Config:
|
|
|
586
535
|
)
|
|
587
536
|
self.llm_use_bedrock = llm_cfg.get("use_bedrock", DEFAULTS["llm"]["use_bedrock"])
|
|
588
537
|
self.llm_aws_region = llm_cfg.get("aws_region", DEFAULTS["llm"]["aws_region"])
|
|
589
|
-
self.llm_modal_gpu = os.environ.get(
|
|
590
|
-
"OKB_MODAL_GPU",
|
|
591
|
-
llm_cfg.get("modal_gpu", DEFAULTS["llm"]["modal_gpu"]),
|
|
592
|
-
)
|
|
593
|
-
|
|
594
|
-
# Enrichment settings
|
|
595
|
-
enrich_cfg = file_config.get("enrichment", {})
|
|
596
|
-
self.enrichment_enabled = enrich_cfg.get("enabled", DEFAULTS["enrichment"]["enabled"])
|
|
597
|
-
self.enrichment_version = enrich_cfg.get("version", DEFAULTS["enrichment"]["version"])
|
|
598
|
-
self.enrichment_extract_todos = enrich_cfg.get(
|
|
599
|
-
"extract_todos", DEFAULTS["enrichment"]["extract_todos"]
|
|
600
|
-
)
|
|
601
|
-
self.enrichment_extract_entities = enrich_cfg.get(
|
|
602
|
-
"extract_entities", DEFAULTS["enrichment"]["extract_entities"]
|
|
603
|
-
)
|
|
604
|
-
self.enrichment_auto_create_todos = enrich_cfg.get(
|
|
605
|
-
"auto_create_todos", DEFAULTS["enrichment"]["auto_create_todos"]
|
|
606
|
-
)
|
|
607
|
-
self.enrichment_auto_create_entities = enrich_cfg.get(
|
|
608
|
-
"auto_create_entities", DEFAULTS["enrichment"]["auto_create_entities"]
|
|
609
|
-
)
|
|
610
|
-
self.enrichment_min_confidence_todo = enrich_cfg.get(
|
|
611
|
-
"min_confidence_todo", DEFAULTS["enrichment"]["min_confidence_todo"]
|
|
612
|
-
)
|
|
613
|
-
self.enrichment_min_confidence_entity = enrich_cfg.get(
|
|
614
|
-
"min_confidence_entity", DEFAULTS["enrichment"]["min_confidence_entity"]
|
|
615
|
-
)
|
|
616
|
-
self.enrichment_auto_enrich = enrich_cfg.get(
|
|
617
|
-
"auto_enrich", DEFAULTS["enrichment"]["auto_enrich"]
|
|
618
|
-
)
|
|
619
|
-
|
|
620
|
-
# Consolidation settings
|
|
621
|
-
consolidation_cfg = enrich_cfg.get("consolidation", {})
|
|
622
|
-
self.consolidation_cross_doc_min_mentions = consolidation_cfg.get(
|
|
623
|
-
"cross_doc_min_mentions",
|
|
624
|
-
DEFAULTS["enrichment"]["consolidation"]["cross_doc_min_mentions"],
|
|
625
|
-
)
|
|
626
|
-
self.consolidation_embedding_similarity_threshold = consolidation_cfg.get(
|
|
627
|
-
"embedding_similarity_threshold",
|
|
628
|
-
DEFAULTS["enrichment"]["consolidation"]["embedding_similarity_threshold"],
|
|
629
|
-
)
|
|
630
|
-
self.consolidation_auto_merge_threshold = consolidation_cfg.get(
|
|
631
|
-
"auto_merge_threshold",
|
|
632
|
-
DEFAULTS["enrichment"]["consolidation"]["auto_merge_threshold"],
|
|
633
|
-
)
|
|
634
|
-
self.consolidation_min_cluster_size = consolidation_cfg.get(
|
|
635
|
-
"min_cluster_size",
|
|
636
|
-
DEFAULTS["enrichment"]["consolidation"]["min_cluster_size"],
|
|
637
|
-
)
|
|
638
538
|
|
|
639
539
|
def get_database(self, name: str | None = None) -> DatabaseConfig:
|
|
640
540
|
"""Get database config by name, or default if None."""
|
|
@@ -748,24 +648,6 @@ class Config:
|
|
|
748
648
|
"cache_responses": self.llm_cache_responses,
|
|
749
649
|
"use_bedrock": self.llm_use_bedrock,
|
|
750
650
|
"aws_region": self.llm_aws_region,
|
|
751
|
-
"modal_gpu": self.llm_modal_gpu,
|
|
752
|
-
},
|
|
753
|
-
"enrichment": {
|
|
754
|
-
"enabled": self.enrichment_enabled,
|
|
755
|
-
"version": self.enrichment_version,
|
|
756
|
-
"extract_todos": self.enrichment_extract_todos,
|
|
757
|
-
"extract_entities": self.enrichment_extract_entities,
|
|
758
|
-
"auto_create_todos": self.enrichment_auto_create_todos,
|
|
759
|
-
"auto_create_entities": self.enrichment_auto_create_entities,
|
|
760
|
-
"min_confidence_todo": self.enrichment_min_confidence_todo,
|
|
761
|
-
"min_confidence_entity": self.enrichment_min_confidence_entity,
|
|
762
|
-
"auto_enrich": self.enrichment_auto_enrich,
|
|
763
|
-
"consolidation": {
|
|
764
|
-
"cross_doc_min_mentions": self.consolidation_cross_doc_min_mentions,
|
|
765
|
-
"embedding_similarity_threshold": self.consolidation_embedding_similarity_threshold,
|
|
766
|
-
"auto_merge_threshold": self.consolidation_auto_merge_threshold,
|
|
767
|
-
"min_cluster_size": self.consolidation_min_cluster_size,
|
|
768
|
-
},
|
|
769
651
|
},
|
|
770
652
|
}
|
|
771
653
|
|
okb/http_server.py
CHANGED
|
@@ -37,15 +37,9 @@ READ_ONLY_TOOLS = frozenset(
|
|
|
37
37
|
"get_document",
|
|
38
38
|
"list_sources",
|
|
39
39
|
"list_projects",
|
|
40
|
-
"list_documents_by_project",
|
|
41
40
|
"recent_documents",
|
|
42
41
|
"get_actionable_items",
|
|
43
42
|
"get_database_info",
|
|
44
|
-
"list_sync_sources",
|
|
45
|
-
"list_pending_entities",
|
|
46
|
-
"list_pending_merges",
|
|
47
|
-
"get_topic_clusters",
|
|
48
|
-
"get_entity_relationships",
|
|
49
43
|
}
|
|
50
44
|
)
|
|
51
45
|
|
|
@@ -57,15 +51,6 @@ WRITE_TOOLS = frozenset(
|
|
|
57
51
|
"add_todo",
|
|
58
52
|
"trigger_sync",
|
|
59
53
|
"trigger_rescan",
|
|
60
|
-
"enrich_document",
|
|
61
|
-
"approve_entity",
|
|
62
|
-
"reject_entity",
|
|
63
|
-
"analyze_knowledge_base",
|
|
64
|
-
"find_entity_duplicates",
|
|
65
|
-
"merge_entities",
|
|
66
|
-
"approve_merge",
|
|
67
|
-
"reject_merge",
|
|
68
|
-
"run_consolidation",
|
|
69
54
|
}
|
|
70
55
|
)
|
|
71
56
|
|
|
@@ -223,24 +208,6 @@ class HTTPMCPServer:
|
|
|
223
208
|
content=[TextContent(type="text", text=f"## Projects\n\n{project_list}")]
|
|
224
209
|
)
|
|
225
210
|
|
|
226
|
-
elif name == "list_documents_by_project":
|
|
227
|
-
project = arguments["project"]
|
|
228
|
-
limit = arguments.get("limit", 100)
|
|
229
|
-
docs = kb.list_documents_by_project(project, limit)
|
|
230
|
-
if not docs:
|
|
231
|
-
return CallToolResult(
|
|
232
|
-
content=[
|
|
233
|
-
TextContent(
|
|
234
|
-
type="text", text=f"No documents found for project '{project}'."
|
|
235
|
-
)
|
|
236
|
-
]
|
|
237
|
-
)
|
|
238
|
-
output = [f"## Documents in '{project}' ({len(docs)} documents)\n"]
|
|
239
|
-
for d in docs:
|
|
240
|
-
output.append(f"- **{d['title'] or d['source_path']}** ({d['source_type']})")
|
|
241
|
-
output.append(f" - `{d['source_path']}`")
|
|
242
|
-
return CallToolResult(content=[TextContent(type="text", text="\n".join(output))])
|
|
243
|
-
|
|
244
211
|
elif name == "recent_documents":
|
|
245
212
|
from .mcp_server import format_relative_time, get_document_date
|
|
246
213
|
|
|
@@ -298,13 +265,13 @@ class HTTPMCPServer:
|
|
|
298
265
|
deleted = kb.delete_knowledge(arguments["source_path"])
|
|
299
266
|
if deleted:
|
|
300
267
|
return CallToolResult(
|
|
301
|
-
content=[TextContent(type="text", text="
|
|
268
|
+
content=[TextContent(type="text", text="Knowledge entry deleted.")]
|
|
302
269
|
)
|
|
303
270
|
return CallToolResult(
|
|
304
271
|
content=[
|
|
305
272
|
TextContent(
|
|
306
273
|
type="text",
|
|
307
|
-
text="Could not delete.
|
|
274
|
+
text="Could not delete. Entry not found or not a Claude-saved entry.",
|
|
308
275
|
)
|
|
309
276
|
]
|
|
310
277
|
)
|
|
@@ -427,134 +394,6 @@ class HTTPMCPServer:
|
|
|
427
394
|
)
|
|
428
395
|
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
429
396
|
|
|
430
|
-
elif name == "list_sync_sources":
|
|
431
|
-
from .mcp_server import _list_sync_sources
|
|
432
|
-
|
|
433
|
-
token_info = getattr(self.server, "_current_token_info", None)
|
|
434
|
-
db_name = token_info.database if token_info else config.get_database().name
|
|
435
|
-
result = _list_sync_sources(kb.db_url, db_name)
|
|
436
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
437
|
-
|
|
438
|
-
elif name == "enrich_document":
|
|
439
|
-
from .mcp_server import _enrich_document
|
|
440
|
-
|
|
441
|
-
result = _enrich_document(
|
|
442
|
-
kb.db_url,
|
|
443
|
-
source_path=arguments["source_path"],
|
|
444
|
-
extract_todos=arguments.get("extract_todos", True),
|
|
445
|
-
extract_entities=arguments.get("extract_entities", True),
|
|
446
|
-
auto_create_entities=arguments.get("auto_create_entities", False),
|
|
447
|
-
)
|
|
448
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
449
|
-
|
|
450
|
-
elif name == "list_pending_entities":
|
|
451
|
-
from .mcp_server import _list_pending_entities
|
|
452
|
-
|
|
453
|
-
result = _list_pending_entities(
|
|
454
|
-
kb.db_url,
|
|
455
|
-
entity_type=arguments.get("entity_type"),
|
|
456
|
-
limit=arguments.get("limit", 20),
|
|
457
|
-
)
|
|
458
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
459
|
-
|
|
460
|
-
elif name == "approve_entity":
|
|
461
|
-
from .mcp_server import _approve_entity
|
|
462
|
-
|
|
463
|
-
result = _approve_entity(kb.db_url, arguments["pending_id"])
|
|
464
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
465
|
-
|
|
466
|
-
elif name == "reject_entity":
|
|
467
|
-
from .mcp_server import _reject_entity
|
|
468
|
-
|
|
469
|
-
result = _reject_entity(kb.db_url, arguments["pending_id"])
|
|
470
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
471
|
-
|
|
472
|
-
elif name == "analyze_knowledge_base":
|
|
473
|
-
from .mcp_server import _analyze_knowledge_base
|
|
474
|
-
|
|
475
|
-
result = _analyze_knowledge_base(
|
|
476
|
-
kb.db_url,
|
|
477
|
-
project=arguments.get("project"),
|
|
478
|
-
sample_size=arguments.get("sample_size", 15),
|
|
479
|
-
auto_update=arguments.get("auto_update", True),
|
|
480
|
-
)
|
|
481
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
482
|
-
|
|
483
|
-
# Entity consolidation tools
|
|
484
|
-
elif name == "find_entity_duplicates":
|
|
485
|
-
from .mcp_server import _find_entity_duplicates
|
|
486
|
-
|
|
487
|
-
result = _find_entity_duplicates(
|
|
488
|
-
kb.db_url,
|
|
489
|
-
similarity_threshold=arguments.get("similarity_threshold", 0.85),
|
|
490
|
-
limit=arguments.get("limit", 50),
|
|
491
|
-
)
|
|
492
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
493
|
-
|
|
494
|
-
elif name == "merge_entities":
|
|
495
|
-
from .mcp_server import _merge_entities
|
|
496
|
-
|
|
497
|
-
result = _merge_entities(
|
|
498
|
-
kb.db_url,
|
|
499
|
-
canonical_path=arguments["canonical_path"],
|
|
500
|
-
duplicate_path=arguments["duplicate_path"],
|
|
501
|
-
)
|
|
502
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
503
|
-
|
|
504
|
-
elif name == "list_pending_merges":
|
|
505
|
-
from .mcp_server import _list_pending_merges
|
|
506
|
-
|
|
507
|
-
result = _list_pending_merges(
|
|
508
|
-
kb.db_url,
|
|
509
|
-
limit=arguments.get("limit", 50),
|
|
510
|
-
)
|
|
511
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
512
|
-
|
|
513
|
-
elif name == "approve_merge":
|
|
514
|
-
from .mcp_server import _approve_merge
|
|
515
|
-
|
|
516
|
-
result = _approve_merge(kb.db_url, arguments["merge_id"])
|
|
517
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
518
|
-
|
|
519
|
-
elif name == "reject_merge":
|
|
520
|
-
from .mcp_server import _reject_merge
|
|
521
|
-
|
|
522
|
-
result = _reject_merge(kb.db_url, arguments["merge_id"])
|
|
523
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
524
|
-
|
|
525
|
-
elif name == "get_topic_clusters":
|
|
526
|
-
from .mcp_server import _get_topic_clusters
|
|
527
|
-
|
|
528
|
-
result = _get_topic_clusters(
|
|
529
|
-
kb.db_url,
|
|
530
|
-
limit=arguments.get("limit", 20),
|
|
531
|
-
)
|
|
532
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
533
|
-
|
|
534
|
-
elif name == "get_entity_relationships":
|
|
535
|
-
from .mcp_server import _get_entity_relationships
|
|
536
|
-
|
|
537
|
-
result = _get_entity_relationships(
|
|
538
|
-
kb.db_url,
|
|
539
|
-
entity_name=arguments.get("entity_name"),
|
|
540
|
-
relationship_type=arguments.get("relationship_type"),
|
|
541
|
-
limit=arguments.get("limit", 50),
|
|
542
|
-
)
|
|
543
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
544
|
-
|
|
545
|
-
elif name == "run_consolidation":
|
|
546
|
-
from .mcp_server import _run_consolidation
|
|
547
|
-
|
|
548
|
-
result = _run_consolidation(
|
|
549
|
-
kb.db_url,
|
|
550
|
-
detect_duplicates=arguments.get("detect_duplicates", True),
|
|
551
|
-
detect_cross_doc=arguments.get("detect_cross_doc", True),
|
|
552
|
-
build_clusters=arguments.get("build_clusters", True),
|
|
553
|
-
extract_relationships=arguments.get("extract_relationships", True),
|
|
554
|
-
dry_run=arguments.get("dry_run", False),
|
|
555
|
-
)
|
|
556
|
-
return CallToolResult(content=[TextContent(type="text", text=result)])
|
|
557
|
-
|
|
558
397
|
else:
|
|
559
398
|
return CallToolResult(
|
|
560
399
|
content=[TextContent(type="text", text=f"Unknown tool: {name}")]
|
okb/llm/providers.py
CHANGED
|
@@ -165,13 +165,13 @@ class ClaudeProvider:
|
|
|
165
165
|
|
|
166
166
|
|
|
167
167
|
class ModalProvider:
|
|
168
|
-
"""Modal-based LLM provider using open models (
|
|
168
|
+
"""Modal-based LLM provider using open models (Llama, Mistral, etc.).
|
|
169
169
|
|
|
170
170
|
Runs on Modal GPU infrastructure - no API key needed, pay per compute.
|
|
171
|
-
Requires deploying the Modal app first: `
|
|
171
|
+
Requires deploying the Modal app first: `modal deploy lkb/modal_llm.py`
|
|
172
172
|
|
|
173
173
|
Config:
|
|
174
|
-
model: Model name (default:
|
|
174
|
+
model: Model name (default: meta-llama/Llama-3.2-3B-Instruct)
|
|
175
175
|
timeout: Request timeout in seconds (default: 60)
|
|
176
176
|
"""
|
|
177
177
|
|
|
@@ -179,7 +179,7 @@ class ModalProvider:
|
|
|
179
179
|
|
|
180
180
|
def __init__(self) -> None:
|
|
181
181
|
self._llm = None
|
|
182
|
-
self._model: str = "
|
|
182
|
+
self._model: str = "meta-llama/Llama-3.2-3B-Instruct"
|
|
183
183
|
self._timeout: int = 60
|
|
184
184
|
|
|
185
185
|
def configure(self, config: dict) -> None:
|
|
@@ -202,7 +202,7 @@ class ModalProvider:
|
|
|
202
202
|
self._llm = modal.Cls.from_name("knowledge-llm", "LLM")()
|
|
203
203
|
except modal.exception.NotFoundError:
|
|
204
204
|
raise RuntimeError(
|
|
205
|
-
"Modal LLM app not deployed. Deploy with:
|
|
205
|
+
"Modal LLM app not deployed. Deploy with: modal deploy lkb/modal_llm.py"
|
|
206
206
|
)
|
|
207
207
|
|
|
208
208
|
def complete(
|
|
@@ -244,12 +244,9 @@ class ModalProvider:
|
|
|
244
244
|
def list_models(self) -> list[str]:
|
|
245
245
|
"""List recommended models for Modal."""
|
|
246
246
|
return [
|
|
247
|
-
# Non-gated (work immediately)
|
|
248
|
-
"microsoft/Phi-3-mini-4k-instruct",
|
|
249
|
-
"Qwen/Qwen2-1.5B-Instruct",
|
|
250
|
-
# Gated (require HuggingFace approval + HF_TOKEN)
|
|
251
247
|
"meta-llama/Llama-3.2-3B-Instruct",
|
|
252
248
|
"meta-llama/Llama-3.2-1B-Instruct",
|
|
249
|
+
"mistralai/Mistral-7B-Instruct-v0.3",
|
|
253
250
|
]
|
|
254
251
|
|
|
255
252
|
|