okb-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- okb/__init__.py +3 -0
- okb/cli.py +1272 -0
- okb/config.py +661 -0
- okb/data/init.sql +92 -0
- okb/http_server.py +463 -0
- okb/ingest.py +1589 -0
- okb/llm/__init__.py +86 -0
- okb/llm/base.py +83 -0
- okb/llm/cache.py +217 -0
- okb/llm/filter.py +187 -0
- okb/llm/providers.py +322 -0
- okb/local_embedder.py +87 -0
- okb/mcp_server.py +1393 -0
- okb/migrate.py +53 -0
- okb/migrations/0001.initial-schema.sql +91 -0
- okb/migrations/0002.sync-state.sql +22 -0
- okb/migrations/0003.structured-fields.sql +22 -0
- okb/migrations/0004.tokens.sql +13 -0
- okb/migrations/0005.database-metadata.sql +19 -0
- okb/migrations/0006.llm-cache.sql +13 -0
- okb/modal_embedder.py +120 -0
- okb/modal_llm.py +178 -0
- okb/plugins/__init__.py +8 -0
- okb/plugins/base.py +110 -0
- okb/plugins/registry.py +123 -0
- okb/plugins/sources/__init__.py +5 -0
- okb/plugins/sources/dropbox_paper.py +188 -0
- okb/plugins/sources/github.py +484 -0
- okb/rescan.py +227 -0
- okb/scripts/__init__.py +1 -0
- okb/scripts/watch.py +206 -0
- okb/tokens.py +277 -0
- okb-1.0.0.dist-info/METADATA +397 -0
- okb-1.0.0.dist-info/RECORD +36 -0
- okb-1.0.0.dist-info/WHEEL +4 -0
- okb-1.0.0.dist-info/entry_points.txt +9 -0
okb/llm/providers.py
ADDED
@@ -0,0 +1,322 @@
"""LLM provider implementations."""

from __future__ import annotations

import os
import sys
from typing import TYPE_CHECKING

from .base import LLMResponse

if TYPE_CHECKING:
    from .base import LLMProvider


class ClaudeProvider:
    """Claude API provider using the Anthropic SDK.

    Supports both direct API access and AWS Bedrock.

    Config:
        model: Model name (default: claude-haiku-4-5-20251001)
        api_key: API key (default: reads ANTHROPIC_API_KEY)
        timeout: Request timeout in seconds (default: 30)

    For Bedrock:
        use_bedrock: true
        aws_region: AWS region (default: us-west-2)
    """

    name = "claude"

    def __init__(self) -> None:
        self._client = None
        self._model: str = "claude-haiku-4-5-20251001"
        self._timeout: int = 30
        self._use_bedrock: bool = False
        self._aws_region: str = "us-west-2"

    def configure(self, config: dict) -> None:
        """Configure the Claude provider.

        Args:
            config: Configuration dict with optional keys:
                - model: Model name
                - api_key: API key (or uses ANTHROPIC_API_KEY env var)
                - timeout: Request timeout
                - use_bedrock: Use AWS Bedrock instead of direct API
                - aws_region: AWS region for Bedrock
        """
        self._model = config.get("model", self._model)
        self._timeout = config.get("timeout", self._timeout)
        self._use_bedrock = config.get("use_bedrock", False)
        self._aws_region = config.get("aws_region", self._aws_region)

        if self._use_bedrock:
            self._init_bedrock_client()
        else:
            self._init_anthropic_client(config.get("api_key"))

    def _init_anthropic_client(self, api_key: str | None = None) -> None:
        """Initialize the standard Anthropic client."""
        try:
            from anthropic import Anthropic
        except ImportError:
            raise ImportError(
                "anthropic package not installed. Install with: pip install 'local-kb[llm]'"
            )

        # Use provided key, or fall back to env var (SDK default behavior)
        kwargs = {"timeout": self._timeout}
        if api_key:
            kwargs["api_key"] = api_key

        self._client = Anthropic(**kwargs)

    def _init_bedrock_client(self) -> None:
        """Initialize the Bedrock client."""
        try:
            from anthropic import AnthropicBedrock
        except ImportError:
            raise ImportError(
                "anthropic[bedrock] package not installed. "
                "Install with: pip install 'anthropic[bedrock]'"
            )

        self._client = AnthropicBedrock(
            aws_region=self._aws_region,
            timeout=self._timeout,
        )

    def complete(
        self,
        prompt: str,
        system: str | None = None,
        max_tokens: int = 1024,
    ) -> LLMResponse:
        """Generate a completion using Claude.

        Args:
            prompt: The user prompt
            system: Optional system prompt
            max_tokens: Maximum tokens in response

        Returns:
            LLMResponse with generated content
        """
        if self._client is None:
            raise RuntimeError("Provider not configured. Call configure() first.")

        messages = [{"role": "user", "content": prompt}]
        kwargs = {
            "model": self._model,
            "max_tokens": max_tokens,
            "messages": messages,
        }
        if system:
            kwargs["system"] = system

        response = self._client.messages.create(**kwargs)

        # Extract text content from response
        content = ""
        for block in response.content:
            if hasattr(block, "text"):
                content += block.text

        return LLMResponse(
            content=content,
            model=response.model,
            input_tokens=response.usage.input_tokens,
            output_tokens=response.usage.output_tokens,
        )

    def is_available(self) -> bool:
        """Check if Claude API is available."""
        if self._client is None:
            return False

        # For Bedrock, assume available if client is configured
        # (AWS credentials are validated on first request)
        if self._use_bedrock:
            return True

        # For direct API, check if API key is set
        return bool(os.environ.get("ANTHROPIC_API_KEY") or hasattr(self._client, "_api_key"))

    def list_models(self) -> list[str]:
        """List available Claude models.

        Returns a static list of commonly used models.
        """
        if self._use_bedrock:
            return [
                "anthropic.claude-haiku-4-5-20251001-v1:0",
                "anthropic.claude-sonnet-4-5-20250929-v1:0",
                "anthropic.claude-sonnet-4-20250514-v1:0",
                "anthropic.claude-opus-4-20250514-v1:0",
            ]
        return [
            "claude-haiku-4-5-20251001",
            "claude-sonnet-4-5-20250929",
            "claude-sonnet-4-20250514",
            "claude-opus-4-20250514",
        ]


class ModalProvider:
    """Modal-based LLM provider using open models (Llama, Mistral, etc.).

    Runs on Modal GPU infrastructure - no API key needed, pay per compute.
    Requires deploying the Modal app first: `modal deploy lkb/modal_llm.py`

    Config:
        model: Model name (default: meta-llama/Llama-3.2-3B-Instruct)
        timeout: Request timeout in seconds (default: 60)
    """

    name = "modal"

    def __init__(self) -> None:
        self._llm = None
        self._model: str = "meta-llama/Llama-3.2-3B-Instruct"
        self._timeout: int = 60

    def configure(self, config: dict) -> None:
        """Configure the Modal provider.

        Args:
            config: Configuration dict with optional keys:
                - model: HuggingFace model ID
                - timeout: Request timeout in seconds
        """
        self._model = config.get("model", self._model)
        self._timeout = config.get("timeout", self._timeout)

        try:
            import modal
        except ImportError:
            raise ImportError("modal package not installed. Install with: pip install modal")

        try:
            self._llm = modal.Cls.from_name("knowledge-llm", "LLM")()
        except modal.exception.NotFoundError:
            raise RuntimeError(
                "Modal LLM app not deployed. Deploy with: modal deploy lkb/modal_llm.py"
            )

    def complete(
        self,
        prompt: str,
        system: str | None = None,
        max_tokens: int = 256,
    ) -> LLMResponse:
        """Generate a completion using Modal LLM.

        Args:
            prompt: The user prompt
            system: Optional system prompt
            max_tokens: Maximum tokens in response

        Returns:
            LLMResponse with generated content
        """
        if self._llm is None:
            raise RuntimeError("Provider not configured. Call configure() first.")

        response = self._llm.complete.remote(
            prompt,
            system=system,
            max_tokens=max_tokens,
        )

        return LLMResponse(
            content=response["content"],
            model=response["model"],
            input_tokens=response.get("input_tokens"),
            output_tokens=response.get("output_tokens"),
        )

    def is_available(self) -> bool:
        """Check if Modal LLM is available."""
        return self._llm is not None

    def list_models(self) -> list[str]:
        """List recommended models for Modal."""
        return [
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-1B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
        ]


# Registry of available providers
PROVIDERS: dict[str, type] = {
    "claude": ClaudeProvider,
    "modal": ModalProvider,
}

# Cached provider instance
_provider_instance: LLMProvider | None = None
_provider_initialized: bool = False


def get_provider() -> LLMProvider | None:
    """Get the configured LLM provider instance.

    Returns None if LLM is disabled (no provider configured).
    Caches the provider instance for reuse.
    """
    global _provider_instance, _provider_initialized

    if _provider_initialized:
        return _provider_instance

    from ..config import config

    # Check if LLM is configured
    provider_name = config.llm_provider
    if not provider_name:
        _provider_initialized = True
        return None

    if provider_name not in PROVIDERS:
        print(
            f"Warning: Unknown LLM provider '{provider_name}'. Available: {list(PROVIDERS.keys())}",
            file=sys.stderr,
        )
        _provider_initialized = True
        return None

    # Create and configure provider
    provider_class = PROVIDERS[provider_name]
    provider = provider_class()

    # Build config dict from Config object
    provider_config = {
        "model": config.llm_model,
        "timeout": config.llm_timeout,
        "use_bedrock": config.llm_use_bedrock,
        "aws_region": config.llm_aws_region,
    }

    try:
        provider.configure(provider_config)
    except ImportError as e:
        print(f"Warning: Could not initialize LLM provider: {e}", file=sys.stderr)
        _provider_initialized = True
        return None

    _provider_instance = provider
    _provider_initialized = True
    return provider


def reset_provider() -> None:
    """Reset the cached provider instance.

    Useful for testing or after config changes.
    """
    global _provider_instance, _provider_initialized
    _provider_instance = None
    _provider_initialized = False
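
Note (not part of the package): a minimal sketch of how the provider layer above is typically driven, assuming okb is installed, config.llm_provider is set (e.g. "claude"), and ANTHROPIC_API_KEY is exported; the prompt text and printed fields are illustrative only.

# Illustrative usage of okb/llm/providers.py; exercises only the public
# surface shown above (get_provider, complete, reset_provider).
from okb.llm.providers import get_provider, reset_provider

provider = get_provider()  # instance is cached after the first call
if provider is None:
    print("LLM features disabled (no provider configured)")
else:
    response = provider.complete(
        "Summarize this note in one sentence: ...",
        system="You are a concise summarizer.",
        max_tokens=128,
    )
    print(response.model, response.input_tokens, response.output_tokens)
    print(response.content)

reset_provider()  # drop the cached instance, e.g. after a config change
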
okb/local_embedder.py
ADDED
@@ -0,0 +1,87 @@
"""
Local CPU-based embedder for query-time use.

Avoids Modal cold starts for interactive queries.
Latency: ~200-500ms per query on modern CPU
Memory: ~1.5 GB for model

The same nomic-embed-text model is used for consistency.
"""

from functools import lru_cache

from .config import config


@lru_cache(maxsize=1)
def get_model():
    """
    Load embedding model once, cache in memory.

    First call takes ~10-30 seconds to download/load.
    Subsequent calls return cached model instantly.

    Auto-detects GPU (CUDA/MPS) and uses it if available.
    """
    from sentence_transformers import SentenceTransformer

    print(f"Loading embedding model: {config.model_name}")
    model = SentenceTransformer(
        config.model_name,
        trust_remote_code=True,
    )
    # Report actual device being used
    device = str(model.device)
    print(f"Model loaded on {device}")
    return model


def embed_query(text: str) -> list[float]:
    """
    Generate embedding for a search query.

    Uses "search_query: " prefix as required by nomic model.
    """
    model = get_model()
    embedding = model.encode(
        f"search_query: {text}",
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    return embedding.tolist()


def embed_document(text: str) -> list[float]:
    """
    Generate embedding for a document chunk.

    Uses "search_document: " prefix as required by nomic model.
    Prefer Modal for batch document embedding.
    """
    model = get_model()
    embedding = model.encode(
        f"search_document: {text}",
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    return embedding.tolist()


def warmup():
    """
    Pre-load model to avoid first-query latency.

    Call this at server startup.
    """
    get_model()


if __name__ == "__main__":
    # Quick test
    warmup()

    test_query = "How do I optimize Django database queries?"
    embedding = embed_query(test_query)
    print(f"Query: {test_query}")
    print(f"Embedding dimension: {len(embedding)}")
    print(f"First 5 values: {embedding[:5]}")
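
Note (not part of the package): because both helpers pass normalize_embeddings=True, a plain dot product between a query vector and a document vector is already their cosine similarity. A small sketch of that, assuming okb is installed and a model is configured; the sample texts and the dot helper are made up for illustration.

# Illustrative scoring with okb/local_embedder.py output.
from okb.local_embedder import embed_query, embed_document, warmup

warmup()  # optional: pay the model-load cost up front

q = embed_query("How do I optimize Django database queries?")
docs = [
    "Use select_related and prefetch_related to avoid N+1 queries.",
    "Grocery list: eggs, milk, flour.",
]
vectors = [embed_document(d) for d in docs]

def dot(a: list[float], b: list[float]) -> float:
    # Vectors are unit-normalized, so this is cosine similarity.
    return sum(x * y for x, y in zip(a, b))

for text, vec in zip(docs, vectors):
    print(f"{dot(q, vec):.3f}  {text}")
# The Django-related chunk should score noticeably higher than the grocery list.
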