kryten_llm-0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kryten_llm/__init__.py +22 -0
- kryten_llm/__main__.py +148 -0
- kryten_llm/components/__init__.py +24 -0
- kryten_llm/components/config_reloader.py +286 -0
- kryten_llm/components/context_manager.py +186 -0
- kryten_llm/components/formatter.py +383 -0
- kryten_llm/components/health_monitor.py +266 -0
- kryten_llm/components/heartbeat.py +122 -0
- kryten_llm/components/listener.py +79 -0
- kryten_llm/components/llm_manager.py +349 -0
- kryten_llm/components/prompt_builder.py +148 -0
- kryten_llm/components/rate_limiter.py +478 -0
- kryten_llm/components/response_logger.py +105 -0
- kryten_llm/components/spam_detector.py +388 -0
- kryten_llm/components/trigger_engine.py +278 -0
- kryten_llm/components/validator.py +269 -0
- kryten_llm/config.py +93 -0
- kryten_llm/models/__init__.py +25 -0
- kryten_llm/models/config.py +496 -0
- kryten_llm/models/events.py +16 -0
- kryten_llm/models/phase3.py +59 -0
- kryten_llm/service.py +572 -0
- kryten_llm/utils/__init__.py +0 -0
- kryten_llm-0.2.2.dist-info/METADATA +271 -0
- kryten_llm-0.2.2.dist-info/RECORD +28 -0
- kryten_llm-0.2.2.dist-info/WHEEL +4 -0
- kryten_llm-0.2.2.dist-info/entry_points.txt +3 -0
- kryten_llm-0.2.2.dist-info/licenses/LICENSE +21 -0
kryten_llm/components/heartbeat.py
@@ -0,0 +1,122 @@
+"""Heartbeat publishing for Phase 5.
+
+Publishes periodic health status to NATS.
+"""
+
+import asyncio
+import json
+import logging
+from typing import Optional
+
+from nats.aio.client import Client as NATSClient
+
+from kryten_llm.components.health_monitor import ServiceHealthMonitor
+from kryten_llm.models.config import ServiceMetadata
+
+
+class HeartbeatPublisher:
+    """Publish periodic heartbeat messages.
+
+    Publishes service health status to kryten.service.heartbeat.llm subject
+    at configured interval.
+
+    Phase 5 Implementation (REQ-002).
+    """
+
+    def __init__(
+        self,
+        config: ServiceMetadata,
+        health_monitor: ServiceHealthMonitor,
+        nats_client: NATSClient,
+        logger: logging.Logger,
+        start_time: float,
+    ):
+        """Initialize heartbeat publisher.
+
+        Args:
+            config: Service metadata configuration
+            health_monitor: Health monitoring component
+            nats_client: NATS client for publishing
+            logger: Logger instance
+            start_time: Service start timestamp
+        """
+        self.config = config
+        self.health_monitor = health_monitor
+        self.nats = nats_client
+        self.logger = logger
+        self.start_time = start_time
+
+        self._heartbeat_task: Optional[asyncio.Task] = None
+        self._running = False
+
+    async def start(self) -> None:
+        """Start heartbeat publishing loop."""
+        if not self.config.enable_heartbeats:
+            self.logger.info("Heartbeats disabled in configuration")
+            return
+
+        if self._running:
+            self.logger.warning("Heartbeat publisher already running")
+            return
+
+        self._running = True
+        self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
+        self.logger.info(
+            f"Heartbeat publisher started (interval: {self.config.heartbeat_interval_seconds}s)"
+        )
+
+    async def stop(self) -> None:
+        """Stop heartbeat publishing loop."""
+        if not self._running:
+            return
+
+        self._running = False
+
+        if self._heartbeat_task:
+            self._heartbeat_task.cancel()
+            try:
+                await self._heartbeat_task
+            except asyncio.CancelledError:
+                pass
+
+        self.logger.info("Heartbeat publisher stopped")
+
+    async def _heartbeat_loop(self) -> None:
+        """Main heartbeat publishing loop."""
+        while self._running:
+            try:
+                await self._publish_heartbeat()
+                await asyncio.sleep(self.config.heartbeat_interval_seconds)
+            except asyncio.CancelledError:
+                break
+            except Exception as e:
+                self.logger.error(f"Error in heartbeat loop: {e}", exc_info=True)
+                await asyncio.sleep(self.config.heartbeat_interval_seconds)
+
+    async def _publish_heartbeat(self) -> None:
+        """Publish single heartbeat message.
+
+        Implements REQ-002 heartbeat publishing.
+        """
+        try:
+            # Calculate uptime
+            import time
+
+            uptime = time.time() - self.start_time
+
+            # Build payload from health monitor
+            payload = self.health_monitor.get_heartbeat_payload(uptime)
+
+            # Publish to NATS
+            subject = f"kryten.service.heartbeat.{self.config.service_name}"
+            data = json.dumps(payload).encode("utf-8")
+
+            await self.nats.publish(subject, data)
+
+            self.logger.debug(
+                f"Published heartbeat: {payload['health']} "
+                f"({payload['status']['messages_processed']} messages processed)"
+            )
+
+        except Exception as e:
+            self.logger.error(f"Failed to publish heartbeat: {e}", exc_info=True)
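
For orientation, a minimal wiring sketch for HeartbeatPublisher (not part of the published diff). The stub classes are hypothetical stand-ins for ServiceMetadata and ServiceHealthMonitor, whose real constructors are not shown in this diff; they supply only the attributes and the get_heartbeat_payload method the publisher actually touches, with the payload shape inferred from the debug log above. Duck typing keeps this working despite the type hints.

# Hypothetical usage sketch; stubs replace the package's config and monitor types.
import asyncio
import logging
import time

import nats  # nats-py provides nats.connect()

from kryten_llm.components.heartbeat import HeartbeatPublisher


class StubMetadata:
    service_name = "llm"
    enable_heartbeats = True
    heartbeat_interval_seconds = 30


class StubHealthMonitor:
    def get_heartbeat_payload(self, uptime: float) -> dict:
        # Shape inferred from _publish_heartbeat's debug log: 'health' and
        # status['messages_processed'] must be present.
        return {"health": "healthy", "uptime": uptime, "status": {"messages_processed": 0}}


async def main() -> None:
    nc = await nats.connect("nats://localhost:4222")
    publisher = HeartbeatPublisher(
        config=StubMetadata(),
        health_monitor=StubHealthMonitor(),
        nats_client=nc,
        logger=logging.getLogger("heartbeat-demo"),
        start_time=time.time(),
    )
    await publisher.start()
    await asyncio.sleep(65)  # long enough for two 30s heartbeats
    await publisher.stop()
    await nc.drain()


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    asyncio.run(main())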

kryten_llm/components/listener.py
@@ -0,0 +1,79 @@
+"""Message listener and filter for chat messages."""
+
+import logging
+from typing import Optional
+
+from kryten_llm.models.config import LLMConfig
+
+logger = logging.getLogger(__name__)
+
+
+class MessageListener:
+    """Filters and validates incoming chat messages.
+
+    Implements REQ-001, REQ-002, REQ-003 from Phase 1 specification:
+    - Filter spam messages (commands starting with !, /, .)
+    - Filter system users ([server], [bot], [system])
+    - Validate required fields (username, msg, time)
+    """
+
+    # System usernames to ignore
+    SYSTEM_USERS = {"[server]", "[bot]", "[system]"}
+
+    # Command prefixes to filter
+    COMMAND_PREFIXES = ("!", "/", ".")
+
+    def __init__(self, config: LLMConfig):
+        """Initialize with configuration.
+
+        Args:
+            config: LLM configuration containing filtering rules
+        """
+        self.config = config
+        logger.info("MessageListener initialized")
+
+    async def filter_message(self, data: dict) -> Optional[dict]:
+        """Filter and validate a chatMsg event.
+
+        Implements filtering logic per specification:
+        1. Check required fields exist
+        2. Filter spam/command messages
+        3. Filter system users
+
+        Args:
+            data: Raw chatMsg event data from NATS
+
+        Returns:
+            Filtered message dict or None if message should be ignored
+
+        Message dict structure:
+            {
+                "username": str,  # Username of sender
+                "msg": str,       # Message text
+                "time": int,      # Timestamp
+                "meta": dict,     # Metadata (rank, etc.)
+            }
+        """
+        # REQ-003: Validate required fields
+        required_fields = ["username", "msg", "time"]
+        for field in required_fields:
+            if field not in data:
+                logger.debug(f"Invalid message format: missing required field '{field}'")
+                return None
+
+        username = data["username"]
+        msg = data["msg"]
+
+        # REQ-002: Filter system users
+        if username in self.SYSTEM_USERS:
+            logger.debug(f"Filtered system user message from: {username}")
+            return None
+
+        # REQ-001: Filter spam messages (commands)
+        if msg.startswith(self.COMMAND_PREFIXES):
+            logger.debug(f"Filtered command message: {msg[:20]}...")
+            return None
+
+        # Message passed all filters
+        logger.debug(f"Accepted message from {username}: {msg[:50]}...")
+        return data
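
A quick illustrative check of the filtering rules (again, not part of the diff). filter_message never reads self.config, so a None placeholder stands in for a real LLMConfig here:

# Illustrative only: config=None works because filter_message ignores it.
import asyncio

from kryten_llm.components.listener import MessageListener

listener = MessageListener(config=None)

samples = [
    {"username": "alice", "msg": "hello there", "time": 1700000000, "meta": {}},
    {"username": "[server]", "msg": "motd updated", "time": 1700000001, "meta": {}},
    {"username": "bob", "msg": "!skip", "time": 1700000002, "meta": {}},
    {"username": "carol", "msg": "missing the time field"},
]

for sample in samples:
    result = asyncio.run(listener.filter_message(sample))
    print(sample["username"], "->", "accepted" if result else "filtered")
# alice is accepted; [server] (system user), !skip (command prefix), and
# carol's malformed dict are all filtered.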

kryten_llm/components/llm_manager.py
@@ -0,0 +1,349 @@
+"""Enhanced LLM API manager with multi-provider support and fallback.
+
+Phase 3: Implements REQ-001 through REQ-007 and REQ-025 through REQ-032 for
+resilient multi-provider LLM interactions with automatic fallback.
+"""
+
+import asyncio
+import logging
+import os
+import time
+from typing import Dict, List, Optional
+
+import aiohttp
+
+from kryten_llm.models.config import LLMConfig, LLMProvider
+from kryten_llm.models.phase3 import LLMRequest, LLMResponse
+
+logger = logging.getLogger(__name__)
+
+
+class LLMManager:
+    """Enhanced LLM manager with multi-provider support.
+
+    Phase 3 enhancements:
+    - Support multiple provider configurations (REQ-001)
+    - Attempt providers in priority order with fallback (REQ-002)
+    - Implement exponential backoff for retries (REQ-003)
+    - Support provider selection by trigger (REQ-004)
+    - Handle provider-specific errors gracefully (REQ-005)
+    - Log provider selection and fallback decisions (REQ-006)
+    - Support different provider types (REQ-007)
+    """
+
+    def __init__(self, config: LLMConfig):
+        """Initialize with provider configurations.
+
+        Args:
+            config: LLM configuration containing provider settings
+        """
+        self.config = config
+        self.providers: Dict[str, LLMProvider] = {}
+        self._load_providers()
+
+        logger.info(
+            f"LLMManager initialized with {len(self.providers)} providers: "
+            f"{list(self.providers.keys())}"
+        )
+
+    def _load_providers(self) -> None:
+        """Load and validate provider configurations.
+
+        REQ-001: Support multiple provider configurations.
+        SEC-001: Resolve environment variable references in API keys.
+        """
+        for provider_name, provider_config in self.config.llm_providers.items():
+            # SEC-001: Resolve API key environment variables
+            api_key = self._resolve_api_key(provider_config.api_key)
+
+            # Store resolved provider
+            provider_config.api_key = api_key
+            self.providers[provider_name] = provider_config
+
+            logger.debug(
+                f"Loaded provider: {provider_name} "
+                f"(type={provider_config.type}, model={provider_config.model}, "
+                f"priority={provider_config.priority})"
+            )
+
+    def _resolve_api_key(self, api_key: str) -> str:
+        """Resolve environment variable references in API key.
+
+        SEC-001: Support ${ENV_VAR} syntax for secure key storage.
+
+        Args:
+            api_key: API key string (may contain ${ENV_VAR})
+
+        Returns:
+            Resolved API key value
+        """
+        if api_key.startswith("${") and api_key.endswith("}"):
+            env_var = api_key[2:-1]
+            resolved = os.getenv(env_var, "")
+            if not resolved:
+                logger.warning(f"Environment variable {env_var} not set, using empty string")
+            return resolved
+        return api_key
+
+    def _get_provider_priority(self, preferred_provider: Optional[str]) -> List[str]:
+        """Get ordered list of providers to try.
+
+        REQ-002: Attempt providers in priority order.
+        REQ-004: Support preferred provider for triggers.
+
+        Args:
+            preferred_provider: Preferred provider name (from trigger)
+
+        Returns:
+            Ordered list of provider names to attempt
+        """
+        # REQ-004: If preferred provider specified and exists, try it first
+        if preferred_provider and preferred_provider in self.providers:
+            # Preferred first, then others by priority
+            others = [
+                name
+                for name, p in sorted(self.providers.items(), key=lambda x: x[1].priority)
+                if name != preferred_provider
+            ]
+            return [preferred_provider] + others
+
+        # REQ-002: Use configured priority order or sort by priority
+        if self.config.default_provider_priority:
+            # Use configured order, filter to existing providers
+            priority_order = [
+                name for name in self.config.default_provider_priority if name in self.providers
+            ]
+            # Add any providers not in configured order
+            remaining = [name for name in self.providers if name not in priority_order]
+            return priority_order + sorted(remaining, key=lambda x: self.providers[x].priority)
+        else:
+            # Sort all providers by priority field
+            return [name for name, _ in sorted(self.providers.items(), key=lambda x: x[1].priority)]
+
+    async def generate_response(self, request: LLMRequest) -> Optional[LLMResponse]:
+        """Generate response with automatic provider fallback.
+
+        REQ-002: Attempt providers in priority order until success.
+        REQ-006: Log provider selection and fallback decisions.
+        REQ-032: Graceful degradation when all providers fail.
+
+        Args:
+            request: LLM request with prompts and optional preferred provider
+
+        Returns:
+            LLM response or None if all providers failed
+        """
+        provider_order = self._get_provider_priority(request.preferred_provider)
+        errors = []
+
+        # REQ-006: Log provider selection
+        logger.info(f"Attempting {len(provider_order)} providers in order: {provider_order}")
+
+        for provider_name in provider_order:
+            if provider_name not in self.providers:
+                logger.warning(f"Provider {provider_name} not found, skipping")
+                continue
+
+            provider = self.providers[provider_name]
+
+            try:
+                # REQ-003: Try provider with retries and exponential backoff
+                response = await self._try_provider(provider, provider_name, request)
+
+                # REQ-006: Log successful provider
+                logger.info(
+                    f"LLM response generated using provider: {provider_name} "
+                    f"(model={response.model_used}, time={response.response_time:.2f}s, "
+                    f"tokens={response.tokens_used})"
+                )
+
+                return response
+
+            except Exception as e:
+                # REQ-031: Log provider failure with context
+                error_msg = f"Provider {provider_name} failed: {type(e).__name__}: {str(e)}"
+                logger.warning(error_msg)
+                errors.append(error_msg)
+
+                # Continue to next provider (fallback)
+                continue
+
+        # REQ-032: All providers failed - log comprehensive error
+        logger.error(
+            f"All {len(provider_order)} LLM providers failed. " f"Errors: {'; '.join(errors)}"
+        )
+        return None
+
+    async def _try_provider(
+        self, provider: LLMProvider, provider_name: str, request: LLMRequest
+    ) -> LLMResponse:
+        """Attempt to get response from a single provider with retries.
+
+        REQ-003: Implement exponential backoff for retries.
+        REQ-005: Handle provider-specific errors gracefully.
+
+        Args:
+            provider: Provider configuration
+            provider_name: Provider identifier
+            request: LLM request
+
+        Returns:
+            LLM response
+
+        Raises:
+            Exception: If all retry attempts fail
+        """
+        retry_delay = self.config.retry_strategy.initial_delay
+        last_exception = None
+
+        for attempt in range(provider.max_retries + 1):  # +1 for initial attempt
+            try:
+                # Attempt the request
+                response = await self._call_provider(provider, provider_name, request)
+
+                # Success
+                if attempt > 0:
+                    logger.info(f"Provider {provider_name} succeeded on attempt {attempt + 1}")
+                return response
+
+            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+                # REQ-005: Handle transient errors with retry
+                last_exception = e
+
+                if attempt < provider.max_retries:
+                    # REQ-003: Exponential backoff
+                    logger.debug(
+                        f"Provider {provider_name} attempt {attempt + 1} failed: {e}. "
+                        f"Retrying in {retry_delay:.1f}s..."
+                    )
+
+                    await asyncio.sleep(retry_delay)
+
+                    # Calculate next delay with exponential backoff
+                    retry_delay = min(
+                        retry_delay * self.config.retry_strategy.multiplier,
+                        self.config.retry_strategy.max_delay,
+                    )
+                else:
+                    # Max retries exceeded
+                    logger.warning(f"Provider {provider_name} failed after {attempt + 1} attempts")
+                    raise
+
+            except Exception as e:
+                # REQ-005: Non-retryable errors (auth, invalid config, etc.)
+                logger.error(
+                    f"Provider {provider_name} non-retryable error: "
+                    f"{type(e).__name__}: {str(e)}"
+                )
+                raise
+
+        # Should not reach here, but handle edge case
+        if last_exception:
+            raise last_exception
+        raise RuntimeError(f"Provider {provider_name} failed with unknown error")
+
+    async def _call_provider(
+        self, provider: LLMProvider, provider_name: str, request: LLMRequest
+    ) -> LLMResponse:
+        """Call LLM provider API.
+
+        REQ-007: Support different provider types.
+
+        Args:
+            provider: Provider configuration
+            provider_name: Provider identifier
+            request: LLM request
+
+        Returns:
+            LLM response
+
+        Raises:
+            Exception: On API errors
+        """
+        start_time = time.time()
+
+        # REQ-007: Route to provider-specific implementation
+        if provider.type in ("openai", "openai_compatible", "openrouter"):
+            response = await self._call_openai_provider(provider, provider_name, request)
+        else:
+            raise ValueError(f"Unsupported provider type: {provider.type}")
+
+        response_time = time.time() - start_time
+        response.response_time = response_time
+
+        return response
+
+    async def _call_openai_provider(
+        self, provider: LLMProvider, provider_name: str, request: LLMRequest
+    ) -> LLMResponse:
+        """Call OpenAI-compatible provider API.
+
+        REQ-007: Support OpenAI-compatible providers.
+        REQ-024: Support provider-specific headers.
+        SEC-001: Never log API keys.
+
+        Args:
+            provider: Provider configuration
+            provider_name: Provider identifier
+            request: LLM request
+
+        Returns:
+            LLM response
+
+        Raises:
+            aiohttp.ClientError: On HTTP errors
+            asyncio.TimeoutError: On timeout
+        """
+        # Build request
+        url = f"{provider.base_url.rstrip('/')}/chat/completions"
+
+        # REQ-024: Support custom headers
+        headers = {
+            "Authorization": f"Bearer {provider.api_key}",
+            "Content-Type": "application/json",
+        }
+        if provider.custom_headers:
+            headers.update(provider.custom_headers)
+
+        payload = {
+            "model": provider.model,
+            "messages": [
+                {"role": "system", "content": request.system_prompt},
+                {"role": "user", "content": request.user_prompt},
+            ],
+            "temperature": request.temperature,
+            "max_tokens": request.max_tokens,
+        }
+
+        # SEC-001: Log without exposing API key
+        logger.debug(
+            f"Calling {provider_name}: model={provider.model}, "
+            f"temp={request.temperature}, max_tokens={request.max_tokens}"
+        )
+
+        # Make API call
+        timeout = aiohttp.ClientTimeout(total=provider.timeout_seconds)
+
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.post(url, headers=headers, json=payload) as response:
+                # REQ-005: Handle HTTP errors
+                if response.status != 200:
+                    error_text = await response.text()
+                    # SEC-001: Don't log full error (may contain keys)
+                    raise aiohttp.ClientError(f"HTTP {response.status}: {error_text[:200]}")
+
+                data = await response.json()
+
+                # Validate response format
+                if "choices" not in data or len(data["choices"]) == 0:
+                    raise ValueError("Invalid API response: no choices returned")
+
+                content = data["choices"][0]["message"]["content"]
+                tokens = data.get("usage", {}).get("total_tokens")
+
+                return LLMResponse(
+                    content=content,
+                    provider_used=provider_name,
+                    model_used=provider.model,
+                    tokens_used=tokens,
+                )
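
The fallback path retries each provider with a geometrically growing delay before moving on. A standalone sketch of that schedule, using illustrative numbers rather than the package's actual retry_strategy defaults (those live in kryten_llm.models.config and are not shown in this diff):

# Illustrative backoff schedule mirroring _try_provider; the values below
# are assumptions, not the package defaults.
initial_delay, multiplier, max_delay = 1.0, 2.0, 30.0
max_retries = 5

delay = initial_delay
for attempt in range(max_retries):
    print(f"attempt {attempt + 1} failed -> sleeping {delay:.1f}s before retry")
    delay = min(delay * multiplier, max_delay)
# Delays: 1.0, 2.0, 4.0, 8.0, 16.0 seconds; the next step would clamp to 30.0.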
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Prompt builder for LLM requests."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from kryten_llm.models.config import LLMConfig
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PromptBuilder:
|
|
11
|
+
"""Constructs prompts for LLM generation.
|
|
12
|
+
|
|
13
|
+
Implements REQ-011, REQ-012, REQ-013 from Phase 1 specification:
|
|
14
|
+
- Construct system prompts from PersonalityConfig
|
|
15
|
+
- Include character name, description, traits, and response style
|
|
16
|
+
- Construct user prompts with username and cleaned message
|
|
17
|
+
|
|
18
|
+
Phase 1: Basic prompt construction
|
|
19
|
+
Phase 2: Add trigger context injection (REQ-034)
|
|
20
|
+
Phase 3: Add video and chat history context
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
def __init__(self, config: LLMConfig):
|
|
24
|
+
"""Initialize with configuration.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
config: LLM configuration containing personality settings
|
|
28
|
+
"""
|
|
29
|
+
self.config = config
|
|
30
|
+
self.personality = config.personality
|
|
31
|
+
logger.info(f"PromptBuilder initialized for character: {self.personality.character_name}")
|
|
32
|
+
|
|
33
|
+
def build_system_prompt(self) -> str:
|
|
34
|
+
"""Build system prompt from personality configuration.
|
|
35
|
+
|
|
36
|
+
Implements REQ-011, REQ-012: Include all personality attributes
|
|
37
|
+
in a structured system prompt.
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
System prompt text
|
|
41
|
+
"""
|
|
42
|
+
# Format personality traits and expertise as comma-separated lists
|
|
43
|
+
traits = ", ".join(self.personality.personality_traits)
|
|
44
|
+
expertise = ", ".join(self.personality.expertise)
|
|
45
|
+
|
|
46
|
+
# Build system prompt following specification template
|
|
47
|
+
prompt = f"""You are {self.personality.character_name}, \
|
|
48
|
+
{self.personality.character_description}.
|
|
49
|
+
|
|
50
|
+
Personality traits: {traits}
|
|
51
|
+
Areas of expertise: {expertise}
|
|
52
|
+
|
|
53
|
+
Response style: {self.personality.response_style}
|
|
54
|
+
|
|
55
|
+
Important rules:
|
|
56
|
+
- Keep responses under 240 characters
|
|
57
|
+
- Stay in character
|
|
58
|
+
- Be natural and conversational
|
|
59
|
+
- Do not use markdown formatting
|
|
60
|
+
- Do not start responses with your character name"""
|
|
61
|
+
|
|
62
|
+
logger.debug(f"Built system prompt ({len(prompt)} chars)")
|
|
63
|
+
return prompt
|
|
64
|
+
|
|
65
|
+
def build_user_prompt(
|
|
66
|
+
self,
|
|
67
|
+
username: str,
|
|
68
|
+
message: str,
|
|
69
|
+
trigger_context: str | None = None,
|
|
70
|
+
context: dict | None = None,
|
|
71
|
+
) -> str:
|
|
72
|
+
"""Build user prompt with context injection.
|
|
73
|
+
|
|
74
|
+
Implements REQ-013 (Phase 1): Simple user prompt with username and message.
|
|
75
|
+
Implements REQ-034 (Phase 2): Optionally inject trigger context.
|
|
76
|
+
Phase 3 enhancements (REQ-014 through REQ-018):
|
|
77
|
+
- Accept context dict from ContextManager
|
|
78
|
+
- Inject current video when available
|
|
79
|
+
- Inject recent chat history when available
|
|
80
|
+
- Manage prompt length to fit context window
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
username: Username of message sender
|
|
84
|
+
message: Cleaned message text (bot name already removed)
|
|
85
|
+
trigger_context: Optional context from trigger (Phase 2)
|
|
86
|
+
context: Optional context dict from ContextManager (Phase 3)
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
User prompt text with injected context
|
|
90
|
+
"""
|
|
91
|
+
# Build prompt parts in priority order
|
|
92
|
+
parts = [f"{username} says: {message}"]
|
|
93
|
+
|
|
94
|
+
# REQ-015: Add current video context if available
|
|
95
|
+
if context and context.get("current_video"):
|
|
96
|
+
video = context["current_video"]
|
|
97
|
+
parts.append(
|
|
98
|
+
f"\n\nCurrently playing: {video['title']} " f"(queued by {video['queued_by']})"
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
# REQ-016: Add chat history context if available
|
|
102
|
+
if context and context.get("recent_messages"):
|
|
103
|
+
messages = context["recent_messages"]
|
|
104
|
+
if messages:
|
|
105
|
+
# Limit to last 5-10 messages to avoid token bloat
|
|
106
|
+
recent = messages[-5:]
|
|
107
|
+
history_lines = [f"- {msg['username']}: {msg['message']}" for msg in recent]
|
|
108
|
+
parts.append("\n\nRecent conversation:\n" + "\n".join(history_lines))
|
|
109
|
+
|
|
110
|
+
# REQ-017: Add trigger context if provided (highest priority)
|
|
111
|
+
if trigger_context:
|
|
112
|
+
parts.append(f"\n\nContext: {trigger_context}")
|
|
113
|
+
|
|
114
|
+
prompt = "".join(parts)
|
|
115
|
+
|
|
116
|
+
# REQ-018: Manage prompt length
|
|
117
|
+
max_chars = self.config.context.context_window_chars
|
|
118
|
+
if len(prompt) > max_chars:
|
|
119
|
+
# Truncate chat history first to preserve essential context
|
|
120
|
+
prompt = self._truncate_prompt(prompt, max_chars, trigger_context)
|
|
121
|
+
|
|
122
|
+
logger.debug(
|
|
123
|
+
f"Built user prompt for {username} ({len(prompt)} chars)"
|
|
124
|
+
+ (" with video context" if context and context.get("current_video") else "")
|
|
125
|
+
+ (" with chat history" if context and context.get("recent_messages") else "")
|
|
126
|
+
+ (" with trigger context" if trigger_context else "")
|
|
127
|
+
)
|
|
128
|
+
return prompt
|
|
129
|
+
|
|
130
|
+
def _truncate_prompt(self, prompt: str, max_chars: int, trigger_context: str | None) -> str:
|
|
131
|
+
"""Truncate prompt intelligently to fit context window.
|
|
132
|
+
|
|
133
|
+
REQ-018: Priority order - keep trigger context > video > chat history.
|
|
134
|
+
Simple truncation for Phase 3, can be enhanced later.
|
|
135
|
+
|
|
136
|
+
Args:
|
|
137
|
+
prompt: Full prompt text
|
|
138
|
+
max_chars: Maximum allowed characters
|
|
139
|
+
trigger_context: Trigger context to preserve
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
Truncated prompt
|
|
143
|
+
"""
|
|
144
|
+
logger.warning(f"Prompt too long ({len(prompt)} chars), truncating to {max_chars}")
|
|
145
|
+
|
|
146
|
+
# Simple truncation - just cut off excess
|
|
147
|
+
# TODO Phase 4: Implement smarter truncation that removes chat history first
|
|
148
|
+
return prompt[:max_chars]
|