kryten-llm 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kryten_llm/service.py ADDED
@@ -0,0 +1,572 @@
1
+ """Main service class for kryten-llm."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import time
6
+ import uuid
7
+ from typing import Any
8
+
9
+ from kryten import ChatMessageEvent, KrytenClient # type: ignore[import-untyped]
10
+
11
+ from kryten_llm.components import (
12
+ ContextManager,
13
+ LLMManager,
14
+ MessageListener,
15
+ PromptBuilder,
16
+ RateLimiter,
17
+ ResponseFormatter,
18
+ ResponseLogger,
19
+ TriggerEngine,
20
+ )
21
+ from kryten_llm.components.health_monitor import ServiceHealthMonitor
22
+ from kryten_llm.components.spam_detector import SpamDetector
23
+ from kryten_llm.components.validator import ResponseValidator
24
+ from kryten_llm.models.config import LLMConfig
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ class LLMService:
30
+ """Kryten LLM Service using kryten-py infrastructure."""
31
+
32
+ def __init__(self, config: LLMConfig):
33
+ """Initialize the service.
34
+
35
+ Args:
36
+ config: Validated LLMConfig object
37
+ """
38
+ self.config = config
39
+
40
+ # Use KrytenClient from kryten-py
41
+ self.client = KrytenClient(self.config.model_dump())
42
+
43
+ # Phase 5: Service start time for uptime tracking
44
+ self.start_time = time.time()
45
+
46
+ self._shutdown_event = asyncio.Event()
47
+
48
+ # Phase 1 components
49
+ self.listener = MessageListener(config)
50
+ self.trigger_engine = TriggerEngine(config)
51
+ self.prompt_builder = PromptBuilder(config)
52
+ self.response_formatter = ResponseFormatter(config)
53
+
54
+ # Phase 2 components
55
+ self.rate_limiter = RateLimiter(config)
56
+
57
+ # Phase 3 components
58
+ self.context_manager = ContextManager(config)
59
+ self.llm_manager = LLMManager(config)
60
+ self.response_logger = ResponseLogger(config)
61
+
62
+ # Phase 4 components
63
+ self.validator = ResponseValidator(config.validation)
64
+ self.spam_detector = SpamDetector(config.spam_detection)
65
+
66
+ # Phase 5 components
67
+ self.health_monitor: ServiceHealthMonitor | None = None # Initialized after NATS connection
68
+
69
+ async def start(self) -> None:
70
+ """Start the service."""
71
+ logger.info("Starting LLM service")
72
+
73
+ if self.config.testing.dry_run:
74
+ logger.warning("⚠ DRY RUN MODE - Responses will NOT be sent to chat")
75
+
76
+ logger.info(f"Bot personality: {self.config.personality.character_name}")
77
+ logger.info(f"Default LLM provider: {self.config.default_provider}")
78
+ logger.info(f"Triggers configured: {len(self.config.triggers)}")
79
+
80
+ # Register event handlers BEFORE connect (kryten-py pattern)
81
+ @self.client.on("chatmsg")
82
+ async def handle_chat(event):
83
+ await self._handle_chat_message(event)
84
+
85
+ @self.client.on("changemedia")
86
+ async def handle_media_change(event):
87
+ await self.context_manager._handle_video_change(event)
88
+
89
+ # Connect to NATS - KrytenClient handles lifecycle/heartbeats automatically
90
+ # based on the 'service' config we provide via model_dump()
91
+ await self.client.connect()
92
+
93
+ # Subscribe to robot startup - re-announce when robot starts
94
+ await self.client.subscribe("kryten.lifecycle.robot.startup", self._handle_robot_startup)
95
+ logger.info("Subscribed to kryten.lifecycle.robot.startup")
96
+
97
+ # Phase 5: Initialize health monitor for internal tracking
98
+ self.health_monitor = ServiceHealthMonitor(
99
+ config=self.config.service_metadata, logger=logger
100
+ )
101
+
102
+ # Phase 5: Update initial NATS health status
103
+ self.health_monitor.update_component_health("nats", True, "Connected to NATS")
104
+
105
+ # Use KrytenClient's built-in lifecycle publisher
106
+ self.lifecycle = self.client.lifecycle
107
+
108
+ # Phase 5: Register group restart callback (REQ-008)
109
+ if self.lifecycle:
110
+ self.lifecycle.on_restart_notice(self._handle_group_restart)
111
+
112
+ logger.info("ContextManager initialized for video tracking")
113
+ logger.info("LLM service started and ready")
114
+
115
+ async def stop(self, reason: str = "Normal shutdown") -> None:
116
+ """Stop the service with graceful shutdown.
117
+
118
+ Phase 5 Implementation (REQ-007).
119
+
120
+ Args:
121
+ reason: Shutdown reason
122
+ """
123
+ logger.info(f"Stopping LLM service: {reason}")
124
+ self._shutdown_event.set()
125
+
126
+ # KrytenClient.disconnect() handles lifecycle shutdown automatically
127
+ # Disconnect from NATS
128
+ await self.client.disconnect()
129
+
130
+ logger.info("LLM service stopped")
131
+
132
+ async def wait_for_shutdown(self) -> None:
133
+ """Wait for shutdown signal."""
134
+ await self._shutdown_event.wait()
135
+
136
+ async def _handle_chat_message(self, event: ChatMessageEvent) -> None:
137
+ """Handle chatMsg events using typed ChatMessageEvent from kryten-py.
138
+
139
+ Processing pipeline (Phase 4 enhanced):
140
+ 1. Filter message (MessageListener)
141
+ 2. Add to context (ContextManager)
142
+ 3. Check triggers (TriggerEngine)
143
+ 4. Check spam detection (SpamDetector - NEW Phase 4)
144
+ 5. Check rate limits (RateLimiter)
145
+ 6. Get context (ContextManager)
146
+ 7. Build prompts (PromptBuilder)
147
+ 8. Generate response (LLMManager)
148
+ 9. Validate response (ResponseValidator - NEW Phase 4)
149
+ 10. Format response (ResponseFormatter - ENHANCED Phase 4)
150
+ 11. Send to chat or log (based on dry_run)
151
+ 12. Record message for spam tracking (SpamDetector - NEW Phase 4)
152
+ 13. Record response (RateLimiter)
153
+ 14. Log response (ResponseLogger)
154
+
155
+ Args:
156
+ event: ChatMessageEvent from kryten-py with typed fields
157
+ """
158
+ # Convert ChatMessageEvent to dict format expected by components
159
+ # TODO: Refactor components to use typed events directly
160
+ data = {
161
+ "username": event.username,
162
+ "msg": event.message,
163
+ "time": int(event.timestamp.timestamp()),
164
+ "meta": {"rank": event.rank},
165
+ "channel": event.channel,
166
+ "domain": event.domain,
167
+ }
168
+ # Generate correlation ID for error tracking (REQ-026)
169
+ correlation_id = (
170
+ self._generate_correlation_id()
171
+ if self.config.error_handling.generate_correlation_ids
172
+ else None
173
+ )
174
+
175
+ # Phase 5: Track message processed (REQ-010)
176
+ if self.health_monitor:
177
+ self.health_monitor.record_message_processed()
178
+
179
+ filtered = None
180
+ try:
181
+ # 1. Filter message
182
+ filtered = await self.listener.filter_message(data)
183
+ if not filtered:
184
+ return
185
+
186
+ # 2. Add message to context (Phase 3)
187
+ # ContextManager will exclude bot's own messages automatically
188
+ self.context_manager.add_chat_message(filtered["username"], filtered["msg"])
189
+
190
+ # 3. Check triggers (mentions + trigger words with probability)
191
+ trigger_result = await self.trigger_engine.check_triggers(filtered)
192
+ if not trigger_result:
193
+ return
194
+
195
+ logger.info(
196
+ f"Triggered by {trigger_result.trigger_type} '{trigger_result.trigger_name}': "
197
+ f"{filtered['username']}"
198
+ )
199
+
200
+ # 4. Check spam detection (Phase 4 - REQ-016 through REQ-022)
201
+ rank = filtered.get("meta", {}).get("rank", 1)
202
+ mention_count = 1 if trigger_result.trigger_type == "mention" else 0
203
+ spam_check = self.spam_detector.check_spam(
204
+ filtered["username"],
205
+ filtered["msg"],
206
+ user_rank=rank,
207
+ mention_count=mention_count,
208
+ )
209
+
210
+ if spam_check.is_spam:
211
+ logger.warning(
212
+ f"[{correlation_id}] Spam detected from "
213
+ f"{filtered['username']}: {spam_check.reason}"
214
+ )
215
+ # Don't process message further, but record for tracking
216
+ self.spam_detector.record_message(
217
+ filtered["username"],
218
+ filtered["msg"],
219
+ rank,
220
+ mention_count,
221
+ )
222
+ return
223
+
224
+ # 5. Check rate limits (Phase 2)
225
+ rate_limit_decision = await self.rate_limiter.check_rate_limit(
226
+ filtered["username"], trigger_result, rank
227
+ )
228
+
229
+ if not rate_limit_decision.allowed:
230
+ logger.info(
231
+ f"[{correlation_id}] Rate limit blocked response: {rate_limit_decision.reason} "
232
+ f"(retry in {rate_limit_decision.retry_after}s)"
233
+ )
234
+ # Still log the blocked attempt
235
+ await self.response_logger.log_response(
236
+ filtered["username"],
237
+ trigger_result,
238
+ filtered["msg"],
239
+ "", # No LLM response
240
+ [],
241
+ rate_limit_decision,
242
+ False,
243
+ )
244
+ return
245
+
246
+ # 6. Get context (Phase 3)
247
+ context = self.context_manager.get_context()
248
+
249
+ # 7. Build prompts (Phase 3)
250
+ system_prompt = self.prompt_builder.build_system_prompt()
251
+ user_prompt = self.prompt_builder.build_user_prompt(
252
+ filtered["username"],
253
+ trigger_result.cleaned_message or filtered["msg"],
254
+ trigger_result.context, # Phase 2 trigger context
255
+ context, # Phase 3 video + chat context
256
+ )
257
+
258
+ # 8. Generate response (Phase 3)
259
+ from kryten_llm.models.phase3 import LLMRequest
260
+
261
+ # Get temperature/max_tokens from default provider config
262
+ default_provider = self.config.llm_providers.get(self.config.default_provider)
263
+ temperature = default_provider.temperature if default_provider else 0.8
264
+ max_tokens = default_provider.max_tokens if default_provider else 256
265
+
266
+ llm_request = LLMRequest(
267
+ system_prompt=system_prompt,
268
+ user_prompt=user_prompt,
269
+ temperature=temperature,
270
+ max_tokens=max_tokens,
271
+ preferred_provider=(
272
+ trigger_result.preferred_provider
273
+ if hasattr(trigger_result, "preferred_provider")
274
+ else None
275
+ ),
276
+ )
277
+
278
+ llm_response_obj = await self.llm_manager.generate_response(llm_request)
279
+
280
+ if not llm_response_obj:
281
+ logger.error(f"[{correlation_id}] LLM failed to generate response")
282
+
283
+ # Phase 5: Record provider failure if we know which one failed
284
+ # Note: llm_manager should ideally tell us which provider failed
285
+ # For now, we'll record this as a general error
286
+ if self.health_monitor:
287
+ self.health_monitor.record_error()
288
+
289
+ await self.response_logger.log_response(
290
+ filtered["username"],
291
+ trigger_result,
292
+ filtered["msg"],
293
+ "",
294
+ [],
295
+ rate_limit_decision,
296
+ False,
297
+ )
298
+ return
299
+
300
+ # Extract content from LLMResponse
301
+ llm_response = llm_response_obj.content
302
+
303
+ # Phase 5: Record successful provider API call
304
+ if self.health_monitor and llm_response_obj.provider_used:
305
+ self.health_monitor.record_provider_success(llm_response_obj.provider_used)
306
+
307
+ # Log provider metrics
308
+ logger.info(
309
+ f"[{correlation_id}] LLM response from "
310
+ f"{llm_response_obj.provider_used}/{llm_response_obj.model_used} "
311
+ f"({llm_response_obj.tokens_used} tokens, "
312
+ f"{llm_response_obj.response_time:.2f}s)"
313
+ )
314
+
315
+ # 9. Validate response (Phase 4 - REQ-009 through REQ-015)
316
+ validation = self.validator.validate(llm_response, filtered["msg"], context)
317
+ if not validation.valid:
318
+ logger.warning(
319
+ f"[{correlation_id}] Response validation failed: {validation.reason} "
320
+ f"(severity: {validation.severity})"
321
+ )
322
+ await self.response_logger.log_response(
323
+ filtered["username"],
324
+ trigger_result,
325
+ filtered["msg"],
326
+ llm_response,
327
+ [],
328
+ rate_limit_decision,
329
+ False,
330
+ )
331
+ return
332
+
333
+ # 10. Format response (Phase 4 - REQ-001 through REQ-008)
334
+ formatted_parts = self.response_formatter.format_response(llm_response)
335
+
336
+ if not formatted_parts:
337
+ logger.warning(f"[{correlation_id}] Formatter returned empty response")
338
+ await self.response_logger.log_response(
339
+ filtered["username"],
340
+ trigger_result,
341
+ filtered["msg"],
342
+ llm_response,
343
+ [],
344
+ rate_limit_decision,
345
+ False,
346
+ )
347
+ return
348
+
349
+ # 11. Send to chat or log
350
+ sent = False
351
+ for i, part in enumerate(formatted_parts):
352
+ if self.config.testing.dry_run:
353
+ logger.info(f"[{correlation_id}] [DRY RUN] Would send: {part}")
354
+ else:
355
+ channel_config = self.config.channels[0]
356
+ await self.client.send_chat(
357
+ channel_config["channel"], part, domain=channel_config["domain"]
358
+ )
359
+ logger.info(
360
+ f"[{correlation_id}] Sent response part {i+1}/{len(formatted_parts)}"
361
+ )
362
+ sent = True
363
+
364
+ # Delay between parts
365
+ if i < len(formatted_parts) - 1:
366
+ await asyncio.sleep(self.config.message_processing.split_delay_seconds)
367
+
368
+ # 12. Record message for spam tracking (Phase 4 - REQ-016)
369
+ self.spam_detector.record_message(
370
+ filtered["username"], filtered["msg"], rank, mention_count
371
+ )
372
+
373
+ # 13. Record response (update rate limit state)
374
+ if sent or not self.config.testing.dry_run:
375
+ await self.rate_limiter.record_response(filtered["username"], trigger_result)
376
+
377
+ # Phase 5: Track successful response sent
378
+ if self.health_monitor:
379
+ self.health_monitor.record_response_sent()
380
+
381
+ # 14. Log response
382
+ await self.response_logger.log_response(
383
+ filtered["username"],
384
+ trigger_result,
385
+ filtered["msg"],
386
+ llm_response,
387
+ formatted_parts,
388
+ rate_limit_decision,
389
+ sent,
390
+ )
391
+
392
+ except Exception as e:
393
+ # Phase 5: Track error
394
+ if self.health_monitor:
395
+ self.health_monitor.record_error()
396
+
397
+ # Phase 4 error handling (REQ-023 through REQ-028)
398
+ username = filtered.get("username", "unknown") if filtered else "unknown"
399
+ msg = filtered.get("msg", "") if filtered else ""
400
+ self._handle_error(e, username, msg, correlation_id)
401
+
402
+ def _generate_correlation_id(self) -> str:
403
+ """Generate unique correlation ID for request tracking.
404
+
405
+ Implements REQ-026 from Phase 4 specification.
406
+
407
+ Returns:
408
+ Unique correlation ID (e.g., "msg-a1b2c3d4e5f6")
409
+ """
410
+ return f"msg-{uuid.uuid4().hex[:12]}"
411
+
412
+ def _handle_error(
413
+ self, error: Exception, username: str, message: str, correlation_id: str | None
414
+ ):
415
+ """Handle errors with comprehensive logging and optional fallback.
416
+
417
+ Implements REQ-023 through REQ-028 from Phase 4 specification.
418
+
419
+ Args:
420
+ error: Exception that occurred
421
+ username: User who sent message
422
+ message: Original message
423
+ correlation_id: Correlation ID for tracking
424
+ """
425
+ log_extra = {
426
+ "username": username,
427
+ "message": message,
428
+ "error_type": type(error).__name__,
429
+ }
430
+
431
+ if correlation_id:
432
+ log_extra["correlation_id"] = correlation_id
433
+
434
+ if self.config.error_handling.log_full_context:
435
+ logger.error(
436
+ f"[{correlation_id}] Error processing message from {username}: {message}",
437
+ exc_info=True,
438
+ extra=log_extra,
439
+ )
440
+ else:
441
+ logger.error(
442
+ f"[{correlation_id}] Error processing message from {username}: "
443
+ f"{type(error).__name__}",
444
+ extra=log_extra,
445
+ )
446
+
447
+ # Optional: Send fallback response (REQ-028)
448
+ # Note: This would need to be async, so for now just log
449
+ # In a full implementation, this method should be async or queue a fallback task
450
+ if self.config.error_handling.enable_fallback_responses:
451
+ logger.info(
452
+ f"[{correlation_id}] Fallback responses enabled but not implemented in sync method"
453
+ )
454
+
455
+ async def _handle_discovery_poll(self, msg: Any) -> None:
456
+ """Handle discovery poll request.
457
+
458
+ Phase 5 Implementation (REQ-005).
459
+
460
+ Args:
461
+ msg: NATS message
462
+ """
463
+ logger.info("Discovery poll received, re-announcing service")
464
+
465
+ if self.config.service_metadata.enable_service_discovery and self.lifecycle:
466
+ await self.lifecycle.publish_startup(
467
+ personality=self.config.personality.character_name,
468
+ providers_configured=len(self.config.llm_providers),
469
+ triggers_loaded=len(self.config.triggers),
470
+ re_announcement=True,
471
+ )
472
+
473
+ async def _handle_robot_startup(self, msg: Any) -> None:
474
+ """Handle robot startup notification.
475
+
476
+ Phase 5 Implementation (REQ-006).
477
+
478
+ Args:
479
+ msg: NATS message
480
+ """
481
+ logger.info("Robot startup detected, re-announcing service")
482
+
483
+ if self.config.service_metadata.enable_service_discovery and self.lifecycle:
484
+ await self.lifecycle.publish_startup(
485
+ personality=self.config.personality.character_name,
486
+ providers_configured=len(self.config.llm_providers),
487
+ triggers_loaded=len(self.config.triggers),
488
+ re_announcement=True,
489
+ )
490
+
491
+ async def _handle_group_restart(self, data: dict) -> None:
492
+ """Handle group restart notice.
493
+
494
+ Phase 5 Implementation (REQ-008).
495
+
496
+ Args:
497
+ data: Restart notice data
498
+ """
499
+ delay = data.get("delay_seconds", 5)
500
+ reason = data.get("reason", "Group restart")
501
+
502
+ logger.warning(f"Group restart requested: {reason}. Shutting down in {delay}s...")
503
+
504
+ # Wait for delay period
505
+ await asyncio.sleep(delay)
506
+
507
+ # Initiate graceful shutdown
508
+ await self.stop(reason=f"Group restart: {reason}")
509
+
510
+ async def reload_config(self, new_config: "LLMConfig") -> None:
511
+ """Reload configuration with hot-swappable components.
512
+
513
+ Phase 6: Hot-reload support for configuration changes without restart.
514
+
515
+ Only safe changes are applied:
516
+ - Trigger configurations
517
+ - Rate limits
518
+ - Personality settings
519
+ - Spam detection settings
520
+ - LLM provider settings
521
+
522
+ Unsafe changes (NATS, channels) require restart.
523
+
524
+ Args:
525
+ new_config: New validated configuration
526
+ """
527
+
528
+ logger.info("Applying new configuration...")
529
+ old_config = self.config
530
+
531
+ try:
532
+ # Update config reference
533
+ self.config = new_config
534
+
535
+ # Rebuild trigger engine with new patterns (Phase 6 pattern caching)
536
+ self.trigger_engine = TriggerEngine(new_config)
537
+ logger.info(f"TriggerEngine rebuilt with {len(self.trigger_engine.triggers)} triggers")
538
+
539
+ # Rebuild rate limiter (preserves current state)
540
+ # Note: RateLimiter tracks state internally, new instance loses history
541
+ # For production, consider preserving state
542
+ self.rate_limiter = RateLimiter(new_config)
543
+ logger.info("RateLimiter rebuilt with new limits")
544
+
545
+ # Update spam detector
546
+ self.spam_detector = SpamDetector(new_config.spam_detection)
547
+ logger.info("SpamDetector rebuilt with new settings")
548
+
549
+ # Update prompt builder (uses personality config)
550
+ self.prompt_builder = PromptBuilder(new_config)
551
+ logger.info(f"PromptBuilder rebuilt for {new_config.personality.character_name}")
552
+
553
+ # Update response formatter
554
+ self.response_formatter = ResponseFormatter(new_config)
555
+
556
+ # Update response validator
557
+ self.validator = ResponseValidator(new_config.validation)
558
+
559
+ # Update LLM manager with new providers
560
+ self.llm_manager = LLMManager(new_config)
561
+ logger.info(f"LLMManager rebuilt with {len(new_config.llm_providers)} providers")
562
+
563
+ # Update context manager config (doesn't require rebuild)
564
+ self.context_manager.config = new_config
565
+
566
+ logger.info("Configuration hot-reload completed successfully")
567
+
568
+ except Exception as e:
569
+ # Rollback on error
570
+ logger.error(f"Config reload failed, rolling back: {e}", exc_info=True)
571
+ self.config = old_config
572
+ raise
File without changes