kryten-llm 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,478 @@
1
+ """Rate limiting for bot responses."""
2
+
3
+ import logging
4
+ from collections import deque
5
+ from dataclasses import dataclass
6
+ from datetime import datetime, timedelta
7
+
8
+ from kryten_llm.models.config import LLMConfig
9
+ from kryten_llm.models.events import TriggerResult
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
@dataclass
class RateLimitDecision:
    """Outcome of a single rate limit check.

    Implements REQ-021: Detailed decision with reason and retry time.

    Attributes:
        allowed: True if the response is allowed.
        reason: Human-readable reason for the decision.
        retry_after: Seconds until a response is next allowed (0 if allowed).
        details: Additional context (limits, counts, cooldowns).
    """

    allowed: bool
    reason: str
    retry_after: int
    details: dict
25
+
26
+
27
class RateLimiter:
    """Manages rate limiting for bot responses.

    Implements multi-level rate limiting:
    - Global rate limits (REQ-011)
    - Per-user rate limits (REQ-012)
    - Per-trigger rate limits (REQ-013)
    - Global cooldown (REQ-014)
    - User cooldown (REQ-015)
    - Mention cooldown (REQ-016)
    - Trigger cooldown (REQ-017)
    - Admin multipliers (REQ-018, REQ-019, REQ-020)

    State is stored in-memory (REQ-023, CON-002).

    Every blocked decision reports ``retry_after >= 1``; ``retry_after == 0``
    is reserved for allowed decisions.
    """

    def __init__(self, config: LLMConfig):
        """Initialize rate limiter with configuration.

        Args:
            config: LLM configuration containing rate_limits settings
        """
        self.config = config
        self.rate_limits = config.rate_limits

        # Global rate tracking (REQ-011)
        self.global_responses_minute: deque[datetime] = deque()
        self.global_responses_hour: deque[datetime] = deque()
        self.last_response_time: datetime | None = None

        # Per-user rate tracking (REQ-012)
        self.user_responses_hour: dict[str, deque[datetime]] = {}
        self.user_last_response: dict[str, datetime] = {}

        # Per-trigger rate tracking (REQ-013)
        self.trigger_responses_hour: dict[str, deque[datetime]] = {}
        self.trigger_last_response: dict[str, datetime] = {}

        # Mention-specific tracking (REQ-016)
        self.last_mention_response: datetime | None = None

        logger.info(
            f"RateLimiter initialized: global={self.rate_limits.global_max_per_minute}/min, "
            f"{self.rate_limits.global_max_per_hour}/hr, "
            f"user={self.rate_limits.user_max_per_hour}/hr"
        )

    async def check_rate_limit(
        self, username: str, trigger_result: TriggerResult, rank: int = 1
    ) -> RateLimitDecision:
        """Check if response is allowed by rate limits.

        Checks in order (first failure returns decision):
        1. Global rate limits (per minute, per hour)
        2. Global cooldown (time since last response)
        3. User rate limits (per hour)
        4. User cooldown (time since last response to this user)
        5. Mention cooldown (if trigger_type="mention")
        6. Trigger rate limits (max per hour)
        7. Trigger cooldown (time since last activation)

        Applies admin multipliers if rank >= 3 (REQ-018, REQ-019, REQ-020).

        Args:
            username: Username triggering response
            trigger_result: TriggerResult from TriggerEngine
            rank: User rank (default 1, admin >= 3)

        Returns:
            RateLimitDecision with allowed=True/False and details
        """
        # One timestamp for the whole check so every sub-check sees the same
        # "current time" when pruning windows and computing elapsed/retry values.
        now = datetime.now()
        is_admin = self._is_admin(rank)

        # Drop expired entries from the global sliding windows
        self._clean_old_timestamps(self.global_responses_minute, 60, now)
        self._clean_old_timestamps(self.global_responses_hour, 3600, now)

        # Check global limits (REQ-011)
        decision = self._check_global_limits(is_admin, now)
        if decision is not None:
            return decision

        # Check global cooldown (REQ-014)
        decision = self._cooldown_decision(
            self.last_response_time,
            self.rate_limits.global_cooldown_seconds,
            is_admin,
            now,
            reason="global cooldown active",
        )
        if decision is not None:
            return decision

        # Check user limits (REQ-012, REQ-015)
        decision = self._check_user_limits(username, is_admin, now)
        if decision is not None:
            return decision

        # Check mention cooldown (REQ-016)
        if trigger_result.trigger_type == "mention":
            decision = self._cooldown_decision(
                self.last_mention_response,
                self.rate_limits.mention_cooldown_seconds,
                is_admin,
                now,
                reason="mention cooldown active",
                last_key="last_mention",
            )
            if decision is not None:
                return decision

        # Check trigger-specific limits (REQ-013, REQ-017)
        if trigger_result.trigger_type == "trigger_word" and trigger_result.trigger_name:
            decision = self._check_trigger_limits(trigger_result, is_admin, now)
            if decision is not None:
                return decision

        # All checks passed
        logger.debug(f"Rate limit check passed for {username} (rank={rank})")
        return RateLimitDecision(
            allowed=True,
            reason="allowed",
            retry_after=0,
            details={
                "global_count_minute": len(self.global_responses_minute),
                "global_count_hour": len(self.global_responses_hour),
                "user_count_hour": len(self.user_responses_hour.get(username, [])),
                "is_admin": is_admin,
            },
        )

    async def record_response(self, username: str, trigger_result: TriggerResult) -> None:
        """Record that a response was sent (update state).

        Implements REQ-022: Update state after responses are sent.

        Args:
            username: Username who triggered response
            trigger_result: TriggerResult from trigger check
        """
        now = datetime.now()

        # Update global tracking
        self.global_responses_minute.append(now)
        self.global_responses_hour.append(now)
        self.last_response_time = now

        # Update per-user tracking
        self.user_responses_hour.setdefault(username, deque()).append(now)
        self.user_last_response[username] = now

        # Update mention tracking
        if trigger_result.trigger_type == "mention":
            self.last_mention_response = now

        # Update per-trigger tracking
        if trigger_result.trigger_type == "trigger_word" and trigger_result.trigger_name:
            trigger_name = trigger_result.trigger_name
            self.trigger_responses_hour.setdefault(trigger_name, deque()).append(now)
            self.trigger_last_response[trigger_name] = now

        logger.debug(
            f"Response recorded for {username} "
            f"(trigger: {trigger_result.trigger_type}/{trigger_result.trigger_name})"
        )

    def _is_admin(self, rank: int) -> bool:
        """Check if user is admin/moderator.

        Implements REQ-018: Admin detection via rank.

        Args:
            rank: User rank from message metadata

        Returns:
            True if rank >= 3 (admin/moderator)
        """
        return rank >= 3

    def _apply_admin_multiplier(
        self, value: int | float, multiplier: float, is_admin: bool
    ) -> int | float:
        """Apply admin multiplier to cooldown or limit.

        Implements REQ-019, REQ-020: Admin multipliers for cooldowns and limits.

        Args:
            value: Original value (cooldown or limit)
            multiplier: Multiplier to apply if admin
            is_admin: Whether user is admin

        Returns:
            Modified value if admin, original value otherwise. An int input
            yields an int result (truncated toward zero).
        """
        if not is_admin:
            return value
        result = value * multiplier
        return int(result) if isinstance(value, int) else result

    @staticmethod
    def _retry_after_seconds(remaining: float) -> int:
        """Convert a remaining wait time into a ``retry_after`` value.

        A blocked decision must never report 0 seconds (0 means "allowed"),
        so sub-second and non-positive remainders are clamped up to 1.

        Args:
            remaining: Seconds left until the limit or cooldown expires

        Returns:
            Whole seconds to wait, at least 1
        """
        return max(1, int(remaining))

    def _clean_old_timestamps(
        self,
        timestamps: deque[datetime],
        window_seconds: int,
        now: datetime | None = None,
    ) -> None:
        """Remove timestamps older than window from deque.

        Args:
            timestamps: Deque of timestamps to clean (oldest first)
            window_seconds: Time window in seconds
            now: Reference time for the window; defaults to datetime.now()
        """
        if now is None:
            now = datetime.now()
        cutoff = now - timedelta(seconds=window_seconds)
        while timestamps and timestamps[0] < cutoff:
            timestamps.popleft()

    def _window_limit_decision(
        self,
        timestamps,
        base_limit: int | float,
        window_seconds: int,
        is_admin: bool,
        now: datetime,
        reason: str,
        context: dict | None = None,
    ) -> RateLimitDecision | None:
        """Build a blocking decision if a sliding-window limit is reached.

        Args:
            timestamps: Already-pruned response timestamps, oldest first
            base_limit: Configured maximum before the admin multiplier
            window_seconds: Length of the sliding window in seconds
            is_admin: Whether user is admin
            now: Current datetime
            reason: Reason text for the blocking decision
            context: Extra entries placed first in the decision details

        Returns:
            RateLimitDecision if blocked, None if allowed
        """
        limit = self._apply_admin_multiplier(
            base_limit, self.rate_limits.admin_limit_multiplier, is_admin
        )
        count = len(timestamps)
        if count < limit:
            return None

        if timestamps:
            # A slot frees up once the oldest entry leaves the window
            age = (now - timestamps[0]).total_seconds()
            retry_after = self._retry_after_seconds(window_seconds - age)
        else:
            # Only reachable when the effective limit is <= 0
            retry_after = window_seconds

        return RateLimitDecision(
            allowed=False,
            reason=reason,
            retry_after=retry_after,
            details={
                **(context or {}),
                "count": count,
                "limit": limit,
                "is_admin": is_admin,
            },
        )

    def _cooldown_decision(
        self,
        last: datetime | None,
        base_seconds: int | float,
        is_admin: bool,
        now: datetime,
        reason: str,
        last_key: str = "last_response",
        context: dict | None = None,
    ) -> RateLimitDecision | None:
        """Build a blocking decision if a cooldown is still active.

        Args:
            last: Time of the previous response, or None if there was none
            base_seconds: Configured cooldown before the admin multiplier
            is_admin: Whether user is admin
            now: Current datetime
            reason: Reason text for the blocking decision
            last_key: Details key under which the previous time is reported
            context: Extra entries placed first in the decision details

        Returns:
            RateLimitDecision if blocked, None if allowed
        """
        if last is None:
            return None

        cooldown = self._apply_admin_multiplier(
            base_seconds, self.rate_limits.admin_cooldown_multiplier, is_admin
        )
        elapsed = (now - last).total_seconds()
        if elapsed >= cooldown:
            return None

        return RateLimitDecision(
            allowed=False,
            reason=reason,
            # Clamped so a block with under one second left does not report 0
            retry_after=self._retry_after_seconds(cooldown - elapsed),
            details={
                **(context or {}),
                last_key: last.isoformat(),
                "cooldown_seconds": cooldown,
                "elapsed": elapsed,
                "is_admin": is_admin,
            },
        )

    def _check_global_limits(
        self, is_admin: bool, now: datetime | None = None
    ) -> RateLimitDecision | None:
        """Check global rate limits.

        Implements REQ-011: Global per-minute and per-hour limits.

        Args:
            is_admin: Whether user is admin
            now: Current datetime; defaults to datetime.now()

        Returns:
            RateLimitDecision if blocked, None if allowed
        """
        if now is None:
            now = datetime.now()

        # Check per-minute limit
        decision = self._window_limit_decision(
            self.global_responses_minute,
            self.rate_limits.global_max_per_minute,
            60,
            is_admin,
            now,
            reason="global per-minute limit reached",
        )
        if decision is not None:
            return decision

        # Check per-hour limit
        return self._window_limit_decision(
            self.global_responses_hour,
            self.rate_limits.global_max_per_hour,
            3600,
            is_admin,
            now,
            reason="global per-hour limit reached",
        )

    def _check_user_limits(
        self, username: str, is_admin: bool, now: datetime
    ) -> RateLimitDecision | None:
        """Check per-user rate limits.

        Implements REQ-012, REQ-015: Per-user limits and cooldowns.

        Args:
            username: Username to check
            is_admin: Whether user is admin
            now: Current datetime

        Returns:
            RateLimitDecision if blocked, None if allowed
        """
        # Clean user's timestamp deque
        timestamps = self.user_responses_hour.get(username)
        if timestamps is not None:
            self._clean_old_timestamps(timestamps, 3600, now)

        # Check per-user per-hour limit
        decision = self._window_limit_decision(
            timestamps or (),
            self.rate_limits.user_max_per_hour,
            3600,
            is_admin,
            now,
            reason="user per-hour limit reached",
            context={"username": username},
        )
        if decision is not None:
            return decision

        # Check user cooldown
        return self._cooldown_decision(
            self.user_last_response.get(username),
            self.rate_limits.user_cooldown_seconds,
            is_admin,
            now,
            reason="user cooldown active",
            context={"username": username},
        )

    def _check_trigger_limits(
        self, trigger_result: TriggerResult, is_admin: bool, now: datetime
    ) -> RateLimitDecision | None:
        """Check per-trigger rate limits.

        Implements REQ-013, REQ-017: Per-trigger limits and cooldowns.

        Args:
            trigger_result: TriggerResult from trigger check
            is_admin: Whether user is admin
            now: Current datetime

        Returns:
            RateLimitDecision if blocked, None if allowed
        """
        trigger_name = trigger_result.trigger_name

        # If no trigger name, allow (shouldn't happen in practice)
        if trigger_name is None:
            return None

        # Find trigger config by name; unknown triggers are allowed by default
        trigger_config = next(
            (t for t in self.config.triggers if t.name == trigger_name), None
        )
        if trigger_config is None:
            return None

        # Clean trigger's timestamp deque
        timestamps = self.trigger_responses_hour.get(trigger_name)
        if timestamps is not None:
            self._clean_old_timestamps(timestamps, 3600, now)

        # Check per-trigger per-hour limit
        decision = self._window_limit_decision(
            timestamps or (),
            trigger_config.max_responses_per_hour,
            3600,
            is_admin,
            now,
            reason=f"trigger '{trigger_name}' per-hour limit reached",
            context={"trigger_name": trigger_name},
        )
        if decision is not None:
            return decision

        # Check trigger cooldown
        return self._cooldown_decision(
            self.trigger_last_response.get(trigger_name),
            trigger_config.cooldown_seconds,
            is_admin,
            now,
            reason=f"trigger '{trigger_name}' cooldown active",
            context={"trigger_name": trigger_name},
        )
@@ -0,0 +1,105 @@
1
+ """Response logging for analysis and debugging."""
2
+
3
+ import json
4
+ import logging
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+
8
+ from kryten_llm.components.rate_limiter import RateLimitDecision
9
+ from kryten_llm.models.config import LLMConfig
10
+ from kryten_llm.models.events import TriggerResult
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class ResponseLogger:
    """Appends one JSON object per bot response to a JSONL file.

    Implements REQ-024 through REQ-029:
    - Log all responses to JSONL file
    - Include comprehensive metadata
    - Handle file I/O errors gracefully
    - Create directories and files as needed
    - Append to existing logs
    - Produce valid JSON per line
    """

    def __init__(self, config: LLMConfig):
        """Set up the log destination from configuration.

        Logging is switched off for this instance if the log directory
        cannot be created.

        Args:
            config: LLM configuration containing testing.log_file and
                testing.log_responses settings
        """
        testing = config.testing
        self.config = config
        self.log_path = Path(testing.log_file)
        self.enabled = testing.log_responses

        if not self.enabled:
            return

        # REQ-027, REQ-032: Create log directory if missing
        try:
            self.log_path.parent.mkdir(parents=True, exist_ok=True)
            logger.info(f"ResponseLogger initialized: {self.log_path}")
        except Exception as e:
            logger.error(f"Failed to create log directory: {e}")
            self.enabled = False

    async def log_response(
        self,
        username: str,
        trigger_result: TriggerResult,
        input_message: str,
        llm_response: str,
        formatted_parts: list[str],
        rate_limit_decision: RateLimitDecision,
        sent: bool,
    ) -> None:
        """Append a response event to the JSONL log.

        Implements REQ-024, REQ-025: Log all responses with comprehensive metadata.
        Failures while writing are logged and swallowed (REQ-026).

        Args:
            username: User who triggered response
            trigger_result: TriggerResult from trigger check
            input_message: Original user message
            llm_response: Raw LLM response
            formatted_parts: List of formatted message parts
            rate_limit_decision: Rate limit decision details
            sent: Whether response was actually sent (False if dry-run or blocked)
        """
        # REQ-033: Respect log_responses config flag
        if not self.enabled:
            return

        # REQ-025: Assemble the record; key order is the order written to disk
        record = {
            "timestamp": datetime.now().isoformat(),
            "trigger_type": trigger_result.trigger_type,
            "trigger_name": trigger_result.trigger_name,
            "trigger_priority": trigger_result.priority,
            "username": username,
            "input_message": input_message,
            "cleaned_message": trigger_result.cleaned_message,
            "llm_response": llm_response,
            "formatted_parts": formatted_parts,
            "response_sent": sent,
            "rate_limit": {
                "allowed": rate_limit_decision.allowed,
                "reason": rate_limit_decision.reason,
                "retry_after": rate_limit_decision.retry_after,
                "details": rate_limit_decision.details,
            },
        }

        try:
            # REQ-028, REQ-029: Append valid JSON (one per line)
            with self.log_path.open("a", encoding="utf-8") as handle:
                handle.write(f"{json.dumps(record)}\n")

            logger.debug(
                f"Logged response: {trigger_result.trigger_type}/{trigger_result.trigger_name} "
                f"from {username} (sent={sent})"
            )
        except Exception as e:
            # REQ-026: Don't crash on I/O errors
            logger.error(f"Failed to write response log: {e}")