dtSpark 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. dtSpark/__init__.py +0 -0
  2. dtSpark/_description.txt +1 -0
  3. dtSpark/_full_name.txt +1 -0
  4. dtSpark/_licence.txt +21 -0
  5. dtSpark/_metadata.yaml +6 -0
  6. dtSpark/_name.txt +1 -0
  7. dtSpark/_version.txt +1 -0
  8. dtSpark/aws/__init__.py +7 -0
  9. dtSpark/aws/authentication.py +296 -0
  10. dtSpark/aws/bedrock.py +578 -0
  11. dtSpark/aws/costs.py +318 -0
  12. dtSpark/aws/pricing.py +580 -0
  13. dtSpark/cli_interface.py +2645 -0
  14. dtSpark/conversation_manager.py +3050 -0
  15. dtSpark/core/__init__.py +12 -0
  16. dtSpark/core/application.py +3355 -0
  17. dtSpark/core/context_compaction.py +735 -0
  18. dtSpark/daemon/__init__.py +104 -0
  19. dtSpark/daemon/__main__.py +10 -0
  20. dtSpark/daemon/action_monitor.py +213 -0
  21. dtSpark/daemon/daemon_app.py +730 -0
  22. dtSpark/daemon/daemon_manager.py +289 -0
  23. dtSpark/daemon/execution_coordinator.py +194 -0
  24. dtSpark/daemon/pid_file.py +169 -0
  25. dtSpark/database/__init__.py +482 -0
  26. dtSpark/database/autonomous_actions.py +1191 -0
  27. dtSpark/database/backends.py +329 -0
  28. dtSpark/database/connection.py +122 -0
  29. dtSpark/database/conversations.py +520 -0
  30. dtSpark/database/credential_prompt.py +218 -0
  31. dtSpark/database/files.py +205 -0
  32. dtSpark/database/mcp_ops.py +355 -0
  33. dtSpark/database/messages.py +161 -0
  34. dtSpark/database/schema.py +673 -0
  35. dtSpark/database/tool_permissions.py +186 -0
  36. dtSpark/database/usage.py +167 -0
  37. dtSpark/files/__init__.py +4 -0
  38. dtSpark/files/manager.py +322 -0
  39. dtSpark/launch.py +39 -0
  40. dtSpark/limits/__init__.py +10 -0
  41. dtSpark/limits/costs.py +296 -0
  42. dtSpark/limits/tokens.py +342 -0
  43. dtSpark/llm/__init__.py +17 -0
  44. dtSpark/llm/anthropic_direct.py +446 -0
  45. dtSpark/llm/base.py +146 -0
  46. dtSpark/llm/context_limits.py +438 -0
  47. dtSpark/llm/manager.py +177 -0
  48. dtSpark/llm/ollama.py +578 -0
  49. dtSpark/mcp_integration/__init__.py +5 -0
  50. dtSpark/mcp_integration/manager.py +653 -0
  51. dtSpark/mcp_integration/tool_selector.py +225 -0
  52. dtSpark/resources/config.yaml.template +631 -0
  53. dtSpark/safety/__init__.py +22 -0
  54. dtSpark/safety/llm_service.py +111 -0
  55. dtSpark/safety/patterns.py +229 -0
  56. dtSpark/safety/prompt_inspector.py +442 -0
  57. dtSpark/safety/violation_logger.py +346 -0
  58. dtSpark/scheduler/__init__.py +20 -0
  59. dtSpark/scheduler/creation_tools.py +599 -0
  60. dtSpark/scheduler/execution_queue.py +159 -0
  61. dtSpark/scheduler/executor.py +1152 -0
  62. dtSpark/scheduler/manager.py +395 -0
  63. dtSpark/tools/__init__.py +4 -0
  64. dtSpark/tools/builtin.py +833 -0
  65. dtSpark/web/__init__.py +20 -0
  66. dtSpark/web/auth.py +152 -0
  67. dtSpark/web/dependencies.py +37 -0
  68. dtSpark/web/endpoints/__init__.py +17 -0
  69. dtSpark/web/endpoints/autonomous_actions.py +1125 -0
  70. dtSpark/web/endpoints/chat.py +621 -0
  71. dtSpark/web/endpoints/conversations.py +353 -0
  72. dtSpark/web/endpoints/main_menu.py +547 -0
  73. dtSpark/web/endpoints/streaming.py +421 -0
  74. dtSpark/web/server.py +578 -0
  75. dtSpark/web/session.py +167 -0
  76. dtSpark/web/ssl_utils.py +195 -0
  77. dtSpark/web/static/css/dark-theme.css +427 -0
  78. dtSpark/web/static/js/actions.js +1101 -0
  79. dtSpark/web/static/js/chat.js +614 -0
  80. dtSpark/web/static/js/main.js +496 -0
  81. dtSpark/web/static/js/sse-client.js +242 -0
  82. dtSpark/web/templates/actions.html +408 -0
  83. dtSpark/web/templates/base.html +93 -0
  84. dtSpark/web/templates/chat.html +814 -0
  85. dtSpark/web/templates/conversations.html +350 -0
  86. dtSpark/web/templates/goodbye.html +81 -0
  87. dtSpark/web/templates/login.html +90 -0
  88. dtSpark/web/templates/main_menu.html +983 -0
  89. dtSpark/web/templates/new_conversation.html +191 -0
  90. dtSpark/web/web_interface.py +137 -0
  91. dtspark-1.0.4.dist-info/METADATA +187 -0
  92. dtspark-1.0.4.dist-info/RECORD +96 -0
  93. dtspark-1.0.4.dist-info/WHEEL +5 -0
  94. dtspark-1.0.4.dist-info/entry_points.txt +3 -0
  95. dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
  96. dtspark-1.0.4.dist-info/top_level.txt +1 -0
dtSpark/launch.py ADDED
@@ -0,0 +1,39 @@
1
+ """
2
+ Application entry point for Spark.
3
+
4
+ This launcher handles:
5
+ - Main application (CLI or Web interface)
6
+ - Daemon mode for autonomous action execution
7
+
8
+ Usage:
9
+ dtSpark # Run main application
10
+ dtSpark daemon start # Start daemon in background
11
+ dtSpark daemon stop # Stop daemon
12
+ dtSpark daemon status # Check daemon status
13
+ """
14
+
15
+ import sys
16
+ import os
17
+
18
# Exported at import time, i.e. before either dtSpark import inside main() runs.
# NOTE(review): presumably read by a dependency to select container-style
# behaviour -- confirm which component consumes CONTAINER_MODE.
os.environ['CONTAINER_MODE'] = 'true'


def main():
    """
    Dispatch to the daemon handler or to the main application.

    When the first command-line argument is ``daemon`` the call is handed
    to ``dtSpark.daemon.daemon_main``; in every other case (including no
    arguments at all) ``dtSpark.core.application.main`` is run.

    Both targets are imported lazily so only the selected one is loaded.
    """
    # A one-element slice is empty when no argument was given, so this
    # covers both the length check and the comparison.
    if sys.argv[1:2] == ['daemon']:
        from dtSpark.daemon import daemon_main
        daemon_main()
        return

    # Anything else runs the main application.
    from dtSpark.core.application import main as app_main
    app_main()


# Run the launcher when this module is executed directly.
# NOTE(review): an installed console script is expected to call main()
# itself rather than go through this guard -- confirm in entry_points.txt.
if __name__ == "__main__":
    main()
@@ -0,0 +1,10 @@
1
+ """Usage limits and tracking module."""
2
+ from .tokens import TokenManager, LimitStatus
3
# CostManager is treated as optional: when its module cannot be loaded the
# package still exposes the token-limit API and CostManager is set to None.
try:
    from .costs import CostManager
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit still propagate, and record why the
    # import failed instead of discarding the reason silently.
    import logging

    logging.getLogger(__name__).debug(
        "CostManager unavailable; cost limits disabled", exc_info=True
    )
    CostManager = None

__all__ = ['TokenManager', 'LimitStatus']
# Only advertise CostManager when the import above actually succeeded.
if CostManager is not None:
    __all__.append('CostManager')
@@ -0,0 +1,296 @@
1
+ """
2
+ Cost management module for AWS Bedrock usage.
3
+
4
+ This module provides functionality for:
5
+ - Tracking usage costs over rolling time windows
6
+ - Budget monitoring and warnings
7
+ - Cost-based usage limits with override options
8
+ """
9
+
10
+ import logging
11
+ from datetime import datetime, timedelta
12
+ from typing import Dict, Optional, Tuple
13
+ from enum import Enum
14
+
15
+
16
class BudgetStatus(Enum):
    """
    Budget status levels.

    The WARNING_* members correspond to the 75 / 85 / 95 percent thresholds
    applied by CostManager.check_budget_before_request.
    """
    OK = "ok"
    WARNING_75 = "warning_75"
    WARNING_85 = "warning_85"
    WARNING_95 = "warning_95"
    EXCEEDED = "exceeded"


class CostManager:
    """
    Manages cost tracking and budget enforcement.

    Spend is read from the database for a rolling window of ``period_hours``
    hours and compared against ``max_spend`` plus any temporary override
    granted through ``apply_override``.
    """

    # (threshold percent, status) pairs, highest first so the first match wins.
    _WARNING_LEVELS = (
        (95, BudgetStatus.WARNING_95),
        (85, BudgetStatus.WARNING_85),
        (75, BudgetStatus.WARNING_75),
    )

    def __init__(self, database, pricing_manager, config: Dict):
        """
        Initialise the cost manager.

        Args:
            database: ConversationDatabase instance
            pricing_manager: BedrockPricing instance
            config: Cost management configuration dictionary. Recognised
                keys: 'enabled' (default False), 'max_spend' (default 10.0),
                'period_hours' (default 24), 'allow_override' (default True).
        """
        self.database = database
        self.pricing = pricing_manager
        self.enabled = config.get('enabled', False)
        self.max_spend = float(config.get('max_spend', 10.0))
        self.period_hours = int(config.get('period_hours', 24))
        self.allow_override = config.get('allow_override', True)
        self.current_override = 0.0  # Additional spend allowed for current period
        self.override_expires = None  # When the override expires

    def check_budget_before_request(self, model_id: str, region: str,
                                    input_tokens: int, max_output_tokens: int) -> Tuple[bool, str, BudgetStatus]:
        """
        Check if a request would exceed the budget.

        Args:
            model_id: Bedrock model ID
            region: AWS region
            input_tokens: Number of input tokens
            max_output_tokens: Maximum output tokens

        Returns:
            Tuple of (allowed, message, status). ``message`` is empty when
            the status is OK. A disabled manager always allows the request.
        """
        if not self.enabled:
            return True, "", BudgetStatus.OK

        # Estimate maximum cost for this request
        estimated_cost = self.pricing.estimate_max_cost(
            model_id, region, input_tokens, max_output_tokens
        )

        # Reading the window spend also clears an expired override, so it
        # has to happen before the effective limit is computed.
        current_spend = self._get_rolling_window_spend()
        effective_limit = self.max_spend + self.current_override
        projected_spend = current_spend + estimated_cost

        if projected_spend > effective_limit:
            # Budget exceeded
            time_until_reset = self._time_until_reset()
            return False, self._format_exceeded_message(
                current_spend, effective_limit, estimated_cost, time_until_reset
            ), BudgetStatus.EXCEEDED

        # A limit of zero cannot be expressed as a percentage; treat it as 0%
        # (same guard as get_budget_summary) instead of raising
        # ZeroDivisionError. Any positive projected spend against a zero
        # limit has already been rejected above.
        if effective_limit > 0:
            percentage = (projected_spend / effective_limit) * 100
        else:
            percentage = 0

        for threshold, status in self._WARNING_LEVELS:
            if percentage >= threshold:
                return True, self._format_warning_message(
                    percentage, current_spend, effective_limit, estimated_cost
                ), status

        return True, "", BudgetStatus.OK

    def record_usage(self, conversation_id: int, model_id: str, region: str,
                     input_tokens: int, output_tokens: int) -> float:
        """
        Record actual usage after a request completes.

        Args:
            conversation_id: Conversation ID
            model_id: Bedrock model ID
            region: AWS region
            input_tokens: Actual input tokens used
            output_tokens: Actual output tokens used

        Returns:
            Actual cost in USD, or 0.0 (nothing recorded) when cost
            management is disabled
        """
        if not self.enabled:
            return 0.0

        # Calculate actual cost; the pricing manager also reports where the
        # price came from, which is only used for the debug log below.
        cost, source = self.pricing.calculate_cost(
            model_id, region, input_tokens, output_tokens
        )

        # Store usage in database
        self.database.record_usage(
            conversation_id=conversation_id,
            model_id=model_id,
            region=region,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost=cost,
            timestamp=datetime.now()
        )

        logging.debug(f"Recorded usage: ${cost:.4f} ({source})")
        return cost

    def _get_rolling_window_spend(self) -> float:
        """
        Get total spend in the current rolling window.

        Side effect: clears the current override once its expiry time has
        passed.

        Returns:
            Total spend in USD (0.0 when the database reports no total)
        """
        # Check if override has expired
        if self.override_expires and datetime.now() >= self.override_expires:
            self.current_override = 0.0
            self.override_expires = None
            logging.info("Cost override has expired")

        # Calculate start of rolling window
        window_start = datetime.now() - timedelta(hours=self.period_hours)

        # Get usage from database
        total_spend = self.database.get_usage_in_window(window_start)

        # NOTE(review): an empty window may come back as None (e.g. an SQL
        # SUM over no rows) -- treat that as "nothing spent" so callers can
        # do arithmetic on the result. Confirm against the database layer.
        if total_spend is None:
            return 0.0

        return total_spend

    def _time_until_reset(self) -> timedelta:
        """
        Calculate time until the rolling window resets.

        Returns:
            Time delta until oldest usage expires (never negative);
            zero when there is no usage inside the window
        """
        window_start = datetime.now() - timedelta(hours=self.period_hours)
        oldest_usage_time = self.database.get_oldest_usage_in_window(window_start)

        if oldest_usage_time:
            # Time until this usage falls out of the window
            reset_time = oldest_usage_time + timedelta(hours=self.period_hours)
            time_remaining = reset_time - datetime.now()
            return max(time_remaining, timedelta(0))

        # No usage in window, resets immediately
        return timedelta(0)

    def _format_warning_message(self, percentage: float, current_spend: float,
                                limit: float, estimated_cost: float) -> str:
        """
        Format a budget warning message.

        Args:
            percentage: Percentage of budget used
            current_spend: Current spend in USD
            limit: Budget limit in USD
            estimated_cost: Estimated cost of current request

        Returns:
            Formatted warning message
        """
        # Remaining budget is reported as if the current request had
        # already been charged at its estimated cost.
        remaining = limit - current_spend - estimated_cost

        message = (
            f"Budget Warning: {percentage:.1f}% of ${limit:.2f} budget used "
            f"(${current_spend:.2f} spent, ${estimated_cost:.4f} this request, "
            f"${remaining:.2f} remaining in {self.period_hours}h window)"
        )

        return message

    def _format_exceeded_message(self, current_spend: float, limit: float,
                                 estimated_cost: float, time_until_reset: timedelta) -> str:
        """
        Format a budget exceeded message.

        Args:
            current_spend: Current spend in USD
            limit: Budget limit in USD
            estimated_cost: Estimated cost of request
            time_until_reset: Time until budget resets

        Returns:
            Formatted exceeded message
        """
        seconds_left = time_until_reset.total_seconds()
        hours = int(seconds_left // 3600)
        minutes = int((seconds_left % 3600) // 60)

        message = (
            f"Budget Limit Reached: ${current_spend:.2f} of ${limit:.2f} spent. "
            f"This request (${estimated_cost:.4f}) would exceed the limit. "
        )

        if seconds_left > 0:
            message += f"Budget resets in {hours}h {minutes}m. "
        else:
            message += "Budget resets now (no recent usage). "

        if self.allow_override:
            message += "Override available."
        else:
            message += "No override allowed."

        return message

    def apply_override(self, additional_percentage: float) -> bool:
        """
        Apply a budget override for the current period.

        The override replaces (does not add to) any override already in
        place and expires ``period_hours`` hours after it is applied.

        Args:
            additional_percentage: Additional percentage to allow (e.g., 10.0 for 10%)

        Returns:
            True if override applied successfully, False when overrides are
            disabled by configuration
        """
        if not self.allow_override:
            logging.warning("Budget override not allowed by configuration")
            return False

        # Calculate additional spend allowed (relative to the base limit)
        additional_spend = self.max_spend * (additional_percentage / 100.0)
        self.current_override = additional_spend

        # Set override to expire after the current period
        self.override_expires = datetime.now() + timedelta(hours=self.period_hours)

        logging.info(
            f"Budget override applied: +${additional_spend:.2f} ({additional_percentage}%) "
            f"until {self.override_expires}"
        )

        return True

    def get_budget_summary(self) -> Dict:
        """
        Get current budget status summary.

        Returns:
            Dictionary with budget information; only ``{'enabled': False}``
            when cost management is disabled
        """
        if not self.enabled:
            return {'enabled': False}

        current_spend = self._get_rolling_window_spend()
        effective_limit = self.max_spend + self.current_override
        percentage = (current_spend / effective_limit * 100) if effective_limit > 0 else 0
        remaining = effective_limit - current_spend
        time_until_reset = self._time_until_reset()

        return {
            'enabled': True,
            'current_spend': current_spend,
            'limit': self.max_spend,
            'effective_limit': effective_limit,
            'override_amount': self.current_override,
            'percentage_used': percentage,
            'remaining': remaining,
            'period_hours': self.period_hours,
            'time_until_reset_seconds': time_until_reset.total_seconds(),
            'override_active': self.current_override > 0,
            'override_expires': self.override_expires.isoformat() if self.override_expires else None
        }
@@ -0,0 +1,342 @@
1
+ """
2
+ Token management module for AWS Bedrock usage.
3
+
4
+ This module provides functionality for:
5
+ - Tracking token usage (input and output separately) over rolling time windows
6
+ - Token limit monitoring and warnings
7
+ - Token-based usage limits with override options
8
+ """
9
+
10
+ import logging
11
+ from datetime import datetime, timedelta
12
+ from typing import Dict, Optional, Tuple
13
+ from enum import Enum
14
+
15
+
16
class LimitStatus(Enum):
    """Severity levels produced by token-limit checks."""
    OK = "ok"
    WARNING_75 = "warning_75"
    WARNING_85 = "warning_85"
    WARNING_95 = "warning_95"
    EXCEEDED = "exceeded"


class TokenManager:
    """Tracks token consumption and enforces rolling-window limits."""

    def __init__(self, database, config: Dict):
        """
        Initialise the token manager.

        Args:
            database: ConversationDatabase instance
            config: Token management configuration dictionary
        """
        self.database = database
        self.enabled = config.get('enabled', False)
        self.max_input_tokens = int(config.get('max_input_tokens', 100000))
        self.max_output_tokens = int(config.get('max_output_tokens', 50000))
        self.period_hours = int(config.get('period_hours', 24))
        self.allow_override = config.get('allow_override', True)

        # Temporary allowance on top of the base limits, and its expiry time
        self.current_input_override = 0
        self.current_output_override = 0
        self.override_expires = None

    def check_limits_before_request(self, model_id: str, region: str,
                                    input_tokens: int, max_output_tokens: int) -> Tuple[bool, str, LimitStatus]:
        """
        Decide whether a request fits inside the current token limits.

        Args:
            model_id: Bedrock model ID
            region: AWS region
            input_tokens: Number of input tokens
            max_output_tokens: Maximum output tokens expected

        Returns:
            Tuple of (allowed, message, status)
        """
        if not self.enabled:
            return True, "", LimitStatus.OK

        # Usage so far in the window (this call also expires a stale override)
        used_in, used_out = self._get_rolling_window_usage()

        # Base limit plus any active override
        cap_in = self.max_input_tokens + self.current_input_override
        cap_out = self.max_output_tokens + self.current_output_override

        # Usage if this request went ahead at its maximum size
        next_in = used_in + input_tokens
        next_out = used_out + max_output_tokens

        pct_in = (next_in / cap_in) * 100 if cap_in > 0 else 0
        pct_out = (next_out / cap_out) * 100 if cap_out > 0 else 0

        # The tighter of the two limits drives the warning level
        worst_pct = max(pct_in, pct_out)

        if next_in > cap_in or next_out > cap_out:
            reset_in = self._time_until_reset()
            text = self._format_exceeded_message(
                used_in, used_out,
                cap_in, cap_out,
                input_tokens, max_output_tokens,
                reset_in
            )
            return False, text, LimitStatus.EXCEEDED

        # Highest threshold first so the first hit is the most severe one
        warning_levels = (
            (95, LimitStatus.WARNING_95),
            (85, LimitStatus.WARNING_85),
            (75, LimitStatus.WARNING_75),
        )
        for floor, level in warning_levels:
            if worst_pct >= floor:
                text = self._format_warning_message(
                    worst_pct, used_in, used_out,
                    cap_in, cap_out,
                    input_tokens, max_output_tokens
                )
                return True, text, level

        return True, "", LimitStatus.OK

    def record_usage(self, conversation_id: int, model_id: str, region: str,
                     input_tokens: int, output_tokens: int) -> Tuple[int, int]:
        """
        Persist the token counts of a completed request.

        Args:
            conversation_id: Conversation ID
            model_id: Bedrock model ID
            region: AWS region
            input_tokens: Actual input tokens used
            output_tokens: Actual output tokens used

        Returns:
            Tuple of (input_tokens, output_tokens) recorded; (0, 0) when
            token management is disabled
        """
        if not self.enabled:
            return 0, 0

        # Cost is written as 0.0 here; this manager only tracks tokens
        record = dict(
            conversation_id=conversation_id,
            model_id=model_id,
            region=region,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost=0.0,
            timestamp=datetime.now(),
        )
        self.database.record_usage(**record)

        logging.debug(f"Recorded usage: {input_tokens} input tokens, {output_tokens} output tokens")
        return input_tokens, output_tokens

    def _get_rolling_window_usage(self) -> Tuple[int, int]:
        """
        Read the token totals for the current rolling window.

        Also clears the override once its expiry time has passed.

        Returns:
            Tuple of (total_input_tokens, total_output_tokens)
        """
        expiry = self.override_expires
        if expiry and datetime.now() >= expiry:
            self.current_input_override = 0
            self.current_output_override = 0
            self.override_expires = None
            logging.info("Token override has expired")

        # The window covers the last `period_hours` hours
        since = datetime.now() - timedelta(hours=self.period_hours)

        tokens_in, tokens_out = self.database.get_token_usage_in_window(since)
        return tokens_in, tokens_out

    def _time_until_reset(self) -> timedelta:
        """
        Work out how long until the oldest usage leaves the window.

        Returns:
            Time delta until oldest usage expires
        """
        period = timedelta(hours=self.period_hours)
        earliest = self.database.get_oldest_usage_in_window(datetime.now() - period)

        if not earliest:
            # Nothing recorded inside the window, so there is nothing to wait for
            return timedelta(0)

        # The oldest record drops out one full period after it was written
        expires_at = earliest + timedelta(hours=self.period_hours)
        return max(expires_at - datetime.now(), timedelta(0))

    def _format_warning_message(self, percentage: float,
                                current_input: int, current_output: int,
                                input_limit: int, output_limit: int,
                                request_input: int, request_output: int) -> str:
        """
        Build the text shown when usage crosses a warning threshold.

        Args:
            percentage: Percentage of limit used (highest of input/output)
            current_input: Current input tokens used
            current_output: Current output tokens used
            input_limit: Input token limit
            output_limit: Output token limit
            request_input: Input tokens for current request
            request_output: Output tokens for current request

        Returns:
            Formatted warning message
        """
        spare_in = input_limit - current_input - request_input
        spare_out = output_limit - current_output - request_output

        pieces = (
            f"Token Limit Warning: {percentage:.1f}% of limits used. ",
            f"Input: {current_input:,}/{input_limit:,} (+{request_input:,} this request, {spare_in:,} remaining). ",
            f"Output: {current_output:,}/{output_limit:,} (+{request_output:,} this request, {spare_out:,} remaining). ",
            f"Window: {self.period_hours}h",
        )
        return "".join(pieces)

    def _format_exceeded_message(self, current_input: int, current_output: int,
                                 input_limit: int, output_limit: int,
                                 request_input: int, request_output: int,
                                 time_until_reset: timedelta) -> str:
        """
        Build the text shown when a request is refused.

        Args:
            current_input: Current input tokens used
            current_output: Current output tokens used
            input_limit: Input token limit
            output_limit: Output token limit
            request_input: Input tokens for request
            request_output: Output tokens for request
            time_until_reset: Time until limit resets

        Returns:
            Formatted exceeded message
        """
        seconds_left = time_until_reset.total_seconds()
        hours = int(seconds_left // 3600)
        minutes = int((seconds_left % 3600) // 60)

        # Work out which of the two limits the request would break
        over_input = (current_input + request_input) > input_limit
        over_output = (current_output + request_output) > output_limit

        if over_input and over_output:
            reason = "Both limits exceeded. "
        elif over_input:
            reason = f"Input limit exceeded: {current_input:,}/{input_limit:,} used, {request_input:,} requested. "
        else:
            reason = f"Output limit exceeded: {current_output:,}/{output_limit:,} used, {request_output:,} requested. "

        if seconds_left > 0:
            reset_note = f"Limit resets in {hours}h {minutes}m. "
        else:
            reset_note = "Limit resets now (no recent usage). "

        override_note = "Override available." if self.allow_override else "No override allowed."

        return "Token Limit Reached: " + reason + reset_note + override_note

    def apply_override(self, additional_percentage: float) -> bool:
        """
        Grant a temporary allowance on top of both token limits.

        Args:
            additional_percentage: Additional percentage to allow (e.g., 10.0 for 10%)

        Returns:
            True if override applied successfully
        """
        if not self.allow_override:
            logging.warning("Token limit override not allowed by configuration")
            return False

        # Same percentage is applied to the base input and output limits
        fraction = additional_percentage / 100.0
        extra_in = int(self.max_input_tokens * fraction)
        extra_out = int(self.max_output_tokens * fraction)

        self.current_input_override = extra_in
        self.current_output_override = extra_out

        # The allowance lasts for one full period from now
        self.override_expires = datetime.now() + timedelta(hours=self.period_hours)

        logging.info(
            f"Token limit override applied: +{extra_in:,} input tokens, "
            f"+{extra_out:,} output tokens ({additional_percentage}%) "
            f"until {self.override_expires}"
        )

        return True

    def get_usage_summary(self) -> Dict:
        """
        Report the current token usage against the limits.

        Returns:
            Dictionary with usage information
        """
        if not self.enabled:
            return {'enabled': False}

        used_in, used_out = self._get_rolling_window_usage()
        cap_in = self.max_input_tokens + self.current_input_override
        cap_out = self.max_output_tokens + self.current_output_override

        pct_in = (used_in / cap_in * 100) if cap_in > 0 else 0
        pct_out = (used_out / cap_out * 100) if cap_out > 0 else 0

        reset_in = self._time_until_reset()
        expires = self.override_expires
        has_override = self.current_input_override > 0 or self.current_output_override > 0

        summary = {
            'enabled': True,
            'current_input_tokens': used_in,
            'current_output_tokens': used_out,
            'input_limit': self.max_input_tokens,
            'output_limit': self.max_output_tokens,
            'effective_input_limit': cap_in,
            'effective_output_limit': cap_out,
            'input_override_amount': self.current_input_override,
            'output_override_amount': self.current_output_override,
            'input_percentage_used': pct_in,
            'output_percentage_used': pct_out,
            'input_remaining': cap_in - used_in,
            'output_remaining': cap_out - used_out,
            'period_hours': self.period_hours,
            'time_until_reset_seconds': reset_in.total_seconds(),
            'override_active': has_override,
            'override_expires': expires.isoformat() if expires else None,
        }
        return summary