dtSpark 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtSpark/__init__.py +0 -0
- dtSpark/_description.txt +1 -0
- dtSpark/_full_name.txt +1 -0
- dtSpark/_licence.txt +21 -0
- dtSpark/_metadata.yaml +6 -0
- dtSpark/_name.txt +1 -0
- dtSpark/_version.txt +1 -0
- dtSpark/aws/__init__.py +7 -0
- dtSpark/aws/authentication.py +296 -0
- dtSpark/aws/bedrock.py +578 -0
- dtSpark/aws/costs.py +318 -0
- dtSpark/aws/pricing.py +580 -0
- dtSpark/cli_interface.py +2645 -0
- dtSpark/conversation_manager.py +3050 -0
- dtSpark/core/__init__.py +12 -0
- dtSpark/core/application.py +3355 -0
- dtSpark/core/context_compaction.py +735 -0
- dtSpark/daemon/__init__.py +104 -0
- dtSpark/daemon/__main__.py +10 -0
- dtSpark/daemon/action_monitor.py +213 -0
- dtSpark/daemon/daemon_app.py +730 -0
- dtSpark/daemon/daemon_manager.py +289 -0
- dtSpark/daemon/execution_coordinator.py +194 -0
- dtSpark/daemon/pid_file.py +169 -0
- dtSpark/database/__init__.py +482 -0
- dtSpark/database/autonomous_actions.py +1191 -0
- dtSpark/database/backends.py +329 -0
- dtSpark/database/connection.py +122 -0
- dtSpark/database/conversations.py +520 -0
- dtSpark/database/credential_prompt.py +218 -0
- dtSpark/database/files.py +205 -0
- dtSpark/database/mcp_ops.py +355 -0
- dtSpark/database/messages.py +161 -0
- dtSpark/database/schema.py +673 -0
- dtSpark/database/tool_permissions.py +186 -0
- dtSpark/database/usage.py +167 -0
- dtSpark/files/__init__.py +4 -0
- dtSpark/files/manager.py +322 -0
- dtSpark/launch.py +39 -0
- dtSpark/limits/__init__.py +10 -0
- dtSpark/limits/costs.py +296 -0
- dtSpark/limits/tokens.py +342 -0
- dtSpark/llm/__init__.py +17 -0
- dtSpark/llm/anthropic_direct.py +446 -0
- dtSpark/llm/base.py +146 -0
- dtSpark/llm/context_limits.py +438 -0
- dtSpark/llm/manager.py +177 -0
- dtSpark/llm/ollama.py +578 -0
- dtSpark/mcp_integration/__init__.py +5 -0
- dtSpark/mcp_integration/manager.py +653 -0
- dtSpark/mcp_integration/tool_selector.py +225 -0
- dtSpark/resources/config.yaml.template +631 -0
- dtSpark/safety/__init__.py +22 -0
- dtSpark/safety/llm_service.py +111 -0
- dtSpark/safety/patterns.py +229 -0
- dtSpark/safety/prompt_inspector.py +442 -0
- dtSpark/safety/violation_logger.py +346 -0
- dtSpark/scheduler/__init__.py +20 -0
- dtSpark/scheduler/creation_tools.py +599 -0
- dtSpark/scheduler/execution_queue.py +159 -0
- dtSpark/scheduler/executor.py +1152 -0
- dtSpark/scheduler/manager.py +395 -0
- dtSpark/tools/__init__.py +4 -0
- dtSpark/tools/builtin.py +833 -0
- dtSpark/web/__init__.py +20 -0
- dtSpark/web/auth.py +152 -0
- dtSpark/web/dependencies.py +37 -0
- dtSpark/web/endpoints/__init__.py +17 -0
- dtSpark/web/endpoints/autonomous_actions.py +1125 -0
- dtSpark/web/endpoints/chat.py +621 -0
- dtSpark/web/endpoints/conversations.py +353 -0
- dtSpark/web/endpoints/main_menu.py +547 -0
- dtSpark/web/endpoints/streaming.py +421 -0
- dtSpark/web/server.py +578 -0
- dtSpark/web/session.py +167 -0
- dtSpark/web/ssl_utils.py +195 -0
- dtSpark/web/static/css/dark-theme.css +427 -0
- dtSpark/web/static/js/actions.js +1101 -0
- dtSpark/web/static/js/chat.js +614 -0
- dtSpark/web/static/js/main.js +496 -0
- dtSpark/web/static/js/sse-client.js +242 -0
- dtSpark/web/templates/actions.html +408 -0
- dtSpark/web/templates/base.html +93 -0
- dtSpark/web/templates/chat.html +814 -0
- dtSpark/web/templates/conversations.html +350 -0
- dtSpark/web/templates/goodbye.html +81 -0
- dtSpark/web/templates/login.html +90 -0
- dtSpark/web/templates/main_menu.html +983 -0
- dtSpark/web/templates/new_conversation.html +191 -0
- dtSpark/web/web_interface.py +137 -0
- dtspark-1.0.4.dist-info/METADATA +187 -0
- dtspark-1.0.4.dist-info/RECORD +96 -0
- dtspark-1.0.4.dist-info/WHEEL +5 -0
- dtspark-1.0.4.dist-info/entry_points.txt +3 -0
- dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
- dtspark-1.0.4.dist-info/top_level.txt +1 -0
dtSpark/launch.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Application entry point for Spark.
|
|
3
|
+
|
|
4
|
+
This launcher handles:
|
|
5
|
+
- Main application (CLI or Web interface)
|
|
6
|
+
- Daemon mode for autonomous action execution
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
dtSpark # Run main application
|
|
10
|
+
dtSpark daemon start # Start daemon in background
|
|
11
|
+
dtSpark daemon stop # Stop daemon
|
|
12
|
+
dtSpark daemon status # Check daemon status
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import sys
|
|
16
|
+
import os
|
|
17
|
+
|
|
18
|
+
os.environ['CONTAINER_MODE'] = 'true'
|
|
19
|
+
|
|
20
|
+
def main():
    """
    Dispatch to the daemon handler or to the main application.

    When the first command-line argument is ``daemon`` the call is handed to
    ``dtSpark.daemon.daemon_main``; every other invocation runs
    ``dtSpark.core.application.main``. Each target is imported inside its own
    branch, so only the module that is actually needed gets imported.
    """
    # An empty slice (no arguments given) simply compares unequal.
    wants_daemon = sys.argv[1:2] == ['daemon']

    if not wants_daemon:
        # Default path: run the main application
        from dtSpark.core.application import main as app_main
        app_main()
        return

    # Daemon sub-command: hand over to the daemon module
    from dtSpark.daemon import daemon_main
    daemon_main()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Run main() when this module is executed directly as a script
|
|
38
|
+
if __name__ == "__main__":
|
|
39
|
+
main()
|
dtSpark/limits/costs.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cost management module for AWS Bedrock usage.
|
|
3
|
+
|
|
4
|
+
This module provides functionality for:
|
|
5
|
+
- Tracking usage costs over rolling time windows
|
|
6
|
+
- Budget monitoring and warnings
|
|
7
|
+
- Cost-based usage limits with override options
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from datetime import datetime, timedelta
|
|
12
|
+
from typing import Dict, Optional, Tuple
|
|
13
|
+
from enum import Enum
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BudgetStatus(Enum):
    """Budget status levels reported by CostManager."""

    OK = "ok"
    WARNING_75 = "warning_75"
    WARNING_85 = "warning_85"
    WARNING_95 = "warning_95"
    EXCEEDED = "exceeded"


class CostManager:
    """Manages cost tracking and budget enforcement over a rolling time window."""

    # Warning thresholds (percentage of the effective limit), highest first so
    # the first match is the most severe applicable level.
    _WARNING_LEVELS = (
        (95, BudgetStatus.WARNING_95),
        (85, BudgetStatus.WARNING_85),
        (75, BudgetStatus.WARNING_75),
    )

    def __init__(self, database, pricing_manager, config: Dict):
        """
        Initialise the cost manager.

        Args:
            database: ConversationDatabase instance
            pricing_manager: BedrockPricing instance
            config: Cost management configuration dictionary. Recognised keys:
                'enabled' (default False), 'max_spend' (default 10.0),
                'period_hours' (default 24), 'allow_override' (default True)
        """
        self.database = database
        self.pricing = pricing_manager
        self.enabled = config.get('enabled', False)
        self.max_spend = float(config.get('max_spend', 10.0))
        self.period_hours = int(config.get('period_hours', 24))
        self.allow_override = config.get('allow_override', True)
        self.current_override = 0.0  # Additional spend allowed for current period
        self.override_expires = None  # When the override expires

    def check_budget_before_request(self, model_id: str, region: str,
                                    input_tokens: int, max_output_tokens: int) -> Tuple[bool, str, BudgetStatus]:
        """
        Check if a request would exceed the budget.

        Args:
            model_id: Bedrock model ID
            region: AWS region
            input_tokens: Number of input tokens
            max_output_tokens: Maximum output tokens

        Returns:
            Tuple of (allowed, message, status). The message is empty when the
            status is OK (including when cost management is disabled).
        """
        if not self.enabled:
            return True, "", BudgetStatus.OK

        # Estimate maximum cost for this request
        estimated_cost = self.pricing.estimate_max_cost(
            model_id, region, input_tokens, max_output_tokens
        )

        # Read the spend first: this call also clears an expired override,
        # which must happen before the effective limit is computed.
        current_spend = self._get_rolling_window_spend()
        effective_limit = self.max_spend + self.current_override
        projected_spend = current_spend + estimated_cost

        if projected_spend > effective_limit:
            time_until_reset = self._time_until_reset()
            return False, self._format_exceeded_message(
                current_spend, effective_limit, estimated_cost, time_until_reset
            ), BudgetStatus.EXCEEDED

        # Guard the division: a limit of zero would otherwise raise
        # ZeroDivisionError. Reaching this point with a non-positive limit
        # means nothing is projected to be spent, so 0% is reported.
        percentage = (projected_spend / effective_limit) * 100 if effective_limit > 0 else 0

        for threshold, status in self._WARNING_LEVELS:
            if percentage >= threshold:
                return True, self._format_warning_message(
                    percentage, current_spend, effective_limit, estimated_cost
                ), status

        return True, "", BudgetStatus.OK

    def record_usage(self, conversation_id: int, model_id: str, region: str,
                     input_tokens: int, output_tokens: int) -> float:
        """
        Record actual usage after a request completes.

        Args:
            conversation_id: Conversation ID
            model_id: Bedrock model ID
            region: AWS region
            input_tokens: Actual input tokens used
            output_tokens: Actual output tokens used

        Returns:
            Actual cost in USD (0.0 when cost management is disabled, in which
            case nothing is written to the database)
        """
        if not self.enabled:
            return 0.0

        # Calculate actual cost; `source` is only used for the debug log below
        cost, source = self.pricing.calculate_cost(
            model_id, region, input_tokens, output_tokens
        )

        # Store usage in database
        self.database.record_usage(
            conversation_id=conversation_id,
            model_id=model_id,
            region=region,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost=cost,
            timestamp=datetime.now()
        )

        logging.debug("Recorded usage: $%.4f (%s)", cost, source)
        return cost

    def _get_rolling_window_spend(self) -> float:
        """
        Get total spend in the current rolling window.

        As a side effect, an override whose expiry time has passed is cleared.

        Returns:
            Total spend in USD
        """
        # Check if override has expired
        if self.override_expires and datetime.now() >= self.override_expires:
            self.current_override = 0.0
            self.override_expires = None
            logging.info("Cost override has expired")

        # The window covers the last `period_hours` hours up to now
        window_start = datetime.now() - timedelta(hours=self.period_hours)
        return self.database.get_usage_in_window(window_start)

    def _time_until_reset(self) -> timedelta:
        """
        Calculate time until the rolling window resets.

        Returns:
            Time delta until the oldest usage in the window expires; zero when
            the database reports no usage in the window
        """
        window_start = datetime.now() - timedelta(hours=self.period_hours)
        oldest_usage_time = self.database.get_oldest_usage_in_window(window_start)

        if not oldest_usage_time:
            # No usage in window, resets immediately
            return timedelta(0)

        # Time until this usage falls out of the window, never negative
        reset_time = oldest_usage_time + timedelta(hours=self.period_hours)
        return max(reset_time - datetime.now(), timedelta(0))

    def _format_warning_message(self, percentage: float, current_spend: float,
                                limit: float, estimated_cost: float) -> str:
        """
        Format a budget warning message.

        Args:
            percentage: Percentage of budget used
            current_spend: Current spend in USD
            limit: Budget limit in USD
            estimated_cost: Estimated cost of current request

        Returns:
            Formatted warning message
        """
        # What is left once the current request has been paid for
        remaining = limit - current_spend - estimated_cost

        return (
            f"Budget Warning: {percentage:.1f}% of ${limit:.2f} budget used "
            f"(${current_spend:.2f} spent, ${estimated_cost:.4f} this request, "
            f"${remaining:.2f} remaining in {self.period_hours}h window)"
        )

    def _format_exceeded_message(self, current_spend: float, limit: float,
                                 estimated_cost: float, time_until_reset: timedelta) -> str:
        """
        Format a budget exceeded message.

        Args:
            current_spend: Current spend in USD
            limit: Budget limit in USD
            estimated_cost: Estimated cost of request
            time_until_reset: Time until budget resets

        Returns:
            Formatted exceeded message
        """
        message = (
            f"Budget Limit Reached: ${current_spend:.2f} of ${limit:.2f} spent. "
            f"This request (${estimated_cost:.4f}) would exceed the limit. "
        )

        seconds_left = time_until_reset.total_seconds()
        if seconds_left > 0:
            hours = int(seconds_left // 3600)
            minutes = int((seconds_left % 3600) // 60)
            message += f"Budget resets in {hours}h {minutes}m. "
        else:
            message += "Budget resets now (no recent usage). "

        message += "Override available." if self.allow_override else "No override allowed."
        return message

    def apply_override(self, additional_percentage: float) -> bool:
        """
        Apply a budget override for the current period.

        The override replaces any previous one and expires `period_hours`
        hours after it is applied.

        Args:
            additional_percentage: Additional percentage to allow (e.g., 10.0 for 10%)

        Returns:
            True if override applied successfully, False when overrides are
            disabled by configuration or the percentage is negative
        """
        if not self.allow_override:
            logging.warning("Budget override not allowed by configuration")
            return False

        # A negative override would silently lower the limit while the summary
        # reports no active override, so refuse it.
        if additional_percentage < 0:
            logging.warning("Budget override rejected: percentage must not be negative")
            return False

        # Calculate additional spend allowed
        additional_spend = self.max_spend * (additional_percentage / 100.0)
        self.current_override = additional_spend

        # Set override to expire after the current period
        self.override_expires = datetime.now() + timedelta(hours=self.period_hours)

        logging.info(
            "Budget override applied: +$%.2f (%s%%) until %s",
            additional_spend, additional_percentage, self.override_expires
        )
        return True

    def get_budget_summary(self) -> Dict:
        """
        Get current budget status summary.

        Returns:
            Dictionary with budget information; only {'enabled': False} when
            cost management is disabled
        """
        if not self.enabled:
            return {'enabled': False}

        # Reading the spend also clears an expired override before it is used
        current_spend = self._get_rolling_window_spend()
        effective_limit = self.max_spend + self.current_override
        percentage = (current_spend / effective_limit * 100) if effective_limit > 0 else 0
        time_until_reset = self._time_until_reset()

        return {
            'enabled': True,
            'current_spend': current_spend,
            'limit': self.max_spend,
            'effective_limit': effective_limit,
            'override_amount': self.current_override,
            'percentage_used': percentage,
            'remaining': effective_limit - current_spend,
            'period_hours': self.period_hours,
            'time_until_reset_seconds': time_until_reset.total_seconds(),
            'override_active': self.current_override > 0,
            'override_expires': self.override_expires.isoformat() if self.override_expires else None
        }
|
dtSpark/limits/tokens.py
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Token management module for AWS Bedrock usage.
|
|
3
|
+
|
|
4
|
+
This module provides functionality for:
|
|
5
|
+
- Tracking token usage (input and output separately) over rolling time windows
|
|
6
|
+
- Token limit monitoring and warnings
|
|
7
|
+
- Token-based usage limits with override options
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from datetime import datetime, timedelta
|
|
12
|
+
from typing import Dict, Optional, Tuple
|
|
13
|
+
from enum import Enum
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class LimitStatus(Enum):
    """Token limit status levels reported by TokenManager."""

    OK = "ok"
    WARNING_75 = "warning_75"
    WARNING_85 = "warning_85"
    WARNING_95 = "warning_95"
    EXCEEDED = "exceeded"


class TokenManager:
    """Manages token usage tracking and limit enforcement over a rolling time window."""

    # Warning thresholds (percentage of the effective limit), highest first so
    # the first match is the most severe applicable level.
    _WARNING_LEVELS = (
        (95, LimitStatus.WARNING_95),
        (85, LimitStatus.WARNING_85),
        (75, LimitStatus.WARNING_75),
    )

    def __init__(self, database, config: Dict):
        """
        Initialise the token manager.

        Args:
            database: ConversationDatabase instance
            config: Token management configuration dictionary. Recognised keys:
                'enabled' (default False), 'max_input_tokens' (default 100000),
                'max_output_tokens' (default 50000), 'period_hours' (default 24),
                'allow_override' (default True)
        """
        self.database = database
        self.enabled = config.get('enabled', False)
        self.max_input_tokens = int(config.get('max_input_tokens', 100000))
        self.max_output_tokens = int(config.get('max_output_tokens', 50000))
        self.period_hours = int(config.get('period_hours', 24))
        self.allow_override = config.get('allow_override', True)

        # Override tracking
        self.current_input_override = 0  # Additional input tokens allowed
        self.current_output_override = 0  # Additional output tokens allowed
        self.override_expires = None  # When the override expires

    @staticmethod
    def _percentage(used, limit):
        """Return `used` as a percentage of `limit`, or 0 when the limit is not positive."""
        return (used / limit) * 100 if limit > 0 else 0

    def check_limits_before_request(self, model_id: str, region: str,
                                    input_tokens: int, max_output_tokens: int) -> Tuple[bool, str, LimitStatus]:
        """
        Check if a request would exceed the token limits.

        Args:
            model_id: Bedrock model ID (not used by the check itself)
            region: AWS region (not used by the check itself)
            input_tokens: Number of input tokens
            max_output_tokens: Maximum output tokens expected

        Returns:
            Tuple of (allowed, message, status). The message is empty when the
            status is OK (including when token management is disabled).
        """
        if not self.enabled:
            return True, "", LimitStatus.OK

        # Read usage first: this call also clears an expired override, which
        # must happen before the effective limits are computed.
        current_input, current_output = self._get_rolling_window_usage()

        # Calculate effective limits (base + override)
        effective_input_limit = self.max_input_tokens + self.current_input_override
        effective_output_limit = self.max_output_tokens + self.current_output_override

        # Calculate projected usage
        projected_input = current_input + input_tokens
        projected_output = current_output + max_output_tokens

        # Check if either limit exceeded
        if projected_input > effective_input_limit or projected_output > effective_output_limit:
            time_until_reset = self._time_until_reset()
            return False, self._format_exceeded_message(
                current_input, current_output,
                effective_input_limit, effective_output_limit,
                input_tokens, max_output_tokens,
                time_until_reset
            ), LimitStatus.EXCEEDED

        # Use the higher percentage for status determination
        max_percentage = max(
            self._percentage(projected_input, effective_input_limit),
            self._percentage(projected_output, effective_output_limit),
        )

        for threshold, status in self._WARNING_LEVELS:
            if max_percentage >= threshold:
                return True, self._format_warning_message(
                    max_percentage, current_input, current_output,
                    effective_input_limit, effective_output_limit,
                    input_tokens, max_output_tokens
                ), status

        return True, "", LimitStatus.OK

    def record_usage(self, conversation_id: int, model_id: str, region: str,
                     input_tokens: int, output_tokens: int) -> Tuple[int, int]:
        """
        Record actual token usage after a request completes.

        Args:
            conversation_id: Conversation ID
            model_id: Bedrock model ID
            region: AWS region
            input_tokens: Actual input tokens used
            output_tokens: Actual output tokens used

        Returns:
            Tuple of (input_tokens, output_tokens) recorded; (0, 0) when token
            management is disabled, in which case nothing is written
        """
        if not self.enabled:
            return 0, 0

        # Store usage in database
        self.database.record_usage(
            conversation_id=conversation_id,
            model_id=model_id,
            region=region,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost=0.0,  # Not tracking cost anymore
            timestamp=datetime.now()
        )

        logging.debug(
            "Recorded usage: %s input tokens, %s output tokens",
            input_tokens, output_tokens
        )
        return input_tokens, output_tokens

    def _get_rolling_window_usage(self) -> Tuple[int, int]:
        """
        Get total token usage in the current rolling window.

        As a side effect, an override whose expiry time has passed is cleared.

        Returns:
            Tuple of (total_input_tokens, total_output_tokens)
        """
        # Check if override has expired
        if self.override_expires and datetime.now() >= self.override_expires:
            self.current_input_override = 0
            self.current_output_override = 0
            self.override_expires = None
            logging.info("Token override has expired")

        # The window covers the last `period_hours` hours up to now
        window_start = datetime.now() - timedelta(hours=self.period_hours)

        # Unpack so that a malformed database result fails here, not in a caller
        total_input, total_output = self.database.get_token_usage_in_window(window_start)
        return total_input, total_output

    def _time_until_reset(self) -> timedelta:
        """
        Calculate time until the rolling window resets.

        Returns:
            Time delta until the oldest usage in the window expires; zero when
            the database reports no usage in the window
        """
        window_start = datetime.now() - timedelta(hours=self.period_hours)
        oldest_usage_time = self.database.get_oldest_usage_in_window(window_start)

        if not oldest_usage_time:
            # No usage in window, resets immediately
            return timedelta(0)

        # Time until this usage falls out of the window, never negative
        reset_time = oldest_usage_time + timedelta(hours=self.period_hours)
        return max(reset_time - datetime.now(), timedelta(0))

    def _format_warning_message(self, percentage: float,
                                current_input: int, current_output: int,
                                input_limit: int, output_limit: int,
                                request_input: int, request_output: int) -> str:
        """
        Format a token limit warning message.

        Args:
            percentage: Percentage of limit used (highest of input/output)
            current_input: Current input tokens used
            current_output: Current output tokens used
            input_limit: Input token limit
            output_limit: Output token limit
            request_input: Input tokens for current request
            request_output: Output tokens for current request

        Returns:
            Formatted warning message
        """
        # What is left once the current request has been accounted for
        input_remaining = input_limit - current_input - request_input
        output_remaining = output_limit - current_output - request_output

        return (
            f"Token Limit Warning: {percentage:.1f}% of limits used. "
            f"Input: {current_input:,}/{input_limit:,} (+{request_input:,} this request, {input_remaining:,} remaining). "
            f"Output: {current_output:,}/{output_limit:,} (+{request_output:,} this request, {output_remaining:,} remaining). "
            f"Window: {self.period_hours}h"
        )

    def _format_exceeded_message(self, current_input: int, current_output: int,
                                 input_limit: int, output_limit: int,
                                 request_input: int, request_output: int,
                                 time_until_reset: timedelta) -> str:
        """
        Format a token limit exceeded message.

        Args:
            current_input: Current input tokens used
            current_output: Current output tokens used
            input_limit: Input token limit
            output_limit: Output token limit
            request_input: Input tokens for request
            request_output: Output tokens for request
            time_until_reset: Time until limit resets

        Returns:
            Formatted exceeded message
        """
        # Determine which limit was exceeded
        input_exceeded = (current_input + request_input) > input_limit
        output_exceeded = (current_output + request_output) > output_limit

        input_detail = f"{current_input:,}/{input_limit:,} used, {request_input:,} requested. "
        output_detail = f"{current_output:,}/{output_limit:,} used, {request_output:,} requested. "

        message = "Token Limit Reached: "

        if input_exceeded and output_exceeded:
            # Report the figures for both limits, as the single-limit branches do
            message += f"Both limits exceeded. Input: {input_detail}Output: {output_detail}"
        elif input_exceeded:
            message += f"Input limit exceeded: {input_detail}"
        else:
            message += f"Output limit exceeded: {output_detail}"

        seconds_left = time_until_reset.total_seconds()
        if seconds_left > 0:
            hours = int(seconds_left // 3600)
            minutes = int((seconds_left % 3600) // 60)
            message += f"Limit resets in {hours}h {minutes}m. "
        else:
            message += "Limit resets now (no recent usage). "

        message += "Override available." if self.allow_override else "No override allowed."
        return message

    def apply_override(self, additional_percentage: float) -> bool:
        """
        Apply a token limit override for the current period.

        The same percentage is applied to the input and the output limit. The
        override replaces any previous one and expires `period_hours` hours
        after it is applied.

        Args:
            additional_percentage: Additional percentage to allow (e.g., 10.0 for 10%)

        Returns:
            True if override applied successfully, False when overrides are
            disabled by configuration or the percentage is negative
        """
        if not self.allow_override:
            logging.warning("Token limit override not allowed by configuration")
            return False

        # A negative override would silently lower the limits while the summary
        # reports no active override, so refuse it.
        if additional_percentage < 0:
            logging.warning("Token limit override rejected: percentage must not be negative")
            return False

        # Calculate additional tokens allowed for both input and output
        fraction = additional_percentage / 100.0
        additional_input = int(self.max_input_tokens * fraction)
        additional_output = int(self.max_output_tokens * fraction)

        self.current_input_override = additional_input
        self.current_output_override = additional_output

        # Set override to expire after the current period
        self.override_expires = datetime.now() + timedelta(hours=self.period_hours)

        logging.info(
            "Token limit override applied: +%s input tokens, +%s output tokens (%s%%) until %s",
            f"{additional_input:,}", f"{additional_output:,}",
            additional_percentage, self.override_expires
        )
        return True

    def get_usage_summary(self) -> Dict:
        """
        Get current token usage status summary.

        Returns:
            Dictionary with usage information; only {'enabled': False} when
            token management is disabled
        """
        if not self.enabled:
            return {'enabled': False}

        # Reading the usage also clears an expired override before it is used
        current_input, current_output = self._get_rolling_window_usage()
        effective_input_limit = self.max_input_tokens + self.current_input_override
        effective_output_limit = self.max_output_tokens + self.current_output_override
        time_until_reset = self._time_until_reset()

        return {
            'enabled': True,
            'current_input_tokens': current_input,
            'current_output_tokens': current_output,
            'input_limit': self.max_input_tokens,
            'output_limit': self.max_output_tokens,
            'effective_input_limit': effective_input_limit,
            'effective_output_limit': effective_output_limit,
            'input_override_amount': self.current_input_override,
            'output_override_amount': self.current_output_override,
            'input_percentage_used': self._percentage(current_input, effective_input_limit),
            'output_percentage_used': self._percentage(current_output, effective_output_limit),
            'input_remaining': effective_input_limit - current_input,
            'output_remaining': effective_output_limit - current_output,
            'period_hours': self.period_hours,
            'time_until_reset_seconds': time_until_reset.total_seconds(),
            'override_active': self.current_input_override > 0 or self.current_output_override > 0,
            'override_expires': self.override_expires.isoformat() if self.override_expires else None
        }
|