dtSpark 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtSpark/__init__.py +0 -0
- dtSpark/_description.txt +1 -0
- dtSpark/_full_name.txt +1 -0
- dtSpark/_licence.txt +21 -0
- dtSpark/_metadata.yaml +6 -0
- dtSpark/_name.txt +1 -0
- dtSpark/_version.txt +1 -0
- dtSpark/aws/__init__.py +7 -0
- dtSpark/aws/authentication.py +296 -0
- dtSpark/aws/bedrock.py +578 -0
- dtSpark/aws/costs.py +318 -0
- dtSpark/aws/pricing.py +580 -0
- dtSpark/cli_interface.py +2645 -0
- dtSpark/conversation_manager.py +3050 -0
- dtSpark/core/__init__.py +12 -0
- dtSpark/core/application.py +3355 -0
- dtSpark/core/context_compaction.py +735 -0
- dtSpark/daemon/__init__.py +104 -0
- dtSpark/daemon/__main__.py +10 -0
- dtSpark/daemon/action_monitor.py +213 -0
- dtSpark/daemon/daemon_app.py +730 -0
- dtSpark/daemon/daemon_manager.py +289 -0
- dtSpark/daemon/execution_coordinator.py +194 -0
- dtSpark/daemon/pid_file.py +169 -0
- dtSpark/database/__init__.py +482 -0
- dtSpark/database/autonomous_actions.py +1191 -0
- dtSpark/database/backends.py +329 -0
- dtSpark/database/connection.py +122 -0
- dtSpark/database/conversations.py +520 -0
- dtSpark/database/credential_prompt.py +218 -0
- dtSpark/database/files.py +205 -0
- dtSpark/database/mcp_ops.py +355 -0
- dtSpark/database/messages.py +161 -0
- dtSpark/database/schema.py +673 -0
- dtSpark/database/tool_permissions.py +186 -0
- dtSpark/database/usage.py +167 -0
- dtSpark/files/__init__.py +4 -0
- dtSpark/files/manager.py +322 -0
- dtSpark/launch.py +39 -0
- dtSpark/limits/__init__.py +10 -0
- dtSpark/limits/costs.py +296 -0
- dtSpark/limits/tokens.py +342 -0
- dtSpark/llm/__init__.py +17 -0
- dtSpark/llm/anthropic_direct.py +446 -0
- dtSpark/llm/base.py +146 -0
- dtSpark/llm/context_limits.py +438 -0
- dtSpark/llm/manager.py +177 -0
- dtSpark/llm/ollama.py +578 -0
- dtSpark/mcp_integration/__init__.py +5 -0
- dtSpark/mcp_integration/manager.py +653 -0
- dtSpark/mcp_integration/tool_selector.py +225 -0
- dtSpark/resources/config.yaml.template +631 -0
- dtSpark/safety/__init__.py +22 -0
- dtSpark/safety/llm_service.py +111 -0
- dtSpark/safety/patterns.py +229 -0
- dtSpark/safety/prompt_inspector.py +442 -0
- dtSpark/safety/violation_logger.py +346 -0
- dtSpark/scheduler/__init__.py +20 -0
- dtSpark/scheduler/creation_tools.py +599 -0
- dtSpark/scheduler/execution_queue.py +159 -0
- dtSpark/scheduler/executor.py +1152 -0
- dtSpark/scheduler/manager.py +395 -0
- dtSpark/tools/__init__.py +4 -0
- dtSpark/tools/builtin.py +833 -0
- dtSpark/web/__init__.py +20 -0
- dtSpark/web/auth.py +152 -0
- dtSpark/web/dependencies.py +37 -0
- dtSpark/web/endpoints/__init__.py +17 -0
- dtSpark/web/endpoints/autonomous_actions.py +1125 -0
- dtSpark/web/endpoints/chat.py +621 -0
- dtSpark/web/endpoints/conversations.py +353 -0
- dtSpark/web/endpoints/main_menu.py +547 -0
- dtSpark/web/endpoints/streaming.py +421 -0
- dtSpark/web/server.py +578 -0
- dtSpark/web/session.py +167 -0
- dtSpark/web/ssl_utils.py +195 -0
- dtSpark/web/static/css/dark-theme.css +427 -0
- dtSpark/web/static/js/actions.js +1101 -0
- dtSpark/web/static/js/chat.js +614 -0
- dtSpark/web/static/js/main.js +496 -0
- dtSpark/web/static/js/sse-client.js +242 -0
- dtSpark/web/templates/actions.html +408 -0
- dtSpark/web/templates/base.html +93 -0
- dtSpark/web/templates/chat.html +814 -0
- dtSpark/web/templates/conversations.html +350 -0
- dtSpark/web/templates/goodbye.html +81 -0
- dtSpark/web/templates/login.html +90 -0
- dtSpark/web/templates/main_menu.html +983 -0
- dtSpark/web/templates/new_conversation.html +191 -0
- dtSpark/web/web_interface.py +137 -0
- dtspark-1.0.4.dist-info/METADATA +187 -0
- dtspark-1.0.4.dist-info/RECORD +96 -0
- dtspark-1.0.4.dist-info/WHEEL +5 -0
- dtspark-1.0.4.dist-info/entry_points.txt +3 -0
- dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
- dtspark-1.0.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context limit resolver module for model-specific context window management.
|
|
3
|
+
|
|
4
|
+
This module provides functionality for resolving context window limits
|
|
5
|
+
based on model ID and provider, using configurable defaults for different
|
|
6
|
+
model families and providers.
|
|
7
|
+
|
|
8
|
+
The resolution follows this priority order:
|
|
9
|
+
1. Exact match in provider-specific configuration
|
|
10
|
+
2. Partial match (model ID contains pattern) in provider configuration
|
|
11
|
+
3. Provider default from configuration
|
|
12
|
+
4. Hardcoded defaults for known model families (Claude = 200K, etc.)
|
|
13
|
+
5. Global default (8192 tokens)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Dict, Any, Optional
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Default context limits when no configuration is provided.
# This is the final fallback after config and hardcoded family defaults.
DEFAULT_CONTEXT_LIMITS = {
    'context_window': 8192,
    'max_output': 4096
}

# Hardcoded defaults for known model families when config is missing
# These provide sensible fallbacks without requiring config
HARDCODED_MODEL_DEFAULTS = {
    'anthropic': {
        # All Claude models have 200K context window
        'claude': {'context_window': 200000, 'max_output': 32000},
        'default': {'context_window': 200000, 'max_output': 32000},
    },
    'aws_bedrock': {
        'claude': {'context_window': 200000, 'max_output': 32000},  # Claude on Bedrock
        'amazon.titan': {'context_window': 8192, 'max_output': 4096},
        'meta.llama': {'context_window': 128000, 'max_output': 4096},
        'mistral': {'context_window': 128000, 'max_output': 4096},
        'default': {'context_window': 8192, 'max_output': 4096},
    },
    'ollama': {
        'llama': {'context_window': 128000, 'max_output': 4096},
        'mistral': {'context_window': 32000, 'max_output': 4096},
        'codellama': {'context_window': 16000, 'max_output': 4096},
        'default': {'context_window': 8192, 'max_output': 4096},
    },
}


class ContextLimitResolver:
    """
    Resolves context window limits for models based on configuration.

    This class provides a flexible way to look up context window and
    max output token limits for any model based on its ID and provider.
    It supports both exact and partial matching of model IDs.

    Resolution priority:
        1. Exact match in provider-specific configuration
        2. Partial match (model ID contains pattern) in provider configuration
        3. Provider default from configuration
        4. Hardcoded defaults for known model families
        5. Global default (8192 tokens)
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialise the context limit resolver.

        Args:
            config: Full configuration dictionary containing 'model_context_limits' section,
                or a Settings object that uses dot notation for access.
                If None or missing the section, uses hardcoded defaults.
        """
        self.limits_config: Dict[str, Any] = {}
        self._settings_obj = None

        if config:
            # Try standard dictionary access first
            if isinstance(config, dict):
                self.limits_config = config.get('model_context_limits', {})
            elif hasattr(config, 'get'):
                # Try to get as dict first (some Settings objects support this)
                limits = config.get('model_context_limits', None)
                if isinstance(limits, dict) and limits:
                    self.limits_config = limits
                else:
                    # Store Settings object for dot notation access
                    self._settings_obj = config
                    self.limits_config = self._build_limits_from_settings(config)

        # Robustness fix: a config that maps 'model_context_limits' to a
        # non-mapping value (e.g. None or a list) previously crashed the
        # len()/get() calls below; normalise so hardcoded defaults apply.
        if not isinstance(self.limits_config, dict):
            logging.warning("model_context_limits is not a mapping; ignoring it")
            self.limits_config = {}

        logging.info(f"ContextLimitResolver initialised with {len(self.limits_config)} provider sections")
        if self.limits_config:
            for provider, models in self.limits_config.items():
                if isinstance(models, dict):
                    logging.info(f" Provider '{provider}': {list(models.keys())}")

    def _build_limits_from_settings(self, settings) -> Dict[str, Any]:
        """
        Build limits config from a Settings object using dot notation.

        Probes a fixed set of known provider/model keys because Settings
        objects expose no way to enumerate nested sections.

        Args:
            settings: Settings object with dot notation access

        Returns:
            Dictionary with provider sections
        """
        limits_config: Dict[str, Any] = {}

        # Define known providers and model patterns to check
        providers = ['anthropic', 'aws_bedrock', 'ollama']
        known_models = {
            'anthropic': [
                'claude-opus-4', 'claude-sonnet-4', 'claude-opus-4.5', 'claude-sonnet-4.5',
                'claude-3-5-sonnet', 'claude-3-5-haiku', 'claude-3-opus', 'claude-3-sonnet',
                'claude-3-haiku', 'default'
            ],
            'aws_bedrock': [
                'amazon.titan-text-express', 'meta.llama3-1', 'mistral.mistral-large',
                'default'
            ],
            'ollama': [
                'llama3.2', 'mistral', 'codellama', 'default'
            ]
        }

        for provider in providers:
            provider_config = {}
            models = known_models.get(provider, ['default'])

            for model in models:
                context_key = f'model_context_limits.{provider}.{model}.context_window'
                output_key = f'model_context_limits.{provider}.{model}.max_output'

                context_window = settings.get(context_key, None)
                max_output = settings.get(output_key, None)

                logging.debug(f"Settings lookup: {context_key} = {context_window}")

                # Only record a model when both values are present
                if context_window is not None and max_output is not None:
                    provider_config[model] = {
                        'context_window': int(context_window),
                        'max_output': int(max_output)
                    }
                    logging.info(f"Loaded model limits: {provider}.{model} = {context_window}/{max_output}")

            if provider_config:
                limits_config[provider] = provider_config

        # Also try global default
        global_context = settings.get('model_context_limits.default.context_window', None)
        global_output = settings.get('model_context_limits.default.max_output', None)
        if global_context is not None and global_output is not None:
            limits_config['default'] = {
                'context_window': int(global_context),
                'max_output': int(global_output)
            }

        return limits_config

    def get_context_limits(self, model_id: str, provider: str) -> Dict[str, int]:
        """
        Get context window and max output limits for a model.

        The resolution follows this priority order:
        1. Exact match in provider-specific configuration
        2. Partial match (model ID contains pattern) in provider configuration
        3. Provider default
        4. Hardcoded defaults for known model families
        5. Global default

        Args:
            model_id: The model identifier (e.g., 'claude-3-5-sonnet-20241022')
            provider: Provider name. Supported values:
                - 'anthropic' (for Anthropic Direct API)
                - 'aws_bedrock' (for AWS Bedrock non-Claude models)
                - 'ollama' (for Ollama models)
                Note: Claude models on Bedrock are routed to 'anthropic'

        Returns:
            Dict with 'context_window' and 'max_output' keys (both integers)
        """
        if not model_id:
            logging.warning("Empty model_id provided, using global default")
            return self._get_global_default()

        model_id_lower = model_id.lower()
        provider_key = self._normalise_provider_key(provider, model_id_lower)

        # Get provider-specific limits section
        provider_limits = self.limits_config.get(provider_key, {})

        if provider_limits:
            # 1. Try exact match
            limits = self._try_exact_match(model_id_lower, provider_limits)
            if limits:
                logging.debug(f"Exact match found for {model_id} in {provider_key}")
                return limits

            # 2. Try partial match (model_id contains pattern)
            limits = self._try_partial_match(model_id_lower, provider_limits)
            if limits:
                logging.debug(f"Partial match found for {model_id} in {provider_key}")
                return limits

            # 3. Try provider default
            if 'default' in provider_limits:
                limits = self._extract_limits(provider_limits['default'])
                if limits:
                    logging.debug(f"Using provider default for {model_id} in {provider_key}")
                    return limits

        # 4. Try hardcoded defaults for known model families
        hardcoded = self._try_hardcoded_defaults(model_id_lower, provider_key)
        if hardcoded:
            logging.info(f"Using hardcoded defaults for {model_id} ({provider_key}): "
                         f"context_window={hardcoded['context_window']}, max_output={hardcoded['max_output']}")
            return hardcoded

        # 5. Fall back to global default
        logging.debug(f"Using global default for {model_id}")
        return self._get_global_default()

    def _normalise_provider_key(self, provider: str, model_id_lower: str) -> str:
        """
        Normalise provider key for configuration lookup.

        Detects if a Bedrock model is actually a Claude model and routes
        to the anthropic section for correct limits.

        Args:
            provider: Original provider string
            model_id_lower: Lowercase model ID

        Returns:
            Normalised provider key for config lookup
        """
        provider_lower = provider.lower() if provider else ''

        # Map common provider names to config keys
        provider_map = {
            'anthropic direct': 'anthropic',
            'anthropic_direct': 'anthropic',  # underscore variant
            'anthropic': 'anthropic',
            'aws bedrock': 'aws_bedrock',
            'aws_bedrock': 'aws_bedrock',
            'bedrock': 'aws_bedrock',
            'ollama': 'ollama',
        }

        normalised = provider_map.get(provider_lower, provider_lower)

        # Special case: Claude models on Bedrock should use anthropic limits
        if normalised == 'aws_bedrock' and self._is_claude_model(model_id_lower):
            logging.debug(f"Routing Claude model {model_id_lower} to anthropic limits")
            return 'anthropic'

        return normalised

    def _is_claude_model(self, model_id_lower: str) -> bool:
        """
        Check if a model ID refers to a Claude model.

        Args:
            model_id_lower: Lowercase model ID

        Returns:
            True if this is a Claude/Anthropic model
        """
        claude_patterns = [
            'claude',
            'anthropic',
        ]
        return any(pattern in model_id_lower for pattern in claude_patterns)

    def _try_exact_match(self, model_id_lower: str, provider_limits: Dict) -> Optional[Dict[str, int]]:
        """
        Try to find an exact match for the model ID.

        Args:
            model_id_lower: Lowercase model ID
            provider_limits: Provider-specific limits dictionary

        Returns:
            Limits dict if found, None otherwise
        """
        for pattern, limits in provider_limits.items():
            if pattern == 'default':
                continue
            if model_id_lower == pattern.lower():
                return self._extract_limits(limits)
        return None

    def _try_partial_match(self, model_id_lower: str, provider_limits: Dict) -> Optional[Dict[str, int]]:
        """
        Try to find a partial match where model_id contains the pattern.

        Uses longest match first to prefer more specific patterns.
        E.g., 'claude-3-5-sonnet' matches before 'claude-3'.

        Args:
            model_id_lower: Lowercase model ID
            provider_limits: Provider-specific limits dictionary

        Returns:
            Limits dict if found, None otherwise
        """
        # Sort patterns by length (longest first) for most specific match
        patterns = [(k, v) for k, v in provider_limits.items() if k != 'default']
        patterns.sort(key=lambda x: len(x[0]), reverse=True)

        for pattern, limits in patterns:
            pattern_lower = pattern.lower()
            # Check if pattern is contained in model_id
            if pattern_lower in model_id_lower:
                return self._extract_limits(limits)
        return None

    def _try_hardcoded_defaults(self, model_id_lower: str, provider: str) -> Optional[Dict[str, int]]:
        """
        Try to find hardcoded defaults for known model families.

        This provides sensible fallbacks when config isn't available.

        Args:
            model_id_lower: Lowercase model ID
            provider: Provider key (anthropic, aws_bedrock, ollama)

        Returns:
            Limits dict if found, None otherwise
        """
        provider_defaults = HARDCODED_MODEL_DEFAULTS.get(provider, {})
        if not provider_defaults:
            return None

        # Try to match model patterns (longest first)
        patterns = [(k, v) for k, v in provider_defaults.items() if k != 'default']
        patterns.sort(key=lambda x: len(x[0]), reverse=True)

        for pattern, limits in patterns:
            if pattern.lower() in model_id_lower:
                # Copy so callers cannot mutate the shared defaults table
                return limits.copy()

        # Try provider default
        if 'default' in provider_defaults:
            return provider_defaults['default'].copy()

        return None

    def _extract_limits(self, limits_data: Any) -> Optional[Dict[str, int]]:
        """
        Extract context_window and max_output from limits data.

        Args:
            limits_data: Can be a dict with context_window/max_output keys,
                or a legacy format

        Returns:
            Dict with 'context_window' and 'max_output', or None if invalid
        """
        if isinstance(limits_data, dict):
            context_window = limits_data.get('context_window')
            max_output = limits_data.get('max_output')

            if context_window is not None and max_output is not None:
                return {
                    'context_window': int(context_window),
                    'max_output': int(max_output)
                }

        return None

    def _get_global_default(self) -> Dict[str, int]:
        """
        Get the global default context limits.

        Returns:
            Dict with 'context_window' and 'max_output'
        """
        # Try config global default first
        global_default = self.limits_config.get('default')
        if global_default:
            limits = self._extract_limits(global_default)
            if limits:
                return limits

        # Fall back to hardcoded default
        return DEFAULT_CONTEXT_LIMITS.copy()

    def get_context_window(self, model_id: str, provider: str) -> int:
        """
        Convenience method to get just the context window size.

        Args:
            model_id: The model identifier
            provider: Provider name

        Returns:
            Context window size in tokens
        """
        return self.get_context_limits(model_id, provider)['context_window']

    def get_max_output(self, model_id: str, provider: str) -> int:
        """
        Convenience method to get just the max output tokens.

        Args:
            model_id: The model identifier
            provider: Provider name

        Returns:
            Maximum output tokens
        """
        return self.get_context_limits(model_id, provider)['max_output']

    def calculate_compaction_threshold(self, model_id: str, provider: str,
                                       threshold_ratio: float = 0.7) -> int:
        """
        Calculate the token count at which compaction should be triggered.

        Args:
            model_id: The model identifier
            provider: Provider name
            threshold_ratio: Fraction of context window to trigger compaction (default 0.7)

        Returns:
            Token count threshold for compaction
        """
        context_window = self.get_context_window(model_id, provider)
        return int(context_window * threshold_ratio)

    def calculate_emergency_threshold(self, model_id: str, provider: str,
                                      emergency_ratio: float = 0.95) -> int:
        """
        Calculate the emergency token count at which compaction is forced.

        Args:
            model_id: The model identifier
            provider: Provider name
            emergency_ratio: Fraction of context window for emergency compaction (default 0.95)

        Returns:
            Token count threshold for emergency compaction
        """
        context_window = self.get_context_window(model_id, provider)
        return int(context_window * emergency_ratio)
|
dtSpark/llm/manager.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM Manager for handling multiple LLM providers.
|
|
3
|
+
|
|
4
|
+
This module manages:
|
|
5
|
+
- Initialisation of available LLM providers
|
|
6
|
+
- Model selection across providers
|
|
7
|
+
- Routing requests to the appropriate provider
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import List, Dict, Optional, Any
|
|
12
|
+
from dtSpark.llm.base import LLMService
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LLMManager:
    """Manages multiple LLM service providers.

    Keeps a registry of providers keyed by their reported name, tracks
    which provider/model is active, and routes invocation, token counting
    and rate-limit queries to the active service.
    """

    def __init__(self):
        """Initialise the LLM manager with no providers registered."""
        self.providers: Dict[str, "LLMService"] = {}
        self.active_provider: Optional[str] = None
        self.active_service: Optional["LLMService"] = None

    def register_provider(self, provider: "LLMService"):
        """
        Register an LLM provider.

        The first provider registered automatically becomes the active one.

        Args:
            provider: LLMService implementation to register
        """
        provider_name = provider.get_provider_name()
        self.providers[provider_name] = provider
        logging.info(f"Registered LLM provider: {provider_name}")

        # Set as active if it's the first provider
        if not self.active_provider:
            self.active_provider = provider_name
            self.active_service = provider

    def list_all_models(self) -> List[Dict[str, Any]]:
        """
        List all models from all registered providers.

        A provider that fails to list its models is logged and skipped.

        Returns:
            Combined list of models from all providers
        """
        all_models = []

        for provider_name, provider in self.providers.items():
            try:
                models = provider.list_available_models()
                # Ensure each model has provider info
                for model in models:
                    if 'provider' not in model:
                        model['provider'] = provider_name
                all_models.extend(models)
            except Exception as e:
                logging.error(f"Failed to list models from {provider_name}: {e}")

        return all_models

    def set_model(self, model_id: str, provider_name: Optional[str] = None):
        """
        Set the active model.

        Args:
            model_id: Model identifier
            provider_name: Optional provider name. If not specified, searches all providers.

        Raises:
            ValueError: If the provider is unknown or the model is not found
                on any provider.
        """
        if provider_name:
            # Set model on specific provider
            if provider_name not in self.providers:
                raise ValueError(f"Provider {provider_name} not registered")

            provider = self.providers[provider_name]
            provider.set_model(model_id)
            self.active_provider = provider_name
            self.active_service = provider
            logging.info(f"Active provider set to: {provider_name}")
        else:
            # Search for model across all providers.
            # Fix: a provider whose listing fails is skipped (consistent with
            # list_all_models) instead of aborting the whole search, and
            # model entries without an 'id' key are ignored rather than
            # raising KeyError.
            for prov_name, provider in self.providers.items():
                try:
                    models = provider.list_available_models()
                except Exception as e:
                    logging.error(f"Failed to list models from {prov_name}: {e}")
                    continue
                if any(m.get('id') == model_id for m in models):
                    provider.set_model(model_id)
                    self.active_provider = prov_name
                    self.active_service = provider
                    logging.info(f"Model {model_id} found on provider: {prov_name}")
                    return

            raise ValueError(f"Model {model_id} not found on any provider")

    def invoke_model(
        self,
        messages: List[Dict[str, Any]],
        max_tokens: int = 4096,
        temperature: float = 0.7,
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[str] = None,
        max_retries: int = 3
    ) -> Optional[Dict[str, Any]]:
        """
        Invoke the active model.

        Args:
            messages: Conversation messages
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            tools: Optional tool definitions
            system: Optional system prompt
            max_retries: Maximum retry attempts

        Returns:
            Response dictionary in standard format; an error dictionary
            (with 'error' set to True) when no provider is active.
        """
        if not self.active_service:
            return {
                'error': True,
                'error_code': 'NoProviderActive',
                'error_message': 'No LLM provider is active',
                'error_type': 'ConfigurationError'
            }

        return self.active_service.invoke_model(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            tools=tools,
            system=system,
            max_retries=max_retries
        )

    def get_active_provider(self) -> Optional[str]:
        """Get the name of the active provider."""
        return self.active_provider

    def get_active_service(self) -> Optional["LLMService"]:
        """Get the active LLM service."""
        return self.active_service

    def count_tokens(self, text: str) -> int:
        """
        Count tokens using the active provider's tokeniser.

        Args:
            text: Text to count tokens for

        Returns:
            Token count (rough chars/4 estimate when no provider is active)
        """
        if self.active_service:
            return self.active_service.count_tokens(text)
        # Fallback estimation
        return len(text) // 4

    def get_rate_limits(self) -> dict:
        """
        Get rate limit information for the active provider.

        Returns:
            Dictionary with rate limit information:
            {
                'input_tokens_per_minute': int or None
                'output_tokens_per_minute': int or None
                'requests_per_minute': int or None
                'has_limits': bool
            }
        """
        if self.active_service:
            return self.active_service.get_rate_limits()
        # Default: no limits
        return {
            'input_tokens_per_minute': None,
            'output_tokens_per_minute': None,
            'requests_per_minute': None,
            'has_limits': False
        }