dtSpark 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. dtSpark/__init__.py +0 -0
  2. dtSpark/_description.txt +1 -0
  3. dtSpark/_full_name.txt +1 -0
  4. dtSpark/_licence.txt +21 -0
  5. dtSpark/_metadata.yaml +6 -0
  6. dtSpark/_name.txt +1 -0
  7. dtSpark/_version.txt +1 -0
  8. dtSpark/aws/__init__.py +7 -0
  9. dtSpark/aws/authentication.py +296 -0
  10. dtSpark/aws/bedrock.py +578 -0
  11. dtSpark/aws/costs.py +318 -0
  12. dtSpark/aws/pricing.py +580 -0
  13. dtSpark/cli_interface.py +2645 -0
  14. dtSpark/conversation_manager.py +3050 -0
  15. dtSpark/core/__init__.py +12 -0
  16. dtSpark/core/application.py +3355 -0
  17. dtSpark/core/context_compaction.py +735 -0
  18. dtSpark/daemon/__init__.py +104 -0
  19. dtSpark/daemon/__main__.py +10 -0
  20. dtSpark/daemon/action_monitor.py +213 -0
  21. dtSpark/daemon/daemon_app.py +730 -0
  22. dtSpark/daemon/daemon_manager.py +289 -0
  23. dtSpark/daemon/execution_coordinator.py +194 -0
  24. dtSpark/daemon/pid_file.py +169 -0
  25. dtSpark/database/__init__.py +482 -0
  26. dtSpark/database/autonomous_actions.py +1191 -0
  27. dtSpark/database/backends.py +329 -0
  28. dtSpark/database/connection.py +122 -0
  29. dtSpark/database/conversations.py +520 -0
  30. dtSpark/database/credential_prompt.py +218 -0
  31. dtSpark/database/files.py +205 -0
  32. dtSpark/database/mcp_ops.py +355 -0
  33. dtSpark/database/messages.py +161 -0
  34. dtSpark/database/schema.py +673 -0
  35. dtSpark/database/tool_permissions.py +186 -0
  36. dtSpark/database/usage.py +167 -0
  37. dtSpark/files/__init__.py +4 -0
  38. dtSpark/files/manager.py +322 -0
  39. dtSpark/launch.py +39 -0
  40. dtSpark/limits/__init__.py +10 -0
  41. dtSpark/limits/costs.py +296 -0
  42. dtSpark/limits/tokens.py +342 -0
  43. dtSpark/llm/__init__.py +17 -0
  44. dtSpark/llm/anthropic_direct.py +446 -0
  45. dtSpark/llm/base.py +146 -0
  46. dtSpark/llm/context_limits.py +438 -0
  47. dtSpark/llm/manager.py +177 -0
  48. dtSpark/llm/ollama.py +578 -0
  49. dtSpark/mcp_integration/__init__.py +5 -0
  50. dtSpark/mcp_integration/manager.py +653 -0
  51. dtSpark/mcp_integration/tool_selector.py +225 -0
  52. dtSpark/resources/config.yaml.template +631 -0
  53. dtSpark/safety/__init__.py +22 -0
  54. dtSpark/safety/llm_service.py +111 -0
  55. dtSpark/safety/patterns.py +229 -0
  56. dtSpark/safety/prompt_inspector.py +442 -0
  57. dtSpark/safety/violation_logger.py +346 -0
  58. dtSpark/scheduler/__init__.py +20 -0
  59. dtSpark/scheduler/creation_tools.py +599 -0
  60. dtSpark/scheduler/execution_queue.py +159 -0
  61. dtSpark/scheduler/executor.py +1152 -0
  62. dtSpark/scheduler/manager.py +395 -0
  63. dtSpark/tools/__init__.py +4 -0
  64. dtSpark/tools/builtin.py +833 -0
  65. dtSpark/web/__init__.py +20 -0
  66. dtSpark/web/auth.py +152 -0
  67. dtSpark/web/dependencies.py +37 -0
  68. dtSpark/web/endpoints/__init__.py +17 -0
  69. dtSpark/web/endpoints/autonomous_actions.py +1125 -0
  70. dtSpark/web/endpoints/chat.py +621 -0
  71. dtSpark/web/endpoints/conversations.py +353 -0
  72. dtSpark/web/endpoints/main_menu.py +547 -0
  73. dtSpark/web/endpoints/streaming.py +421 -0
  74. dtSpark/web/server.py +578 -0
  75. dtSpark/web/session.py +167 -0
  76. dtSpark/web/ssl_utils.py +195 -0
  77. dtSpark/web/static/css/dark-theme.css +427 -0
  78. dtSpark/web/static/js/actions.js +1101 -0
  79. dtSpark/web/static/js/chat.js +614 -0
  80. dtSpark/web/static/js/main.js +496 -0
  81. dtSpark/web/static/js/sse-client.js +242 -0
  82. dtSpark/web/templates/actions.html +408 -0
  83. dtSpark/web/templates/base.html +93 -0
  84. dtSpark/web/templates/chat.html +814 -0
  85. dtSpark/web/templates/conversations.html +350 -0
  86. dtSpark/web/templates/goodbye.html +81 -0
  87. dtSpark/web/templates/login.html +90 -0
  88. dtSpark/web/templates/main_menu.html +983 -0
  89. dtSpark/web/templates/new_conversation.html +191 -0
  90. dtSpark/web/web_interface.py +137 -0
  91. dtspark-1.0.4.dist-info/METADATA +187 -0
  92. dtspark-1.0.4.dist-info/RECORD +96 -0
  93. dtspark-1.0.4.dist-info/WHEEL +5 -0
  94. dtspark-1.0.4.dist-info/entry_points.txt +3 -0
  95. dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
  96. dtspark-1.0.4.dist-info/top_level.txt +1 -0
dtSpark/llm/__init__.py ADDED
@@ -0,0 +1,17 @@
+ """
+ LLM service providers module.
+
+ This module provides an abstraction for different LLM providers,
+ allowing the application to work with AWS Bedrock, Ollama, the Anthropic Direct API,
+ and potentially other providers through a common interface.
+
+ Also includes context limit resolution for model-specific token limits.
+ """
+
+ from .base import LLMService
+ from .manager import LLMManager
+ from .ollama import OllamaService
+ from .anthropic_direct import AnthropicService
+ from .context_limits import ContextLimitResolver
+
+ __all__ = ['LLMService', 'LLMManager', 'OllamaService', 'AnthropicService', 'ContextLimitResolver']
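
For orientation before the larger files below: everything in this package is consumed through the abstract LLMService type, so application code can stay agnostic about which backend is active. A minimal illustrative sketch of that usage follows (not code from the package; the model ID and prompt are invented for the example, and an ANTHROPIC_API_KEY is assumed to be set in the environment).

"""Illustrative only: drive any LLMService implementation through the
common interface exported by dtSpark.llm."""

from dtSpark.llm import LLMService, AnthropicService


def ask(service: LLMService, question: str) -> str:
    """Send a single-turn question to whichever provider is supplied."""
    response = service.invoke_model(
        messages=[{'role': 'user',
                   'content': [{'type': 'text', 'text': question}]}],
        max_tokens=1024,
        temperature=0.2,
    )
    if not response or response.get('error'):
        raise RuntimeError(response['error_message'] if response else 'no response')
    return response['content']


service = AnthropicService()                  # reads ANTHROPIC_API_KEY
service.set_model('claude-3-5-haiku-latest')  # invented model ID
print(ask(service, "Summarise dtSpark in one sentence."))
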
dtSpark/llm/anthropic_direct.py ADDED
@@ -0,0 +1,446 @@
+ """
+ Anthropic Direct API service module.
+
+ This module provides functionality for:
+ - Listing available Anthropic models
+ - Invoking Anthropic models via the direct API
+ - Tool/function calling support
+ - Token counting
+ - Rate limit handling with exponential backoff
+ """
+
+ import logging
+ import os
+ import time
+ from typing import List, Dict, Optional, Any
+ from dtSpark.llm.base import LLMService
+
+ try:
+     from anthropic import Anthropic, RateLimitError
+ except ImportError:
+     logging.error("anthropic module not installed. Please run: pip install anthropic")
+     raise
+
+
+ class AnthropicService(LLMService):
+     """Manages interactions with the Anthropic API directly, using the official SDK."""
+
+     # Rate limits for the Anthropic API (default tier)
+     # Source: https://docs.anthropic.com/en/api/rate-limits
+     # These are conservative defaults - actual limits depend on account tier
+     DEFAULT_RATE_LIMITS = {
+         'input_tokens_per_minute': 30000,  # Default tier limit
+         'output_tokens_per_minute': 8000,  # Default tier limit
+         'requests_per_minute': 50,  # Default tier limit
+         'has_limits': True
+     }
+
+     # Model specifications: pricing and max tokens by model ID pattern
+     # Source: https://www.anthropic.com/pricing and model documentation
+     MODEL_SPECS = {
+         # Pricing per million tokens (MTok)
+         'claude-opus-4': {'input': 15.00, 'output': 75.00, 'max_output': 32000},
+         'claude-sonnet-4': {'input': 3.00, 'output': 15.00, 'max_output': 64000},
+         'claude-3-7-sonnet': {'input': 3.00, 'output': 15.00, 'max_output': 64000},  # claude-3-7-sonnet-20250219
+         'claude-sonnet-3.7': {'input': 3.00, 'output': 15.00, 'max_output': 64000},  # alias
+         'claude-haiku-4': {'input': 0.80, 'output': 4.00, 'max_output': 64000},
+         'claude-3-5-sonnet': {'input': 3.00, 'output': 15.00, 'max_output': 8192},
+         'claude-3-5-haiku': {'input': 0.80, 'output': 4.00, 'max_output': 8192},
+         'claude-3-opus': {'input': 15.00, 'output': 75.00, 'max_output': 4096},
+         'claude-3-sonnet': {'input': 3.00, 'output': 15.00, 'max_output': 4096},
+         'claude-3-haiku': {'input': 0.25, 'output': 1.25, 'max_output': 4096},
+         # Default for unknown models
+         'default': {'input': 3.00, 'output': 15.00, 'max_output': 8192}
+     }
+
+     def __init__(self, api_key: Optional[str] = None, default_max_tokens: int = 8192,
+                  rate_limit_max_retries: int = 5, rate_limit_base_delay: float = 2.0):
+         """
+         Initialise the Anthropic service.
+
+         Args:
+             api_key: Anthropic API key (or use ANTHROPIC_API_KEY env var)
+             default_max_tokens: Default maximum tokens to request (will be capped to the model's limit)
+             rate_limit_max_retries: Maximum number of retries for rate limit errors (default: 5)
+             rate_limit_base_delay: Base delay in seconds for exponential backoff (default: 2.0)
+         """
+         self.api_key = api_key or os.environ.get('ANTHROPIC_API_KEY')
+         if not self.api_key:
+             raise ValueError("Anthropic API key required. Set via parameter or ANTHROPIC_API_KEY env var")
+
+         self.client = Anthropic(api_key=self.api_key)
+         self.current_model_id = None
+         self.default_max_tokens = default_max_tokens
+         self.rate_limit_max_retries = rate_limit_max_retries
+         self.rate_limit_base_delay = rate_limit_base_delay
+
+     def get_provider_name(self) -> str:
+         """Get provider name."""
+         return "Anthropic Direct"
+
+     def get_access_info(self) -> str:
+         """Get access information."""
+         return "Anthropic API"
+
+     def _get_model_spec(self, model_id: str) -> Dict[str, Any]:
+         """
+         Get specifications for a model by matching its ID pattern.
+
+         Args:
+             model_id: Full model ID from the API
+
+         Returns:
+             Dictionary with input and output pricing and max_output
+         """
+         # Try to match the model ID against the spec patterns
+         for pattern, spec in self.MODEL_SPECS.items():
+             if pattern in model_id:
+                 return spec
+
+         # Return the default if no match is found
+         logging.warning(f"No specs found for model {model_id}, using defaults")
+         return self.MODEL_SPECS['default']
+
+     def list_available_models(self) -> List[Dict[str, Any]]:
+         """
+         List all available Anthropic models from the API.
+
+         Returns:
+             List of model dictionaries
+         """
+         models = []
+
+         try:
+             # Fetch models from the Anthropic API
+             response = self.client.models.list()
+
+             for model in response.data:
+                 # Get specs for this model
+                 specs = self._get_model_spec(model.id)
+
+                 models.append({
+                     'id': model.id,
+                     'name': model.display_name if hasattr(model, 'display_name') else model.id,
+                     'provider': 'Anthropic',
+                     'access_info': self.get_access_info(),
+                     'supports_tools': True,  # All Claude models support tools
+                     'context_length': 200000,  # All current Claude models have a 200K context
+                     'max_output': specs['max_output'],
+                     'response_streaming': True,
+                     'pricing': {'input': specs['input'], 'output': specs['output']}
+                 })
+
+             logging.info(f"Found {len(models)} Anthropic models from API")
+
+         except Exception as e:
+             logging.error(f"Failed to fetch models from Anthropic API: {e}")
+             # Return an empty list if the API call fails
+             logging.warning("Returning empty model list due to API error")
+
+         return models
+
+     def set_model(self, model_id: str):
+         """Set the active Anthropic model."""
+         self.current_model_id = model_id
+         logging.info(f"Anthropic model set to: {model_id}")
+
+     def get_model_max_tokens(self, model_id: str) -> int:
+         """
+         Get the maximum output tokens for a specific model.
+
+         Args:
+             model_id: The model ID to look up
+
+         Returns:
+             Maximum output tokens for the model (defaults to 8192 if not found)
+         """
+         specs = self._get_model_spec(model_id)
+         return specs['max_output']
+
+     def invoke_model(
+         self,
+         messages: List[Dict[str, Any]],
+         max_tokens: int = 4096,
+         temperature: float = 0.7,
+         tools: Optional[List[Dict[str, Any]]] = None,
+         system: Optional[str] = None,
+         max_retries: int = 3
+     ) -> Optional[Dict[str, Any]]:
+         """
+         Invoke an Anthropic model with a conversation.
+
+         Args:
+             messages: Conversation messages
+             max_tokens: Maximum tokens to generate
+             temperature: Sampling temperature
+             tools: Optional tool definitions
+             system: Optional system prompt
+             max_retries: Maximum retry attempts
+
+         Returns:
+             Response dictionary in standard format
+         """
+         if not self.current_model_id:
+             return {
+                 'error': True,
+                 'error_code': 'NoModelSelected',
+                 'error_message': 'No Anthropic model selected',
+                 'error_type': 'ConfigurationError'
+             }
+
+         try:
+             # Use the provided max_tokens or fall back to the default from config
+             requested_max_tokens = max_tokens if max_tokens != 4096 else self.default_max_tokens
+
+             # Get the model's max output tokens to ensure we don't exceed it
+             model_max_output = self.get_model_max_tokens(self.current_model_id)
+
+             # Cap max_tokens to the model's limit
+             actual_max_tokens = min(requested_max_tokens, model_max_output)
+             if actual_max_tokens < requested_max_tokens:
+                 logging.info(
+                     f"Capping max_tokens from {requested_max_tokens} to {actual_max_tokens} "
+                     f"(model {self.current_model_id} limit)"
+                 )
+
+             # Convert messages to Anthropic format
+             anthropic_messages = self._convert_messages_to_anthropic(messages)
+
+             # Build API parameters
+             api_params = {
+                 'model': self.current_model_id,
+                 'messages': anthropic_messages,
+                 'max_tokens': actual_max_tokens,
+                 'temperature': temperature
+             }
+
+             if system:
+                 api_params['system'] = system
+
+             if tools:
+                 api_params['tools'] = self._convert_tools_to_anthropic(tools)
+                 logging.debug(f"Sending {len(api_params['tools'])} tools to Anthropic API")
+
+             logging.debug(f"Invoking Anthropic model: {self.current_model_id}")
+             logging.debug(f"API params (excluding messages): {{'model': api_params['model'], 'max_tokens': api_params['max_tokens'], 'temperature': api_params['temperature'], 'has_system': 'system' in api_params, 'has_tools': 'tools' in api_params, 'num_tools': len(api_params.get('tools', []))}}")
+
+             # Use streaming to avoid the 10-minute timeout;
+             # accumulate the response from the stream
+             text_parts = []
+             content_blocks = []
+             tool_use_blocks = []
+             stop_reason = None
+             usage_info = {'input_tokens': 0, 'output_tokens': 0}
+
+             # Implement rate limit handling with exponential backoff
+             for retry_attempt in range(self.rate_limit_max_retries):
+                 try:
+                     with self.client.messages.stream(**api_params) as stream:
+                         for event in stream:
+                             # Handle the different event types
+                             if hasattr(event, 'type'):
+                                 if event.type == 'content_block_start':
+                                     # Track content blocks as they start
+                                     pass
+                                 elif event.type == 'content_block_delta':
+                                     # Accumulate text deltas
+                                     if hasattr(event, 'delta'):
+                                         if hasattr(event.delta, 'type'):
+                                             if event.delta.type == 'text_delta':
+                                                 text_parts.append(event.delta.text)
+                                 elif event.type == 'message_stop':
+                                     # Message complete
+                                     pass
+                                 elif event.type == 'message_delta':
+                                     # Update stop reason and usage
+                                     if hasattr(event, 'delta') and hasattr(event.delta, 'stop_reason'):
+                                         stop_reason = event.delta.stop_reason
+                                     if hasattr(event, 'usage'):
+                                         usage_info['output_tokens'] = event.usage.output_tokens
+
+                         # Get the final message to extract full content and usage
+                         final_message = stream.get_final_message()
+
+                         # Extract usage information
+                         if hasattr(final_message, 'usage'):
+                             usage_info['input_tokens'] = final_message.usage.input_tokens
+                             usage_info['output_tokens'] = final_message.usage.output_tokens
+
+                         # Extract the stop reason
+                         if hasattr(final_message, 'stop_reason'):
+                             stop_reason = final_message.stop_reason
+
+                         # Extract content blocks (including tool use)
+                         if hasattr(final_message, 'content'):
+                             for block in final_message.content:
+                                 if hasattr(block, 'type'):
+                                     if block.type == 'text':
+                                         content_blocks.append({
+                                             'type': 'text',
+                                             'text': block.text
+                                         })
+                                     elif block.type == 'tool_use':
+                                         tool_block = {
+                                             'type': 'tool_use',
+                                             'id': block.id,
+                                             'name': block.name,
+                                             'input': block.input
+                                         }
+                                         tool_use_blocks.append(tool_block)
+                                         content_blocks.append(tool_block)
+
+                     # Successfully completed - break out of the retry loop
+                     break
+
+                 except RateLimitError as e:
+                     # Handle rate limit errors with exponential backoff
+                     if retry_attempt < self.rate_limit_max_retries - 1:
+                         wait_time = self.rate_limit_base_delay ** retry_attempt
+                         logging.warning(
+                             f"Rate limit exceeded (attempt {retry_attempt + 1}/{self.rate_limit_max_retries}). "
+                             f"Waiting {wait_time:.1f} seconds before retrying..."
+                         )
+                         logging.debug(f"Rate limit error details: {str(e)}")
+                         time.sleep(wait_time)
+                     else:
+                         # Final retry failed
+                         logging.error(
+                             f"Rate limit exceeded after {self.rate_limit_max_retries} attempts. "
+                             f"Please reduce request frequency or contact Anthropic for a rate limit increase."
+                         )
+                         logging.error(f"Rate limit error details: {str(e)}")
+                         return {
+                             'error': True,
+                             'error_code': 'RateLimitExceeded',
+                             'error_message': f"Rate limit exceeded after {self.rate_limit_max_retries} retry attempts. {str(e)}",
+                             'error_type': 'RateLimitError'
+                         }
+
+             # Build the response in the standard format
+             response = {
+                 'stop_reason': stop_reason,
+                 'usage': usage_info,
+                 'content_blocks': content_blocks,
+                 'content': ''.join(text_parts)
+             }
+
+             # Add tool_use if present
+             if tool_use_blocks:
+                 response['tool_use'] = tool_use_blocks
+                 response['stop_reason'] = 'tool_use'
+
+             return response
+
+         except Exception as e:
+             logging.error(f"Anthropic API error: {e}")
+             return {
+                 'error': True,
+                 'error_code': 'AnthropicAPIError',
+                 'error_message': str(e),
+                 'error_type': 'RequestError'
+             }
+
+     def _convert_messages_to_anthropic(
+         self,
+         messages: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         """
+         Convert standard message format to Anthropic format.
+
+         The Anthropic API uses the same format as our standard, so minimal conversion is needed.
+         """
+         anthropic_messages = []
+
+         for msg in messages:
+             role = msg.get('role', 'user')
+             content = msg.get('content', [])
+
+             # Anthropic uses the same content block format
+             anthropic_messages.append({
+                 'role': role,
+                 'content': content
+             })
+
+         return anthropic_messages
+
+     def _convert_tools_to_anthropic(
+         self,
+         tools: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         """
+         Convert tool definitions to Anthropic format.
+
+         Anthropic requires the input_schema to have a 'type' field.
+         """
+         anthropic_tools = []
+
+         logging.debug(f"Converting {len(tools)} tools to Anthropic format")
+
+         for tool in tools:
+             # Extract the toolSpec
+             tool_spec = tool.get('toolSpec', tool)
+
+             logging.debug(f"Original tool spec: {tool_spec}")
+
+             # Get the input schema and ensure it has a 'type' field
+             # Check both 'inputSchema' (Bedrock format) and 'input_schema' (MCP format)
+             input_schema = tool_spec.get('inputSchema') or tool_spec.get('input_schema', {})
+
+             # Anthropic requires a 'type' field in input_schema
+             if 'type' not in input_schema:
+                 input_schema = {
+                     'type': 'object',
+                     'properties': input_schema.get('properties', {}),
+                     'required': input_schema.get('required', [])
+                 }
+
+             anthropic_tool = {
+                 'name': tool_spec.get('name', ''),
+                 'description': tool_spec.get('description', ''),
+                 'input_schema': input_schema
+             }
+
+             logging.debug(f"Converted Anthropic tool: {anthropic_tool}")
+             anthropic_tools.append(anthropic_tool)
+
+         return anthropic_tools
+
+     def supports_streaming(self) -> bool:
+         """Check if Anthropic supports streaming."""
+         return True  # Streaming is implemented and used by default
+
+     def count_tokens(self, text: str) -> int:
+         """
+         Count tokens using Anthropic's token counting API.
+
+         Args:
+             text: Text to count tokens for
+
+         Returns:
+             Token count
+         """
+         try:
+             # Use Anthropic's messages.count_tokens endpoint
+             # This requires a model and messages in the proper format
+             model = self.current_model_id or 'claude-sonnet-4-20250514'
+             response = self.client.messages.count_tokens(
+                 model=model,
+                 messages=[{'role': 'user', 'content': text}]
+             )
+             return response.input_tokens
+         except Exception as e:
+             logging.warning(f"Token counting failed: {e}")
+             # Fallback: rough estimate of 4 chars per token
+             return len(text) // 4
+
+     def get_rate_limits(self) -> dict:
+         """
+         Get rate limit information for the Anthropic Direct API.
+
+         Returns:
+             Dictionary with rate limit information.
+             Note: Actual limits depend on account tier.
+         """
+         return self.DEFAULT_RATE_LIMITS.copy()
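
Two behaviours above are worth making concrete. With the default rate_limit_base_delay of 2.0, the retry waits follow 2.0 ** retry_attempt: 1, 2, 4 and 8 seconds before the second through fifth attempts. And because _convert_tools_to_anthropic accepts either a Bedrock-style toolSpec wrapper or a bare MCP-style definition, callers can pass tools in whichever form they already have. A hedged sketch of a tool-calling round trip follows (illustrative only, not code from the package; the get_weather tool and the prompt are invented, and ANTHROPIC_API_KEY is assumed to be set).

"""Illustrative only: one tool-calling round trip against AnthropicService."""

from dtSpark.llm.anthropic_direct import AnthropicService

service = AnthropicService(default_max_tokens=2048)
service.set_model('claude-sonnet-4-20250514')  # the ID used as the count_tokens fallback above

# Bedrock-style toolSpec wrapper; a bare MCP-style dict would also convert.
tools = [{
    'toolSpec': {
        'name': 'get_weather',
        'description': 'Return the current weather for a city.',
        'inputSchema': {
            'type': 'object',
            'properties': {'city': {'type': 'string'}},
            'required': ['city'],
        },
    }
}]

response = service.invoke_model(
    messages=[{'role': 'user',
               'content': [{'type': 'text', 'text': 'Weather in Perth?'}]}],
    tools=tools,
)

if response.get('error'):
    raise RuntimeError(response['error_message'])

# When the model requests a tool call, invoke_model reports
# stop_reason == 'tool_use' and lists the requested calls.
if response.get('stop_reason') == 'tool_use':
    for call in response['tool_use']:
        print(call['name'], call['input'])  # e.g. get_weather {'city': 'Perth'}
else:
    print(response['content'])
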
dtSpark/llm/base.py ADDED
@@ -0,0 +1,146 @@
+ """
+ Abstract base class for LLM service providers.
+
+ This module defines the interface that all LLM providers must implement,
+ allowing the application to work with different LLM backends seamlessly.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import List, Dict, Optional, Any
+
+
+ class LLMService(ABC):
+     """Abstract base class for LLM service providers."""
+
+     @abstractmethod
+     def get_provider_name(self) -> str:
+         """
+         Get the name of this LLM provider.
+
+         Returns:
+             Provider name (e.g., 'AWS Bedrock', 'Ollama')
+         """
+         pass
+
+     @abstractmethod
+     def get_access_info(self) -> str:
+         """
+         Get access information for this provider.
+
+         Returns:
+             Access information (e.g., 'AWS Bedrock', 'Ollama (http://localhost:11434)')
+         """
+         pass
+
+     @abstractmethod
+     def list_available_models(self) -> List[Dict[str, Any]]:
+         """
+         List all available models from this provider.
+
+         Returns:
+             List of model dictionaries with standard keys:
+             - id: str - Unique model identifier
+             - name: str - Display name
+             - provider: str - Provider name
+             - supports_tools: bool - Whether the model supports tool calling
+             - context_length: int - Maximum context window size
+         """
+         pass
+
+     @abstractmethod
+     def set_model(self, model_id: str):
+         """
+         Set the active model for this provider.
+
+         Args:
+             model_id: Model identifier to use
+         """
+         pass
+
+     @abstractmethod
+     def invoke_model(
+         self,
+         messages: List[Dict[str, Any]],
+         max_tokens: int = 4096,
+         temperature: float = 0.7,
+         tools: Optional[List[Dict[str, Any]]] = None,
+         system: Optional[str] = None,
+         max_retries: int = 3
+     ) -> Optional[Dict[str, Any]]:
+         """
+         Invoke the model with a conversation.
+
+         Args:
+             messages: Conversation messages in standard format
+             max_tokens: Maximum tokens to generate
+             temperature: Sampling temperature (0.0-1.0)
+             tools: Optional tool definitions
+             system: Optional system prompt
+             max_retries: Maximum retry attempts for transient failures
+
+         Returns:
+             Response dictionary with standard format:
+             {
+                 'content': str or List - Response content
+                 'stop_reason': str - Why generation stopped
+                 'usage': {
+                     'input_tokens': int,
+                     'output_tokens': int
+                 },
+                 'tool_use': Optional[List] - Tool calls if any
+             }
+
+             Or error dictionary on failure:
+             {
+                 'error': True,
+                 'error_code': str,
+                 'error_message': str,
+                 'error_type': str
+             }
+         """
+         pass
+
+     @abstractmethod
+     def supports_streaming(self) -> bool:
+         """
+         Check if this provider supports streaming responses.
+
+         Returns:
+             True if streaming is supported
+         """
+         pass
+
+     @abstractmethod
+     def count_tokens(self, text: str) -> int:
+         """
+         Count tokens in text using the provider's tokeniser.
+
+         Args:
+             text: Text to count tokens for
+
+         Returns:
+             Token count
+         """
+         pass
+
+     def get_rate_limits(self) -> Dict[str, Any]:
+         """
+         Get rate limit information for this provider.
+
+         Returns:
+             Dictionary with rate limit information:
+             {
+                 'input_tokens_per_minute': int or None - Max input tokens per minute
+                 'output_tokens_per_minute': int or None - Max output tokens per minute
+                 'requests_per_minute': int or None - Max requests per minute
+                 'has_limits': bool - Whether this provider has rate limits
+             }
+
+         Default implementation returns no limits (unlimited).
+         """
+         return {
+             'input_tokens_per_minute': None,
+             'output_tokens_per_minute': None,
+             'requests_per_minute': None,
+             'has_limits': False
+         }
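
Since only get_rate_limits ships with a default implementation, a new backend must supply the seven abstract methods above. To make that required surface concrete, here is a minimal toy subclass (entirely invented, not part of the package; useful mainly as a test double).

"""Illustrative only: a minimal LLMService subclass that echoes the last
user message, showing the full set of required methods."""

from typing import List, Dict, Optional, Any

from dtSpark.llm.base import LLMService


class EchoService(LLMService):
    """Toy provider satisfying every abstract method of LLMService."""

    def get_provider_name(self) -> str:
        return "Echo"

    def get_access_info(self) -> str:
        return "Echo (in-process)"

    def list_available_models(self) -> List[Dict[str, Any]]:
        return [{'id': 'echo-1', 'name': 'Echo', 'provider': 'Echo',
                 'supports_tools': False, 'context_length': 8192}]

    def set_model(self, model_id: str):
        self.current_model_id = model_id

    def invoke_model(self, messages: List[Dict[str, Any]], max_tokens: int = 4096,
                     temperature: float = 0.7, tools=None, system=None,
                     max_retries: int = 3) -> Optional[Dict[str, Any]]:
        # Echo the last user message back in the standard response shape.
        text = str(messages[-1].get('content', '')) if messages else ''
        tokens = self.count_tokens(text)
        return {'content': text,
                'stop_reason': 'end_turn',
                'usage': {'input_tokens': tokens, 'output_tokens': tokens},
                'tool_use': None}

    def supports_streaming(self) -> bool:
        return False

    def count_tokens(self, text: str) -> int:
        # Same rough 4-characters-per-token estimate used as a fallback above.
        return max(1, len(text) // 4)
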