isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/inference/models/inference_config.py
@@ -0,0 +1,551 @@
+ """
+ Inference Configuration Models
+
+ Configuration models for inference operations, providing structured configuration
+ management for different providers, models, and inference parameters.
+ """
+
+ import logging
+ from datetime import datetime, timezone
+ from typing import Dict, List, Optional, Any, Union
+ from dataclasses import dataclass, field
+ from enum import Enum
+
+ logger = logging.getLogger(__name__)
+
+ class LoadBalancingStrategy(str, Enum):
+     """Load balancing strategy enumeration"""
+     ROUND_ROBIN = "round_robin"
+     LEAST_CONNECTIONS = "least_connections"
+     WEIGHTED_ROUND_ROBIN = "weighted_round_robin"
+     FASTEST_RESPONSE = "fastest_response"
+     RANDOM = "random"
+     STICKY_SESSION = "sticky_session"
+
+ class RetryStrategy(str, Enum):
+     """Retry strategy enumeration"""
+     NONE = "none"
+     EXPONENTIAL_BACKOFF = "exponential_backoff"
+     LINEAR_BACKOFF = "linear_backoff"
+     FIXED_INTERVAL = "fixed_interval"
+     IMMEDIATE = "immediate"
+
+ class CachingStrategy(str, Enum):
+     """Caching strategy enumeration"""
+     NONE = "none"
+     LRU = "lru"
+     TTL = "ttl"
+     SEMANTIC = "semantic"
+     PROBABILISTIC = "probabilistic"
+
+ @dataclass
+ class ProviderConfig:
+     """
+     Provider-specific configuration
+
+     Contains provider-specific settings, authentication, and limits.
+     """
+     provider_name: str
+     base_url: Optional[str] = None
+     api_key: Optional[str] = None
+     organization_id: Optional[str] = None
+     project_id: Optional[str] = None
+     region: Optional[str] = None
+     api_version: Optional[str] = None
+     timeout_seconds: int = 300
+     max_retries: int = 3
+     retry_strategy: str = RetryStrategy.EXPONENTIAL_BACKOFF
+     rate_limit_rpm: Optional[int] = None  # requests per minute
+     rate_limit_tpm: Optional[int] = None  # tokens per minute
+     concurrent_requests: int = 10
+     enable_streaming: bool = True
+     custom_headers: Optional[Dict[str, str]] = None
+     proxy_config: Optional[Dict[str, str]] = None
+     ssl_verify: bool = True
+     connection_pool_size: int = 100
+     keepalive_timeout: int = 30
+     user_agent: Optional[str] = None
+
+     def __post_init__(self):
+         if self.custom_headers is None:
+             self.custom_headers = {}
+         if self.proxy_config is None:
+             self.proxy_config = {}
+
+     @property
+     def is_configured(self) -> bool:
+         """Check if provider is properly configured"""
+         return bool(self.provider_name and (self.api_key or self.base_url))
+
+     @property
+     def has_rate_limits(self) -> bool:
+         """Check if rate limits are configured"""
+         return self.rate_limit_rpm is not None or self.rate_limit_tpm is not None
+
+     def get_auth_headers(self) -> Dict[str, str]:
+         """Get authentication headers for requests"""
+         headers = {}
+
+         if self.api_key:
+             if self.provider_name.lower() == "openai":
+                 headers["Authorization"] = f"Bearer {self.api_key}"
+             elif self.provider_name.lower() == "anthropic":
+                 headers["x-api-key"] = self.api_key
+             elif self.provider_name.lower() == "replicate":
+                 headers["Authorization"] = f"Token {self.api_key}"
+             else:
+                 headers["Authorization"] = f"Bearer {self.api_key}"
+
+         if self.organization_id:
+             if self.provider_name.lower() == "openai":
+                 headers["OpenAI-Organization"] = self.organization_id
+
+         if self.project_id:
+             if self.provider_name.lower() == "openai":
+                 headers["OpenAI-Project"] = self.project_id
+
+         # Add custom headers
+         headers.update(self.custom_headers)
+
+         return headers
+
+     def get_request_timeout(self, model_type: str = "llm") -> int:
+         """Get appropriate timeout for model type"""
+         # Different model types may need different timeouts
+         multipliers = {
+             "vision": 2.0,
+             "image_gen": 5.0,
+             "audio": 3.0,
+             "embedding": 0.5,
+             "llm": 1.0
+         }
+
+         multiplier = multipliers.get(model_type, 1.0)
+         return int(self.timeout_seconds * multiplier)
+
+     def calculate_retry_delay(self, attempt: int) -> float:
+         """Calculate retry delay based on strategy"""
+         if self.retry_strategy == RetryStrategy.NONE:
+             return 0
+         elif self.retry_strategy == RetryStrategy.IMMEDIATE:
+             return 0
+         elif self.retry_strategy == RetryStrategy.FIXED_INTERVAL:
+             return 1.0
+         elif self.retry_strategy == RetryStrategy.LINEAR_BACKOFF:
+             return attempt * 1.0
+         elif self.retry_strategy == RetryStrategy.EXPONENTIAL_BACKOFF:
+             return min(60, (2 ** attempt) + (attempt * 0.1))
+
+         return 1.0
+
+ @dataclass
+ class ModelConfig:
+     """
+     Model-specific configuration
+
+     Defines model parameters, inference settings, and optimization options.
+     """
+     model_id: str
+     model_name: Optional[str] = None
+     model_type: str = "llm"  # llm, vision, audio, embedding, etc.
+     provider: Optional[str] = None
+     endpoint_path: Optional[str] = None
+
+     # Generation parameters
+     temperature: Optional[float] = None
+     max_tokens: Optional[int] = None
+     top_p: Optional[float] = None
+     top_k: Optional[int] = None
+     frequency_penalty: Optional[float] = None
+     presence_penalty: Optional[float] = None
+     stop_sequences: Optional[List[str]] = None
+
+     # Context and formatting
+     context_length: Optional[int] = None
+     system_message: Optional[str] = None
+     prompt_template: Optional[str] = None
+     response_format: Optional[str] = None  # "text", "json", "structured"
+
+     # Performance settings
+     batch_size: int = 1
+     streaming: bool = False
+     use_cache: bool = True
+     cache_ttl_seconds: int = 3600
+
+     # Cost and usage controls
+     max_cost_per_request: Optional[float] = None
+     max_tokens_per_minute: Optional[int] = None
+     priority: int = 5  # 1-10 scale
+
+     # Advanced settings
+     logit_bias: Optional[Dict[str, float]] = None
+     seed: Optional[int] = None
+     tools: Optional[List[Dict[str, Any]]] = None
+     tool_choice: Optional[str] = None
+
+     def __post_init__(self):
+         if self.stop_sequences is None:
+             self.stop_sequences = []
+         if self.logit_bias is None:
+             self.logit_bias = {}
+         if self.tools is None:
+             self.tools = []
+
+     @property
+     def supports_streaming(self) -> bool:
+         """Check if model supports streaming"""
+         return self.streaming and self.model_type in ["llm", "vision"]
+
+     @property
+     def supports_tools(self) -> bool:
+         """Check if model supports function calling"""
+         return bool(self.tools) and self.model_type == "llm"
+
+     @property
+     def estimated_cost_per_1k_tokens(self) -> float:
+         """Estimate cost per 1000 tokens (would be provider/model specific)"""
+         # This would be loaded from a pricing database in practice
+         cost_map = {
+             "gpt-4": 0.03,
+             "gpt-3.5-turbo": 0.002,
+             "claude-3-opus": 0.015,
+             "claude-3-sonnet": 0.003,
+             "gemini-pro": 0.001
+         }
+
+         # Simple heuristic based on model name
+         for model_prefix, cost in cost_map.items():
+             if model_prefix in self.model_id.lower():
+                 return cost
+
+         return 0.01  # Default estimate
+
+     def estimate_request_cost(self, input_tokens: int, output_tokens: int = 0) -> float:
+         """Estimate cost for a request"""
+         total_tokens = input_tokens + output_tokens
+         cost_per_1k = self.estimated_cost_per_1k_tokens
+         return (total_tokens / 1000) * cost_per_1k
+
+     def validate_parameters(self) -> List[str]:
+         """Validate model parameters"""
+         issues = []
+
+         if not self.model_id:
+             issues.append("Model ID is required")
+
+         if self.temperature is not None and (self.temperature < 0 or self.temperature > 2):
+             issues.append("Temperature must be between 0 and 2")
+
+         if self.top_p is not None and (self.top_p < 0 or self.top_p > 1):
+             issues.append("Top-p must be between 0 and 1")
+
+         if self.max_tokens is not None and self.max_tokens < 1:
+             issues.append("Max tokens must be positive")
+
+         if self.batch_size < 1:
+             issues.append("Batch size must be at least 1")
+
+         if self.priority < 1 or self.priority > 10:
+             issues.append("Priority must be between 1 and 10")
+
+         return issues
+
+     def get_generation_params(self) -> Dict[str, Any]:
+         """Get generation parameters for API calls"""
+         params = {}
+
+         if self.temperature is not None:
+             params["temperature"] = self.temperature
+         if self.max_tokens is not None:
+             params["max_tokens"] = self.max_tokens
+         if self.top_p is not None:
+             params["top_p"] = self.top_p
+         if self.top_k is not None:
+             params["top_k"] = self.top_k
+         if self.frequency_penalty is not None:
+             params["frequency_penalty"] = self.frequency_penalty
+         if self.presence_penalty is not None:
+             params["presence_penalty"] = self.presence_penalty
+         if self.stop_sequences:
+             params["stop"] = self.stop_sequences
+         if self.seed is not None:
+             params["seed"] = self.seed
+         if self.logit_bias:
+             params["logit_bias"] = self.logit_bias
+         if self.tools:
+             params["tools"] = self.tools
+         if self.tool_choice:
+             params["tool_choice"] = self.tool_choice
+
+         return params
+
+     def update_from_request(self, request_params: Dict[str, Any]):
+         """Update config from request parameters"""
+         for key, value in request_params.items():
+             if hasattr(self, key) and value is not None:
+                 setattr(self, key, value)
+
+ @dataclass
+ class InferenceConfig:
+     """
+     Complete inference configuration
+
+     Combines provider, model, and execution settings for inference operations.
+     """
+     config_id: Optional[str] = None
+     config_name: Optional[str] = None
+     provider_config: Optional[ProviderConfig] = None
+     model_config: Optional[ModelConfig] = None
+
+     # Load balancing and failover
+     load_balancing: str = LoadBalancingStrategy.ROUND_ROBIN
+     failover_providers: Optional[List[str]] = None
+     failover_models: Optional[List[str]] = None
+     auto_fallback: bool = True
+
+     # Caching configuration
+     caching_strategy: str = CachingStrategy.LRU
+     cache_size_mb: int = 1024
+     cache_ttl_seconds: int = 3600
+     semantic_cache_threshold: float = 0.95
+
+     # Queue and throttling
+     queue_max_size: int = 1000
+     queue_timeout_seconds: int = 300
+     throttle_requests_per_second: Optional[float] = None
+
+     # Monitoring and logging
+     enable_metrics: bool = True
+     enable_detailed_logging: bool = False
+     log_request_data: bool = False
+     log_response_data: bool = False
+     track_token_usage: bool = True
+
+     # Security settings
+     input_sanitization: bool = True
+     output_filtering: bool = False
+     content_moderation: bool = False
+     pii_detection: bool = False
+
+     # Optimization settings
+     batch_processing: bool = False
+     connection_pooling: bool = True
+     request_compression: bool = True
+     response_compression: bool = True
+
+     created_at: datetime = None
+     updated_at: datetime = None
+     created_by: Optional[str] = None
+     is_active: bool = True
+     tags: Optional[Dict[str, str]] = None
+
+     def __post_init__(self):
+         if self.created_at is None:
+             self.created_at = datetime.now(timezone.utc)
+         if self.updated_at is None:
+             self.updated_at = self.created_at
+         if self.failover_providers is None:
+             self.failover_providers = []
+         if self.failover_models is None:
+             self.failover_models = []
+         if self.tags is None:
+             self.tags = {}
+
+     @property
+     def primary_provider(self) -> Optional[str]:
+         """Get primary provider name"""
+         return self.provider_config.provider_name if self.provider_config else None
+
+     @property
+     def primary_model(self) -> Optional[str]:
+         """Get primary model ID"""
+         return self.model_config.model_id if self.model_config else None
+
+     @property
+     def has_failover(self) -> bool:
+         """Check if failover is configured"""
+         return bool(self.failover_providers or self.failover_models)
+
+     @property
+     def supports_batching(self) -> bool:
+         """Check if batching is enabled and supported"""
+         return (self.batch_processing and
+                 self.model_config and
+                 self.model_config.batch_size > 1)
+
+     @property
+     def cache_enabled(self) -> bool:
+         """Check if caching is enabled"""
+         return self.caching_strategy != CachingStrategy.NONE
+
+     def validate(self) -> List[str]:
+         """Validate complete configuration"""
+         issues = []
+
+         if not self.provider_config:
+             issues.append("Provider configuration is required")
+         elif not self.provider_config.is_configured:
+             issues.append("Provider configuration is incomplete")
+
+         if not self.model_config:
+             issues.append("Model configuration is required")
+         else:
+             issues.extend([f"Model: {issue}" for issue in self.model_config.validate_parameters()])
+
+         if self.cache_size_mb < 1:
+             issues.append("Cache size must be at least 1 MB")
+
+         if self.queue_max_size < 1:
+             issues.append("Queue max size must be positive")
+
+         if self.throttle_requests_per_second is not None and self.throttle_requests_per_second <= 0:
+             issues.append("Throttle rate must be positive")
+
+         return issues
+
+     def get_effective_timeout(self) -> int:
+         """Get effective timeout considering provider and model settings"""
+         if not self.provider_config or not self.model_config:
+             return 300  # Default 5 minutes
+
+         return self.provider_config.get_request_timeout(self.model_config.model_type)
+
+     def get_cache_key(self, request_data: Dict[str, Any]) -> str:
+         """Generate cache key for request"""
+         import hashlib
+         import json
+
+         # Include model and key request parameters in cache key
+         cache_data = {
+             "model_id": self.primary_model,
+             "provider": self.primary_provider,
+             "request": request_data
+         }
+
+         # Add relevant model parameters
+         if self.model_config:
+             for param in ["temperature", "max_tokens", "top_p", "top_k"]:
+                 value = getattr(self.model_config, param, None)
+                 if value is not None:
+                     cache_data[param] = value
+
+         cache_str = json.dumps(cache_data, sort_keys=True)
+         return hashlib.sha256(cache_str.encode()).hexdigest()[:32]
+
+     def should_use_failover(self, error_type: str, attempt_count: int) -> bool:
+         """Determine if failover should be used"""
+         if not self.auto_fallback or not self.has_failover:
+             return False
+
+         # Failover conditions
+         failover_errors = ["timeout", "rate_limit", "server_error", "model_error"]
+         max_attempts = 3
+
+         return (error_type in failover_errors and
+                 attempt_count >= max_attempts)
+
+     def get_next_failover_option(self, current_provider: str, current_model: str) -> Optional[Dict[str, str]]:
+         """Get next failover provider/model combination"""
+         # Simple round-robin failover
+         if self.failover_providers:
+             try:
+                 current_index = self.failover_providers.index(current_provider)
+                 next_index = (current_index + 1) % len(self.failover_providers)
+                 return {"provider": self.failover_providers[next_index], "model": current_model}
+             except ValueError:
+                 if self.failover_providers:
+                     return {"provider": self.failover_providers[0], "model": current_model}
+
+         if self.failover_models:
+             try:
+                 current_index = self.failover_models.index(current_model)
+                 next_index = (current_index + 1) % len(self.failover_models)
+                 return {"provider": current_provider, "model": self.failover_models[next_index]}
+             except ValueError:
+                 if self.failover_models:
+                     return {"provider": current_provider, "model": self.failover_models[0]}
+
+         return None
+
+     def merge_with(self, other: 'InferenceConfig') -> 'InferenceConfig':
+         """Merge this configuration with another"""
+         merged = InferenceConfig()
+
+         # Copy all fields from self
+         for field_name in self.__dataclass_fields__:
+             setattr(merged, field_name, getattr(self, field_name))
+
+         # Override with non-None values from other
+         for field_name in other.__dataclass_fields__:
+             other_value = getattr(other, field_name)
+             if other_value is not None:
+                 setattr(merged, field_name, other_value)
+
+         merged.updated_at = datetime.now(timezone.utc)
+         return merged
+
+ # Factory functions for common configurations
+
+ def create_openai_config(
+     api_key: str,
+     model_id: str = "gpt-3.5-turbo",
+     temperature: float = 0.7,
+     max_tokens: int = 1000
+ ) -> InferenceConfig:
+     """Create OpenAI inference configuration"""
+     return InferenceConfig(
+         provider_config=ProviderConfig(
+             provider_name="openai",
+             base_url="https://api.openai.com/v1",
+             api_key=api_key,
+             timeout_seconds=120,
+             max_retries=3
+         ),
+         model_config=ModelConfig(
+             model_id=model_id,
+             model_type="llm",
+             temperature=temperature,
+             max_tokens=max_tokens,
+             streaming=True
+         )
+     )
+
+ def create_anthropic_config(
+     api_key: str,
+     model_id: str = "claude-3-sonnet-20240229",
+     max_tokens: int = 1000
+ ) -> InferenceConfig:
+     """Create Anthropic inference configuration"""
+     return InferenceConfig(
+         provider_config=ProviderConfig(
+             provider_name="anthropic",
+             base_url="https://api.anthropic.com",
+             api_key=api_key,
+             api_version="2023-06-01",
+             timeout_seconds=180
+         ),
+         model_config=ModelConfig(
+             model_id=model_id,
+             model_type="llm",
+             max_tokens=max_tokens
+         )
+     )
+
+ def create_multi_provider_config(
+     configs: List[InferenceConfig],
+     load_balancing: str = LoadBalancingStrategy.ROUND_ROBIN
+ ) -> InferenceConfig:
+     """Create multi-provider configuration with failover"""
+     if not configs:
+         raise ValueError("At least one configuration is required")
+
+     primary = configs[0]
+     failover_providers = [config.primary_provider for config in configs[1:] if config.primary_provider]
+
+     return InferenceConfig(
+         provider_config=primary.provider_config,
+         model_config=primary.model_config,
+         load_balancing=load_balancing,
+         failover_providers=failover_providers,
+         auto_fallback=True
+     )
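
For orientation, here is a minimal sketch of how the configuration models added above might be exercised. It assumes the module is importable as isa_model.inference.models.inference_config (the path introduced in this diff); the API keys are placeholders, not real credentials.

from isa_model.inference.models.inference_config import (
    create_anthropic_config,
    create_multi_provider_config,
    create_openai_config,
)

# Build provider configs from the factory functions defined above.
openai_cfg = create_openai_config(api_key="sk-placeholder")
anthropic_cfg = create_anthropic_config(api_key="sk-ant-placeholder")

# validate() returns a list of human-readable issues; an empty list means
# both the provider and model configs pass all parameter checks.
assert openai_cfg.validate() == []

# Generation parameters ready to forward to a provider API call; with the
# factory defaults this yields {"temperature": 0.7, "max_tokens": 1000}.
params = openai_cfg.model_config.get_generation_params()

# Deterministic 32-hex-char cache key derived from model, provider,
# sampling parameters, and the request payload.
key = openai_cfg.get_cache_key({"messages": [{"role": "user", "content": "hi"}]})

# Primary OpenAI config with Anthropic registered as a failover provider.
combined = create_multi_provider_config([openai_cfg, anthropic_cfg])
assert combined.failover_providers == ["anthropic"]
assert combined.has_failover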