isa-model 0.4.0-py3-none-any.whl → 0.4.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/inference/models/inference_config.py
@@ -0,0 +1,551 @@
+"""
+Inference Configuration Models
+
+Configuration models for inference operations, providing structured configuration
+management for different providers, models, and inference parameters.
+"""
+
+import logging
+from datetime import datetime, timezone
+from typing import Dict, List, Optional, Any, Union
+from dataclasses import dataclass, field
+from enum import Enum
+
+logger = logging.getLogger(__name__)
+
+class LoadBalancingStrategy(str, Enum):
+    """Load balancing strategy enumeration"""
+    ROUND_ROBIN = "round_robin"
+    LEAST_CONNECTIONS = "least_connections"
+    WEIGHTED_ROUND_ROBIN = "weighted_round_robin"
+    FASTEST_RESPONSE = "fastest_response"
+    RANDOM = "random"
+    STICKY_SESSION = "sticky_session"
+
+class RetryStrategy(str, Enum):
+    """Retry strategy enumeration"""
+    NONE = "none"
+    EXPONENTIAL_BACKOFF = "exponential_backoff"
+    LINEAR_BACKOFF = "linear_backoff"
+    FIXED_INTERVAL = "fixed_interval"
+    IMMEDIATE = "immediate"
+
+class CachingStrategy(str, Enum):
+    """Caching strategy enumeration"""
+    NONE = "none"
+    LRU = "lru"
+    TTL = "ttl"
+    SEMANTIC = "semantic"
+    PROBABILISTIC = "probabilistic"
+
+@dataclass
+class ProviderConfig:
+    """
+    Provider-specific configuration
+
+    Contains provider-specific settings, authentication, and limits.
+    """
+    provider_name: str
+    base_url: Optional[str] = None
+    api_key: Optional[str] = None
+    organization_id: Optional[str] = None
+    project_id: Optional[str] = None
+    region: Optional[str] = None
+    api_version: Optional[str] = None
+    timeout_seconds: int = 300
+    max_retries: int = 3
+    retry_strategy: str = RetryStrategy.EXPONENTIAL_BACKOFF
+    rate_limit_rpm: Optional[int] = None  # requests per minute
+    rate_limit_tpm: Optional[int] = None  # tokens per minute
+    concurrent_requests: int = 10
+    enable_streaming: bool = True
+    custom_headers: Optional[Dict[str, str]] = None
+    proxy_config: Optional[Dict[str, str]] = None
+    ssl_verify: bool = True
+    connection_pool_size: int = 100
+    keepalive_timeout: int = 30
+    user_agent: Optional[str] = None
+
+    def __post_init__(self):
+        if self.custom_headers is None:
+            self.custom_headers = {}
+        if self.proxy_config is None:
+            self.proxy_config = {}
+
+    @property
+    def is_configured(self) -> bool:
+        """Check if provider is properly configured"""
+        return bool(self.provider_name and (self.api_key or self.base_url))
+
+    @property
+    def has_rate_limits(self) -> bool:
+        """Check if rate limits are configured"""
+        return self.rate_limit_rpm is not None or self.rate_limit_tpm is not None
+
+    def get_auth_headers(self) -> Dict[str, str]:
+        """Get authentication headers for requests"""
+        headers = {}
+
+        if self.api_key:
+            if self.provider_name.lower() == "openai":
+                headers["Authorization"] = f"Bearer {self.api_key}"
+            elif self.provider_name.lower() == "anthropic":
+                headers["x-api-key"] = self.api_key
+            elif self.provider_name.lower() == "replicate":
+                headers["Authorization"] = f"Token {self.api_key}"
+            else:
+                headers["Authorization"] = f"Bearer {self.api_key}"
+
+        if self.organization_id:
+            if self.provider_name.lower() == "openai":
+                headers["OpenAI-Organization"] = self.organization_id
+
+        if self.project_id:
+            if self.provider_name.lower() == "openai":
+                headers["OpenAI-Project"] = self.project_id
+
+        # Add custom headers
+        headers.update(self.custom_headers)
+
+        return headers
+
+    def get_request_timeout(self, model_type: str = "llm") -> int:
+        """Get appropriate timeout for model type"""
+        # Different model types may need different timeouts
+        multipliers = {
+            "vision": 2.0,
+            "image_gen": 5.0,
+            "audio": 3.0,
+            "embedding": 0.5,
+            "llm": 1.0
+        }
+
+        multiplier = multipliers.get(model_type, 1.0)
+        return int(self.timeout_seconds * multiplier)
+
+    def calculate_retry_delay(self, attempt: int) -> float:
+        """Calculate retry delay based on strategy"""
+        if self.retry_strategy == RetryStrategy.NONE:
+            return 0
+        elif self.retry_strategy == RetryStrategy.IMMEDIATE:
+            return 0
+        elif self.retry_strategy == RetryStrategy.FIXED_INTERVAL:
+            return 1.0
+        elif self.retry_strategy == RetryStrategy.LINEAR_BACKOFF:
+            return attempt * 1.0
+        elif self.retry_strategy == RetryStrategy.EXPONENTIAL_BACKOFF:
+            return min(60, (2 ** attempt) + (attempt * 0.1))
+
+        return 1.0
+
+@dataclass
+class ModelConfig:
+    """
+    Model-specific configuration
+
+    Defines model parameters, inference settings, and optimization options.
+    """
+    model_id: str
+    model_name: Optional[str] = None
+    model_type: str = "llm"  # llm, vision, audio, embedding, etc.
+    provider: Optional[str] = None
+    endpoint_path: Optional[str] = None
+
+    # Generation parameters
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+    frequency_penalty: Optional[float] = None
+    presence_penalty: Optional[float] = None
+    stop_sequences: Optional[List[str]] = None
+
+    # Context and formatting
+    context_length: Optional[int] = None
+    system_message: Optional[str] = None
+    prompt_template: Optional[str] = None
+    response_format: Optional[str] = None  # "text", "json", "structured"
+
+    # Performance settings
+    batch_size: int = 1
+    streaming: bool = False
+    use_cache: bool = True
+    cache_ttl_seconds: int = 3600
+
+    # Cost and usage controls
+    max_cost_per_request: Optional[float] = None
+    max_tokens_per_minute: Optional[int] = None
+    priority: int = 5  # 1-10 scale
+
+    # Advanced settings
+    logit_bias: Optional[Dict[str, float]] = None
+    seed: Optional[int] = None
+    tools: Optional[List[Dict[str, Any]]] = None
+    tool_choice: Optional[str] = None
+
+    def __post_init__(self):
+        if self.stop_sequences is None:
+            self.stop_sequences = []
+        if self.logit_bias is None:
+            self.logit_bias = {}
+        if self.tools is None:
+            self.tools = []
+
+    @property
+    def supports_streaming(self) -> bool:
+        """Check if model supports streaming"""
+        return self.streaming and self.model_type in ["llm", "vision"]
+
+    @property
+    def supports_tools(self) -> bool:
+        """Check if model supports function calling"""
+        return bool(self.tools) and self.model_type == "llm"
+
+    @property
+    def estimated_cost_per_1k_tokens(self) -> float:
+        """Estimate cost per 1000 tokens (would be provider/model specific)"""
+        # This would be loaded from a pricing database in practice
+        cost_map = {
+            "gpt-4": 0.03,
+            "gpt-3.5-turbo": 0.002,
+            "claude-3-opus": 0.015,
+            "claude-3-sonnet": 0.003,
+            "gemini-pro": 0.001
+        }
+
+        # Simple heuristic based on model name
+        for model_prefix, cost in cost_map.items():
+            if model_prefix in self.model_id.lower():
+                return cost
+
+        return 0.01  # Default estimate
+
+    def estimate_request_cost(self, input_tokens: int, output_tokens: int = 0) -> float:
+        """Estimate cost for a request"""
+        total_tokens = input_tokens + output_tokens
+        cost_per_1k = self.estimated_cost_per_1k_tokens
+        return (total_tokens / 1000) * cost_per_1k
+
+    def validate_parameters(self) -> List[str]:
+        """Validate model parameters"""
+        issues = []
+
+        if not self.model_id:
+            issues.append("Model ID is required")
+
+        if self.temperature is not None and (self.temperature < 0 or self.temperature > 2):
+            issues.append("Temperature must be between 0 and 2")
+
+        if self.top_p is not None and (self.top_p < 0 or self.top_p > 1):
+            issues.append("Top-p must be between 0 and 1")
+
+        if self.max_tokens is not None and self.max_tokens < 1:
+            issues.append("Max tokens must be positive")
+
+        if self.batch_size < 1:
+            issues.append("Batch size must be at least 1")
+
+        if self.priority < 1 or self.priority > 10:
+            issues.append("Priority must be between 1 and 10")
+
+        return issues
+
+    def get_generation_params(self) -> Dict[str, Any]:
+        """Get generation parameters for API calls"""
+        params = {}
+
+        if self.temperature is not None:
+            params["temperature"] = self.temperature
+        if self.max_tokens is not None:
+            params["max_tokens"] = self.max_tokens
+        if self.top_p is not None:
+            params["top_p"] = self.top_p
+        if self.top_k is not None:
+            params["top_k"] = self.top_k
+        if self.frequency_penalty is not None:
+            params["frequency_penalty"] = self.frequency_penalty
+        if self.presence_penalty is not None:
+            params["presence_penalty"] = self.presence_penalty
+        if self.stop_sequences:
+            params["stop"] = self.stop_sequences
+        if self.seed is not None:
+            params["seed"] = self.seed
+        if self.logit_bias:
+            params["logit_bias"] = self.logit_bias
+        if self.tools:
+            params["tools"] = self.tools
+        if self.tool_choice:
+            params["tool_choice"] = self.tool_choice
+
+        return params
+
+    def update_from_request(self, request_params: Dict[str, Any]):
+        """Update config from request parameters"""
+        for key, value in request_params.items():
+            if hasattr(self, key) and value is not None:
+                setattr(self, key, value)
+
+@dataclass
+class InferenceConfig:
+    """
+    Complete inference configuration
+
+    Combines provider, model, and execution settings for inference operations.
+    """
+    config_id: Optional[str] = None
+    config_name: Optional[str] = None
+    provider_config: Optional[ProviderConfig] = None
+    model_config: Optional[ModelConfig] = None
+
+    # Load balancing and failover
+    load_balancing: str = LoadBalancingStrategy.ROUND_ROBIN
+    failover_providers: Optional[List[str]] = None
+    failover_models: Optional[List[str]] = None
+    auto_fallback: bool = True
+
+    # Caching configuration
+    caching_strategy: str = CachingStrategy.LRU
+    cache_size_mb: int = 1024
+    cache_ttl_seconds: int = 3600
+    semantic_cache_threshold: float = 0.95
+
+    # Queue and throttling
+    queue_max_size: int = 1000
+    queue_timeout_seconds: int = 300
+    throttle_requests_per_second: Optional[float] = None
+
+    # Monitoring and logging
+    enable_metrics: bool = True
+    enable_detailed_logging: bool = False
+    log_request_data: bool = False
+    log_response_data: bool = False
+    track_token_usage: bool = True
+
+    # Security settings
+    input_sanitization: bool = True
+    output_filtering: bool = False
+    content_moderation: bool = False
+    pii_detection: bool = False
+
+    # Optimization settings
+    batch_processing: bool = False
+    connection_pooling: bool = True
+    request_compression: bool = True
+    response_compression: bool = True
+
+    created_at: datetime = None
+    updated_at: datetime = None
+    created_by: Optional[str] = None
+    is_active: bool = True
+    tags: Optional[Dict[str, str]] = None
+
+    def __post_init__(self):
+        if self.created_at is None:
+            self.created_at = datetime.now(timezone.utc)
+        if self.updated_at is None:
+            self.updated_at = self.created_at
+        if self.failover_providers is None:
+            self.failover_providers = []
+        if self.failover_models is None:
+            self.failover_models = []
+        if self.tags is None:
+            self.tags = {}
+
+    @property
+    def primary_provider(self) -> Optional[str]:
+        """Get primary provider name"""
+        return self.provider_config.provider_name if self.provider_config else None
+
+    @property
+    def primary_model(self) -> Optional[str]:
+        """Get primary model ID"""
+        return self.model_config.model_id if self.model_config else None
+
+    @property
+    def has_failover(self) -> bool:
+        """Check if failover is configured"""
+        return bool(self.failover_providers or self.failover_models)
+
+    @property
+    def supports_batching(self) -> bool:
+        """Check if batching is enabled and supported"""
+        return (self.batch_processing and
+                self.model_config and
+                self.model_config.batch_size > 1)
+
+    @property
+    def cache_enabled(self) -> bool:
+        """Check if caching is enabled"""
+        return self.caching_strategy != CachingStrategy.NONE
+
+    def validate(self) -> List[str]:
+        """Validate complete configuration"""
+        issues = []
+
+        if not self.provider_config:
+            issues.append("Provider configuration is required")
+        elif not self.provider_config.is_configured:
+            issues.append("Provider configuration is incomplete")
+
+        if not self.model_config:
+            issues.append("Model configuration is required")
+        else:
+            issues.extend([f"Model: {issue}" for issue in self.model_config.validate_parameters()])
+
+        if self.cache_size_mb < 1:
+            issues.append("Cache size must be at least 1 MB")
+
+        if self.queue_max_size < 1:
+            issues.append("Queue max size must be positive")
+
+        if self.throttle_requests_per_second is not None and self.throttle_requests_per_second <= 0:
+            issues.append("Throttle rate must be positive")
+
+        return issues
+
+    def get_effective_timeout(self) -> int:
+        """Get effective timeout considering provider and model settings"""
+        if not self.provider_config or not self.model_config:
+            return 300  # Default 5 minutes
+
+        return self.provider_config.get_request_timeout(self.model_config.model_type)
+
+    def get_cache_key(self, request_data: Dict[str, Any]) -> str:
+        """Generate cache key for request"""
+        import hashlib
+        import json
+
+        # Include model and key request parameters in cache key
+        cache_data = {
+            "model_id": self.primary_model,
+            "provider": self.primary_provider,
+            "request": request_data
+        }
+
+        # Add relevant model parameters
+        if self.model_config:
+            for param in ["temperature", "max_tokens", "top_p", "top_k"]:
+                value = getattr(self.model_config, param, None)
+                if value is not None:
+                    cache_data[param] = value
+
+        cache_str = json.dumps(cache_data, sort_keys=True)
+        return hashlib.sha256(cache_str.encode()).hexdigest()[:32]
+
+    def should_use_failover(self, error_type: str, attempt_count: int) -> bool:
+        """Determine if failover should be used"""
+        if not self.auto_fallback or not self.has_failover:
+            return False
+
+        # Failover conditions
+        failover_errors = ["timeout", "rate_limit", "server_error", "model_error"]
+        max_attempts = 3
+
+        return (error_type in failover_errors and
+                attempt_count >= max_attempts)
+
+    def get_next_failover_option(self, current_provider: str, current_model: str) -> Optional[Dict[str, str]]:
+        """Get next failover provider/model combination"""
+        # Simple round-robin failover
+        if self.failover_providers:
+            try:
+                current_index = self.failover_providers.index(current_provider)
+                next_index = (current_index + 1) % len(self.failover_providers)
+                return {"provider": self.failover_providers[next_index], "model": current_model}
+            except ValueError:
+                if self.failover_providers:
+                    return {"provider": self.failover_providers[0], "model": current_model}
+
+        if self.failover_models:
+            try:
+                current_index = self.failover_models.index(current_model)
+                next_index = (current_index + 1) % len(self.failover_models)
+                return {"provider": current_provider, "model": self.failover_models[next_index]}
+            except ValueError:
+                if self.failover_models:
+                    return {"provider": current_provider, "model": self.failover_models[0]}
+
+        return None
+
+    def merge_with(self, other: 'InferenceConfig') -> 'InferenceConfig':
+        """Merge this configuration with another"""
+        merged = InferenceConfig()
+
+        # Copy all fields from self
+        for field_name in self.__dataclass_fields__:
+            setattr(merged, field_name, getattr(self, field_name))
+
+        # Override with non-None values from other
+        for field_name in other.__dataclass_fields__:
+            other_value = getattr(other, field_name)
+            if other_value is not None:
+                setattr(merged, field_name, other_value)
+
+        merged.updated_at = datetime.now(timezone.utc)
+        return merged
+
+# Factory functions for common configurations
+
+def create_openai_config(
+    api_key: str,
+    model_id: str = "gpt-3.5-turbo",
+    temperature: float = 0.7,
+    max_tokens: int = 1000
+) -> InferenceConfig:
+    """Create OpenAI inference configuration"""
+    return InferenceConfig(
+        provider_config=ProviderConfig(
+            provider_name="openai",
+            base_url="https://api.openai.com/v1",
+            api_key=api_key,
+            timeout_seconds=120,
+            max_retries=3
+        ),
+        model_config=ModelConfig(
+            model_id=model_id,
+            model_type="llm",
+            temperature=temperature,
+            max_tokens=max_tokens,
+            streaming=True
+        )
+    )
+
+def create_anthropic_config(
+    api_key: str,
+    model_id: str = "claude-3-sonnet-20240229",
+    max_tokens: int = 1000
+) -> InferenceConfig:
+    """Create Anthropic inference configuration"""
+    return InferenceConfig(
+        provider_config=ProviderConfig(
+            provider_name="anthropic",
+            base_url="https://api.anthropic.com",
+            api_key=api_key,
+            api_version="2023-06-01",
+            timeout_seconds=180
+        ),
+        model_config=ModelConfig(
+            model_id=model_id,
+            model_type="llm",
+            max_tokens=max_tokens
+        )
+    )
+
+def create_multi_provider_config(
+    configs: List[InferenceConfig],
+    load_balancing: str = LoadBalancingStrategy.ROUND_ROBIN
+) -> InferenceConfig:
+    """Create multi-provider configuration with failover"""
+    if not configs:
+        raise ValueError("At least one configuration is required")
+
+    primary = configs[0]
+    failover_providers = [config.primary_provider for config in configs[1:] if config.primary_provider]
+
+    return InferenceConfig(
+        provider_config=primary.provider_config,
+        model_config=primary.model_config,
+        load_balancing=load_balancing,
+        failover_providers=failover_providers,
+        auto_fallback=True
+    )
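For orientation, the sketch below exercises the factory functions and failover helpers added in this file. It is a minimal usage example, not part of the package or its documentation; the import path is inferred from the file location isa_model/inference/models/inference_config.py, and the API keys are placeholders.

    # Hypothetical usage sketch based on the definitions in the diff above.
    from isa_model.inference.models.inference_config import (
        create_openai_config,
        create_anthropic_config,
        create_multi_provider_config,
    )

    # Build a primary OpenAI config and an Anthropic fallback (placeholder keys).
    primary = create_openai_config(api_key="sk-...", model_id="gpt-4", temperature=0.2)
    fallback = create_anthropic_config(api_key="sk-ant-...")

    # Combine them: the first config stays primary, later providers
    # become failover_providers.
    config = create_multi_provider_config([primary, fallback])

    assert config.validate() == []               # provider and model configs are complete
    assert config.get_effective_timeout() == 120  # openai timeout * 1.0 for model_type "llm"

    # After three failed attempts on a rate limit, failover rotates providers.
    if config.should_use_failover("rate_limit", attempt_count=3):
        option = config.get_next_failover_option("openai", "gpt-4")
        # -> {"provider": "anthropic", "model": "gpt-4"}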