isa-model 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +5 -0
- isa_model/core/model_manager.py +143 -0
- isa_model/core/model_registry.py +115 -0
- isa_model/core/model_router.py +226 -0
- isa_model/core/model_storage.py +133 -0
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +202 -0
- isa_model/core/storage/hf_storage.py +0 -0
- isa_model/core/storage/local_storage.py +0 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +120 -0
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +18 -0
- isa_model/deployment/gpu_int8_ds8/app/server.py +66 -0
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +43 -0
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +35 -0
- isa_model/inference/__init__.py +11 -0
- isa_model/inference/adapter/unified_api.py +248 -0
- isa_model/inference/ai_factory.py +359 -0
- isa_model/inference/base.py +46 -0
- isa_model/inference/providers/__init__.py +19 -0
- isa_model/inference/providers/base_provider.py +30 -0
- isa_model/inference/providers/model_cache_manager.py +341 -0
- isa_model/inference/providers/ollama_provider.py +73 -0
- isa_model/inference/providers/openai_provider.py +101 -0
- isa_model/inference/providers/replicate_provider.py +107 -0
- isa_model/inference/providers/triton_provider.py +439 -0
- isa_model/inference/services/__init__.py +14 -0
- isa_model/inference/services/audio/base_stt_service.py +91 -0
- isa_model/inference/services/audio/base_tts_service.py +136 -0
- isa_model/inference/services/audio/openai_tts_service.py +71 -0
- isa_model/inference/services/base_service.py +106 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +97 -0
- isa_model/inference/services/embedding/openai_embed_service.py +0 -0
- isa_model/inference/services/llm/__init__.py +12 -0
- isa_model/inference/services/llm/base_llm_service.py +134 -0
- isa_model/inference/services/llm/ollama_llm_service.py +99 -0
- isa_model/inference/services/llm/openai_llm_service.py +138 -0
- isa_model/inference/services/others/table_transformer_service.py +61 -0
- isa_model/inference/services/vision/__init__.py +12 -0
- isa_model/inference/services/vision/helpers/image_utils.py +58 -0
- isa_model/inference/services/vision/helpers/text_splitter.py +46 -0
- isa_model/inference/services/vision/ollama_vision_service.py +60 -0
- isa_model/inference/services/vision/openai_vision_service.py +80 -0
- isa_model/inference/services/vision/replicate_image_gen_service.py +185 -0
- isa_model/inference/utils/conversion/bge_rerank_convert.py +73 -0
- isa_model/inference/utils/conversion/onnx_converter.py +0 -0
- isa_model/inference/utils/conversion/torch_converter.py +0 -0
- isa_model/scripts/inference_tracker.py +283 -0
- isa_model/scripts/mlflow_manager.py +379 -0
- isa_model/scripts/model_registry.py +465 -0
- isa_model/scripts/start_mlflow.py +95 -0
- isa_model/scripts/training_tracker.py +257 -0
- isa_model/training/engine/llama_factory/__init__.py +39 -0
- isa_model/training/engine/llama_factory/config.py +115 -0
- isa_model/training/engine/llama_factory/data_adapter.py +284 -0
- isa_model/training/engine/llama_factory/examples/__init__.py +6 -0
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +185 -0
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +163 -0
- isa_model/training/engine/llama_factory/factory.py +331 -0
- isa_model/training/engine/llama_factory/rl.py +254 -0
- isa_model/training/engine/llama_factory/trainer.py +171 -0
- isa_model/training/image_model/configs/create_config.py +37 -0
- isa_model/training/image_model/configs/create_flux_config.py +26 -0
- isa_model/training/image_model/configs/create_lora_config.py +21 -0
- isa_model/training/image_model/prepare_massed_compute.py +97 -0
- isa_model/training/image_model/prepare_upload.py +17 -0
- isa_model/training/image_model/raw_data/create_captions.py +16 -0
- isa_model/training/image_model/raw_data/create_lora_captions.py +20 -0
- isa_model/training/image_model/raw_data/pre_processing.py +200 -0
- isa_model/training/image_model/train/train.py +42 -0
- isa_model/training/image_model/train/train_flux.py +41 -0
- isa_model/training/image_model/train/train_lora.py +57 -0
- isa_model/training/image_model/train_main.py +25 -0
- isa_model/training/llm_model/annotation/annotation_schema.py +47 -0
- isa_model/training/llm_model/annotation/processors/annotation_processor.py +126 -0
- isa_model/training/llm_model/annotation/storage/dataset_manager.py +131 -0
- isa_model/training/llm_model/annotation/storage/dataset_schema.py +44 -0
- isa_model/training/llm_model/annotation/tests/test_annotation_flow.py +109 -0
- isa_model/training/llm_model/annotation/tests/test_minio copy.py +113 -0
- isa_model/training/llm_model/annotation/tests/test_minio_upload.py +43 -0
- isa_model/training/llm_model/annotation/views/annotation_controller.py +158 -0
- isa_model-0.0.1.dist-info/METADATA +327 -0
- isa_model-0.0.1.dist-info/RECORD +86 -0
- isa_model-0.0.1.dist-info/WHEEL +5 -0
- isa_model-0.0.1.dist-info/licenses/LICENSE +21 -0
- isa_model-0.0.1.dist-info/top_level.txt +1 -0
isa_model/__init__.py
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
from typing import Dict, Optional, List, Any
|
2
|
+
import logging
|
3
|
+
from pathlib import Path
|
4
|
+
from huggingface_hub import hf_hub_download, snapshot_download
|
5
|
+
from huggingface_hub.utils import HfHubHTTPError
|
6
|
+
from .model_storage import ModelStorage, LocalModelStorage
|
7
|
+
from .model_registry import ModelRegistry, ModelType, ModelCapability
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
class ModelManager:
    """Model management service for handling model downloads, versions, and caching.

    The manager combines a storage backend (where model files are kept) with
    a registry (which records each model's type, capabilities and metadata).
    """

    def __init__(self,
                 storage: Optional[ModelStorage] = None,
                 registry: Optional[ModelRegistry] = None):
        """Create a manager.

        Args:
            storage: Backend used to persist model files. A default
                ``LocalModelStorage()`` is created when omitted (or falsy).
            registry: Registry used for model bookkeeping. A default
                ``ModelRegistry()`` is created when omitted (or falsy).
        """
        self.storage = storage or LocalModelStorage()
        self.registry = registry or ModelRegistry()

    async def get_model(self,
                        model_id: str,
                        repo_id: str,
                        model_type: ModelType,
                        capabilities: List[ModelCapability],
                        revision: Optional[str] = None,
                        force_download: bool = False) -> Optional[Path]:
        """
        Get model files, downloading if necessary

        Args:
            model_id: Unique identifier for the model
            repo_id: Hugging Face repository ID
            model_type: Type of model (LLM, embedding, etc.)
            capabilities: List of model capabilities
            revision: Specific model version/tag
            force_download: Force re-download even if cached

        Returns:
            Path to the model files, or None when the downloaded files could
            not be persisted by the storage backend.

        Raises:
            HfHubHTTPError: Re-raised (after logging) when the download fails.
        """
        # Function-scope imports: only this method needs them.
        import asyncio
        import functools

        # Serve from storage unless the caller explicitly wants a fresh copy.
        if not force_download:
            cached_path = await self.storage.load_model(model_id)
            if cached_path:
                logger.info(f"Using cached model {model_id}")
                return cached_path

        try:
            logger.info(f"Downloading model {model_id} from {repo_id}")
            model_dir = Path(f"./models/temp/{model_id}")
            model_dir.mkdir(parents=True, exist_ok=True)

            # snapshot_download is a blocking call; run it in the default
            # executor so the event loop stays responsive during the download.
            download = functools.partial(
                snapshot_download,
                repo_id=repo_id,
                revision=revision,
                local_dir=model_dir,
                local_dir_use_symlinks=False,
            )
            await asyncio.get_running_loop().run_in_executor(None, download)

            metadata = {
                "repo_id": repo_id,
                "revision": revision,
                "downloaded_at": str(model_dir.stat().st_mtime),
            }

            # Persist the files first: the registry must never advertise a
            # model whose files were not actually stored.
            saved = await self.storage.save_model(model_id, str(model_dir), metadata)
            if not saved:
                logger.error(f"Failed to store model {model_id}; it was not registered")
                return None

            registered = self.registry.register_model(
                model_id=model_id,
                model_type=model_type,
                capabilities=capabilities,
                metadata=metadata,
            )
            if not registered:
                logger.warning(f"Model {model_id} was stored but could not be registered")

            # NOTE(review): the temporary download directory is left in place,
            # as in the original implementation.
            return await self.storage.load_model(model_id)

        except HfHubHTTPError as e:
            logger.error(f"Failed to download model {model_id}: {e}")
            raise

    async def list_models(self) -> List[Dict[str, Any]]:
        """List all downloaded models with their metadata.

        Each entry merges the storage metadata with the registry info for the
        same model; registry keys win on conflict.
        """
        models = await self.storage.list_models()
        return [
            {
                "model_id": model_id,
                **metadata,
                **(self.registry.get_model_info(model_id) or {}),
            }
            for model_id, metadata in models.items()
        ]

    async def remove_model(self, model_id: str) -> bool:
        """Remove a model from storage and from the registry.

        Returns:
            True only when both the storage deletion and the registry
            removal succeeded; False otherwise (including on any exception).
        """
        try:
            storage_success = await self.storage.delete_model(model_id)
            registry_success = self.registry.unregister_model(model_id)
            return storage_success and registry_success
        except Exception as e:
            logger.error(f"Failed to remove model {model_id}: {e}")
            return False

    async def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """Get information about a specific model.

        Returns:
            Storage metadata merged with registry info (registry keys win),
            or None when neither source knows the model.
        """
        storage_info = await self.storage.get_metadata(model_id)
        registry_info = self.registry.get_model_info(model_id)

        if not storage_info and not registry_info:
            return None

        return {
            **(storage_info or {}),
            **(registry_info or {}),
        }

    async def update_model(self,
                           model_id: str,
                           repo_id: str,
                           model_type: ModelType,
                           capabilities: List[ModelCapability],
                           revision: Optional[str] = None) -> bool:
        """Update a model to a new version by forcing a re-download.

        Returns:
            True when ``get_model`` returned a path, False when it returned
            nothing or raised.
        """
        try:
            return bool(await self.get_model(
                model_id=model_id,
                repo_id=repo_id,
                model_type=model_type,
                capabilities=capabilities,
                revision=revision,
                force_download=True,
            ))
        except Exception as e:
            logger.error(f"Failed to update model {model_id}: {e}")
            return False
|
@@ -0,0 +1,115 @@
|
|
1
|
+
from typing import Dict, List, Optional, Any
|
2
|
+
from enum import Enum
|
3
|
+
import logging
|
4
|
+
from pathlib import Path
|
5
|
+
import json
|
6
|
+
|
7
|
+
logger = logging.getLogger(__name__)
|
8
|
+
|
9
|
+
class ModelCapability(str, Enum):
    """Capabilities a model can advertise.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # Text-oriented capabilities
    TEXT_GENERATION = "text_generation"
    CHAT = "chat"
    EMBEDDING = "embedding"
    RERANKING = "reranking"
    REASONING = "reasoning"

    # Image and audio capabilities
    IMAGE_GENERATION = "image_generation"
    IMAGE_ANALYSIS = "image_analysis"
    AUDIO_TRANSCRIPTION = "audio_transcription"
    IMAGE_UNDERSTANDING = "image_understanding"
|
20
|
+
|
21
|
+
class ModelType(str, Enum):
    """Broad categories of models.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # Text models
    LLM = "llm"
    EMBEDDING = "embedding"
    RERANK = "rerank"

    # Media models
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    VISION = "vision"
|
30
|
+
|
31
|
+
class ModelRegistry:
    """Registry for model metadata and capabilities, persisted as a JSON file.

    Entries are stored as ``{"type": ..., "capabilities": [...], "metadata": {...}}``
    keyed by model id. Type and capability values are always kept as plain
    strings so an entry looks the same in memory as after a reload from disk.
    """

    def __init__(self, registry_file: str = "./models/model_registry.json"):
        """Load the registry from ``registry_file``, creating it if missing."""
        self.registry_file = Path(registry_file)
        self.registry: Dict[str, Dict[str, Any]] = {}
        self._load_registry()

    @staticmethod
    def _plain(value: Any) -> Any:
        """Return the raw value of an enum member; pass other values through."""
        return value.value if isinstance(value, Enum) else value

    def _load_registry(self):
        """Load model registry from file, or create an empty one on first use."""
        if self.registry_file.exists():
            with open(self.registry_file, 'r', encoding="utf-8") as f:
                self.registry = json.load(f)
        else:
            self.registry = {}
            self._save_registry()

    def _save_registry(self):
        """Save model registry to file, creating parent directories as needed."""
        self.registry_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.registry_file, 'w', encoding="utf-8") as f:
            json.dump(self.registry, f, indent=2)

    def register_model(self,
                       model_id: str,
                       model_type: ModelType,
                       capabilities: List[ModelCapability],
                       metadata: Dict[str, Any]) -> bool:
        """Register a model with its capabilities and metadata.

        Args:
            model_id: Key under which the model is stored; an existing entry
                with the same id is replaced.
            model_type: Model type, as an enum member or its string value.
            capabilities: Capabilities, each an enum member or its string value.
            metadata: Arbitrary JSON-serializable metadata.

        Returns:
            True on success, False when registration or persistence failed.
        """
        try:
            self.registry[model_id] = {
                # Store raw values so the in-memory entry matches the JSON
                # round-trip form.
                "type": self._plain(model_type),
                "capabilities": [self._plain(cap) for cap in capabilities],
                "metadata": metadata,
            }
            self._save_registry()
            logger.info(f"Registered model {model_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to register model {model_id}: {e}")
            return False

    def unregister_model(self, model_id: str) -> bool:
        """Unregister a model.

        Returns:
            True when the model was registered and has been removed,
            False when it was unknown or persistence failed.
        """
        try:
            if model_id in self.registry:
                del self.registry[model_id]
                self._save_registry()
                logger.info(f"Unregistered model {model_id}")
                return True
            return False
        except Exception as e:
            logger.error(f"Failed to unregister model {model_id}: {e}")
            return False

    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """Get the registry entry for ``model_id``, or None when unknown."""
        return self.registry.get(model_id)

    def get_models_by_type(self, model_type: ModelType) -> Dict[str, Dict[str, Any]]:
        """Get all models of a specific type (enum member or string value)."""
        wanted = self._plain(model_type)
        return {
            model_id: info
            for model_id, info in self.registry.items()
            if info.get("type") == wanted
        }

    def get_models_by_capability(self, capability: ModelCapability) -> Dict[str, Dict[str, Any]]:
        """Get all models with a specific capability (enum member or string value)."""
        wanted = self._plain(capability)
        return {
            model_id: info
            for model_id, info in self.registry.items()
            if wanted in info.get("capabilities", [])
        }

    def has_capability(self, model_id: str, capability: ModelCapability) -> bool:
        """Check if a model has a specific capability; unknown models yield False."""
        model_info = self.get_model_info(model_id)
        if not model_info:
            return False
        return self._plain(capability) in model_info.get("capabilities", [])

    def list_models(self) -> Dict[str, Dict[str, Any]]:
        """List all registered models.

        The returned mapping is the registry's own dictionary, not a copy.
        """
        return self.registry
|
@@ -0,0 +1,226 @@
|
|
1
|
+
import random
|
2
|
+
import time
|
3
|
+
from typing import Dict, List, Any, Optional, Callable
|
4
|
+
import threading
|
5
|
+
|
6
|
+
class ModelRouter:
    """
    Routes requests to appropriate model instances.

    Supported strategy names (the keys of ``routing_strategies``):
    - ``round_robin``
    - ``weighted_random``
    - ``least_connections``
    - ``least_response_time``
    - ``dynamic_load``

    Public methods take ``self.lock`` (a re-entrant lock) while reading or
    updating the routing tables and per-model statistics.
    """

    def __init__(self, registry):
        """Create a router.

        Args:
            registry: Stored on the instance as ``self.registry``; it is not
                otherwise used by this class.
        """
        self.registry = registry
        self.model_stats = {}  # Track performance metrics for each model
        self.lock = threading.RLock()

        # Maps model_type -> list of model_ids of that type
        self.model_type_mapping = {}

        # Maps routing_strategy_name -> routing_function
        self.routing_strategies = {
            "round_robin": self._route_round_robin,
            "weighted_random": self._route_weighted_random,
            "least_connections": self._route_least_connections,
            "least_response_time": self._route_least_response_time,
            "dynamic_load": self._route_dynamic_load,
        }

        # Round-robin counters for each model type
        self.rr_counters = {}

    def register_model_type(self, model_type: str, model_ids: List[str], weights: Optional[List[float]] = None):
        """Register models of a specific type with optional weights.

        Args:
            model_type: Type name the models are registered under; replaces
                any previous registration for that type.
            model_ids: Ids of the models serving this type.
            weights: Optional per-model weights, matched to ``model_ids`` by
                position. Missing weights default to 1.0.
        """
        with self.lock:
            # Keep a private copy: if the caller later mutates its list, the
            # routing table must not gain ids that have no stats entry.
            ids = list(model_ids)
            self.model_type_mapping[model_type] = ids

            for i, model_id in enumerate(ids):
                weight = weights[i] if weights and i < len(weights) else 1.0

                if model_id not in self.model_stats:
                    self.model_stats[model_id] = {
                        "active_connections": 0,
                        "total_requests": 0,
                        # Number of releases that reported a response time;
                        # this is the sample count behind avg_response_time.
                        "completed_requests": 0,
                        "avg_response_time": 0,
                        "weight": weight,
                        "last_used": 0,
                    }
                else:
                    # Update weight if model already exists
                    self.model_stats[model_id]["weight"] = weight

            # Restart round-robin from the first model
            self.rr_counters[model_type] = 0

    def route_request(self, model_type: str, routing_strategy: str = "round_robin") -> Optional[str]:
        """
        Route a request to an appropriate model of the given type

        Args:
            model_type: Type of model needed
            routing_strategy: Strategy to use for routing; unknown names
                fall back to round-robin

        Returns:
            model_id: ID of the model to use, or None if no models available
        """
        with self.lock:
            if not self.model_type_mapping.get(model_type):
                return None

            routing_func = self.routing_strategies.get(routing_strategy, self._route_round_robin)
            model_id = routing_func(model_type)

            if model_id:
                stats = self.model_stats[model_id]
                stats["active_connections"] += 1
                stats["total_requests"] += 1
                stats["last_used"] = time.time()

            return model_id

    def release_connection(self, model_id: str, response_time: Optional[float] = None):
        """Release a connection and update stats.

        Args:
            model_id: Model whose connection is being released; unknown ids
                are ignored.
            response_time: Observed response time for the finished request.
                When given, it is folded into the model's running average.
        """
        with self.lock:
            stats = self.model_stats.get(model_id)
            if stats is None:
                return

            stats["active_connections"] = max(0, stats["active_connections"] - 1)

            if response_time is None:
                return

            # Average over completed requests only. Dividing by
            # total_requests would count requests still in flight and drag
            # the mean down while several requests are active.
            completed = stats.get("completed_requests", 0) + 1
            stats["completed_requests"] = completed
            previous = stats["avg_response_time"]
            stats["avg_response_time"] = previous + (response_time - previous) / completed

    # Routing strategies
    def _route_round_robin(self, model_type: str) -> Optional[str]:
        """Simple round-robin routing"""
        models = self.model_type_mapping.get(model_type, [])
        if not models:
            return None

        # The modulo keeps the index valid even if the counter is stale or
        # missing for this type.
        index = self.rr_counters.get(model_type, 0) % len(models)
        self.rr_counters[model_type] = (index + 1) % len(models)

        return models[index]

    def _route_weighted_random(self, model_type: str) -> Optional[str]:
        """Weighted random selection based on configured weights"""
        models = self.model_type_mapping.get(model_type, [])
        if not models:
            return None

        weights = [self.model_stats[model_id]["weight"] for model_id in models]

        # Pick a point in [0, total] and walk the cumulative weights to it
        total = sum(weights)
        r = random.uniform(0, total)
        upto = 0

        for i, w in enumerate(weights):
            upto += w
            if upto >= r:
                return models[i]

        # Fallback for floating-point round-off in the cumulative sum
        return models[-1]

    def _route_least_connections(self, model_type: str) -> Optional[str]:
        """Route to the model with the fewest active connections"""
        models = self.model_type_mapping.get(model_type, [])
        if not models:
            return None

        min_connections = float('inf')
        selected_model = None

        # Strict "<" means the earliest registered model wins ties
        for model_id in models:
            connections = self.model_stats[model_id]["active_connections"]
            if connections < min_connections:
                min_connections = connections
                selected_model = model_id

        return selected_model

    def _route_least_response_time(self, model_type: str) -> Optional[str]:
        """Route to the model with the lowest average response time"""
        models = self.model_type_mapping.get(model_type, [])
        if not models:
            return None

        min_response_time = float('inf')
        selected_model = None

        for model_id in models:
            response_time = self.model_stats[model_id]["avg_response_time"]
            # Skip models with no data yet
            if response_time == 0:
                continue

            if response_time < min_response_time:
                min_response_time = response_time
                selected_model = model_id

        # If no model has response time data, fall back to least connections
        if selected_model is None:
            return self._route_least_connections(model_type)

        return selected_model

    def _route_dynamic_load(self, model_type: str) -> Optional[str]:
        """
        Dynamic load balancing based on a combination of:
        - Connection count
        - Response time
        - Recent usage
        """
        models = self.model_type_mapping.get(model_type, [])
        if not models:
            return None

        # Calculate a score for each model (lower is better)
        best_score = float('inf')
        selected_model = None
        now = time.time()

        for model_id in models:
            stats = self.model_stats[model_id]

            # Normalize each factor between 0 and 1
            connections = stats["active_connections"]
            conn_score = connections / (connections + 1)  # Approaches 1 as connections increase

            resp_time = stats["avg_response_time"]
            # Max expected response time (adjust as needed)
            max_resp_time = 5.0
            resp_score = min(1.0, resp_time / max_resp_time)

            # Time since last use (for distributing load); never-used models
            # are treated as idle for the full 60s window
            recency = now - stats["last_used"] if stats["last_used"] > 0 else 60
            recency_score = 1.0 - min(1.0, recency / 60.0)  # Unused for 60s approaches 0

            # Combined score (lower is better)
            # Weights can be adjusted based on importance
            score = (0.4 * conn_score) + (0.4 * resp_score) + (0.2 * recency_score)

            if score < best_score:
                best_score = score
                selected_model = model_id

        return selected_model
|
@@ -0,0 +1,133 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Optional, Dict, Any, BinaryIO
|
3
|
+
from pathlib import Path
|
4
|
+
import logging
|
5
|
+
import json
|
6
|
+
import shutil
|
7
|
+
import os
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
class ModelStorage(ABC):
    """Abstract interface that every model storage backend implements.

    All operations are coroutines; concrete subclasses must override each of
    them before they can be instantiated.
    """

    @abstractmethod
    async def save_model(self, model_id: str, model_path: str, metadata: Dict[str, Any]) -> bool:
        """Persist the files at ``model_path`` together with ``metadata`` under ``model_id``."""
        ...

    @abstractmethod
    async def load_model(self, model_id: str) -> Optional[Path]:
        """Return the location of the stored files for ``model_id``, if any."""
        ...

    @abstractmethod
    async def delete_model(self, model_id: str) -> bool:
        """Remove the stored files and metadata for ``model_id``."""
        ...

    @abstractmethod
    async def get_metadata(self, model_id: str) -> Optional[Dict[str, Any]]:
        """Return the metadata recorded for ``model_id``, if any."""
        ...

    @abstractmethod
    async def list_models(self) -> Dict[str, Dict[str, Any]]:
        """Return a mapping of every stored model id to its metadata."""
        ...
|
38
|
+
|
39
|
+
class LocalModelStorage(ModelStorage):
    """Local file system based model storage.

    Model files live under ``<base_dir>/models/<model_id>`` and the metadata
    of all models is kept in ``<base_dir>/model_metadata.json``.
    """

    def __init__(self, base_dir: str = "./models"):
        """Prepare the storage directories and load existing metadata.

        Args:
            base_dir: Root directory of the storage.
        """
        self.base_dir = Path(base_dir)
        self.models_dir = self.base_dir / "models"
        self.metadata_file = self.base_dir / "model_metadata.json"
        self._ensure_directories()
        self._load_metadata()

    def _ensure_directories(self):
        """Ensure required directories exist"""
        self.models_dir.mkdir(parents=True, exist_ok=True)

    def _load_metadata(self):
        """Load model metadata from file, or create an empty file on first use."""
        if self.metadata_file.exists():
            with open(self.metadata_file, 'r', encoding="utf-8") as f:
                self.metadata = json.load(f)
        else:
            self.metadata = {}
            self._save_metadata()

    def _save_metadata(self):
        """Save model metadata to file"""
        with open(self.metadata_file, 'w', encoding="utf-8") as f:
            json.dump(self.metadata, f, indent=2)

    def _model_dir(self, model_id: str) -> Path:
        """Return the directory for ``model_id`` inside ``models_dir``.

        Raises:
            ValueError: If the id is empty or would resolve to ``models_dir``
                itself or to a location outside it (e.g. ``""``, ``".."`` or
                an absolute path). Without this check ``delete_model`` could
                remove the whole models directory or unrelated files.
        """
        candidate = self.models_dir / model_id
        # Lexical normalisation only (no symlink resolution), so the check
        # does not depend on what currently exists on disk.
        root = Path(os.path.abspath(self.models_dir))
        if root not in Path(os.path.abspath(candidate)).parents:
            raise ValueError(f"Invalid model id: {model_id!r}")
        return candidate

    async def save_model(self, model_id: str, model_path: str, metadata: Dict[str, Any]) -> bool:
        """Copy model files into storage and record their metadata.

        Args:
            model_id: Identifier (and sub-directory name) of the model.
            model_path: A single file, or a directory whose contents are
                copied into the model's directory (merging with any files
                already there).
            metadata: Metadata to record; ``storage_path`` and ``saved_at``
                are added to it.

        Returns:
            True on success, False on any failure (which is logged).
        """
        try:
            model_dir = self._model_dir(model_id)
            source_path = Path(model_path)

            # Copy model files
            if source_path.is_file():
                model_dir.mkdir(parents=True, exist_ok=True)
                shutil.copy2(source_path, model_dir / source_path.name)
            else:
                shutil.copytree(source_path, model_dir, dirs_exist_ok=True)

            # Update metadata
            self.metadata[model_id] = {
                **metadata,
                "storage_path": str(model_dir),
                "saved_at": str(model_dir.stat().st_mtime),
            }
            self._save_metadata()

            logger.info(f"Saved model {model_id} to {model_dir}")
            return True

        except Exception as e:
            logger.error(f"Failed to save model {model_id}: {e}")
            return False

    async def load_model(self, model_id: str) -> Optional[Path]:
        """Return the directory holding the model's files.

        Returns:
            The model directory, or None when it does not exist, the id is
            invalid, or an error occurred.
        """
        try:
            model_dir = self._model_dir(model_id)
            if not model_dir.exists():
                logger.warning(f"Model {model_id} not found at {model_dir}")
                return None

            return model_dir

        except Exception as e:
            logger.error(f"Failed to load model {model_id}: {e}")
            return None

    async def delete_model(self, model_id: str) -> bool:
        """Delete model files and metadata.

        Returns:
            True when nothing is left of the model (including when it did not
            exist), False when the id is invalid or deletion failed.
        """
        try:
            model_dir = self._model_dir(model_id)
            if model_dir.exists():
                shutil.rmtree(model_dir)

            if model_id in self.metadata:
                del self.metadata[model_id]
                self._save_metadata()

            logger.info(f"Deleted model {model_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to delete model {model_id}: {e}")
            return False

    async def get_metadata(self, model_id: str) -> Optional[Dict[str, Any]]:
        """Get the recorded metadata for ``model_id``, or None when unknown."""
        return self.metadata.get(model_id)

    async def list_models(self) -> Dict[str, Dict[str, Any]]:
        """List all stored models with their metadata.

        The returned mapping is the storage's own dictionary, not a copy.
        """
        return self.metadata
|
File without changes
|