isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
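The single largest new module in 0.4.3 appears to be the legacy model-serving service: the hunk below adds exactly 717 lines, matching isa_model/inference/legacy_services/model_serving.py (+717) in the listing above. Every line in the hunk is an addition, so the diff markers are omitted and the file is shown as plain source.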
@@ -0,0 +1,717 @@
"""
Model Serving Service - Step 3 of Model Pipeline
Handles model deployment, serving, and real-time predictions
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Union
import logging
from dataclasses import dataclass, field
from datetime import datetime, timedelta
import json
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

try:
    import joblib
    JOBLIB_AVAILABLE = True
except ImportError:
    JOBLIB_AVAILABLE = False
    logging.warning("joblib not available. Model serialization will be limited.")

try:
    import pickle
    PICKLE_AVAILABLE = True
except ImportError:
    PICKLE_AVAILABLE = False

logger = logging.getLogger(__name__)

@dataclass
class ServingConfig:
    """Configuration for model serving"""
    model_id: str
    serving_mode: str = "batch"  # batch, real_time, api
    cache_predictions: bool = True
    cache_ttl_seconds: int = 3600
    batch_size: int = 1000
    enable_monitoring: bool = True
    preprocessing_required: bool = True

@dataclass
class ServingResult:
    """Result of model serving operations"""
    success: bool
    serving_info: Dict[str, Any] = field(default_factory=dict)
    predictions: Optional[Union[List, np.ndarray, pd.DataFrame]] = None
    serving_metadata: Dict[str, Any] = field(default_factory=dict)
    performance_metrics: Dict[str, Any] = field(default_factory=dict)
    warnings: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)

class ModelCache:
    """Thread-safe model cache with TTL"""

    def __init__(self, max_size: int = 10, default_ttl: int = 3600):
        self.max_size = max_size
        self.default_ttl = default_ttl
        self.cache = {}
        self.access_times = {}
        self.creation_times = {}
        self._lock = threading.RLock()

    def get(self, model_id: str) -> Optional[Any]:
        """Get model from cache"""
        with self._lock:
            if model_id in self.cache:
                # Check TTL
                if time.time() - self.creation_times[model_id] > self.default_ttl:
                    self._remove(model_id)
                    return None

                # Update access time
                self.access_times[model_id] = time.time()
                return self.cache[model_id]
            return None

    def put(self, model_id: str, model: Any) -> None:
        """Put model in cache"""
        with self._lock:
            # Check if we need to evict
            if len(self.cache) >= self.max_size and model_id not in self.cache:
                self._evict_lru()

            self.cache[model_id] = model
            self.access_times[model_id] = time.time()
            self.creation_times[model_id] = time.time()

    def remove(self, model_id: str) -> bool:
        """Remove model from cache"""
        with self._lock:
            return self._remove(model_id)

    def _remove(self, model_id: str) -> bool:
        """Internal remove method"""
        if model_id in self.cache:
            del self.cache[model_id]
            del self.access_times[model_id]
            del self.creation_times[model_id]
            return True
        return False

    def _evict_lru(self) -> None:
        """Evict least recently used item"""
        if self.access_times:
            lru_model = min(self.access_times.items(), key=lambda x: x[1])[0]
            self._remove(lru_model)

    def clear(self) -> None:
        """Clear all cached models"""
        with self._lock:
            self.cache.clear()
            self.access_times.clear()
            self.creation_times.clear()

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics"""
        with self._lock:
            return {
                'cache_size': len(self.cache),
                'max_size': self.max_size,
                'cached_models': list(self.cache.keys()),
                'hit_rate': getattr(self, '_hit_count', 0) / max(getattr(self, '_access_count', 1), 1)
            }

class ModelServingService:
    """
    Model Serving Service - Step 3 of Model Pipeline

    Handles:
    - Model deployment and serving infrastructure
    - Real-time and batch predictions
    - Model caching and performance optimization
    - Serving monitoring and analytics
    """

    def __init__(self, cache_size: int = 10, cache_ttl: int = 3600):
        self.execution_stats = {
            'total_serving_operations': 0,
            'successful_serving_operations': 0,
            'failed_serving_operations': 0,
            'total_predictions_made': 0,
            'average_prediction_time': 0.0
        }

        # Model cache for fast serving
        self.model_cache = ModelCache(max_size=cache_size, default_ttl=cache_ttl)

        # Serving configuration for each model
        self.serving_configs = {}

        # Prediction history for monitoring
        self.prediction_history = {}

        # Thread pool for concurrent predictions (lazy initialized)
        self._thread_pool = None
        self._thread_pool_lock = threading.Lock()

        logger.info("Model Serving Service initialized")

    @property
    def thread_pool(self):
        """Lazy initialization of thread pool to avoid mutex issues"""
        if self._thread_pool is None:
            with self._thread_pool_lock:
                if self._thread_pool is None:
                    self._thread_pool = ThreadPoolExecutor(max_workers=4)
        return self._thread_pool

    def deploy_model(self,
                     model_info: Dict[str, Any],
                     serving_config: ServingConfig) -> ServingResult:
        """
        Deploy a trained model for serving

        Args:
            model_info: Information about the trained model
            serving_config: Configuration for serving

        Returns:
            ServingResult with deployment information
        """
        start_time = datetime.now()

        try:
            model_id = serving_config.model_id
            logger.info(f"Deploying model for serving: {model_id}")

            # Initialize result
            result = ServingResult(
                success=False,
                serving_metadata={
                    'start_time': start_time,
                    'model_id': model_id,
                    'serving_mode': serving_config.serving_mode
                }
            )

            # Validate model info
            if not model_info or 'processor' not in model_info:
                result.errors.append("Valid model information required for deployment")
                return self._finalize_serving_result(result, start_time)

            # Store serving configuration
            self.serving_configs[model_id] = serving_config

            # Load model into cache
            cache_result = self._load_model_to_cache(model_info, serving_config)
            if not cache_result['success']:
                result.errors.extend(cache_result['errors'])
                return self._finalize_serving_result(result, start_time)

            # Initialize prediction history
            self.prediction_history[model_id] = {
                'total_predictions': 0,
                'successful_predictions': 0,
                'failed_predictions': 0,
                'last_prediction': None,
                'deployment_time': start_time,
                'performance_metrics': {}
            }

            # Setup serving endpoint based on mode
            serving_setup = self._setup_serving_endpoint(serving_config)

            # Success
            result.success = True
            result.serving_info = {
                'model_id': model_id,
                'serving_mode': serving_config.serving_mode,
                'cache_enabled': serving_config.cache_predictions,
                'deployment_status': 'active',
                'serving_endpoint': serving_setup.get('endpoint'),
                'batch_size': serving_config.batch_size
            }

            return self._finalize_serving_result(result, start_time)

        except Exception as e:
            logger.error(f"Model deployment failed: {e}")
            result.errors.append(f"Deployment error: {str(e)}")
            return self._finalize_serving_result(result, start_time)

    def predict(self,
                model_id: str,
                input_data: Union[pd.DataFrame, Dict[str, Any], List[Dict[str, Any]]],
                prediction_config: Optional[Dict[str, Any]] = None) -> ServingResult:
        """
        Make predictions using a deployed model

        Args:
            model_id: ID of the deployed model
            input_data: Input data for prediction
            prediction_config: Optional configuration for prediction

        Returns:
            ServingResult with predictions
        """
        start_time = datetime.now()
        prediction_config = prediction_config or {}

        try:
            logger.info(f"Making predictions with model: {model_id}")

            # Initialize result
            result = ServingResult(
                success=False,
                serving_metadata={
                    'start_time': start_time,
                    'model_id': model_id,
                    'prediction_mode': 'single' if isinstance(input_data, dict) else 'batch'
                }
            )

            # Check if model is deployed
            if model_id not in self.serving_configs:
                result.errors.append(f"Model {model_id} is not deployed")
                return self._finalize_serving_result(result, start_time)

            # Get model from cache
            cached_model = self.model_cache.get(model_id)
            if not cached_model:
                result.errors.append(f"Model {model_id} not found in cache")
                return self._finalize_serving_result(result, start_time)

            # Prepare input data
            prepared_data = self._prepare_input_data(input_data, cached_model, prediction_config)
            if not prepared_data['success']:
                result.errors.extend(prepared_data['errors'])
                return self._finalize_serving_result(result, start_time)

            X_input = prepared_data['data']

            # Make predictions
            prediction_result = self._make_predictions(
                cached_model, X_input, model_id, prediction_config
            )

            if not prediction_result['success']:
                result.errors.extend(prediction_result['errors'])
                return self._finalize_serving_result(result, start_time)

            # Update serving statistics
            self._update_prediction_statistics(model_id, True, start_time)

            # Success
            result.success = True
            result.predictions = prediction_result['predictions']
            result.serving_info = {
                'model_id': model_id,
                'prediction_count': prediction_result['prediction_count'],
                'prediction_type': prediction_result['prediction_type'],
                'confidence_scores': prediction_result.get('confidence_scores'),
                'preprocessing_applied': prepared_data.get('preprocessing_applied', False)
            }

            return self._finalize_serving_result(result, start_time)

        except Exception as e:
            logger.error(f"Prediction failed: {e}")
            result.errors.append(f"Prediction error: {str(e)}")
            self._update_prediction_statistics(model_id, False, start_time)
            return self._finalize_serving_result(result, start_time)

    def batch_predict(self,
                      model_id: str,
                      input_data: pd.DataFrame,
                      batch_config: Optional[Dict[str, Any]] = None) -> ServingResult:
        """Make batch predictions efficiently"""
        batch_config = batch_config or {}
        serving_config = self.serving_configs.get(model_id)

        if not serving_config:
            return ServingResult(
                success=False,
                errors=[f"Model {model_id} not deployed"]
            )

        batch_size = batch_config.get('batch_size', serving_config.batch_size)

        # Process in batches for large datasets
        if len(input_data) > batch_size:
            return self._process_large_batch(model_id, input_data, batch_size, batch_config)
        else:
            return self.predict(model_id, input_data, batch_config)

    def get_serving_status(self, model_id: Optional[str] = None) -> Dict[str, Any]:
        """Get serving status for models"""
        try:
            if model_id:
                # Status for specific model
                if model_id not in self.serving_configs:
                    return {'error': f'Model {model_id} not deployed'}

                config = self.serving_configs[model_id]
                history = self.prediction_history.get(model_id, {})

                return {
                    'model_id': model_id,
                    'serving_mode': config.serving_mode,
                    'deployment_time': history.get('deployment_time'),
                    'total_predictions': history.get('total_predictions', 0),
                    'success_rate': self._calculate_success_rate(history),
                    'last_prediction': history.get('last_prediction'),
                    'cache_status': 'cached' if self.model_cache.get(model_id) else 'not_cached',
                    'performance_metrics': history.get('performance_metrics', {})
                }
            else:
                # Status for all deployed models
                all_status = {}
                for mid in self.serving_configs.keys():
                    all_status[mid] = self.get_serving_status(mid)

                return {
                    'deployed_models': len(self.serving_configs),
                    'cache_stats': self.model_cache.get_stats(),
                    'service_stats': self.get_execution_stats(),
                    'individual_models': all_status
                }

        except Exception as e:
            return {'error': str(e)}

    def undeploy_model(self, model_id: str) -> bool:
        """Remove model from serving"""
        try:
            # Remove from cache
            self.model_cache.remove(model_id)

            # Remove serving config
            if model_id in self.serving_configs:
                del self.serving_configs[model_id]

            # Clean up prediction history
            if model_id in self.prediction_history:
                del self.prediction_history[model_id]

            logger.info(f"Model {model_id} undeployed successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to undeploy model {model_id}: {e}")
            return False

    def save_model(self,
                   model_id: str,
                   file_path: str,
                   format: str = "joblib") -> bool:
        """Save a deployed model to disk"""
        try:
            cached_model = self.model_cache.get(model_id)
            if not cached_model:
                logger.error(f"Model {model_id} not found in cache")
                return False

            if format == "joblib" and JOBLIB_AVAILABLE:
                joblib.dump(cached_model['model_instance'], file_path)
            elif format == "pickle" and PICKLE_AVAILABLE:
                with open(file_path, 'wb') as f:
                    pickle.dump(cached_model['model_instance'], f)
            else:
                logger.error(f"Unsupported format {format} or library not available")
                return False

            logger.info(f"Model {model_id} saved to {file_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to save model: {e}")
            return False

    def load_model_from_file(self,
                             model_id: str,
                             file_path: str,
                             format: str = "joblib",
                             serving_config: Optional[ServingConfig] = None) -> bool:
        """Load a model from disk for serving"""
        try:
            if format == "joblib" and JOBLIB_AVAILABLE:
                model_instance = joblib.load(file_path)
            elif format == "pickle" and PICKLE_AVAILABLE:
                with open(file_path, 'rb') as f:
                    model_instance = pickle.load(f)
            else:
                logger.error(f"Unsupported format {format} or library not available")
                return False

            # Create model info structure
            model_info = {
                'model_instance': model_instance,
                'processor': None,  # Would need to be provided separately
                'model_id': model_id,
                'loaded_from_file': True,
                'file_path': file_path
            }

            # Use default serving config if not provided
            if not serving_config:
                serving_config = ServingConfig(model_id=model_id)

            # Deploy the loaded model
            result = self.deploy_model(model_info, serving_config)
            return result.success

        except Exception as e:
            logger.error(f"Failed to load model from file: {e}")
            return False

    def _load_model_to_cache(self,
                             model_info: Dict[str, Any],
                             serving_config: ServingConfig) -> Dict[str, Any]:
        """Load model to cache for serving"""
        try:
            model_id = serving_config.model_id

            # Package model with metadata
            cached_model = {
                'model_instance': model_info.get('model_instance'),
                'processor': model_info.get('processor'),
                'problem_type': model_info.get('problem_type'),
                'target_column': model_info.get('target_column'),
                'training_config': model_info.get('training_config'),
                'loaded_at': datetime.now()
            }

            # Add to cache
            self.model_cache.put(model_id, cached_model)

            return {'success': True}

        except Exception as e:
            return {
                'success': False,
                'errors': [f'Failed to load model to cache: {str(e)}']
            }

    def _setup_serving_endpoint(self, serving_config: ServingConfig) -> Dict[str, Any]:
        """Setup serving endpoint based on configuration"""
        # For now, return basic endpoint info
        # In a full implementation, this would setup REST API endpoints
        return {
            'endpoint': f'/predict/{serving_config.model_id}',
            'methods': ['POST'],
            'serving_mode': serving_config.serving_mode
        }

    def _prepare_input_data(self,
                            input_data: Union[pd.DataFrame, Dict, List],
                            cached_model: Dict[str, Any],
                            config: Dict[str, Any]) -> Dict[str, Any]:
        """Prepare input data for prediction"""
        try:
            # Convert input to DataFrame if needed
            if isinstance(input_data, dict):
                df_input = pd.DataFrame([input_data])
            elif isinstance(input_data, list):
                df_input = pd.DataFrame(input_data)
            else:
                df_input = input_data.copy()

            # Apply preprocessing if available and required
            preprocessing_applied = False
            processor = cached_model.get('processor')

            if processor and hasattr(processor, '_basic_preprocessing'):
                df_input = processor._basic_preprocessing(df_input)
                preprocessing_applied = True

            return {
                'success': True,
                'data': df_input,
                'preprocessing_applied': preprocessing_applied
            }

        except Exception as e:
            return {
                'success': False,
                'errors': [f'Data preparation failed: {str(e)}']
            }

    def _make_predictions(self,
                          cached_model: Dict[str, Any],
                          X_input: pd.DataFrame,
                          model_id: str,
                          config: Dict[str, Any]) -> Dict[str, Any]:
        """Make actual predictions"""
        try:
            model_instance = cached_model['model_instance']

            if not model_instance:
                return {
                    'success': False,
                    'errors': ['Model instance not available']
                }

            # Make predictions
            predictions = model_instance.predict(X_input)

            result = {
                'success': True,
                'predictions': predictions.tolist() if hasattr(predictions, 'tolist') else predictions,
                'prediction_count': len(predictions) if hasattr(predictions, '__len__') else 1,
                'prediction_type': 'batch' if len(X_input) > 1 else 'single'
            }

            # Add confidence scores if available
            if config.get('include_probabilities', False) and hasattr(model_instance, 'predict_proba'):
                try:
                    probabilities = model_instance.predict_proba(X_input)
                    result['confidence_scores'] = probabilities.tolist()
                except:
                    pass  # Skip if not applicable

            return result

        except Exception as e:
            return {
                'success': False,
                'errors': [f'Prediction execution failed: {str(e)}']
            }

    def _process_large_batch(self,
                             model_id: str,
                             input_data: pd.DataFrame,
                             batch_size: int,
                             config: Dict[str, Any]) -> ServingResult:
        """Process large datasets in batches"""
        all_predictions = []
        total_batches = (len(input_data) + batch_size - 1) // batch_size

        start_time = datetime.now()

        try:
            for i in range(0, len(input_data), batch_size):
                batch_data = input_data.iloc[i:i+batch_size]

                batch_result = self.predict(model_id, batch_data, config)

                if batch_result.success:
                    all_predictions.extend(batch_result.predictions)
                else:
                    return ServingResult(
                        success=False,
                        errors=[f"Batch {i//batch_size + 1} failed: {batch_result.errors}"]
                    )

            return ServingResult(
                success=True,
                predictions=all_predictions,
                serving_info={
                    'model_id': model_id,
                    'total_predictions': len(all_predictions),
                    'batch_count': total_batches,
                    'batch_size': batch_size
                },
                performance_metrics={
                    'total_duration': (datetime.now() - start_time).total_seconds(),
                    'predictions_per_second': len(all_predictions) / max((datetime.now() - start_time).total_seconds(), 0.001)
                }
            )

        except Exception as e:
            return ServingResult(
                success=False,
                errors=[f"Batch processing failed: {str(e)}"]
            )

    def _update_prediction_statistics(self,
                                      model_id: str,
                                      success: bool,
                                      start_time: datetime):
        """Update prediction statistics"""
        if model_id in self.prediction_history:
            history = self.prediction_history[model_id]
            history['total_predictions'] += 1
            history['last_prediction'] = datetime.now()

            if success:
                history['successful_predictions'] += 1
            else:
                history['failed_predictions'] += 1

            # Update average prediction time
            duration = (datetime.now() - start_time).total_seconds()
            old_avg = history['performance_metrics'].get('average_prediction_time', 0)
            total = history['total_predictions']
            history['performance_metrics']['average_prediction_time'] = (old_avg * (total - 1) + duration) / total

    def _calculate_success_rate(self, history: Dict[str, Any]) -> float:
        """Calculate success rate for predictions"""
        total = history.get('total_predictions', 0)
        successful = history.get('successful_predictions', 0)
        return successful / max(total, 1)

    def _finalize_serving_result(self,
                                 result: ServingResult,
                                 start_time: datetime) -> ServingResult:
        """Finalize serving result with timing and stats"""
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()

        # Update performance metrics
        result.performance_metrics['serving_duration_seconds'] = duration
        result.performance_metrics['end_time'] = end_time
        result.serving_metadata['end_time'] = end_time
        result.serving_metadata['duration_seconds'] = duration

        # Update execution stats
        self.execution_stats['total_serving_operations'] += 1
        if result.success:
            self.execution_stats['successful_serving_operations'] += 1

            # Count predictions
            if result.predictions is not None:
                if hasattr(result.predictions, '__len__'):
                    self.execution_stats['total_predictions_made'] += len(result.predictions)
                else:
                    self.execution_stats['total_predictions_made'] += 1
        else:
            self.execution_stats['failed_serving_operations'] += 1

        # Update average prediction time
        total = self.execution_stats['total_serving_operations']
        old_avg = self.execution_stats['average_prediction_time']
        self.execution_stats['average_prediction_time'] = (old_avg * (total - 1) + duration) / total

        logger.info(f"Serving completed: success={result.success}, duration={duration:.2f}s")
        return result

    def get_execution_stats(self) -> Dict[str, Any]:
        """Get service execution statistics"""
        return {
            **self.execution_stats,
            'success_rate': (
                self.execution_stats['successful_serving_operations'] /
                max(1, self.execution_stats['total_serving_operations'])
            ),
            'average_predictions_per_operation': (
                self.execution_stats['total_predictions_made'] /
                max(1, self.execution_stats['successful_serving_operations'])
            )
        }

    def cleanup(self):
        """Cleanup serving resources"""
        try:
            # Clear model cache
            self.model_cache.clear()

            # Shutdown thread pool
            self.thread_pool.shutdown(wait=True)

            logger.info("Model Serving Service cleanup completed")
        except Exception as e:
            logger.warning(f"Serving service cleanup warning: {e}")
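For orientation, the snippet below is a minimal usage sketch of the service above; it is not part of the diff. It assumes scikit-learn is installed, that the module imports under the path listed earlier, and uses an illustrative model_id and toy data. Note that deploy_model() requires a 'processor' key in model_info (None is accepted) and reads the estimator from 'model_instance'.

# Usage sketch (illustrative, not from the package).
import pandas as pd
from sklearn.linear_model import LogisticRegression

from isa_model.inference.legacy_services.model_serving import (
    ModelServingService,
    ServingConfig,
)

# Train a toy classifier; any object exposing .predict() would work.
X = pd.DataFrame({"x1": [0, 1, 2, 3], "x2": [1, 0, 1, 0]})
y = [0, 0, 1, 1]
model = LogisticRegression().fit(X, y)

service = ModelServingService(cache_size=5, cache_ttl=600)

# 'processor' must be present for validation; 'model_instance' is what gets cached.
model_info = {"model_instance": model, "processor": None}
config = ServingConfig(model_id="demo-lr", serving_mode="real_time", batch_size=100)

deployed = service.deploy_model(model_info, config)
print(deployed.success, deployed.serving_info.get("serving_endpoint"))

# Single-record prediction from a plain dict; probabilities are opt-in.
result = service.predict(
    "demo-lr",
    {"x1": 1.5, "x2": 0.0},
    prediction_config={"include_probabilities": True},
)
print(result.predictions, result.serving_info.get("confidence_scores"))

service.cleanup()  # clears the model cache and shuts down the thread pool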