isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,338 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Runtime Management for Self-Owned Deployed Services
|
3
|
-
|
4
|
-
This module manages the runtime aspects of self-owned deployed model services.
|
5
|
-
It does NOT handle third-party API services (OpenAI, Replicate) - those are
|
6
|
-
managed in the inference module.
|
7
|
-
|
8
|
-
Only for services deployed by ISADeploymentService or similar self-owned deployments.
|
9
|
-
"""
|
10
|
-
|
11
|
-
import asyncio
|
12
|
-
import logging
|
13
|
-
import time
|
14
|
-
from typing import Dict, List, Optional, Any, Union
|
15
|
-
from dataclasses import dataclass, field
|
16
|
-
from datetime import datetime, timedelta
|
17
|
-
import httpx
|
18
|
-
from pathlib import Path
|
19
|
-
|
20
|
-
from ...core.types import (
|
21
|
-
ServiceStatus,
|
22
|
-
DeploymentPlatform,
|
23
|
-
HealthMetrics,
|
24
|
-
ServiceMetrics,
|
25
|
-
ResourceRequirements
|
26
|
-
)
|
27
|
-
|
28
|
-
logger = logging.getLogger(__name__)
|
29
|
-
|
30
|
-
|
31
|
-
@dataclass
class DeployedService:
    """Runtime record describing one self-owned deployed service.

    The first five fields are required; every other field has a default
    so a record can be created before any health or usage data exists.
    """

    # --- identity and location (required) ---
    service_id: str
    deployment_id: str
    model_id: str
    platform: DeploymentPlatform
    endpoint_url: str

    # --- lifecycle state; a freshly constructed record starts as PENDING ---
    status: ServiceStatus = ServiceStatus.PENDING

    # --- optional connection details ---
    health_check_url: Optional[str] = None
    api_key: Optional[str] = None
    resource_requirements: Optional[ResourceRequirements] = None

    # Free-form extra data; the factory gives each instance its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Naive local timestamp taken when the record is constructed.
    created_at: datetime = field(default_factory=datetime.now)

    # --- filled in later by health checks / metric updates ---
    last_health_check: Optional[datetime] = None
    health_metrics: Optional[HealthMetrics] = None
    service_metrics: Optional[ServiceMetrics] = None
|
48
|
-
|
49
|
-
|
50
|
-
class DeployedServiceManager:
|
51
|
-
"""
|
52
|
-
Manages runtime aspects of self-owned deployed services.
|
53
|
-
|
54
|
-
Features:
|
55
|
-
- Health monitoring for deployed services
|
56
|
-
- Service discovery and status tracking
|
57
|
-
- Runtime metrics collection
|
58
|
-
- Service lifecycle management
|
59
|
-
|
60
|
-
Example:
|
61
|
-
```python
|
62
|
-
from isa_model.deployment.runtime import DeployedServiceManager
|
63
|
-
|
64
|
-
manager = DeployedServiceManager()
|
65
|
-
|
66
|
-
# Register a newly deployed service
|
67
|
-
service = await manager.register_deployed_service(
|
68
|
-
service_id="gemma-4b-alpaca-v1-prod",
|
69
|
-
deployment_id="gemma-4b-alpaca-v1-int8-20241230-143022",
|
70
|
-
model_id="gemma-4b-alpaca-v1",
|
71
|
-
platform=DeploymentPlatform.RUNPOD,
|
72
|
-
endpoint_url="https://api.runpod.ai/v2/xyz123/inference"
|
73
|
-
)
|
74
|
-
|
75
|
-
# Monitor health
|
76
|
-
health = await manager.check_service_health(service.service_id)
|
77
|
-
```
|
78
|
-
"""
|
79
|
-
|
80
|
-
def __init__(self, storage_backend: str = "local"):
    """Create a manager with an empty service registry.

    Args:
        storage_backend: Label of the storage backend. It is stored on
            the instance and reported in the start-up log line.
    """
    # Polling cadence and per-request timeout, both in seconds.
    self.health_check_interval, self.health_check_timeout = 60, 30

    self.storage_backend = storage_backend

    # Both registries are keyed by service_id.
    self.services: Dict[str, DeployedService] = {}
    self._monitoring_tasks: Dict[str, asyncio.Task] = {}

    logger.info(f"DeployedServiceManager initialized with {storage_backend} backend")
|
89
|
-
|
90
|
-
async def register_deployed_service(self,
                                    service_id: str,
                                    deployment_id: str,
                                    model_id: str,
                                    platform: DeploymentPlatform,
                                    endpoint_url: str,
                                    health_check_url: Optional[str] = None,
                                    api_key: Optional[str] = None,
                                    resource_requirements: Optional[ResourceRequirements] = None,
                                    metadata: Optional[Dict[str, Any]] = None) -> DeployedService:
    """Add a self-owned service to the registry and begin monitoring it.

    Args:
        service_id: Registry key for the service.
        deployment_id: Identifier of the deployment backing the service.
        model_id: Identifier of the served model.
        platform: Platform the service runs on.
        endpoint_url: Base URL of the service.
        health_check_url: URL polled by health checks. When omitted it is
            derived as ``<endpoint_url>/health``.
        api_key: Optional key stored on the record.
        resource_requirements: Optional resource description.
        metadata: Optional extra data; a falsy value becomes ``{}``.

    Returns:
        The new record, stored with status ``DEPLOYING``.
    """
    if health_check_url is None:
        # Join with exactly one slash regardless of how the endpoint ends.
        joiner = "" if endpoint_url.endswith('/') else "/"
        health_check_url = f"{endpoint_url}{joiner}health"

    record = DeployedService(
        status=ServiceStatus.DEPLOYING,
        service_id=service_id,
        deployment_id=deployment_id,
        model_id=model_id,
        platform=platform,
        endpoint_url=endpoint_url,
        health_check_url=health_check_url,
        api_key=api_key,
        resource_requirements=resource_requirements,
        metadata=metadata if metadata else {},
    )

    # Store first: the monitor loop only runs while the id is registered.
    self.services[service_id] = record
    await self._start_health_monitoring(service_id)

    logger.info(f"Registered deployed service: {service_id} on {platform.value}")
    return record
|
129
|
-
|
130
|
-
async def get_service(self, service_id: str) -> Optional[DeployedService]:
    """Look up a registered service by id.

    Returns:
        The stored record, or ``None`` when the id is not registered.
    """
    registry = self.services
    match = registry.get(service_id)
    return match
|
133
|
-
|
134
|
-
async def list_services(self,
                        platform: Optional[DeploymentPlatform] = None,
                        status: Optional[ServiceStatus] = None) -> List[DeployedService]:
    """List registered services, optionally filtered.

    Args:
        platform: Keep only services on this platform. ``None`` disables
            the filter.
        status: Keep only services in this status. ``None`` disables the
            filter.

    Returns:
        A new list of matching records in registry (insertion) order.
    """
    # Compare against None rather than relying on truthiness: a filter
    # value that happens to be falsy (e.g. an int- or str-backed enum
    # member equal to 0 or "") must still be applied.
    return [
        service
        for service in self.services.values()
        if (platform is None or service.platform == platform)
        and (status is None or service.status == status)
    ]
|
147
|
-
|
148
|
-
async def check_service_health(self, service_id: str) -> Optional[HealthMetrics]:
    """Run one health check against a registered service.

    Sends a GET to the service's ``health_check_url`` (with a bearer
    token when the record has an ``api_key``), records the outcome on the
    service record and returns it.

    Status transitions applied to the record:
        * HTTP 200 moves ``DEPLOYING`` or ``UNHEALTHY`` to ``HEALTHY``, so
          a service that failed earlier checks can recover.
        * A non-200 response moves ``HEALTHY`` to ``UNHEALTHY``.
        * A request error moves any status except ``STOPPED`` to
          ``UNHEALTHY``.
        * ``STOPPED`` is never overwritten by a health check.

    Args:
        service_id: Registry key of the service to check.

    Returns:
        The metrics for this check, or ``None`` when the service is not
        registered or has no health check URL.
    """
    service = self.services.get(service_id)
    if not service or not service.health_check_url:
        return None

    # Monotonic clock: the measured duration must not be affected by
    # wall-clock adjustments.
    started = time.monotonic()

    def elapsed_ms() -> int:
        return int((time.monotonic() - started) * 1000)

    try:
        async with httpx.AsyncClient(timeout=self.health_check_timeout) as client:
            headers = {}
            if service.api_key:
                headers["Authorization"] = f"Bearer {service.api_key}"

            response = await client.get(service.health_check_url, headers=headers)

            response_time_ms = elapsed_ms()
            is_healthy = response.status_code == 200

            # Optional extra metrics from a JSON body. Parsing is best
            # effort: a malformed body, or one that is not a JSON object,
            # yields no extra metrics and must not fail the check.
            metrics_data: Dict[str, Any] = {}
            if response.headers.get('content-type', '').startswith('application/json'):
                try:
                    payload = response.json()
                except ValueError:
                    payload = None
                if isinstance(payload, dict):
                    metrics_data = payload

            health_metrics = HealthMetrics(
                is_healthy=is_healthy,
                response_time_ms=response_time_ms,
                status_code=response.status_code,
                cpu_usage_percent=metrics_data.get('cpu_usage'),
                memory_usage_mb=metrics_data.get('memory_usage_mb'),
                gpu_usage_percent=metrics_data.get('gpu_usage'),
                error_message=None if is_healthy else f"HTTP {response.status_code}",
                checked_at=datetime.now()
            )

            if is_healthy and service.status in (ServiceStatus.DEPLOYING,
                                                 ServiceStatus.UNHEALTHY):
                service.status = ServiceStatus.HEALTHY
            elif not is_healthy and service.status == ServiceStatus.HEALTHY:
                service.status = ServiceStatus.UNHEALTHY

            service.last_health_check = datetime.now()
            service.health_metrics = health_metrics

            return health_metrics

    except Exception as e:
        # Boundary handler: any failure to reach or evaluate the endpoint
        # is reported as an unhealthy result instead of propagating.
        logger.error(f"Health check failed for {service_id}: {e}")

        error_metrics = HealthMetrics(
            is_healthy=False,
            response_time_ms=elapsed_ms(),
            error_message=str(e),
            checked_at=datetime.now()
        )

        # Keep STOPPED intact, consistent with the success path above.
        if service.status != ServiceStatus.STOPPED:
            service.status = ServiceStatus.UNHEALTHY
        service.last_health_check = datetime.now()
        service.health_metrics = error_metrics

        return error_metrics
|
213
|
-
|
214
|
-
async def update_service_metrics(self,
                                 service_id: str,
                                 request_count: int = 0,
                                 processing_time_ms: int = 0,
                                 error_count: int = 0,
                                 cost_usd: float = 0.0):
    """Add usage deltas to a service's accumulated runtime metrics.

    Unknown service ids are ignored. A service without metrics gets a new
    ``ServiceMetrics`` whose window starts now; every call moves the
    window end to the current time.

    Args:
        service_id: Registry key of the service.
        request_count: Requests to add to the running total.
        processing_time_ms: Processing time to add, in milliseconds.
        error_count: Errors to add to the running total.
        cost_usd: Cost to add, in US dollars.
    """
    service = self.services.get(service_id)
    if not service:
        return

    totals = service.service_metrics
    if not totals:
        totals = ServiceMetrics(window_start=datetime.now())
        service.service_metrics = totals

    totals.request_count += request_count
    totals.total_processing_time_ms += processing_time_ms
    totals.error_count += error_count
    totals.total_cost_usd += cost_usd
    totals.window_end = datetime.now()
|
235
|
-
|
236
|
-
async def stop_service(self, service_id: str) -> bool:
    """Mark a registered service as stopped and end its health monitoring.

    Returns:
        ``True`` when the service was found and marked ``STOPPED``,
        ``False`` when the id is not registered.
    """
    target = self.services.get(service_id)
    if target:
        # Cancel the monitor before changing the status.
        await self._stop_health_monitoring(service_id)
        target.status = ServiceStatus.STOPPED

        # Note: only the registry state changes here. Terminating the
        # actual service would require calling the hosting platform's API
        # (RunPod, Modal, ...), which this method does not do.

        logger.info(f"Stopped service: {service_id}")
        return True

    return False
|
253
|
-
|
254
|
-
async def remove_service(self, service_id: str) -> bool:
    """Drop a service from the registry, stopping its monitor first.

    Returns:
        ``True`` when the service was registered and has been removed,
        ``False`` otherwise.
    """
    if service_id not in self.services:
        return False

    await self._stop_health_monitoring(service_id)
    del self.services[service_id]
    logger.info(f"Removed service: {service_id}")
    return True
|
262
|
-
|
263
|
-
async def _start_health_monitoring(self, service_id: str):
    """Launch the background health-check loop for a service.

    Does nothing when a monitoring task is already recorded for the id.
    The loop runs one health check, sleeps ``health_check_interval``
    seconds, and repeats for as long as the id stays in the registry.
    """
    if service_id in self._monitoring_tasks:
        return  # a monitor task is already recorded for this id

    async def poll_until_removed():
        while service_id in self.services:
            try:
                await self.check_service_health(service_id)
                await asyncio.sleep(self.health_check_interval)
            except asyncio.CancelledError:
                # Cancellation ends the loop without raising.
                return
            except Exception as e:
                logger.error(f"Health monitoring error for {service_id}: {e}")
                # Wait a full interval before the next attempt.
                await asyncio.sleep(self.health_check_interval)

    # Keeping the task in the dict retains a reference to it and lets
    # _stop_health_monitoring cancel it later.
    self._monitoring_tasks[service_id] = asyncio.create_task(poll_until_removed())
    logger.info(f"Started health monitoring for {service_id}")
|
282
|
-
|
283
|
-
async def _stop_health_monitoring(self, service_id: str):
    """Cancel the background health-check task of a service, if any.

    The task is removed from the bookkeeping dict, cancelled, and awaited
    so it has finished by the time this coroutine returns.
    """
    if service_id not in self._monitoring_tasks:
        return

    monitor = self._monitoring_tasks.pop(service_id)
    monitor.cancel()
    try:
        await monitor
    except asyncio.CancelledError:
        # Expected result of awaiting a task that was just cancelled.
        pass
    logger.info(f"Stopped health monitoring for {service_id}")
|
293
|
-
|
294
|
-
async def get_service_status_summary(self) -> Dict[str, Any]:
    """Build an aggregate snapshot of the registry.

    Returns:
        A dict with the total number of services, one counter each for
        the HEALTHY / UNHEALTHY / DEPLOYING / STOPPED statuses (services
        in any other status only count towards the total), a
        ``platforms`` mapping of platform value to service count, and an
        ISO-formatted ``last_updated`` timestamp.
    """
    per_platform: Dict[str, int] = {}
    summary = {
        "total_services": len(self.services),
        "healthy_services": 0,
        "unhealthy_services": 0,
        "deploying_services": 0,
        "stopped_services": 0,
        "platforms": per_platform,
        "last_updated": datetime.now().isoformat()
    }

    for record in self.services.values():
        # Pick the status counter this record contributes to, if any.
        state = record.status
        if state == ServiceStatus.HEALTHY:
            counter = "healthy_services"
        elif state == ServiceStatus.UNHEALTHY:
            counter = "unhealthy_services"
        elif state == ServiceStatus.DEPLOYING:
            counter = "deploying_services"
        elif state == ServiceStatus.STOPPED:
            counter = "stopped_services"
        else:
            counter = None

        if counter is not None:
            summary[counter] += 1

        # Every record counts towards its platform, whatever its status.
        platform_name = record.platform.value
        per_platform[platform_name] = per_platform.get(platform_name, 0) + 1

    return summary
|
322
|
-
|
323
|
-
async def cleanup_old_services(self, max_age_hours: int = 24) -> int:
    """Remove stopped services whose last activity is older than a cutoff.

    Only services in status ``STOPPED`` are considered. A service's age
    is measured from its last health check; a stopped service that was
    never health-checked is measured from its creation time instead, so
    it cannot stay in the registry forever.

    Args:
        max_age_hours: Minimum age, in hours, before a stopped service is
            removed.

    Returns:
        The number of services removed.
    """
    cutoff_time = datetime.now() - timedelta(hours=max_age_hours)

    # Collect ids first: remove_service mutates self.services, which must
    # not happen while iterating over it.
    stale_ids = [
        service_id
        for service_id, service in self.services.items()
        if service.status == ServiceStatus.STOPPED
        and (service.last_health_check or service.created_at) < cutoff_time
    ]

    for service_id in stale_ids:
        await self.remove_service(service_id)

    logger.info(f"Cleaned up {len(stale_ids)} old services")
    return len(stale_ids)
|