isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,338 +0,0 @@
1
- """
2
- Runtime Management for Self-Owned Deployed Services
3
-
4
- This module manages the runtime aspects of self-owned deployed model services.
5
- It does NOT handle third-party API services (OpenAI, Replicate) - those are
6
- managed in the inference module.
7
-
8
- Only for services deployed by ISADeploymentService or similar self-owned deployments.
9
- """
10
-
11
- import asyncio
12
- import logging
13
- import time
14
- from typing import Dict, List, Optional, Any, Union
15
- from dataclasses import dataclass, field
16
- from datetime import datetime, timedelta
17
- import httpx
18
- from pathlib import Path
19
-
20
- from ...core.types import (
21
- ServiceStatus,
22
- DeploymentPlatform,
23
- HealthMetrics,
24
- ServiceMetrics,
25
- ResourceRequirements
26
- )
27
-
28
- logger = logging.getLogger(__name__)
29
-
30
-
31
@dataclass
class DeployedService:
    """
    Runtime record describing one self-owned deployed model service.

    The first five fields are required; everything else has a default so a
    record can be created as soon as a deployment has an endpoint.
    """

    # --- identity (required) ---
    service_id: str       # key under which the service is registered
    deployment_id: str    # identifier of the deployment that produced it
    model_id: str         # identifier of the model being served
    platform: DeploymentPlatform
    endpoint_url: str     # URL of the deployed service

    # --- lifecycle / access ---
    status: ServiceStatus = ServiceStatus.PENDING
    health_check_url: Optional[str] = None  # URL polled by health checks
    api_key: Optional[str] = None           # sent as a Bearer token when set
    resource_requirements: Optional[ResourceRequirements] = None

    # Free-form extra data; a fresh dict is created per instance.
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Creation timestamp, taken from datetime.now() at construction time.
    created_at: datetime = field(default_factory=datetime.now)

    # --- most recent observations (None until first recorded) ---
    last_health_check: Optional[datetime] = None
    health_metrics: Optional[HealthMetrics] = None
    service_metrics: Optional[ServiceMetrics] = None
48
-
49
-
50
class DeployedServiceManager:
    """
    Manages runtime aspects of self-owned deployed services.

    Features:
    - Health monitoring for deployed services
    - Service discovery and status tracking
    - Runtime metrics collection
    - Service lifecycle management

    Services are tracked in the in-process ``services`` dict, keyed by
    service id. Each registered service gets one background asyncio task
    that polls its health URL every ``health_check_interval`` seconds.

    Example:
    ```python
    from isa_model.deployment.runtime import DeployedServiceManager

    manager = DeployedServiceManager()

    # Register a newly deployed service
    service = await manager.register_deployed_service(
        service_id="gemma-4b-alpaca-v1-prod",
        deployment_id="gemma-4b-alpaca-v1-int8-20241230-143022",
        model_id="gemma-4b-alpaca-v1",
        platform=DeploymentPlatform.RUNPOD,
        endpoint_url="https://api.runpod.ai/v2/xyz123/inference"
    )

    # Monitor health
    health = await manager.check_service_health(service.service_id)
    ```
    """

    def __init__(self, storage_backend: str = "local"):
        """
        Initialize deployed service manager.

        Args:
            storage_backend: Name of the storage backend. It is stored and
                logged here; this class does not otherwise use it.
        """
        self.storage_backend = storage_backend
        self.services: Dict[str, DeployedService] = {}
        self.health_check_interval = 60  # seconds
        self.health_check_timeout = 30  # seconds
        self._monitoring_tasks: Dict[str, asyncio.Task] = {}

        logger.info(f"DeployedServiceManager initialized with {storage_backend} backend")

    async def register_deployed_service(self,
                                        service_id: str,
                                        deployment_id: str,
                                        model_id: str,
                                        platform: DeploymentPlatform,
                                        endpoint_url: str,
                                        health_check_url: Optional[str] = None,
                                        api_key: Optional[str] = None,
                                        resource_requirements: Optional[ResourceRequirements] = None,
                                        metadata: Optional[Dict[str, Any]] = None) -> DeployedService:
        """
        Register a newly deployed self-owned service and start monitoring it.

        Args:
            service_id: Registry key; an existing entry with the same id is
                replaced.
            deployment_id: Identifier of the originating deployment.
            model_id: Identifier of the served model.
            platform: Platform the service runs on.
            endpoint_url: URL of the deployed service.
            health_check_url: URL to poll. Defaults to ``<endpoint_url>/health``.
            api_key: Optional key, sent as a Bearer token on health checks.
            resource_requirements: Optional resource description.
            metadata: Optional free-form data; ``None`` becomes ``{}``.

        Returns:
            The stored ``DeployedService``, created with status ``DEPLOYING``.
        """
        if health_check_url is None:
            # Default pattern: "<endpoint>/health", without doubling the slash.
            health_check_url = f"{endpoint_url.rstrip('/')}/health"

        service = DeployedService(
            service_id=service_id,
            deployment_id=deployment_id,
            model_id=model_id,
            platform=platform,
            endpoint_url=endpoint_url,
            health_check_url=health_check_url,
            api_key=api_key,
            resource_requirements=resource_requirements,
            metadata=metadata or {},
            status=ServiceStatus.DEPLOYING
        )

        self.services[service_id] = service

        # Start health monitoring
        await self._start_health_monitoring(service_id)

        logger.info(f"Registered deployed service: {service_id} on {platform.value}")
        return service

    async def get_service(self, service_id: str) -> Optional[DeployedService]:
        """Return the registered service for ``service_id``, or ``None``."""
        return self.services.get(service_id)

    async def list_services(self,
                            platform: Optional[DeploymentPlatform] = None,
                            status: Optional[ServiceStatus] = None) -> List[DeployedService]:
        """
        List deployed services with optional filtering.

        Args:
            platform: When given, keep only services on this platform.
            status: When given, keep only services in this status.

        Returns:
            A new list; the registry itself is not modified.
        """
        services = list(self.services.values())

        # Compare against None explicitly so a falsy enum member still filters.
        if platform is not None:
            services = [s for s in services if s.platform == platform]

        if status is not None:
            services = [s for s in services if s.status == status]

        return services

    @staticmethod
    def _extract_health_payload(response) -> Dict[str, Any]:
        """Return the JSON object from a health response, or ``{}`` if unusable."""
        content_type = response.headers.get('content-type', '')
        if not content_type.startswith('application/json'):
            return {}
        try:
            payload = response.json()
        except ValueError:
            # Body claimed to be JSON but could not be decoded; metrics are
            # optional, so treat it as "no extra metrics".
            return {}
        # Only a JSON object can carry named metrics; ignore lists/scalars.
        return payload if isinstance(payload, dict) else {}

    async def check_service_health(self, service_id: str) -> Optional[HealthMetrics]:
        """
        Perform a health check on a specific service and record the result.

        A GET request is sent to the service's health URL. HTTP 200 counts as
        healthy. The result is stored on the service (``health_metrics``,
        ``last_health_check``) and its status is updated:

        - healthy: ``DEPLOYING`` or ``UNHEALTHY`` becomes ``HEALTHY``
        - non-200: ``HEALTHY`` becomes ``UNHEALTHY``
        - request error: any status except ``STOPPED`` becomes ``UNHEALTHY``

        Args:
            service_id: Id of the service to check.

        Returns:
            The recorded ``HealthMetrics``, or ``None`` when the service is
            unknown or has no health URL.
        """
        service = self.services.get(service_id)
        if not service or not service.health_check_url:
            return None

        # Monotonic clock: elapsed time must not jump with wall-clock changes.
        started = time.perf_counter()

        try:
            async with httpx.AsyncClient(timeout=self.health_check_timeout) as client:
                headers = {}
                if service.api_key:
                    headers["Authorization"] = f"Bearer {service.api_key}"

                response = await client.get(service.health_check_url, headers=headers)

                response_time_ms = int((time.perf_counter() - started) * 1000)

                is_healthy = response.status_code == 200

                # Try to extract additional metrics from response
                metrics_data = self._extract_health_payload(response)

                health_metrics = HealthMetrics(
                    is_healthy=is_healthy,
                    response_time_ms=response_time_ms,
                    status_code=response.status_code,
                    cpu_usage_percent=metrics_data.get('cpu_usage'),
                    memory_usage_mb=metrics_data.get('memory_usage_mb'),
                    gpu_usage_percent=metrics_data.get('gpu_usage'),
                    error_message=None if is_healthy else f"HTTP {response.status_code}",
                    checked_at=datetime.now()
                )

                # Update service status based on health. UNHEALTHY is allowed
                # to recover; otherwise a service that failed one check (for
                # example while still booting) could never become HEALTHY.
                if is_healthy:
                    if service.status in (ServiceStatus.DEPLOYING, ServiceStatus.UNHEALTHY):
                        service.status = ServiceStatus.HEALTHY
                elif service.status == ServiceStatus.HEALTHY:
                    service.status = ServiceStatus.UNHEALTHY

                service.last_health_check = datetime.now()
                service.health_metrics = health_metrics

                return health_metrics

        except Exception as e:
            logger.error(f"Health check failed for {service_id}: {e}")

            error_metrics = HealthMetrics(
                is_healthy=False,
                response_time_ms=int((time.perf_counter() - started) * 1000),
                error_message=str(e),
                checked_at=datetime.now()
            )

            # A deliberately stopped service stays STOPPED, matching the
            # success path, which never leaves that state either.
            if service.status != ServiceStatus.STOPPED:
                service.status = ServiceStatus.UNHEALTHY
            service.last_health_check = datetime.now()
            service.health_metrics = error_metrics

            return error_metrics

    async def update_service_metrics(self,
                                     service_id: str,
                                     request_count: int = 0,
                                     processing_time_ms: int = 0,
                                     error_count: int = 0,
                                     cost_usd: float = 0.0):
        """
        Add the given deltas to a service's accumulated runtime metrics.

        Unknown service ids are ignored. The metrics object is created on
        first use with its window starting at the current time; every call
        moves the window end to the current time.
        """
        service = self.services.get(service_id)
        if not service:
            return

        if not service.service_metrics:
            service.service_metrics = ServiceMetrics(
                window_start=datetime.now()
            )

        metrics = service.service_metrics
        metrics.request_count += request_count
        metrics.total_processing_time_ms += processing_time_ms
        metrics.error_count += error_count
        metrics.total_cost_usd += cost_usd
        metrics.window_end = datetime.now()

    async def stop_service(self, service_id: str) -> bool:
        """
        Stop monitoring a service and mark it ``STOPPED``.

        Returns:
            ``True`` if the service was registered, ``False`` otherwise.
        """
        service = self.services.get(service_id)
        if not service:
            return False

        # Stop health monitoring
        await self._stop_health_monitoring(service_id)

        # Update status
        service.status = ServiceStatus.STOPPED

        # Note: Actual service termination would depend on the platform
        # For RunPod, Modal, etc., we would call their respective APIs

        logger.info(f"Stopped service: {service_id}")
        return True

    async def remove_service(self, service_id: str) -> bool:
        """
        Remove a service from the registry, stopping its monitor first.

        Returns:
            ``True`` if the service existed and was removed, else ``False``.
        """
        if service_id not in self.services:
            return False

        await self._stop_health_monitoring(service_id)
        del self.services[service_id]
        logger.info(f"Removed service: {service_id}")
        return True

    async def _start_health_monitoring(self, service_id: str):
        """Start the background health-polling task for a service (once)."""
        if service_id in self._monitoring_tasks:
            return  # Already monitoring

        async def health_monitor():
            # Poll for as long as the service remains registered.
            while service_id in self.services:
                try:
                    await self.check_service_health(service_id)
                    await asyncio.sleep(self.health_check_interval)
                except asyncio.CancelledError:
                    break
                except Exception as e:
                    logger.error(f"Health monitoring error for {service_id}: {e}")
                    await asyncio.sleep(self.health_check_interval)

        task = asyncio.create_task(health_monitor())
        self._monitoring_tasks[service_id] = task
        logger.info(f"Started health monitoring for {service_id}")

    async def _stop_health_monitoring(self, service_id: str):
        """Cancel a service's health-polling task and wait for it to finish."""
        task = self._monitoring_tasks.pop(service_id, None)
        if task is None:
            return

        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        logger.info(f"Stopped health monitoring for {service_id}")

    async def get_service_status_summary(self) -> Dict[str, Any]:
        """
        Get a summary of all deployed services.

        Returns:
            A dict with the total count, one counter per tracked status
            (healthy, unhealthy, deploying, stopped), a per-platform count
            keyed by ``platform.value``, and an ISO timestamp.
        """
        summary = {
            "total_services": len(self.services),
            "healthy_services": 0,
            "unhealthy_services": 0,
            "deploying_services": 0,
            "stopped_services": 0,
            "platforms": {},
            "last_updated": datetime.now().isoformat()
        }

        for service in self.services.values():
            # Count by status; statuses outside these four are not counted.
            if service.status == ServiceStatus.HEALTHY:
                summary["healthy_services"] += 1
            elif service.status == ServiceStatus.UNHEALTHY:
                summary["unhealthy_services"] += 1
            elif service.status == ServiceStatus.DEPLOYING:
                summary["deploying_services"] += 1
            elif service.status == ServiceStatus.STOPPED:
                summary["stopped_services"] += 1

            # Count by platform
            platform = service.platform.value
            summary["platforms"][platform] = summary["platforms"].get(platform, 0) + 1

        return summary

    async def cleanup_old_services(self, max_age_hours: int = 24):
        """
        Remove stopped services whose last health check is too old.

        Only services in status ``STOPPED`` with a recorded
        ``last_health_check`` earlier than ``max_age_hours`` ago are removed.

        Args:
            max_age_hours: Age threshold in hours.

        Returns:
            Number of services removed.
        """
        cutoff_time = datetime.now() - timedelta(hours=max_age_hours)

        # Collect ids first: remove_service mutates self.services.
        services_to_remove = [
            service_id
            for service_id, service in self.services.items()
            if service.status == ServiceStatus.STOPPED
            and service.last_health_check
            and service.last_health_check < cutoff_time
        ]

        for service_id in services_to_remove:
            await self.remove_service(service_id)

        logger.info(f"Cleaned up {len(services_to_remove)} old services")
        return len(services_to_remove)
@@ -1,9 +0,0 @@
1
- """
2
- Deployment Services
3
-
4
- This module contains services for automated deployment and management of AI models.
5
- """
6
-
7
- from .auto_deploy_vision_service import AutoDeployVisionService
8
-
9
- __all__ = ['AutoDeployVisionService']