isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,321 @@
1
+ """
2
+ Model Registration Script for UI Analysis Pipeline
3
+
4
+ Registers the latest versions of UI analysis models in the core model registry
5
+ Prepares models for Modal deployment with proper version management
6
+ """
7
+
8
+ import asyncio
9
+ from pathlib import Path
10
+ import sys
11
+ import os
12
+
13
+ # Add project root to path
14
+ project_root = Path(__file__).parent.parent.parent.parent
15
+ sys.path.insert(0, str(project_root))
16
+
17
+ from isa_model.core.model_manager import ModelManager
18
+ from isa_model.core.model_repo import ModelRegistry, ModelType, ModelCapability
19
+
20
async def register_ui_analysis_models():
    """Register the UI-analysis vision models in the core model registry.

    Only registry entries are created here; nothing is downloaded. Every
    model is attempted independently, so a failure for one model does not
    prevent the remaining ones from being registered.

    Returns:
        A list with one dict per model, holding ``model_id`` and ``status``
        (``'success'``, ``'failed'`` or ``'error'``). Entries with status
        ``'error'`` also carry the exception text under ``error``.
    """
    model_manager = ModelManager()

    print("🔧 Registering UI Analysis Models...")

    # Debug aid: list every capability the enum currently defines.
    print("Available capabilities:")
    for capability in ModelCapability:
        print(f" - {capability.name}: {capability.value}")
    print()

    # Model definitions, pinned to the revisions we want from HuggingFace.
    omniparser = {
        "model_id": "omniparser-v2.0",
        "repo_id": "microsoft/OmniParser",
        "model_type": ModelType.VISION,
        "capabilities": [
            ModelCapability.UI_DETECTION,
            ModelCapability.IMAGE_ANALYSIS,
            ModelCapability.IMAGE_UNDERSTANDING
        ],
        "revision": "main",  # Latest version
        "metadata": {
            "description": "Microsoft OmniParser v2.0 - Advanced UI element detection",
            "provider": "microsoft",
            "model_family": "omniparser",
            "version": "2.0",
            "paper": "https://arxiv.org/abs/2408.00203",
            "huggingface_url": "https://huggingface.co/microsoft/OmniParser",
            "use_case": "UI element detection and parsing",
            "input_format": "image",
            "output_format": "structured_elements",
            "gpu_memory_mb": 8192,
            "inference_time_ms": 500
        }
    }
    table_detection = {
        "model_id": "table-transformer-v1.1-detection",
        "repo_id": "microsoft/table-transformer-detection",
        "model_type": ModelType.VISION,
        "capabilities": [
            ModelCapability.TABLE_DETECTION,
            ModelCapability.IMAGE_ANALYSIS
        ],
        "revision": "main",
        "metadata": {
            "description": "Microsoft Table Transformer v1.1 - Table detection model",
            "provider": "microsoft",
            "model_family": "table-transformer",
            "version": "1.1",
            "paper": "https://arxiv.org/abs/2110.00061",
            "huggingface_url": "https://huggingface.co/microsoft/table-transformer-detection",
            "use_case": "Table detection in documents and images",
            "input_format": "image",
            "output_format": "bounding_boxes",
            "gpu_memory_mb": 4096,
            "inference_time_ms": 300
        }
    }
    table_structure = {
        "model_id": "table-transformer-v1.1-structure",
        "repo_id": "microsoft/table-transformer-structure-recognition",
        "model_type": ModelType.VISION,
        "capabilities": [
            ModelCapability.TABLE_STRUCTURE_RECOGNITION,
            ModelCapability.IMAGE_ANALYSIS
        ],
        "revision": "main",
        "metadata": {
            "description": "Microsoft Table Transformer v1.1 - Table structure recognition",
            "provider": "microsoft",
            "model_family": "table-transformer",
            "version": "1.1",
            "paper": "https://arxiv.org/abs/2110.00061",
            "huggingface_url": "https://huggingface.co/microsoft/table-transformer-structure-recognition",
            "use_case": "Table structure recognition and cell extraction",
            "input_format": "image",
            "output_format": "table_structure",
            "gpu_memory_mb": 4096,
            "inference_time_ms": 400
        }
    }
    paddle_ocr = {
        "model_id": "paddleocr-v3.0",
        "repo_id": "PaddlePaddle/PaddleOCR",
        "model_type": ModelType.VISION,
        "capabilities": [
            ModelCapability.OCR,
            ModelCapability.IMAGE_ANALYSIS
        ],
        "revision": "release/2.8",
        "metadata": {
            "description": "PaddleOCR v3.0 - Multilingual OCR model",
            "provider": "paddlepaddle",
            "model_family": "paddleocr",
            "version": "3.0",
            "github_url": "https://github.com/PaddlePaddle/PaddleOCR",
            "huggingface_url": "https://huggingface.co/PaddlePaddle/PaddleOCR",
            "use_case": "Text extraction from images",
            "input_format": "image",
            "output_format": "text_with_coordinates",
            "languages": ["en", "ch", "multilingual"],
            "gpu_memory_mb": 2048,
            "inference_time_ms": 200
        }
    }
    yolo_fallback = {
        "model_id": "yolov8n-fallback",
        "repo_id": "ultralytics/yolov8",
        "model_type": ModelType.VISION,
        "capabilities": [
            ModelCapability.IMAGE_ANALYSIS,
            ModelCapability.UI_DETECTION  # As fallback
        ],
        "revision": "main",
        "metadata": {
            "description": "YOLOv8 Nano - Fallback object detection model",
            "provider": "ultralytics",
            "model_family": "yolo",
            "version": "8.0",
            "github_url": "https://github.com/ultralytics/ultralytics",
            "use_case": "General object detection (fallback for UI elements)",
            "input_format": "image",
            "output_format": "bounding_boxes",
            "gpu_memory_mb": 1024,
            "inference_time_ms": 50
        }
    }
    models_to_register = [
        omniparser,
        table_detection,
        table_structure,
        paddle_ocr,
        yolo_fallback,
    ]

    registration_results = []

    for spec in models_to_register:
        model_id = spec['model_id']
        print(f"\n📝 Registering {model_id}...")

        try:
            # Registry entry only: the weights are not downloaded here.
            registry_metadata = dict(spec['metadata'])
            registry_metadata.update({
                'repo_id': spec['repo_id'],
                'revision': spec['revision'],
                'registered_at': 'auto',
                'download_status': 'not_downloaded'
            })

            registered = model_manager.registry.register_model(
                model_id=model_id,
                model_type=spec['model_type'],
                capabilities=spec['capabilities'],
                metadata=registry_metadata
            )

            if registered:
                print(f"✅ Successfully registered {model_id}")
                status = 'success'
            else:
                print(f"❌ Failed to register {model_id}")
                status = 'failed'

            registration_results.append({'model_id': model_id, 'status': status})

        except Exception as exc:
            # Keep going: one bad model must not abort the whole batch.
            print(f"❌ Error registering {model_id}: {exc}")
            registration_results.append({
                'model_id': model_id,
                'status': 'error',
                'error': str(exc)
            })

    # Summary: anything that is not an explicit success counts as failed.
    print("\n📊 Registration Summary:")
    successful = []
    failed = []
    for outcome in registration_results:
        bucket = successful if outcome['status'] == 'success' else failed
        bucket.append(outcome)

    print(f"✅ Successfully registered: {len(successful)} models")
    for outcome in successful:
        print(f" - {outcome['model_id']}")

    if failed:
        print(f"❌ Failed to register: {len(failed)} models")
        for outcome in failed:
            if 'error' in outcome:
                error_msg = f" ({outcome.get('error', 'unknown error')})"
            else:
                error_msg = ""
            print(f" - {outcome['model_id']}{error_msg}")

    return registration_results
212
+
213
async def verify_model_registry():
    """Print which registered models cover each UI-analysis capability.

    For every capability of interest the registry is queried and the
    matching models are listed with their version and provider taken from
    the stored metadata. Overall registry statistics are printed at the end.
    Nothing is returned; the function only reports to stdout.
    """
    model_manager = ModelManager()

    print("\n🔍 Verifying Model Registry...")

    capabilities_to_check = (
        ModelCapability.UI_DETECTION,
        ModelCapability.OCR,
        ModelCapability.TABLE_DETECTION,
        ModelCapability.TABLE_STRUCTURE_RECOGNITION,
    )

    for capability in capabilities_to_check:
        matches = model_manager.registry.get_models_by_capability(capability)
        print(f"\n📋 Models with {capability.value} capability:")

        if not matches:
            print(f" ❌ No models found for {capability.value}")
            continue

        for model_id, model_info in matches.items():
            # Missing metadata fields are reported as 'unknown'.
            meta = model_info.get('metadata', {})
            version = meta.get('version', 'unknown')
            provider = meta.get('provider', 'unknown')
            print(f" ✅ {model_id} (v{version}, {provider})")

    # Overall registry statistics.
    stats = model_manager.registry.get_stats()
    print("\n📈 Registry Statistics:")
    for label, key in (
        ("Total models", 'total_models'),
        ("Models by type", 'models_by_type'),
        ("Models by capability", 'models_by_capability'),
    ):
        print(f" {label}: {stats[key]}")
247
+
248
def get_model_for_capability(capability: ModelCapability) -> str:
    """Pick the preferred registered model for a capability.

    Args:
        capability: The capability a model must provide.

    Returns:
        The id of the highest-priority registered model for ``capability``.
        When none of the preferred ids is registered, the first model the
        registry returned is used. ``None`` is returned when the registry
        has no model for the capability at all.
    """
    available = ModelManager().registry.get_models_by_capability(capability)
    if not available:
        return None

    # Preferred model ids per capability, best candidate first.
    priority_order = {
        ModelCapability.UI_DETECTION: [
            "omniparser-v2.0",
            "yolov8n-fallback"
        ],
        ModelCapability.OCR: [
            "paddleocr-v3.0"
        ],
        ModelCapability.TABLE_DETECTION: [
            "table-transformer-v1.1-detection"
        ],
        ModelCapability.TABLE_STRUCTURE_RECOGNITION: [
            "table-transformer-v1.1-structure"
        ]
    }

    preferred = next(
        (candidate for candidate in priority_order.get(capability, [])
         if candidate in available),
        None,
    )
    if preferred is not None:
        return preferred

    # No preferred model is registered: fall back to the first one available.
    return list(available.keys())[0] if available else None
283
+
284
async def main():
    """Run the full workflow: register models, verify them, print usage help.

    Returns:
        ``True`` when registration and verification both finished without
        raising. ``False`` when an exception escaped either step; the error
        is printed rather than re-raised.
    """
    print("🚀 ISA Model Registry - UI Analysis Models Registration")
    print("=" * 60)

    try:
        # The per-model results are already reported by the callee.
        await register_ui_analysis_models()

        await verify_model_registry()

        print("\n🎉 Model registration completed!")
        print(" Use ModelManager.get_model() to download and use models")
        print(" Use get_model_for_capability() to get recommended models")

        # The example uses ModelType.VISION, so it must import ModelType too;
        # otherwise copying it verbatim fails with a NameError.
        usage_example = (
            "\n💡 Usage Example:",
            " from isa_model.core.model_manager import ModelManager",
            " from isa_model.core.model_repo import ModelCapability, ModelType",
            " ",
            " manager = ModelManager()",
            " ui_model_path = await manager.get_model(",
            " model_id='omniparser-v2.0',",
            " repo_id='microsoft/OmniParser',",
            " model_type=ModelType.VISION,",
            " capabilities=[ModelCapability.UI_DETECTION]",
            " )",
        )
        for line in usage_example:
            print(line)

    except Exception as e:
        print(f"❌ Registration failed: {e}")
        return False

    return True
319
+
320
if __name__ == "__main__":
    # main() signals failure by returning False. Turn that into a non-zero
    # exit status so shells and CI pipelines can detect a failed run.
    sys.exit(0 if asyncio.run(main()) else 1)
@@ -0,0 +1,338 @@
1
+ """
2
+ Runtime Management for Self-Owned Deployed Services
3
+
4
+ This module manages the runtime aspects of self-owned deployed model services.
5
+ It does NOT handle third-party API services (OpenAI, Replicate) - those are
6
+ managed in the inference module.
7
+
8
+ Only for services deployed by ISADeploymentService or similar self-owned deployments.
9
+ """
10
+
11
+ import asyncio
12
+ import logging
13
+ import time
14
+ from typing import Dict, List, Optional, Any, Union
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime, timedelta
17
+ import httpx
18
+ from pathlib import Path
19
+
20
+ from ...core.types import (
21
+ ServiceStatus,
22
+ DeploymentPlatform,
23
+ HealthMetrics,
24
+ ServiceMetrics,
25
+ ResourceRequirements
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
@dataclass
class DeployedService:
    """Runtime record for one self-owned deployed service.

    The first five fields are required and identify the deployment; all
    remaining fields have defaults and are filled in as the service is
    registered and health-checked.
    """

    # --- identity (required) ---
    service_id: str
    deployment_id: str
    model_id: str
    platform: DeploymentPlatform
    endpoint_url: str

    # --- lifecycle and access ---
    status: ServiceStatus = ServiceStatus.PENDING
    health_check_url: Optional[str] = None
    api_key: Optional[str] = None  # sent as a Bearer token on health checks
    resource_requirements: Optional[ResourceRequirements] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.now)

    # --- populated by monitoring ---
    last_health_check: Optional[datetime] = None
    health_metrics: Optional[HealthMetrics] = None
    service_metrics: Optional[ServiceMetrics] = None
48
+
49
+
50
class DeployedServiceManager:
    """
    Manages runtime aspects of self-owned deployed services.

    Features:
    - Health monitoring for deployed services
    - Service discovery and status tracking
    - Runtime metrics collection
    - Service lifecycle management

    All state is held in memory on the instance (``self.services`` and the
    per-service monitoring tasks).

    Example:
    ```python
    from isa_model.deployment.runtime import DeployedServiceManager

    manager = DeployedServiceManager()

    # Register a newly deployed service
    service = await manager.register_deployed_service(
        service_id="gemma-4b-alpaca-v1-prod",
        deployment_id="gemma-4b-alpaca-v1-int8-20241230-143022",
        model_id="gemma-4b-alpaca-v1",
        platform=DeploymentPlatform.RUNPOD,
        endpoint_url="https://api.runpod.ai/v2/xyz123/inference"
    )

    # Monitor health
    health = await manager.check_service_health(service.service_id)
    ```
    """

    def __init__(self, storage_backend: str = "local"):
        """Initialize deployed service manager.

        Args:
            storage_backend: Name of the storage backend. It is stored and
                logged; this class keeps its registry in memory.
        """
        self.storage_backend = storage_backend
        self.services: Dict[str, DeployedService] = {}
        self.health_check_interval = 60  # seconds
        self.health_check_timeout = 30  # seconds
        self._monitoring_tasks: Dict[str, asyncio.Task] = {}

        logger.info(f"DeployedServiceManager initialized with {storage_backend} backend")

    async def register_deployed_service(self,
                                        service_id: str,
                                        deployment_id: str,
                                        model_id: str,
                                        platform: DeploymentPlatform,
                                        endpoint_url: str,
                                        health_check_url: Optional[str] = None,
                                        api_key: Optional[str] = None,
                                        resource_requirements: Optional[ResourceRequirements] = None,
                                        metadata: Optional[Dict[str, Any]] = None) -> DeployedService:
        """Register a newly deployed self-owned service and start monitoring it.

        Args:
            service_id: Key under which the service is stored.
            deployment_id: Identifier of the deployment that produced it.
            model_id: Identifier of the served model.
            platform: Platform the service runs on.
            endpoint_url: Base URL of the service.
            health_check_url: URL to probe; defaults to ``<endpoint_url>/health``.
            api_key: Optional key sent as a Bearer token on health checks.
            resource_requirements: Optional resource description.
            metadata: Optional free-form metadata (a new dict when omitted).

        Returns:
            The stored ``DeployedService``, with status ``DEPLOYING``.
        """
        if health_check_url is None:
            # Default to the conventional /health path, avoiding a double slash.
            separator = "" if endpoint_url.endswith('/') else "/"
            health_check_url = f"{endpoint_url}{separator}health"

        service = DeployedService(
            service_id=service_id,
            deployment_id=deployment_id,
            model_id=model_id,
            platform=platform,
            endpoint_url=endpoint_url,
            health_check_url=health_check_url,
            api_key=api_key,
            resource_requirements=resource_requirements,
            metadata=metadata or {},
            status=ServiceStatus.DEPLOYING
        )

        self.services[service_id] = service

        # Start health monitoring
        await self._start_health_monitoring(service_id)

        logger.info(f"Registered deployed service: {service_id} on {platform.value}")
        return service

    async def get_service(self, service_id: str) -> Optional[DeployedService]:
        """Return the service registered under ``service_id``, or ``None``."""
        return self.services.get(service_id)

    async def list_services(self,
                            platform: Optional[DeploymentPlatform] = None,
                            status: Optional[ServiceStatus] = None) -> List[DeployedService]:
        """List deployed services with optional filtering.

        Args:
            platform: When given, keep only services on this platform.
            status: When given, keep only services in this status.

        Returns:
            A new list of matching services.
        """
        services = list(self.services.values())

        # Compare against None explicitly so a falsy enum member still filters.
        if platform is not None:
            services = [s for s in services if s.platform == platform]

        if status is not None:
            services = [s for s in services if s.status == status]

        return services

    @staticmethod
    def _parse_health_payload(response) -> Dict[str, Any]:
        """Return the JSON object from a health response, or ``{}``.

        Anything that is not a JSON object (wrong content type, undecodable
        body, or a JSON list/string/number) yields an empty dict, so optional
        metrics are simply absent instead of failing the health check.
        """
        content_type = response.headers.get('content-type', '')
        if not content_type.startswith('application/json'):
            return {}
        try:
            payload = response.json()
        except ValueError:
            # JSON and text decoding errors are both ValueError subclasses.
            return {}
        return payload if isinstance(payload, dict) else {}

    async def check_service_health(self, service_id: str) -> Optional[HealthMetrics]:
        """Perform a health check on a specific service.

        Sends a GET to the service's health URL (with a Bearer token when an
        API key is set) and records the outcome on the service.

        Status transitions:
        - HTTP 200 moves ``DEPLOYING`` or ``UNHEALTHY`` to ``HEALTHY``, so a
          service can recover after a failed probe.
        - Any other HTTP status moves ``HEALTHY`` to ``UNHEALTHY``.
        - A request error marks the service ``UNHEALTHY`` unless it is
          ``STOPPED``; a stopped service keeps its status.

        Args:
            service_id: Id of the service to probe.

        Returns:
            The recorded ``HealthMetrics``, or ``None`` when the service is
            unknown or has no health check URL.
        """
        service = self.services.get(service_id)
        if not service or not service.health_check_url:
            return None

        # Monotonic clock: the duration must not be affected by clock changes.
        start_time = time.monotonic()

        try:
            headers = {}
            if service.api_key:
                headers["Authorization"] = f"Bearer {service.api_key}"

            async with httpx.AsyncClient(timeout=self.health_check_timeout) as client:
                response = await client.get(service.health_check_url, headers=headers)

            response_time_ms = int((time.monotonic() - start_time) * 1000)
            is_healthy = response.status_code == 200

            # Optional extra metrics reported by the service itself.
            metrics_data = self._parse_health_payload(response)

            health_metrics = HealthMetrics(
                is_healthy=is_healthy,
                response_time_ms=response_time_ms,
                status_code=response.status_code,
                cpu_usage_percent=metrics_data.get('cpu_usage'),
                memory_usage_mb=metrics_data.get('memory_usage_mb'),
                gpu_usage_percent=metrics_data.get('gpu_usage'),
                error_message=None if is_healthy else f"HTTP {response.status_code}",
                checked_at=datetime.now()
            )

        except asyncio.CancelledError:
            # Never swallow cancellation: the monitoring task relies on it
            # to stop (CancelledError is an Exception subclass on Python 3.7).
            raise

        except Exception as e:
            logger.error(f"Health check failed for {service_id}: {e}")

            health_metrics = HealthMetrics(
                is_healthy=False,
                response_time_ms=int((time.monotonic() - start_time) * 1000),
                error_message=str(e),
                checked_at=datetime.now()
            )

            # A deliberately stopped service stays STOPPED.
            if service.status != ServiceStatus.STOPPED:
                service.status = ServiceStatus.UNHEALTHY

        else:
            recoverable = (ServiceStatus.DEPLOYING, ServiceStatus.UNHEALTHY)
            if is_healthy and service.status in recoverable:
                service.status = ServiceStatus.HEALTHY
            elif not is_healthy and service.status == ServiceStatus.HEALTHY:
                service.status = ServiceStatus.UNHEALTHY

        service.last_health_check = datetime.now()
        service.health_metrics = health_metrics

        return health_metrics

    async def update_service_metrics(self,
                                     service_id: str,
                                     request_count: int = 0,
                                     processing_time_ms: int = 0,
                                     error_count: int = 0,
                                     cost_usd: float = 0.0):
        """Add the given deltas to a service's runtime metrics.

        The metrics window is created on first use and its end is moved to
        the current time on every update. Unknown services are ignored.
        """
        service = self.services.get(service_id)
        if not service:
            return

        if not service.service_metrics:
            service.service_metrics = ServiceMetrics(
                window_start=datetime.now()
            )

        metrics = service.service_metrics
        metrics.request_count += request_count
        metrics.total_processing_time_ms += processing_time_ms
        metrics.error_count += error_count
        metrics.total_cost_usd += cost_usd
        metrics.window_end = datetime.now()

    async def stop_service(self, service_id: str) -> bool:
        """Stop monitoring a service and mark it ``STOPPED``.

        Returns:
            ``True`` when the service existed, ``False`` otherwise.
        """
        service = self.services.get(service_id)
        if not service:
            return False

        # Stop health monitoring
        await self._stop_health_monitoring(service_id)

        # Update status
        service.status = ServiceStatus.STOPPED

        # Note: Actual service termination would depend on the platform
        # For RunPod, Modal, etc., we would call their respective APIs

        logger.info(f"Stopped service: {service_id}")
        return True

    async def remove_service(self, service_id: str) -> bool:
        """Stop monitoring a service and drop it from the registry.

        Returns:
            ``True`` when the service existed, ``False`` otherwise.
        """
        if service_id not in self.services:
            return False

        await self._stop_health_monitoring(service_id)
        del self.services[service_id]
        logger.info(f"Removed service: {service_id}")
        return True

    async def _start_health_monitoring(self, service_id: str):
        """Start background health monitoring for a service.

        Does nothing when a monitoring task for the service is still
        running; a finished task is replaced by a new one.
        """
        existing = self._monitoring_tasks.get(service_id)
        if existing is not None and not existing.done():
            return  # Already monitoring

        async def health_monitor():
            # Runs until the service leaves the registry or the task is cancelled.
            while service_id in self.services:
                try:
                    await self.check_service_health(service_id)
                    await asyncio.sleep(self.health_check_interval)
                except asyncio.CancelledError:
                    break
                except Exception as e:
                    logger.error(f"Health monitoring error for {service_id}: {e}")
                    await asyncio.sleep(self.health_check_interval)

        task = asyncio.create_task(health_monitor())
        self._monitoring_tasks[service_id] = task
        logger.info(f"Started health monitoring for {service_id}")

    async def _stop_health_monitoring(self, service_id: str):
        """Cancel the monitoring task of a service and wait for it to end."""
        task = self._monitoring_tasks.pop(service_id, None)
        if task is None:
            return

        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        logger.info(f"Stopped health monitoring for {service_id}")

    async def get_service_status_summary(self) -> Dict[str, Any]:
        """Get a summary of all deployed services.

        Returns:
            A dict with the total count, counts for the healthy, unhealthy,
            deploying and stopped statuses, a per-platform count under
            ``platforms`` and an ISO timestamp under ``last_updated``.
            Services in any other status only add to the total.
        """
        summary = {
            "total_services": len(self.services),
            "healthy_services": 0,
            "unhealthy_services": 0,
            "deploying_services": 0,
            "stopped_services": 0,
            "platforms": {},
            "last_updated": datetime.now().isoformat()
        }

        counter_for_status = {
            ServiceStatus.HEALTHY: "healthy_services",
            ServiceStatus.UNHEALTHY: "unhealthy_services",
            ServiceStatus.DEPLOYING: "deploying_services",
            ServiceStatus.STOPPED: "stopped_services",
        }

        for service in self.services.values():
            # Count by status
            counter = counter_for_status.get(service.status)
            if counter is not None:
                summary[counter] += 1

            # Count by platform
            platform = service.platform.value
            summary["platforms"][platform] = summary["platforms"].get(platform, 0) + 1

        return summary

    async def cleanup_old_services(self, max_age_hours: int = 24):
        """Remove stopped services whose last health check is too old.

        A service is removed when its status is ``STOPPED`` and its last
        health check happened more than ``max_age_hours`` ago. Stopped
        services that were never health-checked are kept.

        Args:
            max_age_hours: Age threshold in hours.

        Returns:
            The number of services removed.
        """
        cutoff_time = datetime.now() - timedelta(hours=max_age_hours)

        # Collect ids first: remove_service mutates self.services.
        services_to_remove = [
            service_id
            for service_id, service in self.services.items()
            if service.status == ServiceStatus.STOPPED
            and service.last_health_check
            and service.last_health_check < cutoff_time
        ]

        for service_id in services_to_remove:
            await self.remove_service(service_id)

        logger.info(f"Cleaned up {len(services_to_remove)} old services")
        return len(services_to_remove)
@@ -0,0 +1,9 @@
1
+ """
2
+ Deployment Services
3
+
4
+ This module contains services for automated deployment and management of AI models.
5
+ """
6
+
7
+ from .auto_deploy_vision_service import AutoDeployVisionService
8
+
9
+ __all__ = ['AutoDeployVisionService']