isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
isa_model/deployment/services/model_service.py (new file, +332)
@@ -0,0 +1,332 @@
+"""
+ModelService - Core abstraction for deployed model services in the MaaS platform
+
+This represents a deployed service instance that can be discovered, monitored, and invoked.
+It's the bridge between the high-level AIFactory interface and the underlying platform services.
+"""
+
+from typing import Dict, List, Optional, Any, Union
+from enum import Enum
+from dataclasses import dataclass, field
+from datetime import datetime
+import logging
+
+logger = logging.getLogger(__name__)
+
+class ServiceStatus(str, Enum):
+    """Service deployment and health status"""
+    PENDING = "pending"
+    DEPLOYING = "deploying"
+    HEALTHY = "healthy"
+    UNHEALTHY = "unhealthy"
+    STOPPED = "stopped"
+
+class ServiceType(str, Enum):
+    """Types of services available in the platform"""
+    LLM = "llm"
+    EMBEDDING = "embedding"
+    VISION = "vision"
+    AUDIO = "audio"
+    IMAGE_GEN = "image_gen"
+
+class DeploymentPlatform(str, Enum):
+    """Supported deployment platforms for self-owned services only"""
+    MODAL = "modal"
+    KUBERNETES = "kubernetes"
+    RUNPOD = "runpod"
+    YYDS = "yyds"
+    OLLAMA = "ollama"  # Local deployment
+
+@dataclass
+class HealthMetrics:
+    """Service health metrics"""
+    is_healthy: bool
+    response_time_ms: Optional[int] = None
+    status_code: Optional[int] = None
+    cpu_usage_percent: Optional[float] = None
+    memory_usage_mb: Optional[int] = None
+    gpu_usage_percent: Optional[float] = None
+    error_message: Optional[str] = None
+    checked_at: Optional[datetime] = None
+
+@dataclass
+class UsageMetrics:
+    """Service usage and cost metrics"""
+    request_count: int = 0
+    total_processing_time_ms: int = 0
+    error_count: int = 0
+    total_cost_usd: float = 0.0
+    window_start: Optional[datetime] = None
+    window_end: Optional[datetime] = None
+
+@dataclass
+class ResourceRequirements:
+    """Service resource requirements"""
+    gpu_type: Optional[str] = None
+    memory_mb: Optional[int] = None
+    cpu_cores: Optional[int] = None
+    storage_gb: Optional[int] = None
+    min_replicas: int = 0
+    max_replicas: int = 1
+
+class ModelService:
+    """
+    Core abstraction for a deployed model service in the MaaS platform
+
+    This class represents a self-owned deployed service instance that:
+    - Has been deployed to a platform (Modal, Kubernetes, RunPod, etc.)
+    - Can be discovered through the ServiceRegistry
+    - Can be health-checked and monitored
+    - Provides inference capabilities through specific endpoints
+
+    Note: This is only for self-owned deployments. Third-party services
+    (OpenAI, Replicate, etc.) are managed by ThirdPartyServiceManager.
+    """
+
+    def __init__(
+        self,
+        service_id: str,
+        service_name: str,
+        model_id: Optional[str],
+        deployment_platform: DeploymentPlatform,
+        service_type: ServiceType,
+        inference_endpoint: Optional[str] = None,
+        health_endpoint: Optional[str] = None,
+        capabilities: Optional[List[str]] = None,
+        config: Optional[Dict[str, Any]] = None,
+        resource_requirements: Optional[ResourceRequirements] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        status: ServiceStatus = ServiceStatus.PENDING
+    ):
+        self.service_id = service_id
+        self.service_name = service_name
+        self.model_id = model_id
+        self.deployment_platform = deployment_platform
+        self.service_type = service_type
+        self.status = status
+
+        # Endpoints
+        self.inference_endpoint = inference_endpoint
+        self.health_endpoint = health_endpoint
+
+        # Capabilities and configuration
+        self.capabilities = capabilities or []
+        self.config = config or {}
+        self.resource_requirements = resource_requirements or ResourceRequirements()
+        self.metadata = metadata or {}
+
+        # Metrics (populated by monitoring systems)
+        self.health_metrics: Optional[HealthMetrics] = None
+        self.usage_metrics: Optional[UsageMetrics] = None
+
+        # Timestamps
+        self.created_at: Optional[datetime] = None
+        self.updated_at: Optional[datetime] = None
+
+        logger.debug(f"Created ModelService: {service_id} ({service_name})")
+
+    def is_healthy(self) -> bool:
+        """Check if the service is currently healthy"""
+        if self.status != ServiceStatus.HEALTHY:
+            return False
+
+        if self.health_metrics:
+            return self.health_metrics.is_healthy
+
+        # If no health metrics, assume healthy if status is healthy
+        return True
+
+    def is_available(self) -> bool:
+        """Check if the service is available for inference requests"""
+        return (
+            self.status == ServiceStatus.HEALTHY and
+            self.inference_endpoint is not None and
+            self.is_healthy()
+        )
+
+    def has_capability(self, capability: str) -> bool:
+        """Check if this service provides a specific capability"""
+        return capability in self.capabilities
+
+    def get_endpoint_url(self, endpoint_type: str = "inference") -> Optional[str]:
+        """Get endpoint URL for the service"""
+        if endpoint_type == "inference":
+            return self.inference_endpoint
+        elif endpoint_type == "health":
+            return self.health_endpoint
+        else:
+            # Check if it's in metadata
+            endpoints = self.metadata.get("endpoints", {})
+            return endpoints.get(endpoint_type)
+
+    def update_health_metrics(self, metrics: HealthMetrics) -> None:
+        """Update health metrics for this service"""
+        self.health_metrics = metrics
+
+        # Update service status based on health
+        if metrics.is_healthy:
+            if self.status != ServiceStatus.HEALTHY:
+                self.status = ServiceStatus.HEALTHY
+                logger.info(f"Service {self.service_id} is now healthy")
+        else:
+            if self.status == ServiceStatus.HEALTHY:
+                self.status = ServiceStatus.UNHEALTHY
+                logger.warning(f"Service {self.service_id} is now unhealthy: {metrics.error_message}")
+
+    def update_usage_metrics(self, metrics: UsageMetrics) -> None:
+        """Update usage metrics for this service"""
+        self.usage_metrics = metrics
+        logger.debug(f"Updated usage metrics for {self.service_id}: {metrics.request_count} requests")
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert service to dictionary representation"""
+        return {
+            "service_id": self.service_id,
+            "service_name": self.service_name,
+            "model_id": self.model_id,
+            "deployment_platform": self.deployment_platform.value,
+            "service_type": self.service_type.value,
+            "status": self.status.value,
+            "inference_endpoint": self.inference_endpoint,
+            "health_endpoint": self.health_endpoint,
+            "capabilities": self.capabilities,
+            "config": self.config,
+            "resource_requirements": {
+                "gpu_type": self.resource_requirements.gpu_type,
+                "memory_mb": self.resource_requirements.memory_mb,
+                "cpu_cores": self.resource_requirements.cpu_cores,
+                "storage_gb": self.resource_requirements.storage_gb,
+                "min_replicas": self.resource_requirements.min_replicas,
+                "max_replicas": self.resource_requirements.max_replicas,
+            },
+            "metadata": self.metadata,
+            "health_metrics": {
+                "is_healthy": self.health_metrics.is_healthy if self.health_metrics else None,
+                "response_time_ms": self.health_metrics.response_time_ms if self.health_metrics else None,
+                "status_code": self.health_metrics.status_code if self.health_metrics else None,
+                "error_message": self.health_metrics.error_message if self.health_metrics else None,
+                "checked_at": self.health_metrics.checked_at.isoformat() if self.health_metrics and self.health_metrics.checked_at else None,
+            } if self.health_metrics else None,
+            "usage_metrics": {
+                "request_count": self.usage_metrics.request_count if self.usage_metrics else 0,
+                "total_processing_time_ms": self.usage_metrics.total_processing_time_ms if self.usage_metrics else 0,
+                "error_count": self.usage_metrics.error_count if self.usage_metrics else 0,
+                "total_cost_usd": self.usage_metrics.total_cost_usd if self.usage_metrics else 0.0,
+            } if self.usage_metrics else None,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ModelService':
+        """Create ModelService from dictionary representation"""
+        # Create resource requirements
+        resource_data = data.get("resource_requirements", {})
+        resources = ResourceRequirements(
+            gpu_type=resource_data.get("gpu_type"),
+            memory_mb=resource_data.get("memory_mb"),
+            cpu_cores=resource_data.get("cpu_cores"),
+            storage_gb=resource_data.get("storage_gb"),
+            min_replicas=resource_data.get("min_replicas", 0),
+            max_replicas=resource_data.get("max_replicas", 1),
+        )
+
+        # Create service
+        service = cls(
+            service_id=data["service_id"],
+            service_name=data["service_name"],
+            model_id=data.get("model_id"),
+            deployment_platform=DeploymentPlatform(data["deployment_platform"]),
+            service_type=ServiceType(data["service_type"]),
+            status=ServiceStatus(data.get("status", "pending")),
+            inference_endpoint=data.get("inference_endpoint"),
+            health_endpoint=data.get("health_endpoint"),
+            capabilities=data.get("capabilities", []),
+            config=data.get("config", {}),
+            resource_requirements=resources,
+            metadata=data.get("metadata", {}),
+        )
+
+        # Set timestamps
+        if data.get("created_at"):
+            service.created_at = datetime.fromisoformat(data["created_at"].replace('Z', '+00:00'))
+        if data.get("updated_at"):
+            service.updated_at = datetime.fromisoformat(data["updated_at"].replace('Z', '+00:00'))
+
+        # Set health metrics
+        health_data = data.get("health_metrics")
+        if health_data and health_data.get("is_healthy") is not None:
+            checked_at = None
+            if health_data.get("checked_at"):
+                checked_at = datetime.fromisoformat(health_data["checked_at"].replace('Z', '+00:00'))
+
+            service.health_metrics = HealthMetrics(
+                is_healthy=health_data["is_healthy"],
+                response_time_ms=health_data.get("response_time_ms"),
+                status_code=health_data.get("status_code"),
+                error_message=health_data.get("error_message"),
+                checked_at=checked_at,
+            )
+
+        # Set usage metrics
+        usage_data = data.get("usage_metrics")
+        if usage_data:
+            service.usage_metrics = UsageMetrics(
+                request_count=usage_data.get("request_count", 0),
+                total_processing_time_ms=usage_data.get("total_processing_time_ms", 0),
+                error_count=usage_data.get("error_count", 0),
+                total_cost_usd=usage_data.get("total_cost_usd", 0.0),
+            )
+
+        return service
+
+    def __repr__(self) -> str:
+        return f"ModelService(id={self.service_id}, name={self.service_name}, platform={self.deployment_platform.value}, status={self.status.value})"
+
+    def __str__(self) -> str:
+        return f"{self.service_name} ({self.service_id}) on {self.deployment_platform.value} - {self.status.value}"
+
+# Factory functions for common service types
+
+def create_modal_service(
+    service_name: str,
+    model_id: str,
+    inference_endpoint: str,
+    health_endpoint: Optional[str] = None,
+    capabilities: Optional[List[str]] = None,
+    gpu_type: str = "T4",
+    memory_mb: int = 16384,
+    **kwargs
+) -> ModelService:
+    """Factory function for Modal-deployed services"""
+    service_id = f"{service_name}-modal-{datetime.now().strftime('%Y%m%d%H%M%S')}"
+
+    resources = ResourceRequirements(
+        gpu_type=gpu_type,
+        memory_mb=memory_mb,
+        min_replicas=0,  # Modal can scale to zero
+        max_replicas=10,  # Reasonable default
+    )
+
+    return ModelService(
+        service_id=service_id,
+        service_name=service_name,
+        model_id=model_id,
+        deployment_platform=DeploymentPlatform.MODAL,
+        service_type=ServiceType.VISION,  # Most Modal services are vision
+        inference_endpoint=inference_endpoint,
+        health_endpoint=health_endpoint,
+        capabilities=capabilities or [],
+        resource_requirements=resources,
+        metadata={
+            "platform": "modal",
+            "auto_scaling": True,
+            "scale_to_zero": True,
+            **kwargs
+        },
+        status=ServiceStatus.HEALTHY,  # Assume healthy when creating
+    )
+
+# REMOVED: create_openai_service function
+# OpenAI is a third-party service provider, not a deployment platform.
+# Use ThirdPartyServiceManager in the inference module instead.
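
Usage note (not part of the published diff): a minimal sketch of how the ModelService pieces added above fit together. The import path follows file 21 in the list above; the model id and endpoint values are illustrative.

```python
from isa_model.deployment.services.model_service import (
    ModelService, ServiceStatus, create_modal_service,
)

# Factory helper: builds a Modal-backed service with scale-to-zero defaults.
svc = create_modal_service(
    service_name="isa_vision_ui",
    model_id="ui-detection-model",  # hypothetical model id
    inference_endpoint="https://isa-vision-ui.modal.run/",
    health_endpoint="https://isa-vision-ui.modal.run/health_check",
    capabilities=["ui_detection"],
)

assert svc.is_available()  # HEALTHY status plus an inference endpoint
assert svc.has_capability("ui_detection")

# to_dict()/from_dict() round-trip, e.g. for persisting in a registry table.
restored = ModelService.from_dict(svc.to_dict())
assert restored.service_id == svc.service_id
assert restored.status is ServiceStatus.HEALTHY
```

Note that from_dict() tolerates missing health/usage metrics, so the round-trip works for freshly created services whose metrics have not been populated by monitoring yet.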
isa_model/deployment/services/service_monitor.py (new file, +356)
@@ -0,0 +1,356 @@
+"""
+Service Health Monitor - Automated health checking and service discovery for MaaS platform
+
+This module provides automated health monitoring and service discovery capabilities
+for the ISA Model MaaS platform.
+"""
+
+import asyncio
+import logging
+import time
+from typing import Dict, List, Optional, Any
+from datetime import datetime, timezone
+import httpx
+import json
+
+from .service_registry import ServiceRegistry
+from .model_service import ModelService, HealthMetrics, ServiceStatus
+
+logger = logging.getLogger(__name__)
+
+class ServiceMonitor:
+    """
+    Service health monitor that automatically checks service health and updates registry
+
+    Features:
+    - Periodic health checks for all registered services
+    - Automatic service discovery from endpoints
+    - Health metrics collection and storage
+    - Service status updates based on health
+    """
+
+    def __init__(self, service_registry: ServiceRegistry, check_interval: int = 300):
+        """
+        Initialize the service monitor
+
+        Args:
+            service_registry: ServiceRegistry instance to monitor
+            check_interval: Health check interval in seconds (default: 5 minutes)
+        """
+        self.service_registry = service_registry
+        self.check_interval = check_interval
+        self._monitoring = False
+        self._monitor_task: Optional[asyncio.Task] = None
+
+        logger.info(f"ServiceMonitor initialized with {check_interval}s check interval")
+
+    async def start_monitoring(self):
+        """Start the health monitoring background task"""
+        if self._monitoring:
+            logger.warning("Service monitoring is already running")
+            return
+
+        self._monitoring = True
+        self._monitor_task = asyncio.create_task(self._monitor_loop())
+        logger.info("Service health monitoring started")
+
+    async def stop_monitoring(self):
+        """Stop the health monitoring background task"""
+        if not self._monitoring:
+            return
+
+        self._monitoring = False
+        if self._monitor_task:
+            self._monitor_task.cancel()
+            try:
+                await self._monitor_task
+            except asyncio.CancelledError:
+                pass
+
+        logger.info("Service health monitoring stopped")
+
+    async def _monitor_loop(self):
+        """Main monitoring loop that runs health checks periodically"""
+        while self._monitoring:
+            try:
+                await self.check_all_services()
+                await asyncio.sleep(self.check_interval)
+            except asyncio.CancelledError:
+                break
+            except Exception as e:
+                logger.error(f"Error in monitoring loop: {e}")
+                await asyncio.sleep(min(self.check_interval, 60))  # Don't wait too long on error
+
+    async def check_all_services(self) -> Dict[str, bool]:
+        """
+        Check health of all registered services
+
+        Returns:
+            Dictionary mapping service_id to health status (True = healthy)
+        """
+        # Get all services from registry - this is a simplified implementation
+        # In practice, you'd want a method to get all services from ServiceRegistry
+        results = {}
+
+        try:
+            # For now, we'll check known service names
+            known_services = ["isa_vision_table", "isa_vision_ui", "isa_vision_doc"]
+
+            for service_name in known_services:
+                try:
+                    services = await self.service_registry.get_services_by_name(service_name)
+                    for service in services:
+                        health_result = await self.check_service_health(service)
+                        results[service.service_id] = health_result
+                except Exception as e:
+                    logger.error(f"Failed to check services for {service_name}: {e}")
+
+            logger.info(f"Health check completed for {len(results)} services")
+            return results
+
+        except Exception as e:
+            logger.error(f"Failed to check all services: {e}")
+            return results
+
+    async def check_service_health(self, service: ModelService) -> bool:
+        """
+        Check health of a specific service
+
+        Args:
+            service: ModelService instance to check
+
+        Returns:
+            True if service is healthy, False otherwise
+        """
+        start_time = time.time()
+
+        try:
+            # Check if service has a health endpoint
+            health_endpoint = service.health_endpoint or service.get_endpoint_url("health")
+
+            if not health_endpoint:
+                # No health endpoint, try to ping inference endpoint
+                health_endpoint = service.inference_endpoint
+
+            if not health_endpoint:
+                logger.warning(f"No endpoint available for service {service.service_id}")
+                return False
+
+            # Perform health check
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                try:
+                    response = await client.get(health_endpoint)
+                    response_time_ms = int((time.time() - start_time) * 1000)
+
+                    # Create health metrics
+                    health_metrics = HealthMetrics(
+                        is_healthy=response.status_code == 200,
+                        response_time_ms=response_time_ms,
+                        status_code=response.status_code,
+                        checked_at=datetime.now(timezone.utc)
+                    )
+
+                    # Try to extract additional metrics from response
+                    if response.status_code == 200:
+                        try:
+                            health_data = response.json()
+                            if isinstance(health_data, dict):
+                                # Extract metrics if available
+                                health_metrics.cpu_usage_percent = health_data.get("cpu_usage")
+                                health_metrics.memory_usage_mb = health_data.get("memory_usage_mb")
+                                health_metrics.gpu_usage_percent = health_data.get("gpu_usage")
+                        except json.JSONDecodeError:
+                            pass  # Health endpoint might not return JSON
+                    else:
+                        health_metrics.error_message = f"HTTP {response.status_code}: {response.text[:200]}"
+
+                    # Update service health in registry
+                    await self.service_registry.update_service_health(service.service_id, health_metrics)
+
+                    logger.debug(f"Health check for {service.service_id}: {health_metrics.is_healthy} ({response_time_ms}ms)")
+                    return health_metrics.is_healthy
+
+                except httpx.TimeoutException:
+                    # Service is not responding
+                    health_metrics = HealthMetrics(
+                        is_healthy=False,
+                        response_time_ms=int((time.time() - start_time) * 1000),
+                        error_message="Service timeout",
+                        checked_at=datetime.now(timezone.utc)
+                    )
+
+                    await self.service_registry.update_service_health(service.service_id, health_metrics)
+                    logger.warning(f"Service {service.service_id} health check timed out")
+                    return False
+
+                except httpx.RequestError as e:
+                    # Network or connection error
+                    health_metrics = HealthMetrics(
+                        is_healthy=False,
+                        response_time_ms=int((time.time() - start_time) * 1000),
+                        error_message=f"Request error: {str(e)}",
+                        checked_at=datetime.now(timezone.utc)
+                    )
+
+                    await self.service_registry.update_service_health(service.service_id, health_metrics)
+                    logger.warning(f"Service {service.service_id} health check failed: {e}")
+                    return False
+
+        except Exception as e:
+            # Unexpected error
+            health_metrics = HealthMetrics(
+                is_healthy=False,
+                response_time_ms=int((time.time() - start_time) * 1000),
+                error_message=f"Health check error: {str(e)}",
+                checked_at=datetime.now(timezone.utc)
+            )
+
+            try:
+                await self.service_registry.update_service_health(service.service_id, health_metrics)
+            except Exception as update_error:
+                logger.error(f"Failed to update health metrics: {update_error}")
+
+            logger.error(f"Unexpected error checking service {service.service_id}: {e}")
+            return False
+
+    async def discover_services(self) -> List[ModelService]:
+        """
+        Discover services from known endpoints and register them if not already registered
+
+        Returns:
+            List of discovered ModelService instances
+        """
+        discovered_services = []
+
+        # Known service endpoints to check for discovery
+        known_endpoints = [
+            {
+                "name": "isa_vision_table",
+                "base_url": "https://qwen-vision-table.modal.run",
+                "service_type": "vision",
+                "capabilities": ["table_detection", "table_structure_recognition"]
+            },
+            {
+                "name": "isa_vision_doc",
+                "base_url": "https://isa-vision-doc.modal.run",
+                "service_type": "vision",
+                "capabilities": ["table_detection", "ocr", "image_analysis"]
+            },
+            {
+                "name": "isa_vision_ui",
+                "base_url": "https://isa-vision-ui.modal.run",
+                "service_type": "vision",
+                "capabilities": ["ui_detection", "element_detection"]
+            }
+        ]
+
+        for endpoint_info in known_endpoints:
+            try:
+                service = await self._discover_service_from_endpoint(endpoint_info)
+                if service:
+                    discovered_services.append(service)
+            except Exception as e:
+                logger.warning(f"Failed to discover service from {endpoint_info['name']}: {e}")
+
+        logger.info(f"Discovered {len(discovered_services)} services")
+        return discovered_services
+
+    async def _discover_service_from_endpoint(self, endpoint_info: Dict[str, Any]) -> Optional[ModelService]:
+        """
+        Discover a service from an endpoint by checking its health/info endpoint
+
+        Args:
+            endpoint_info: Dictionary with service endpoint information
+
+        Returns:
+            ModelService instance if discovered successfully, None otherwise
+        """
+        try:
+            base_url = endpoint_info["base_url"]
+            health_url = f"{base_url}/health_check"
+
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                response = await client.get(health_url)
+
+                if response.status_code == 200:
+                    # Service is responding, check if it's already registered
+                    existing_services = await self.service_registry.get_services_by_name(endpoint_info["name"])
+
+                    if existing_services:
+                        logger.debug(f"Service {endpoint_info['name']} already registered")
+                        return existing_services[0]  # Return existing service
+
+                    # Service is not registered, create and register it
+                    from .model_service import ServiceType, DeploymentPlatform, ServiceStatus, ResourceRequirements
+
+                    service = ModelService(
+                        service_id=f"{endpoint_info['name']}-discovered-{int(time.time())}",
+                        service_name=endpoint_info["name"],
+                        model_id=f"{endpoint_info['name']}-model",
+                        deployment_platform=DeploymentPlatform.MODAL,
+                        service_type=ServiceType.VISION,
+                        status=ServiceStatus.HEALTHY,
+                        inference_endpoint=f"{base_url}/",
+                        health_endpoint=health_url,
+                        capabilities=endpoint_info.get("capabilities", []),
+                        resource_requirements=ResourceRequirements(),
+                        metadata={
+                            "discovered": True,
+                            "discovery_time": datetime.now(timezone.utc).isoformat(),
+                            "base_url": base_url
+                        }
+                    )
+
+                    # Register the discovered service
+                    success = await self.service_registry.register_service(service)
+
+                    if success:
+                        logger.info(f"Successfully registered discovered service: {endpoint_info['name']}")
+                        return service
+                    else:
+                        logger.warning(f"Failed to register discovered service: {endpoint_info['name']}")
+                        return None
+
+                else:
+                    logger.debug(f"Service at {base_url} not responding (HTTP {response.status_code})")
+                    return None
+
+        except Exception as e:
+            logger.warning(f"Failed to discover service from {endpoint_info['base_url']}: {e}")
+            return None
+
+    async def get_service_statistics(self) -> Dict[str, Any]:
+        """
+        Get comprehensive service statistics including health metrics
+
+        Returns:
+            Dictionary with service statistics
+        """
+        try:
+            # Get basic statistics from registry
+            stats = await self.service_registry.get_service_statistics()
+
+            # Add monitoring-specific statistics
+            stats.update({
+                "monitoring_enabled": self._monitoring,
+                "check_interval_seconds": self.check_interval,
+                "last_check": datetime.now(timezone.utc).isoformat()
+            })
+
+            return stats
+
+        except Exception as e:
+            logger.error(f"Failed to get service statistics: {e}")
+            return {
+                "error": str(e),
+                "monitoring_enabled": self._monitoring,
+                "check_interval_seconds": self.check_interval
+            }
+
+    def __del__(self):
+        """Cleanup when monitor is destroyed"""
+        if self._monitoring and self._monitor_task:
+            try:
+                self._monitor_task.cancel()
+            except:
+                pass
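
Usage note (not part of the published diff): a minimal sketch of driving the monitor defined above. ServiceRegistry's constructor is not shown in this diff (see file 23, service_registry.py, in the list above), so its default construction is an assumption here.

```python
import asyncio

from isa_model.deployment.services.service_registry import ServiceRegistry
from isa_model.deployment.services.service_monitor import ServiceMonitor

async def main() -> None:
    registry = ServiceRegistry()  # assumed default construction
    monitor = ServiceMonitor(registry, check_interval=60)

    # One-off discovery of the known Modal endpoints, then a single manual
    # health pass; check_all_services() returns a service_id -> bool map.
    await monitor.discover_services()
    results = await monitor.check_all_services()
    print(results)

    # Or run the background loop and stop it cleanly on shutdown.
    await monitor.start_monitoring()
    try:
        await asyncio.sleep(120)  # let a couple of 60s check cycles run
    finally:
        await monitor.stop_monitoring()

asyncio.run(main())
```

Since start_monitoring() schedules the loop with asyncio.create_task(), it must be called from within a running event loop, and stop_monitoring() cancels the task and awaits its termination.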