isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +30 -1
- isa_model/client.py +937 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +257 -601
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -17
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
- isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
- isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +492 -40
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +51 -17
- isa_model/inference/services/llm/openai_llm_service.py +70 -19
- isa_model/inference/services/llm/yyds_llm_service.py +24 -23
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +218 -117
- isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
- isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +104 -307
- isa_model/inference/services/vision/replicate_vision_service.py +140 -325
- isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/api/fastapi_server.py +6 -1
- isa_model/serving/api/routes/unified.py +274 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
- isa_model/config/__init__.py +0 -9
- isa_model/config/config_manager.py +0 -213
- isa_model/core/model_manager.py +0 -213
- isa_model/core/model_registry.py +0 -375
- isa_model/core/vision_models_init.py +0 -116
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/stacked/__init__.py +0 -26
- isa_model/inference/services/stacked/config.py +0 -426
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
isa_model/deployment/services/model_service.py (new file)
@@ -0,0 +1,332 @@
```python
"""
ModelService - Core abstraction for deployed model services in the MaaS platform

This represents a deployed service instance that can be discovered, monitored, and invoked.
It's the bridge between the high-level AIFactory interface and the underlying platform services.
"""

from typing import Dict, List, Optional, Any, Union
from enum import Enum
from dataclasses import dataclass, field
from datetime import datetime
import logging

logger = logging.getLogger(__name__)

class ServiceStatus(str, Enum):
    """Service deployment and health status"""
    PENDING = "pending"
    DEPLOYING = "deploying"
    HEALTHY = "healthy"
    UNHEALTHY = "unhealthy"
    STOPPED = "stopped"

class ServiceType(str, Enum):
    """Types of services available in the platform"""
    LLM = "llm"
    EMBEDDING = "embedding"
    VISION = "vision"
    AUDIO = "audio"
    IMAGE_GEN = "image_gen"

class DeploymentPlatform(str, Enum):
    """Supported deployment platforms for self-owned services only"""
    MODAL = "modal"
    KUBERNETES = "kubernetes"
    RUNPOD = "runpod"
    YYDS = "yyds"
    OLLAMA = "ollama"  # Local deployment

@dataclass
class HealthMetrics:
    """Service health metrics"""
    is_healthy: bool
    response_time_ms: Optional[int] = None
    status_code: Optional[int] = None
    cpu_usage_percent: Optional[float] = None
    memory_usage_mb: Optional[int] = None
    gpu_usage_percent: Optional[float] = None
    error_message: Optional[str] = None
    checked_at: Optional[datetime] = None

@dataclass
class UsageMetrics:
    """Service usage and cost metrics"""
    request_count: int = 0
    total_processing_time_ms: int = 0
    error_count: int = 0
    total_cost_usd: float = 0.0
    window_start: Optional[datetime] = None
    window_end: Optional[datetime] = None

@dataclass
class ResourceRequirements:
    """Service resource requirements"""
    gpu_type: Optional[str] = None
    memory_mb: Optional[int] = None
    cpu_cores: Optional[int] = None
    storage_gb: Optional[int] = None
    min_replicas: int = 0
    max_replicas: int = 1

class ModelService:
    """
    Core abstraction for a deployed model service in the MaaS platform

    This class represents a self-owned deployed service instance that:
    - Has been deployed to a platform (Modal, Kubernetes, RunPod, etc.)
    - Can be discovered through the ServiceRegistry
    - Can be health-checked and monitored
    - Provides inference capabilities through specific endpoints

    Note: This is only for self-owned deployments. Third-party services
    (OpenAI, Replicate, etc.) are managed by ThirdPartyServiceManager.
    """

    def __init__(
        self,
        service_id: str,
        service_name: str,
        model_id: Optional[str],
        deployment_platform: DeploymentPlatform,
        service_type: ServiceType,
        inference_endpoint: Optional[str] = None,
        health_endpoint: Optional[str] = None,
        capabilities: Optional[List[str]] = None,
        config: Optional[Dict[str, Any]] = None,
        resource_requirements: Optional[ResourceRequirements] = None,
        metadata: Optional[Dict[str, Any]] = None,
        status: ServiceStatus = ServiceStatus.PENDING
    ):
        self.service_id = service_id
        self.service_name = service_name
        self.model_id = model_id
        self.deployment_platform = deployment_platform
        self.service_type = service_type
        self.status = status

        # Endpoints
        self.inference_endpoint = inference_endpoint
        self.health_endpoint = health_endpoint

        # Capabilities and configuration
        self.capabilities = capabilities or []
        self.config = config or {}
        self.resource_requirements = resource_requirements or ResourceRequirements()
        self.metadata = metadata or {}

        # Metrics (populated by monitoring systems)
        self.health_metrics: Optional[HealthMetrics] = None
        self.usage_metrics: Optional[UsageMetrics] = None

        # Timestamps
        self.created_at: Optional[datetime] = None
        self.updated_at: Optional[datetime] = None

        logger.debug(f"Created ModelService: {service_id} ({service_name})")

    def is_healthy(self) -> bool:
        """Check if the service is currently healthy"""
        if self.status != ServiceStatus.HEALTHY:
            return False

        if self.health_metrics:
            return self.health_metrics.is_healthy

        # If no health metrics, assume healthy if status is healthy
        return True

    def is_available(self) -> bool:
        """Check if the service is available for inference requests"""
        return (
            self.status == ServiceStatus.HEALTHY and
            self.inference_endpoint is not None and
            self.is_healthy()
        )

    def has_capability(self, capability: str) -> bool:
        """Check if this service provides a specific capability"""
        return capability in self.capabilities

    def get_endpoint_url(self, endpoint_type: str = "inference") -> Optional[str]:
        """Get endpoint URL for the service"""
        if endpoint_type == "inference":
            return self.inference_endpoint
        elif endpoint_type == "health":
            return self.health_endpoint
        else:
            # Check if it's in metadata
            endpoints = self.metadata.get("endpoints", {})
            return endpoints.get(endpoint_type)

    def update_health_metrics(self, metrics: HealthMetrics) -> None:
        """Update health metrics for this service"""
        self.health_metrics = metrics

        # Update service status based on health
        if metrics.is_healthy:
            if self.status != ServiceStatus.HEALTHY:
                self.status = ServiceStatus.HEALTHY
                logger.info(f"Service {self.service_id} is now healthy")
        else:
            if self.status == ServiceStatus.HEALTHY:
                self.status = ServiceStatus.UNHEALTHY
                logger.warning(f"Service {self.service_id} is now unhealthy: {metrics.error_message}")

    def update_usage_metrics(self, metrics: UsageMetrics) -> None:
        """Update usage metrics for this service"""
        self.usage_metrics = metrics
        logger.debug(f"Updated usage metrics for {self.service_id}: {metrics.request_count} requests")

    def to_dict(self) -> Dict[str, Any]:
        """Convert service to dictionary representation"""
        return {
            "service_id": self.service_id,
            "service_name": self.service_name,
            "model_id": self.model_id,
            "deployment_platform": self.deployment_platform.value,
            "service_type": self.service_type.value,
            "status": self.status.value,
            "inference_endpoint": self.inference_endpoint,
            "health_endpoint": self.health_endpoint,
            "capabilities": self.capabilities,
            "config": self.config,
            "resource_requirements": {
                "gpu_type": self.resource_requirements.gpu_type,
                "memory_mb": self.resource_requirements.memory_mb,
                "cpu_cores": self.resource_requirements.cpu_cores,
                "storage_gb": self.resource_requirements.storage_gb,
                "min_replicas": self.resource_requirements.min_replicas,
                "max_replicas": self.resource_requirements.max_replicas,
            },
            "metadata": self.metadata,
            "health_metrics": {
                "is_healthy": self.health_metrics.is_healthy if self.health_metrics else None,
                "response_time_ms": self.health_metrics.response_time_ms if self.health_metrics else None,
                "status_code": self.health_metrics.status_code if self.health_metrics else None,
                "error_message": self.health_metrics.error_message if self.health_metrics else None,
                "checked_at": self.health_metrics.checked_at.isoformat() if self.health_metrics and self.health_metrics.checked_at else None,
            } if self.health_metrics else None,
            "usage_metrics": {
                "request_count": self.usage_metrics.request_count if self.usage_metrics else 0,
                "total_processing_time_ms": self.usage_metrics.total_processing_time_ms if self.usage_metrics else 0,
                "error_count": self.usage_metrics.error_count if self.usage_metrics else 0,
                "total_cost_usd": self.usage_metrics.total_cost_usd if self.usage_metrics else 0.0,
            } if self.usage_metrics else None,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ModelService':
        """Create ModelService from dictionary representation"""
        # Create resource requirements
        resource_data = data.get("resource_requirements", {})
        resources = ResourceRequirements(
            gpu_type=resource_data.get("gpu_type"),
            memory_mb=resource_data.get("memory_mb"),
            cpu_cores=resource_data.get("cpu_cores"),
            storage_gb=resource_data.get("storage_gb"),
            min_replicas=resource_data.get("min_replicas", 0),
            max_replicas=resource_data.get("max_replicas", 1),
        )

        # Create service
        service = cls(
            service_id=data["service_id"],
            service_name=data["service_name"],
            model_id=data.get("model_id"),
            deployment_platform=DeploymentPlatform(data["deployment_platform"]),
            service_type=ServiceType(data["service_type"]),
            status=ServiceStatus(data.get("status", "pending")),
            inference_endpoint=data.get("inference_endpoint"),
            health_endpoint=data.get("health_endpoint"),
            capabilities=data.get("capabilities", []),
            config=data.get("config", {}),
            resource_requirements=resources,
            metadata=data.get("metadata", {}),
        )

        # Set timestamps
        if data.get("created_at"):
            service.created_at = datetime.fromisoformat(data["created_at"].replace('Z', '+00:00'))
        if data.get("updated_at"):
            service.updated_at = datetime.fromisoformat(data["updated_at"].replace('Z', '+00:00'))

        # Set health metrics
        health_data = data.get("health_metrics")
        if health_data and health_data.get("is_healthy") is not None:
            checked_at = None
            if health_data.get("checked_at"):
                checked_at = datetime.fromisoformat(health_data["checked_at"].replace('Z', '+00:00'))

            service.health_metrics = HealthMetrics(
                is_healthy=health_data["is_healthy"],
                response_time_ms=health_data.get("response_time_ms"),
                status_code=health_data.get("status_code"),
                error_message=health_data.get("error_message"),
                checked_at=checked_at,
            )

        # Set usage metrics
        usage_data = data.get("usage_metrics")
        if usage_data:
            service.usage_metrics = UsageMetrics(
                request_count=usage_data.get("request_count", 0),
                total_processing_time_ms=usage_data.get("total_processing_time_ms", 0),
                error_count=usage_data.get("error_count", 0),
                total_cost_usd=usage_data.get("total_cost_usd", 0.0),
            )

        return service

    def __repr__(self) -> str:
        return f"ModelService(id={self.service_id}, name={self.service_name}, platform={self.deployment_platform.value}, status={self.status.value})"

    def __str__(self) -> str:
        return f"{self.service_name} ({self.service_id}) on {self.deployment_platform.value} - {self.status.value}"

# Factory functions for common service types

def create_modal_service(
    service_name: str,
    model_id: str,
    inference_endpoint: str,
    health_endpoint: Optional[str] = None,
    capabilities: Optional[List[str]] = None,
    gpu_type: str = "T4",
    memory_mb: int = 16384,
    **kwargs
) -> ModelService:
    """Factory function for Modal-deployed services"""
    service_id = f"{service_name}-modal-{datetime.now().strftime('%Y%m%d%H%M%S')}"

    resources = ResourceRequirements(
        gpu_type=gpu_type,
        memory_mb=memory_mb,
        min_replicas=0,  # Modal can scale to zero
        max_replicas=10,  # Reasonable default
    )

    return ModelService(
        service_id=service_id,
        service_name=service_name,
        model_id=model_id,
        deployment_platform=DeploymentPlatform.MODAL,
        service_type=ServiceType.VISION,  # Most Modal services are vision
        inference_endpoint=inference_endpoint,
        health_endpoint=health_endpoint,
        capabilities=capabilities or [],
        resource_requirements=resources,
        metadata={
            "platform": "modal",
            "auto_scaling": True,
            "scale_to_zero": True,
            **kwargs
        },
        status=ServiceStatus.HEALTHY,  # Assume healthy when creating
    )

# REMOVED: create_openai_service function
# OpenAI is a third-party service provider, not a deployment platform.
# Use ThirdPartyServiceManager in the inference module instead.
```
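A minimal usage sketch of this new API, assuming the import path shown in the manifest above; the model id, endpoint URLs, and capability names are illustrative, not values shipped by the package:

```python
from isa_model.deployment.services.model_service import (
    ModelService,
    create_modal_service,
)

# Build a service record for a hypothetical Modal deployment.
svc = create_modal_service(
    service_name="isa_vision_ui",
    model_id="example-ui-model",  # illustrative model id
    inference_endpoint="https://isa-vision-ui.modal.run/",
    health_endpoint="https://isa-vision-ui.modal.run/health_check",
    capabilities=["ui_detection", "element_detection"],
    gpu_type="A10G",
)

# The factory marks the service HEALTHY and sets an inference endpoint,
# so it reports as available and capability checks work immediately.
assert svc.is_available()
assert svc.has_capability("ui_detection")

# Round-trip through the dict representation used by the registry.
restored = ModelService.from_dict(svc.to_dict())
assert restored.service_id == svc.service_id
assert restored.deployment_platform is svc.deployment_platform
```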
isa_model/deployment/services/service_monitor.py (new file)
@@ -0,0 +1,356 @@
```python
"""
Service Health Monitor - Automated health checking and service discovery for MaaS platform

This module provides automated health monitoring and service discovery capabilities
for the ISA Model MaaS platform.
"""

import asyncio
import logging
import time
from typing import Dict, List, Optional, Any
from datetime import datetime, timezone
import httpx
import json

from .service_registry import ServiceRegistry
from .model_service import ModelService, HealthMetrics, ServiceStatus

logger = logging.getLogger(__name__)

class ServiceMonitor:
    """
    Service health monitor that automatically checks service health and updates registry

    Features:
    - Periodic health checks for all registered services
    - Automatic service discovery from endpoints
    - Health metrics collection and storage
    - Service status updates based on health
    """

    def __init__(self, service_registry: ServiceRegistry, check_interval: int = 300):
        """
        Initialize the service monitor

        Args:
            service_registry: ServiceRegistry instance to monitor
            check_interval: Health check interval in seconds (default: 5 minutes)
        """
        self.service_registry = service_registry
        self.check_interval = check_interval
        self._monitoring = False
        self._monitor_task: Optional[asyncio.Task] = None

        logger.info(f"ServiceMonitor initialized with {check_interval}s check interval")

    async def start_monitoring(self):
        """Start the health monitoring background task"""
        if self._monitoring:
            logger.warning("Service monitoring is already running")
            return

        self._monitoring = True
        self._monitor_task = asyncio.create_task(self._monitor_loop())
        logger.info("Service health monitoring started")

    async def stop_monitoring(self):
        """Stop the health monitoring background task"""
        if not self._monitoring:
            return

        self._monitoring = False
        if self._monitor_task:
            self._monitor_task.cancel()
            try:
                await self._monitor_task
            except asyncio.CancelledError:
                pass

        logger.info("Service health monitoring stopped")

    async def _monitor_loop(self):
        """Main monitoring loop that runs health checks periodically"""
        while self._monitoring:
            try:
                await self.check_all_services()
                await asyncio.sleep(self.check_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in monitoring loop: {e}")
                await asyncio.sleep(min(self.check_interval, 60))  # Don't wait too long on error

    async def check_all_services(self) -> Dict[str, bool]:
        """
        Check health of all registered services

        Returns:
            Dictionary mapping service_id to health status (True = healthy)
        """
        # Get all services from registry - this is a simplified implementation
        # In practice, you'd want a method to get all services from ServiceRegistry
        results = {}

        try:
            # For now, we'll check known service names
            known_services = ["isa_vision_table", "isa_vision_ui", "isa_vision_doc"]

            for service_name in known_services:
                try:
                    services = await self.service_registry.get_services_by_name(service_name)
                    for service in services:
                        health_result = await self.check_service_health(service)
                        results[service.service_id] = health_result
                except Exception as e:
                    logger.error(f"Failed to check services for {service_name}: {e}")

            logger.info(f"Health check completed for {len(results)} services")
            return results

        except Exception as e:
            logger.error(f"Failed to check all services: {e}")
            return results

    async def check_service_health(self, service: ModelService) -> bool:
        """
        Check health of a specific service

        Args:
            service: ModelService instance to check

        Returns:
            True if service is healthy, False otherwise
        """
        start_time = time.time()

        try:
            # Check if service has a health endpoint
            health_endpoint = service.health_endpoint or service.get_endpoint_url("health")

            if not health_endpoint:
                # No health endpoint, try to ping inference endpoint
                health_endpoint = service.inference_endpoint

            if not health_endpoint:
                logger.warning(f"No endpoint available for service {service.service_id}")
                return False

            # Perform health check
            async with httpx.AsyncClient(timeout=30.0) as client:
                try:
                    response = await client.get(health_endpoint)
                    response_time_ms = int((time.time() - start_time) * 1000)

                    # Create health metrics
                    health_metrics = HealthMetrics(
                        is_healthy=response.status_code == 200,
                        response_time_ms=response_time_ms,
                        status_code=response.status_code,
                        checked_at=datetime.now(timezone.utc)
                    )

                    # Try to extract additional metrics from response
                    if response.status_code == 200:
                        try:
                            health_data = response.json()
                            if isinstance(health_data, dict):
                                # Extract metrics if available
                                health_metrics.cpu_usage_percent = health_data.get("cpu_usage")
                                health_metrics.memory_usage_mb = health_data.get("memory_usage_mb")
                                health_metrics.gpu_usage_percent = health_data.get("gpu_usage")
                        except json.JSONDecodeError:
                            pass  # Health endpoint might not return JSON
                    else:
                        health_metrics.error_message = f"HTTP {response.status_code}: {response.text[:200]}"

                    # Update service health in registry
                    await self.service_registry.update_service_health(service.service_id, health_metrics)

                    logger.debug(f"Health check for {service.service_id}: {health_metrics.is_healthy} ({response_time_ms}ms)")
                    return health_metrics.is_healthy

                except httpx.TimeoutException:
                    # Service is not responding
                    health_metrics = HealthMetrics(
                        is_healthy=False,
                        response_time_ms=int((time.time() - start_time) * 1000),
                        error_message="Service timeout",
                        checked_at=datetime.now(timezone.utc)
                    )

                    await self.service_registry.update_service_health(service.service_id, health_metrics)
                    logger.warning(f"Service {service.service_id} health check timed out")
                    return False

                except httpx.RequestError as e:
                    # Network or connection error
                    health_metrics = HealthMetrics(
                        is_healthy=False,
                        response_time_ms=int((time.time() - start_time) * 1000),
                        error_message=f"Request error: {str(e)}",
                        checked_at=datetime.now(timezone.utc)
                    )

                    await self.service_registry.update_service_health(service.service_id, health_metrics)
                    logger.warning(f"Service {service.service_id} health check failed: {e}")
                    return False

        except Exception as e:
            # Unexpected error
            health_metrics = HealthMetrics(
                is_healthy=False,
                response_time_ms=int((time.time() - start_time) * 1000),
                error_message=f"Health check error: {str(e)}",
                checked_at=datetime.now(timezone.utc)
            )

            try:
                await self.service_registry.update_service_health(service.service_id, health_metrics)
            except Exception as update_error:
                logger.error(f"Failed to update health metrics: {update_error}")

            logger.error(f"Unexpected error checking service {service.service_id}: {e}")
            return False

    async def discover_services(self) -> List[ModelService]:
        """
        Discover services from known endpoints and register them if not already registered

        Returns:
            List of discovered ModelService instances
        """
        discovered_services = []

        # Known service endpoints to check for discovery
        known_endpoints = [
            {
                "name": "isa_vision_table",
                "base_url": "https://qwen-vision-table.modal.run",
                "service_type": "vision",
                "capabilities": ["table_detection", "table_structure_recognition"]
            },
            {
                "name": "isa_vision_doc",
                "base_url": "https://isa-vision-doc.modal.run",
                "service_type": "vision",
                "capabilities": ["table_detection", "ocr", "image_analysis"]
            },
            {
                "name": "isa_vision_ui",
                "base_url": "https://isa-vision-ui.modal.run",
                "service_type": "vision",
                "capabilities": ["ui_detection", "element_detection"]
            }
        ]

        for endpoint_info in known_endpoints:
            try:
                service = await self._discover_service_from_endpoint(endpoint_info)
                if service:
                    discovered_services.append(service)
            except Exception as e:
                logger.warning(f"Failed to discover service from {endpoint_info['name']}: {e}")

        logger.info(f"Discovered {len(discovered_services)} services")
        return discovered_services

    async def _discover_service_from_endpoint(self, endpoint_info: Dict[str, Any]) -> Optional[ModelService]:
        """
        Discover a service from an endpoint by checking its health/info endpoint

        Args:
            endpoint_info: Dictionary with service endpoint information

        Returns:
            ModelService instance if discovered successfully, None otherwise
        """
        try:
            base_url = endpoint_info["base_url"]
            health_url = f"{base_url}/health_check"

            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(health_url)

                if response.status_code == 200:
                    # Service is responding, check if it's already registered
                    existing_services = await self.service_registry.get_services_by_name(endpoint_info["name"])

                    if existing_services:
                        logger.debug(f"Service {endpoint_info['name']} already registered")
                        return existing_services[0]  # Return existing service

                    # Service is not registered, create and register it
                    from .model_service import ServiceType, DeploymentPlatform, ServiceStatus, ResourceRequirements

                    service = ModelService(
                        service_id=f"{endpoint_info['name']}-discovered-{int(time.time())}",
                        service_name=endpoint_info["name"],
                        model_id=f"{endpoint_info['name']}-model",
                        deployment_platform=DeploymentPlatform.MODAL,
                        service_type=ServiceType.VISION,
                        status=ServiceStatus.HEALTHY,
                        inference_endpoint=f"{base_url}/",
                        health_endpoint=health_url,
                        capabilities=endpoint_info.get("capabilities", []),
                        resource_requirements=ResourceRequirements(),
                        metadata={
                            "discovered": True,
                            "discovery_time": datetime.now(timezone.utc).isoformat(),
                            "base_url": base_url
                        }
                    )

                    # Register the discovered service
                    success = await self.service_registry.register_service(service)

                    if success:
                        logger.info(f"Successfully registered discovered service: {endpoint_info['name']}")
                        return service
                    else:
                        logger.warning(f"Failed to register discovered service: {endpoint_info['name']}")
                        return None

                else:
                    logger.debug(f"Service at {base_url} not responding (HTTP {response.status_code})")
                    return None

        except Exception as e:
            logger.warning(f"Failed to discover service from {endpoint_info['base_url']}: {e}")
            return None

    async def get_service_statistics(self) -> Dict[str, Any]:
        """
        Get comprehensive service statistics including health metrics

        Returns:
            Dictionary with service statistics
        """
        try:
            # Get basic statistics from registry
            stats = await self.service_registry.get_service_statistics()

            # Add monitoring-specific statistics
            stats.update({
                "monitoring_enabled": self._monitoring,
                "check_interval_seconds": self.check_interval,
                "last_check": datetime.now(timezone.utc).isoformat()
            })

            return stats

        except Exception as e:
            logger.error(f"Failed to get service statistics: {e}")
            return {
                "error": str(e),
                "monitoring_enabled": self._monitoring,
                "check_interval_seconds": self.check_interval
            }

    def __del__(self):
        """Cleanup when monitor is destroyed"""
        if self._monitoring and self._monitor_task:
            try:
                self._monitor_task.cancel()
            except:
                pass
```
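A sketch of how the monitor might be wired to the registry. The no-argument `ServiceRegistry()` constructor is an assumption for illustration only (its real signature lives in service_registry.py, which this diff does not show); the interval and sleep values are arbitrary:

```python
import asyncio

from isa_model.deployment.services.service_registry import ServiceRegistry
from isa_model.deployment.services.service_monitor import ServiceMonitor

async def main():
    # Assumed constructor; the actual ServiceRegistry may require arguments.
    registry = ServiceRegistry()
    monitor = ServiceMonitor(registry, check_interval=60)  # check every minute

    # Register anything reachable at the known Modal endpoints, then start
    # the background health-check loop.
    await monitor.discover_services()
    await monitor.start_monitoring()

    try:
        await asyncio.sleep(300)  # let a few check cycles run
        print(await monitor.get_service_statistics())
    finally:
        await monitor.stop_monitoring()

asyncio.run(main())
```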