isa-model 0.3.91-py3-none-any.whl → 0.4.3-py3-none-any.whl
This diff reflects the changes between two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/serving/api/routes/deployments.py
@@ -0,0 +1,475 @@
+"""
+Deployments API Routes
+
+Handles automated HuggingFace model deployment to Modal
+"""
+
+from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
+from pydantic import BaseModel
+from typing import Optional, List, Dict, Any
+import logging
+import asyncio
+import json
+import time
+from datetime import datetime
+from pathlib import Path
+
+from isa_model.deployment.modal.deployer import ModalDeployer as HuggingFaceModalDeployer
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+# Request/Response models
+class AnalyzeModelRequest(BaseModel):
+    model_id: str
+
+class DeployModelRequest(BaseModel):
+    model_id: str
+    service_name: Optional[str] = None
+    auto_deploy: bool = False
+
+class DeploymentResponse(BaseModel):
+    success: bool
+    deployment_id: Optional[str] = None
+    model_id: str
+    config: Optional[Dict[str, Any]] = None
+    service_file: Optional[str] = None
+    deployment_command: Optional[str] = None
+    estimated_cost_per_hour: Optional[float] = None
+    deployed: bool = False
+    error: Optional[str] = None
+
+# Global deployer instance
+deployer = HuggingFaceModalDeployer()
+
+# In-memory deployment tracking (in production, use a database)
+deployments = {}
+
+@router.post("/analyze", response_model=Dict[str, Any])
+async def analyze_model(request: AnalyzeModelRequest):
+    """
+    Analyze a HuggingFace model for deployment compatibility
+    """
+    try:
+        logger.info(f"Analyzing model: {request.model_id}")
+
+        # Analyze the model
+        config = deployer.analyze_model(request.model_id)
+
+        return {
+            "success": True,
+            "model_id": config.model_id,
+            "model_type": config.model_type,
+            "architecture": config.architecture,
+            "parameters": config.parameters,
+            "gpu_requirements": config.gpu_requirements,
+            "memory_gb": config.memory_gb,
+            "container_memory_mb": config.container_memory_mb,
+            "dependencies": config.dependencies,
+            "capabilities": config.capabilities,
+            "estimated_cost_per_hour": config.estimated_cost_per_hour
+        }
+
+    except Exception as e:
+        logger.error(f"Model analysis failed for {request.model_id}: {e}")
+        raise HTTPException(status_code=400, detail=f"Model analysis failed: {str(e)}")
+
+@router.post("/deploy", response_model=DeploymentResponse)
+async def deploy_model(request: DeployModelRequest, background_tasks: BackgroundTasks):
+    """
+    Deploy a HuggingFace model to Modal
+    """
+    try:
+        logger.info(f"Starting deployment for model: {request.model_id}")
+
+        # Generate unique deployment ID
+        import time
+        import uuid
+        deployment_id = f"deploy_{uuid.uuid4().hex[:8]}_{int(time.time())}"
+
+        # Add to deployments tracking
+        deployments[deployment_id] = {
+            "id": deployment_id,
+            "model_id": request.model_id,
+            "service_name": request.service_name,
+            "status": "pending",
+            "created_at": time.time(),
+            "auto_deploy": request.auto_deploy
+        }
+
+        # Start deployment in background
+        background_tasks.add_task(
+            perform_deployment,
+            deployment_id,
+            request.model_id,
+            request.service_name,
+            request.auto_deploy
+        )
+
+        return DeploymentResponse(
+            success=True,
+            deployment_id=deployment_id,
+            model_id=request.model_id,
+            deployed=False
+        )
+
+    except Exception as e:
+        logger.error(f"Deployment initiation failed for {request.model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Deployment failed: {str(e)}")
+
+async def perform_deployment(deployment_id: str, model_id: str, service_name: Optional[str], auto_deploy: bool):
+    """
+    Perform the actual deployment in the background
+    """
+    import time
+
+    try:
+        logger.info(f"Performing deployment {deployment_id} for model {model_id}")
+
+        # Update status
+        deployments[deployment_id]["status"] = "deploying"
+        deployments[deployment_id]["progress"] = "Analyzing model"
+
+        # Deploy the model
+        result = deployer.deploy_model(model_id, deploy=auto_deploy)
+
+        if result["success"]:
+            deployments[deployment_id].update({
+                "status": "completed" if result.get("deployed") else "generated",
+                "progress": "Deployment completed",
+                "config": result["config"],
+                "service_file": result["service_file"],
+                "deployment_command": result["deployment_command"],
+                "estimated_cost_per_hour": result["estimated_cost_per_hour"],
+                "deployed": result.get("deployed", False),
+                "completed_at": time.time()
+            })
+        else:
+            deployments[deployment_id].update({
+                "status": "failed",
+                "progress": "Deployment failed",
+                "error": result.get("error", "Unknown error"),
+                "failed_at": time.time()
+            })
+
+    except Exception as e:
+        logger.error(f"Deployment {deployment_id} failed: {e}")
+        deployments[deployment_id].update({
+            "status": "failed",
+            "progress": "Deployment failed",
+            "error": str(e),
+            "failed_at": time.time()
+        })
+
+@router.get("/")
+async def list_deployments():
+    """
+    List all deployments
+    """
+    try:
+        # Convert deployments to list format
+        deployment_list = []
+
+        for deployment_id, deployment in deployments.items():
+            deployment_list.append({
+                "id": deployment_id,
+                "name": deployment.get("service_name") or f"{deployment['model_id'].split('/')[-1]} Service",
+                "model_id": deployment["model_id"],
+                "model_type": "text",  # Would be determined from analysis
+                "status": deployment["status"],
+                "gpu": "A10G",  # Would be from config
+                "cost_per_hour": "1.20",  # Would be from config
+                "created_at": deployment["created_at"],
+                "deployed_at": deployment.get("completed_at"),
+                "error": deployment.get("error")
+            })
+
+        # Add some fallback deployments for demo
+        if not deployment_list:
+            deployment_list = [
+                {
+                    "id": "qwen2-vl-7b",
+                    "name": "Qwen2.5-VL Service",
+                    "model_id": "Qwen/Qwen2.5-VL-7B-Instruct",
+                    "model_type": "vision",
+                    "status": "active",
+                    "gpu": "A100",
+                    "cost_per_hour": "4.00",
+                    "created_at": 1705312200,
+                    "deployed_at": 1705312800
+                },
+                {
+                    "id": "embed-service",
+                    "name": "BGE Embed Service",
+                    "model_id": "BAAI/bge-base-en-v1.5",
+                    "model_type": "embedding",
+                    "status": "active",
+                    "gpu": "A10G",
+                    "cost_per_hour": "1.20",
+                    "created_at": 1705225800,
+                    "deployed_at": 1705226400
+                }
+            ]
+
+        return deployment_list
+
+    except Exception as e:
+        logger.error(f"Failed to list deployments: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to list deployments: {str(e)}")
+
+@router.get("/{deployment_id}")
+async def get_deployment(deployment_id: str):
+    """
+    Get deployment details
+    """
+    try:
+        if deployment_id not in deployments:
+            raise HTTPException(status_code=404, detail="Deployment not found")
+
+        return deployments[deployment_id]
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get deployment {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get deployment: {str(e)}")
+
+@router.get("/{deployment_id}/status")
+async def get_deployment_status(deployment_id: str, request: Request):
+    """
+    Get real-time deployment status and monitoring information with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        # Initialize deployment manager
+        manager = DeploymentManager()
+
+        # Verify tenant access to deployment first
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        # Get deployment status
+        status_info = await manager.get_modal_service_status(deployment_id)
+
+        return {
+            "success": True,
+            "deployment_status": status_info
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get deployment status {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get deployment status: {str(e)}")
+
+@router.get("/{deployment_id}/monitoring")
+async def get_deployment_monitoring(deployment_id: str, request: Request):
+    """
+    Get detailed monitoring metrics for Modal deployment with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        manager = DeploymentManager()
+
+        # Verify tenant access to deployment first
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        status_info = await manager.get_modal_service_status(deployment_id)
+
+        if status_info.get("status") == "not_found":
+            raise HTTPException(status_code=404, detail="Deployment not found")
+
+        # Extract detailed monitoring data
+        monitoring_data = status_info.get("monitoring", {})
+
+        return {
+            "success": True,
+            "deployment_id": deployment_id,
+            "monitoring": {
+                "health_check": monitoring_data.get("health_check"),
+                "resource_usage": monitoring_data.get("resource_usage"),
+                "request_metrics": monitoring_data.get("request_metrics"),
+                "cost_tracking": monitoring_data.get("cost_tracking"),
+                "last_updated": datetime.now().isoformat()
+            }
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get monitoring data {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get monitoring data: {str(e)}")
+
+@router.post("/{deployment_id}/restart")
+async def restart_deployment(deployment_id: str, request: Request):
+    """
+    Restart a Modal deployment with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        manager = DeploymentManager()
+
+        # Check if deployment exists and user has access
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        # Update status to restarting
+        await manager.update_deployment_status(deployment_id, "restarting")
+
+        # TODO: Implement actual Modal service restart
+        # For now, simulate restart process
+        await asyncio.sleep(1)
+
+        # Update status to running
+        await manager.update_deployment_status(deployment_id, "running")
+
+        return {
+            "success": True,
+            "message": f"Deployment {deployment_id} restarted successfully",
+            "deployment_id": deployment_id,
+            "status": "running"
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to restart deployment {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to restart deployment: {str(e)}")
+
+@router.delete("/{deployment_id}")
+async def cancel_deployment(deployment_id: str):
+    """
+    Cancel a pending deployment
+    """
+    try:
+        if deployment_id not in deployments:
+            raise HTTPException(status_code=404, detail="Deployment not found")
+
+        deployment = deployments[deployment_id]
+
+        if deployment["status"] == "pending":
+            deployment["status"] = "cancelled"
+            deployment["cancelled_at"] = time.time()
+            return {"success": True, "message": "Deployment cancelled"}
+        else:
+            raise HTTPException(status_code=400, detail="Cannot cancel deployment in current status")
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to cancel deployment {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to cancel deployment: {str(e)}")
+
+@router.post("/{deployment_id}/retry")
+async def retry_deployment(deployment_id: str, background_tasks: BackgroundTasks):
+    """
+    Retry a failed deployment
+    """
+    try:
+        if deployment_id not in deployments:
+            raise HTTPException(status_code=404, detail="Deployment not found")
+
+        deployment = deployments[deployment_id]
+
+        if deployment["status"] == "failed":
+            # Reset deployment status
+            deployment["status"] = "pending"
+            deployment["error"] = None
+            deployment["progress"] = "Retrying deployment"
+
+            # Start deployment in background
+            background_tasks.add_task(
+                perform_deployment,
+                deployment_id,
+                deployment["model_id"],
+                deployment.get("service_name"),
+                deployment.get("auto_deploy", False)
+            )
+
+            return {"success": True, "message": "Deployment retry started"}
+        else:
+            raise HTTPException(status_code=400, detail="Cannot retry deployment in current status")
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to retry deployment {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to retry deployment: {str(e)}")
+
+@router.get("/{deployment_id}/code")
+async def get_service_code(deployment_id: str):
+    """
+    Download the generated service code for a deployment
+    """
+    try:
+        if deployment_id not in deployments:
+            raise HTTPException(status_code=404, detail="Deployment not found")
+
+        deployment = deployments[deployment_id]
+        service_file = deployment.get("service_file")
+
+        if not service_file or not Path(service_file).exists():
+            raise HTTPException(status_code=404, detail="Service code not found")
+
+        # Read the service code file
+        with open(service_file, 'r') as f:
+            service_code = f.read()
+
+        from fastapi.responses import PlainTextResponse
+        return PlainTextResponse(
+            content=service_code,
+            headers={
+                "Content-Disposition": f"attachment; filename={Path(service_file).name}"
+            }
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get service code for {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get service code: {str(e)}")
+
+# Health check for deployments service
+@router.get("/health")
+async def deployments_health():
+    """Health check for deployments service"""
+    return {
+        "status": "healthy",
+        "service": "deployments",
+        "active_deployments": len([d for d in deployments.values() if d["status"] == "active"]),
+        "pending_deployments": len([d for d in deployments.values() if d["status"] == "pending"]),
+        "total_deployments": len(deployments)
+    }
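For orientation, here is a minimal client sketch against the routes added above. It assumes the router is mounted under an `/api/v1/deployments` prefix and that the API server listens on `http://localhost:8000`; neither the prefix nor the host is confirmed by this diff, so adjust both to match how `fastapi_server.py` mounts the router in your installation.

```python
# Hypothetical client sketch for the new deployments routes.
# Assumptions (not confirmed by the diff): router mounted at /api/v1/deployments,
# server reachable on localhost:8000.
import time

import httpx

BASE_URL = "http://localhost:8000/api/v1/deployments"  # assumed mount point


def deploy_and_wait(model_id: str, timeout_s: float = 600.0) -> dict:
    """Analyze a model, start a deployment, then poll until it finishes."""
    with httpx.Client(base_url=BASE_URL, timeout=30.0) as client:
        # 1. Check deployment compatibility (POST /analyze).
        analysis = client.post("/analyze", json={"model_id": model_id})
        analysis.raise_for_status()
        print("estimated $/h:", analysis.json().get("estimated_cost_per_hour"))

        # 2. Kick off the background deployment (POST /deploy).
        started = client.post(
            "/deploy",
            json={"model_id": model_id, "auto_deploy": False},
        )
        started.raise_for_status()
        deployment_id = started.json()["deployment_id"]

        # 3. Poll GET /{deployment_id} until it leaves the pending/deploying states.
        deadline = time.monotonic() + timeout_s
        while time.monotonic() < deadline:
            record = client.get(f"/{deployment_id}").json()
            if record["status"] in ("completed", "generated", "failed"):
                return record
            time.sleep(5)
        raise TimeoutError(f"deployment {deployment_id} did not finish in time")


if __name__ == "__main__":
    print(deploy_and_wait("BAAI/bge-base-en-v1.5"))
```

Note that the route module tracks deployments in an in-process `deployments` dict (the code itself flags "in production, use a database"), so the polling step only sees deployments created on the same server instance that accepted the `/deploy` request.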