isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,440 @@
|
|
1
|
+
"""
|
2
|
+
GPU Gateway API Routes
|
3
|
+
Integration interface between the cloud-side Rails API and local GPU gateways
|
4
|
+
"""
|
5
|
+
|
6
|
+
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
|
7
|
+
from fastapi.responses import JSONResponse
|
8
|
+
from typing import List, Dict, Any, Optional
|
9
|
+
import logging
|
10
|
+
import asyncio
|
11
|
+
import aiohttp
|
12
|
+
from datetime import datetime, timedelta
|
13
|
+
|
14
|
+
from ....core.config import get_settings
|
15
|
+
from ....deployment.local.config import LocalGPUConfig, LocalServiceType, LocalBackend
|
16
|
+
from ....auth.middleware import get_current_tenant
|
17
|
+
from ....database.models import Tenant
|
18
|
+
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
router = APIRouter(prefix="/api/gpu-gateway", tags=["GPU Gateway"])
|
22
|
+
|
23
|
+
|
24
|
+
class GPUGatewayClient:
    """GPU gateway client: cloud-side communication with local GPU gateways.

    Holds an in-memory registry of gateways (``gateways``) and the list of
    gateway ids eligible for selection (``gateway_pool``), and forwards HTTP
    requests to a registered gateway.
    """

    def __init__(self):
        self.settings = get_settings()
        # gateway_id -> {url, status, last_seen, capabilities, nodes, metrics}
        self.gateways: Dict[str, Dict] = {}
        # Ids of gateways that select_gateway() may hand out.
        self.gateway_pool: List[str] = []

    @staticmethod
    def _build_url(gateway_url: str, endpoint: str) -> str:
        """Join a gateway base URL and an endpoint with exactly one slash."""
        return f"{gateway_url.rstrip('/')}/{endpoint.lstrip('/')}"

    async def register_gateway(self, gateway_id: str, gateway_url: str,
                               capabilities: Optional[List[str]] = None):
        """Register a GPU gateway, replacing any existing entry with the same id.

        Args:
            gateway_id: Identifier used as the registry key.
            gateway_url: Base URL that requests are forwarded to.
            capabilities: Optional capability names; stored as ``[]`` when omitted.
        """
        self.gateways[gateway_id] = {
            "url": gateway_url,
            "status": "online",
            "last_seen": datetime.now(),
            "capabilities": capabilities or [],
            "nodes": [],
            "metrics": {},
        }

        if gateway_id not in self.gateway_pool:
            self.gateway_pool.append(gateway_id)

        logger.info(f"✅ Registered GPU gateway: {gateway_id}")

    async def unregister_gateway(self, gateway_id: str):
        """Remove a gateway from the registry and the selection pool.

        Unknown ids are ignored; the log line is emitted either way.
        """
        self.gateways.pop(gateway_id, None)

        if gateway_id in self.gateway_pool:
            self.gateway_pool.remove(gateway_id)

        logger.info(f"❌ Unregistered GPU gateway: {gateway_id}")

    def select_gateway(self, requirements: Optional[Dict] = None) -> Optional[str]:
        """Pick a gateway to send work to.

        A gateway is a candidate when it is in the pool, its status is
        ``"online"`` and it was seen within the last 5 minutes. Among the
        candidates the one with the lowest average ``current_load`` over its
        reported nodes wins; the first candidate encountered without node
        information is chosen immediately.

        Args:
            requirements: Accepted for interface compatibility; not used by
                the current selection logic.

        Returns:
            The chosen gateway id, or ``None`` when no candidate exists.
        """
        if not self.gateway_pool:
            return None

        now = datetime.now()
        available_gateways = []
        for gateway_id in self.gateway_pool:
            gateway = self.gateways.get(gateway_id)
            if gateway and gateway["status"] == "online":
                # Only consider gateways with a heartbeat in the last 5 minutes.
                if now - gateway["last_seen"] < timedelta(minutes=5):
                    available_gateways.append(gateway_id)

        if not available_gateways:
            return None

        best_gateway = None
        min_load = float("inf")

        for gateway_id in available_gateways:
            nodes = self.gateways[gateway_id].get("nodes", [])

            if not nodes:
                # No node information reported: take this gateway as-is.
                best_gateway = gateway_id
                break

            avg_load = sum(node.get("current_load", 0) for node in nodes) / len(nodes)
            if avg_load < min_load:
                min_load = avg_load
                best_gateway = gateway_id

        return best_gateway or available_gateways[0]

    async def forward_request(self, gateway_id: str, endpoint: str,
                              method: str = "POST", data: Optional[Dict] = None) -> Dict:
        """Forward a request to a registered gateway and return its JSON body.

        Args:
            gateway_id: Id of a registered gateway.
            endpoint: Path on the gateway, with or without a leading slash.
            method: ``"GET"`` (case-insensitive) issues a GET; any other value
                issues a POST.
            data: JSON payload for POST requests.

        Raises:
            HTTPException: 404 when the gateway id is not registered, 504 on
                timeout (60 s total), 502 on any other failure.
        """
        if gateway_id not in self.gateways:
            raise HTTPException(status_code=404, detail="GPU gateway not found")

        # rstrip/lstrip so a registered URL ending in "/" does not yield "//".
        url = self._build_url(self.gateways[gateway_id]["url"], endpoint)

        try:
            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60)) as session:
                if method.upper() == "GET":
                    async with session.get(url) as response:
                        result = await response.json()
                else:
                    async with session.post(url, json=data) as response:
                        result = await response.json()

            return result

        except asyncio.TimeoutError as e:
            raise HTTPException(status_code=504, detail="Gateway request timeout") from e
        except Exception as e:
            logger.error(f"❌ Gateway request failed: {e}")
            raise HTTPException(status_code=502, detail=f"Gateway error: {str(e)}") from e

    async def update_gateway_status(self, gateway_id: str, status_data: Dict):
        """Record a status report (heartbeat) from a gateway.

        Refreshes ``last_seen``, marks the gateway online, stores the reported
        ``nodes`` and ``metrics``, and makes sure the gateway is back in the
        selection pool. Reports for unknown gateway ids are ignored.
        """
        gateway = self.gateways.get(gateway_id)
        if gateway is None:
            return

        gateway["last_seen"] = datetime.now()
        gateway["status"] = "online"
        gateway["nodes"] = status_data.get("nodes", [])
        gateway["metrics"] = status_data.get("metrics", {})

        # A gateway evicted from the pool while offline must become selectable
        # again once it reports in; otherwise it stays "online" but unusable.
        if gateway_id not in self.gateway_pool:
            self.gateway_pool.append(gateway_id)
|
134
|
+
|
135
|
+
|
136
|
+
# Global GPU gateway client
|
137
|
+
# Created at import time; the route handlers in this module all use this one
# instance, and it is listed in __all__ for other modules to import.
gpu_gateway_client = GPUGatewayClient()
|
138
|
+
|
139
|
+
|
140
|
+
@router.post("/register")
async def register_gateway(request: Dict[str, Any]):
    """Register a GPU gateway.

    Expects ``gateway_id`` and ``gateway_url`` in the request body, plus an
    optional ``capabilities`` list.

    Raises:
        HTTPException: 400 when ``gateway_id`` or ``gateway_url`` is missing,
            500 on any unexpected failure.
    """
    try:
        gateway_id = request.get("gateway_id")
        gateway_url = request.get("gateway_url")
        capabilities = request.get("capabilities", [])

        if not gateway_id or not gateway_url:
            raise HTTPException(status_code=400, detail="Missing gateway_id or gateway_url")

        await gpu_gateway_client.register_gateway(
            gateway_id=gateway_id,
            gateway_url=gateway_url,
            capabilities=capabilities,
        )

        return {"success": True, "message": f"Gateway {gateway_id} registered"}

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged instead
        # of rewrapping them as 500.
        raise
    except Exception as e:
        logger.error(f"❌ Gateway registration failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
162
|
+
|
163
|
+
|
164
|
+
@router.post("/unregister")
async def unregister_gateway(request: Dict[str, Any]):
    """Unregister a GPU gateway identified by ``gateway_id`` in the body.

    Raises:
        HTTPException: 400 when ``gateway_id`` is missing, 500 on any
            unexpected failure.
    """
    try:
        gateway_id = request.get("gateway_id")

        if not gateway_id:
            raise HTTPException(status_code=400, detail="Missing gateway_id")

        await gpu_gateway_client.unregister_gateway(gateway_id)

        return {"success": True, "message": f"Gateway {gateway_id} unregistered"}

    except HTTPException:
        # Keep the 400 as a 400 rather than rewrapping it as 500.
        raise
    except Exception as e:
        logger.error(f"❌ Gateway unregistration failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
180
|
+
|
181
|
+
|
182
|
+
@router.post("/status")
async def receive_gateway_status(request: Dict[str, Any]):
    """Receive a status report from a gateway.

    The whole request body is passed to the client as the status payload;
    ``gateway_id`` selects which gateway it applies to.

    Raises:
        HTTPException: 400 when ``gateway_id`` is missing, 500 on any
            unexpected failure.
    """
    try:
        gateway_id = request.get("gateway_id")

        if not gateway_id:
            raise HTTPException(status_code=400, detail="Missing gateway_id")

        await gpu_gateway_client.update_gateway_status(gateway_id, request)

        return {"success": True, "received": True}

    except HTTPException:
        # Keep the 400 as a 400 rather than rewrapping it as 500.
        raise
    except Exception as e:
        logger.error(f"❌ Status update failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
198
|
+
|
199
|
+
|
200
|
+
@router.get("/gateways")
async def list_gateways():
    """List every registered GPU gateway with a short summary of each.

    ``nodes`` in each summary is the number of reported nodes, not the node
    list itself; ``last_seen`` is rendered as an ISO-8601 string.
    """
    summaries = [
        {
            "gateway_id": gid,
            "url": info["url"],
            "status": info["status"],
            "last_seen": info["last_seen"].isoformat(),
            "nodes": len(info.get("nodes", [])),
            "capabilities": info.get("capabilities", []),
        }
        for gid, info in gpu_gateway_client.gateways.items()
    ]

    return {"success": True, "gateways": summaries, "total": len(summaries)}
|
220
|
+
|
221
|
+
|
222
|
+
@router.post("/deploy")
async def deploy_model_to_gateway(
    request: Dict[str, Any],
    current_tenant: Tenant = Depends(get_current_tenant)
):
    """Deploy a model through a GPU gateway.

    Body fields read here: ``model_id`` (required), ``backend`` (default
    ``"transformers"``) and ``preferred_gateway`` (optional). All other body
    fields are forwarded to the gateway as extra configuration.

    Raises:
        HTTPException: 400 when ``model_id`` is missing or not a string, 503
            when no gateway is available, 500 on unexpected failure; errors
            raised while forwarding are propagated unchanged.
    """
    try:
        model_id = request.get("model_id")
        backend = request.get("backend", "transformers")
        preferred_gateway = request.get("preferred_gateway")

        if not model_id:
            raise HTTPException(status_code=400, detail="Missing model_id")
        if not isinstance(model_id, str):
            # .replace() below needs a string; reject instead of failing with 500.
            raise HTTPException(status_code=400, detail="model_id must be a string")

        # Pick the gateway: explicit preference first, otherwise auto-select.
        gateway_id = preferred_gateway or gpu_gateway_client.select_gateway()
        if not gateway_id:
            raise HTTPException(status_code=503, detail="No available GPU gateways")

        # Defaults first, then caller-supplied configuration, then the fields
        # that must not be overridable: the tenant comes from authentication,
        # never from the request body.
        deploy_data = {
            "service_name": f"{current_tenant.id}-{model_id.replace('/', '-')}",
            "service_type": "llm",
            "backend": backend,
            **request,
            "tenant_id": current_tenant.id,
            "model_id": model_id,
        }

        result = await gpu_gateway_client.forward_request(
            gateway_id=gateway_id,
            endpoint="/deploy",
            method="POST",
            data=deploy_data,
        )

        # TODO: persist the deployment record to the database.

        return {
            "success": result.get("success", False),
            "gateway_id": gateway_id,
            "service_name": result.get("service_name"),
            "error": result.get("error"),
            "service_info": result.get("service_info"),
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Model deployment failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
275
|
+
|
276
|
+
|
277
|
+
@router.post("/inference")
async def inference_through_gateway(
    request: Dict[str, Any],
    current_tenant: Tenant = Depends(get_current_tenant)
):
    """Run an inference request through an automatically selected gateway.

    ``model_id`` is required. Every other body field is nested under
    ``"request"`` in the payload sent to the gateway, alongside the
    authenticated tenant id. The gateway's JSON response is returned as-is.
    """
    try:
        model_id = request.get("model_id")
        if not model_id:
            raise HTTPException(status_code=400, detail="Missing model_id")

        # Gateway choice could later depend on the model id or another strategy.
        target = gpu_gateway_client.select_gateway()
        if not target:
            raise HTTPException(status_code=503, detail="No available GPU gateways")

        # Everything except model_id is passed through untouched.
        passthrough = dict(request)
        passthrough.pop("model_id", None)

        payload = {
            "tenant_id": current_tenant.id,
            "model_id": model_id,
            "request": passthrough,
        }

        return await gpu_gateway_client.forward_request(
            gateway_id=target,
            endpoint="/inference",
            method="POST",
            data=payload,
        )

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"❌ Inference request failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
318
|
+
|
319
|
+
|
320
|
+
@router.get("/metrics")
async def get_gpu_metrics():
    """Collect metrics from every gateway currently marked online.

    Each online gateway is queried at its ``/metrics`` endpoint. A failure for
    one gateway is logged and reported as ``{"error": ...}`` under that
    gateway's id; it does not fail the whole request.
    """
    all_metrics = {}

    # Iterate over a snapshot: the loop awaits, so another request may
    # register or unregister gateways meanwhile, and mutating a dict during
    # iteration raises RuntimeError.
    for gateway_id, gateway_info in list(gpu_gateway_client.gateways.items()):
        if gateway_info["status"] != "online":
            continue

        try:
            all_metrics[gateway_id] = await gpu_gateway_client.forward_request(
                gateway_id=gateway_id,
                endpoint="/metrics",
                method="GET",
            )
        except Exception as e:
            logger.error(f"❌ Failed to get metrics from {gateway_id}: {e}")
            all_metrics[gateway_id] = {"error": str(e)}

    return {
        "success": True,
        "metrics": all_metrics,
    }
|
342
|
+
|
343
|
+
|
344
|
+
@router.post("/tenants/register")
async def register_tenant_on_gateways(
    request: Dict[str, Any],
    current_tenant: Tenant = Depends(get_current_tenant)
):
    """Register the authenticated tenant on every gateway marked online.

    Quota and limit fields are read from the body with these defaults:
    ``gpu_quota`` 1, ``memory_quota`` 8192, ``priority`` 1,
    ``allowed_models`` ``[]``, ``rate_limit`` 100. Per-gateway failures are
    logged and reported in ``gateway_results``; the response's top-level
    ``success`` is ``True`` regardless of individual gateway outcomes.

    Raises:
        HTTPException: 500 on unexpected failure outside the per-gateway calls.
    """
    try:
        tenant_config = {
            "tenant_id": current_tenant.id,
            "gpu_quota": request.get("gpu_quota", 1),
            "memory_quota": request.get("memory_quota", 8192),
            "priority": request.get("priority", 1),
            "allowed_models": request.get("allowed_models", []),
            "rate_limit": request.get("rate_limit", 100),
        }

        results = {}

        # Snapshot the registry: the loop awaits, and a concurrent
        # register/unregister would otherwise raise
        # "dictionary changed size during iteration".
        for gateway_id, gateway_info in list(gpu_gateway_client.gateways.items()):
            if gateway_info["status"] != "online":
                continue

            try:
                results[gateway_id] = await gpu_gateway_client.forward_request(
                    gateway_id=gateway_id,
                    endpoint="/tenants",
                    method="POST",
                    data=tenant_config,
                )
            except Exception as e:
                logger.error(f"❌ Failed to register tenant on {gateway_id}: {e}")
                results[gateway_id] = {"success": False, "error": str(e)}

        return {
            "success": True,
            "tenant_id": current_tenant.id,
            "gateway_results": results,
        }

    except Exception as e:
        logger.error(f"❌ Tenant registration failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
386
|
+
|
387
|
+
|
388
|
+
# Background task: monitor gateway health
|
389
|
+
async def monitor_gateways():
    """Monitor gateway health in an endless loop.

    Every 30 seconds each registered gateway is checked. One not seen for more
    than 5 minutes is marked offline and removed from the selection pool, then
    pinged at ``/status``. A truthy reply marks it online again and returns it
    to the pool; a failed ping leaves it offline. An unexpected error in a
    sweep is logged and the loop retries after 10 seconds.
    """
    while True:
        try:
            # Snapshot the ids: the loop awaits, so the registry may change.
            for gateway_id in list(gpu_gateway_client.gateways):
                gateway = gpu_gateway_client.gateways.get(gateway_id)
                if gateway is None:
                    # Unregistered while an earlier gateway was being pinged.
                    continue

                if datetime.now() - gateway["last_seen"] <= timedelta(minutes=5):
                    continue

                logger.warning(f"⚠️ Gateway {gateway_id} is offline")
                gateway["status"] = "offline"

                if gateway_id in gpu_gateway_client.gateway_pool:
                    gpu_gateway_client.gateway_pool.remove(gateway_id)

                # Try to reach the gateway directly.
                try:
                    status = await gpu_gateway_client.forward_request(
                        gateway_id=gateway_id,
                        endpoint="/status",
                        method="GET",
                    )
                except Exception as e:
                    logger.debug(f"Gateway {gateway_id} ping failed: {e}")
                    gateway["status"] = "offline"
                    continue

                # Skip re-adding a gateway that was unregistered during the ping.
                if status and gateway_id in gpu_gateway_client.gateways:
                    gateway["status"] = "online"
                    # Stamp the time of the successful ping, not the sweep start.
                    gateway["last_seen"] = datetime.now()

                    if gateway_id not in gpu_gateway_client.gateway_pool:
                        gpu_gateway_client.gateway_pool.append(gateway_id)

            await asyncio.sleep(30)  # check every 30 seconds

        except Exception as e:
            logger.error(f"❌ Gateway monitoring error: {e}")
            await asyncio.sleep(10)
|
430
|
+
|
431
|
+
|
432
|
+
# Start the monitoring task
|
433
|
+
# Strong references to running background tasks. The event loop keeps only
# weak references, so an unreferenced task can be garbage-collected mid-run.
_background_tasks = set()


@router.on_event("startup")
async def startup_event():
    """Start the background gateway-monitoring task on router startup."""
    task = asyncio.create_task(monitor_gateways())
    _background_tasks.add(task)
    # Drop the reference once the task finishes so the set does not grow.
    task.add_done_callback(_background_tasks.discard)
|
437
|
+
|
438
|
+
|
439
|
+
# Export the client for use by other modules
|
440
|
+
# Public names of this module: the FastAPI router, the shared client
# instance, and the client class.
__all__ = ["router", "gpu_gateway_client", "GPUGatewayClient"]
|
@@ -4,13 +4,19 @@ Health Check Routes
|
|
4
4
|
System health and status endpoints
|
5
5
|
"""
|
6
6
|
|
7
|
-
from fastapi import APIRouter, HTTPException
|
7
|
+
from fastapi import APIRouter, HTTPException, Request
|
8
8
|
from pydantic import BaseModel
|
9
9
|
import time
|
10
10
|
import psutil
|
11
|
-
import torch
|
12
11
|
from typing import Dict, Any
|
13
12
|
|
13
|
+
# Optional torch import - only available in local mode
|
14
|
+
try:
|
15
|
+
import torch
|
16
|
+
TORCH_AVAILABLE = True
|
17
|
+
except ImportError:
|
18
|
+
TORCH_AVAILABLE = False
|
19
|
+
|
14
20
|
router = APIRouter()
|
15
21
|
|
16
22
|
class HealthResponse(BaseModel):
|
@@ -20,22 +26,36 @@ class HealthResponse(BaseModel):
|
|
20
26
|
uptime: float
|
21
27
|
system: Dict[str, Any]
|
22
28
|
|
29
|
+
@router.get("", response_model=HealthResponse)
|
23
30
|
@router.get("/", response_model=HealthResponse)
|
24
|
-
async def health_check():
|
31
|
+
async def health_check(request: Request):
|
25
32
|
"""
|
26
33
|
Basic health check endpoint
|
34
|
+
Responds to both /health and /health/
|
27
35
|
"""
|
36
|
+
# Check if startup failed
|
37
|
+
startup_failed = getattr(request.app.state, 'startup_failed', False)
|
38
|
+
startup_error = getattr(request.app.state, 'startup_error', None)
|
39
|
+
|
40
|
+
status = "degraded" if startup_failed else "healthy"
|
41
|
+
|
42
|
+
system_info = {
|
43
|
+
"cpu_percent": psutil.cpu_percent(),
|
44
|
+
"memory_percent": psutil.virtual_memory().percent,
|
45
|
+
"gpu_available": torch.cuda.is_available() if TORCH_AVAILABLE else False,
|
46
|
+
"gpu_count": torch.cuda.device_count() if (TORCH_AVAILABLE and torch.cuda.is_available()) else 0
|
47
|
+
}
|
48
|
+
|
49
|
+
if startup_failed:
|
50
|
+
system_info["startup_error"] = startup_error
|
51
|
+
system_info["warning"] = "Server started with initialization errors"
|
52
|
+
|
28
53
|
return HealthResponse(
|
29
|
-
status=
|
54
|
+
status=status,
|
30
55
|
timestamp=time.time(),
|
31
56
|
version="1.0.0",
|
32
57
|
uptime=time.time(), # Simplified uptime
|
33
|
-
system=
|
34
|
-
"cpu_percent": psutil.cpu_percent(),
|
35
|
-
"memory_percent": psutil.virtual_memory().percent,
|
36
|
-
"gpu_available": torch.cuda.is_available(),
|
37
|
-
"gpu_count": torch.cuda.device_count() if torch.cuda.is_available() else 0
|
38
|
-
}
|
58
|
+
system=system_info
|
39
59
|
)
|
40
60
|
|
41
61
|
@router.get("/detailed")
|
@@ -44,7 +64,7 @@ async def detailed_health():
|
|
44
64
|
Detailed health check with system information
|
45
65
|
"""
|
46
66
|
gpu_info = []
|
47
|
-
if torch.cuda.is_available():
|
67
|
+
if TORCH_AVAILABLE and torch.cuda.is_available():
|
48
68
|
for i in range(torch.cuda.device_count()):
|
49
69
|
gpu_info.append({
|
50
70
|
"device": i,
|
@@ -67,7 +87,7 @@ async def detailed_health():
|
|
67
87
|
"total": psutil.virtual_memory().total
|
68
88
|
},
|
69
89
|
"gpu": {
|
70
|
-
"available": torch.cuda.is_available(),
|
90
|
+
"available": torch.cuda.is_available() if TORCH_AVAILABLE else False,
|
71
91
|
"devices": gpu_info
|
72
92
|
}
|
73
93
|
}
|