PyPI - isa-model - Versions diffs - 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

isa_model/client.py +1166 -584
isa_model/core/cache/redis_cache.py +410 -0
isa_model/core/config/config_manager.py +282 -12
isa_model/core/config.py +91 -1
isa_model/core/database/__init__.py +1 -0
isa_model/core/database/direct_db_client.py +114 -0
isa_model/core/database/migration_manager.py +563 -0
isa_model/core/database/migrations.py +297 -0
isa_model/core/database/supabase_client.py +258 -0
isa_model/core/dependencies.py +316 -0
isa_model/core/discovery/__init__.py +19 -0
isa_model/core/discovery/consul_discovery.py +190 -0
isa_model/core/logging/__init__.py +54 -0
isa_model/core/logging/influx_logger.py +523 -0
isa_model/core/logging/loki_logger.py +160 -0
isa_model/core/models/__init__.py +46 -0
isa_model/core/models/config_models.py +625 -0
isa_model/core/models/deployment_billing_tracker.py +430 -0
isa_model/core/models/model_billing_tracker.py +60 -88
isa_model/core/models/model_manager.py +66 -25
isa_model/core/models/model_metadata.py +690 -0
isa_model/core/models/model_repo.py +217 -55
isa_model/core/models/model_statistics_tracker.py +234 -0
isa_model/core/models/model_storage.py +0 -1
isa_model/core/models/model_version_manager.py +959 -0
isa_model/core/models/system_models.py +857 -0
isa_model/core/pricing_manager.py +2 -249
isa_model/core/repositories/__init__.py +9 -0
isa_model/core/repositories/config_repository.py +912 -0
isa_model/core/resilience/circuit_breaker.py +366 -0
isa_model/core/security/secrets.py +358 -0
isa_model/core/services/__init__.py +2 -4
isa_model/core/services/intelligent_model_selector.py +479 -370
isa_model/core/storage/hf_storage.py +2 -2
isa_model/core/types.py +8 -0
isa_model/deployment/__init__.py +5 -48
isa_model/deployment/core/__init__.py +2 -31
isa_model/deployment/core/deployment_manager.py +1278 -368
isa_model/deployment/local/__init__.py +31 -0
isa_model/deployment/local/config.py +248 -0
isa_model/deployment/local/gpu_gateway.py +607 -0
isa_model/deployment/local/health_checker.py +428 -0
isa_model/deployment/local/provider.py +586 -0
isa_model/deployment/local/tensorrt_service.py +621 -0
isa_model/deployment/local/transformers_service.py +644 -0
isa_model/deployment/local/vllm_service.py +527 -0
isa_model/deployment/modal/__init__.py +8 -0
isa_model/deployment/modal/config.py +136 -0
isa_model/deployment/modal/deployer.py +894 -0
isa_model/deployment/modal/services/__init__.py +3 -0
isa_model/deployment/modal/services/audio/__init__.py +1 -0
isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
isa_model/deployment/modal/services/embedding/__init__.py +1 -0
isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
isa_model/deployment/modal/services/llm/__init__.py +1 -0
isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
isa_model/deployment/modal/services/video/__init__.py +1 -0
isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
isa_model/deployment/modal/services/vision/__init__.py +1 -0
isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/storage/__init__.py +5 -0
isa_model/deployment/storage/deployment_repository.py +824 -0
isa_model/deployment/triton/__init__.py +10 -0
isa_model/deployment/triton/config.py +196 -0
isa_model/deployment/triton/configs/__init__.py +1 -0
isa_model/deployment/triton/provider.py +512 -0
isa_model/deployment/triton/scripts/__init__.py +1 -0
isa_model/deployment/triton/templates/__init__.py +1 -0
isa_model/inference/__init__.py +47 -1
isa_model/inference/ai_factory.py +179 -16
isa_model/inference/legacy_services/__init__.py +21 -0
isa_model/inference/legacy_services/model_evaluation.py +637 -0
isa_model/inference/legacy_services/model_service.py +573 -0
isa_model/inference/legacy_services/model_serving.py +717 -0
isa_model/inference/legacy_services/model_training.py +561 -0
isa_model/inference/models/__init__.py +21 -0
isa_model/inference/models/inference_config.py +551 -0
isa_model/inference/models/inference_record.py +675 -0
isa_model/inference/models/performance_models.py +714 -0
isa_model/inference/repositories/__init__.py +9 -0
isa_model/inference/repositories/inference_repository.py +828 -0
isa_model/inference/services/audio/__init__.py +21 -0
isa_model/inference/services/audio/base_realtime_service.py +225 -0
isa_model/inference/services/audio/base_stt_service.py +184 -11
isa_model/inference/services/audio/isa_tts_service.py +0 -0
isa_model/inference/services/audio/openai_realtime_service.py +320 -124
isa_model/inference/services/audio/openai_stt_service.py +53 -11
isa_model/inference/services/base_service.py +17 -1
isa_model/inference/services/custom_model_manager.py +277 -0
isa_model/inference/services/embedding/__init__.py +13 -0
isa_model/inference/services/embedding/base_embed_service.py +111 -8
isa_model/inference/services/embedding/isa_embed_service.py +305 -0
isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
isa_model/inference/services/embedding/openai_embed_service.py +2 -4
isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
isa_model/inference/services/img/__init__.py +2 -2
isa_model/inference/services/img/base_image_gen_service.py +24 -7
isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
isa_model/inference/services/img/services/replicate_flux.py +226 -0
isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
isa_model/inference/services/img/tests/test_img_client.py +297 -0
isa_model/inference/services/llm/__init__.py +10 -2
isa_model/inference/services/llm/base_llm_service.py +361 -26
isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
isa_model/inference/services/llm/local_llm_service.py +747 -0
isa_model/inference/services/llm/ollama_llm_service.py +11 -3
isa_model/inference/services/llm/openai_llm_service.py +670 -56
isa_model/inference/services/llm/yyds_llm_service.py +10 -3
isa_model/inference/services/vision/__init__.py +27 -6
isa_model/inference/services/vision/base_vision_service.py +118 -185
isa_model/inference/services/vision/blip_vision_service.py +359 -0
isa_model/inference/services/vision/helpers/image_utils.py +19 -10
isa_model/inference/services/vision/isa_vision_service.py +634 -0
isa_model/inference/services/vision/openai_vision_service.py +19 -10
isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
isa_model/serving/api/cache_manager.py +245 -0
isa_model/serving/api/dependencies/__init__.py +1 -0
isa_model/serving/api/dependencies/auth.py +194 -0
isa_model/serving/api/dependencies/database.py +139 -0
isa_model/serving/api/error_handlers.py +284 -0
isa_model/serving/api/fastapi_server.py +240 -18
isa_model/serving/api/middleware/auth.py +317 -0
isa_model/serving/api/middleware/security.py +268 -0
isa_model/serving/api/middleware/tenant_context.py +414 -0
isa_model/serving/api/routes/analytics.py +489 -0
isa_model/serving/api/routes/config.py +645 -0
isa_model/serving/api/routes/deployment_billing.py +315 -0
isa_model/serving/api/routes/deployments.py +475 -0
isa_model/serving/api/routes/gpu_gateway.py +440 -0
isa_model/serving/api/routes/health.py +32 -12
isa_model/serving/api/routes/inference_monitoring.py +486 -0
isa_model/serving/api/routes/local_deployments.py +448 -0
isa_model/serving/api/routes/logs.py +430 -0
isa_model/serving/api/routes/settings.py +582 -0
isa_model/serving/api/routes/tenants.py +575 -0
isa_model/serving/api/routes/unified.py +992 -171
isa_model/serving/api/routes/webhooks.py +479 -0
isa_model/serving/api/startup.py +318 -0
isa_model/serving/modal_proxy_server.py +249 -0
isa_model/utils/gpu_utils.py +311 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
isa_model-0.4.3.dist-info/RECORD +193 -0
isa_model/deployment/cloud/__init__.py +0 -9
isa_model/deployment/cloud/modal/__init__.py +0 -10
isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
isa_model/deployment/cloud/modal/register_models.py +0 -321
isa_model/deployment/core/deployment_config.py +0 -356
isa_model/deployment/core/isa_deployment_service.py +0 -401
isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
isa_model/deployment/runtime/deployed_service.py +0 -338
isa_model/deployment/services/__init__.py +0 -9
isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
isa_model/deployment/services/model_service.py +0 -332
isa_model/deployment/services/service_monitor.py +0 -356
isa_model/deployment/services/service_registry.py +0 -527
isa_model/eval/__init__.py +0 -92
isa_model/eval/benchmarks.py +0 -469
isa_model/eval/config/__init__.py +0 -10
isa_model/eval/config/evaluation_config.py +0 -108
isa_model/eval/evaluators/__init__.py +0 -18
isa_model/eval/evaluators/base_evaluator.py +0 -503
isa_model/eval/evaluators/llm_evaluator.py +0 -472
isa_model/eval/factory.py +0 -531
isa_model/eval/infrastructure/__init__.py +0 -24
isa_model/eval/infrastructure/experiment_tracker.py +0 -466
isa_model/eval/metrics.py +0 -798
isa_model/inference/adapter/unified_api.py +0 -248
isa_model/inference/services/helpers/stacked_config.py +0 -148
isa_model/inference/services/img/flux_professional_service.py +0 -603
isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
isa_model/inference/services/others/table_transformer_service.py +0 -61
isa_model/inference/services/vision/doc_analysis_service.py +0 -640
isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
isa_model/inference/services/vision/ui_analysis_service.py +0 -823
isa_model/scripts/inference_tracker.py +0 -283
isa_model/scripts/mlflow_manager.py +0 -379
isa_model/scripts/model_registry.py +0 -465
isa_model/scripts/register_models.py +0 -370
isa_model/scripts/register_models_with_embeddings.py +0 -510
isa_model/scripts/start_mlflow.py +0 -95
isa_model/scripts/training_tracker.py +0 -257
isa_model/training/__init__.py +0 -74
isa_model/training/annotation/annotation_schema.py +0 -47
isa_model/training/annotation/processors/annotation_processor.py +0 -126
isa_model/training/annotation/storage/dataset_manager.py +0 -131
isa_model/training/annotation/storage/dataset_schema.py +0 -44
isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
isa_model/training/annotation/tests/test_minio copy.py +0 -113
isa_model/training/annotation/tests/test_minio_upload.py +0 -43
isa_model/training/annotation/views/annotation_controller.py +0 -158
isa_model/training/cloud/__init__.py +0 -22
isa_model/training/cloud/job_orchestrator.py +0 -402
isa_model/training/cloud/runpod_trainer.py +0 -454
isa_model/training/cloud/storage_manager.py +0 -482
isa_model/training/core/__init__.py +0 -23
isa_model/training/core/config.py +0 -181
isa_model/training/core/dataset.py +0 -222
isa_model/training/core/trainer.py +0 -720
isa_model/training/core/utils.py +0 -213
isa_model/training/factory.py +0 -424
isa_model-0.3.91.dist-info/RECORD +0 -138
/isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
/isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0

isa_model/serving/api/routes/gpu_gateway.py ADDED Viewed

@@ -0,0 +1,440 @@
+"""
+GPU Gateway API Routes
+云端Rails API与本地GPU网关的集成接口
+"""
+from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
+from fastapi.responses import JSONResponse
+from typing import List, Dict, Any, Optional
+import logging
+import asyncio
+import aiohttp
+from datetime import datetime, timedelta
+from ....core.config import get_settings
+from ....deployment.local.config import LocalGPUConfig, LocalServiceType, LocalBackend
+from ....auth.middleware import get_current_tenant
+from ....database.models import Tenant
+# Module-level logger named after this module
+logger = logging.getLogger(__name__)
+# Every route declared in this module is mounted under /api/gpu-gateway
+router = APIRouter(prefix="/api/gpu-gateway", tags=["GPU Gateway"])
+class GPUGatewayClient:
+    """Cloud-side registry and HTTP client for local GPU gateways.
+
+    Holds an in-memory record per gateway (url, status, last heartbeat,
+    capabilities, node list, metrics) and a pool of gateway ids that are
+    candidates for request routing.
+    """
+    def __init__(self):
+        self.settings = get_settings()
+        # gateway_id -> {url, status, last_seen, capabilities, nodes, metrics}
+        self.gateways: Dict[str, Dict] = {}
+        # ids of gateways that may be picked by select_gateway()
+        self.gateway_pool: List[str] = []
+    async def register_gateway(self, gateway_id: str, gateway_url: str,
+                             capabilities: Optional[List[str]] = None):
+        """Register a gateway, or overwrite its record, and add it to the pool.
+
+        Args:
+            gateway_id: Key under which the gateway is stored.
+            gateway_url: Base URL used by forward_request().
+            capabilities: Optional capability names; stored as [] when omitted.
+        """
+        self.gateways[gateway_id] = {
+            "url": gateway_url,
+            "status": "online",
+            "last_seen": datetime.now(),
+            "capabilities": capabilities or [],
+            "nodes": [],
+            "metrics": {}
+        }
+        if gateway_id not in self.gateway_pool:
+            self.gateway_pool.append(gateway_id)
+        logger.info(f"✅ Registered GPU gateway: {gateway_id}")
+    async def unregister_gateway(self, gateway_id: str):
+        """Remove a gateway from the registry and the pool; unknown ids are ignored."""
+        if gateway_id in self.gateways:
+            del self.gateways[gateway_id]
+        if gateway_id in self.gateway_pool:
+            self.gateway_pool.remove(gateway_id)
+        logger.info(f"❌ Unregistered GPU gateway: {gateway_id}")
+    def select_gateway(self, requirements: Optional[Dict] = None) -> Optional[str]:
+        """Pick the available gateway with the lowest average node load.
+
+        A gateway is available when it is in the pool, its status is
+        "online" and its last heartbeat is less than 5 minutes old.
+        Gateways that report nodes are ranked by the mean of their nodes'
+        ``current_load`` (missing values count as 0); ties go to the one
+        that comes first in the pool. A gateway without node information is
+        only used as a fallback when no available gateway reports nodes.
+
+        Args:
+            requirements: Accepted for interface compatibility; not used.
+
+        Returns:
+            The chosen gateway id, or None when no gateway is available.
+        """
+        now = datetime.now()
+        heartbeat_window = timedelta(minutes=5)
+        candidates = []
+        for gateway_id in self.gateway_pool:
+            gateway = self.gateways.get(gateway_id)
+            if not gateway or gateway["status"] != "online":
+                continue
+            if now - gateway["last_seen"] < heartbeat_window:
+                candidates.append(gateway_id)
+        if not candidates:
+            return None
+        # Rank only gateways with node data; previously the first gateway
+        # without node data ended the search and replaced a better match.
+        average_load = {}
+        for gateway_id in candidates:
+            nodes = self.gateways[gateway_id].get("nodes", [])
+            if nodes:
+                total_load = sum(node.get("current_load", 0) for node in nodes)
+                average_load[gateway_id] = total_load / len(nodes)
+        if average_load:
+            return min(average_load, key=average_load.get)
+        return candidates[0]
+    async def forward_request(self, gateway_id: str, endpoint: str,
+                            method: str = "POST", data: Optional[Dict] = None) -> Dict:
+        """Send a request to a registered gateway and return its JSON body.
+
+        Args:
+            gateway_id: Id of a registered gateway.
+            endpoint: Path appended to the gateway URL; leading slashes are stripped.
+            method: "GET" (case-insensitive) issues a GET; anything else a POST.
+            data: JSON body for POST requests.
+
+        Raises:
+            HTTPException: 404 for an unknown gateway, 504 on timeout
+                (60 seconds total), 502 for any other failure.
+        """
+        if gateway_id not in self.gateways:
+            raise HTTPException(status_code=404, detail="GPU gateway not found")
+        gateway_url = self.gateways[gateway_id]["url"]
+        url = f"{gateway_url}/{endpoint.lstrip('/')}"
+        try:
+            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60)) as session:
+                if method.upper() == "GET":
+                    async with session.get(url) as response:
+                        result = await response.json()
+                else:
+                    async with session.post(url, json=data) as response:
+                        result = await response.json()
+                return result
+        except asyncio.TimeoutError as e:
+            raise HTTPException(status_code=504, detail="Gateway request timeout") from e
+        except Exception as e:
+            logger.error(f"❌ Gateway request failed: {e}")
+            raise HTTPException(status_code=502, detail=f"Gateway error: {str(e)}") from e
+    async def update_gateway_status(self, gateway_id: str, status_data: Dict):
+        """Record a heartbeat for a known gateway; unknown ids are ignored.
+
+        Marks the gateway online, refreshes ``last_seen`` and replaces its
+        ``nodes`` and ``metrics`` with the values found in ``status_data``.
+        """
+        if gateway_id in self.gateways:
+            gateway = self.gateways[gateway_id]
+            gateway["last_seen"] = datetime.now()
+            gateway["status"] = "online"
+            gateway["nodes"] = status_data.get("nodes", [])
+            gateway["metrics"] = status_data.get("metrics", {})
+# Global GPU gateway client shared by every route in this module
+gpu_gateway_client = GPUGatewayClient()
+@router.post("/register")
+async def register_gateway(request: Dict[str, Any]):
+    """Register a GPU gateway with the shared gateway client.
+
+    Reads ``gateway_id``, ``gateway_url`` and optional ``capabilities`` from
+    the request body.
+
+    Raises:
+        HTTPException: 400 when ``gateway_id`` or ``gateway_url`` is missing,
+            500 on any unexpected failure.
+    """
+    try:
+        gateway_id = request.get("gateway_id")
+        gateway_url = request.get("gateway_url")
+        capabilities = request.get("capabilities", [])
+        if not gateway_id or not gateway_url:
+            raise HTTPException(status_code=400, detail="Missing gateway_id or gateway_url")
+        await gpu_gateway_client.register_gateway(
+            gateway_id=gateway_id,
+            gateway_url=gateway_url,
+            capabilities=capabilities
+        )
+        return {"success": True, "message": f"Gateway {gateway_id} registered"}
+    except HTTPException:
+        # Keep deliberate HTTP errors (the 400 above) from being turned into a 500
+        raise
+    except Exception as e:
+        logger.error(f"❌ Gateway registration failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post("/unregister")
+async def unregister_gateway(request: Dict[str, Any]):
+    """Remove a GPU gateway from the shared gateway client.
+
+    Raises:
+        HTTPException: 400 when ``gateway_id`` is missing from the body,
+            500 on any unexpected failure.
+    """
+    try:
+        gateway_id = request.get("gateway_id")
+        if not gateway_id:
+            raise HTTPException(status_code=400, detail="Missing gateway_id")
+        await gpu_gateway_client.unregister_gateway(gateway_id)
+        return {"success": True, "message": f"Gateway {gateway_id} unregistered"}
+    except HTTPException:
+        # Keep deliberate HTTP errors (the 400 above) from being turned into a 500
+        raise
+    except Exception as e:
+        logger.error(f"❌ Gateway unregistration failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post("/status")
+async def receive_gateway_status(request: Dict[str, Any]):
+    """Accept a status report (heartbeat) from a gateway.
+
+    The whole request body is handed to the client as the status payload.
+    A report for a gateway id that is not registered is still acknowledged,
+    because the client ignores unknown ids.
+
+    Raises:
+        HTTPException: 400 when ``gateway_id`` is missing from the body,
+            500 on any unexpected failure.
+    """
+    try:
+        gateway_id = request.get("gateway_id")
+        if not gateway_id:
+            raise HTTPException(status_code=400, detail="Missing gateway_id")
+        await gpu_gateway_client.update_gateway_status(gateway_id, request)
+        return {"success": True, "received": True}
+    except HTTPException:
+        # Keep deliberate HTTP errors (the 400 above) from being turned into a 500
+        raise
+    except Exception as e:
+        logger.error(f"❌ Status update failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/gateways")
+async def list_gateways():
+    """Return a summary of every gateway currently held by the shared client.
+
+    Each entry carries the gateway id, url, status, ISO-formatted last
+    heartbeat time, number of nodes and capability list.
+    """
+    summaries = [
+        {
+            "gateway_id": gid,
+            "url": info["url"],
+            "status": info["status"],
+            "last_seen": info["last_seen"].isoformat(),
+            "nodes": len(info.get("nodes", [])),
+            "capabilities": info.get("capabilities", []),
+        }
+        for gid, info in gpu_gateway_client.gateways.items()
+    ]
+    return {"success": True, "gateways": summaries, "total": len(summaries)}
+@router.post("/deploy")
+async def deploy_model_to_gateway(
+    request: Dict[str, Any],
+    current_tenant: Tenant = Depends(get_current_tenant)
+):
+    """Deploy a model through a GPU gateway on behalf of the current tenant.
+
+    Uses ``preferred_gateway`` from the body when given, otherwise the
+    gateway chosen by the shared client. Every key of the request body is
+    forwarded and may override the default ``service_name``,
+    ``service_type`` and ``backend``; ``tenant_id`` always comes from the
+    authenticated tenant.
+
+    Raises:
+        HTTPException: 400 when ``model_id`` is missing, 503 when no gateway
+            is available, 500 on any unexpected failure; errors raised while
+            forwarding are passed through unchanged.
+    """
+    try:
+        model_id = request.get("model_id")
+        backend = request.get("backend", "transformers")
+        preferred_gateway = request.get("preferred_gateway")
+        if not model_id:
+            raise HTTPException(status_code=400, detail="Missing model_id")
+        # Choose the gateway
+        gateway_id = preferred_gateway or gpu_gateway_client.select_gateway()
+        if not gateway_id:
+            raise HTTPException(status_code=503, detail="No available GPU gateways")
+        # Build the deployment request
+        deploy_data = {
+            "model_id": model_id,
+            "service_name": f"{current_tenant.id}-{model_id.replace('/', '-')}",
+            "service_type": "llm",
+            "backend": backend,
+            **request,  # extra configuration supplied by the caller
+            # Set last so a "tenant_id" key in the request body cannot
+            # replace the authenticated tenant's id.
+            "tenant_id": current_tenant.id,
+        }
+        # Forward to the gateway
+        result = await gpu_gateway_client.forward_request(
+            gateway_id=gateway_id,
+            endpoint="/deploy",
+            method="POST",
+            data=deploy_data
+        )
+        # Record the deployment in the database
+        # TODO: persist the deployment record
+        return {
+            "success": result.get("success", False),
+            "gateway_id": gateway_id,
+            "service_name": result.get("service_name"),
+            "error": result.get("error"),
+            "service_info": result.get("service_info")
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"❌ Model deployment failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post("/inference")
+async def inference_through_gateway(
+    request: Dict[str, Any],
+    current_tenant: Tenant = Depends(get_current_tenant)
+):
+    """Run an inference request through a GPU gateway for the current tenant.
+
+    The gateway is chosen by the shared client. Everything in the body
+    except ``model_id`` is nested under ``"request"`` in the forwarded
+    payload, and the gateway's JSON reply is returned unchanged.
+
+    Raises:
+        HTTPException: 400 when ``model_id`` is missing, 503 when no gateway
+            is available, 500 on any unexpected failure; errors raised while
+            forwarding are passed through unchanged.
+    """
+    try:
+        model_id = request.get("model_id")
+        if not model_id:
+            raise HTTPException(status_code=400, detail="Missing model_id")
+        # Gateway choice could later depend on the model id or another strategy
+        target_gateway = gpu_gateway_client.select_gateway()
+        if not target_gateway:
+            raise HTTPException(status_code=503, detail="No available GPU gateways")
+        # Everything except the model id travels as the nested request
+        passthrough = {name: item for name, item in request.items() if name != "model_id"}
+        payload = {
+            "tenant_id": current_tenant.id,
+            "model_id": model_id,
+            "request": passthrough,
+        }
+        return await gpu_gateway_client.forward_request(
+            gateway_id=target_gateway,
+            endpoint="/inference",
+            method="POST",
+            data=payload,
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"❌ Inference request failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/metrics")
+async def get_gpu_metrics():
+    """Collect metrics from every gateway whose status is "online".
+
+    Gateways are queried one after another; a failing gateway contributes
+    an ``{"error": ...}`` entry instead of failing the whole call.
+    """
+    collected = {}
+    for gid, info in gpu_gateway_client.gateways.items():
+        if info["status"] != "online":
+            continue
+        try:
+            collected[gid] = await gpu_gateway_client.forward_request(
+                gateway_id=gid,
+                endpoint="/metrics",
+                method="GET",
+            )
+        except Exception as e:
+            logger.error(f"❌ Failed to get metrics from {gid}: {e}")
+            collected[gid] = {"error": str(e)}
+    return {"success": True, "metrics": collected}
+@router.post("/tenants/register")
+async def register_tenant_on_gateways(
+    request: Dict[str, Any],
+    current_tenant: Tenant = Depends(get_current_tenant)
+):
+    """Register the current tenant on every gateway whose status is "online".
+
+    Quota and limit values come from the request body, with defaults for
+    any that are missing. Per-gateway failures are reported in
+    ``gateway_results`` and do not abort the call.
+
+    Raises:
+        HTTPException: 500 on any failure outside the per-gateway calls.
+    """
+    try:
+        tenant_id = current_tenant.id
+        tenant_config = {
+            "tenant_id": tenant_id,
+            "gpu_quota": request.get("gpu_quota", 1),
+            "memory_quota": request.get("memory_quota", 8192),
+            "priority": request.get("priority", 1),
+            "allowed_models": request.get("allowed_models", []),
+            "rate_limit": request.get("rate_limit", 100),
+        }
+        outcome = {}
+        # Visit each online gateway in turn
+        for gid, info in gpu_gateway_client.gateways.items():
+            if info["status"] != "online":
+                continue
+            try:
+                outcome[gid] = await gpu_gateway_client.forward_request(
+                    gateway_id=gid,
+                    endpoint="/tenants",
+                    method="POST",
+                    data=tenant_config,
+                )
+            except Exception as e:
+                logger.error(f"❌ Failed to register tenant on {gid}: {e}")
+                outcome[gid] = {"success": False, "error": str(e)}
+        return {
+            "success": True,
+            "tenant_id": current_tenant.id,
+            "gateway_results": outcome,
+        }
+    except Exception as e:
+        logger.error(f"❌ Tenant registration failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+# Background task: monitor gateway health
+async def monitor_gateways():
+    """Loop forever, refreshing the status of every registered gateway.
+
+    Each pass marks gateways silent for more than 5 minutes as offline and
+    drops them from the pool, then pings each gateway's ``/status``
+    endpoint. A truthy reply marks the gateway online and returns it to the
+    pool; a failed ping marks it offline. Passes run 30 seconds apart, or
+    10 seconds after an unexpected error.
+    """
+    while True:
+        try:
+            tick = datetime.now()
+            # Snapshot the ids so the registry may change while we await
+            for gw_id in list(gpu_gateway_client.gateways):
+                record = gpu_gateway_client.gateways[gw_id]
+                # Heartbeat too old: take the gateway out of rotation
+                silent_for = tick - record["last_seen"]
+                if silent_for > timedelta(minutes=5):
+                    logger.warning(f"⚠️ Gateway {gw_id} is offline")
+                    record["status"] = "offline"
+                    if gw_id in gpu_gateway_client.gateway_pool:
+                        gpu_gateway_client.gateway_pool.remove(gw_id)
+                # Try to ping the gateway
+                try:
+                    reply = await gpu_gateway_client.forward_request(
+                        gateway_id=gw_id,
+                        endpoint="/status",
+                        method="GET",
+                    )
+                    if reply:
+                        record["status"] = "online"
+                        record["last_seen"] = tick
+                        if gw_id not in gpu_gateway_client.gateway_pool:
+                            gpu_gateway_client.gateway_pool.append(gw_id)
+                except Exception as e:
+                    logger.debug(f"Gateway {gw_id} ping failed: {e}")
+                    record["status"] = "offline"
+            await asyncio.sleep(30)  # check every 30 seconds
+        except Exception as e:
+            logger.error(f"❌ Gateway monitoring error: {e}")
+            await asyncio.sleep(10)
+# Start the monitoring task
+# Strong reference to the monitor task: the event loop keeps only weak
+# references to tasks, so an unreferenced task may be garbage-collected
+# while it is still running.
+_monitor_task: Optional[asyncio.Task] = None
+@router.on_event("startup")
+async def startup_event():
+    """Start the background gateway monitor when the application starts.
+
+    Does nothing if a monitor task created by an earlier call is still running.
+    """
+    global _monitor_task
+    if _monitor_task is None or _monitor_task.done():
+        _monitor_task = asyncio.create_task(monitor_gateways())
+# Export the router and the client so other modules can use them
+__all__ = ["router", "gpu_gateway_client", "GPUGatewayClient"]

isa_model/serving/api/routes/health.py CHANGED Viewed

@@ -4,13 +4,19 @@ Health Check Routes
 System health and status endpoints
 """
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 import time
 import psutil
-import torch
 from typing import Dict, Any
+# Optional torch import - only available in local mode
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
 router = APIRouter()
 class HealthResponse(BaseModel):
@@ -20,22 +26,36 @@ class HealthResponse(BaseModel):
     uptime: float
     system: Dict[str, Any]
+@router.get("", response_model=HealthResponse)
 @router.get("/", response_model=HealthResponse)
-async def health_check():
+async def health_check(request: Request):
     """
     Basic health check endpoint
+    Responds to both /health and /health/
     """
+    # Check if startup failed
+    startup_failed = getattr(request.app.state, 'startup_failed', False)
+    startup_error = getattr(request.app.state, 'startup_error', None)
+    status = "degraded" if startup_failed else "healthy"
+    system_info = {
+        "cpu_percent": psutil.cpu_percent(),
+        "memory_percent": psutil.virtual_memory().percent,
+        "gpu_available": torch.cuda.is_available() if TORCH_AVAILABLE else False,
+        "gpu_count": torch.cuda.device_count() if (TORCH_AVAILABLE and torch.cuda.is_available()) else 0
+    }
+    if startup_failed:
+        system_info["startup_error"] = startup_error
+        system_info["warning"] = "Server started with initialization errors"
     return HealthResponse(
-        status="healthy",
+        status=status,
         timestamp=time.time(),
         version="1.0.0",
         uptime=time.time(),  # Simplified uptime
-        system={
-            "cpu_percent": psutil.cpu_percent(),
-            "memory_percent": psutil.virtual_memory().percent,
-            "gpu_available": torch.cuda.is_available(),
-            "gpu_count": torch.cuda.device_count() if torch.cuda.is_available() else 0
-        }
+        system=system_info
     )
 @router.get("/detailed")
@@ -44,7 +64,7 @@ async def detailed_health():
     Detailed health check with system information
     """
     gpu_info = []
-    if torch.cuda.is_available():
+    if TORCH_AVAILABLE and torch.cuda.is_available():
         for i in range(torch.cuda.device_count()):
             gpu_info.append({
                 "device": i,
@@ -67,7 +87,7 @@ async def detailed_health():
                 "total": psutil.virtual_memory().total
             },
             "gpu": {
-                "available": torch.cuda.is_available(),
+                "available": torch.cuda.is_available() if TORCH_AVAILABLE else False,
                 "devices": gpu_info
             }
         }

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl