isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,440 @@
1
+ """
2
+ GPU Gateway API Routes
3
+ Integration interface between the cloud Rails API and the local GPU gateways
4
+ """
5
+
6
+ from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
7
+ from fastapi.responses import JSONResponse
8
+ from typing import List, Dict, Any, Optional
9
+ import logging
10
+ import asyncio
11
+ import aiohttp
12
+ from datetime import datetime, timedelta
13
+
14
+ from ....core.config import get_settings
15
+ from ....deployment.local.config import LocalGPUConfig, LocalServiceType, LocalBackend
16
+ from ....auth.middleware import get_current_tenant
17
+ from ....database.models import Tenant
18
+
19
+ # Module-level logger named after this module.
+ logger = logging.getLogger(__name__)
19
+
20
+ # Router for every route in this module; all paths are prefixed with /api/gpu-gateway.
+ router = APIRouter(prefix="/api/gpu-gateway", tags=["GPU Gateway"])
22
+
23
+
24
class GPUGatewayClient:
    """In-memory registry of GPU gateways plus an HTTP client to reach them.

    Attributes:
        settings: Result of ``get_settings()`` captured at construction time.
        gateways: Maps ``gateway_id`` to a record with the keys ``url``,
            ``status``, ``last_seen``, ``capabilities``, ``nodes`` and
            ``metrics``.
        gateway_pool: Ids of the gateways that are candidates for selection.
    """

    def __init__(self):
        """Load settings and start with an empty registry."""
        self.settings = get_settings()
        # gateway_id -> {url, status, last_seen, capabilities, nodes, metrics}
        self.gateways: Dict[str, Dict] = {}
        # Ids of gateways currently eligible for selection.
        self.gateway_pool: List[str] = []

    async def register_gateway(self, gateway_id: str, gateway_url: str,
                               capabilities: Optional[List[str]] = None):
        """Register (or re-register) a GPU gateway and add it to the pool.

        Re-registering an existing id replaces its record, which resets the
        stored ``nodes`` and ``metrics``.

        Args:
            gateway_id: Identifier used as the registry key.
            gateway_url: Base URL requests are forwarded to.
            capabilities: Optional capability labels; stored as ``[]`` when
                omitted.
        """
        self.gateways[gateway_id] = {
            "url": gateway_url,
            "status": "online",
            "last_seen": datetime.now(),
            "capabilities": capabilities or [],
            "nodes": [],
            "metrics": {}
        }

        if gateway_id not in self.gateway_pool:
            self.gateway_pool.append(gateway_id)

        logger.info(f"✅ Registered GPU gateway: {gateway_id}")

    async def unregister_gateway(self, gateway_id: str):
        """Remove a gateway from the registry and from the selection pool.

        Unknown ids are ignored; the log line is emitted either way.
        """
        if gateway_id in self.gateways:
            del self.gateways[gateway_id]

        if gateway_id in self.gateway_pool:
            self.gateway_pool.remove(gateway_id)

        logger.info(f"❌ Unregistered GPU gateway: {gateway_id}")

    def select_gateway(self, requirements: Optional[Dict] = None) -> Optional[str]:
        """Pick a gateway id for the next request.

        Only pooled gateways whose status is ``"online"`` and whose last
        heartbeat is less than five minutes old are considered. Among those,
        the one with the lowest average ``current_load`` over its reported
        nodes wins; the first candidate that reports no nodes is chosen
        immediately.

        Args:
            requirements: Accepted for interface compatibility; not used by
                the current selection logic.

        Returns:
            The chosen gateway id, or ``None`` when no gateway qualifies.
        """
        if not self.gateway_pool:
            return None

        available_gateways = []

        for gateway_id in self.gateway_pool:
            gateway = self.gateways.get(gateway_id)
            if gateway and gateway["status"] == "online":
                # Require a heartbeat within the last 5 minutes.
                if datetime.now() - gateway["last_seen"] < timedelta(minutes=5):
                    available_gateways.append(gateway_id)

        if available_gateways:
            # Prefer the gateway with the lowest average node load.
            best_gateway = None
            min_load = float('inf')

            for gateway_id in available_gateways:
                gateway = self.gateways[gateway_id]
                nodes = gateway.get("nodes", [])

                if nodes:
                    total_load = sum(node.get("current_load", 0) for node in nodes)
                    avg_load = total_load / len(nodes)

                    if avg_load < min_load:
                        min_load = avg_load
                        best_gateway = gateway_id
                else:
                    # No node information: take this gateway and stop looking.
                    best_gateway = gateway_id
                    break

            return best_gateway or available_gateways[0]

        return None

    async def forward_request(self, gateway_id: str, endpoint: str,
                              method: str = "POST", data: Optional[Dict] = None) -> Dict:
        """Forward a request to a registered gateway and return its JSON body.

        ``GET`` (case-insensitive) is sent without a body; every other method
        value is sent as a ``POST`` with ``data`` as the JSON payload. The
        HTTP status of the gateway response is not inspected.

        Args:
            gateway_id: Id of a registered gateway.
            endpoint: Path on the gateway; a leading ``/`` is optional.
            method: HTTP method name.
            data: JSON payload for non-GET requests.

        Returns:
            The decoded JSON response.

        Raises:
            HTTPException: 404 if the gateway is unknown, 504 on
                ``asyncio.TimeoutError``, 502 on any other failure.
        """
        if gateway_id not in self.gateways:
            raise HTTPException(status_code=404, detail="GPU gateway not found")

        gateway_url = self.gateways[gateway_id]["url"]
        url = f"{gateway_url}/{endpoint.lstrip('/')}"

        try:
            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60)) as session:
                if method.upper() == "GET":
                    async with session.get(url) as response:
                        result = await response.json()
                else:
                    async with session.post(url, json=data) as response:
                        result = await response.json()

                return result

        except asyncio.TimeoutError:
            raise HTTPException(status_code=504, detail="Gateway request timeout")
        except Exception as e:
            logger.error(f"❌ Gateway request failed: {e}")
            raise HTTPException(status_code=502, detail=f"Gateway error: {str(e)}")

    async def update_gateway_status(self, gateway_id: str, status_data: Dict):
        """Record a heartbeat/status report from a registered gateway.

        Refreshes ``last_seen``, marks the gateway online, stores the
        reported ``nodes`` and ``metrics``, and makes sure the gateway is in
        the selection pool. Reports for unknown ids are ignored.

        Args:
            gateway_id: Id of the reporting gateway.
            status_data: Report payload; ``nodes`` and ``metrics`` are read
                from it with empty defaults.
        """
        if gateway_id in self.gateways:
            gateway = self.gateways[gateway_id]
            gateway["last_seen"] = datetime.now()
            gateway["status"] = "online"
            gateway["nodes"] = status_data.get("nodes", [])
            gateway["metrics"] = status_data.get("metrics", {})

            # A gateway dropped from the pool while it was silent must become
            # selectable again once it resumes reporting; otherwise it stays
            # "online" but is never chosen by select_gateway().
            if gateway_id not in self.gateway_pool:
                self.gateway_pool.append(gateway_id)
134
+
135
+
136
+ # Global GPU gateway client shared by the routes in this module
137
+ gpu_gateway_client = GPUGatewayClient()
138
+
139
+
140
@router.post("/register")
async def register_gateway(request: Dict[str, Any]):
    """Register a GPU gateway with the cloud API.

    Expects ``gateway_id`` and ``gateway_url`` in the JSON body;
    ``capabilities`` is optional and defaults to an empty list.

    Raises:
        HTTPException: 400 when ``gateway_id`` or ``gateway_url`` is missing,
            500 on any unexpected failure.
    """
    try:
        gateway_id = request.get("gateway_id")
        gateway_url = request.get("gateway_url")
        capabilities = request.get("capabilities", [])

        if not gateway_id or not gateway_url:
            raise HTTPException(status_code=400, detail="Missing gateway_id or gateway_url")

        await gpu_gateway_client.register_gateway(
            gateway_id=gateway_id,
            gateway_url=gateway_url,
            capabilities=capabilities
        )

        return {"success": True, "message": f"Gateway {gateway_id} registered"}

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged
        # instead of turning them into a 500 below.
        raise
    except Exception as e:
        logger.error(f"❌ Gateway registration failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
162
+
163
+
164
@router.post("/unregister")
async def unregister_gateway(request: Dict[str, Any]):
    """Unregister a GPU gateway.

    Expects ``gateway_id`` in the JSON body.

    Raises:
        HTTPException: 400 when ``gateway_id`` is missing, 500 on any
            unexpected failure.
    """
    try:
        gateway_id = request.get("gateway_id")

        if not gateway_id:
            raise HTTPException(status_code=400, detail="Missing gateway_id")

        await gpu_gateway_client.unregister_gateway(gateway_id)

        return {"success": True, "message": f"Gateway {gateway_id} unregistered"}

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged
        # instead of turning them into a 500 below.
        raise
    except Exception as e:
        logger.error(f"❌ Gateway unregistration failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
180
+
181
+
182
@router.post("/status")
async def receive_gateway_status(request: Dict[str, Any]):
    """Receive a status report (heartbeat) from a GPU gateway.

    Expects ``gateway_id`` in the JSON body; the whole body is handed to
    ``gpu_gateway_client.update_gateway_status``.

    Raises:
        HTTPException: 400 when ``gateway_id`` is missing, 500 on any
            unexpected failure.
    """
    try:
        gateway_id = request.get("gateway_id")

        if not gateway_id:
            raise HTTPException(status_code=400, detail="Missing gateway_id")

        await gpu_gateway_client.update_gateway_status(gateway_id, request)

        return {"success": True, "received": True}

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged
        # instead of turning them into a 500 below.
        raise
    except Exception as e:
        logger.error(f"❌ Status update failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
198
+
199
+
200
@router.get("/gateways")
async def list_gateways():
    """Return a summary of every registered GPU gateway.

    Each entry carries the gateway id, URL, status, ISO-formatted
    ``last_seen`` timestamp, node count and capability list.
    """
    summaries = [
        {
            "gateway_id": gw_id,
            "url": info["url"],
            "status": info["status"],
            "last_seen": info["last_seen"].isoformat(),
            "nodes": len(info.get("nodes", [])),
            "capabilities": info.get("capabilities", [])
        }
        for gw_id, info in gpu_gateway_client.gateways.items()
    ]

    return {"success": True, "gateways": summaries, "total": len(summaries)}
220
+
221
+
222
@router.post("/deploy")
async def deploy_model_to_gateway(
    request: Dict[str, Any],
    current_tenant: Tenant = Depends(get_current_tenant)
):
    """Deploy a model through a GPU gateway on behalf of the current tenant.

    Reads ``model_id`` (required), ``backend`` (default ``"transformers"``)
    and ``preferred_gateway`` (optional) from the body. Any other body keys
    are passed to the gateway as extra configuration and may override the
    defaults built here, except ``tenant_id``.

    Raises:
        HTTPException: 400 when ``model_id`` is missing, 503 when no gateway
            is available, 500 on unexpected failures; errors raised while
            forwarding are propagated unchanged.
    """
    try:
        model_id = request.get("model_id")
        backend = request.get("backend", "transformers")
        preferred_gateway = request.get("preferred_gateway")

        if not model_id:
            raise HTTPException(status_code=400, detail="Missing model_id")

        # Use the caller's preferred gateway, else let the client choose one.
        gateway_id = preferred_gateway or gpu_gateway_client.select_gateway()
        if not gateway_id:
            raise HTTPException(status_code=503, detail="No available GPU gateways")

        deploy_data = {
            "model_id": model_id,
            "service_name": f"{current_tenant.id}-{model_id.replace('/', '-')}",
            "service_type": "llm",
            "backend": backend,
            **request,  # extra configuration supplied by the caller
            # Set last so a "tenant_id" key in the request body can never
            # replace the authenticated tenant.
            "tenant_id": current_tenant.id,
        }

        result = await gpu_gateway_client.forward_request(
            gateway_id=gateway_id,
            endpoint="/deploy",
            method="POST",
            data=deploy_data
        )

        # TODO: persist a deployment record in the database

        return {
            "success": result.get("success", False),
            "gateway_id": gateway_id,
            "service_name": result.get("service_name"),
            "error": result.get("error"),
            "service_info": result.get("service_info")
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Model deployment failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
275
+
276
+
277
@router.post("/inference")
async def inference_through_gateway(
    request: Dict[str, Any],
    current_tenant: Tenant = Depends(get_current_tenant)
):
    """Run an inference request through a GPU gateway for the current tenant.

    ``model_id`` is required; every other body key is forwarded to the
    gateway nested under ``"request"``. The gateway's JSON reply is returned
    as-is.

    Raises:
        HTTPException: 400 when ``model_id`` is missing, 503 when no gateway
            is available, 500 on unexpected failures; errors raised while
            forwarding are propagated unchanged.
    """
    try:
        model_id = request.get("model_id")
        if not model_id:
            raise HTTPException(status_code=400, detail="Missing model_id")

        # Gateway choice is delegated entirely to the client's selection logic.
        chosen_gateway = gpu_gateway_client.select_gateway()
        if not chosen_gateway:
            raise HTTPException(status_code=503, detail="No available GPU gateways")

        # Everything except model_id travels as the nested request payload.
        passthrough = {}
        for field, field_value in request.items():
            if field != "model_id":
                passthrough[field] = field_value

        payload = {
            "tenant_id": current_tenant.id,
            "model_id": model_id,
            "request": passthrough
        }

        return await gpu_gateway_client.forward_request(
            gateway_id=chosen_gateway,
            endpoint="/inference",
            method="POST",
            data=payload
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Inference request failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
318
+
319
+
320
@router.get("/metrics")
async def get_gpu_metrics():
    """Collect metrics from every gateway whose status is ``"online"``.

    Each online gateway is queried with ``GET /metrics``. A failure for one
    gateway is logged and reported as ``{"error": ...}`` under that
    gateway's id; it does not fail the whole request.
    """
    all_metrics = {}

    # Iterate over a snapshot: the registry can be mutated by register /
    # unregister calls while this coroutine is suspended in an await, and
    # iterating the live dict would then raise RuntimeError.
    for gateway_id, gateway_info in list(gpu_gateway_client.gateways.items()):
        if gateway_info["status"] == "online":
            try:
                metrics = await gpu_gateway_client.forward_request(
                    gateway_id=gateway_id,
                    endpoint="/metrics",
                    method="GET"
                )
                all_metrics[gateway_id] = metrics
            except Exception as e:
                logger.error(f"❌ Failed to get metrics from {gateway_id}: {e}")
                all_metrics[gateway_id] = {"error": str(e)}

    return {
        "success": True,
        "metrics": all_metrics
    }
342
+
343
+
344
@router.post("/tenants/register")
async def register_tenant_on_gateways(
    request: Dict[str, Any],
    current_tenant: Tenant = Depends(get_current_tenant)
):
    """Register the current tenant on every gateway whose status is ``"online"``.

    Quota fields are read from the body with defaults: ``gpu_quota`` 1,
    ``memory_quota`` 8192, ``priority`` 1, ``allowed_models`` ``[]`` and
    ``rate_limit`` 100. Each gateway's outcome is reported individually under
    ``gateway_results``; one gateway failing does not fail the request.

    Raises:
        HTTPException: 500 on unexpected failures outside the per-gateway
            calls.
    """
    try:
        tenant_config = {
            "tenant_id": current_tenant.id,
            "gpu_quota": request.get("gpu_quota", 1),
            "memory_quota": request.get("memory_quota", 8192),
            "priority": request.get("priority", 1),
            "allowed_models": request.get("allowed_models", []),
            "rate_limit": request.get("rate_limit", 100)
        }

        results = {}

        # Iterate over a snapshot: the registry can be mutated by register /
        # unregister calls while this coroutine is suspended in an await, and
        # iterating the live dict would then raise RuntimeError.
        for gateway_id, gateway_info in list(gpu_gateway_client.gateways.items()):
            if gateway_info["status"] == "online":
                try:
                    result = await gpu_gateway_client.forward_request(
                        gateway_id=gateway_id,
                        endpoint="/tenants",
                        method="POST",
                        data=tenant_config
                    )
                    results[gateway_id] = result
                except Exception as e:
                    logger.error(f"❌ Failed to register tenant on {gateway_id}: {e}")
                    results[gateway_id] = {"success": False, "error": str(e)}

        return {
            "success": True,
            "tenant_id": current_tenant.id,
            "gateway_results": results
        }

    except HTTPException:
        # Same convention as the deploy/inference routes: never rewrap a
        # deliberate HTTP error as a 500.
        raise
    except Exception as e:
        logger.error(f"❌ Tenant registration failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
386
+
387
+
388
+ # Background task: monitor gateway health
389
+ async def monitor_gateways():
390
+ """Monitor gateway health in an endless loop, sleeping 30 seconds between passes."""
391
+ while True:
392
+ try:
393
+ current_time = datetime.now()
394
+
395
+ # Iterate over a copy of the keys so the registry can change between awaits
+ for gateway_id in list(gpu_gateway_client.gateways.keys()):
396
+ gateway = gpu_gateway_client.gateways[gateway_id]
397
+
398
+ # Check whether the gateway has timed out (no heartbeat for more than 5 minutes)
399
+ if current_time - gateway["last_seen"] > timedelta(minutes=5):
400
+ logger.warning(f"⚠️ Gateway {gateway_id} is offline")
401
+ gateway["status"] = "offline"
402
+
403
+ if gateway_id in gpu_gateway_client.gateway_pool:
404
+ gpu_gateway_client.gateway_pool.remove(gateway_id)
405
+
406
+ # Try to ping the gateway with GET /status
407
+ try:
408
+ status = await gpu_gateway_client.forward_request(
409
+ gateway_id=gateway_id,
410
+ endpoint="/status",
411
+ method="GET"
412
+ )
413
+
414
+ # A truthy reply marks the gateway online again and restores it to the pool
+ if status:
415
+ gateway["status"] = "online"
416
+ gateway["last_seen"] = current_time
417
+
418
+ if gateway_id not in gpu_gateway_client.gateway_pool:
419
+ gpu_gateway_client.gateway_pool.append(gateway_id)
420
+
421
+ except Exception as e:
422
+ logger.debug(f"Gateway {gateway_id} ping failed: {e}")
423
+ gateway["status"] = "offline"
424
+
425
+ await asyncio.sleep(30) # Check every 30 seconds
426
+
427
+ except Exception as e:
428
+ logger.error(f"❌ Gateway monitoring error: {e}")
429
+ # After an unexpected error, wait 10 seconds before the next pass
+ await asyncio.sleep(10)
430
+
431
+
432
# Strong references to running background tasks. The event loop only keeps
# weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_background_tasks: set = set()


# Start the monitoring task
@router.on_event("startup")
async def startup_event():
    """Start the background gateway-monitoring task on application startup."""
    task = asyncio.create_task(monitor_gateways())
    _background_tasks.add(task)
    # Drop the reference once the task ends so the set does not grow.
    task.add_done_callback(_background_tasks.discard)
437
+
438
+
439
+ # Export the router and the client for use by other modules
440
+ __all__ = ["router", "gpu_gateway_client", "GPUGatewayClient"]
@@ -4,13 +4,19 @@ Health Check Routes
4
4
  System health and status endpoints
5
5
  """
6
6
 
7
- from fastapi import APIRouter, HTTPException
7
+ from fastapi import APIRouter, HTTPException, Request
8
8
  from pydantic import BaseModel
9
9
  import time
10
10
  import psutil
11
- import torch
12
11
  from typing import Dict, Any
13
12
 
13
+ # Optional torch import - only available in local mode
14
+ try:
15
+ import torch
16
+ TORCH_AVAILABLE = True
17
+ except ImportError:
18
+ TORCH_AVAILABLE = False
19
+
14
20
  router = APIRouter()
15
21
 
16
22
  class HealthResponse(BaseModel):
@@ -20,22 +26,36 @@ class HealthResponse(BaseModel):
20
26
  uptime: float
21
27
  system: Dict[str, Any]
22
28
 
29
+ @router.get("", response_model=HealthResponse)
23
30
  @router.get("/", response_model=HealthResponse)
24
- async def health_check():
31
+ async def health_check(request: Request):
25
32
  """
26
33
  Basic health check endpoint
34
+ Responds to both /health and /health/
27
35
  """
36
+ # Check if startup failed
37
+ startup_failed = getattr(request.app.state, 'startup_failed', False)
38
+ startup_error = getattr(request.app.state, 'startup_error', None)
39
+
40
+ status = "degraded" if startup_failed else "healthy"
41
+
42
+ system_info = {
43
+ "cpu_percent": psutil.cpu_percent(),
44
+ "memory_percent": psutil.virtual_memory().percent,
45
+ "gpu_available": torch.cuda.is_available() if TORCH_AVAILABLE else False,
46
+ "gpu_count": torch.cuda.device_count() if (TORCH_AVAILABLE and torch.cuda.is_available()) else 0
47
+ }
48
+
49
+ if startup_failed:
50
+ system_info["startup_error"] = startup_error
51
+ system_info["warning"] = "Server started with initialization errors"
52
+
28
53
  return HealthResponse(
29
- status="healthy",
54
+ status=status,
30
55
  timestamp=time.time(),
31
56
  version="1.0.0",
32
57
  uptime=time.time(), # Simplified uptime
33
- system={
34
- "cpu_percent": psutil.cpu_percent(),
35
- "memory_percent": psutil.virtual_memory().percent,
36
- "gpu_available": torch.cuda.is_available(),
37
- "gpu_count": torch.cuda.device_count() if torch.cuda.is_available() else 0
38
- }
58
+ system=system_info
39
59
  )
40
60
 
41
61
  @router.get("/detailed")
@@ -44,7 +64,7 @@ async def detailed_health():
44
64
  Detailed health check with system information
45
65
  """
46
66
  gpu_info = []
47
- if torch.cuda.is_available():
67
+ if TORCH_AVAILABLE and torch.cuda.is_available():
48
68
  for i in range(torch.cuda.device_count()):
49
69
  gpu_info.append({
50
70
  "device": i,
@@ -67,7 +87,7 @@ async def detailed_health():
67
87
  "total": psutil.virtual_memory().total
68
88
  },
69
89
  "gpu": {
70
- "available": torch.cuda.is_available(),
90
+ "available": torch.cuda.is_available() if TORCH_AVAILABLE else False,
71
91
  "devices": gpu_info
72
92
  }
73
93
  }