isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/client.py +732 -565
- isa_model/core/cache/redis_cache.py +401 -0
- isa_model/core/config/config_manager.py +53 -10
- isa_model/core/config.py +1 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/migrations.py +277 -0
- isa_model/core/database/supabase_client.py +123 -0
- isa_model/core/models/__init__.py +37 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +36 -18
- isa_model/core/models/model_repo.py +44 -38
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +101 -370
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +7 -0
- isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
- isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
- isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/core/deployment_manager.py +6 -4
- isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
- isa_model/eval/benchmarks/__init__.py +27 -0
- isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
- isa_model/eval/benchmarks.py +244 -12
- isa_model/eval/evaluators/__init__.py +8 -2
- isa_model/eval/evaluators/audio_evaluator.py +727 -0
- isa_model/eval/evaluators/embedding_evaluator.py +742 -0
- isa_model/eval/evaluators/vision_evaluator.py +564 -0
- isa_model/eval/example_evaluation.py +395 -0
- isa_model/eval/factory.py +272 -5
- isa_model/eval/isa_benchmarks.py +700 -0
- isa_model/eval/isa_integration.py +582 -0
- isa_model/eval/metrics.py +159 -6
- isa_model/eval/tests/unit/test_basic.py +396 -0
- isa_model/inference/ai_factory.py +44 -8
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +32 -6
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/base_llm_service.py +30 -6
- isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
- isa_model/inference/services/llm/ollama_llm_service.py +2 -1
- isa_model/inference/services/llm/openai_llm_service.py +652 -55
- isa_model/inference/services/llm/yyds_llm_service.py +2 -1
- isa_model/inference/services/vision/__init__.py +5 -5
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/helpers/image_utils.py +11 -5
- isa_model/inference/services/vision/isa_vision_service.py +573 -0
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/serving/api/fastapi_server.py +88 -16
- isa_model/serving/api/middleware/auth.py +311 -0
- isa_model/serving/api/middleware/security.py +278 -0
- isa_model/serving/api/routes/analytics.py +486 -0
- isa_model/serving/api/routes/deployments.py +339 -0
- isa_model/serving/api/routes/evaluations.py +579 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/unified.py +324 -165
- isa_model/serving/api/startup.py +304 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/training/__init__.py +100 -6
- isa_model/training/core/__init__.py +4 -1
- isa_model/training/examples/intelligent_training_example.py +281 -0
- isa_model/training/intelligent/__init__.py +25 -0
- isa_model/training/intelligent/decision_engine.py +643 -0
- isa_model/training/intelligent/intelligent_factory.py +888 -0
- isa_model/training/intelligent/knowledge_base.py +751 -0
- isa_model/training/intelligent/resource_optimizer.py +839 -0
- isa_model/training/intelligent/task_classifier.py +576 -0
- isa_model/training/storage/__init__.py +24 -0
- isa_model/training/storage/core_integration.py +439 -0
- isa_model/training/storage/training_repository.py +552 -0
- isa_model/training/storage/training_storage.py +628 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
- isa_model-0.4.0.dist-info/RECORD +182 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model-0.3.9.dist-info/RECORD +0 -138
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,339 @@
|
|
1
|
+
"""
|
2
|
+
Deployments API Routes
|
3
|
+
|
4
|
+
Handles automated HuggingFace model deployment to Modal
|
5
|
+
"""
|
6
|
+
|
7
|
+
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
8
|
+
from pydantic import BaseModel
|
9
|
+
from typing import Optional, List, Dict, Any
|
10
|
+
import logging
|
11
|
+
import asyncio
|
12
|
+
import json
|
13
|
+
from pathlib import Path
|
14
|
+
|
15
|
+
from isa_model.deployment.services.auto_hf_modal_deployer import HuggingFaceModalDeployer
|
16
|
+
|
17
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router on which every deployment endpoint in this module is registered.
router = APIRouter()
|
20
|
+
|
21
|
+
# Request/Response models
|
22
|
+
class AnalyzeModelRequest(BaseModel):
    """Request body accepted by the ``/analyze`` endpoint."""

    # Identifier of the model to analyze.
    model_id: str
|
24
|
+
|
25
|
+
class DeployModelRequest(BaseModel):
    """Request body accepted by the ``/deploy`` endpoint."""

    # Identifier of the model to deploy.
    model_id: str

    # Optional name stored on the deployment record; omitted when not given.
    service_name: Optional[str] = None

    # Stored on the record and forwarded to the deployer's ``deploy`` flag.
    auto_deploy: bool = False
|
29
|
+
|
30
|
+
class DeploymentResponse(BaseModel):
    """Response model declared for the ``/deploy`` endpoint."""

    # Whether the request was accepted.
    success: bool

    # Key of the tracking record created for this deployment.
    deployment_id: Optional[str] = None

    # Identifier of the model the deployment refers to.
    model_id: str

    # Optional deployment details; all default to None when not supplied.
    config: Optional[Dict[str, Any]] = None
    service_file: Optional[str] = None
    deployment_command: Optional[str] = None
    estimated_cost_per_hour: Optional[float] = None

    # False unless explicitly set by the caller constructing the response.
    deployed: bool = False

    # Error description, when there is one.
    error: Optional[str] = None
|
40
|
+
|
41
|
+
# Single deployer instance shared by all endpoints in this module.
deployer = HuggingFaceModalDeployer()

# Deployment records keyed by deployment ID. Kept in process memory only
# (in production, use a database).
deployments: Dict[str, Dict[str, Any]] = {}
|
46
|
+
|
47
|
+
@router.post("/analyze", response_model=Dict[str, Any])
async def analyze_model(request: AnalyzeModelRequest):
    """
    Analyze a HuggingFace model for deployment compatibility

    Returns a dict holding ``success`` plus a fixed set of attributes copied
    from the deployer's analysis result. Any exception raised along the way
    is logged and reported as HTTP 400.
    """
    # Attributes copied verbatim from the analysis result, in response order.
    reported_fields = (
        "model_id",
        "model_type",
        "architecture",
        "parameters",
        "gpu_requirements",
        "memory_gb",
        "container_memory_mb",
        "dependencies",
        "capabilities",
        "estimated_cost_per_hour",
    )

    try:
        logger.info(f"Analyzing model: {request.model_id}")

        analysis = deployer.analyze_model(request.model_id)

        summary = {"success": True}
        for field_name in reported_fields:
            summary[field_name] = getattr(analysis, field_name)
        return summary

    except Exception as e:
        logger.error(f"Model analysis failed for {request.model_id}: {e}")
        raise HTTPException(status_code=400, detail=f"Model analysis failed: {str(e)}")
|
75
|
+
|
76
|
+
@router.post("/deploy", response_model=DeploymentResponse)
async def deploy_model(request: DeployModelRequest, background_tasks: BackgroundTasks):
    """
    Deploy a HuggingFace model to Modal

    Creates a "pending" tracking record, schedules ``perform_deployment`` as
    a background task and returns immediately with the new deployment ID.
    Any exception raised while doing so is logged and reported as HTTP 500.
    """
    try:
        logger.info(f"Starting deployment for model: {request.model_id}")

        import time
        import uuid

        # ID combines a short random token with the current Unix time.
        deployment_id = f"deploy_{uuid.uuid4().hex[:8]}_{int(time.time())}"

        # Register the record before scheduling so the task can find it.
        tracking_record = dict(
            id=deployment_id,
            model_id=request.model_id,
            service_name=request.service_name,
            status="pending",
            created_at=time.time(),
            auto_deploy=request.auto_deploy,
        )
        deployments[deployment_id] = tracking_record

        task_args = (
            deployment_id,
            request.model_id,
            request.service_name,
            request.auto_deploy,
        )
        background_tasks.add_task(perform_deployment, *task_args)

        return DeploymentResponse(
            success=True,
            deployment_id=deployment_id,
            model_id=request.model_id,
            deployed=False,
        )

    except Exception as e:
        logger.error(f"Deployment initiation failed for {request.model_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Deployment failed: {str(e)}")
|
118
|
+
|
119
|
+
async def perform_deployment(deployment_id: str, model_id: str, service_name: Optional[str], auto_deploy: bool):
    """
    Perform the actual deployment in the background

    Moves the tracking record in ``deployments`` to "deploying", runs the
    deployer, then records either the result ("completed" when the deployer
    reports ``deployed``, otherwise "generated") or the failure ("failed").
    Errors from the deployer are stored on the record, not raised.

    A record that is missing, or that has been marked "cancelled" before this
    task started, is left untouched and the deployment is skipped.

    Args:
        deployment_id: Key of the tracking record in ``deployments``.
        model_id: Model identifier passed to ``deployer.deploy_model``.
        service_name: Accepted so callers can pass it, but not used here.
        auto_deploy: Forwarded to the deployer as its ``deploy`` argument.
    """
    import time

    record = deployments.get(deployment_id)
    if record is None:
        # Without a record there is nowhere to report progress or failure.
        logger.error(f"Deployment {deployment_id} not found; skipping")
        return

    if record.get("status") == "cancelled":
        # Honor a cancellation that arrived before the task began running;
        # otherwise the status would be overwritten with "deploying".
        logger.info(f"Deployment {deployment_id} was cancelled; skipping")
        return

    try:
        logger.info(f"Performing deployment {deployment_id} for model {model_id}")

        # Update status
        record["status"] = "deploying"
        record["progress"] = "Analyzing model"

        # NOTE(review): this call is not awaited, so if the deployer does
        # blocking work it runs on the event loop - confirm whether it should
        # be moved to a worker thread.
        result = deployer.deploy_model(model_id, deploy=auto_deploy)

        if result["success"]:
            record.update({
                "status": "completed" if result.get("deployed") else "generated",
                "progress": "Deployment completed",
                "config": result["config"],
                "service_file": result["service_file"],
                "deployment_command": result["deployment_command"],
                "estimated_cost_per_hour": result["estimated_cost_per_hour"],
                "deployed": result.get("deployed", False),
                "completed_at": time.time()
            })
        else:
            record.update({
                "status": "failed",
                "progress": "Deployment failed",
                "error": result.get("error", "Unknown error"),
                "failed_at": time.time()
            })

    except Exception as e:
        # Covers deployer errors as well as a result missing expected keys.
        logger.error(f"Deployment {deployment_id} failed: {e}")
        record.update({
            "status": "failed",
            "progress": "Deployment failed",
            "error": str(e),
            "failed_at": time.time()
        })
|
162
|
+
|
163
|
+
@router.get("/")
async def list_deployments():
    """
    List all deployments

    Returns one summary dict per tracked deployment. When nothing is tracked,
    a fixed pair of demo entries is returned instead. Unexpected errors are
    logged and reported as HTTP 500.
    """
    try:
        # model_type, gpu and cost_per_hour are placeholders for every
        # tracked deployment; they are not read from the stored record.
        deployment_list = [
            {
                "id": deployment_id,
                "name": deployment.get("service_name") or f"{deployment['model_id'].split('/')[-1]} Service",
                "model_id": deployment["model_id"],
                "model_type": "text",  # Would be determined from analysis
                "status": deployment["status"],
                "gpu": "A10G",  # Would be from config
                "cost_per_hour": "1.20",  # Would be from config
                "created_at": deployment["created_at"],
                "deployed_at": deployment.get("completed_at"),
                "error": deployment.get("error"),
            }
            for deployment_id, deployment in deployments.items()
        ]

        if deployment_list:
            return deployment_list

        # Nothing tracked yet: fall back to static demo entries.
        return [
            {
                "id": "qwen2-vl-7b",
                "name": "Qwen2.5-VL Service",
                "model_id": "Qwen/Qwen2.5-VL-7B-Instruct",
                "model_type": "vision",
                "status": "active",
                "gpu": "A100",
                "cost_per_hour": "4.00",
                "created_at": 1705312200,
                "deployed_at": 1705312800,
            },
            {
                "id": "embed-service",
                "name": "BGE Embed Service",
                "model_id": "BAAI/bge-base-en-v1.5",
                "model_type": "embedding",
                "status": "active",
                "gpu": "A10G",
                "cost_per_hour": "1.20",
                "created_at": 1705225800,
                "deployed_at": 1705226400,
            },
        ]

    except Exception as e:
        logger.error(f"Failed to list deployments: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to list deployments: {str(e)}")
|
218
|
+
|
219
|
+
@router.get("/{deployment_id}")
async def get_deployment(deployment_id: str):
    """
    Get deployment details

    Returns the stored tracking record as-is. Responds with HTTP 404 when the
    ID is unknown and HTTP 500 on any other error.
    """
    try:
        return deployments[deployment_id]
    except KeyError:
        # Unknown ID: report "not found" rather than a server error.
        raise HTTPException(status_code=404, detail="Deployment not found") from None
    except Exception as e:
        logger.error(f"Failed to get deployment {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get deployment: {str(e)}")
|
235
|
+
|
236
|
+
@router.delete("/{deployment_id}")
async def cancel_deployment(deployment_id: str):
    """
    Cancel a pending deployment

    Marks the record as "cancelled" and stamps ``cancelled_at``. Only a
    deployment whose status is still "pending" can be cancelled.

    Raises:
        HTTPException: 404 when the ID is unknown, 400 when the deployment is
            not in the "pending" state, 500 on any other error.
    """
    # Imported here because the module-level imports do not include `time`;
    # without it the timestamp below raised NameError and every cancellation
    # ended in HTTP 500 with the status already changed.
    import time

    try:
        if deployment_id not in deployments:
            raise HTTPException(status_code=404, detail="Deployment not found")

        deployment = deployments[deployment_id]

        if deployment["status"] != "pending":
            raise HTTPException(status_code=400, detail="Cannot cancel deployment in current status")

        # Take the timestamp first so status and timestamp change together.
        cancelled_at = time.time()
        deployment.update({"status": "cancelled", "cancelled_at": cancelled_at})
        return {"success": True, "message": "Deployment cancelled"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to cancel deployment {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to cancel deployment: {str(e)}")
|
259
|
+
|
260
|
+
@router.post("/{deployment_id}/retry")
async def retry_deployment(deployment_id: str, background_tasks: BackgroundTasks):
    """
    Retry a failed deployment

    Resets a "failed" record to "pending" and schedules ``perform_deployment``
    again with the model, service name and auto-deploy flag stored on it.
    Responds with HTTP 404 for an unknown ID, HTTP 400 when the deployment is
    not in the "failed" state, and HTTP 500 on any other error.
    """
    try:
        if deployment_id not in deployments:
            raise HTTPException(status_code=404, detail="Deployment not found")

        deployment = deployments[deployment_id]

        # Only failed deployments are eligible for a retry.
        if deployment["status"] != "failed":
            raise HTTPException(status_code=400, detail="Cannot retry deployment in current status")

        # Clear the previous failure before the task is queued.
        deployment["status"] = "pending"
        deployment["error"] = None
        deployment["progress"] = "Retrying deployment"

        retry_args = (
            deployment_id,
            deployment["model_id"],
            deployment.get("service_name"),
            deployment.get("auto_deploy", False),
        )
        background_tasks.add_task(perform_deployment, *retry_args)

        return {"success": True, "message": "Deployment retry started"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to retry deployment {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to retry deployment: {str(e)}")
|
295
|
+
|
296
|
+
@router.get("/{deployment_id}/code")
async def get_service_code(deployment_id: str):
    """
    Download the generated service code for a deployment

    Reads the file named by the record's ``service_file`` entry and returns
    its text as an attachment. Responds with HTTP 404 when the deployment is
    unknown, when no service file is recorded, or when the file does not
    exist; any other error is reported as HTTP 500.
    """
    try:
        if deployment_id not in deployments:
            raise HTTPException(status_code=404, detail="Deployment not found")

        service_file = deployments[deployment_id].get("service_file")

        # Build the path only once a file name is actually recorded.
        code_path = Path(service_file) if service_file else None
        if code_path is None or not code_path.exists():
            raise HTTPException(status_code=404, detail="Service code not found")

        service_code = code_path.read_text()

        from fastapi.responses import PlainTextResponse

        # Suggest the original file name to the downloading client.
        download_headers = {
            "Content-Disposition": f"attachment; filename={Path(service_file).name}"
        }
        return PlainTextResponse(content=service_code, headers=download_headers)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get service code for {deployment_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get service code: {str(e)}")
|
328
|
+
|
329
|
+
# Health check for deployments service
|
330
|
+
# NOTE(review): GET "/{deployment_id}" is declared earlier on this router, so
# a request for "/health" looks like it is matched by that route first and
# answered with 404 - confirm, and consider declaring this route before it.
@router.get("/health")
async def deployments_health():
    """Health check for deployments service"""
    tracked = list(deployments.values())

    # NOTE(review): nothing in this module assigns the "active" status, so
    # this count is presumably always zero - confirm the intended status.
    active_count = sum(1 for record in tracked if record["status"] == "active")
    pending_count = sum(1 for record in tracked if record["status"] == "pending")

    return {
        "status": "healthy",
        "service": "deployments",
        "active_deployments": active_count,
        "pending_deployments": pending_count,
        "total_deployments": len(deployments),
    }
|