isa-model 0.3.9-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
isa_model/serving/api/routes/deployments.py
@@ -0,0 +1,339 @@
+ """
+ Deployments API Routes
+
+ Handles automated HuggingFace model deployment to Modal
+ """
+
+ from fastapi import APIRouter, HTTPException, BackgroundTasks
+ from pydantic import BaseModel
+ from typing import Optional, List, Dict, Any
+ import logging
+ import asyncio
+ import json
+ from pathlib import Path
+
+ from isa_model.deployment.services.auto_hf_modal_deployer import HuggingFaceModalDeployer
+
+ logger = logging.getLogger(__name__)
+
+ router = APIRouter()
+
+ # Request/Response models
+ class AnalyzeModelRequest(BaseModel):
+     model_id: str
+
+ class DeployModelRequest(BaseModel):
+     model_id: str
+     service_name: Optional[str] = None
+     auto_deploy: bool = False
+
+ class DeploymentResponse(BaseModel):
+     success: bool
+     deployment_id: Optional[str] = None
+     model_id: str
+     config: Optional[Dict[str, Any]] = None
+     service_file: Optional[str] = None
+     deployment_command: Optional[str] = None
+     estimated_cost_per_hour: Optional[float] = None
+     deployed: bool = False
+     error: Optional[str] = None
+
+ # Global deployer instance
+ deployer = HuggingFaceModalDeployer()
+
+ # In-memory deployment tracking (in production, use a database)
+ deployments = {}
+
+ @router.post("/analyze", response_model=Dict[str, Any])
+ async def analyze_model(request: AnalyzeModelRequest):
+     """
+     Analyze a HuggingFace model for deployment compatibility
+     """
+     try:
+         logger.info(f"Analyzing model: {request.model_id}")
+
+         # Analyze the model
+         config = deployer.analyze_model(request.model_id)
+
+         return {
+             "success": True,
+             "model_id": config.model_id,
+             "model_type": config.model_type,
+             "architecture": config.architecture,
+             "parameters": config.parameters,
+             "gpu_requirements": config.gpu_requirements,
+             "memory_gb": config.memory_gb,
+             "container_memory_mb": config.container_memory_mb,
+             "dependencies": config.dependencies,
+             "capabilities": config.capabilities,
+             "estimated_cost_per_hour": config.estimated_cost_per_hour
+         }
+
+     except Exception as e:
+         logger.error(f"Model analysis failed for {request.model_id}: {e}")
+         raise HTTPException(status_code=400, detail=f"Model analysis failed: {str(e)}")
+
+ @router.post("/deploy", response_model=DeploymentResponse)
+ async def deploy_model(request: DeployModelRequest, background_tasks: BackgroundTasks):
+     """
+     Deploy a HuggingFace model to Modal
+     """
+     try:
+         logger.info(f"Starting deployment for model: {request.model_id}")
+
+         # Generate unique deployment ID
+         import time
+         import uuid
+         deployment_id = f"deploy_{uuid.uuid4().hex[:8]}_{int(time.time())}"
+
+         # Add to deployments tracking
+         deployments[deployment_id] = {
+             "id": deployment_id,
+             "model_id": request.model_id,
+             "service_name": request.service_name,
+             "status": "pending",
+             "created_at": time.time(),
+             "auto_deploy": request.auto_deploy
+         }
+
+         # Start deployment in background
+         background_tasks.add_task(
+             perform_deployment,
+             deployment_id,
+             request.model_id,
+             request.service_name,
+             request.auto_deploy
+         )
+
+         return DeploymentResponse(
+             success=True,
+             deployment_id=deployment_id,
+             model_id=request.model_id,
+             deployed=False
+         )
+
+     except Exception as e:
+         logger.error(f"Deployment initiation failed for {request.model_id}: {e}")
+         raise HTTPException(status_code=500, detail=f"Deployment failed: {str(e)}")
+
+ async def perform_deployment(deployment_id: str, model_id: str, service_name: Optional[str], auto_deploy: bool):
+     """
+     Perform the actual deployment in the background
+     """
+     import time
+
+     try:
+         logger.info(f"Performing deployment {deployment_id} for model {model_id}")
+
+         # Update status
+         deployments[deployment_id]["status"] = "deploying"
+         deployments[deployment_id]["progress"] = "Analyzing model"
+
+         # Deploy the model
+         result = deployer.deploy_model(model_id, deploy=auto_deploy)
+
+         if result["success"]:
+             deployments[deployment_id].update({
+                 "status": "completed" if result.get("deployed") else "generated",
+                 "progress": "Deployment completed",
+                 "config": result["config"],
+                 "service_file": result["service_file"],
+                 "deployment_command": result["deployment_command"],
+                 "estimated_cost_per_hour": result["estimated_cost_per_hour"],
+                 "deployed": result.get("deployed", False),
+                 "completed_at": time.time()
+             })
+         else:
+             deployments[deployment_id].update({
+                 "status": "failed",
+                 "progress": "Deployment failed",
+                 "error": result.get("error", "Unknown error"),
+                 "failed_at": time.time()
+             })
+
+     except Exception as e:
+         logger.error(f"Deployment {deployment_id} failed: {e}")
+         deployments[deployment_id].update({
+             "status": "failed",
+             "progress": "Deployment failed",
+             "error": str(e),
+             "failed_at": time.time()
+         })
+
+ @router.get("/")
+ async def list_deployments():
+     """
+     List all deployments
+     """
+     try:
+         # Convert deployments to list format
+         deployment_list = []
+
+         for deployment_id, deployment in deployments.items():
+             deployment_list.append({
+                 "id": deployment_id,
+                 "name": deployment.get("service_name") or f"{deployment['model_id'].split('/')[-1]} Service",
+                 "model_id": deployment["model_id"],
+                 "model_type": "text",  # Would be determined from analysis
+                 "status": deployment["status"],
+                 "gpu": "A10G",  # Would be from config
+                 "cost_per_hour": "1.20",  # Would be from config
+                 "created_at": deployment["created_at"],
+                 "deployed_at": deployment.get("completed_at"),
+                 "error": deployment.get("error")
+             })
+
+         # Add some fallback deployments for demo
+         if not deployment_list:
+             deployment_list = [
+                 {
+                     "id": "qwen2-vl-7b",
+                     "name": "Qwen2.5-VL Service",
+                     "model_id": "Qwen/Qwen2.5-VL-7B-Instruct",
+                     "model_type": "vision",
+                     "status": "active",
+                     "gpu": "A100",
+                     "cost_per_hour": "4.00",
+                     "created_at": 1705312200,
+                     "deployed_at": 1705312800
+                 },
+                 {
+                     "id": "embed-service",
+                     "name": "BGE Embed Service",
+                     "model_id": "BAAI/bge-base-en-v1.5",
+                     "model_type": "embedding",
+                     "status": "active",
+                     "gpu": "A10G",
+                     "cost_per_hour": "1.20",
+                     "created_at": 1705225800,
+                     "deployed_at": 1705226400
+                 }
+             ]
+
+         return deployment_list
+
+     except Exception as e:
+         logger.error(f"Failed to list deployments: {e}")
+         raise HTTPException(status_code=500, detail=f"Failed to list deployments: {str(e)}")
+
+ @router.get("/{deployment_id}")
+ async def get_deployment(deployment_id: str):
+     """
+     Get deployment details
+     """
+     try:
+         if deployment_id not in deployments:
+             raise HTTPException(status_code=404, detail="Deployment not found")
+
+         return deployments[deployment_id]
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logger.error(f"Failed to get deployment {deployment_id}: {e}")
+         raise HTTPException(status_code=500, detail=f"Failed to get deployment: {str(e)}")
+
+ @router.delete("/{deployment_id}")
+ async def cancel_deployment(deployment_id: str):
+     """
+     Cancel a pending deployment
+     """
+     try:
+         if deployment_id not in deployments:
+             raise HTTPException(status_code=404, detail="Deployment not found")
+
+         deployment = deployments[deployment_id]
+
+         if deployment["status"] == "pending":
+             deployment["status"] = "cancelled"
+             deployment["cancelled_at"] = time.time()
+             return {"success": True, "message": "Deployment cancelled"}
+         else:
+             raise HTTPException(status_code=400, detail="Cannot cancel deployment in current status")
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logger.error(f"Failed to cancel deployment {deployment_id}: {e}")
+         raise HTTPException(status_code=500, detail=f"Failed to cancel deployment: {str(e)}")
+
+ @router.post("/{deployment_id}/retry")
+ async def retry_deployment(deployment_id: str, background_tasks: BackgroundTasks):
+     """
+     Retry a failed deployment
+     """
+     try:
+         if deployment_id not in deployments:
+             raise HTTPException(status_code=404, detail="Deployment not found")
+
+         deployment = deployments[deployment_id]
+
+         if deployment["status"] == "failed":
+             # Reset deployment status
+             deployment["status"] = "pending"
+             deployment["error"] = None
+             deployment["progress"] = "Retrying deployment"
+
+             # Start deployment in background
+             background_tasks.add_task(
+                 perform_deployment,
+                 deployment_id,
+                 deployment["model_id"],
+                 deployment.get("service_name"),
+                 deployment.get("auto_deploy", False)
+             )
+
+             return {"success": True, "message": "Deployment retry started"}
+         else:
+             raise HTTPException(status_code=400, detail="Cannot retry deployment in current status")
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logger.error(f"Failed to retry deployment {deployment_id}: {e}")
+         raise HTTPException(status_code=500, detail=f"Failed to retry deployment: {str(e)}")
+
+ @router.get("/{deployment_id}/code")
+ async def get_service_code(deployment_id: str):
+     """
+     Download the generated service code for a deployment
+     """
+     try:
+         if deployment_id not in deployments:
+             raise HTTPException(status_code=404, detail="Deployment not found")
+
+         deployment = deployments[deployment_id]
+         service_file = deployment.get("service_file")
+
+         if not service_file or not Path(service_file).exists():
+             raise HTTPException(status_code=404, detail="Service code not found")
+
+         # Read the service code file
+         with open(service_file, 'r') as f:
+             service_code = f.read()
+
+         from fastapi.responses import PlainTextResponse
+         return PlainTextResponse(
+             content=service_code,
+             headers={
+                 "Content-Disposition": f"attachment; filename={Path(service_file).name}"
+             }
+         )
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logger.error(f"Failed to get service code for {deployment_id}: {e}")
+         raise HTTPException(status_code=500, detail=f"Failed to get service code: {str(e)}")
+
+ # Health check for deployments service
+ @router.get("/health")
+ async def deployments_health():
+     """Health check for deployments service"""
+     return {
+         "status": "healthy",
+         "service": "deployments",
+         "active_deployments": len([d for d in deployments.values() if d["status"] == "active"]),
+         "pending_deployments": len([d for d in deployments.values() if d["status"] == "pending"]),
+         "total_deployments": len(deployments)
+     }
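
For orientation, the routes added above are meant to be driven over HTTP once the router is mounted in the FastAPI server. The sketch below shows one plausible client flow (analyze, deploy, then poll the deployment record); the base URL, the /deployments mount prefix, and the use of httpx are illustrative assumptions, not taken from this diff.

# Hypothetical client sketch — assumes the deployments router is mounted at
# /deployments on a locally running isa_model API server (adjust as needed).
import time
import httpx

BASE = "http://localhost:8000/deployments"  # assumed mount point, not from the diff

with httpx.Client(timeout=30.0) as client:
    # 1) Check a HuggingFace model for deployment compatibility
    analysis = client.post(f"{BASE}/analyze", json={"model_id": "BAAI/bge-base-en-v1.5"})
    analysis.raise_for_status()
    print("estimated cost/hour:", analysis.json().get("estimated_cost_per_hour"))

    # 2) Start a background deployment; auto_deploy=False only generates the
    #    Modal service file and deployment command without pushing to Modal
    deploy = client.post(
        f"{BASE}/deploy",
        json={"model_id": "BAAI/bge-base-en-v1.5", "auto_deploy": False},
    )
    deploy.raise_for_status()
    deployment_id = deploy.json()["deployment_id"]

    # 3) Poll the deployment record until the background task settles
    status = {}
    for _ in range(30):
        status = client.get(f"{BASE}/{deployment_id}").json()
        if status.get("status") in ("generated", "completed", "failed"):
            break
        time.sleep(2)
    print("final status:", status.get("status"))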