isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -9,12 +9,12 @@ This is the main API that handles all types of AI requests:
9
9
  - Embedding tasks
10
10
  """
11
11
 
12
- from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Request, Depends
12
+ from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Request, Depends, Query
13
13
  from fastapi.responses import StreamingResponse
14
14
  from pydantic import BaseModel, Field
15
15
  from typing import Optional, Dict, Any, Union, List, AsyncGenerator
16
16
  import logging
17
- from ..middleware.auth import optional_auth, require_read_access
17
+ from ..middleware.auth import optional_auth, require_read_access, require_write_access
18
18
  from ..middleware.security import rate_limit_standard, rate_limit_heavy, sanitize_input
19
19
  import asyncio
20
20
  import json
@@ -96,6 +96,30 @@ class UnifiedRequest(BaseModel):
96
96
  }
97
97
  ]]
98
98
  )
99
+ output_format: Optional[str] = Field(
100
+ None,
101
+ description="输出格式控制。支持的格式:json(JSON结构化输出)、markdown(Markdown格式)、code(代码块提取)、structured(智能结构化解析)。主要用于文本服务的响应格式化。",
102
+ examples=["json", "markdown", "code", "structured"]
103
+ )
104
+ json_schema: Optional[Dict[str, Any]] = Field(
105
+ None,
106
+ description="JSON模式验证。当output_format='json'时使用,用于验证和约束JSON输出格式。遵循JSON Schema规范。",
107
+ examples=[{
108
+ "type": "object",
109
+ "properties": {
110
+ "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
111
+ "confidence": {"type": "number", "minimum": 0, "maximum": 1}
112
+ },
113
+ "required": ["sentiment", "confidence"]
114
+ }]
115
+ )
116
+ repair_attempts: Optional[int] = Field(
117
+ 3,
118
+ ge=0,
119
+ le=10,
120
+ description="JSON修复尝试次数。当解析JSON失败时,系统会尝试修复常见的JSON格式错误。0表示不进行修复尝试。",
121
+ examples=[3, 0, 5]
122
+ )
99
123
  parameters: Optional[Dict[str, Any]] = Field(
100
124
  default_factory=dict,
101
125
  description="额外的任务参数,用于精细控制服务行为。参数内容根据具体服务类型而定,如temperature、max_tokens、voice等。",
@@ -277,7 +301,17 @@ async def unified_invoke(request: Request, user: Dict = Depends(require_read_acc
277
301
  unified_request.input_data = sanitize_input(unified_request.input_data)
278
302
 
279
303
  except Exception as e:
280
- raise HTTPException(status_code=400, detail=f"Invalid JSON request: {e}")
304
+ from ..error_handlers import handle_validation_error, create_http_exception, ErrorCode
305
+ if hasattr(e, 'errors'): # Pydantic validation error
306
+ error_response = handle_validation_error(e)
307
+ raise HTTPException(status_code=400, detail=error_response)
308
+ else:
309
+ raise create_http_exception(
310
+ f"请求JSON格式错误: {str(e)}",
311
+ 400,
312
+ ErrorCode.INVALID_INPUT,
313
+ {"suggestion": "请检查JSON格式和必需字段"}
314
+ )
281
315
 
282
316
  # Prepare parameters, ensuring tools isn't duplicated
283
317
  params = dict(unified_request.parameters) if unified_request.parameters else {}
@@ -285,6 +319,14 @@ async def unified_invoke(request: Request, user: Dict = Depends(require_read_acc
285
319
  params.pop("tools", None) # Remove tools from parameters if present
286
320
  params["tools"] = unified_request.tools
287
321
 
322
+ # Add JSON output formatting parameters
323
+ if unified_request.output_format:
324
+ params["output_format"] = unified_request.output_format
325
+ if unified_request.json_schema:
326
+ params["json_schema"] = unified_request.json_schema
327
+ if unified_request.repair_attempts is not None:
328
+ params["repair_attempts"] = unified_request.repair_attempts
329
+
288
330
  # Check if this should be a streaming response
289
331
  # Default to streaming for text+chat unless explicitly disabled
290
332
  is_text_chat = (unified_request.service_type == "text" and unified_request.task == "chat")
@@ -350,8 +392,20 @@ async def unified_invoke(request: Request, user: Dict = Depends(require_read_acc
350
392
  yield f"data: {json.dumps({'token': item})}\n\n"
351
393
 
352
394
  except Exception as e:
353
- # Send error as final event
354
- yield f"data: {json.dumps({'error': str(e)})}\n\n"
395
+ from ..error_handlers import create_error_response, ErrorCode
396
+ # Create detailed error response for streaming
397
+ error_response = create_error_response(
398
+ error=e,
399
+ error_code=ErrorCode.INFERENCE_FAILED,
400
+ details={
401
+ "service_type": unified_request.service_type,
402
+ "model": unified_request.model,
403
+ "provider": unified_request.provider,
404
+ "streaming": True
405
+ }
406
+ )
407
+ # Send structured error as final event
408
+ yield f"data: {json.dumps({'error': error_response})}\n\n"
355
409
  finally:
356
410
  # Send end-of-stream marker
357
411
  yield f"data: {json.dumps({'done': True})}\n\n"
@@ -387,11 +441,30 @@ async def unified_invoke(request: Request, user: Dict = Depends(require_read_acc
387
441
  except HTTPException:
388
442
  raise
389
443
  except Exception as e:
444
+ from ..error_handlers import create_error_response, ErrorCode
390
445
  logger.error(f"Unified invoke failed: {e}")
446
+
447
+ # Create detailed error response
448
+ error_response = create_error_response(
449
+ error=e,
450
+ status_code=500,
451
+ error_code=ErrorCode.INFERENCE_FAILED,
452
+ details={
453
+ "service_type": getattr(unified_request, 'service_type', 'unknown'),
454
+ "model": getattr(unified_request, 'model', 'unknown'),
455
+ "provider": getattr(unified_request, 'provider', 'unknown'),
456
+ "task": getattr(unified_request, 'task', 'unknown')
457
+ }
458
+ )
459
+
391
460
  return UnifiedResponse(
392
461
  success=False,
393
- error=str(e),
394
- metadata={}
462
+ error=error_response.get("error"),
463
+ metadata={
464
+ "error_code": error_response.get("error_code"),
465
+ "user_message": error_response.get("user_message"),
466
+ "details": error_response.get("details", {})
467
+ }
395
468
  )
396
469
 
397
470
 
@@ -400,20 +473,71 @@ async def unified_invoke(request: Request, user: Dict = Depends(require_read_acc
400
473
  async def get_available_models(service_type: Optional[str] = None):
401
474
  """Get available models (optional filter by service type)"""
402
475
  try:
403
- client = get_isa_client()
404
- return await client.get_available_models(service_type)
476
+ from ..cache_manager import cached, model_list_cache_key
477
+
478
+ @cached(ttl=600.0, cache_key_func=lambda st=service_type: model_list_cache_key(st)) # 10 minutes cache
479
+ async def _get_models(service_type_param):
480
+ client = get_isa_client()
481
+ return await client.get_available_models(service_type_param)
482
+
483
+ models_list = await _get_models(service_type)
484
+
485
+ # Ensure we return the expected format
486
+ if isinstance(models_list, list):
487
+ return {
488
+ "success": True,
489
+ "models": models_list,
490
+ "total_count": len(models_list),
491
+ "service_type_filter": service_type
492
+ }
493
+ elif isinstance(models_list, dict) and "models" in models_list:
494
+ # Already in correct format
495
+ return models_list
496
+ else:
497
+ # Unknown format, convert to expected format
498
+ return {
499
+ "success": True,
500
+ "models": models_list if isinstance(models_list, list) else [],
501
+ "total_count": len(models_list) if isinstance(models_list, list) else 0,
502
+ "service_type_filter": service_type
503
+ }
405
504
  except Exception as e:
406
505
  logger.error(f"Failed to get available models: {e}")
407
506
  # Fallback static model list
507
+ # Load custom models
508
+ custom_models = []
509
+ try:
510
+ from isa_model.inference.services.custom_model_manager import get_custom_model_manager
511
+ custom_model_manager = get_custom_model_manager()
512
+ custom_models = custom_model_manager.get_models_for_api()
513
+ logger.debug(f"Loaded {len(custom_models)} custom models")
514
+ except Exception as e:
515
+ logger.warning(f"Failed to load custom models: {e}")
516
+
517
+ # Base fallback models
518
+ base_models = [
519
+ {"service_type": "vision", "provider": "openai", "model_id": "gpt-4o-mini"},
520
+ {"service_type": "text", "provider": "openai", "model_id": "gpt-4o-mini"},
521
+ {"service_type": "audio", "provider": "openai", "model_id": "whisper-1"},
522
+ {"service_type": "audio", "provider": "openai", "model_id": "tts-1"},
523
+ {"service_type": "embedding", "provider": "openai", "model_id": "text-embedding-3-small"},
524
+ {"service_type": "image", "provider": "replicate", "model_id": "black-forest-labs/flux-schnell"}
525
+ ]
526
+
527
+ # Combine base models with custom models
528
+ fallback_models = base_models + custom_models
529
+
530
+ # Filter by service_type if provided
531
+ if service_type:
532
+ fallback_models = [m for m in fallback_models if m["service_type"] == service_type]
533
+
408
534
  return {
409
- "models": [
410
- {"service_type": "vision", "provider": "openai", "model_id": "gpt-4.1-mini"},
411
- {"service_type": "text", "provider": "openai", "model_id": "gpt-4.1-mini"},
412
- {"service_type": "audio", "provider": "openai", "model_id": "whisper-1"},
413
- {"service_type": "audio", "provider": "openai", "model_id": "tts-1"},
414
- {"service_type": "embedding", "provider": "openai", "model_id": "text-embedding-3-small"},
415
- {"service_type": "image", "provider": "replicate", "model_id": "black-forest-labs/flux-schnell"}
416
- ]
535
+ "success": False,
536
+ "error": f"Failed to get models: {str(e)}",
537
+ "models": fallback_models,
538
+ "total_count": len(fallback_models),
539
+ "service_type_filter": service_type,
540
+ "fallback": True
417
541
  }
418
542
 
419
543
  @router.get("/health")
@@ -430,4 +554,542 @@ async def health_check():
430
554
  return {
431
555
  "api": "error",
432
556
  "error": str(e)
433
- }
557
+ }
558
+
559
+ # Enhanced Model Management API Endpoints
560
+
561
+ @router.get("/models/search")
562
+ async def search_models(
563
+ query: str = Query(..., description="Search query"),
564
+ model_type: Optional[str] = Query(None, description="Filter by model type"),
565
+ provider: Optional[str] = Query(None, description="Filter by provider"),
566
+ capabilities: Optional[List[str]] = Query(None, description="Filter by capabilities"),
567
+ limit: int = Query(50, ge=1, le=200, description="Maximum number of results"),
568
+ user = Depends(optional_auth)
569
+ ):
570
+ """Search models by query and filters"""
571
+ try:
572
+ # Try database search first
573
+ try:
574
+ from isa_model.core.models.model_repo import ModelRepo
575
+
576
+ repo = ModelRepo()
577
+
578
+ # Convert capabilities from query parameter
579
+ capability_list = None
580
+ if capabilities:
581
+ capability_list = [cap.strip() for cap in capabilities if cap.strip()]
582
+
583
+ results = repo.search_models(
584
+ query=query,
585
+ model_type=model_type,
586
+ provider=provider,
587
+ capabilities=capability_list,
588
+ limit=limit
589
+ )
590
+
591
+ # If we got results from the database, return them
592
+ if results:
593
+ return {
594
+ "success": True,
595
+ "query": query,
596
+ "filters": {
597
+ "model_type": model_type,
598
+ "provider": provider,
599
+ "capabilities": capability_list
600
+ },
601
+ "results": [
602
+ {
603
+ "model_id": model.model_id,
604
+ "model_type": model.model_type,
605
+ "provider": model.provider,
606
+ "description": model.metadata.get("description", ""),
607
+ "capabilities": model.capabilities,
608
+ "updated_at": model.updated_at.isoformat() if model.updated_at else None
609
+ }
610
+ for model in results
611
+ ],
612
+ "total_results": len(results)
613
+ }
614
+
615
+ except Exception as db_error:
616
+ logger.warning(f"Database search failed, using fallback: {db_error}")
617
+
618
+ # Fallback: search in our hardcoded model list + custom models
619
+ # Load custom models
620
+ custom_models_for_search = []
621
+ try:
622
+ from isa_model.inference.services.custom_model_manager import get_custom_model_manager
623
+ custom_model_manager = get_custom_model_manager()
624
+ custom_models_for_search = custom_model_manager.get_models_for_api()
625
+ # Convert format for search
626
+ for model in custom_models_for_search:
627
+ model["model_type"] = model.get("service_type", "text")
628
+ except Exception as e:
629
+ logger.warning(f"Failed to load custom models for search: {e}")
630
+
631
+ fallback_models = [
632
+ {
633
+ "model_id": "gpt-4o-mini",
634
+ "model_type": "text",
635
+ "provider": "openai",
636
+ "description": "Small, fast GPT-4 model optimized for efficiency",
637
+ "capabilities": ["chat", "text_generation", "reasoning"],
638
+ "service_type": "text"
639
+ },
640
+ {
641
+ "model_id": "gpt-4o",
642
+ "model_type": "text",
643
+ "provider": "openai",
644
+ "description": "Large GPT-4 model with enhanced capabilities",
645
+ "capabilities": ["chat", "text_generation", "reasoning", "image_understanding"],
646
+ "service_type": "text"
647
+ },
648
+ {
649
+ "model_id": "text-embedding-3-small",
650
+ "model_type": "embedding",
651
+ "provider": "openai",
652
+ "description": "Small embedding model for text vectorization",
653
+ "capabilities": ["embedding", "similarity"],
654
+ "service_type": "embedding"
655
+ },
656
+ {
657
+ "model_id": "whisper-1",
658
+ "model_type": "audio",
659
+ "provider": "openai",
660
+ "description": "Speech recognition and transcription model",
661
+ "capabilities": ["speech_to_text", "audio_transcription"],
662
+ "service_type": "audio"
663
+ },
664
+ {
665
+ "model_id": "tts-1",
666
+ "model_type": "audio",
667
+ "provider": "openai",
668
+ "description": "Text-to-speech generation model",
669
+ "capabilities": ["text_to_speech"],
670
+ "service_type": "audio"
671
+ },
672
+ {
673
+ "model_id": "flux-schnell",
674
+ "model_type": "image",
675
+ "provider": "replicate",
676
+ "description": "Fast image generation model",
677
+ "capabilities": ["image_generation"],
678
+ "service_type": "image"
679
+ },
680
+ {
681
+ "model_id": "isa-llm-service",
682
+ "model_type": "text",
683
+ "provider": "isa",
684
+ "description": "ISA custom LLM service for trained models",
685
+ "capabilities": ["chat", "text_generation"],
686
+ "service_type": "text"
687
+ },
688
+ {
689
+ "model_id": "isa-omniparser-ui-detection",
690
+ "model_type": "vision",
691
+ "provider": "isa",
692
+ "description": "UI element detection and analysis",
693
+ "capabilities": ["ui_detection", "image_analysis"],
694
+ "service_type": "vision"
695
+ }
696
+ ]
697
+
698
+ # Add custom models to search list
699
+ fallback_models.extend(custom_models_for_search)
700
+
701
+ # Apply search filters
702
+ query_lower = query.lower()
703
+ filtered_models = []
704
+
705
+ for model in fallback_models:
706
+ # Check if query matches
707
+ query_match = (
708
+ query_lower in model["model_id"].lower() or
709
+ query_lower in model["provider"].lower() or
710
+ query_lower in model["description"].lower() or
711
+ any(query_lower in cap.lower() for cap in model["capabilities"])
712
+ )
713
+
714
+ if not query_match:
715
+ continue
716
+
717
+ # Apply type filter
718
+ if model_type and model["model_type"] != model_type:
719
+ continue
720
+
721
+ # Apply provider filter
722
+ if provider and model["provider"] != provider:
723
+ continue
724
+
725
+ # Apply capabilities filter
726
+ if capabilities:
727
+ if not any(cap in model["capabilities"] for cap in capabilities):
728
+ continue
729
+
730
+ filtered_models.append({
731
+ "model_id": model["model_id"],
732
+ "model_type": model["model_type"],
733
+ "provider": model["provider"],
734
+ "description": model["description"],
735
+ "capabilities": model["capabilities"],
736
+ "updated_at": None
737
+ })
738
+
739
+ # Apply limit
740
+ limited_results = filtered_models[:limit]
741
+
742
+ return {
743
+ "success": True,
744
+ "query": query,
745
+ "filters": {
746
+ "model_type": model_type,
747
+ "provider": provider,
748
+ "capabilities": capabilities
749
+ },
750
+ "results": limited_results,
751
+ "total_results": len(limited_results),
752
+ "fallback": True,
753
+ "message": "Using fallback search - database search unavailable"
754
+ }
755
+
756
+ except Exception as e:
757
+ logger.error(f"Failed to search models: {e}")
758
+ raise HTTPException(status_code=500, detail=f"Failed to search models: {str(e)}")
759
+
760
+ @router.get("/models/providers")
761
+ async def get_model_providers(user = Depends(optional_auth)):
762
+ """Get list of available model providers"""
763
+ try:
764
+ from ..cache_manager import cached, provider_list_cache_key
765
+
766
+ @cached(ttl=600.0, cache_key_func=lambda: provider_list_cache_key()) # 10 minutes cache
767
+ async def _get_providers():
768
+ try:
769
+ from isa_model.core.models.model_repo import ModelRepo
770
+ repo = ModelRepo()
771
+ return repo.get_providers_summary()
772
+ except Exception as e:
773
+ logger.warning(f"ModelRepo failed, using fallback: {e}")
774
+ # Fallback to basic provider list
775
+ return [
776
+ {
777
+ "provider": "openai",
778
+ "model_count": 4,
779
+ "model_types": ["text", "vision", "audio", "embedding"],
780
+ "capabilities": ["chat", "completion", "embedding", "vision", "audio"]
781
+ },
782
+ {
783
+ "provider": "isa",
784
+ "model_count": 3,
785
+ "model_types": ["text", "vision", "embedding"],
786
+ "capabilities": ["chat", "completion", "ui_detection", "ocr"]
787
+ },
788
+ {
789
+ "provider": "replicate",
790
+ "model_count": 2,
791
+ "model_types": ["image", "video"],
792
+ "capabilities": ["image_generation", "video_generation"]
793
+ }
794
+ ]
795
+
796
+ providers = await _get_providers()
797
+
798
+ return {
799
+ "success": True,
800
+ "providers": providers,
801
+ "total_count": len(providers),
802
+ "cached": True
803
+ }
804
+
805
+ except Exception as e:
806
+ logger.error(f"Failed to get model providers: {e}")
807
+ raise HTTPException(status_code=500, detail=f"Failed to get model providers: {str(e)}")
808
+
809
@router.get("/models/custom")
async def get_custom_models(
    model_type: Optional[str] = Query(None, description="Filter by model type"),
    provider: Optional[str] = Query(None, description="Filter by provider"),
    user = Depends(optional_auth)
):
    """Get list of custom trained models.

    Results are cached for 5 minutes per (model_type, provider) filter pair.
    On failure the endpoint degrades gracefully: it logs the error and returns
    an empty list with ``success: False`` instead of raising a 500.
    """
    try:
        from ..cache_manager import cached, custom_models_cache_key
        from isa_model.inference.services.custom_model_manager import get_custom_model_manager

        # BUG FIX: the manager must be bound in this scope. Previously it was
        # assigned only inside _get_custom_models, so the "stats" lookup in the
        # return payload below raised NameError on every request and the
        # endpoint always fell into the error branch.
        custom_model_manager = get_custom_model_manager()

        @cached(ttl=300.0, cache_key_func=lambda mt=model_type, p=provider: custom_models_cache_key(mt, p))  # 5 minutes cache
        async def _get_custom_models(model_type_param, provider_param):
            return custom_model_manager.list_models(model_type=model_type_param, provider=provider_param)

        models = await _get_custom_models(model_type, provider)

        # Convert to API format
        api_models = []
        for model in models:
            api_model = {
                "model_id": model.model_id,
                "model_name": model.model_name,
                "model_type": model.model_type,
                "provider": model.provider,
                "base_model": model.base_model,
                "training_date": model.training_date,
                "description": model.metadata.get("description", ""),
                "capabilities": model.capabilities,
                "custom": True
            }

            # Optional enrichments: only present when the record carries them.
            if model.performance_metrics:
                api_model["performance_metrics"] = model.performance_metrics

            if model.deployment_config:
                api_model["deployment_status"] = "configured"

            api_models.append(api_model)

        return {
            "success": True,
            "custom_models": api_models,
            "total_count": len(api_models),
            "filters": {
                "model_type": model_type,
                "provider": provider
            },
            "stats": custom_model_manager.get_stats()
        }

    except Exception as e:
        logger.error(f"Failed to get custom models: {e}")
        return {
            "success": False,
            "error": str(e),
            "custom_models": [],
            "total_count": 0
        }
869
+
870
@router.get("/models/capabilities")
async def get_model_capabilities(user = Depends(optional_auth)):
    """Get list of all available model capabilities"""
    try:
        from ..cache_manager import cached

        @cached(ttl=3600.0, cache_key_func=lambda: "model_capabilities")  # 1 hour cache (static data)
        async def _get_capabilities():
            from isa_model.core.models.model_repo import ModelCapability

            # Render each enum member as a capability/description pair; the
            # human-readable description is derived from the enum value.
            entries = []
            for cap in ModelCapability:
                raw = cap.value
                entries.append({
                    "capability": raw,
                    "description": raw.replace("_", " ").title()
                })
            return entries

        capabilities = await _get_capabilities()

        return {
            "success": True,
            "capabilities": capabilities
        }

    except Exception as e:
        logger.error(f"Failed to get model capabilities: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get model capabilities: {str(e)}")
898
+
899
@router.get("/models/{model_id}")
async def get_model_details(model_id: str, user = Depends(optional_auth)):
    """Get detailed information about a specific model"""
    try:
        from ..cache_manager import cached
        from isa_model.core.models.model_repo import ModelRepo

        @cached(ttl=900.0, cache_key_func=lambda mid=model_id: f"model_details_{mid}")  # 15 minutes cache
        async def _get_model_details(model_id_param):
            return ModelRepo().get_model_by_id(model_id_param)

        model = await _get_model_details(model_id)

        # Missing lookup -> 404 (re-raised untouched by the handler below).
        if not model:
            raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")

        created = model.created_at
        updated = model.updated_at
        payload = {
            "model_id": model.model_id,
            "model_type": model.model_type,
            "provider": model.provider,
            "metadata": model.metadata,
            "capabilities": model.capabilities,
            "created_at": created.isoformat() if created else None,
            "updated_at": updated.isoformat() if updated else None
        }

        return {
            "success": True,
            "model": payload
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get model details for {model_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get model details: {str(e)}")
934
+
935
@router.get("/models/{model_id}/versions")
async def get_model_versions(model_id: str, user = Depends(optional_auth)):
    """Get version history for a specific model"""
    try:
        from isa_model.core.models.model_version_manager import ModelVersionManager

        versions = ModelVersionManager().get_model_versions(model_id)

        def _as_dict(v):
            # Flatten one version record into the API response shape.
            return {
                "version": v.version,
                "created_at": v.created_at.isoformat(),
                "metadata": v.metadata,
                "is_active": v.is_active
            }

        return {
            "success": True,
            "model_id": model_id,
            "versions": [_as_dict(v) for v in versions],
            "total_versions": len(versions)
        }

    except Exception as e:
        logger.error(f"Failed to get model versions for {model_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get model versions: {str(e)}")
962
+
963
@router.post("/models/{model_id}/versions")
async def create_model_version(
    model_id: str,
    version_data: Dict[str, Any],
    user = Depends(require_write_access)
):
    """Create a new version for a model"""
    try:
        from isa_model.core.models.model_version_manager import ModelVersionManager

        # Attribute the version to the authenticated user when one is present.
        creator_id = user.get("user_id") if user else None

        new_version = ModelVersionManager().create_version(
            model_id=model_id,
            metadata=version_data.get("metadata", {}),
            user_id=creator_id
        )

        version_payload = {
            "version": new_version.version,
            "created_at": new_version.created_at.isoformat(),
            "metadata": new_version.metadata
        }
        return {
            "success": True,
            "message": f"New version created for model {model_id}",
            "version": version_payload
        }

    except Exception as e:
        logger.error(f"Failed to create model version for {model_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to create model version: {str(e)}")
993
+
994
@router.get("/models/{model_id}/billing")
async def get_model_billing_info(
    model_id: str,
    start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
    end_date: Optional[str] = Query(None, description="End date (ISO format)"),
    user = Depends(optional_auth)
):
    """Get billing information for a specific model.

    Defaults to the last 30 days when no explicit window is given. When only
    one bound is supplied, the generated default for the other bound adopts
    the same timezone-awareness, so downstream datetime comparisons never mix
    naive and aware values (which would raise TypeError). When neither bound
    is supplied, both defaults remain naive, exactly as before.
    """
    try:
        from isa_model.core.models.model_billing_tracker import ModelBillingTracker
        from datetime import datetime, timedelta

        # Parse any supplied bounds; a trailing 'Z' is normalised to '+00:00'
        # because datetime.fromisoformat rejects 'Z' on older Pythons.
        start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00')) if start_date else None
        end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00')) if end_date else None

        # BUG FIX: defaults previously used naive datetime.now() even when the
        # supplied bound was timezone-aware, producing a mixed naive/aware
        # pair. Match the supplied bound's tzinfo (None -> naive, as before).
        ref = start_dt or end_dt
        tz = ref.tzinfo if ref is not None else None
        if start_dt is None:
            start_dt = datetime.now(tz) - timedelta(days=30)
        if end_dt is None:
            end_dt = datetime.now(tz)

        billing_tracker = ModelBillingTracker()
        billing_info = billing_tracker.get_model_billing_summary(
            model_id=model_id,
            start_date=start_dt,
            end_date=end_dt
        )

        return {
            "success": True,
            "model_id": model_id,
            "billing_period": {
                "start_date": start_dt.isoformat(),
                "end_date": end_dt.isoformat()
            },
            "billing_summary": billing_info
        }

    except Exception as e:
        logger.error(f"Failed to get billing info for {model_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get billing info: {str(e)}")
1037
+
1038
@router.put("/models/{model_id}/metadata")
async def update_model_metadata(
    model_id: str,
    metadata_update: Dict[str, Any],
    user = Depends(require_write_access)
):
    """Update metadata for a specific model"""
    try:
        from isa_model.core.models.model_repo import ModelRepo

        # Attribute the change to the authenticated user when one is present.
        editor_id = user.get("user_id") if user else None

        updated = ModelRepo().update_model_metadata(
            model_id=model_id,
            metadata_updates=metadata_update,
            updated_by=editor_id
        )

        # The repo reports False when the model does not exist -> 404.
        if not updated:
            raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")

        return {
            "success": True,
            "message": f"Metadata updated for model {model_id}",
            "updated_fields": list(metadata_update.keys())
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to update metadata for {model_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to update metadata: {str(e)}")
1069
+
1070
@router.get("/models/{model_id}/statistics")
async def get_model_statistics(
    model_id: str,
    days: int = Query(30, ge=1, le=365, description="Number of days for statistics"),
    user = Depends(optional_auth)
):
    """Get usage statistics for a specific model"""
    try:
        from isa_model.core.models.model_statistics_tracker import ModelStatisticsTracker

        tracker = ModelStatisticsTracker()
        usage = tracker.get_model_statistics(model_id=model_id, days=days)

        return {
            "success": True,
            "model_id": model_id,
            "period_days": days,
            "statistics": usage
        }

    except Exception as e:
        logger.error(f"Failed to get statistics for {model_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get model statistics: {str(e)}")