isa-model 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +200 -6
- isa_model/deployment/services/auto_deploy_vision_service.py +4 -3
- isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
- isa_model/inference/ai_factory.py +83 -3
- isa_model/serving/api/routes/unified.py +72 -0
- {isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/METADATA +1 -1
- {isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/RECORD +9 -18
- isa_model/inference/providers/__init__.py +0 -19
- isa_model/inference/providers/base_provider.py +0 -77
- isa_model/inference/providers/ml_provider.py +0 -50
- isa_model/inference/providers/modal_provider.py +0 -109
- isa_model/inference/providers/model_cache_manager.py +0 -341
- isa_model/inference/providers/ollama_provider.py +0 -92
- isa_model/inference/providers/openai_provider.py +0 -130
- isa_model/inference/providers/replicate_provider.py +0 -119
- isa_model/inference/providers/triton_provider.py +0 -439
- isa_model/inference/providers/yyds_provider.py +0 -108
- {isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/WHEEL +0 -0
- {isa_model-0.3.6.dist-info → isa_model-0.3.8.dist-info}/top_level.txt +0 -0
@@ -10,10 +10,12 @@ This is the main API that handles all types of AI requests:
|
|
10
10
|
"""
|
11
11
|
|
12
12
|
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
|
13
|
+
from fastapi.responses import StreamingResponse
|
13
14
|
from pydantic import BaseModel, Field
|
14
15
|
from typing import Optional, Dict, Any, Union, List
|
15
16
|
import logging
|
16
17
|
import asyncio
|
18
|
+
import json
|
17
19
|
from pathlib import Path
|
18
20
|
|
19
21
|
from isa_model.client import ISAModelClient
|
@@ -28,6 +30,7 @@ class UnifiedRequest(BaseModel):
|
|
28
30
|
service_type: str = Field(..., description="Service type (text, vision, audio, image, embedding)")
|
29
31
|
model_hint: Optional[str] = Field(None, description="Optional model preference")
|
30
32
|
provider_hint: Optional[str] = Field(None, description="Optional provider preference")
|
33
|
+
stream: Optional[bool] = Field(False, description="Enable streaming for text services")
|
31
34
|
parameters: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional task parameters")
|
32
35
|
|
33
36
|
class UnifiedResponse(BaseModel):
|
@@ -108,6 +111,75 @@ async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
|
|
108
111
|
}
|
109
112
|
)
|
110
113
|
|
114
|
+
@router.post("/stream")
async def unified_stream(request: UnifiedRequest):
    """
    **Unified streaming endpoint for text services**

    Returns a Server-Sent Events (SSE) stream for real-time token generation.
    Only supports text service types.

    Each event is a single ``data: <json>`` line followed by a blank line.
    The JSON payload has one of three shapes:

    - ``{"token": ..., "type": "token"}`` for every item yielded by the client
    - ``{"type": "completion", "status": "finished"}`` once the stream ends
    - ``{"type": "error", "error": "<message>"}`` if generation fails midway

    Raises:
        HTTPException: 400 when ``service_type`` is not ``"text"``;
            500 when the stream could not be set up.
    """
    try:
        # Validate streaming request
        if request.service_type != "text":
            raise HTTPException(status_code=400, detail="Streaming only supported for text services")

        # Get ISA client instance (local mode)
        client = get_isa_client()

        # ``parameters`` is Optional; an explicit null must not break ``**`` unpacking.
        extra_params = request.parameters or {}

        async def generate_stream():
            """Generator for SSE streaming"""
            try:
                # Use client's streaming method
                stream_gen = await client.invoke(
                    input_data=request.input_data,
                    task=request.task,
                    service_type=request.service_type,
                    model_hint=request.model_hint,
                    provider_hint=request.provider_hint,
                    stream=True,
                    **extra_params
                )

                # Stream tokens as SSE format: "data: {json}\n\n"
                async for token in stream_gen:
                    token_data = {
                        "token": token,
                        "type": "token"
                    }
                    yield f"data: {json.dumps(token_data)}\n\n"

                # Send completion signal
                completion_data = {
                    "type": "completion",
                    "status": "finished"
                }
                yield f"data: {json.dumps(completion_data)}\n\n"

            except Exception as e:
                # Headers are already sent at this point, so the failure is
                # reported in-band as a final SSE event instead of an HTTP error.
                logger.error(f"Streaming error: {e}")
                error_data = {
                    "type": "error",
                    "error": str(e)
                }
                yield f"data: {json.dumps(error_data)}\n\n"

        # Return SSE stream response. The media type must be text/event-stream
        # for SSE clients (e.g. browser EventSource) to accept the stream.
        return StreamingResponse(
            generate_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive"
            }
        )

    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) must keep their status
        # code rather than being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        logger.error(f"Streaming setup failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
182
|
+
|
111
183
|
@router.post("/invoke-file", response_model=UnifiedResponse)
|
112
184
|
async def unified_invoke_file(
|
113
185
|
task: str = Form(...),
|
@@ -1,5 +1,5 @@
|
|
1
1
|
isa_model/__init__.py,sha256=bAbHdrDVQ-mySC_GJtgfLNI8KPcs2LfReBkIOOtpaQQ,867
|
2
|
-
isa_model/client.py,sha256=
|
2
|
+
isa_model/client.py,sha256=oo2WcxElcD9WMtw6STj3vS9KwjI-DvS_ldqC1X-HRBs,35456
|
3
3
|
isa_model/core/config.py,sha256=h9GVTEEMlaJYSCDd0W9q1KtaWTV5V5TawMsKtGuphds,15686
|
4
4
|
isa_model/core/pricing_manager.py,sha256=b7HcZsBQ8ZSCzMepOhqN-J9kU43vhTxX5NabQS0aM70,17125
|
5
5
|
isa_model/core/types.py,sha256=XLUs442WGNc8E0gF2M-nb6dutD_s-XCfpr2BfGBCA2M,8445
|
@@ -30,10 +30,11 @@ isa_model/deployment/gpu_int8_ds8/scripts/test_client.py,sha256=aCULgRYzEQj_ELUK
|
|
30
30
|
isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py,sha256=XXrneTCHUeh1LNRcu-YtZQ5B4pNawlrxC-cTWmJU2A8,936
|
31
31
|
isa_model/deployment/runtime/deployed_service.py,sha256=0Z_Hg42oXasEVvuKjwBylJPazcmJYXhS-L9uPainaIg,13400
|
32
32
|
isa_model/deployment/services/__init__.py,sha256=JrLlmBlLb6RfiqGMzVVxKZfF5tAKliQqpon_rPoNoeA,216
|
33
|
-
isa_model/deployment/services/auto_deploy_vision_service.py,sha256=
|
33
|
+
isa_model/deployment/services/auto_deploy_vision_service.py,sha256=bZmkNG2DWvG6DdHfHvUuf8fonygic4vI_A4aogrXzvU,19670
|
34
34
|
isa_model/deployment/services/model_service.py,sha256=_ncC--8hr5BUwzCWh59yRXPKIPVLapx_31TorB2DIr8,13492
|
35
35
|
isa_model/deployment/services/service_monitor.py,sha256=P1zGoeqkNEJwt9AXZF2qTjfSLRm5PKUa80GJVNDSIdA,15223
|
36
36
|
isa_model/deployment/services/service_registry.py,sha256=LQgWQOvoP0lb7mC6WTS6shEt6WuX6xc8rRmcixrKwTc,22765
|
37
|
+
isa_model/deployment/services/simple_auto_deploy_vision_service.py,sha256=rfXsv9mh_w5cXHVYxA4fBD5ppyNY4HplsH34xp4WpY8,9882
|
37
38
|
isa_model/eval/__init__.py,sha256=CRbxC5SN7ow4ymdALSNTHawqw4f82DEdAb7twNT_Pw0,2447
|
38
39
|
isa_model/eval/benchmarks.py,sha256=_L4Vwj2hwf2yhqoleIASO9z5e3LRCClCVEVCQbGt0I8,16885
|
39
40
|
isa_model/eval/factory.py,sha256=bm5OVY7HIxdBgjlH1n7e5K1YO4ytv8e4KB7z_JS9HVQ,20737
|
@@ -46,19 +47,9 @@ isa_model/eval/evaluators/llm_evaluator.py,sha256=yfFJFdxwGV2F3mzEWjZ-0fr9u8SR3A
|
|
46
47
|
isa_model/eval/infrastructure/__init__.py,sha256=fxTdtwAFtjCDOV9MJ3GbhY0A-DqKeTwr_u9WTBnGI_U,648
|
47
48
|
isa_model/eval/infrastructure/experiment_tracker.py,sha256=yfMWIAk6oA8Lfer3AtmKg0OEZiGhczmsCD5gmp--uew,15283
|
48
49
|
isa_model/inference/__init__.py,sha256=usfuQJ4zYY2RRtHkE-V6LuJ5aN7WJogtPUj9Qmy4Wvw,318
|
49
|
-
isa_model/inference/ai_factory.py,sha256=
|
50
|
+
isa_model/inference/ai_factory.py,sha256=oGtRd4wp6IZOTyI3GVKBNN4AtlnrLS7yFZuq2wvkaUg,19784
|
50
51
|
isa_model/inference/base.py,sha256=qwOddnSGI0GUdD6qIdGBPQpkW7UjU3Y-zaZvu70B4WA,1278
|
51
52
|
isa_model/inference/adapter/unified_api.py,sha256=67_Ok8W20m6Otf6r9WyOEVpnxondP4UAxOASk9ozDk4,8668
|
52
|
-
isa_model/inference/providers/__init__.py,sha256=a83q-LMFv8u47wf0XtxvqOw_mlVgA_90wtuwy02qdDE,581
|
53
|
-
isa_model/inference/providers/base_provider.py,sha256=PT-YnGwBu-Jn_4T3iAphkAJw_mYmKVLjUID62vf2_Ow,2711
|
54
|
-
isa_model/inference/providers/ml_provider.py,sha256=4oGGF7lVWQ91Qh3h7olyPFoACLxCROaMxUZlDiZrRL4,1661
|
55
|
-
isa_model/inference/providers/modal_provider.py,sha256=klRYXECD5TJicodHIElsGNGMAsAWRFhvn4yfCRcqdVs,3654
|
56
|
-
isa_model/inference/providers/model_cache_manager.py,sha256=dLRpx7OJweQ5LcSAkU7D0DQRfLtIhG6nGvg4W_gau80,15315
|
57
|
-
isa_model/inference/providers/ollama_provider.py,sha256=IfM9XhdzfE1faguzS2-4GfhK30v5kDPecD3l4z2eB1w,3620
|
58
|
-
isa_model/inference/providers/openai_provider.py,sha256=tB8FMsMivlRx0cfPJ0Yrxh1DCvuXyyjNFXrO4lMkkhA,5366
|
59
|
-
isa_model/inference/providers/replicate_provider.py,sha256=0oi_BglIE6-HYgzLau9ifP8OdpAMO-QkwYk0OXRUzPk,4490
|
60
|
-
isa_model/inference/providers/triton_provider.py,sha256=GKlth7cTOx6ERbsXXJ0gDNby3kVGQNULBDt098BXBSU,15258
|
61
|
-
isa_model/inference/providers/yyds_provider.py,sha256=KbDsopShs11_G9oX3b2i2NgHIqkZV7HYWe9K9uZLccc,4284
|
62
53
|
isa_model/inference/services/__init__.py,sha256=yfLz0YGl8ixk6LfTRL6cRTvZMb9F_Pv1QRgGyNc9xYM,386
|
63
54
|
isa_model/inference/services/base_service.py,sha256=fVaSx0CogHK71UEsNJeSyM8mhqmq5_9ePbbSZVi3Al8,5085
|
64
55
|
isa_model/inference/services/audio/base_stt_service.py,sha256=sfzAfreFdvEOBHtphoTrQSjb-gCoCOW4WCj6iIe51oU,5804
|
@@ -117,7 +108,7 @@ isa_model/serving/api/routes/__init__.py,sha256=RIaG9OPg0AjAIVbtMzwnqGyNU-tuQXbd
|
|
117
108
|
isa_model/serving/api/routes/health.py,sha256=NwQcC_bpcaI4YZHTIKbGtg82yQ6QLdp0TwcqbEiqbWs,2208
|
118
109
|
isa_model/serving/api/routes/llm.py,sha256=5ZVxWugff0i6VBKz_Nv5CqacMZJsPZEKyoSB6XDrW34,385
|
119
110
|
isa_model/serving/api/routes/ui_analysis.py,sha256=-WxLaRKQNHnRh4okB85cWA4blTegpEPZtzHTsF3yeeU,6848
|
120
|
-
isa_model/serving/api/routes/unified.py,sha256=
|
111
|
+
isa_model/serving/api/routes/unified.py,sha256=r6O_X9ql2EDqiTWaWz_anPERwfzNnF9ZvSdjqht8WxE,9727
|
121
112
|
isa_model/serving/api/routes/vision.py,sha256=U9jxssQYe6igtayUW0C2fcYwqmLRIE15__X-5Ru9J4c,396
|
122
113
|
isa_model/serving/api/schemas/__init__.py,sha256=Tu_hzxoKW1ZHpww3-5ER4A2hNuDByZ0rAfrgaJ7Bs-M,275
|
123
114
|
isa_model/serving/api/schemas/common.py,sha256=HVaAS7wlvqrwC1gMZ2Cvo0vzHB053x2uOTAwUoY2vsE,696
|
@@ -141,7 +132,7 @@ isa_model/training/core/config.py,sha256=oqgKpBvtzrN6jwLIQYQ2707lH6nmjrktRiSxp9i
|
|
141
132
|
isa_model/training/core/dataset.py,sha256=XCFsnf0NUMU1dJpdvo_CAMyvXB-9_RCUEiy8TU50e20,7802
|
142
133
|
isa_model/training/core/trainer.py,sha256=h5TjqjdFr0Fsv5y4-0siy1KmOlqLfliVaUXybvuoeXU,26932
|
143
134
|
isa_model/training/core/utils.py,sha256=Nik0M2ssfNbWqP6fKO0Kfyhzr_H6Q19ioxB-qCYbn5E,8387
|
144
|
-
isa_model-0.3.
|
145
|
-
isa_model-0.3.
|
146
|
-
isa_model-0.3.
|
147
|
-
isa_model-0.3.
|
135
|
+
isa_model-0.3.8.dist-info/METADATA,sha256=rQ6RLtoPX1xoT2BFeRQZTx9gRSeCjRxuTbH3xseKbdE,12326
|
136
|
+
isa_model-0.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
137
|
+
isa_model-0.3.8.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
|
138
|
+
isa_model-0.3.8.dist-info/RECORD,,
|
@@ -1,19 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Providers - Components for integrating with different model providers
|
3
|
-
|
4
|
-
File: isa_model/inference/providers/__init__.py
|
5
|
-
This module contains provider implementations for different AI model backends.
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .base_provider import BaseProvider
|
9
|
-
|
10
|
-
__all__ = [
|
11
|
-
"BaseProvider",
|
12
|
-
]
|
13
|
-
|
14
|
-
# Provider implementations can be imported individually as needed
|
15
|
-
# from .triton_provider import TritonProvider
|
16
|
-
# from .ollama_provider import OllamaProvider
|
17
|
-
# from .yyds_provider import YYDSProvider
|
18
|
-
# from .openai_provider import OpenAIProvider
|
19
|
-
# from .replicate_provider import ReplicateProvider
|
@@ -1,77 +0,0 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
from typing import Dict, List, Any, Optional
|
3
|
-
import os
|
4
|
-
import logging
|
5
|
-
from pathlib import Path
|
6
|
-
import dotenv
|
7
|
-
|
8
|
-
from isa_model.inference.base import ModelType, Capability
|
9
|
-
|
10
|
-
logger = logging.getLogger(__name__)
|
11
|
-
|
12
|
-
class BaseProvider(ABC):
    """Abstract foundation for AI providers; owns configuration and API-key access."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        # A missing or empty config collapses to a fresh dict.
        self.config = config if config else {}
        self._load_environment_config()
        self._validate_config()

    def _load_environment_config(self):
        """Load a project-level ``.env`` file if present, then provider env vars."""
        # Walk four directory levels up from this module to find the .env file.
        root = Path(__file__)
        for _ in range(4):
            root = root.parent
        dotenv_file = root / ".env"

        if dotenv_file.exists():
            dotenv.load_dotenv(dotenv_file)

        # Provider-specific variables are supplied by the subclass hook.
        self._load_provider_env_vars()

    @abstractmethod
    def _load_provider_env_vars(self):
        """Hook: read the environment variables specific to this provider."""

    def _validate_config(self):
        """Hook: check required configuration; the base implementation does nothing."""
        return None

    def get_api_key(self) -> Optional[str]:
        """Return the ``api_key`` entry of the config, or ``None`` when absent."""
        return self.config.get("api_key")

    def has_valid_credentials(self) -> bool:
        """Report whether ``get_api_key()`` yields a truthy value."""
        return True if self.get_api_key() else False

    @abstractmethod
    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
        """Return the provider's capabilities keyed by model type."""

    @abstractmethod
    def get_models(self, model_type: ModelType) -> List[str]:
        """Return the model names available for ``model_type``."""

    def get_config(self) -> Dict[str, Any]:
        """Return a copy of the config with ``api_key``/``api_token`` masked."""
        masked = self.config.copy()
        for secret_field in ("api_key", "api_token"):
            if secret_field in masked:
                # Non-empty secrets become "***"; empty ones become "".
                masked[secret_field] = "***" if masked[secret_field] else ""
        return masked

    def get_full_config(self) -> Dict[str, Any]:
        """Return an unmasked copy of the config (sensitive data included) - internal use only."""
        full_copy = self.config.copy()
        return full_copy

    @abstractmethod
    def is_reasoning_model(self, model_name: str) -> bool:
        """Tell whether ``model_name`` is optimized for reasoning tasks."""
|
@@ -1,50 +0,0 @@
|
|
1
|
-
from isa_model.inference.providers.base_provider import BaseProvider
|
2
|
-
from isa_model.inference.base import ModelType, Capability
|
3
|
-
from typing import Dict, List, Any
|
4
|
-
import logging
|
5
|
-
|
6
|
-
logger = logging.getLogger(__name__)
|
7
|
-
|
8
|
-
class MLProvider(BaseProvider):
    """Provider for traditional ML models.

    Configuration keys (caller-supplied values override these defaults):
        model_directory: "./models/ml"
        cache_models: True
        max_cache_size: 5
    """

    def __init__(self, config=None):
        default_config = {
            "model_directory": "./models/ml",
            "cache_models": True,
            "max_cache_size": 5
        }

        # Caller-supplied entries win over the defaults.
        merged_config = {**default_config, **(config or {})}
        super().__init__(config=merged_config)
        self.name = "ml"

        logger.info(f"Initialized MLProvider with model directory: {self.config['model_directory']}")

    def _load_provider_env_vars(self):
        """Implement the abstract BaseProvider hook; this provider reads no env vars.

        Without this override the class stays abstract and cannot be instantiated.
        """
        return None

    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
        """Get provider capabilities"""
        return {
            ModelType.LLM: [],  # ML models are not LLMs
            ModelType.EMBEDDING: [],
            ModelType.VISION: [],
            "ML": [  # Custom model type for traditional ML
                "CLASSIFICATION",
                "REGRESSION",
                "CLUSTERING",
                "FEATURE_EXTRACTION"
            ]
        }

    def get_models(self, model_type: str = "ML") -> List[str]:
        """Get available ML models (a fixed list; ``model_type`` is not consulted)"""
        # In practice, this would scan the model directory
        return [
            "fraud_detection_rf",
            "customer_churn_xgb",
            "price_prediction_lr",
            "recommendation_kmeans"
        ]

    def get_config(self) -> Dict[str, Any]:
        """Get provider configuration (the live dict, not a copy)"""
        return self.config

    def is_reasoning_model(self, model_name: str) -> bool:
        """Implement the abstract BaseProvider method; always returns False.

        Without this override the class stays abstract and cannot be instantiated.
        """
        return False
|
@@ -1,109 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Modal Provider
|
3
|
-
|
4
|
-
Provider for ISA self-hosted Modal services
|
5
|
-
No API keys needed since we deploy our own services
|
6
|
-
"""
|
7
|
-
|
8
|
-
import os
|
9
|
-
import logging
|
10
|
-
from typing import Dict, Any, Optional, List
|
11
|
-
from .base_provider import BaseProvider
|
12
|
-
from isa_model.inference.base import ModelType, Capability
|
13
|
-
|
14
|
-
logger = logging.getLogger(__name__)
|
15
|
-
|
16
|
-
class ModalProvider(BaseProvider):
    """Provider for the ISA services deployed on Modal."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        super().__init__(config)
        self.name = "modal"
        self.base_url = "https://modal.com"  # Not used directly

    def _load_provider_env_vars(self):
        """Pick up an optional Modal token and fill in default settings."""
        # Deployed services need no API key, but a token is recorded when set.
        token = os.getenv("MODAL_TOKEN_ID")
        if not token:
            token = os.getenv("MODAL_TOKEN_SECRET")
        if token:
            self.config["modal_token"] = token

        # Only fill in values the caller did not already provide.
        self.config.setdefault("timeout", 300)
        self.config.setdefault("deployment_region", "us-east-1")
        self.config.setdefault("gpu_type", "T4")

    def get_api_key(self) -> str:
        """Return a fixed placeholder; deployed Modal apps need no API key."""
        placeholder = "modal-deployed-service"
        return placeholder

    def get_base_url(self) -> str:
        """Return the base URL stored on this provider."""
        return self.base_url

    def validate_credentials(self) -> bool:
        """
        Report whether the ``modal`` package can be imported.

        Deployed services are assumed to be reachable, so nothing else is checked.
        """
        try:
            import modal
        except ImportError:
            logger.warning("Modal package not available")
            return False
        return True

    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
        """Return the vision capabilities offered through Modal."""
        vision_capabilities = [
            Capability.OBJECT_DETECTION,
            Capability.IMAGE_ANALYSIS,
            Capability.UI_DETECTION,
            Capability.OCR,
            Capability.DOCUMENT_ANALYSIS,
        ]
        return {ModelType.VISION: vision_capabilities}

    def get_models(self, model_type: ModelType) -> List[str]:
        """Return the model names for ``model_type``; only vision has any."""
        if model_type != ModelType.VISION:
            return []
        return [
            "omniparser-v2.0",
            "table-transformer-detection",
            "table-transformer-structure-v1.1",
            "paddleocr-3.0",
            "yolov8",
        ]

    def is_reasoning_model(self, model_name: str) -> bool:
        """Always False: the vision models here are not reasoning models."""
        return False

    def get_default_config(self) -> Dict[str, Any]:
        """Return the default settings for Modal services."""
        defaults: Dict[str, Any] = {}
        defaults["timeout"] = 300  # 5 minutes
        defaults["max_retries"] = 3
        defaults["deployment_region"] = "us-east-1"
        defaults["gpu_type"] = "T4"
        return defaults

    def get_billing_info(self) -> Dict[str, Any]:
        """Return billing details for Modal services."""
        hourly_rates = {"T4": 0.60, "A100": 4.00}
        return {
            "provider": "modal",
            "billing_model": "compute_usage",
            "cost_per_hour": hourly_rates,
            "note": "Costs depend on actual usage time, scales to zero when not in use",
        }
|