isa-model 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,10 +10,12 @@ This is the main API that handles all types of AI requests:
10
10
  """
11
11
 
12
12
  from fastapi import APIRouter, HTTPException, UploadFile, File, Form
13
+ from fastapi.responses import StreamingResponse
13
14
  from pydantic import BaseModel, Field
14
15
  from typing import Optional, Dict, Any, Union, List
15
16
  import logging
16
17
  import asyncio
18
+ import json
17
19
  from pathlib import Path
18
20
 
19
21
  from isa_model.client import ISAModelClient
@@ -28,6 +30,7 @@ class UnifiedRequest(BaseModel):
28
30
  service_type: str = Field(..., description="Service type (text, vision, audio, image, embedding)")
29
31
  model_hint: Optional[str] = Field(None, description="Optional model preference")
30
32
  provider_hint: Optional[str] = Field(None, description="Optional provider preference")
33
+ stream: Optional[bool] = Field(False, description="Enable streaming for text services")
31
34
  parameters: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional task parameters")
32
35
 
33
36
  class UnifiedResponse(BaseModel):
@@ -108,6 +111,75 @@ async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
108
111
  }
109
112
  )
110
113
 
114
@router.post("/stream")
async def unified_stream(request: UnifiedRequest):
    """
    **Unified streaming endpoint for text services**

    Returns a Server-Sent Events (SSE) stream for real-time token generation.
    Only supports text service types.

    Each event is emitted as ``data: {json}`` followed by a blank line. The
    JSON payload carries a ``type`` field:

    - ``token``: one generated token, in the ``token`` field
    - ``completion``: sent once after the last token (``status`` is ``finished``)
    - ``error``: sent if generation fails mid-stream (``error`` holds the message)

    Raises:
        HTTPException: 400 when ``service_type`` is not ``"text"``;
            500 when the client or the response cannot be set up.
    """
    # Validate outside the try block: inside it, the broad ``except Exception``
    # below would catch this 400 and re-raise it as a 500.
    if request.service_type != "text":
        raise HTTPException(status_code=400, detail="Streaming only supported for text services")

    # ``parameters`` is Optional, so an explicit null in the request body
    # yields None; ``**None`` would raise TypeError.
    extra_params = request.parameters or {}

    try:
        # Get ISA client instance (local mode)
        client = get_isa_client()

        async def generate_stream():
            """Generator for SSE streaming"""
            try:
                # Use client's streaming method
                stream_gen = await client.invoke(
                    input_data=request.input_data,
                    task=request.task,
                    service_type=request.service_type,
                    model_hint=request.model_hint,
                    provider_hint=request.provider_hint,
                    stream=True,
                    **extra_params
                )

                # Stream tokens as SSE format: "data: {json}\n\n"
                async for token in stream_gen:
                    token_data = {
                        "token": token,
                        "type": "token"
                    }
                    yield f"data: {json.dumps(token_data)}\n\n"

                # Send completion signal
                completion_data = {
                    "type": "completion",
                    "status": "finished"
                }
                yield f"data: {json.dumps(completion_data)}\n\n"

            except Exception as e:
                # The response has already started, so the status code can no
                # longer change; report the failure as a final SSE event.
                logger.error(f"Streaming error: {e}")
                error_data = {
                    "type": "error",
                    "error": str(e)
                }
                yield f"data: {json.dumps(error_data)}\n\n"

        # Return SSE stream response. SSE clients require the
        # text/event-stream media type; the explicit Content-Type header is
        # dropped because media_type already sets it.
        return StreamingResponse(
            generate_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
            }
        )

    except HTTPException:
        # Let deliberate HTTP errors keep their own status code.
        raise
    except Exception as e:
        logger.error(f"Streaming setup failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
182
+
111
183
  @router.post("/invoke-file", response_model=UnifiedResponse)
112
184
  async def unified_invoke_file(
113
185
  task: str = Form(...),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: isa_model
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: Unified AI model serving framework
5
5
  Author: isA_Model Contributors
6
6
  Classifier: Development Status :: 3 - Alpha
@@ -1,5 +1,5 @@
1
1
  isa_model/__init__.py,sha256=bAbHdrDVQ-mySC_GJtgfLNI8KPcs2LfReBkIOOtpaQQ,867
2
- isa_model/client.py,sha256=5u_hqGB1hcFX8MQdVYlCLqzs4ylQFY3rq91h3iTI24c,27500
2
+ isa_model/client.py,sha256=oo2WcxElcD9WMtw6STj3vS9KwjI-DvS_ldqC1X-HRBs,35456
3
3
  isa_model/core/config.py,sha256=h9GVTEEMlaJYSCDd0W9q1KtaWTV5V5TawMsKtGuphds,15686
4
4
  isa_model/core/pricing_manager.py,sha256=b7HcZsBQ8ZSCzMepOhqN-J9kU43vhTxX5NabQS0aM70,17125
5
5
  isa_model/core/types.py,sha256=XLUs442WGNc8E0gF2M-nb6dutD_s-XCfpr2BfGBCA2M,8445
@@ -30,10 +30,11 @@ isa_model/deployment/gpu_int8_ds8/scripts/test_client.py,sha256=aCULgRYzEQj_ELUK
30
30
  isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py,sha256=XXrneTCHUeh1LNRcu-YtZQ5B4pNawlrxC-cTWmJU2A8,936
31
31
  isa_model/deployment/runtime/deployed_service.py,sha256=0Z_Hg42oXasEVvuKjwBylJPazcmJYXhS-L9uPainaIg,13400
32
32
  isa_model/deployment/services/__init__.py,sha256=JrLlmBlLb6RfiqGMzVVxKZfF5tAKliQqpon_rPoNoeA,216
33
- isa_model/deployment/services/auto_deploy_vision_service.py,sha256=Luo8FaXsEBoKjGw5HQ7veOnv9Eh0e7U0claXaGa3-1o,19624
33
+ isa_model/deployment/services/auto_deploy_vision_service.py,sha256=bZmkNG2DWvG6DdHfHvUuf8fonygic4vI_A4aogrXzvU,19670
34
34
  isa_model/deployment/services/model_service.py,sha256=_ncC--8hr5BUwzCWh59yRXPKIPVLapx_31TorB2DIr8,13492
35
35
  isa_model/deployment/services/service_monitor.py,sha256=P1zGoeqkNEJwt9AXZF2qTjfSLRm5PKUa80GJVNDSIdA,15223
36
36
  isa_model/deployment/services/service_registry.py,sha256=LQgWQOvoP0lb7mC6WTS6shEt6WuX6xc8rRmcixrKwTc,22765
37
+ isa_model/deployment/services/simple_auto_deploy_vision_service.py,sha256=rfXsv9mh_w5cXHVYxA4fBD5ppyNY4HplsH34xp4WpY8,9882
37
38
  isa_model/eval/__init__.py,sha256=CRbxC5SN7ow4ymdALSNTHawqw4f82DEdAb7twNT_Pw0,2447
38
39
  isa_model/eval/benchmarks.py,sha256=_L4Vwj2hwf2yhqoleIASO9z5e3LRCClCVEVCQbGt0I8,16885
39
40
  isa_model/eval/factory.py,sha256=bm5OVY7HIxdBgjlH1n7e5K1YO4ytv8e4KB7z_JS9HVQ,20737
@@ -46,19 +47,9 @@ isa_model/eval/evaluators/llm_evaluator.py,sha256=yfFJFdxwGV2F3mzEWjZ-0fr9u8SR3A
46
47
  isa_model/eval/infrastructure/__init__.py,sha256=fxTdtwAFtjCDOV9MJ3GbhY0A-DqKeTwr_u9WTBnGI_U,648
47
48
  isa_model/eval/infrastructure/experiment_tracker.py,sha256=yfMWIAk6oA8Lfer3AtmKg0OEZiGhczmsCD5gmp--uew,15283
48
49
  isa_model/inference/__init__.py,sha256=usfuQJ4zYY2RRtHkE-V6LuJ5aN7WJogtPUj9Qmy4Wvw,318
49
- isa_model/inference/ai_factory.py,sha256=IN-q3jNmcraZ-PWNTHyhdABoxxhIweZYcXO2fr_uXoM,16478
50
+ isa_model/inference/ai_factory.py,sha256=oGtRd4wp6IZOTyI3GVKBNN4AtlnrLS7yFZuq2wvkaUg,19784
50
51
  isa_model/inference/base.py,sha256=qwOddnSGI0GUdD6qIdGBPQpkW7UjU3Y-zaZvu70B4WA,1278
51
52
  isa_model/inference/adapter/unified_api.py,sha256=67_Ok8W20m6Otf6r9WyOEVpnxondP4UAxOASk9ozDk4,8668
52
- isa_model/inference/providers/__init__.py,sha256=a83q-LMFv8u47wf0XtxvqOw_mlVgA_90wtuwy02qdDE,581
53
- isa_model/inference/providers/base_provider.py,sha256=PT-YnGwBu-Jn_4T3iAphkAJw_mYmKVLjUID62vf2_Ow,2711
54
- isa_model/inference/providers/ml_provider.py,sha256=4oGGF7lVWQ91Qh3h7olyPFoACLxCROaMxUZlDiZrRL4,1661
55
- isa_model/inference/providers/modal_provider.py,sha256=klRYXECD5TJicodHIElsGNGMAsAWRFhvn4yfCRcqdVs,3654
56
- isa_model/inference/providers/model_cache_manager.py,sha256=dLRpx7OJweQ5LcSAkU7D0DQRfLtIhG6nGvg4W_gau80,15315
57
- isa_model/inference/providers/ollama_provider.py,sha256=IfM9XhdzfE1faguzS2-4GfhK30v5kDPecD3l4z2eB1w,3620
58
- isa_model/inference/providers/openai_provider.py,sha256=tB8FMsMivlRx0cfPJ0Yrxh1DCvuXyyjNFXrO4lMkkhA,5366
59
- isa_model/inference/providers/replicate_provider.py,sha256=0oi_BglIE6-HYgzLau9ifP8OdpAMO-QkwYk0OXRUzPk,4490
60
- isa_model/inference/providers/triton_provider.py,sha256=GKlth7cTOx6ERbsXXJ0gDNby3kVGQNULBDt098BXBSU,15258
61
- isa_model/inference/providers/yyds_provider.py,sha256=KbDsopShs11_G9oX3b2i2NgHIqkZV7HYWe9K9uZLccc,4284
62
53
  isa_model/inference/services/__init__.py,sha256=yfLz0YGl8ixk6LfTRL6cRTvZMb9F_Pv1QRgGyNc9xYM,386
63
54
  isa_model/inference/services/base_service.py,sha256=fVaSx0CogHK71UEsNJeSyM8mhqmq5_9ePbbSZVi3Al8,5085
64
55
  isa_model/inference/services/audio/base_stt_service.py,sha256=sfzAfreFdvEOBHtphoTrQSjb-gCoCOW4WCj6iIe51oU,5804
@@ -117,7 +108,7 @@ isa_model/serving/api/routes/__init__.py,sha256=RIaG9OPg0AjAIVbtMzwnqGyNU-tuQXbd
117
108
  isa_model/serving/api/routes/health.py,sha256=NwQcC_bpcaI4YZHTIKbGtg82yQ6QLdp0TwcqbEiqbWs,2208
118
109
  isa_model/serving/api/routes/llm.py,sha256=5ZVxWugff0i6VBKz_Nv5CqacMZJsPZEKyoSB6XDrW34,385
119
110
  isa_model/serving/api/routes/ui_analysis.py,sha256=-WxLaRKQNHnRh4okB85cWA4blTegpEPZtzHTsF3yeeU,6848
120
- isa_model/serving/api/routes/unified.py,sha256=rSoHPtMWkGlzFwzzoZeFlCmFGWj2r3q-5QH9VeNQQxA,7074
111
+ isa_model/serving/api/routes/unified.py,sha256=r6O_X9ql2EDqiTWaWz_anPERwfzNnF9ZvSdjqht8WxE,9727
121
112
  isa_model/serving/api/routes/vision.py,sha256=U9jxssQYe6igtayUW0C2fcYwqmLRIE15__X-5Ru9J4c,396
122
113
  isa_model/serving/api/schemas/__init__.py,sha256=Tu_hzxoKW1ZHpww3-5ER4A2hNuDByZ0rAfrgaJ7Bs-M,275
123
114
  isa_model/serving/api/schemas/common.py,sha256=HVaAS7wlvqrwC1gMZ2Cvo0vzHB053x2uOTAwUoY2vsE,696
@@ -141,7 +132,7 @@ isa_model/training/core/config.py,sha256=oqgKpBvtzrN6jwLIQYQ2707lH6nmjrktRiSxp9i
141
132
  isa_model/training/core/dataset.py,sha256=XCFsnf0NUMU1dJpdvo_CAMyvXB-9_RCUEiy8TU50e20,7802
142
133
  isa_model/training/core/trainer.py,sha256=h5TjqjdFr0Fsv5y4-0siy1KmOlqLfliVaUXybvuoeXU,26932
143
134
  isa_model/training/core/utils.py,sha256=Nik0M2ssfNbWqP6fKO0Kfyhzr_H6Q19ioxB-qCYbn5E,8387
144
- isa_model-0.3.6.dist-info/METADATA,sha256=TMGcK76gGTCDWcXfCp17JuAoWxVN4TfVv5Nu-8mN8JE,12326
145
- isa_model-0.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
146
- isa_model-0.3.6.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
147
- isa_model-0.3.6.dist-info/RECORD,,
135
+ isa_model-0.3.8.dist-info/METADATA,sha256=rQ6RLtoPX1xoT2BFeRQZTx9gRSeCjRxuTbH3xseKbdE,12326
136
+ isa_model-0.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
137
+ isa_model-0.3.8.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
138
+ isa_model-0.3.8.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- """
2
- Providers - Components for integrating with different model providers
3
-
4
- File: isa_model/inference/providers/__init__.py
5
- This module contains provider implementations for different AI model backends.
6
- """
7
-
8
- from .base_provider import BaseProvider
9
-
10
- __all__ = [
11
- "BaseProvider",
12
- ]
13
-
14
- # Provider implementations can be imported individually as needed
15
- # from .triton_provider import TritonProvider
16
- # from .ollama_provider import OllamaProvider
17
- # from .yyds_provider import YYDSProvider
18
- # from .openai_provider import OpenAIProvider
19
- # from .replicate_provider import ReplicateProvider
@@ -1,77 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Dict, List, Any, Optional
3
- import os
4
- import logging
5
- from pathlib import Path
6
- import dotenv
7
-
8
- from isa_model.inference.base import ModelType, Capability
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
class BaseProvider(ABC):
    """Abstract base for AI providers; owns the config dict and API-key access.

    Construction stores ``config`` (or a fresh empty dict), loads environment
    configuration, then validates it. Concrete subclasses must implement
    ``_load_provider_env_vars``, ``get_capabilities``, ``get_models`` and
    ``is_reasoning_model``.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        # A falsy config (None or an empty mapping) is replaced by a new dict.
        self.config = config if config else {}
        self._load_environment_config()
        self._validate_config()

    def _load_environment_config(self):
        """Load a ``.env`` file if one exists, then provider-specific variables."""
        # The .env file is looked up four directory levels above this module.
        dotenv_file = Path(__file__).parent.parent.parent.parent / ".env"
        if dotenv_file.exists():
            dotenv.load_dotenv(dotenv_file)

        # Hook for subclasses to pull in their own environment variables.
        self._load_provider_env_vars()

    @abstractmethod
    def _load_provider_env_vars(self):
        """Load provider-specific environment variables (subclass hook)."""
        pass

    def _validate_config(self):
        """Validate required configuration; a no-op unless a subclass overrides it."""
        pass

    def get_api_key(self) -> Optional[str]:
        """Return the ``api_key`` config entry, or ``None`` when it is absent."""
        return self.config.get("api_key")

    def has_valid_credentials(self) -> bool:
        """Return ``True`` when ``get_api_key()`` yields a truthy value."""
        return True if self.get_api_key() else False

    @abstractmethod
    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
        """Return the provider's capabilities keyed by model type."""
        pass

    @abstractmethod
    def get_models(self, model_type: ModelType) -> List[str]:
        """Return the models available for ``model_type``."""
        pass

    def get_config(self) -> Dict[str, Any]:
        """Return a shallow copy of the config with secrets masked.

        ``api_key`` and ``api_token`` are replaced by ``"***"`` when set to a
        truthy value and by ``""`` otherwise; other keys are left unchanged.
        """
        masked = self.config.copy()
        for secret_key in ("api_key", "api_token"):
            if secret_key in masked:
                masked[secret_key] = "***" if masked[secret_key] else ""
        return masked

    def get_full_config(self) -> Dict[str, Any]:
        """Return a shallow copy of the config, secrets included (internal use only)."""
        return self.config.copy()

    @abstractmethod
    def is_reasoning_model(self, model_name: str) -> bool:
        """Return whether ``model_name`` is optimized for reasoning tasks."""
        pass
@@ -1,50 +0,0 @@
1
- from isa_model.inference.providers.base_provider import BaseProvider
2
- from isa_model.inference.base import ModelType, Capability
3
- from typing import Dict, List, Any
4
- import logging
5
-
6
- logger = logging.getLogger(__name__)
7
-
8
class MLProvider(BaseProvider):
    """Provider for traditional ML models.

    Merges caller-supplied config over defaults for ``model_directory``,
    ``cache_models`` and ``max_cache_size``.
    """

    def __init__(self, config=None):
        default_config = {
            "model_directory": "./models/ml",
            "cache_models": True,
            "max_cache_size": 5
        }

        # Caller-supplied values take precedence over the defaults.
        merged_config = {**default_config, **(config or {})}
        super().__init__(config=merged_config)
        self.name = "ml"

        logger.info(f"Initialized MLProvider with model directory: {self.config['model_directory']}")

    def _load_provider_env_vars(self):
        """No provider-specific environment variables are loaded.

        Implemented because the base class declares this hook abstract;
        without it the class cannot be instantiated.
        """
        pass

    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
        """Get provider capabilities"""
        return {
            ModelType.LLM: [],  # ML models are not LLMs
            ModelType.EMBEDDING: [],
            ModelType.VISION: [],
            "ML": [  # Custom model type for traditional ML
                "CLASSIFICATION",
                "REGRESSION",
                "CLUSTERING",
                "FEATURE_EXTRACTION"
            ]
        }

    def get_models(self, model_type: str = "ML") -> List[str]:
        """Get available ML models.

        Returns a fixed list; ``model_type`` is currently not consulted.
        """
        # In practice, this would scan the model directory
        return [
            "fraud_detection_rf",
            "customer_churn_xgb",
            "price_prediction_lr",
            "recommendation_kmeans"
        ]

    def get_config(self) -> Dict[str, Any]:
        """Get provider configuration.

        Returns the live config dict itself (not a copy, and without the
        masking the base implementation applies).
        """
        return self.config

    def is_reasoning_model(self, model_name: str) -> bool:
        """Return ``False``: no model of this provider is treated as a reasoning model.

        Implemented because the base class declares this method abstract.
        """
        return False
@@ -1,109 +0,0 @@
1
- """
2
- Modal Provider
3
-
4
- Provider for ISA self-hosted Modal services
5
- No API keys needed since we deploy our own services
6
- """
7
-
8
- import os
9
- import logging
10
- from typing import Dict, Any, Optional, List
11
- from .base_provider import BaseProvider
12
- from isa_model.inference.base import ModelType, Capability
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
class ModalProvider(BaseProvider):
    """Provider for ISA Modal services."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        super().__init__(config)
        self.name = "modal"
        self.base_url = "https://modal.com"  # Not used directly

    def _load_provider_env_vars(self):
        """Pick up an optional Modal token and fill in missing config defaults."""
        # MODAL_TOKEN_ID is preferred; MODAL_TOKEN_SECRET is the fallback.
        token = os.getenv("MODAL_TOKEN_ID")
        if not token:
            token = os.getenv("MODAL_TOKEN_SECRET")
        if token:
            self.config["modal_token"] = token

        # Only keys the caller did not supply receive a default.
        fallbacks = (
            ("timeout", 300),
            ("deployment_region", "us-east-1"),
            ("gpu_type", "T4"),
        )
        for option, value in fallbacks:
            self.config.setdefault(option, value)

    def get_api_key(self) -> str:
        """Return a fixed placeholder; deployed Modal apps need no API key."""
        placeholder = "modal-deployed-service"
        return placeholder

    def get_base_url(self) -> str:
        """Return the base URL stored on this provider."""
        return self.base_url

    def validate_credentials(self) -> bool:
        """
        Report whether the ``modal`` package can be imported.

        No credential check is made beyond the import; a warning is logged
        when the import fails.
        """
        try:
            import modal  # noqa: F401 - imported only to test availability
        except ImportError:
            logger.warning("Modal package not available")
            return False
        return True

    def get_capabilities(self) -> Dict[ModelType, List[Capability]]:
        """Return the vision capabilities offered through Modal."""
        vision_capabilities = [
            Capability.OBJECT_DETECTION,
            Capability.IMAGE_ANALYSIS,
            Capability.UI_DETECTION,
            Capability.OCR,
            Capability.DOCUMENT_ANALYSIS,
        ]
        return {ModelType.VISION: vision_capabilities}

    def get_models(self, model_type: ModelType) -> List[str]:
        """Return the model names for ``model_type``; empty for anything but vision."""
        if model_type != ModelType.VISION:
            return []
        return [
            "omniparser-v2.0",
            "table-transformer-detection",
            "table-transformer-structure-v1.1",
            "paddleocr-3.0",
            "yolov8",
        ]

    def is_reasoning_model(self, model_name: str) -> bool:
        """Return ``False`` for every model name."""
        # Vision models are not reasoning models
        return False

    def get_default_config(self) -> Dict[str, Any]:
        """Return a new dict of default settings for Modal services."""
        defaults: Dict[str, Any] = {}
        defaults["timeout"] = 300  # 5 minutes
        defaults["max_retries"] = 3
        defaults["deployment_region"] = "us-east-1"
        defaults["gpu_type"] = "T4"
        return defaults

    def get_billing_info(self) -> Dict[str, Any]:
        """Return static billing details, including the per-GPU hourly costs."""
        hourly_rates = {
            "T4": 0.60,
            "A100": 4.00,
        }
        return {
            "provider": "modal",
            "billing_model": "compute_usage",
            "cost_per_hour": hourly_rates,
            "note": "Costs depend on actual usage time, scales to zero when not in use",
        }