isa-model 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
isa_model/client.py CHANGED
@@ -89,6 +89,47 @@ class ISAModelClient:
89
89
 
90
90
  logger.info("ISA Model Client initialized")
91
91
 
92
async def stream(
    self,
    input_data: Union[str, bytes, Path, Dict[str, Any]],
    task: str,
    service_type: str,
    model_hint: Optional[str] = None,
    provider_hint: Optional[str] = None,
    **kwargs
):
    """
    Stream a text response token by token.

    This is an async generator: nothing (including the service-type check)
    runs until the caller starts iterating.

    Args:
        input_data: Input data (text for LLM streaming)
        task: Task to perform
        service_type: Type of service; anything other than "text" is rejected
        model_hint: Optional model preference
        provider_hint: Optional provider preference
        **kwargs: Extra parameters handed unchanged to the selected backend

    Yields:
        Each token produced by ``_stream_api`` (when ``self.mode == "api"``)
        or by ``_stream_local`` (any other mode).

    Raises:
        ValueError: If ``service_type`` is not "text".
        Exception: Any backend failure is logged and re-raised unchanged.

    Example:
        async for token in client.stream("Hello world", "chat", "text"):
            print(token, end="", flush=True)
    """
    if service_type != "text":
        raise ValueError("Streaming is only supported for text/LLM services")

    try:
        # Choose the transport once, then drain whichever generator it returns.
        backend = self._stream_api if self.mode == "api" else self._stream_local
        token_source = backend(
            input_data, task, service_type, model_hint, provider_hint, **kwargs
        )
        async for piece in token_source:
            yield piece
    except Exception as exc:
        logger.error(f"Failed to stream {task} on {service_type}: {exc}")
        raise
132
+
92
133
  async def invoke(
93
134
  self,
94
135
  input_data: Union[str, bytes, Path, Dict[str, Any]],
@@ -96,8 +137,9 @@ class ISAModelClient:
96
137
  service_type: str,
97
138
  model_hint: Optional[str] = None,
98
139
  provider_hint: Optional[str] = None,
140
+ stream: bool = False,
99
141
  **kwargs
100
- ) -> Dict[str, Any]:
142
+ ) -> Union[Dict[str, Any], object]:
101
143
  """
102
144
  Unified invoke method with intelligent model selection
103
145
 
@@ -107,10 +149,12 @@ class ISAModelClient:
107
149
  service_type: Type of service (vision, audio, text, image, embedding)
108
150
  model_hint: Optional model preference
109
151
  provider_hint: Optional provider preference
152
+ stream: Enable streaming for text services (returns AsyncGenerator)
110
153
  **kwargs: Additional task-specific parameters
111
154
 
112
155
  Returns:
113
- Unified response dictionary with result and metadata
156
+ If stream=False: Unified response dictionary with result and metadata
157
+ If stream=True: AsyncGenerator yielding tokens (only for text services)
114
158
 
115
159
  Examples:
116
160
  # Vision tasks
@@ -126,6 +170,10 @@ class ISAModelClient:
126
170
  await client.invoke("Translate this text", "translate", "text")
127
171
  await client.invoke("What is AI?", "chat", "text")
128
172
 
173
+ # Streaming text
174
+ async for token in await client.invoke("Hello", "chat", "text", stream=True):
175
+ print(token, end="", flush=True)
176
+
129
177
  # Image generation
130
178
  await client.invoke("A beautiful sunset", "generate_image", "image")
131
179
 
@@ -133,7 +181,31 @@ class ISAModelClient:
133
181
  await client.invoke("Text to embed", "create_embedding", "embedding")
134
182
  """
135
183
  try:
136
- # Route to appropriate mode
184
+ # Handle streaming case
185
+ if stream:
186
+ if service_type != "text":
187
+ raise ValueError("Streaming is only supported for text services")
188
+
189
+ if self.mode == "api":
190
+ return self._stream_api(
191
+ input_data=input_data,
192
+ task=task,
193
+ service_type=service_type,
194
+ model_hint=model_hint,
195
+ provider_hint=provider_hint,
196
+ **kwargs
197
+ )
198
+ else:
199
+ return self._stream_local(
200
+ input_data=input_data,
201
+ task=task,
202
+ service_type=service_type,
203
+ model_hint=model_hint,
204
+ provider_hint=provider_hint,
205
+ **kwargs
206
+ )
207
+
208
+ # Route to appropriate mode for non-streaming
137
209
  if self.mode == "api":
138
210
  return await self._invoke_api(
139
211
  input_data=input_data,
@@ -744,6 +816,101 @@ class ISAModelClient:
744
816
  logger.error(f"API binary upload failed: {e}")
745
817
  raise
746
818
 
819
async def _stream_local(
    self,
    input_data: Union[str, bytes, Path, Dict[str, Any]],
    task: str,
    service_type: str,
    model_hint: Optional[str] = None,
    provider_hint: Optional[str] = None,
    **kwargs
):
    """
    Stream tokens from a locally resolved service.

    Resolves a model via ``self._select_model``, obtains the matching service
    via ``self._get_service`` and re-yields everything produced by the
    service's ``astream(input_data)``.

    NOTE(review): ``**kwargs`` is accepted for signature parity with the other
    backends but is not forwarded anywhere in this method - confirm whether
    ``astream`` should receive it.
    """
    # 1) Decide which model/provider should handle the request.
    choice = await self._select_model(
        input_data=input_data,
        task=task,
        service_type=service_type,
        model_hint=model_hint,
        provider_hint=provider_hint,
    )
    model_id = choice["model_id"]
    provider = choice["provider"]

    # 2) Resolve the concrete service object for that model.
    backend = await self._get_service(
        service_type=service_type,
        model_name=model_id,
        provider=provider,
        task=task,
    )

    # 3) Relay the service's token stream to the caller.
    token_stream = backend.astream(input_data)
    async for piece in token_stream:
        yield piece
849
+
850
async def _stream_api(
    self,
    input_data: Union[str, bytes, Path, Dict[str, Any]],
    task: str,
    service_type: str,
    model_hint: Optional[str] = None,
    provider_hint: Optional[str] = None,
    **kwargs
):
    """
    Stream tokens from the remote API over Server-Sent Events (SSE).

    POSTs the request to ``{self.api_url}/api/v1/stream`` and parses every
    ``data: {json}`` line of the response body.

    Args:
        input_data: Text (``str``) or a ``dict`` payload; other types are rejected.
        task: Task name forwarded to the server.
        service_type: Service type forwarded to the server.
        model_hint: Optional model preference.
        provider_hint: Optional provider preference.
        **kwargs: Sent to the server as the ``parameters`` object.

    Yields:
        The ``token`` field of every event whose ``type`` is ``"token"``.
        Iteration stops at the first ``"completion"`` event.

    Raises:
        ValueError: If ``input_data`` is neither ``str`` nor ``dict``.
        Exception: On a non-200 response or a server-sent ``"error"`` event.
            Failures inside the HTTP session are logged before being re-raised.
    """
    # Imported once per call; the per-line loop below is the hot path.
    import json

    # Only support text streaming for now
    if not isinstance(input_data, (str, dict)):
        raise ValueError("API streaming only supports text input")

    payload = {
        "input_data": input_data,
        "task": task,
        "service_type": service_type,
        "model_hint": model_hint,
        "provider_hint": provider_hint,
        "stream": True,
        "parameters": kwargs,
    }
    sse_prefix = "data: "

    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
        try:
            async with session.post(
                f"{self.api_url}/api/v1/stream",
                json=payload,
                headers=self.headers,
            ) as response:
                if response.status != 200:
                    error_data = await response.text()
                    raise Exception(f"API streaming error {response.status}: {error_data}")

                async for line in response.content:
                    if not line:
                        continue
                    line_str = line.decode().strip()
                    if not line_str.startswith(sse_prefix):
                        continue

                    # Keep the try body to the one call that can fail parsing.
                    try:
                        data = json.loads(line_str[len(sse_prefix):])
                    except json.JSONDecodeError:
                        # Skip malformed lines
                        continue
                    if not isinstance(data, dict):
                        # Valid JSON that is not an event object is treated
                        # like any other malformed line instead of crashing
                        # on ``.get``.
                        continue

                    event_type = data.get("type")
                    if event_type == "token" and "token" in data:
                        yield data["token"]
                    elif event_type == "completion":
                        # End of stream
                        break
                    elif event_type == "error":
                        raise Exception(f"Server error: {data.get('error')}")

        except Exception as e:
            logger.error(f"API streaming failed: {e}")
            raise
913
+
747
914
 
748
915
  # Convenience function for quick access
749
916
  def create_client(
@@ -19,10 +19,11 @@ class AutoDeployVisionService(BaseVisionService):
19
19
  of Modal services for ISA vision tasks.
20
20
  """
21
21
 
22
- def __init__(self, provider_name: str = "modal", model_name: str = "qwen_table", **kwargs):
23
- # Use centralized architecture
24
- super().__init__(provider_name, model_name, **kwargs)
22
+ def __init__(self, model_name: str = "isa_vision_table", config: dict = None, **kwargs):
23
+ # Initialize BaseVisionService with modal provider
24
+ super().__init__("modal", model_name, **kwargs)
25
25
  self.model_name = model_name
26
+ self.config = config or {}
26
27
  self.underlying_service = None
27
28
  self._factory = None
28
29
 
@@ -0,0 +1,275 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple Auto-Deploy Vision Service Wrapper
4
+
5
+ A simplified version that avoids complex import dependencies.
6
+ """
7
+
8
+ import asyncio
9
+ import subprocess
10
+ import logging
11
+ import time
12
+ from typing import Dict, Any, Optional, Union, List, BinaryIO
13
+ from pathlib import Path
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
class SimpleAutoDeployVisionService:
    """
    Simplified vision service wrapper that handles automatic deployment
    of Modal services for ISA vision tasks without complex inheritance.

    Every public coroutine first makes sure the Modal app is deployed (via the
    AIFactory helpers) and then delegates to ``self.underlying_service``.
    Failures are reported as ``{'success': False, 'error': ..., 'service': ...}``
    dictionaries rather than raised.
    """

    def __init__(self, model_name: str = "isa_vision_ui", config: Optional[dict] = None):
        """
        Args:
            model_name: ISA model identifier, also used to look up the Modal app.
            config: Optional configuration mapping; stored as ``{}`` when omitted.
        """
        self.model_name = model_name
        self.config = config or {}
        self.underlying_service = None
        self._factory = None
        self._modal_deployed = False
        # Created lazily inside a running event loop (see _ensure_service_deployed).
        self._deploy_lock = None

        logger.info(f"Initialized SimpleAutoDeployVisionService for {model_name}")

    def _get_factory(self):
        """Get AIFactory instance for service management (created on first use)."""
        if not self._factory:
            # Imported lazily to keep this module free of heavy import chains.
            from isa_model.inference.ai_factory import AIFactory
            self._factory = AIFactory()
        return self._factory

    async def _ensure_service_deployed(self) -> bool:
        """
        Ensure the Modal service is deployed before use.

        The factory helpers are synchronous (the deploy helper shells out to
        ``modal deploy`` with a ten-minute timeout), so they are run in the
        default executor to keep the event loop responsive. A lock serialises
        concurrent callers so only one of them performs the deployment.

        Returns:
            True when the service is (or already was) deployed, False when the
            deployment failed or any step raised.
        """
        if self._modal_deployed:
            logger.info(f"Service {self.model_name} already deployed")
            return True

        if self._deploy_lock is None:
            self._deploy_lock = asyncio.Lock()

        async with self._deploy_lock:
            # Another caller may have finished deploying while we waited.
            if self._modal_deployed:
                logger.info(f"Service {self.model_name} already deployed")
                return True

            try:
                factory = self._get_factory()
                loop = asyncio.get_running_loop()

                # Check if service is available
                app_name = factory._get_modal_app_name(self.model_name)
                available = await loop.run_in_executor(
                    None, factory._check_modal_service_availability, app_name
                )
                if not available:
                    logger.info(f"Deploying {self.model_name} service...")
                    success = await loop.run_in_executor(
                        None, factory._auto_deploy_modal_service, self.model_name
                    )
                    if not success:
                        logger.error(f"Failed to deploy {self.model_name}")
                        return False

                    # Wait for service to be ready
                    logger.info(f"Waiting for {self.model_name} service to be ready...")
                    await self._wait_for_service_ready(app_name)

                # Mark as deployed
                self._modal_deployed = True

                if not self.underlying_service:
                    # Create a simple mock service for testing
                    self.underlying_service = MockModalVisionService(self.model_name)

                return True

            except Exception as e:
                logger.error(f"Failed to ensure service deployment: {e}")
                return False

    async def _wait_for_service_ready(self, app_name: str, max_wait_time: int = 300):
        """
        Wait for Modal service to be ready.

        NOTE: this is a placeholder. No readiness probe is performed; the
        service is simply assumed ready once more than 10 seconds have elapsed
        (checked in 5-second steps), or a warning is logged after
        ``max_wait_time`` seconds.
        """
        logger.info(f"Waiting up to {max_wait_time} seconds for {app_name} to be ready...")
        # Monotonic clock: elapsed time must not jump with wall-clock changes.
        started = time.monotonic()

        while time.monotonic() - started < max_wait_time:
            # No try/except here: nothing in this loop raises an ordinary
            # exception, and a broad handler could swallow task cancellation
            # on interpreters where CancelledError derives from Exception.
            await asyncio.sleep(5)
            elapsed = time.monotonic() - started
            logger.info(f"Still waiting for {app_name}... ({int(elapsed)}s elapsed)")

            # For testing, assume service is ready after 10 seconds
            if elapsed > 10:
                logger.info(f"Service {app_name} assumed ready for testing!")
                return

        logger.warning(f"Service {app_name} may not be fully ready after {max_wait_time}s")

    def _deploy_failure(self) -> Dict[str, Any]:
        """Build the error payload returned when deployment could not be ensured."""
        return {
            'success': False,
            'error': f'Failed to deploy {self.model_name} service',
            'service': self.model_name
        }

    def _call_failure(self, error: Exception) -> Dict[str, Any]:
        """Build the error payload returned when the underlying call raised."""
        return {
            'success': False,
            'error': str(error),
            'service': self.model_name
        }

    async def detect_ui_elements(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """Detect UI elements with auto-deploy; returns the underlying result or an error dict."""
        if not await self._ensure_service_deployed():
            return self._deploy_failure()

        try:
            logger.info(f"Calling UI detection service for {self.model_name}")
            return await self.underlying_service.detect_ui_elements(image)
        except Exception as e:
            logger.error(f"UI detection failed: {e}")
            return self._call_failure(e)

    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """Analyze image with auto-deploy; returns the underlying result or an error dict."""
        if not await self._ensure_service_deployed():
            return self._deploy_failure()

        try:
            return await self.underlying_service.analyze_image(image, prompt, max_tokens)
        except Exception as e:
            logger.error(f"Image analysis failed: {e}")
            return self._call_failure(e)

    async def invoke(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        task: Optional[str] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Unified invoke method for all vision operations.

        Routing:
            * ``"detect_ui_elements"`` / ``"ui_detection"`` -> ``detect_ui_elements``
            * ``"analyze"`` or ``None`` -> ``analyze_image`` (``max_tokens`` from
              kwargs, default 1000)
            * anything else -> ``underlying_service.invoke``
        """
        if not await self._ensure_service_deployed():
            return self._deploy_failure()

        try:
            if task in ("detect_ui_elements", "ui_detection"):
                return await self.detect_ui_elements(image)
            if task == "analyze" or task is None:
                return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
            return await self.underlying_service.invoke(image, prompt, task, **kwargs)
        except Exception as e:
            logger.error(f"Vision invoke failed: {e}")
            return self._call_failure(e)

    def get_supported_formats(self) -> List[str]:
        """Get list of supported image formats"""
        return ['jpg', 'jpeg', 'png', 'gif', 'webp']

    def get_max_image_size(self) -> Dict[str, int]:
        """Get maximum supported image dimensions"""
        return {"width": 2048, "height": 2048, "file_size_mb": 10}

    async def close(self):
        """Cleanup resources: closes the underlying service if one was created."""
        if self.underlying_service:
            await self.underlying_service.close()
            logger.info(f"Closed {self.model_name} service")
191
+
192
+
193
class MockModalVisionService:
    """
    Stand-in for a Modal vision service that returns canned results.

    Every coroutine sleeps for 0.1 s to imitate processing time and never
    inspects the image it is given.
    """

    def __init__(self, model_name: str):
        self.model_name = model_name
        logger.info(f"Initialized mock service for {model_name}")

    async def detect_ui_elements(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """Return two canned UI elements when the model name contains "ui", else none."""
        await asyncio.sleep(0.1)  # Simulate processing time

        # (id, type, content, center, bbox, confidence) for each canned element.
        canned = ()
        if "ui" in self.model_name:
            canned = (
                ('ui_0', 'button', 'Search Button', (400, 200), (350, 180, 450, 220), 0.95),
                ('ui_1', 'input', 'Search Input', (300, 150), (200, 130, 400, 170), 0.88),
            )

        # Fresh dicts/lists on every call so callers can mutate the result freely.
        ui_elements = [
            {
                'id': element_id,
                'type': kind,
                'content': label,
                'center': list(center),
                'bbox': list(bbox),
                'confidence': score,
                'interactable': True,
            }
            for element_id, kind, label, center, bbox, score in canned
        ]

        model_info = {
            'primary': 'Mock OmniParser v2.0',
            'gpu': 'T4',
            'container_id': 'mock-container',
        }
        return {
            'success': True,
            'service': self.model_name,
            'ui_elements': ui_elements,
            'element_count': len(ui_elements),
            'processing_time': 0.1,
            'detection_method': 'mock_omniparser',
            'model_info': model_info,
        }

    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """Return a canned analysis that echoes the prompt; ``max_tokens`` is ignored."""
        await asyncio.sleep(0.1)

        summary = f'Mock analysis of image with prompt: {prompt}'
        return {
            'success': True,
            'service': self.model_name,
            'text': summary,
            'confidence': 0.9,
            'processing_time': 0.1,
        }

    async def invoke(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        task: Optional[str] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """Dispatch "detect_ui_elements" to detection; every other task to analysis."""
        if task != "detect_ui_elements":
            token_budget = kwargs.get("max_tokens", 1000)
            return await self.analyze_image(image, prompt, token_budget)
        return await self.detect_ui_elements(image)

    async def close(self):
        """Nothing to release for the mock."""
        return None
@@ -123,9 +123,9 @@ class AIFactory:
123
123
  # Handle special ISA vision services
124
124
  if model_name in ["isa_vision_table", "isa_vision_ui", "isa_vision_doc"]:
125
125
  try:
126
- from isa_model.inference.services.vision.auto_deploy_vision_service import AutoDeployVisionService
126
+ from isa_model.deployment.services.simple_auto_deploy_vision_service import SimpleAutoDeployVisionService
127
127
  logger.info(f"Creating auto-deploy service wrapper for {model_name}")
128
- return AutoDeployVisionService(model_name, config)
128
+ return SimpleAutoDeployVisionService(model_name, config)
129
129
  except Exception as e:
130
130
  logger.error(f"Failed to create ISA vision service: {e}")
131
131
  raise
@@ -347,4 +347,84 @@ class AIFactory:
347
347
  """Get the singleton instance"""
348
348
  if cls._instance is None:
349
349
  cls._instance = cls()
350
- return cls._instance
350
+ return cls._instance
351
+
352
+ # Modal service deployment methods for AutoDeployVisionService
353
def _get_modal_app_name(self, model_name: str) -> str:
    """
    Translate an ISA model name into its Modal app name.

    Names without a known mapping come back as ``"unknown-<model_name>"``.
    """
    known_apps = {
        "isa_vision_table": "qwen-vision-table",
        "isa_vision_ui": "isa-vision-ui",
        "isa_vision_doc": "isa-vision-doc",
    }
    if model_name in known_apps:
        return known_apps[model_name]
    return f"unknown-{model_name}"
361
+
362
def _check_modal_service_availability(self, app_name: str) -> bool:
    """
    Check whether a Modal app with the given name can be looked up.

    Returns:
        True when ``modal.App.lookup(app_name)`` succeeds; False when the
        ``modal`` package cannot be imported or the lookup raises (the reason
        is logged at debug level).
    """
    try:
        import modal
        # Only success/failure of the lookup matters; the app object is unused.
        modal.App.lookup(app_name)
    except Exception as e:
        logger.debug(f"Modal service {app_name} not available: {e}")
        return False
    return True
372
+
373
def _auto_deploy_modal_service(self, model_name: str) -> bool:
    """
    Deploy the Modal service for ``model_name`` by running ``modal deploy``.

    The service script is looked up under ``<package>/deployment/cloud/modal``
    relative to this file. This call blocks until the CLI exits or the
    ten-minute timeout expires.

    Returns:
        True when ``modal deploy`` exits with status 0. False when the model
        has no service script, the script is missing, the CLI is not
        installed, the command fails or times out, or any other error occurs
        (each case is logged).
    """
    # Imported before the try block so the except clauses below can always
    # resolve ``subprocess.TimeoutExpired``.
    import subprocess
    from pathlib import Path

    service_files = {
        "isa_vision_table": "isa_vision_table_service.py",
        "isa_vision_ui": "isa_vision_ui_service.py",
        "isa_vision_doc": "isa_vision_doc_service.py"
    }

    try:
        service_file = service_files.get(model_name)
        if service_file is None:
            logger.error(f"No Modal service file found for {model_name}")
            return False

        modal_dir = Path(__file__).parent.parent / "deployment" / "cloud" / "modal"
        service_path = modal_dir / service_file

        if not service_path.exists():
            logger.error(f"Modal service file not found: {service_path}")
            return False

        logger.info(f"Deploying Modal service: {service_file}")

        # Argument list (no shell) so the path is never interpreted by a shell.
        result = subprocess.run(
            ["modal", "deploy", str(service_path)],
            capture_output=True,
            text=True,
            timeout=600,  # 10 minute timeout
            cwd=str(modal_dir)
        )

        if result.returncode != 0:
            logger.error(f"Failed to deploy {model_name}: {result.stderr}")
            return False

        logger.info(f"Successfully deployed {model_name} Modal service")
        return True

    except subprocess.TimeoutExpired:
        logger.error(f"Deployment timeout for {model_name}")
        return False
    except FileNotFoundError as e:
        # Raised by subprocess.run when the ``modal`` executable is not on PATH.
        logger.error(f"Modal CLI not found while deploying {model_name}: {e}")
        return False
    except Exception as e:
        logger.error(f"Exception during {model_name} deployment: {e}")
        return False
424
+
425
def _shutdown_modal_service(self, model_name: str):
    """
    No-op shutdown hook, kept for compatibility with AutoDeployVisionService.

    Modal services scale to zero on their own when idle, so nothing is torn
    down here; the call only records that fact in the log.
    """
    logger.info(f"Modal service {model_name} will auto-scale to zero when idle")
@@ -10,10 +10,12 @@ This is the main API that handles all types of AI requests:
10
10
  """
11
11
 
12
12
  from fastapi import APIRouter, HTTPException, UploadFile, File, Form
13
+ from fastapi.responses import StreamingResponse
13
14
  from pydantic import BaseModel, Field
14
15
  from typing import Optional, Dict, Any, Union, List
15
16
  import logging
16
17
  import asyncio
18
+ import json
17
19
  from pathlib import Path
18
20
 
19
21
  from isa_model.client import ISAModelClient
@@ -28,6 +30,7 @@ class UnifiedRequest(BaseModel):
28
30
  service_type: str = Field(..., description="Service type (text, vision, audio, image, embedding)")
29
31
  model_hint: Optional[str] = Field(None, description="Optional model preference")
30
32
  provider_hint: Optional[str] = Field(None, description="Optional provider preference")
33
+ stream: Optional[bool] = Field(False, description="Enable streaming for text services")
31
34
  parameters: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional task parameters")
32
35
 
33
36
  class UnifiedResponse(BaseModel):
@@ -108,6 +111,75 @@ async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
108
111
  }
109
112
  )
110
113
 
114
@router.post("/stream")
async def unified_stream(request: UnifiedRequest):
    """
    **Unified streaming endpoint for text services**

    Returns Server-Sent Events (SSE) stream for real-time token generation.
    Only supports text service types.

    Each event is one ``data: {json}`` line followed by a blank line:

    - ``{"token": ..., "type": "token"}`` for every generated token
    - ``{"type": "completion", "status": "finished"}`` once the stream ends
    - ``{"type": "error", "error": "..."}`` if generation fails mid-stream
      (the HTTP status has already been sent by then, so the failure is
      reported in-band)

    Raises:
        HTTPException: 400 when ``service_type`` is not "text"; 500 when the
            stream cannot be set up.
    """
    # Validate outside the generic handler below, so the 400 reaches the
    # client instead of being re-wrapped as a 500.
    if request.service_type != "text":
        raise HTTPException(status_code=400, detail="Streaming only supported for text services")

    try:
        # Get ISA client instance (local mode)
        client = get_isa_client()

        # ``parameters`` is Optional on the request model; ``**None`` would raise.
        parameters = request.parameters or {}

        async def generate_stream():
            """Generator for SSE streaming"""
            try:
                # With stream=True the client hands back an async generator.
                stream_gen = await client.invoke(
                    input_data=request.input_data,
                    task=request.task,
                    service_type=request.service_type,
                    model_hint=request.model_hint,
                    provider_hint=request.provider_hint,
                    stream=True,
                    **parameters
                )

                # SSE format: "data: {json}\n\n"
                async for token in stream_gen:
                    token_data = {
                        "token": token,
                        "type": "token"
                    }
                    yield f"data: {json.dumps(token_data)}\n\n"

                # Send completion signal
                completion_data = {
                    "type": "completion",
                    "status": "finished"
                }
                yield f"data: {json.dumps(completion_data)}\n\n"

            except Exception as e:
                logger.error(f"Streaming error: {e}")
                error_data = {
                    "type": "error",
                    "error": str(e)
                }
                yield f"data: {json.dumps(error_data)}\n\n"

        # Return SSE stream response. ``text/event-stream`` is the media type
        # SSE clients expect; the body format is unchanged.
        return StreamingResponse(
            generate_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive"
            }
        )

    except HTTPException:
        # Preserve deliberate HTTP errors raised during setup.
        raise
    except Exception as e:
        logger.error(f"Streaming setup failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
182
+
111
183
  @router.post("/invoke-file", response_model=UnifiedResponse)
112
184
  async def unified_invoke_file(
113
185
  task: str = Form(...),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: isa_model
3
- Version: 0.3.6
3
+ Version: 0.3.7
4
4
  Summary: Unified AI model serving framework
5
5
  Author: isA_Model Contributors
6
6
  Classifier: Development Status :: 3 - Alpha
@@ -1,5 +1,5 @@
1
1
  isa_model/__init__.py,sha256=bAbHdrDVQ-mySC_GJtgfLNI8KPcs2LfReBkIOOtpaQQ,867
2
- isa_model/client.py,sha256=5u_hqGB1hcFX8MQdVYlCLqzs4ylQFY3rq91h3iTI24c,27500
2
+ isa_model/client.py,sha256=ly4614_LIz5Csg1qG7M-eXtN8VR36ClsMWqsgz111BE,34285
3
3
  isa_model/core/config.py,sha256=h9GVTEEMlaJYSCDd0W9q1KtaWTV5V5TawMsKtGuphds,15686
4
4
  isa_model/core/pricing_manager.py,sha256=b7HcZsBQ8ZSCzMepOhqN-J9kU43vhTxX5NabQS0aM70,17125
5
5
  isa_model/core/types.py,sha256=XLUs442WGNc8E0gF2M-nb6dutD_s-XCfpr2BfGBCA2M,8445
@@ -30,10 +30,11 @@ isa_model/deployment/gpu_int8_ds8/scripts/test_client.py,sha256=aCULgRYzEQj_ELUK
30
30
  isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py,sha256=XXrneTCHUeh1LNRcu-YtZQ5B4pNawlrxC-cTWmJU2A8,936
31
31
  isa_model/deployment/runtime/deployed_service.py,sha256=0Z_Hg42oXasEVvuKjwBylJPazcmJYXhS-L9uPainaIg,13400
32
32
  isa_model/deployment/services/__init__.py,sha256=JrLlmBlLb6RfiqGMzVVxKZfF5tAKliQqpon_rPoNoeA,216
33
- isa_model/deployment/services/auto_deploy_vision_service.py,sha256=Luo8FaXsEBoKjGw5HQ7veOnv9Eh0e7U0claXaGa3-1o,19624
33
+ isa_model/deployment/services/auto_deploy_vision_service.py,sha256=bZmkNG2DWvG6DdHfHvUuf8fonygic4vI_A4aogrXzvU,19670
34
34
  isa_model/deployment/services/model_service.py,sha256=_ncC--8hr5BUwzCWh59yRXPKIPVLapx_31TorB2DIr8,13492
35
35
  isa_model/deployment/services/service_monitor.py,sha256=P1zGoeqkNEJwt9AXZF2qTjfSLRm5PKUa80GJVNDSIdA,15223
36
36
  isa_model/deployment/services/service_registry.py,sha256=LQgWQOvoP0lb7mC6WTS6shEt6WuX6xc8rRmcixrKwTc,22765
37
+ isa_model/deployment/services/simple_auto_deploy_vision_service.py,sha256=rfXsv9mh_w5cXHVYxA4fBD5ppyNY4HplsH34xp4WpY8,9882
37
38
  isa_model/eval/__init__.py,sha256=CRbxC5SN7ow4ymdALSNTHawqw4f82DEdAb7twNT_Pw0,2447
38
39
  isa_model/eval/benchmarks.py,sha256=_L4Vwj2hwf2yhqoleIASO9z5e3LRCClCVEVCQbGt0I8,16885
39
40
  isa_model/eval/factory.py,sha256=bm5OVY7HIxdBgjlH1n7e5K1YO4ytv8e4KB7z_JS9HVQ,20737
@@ -46,7 +47,7 @@ isa_model/eval/evaluators/llm_evaluator.py,sha256=yfFJFdxwGV2F3mzEWjZ-0fr9u8SR3A
46
47
  isa_model/eval/infrastructure/__init__.py,sha256=fxTdtwAFtjCDOV9MJ3GbhY0A-DqKeTwr_u9WTBnGI_U,648
47
48
  isa_model/eval/infrastructure/experiment_tracker.py,sha256=yfMWIAk6oA8Lfer3AtmKg0OEZiGhczmsCD5gmp--uew,15283
48
49
  isa_model/inference/__init__.py,sha256=usfuQJ4zYY2RRtHkE-V6LuJ5aN7WJogtPUj9Qmy4Wvw,318
49
- isa_model/inference/ai_factory.py,sha256=IN-q3jNmcraZ-PWNTHyhdABoxxhIweZYcXO2fr_uXoM,16478
50
+ isa_model/inference/ai_factory.py,sha256=oGtRd4wp6IZOTyI3GVKBNN4AtlnrLS7yFZuq2wvkaUg,19784
50
51
  isa_model/inference/base.py,sha256=qwOddnSGI0GUdD6qIdGBPQpkW7UjU3Y-zaZvu70B4WA,1278
51
52
  isa_model/inference/adapter/unified_api.py,sha256=67_Ok8W20m6Otf6r9WyOEVpnxondP4UAxOASk9ozDk4,8668
52
53
  isa_model/inference/providers/__init__.py,sha256=a83q-LMFv8u47wf0XtxvqOw_mlVgA_90wtuwy02qdDE,581
@@ -117,7 +118,7 @@ isa_model/serving/api/routes/__init__.py,sha256=RIaG9OPg0AjAIVbtMzwnqGyNU-tuQXbd
117
118
  isa_model/serving/api/routes/health.py,sha256=NwQcC_bpcaI4YZHTIKbGtg82yQ6QLdp0TwcqbEiqbWs,2208
118
119
  isa_model/serving/api/routes/llm.py,sha256=5ZVxWugff0i6VBKz_Nv5CqacMZJsPZEKyoSB6XDrW34,385
119
120
  isa_model/serving/api/routes/ui_analysis.py,sha256=-WxLaRKQNHnRh4okB85cWA4blTegpEPZtzHTsF3yeeU,6848
120
- isa_model/serving/api/routes/unified.py,sha256=rSoHPtMWkGlzFwzzoZeFlCmFGWj2r3q-5QH9VeNQQxA,7074
121
+ isa_model/serving/api/routes/unified.py,sha256=r6O_X9ql2EDqiTWaWz_anPERwfzNnF9ZvSdjqht8WxE,9727
121
122
  isa_model/serving/api/routes/vision.py,sha256=U9jxssQYe6igtayUW0C2fcYwqmLRIE15__X-5Ru9J4c,396
122
123
  isa_model/serving/api/schemas/__init__.py,sha256=Tu_hzxoKW1ZHpww3-5ER4A2hNuDByZ0rAfrgaJ7Bs-M,275
123
124
  isa_model/serving/api/schemas/common.py,sha256=HVaAS7wlvqrwC1gMZ2Cvo0vzHB053x2uOTAwUoY2vsE,696
@@ -141,7 +142,7 @@ isa_model/training/core/config.py,sha256=oqgKpBvtzrN6jwLIQYQ2707lH6nmjrktRiSxp9i
141
142
  isa_model/training/core/dataset.py,sha256=XCFsnf0NUMU1dJpdvo_CAMyvXB-9_RCUEiy8TU50e20,7802
142
143
  isa_model/training/core/trainer.py,sha256=h5TjqjdFr0Fsv5y4-0siy1KmOlqLfliVaUXybvuoeXU,26932
143
144
  isa_model/training/core/utils.py,sha256=Nik0M2ssfNbWqP6fKO0Kfyhzr_H6Q19ioxB-qCYbn5E,8387
144
- isa_model-0.3.6.dist-info/METADATA,sha256=TMGcK76gGTCDWcXfCp17JuAoWxVN4TfVv5Nu-8mN8JE,12326
145
- isa_model-0.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
146
- isa_model-0.3.6.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
147
- isa_model-0.3.6.dist-info/RECORD,,
145
+ isa_model-0.3.7.dist-info/METADATA,sha256=yjnMzsQji2XQeqdlohiHFdWkXahAuXr9aQJOd6tOnQ0,12326
146
+ isa_model-0.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
147
+ isa_model-0.3.7.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
148
+ isa_model-0.3.7.dist-info/RECORD,,