isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100):
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/models/model_repo.py +343 -0
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/__init__.py +9 -0
  15. isa_model/deployment/cloud/modal/__init__.py +10 -0
  16. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
  17. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  18. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
  19. isa_model/deployment/cloud/modal/register_models.py +321 -0
  20. isa_model/deployment/runtime/deployed_service.py +338 -0
  21. isa_model/deployment/services/__init__.py +9 -0
  22. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  23. isa_model/deployment/services/model_service.py +332 -0
  24. isa_model/deployment/services/service_monitor.py +356 -0
  25. isa_model/deployment/services/service_registry.py +527 -0
  26. isa_model/eval/__init__.py +80 -44
  27. isa_model/eval/config/__init__.py +10 -0
  28. isa_model/eval/config/evaluation_config.py +108 -0
  29. isa_model/eval/evaluators/__init__.py +18 -0
  30. isa_model/eval/evaluators/base_evaluator.py +503 -0
  31. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  32. isa_model/eval/factory.py +417 -709
  33. isa_model/eval/infrastructure/__init__.py +24 -0
  34. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  35. isa_model/eval/metrics.py +191 -21
  36. isa_model/inference/ai_factory.py +187 -387
  37. isa_model/inference/providers/modal_provider.py +109 -0
  38. isa_model/inference/providers/yyds_provider.py +108 -0
  39. isa_model/inference/services/__init__.py +2 -1
  40. isa_model/inference/services/audio/base_stt_service.py +65 -1
  41. isa_model/inference/services/audio/base_tts_service.py +75 -1
  42. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  43. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  44. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  45. isa_model/inference/services/base_service.py +55 -55
  46. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  47. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  48. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  49. isa_model/inference/services/helpers/stacked_config.py +148 -0
  50. isa_model/inference/services/img/__init__.py +18 -0
  51. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
  52. isa_model/inference/services/img/flux_professional_service.py +603 -0
  53. isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
  54. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
  55. isa_model/inference/services/llm/__init__.py +3 -3
  56. isa_model/inference/services/llm/base_llm_service.py +519 -35
  57. isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
  58. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  59. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  60. isa_model/inference/services/llm/ollama_llm_service.py +150 -15
  61. isa_model/inference/services/llm/openai_llm_service.py +134 -31
  62. isa_model/inference/services/llm/yyds_llm_service.py +255 -0
  63. isa_model/inference/services/vision/__init__.py +38 -4
  64. isa_model/inference/services/vision/base_vision_service.py +241 -96
  65. isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
  66. isa_model/inference/services/vision/doc_analysis_service.py +640 -0
  67. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  68. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  69. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  70. isa_model/inference/services/vision/openai_vision_service.py +109 -170
  71. isa_model/inference/services/vision/replicate_vision_service.py +508 -0
  72. isa_model/inference/services/vision/ui_analysis_service.py +823 -0
  73. isa_model/scripts/register_models.py +370 -0
  74. isa_model/scripts/register_models_with_embeddings.py +510 -0
  75. isa_model/serving/__init__.py +19 -0
  76. isa_model/serving/api/__init__.py +10 -0
  77. isa_model/serving/api/fastapi_server.py +89 -0
  78. isa_model/serving/api/middleware/__init__.py +9 -0
  79. isa_model/serving/api/middleware/request_logger.py +88 -0
  80. isa_model/serving/api/routes/__init__.py +5 -0
  81. isa_model/serving/api/routes/health.py +82 -0
  82. isa_model/serving/api/routes/llm.py +19 -0
  83. isa_model/serving/api/routes/ui_analysis.py +223 -0
  84. isa_model/serving/api/routes/unified.py +202 -0
  85. isa_model/serving/api/routes/vision.py +19 -0
  86. isa_model/serving/api/schemas/__init__.py +17 -0
  87. isa_model/serving/api/schemas/common.py +33 -0
  88. isa_model/serving/api/schemas/ui_analysis.py +78 -0
  89. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  90. isa_model-0.3.6.dist-info/RECORD +147 -0
  91. isa_model/core/model_manager.py +0 -208
  92. isa_model/core/model_registry.py +0 -342
  93. isa_model/inference/billing_tracker.py +0 -406
  94. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  95. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  96. isa_model-0.3.4.dist-info/RECORD +0 -91
  97. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  98. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  99. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  100. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,406 @@
1
+ """
2
+ ISA Vision UI Service
3
+
4
+ Specialized service for UI element detection using OmniParser v2.0
5
+ Fallback to YOLOv8 for general object detection
6
+ """
7
+
8
+ import modal
9
+ import torch
10
+ import base64
11
+ import io
12
+ import numpy as np
13
+ from PIL import Image
14
+ from typing import Dict, List, Optional, Any
15
+ import time
16
+ import json
17
+ import os
18
+ import logging
19
+
20
+ # Modal application object; the service class and the functions below are registered on it via @app.cls / @app.function
21
+ app = modal.App("isa-vision-ui")
22
+
23
+ # Download UI detection models
24
def download_ui_models():
    """Fetch the detection model weights (best effort).

    Creates ``/models``, pulls the OmniParser v2.0 snapshot from the
    Hugging Face Hub into ``/models/omniparser-v2`` and instantiates the
    YOLOv8-nano model so that its weights get downloaded. A failure of
    either download is printed and otherwise ignored, so this function
    itself does not raise for download problems.
    """
    from huggingface_hub import snapshot_download

    print("📦 Downloading UI detection models...")
    os.makedirs("/models", exist_ok=True)

    # OmniParser v2.0: only weight and config files are requested
    weight_patterns = ["**/*.pt", "**/*.pth", "**/*.bin", "**/*.json", "**/*.safetensors"]
    try:
        snapshot_download(
            repo_id="microsoft/OmniParser-v2.0",
            local_dir="/models/omniparser-v2",
            allow_patterns=weight_patterns,
        )
    except Exception as exc:
        print(f"⚠️ OmniParser v2.0 download failed: {exc}")
    else:
        print("✅ OmniParser v2.0 downloaded")

    # YOLOv8 (fallback): constructing the model triggers the weight download,
    # the resulting object itself is not needed here
    try:
        from ultralytics import YOLO

        YOLO('yolov8n.pt')
    except Exception as exc:
        print(f"⚠️ YOLOv8 download failed: {exc}")
    else:
        print("✅ YOLOv8 fallback model downloaded")

    print("📦 UI models download completed")
51
+
52
+ # Define Modal container image
53
# Start from a slim Debian base with Python 3.11
image = modal.Image.debian_slim(python_version="3.11")

# Python dependencies installed into the image
image = image.pip_install([
    # Core AI libraries
    "torch>=2.0.0",
    "torchvision",
    "transformers>=4.35.0",
    "ultralytics>=8.0.43",
    "huggingface_hub",
    "accelerate",

    # Image processing
    "pillow>=10.0.1",
    "opencv-python-headless",
    "numpy>=1.24.3",

    # HTTP libraries
    "httpx>=0.26.0",
    "requests",

    # Utilities
    "pydantic>=2.0.0",
    "python-dotenv",
])

# Run the model download as an image build step, after the packages it needs are installed
image = image.run_function(download_ui_models)

# Environment variable set on the final image
image = image.env({"TRANSFORMERS_CACHE": "/models"})
80
+
81
+ # UI Detection Service
82
@app.cls(
    gpu="T4",
    image=image,
    memory=16384,  # 16GB RAM
    timeout=1800,  # 30 minutes
    scaledown_window=60,  # 1 minute idle timeout
    min_containers=0,  # Scale to zero to save costs
)
class UIDetectionService:
    """
    UI Element Detection Service

    Detects UI elements in a base64-encoded image. The detector is picked
    from whatever ``load_models`` registered in ``self.models``:

    1. ``'ui_detector'`` - OmniParser v2.0 (currently a placeholder that
       returns fixed mock elements),
    2. ``'detector'`` - YOLOv8 general object detection,
    3. neither - a model-free OpenCV contour heuristic.
    """

    def __init__(self):
        # Maps a role name ('ui_detector' / 'detector') to the loaded model
        self.models = {}
        self.logger = logging.getLogger(__name__)

    @modal.enter()
    def load_models(self):
        """Load UI detection models on container startup.

        OmniParser is tried first and YOLOv8 only if OmniParser raises.
        A YOLOv8 failure is reported but not re-raised, so the service
        still starts and ``detect_ui_elements`` can use its OpenCV path.
        """
        print("🚀 Loading UI detection models...")
        start_time = time.time()

        try:
            self._load_omniparser()
        except Exception as e:
            print(f"⚠️ OmniParser failed to load: {e}")
            try:
                self._load_yolo_fallback()
            except Exception as yolo_error:
                # Without this guard a failed fallback would abort startup and
                # the OpenCV branch in detect_ui_elements could never run.
                print(f"⚠️ YOLOv8 fallback failed to load: {yolo_error}")

        load_time = time.time() - start_time
        print(f"✅ UI detection models loaded in {load_time:.2f}s")

    def _load_omniparser(self):
        """Register the OmniParser detector under ``'ui_detector'``.

        Placeholder: no real model is loaded yet, only a marker string is
        stored. Replace with the actual OmniParser loading code.
        """
        print("📱 Loading OmniParser v2.0...")
        self.models['ui_detector'] = "omniparser_placeholder"
        print("✅ OmniParser v2.0 loaded")

    def _load_yolo_fallback(self):
        """Load YOLOv8-nano and register it under ``'detector'``."""
        from ultralytics import YOLO

        print("🔄 Loading YOLOv8 fallback...")
        self.models['detector'] = YOLO('yolov8n.pt')
        print("✅ YOLOv8 fallback loaded")

    @modal.method()
    def detect_ui_elements(self, image_b64: str, detection_type: str = "ui") -> Dict[str, Any]:
        """
        Detect UI elements in image

        Args:
            image_b64: Base64 encoded image, optionally as a ``data:image...``
                URL.
            detection_type: Type of detection ("ui" or "general"). Accepted
                for interface compatibility; the detector is currently chosen
                only by which models are loaded.

        Returns:
            On success a dict with ``success=True``, ``ui_elements``,
            ``element_count``, ``processing_time``, ``detection_method`` and
            ``model_info``. On any exception a dict with ``success=False``,
            ``error`` and ``processing_time``; the exception is logged and
            not re-raised.
        """
        start_time = time.time()

        try:
            image_np = np.array(self._decode_image(image_b64))

            # Pick the detector, and the label reported as the primary model,
            # from what is actually loaded
            if 'ui_detector' in self.models:
                ui_elements = self._omniparser_detection(image_np)
                detection_method = "omniparser"
                primary_model = 'OmniParser v2.0'
            elif 'detector' in self.models:
                ui_elements = self._yolo_detection(image_np)
                detection_method = "yolo_fallback"
                primary_model = 'YOLOv8'
            else:
                ui_elements = self._opencv_fallback(image_np)
                detection_method = "opencv_fallback"
                primary_model = 'OpenCV contours'

            return {
                'success': True,
                'service': 'isa-vision-ui',
                'ui_elements': ui_elements,
                'element_count': len(ui_elements),
                'processing_time': time.time() - start_time,
                'detection_method': detection_method,
                'model_info': {
                    'primary': primary_model,
                    'gpu': 'T4',
                    'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
                }
            }

        except Exception as e:
            self.logger.error("UI detection failed: %s", e)
            return {
                'success': False,
                'service': 'isa-vision-ui',
                'error': str(e),
                'processing_time': time.time() - start_time
            }

    def _omniparser_detection(self, image_np: np.ndarray) -> List[Dict[str, Any]]:
        """OmniParser-based UI element detection (placeholder).

        Returns three fixed mock elements regardless of ``image_np``;
        replace ``mock_elements`` with real OmniParser inference.
        """
        print("🔍 Using OmniParser for UI detection")

        # Mock UI elements (replace with actual OmniParser inference)
        mock_elements = [
            {"type": "button", "confidence": 0.95, "bbox": [100, 200, 200, 250]},
            {"type": "input", "confidence": 0.88, "bbox": [150, 300, 400, 340]},
            {"type": "text", "confidence": 0.92, "bbox": [50, 100, 300, 130]},
        ]

        ui_elements = []
        for i, elem in enumerate(mock_elements):
            x1, y1, x2, y2 = elem['bbox']
            ui_elements.append({
                'id': f'ui_{i}',
                'type': elem['type'],
                'content': f"{elem['type']}_{i}",
                'center': [(x1 + x2) // 2, (y1 + y2) // 2],
                'bbox': elem['bbox'],
                'confidence': elem['confidence'],
                'interactable': elem['type'] in ['button', 'input', 'link']
            })

        return ui_elements

    def _yolo_detection(self, image_np: np.ndarray) -> List[Dict[str, Any]]:
        """YOLO-based object detection for UI elements.

        Runs the model stored under ``'detector'`` and keeps boxes whose
        confidence exceeds 0.3. Every kept box is reported as an
        interactable ``'detected_object'``.
        """
        model = self.models['detector']
        results = model(image_np, verbose=False)

        ui_elements = []

        if results and results[0].boxes is not None:
            boxes = results[0].boxes.xyxy.cpu().numpy()
            confidences = results[0].boxes.conf.cpu().numpy()

            for i, (box, conf) in enumerate(zip(boxes, confidences)):
                if conf > 0.3:  # Confidence threshold
                    x1, y1, x2, y2 = map(int, box)

                    ui_elements.append({
                        'id': f'yolo_{i}',
                        'type': 'detected_object',
                        'content': f'object_{i}',
                        'center': [(x1+x2)//2, (y1+y2)//2],
                        'bbox': [x1, y1, x2, y2],
                        'confidence': float(conf),
                        'interactable': True  # Assume detected objects are interactable
                    })

        return ui_elements

    def _opencv_fallback(self, image_np: np.ndarray) -> List[Dict[str, Any]]:
        """OpenCV-based fallback detection.

        Runs Canny edge detection, extracts external contours and reports
        the bounding boxes of up to the 10 largest contours whose area
        exceeds 500. Confidence is a fixed 0.7.
        """
        import cv2

        # _decode_image always yields RGB, hence RGB2GRAY
        gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
        edges = cv2.Canny(gray, 50, 150)
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # findContours returns contours in no size order, so sort by area
        # before truncating; otherwise "10 largest" is just "first 10 found"
        largest = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

        ui_elements = []
        for i, contour in enumerate(largest):
            if cv2.contourArea(contour) <= 500:  # Minimum area threshold
                break  # sorted descending: everything after is smaller still
            x, y, w, h = cv2.boundingRect(contour)

            ui_elements.append({
                'id': f'cv_{i}',
                'type': 'contour_element',
                'content': f'contour_{i}',
                'center': [x+w//2, y+h//2],
                'bbox': [x, y, x+w, y+h],
                'confidence': 0.7,
                'interactable': True
            })

        return ui_elements

    @modal.method()
    def health_check(self) -> Dict[str, Any]:
        """Health check endpoint: reports the loaded model keys and a timestamp."""
        return {
            'status': 'healthy',
            'service': 'isa-vision-ui',
            'models_loaded': list(self.models.keys()),
            'timestamp': time.time(),
            'gpu': 'T4'
        }

    def _decode_image(self, image_b64: str) -> Image.Image:
        """Decode a base64 image (raw or ``data:image...`` URL) to an RGB PIL image."""
        if image_b64.startswith('data:image'):
            # Drop the "data:image/...;base64," header; only the first comma
            # separates header from payload
            image_b64 = image_b64.partition(',')[2]

        image_data = base64.b64decode(image_b64)
        return Image.open(io.BytesIO(image_data)).convert('RGB')
300
+
301
+ # Auto-registration function
302
@app.function()
async def register_service():
    """Auto-register this service in the model registry.

    Returns:
        ``{"success": bool}`` with the registry's result, or
        ``{"success": False, "error": str}`` when the model manager cannot
        be imported or any other exception occurs. Never raises.
    """
    try:
        import sys
        from pathlib import Path

        # This file lives in <root>/isa_model/deployment/cloud/modal/, so the
        # directory that *contains* the isa_model package is five levels up.
        # (Four levels up is the package directory itself, which does not make
        # `import isa_model` resolvable.)
        project_root = Path(__file__).resolve()
        for _ in range(5):
            project_root = project_root.parent
        if str(project_root) not in sys.path:
            sys.path.insert(0, str(project_root))

        try:
            try:
                # Current layout: the modules live under isa_model/core/models/
                from isa_model.core.models.model_manager import ModelManager
                from isa_model.core.models.model_repo import ModelType, ModelCapability
            except ImportError:
                # Older layout, kept for backward compatibility
                from isa_model.core.model_manager import ModelManager
                from isa_model.core.model_repo import ModelType, ModelCapability
        except ImportError:
            # Fallback if import fails in Modal environment
            print("⚠️ Could not import model manager - registration skipped")
            return {"success": False, "error": "Model manager not available"}

        # Use ModelManager to register this service
        model_manager = ModelManager()

        # NOTE(review): assumes ModelManager exposes `.registry.register_model(...)`
        # and that ModelCapability defines the members used below - confirm against
        # the model manager module; a mismatch is reported via the outer except.
        success = model_manager.registry.register_model(
            model_id="omniparser-ui-detection-service",
            model_type=ModelType.VISION,
            capabilities=[
                ModelCapability.UI_DETECTION,
                ModelCapability.IMAGE_ANALYSIS,
                ModelCapability.IMAGE_UNDERSTANDING
            ],
            metadata={
                "description": "UI element detection service using OmniParser v2.0",
                "service_name": "isa-vision-ui",
                "service_type": "modal",
                "deployment_type": "modal",
                "endpoint": "https://isa-vision-ui.modal.run",
                "underlying_model": "microsoft/OmniParser-v2.0",
                "fallback_model": "ultralytics/yolov8",
                "gpu_requirement": "T4",
                "memory_mb": 16384,
                "auto_registered": True,
                "registered_by": "isa_vision_ui_service.py",
                "is_service": True
            }
        )

        if success:
            print("✅ UI service auto-registered successfully")
        else:
            print("⚠️ UI service registration failed")

        return {"success": success}

    except Exception as e:
        print(f"❌ Auto-registration error: {e}")
        return {"success": False, "error": str(e)}
359
+
360
+ # Deployment script
361
@app.function()
def deploy_info():
    """Return a static description of this deployment.

    The dict names the service, the models it is built around, its GPU and
    memory requirements, and the command used to deploy it.
    """
    return dict(
        service="ISA Vision UI Detection",
        model="microsoft/OmniParser-v2.0 + ultralytics/yolov8 (fallback)",
        gpu_requirement="T4",
        memory_requirement="16GB",
        deploy_command="modal deploy isa_vision_ui_service.py",
    )
371
+
372
+ # Quick deployment function
373
@app.function()
def deploy_service():
    """Deploy this service by running ``modal deploy`` on this file.

    Returns:
        ``{"success": True, "output": <stdout>}`` when the command exits
        with status 0, otherwise ``{"success": False, "error": ...,
        "stderr": ...}``. A missing or non-executable ``modal`` command is
        reported the same way instead of raising.
    """
    import subprocess

    print("🚀 Deploying ISA Vision UI Service...")
    try:
        # Argument list (no shell), so the file path is passed through verbatim
        result = subprocess.run(
            ["modal", "deploy", __file__],
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ Deployment failed: {e}")
        print(f"📝 Error: {e.stderr}")
        return {"success": False, "error": str(e), "stderr": e.stderr}
    except OSError as e:
        # Raised when the `modal` executable cannot be found or started;
        # keep the same result shape as a failed deploy
        print(f"❌ Deployment failed: {e}")
        return {"success": False, "error": str(e), "stderr": ""}

    print("✅ Deployment completed successfully!")
    print(f"📝 Output: {result.stdout}")
    return {"success": True, "output": result.stdout}
400
+
401
if __name__ == "__main__":
    # Usage banner shown when the file is executed directly; nothing is deployed here
    usage_lines = (
        "🚀 ISA Vision UI Service - Modal Deployment",
        "Deploy with: modal deploy isa_vision_ui_service.py",
        "Or call: modal run isa_vision_ui_service.py::deploy_service",
        "Note: Uses OmniParser v2.0 with YOLOv8 fallback",
        "\n📝 Service will auto-register in model registry upon deployment",
    )
    for usage_line in usage_lines:
        print(usage_line)