isa-model 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. isa_model/config/__init__.py +9 -0
  2. isa_model/config/config_manager.py +213 -0
  3. isa_model/core/model_manager.py +5 -0
  4. isa_model/core/model_registry.py +39 -6
  5. isa_model/core/storage/supabase_storage.py +344 -0
  6. isa_model/core/vision_models_init.py +116 -0
  7. isa_model/deployment/cloud/__init__.py +9 -0
  8. isa_model/deployment/cloud/modal/__init__.py +10 -0
  9. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +612 -0
  10. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +305 -0
  11. isa_model/inference/ai_factory.py +238 -14
  12. isa_model/inference/providers/modal_provider.py +109 -0
  13. isa_model/inference/providers/yyds_provider.py +108 -0
  14. isa_model/inference/services/__init__.py +2 -1
  15. isa_model/inference/services/base_service.py +0 -38
  16. isa_model/inference/services/llm/base_llm_service.py +32 -0
  17. isa_model/inference/services/llm/llm_adapter.py +40 -0
  18. isa_model/inference/services/llm/ollama_llm_service.py +104 -3
  19. isa_model/inference/services/llm/openai_llm_service.py +67 -15
  20. isa_model/inference/services/llm/yyds_llm_service.py +254 -0
  21. isa_model/inference/services/stacked/__init__.py +26 -0
  22. isa_model/inference/services/stacked/base_stacked_service.py +269 -0
  23. isa_model/inference/services/stacked/config.py +426 -0
  24. isa_model/inference/services/stacked/doc_analysis_service.py +640 -0
  25. isa_model/inference/services/stacked/flux_professional_service.py +579 -0
  26. isa_model/inference/services/stacked/ui_analysis_service.py +1319 -0
  27. isa_model/inference/services/vision/base_image_gen_service.py +0 -34
  28. isa_model/inference/services/vision/base_vision_service.py +46 -2
  29. isa_model/inference/services/vision/isA_vision_service.py +402 -0
  30. isa_model/inference/services/vision/openai_vision_service.py +151 -9
  31. isa_model/inference/services/vision/replicate_image_gen_service.py +166 -38
  32. isa_model/inference/services/vision/replicate_vision_service.py +693 -0
  33. isa_model/serving/__init__.py +19 -0
  34. isa_model/serving/api/__init__.py +10 -0
  35. isa_model/serving/api/fastapi_server.py +84 -0
  36. isa_model/serving/api/middleware/__init__.py +9 -0
  37. isa_model/serving/api/middleware/request_logger.py +88 -0
  38. isa_model/serving/api/routes/__init__.py +5 -0
  39. isa_model/serving/api/routes/health.py +82 -0
  40. isa_model/serving/api/routes/llm.py +19 -0
  41. isa_model/serving/api/routes/ui_analysis.py +223 -0
  42. isa_model/serving/api/routes/vision.py +19 -0
  43. isa_model/serving/api/schemas/__init__.py +17 -0
  44. isa_model/serving/api/schemas/common.py +33 -0
  45. isa_model/serving/api/schemas/ui_analysis.py +78 -0
  46. {isa_model-0.3.4.dist-info → isa_model-0.3.5.dist-info}/METADATA +1 -1
  47. {isa_model-0.3.4.dist-info → isa_model-0.3.5.dist-info}/RECORD +49 -17
  48. {isa_model-0.3.4.dist-info → isa_model-0.3.5.dist-info}/WHEEL +0 -0
  49. {isa_model-0.3.4.dist-info → isa_model-0.3.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,305 @@
1
+ """
2
+ ISA Vision UI Service
3
+
4
+ Specialized service for UI element detection using OmniParser v2.0
5
+ Fallback to YOLOv8 for general object detection
6
+ """
7
+
8
+ import modal
9
+ import torch
10
+ import base64
11
+ import io
12
+ import numpy as np
13
+ from PIL import Image
14
+ from typing import Dict, List, Optional, Any
15
+ import time
16
+ import json
17
+ import os
18
+ import logging
19
+
20
+ # Define Modal application
21
+ app = modal.App("isa-vision-ui")
22
+
23
+ # Download UI detection models
24
+ def download_ui_models():
25
+ """Download UI detection models"""
26
+ from huggingface_hub import snapshot_download
27
+
28
+ print("📦 Downloading UI detection models...")
29
+ os.makedirs("/models", exist_ok=True)
30
+
31
+ # Download OmniParser v2.0
32
+ try:
33
+ snapshot_download(
34
+ repo_id="microsoft/OmniParser-v2.0",
35
+ local_dir="/models/omniparser-v2",
36
+ allow_patterns=["**/*.pt", "**/*.pth", "**/*.bin", "**/*.json", "**/*.safetensors"]
37
+ )
38
+ print("✅ OmniParser v2.0 downloaded")
39
+ except Exception as e:
40
+ print(f"⚠️ OmniParser v2.0 download failed: {e}")
41
+
42
+ # Download YOLOv8 (fallback)
43
+ try:
44
+ from ultralytics import YOLO
45
+ model = YOLO('yolov8n.pt')
46
+ print("✅ YOLOv8 fallback model downloaded")
47
+ except Exception as e:
48
+ print(f"⚠️ YOLOv8 download failed: {e}")
49
+
50
+ print("📦 UI models download completed")
51
+
52
+ # Define Modal container image
53
+ image = (
54
+ modal.Image.debian_slim(python_version="3.11")
55
+ .pip_install([
56
+ # Core AI libraries
57
+ "torch>=2.0.0",
58
+ "torchvision",
59
+ "transformers>=4.35.0",
60
+ "ultralytics>=8.0.43",
61
+ "huggingface_hub",
62
+ "accelerate",
63
+
64
+ # Image processing
65
+ "pillow>=10.0.1",
66
+ "opencv-python-headless",
67
+ "numpy>=1.24.3",
68
+
69
+ # HTTP libraries
70
+ "httpx>=0.26.0",
71
+ "requests",
72
+
73
+ # Utilities
74
+ "pydantic>=2.0.0",
75
+ "python-dotenv",
76
+ ])
77
+ .run_function(download_ui_models)
78
+ .env({"TRANSFORMERS_CACHE": "/models"})
79
+ )
80
+
81
+ # UI Detection Service
82
+ @app.cls(
83
+ gpu="T4",
84
+ image=image,
85
+ memory=16384, # 16GB RAM
86
+ timeout=1800, # 30 minutes
87
+ scaledown_window=300, # 5 minutes idle timeout
88
+ min_containers=0, # Scale to zero to save costs
89
+ )
90
+ class UIDetectionService:
91
+ """
92
+ UI Element Detection Service
93
+
94
+ Provides fast UI element detection using OmniParser v2.0
95
+ Falls back to YOLOv8 for general object detection
96
+ """
97
+
98
+ def __init__(self):
99
+ self.models = {}
100
+ self.logger = logging.getLogger(__name__)
101
+
102
+ @modal.enter()
103
+ def load_models(self):
104
+ """Load UI detection models on container startup"""
105
+ print("🚀 Loading UI detection models...")
106
+ start_time = time.time()
107
+
108
+ # Try to load OmniParser first
109
+ try:
110
+ self._load_omniparser()
111
+ except Exception as e:
112
+ print(f"⚠️ OmniParser failed to load: {e}")
113
+ # Fall back to YOLOv8
114
+ self._load_yolo_fallback()
115
+
116
+ load_time = time.time() - start_time
117
+ print(f"✅ UI detection models loaded in {load_time:.2f}s")
118
+
119
+ def _load_omniparser(self):
120
+ """Load OmniParser model"""
121
+ # Placeholder for actual OmniParser loading
122
+ # In practice, you would load the actual OmniParser model here
123
+ print("📱 Loading OmniParser v2.0...")
124
+ self.models['ui_detector'] = "omniparser_placeholder"
125
+ print("✅ OmniParser v2.0 loaded")
126
+
127
+ def _load_yolo_fallback(self):
128
+ """Load YOLOv8 as fallback"""
129
+ from ultralytics import YOLO
130
+
131
+ print("🔄 Loading YOLOv8 fallback...")
132
+ yolo_model = YOLO('yolov8n.pt')
133
+ self.models['detector'] = yolo_model
134
+ print("✅ YOLOv8 fallback loaded")
135
+
136
+ @modal.method()
137
+ def detect_ui_elements(self, image_b64: str, detection_type: str = "ui") -> Dict[str, Any]:
138
+ """
139
+ Detect UI elements in image
140
+
141
+ Args:
142
+ image_b64: Base64 encoded image
143
+ detection_type: Type of detection ("ui" or "general")
144
+
145
+ Returns:
146
+ Detection results with UI elements
147
+ """
148
+ start_time = time.time()
149
+
150
+ try:
151
+ # Decode image
152
+ image = self._decode_image(image_b64)
153
+ image_np = np.array(image)
154
+
155
+ # Perform detection based on available models
156
+ if 'ui_detector' in self.models:
157
+ ui_elements = self._omniparser_detection(image_np)
158
+ detection_method = "omniparser"
159
+ elif 'detector' in self.models:
160
+ ui_elements = self._yolo_detection(image_np)
161
+ detection_method = "yolo_fallback"
162
+ else:
163
+ ui_elements = self._opencv_fallback(image_np)
164
+ detection_method = "opencv_fallback"
165
+
166
+ processing_time = time.time() - start_time
167
+
168
+ return {
169
+ 'success': True,
170
+ 'service': 'isa-vision-ui',
171
+ 'ui_elements': ui_elements,
172
+ 'element_count': len(ui_elements),
173
+ 'processing_time': processing_time,
174
+ 'detection_method': detection_method,
175
+ 'model_info': {
176
+ 'primary': 'OmniParser v2.0' if 'ui_detector' in self.models else 'YOLOv8',
177
+ 'gpu': 'T4',
178
+ 'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
179
+ }
180
+ }
181
+
182
+ except Exception as e:
183
+ self.logger.error(f"UI detection failed: {e}")
184
+ return {
185
+ 'success': False,
186
+ 'service': 'isa-vision-ui',
187
+ 'error': str(e),
188
+ 'processing_time': time.time() - start_time
189
+ }
190
+
191
+ def _omniparser_detection(self, image_np: np.ndarray) -> List[Dict[str, Any]]:
192
+ """OmniParser-based UI element detection"""
193
+ # Placeholder implementation
194
+ # In practice, this would use the actual OmniParser model
195
+ print("🔍 Using OmniParser for UI detection")
196
+
197
+ # Simulate UI element detection
198
+ height, width = image_np.shape[:2]
199
+ ui_elements = []
200
+
201
+ # Mock UI elements (replace with actual OmniParser inference)
202
+ mock_elements = [
203
+ {"type": "button", "confidence": 0.95, "bbox": [100, 200, 200, 250]},
204
+ {"type": "input", "confidence": 0.88, "bbox": [150, 300, 400, 340]},
205
+ {"type": "text", "confidence": 0.92, "bbox": [50, 100, 300, 130]},
206
+ ]
207
+
208
+ for i, elem in enumerate(mock_elements):
209
+ ui_elements.append({
210
+ 'id': f'ui_{i}',
211
+ 'type': elem['type'],
212
+ 'content': f"{elem['type']}_{i}",
213
+ 'center': [
214
+ (elem['bbox'][0] + elem['bbox'][2]) // 2,
215
+ (elem['bbox'][1] + elem['bbox'][3]) // 2
216
+ ],
217
+ 'bbox': elem['bbox'],
218
+ 'confidence': elem['confidence'],
219
+ 'interactable': elem['type'] in ['button', 'input', 'link']
220
+ })
221
+
222
+ return ui_elements
223
+
224
+ def _yolo_detection(self, image_np: np.ndarray) -> List[Dict[str, Any]]:
225
+ """YOLO-based object detection for UI elements"""
226
+ model = self.models['detector']
227
+ results = model(image_np, verbose=False)
228
+
229
+ ui_elements = []
230
+
231
+ if results and results[0].boxes is not None:
232
+ boxes = results[0].boxes.xyxy.cpu().numpy()
233
+ confidences = results[0].boxes.conf.cpu().numpy()
234
+
235
+ for i, (box, conf) in enumerate(zip(boxes, confidences)):
236
+ if conf > 0.3: # Confidence threshold
237
+ x1, y1, x2, y2 = map(int, box)
238
+
239
+ ui_elements.append({
240
+ 'id': f'yolo_{i}',
241
+ 'type': 'detected_object',
242
+ 'content': f'object_{i}',
243
+ 'center': [(x1+x2)//2, (y1+y2)//2],
244
+ 'bbox': [x1, y1, x2, y2],
245
+ 'confidence': float(conf),
246
+ 'interactable': True # Assume detected objects are interactable
247
+ })
248
+
249
+ return ui_elements
250
+
251
+ def _opencv_fallback(self, image_np: np.ndarray) -> List[Dict[str, Any]]:
252
+ """OpenCV-based fallback detection"""
253
+ import cv2
254
+
255
+ # Convert to grayscale
256
+ gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
257
+
258
+ # Edge detection
259
+ edges = cv2.Canny(gray, 50, 150)
260
+
261
+ # Find contours
262
+ contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
263
+
264
+ ui_elements = []
265
+ for i, contour in enumerate(contours[:10]): # Limit to 10 largest
266
+ area = cv2.contourArea(contour)
267
+ if area > 500: # Minimum area threshold
268
+ x, y, w, h = cv2.boundingRect(contour)
269
+
270
+ ui_elements.append({
271
+ 'id': f'cv_{i}',
272
+ 'type': 'contour_element',
273
+ 'content': f'contour_{i}',
274
+ 'center': [x+w//2, y+h//2],
275
+ 'bbox': [x, y, x+w, y+h],
276
+ 'confidence': 0.7,
277
+ 'interactable': True
278
+ })
279
+
280
+ return ui_elements
281
+
282
+ @modal.method()
283
+ def health_check(self) -> Dict[str, Any]:
284
+ """Health check endpoint"""
285
+ return {
286
+ 'status': 'healthy',
287
+ 'service': 'isa-vision-ui',
288
+ 'models_loaded': list(self.models.keys()),
289
+ 'timestamp': time.time(),
290
+ 'gpu': 'T4'
291
+ }
292
+
293
+ def _decode_image(self, image_b64: str) -> Image.Image:
294
+ """Decode base64 image"""
295
+ if image_b64.startswith('data:image'):
296
+ image_b64 = image_b64.split(',')[1]
297
+
298
+ image_data = base64.b64decode(image_b64)
299
+ return Image.open(io.BytesIO(image_data)).convert('RGB')
300
+
301
+ # Warmup function removed to save costs
302
+
303
+ if __name__ == "__main__":
304
+ print("🚀 ISA Vision UI Service - Modal Deployment")
305
+ print("Deploy with: modal deploy isa_vision_ui_service.py")
@@ -13,6 +13,7 @@ from isa_model.inference.services.base_service import BaseService
13
13
  from isa_model.inference.base import ModelType
14
14
  from isa_model.inference.services.vision.base_vision_service import BaseVisionService
15
15
  from isa_model.inference.services.vision.base_image_gen_service import BaseImageGenService
16
+ from isa_model.inference.services.stacked import UIAnalysisService, BaseStackedService, DocAnalysisStackedService, FluxProfessionalService
16
17
 
17
18
  if TYPE_CHECKING:
18
19
  from isa_model.inference.services.audio.base_stt_service import BaseSTTService
@@ -55,6 +56,12 @@ class AIFactory:
55
56
  # Register Replicate services
56
57
  self._register_replicate_services()
57
58
 
59
+ # Register ISA Modal services
60
+ self._register_isa_services()
61
+
62
+ # Register YYDS services
63
+ self._register_yyds_services()
64
+
58
65
  logger.info("AI Factory initialized with centralized provider API key management")
59
66
 
60
67
  except Exception as e:
@@ -105,10 +112,16 @@ class AIFactory:
105
112
  try:
106
113
  from isa_model.inference.providers.replicate_provider import ReplicateProvider
107
114
  from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService
115
+ from isa_model.inference.services.vision.replicate_vision_service import ReplicateVisionService
108
116
  from isa_model.inference.services.audio.replicate_tts_service import ReplicateTTSService
109
117
 
110
118
  self.register_provider('replicate', ReplicateProvider)
111
- self.register_service('replicate', ModelType.VISION, ReplicateImageGenService)
119
+ # Register vision service for general vision tasks
120
+ self.register_service('replicate', ModelType.VISION, ReplicateVisionService)
121
+ # Register image generation service for FLUX, ControlNet, LoRA, Upscaling
122
+ # Note: Using VISION type as IMAGE_GEN is not defined in ModelType
123
+ # ReplicateImageGenService will be accessed through get_img() methods
124
+ # Register audio service
112
125
  self.register_service('replicate', ModelType.AUDIO, ReplicateTTSService)
113
126
 
114
127
  logger.info("Replicate services registered successfully")
@@ -116,6 +129,34 @@ class AIFactory:
116
129
  except ImportError as e:
117
130
  logger.warning(f"Replicate services not available: {e}")
118
131
 
132
+ def _register_isa_services(self):
133
+ """Register ISA Modal provider and services"""
134
+ try:
135
+ from isa_model.inference.services.vision.isA_vision_service import ISAVisionService
136
+ from isa_model.inference.providers.modal_provider import ModalProvider
137
+
138
+ self.register_provider('modal', ModalProvider)
139
+ self.register_service('modal', ModelType.VISION, ISAVisionService)
140
+
141
+ logger.info("ISA Modal services registered successfully")
142
+
143
+ except ImportError as e:
144
+ logger.warning(f"ISA Modal services not available: {e}")
145
+
146
+ def _register_yyds_services(self):
147
+ """Register YYDS provider and services"""
148
+ try:
149
+ from isa_model.inference.providers.yyds_provider import YydsProvider
150
+ from isa_model.inference.services.llm.yyds_llm_service import YydsLLMService
151
+
152
+ self.register_provider('yyds', YydsProvider)
153
+ self.register_service('yyds', ModelType.LLM, YydsLLMService)
154
+
155
+ logger.info("YYDS services registered successfully")
156
+
157
+ except ImportError as e:
158
+ logger.warning(f"YYDS services not available: {e}")
159
+
119
160
  def register_provider(self, name: str, provider_class: Type[BaseProvider]) -> None:
120
161
  """Register an AI provider"""
121
162
  self._providers[name] = provider_class
@@ -164,7 +205,7 @@ class AIFactory:
164
205
  Get a LLM service instance with automatic defaults
165
206
 
166
207
  Args:
167
- model_name: Name of the model to use (defaults: OpenAI="gpt-4.1-nano", Ollama="llama3.2:3b")
208
+ model_name: Name of the model to use (defaults: OpenAI="gpt-4.1-nano", Ollama="llama3.2:3b", YYDS="claude-sonnet-4-20250514")
168
209
  provider: Provider name (defaults to 'openai' for production, 'ollama' for dev)
169
210
  config: Optional configuration dictionary (auto-loads from .env if not provided)
170
211
  Can include: streaming=True/False, temperature, max_tokens, etc.
@@ -179,6 +220,9 @@ class AIFactory:
179
220
  elif provider == "ollama":
180
221
  final_model_name = model_name or "llama3.2:3b-instruct-fp16"
181
222
  final_provider = provider
223
+ elif provider == "yyds":
224
+ final_model_name = model_name or "claude-sonnet-4-20250514"
225
+ final_provider = provider
182
226
  else:
183
227
  # Default provider selection - OpenAI with cheapest model
184
228
  final_provider = provider or "openai"
@@ -249,27 +293,70 @@ class AIFactory:
249
293
 
250
294
  return cast(BaseVisionService, self.create_service(final_provider, ModelType.VISION, final_model_name, config))
251
295
 
252
- def get_image_generation_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
253
- config: Optional[Dict[str, Any]] = None) -> 'BaseImageGenService':
296
+ def get_image_gen(self, model_name: Optional[str] = None, provider: Optional[str] = None,
297
+ config: Optional[Dict[str, Any]] = None) -> 'BaseImageGenService':
254
298
  """
255
299
  Get an image generation service instance with automatic defaults
256
300
 
257
301
  Args:
258
- model_name: Name of the model to use (defaults: "black-forest-labs/flux-schnell" for production)
302
+ model_name: Name of the model to use. Supports:
303
+ - FLUX models: "flux-pro", "flux-schnell", "flux-dev"
304
+ - ControlNet: "flux-controlnet", "xlabs-ai/flux-dev-controlnet"
305
+ - LoRA: "flux-lora", "flux-dev-lora"
306
+ - InstantID: "instant-id", "zsxkib/instant-id"
307
+ - Character: "consistent-character", "fofr/consistent-character"
308
+ - Upscaling: "ultimate-upscaler", "ultimate-sd-upscale"
309
+ - Detail: "adetailer"
259
310
  provider: Provider name (defaults to 'replicate')
260
- config: Optional configuration dictionary (auto-loads from .env if not provided)
311
+ config: Optional configuration dictionary
261
312
 
262
313
  Returns:
263
- Image generation service instance
314
+ Image generation service instance with FLUX, ControlNet, LoRA, InstantID, Upscaling support
264
315
  """
265
316
  # Set defaults based on provider
266
317
  final_provider = provider or "replicate"
267
- if final_provider == "replicate":
268
- final_model_name = model_name or "black-forest-labs/flux-schnell"
318
+
319
+ # Default model selection
320
+ if not model_name:
321
+ final_model_name = "black-forest-labs/flux-schnell"
269
322
  else:
270
- final_model_name = model_name or "black-forest-labs/flux-schnell"
323
+ # Map short names to full Replicate model names
324
+ model_mapping = {
325
+ "flux-pro": "black-forest-labs/flux-pro",
326
+ "flux-schnell": "black-forest-labs/flux-schnell",
327
+ "flux-dev": "black-forest-labs/flux-dev",
328
+ "flux-controlnet": "xlabs-ai/flux-dev-controlnet",
329
+ "flux-lora": "xlabs-ai/flux-lora",
330
+ "instant-id": "zsxkib/instant-id",
331
+ "consistent-character": "fofr/consistent-character",
332
+ "ultimate-upscaler": "philz1337x/clarity-upscaler",
333
+ "ultimate-sd-upscale": "philz1337x/clarity-upscaler",
334
+ "adetailer": "sczhou/codeformer"
335
+ }
336
+ final_model_name = model_mapping.get(model_name, model_name)
271
337
 
272
- return cast('BaseImageGenService', self.create_service(final_provider, ModelType.VISION, final_model_name, config))
338
+ # Create ReplicateImageGenService directly for image generation
339
+ try:
340
+ from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService
341
+ from isa_model.inference.providers.replicate_provider import ReplicateProvider
342
+
343
+ # Create provider with config
344
+ provider_instance = ReplicateProvider(config=config)
345
+ service = ReplicateImageGenService(provider=provider_instance, model_name=final_model_name)
346
+
347
+ return service
348
+
349
+ except ImportError as e:
350
+ logger.error(f"Failed to import ReplicateImageGenService: {e}")
351
+ raise ValueError(f"Image generation service not available: {e}")
352
+ except Exception as e:
353
+ logger.error(f"Failed to create image generation service: {e}")
354
+ raise
355
+
356
+ def get_image_generation_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
357
+ config: Optional[Dict[str, Any]] = None) -> 'BaseImageGenService':
358
+ """Alias for get_image_gen() method"""
359
+ return self.get_image_gen(model_name, provider, config)
273
360
 
274
361
  def get_img(self, type: str = "t2i", model_name: Optional[str] = None, provider: Optional[str] = None,
275
362
  config: Optional[Dict[str, Any]] = None) -> 'BaseImageGenService':
@@ -310,7 +397,8 @@ class AIFactory:
310
397
  else:
311
398
  raise ValueError(f"Unknown image generation type: {type}. Use 't2i' or 'i2i'")
312
399
 
313
- return cast('BaseImageGenService', self.create_service(final_provider, ModelType.VISION, final_model_name, config))
400
+ # Use the new get_image_gen method
401
+ return self.get_image_gen(final_model_name, final_provider, config)
314
402
 
315
403
  def get_audio_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
316
404
  config: Optional[Dict[str, Any]] = None) -> BaseService:
@@ -436,12 +524,13 @@ class AIFactory:
436
524
  Usage:
437
525
  llm = AIFactory().get_llm() # Uses gpt-4.1-nano by default
438
526
  llm = AIFactory().get_llm(model_name="llama3.2", provider="ollama")
527
+ llm = AIFactory().get_llm(provider="yyds") # Uses claude-sonnet-4-20250514 by default
439
528
  llm = AIFactory().get_llm(model_name="gpt-4.1-mini", provider="openai", config={"streaming": True})
440
529
  """
441
530
  return self.get_llm_service(model_name, provider, config)
442
531
 
443
532
  def get_embed(self, model_name: Optional[str] = None, provider: Optional[str] = None,
444
- config: Optional[Dict[str, Any]] = None) -> 'BaseEmbedService':
533
+ config: Optional[Dict[str, Any]] = None) -> BaseService:
445
534
  """
446
535
  Get embedding service with automatic defaults
447
536
 
@@ -547,4 +636,139 @@ class AIFactory:
547
636
  model_name=model_name,
548
637
  provider=provider,
549
638
  config=config
550
- )
639
+ )
640
+
641
+ def get_provider(self, provider_name: str, config: Optional[Dict[str, Any]] = None) -> BaseProvider:
642
+ """
643
+ Get a provider instance
644
+
645
+ Args:
646
+ provider_name: Name of the provider ('openai', 'ollama', 'replicate')
647
+ config: Optional configuration override
648
+
649
+ Returns:
650
+ Provider instance
651
+ """
652
+ if provider_name not in self._providers:
653
+ raise ValueError(f"No provider registered for '{provider_name}'")
654
+
655
+ provider_class = self._providers[provider_name]
656
+ return provider_class(config=config)
657
+
658
+ def get_stacked(
659
+ self,
660
+ service_name: str,
661
+ config: Optional[Dict[str, Any]] = None
662
+ ) -> BaseStackedService:
663
+ """
664
+ Get a stacked service by name with automatic defaults
665
+
666
+ Args:
667
+ service_name: Name of the stacked service ('ui_analysis', etc.)
668
+ config: Optional configuration override
669
+
670
+ Returns:
671
+ Stacked service instance
672
+
673
+ Usage:
674
+ ui_service = AIFactory().get_stacked("ui_analysis", {"task_type": "search"})
675
+ """
676
+ if service_name == "ui_analysis":
677
+ return UIAnalysisService(self, config)
678
+ elif service_name == "search_analysis":
679
+ if config is None:
680
+ config = {}
681
+ config["task_type"] = "search"
682
+ return UIAnalysisService(self, config)
683
+ elif service_name == "content_analysis":
684
+ if config is None:
685
+ config = {}
686
+ config["task_type"] = "content"
687
+ return UIAnalysisService(self, config)
688
+ elif service_name == "navigation_analysis":
689
+ if config is None:
690
+ config = {}
691
+ config["task_type"] = "navigation"
692
+ return UIAnalysisService(self, config)
693
+ elif service_name == "doc_analysis":
694
+ return DocAnalysisStackedService(self, config)
695
+ elif service_name == "flux_professional":
696
+ return FluxProfessionalService(self)
697
+ else:
698
+ raise ValueError(f"Unknown stacked service: {service_name}. Available: ui_analysis, search_analysis, content_analysis, navigation_analysis, doc_analysis, flux_professional")
699
+
700
+ def get_ui_analysis(
701
+ self,
702
+ task_type: str = "login",
703
+ config: Optional[Dict[str, Any]] = None
704
+ ) -> UIAnalysisService:
705
+ """
706
+ Get UI Analysis service with task-specific configuration
707
+
708
+ Args:
709
+ task_type: Type of UI task ('login', 'search', 'content', 'navigation')
710
+ config: Optional configuration override
711
+
712
+ Usage:
713
+ # For login pages (default)
714
+ ui_service = AIFactory().get_ui_analysis()
715
+
716
+ # For search pages
717
+ ui_service = AIFactory().get_ui_analysis(task_type="search")
718
+
719
+ # For content extraction
720
+ ui_service = AIFactory().get_ui_analysis(task_type="content")
721
+ """
722
+ if config is None:
723
+ config = {}
724
+ config["task_type"] = task_type
725
+ return cast(UIAnalysisService, self.get_stacked("ui_analysis", config))
726
+
727
+ def get_doc_analysis(
728
+ self,
729
+ config: Optional[Dict[str, Any]] = None
730
+ ) -> DocAnalysisStackedService:
731
+ """
732
+ Get Document Analysis service with 5-step pipeline
733
+
734
+ Args:
735
+ config: Optional configuration override
736
+
737
+ Usage:
738
+ # Basic document analysis
739
+ doc_service = AIFactory().get_doc_analysis()
740
+
741
+ # Analyze a document image
742
+ result = await doc_service.analyze_document("document.png")
743
+
744
+ # Get structured data ready for business mapping
745
+ structured_data = result["final_output"]["final_structured_data"]
746
+ """
747
+ return cast(DocAnalysisStackedService, self.get_stacked("doc_analysis", config))
748
+
749
+ def get_flux_professional(
750
+ self,
751
+ config: Optional[Dict[str, Any]] = None
752
+ ) -> FluxProfessionalService:
753
+ """
754
+ Get FLUX Professional Pipeline service for multi-stage image generation
755
+
756
+ Args:
757
+ config: Optional configuration override
758
+
759
+ Usage:
760
+ # Basic professional image generation
761
+ flux_service = AIFactory().get_flux_professional()
762
+
763
+ # Generate professional image with character consistency
764
+ result = await flux_service.invoke({
765
+ "prompt": "portrait of a warrior in fantasy armor",
766
+ "face_image": "reference_face.jpg", # For character consistency
767
+ "lora_style": "realism",
768
+ "upscale_factor": 4
769
+ })
770
+
771
+ # Get final high-quality image
772
+ final_image_url = result["final_output"]["image_url"]
773
+ """
774
+ return cast(FluxProfessionalService, self.get_stacked("flux_professional", config))