isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
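Note the module reshuffle in this release: image-generation code moved from isa_model/inference/services/vision into a new img package, the stacked services were folded into vision and img helpers, and several LLM helpers moved under llm/helpers. Downstream imports need to follow. A minimal migration sketch, using only paths taken from the rename entries above (items 47 and 86); exported names are assumed unchanged:

    # 0.3.5 paths (removed in 0.3.7):
    #   from isa_model.inference.services.vision.base_image_gen_service import BaseImageGenService
    #   from isa_model.inference.services.llm import llm_adapter

    # 0.3.7 paths, per the renames in the file list above
    from isa_model.inference.services.img.base_image_gen_service import BaseImageGenService
    from isa_model.inference.services.llm.helpers import llm_adapter

The largest single change is the rewrite of isa_model/inference/ai_factory.py (+257 -601), shown below.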
isa_model/inference/ai_factory.py

@@ -3,28 +3,27 @@
 
 """
 Simplified AI Factory for creating inference services
-Uses the new service architecture with proper base classes and centralized API key management
+Uses the new unified service architecture with centralized managers
 """
 
-from typing import Dict, Type, Any, Optional, Tuple, List, TYPE_CHECKING, cast
+from typing import Dict, Any, Optional, TYPE_CHECKING
 import logging
-from isa_model.inference.providers.base_provider import BaseProvider
 from isa_model.inference.services.base_service import BaseService
-from isa_model.inference.base import ModelType
-from isa_model.inference.services.vision.base_vision_service import BaseVisionService
-from isa_model.inference.services.vision.base_image_gen_service import BaseImageGenService
-from isa_model.inference.services.stacked import UIAnalysisService, BaseStackedService, DocAnalysisStackedService, FluxProfessionalService
+from isa_model.core.models.model_manager import ModelManager
+from isa_model.core.config import ConfigManager
 
 if TYPE_CHECKING:
     from isa_model.inference.services.audio.base_stt_service import BaseSTTService
     from isa_model.inference.services.audio.base_tts_service import BaseTTSService
+    from isa_model.inference.services.vision.base_vision_service import BaseVisionService
+    from isa_model.inference.services.img.base_image_gen_service import BaseImageGenService
 
 logger = logging.getLogger(__name__)
 
 class AIFactory:
     """
-    Simplified Factory for creating AI services with proper inheritance hierarchy
-    API key management is handled by individual providers
+    Modernized AI Factory using centralized ModelManager and ConfigManager
+    Provides unified interface with only 6 core methods: get_llm, get_vision, get_img, get_stt, get_tts, get_embed
     """
 
     _instance = None
@@ -38,184 +37,31 @@ class AIFactory:
     def __init__(self):
         """Initialize the AI Factory."""
         if not self._is_initialized:
-            self._providers: Dict[str, Type[BaseProvider]] = {}
-            self._services: Dict[Tuple[str, ModelType], Type[BaseService]] = {}
+            # Use centralized managers
+            self.model_manager = ModelManager()
+            self.config_manager = ConfigManager()
             self._cached_services: Dict[str, BaseService] = {}
-            self._initialize_services()
-            AIFactory._is_initialized = True
-
-    def _initialize_services(self):
-        """Initialize available providers and services"""
-        try:
-            # Register Ollama services
-            self._register_ollama_services()
-
-            # Register OpenAI services
-            self._register_openai_services()
-
-            # Register Replicate services
-            self._register_replicate_services()
-
-            # Register ISA Modal services
-            self._register_isa_services()
-
-            # Register YYDS services
-            self._register_yyds_services()
-
-            logger.info("AI Factory initialized with centralized provider API key management")
-
-        except Exception as e:
-            logger.error(f"Error initializing services: {e}")
-            logger.warning("Some services may not be available")
-
-    def _register_ollama_services(self):
-        """Register Ollama provider and services"""
-        try:
-            from isa_model.inference.providers.ollama_provider import OllamaProvider
-            from isa_model.inference.services.llm.ollama_llm_service import OllamaLLMService
-            from isa_model.inference.services.embedding.ollama_embed_service import OllamaEmbedService
-            from isa_model.inference.services.vision.ollama_vision_service import OllamaVisionService
-
-            self.register_provider('ollama', OllamaProvider)
-            self.register_service('ollama', ModelType.LLM, OllamaLLMService)
-            self.register_service('ollama', ModelType.EMBEDDING, OllamaEmbedService)
-            self.register_service('ollama', ModelType.VISION, OllamaVisionService)
-
-            logger.info("Ollama services registered successfully")
-
-        except ImportError as e:
-            logger.warning(f"Ollama services not available: {e}")
-
-    def _register_openai_services(self):
-        """Register OpenAI provider and services"""
-        try:
-            from isa_model.inference.providers.openai_provider import OpenAIProvider
-            from isa_model.inference.services.llm.openai_llm_service import OpenAILLMService
-            from isa_model.inference.services.audio.openai_tts_service import OpenAITTSService
-            from isa_model.inference.services.audio.openai_stt_service import OpenAISTTService
-            from isa_model.inference.services.embedding.openai_embed_service import OpenAIEmbedService
-            from isa_model.inference.services.vision.openai_vision_service import OpenAIVisionService
-
-            self.register_provider('openai', OpenAIProvider)
-            self.register_service('openai', ModelType.LLM, OpenAILLMService)
-            self.register_service('openai', ModelType.AUDIO, OpenAITTSService)
-            self.register_service('openai', ModelType.EMBEDDING, OpenAIEmbedService)
-            self.register_service('openai', ModelType.VISION, OpenAIVisionService)
-
-            logger.info("OpenAI services registered successfully")
-
-        except ImportError as e:
-            logger.warning(f"OpenAI services not available: {e}")
-
-    def _register_replicate_services(self):
-        """Register Replicate provider and services"""
-        try:
-            from isa_model.inference.providers.replicate_provider import ReplicateProvider
-            from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService
-            from isa_model.inference.services.vision.replicate_vision_service import ReplicateVisionService
-            from isa_model.inference.services.audio.replicate_tts_service import ReplicateTTSService
-
-            self.register_provider('replicate', ReplicateProvider)
-            # Register vision service for general vision tasks
-            self.register_service('replicate', ModelType.VISION, ReplicateVisionService)
-            # Register image generation service for FLUX, ControlNet, LoRA, Upscaling
-            # Note: Using VISION type as IMAGE_GEN is not defined in ModelType
-            # ReplicateImageGenService will be accessed through get_img() methods
-            # Register audio service
-            self.register_service('replicate', ModelType.AUDIO, ReplicateTTSService)
-
-            logger.info("Replicate services registered successfully")
-
-        except ImportError as e:
-            logger.warning(f"Replicate services not available: {e}")
-
-    def _register_isa_services(self):
-        """Register ISA Modal provider and services"""
-        try:
-            from isa_model.inference.services.vision.isA_vision_service import ISAVisionService
-            from isa_model.inference.providers.modal_provider import ModalProvider
-
-            self.register_provider('modal', ModalProvider)
-            self.register_service('modal', ModelType.VISION, ISAVisionService)
-
-            logger.info("ISA Modal services registered successfully")
-
-        except ImportError as e:
-            logger.warning(f"ISA Modal services not available: {e}")
-
-    def _register_yyds_services(self):
-        """Register YYDS provider and services"""
-        try:
-            from isa_model.inference.providers.yyds_provider import YydsProvider
-            from isa_model.inference.services.llm.yyds_llm_service import YydsLLMService
-
-            self.register_provider('yyds', YydsProvider)
-            self.register_service('yyds', ModelType.LLM, YydsLLMService)
-
-            logger.info("YYDS services registered successfully")
-
-        except ImportError as e:
-            logger.warning(f"YYDS services not available: {e}")
-
-    def register_provider(self, name: str, provider_class: Type[BaseProvider]) -> None:
-        """Register an AI provider"""
-        self._providers[name] = provider_class
-
-    def register_service(self, provider_name: str, model_type: ModelType,
-                         service_class: Type[BaseService]) -> None:
-        """Register a service type with its provider"""
-        self._services[(provider_name, model_type)] = service_class
-
-    def create_service(self, provider_name: str, model_type: ModelType,
-                       model_name: str, config: Optional[Dict[str, Any]] = None) -> BaseService:
-        """Create a service instance with provider-managed configuration"""
-        try:
-            cache_key = f"{provider_name}_{model_type}_{model_name}"
-
-            if cache_key in self._cached_services:
-                return self._cached_services[cache_key]
-
-            # Get provider and service classes
-            provider_class = self._providers.get(provider_name)
-            service_class = self._services.get((provider_name, model_type))
-
-            if not provider_class:
-                raise ValueError(f"No provider registered for '{provider_name}'")
 
-            if not service_class:
-                raise ValueError(
-                    f"No service registered for provider '{provider_name}' and model type '{model_type}'"
-                )
-
-            # Create provider with user config (provider handles .env loading)
-            provider = provider_class(config=config)
-            service = service_class(provider=provider, model_name=model_name)
-
-            self._cached_services[cache_key] = service
-            return service
-
-        except Exception as e:
-            logger.error(f"Error creating service: {e}")
-            raise
+            logger.info("AI Factory initialized with centralized ModelManager and ConfigManager")
+            AIFactory._is_initialized = True
 
-    # Convenient methods for common services with updated defaults
-    def get_llm_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                        config: Optional[Dict[str, Any]] = None) -> BaseService:
+    # Core service methods using centralized architecture
+    def get_llm(self, model_name: Optional[str] = None, provider: Optional[str] = None,
+                config: Optional[Dict[str, Any]] = None) -> BaseService:
         """
         Get a LLM service instance with automatic defaults
 
         Args:
-            model_name: Name of the model to use (defaults: OpenAI="gpt-4.1-nano", Ollama="llama3.2:3b", YYDS="claude-sonnet-4-20250514")
+            model_name: Name of the model to use (defaults: OpenAI="gpt-4.1-mini", Ollama="llama3.2:3b", YYDS="claude-sonnet-4-20250514")
             provider: Provider name (defaults to 'openai' for production, 'ollama' for dev)
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
-                   Can include: streaming=True/False, temperature, max_tokens, etc.
+            config: Optional configuration dictionary
 
         Returns:
             LLM service instance
         """
         # Set defaults based on provider
         if provider == "openai":
-            final_model_name = model_name or "gpt-4.1-nano"
+            final_model_name = model_name or "gpt-4.1-mini"
             final_provider = provider
         elif provider == "ollama":
             final_model_name = model_name or "llama3.2:3b-instruct-fp16"
@@ -227,137 +73,99 @@
             # Default provider selection - OpenAI with cheapest model
             final_provider = provider or "openai"
             if final_provider == "openai":
-                final_model_name = model_name or "gpt-4.1-nano"
-            else:
+                final_model_name = model_name or "gpt-4.1-mini"
+            elif final_provider == "ollama":
                 final_model_name = model_name or "llama3.2:3b-instruct-fp16"
+            else:
+                final_model_name = model_name or "gpt-4.1-mini"
 
-        return self.create_service(final_provider, ModelType.LLM, final_model_name, config)
-
-    def get_embedding_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                              config: Optional[Dict[str, Any]] = None) -> BaseService:
-        """
-        Get an embedding service instance with automatic defaults
-
-        Args:
-            model_name: Name of the model to use (defaults: OpenAI="text-embedding-3-small", Ollama="bge-m3")
-            provider: Provider name (defaults to 'openai' for production, 'ollama' for dev)
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
-
-        Returns:
-            Embedding service instance
-        """
-        # Set defaults based on provider
-        if provider == "openai":
-            final_model_name = model_name or "text-embedding-3-small"
-            final_provider = provider
-        elif provider == "ollama":
-            final_model_name = model_name or "bge-m3"
-            final_provider = provider
-        else:
-            # Default provider selection
-            final_provider = provider or "openai"
+        # Create service using new centralized approach
+        try:
             if final_provider == "openai":
-                final_model_name = model_name or "text-embedding-3-small"
+                from isa_model.inference.services.llm.openai_llm_service import OpenAILLMService
+                return OpenAILLMService(provider_name=final_provider, model_name=final_model_name,
+                                        model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "ollama":
+                from isa_model.inference.services.llm.ollama_llm_service import OllamaLLMService
+                return OllamaLLMService(provider_name=final_provider, model_name=final_model_name,
+                                        model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "yyds":
+                from isa_model.inference.services.llm.yyds_llm_service import YydsLLMService
+                return YydsLLMService(provider_name=final_provider, model_name=final_model_name,
+                                      model_manager=self.model_manager, config_manager=self.config_manager)
             else:
-                final_model_name = model_name or "bge-m3"
-
-        return self.create_service(final_provider, ModelType.EMBEDDING, final_model_name, config)
+                raise ValueError(f"Unsupported LLM provider: {final_provider}")
+        except Exception as e:
+            logger.error(f"Failed to create LLM service: {e}")
+            raise
 
-    def get_vision_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                           config: Optional[Dict[str, Any]] = None) -> BaseVisionService:
+    def get_vision(
+        self,
+        model_name: Optional[str] = None,
+        provider: Optional[str] = None,
+        config: Optional[Dict[str, Any]] = None
+    ) -> 'BaseVisionService':
         """
-        Get a vision service instance with automatic defaults
+        Get vision service with automatic defaults
 
         Args:
-            model_name: Name of the model to use (defaults: OpenAI="gpt-4.1-mini", Ollama="gemma3:4b")
-            provider: Provider name (defaults to 'openai' for production, 'ollama' for dev)
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
+            model_name: Model name. Special names:
+                - "isa_vision_table": Table extraction service
+                - "isa_vision_ui": UI detection service
+                - "isa_vision_doc": Document analysis service
+                - Default: "gpt-4.1-mini"
+            provider: Provider name (auto-detected for ISA services)
+            config: Optional configuration override
 
         Returns:
             Vision service instance
         """
-        # Set defaults based on provider
+        # Handle special ISA vision services
+        if model_name in ["isa_vision_table", "isa_vision_ui", "isa_vision_doc"]:
+            try:
+                from isa_model.deployment.services.simple_auto_deploy_vision_service import SimpleAutoDeployVisionService
+                logger.info(f"Creating auto-deploy service wrapper for {model_name}")
+                return SimpleAutoDeployVisionService(model_name, config)
+            except Exception as e:
+                logger.error(f"Failed to create ISA vision service: {e}")
+                raise
+
+        # Set defaults for regular services
         if provider == "openai":
            final_model_name = model_name or "gpt-4.1-mini"
            final_provider = provider
        elif provider == "ollama":
            final_model_name = model_name or "llama3.2-vision:latest"
            final_provider = provider
+        elif provider == "replicate":
+            final_model_name = model_name or "meta/llama-2-70b-chat"
+            final_provider = provider
         else:
             # Default provider selection
             final_provider = provider or "openai"
             if final_provider == "openai":
                 final_model_name = model_name or "gpt-4.1-mini"
-            else:
+            elif final_provider == "ollama":
                 final_model_name = model_name or "llama3.2-vision:latest"
+            else:
+                final_model_name = model_name or "gpt-4.1-mini"
 
-        return cast(BaseVisionService, self.create_service(final_provider, ModelType.VISION, final_model_name, config))
-
-    def get_image_gen(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                      config: Optional[Dict[str, Any]] = None) -> 'BaseImageGenService':
-        """
-        Get an image generation service instance with automatic defaults
-
-        Args:
-            model_name: Name of the model to use. Supports:
-                - FLUX models: "flux-pro", "flux-schnell", "flux-dev"
-                - ControlNet: "flux-controlnet", "xlabs-ai/flux-dev-controlnet"
-                - LoRA: "flux-lora", "flux-dev-lora"
-                - InstantID: "instant-id", "zsxkib/instant-id"
-                - Character: "consistent-character", "fofr/consistent-character"
-                - Upscaling: "ultimate-upscaler", "ultimate-sd-upscale"
-                - Detail: "adetailer"
-            provider: Provider name (defaults to 'replicate')
-            config: Optional configuration dictionary
-
-        Returns:
-            Image generation service instance with FLUX, ControlNet, LoRA, InstantID, Upscaling support
-        """
-        # Set defaults based on provider
-        final_provider = provider or "replicate"
-
-        # Default model selection
-        if not model_name:
-            final_model_name = "black-forest-labs/flux-schnell"
-        else:
-            # Map short names to full Replicate model names
-            model_mapping = {
-                "flux-pro": "black-forest-labs/flux-pro",
-                "flux-schnell": "black-forest-labs/flux-schnell",
-                "flux-dev": "black-forest-labs/flux-dev",
-                "flux-controlnet": "xlabs-ai/flux-dev-controlnet",
-                "flux-lora": "xlabs-ai/flux-lora",
-                "instant-id": "zsxkib/instant-id",
-                "consistent-character": "fofr/consistent-character",
-                "ultimate-upscaler": "philz1337x/clarity-upscaler",
-                "ultimate-sd-upscale": "philz1337x/clarity-upscaler",
-                "adetailer": "sczhou/codeformer"
-            }
-            final_model_name = model_mapping.get(model_name, model_name)
-
-        # Create ReplicateImageGenService directly for image generation
+        # Create service using new centralized approach
         try:
-            from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService
-            from isa_model.inference.providers.replicate_provider import ReplicateProvider
-
-            # Create provider with config
-            provider_instance = ReplicateProvider(config=config)
-            service = ReplicateImageGenService(provider=provider_instance, model_name=final_model_name)
-
-            return service
-
-        except ImportError as e:
-            logger.error(f"Failed to import ReplicateImageGenService: {e}")
-            raise ValueError(f"Image generation service not available: {e}")
+            if final_provider == "openai":
+                from isa_model.inference.services.vision.openai_vision_service import OpenAIVisionService
+                return OpenAIVisionService(provider_name=final_provider, model_name=final_model_name,
+                                           model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "replicate":
+                from isa_model.inference.services.vision.replicate_vision_service import ReplicateVisionService
+                return ReplicateVisionService(provider_name=final_provider, model_name=final_model_name,
+                                              model_manager=self.model_manager, config_manager=self.config_manager)
+            else:
+                raise ValueError(f"Unsupported vision provider: {final_provider}")
         except Exception as e:
-            logger.error(f"Failed to create image generation service: {e}")
+            logger.error(f"Failed to create vision service: {e}")
             raise
 
-    def get_image_generation_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                                     config: Optional[Dict[str, Any]] = None) -> 'BaseImageGenService':
-        """Alias for get_image_gen() method"""
-        return self.get_image_gen(model_name, provider, config)
-
     def get_img(self, type: str = "t2i", model_name: Optional[str] = None, provider: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None) -> 'BaseImageGenService':
         """
@@ -397,112 +205,138 @@ class AIFactory:
         else:
             raise ValueError(f"Unknown image generation type: {type}. Use 't2i' or 'i2i'")
 
-        # Use the new get_image_gen method
-        return self.get_image_gen(final_model_name, final_provider, config)
-
-    def get_audio_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                          config: Optional[Dict[str, Any]] = None) -> BaseService:
+        # Create service using new centralized architecture
+        try:
+            if final_provider == "replicate":
+                from isa_model.inference.services.img.replicate_image_gen_service import ReplicateImageGenService
+                return ReplicateImageGenService(provider_name=final_provider, model_name=final_model_name,
+                                                model_manager=self.model_manager, config_manager=self.config_manager)
+            else:
+                raise ValueError(f"Unsupported image generation provider: {final_provider}")
+        except Exception as e:
+            logger.error(f"Failed to create image generation service: {e}")
+            raise
+
+    def get_stt(self, model_name: Optional[str] = None, provider: Optional[str] = None,
+                config: Optional[Dict[str, Any]] = None) -> 'BaseSTTService':
         """
-        Get an audio service instance (TTS) with automatic defaults
+        Get Speech-to-Text service with automatic defaults
 
         Args:
-            model_name: Name of the model to use (defaults: OpenAI="tts-1")
+            model_name: Name of the model to use (defaults: "whisper-1")
             provider: Provider name (defaults to 'openai')
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
+            config: Optional configuration dictionary
 
         Returns:
-            Audio service instance
+            STT service instance
         """
-        # Set defaults based on provider
+        # Set defaults
         final_provider = provider or "openai"
-        if final_provider == "openai":
-            final_model_name = model_name or "tts-1"
-        else:
-            final_model_name = model_name or "tts-1"
+        final_model_name = model_name or "whisper-1"
 
-        return self.create_service(final_provider, ModelType.AUDIO, final_model_name, config)
-
-    def get_tts_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                        config: Optional[Dict[str, Any]] = None) -> 'BaseTTSService':
+        # Create service using new centralized approach
+        try:
+            if final_provider == "openai":
+                from isa_model.inference.services.audio.openai_stt_service import OpenAISTTService
+                return OpenAISTTService(provider_name=final_provider, model_name=final_model_name,
+                                        model_manager=self.model_manager, config_manager=self.config_manager)
+            else:
+                raise ValueError(f"Unsupported STT provider: {final_provider}")
+        except Exception as e:
+            logger.error(f"Failed to create STT service: {e}")
+            raise
+
+    def get_tts(self, model_name: Optional[str] = None, provider: Optional[str] = None,
+                config: Optional[Dict[str, Any]] = None) -> 'BaseTTSService':
         """
-        Get a Text-to-Speech service instance with automatic defaults
+        Get Text-to-Speech service with automatic defaults
 
         Args:
            model_name: Name of the model to use (defaults: Replicate="kokoro-82m", OpenAI="tts-1")
            provider: Provider name (defaults to 'replicate' for production, 'openai' for dev)
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
+            config: Optional configuration dictionary
 
         Returns:
            TTS service instance
        """
        # Set defaults based on provider
        if provider == "replicate":
-            model_name = model_name or "kokoro-82m"
+            final_model_name = model_name or "kokoro-82m"
+            final_provider = provider
        elif provider == "openai":
-            model_name = model_name or "tts-1"
+            final_model_name = model_name or "tts-1"
+            final_provider = provider
        else:
            # Default provider selection
-            provider = provider or "replicate"
-            if provider == "replicate":
-                model_name = model_name or "kokoro-82m"
+            final_provider = provider or "replicate"
+            if final_provider == "replicate":
+                final_model_name = model_name or "kokoro-82m"
            else:
-                model_name = model_name or "tts-1"
-
-        # Ensure model_name is never None
-        if model_name is None:
-            model_name = "tts-1"
+                final_model_name = model_name or "tts-1"
 
-        if provider == "replicate":
-            from isa_model.inference.services.audio.replicate_tts_service import ReplicateTTSService
-            from isa_model.inference.providers.replicate_provider import ReplicateProvider
-
-            # Use full model name for Replicate
-            if model_name == "kokoro-82m":
-                model_name = "jaaari/kokoro-82m:f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13"
-
-            provider_instance = ReplicateProvider(config=config)
-            return ReplicateTTSService(provider=provider_instance, model_name=model_name)
-        else:
-            return cast('BaseTTSService', self.get_audio_service(model_name, provider, config))
+        # Create service using new centralized approach
+        try:
+            if final_provider == "replicate":
+                from isa_model.inference.services.audio.replicate_tts_service import ReplicateTTSService
+                # Use full model name for Replicate
+                if final_model_name == "kokoro-82m":
+                    final_model_name = "jaaari/kokoro-82m:f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13"
+                return ReplicateTTSService(provider_name=final_provider, model_name=final_model_name,
+                                           model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "openai":
+                from isa_model.inference.services.audio.openai_tts_service import OpenAITTSService
+                return OpenAITTSService(provider_name=final_provider, model_name=final_model_name,
+                                        model_manager=self.model_manager, config_manager=self.config_manager)
+            else:
+                raise ValueError(f"Unsupported TTS provider: {final_provider}")
+        except Exception as e:
+            logger.error(f"Failed to create TTS service: {e}")
+            raise
 
-    def get_stt_service(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                        config: Optional[Dict[str, Any]] = None) -> 'BaseSTTService':
+    def get_embed(self, model_name: Optional[str] = None, provider: Optional[str] = None,
+                  config: Optional[Dict[str, Any]] = None) -> BaseService:
        """
-        Get a Speech-to-Text service instance with automatic defaults
+        Get embedding service with automatic defaults
 
        Args:
-            model_name: Name of the model to use (defaults: "whisper-1")
-            provider: Provider name (defaults to 'openai')
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
+            model_name: Name of the model to use (defaults: OpenAI="text-embedding-3-small", Ollama="bge-m3")
+            provider: Provider name (defaults to 'openai' for production)
+            config: Optional configuration dictionary
 
        Returns:
-            STT service instance
+            Embedding service instance
        """
        # Set defaults based on provider
-        provider = provider or "openai"
        if provider == "openai":
-            model_name = model_name or "whisper-1"
-
-        # Ensure model_name is never None
-        if model_name is None:
-            model_name = "whisper-1"
-
-        from isa_model.inference.services.audio.openai_stt_service import OpenAISTTService
-        from isa_model.inference.providers.openai_provider import OpenAIProvider
+            final_model_name = model_name or "text-embedding-3-small"
+            final_provider = provider
+        elif provider == "ollama":
+            final_model_name = model_name or "bge-m3"
+            final_provider = provider
+        else:
+            # Default provider selection
+            final_provider = provider or "openai"
+            if final_provider == "openai":
+                final_model_name = model_name or "text-embedding-3-small"
+            else:
+                final_model_name = model_name or "bge-m3"
 
-        # Create provider and service directly with config
-        provider_instance = OpenAIProvider(config=config)
-        return OpenAISTTService(provider=provider_instance, model_name=model_name)
-
-    def get_available_services(self) -> Dict[str, List[str]]:
-        """Get information about available services"""
-        services = {}
-        for (provider, model_type), service_class in self._services.items():
-            if provider not in services:
-                services[provider] = []
-            services[provider].append(f"{model_type.value}: {service_class.__name__}")
-        return services
-
+        # Create service using new centralized approach
+        try:
+            if final_provider == "openai":
+                from isa_model.inference.services.embedding.openai_embed_service import OpenAIEmbedService
+                return OpenAIEmbedService(provider_name=final_provider, model_name=final_model_name,
+                                          model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "ollama":
+                from isa_model.inference.services.embedding.ollama_embed_service import OllamaEmbedService
+                return OllamaEmbedService(provider_name=final_provider, model_name=final_model_name,
+                                          model_manager=self.model_manager, config_manager=self.config_manager)
+            else:
+                raise ValueError(f"Unsupported embedding provider: {final_provider}")
+        except Exception as e:
+            logger.error(f"Failed to create embedding service: {e}")
+            raise
+
     def clear_cache(self):
         """Clear the service cache"""
         self._cached_services.clear()
@@ -515,260 +349,82 @@ class AIFactory:
             cls._instance = cls()
         return cls._instance
 
-    # Alias method for cleaner API
-    def get_llm(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                config: Optional[Dict[str, Any]] = None) -> BaseService:
-        """
-        Alias for get_llm_service with cleaner naming
-
-        Usage:
-            llm = AIFactory().get_llm()  # Uses gpt-4.1-nano by default
-            llm = AIFactory().get_llm(model_name="llama3.2", provider="ollama")
-            llm = AIFactory().get_llm(provider="yyds")  # Uses claude-sonnet-4-20250514 by default
-            llm = AIFactory().get_llm(model_name="gpt-4.1-mini", provider="openai", config={"streaming": True})
-        """
-        return self.get_llm_service(model_name, provider, config)
-
-    def get_embed(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                  config: Optional[Dict[str, Any]] = None) -> BaseService:
-        """
-        Get embedding service with automatic defaults
-
-        Args:
-            model_name: Name of the model to use (defaults: OpenAI="text-embedding-3-small", Ollama="bge-m3")
-            provider: Provider name (defaults to 'openai' for production)
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
-
-        Returns:
-            Embedding service instance
-
-        Usage:
-            # Default (OpenAI text-embedding-3-small)
-            embed = AIFactory().get_embed()
-
-            # Custom model
-            embed = AIFactory().get_embed(model_name="text-embedding-3-large", provider="openai")
-
-            # Development (Ollama)
-            embed = AIFactory().get_embed(provider="ollama")
-        """
-        return self.get_embedding_service(model_name, provider, config)
-
-    def get_stt(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                config: Optional[Dict[str, Any]] = None) -> 'BaseSTTService':
-        """
-        Get Speech-to-Text service with automatic defaults
-
-        Args:
-            model_name: Name of the model to use (defaults: "whisper-1")
-            provider: Provider name (defaults to 'openai')
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
-
-        Returns:
-            STT service instance
-
-        Usage:
-            # Default (OpenAI whisper-1)
-            stt = AIFactory().get_stt()
-
-            # Custom configuration
-            stt = AIFactory().get_stt(model_name="whisper-1", provider="openai")
-        """
-        return self.get_stt_service(model_name, provider, config)
-
-    def get_tts(self, model_name: Optional[str] = None, provider: Optional[str] = None,
-                config: Optional[Dict[str, Any]] = None) -> 'BaseTTSService':
-        """
-        Get Text-to-Speech service with automatic defaults
-
-        Args:
-            model_name: Name of the model to use (defaults: Replicate="kokoro-82m", OpenAI="tts-1")
-            provider: Provider name (defaults to 'replicate' for production, 'openai' for dev)
-            config: Optional configuration dictionary (auto-loads from .env if not provided)
-
-        Returns:
-            TTS service instance
-
-        Usage:
-            # Default (Replicate kokoro-82m)
-            tts = AIFactory().get_tts()
-
-            # Development (OpenAI tts-1)
-            tts = AIFactory().get_tts(provider="openai")
-
-            # Custom model
-            tts = AIFactory().get_tts(model_name="tts-1-hd", provider="openai")
-        """
-        return self.get_tts_service(model_name, provider, config)
-
-    def get_vision_model(self, model_name: str, provider: str,
-                         config: Optional[Dict[str, Any]] = None) -> BaseService:
-        """Alias for get_vision_service and get_image_generation_service"""
-        if provider == "replicate":
-            return self.get_image_generation_service(model_name, provider, config)
-        else:
-            return self.get_vision_service(model_name, provider, config)
-
-    def get_vision(
-        self,
-        model_name: Optional[str] = None,
-        provider: Optional[str] = None,
-        config: Optional[Dict[str, Any]] = None
-    ) -> 'BaseVisionService':
-        """
-        Get vision service with automatic defaults
-
-        Args:
-            model_name: Model name (default: gpt-4.1-nano)
-            provider: Provider name (default: openai)
-            config: Optional configuration override
-
-        Returns:
-            Vision service instance
-        """
-        # Set defaults
-        if provider is None:
-            provider = "openai"
-        if model_name is None:
-            model_name = "gpt-4.1-nano"
-
-        return self.get_vision_service(
-            model_name=model_name,
-            provider=provider,
-            config=config
-        )
-
-    def get_provider(self, provider_name: str, config: Optional[Dict[str, Any]] = None) -> BaseProvider:
-        """
-        Get a provider instance
-
-        Args:
-            provider_name: Name of the provider ('openai', 'ollama', 'replicate')
-            config: Optional configuration override
-
-        Returns:
-            Provider instance
-        """
-        if provider_name not in self._providers:
-            raise ValueError(f"No provider registered for '{provider_name}'")
-
-        provider_class = self._providers[provider_name]
-        return provider_class(config=config)
-
-    def get_stacked(
-        self,
-        service_name: str,
-        config: Optional[Dict[str, Any]] = None
-    ) -> BaseStackedService:
-        """
-        Get a stacked service by name with automatic defaults
-
-        Args:
-            service_name: Name of the stacked service ('ui_analysis', etc.)
-            config: Optional configuration override
-
-        Returns:
-            Stacked service instance
-
-        Usage:
-            ui_service = AIFactory().get_stacked("ui_analysis", {"task_type": "search"})
-        """
-        if service_name == "ui_analysis":
-            return UIAnalysisService(self, config)
-        elif service_name == "search_analysis":
-            if config is None:
-                config = {}
-            config["task_type"] = "search"
-            return UIAnalysisService(self, config)
-        elif service_name == "content_analysis":
-            if config is None:
-                config = {}
-            config["task_type"] = "content"
-            return UIAnalysisService(self, config)
-        elif service_name == "navigation_analysis":
-            if config is None:
-                config = {}
-            config["task_type"] = "navigation"
-            return UIAnalysisService(self, config)
-        elif service_name == "doc_analysis":
-            return DocAnalysisStackedService(self, config)
-        elif service_name == "flux_professional":
-            return FluxProfessionalService(self)
-        else:
-            raise ValueError(f"Unknown stacked service: {service_name}. Available: ui_analysis, search_analysis, content_analysis, navigation_analysis, doc_analysis, flux_professional")
-
-    def get_ui_analysis(
-        self,
-        task_type: str = "login",
-        config: Optional[Dict[str, Any]] = None
-    ) -> UIAnalysisService:
-        """
-        Get UI Analysis service with task-specific configuration
-
-        Args:
-            task_type: Type of UI task ('login', 'search', 'content', 'navigation')
-            config: Optional configuration override
-
-        Usage:
-            # For login pages (default)
-            ui_service = AIFactory().get_ui_analysis()
-
-            # For search pages
-            ui_service = AIFactory().get_ui_analysis(task_type="search")
-
-            # For content extraction
-            ui_service = AIFactory().get_ui_analysis(task_type="content")
-        """
-        if config is None:
-            config = {}
-        config["task_type"] = task_type
-        return cast(UIAnalysisService, self.get_stacked("ui_analysis", config))
-
-    def get_doc_analysis(
-        self,
-        config: Optional[Dict[str, Any]] = None
-    ) -> DocAnalysisStackedService:
-        """
-        Get Document Analysis service with 5-step pipeline
-
-        Args:
-            config: Optional configuration override
-
-        Usage:
-            # Basic document analysis
-            doc_service = AIFactory().get_doc_analysis()
-
-            # Analyze a document image
-            result = await doc_service.analyze_document("document.png")
-
-            # Get structured data ready for business mapping
-            structured_data = result["final_output"]["final_structured_data"]
-        """
-        return cast(DocAnalysisStackedService, self.get_stacked("doc_analysis", config))
+    # Modal service deployment methods for AutoDeployVisionService
+    def _get_modal_app_name(self, model_name: str) -> str:
+        """Get Modal app name for a given model"""
+        app_mapping = {
+            "isa_vision_table": "qwen-vision-table",
+            "isa_vision_ui": "isa-vision-ui",
+            "isa_vision_doc": "isa-vision-doc"
+        }
+        return app_mapping.get(model_name, f"unknown-{model_name}")
+
+    def _check_modal_service_availability(self, app_name: str) -> bool:
+        """Check if Modal service is available and running"""
+        try:
+            import modal
+            # Try to lookup the app
+            app = modal.App.lookup(app_name)
+            return True
+        except Exception as e:
+            logger.debug(f"Modal service {app_name} not available: {e}")
+            return False
 
-    def get_flux_professional(
-        self,
-        config: Optional[Dict[str, Any]] = None
-    ) -> FluxProfessionalService:
-        """
-        Get FLUX Professional Pipeline service for multi-stage image generation
-
-        Args:
-            config: Optional configuration override
-
-        Usage:
-            # Basic professional image generation
-            flux_service = AIFactory().get_flux_professional()
-
-            # Generate professional image with character consistency
-            result = await flux_service.invoke({
-                "prompt": "portrait of a warrior in fantasy armor",
-                "face_image": "reference_face.jpg",  # For character consistency
-                "lora_style": "realism",
-                "upscale_factor": 4
-            })
+    def _auto_deploy_modal_service(self, model_name: str) -> bool:
+        """Auto-deploy Modal service for given model"""
+        try:
+            import subprocess
+            import os
+            from pathlib import Path
+
+            # Get the Modal service file path
+            service_files = {
+                "isa_vision_table": "isa_vision_table_service.py",
+                "isa_vision_ui": "isa_vision_ui_service.py",
+                "isa_vision_doc": "isa_vision_doc_service.py"
+            }
 
-            # Get final high-quality image
-            final_image_url = result["final_output"]["image_url"]
-        """
-        return cast(FluxProfessionalService, self.get_stacked("flux_professional", config))
+            if model_name not in service_files:
+                logger.error(f"No Modal service file found for {model_name}")
+                return False
+
+            # Get the service file path
+            service_file = service_files[model_name]
+            modal_dir = Path(__file__).parent.parent / "deployment" / "cloud" / "modal"
+            service_path = modal_dir / service_file
+
+            if not service_path.exists():
+                logger.error(f"Modal service file not found: {service_path}")
+                return False
+
+            logger.info(f"Deploying Modal service: {service_file}")
+
+            # Run modal deploy command
+            result = subprocess.run(
+                ["modal", "deploy", str(service_path)],
+                capture_output=True,
+                text=True,
+                timeout=600,  # 10 minute timeout
+                cwd=str(modal_dir)
+            )
+
+            if result.returncode == 0:
+                logger.info(f"Successfully deployed {model_name} Modal service")
+                return True
+            else:
+                logger.error(f"Failed to deploy {model_name}: {result.stderr}")
+                return False
+
+        except subprocess.TimeoutExpired:
+            logger.error(f"Deployment timeout for {model_name}")
+            return False
+        except Exception as e:
+            logger.error(f"Exception during {model_name} deployment: {e}")
+            return False
+
+    def _shutdown_modal_service(self, model_name: str):
+        """Shutdown Modal service (optional - Modal handles auto-scaling)"""
+        # Modal services auto-scale to zero, so explicit shutdown isn't required
+        # This method is here for compatibility with AutoDeployVisionService
+        logger.info(f"Modal service {model_name} will auto-scale to zero when idle")
+        pass
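Taken together, the ai_factory.py rewrite replaces the provider registry, the get_*_service variants, and the stacked-service accessors with six core methods. A minimal usage sketch, assuming only the signatures and defaults visible in this diff (the AIFactory() call style follows the usage examples in the removed 0.3.5 docstrings):

    from isa_model.inference.ai_factory import AIFactory

    factory = AIFactory()  # singleton-guarded __init__, per the _is_initialized check above

    llm = factory.get_llm()                                     # OpenAI "gpt-4.1-mini" by default
    vision = factory.get_vision(model_name="isa_vision_ui")     # routes to the Modal auto-deploy wrapper
    image = factory.get_img(type="t2i", provider="replicate")   # only Replicate is wired up for image gen
    stt = factory.get_stt()                                     # OpenAI "whisper-1"
    tts = factory.get_tts(provider="openai")                    # OpenAI "tts-1"; default provider is Replicate
    embed = factory.get_embed(provider="ollama")                # Ollama "bge-m3"

Note also the constructor change visible throughout the diff: services are now built with provider_name, model_name, model_manager, and config_manager keyword arguments rather than a provider instance, so custom service subclasses must accept that signature.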