isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
isa_model/deployment/services/auto_hf_modal_deployer.py (new file)
@@ -0,0 +1,894 @@
+"""
+Automated HuggingFace to Modal Deployment Service
+
+This service automatically generates and deploys HuggingFace models to Modal
+with optimized configurations based on model type and architecture.
+"""
+
+import os
+import json
+import time
+import requests
+import tempfile
+from typing import Dict, List, Optional, Any, Union
+from pathlib import Path
+from dataclasses import dataclass
+from huggingface_hub import HfApi, model_info
+import logging
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class ModelConfig:
+    """Configuration for a HuggingFace model deployment"""
+    model_id: str
+    model_type: str  # text, vision, audio, image, embedding
+    architecture: str
+    parameters: str
+    gpu_requirements: str
+    memory_gb: int
+    container_memory_mb: int
+    python_version: str = "3.10"
+    dependencies: List[str] = None
+    capabilities: List[str] = None
+    max_tokens: int = 2048
+    estimated_cost_per_hour: float = 0.0
+
+class HuggingFaceModalDeployer:
+    """
+    Service to automatically deploy HuggingFace models to Modal
+    """
+
+    def __init__(self):
+        self.hf_api = HfApi()
+        self.supported_architectures = {
+            # Text/LLM models
+            'llama': {'type': 'text', 'gpu': 'A100', 'memory': 32768, 'cost': 4.0},
+            'mistral': {'type': 'text', 'gpu': 'A100', 'memory': 24576, 'cost': 4.0},
+            'qwen': {'type': 'text', 'gpu': 'A100', 'memory': 24576, 'cost': 4.0},
+            'gemma': {'type': 'text', 'gpu': 'A10G', 'memory': 16384, 'cost': 1.2},
+            'phi': {'type': 'text', 'gpu': 'A10G', 'memory': 16384, 'cost': 1.2},
+            'gpt': {'type': 'text', 'gpu': 'A100', 'memory': 32768, 'cost': 4.0},
+
+            # Vision models
+            'clip': {'type': 'vision', 'gpu': 'A10G', 'memory': 16384, 'cost': 1.2},
+            'blip': {'type': 'vision', 'gpu': 'A10G', 'memory': 16384, 'cost': 1.2},
+            'qwen2_vl': {'type': 'vision', 'gpu': 'A100', 'memory': 32768, 'cost': 4.0},
+            'llava': {'type': 'vision', 'gpu': 'A100', 'memory': 24576, 'cost': 4.0},
+            'fuyu': {'type': 'vision', 'gpu': 'A100', 'memory': 32768, 'cost': 4.0},
+
+            # Audio models
+            'whisper': {'type': 'audio', 'gpu': 'A10G', 'memory': 8192, 'cost': 1.2},
+            'wav2vec2': {'type': 'audio', 'gpu': 'A10G', 'memory': 8192, 'cost': 1.2},
+            'musicgen': {'type': 'audio', 'gpu': 'A100', 'memory': 16384, 'cost': 4.0},
+            'bark': {'type': 'audio', 'gpu': 'A100', 'memory': 16384, 'cost': 4.0},
+
+            # Image generation models
+            'stable-diffusion': {'type': 'image', 'gpu': 'A100', 'memory': 16384, 'cost': 4.0},
+            'flux': {'type': 'image', 'gpu': 'A100', 'memory': 24576, 'cost': 4.0},
+            'dall-e': {'type': 'image', 'gpu': 'A100', 'memory': 16384, 'cost': 4.0},
+
+            # Embedding models
+            'sentence-transformers': {'type': 'embedding', 'gpu': 'A10G', 'memory': 8192, 'cost': 1.2},
+            'e5': {'type': 'embedding', 'gpu': 'A10G', 'memory': 8192, 'cost': 1.2},
+            'bge': {'type': 'embedding', 'gpu': 'A10G', 'memory': 8192, 'cost': 1.2},
+        }
+
+    def analyze_model(self, model_id: str) -> ModelConfig:
+        """
+        Analyze a HuggingFace model and determine deployment configuration
+
+        Args:
+            model_id: HuggingFace model ID (e.g., "microsoft/DialoGPT-medium")
+
+        Returns:
+            ModelConfig with deployment settings
+        """
+        try:
+            # Get model information from HuggingFace
+            info = model_info(model_id)
+
+            # Extract model details
+            architecture = self._detect_architecture(model_id, info)
+            model_type = self._determine_model_type(model_id, info, architecture)
+            parameters = self._estimate_parameters(info)
+
+            # Get deployment requirements based on architecture
+            requirements = self.supported_architectures.get(
+                architecture.lower(),
+                {'type': 'text', 'gpu': 'A10G', 'memory': 16384, 'cost': 1.2}
+            )
+
+            # Generate capabilities based on model type and tags
+            capabilities = self._generate_capabilities(model_type, info)
+
+            # Generate dependencies based on model type
+            dependencies = self._generate_dependencies(model_type, architecture, info)
+
+            return ModelConfig(
+                model_id=model_id,
+                model_type=model_type,
+                architecture=architecture,
+                parameters=parameters,
+                gpu_requirements=requirements['gpu'],
+                memory_gb=requirements['memory'] // 1024,
+                container_memory_mb=requirements['memory'],
+                dependencies=dependencies,
+                capabilities=capabilities,
+                estimated_cost_per_hour=requirements['cost']
+            )
+
+        except Exception as e:
+            logger.error(f"Error analyzing model {model_id}: {e}")
+            raise
+
+    def _detect_architecture(self, model_id: str, info) -> str:
+        """Detect model architecture from model ID and metadata"""
+        model_id_lower = model_id.lower()
+
+        # Check for specific architectures in model ID
+        for arch in self.supported_architectures.keys():
+            if arch.replace('_', '-') in model_id_lower or arch.replace('-', '_') in model_id_lower:
+                return arch
+
+        # Check model tags and config
+        if hasattr(info, 'tags'):
+            for tag in info.tags:
+                tag_lower = tag.lower()
+                for arch in self.supported_architectures.keys():
+                    if arch in tag_lower:
+                        return arch
+
+        # Check config architectures
+        if hasattr(info, 'config') and info.config:
+            config_str = str(info.config).lower()
+            for arch in self.supported_architectures.keys():
+                if arch in config_str:
+                    return arch
+
+        # Default fallback
+        return 'transformers'
+
+    def _determine_model_type(self, model_id: str, info, architecture: str) -> str:
+        """Determine the primary model type"""
+        model_id_lower = model_id.lower()
+
+        # Check for specific model types in ID
+        if any(x in model_id_lower for x in ['vision', 'clip', 'blip', 'llava', 'qwen2-vl', 'fuyu']):
+            return 'vision'
+        elif any(x in model_id_lower for x in ['whisper', 'wav2vec', 'audio', 'speech', 'tts', 'stt']):
+            return 'audio'
+        elif any(x in model_id_lower for x in ['stable-diffusion', 'sd-', 'flux', 'dall-e', 'imagen']):
+            return 'image'
+        elif any(x in model_id_lower for x in ['embed', 'sentence-transformer', 'e5-', 'bge-']):
+            return 'embedding'
+
+        # Check tags
+        if hasattr(info, 'tags'):
+            for tag in info.tags:
+                tag_lower = tag.lower()
+                if tag_lower in ['computer-vision', 'image-classification', 'object-detection']:
+                    return 'vision'
+                elif tag_lower in ['automatic-speech-recognition', 'text-to-speech', 'audio']:
+                    return 'audio'
+                elif tag_lower in ['text-to-image', 'image-generation']:
+                    return 'image'
+                elif tag_lower in ['sentence-similarity', 'feature-extraction']:
+                    return 'embedding'
+
+        # Use architecture mapping
+        if architecture in self.supported_architectures:
+            return self.supported_architectures[architecture]['type']
+
+        return 'text'  # Default
+
+    def _estimate_parameters(self, info) -> str:
+        """Estimate model parameters from model info"""
+        if hasattr(info, 'config') and info.config:
+            config = info.config
+            if isinstance(config, dict):
+                # Try different parameter estimation methods
+                if 'num_parameters' in config:
+                    params = config['num_parameters']
+                elif 'd_model' in config and 'n_layer' in config:
+                    # Transformer estimation
+                    d_model = config.get('d_model', 768)
+                    n_layer = config.get('n_layer', 12)
+                    vocab_size = config.get('vocab_size', 50000)
+                    params = (d_model * d_model * 4 * n_layer) + (vocab_size * d_model)
+                else:
+                    return 'Unknown'
+
+                # Format parameters
+                if params > 1e9:
+                    return f"{params/1e9:.1f}B"
+                elif params > 1e6:
+                    return f"{params/1e6:.0f}M"
+                else:
+                    return f"{params/1e3:.0f}K"
+
+        return 'Unknown'
+
+    def _generate_capabilities(self, model_type: str, info) -> List[str]:
+        """Generate capabilities list based on model type"""
+        base_capabilities = {
+            'text': ['text_generation', 'chat', 'completion'],
+            'vision': ['image_analysis', 'image_understanding', 'visual_question_answering'],
+            'audio': ['speech_recognition', 'audio_processing'],
+            'image': ['image_generation', 'text_to_image'],
+            'embedding': ['text_embedding', 'similarity_search', 'semantic_search']
+        }
+
+        capabilities = base_capabilities.get(model_type, ['general_ai'])
+
+        # Add specific capabilities based on tags
+        if hasattr(info, 'tags'):
+            for tag in info.tags:
+                if tag == 'conversational':
+                    capabilities.append('chat')
+                elif tag == 'question-answering':
+                    capabilities.append('question_answering')
+                elif tag == 'summarization':
+                    capabilities.append('text_summarization')
+                elif tag == 'translation':
+                    capabilities.append('translation')
+
+        return list(set(capabilities))
+
+    def _generate_dependencies(self, model_type: str, architecture: str, info) -> List[str]:
+        """Generate Python dependencies based on model type and architecture"""
+        base_deps = [
+            "torch>=2.0.0",
+            "transformers>=4.35.0",
+            "accelerate>=0.24.0",
+            "numpy>=1.24.0",
+            "requests>=2.31.0",
+            "httpx>=0.26.0",
+            "pydantic>=2.0.0",
+        ]
+
+        type_deps = {
+            'vision': [
+                "Pillow>=10.0.0",
+                "opencv-python>=4.8.0",
+                "torchvision>=0.15.0",
+            ],
+            'audio': [
+                "librosa>=0.10.0",
+                "soundfile>=0.12.0",
+                "torchaudio>=2.0.0",
+            ],
+            'image': [
+                "diffusers>=0.21.0",
+                "Pillow>=10.0.0",
+                "controlnet-aux>=0.3.0",
+            ],
+            'embedding': [
+                "sentence-transformers>=2.2.0",
+                "faiss-cpu>=1.7.0",
+            ]
+        }
+
+        arch_deps = {
+            'whisper': ["openai-whisper>=20231117"],
+            'stable-diffusion': ["diffusers>=0.21.0", "controlnet-aux>=0.3.0"],
+            'qwen2_vl': ["qwen-vl-utils", "av", "decord"],
+            'llava': ["llava>=1.1.0"],
+        }
+
+        deps = base_deps.copy()
+        deps.extend(type_deps.get(model_type, []))
+        deps.extend(arch_deps.get(architecture, []))
+
+        return list(set(deps))
+
+    def generate_modal_service(self, config: ModelConfig) -> str:
+        """
+        Generate Modal deployment code for a HuggingFace model
+
+        Args:
+            config: Model configuration
+
+        Returns:
+            Generated Python code for Modal deployment
+        """
+        service_name = config.model_id.replace('/', '_').replace('-', '_').lower()
+
+        template = f'''"""
+{config.model_id} Modal Service
+
+Automatically generated deployment for {config.model_id}
+- Model Type: {config.model_type}
+- Architecture: {config.architecture}
+- Parameters: {config.parameters}
+- Capabilities: {', '.join(config.capabilities)}
+"""
+
+import modal
+import time
+import json
+import os
+import logging
+import base64
+import tempfile
+from typing import Dict, List, Optional, Any, Union
+from pathlib import Path
+
+# Define Modal application
+app = modal.App("isa-{service_name}")
+
+# Define Modal container image
+image = (
+    modal.Image.debian_slim(python_version="{config.python_version}")
+    .pip_install([
+{self._format_dependencies(config.dependencies)}
+    ])
+    .apt_install([
+        "ffmpeg",
+        "libsm6",
+        "libxext6",
+        "libxrender-dev",
+        "libglib2.0-0",
+        "libgl1-mesa-glx",
+        "git-lfs"
+    ])
+    .env({{
+        "TRANSFORMERS_CACHE": "/models",
+        "TORCH_HOME": "/models/torch",
+        "HF_HOME": "/models",
+        "CUDA_VISIBLE_DEVICES": "0",
+        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512"
+    }})
+)
+
+# Model Service
+@app.cls(
+    gpu="{config.gpu_requirements}",
+    image=image,
+    memory={config.container_memory_mb},
+    timeout=1800,
+    scaledown_window=300,
+    min_containers=0,
+    max_containers=5,
+)
+class {service_name.title().replace('_', '')}Service:
+    """
+    {config.model_id} Service
+
+    Model: {config.model_id}
+    Architecture: {config.architecture}
+    Parameters: {config.parameters}
+    Capabilities: {', '.join(config.capabilities)}
+    """
+
+    @modal.enter()
+    def load_model(self):
+        """Load {config.model_id} model and dependencies"""
+        print("Loading {config.model_id}...")
+        start_time = time.time()
+
+        self.model = None
+        self.tokenizer = None
+        self.processor = None
+        self.logger = logging.getLogger(__name__)
+        self.request_count = 0
+        self.total_processing_time = 0.0
+
+        try:
+            import torch
+            from transformers import AutoModel, AutoTokenizer, AutoProcessor
+
+            model_name = "{config.model_id}"
+
+            print(f"Loading model: {{model_name}}")
+
+            # Load tokenizer/processor
+            try:
+                self.processor = AutoProcessor.from_pretrained(model_name)
+                print("✅ Processor loaded")
+            except:
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+                print("✅ Tokenizer loaded")
+
+            # Load model with optimizations
+            self.model = AutoModel.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                low_cpu_mem_usage=True,
+                use_cache=True
+            )
+
+            self.model.eval()
+
+            # Try to compile model for faster inference
+            try:
+                self.model = torch.compile(self.model, mode="reduce-overhead")
+                print("✅ Model compiled for faster inference")
+            except Exception as e:
+                print(f"⚠️ Model compilation failed: {{e}}")
+
+            load_time = time.time() - start_time
+            print(f"{config.model_id} loaded successfully in {{load_time:.2f}}s")
+
+            self.models_loaded = True
+
+        except Exception as e:
+            print(f"Model loading failed: {{e}}")
+            import traceback
+            traceback.print_exc()
+            self.models_loaded = False
+
+{self._generate_inference_methods(config)}
+
+    @modal.method()
+    def health_check(self) -> Dict[str, Any]:
+        """Health check endpoint"""
+        return {{
+            'status': 'healthy',
+            'service': 'isa-{service_name}',
+            'provider': 'ISA',
+            'models_loaded': self.models_loaded,
+            'model': '{config.model_id}',
+            'architecture': '{config.architecture}',
+            'timestamp': time.time(),
+            'gpu': '{config.gpu_requirements}',
+            'memory_usage': '{config.memory_gb}GB',
+            'request_count': self.request_count,
+            'capabilities': {config.capabilities}
+        }}
+
+# Deployment functions
+@app.function()
+def deploy_info():
+    """Deployment information"""
+    return {{
+        'service': 'isa-{service_name}',
+        'version': '1.0.0',
+        'description': 'ISA {config.model_id} service',
+        'model': '{config.model_id}',
+        'architecture': '{config.architecture}',
+        'gpu': '{config.gpu_requirements}',
+        'capabilities': {config.capabilities},
+        'deployment_time': time.time()
+    }}
+
+if __name__ == "__main__":
+    print("ISA {config.model_id} Service - Modal Deployment")
+    print("Deploy with: modal deploy {service_name}_service.py")
+    print()
+    print("Model: {config.model_id}")
+    print("Architecture: {config.architecture}")
+    print("Parameters: {config.parameters}")
+    print("GPU: {config.gpu_requirements}")
+    print("Capabilities: {', '.join(config.capabilities)}")
+'''
+
+        return template
+
+    def _format_dependencies(self, dependencies: List[str]) -> str:
+        """Format dependencies for template"""
+        formatted = []
+        for dep in dependencies:
+            formatted.append(f' "{dep}",')
+        return '\n'.join(formatted)
+
+    def _generate_inference_methods(self, config: ModelConfig) -> str:
+        """Generate inference methods based on model type"""
+        methods = {
+            'text': self._text_generation_method,
+            'vision': self._vision_analysis_method,
+            'audio': self._audio_processing_method,
+            'image': self._image_generation_method,
+            'embedding': self._embedding_method
+        }
+
+        return methods.get(config.model_type, self._generic_inference_method)(config)
+
+    def _text_generation_method(self, config: ModelConfig) -> str:
+        return '''
+    @modal.method()
+    def generate_text(
+        self,
+        prompt: str,
+        max_tokens: int = 512,
+        temperature: float = 0.7,
+        top_p: float = 0.9
+    ) -> Dict[str, Any]:
+        """Generate text using the model"""
+        start_time = time.time()
+        self.request_count += 1
+
+        try:
+            if not self.models_loaded or not self.model:
+                raise RuntimeError("Model not loaded")
+
+            # Tokenize input
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True
+            ).to("cuda")
+
+            # Generate response
+            import torch
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    **inputs,
+                    max_new_tokens=max_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    do_sample=True,
+                    pad_token_id=self.tokenizer.eos_token_id
+                )
+
+            # Decode response
+            response = self.tokenizer.decode(
+                outputs[0][inputs.input_ids.shape[1]:],
+                skip_special_tokens=True
+            )
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'text': response,
+                'processing_time': processing_time,
+                'model': self.model.config.name_or_path
+            }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }'''
+
+    def _vision_analysis_method(self, config: ModelConfig) -> str:
+        return '''
+    @modal.method()
+    def analyze_image(
+        self,
+        image_b64: str,
+        prompt: str = "Describe this image.",
+        max_tokens: int = 512
+    ) -> Dict[str, Any]:
+        """Analyze image using the model"""
+        start_time = time.time()
+        self.request_count += 1
+
+        try:
+            if not self.models_loaded or not self.model:
+                raise RuntimeError("Model not loaded")
+
+            # Decode image
+            image_data = base64.b64decode(image_b64)
+
+            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
+                tmp_file.write(image_data)
+                tmp_file.flush()
+
+                from PIL import Image
+                image = Image.open(tmp_file.name)
+
+                # Process inputs
+                if self.processor:
+                    inputs = self.processor(text=prompt, images=image, return_tensors="pt")
+                else:
+                    # Fallback for models without processor
+                    inputs = self.tokenizer(prompt, return_tensors="pt")
+
+                inputs = inputs.to("cuda")
+
+                # Generate response
+                import torch
+                with torch.no_grad():
+                    outputs = self.model.generate(
+                        **inputs,
+                        max_new_tokens=max_tokens,
+                        do_sample=True
+                    )
+
+                # Decode response
+                if self.processor:
+                    response = self.processor.decode(outputs[0], skip_special_tokens=True)
+                else:
+                    response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                os.unlink(tmp_file.name)
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'text': response,
+                'processing_time': processing_time,
+                'model': self.model.config.name_or_path
+            }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }'''
+
+    def _audio_processing_method(self, config: ModelConfig) -> str:
+        return '''
+    @modal.method()
+    def process_audio(
+        self,
+        audio_b64: str,
+        task: str = "transcribe"
+    ) -> Dict[str, Any]:
+        """Process audio using the model"""
+        start_time = time.time()
+        self.request_count += 1
+
+        try:
+            if not self.models_loaded or not self.model:
+                raise RuntimeError("Model not loaded")
+
+            # Decode audio
+            audio_data = base64.b64decode(audio_b64)
+
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+                tmp_file.write(audio_data)
+                tmp_file.flush()
+
+                # Process audio
+                if self.processor:
+                    inputs = self.processor(tmp_file.name, return_tensors="pt")
+                else:
+                    import librosa
+                    audio, sr = librosa.load(tmp_file.name)
+                    inputs = self.tokenizer(audio, return_tensors="pt")
+
+                inputs = inputs.to("cuda")
+
+                # Generate response
+                import torch
+                with torch.no_grad():
+                    outputs = self.model.generate(**inputs)
+
+                # Decode response
+                if self.processor:
+                    response = self.processor.decode(outputs[0], skip_special_tokens=True)
+                else:
+                    response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                os.unlink(tmp_file.name)
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'text': response,
+                'processing_time': processing_time,
+                'model': self.model.config.name_or_path
+            }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }'''
+
+    def _image_generation_method(self, config: ModelConfig) -> str:
+        return '''
+    @modal.method()
+    def generate_image(
+        self,
+        prompt: str,
+        width: int = 512,
+        height: int = 512,
+        num_inference_steps: int = 20
+    ) -> Dict[str, Any]:
+        """Generate image using the model"""
+        start_time = time.time()
+        self.request_count += 1
+
+        try:
+            if not self.models_loaded or not self.model:
+                raise RuntimeError("Model not loaded")
+
+            # Generate image
+            image = self.model(
+                prompt=prompt,
+                width=width,
+                height=height,
+                num_inference_steps=num_inference_steps
+            ).images[0]
+
+            # Convert to base64
+            import io
+            buffer = io.BytesIO()
+            image.save(buffer, format="PNG")
+            image_b64 = base64.b64encode(buffer.getvalue()).decode()
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'image': image_b64,
+                'processing_time': processing_time,
+                'model': self.model.config.name_or_path
+            }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }'''
+
+    def _embedding_method(self, config: ModelConfig) -> str:
+        return '''
+    @modal.method()
+    def embed_text(
+        self,
+        text: Union[str, List[str]]
+    ) -> Dict[str, Any]:
+        """Generate embeddings for text"""
+        start_time = time.time()
+        self.request_count += 1
+
+        try:
+            if not self.models_loaded or not self.model:
+                raise RuntimeError("Model not loaded")
+
+            # Generate embeddings
+            if hasattr(self.model, 'encode'):
+                embeddings = self.model.encode(text)
+            else:
+                inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+                inputs = inputs.to("cuda")
+
+                import torch
+                with torch.no_grad():
+                    outputs = self.model(**inputs)
+                    embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'embeddings': embeddings.tolist(),
+                'processing_time': processing_time,
+                'model': self.model.config.name_or_path
+            }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }'''
+
+    def _generic_inference_method(self, config: ModelConfig) -> str:
+        return '''
+    @modal.method()
+    def inference(
+        self,
+        input_data: str,
+        task: str = "generate",
+        **kwargs
+    ) -> Dict[str, Any]:
+        """Generic inference method"""
+        start_time = time.time()
+        self.request_count += 1
+
+        try:
+            if not self.models_loaded or not self.model:
+                raise RuntimeError("Model not loaded")
+
+            # Process input
+            if self.processor:
+                inputs = self.processor(input_data, return_tensors="pt")
+            else:
+                inputs = self.tokenizer(input_data, return_tensors="pt")
+
+            inputs = inputs.to("cuda")
+
+            # Generate response
+            import torch
+            with torch.no_grad():
+                outputs = self.model.generate(**inputs, **kwargs)
+
+            # Decode response
+            if self.processor:
+                response = self.processor.decode(outputs[0], skip_special_tokens=True)
+            else:
+                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'output': response,
+                'processing_time': processing_time,
+                'model': self.model.config.name_or_path
+            }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }'''
+
+    def deploy_model(self, model_id: str, deploy: bool = False) -> Dict[str, Any]:
+        """
+        Analyze and optionally deploy a HuggingFace model to Modal
+
+        Args:
+            model_id: HuggingFace model ID
+            deploy: Whether to actually deploy to Modal
+
+        Returns:
+            Deployment result with service code
+        """
+        try:
+            # Analyze model
+            config = self.analyze_model(model_id)
+
+            # Generate Modal service code
+            service_code = self.generate_modal_service(config)
+
+            # Save service code to file
+            service_name = model_id.replace('/', '_').replace('-', '_').lower()
+            output_dir = Path("/Users/xenodennis/Documents/Fun/isA_Model/isa_model/deployment/cloud/modal")
+            output_file = output_dir / f"auto_{service_name}_service.py"
+
+            with open(output_file, 'w') as f:
+                f.write(service_code)
+
+            result = {
+                'success': True,
+                'model_id': model_id,
+                'config': config.__dict__,
+                'service_file': str(output_file),
+                'service_code': service_code,
+                'estimated_cost_per_hour': config.estimated_cost_per_hour,
+                'deployment_command': f"modal deploy {output_file}",
+                'deployed': False
+            }
+
+            # Optional: Actually deploy to Modal
+            if deploy:
+                try:
+                    import subprocess
+                    deployment_result = subprocess.run(
+                        ['modal', 'deploy', str(output_file)],
+                        capture_output=True,
+                        text=True,
+                        timeout=300
+                    )
+
+                    if deployment_result.returncode == 0:
+                        result['deployed'] = True
+                        result['deployment_output'] = deployment_result.stdout
+                    else:
+                        result['deployment_error'] = deployment_result.stderr
+
+                except Exception as e:
+                    result['deployment_error'] = str(e)
+
+            return result
+
+        except Exception as e:
+            return {
+                'success': False,
+                'error': str(e),
+                'model_id': model_id
+            }
+
+# Example usage
+if __name__ == "__main__":
+    deployer = HuggingFaceModalDeployer()
+
+    # Example: Deploy a text model
+    result = deployer.deploy_model("microsoft/DialoGPT-medium")
+    print(json.dumps(result, indent=2, default=str))
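
For orientation, a minimal sketch of how the deployer added in 0.4.0 could be driven directly, based on the API shown in the diff above. The model ID is illustrative, and because deploy_model() writes the generated service to a hard-coded output directory, the analyze/generate pair is used here instead:

from pathlib import Path
from isa_model.deployment.services.auto_hf_modal_deployer import HuggingFaceModalDeployer

deployer = HuggingFaceModalDeployer()

# Inspect the deployment configuration the deployer would choose (GPU, memory, dependencies, cost)
config = deployer.analyze_model("openai/whisper-large-v3")  # illustrative model ID
print(config.gpu_requirements, config.container_memory_mb, config.estimated_cost_per_hour)

# Render the Modal service source and save it somewhere under your control
service_code = deployer.generate_modal_service(config)
Path("auto_whisper_service.py").write_text(service_code)
# Deploy the generated file with: modal deploy auto_whisper_service.py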