isa-model 0.3.5 → 0.3.7 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
isa_model/deployment/cloud/modal/isa_vision_table_service.py (new file)
@@ -0,0 +1,532 @@
+"""
+Qwen2.5-VL-32B Table Data Extraction Service
+
+Specialized service for table data extraction using Qwen2.5-VL-32B-Instruct-AWQ
+"""
+
+import modal
+import torch
+import base64
+import io
+import numpy as np
+from PIL import Image
+from typing import Dict, List, Optional, Any
+import time
+import json
+import os
+import logging
+
+# Define Modal application
+app = modal.App("qwen-vision-table")
+
+# Download Qwen2.5-VL model
+def download_qwen_model():
+    """Download Qwen2.5-VL-32B-Instruct-AWQ model"""
+    from huggingface_hub import snapshot_download
+
+    print("📦 Downloading Qwen2.5-VL-32B-Instruct-AWQ...")
+    os.makedirs("/models", exist_ok=True)
+
+    try:
+        snapshot_download(
+            repo_id="Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
+            local_dir="/models/qwen2.5-vl-32b-awq",
+            allow_patterns=["**/*.safetensors", "**/*.json", "**/*.py", "**/*.txt"],
+            # Use auth token if needed for gated models
+            # token=os.getenv("HF_TOKEN")
+        )
+        print("✅ Qwen2.5-VL-32B-Instruct-AWQ downloaded")
+    except Exception as e:
+        print(f"⚠️ Model download failed: {e}")
+        raise
+
+    print("📦 Model download completed")
+
+# Define Modal container image with AWQ support
+image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .pip_install([
+        # Core AI libraries with AWQ support
+        "torch>=2.1.0",
+        "torchvision",
+        "transformers>=4.37.0",
+        "accelerate>=0.26.0",
+        "autoawq>=0.2.0", # AWQ quantization support
+        "huggingface_hub",
+
+        # Qwen-VL specific dependencies
+        "qwen-vl-utils", # If available
+        "tiktoken",
+        "einops",
+        "timm",
+
+        # Image processing
+        "pillow>=10.0.1",
+        "opencv-python-headless",
+        "numpy>=1.24.3",
+
+        # HTTP libraries
+        "httpx>=0.26.0",
+        "requests",
+
+        # Utilities
+        "pydantic>=2.0.0",
+        "python-dotenv",
+    ])
+    .run_function(download_qwen_model)
+    .env({
+        "TRANSFORMERS_CACHE": "/models",
+        "HF_HOME": "/models",
+        "TORCH_HOME": "/models",
+    })
+)
+
+# Table Extraction Service
+@app.cls(
+    gpu="A100", # A100 recommended for 32B model, H100 if available
+    image=image,
+    memory=32768, # 32GB RAM for 32B model
+    timeout=3600, # 1 hour timeout
+    scaledown_window=60, # 1 minute idle timeout
+    min_containers=0, # Scale to zero to save costs
+    # secrets=[modal.Secret.from_name("huggingface-token")] # If needed
+)
+class QwenTableExtractionService:
+    """
+    Table Data Extraction Service using Qwen2.5-VL-32B-Instruct-AWQ
+
+    Provides high-accuracy table extraction from images
+    """
+
+    @modal.enter()
+    def load_model(self):
+        """Load Qwen2.5-VL model on container startup"""
+        print("🚀 Loading Qwen2.5-VL-32B-Instruct-AWQ...")
+        start_time = time.time()
+
+        # Initialize attributes
+        self.model = None
+        self.processor = None
+        self.logger = logging.getLogger(__name__)
+
+        try:
+            from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+
+            model_path = "Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+
+            # Load processor
+            print("📱 Loading processor...")
+            self.processor = AutoProcessor.from_pretrained(
+                model_path,
+                trust_remote_code=True
+            )
+
+            # Load model with AWQ quantization
+            print("🧠 Loading AWQ quantized model...")
+            self.model = Qwen2VLForConditionalGeneration.from_pretrained(
+                model_path,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                trust_remote_code=True,
+                # AWQ specific settings
+                use_safetensors=True,
+            )
+
+            # Try to import qwen-vl-utils
+            try:
+                from qwen_vl_utils import process_vision_info as qwen_process_vision_info
+                print("✅ qwen-vl-utils imported successfully")
+                # Use the official process_vision_info if available
+                globals()['process_vision_info'] = qwen_process_vision_info
+            except ImportError:
+                print("⚠️ qwen-vl-utils not found, using custom implementation")
+
+            # Set to evaluation mode
+            self.model.eval()
+
+            load_time = time.time() - start_time
+            print(f"✅ Qwen2.5-VL model loaded in {load_time:.2f}s")
+
+        except Exception as e:
+            print(f"❌ Model loading failed: {e}")
+            raise
+
+    @modal.method()
+    def extract_table_data(
+        self,
+        image_b64: str,
+        extraction_format: str = "markdown",
+        custom_prompt: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Extract table data from image
+
+        Args:
+            image_b64: Base64 encoded image
+            extraction_format: Output format ("markdown", "json", "csv", "html")
+            custom_prompt: Custom extraction prompt
+
+        Returns:
+            Extracted table data and metadata
+        """
+        start_time = time.time()
+
+        try:
+            # Decode image
+            image = self._decode_image(image_b64)
+
+            # Prepare prompt based on format
+            if custom_prompt:
+                prompt = custom_prompt
+            else:
+                prompt = self._get_extraction_prompt(extraction_format)
+
+            # Process inputs
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image", "image": image},
+                        {"type": "text", "text": prompt}
+                    ]
+                }
+            ]
+
+            # Prepare inputs for the model
+            text = self.processor.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+
+            image_inputs, video_inputs = process_vision_info(messages)
+            inputs = self.processor(
+                text=[text],
+                images=image_inputs,
+                videos=video_inputs,
+                padding=True,
+                return_tensors="pt"
+            )
+            inputs = inputs.to("cuda")
+
+            # Generate response
+            with torch.no_grad():
+                generated_ids = self.model.generate(
+                    **inputs,
+                    max_new_tokens=2048,
+                    do_sample=False,
+                    temperature=0.0, # Deterministic for table extraction
+                    pad_token_id=self.processor.tokenizer.eos_token_id
+                )
+
+            # Decode response
+            generated_ids_trimmed = [
+                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+            ]
+            output_text = self.processor.batch_decode(
+                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )[0]
+
+            processing_time = time.time() - start_time
+
+            # Post-process extracted data
+            processed_data = self._post_process_extraction(output_text, extraction_format)
+
+            return {
+                'success': True,
+                'service': 'qwen-vision-table',
+                'extracted_data': processed_data,
+                'raw_output': output_text,
+                'format': extraction_format,
+                'processing_time': processing_time,
+                'model_info': {
+                    'model': 'Qwen2.5-VL-32B-Instruct-AWQ',
+                    'gpu': 'A100',
+                    'quantization': 'AWQ',
+                    'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
+                }
+            }
+
+        except Exception as e:
+            self.logger.error(f"Table extraction failed: {e}")
+            return {
+                'success': False,
+                'service': 'qwen-vision-table',
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }
+
+    def _get_extraction_prompt(self, format_type: str) -> str:
+        """Get extraction prompt based on desired format"""
+        base_prompt = "Please extract all the data from this table accurately."
+
+        format_prompts = {
+            "markdown": f"{base_prompt} Format the output as a markdown table with proper alignment.",
+            "json": f"{base_prompt} Format the output as a JSON array where each row is an object with column headers as keys.",
+            "csv": f"{base_prompt} Format the output as CSV with comma-separated values. Include headers in the first row.",
+            "html": f"{base_prompt} Format the output as an HTML table with proper <table>, <tr>, <td>, and <th> tags.",
+        }
+
+        return format_prompts.get(format_type, base_prompt)
+
+    def _post_process_extraction(self, raw_output: str, format_type: str) -> Dict[str, Any]:
+        """Post-process extracted table data"""
+        try:
+            if format_type == "json":
+                # Try to parse JSON
+                import json
+                try:
+                    # Extract JSON from the output if it's wrapped in text
+                    start_idx = raw_output.find('[')
+                    end_idx = raw_output.rfind(']') + 1
+                    if start_idx != -1 and end_idx != 0:
+                        json_str = raw_output[start_idx:end_idx]
+                        parsed_data = json.loads(json_str)
+                        return {"structured_data": parsed_data, "raw_text": raw_output}
+                except json.JSONDecodeError:
+                    pass
+
+            elif format_type == "csv":
+                # Parse CSV-like output
+                lines = raw_output.strip().split('\n')
+                csv_data = [line.split(',') for line in lines if line.strip()]
+                return {"structured_data": csv_data, "raw_text": raw_output}
+
+            # For markdown, html, or unparseable formats, return as text
+            return {"structured_data": raw_output, "raw_text": raw_output}
+
+        except Exception as e:
+            self.logger.warning(f"Post-processing failed: {e}")
+            return {"structured_data": raw_output, "raw_text": raw_output}
+
+    @modal.method()
+    def batch_extract_tables(self, images_b64: List[str], extraction_format: str = "markdown") -> Dict[str, Any]:
+        """
+        Extract tables from multiple images
+
+        Args:
+            images_b64: List of base64 encoded images
+            extraction_format: Output format for all extractions
+
+        Returns:
+            Batch extraction results
+        """
+        start_time = time.time()
+        results = []
+
+        for i, image_b64 in enumerate(images_b64):
+            try:
+                result = self.extract_table_data(image_b64, extraction_format)
+                result['image_index'] = i
+                results.append(result)
+            except Exception as e:
+                results.append({
+                    'success': False,
+                    'image_index': i,
+                    'error': str(e)
+                })
+
+        return {
+            'success': True,
+            'service': 'qwen-vision-table',
+            'batch_results': results,
+            'total_images': len(images_b64),
+            'successful_extractions': sum(1 for r in results if r.get('success', False)),
+            'total_processing_time': time.time() - start_time
+        }
+
+    @modal.method()
+    def health_check(self) -> Dict[str, Any]:
+        """Health check endpoint"""
+        return {
+            'status': 'healthy',
+            'service': 'qwen-vision-table',
+            'model': 'Qwen2.5-VL-32B-Instruct-AWQ',
+            'model_loaded': self.model is not None,
+            'processor_loaded': self.processor is not None,
+            'timestamp': time.time(),
+            'gpu': 'A100'
+        }
+
+    def _decode_image(self, image_b64: str) -> Image.Image:
+        """Decode base64 image"""
+        try:
+            if image_b64.startswith('data:image'):
+                image_b64 = image_b64.split(',')[1]
+
+            image_data = base64.b64decode(image_b64)
+            return Image.open(io.BytesIO(image_data)).convert('RGB')
+        except Exception as e:
+            raise ValueError(f"Failed to decode image: {e}")
+
+# Helper function for vision processing
+def process_vision_info(messages):
+    """Process vision information from messages"""
+    image_inputs = []
+    video_inputs = []
+
+    for message in messages:
+        if isinstance(message.get("content"), list):
+            for content in message["content"]:
+                if content.get("type") == "image":
+                    image_inputs.append(content["image"])
+                elif content.get("type") == "video":
+                    video_inputs.append(content["video"])
+
+    return image_inputs, video_inputs
+
+# Deployment script
+@app.function()
+def deploy_info():
+    """Deployment information"""
+    return {
+        "service": "Qwen2.5-VL-32B Table Extraction",
+        "model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
+        "gpu_requirement": "A100 (minimum), H100 (recommended)",
+        "memory_requirement": "32GB+",
+        "deploy_command": "modal deploy qwen_table_extraction.py"
+    }
+
+# Auto-registration function
+@app.function()
+async def register_service():
+    """Auto-register this service in the model registry"""
+    try:
+        import sys
+        from pathlib import Path
+
+        # Add project root to path for imports
+        project_root = Path(__file__).parent.parent.parent.parent
+        sys.path.insert(0, str(project_root))
+
+        try:
+            from isa_model.core.model_manager import ModelManager
+            from isa_model.core.model_repo import ModelType, ModelCapability
+            from isa_model.core.service_registry import ServiceRegistry
+            from isa_model.core.types import ServiceType, DeploymentPlatform, ServiceStatus, ResourceRequirements
+            from isa_model.core.model_service import ModelService
+        except ImportError:
+            # Fallback if import fails in Modal environment
+            print("⚠️ Could not import required modules - registration skipped")
+            return {"success": False, "error": "Required modules not available"}
+
+        # Use ModelManager to register this service
+        model_manager = ModelManager()
+
+        # 1. First register the underlying model (backward compatibility)
+        model_success = model_manager.registry.register_model(
+            model_id="qwen2.5-vl-32b-table-service",
+            model_type=ModelType.VISION,
+            capabilities=[
+                ModelCapability.TABLE_DETECTION,
+                ModelCapability.TABLE_STRUCTURE_RECOGNITION,
+                ModelCapability.OCR,
+                ModelCapability.IMAGE_ANALYSIS
+            ],
+            metadata={
+                "description": "Qwen2.5-VL-32B table extraction service",
+                "service_name": "qwen-vision-table",
+                "service_type": "modal",
+                "deployment_type": "modal",
+                "endpoint": "https://qwen-vision-table.modal.run",
+                "underlying_model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
+                "gpu_requirement": "A100",
+                "memory_mb": 32768,
+                "auto_registered": True,
+                "registered_by": "isa_vision_table_service.py",
+                "is_service": True # Mark this as a service, not a raw model
+            }
+        )
+
+        # 2. Register as a deployed service in the ServiceRegistry (MaaS platform)
+        service_success = False
+        try:
+            service_registry = ServiceRegistry(model_manager.registry)
+
+            # Create ModelService instance
+            service = ModelService(
+                service_id="qwen-table-modal-001",
+                service_name="isa_vision_table",
+                model_id="qwen2.5-vl-32b-table-service",
+                deployment_platform=DeploymentPlatform.MODAL,
+                service_type=ServiceType.VISION,
+                status=ServiceStatus.HEALTHY,
+                inference_endpoint="https://qwen-vision-table.modal.run/extract_table_data",
+                health_endpoint="https://qwen-vision-table.modal.run/health_check",
+                capabilities=["table_detection", "table_structure_recognition", "ocr", "image_analysis"],
+                resource_requirements=ResourceRequirements(
+                    gpu_type="A100",
+                    memory_mb=32768,
+                    cpu_cores=8,
+                    min_replicas=0,
+                    max_replicas=3
+                ),
+                metadata={
+                    "description": "Qwen2.5-VL-32B table extraction service",
+                    "underlying_model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
+                    "auto_scaling": True,
+                    "scale_to_zero": True,
+                    "platform": "modal",
+                    "registered_by": "isa_vision_table_service.py"
+                }
+            )
+
+            # Register in ServiceRegistry
+            service_success = await service_registry.register_service(service)
+
+            if service_success:
+                print("✅ Service registered in MaaS platform ServiceRegistry")
+            else:
+                print("⚠️ ServiceRegistry registration failed")
+
+        except Exception as e:
+            print(f"⚠️ ServiceRegistry registration error: {e}")
+
+        if model_success:
+            print("✅ Model registry registration successful")
+        else:
+            print("⚠️ Model registry registration failed")
+
+        overall_success = model_success and service_success
+        return {
+            "success": overall_success,
+            "model_registry": model_success,
+            "service_registry": service_success
+        }
+
+    except Exception as e:
+        print(f"❌ Auto-registration error: {e}")
+        return {"success": False, "error": str(e)}
+
+# Quick deployment function
+@app.function()
+def deploy_service():
+    """Deploy this service instantly"""
+    import subprocess
+
+    print("🚀 Deploying Qwen2.5-VL Table Extraction Service...")
+    try:
+        # Get the current file path
+        current_file = __file__
+
+        # Run modal deploy command
+        result = subprocess.run(
+            ["modal", "deploy", current_file],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+
+        print("✅ Deployment completed successfully!")
+        print(f"📝 Output: {result.stdout}")
+        return {"success": True, "output": result.stdout}
+
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Deployment failed: {e}")
+        print(f"📝 Error: {e.stderr}")
+        return {"success": False, "error": str(e), "stderr": e.stderr}
+
+if __name__ == "__main__":
+    print("🚀 Qwen2.5-VL Table Extraction Service - Modal Deployment")
+    print("Deploy with: modal deploy isa_vision_table_service.py")
+    print("Or call: modal run isa_vision_table_service.py::deploy_service")
+    print("Note: Requires A100 GPU and 32GB+ RAM for optimal performance")
+    print("\n📝 Service will auto-register in model registry upon deployment")
isa_model/deployment/cloud/modal/isa_vision_ui_service.py
@@ -84,7 +84,7 @@ image = (
     image=image,
     memory=16384, # 16GB RAM
     timeout=1800, # 30 minutes
-    scaledown_window=300, # 5 minutes idle timeout
+    scaledown_window=60, # 1 minute idle timeout
     min_containers=0, # Scale to zero to save costs
 )
 class UIDetectionService:
@@ -298,8 +298,109 @@ class UIDetectionService:
         image_data = base64.b64decode(image_b64)
         return Image.open(io.BytesIO(image_data)).convert('RGB')
 
-# Warmup function removed to save costs
+# Auto-registration function
+@app.function()
+async def register_service():
+    """Auto-register this service in the model registry"""
+    try:
+        import sys
+        from pathlib import Path
+
+        # Add project root to path for imports
+        project_root = Path(__file__).parent.parent.parent.parent
+        sys.path.insert(0, str(project_root))
+
+        try:
+            from isa_model.core.model_manager import ModelManager
+            from isa_model.core.model_repo import ModelType, ModelCapability
+        except ImportError:
+            # Fallback if import fails in Modal environment
+            print("⚠️ Could not import model manager - registration skipped")
+            return {"success": False, "error": "Model manager not available"}
+
+        # Use ModelManager to register this service
+        model_manager = ModelManager()
+
+        # Register the service in the registry
+        success = model_manager.registry.register_model(
+            model_id="omniparser-ui-detection-service",
+            model_type=ModelType.VISION,
+            capabilities=[
+                ModelCapability.UI_DETECTION,
+                ModelCapability.IMAGE_ANALYSIS,
+                ModelCapability.IMAGE_UNDERSTANDING
+            ],
+            metadata={
+                "description": "UI element detection service using OmniParser v2.0",
+                "service_name": "isa-vision-ui",
+                "service_type": "modal",
+                "deployment_type": "modal",
+                "endpoint": "https://isa-vision-ui.modal.run",
+                "underlying_model": "microsoft/OmniParser-v2.0",
+                "fallback_model": "ultralytics/yolov8",
+                "gpu_requirement": "T4",
+                "memory_mb": 16384,
+                "auto_registered": True,
+                "registered_by": "isa_vision_ui_service.py",
+                "is_service": True
+            }
+        )
+
+        if success:
+            print("✅ UI service auto-registered successfully")
+        else:
+            print("⚠️ UI service registration failed")
+
+        return {"success": success}
+
+    except Exception as e:
+        print(f"❌ Auto-registration error: {e}")
+        return {"success": False, "error": str(e)}
+
+# Deployment script
+@app.function()
+def deploy_info():
+    """Deployment information"""
+    return {
+        "service": "ISA Vision UI Detection",
+        "model": "microsoft/OmniParser-v2.0 + ultralytics/yolov8 (fallback)",
+        "gpu_requirement": "T4",
+        "memory_requirement": "16GB",
+        "deploy_command": "modal deploy isa_vision_ui_service.py"
+    }
+
+# Quick deployment function
+@app.function()
+def deploy_service():
+    """Deploy this service instantly"""
+    import subprocess
+    import os
+
+    print("🚀 Deploying ISA Vision UI Service...")
+    try:
+        # Get the current file path
+        current_file = __file__
+
+        # Run modal deploy command
+        result = subprocess.run(
+            ["modal", "deploy", current_file],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+
+        print("✅ Deployment completed successfully!")
+        print(f"📝 Output: {result.stdout}")
+        return {"success": True, "output": result.stdout}
+
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Deployment failed: {e}")
+        print(f"📝 Error: {e.stderr}")
+        return {"success": False, "error": str(e), "stderr": e.stderr}
 
 if __name__ == "__main__":
     print("🚀 ISA Vision UI Service - Modal Deployment")
-    print("Deploy with: modal deploy isa_vision_ui_service.py")
+    print("Deploy with: modal deploy isa_vision_ui_service.py")
+    print("Or call: modal run isa_vision_ui_service.py::deploy_service")
+    print("Note: Uses OmniParser v2.0 with YOLOv8 fallback")
+    print("\n📝 Service will auto-register in model registry upon deployment")