isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,291 @@
1
+ """
2
+ Unified Type Definitions for ISA Model SDK
3
+
4
+ This module contains all the common enums and type definitions used across
5
+ the entire SDK to ensure consistency and avoid duplication.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Dict, List, Optional, Any, Union
10
+ from dataclasses import dataclass
11
+ from datetime import datetime
12
+
13
+ # ===== MODEL TYPES =====
14
+
15
class ModelType(str, Enum):
    """Types of models in the system.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (``ModelType.LLM == "llm"``) and ``ModelType("llm")`` looks
    a member up by value.
    """
    LLM = "llm"
    EMBEDDING = "embedding"
    RERANK = "rerank"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    VISION = "vision"
    # Same string value as ServiceType.IMAGE_GEN in this module.
    IMAGE_GEN = "image_gen"  # Added for consistency
25
+
26
class ModelCapability(str, Enum):
    """Model capabilities.

    String-valued enum: every member is also a ``str`` equal to its value.
    """
    TEXT_GENERATION = "text_generation"
    CHAT = "chat"
    EMBEDDING = "embedding"
    RERANKING = "reranking"
    REASONING = "reasoning"
    IMAGE_GENERATION = "image_generation"
    # IMAGE_ANALYSIS and IMAGE_UNDERSTANDING (below) carry different values,
    # so they are two distinct members, not aliases of each other.
    IMAGE_ANALYSIS = "image_analysis"
    AUDIO_TRANSCRIPTION = "audio_transcription"
    IMAGE_UNDERSTANDING = "image_understanding"
    UI_DETECTION = "ui_detection"
    OCR = "ocr"
    TABLE_DETECTION = "table_detection"
    TABLE_STRUCTURE_RECOGNITION = "table_structure_recognition"
41
+
42
class ModelStage(str, Enum):
    """Model lifecycle stages.

    String-valued enum: every member is also a ``str`` equal to its value.
    The enum only names the stages; it does not enforce any transition
    order between them.
    """
    REGISTERED = "registered"
    TRAINING = "training"
    EVALUATION = "evaluation"
    DEPLOYMENT = "deployment"
    PRODUCTION = "production"
    RETIRED = "retired"
50
+
51
+ # ===== SERVICE TYPES =====
52
+
53
class ServiceType(str, Enum):
    """Types of services available in the platform.

    String-valued enum: every member is also a ``str`` equal to its value.
    This is the target type of ``migrate_legacy_service_type`` and of the
    values in ``LEGACY_SERVICE_TYPE_MAPPING`` defined later in this module.
    """
    LLM = "llm"
    EMBEDDING = "embedding"
    VISION = "vision"
    # Single audio member: the legacy AUDIO_STT / AUDIO_TTS split maps here.
    AUDIO = "audio"
    IMAGE_GEN = "image_gen"
60
+
61
class ServiceStatus(str, Enum):
    """Service deployment and health status.

    String-valued enum: every member is also a ``str`` equal to its value.
    """
    PENDING = "pending"
    DEPLOYING = "deploying"
    HEALTHY = "healthy"
    UNHEALTHY = "unhealthy"
    STOPPED = "stopped"
68
+
69
class DeploymentPlatform(str, Enum):
    """Supported deployment platforms for self-owned services only.

    String-valued enum: every member is also a ``str`` equal to its value.
    MODAL, YYDS and OLLAMA reuse the same string values as the matching
    ``Provider`` members in this module.
    """
    MODAL = "modal"
    KUBERNETES = "kubernetes"
    RUNPOD = "runpod"
    YYDS = "yyds"
    OLLAMA = "ollama"  # Local deployment
76
+
77
+ # ===== OPERATION TYPES =====
78
+
79
class ModelOperationType(str, Enum):
    """Types of model operations that incur costs.

    String-valued enum: every member is also a ``str`` equal to its value.
    Used as the required ``operation_type`` field of ``UsageData``.
    """
    TRAINING = "training"
    EVALUATION = "evaluation"
    DEPLOYMENT = "deployment"
    INFERENCE = "inference"
    STORAGE = "storage"
86
+
87
class InferenceOperationType(str, Enum):
    """Types of inference operations.

    String-valued enum: every member is also a ``str`` equal to its value.
    Used as the optional ``inference_operation`` field of ``UsageData``.
    """
    CHAT = "chat"
    COMPLETION = "completion"
    EMBEDDING = "embedding"
    IMAGE_GENERATION = "image_generation"
    VISION_ANALYSIS = "vision_analysis"
    AUDIO_TRANSCRIPTION = "audio_transcription"
    AUDIO_GENERATION = "audio_generation"
96
+
97
+ # ===== ROUTING AND LOAD BALANCING =====
98
+
99
class RoutingStrategy(str, Enum):
    """Routing strategies for distributing requests among model replicas.

    String-valued enum: every member is also a ``str`` equal to its value.
    This class only names the strategies; the routing logic itself is not
    part of this module.
    """
    ROUND_ROBIN = "round_robin"
    WEIGHTED_ROUND_ROBIN = "weighted_round_robin"
    LEAST_CONNECTIONS = "least_connections"
    RESPONSE_TIME = "response_time"
    RANDOM = "random"
    CONSISTENT_HASH = "consistent_hash"
    DYNAMIC_LOAD_BALANCING = "dynamic_load_balancing"
108
+
109
+ # ===== EVALUATION AND TRAINING =====
110
+
111
class MetricType(str, Enum):
    """Types of evaluation metrics.

    String-valued enum: every member is also a ``str`` equal to its value.
    The members are identifiers only; no metric is computed here.
    """
    ACCURACY = "accuracy"
    PRECISION = "precision"
    RECALL = "recall"
    F1_SCORE = "f1_score"
    BLEU = "bleu"
    ROUGE = "rouge"
    PERPLEXITY = "perplexity"
    BERTSCORE = "bertscore"
    SEMANTIC_SIMILARITY = "semantic_similarity"
122
+
123
class AnnotationType(str, Enum):
    """Types of training data annotations.

    String-valued enum: every member is also a ``str`` equal to its value.
    """
    TEXT_CLASSIFICATION = "text_classification"
    NAMED_ENTITY_RECOGNITION = "named_entity_recognition"
    QUESTION_ANSWERING = "question_answering"
    SUMMARIZATION = "summarization"
    TRANSLATION = "translation"
    IMAGE_CLASSIFICATION = "image_classification"
    OBJECT_DETECTION = "object_detection"
    SEMANTIC_SEGMENTATION = "semantic_segmentation"
133
+
134
class DatasetType(str, Enum):
    """Types of training datasets.

    String-valued enum: every member is also a ``str`` equal to its value.
    """
    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    MULTIMODAL = "multimodal"
141
+
142
class DatasetStatus(str, Enum):
    """Status of training datasets.

    String-valued enum: every member is also a ``str`` equal to its value.
    """
    CREATED = "created"
    UPLOADING = "uploading"
    PROCESSING = "processing"
    READY = "ready"
    ERROR = "error"
149
+
150
class ExperimentType(str, Enum):
    """Types of ML experiments.

    String-valued enum: every member is also a ``str`` equal to its value.
    """
    TRAINING = "training"
    EVALUATION = "evaluation"
    HYPERPARAMETER_TUNING = "hyperparameter_tuning"
    MODEL_COMPARISON = "model_comparison"
156
+
157
+ # ===== STACKED SERVICES =====
158
+
159
class LayerType(Enum):
    """Types of layers in stacked services.

    NOTE: this is a plain ``Enum`` without the ``str`` mixin used by most
    enums in this module, so ``LayerType.CACHING == "caching"`` is False;
    compare against ``.value`` when a raw string is needed.
    """
    INPUT_PROCESSING = "input_processing"
    MODEL_INFERENCE = "model_inference"
    OUTPUT_PROCESSING = "output_processing"
    VALIDATION = "validation"
    CACHING = "caching"
166
+
167
class WorkflowType(Enum):
    """Types of workflows.

    NOTE: this is a plain ``Enum`` without the ``str`` mixin used by most
    enums in this module, so members do not compare equal to their string
    values; use ``.value`` when a raw string is needed.
    """
    SEQUENTIAL = "sequential"
    PARALLEL = "parallel"
    CONDITIONAL = "conditional"
    LOOP = "loop"
173
+
174
+ # ===== PROVIDER TYPES =====
175
+
176
class Provider(str, Enum):
    """AI service providers.

    String-valued enum: every member is also a ``str`` equal to its value.
    Note that ``ModelInfo.provider`` in this module is annotated as a plain
    ``str`` rather than as this enum.
    """
    OPENAI = "openai"
    REPLICATE = "replicate"
    OLLAMA = "ollama"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    YYDS = "yyds"
    MODAL = "modal"
185
+
186
+ # ===== DATA CLASSES =====
187
+
188
@dataclass
class HealthMetrics:
    """Service health metrics.

    Only ``is_healthy`` is required; every other field defaults to ``None``.
    The dataclass performs no validation of the values it stores.
    """
    is_healthy: bool
    # Units below are implied only by the field-name suffixes (_ms, _mb,
    # _percent) -- TODO confirm against the code that fills these in.
    response_time_ms: Optional[int] = None
    status_code: Optional[int] = None
    cpu_usage_percent: Optional[float] = None
    memory_usage_mb: Optional[int] = None
    gpu_usage_percent: Optional[float] = None
    error_message: Optional[str] = None
    # No timezone handling is done here; whether this is naive or aware
    # depends on the caller.
    checked_at: Optional[datetime] = None
199
+
200
@dataclass
class ServiceMetrics:
    """Service runtime metrics.

    Every field has a default, so ``ServiceMetrics()`` yields an all-zero
    record with no window bounds set.
    """
    request_count: int = 0
    total_processing_time_ms: int = 0
    error_count: int = 0
    total_cost_usd: float = 0.0
    # Presumably the bounds of the period the counters above cover --
    # verify against the code that populates this record.
    window_start: Optional[datetime] = None
    window_end: Optional[datetime] = None
209
+
210
@dataclass
class ResourceRequirements:
    """Service resource requirements.

    All fields are optional. The hardware fields default to ``None``
    (unspecified); the replica bounds default to 0 and 1.
    """
    # Free-form string (the vision doc service passes "T4").
    gpu_type: Optional[str] = None
    memory_mb: Optional[int] = None
    cpu_cores: Optional[int] = None
    storage_gb: Optional[int] = None
    min_replicas: int = 0
    max_replicas: int = 1
219
+
220
@dataclass
class ModelInfo:
    """Model information structure.

    The first seven fields are required (they have no defaults), including
    ``metadata``; only the two timestamps are optional.
    """
    model_id: str
    model_type: ModelType
    capabilities: List[ModelCapability]
    stage: ModelStage
    # Annotated as a plain string, not the Provider enum defined in this module.
    provider: str
    provider_model_name: str
    metadata: Dict[str, Any]
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
232
+
233
@dataclass
class UsageData:
    """Usage data for billing tracking.

    Only ``operation_type`` is required; every other field defaults to
    ``None``.
    """
    operation_type: ModelOperationType
    inference_operation: Optional[InferenceOperationType] = None
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    # Float-valued counterparts to the token counts; what a "unit" is, is
    # not defined in this module -- confirm with the billing code.
    input_units: Optional[float] = None
    output_units: Optional[float] = None
    metadata: Optional[Dict[str, Any]] = None
243
+
244
+ # ===== TYPE ALIASES =====
245
+
246
# Common type aliases for better readability.
# These are plain assignments (not typing.NewType), so a type checker treats
# each alias as interchangeable with the type on its right-hand side.
ModelID = str
ServiceID = str
DeploymentID = str
ProviderName = str
ModelName = str
EndpointURL = str
ConfigDict = Dict[str, Any]
MetadataDict = Dict[str, Any]
255
+
256
+ # ===== BACKWARD COMPATIBILITY =====
257
+
258
+ # Legacy aliases for backward compatibility during migration
259
+ # These should be removed once all modules are updated
260
+
261
+ # From inference/billing_tracker.py
262
class LegacyServiceType(Enum):
    """Legacy service type - use ServiceType instead.

    Plain ``Enum`` (no ``str`` mixin), so members are not equal to their
    string values. Unlike ``ServiceType`` it splits audio into STT and TTS
    and spells image generation as "image_generation".
    """
    LLM = "llm"
    EMBEDDING = "embedding"
    VISION = "vision"
    IMAGE_GENERATION = "image_generation"
    AUDIO_STT = "audio_stt"
    AUDIO_TTS = "audio_tts"
270
+
271
+ # Migration mapping
272
# Maps every LegacyServiceType member to its unified ServiceType.
# The mapping is many-to-one (both audio members collapse to
# ServiceType.AUDIO), so it cannot be inverted.
LEGACY_SERVICE_TYPE_MAPPING: Dict[LegacyServiceType, ServiceType] = {
    LegacyServiceType.LLM: ServiceType.LLM,
    LegacyServiceType.EMBEDDING: ServiceType.EMBEDDING,
    LegacyServiceType.VISION: ServiceType.VISION,
    LegacyServiceType.IMAGE_GENERATION: ServiceType.IMAGE_GEN,
    LegacyServiceType.AUDIO_STT: ServiceType.AUDIO,
    LegacyServiceType.AUDIO_TTS: ServiceType.AUDIO,
}
280
+
281
def migrate_legacy_service_type(legacy_type: Union[LegacyServiceType, str]) -> ServiceType:
    """Migrate a legacy service type to the unified ``ServiceType``.

    Args:
        legacy_type: Either a ``LegacyServiceType`` member, or a string that
            holds a legacy value (e.g. ``"audio_stt"``) or an already-unified
            ``ServiceType`` value (e.g. ``"image_gen"``). Strings are matched
            ignoring surrounding whitespace and letter case.

    Returns:
        The corresponding ``ServiceType`` member.

    Raises:
        ValueError: If a string is neither a legacy nor a unified value.
    """
    if isinstance(legacy_type, str):
        normalized = legacy_type.strip().lower()

        # Legacy values take precedence, exactly as in the original scan.
        try:
            legacy_member = LegacyServiceType(normalized)
        except ValueError:
            legacy_member = None
        if legacy_member is not None:
            return LEGACY_SERVICE_TYPE_MAPPING[legacy_member]

        # Not a legacy value: accept strings that are already unified values.
        try:
            return ServiceType(normalized)
        except ValueError:
            raise ValueError(
                f"{legacy_type!r} is not a valid legacy or unified service type"
            ) from None

    # Non-string input: unknown objects deliberately fall back to LLM, which
    # preserves the original best-effort behaviour for this branch.
    return LEGACY_SERVICE_TYPE_MAPPING.get(legacy_type, ServiceType.LLM)
@@ -26,11 +26,13 @@ from .core.deployment_config import (
26
26
  create_gemma_runpod_triton_config,
27
27
  create_local_triton_config
28
28
  )
29
+ from .services import AutoDeployVisionService
29
30
 
30
31
  __all__ = [
31
32
  # Main classes
32
33
  "DeploymentManager",
33
34
  "DeploymentConfig",
35
+ "AutoDeployVisionService",
34
36
 
35
37
  # Configuration classes
36
38
  "ModelConfig",
@@ -119,7 +119,7 @@ image = (
119
119
  image=image,
120
120
  memory=16384, # 16GB RAM
121
121
  timeout=1800, # 30 minutes
122
- scaledown_window=300, # 5 minutes idle timeout
122
+ scaledown_window=60, # 1 minute idle timeout
123
123
  min_containers=0, # Scale to zero when not in use
124
124
  )
125
125
  class DocumentAnalysisService:
@@ -605,8 +605,162 @@ class DocumentAnalysisService:
605
605
  image_data = base64.b64decode(image_b64)
606
606
  return Image.open(io.BytesIO(image_data)).convert('RGB')
607
607
 
608
- # Warmup function removed to save costs
608
+ # Auto-registration function
609
def _import_first_available(symbol, *module_paths):
    """Return ``symbol`` from the first module in ``module_paths`` that provides it.

    Args:
        symbol: Attribute name to fetch from the module.
        *module_paths: Dotted module paths, tried in the order given.

    Raises:
        ImportError: If no candidate can be imported or none defines ``symbol``.
    """
    import importlib

    last_error = None
    for module_path in module_paths:
        try:
            return getattr(importlib.import_module(module_path), symbol)
        except (ImportError, AttributeError) as exc:
            last_error = exc
    raise ImportError(
        f"cannot import {symbol!r} from any of: {', '.join(module_paths)}"
    ) from last_error


@app.function()
async def register_service():
    """Auto-register this service in the model registry.

    Registers the model entry through ``ModelManager().registry`` and then a
    ``ModelService`` record through ``ServiceRegistry``.

    Returns:
        dict: ``{"success", "model_registry", "service_registry"}`` when both
        registrations were attempted, or ``{"success": False, "error": ...}``
        when the required modules cannot be imported or an unexpected error
        occurs. Exceptions are reported in the dict, not raised.
    """
    try:
        import sys
        from pathlib import Path

        # Add project root to path for imports
        project_root = Path(__file__).parent.parent.parent.parent
        sys.path.insert(0, str(project_root))

        try:
            # The original module paths are tried first so behaviour is
            # unchanged wherever they still resolve. The fallbacks follow the
            # 0.3.7 package layout (core/models/, deployment/services/,
            # core/types.py); without them one moved module makes every
            # registration silently skip.
            ModelManager = _import_first_available(
                "ModelManager",
                "isa_model.core.model_manager",
                "isa_model.core.models.model_manager",
            )
            ModelType = _import_first_available(
                "ModelType",
                "isa_model.core.model_repo",
                "isa_model.core.types",
                "isa_model.core.models.model_repo",
            )
            ModelCapability = _import_first_available(
                "ModelCapability",
                "isa_model.core.model_repo",
                "isa_model.core.types",
                "isa_model.core.models.model_repo",
            )
            ServiceRegistry = _import_first_available(
                "ServiceRegistry",
                "isa_model.core.service_registry",
                "isa_model.deployment.services.service_registry",
            )
            ModelService = _import_first_available(
                "ModelService",
                "isa_model.core.model_service",
                "isa_model.deployment.services.model_service",
            )
            from isa_model.core.types import ServiceType, DeploymentPlatform, ServiceStatus, ResourceRequirements
        except ImportError as import_error:
            # Fallback if import fails in Modal environment
            print("⚠️ Could not import required modules - registration skipped")
            # Surface the cause so a broken import path is diagnosable.
            print(f"⚠️ Import error: {import_error}")
            return {
                "success": False,
                "error": "Required modules not available",
                "details": str(import_error),
            }

        # Use ModelManager to register this service
        model_manager = ModelManager()

        # 1. First register the underlying model (backward compatibility)
        model_success = model_manager.registry.register_model(
            model_id="isa-vision-doc-service",
            model_type=ModelType.VISION,
            capabilities=[
                ModelCapability.TABLE_DETECTION,
                ModelCapability.TABLE_STRUCTURE_RECOGNITION,
                ModelCapability.OCR,
                ModelCapability.IMAGE_ANALYSIS
            ],
            metadata={
                "description": "ISA Vision Document Analysis Service with table detection, structure recognition, and OCR",
                "service_name": "isa-vision-doc",
                "service_type": "modal",
                "deployment_type": "modal",
                "endpoint": "https://isa-vision-doc.modal.run",
                "underlying_models": [
                    "microsoft/table-transformer-detection",
                    "microsoft/table-transformer-structure-recognition-v1.1-all",
                    "PaddleOCR 3.0"
                ],
                "gpu_requirement": "T4",
                "memory_mb": 16384,
                "auto_registered": True,
                "registered_by": "isa_vision_doc_service.py",
                "is_service": True,  # Mark this as a service, not a raw model
                "capabilities_details": {
                    "table_detection": "Microsoft Table Transformer Detection",
                    "table_structure": "Microsoft Table Transformer Structure Recognition v1.1",
                    "ocr": "PaddleOCR 3.0 with Chinese/English support"
                }
            }
        )

        # 2. Register as a deployed service in the ServiceRegistry (MaaS platform)
        service_success = False
        try:
            service_registry = ServiceRegistry(model_manager.registry)

            # Create ModelService instance
            service = ModelService(
                service_id="isa-vision-doc-modal-001",
                service_name="isa_vision_doc",
                model_id="isa-vision-doc-service",
                deployment_platform=DeploymentPlatform.MODAL,
                service_type=ServiceType.VISION,
                status=ServiceStatus.HEALTHY,
                inference_endpoint="https://isa-vision-doc.modal.run/analyze_document_complete",
                health_endpoint="https://isa-vision-doc.modal.run/health_check",
                capabilities=["table_detection", "table_structure_recognition", "ocr", "image_analysis"],
                resource_requirements=ResourceRequirements(
                    gpu_type="T4",
                    memory_mb=16384,
                    cpu_cores=4,
                    min_replicas=0,
                    max_replicas=5
                ),
                metadata={
                    "description": "ISA Vision Document Analysis Service with table detection, structure recognition, and OCR",
                    "underlying_models": [
                        "microsoft/table-transformer-detection",
                        "microsoft/table-transformer-structure-recognition-v1.1-all",
                        "PaddleOCR 3.0"
                    ],
                    "auto_scaling": True,
                    "scale_to_zero": True,
                    "platform": "modal",
                    "registered_by": "isa_vision_doc_service.py"
                }
            )

            # Register in ServiceRegistry
            service_success = await service_registry.register_service(service)

            if service_success:
                print("✅ Service registered in MaaS platform ServiceRegistry")
            else:
                print("⚠️ ServiceRegistry registration failed")

        except Exception as e:
            # Best effort: a ServiceRegistry failure is reported through
            # service_success=False and must not undo the model registration.
            print(f"⚠️ ServiceRegistry registration error: {e}")

        if model_success:
            print("✅ Model registry registration successful")
        else:
            print("⚠️ Model registry registration failed")

        overall_success = model_success and service_success
        return {
            "success": overall_success,
            "model_registry": model_success,
            "service_registry": service_success
        }

    except Exception as e:
        # Top-level boundary: report the failure to the caller as a dict.
        print(f"❌ Auto-registration error: {e}")
        return {"success": False, "error": str(e)}
731
+
732
+ # Quick deployment function
733
@app.function()
def deploy_service():
    """Deploy this service by running ``modal deploy`` on this file.

    Returns:
        dict: ``{"success": True, "output": <stdout>}`` when the command
        exits with status 0, otherwise ``{"success": False, "error": ...,
        "stderr": ...}``. A missing or non-executable ``modal`` command is
        reported the same way instead of raising.
    """
    import subprocess

    print("🚀 Deploying ISA Vision Document Service...")
    try:
        # Run modal deploy on the current file; the argument list (no shell)
        # avoids any quoting issues with the path.
        result = subprocess.run(
            ["modal", "deploy", __file__],
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ Deployment failed: {e}")
        print(f"📝 Error: {e.stderr}")
        return {"success": False, "error": str(e), "stderr": e.stderr}
    except OSError as e:
        # The modal executable is missing or cannot be run; report it in the
        # same shape as a failed deploy.
        print(f"❌ Deployment failed: {e}")
        return {"success": False, "error": str(e), "stderr": ""}

    print("✅ Deployment completed successfully!")
    print(f"📝 Output: {result.stdout}")
    return {"success": True, "output": result.stdout}
609
760
 
610
761
if __name__ == "__main__":
    # Usage banner shown when the file is executed directly; printing it
    # does not deploy or register anything.
    banner_lines = (
        "🚀 ISA Vision Document Service - Modal Deployment",
        "Deploy with: modal deploy isa_vision_doc_service.py",
        "Or call: modal run isa_vision_doc_service.py::deploy_service",
        "Note: Requires T4 GPU and 16GB+ RAM for optimal performance",
        "\n📝 Service will auto-register in model registry upon deployment",
    )
    for banner_line in banner_lines:
        print(banner_line)