isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +30 -1
- isa_model/client.py +937 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +257 -601
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -17
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
- isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
- isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +492 -40
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +51 -17
- isa_model/inference/services/llm/openai_llm_service.py +70 -19
- isa_model/inference/services/llm/yyds_llm_service.py +24 -23
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +218 -117
- isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
- isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +104 -307
- isa_model/inference/services/vision/replicate_vision_service.py +140 -325
- isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/api/fastapi_server.py +6 -1
- isa_model/serving/api/routes/unified.py +274 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
- isa_model/config/__init__.py +0 -9
- isa_model/config/config_manager.py +0 -213
- isa_model/core/model_manager.py +0 -213
- isa_model/core/model_registry.py +0 -375
- isa_model/core/vision_models_init.py +0 -116
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/stacked/__init__.py +0 -26
- isa_model/inference/services/stacked/config.py +0 -426
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
isa_model/core/types.py
ADDED
@@ -0,0 +1,291 @@
|
|
1
|
+
"""
|
2
|
+
Unified Type Definitions for ISA Model SDK
|
3
|
+
|
4
|
+
This module contains all the common enums and type definitions used across
|
5
|
+
the entire SDK to ensure consistency and avoid duplication.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from enum import Enum
|
9
|
+
from typing import Dict, List, Optional, Any, Union
|
10
|
+
from dataclasses import dataclass
|
11
|
+
from datetime import datetime
|
12
|
+
|
13
|
+
# ===== MODEL TYPES =====
|
14
|
+
|
15
|
+
class ModelType(str, Enum):
    """Category a registered model belongs to.

    Members mix in ``str``, so each one compares equal to its raw
    string value (``ModelType.LLM == "llm"``).
    """

    LLM = "llm"
    EMBEDDING = "embedding"
    RERANK = "rerank"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    VISION = "vision"
    IMAGE_GEN = "image_gen"  # Added for consistency
|
25
|
+
|
26
|
+
class ModelCapability(str, Enum):
    """Individual capability a model can advertise.

    Members mix in ``str`` and therefore compare equal to their raw values.
    """

    TEXT_GENERATION = "text_generation"
    CHAT = "chat"
    EMBEDDING = "embedding"
    RERANKING = "reranking"
    REASONING = "reasoning"
    IMAGE_GENERATION = "image_generation"
    IMAGE_ANALYSIS = "image_analysis"
    AUDIO_TRANSCRIPTION = "audio_transcription"
    IMAGE_UNDERSTANDING = "image_understanding"
    UI_DETECTION = "ui_detection"
    OCR = "ocr"
    TABLE_DETECTION = "table_detection"
    TABLE_STRUCTURE_RECOGNITION = "table_structure_recognition"
|
41
|
+
|
42
|
+
class ModelStage(str, Enum):
    """Lifecycle stage of a model, from registration through retirement."""

    REGISTERED = "registered"
    TRAINING = "training"
    EVALUATION = "evaluation"
    DEPLOYMENT = "deployment"
    PRODUCTION = "production"
    RETIRED = "retired"
|
50
|
+
|
51
|
+
# ===== SERVICE TYPES =====
|
52
|
+
|
53
|
+
class ServiceType(str, Enum):
    """Kind of service the platform can expose.

    String-valued: members compare equal to their raw values.
    """

    LLM = "llm"
    EMBEDDING = "embedding"
    VISION = "vision"
    AUDIO = "audio"
    IMAGE_GEN = "image_gen"
|
60
|
+
|
61
|
+
class ServiceStatus(str, Enum):
    """Deployment / health state reported for a service."""

    PENDING = "pending"
    DEPLOYING = "deploying"
    HEALTHY = "healthy"
    UNHEALTHY = "unhealthy"
    STOPPED = "stopped"
|
68
|
+
|
69
|
+
class DeploymentPlatform(str, Enum):
    """Platforms on which self-owned services can be deployed."""

    MODAL = "modal"
    KUBERNETES = "kubernetes"
    RUNPOD = "runpod"
    YYDS = "yyds"
    OLLAMA = "ollama"  # Local deployment
|
76
|
+
|
77
|
+
# ===== OPERATION TYPES =====
|
78
|
+
|
79
|
+
class ModelOperationType(str, Enum):
    """Cost-incurring operations that can be performed on a model."""

    TRAINING = "training"
    EVALUATION = "evaluation"
    DEPLOYMENT = "deployment"
    INFERENCE = "inference"
    STORAGE = "storage"
|
86
|
+
|
87
|
+
class InferenceOperationType(str, Enum):
    """Specific kind of inference call being made."""

    CHAT = "chat"
    COMPLETION = "completion"
    EMBEDDING = "embedding"
    IMAGE_GENERATION = "image_generation"
    VISION_ANALYSIS = "vision_analysis"
    AUDIO_TRANSCRIPTION = "audio_transcription"
    AUDIO_GENERATION = "audio_generation"
|
96
|
+
|
97
|
+
# ===== ROUTING AND LOAD BALANCING =====
|
98
|
+
|
99
|
+
class RoutingStrategy(str, Enum):
    """Strategy used to spread requests across replicas of a model."""

    ROUND_ROBIN = "round_robin"
    WEIGHTED_ROUND_ROBIN = "weighted_round_robin"
    LEAST_CONNECTIONS = "least_connections"
    RESPONSE_TIME = "response_time"
    RANDOM = "random"
    CONSISTENT_HASH = "consistent_hash"
    DYNAMIC_LOAD_BALANCING = "dynamic_load_balancing"
|
108
|
+
|
109
|
+
# ===== EVALUATION AND TRAINING =====
|
110
|
+
|
111
|
+
class MetricType(str, Enum):
    """Names of the evaluation metrics recognised by the SDK."""

    ACCURACY = "accuracy"
    PRECISION = "precision"
    RECALL = "recall"
    F1_SCORE = "f1_score"
    BLEU = "bleu"
    ROUGE = "rouge"
    PERPLEXITY = "perplexity"
    BERTSCORE = "bertscore"
    SEMANTIC_SIMILARITY = "semantic_similarity"
|
122
|
+
|
123
|
+
class AnnotationType(str, Enum):
    """Annotation schemes available for training data."""

    TEXT_CLASSIFICATION = "text_classification"
    NAMED_ENTITY_RECOGNITION = "named_entity_recognition"
    QUESTION_ANSWERING = "question_answering"
    SUMMARIZATION = "summarization"
    TRANSLATION = "translation"
    IMAGE_CLASSIFICATION = "image_classification"
    OBJECT_DETECTION = "object_detection"
    SEMANTIC_SEGMENTATION = "semantic_segmentation"
|
133
|
+
|
134
|
+
class DatasetType(str, Enum):
    """Modality of a training dataset."""

    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    MULTIMODAL = "multimodal"
|
141
|
+
|
142
|
+
class DatasetStatus(str, Enum):
    """Processing state of a training dataset."""

    CREATED = "created"
    UPLOADING = "uploading"
    PROCESSING = "processing"
    READY = "ready"
    ERROR = "error"
|
149
|
+
|
150
|
+
class ExperimentType(str, Enum):
    """Kinds of ML experiment that can be recorded."""

    TRAINING = "training"
    EVALUATION = "evaluation"
    HYPERPARAMETER_TUNING = "hyperparameter_tuning"
    MODEL_COMPARISON = "model_comparison"
|
156
|
+
|
157
|
+
# ===== STACKED SERVICES =====
|
158
|
+
|
159
|
+
class LayerType(Enum):
    """Role a layer plays inside a stacked service.

    This is a plain ``Enum`` with no ``str`` mix-in, so members do NOT
    compare equal to their raw string values; use ``.value`` for that.
    """

    INPUT_PROCESSING = "input_processing"
    MODEL_INFERENCE = "model_inference"
    OUTPUT_PROCESSING = "output_processing"
    VALIDATION = "validation"
    CACHING = "caching"
|
166
|
+
|
167
|
+
class WorkflowType(Enum):
    """Execution pattern of a workflow.

    This is a plain ``Enum`` with no ``str`` mix-in; compare via ``.value``
    when matching against raw strings.
    """

    SEQUENTIAL = "sequential"
    PARALLEL = "parallel"
    CONDITIONAL = "conditional"
    LOOP = "loop"
|
173
|
+
|
174
|
+
# ===== PROVIDER TYPES =====
|
175
|
+
|
176
|
+
class Provider(str, Enum):
    """Identifiers of the AI service providers known to the SDK."""

    OPENAI = "openai"
    REPLICATE = "replicate"
    OLLAMA = "ollama"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    YYDS = "yyds"
    MODAL = "modal"
|
185
|
+
|
186
|
+
# ===== DATA CLASSES =====
|
187
|
+
|
188
|
+
@dataclass
class HealthMetrics:
    """Result of a single health probe against a service.

    Only ``is_healthy`` is required; every other field defaults to ``None``
    when the corresponding measurement is not supplied.
    """

    # Required probe verdict.
    is_healthy: bool

    # Optional probe details.
    response_time_ms: Optional[int] = None
    status_code: Optional[int] = None

    # Optional resource readings.
    cpu_usage_percent: Optional[float] = None
    memory_usage_mb: Optional[int] = None
    gpu_usage_percent: Optional[float] = None

    # Optional diagnostics.
    error_message: Optional[str] = None
    checked_at: Optional[datetime] = None
|
199
|
+
|
200
|
+
@dataclass
class ServiceMetrics:
    """Aggregated runtime counters for a service.

    All counters default to zero; the window bounds default to ``None``.
    """

    # Counters accumulated over the window.
    request_count: int = 0
    total_processing_time_ms: int = 0
    error_count: int = 0
    total_cost_usd: float = 0.0

    # Bounds of the window the counters refer to.
    window_start: Optional[datetime] = None
    window_end: Optional[datetime] = None
|
209
|
+
|
210
|
+
@dataclass
class ResourceRequirements:
    """Compute resources and replica bounds requested for a service.

    Hardware fields default to ``None`` (unspecified); replica bounds
    default to ``min_replicas=0`` and ``max_replicas=1``.
    """

    # Hardware requests.
    gpu_type: Optional[str] = None
    memory_mb: Optional[int] = None
    cpu_cores: Optional[int] = None
    storage_gb: Optional[int] = None

    # Replica bounds.
    min_replicas: int = 0
    max_replicas: int = 1
|
219
|
+
|
220
|
+
@dataclass
class ModelInfo:
    """Descriptive record for a single model.

    The first seven fields are required and positional; the two
    timestamps are optional and default to ``None``.
    """

    # Identity and classification.
    model_id: str
    model_type: ModelType
    capabilities: List[ModelCapability]
    stage: ModelStage

    # Provider details.
    provider: str
    provider_model_name: str

    # Free-form extra attributes.
    metadata: Dict[str, Any]

    # Optional timestamps.
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
|
232
|
+
|
233
|
+
@dataclass
class UsageData:
    """Usage record consumed by billing tracking.

    Only ``operation_type`` is required; every other field defaults
    to ``None``.
    """

    # Which operation was performed.
    operation_type: ModelOperationType
    inference_operation: Optional[InferenceOperationType] = None

    # Token counts.
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None

    # Unit counts (float-valued).
    input_units: Optional[float] = None
    output_units: Optional[float] = None

    # Free-form extra attributes.
    metadata: Optional[Dict[str, Any]] = None
|
243
|
+
|
244
|
+
# ===== TYPE ALIASES =====
|
245
|
+
|
246
|
+
# Readability aliases. Each name is the very same object as the type on
# the right-hand side, so they add no runtime checking.

# Identifier-like strings.
ModelID = str
ServiceID = str
DeploymentID = str

# Name-like strings.
ProviderName = str
ModelName = str
EndpointURL = str

# String-keyed dictionaries with arbitrary values.
ConfigDict = Dict[str, Any]
MetadataDict = Dict[str, Any]
|
255
|
+
|
256
|
+
# ===== BACKWARD COMPATIBILITY =====
|
257
|
+
|
258
|
+
# Legacy aliases for backward compatibility during migration
|
259
|
+
# These should be removed once all modules are updated
|
260
|
+
|
261
|
+
# From inference/billing_tracker.py
|
262
|
+
class LegacyServiceType(Enum):
    """Deprecated service-type enum; prefer ``ServiceType``.

    This is a plain ``Enum`` with no ``str`` mix-in, so members do not
    compare equal to their raw string values.
    """

    LLM = "llm"
    EMBEDDING = "embedding"
    VISION = "vision"
    IMAGE_GENERATION = "image_generation"
    AUDIO_STT = "audio_stt"
    AUDIO_TTS = "audio_tts"
|
270
|
+
|
271
|
+
# Migration mapping
|
272
|
+
# Translation table from each legacy member to its unified counterpart.
LEGACY_SERVICE_TYPE_MAPPING = {
    # One-to-one renames.
    LegacyServiceType.LLM: ServiceType.LLM,
    LegacyServiceType.EMBEDDING: ServiceType.EMBEDDING,
    LegacyServiceType.VISION: ServiceType.VISION,
    LegacyServiceType.IMAGE_GENERATION: ServiceType.IMAGE_GEN,
    # Both legacy audio variants collapse into the single AUDIO type.
    LegacyServiceType.AUDIO_STT: ServiceType.AUDIO,
    LegacyServiceType.AUDIO_TTS: ServiceType.AUDIO,
}
|
280
|
+
|
281
|
+
def migrate_legacy_service_type(legacy_type: Union[LegacyServiceType, str]) -> ServiceType:
    """Translate a legacy service type into the unified ``ServiceType``.

    Args:
        legacy_type: A ``LegacyServiceType`` member or a raw string value.

    Returns:
        The corresponding ``ServiceType``. A non-string input that is not
        a key of ``LEGACY_SERVICE_TYPE_MAPPING`` yields ``ServiceType.LLM``.

    Raises:
        ValueError: If a string matches neither a legacy value nor a
            ``ServiceType`` value.
    """
    # Non-string inputs: plain table lookup with an LLM default.
    if not isinstance(legacy_type, str):
        return LEGACY_SERVICE_TYPE_MAPPING.get(legacy_type, ServiceType.LLM)

    # Strings: first member whose legacy value equals the input, if any.
    matched_member = next(
        (member for member in LegacyServiceType if member.value == legacy_type),
        None,
    )
    if matched_member is not None:
        return LEGACY_SERVICE_TYPE_MAPPING[matched_member]

    # Not a legacy value; treat it as a unified ServiceType value instead.
    return ServiceType(legacy_type)
|
isa_model/deployment/__init__.py
CHANGED
@@ -26,11 +26,13 @@ from .core.deployment_config import (
|
|
26
26
|
create_gemma_runpod_triton_config,
|
27
27
|
create_local_triton_config
|
28
28
|
)
|
29
|
+
from .services import AutoDeployVisionService
|
29
30
|
|
30
31
|
__all__ = [
|
31
32
|
# Main classes
|
32
33
|
"DeploymentManager",
|
33
34
|
"DeploymentConfig",
|
35
|
+
"AutoDeployVisionService",
|
34
36
|
|
35
37
|
# Configuration classes
|
36
38
|
"ModelConfig",
|
@@ -119,7 +119,7 @@ image = (
|
|
119
119
|
image=image,
|
120
120
|
memory=16384, # 16GB RAM
|
121
121
|
timeout=1800, # 30 minutes
|
122
|
-
scaledown_window=
|
122
|
+
scaledown_window=60, # 1 minute idle timeout
|
123
123
|
min_containers=0, # Scale to zero when not in use
|
124
124
|
)
|
125
125
|
class DocumentAnalysisService:
|
@@ -605,8 +605,162 @@ class DocumentAnalysisService:
|
|
605
605
|
image_data = base64.b64decode(image_b64)
|
606
606
|
return Image.open(io.BytesIO(image_data)).convert('RGB')
|
607
607
|
|
608
|
-
#
|
608
|
+
# Auto-registration function
|
609
|
+
@app.function()
async def register_service():
    """Register this document-analysis service in the model and service registries.

    Two registrations are attempted:

    1. The service is recorded as a model via ``ModelManager().registry``.
    2. The service is recorded as a deployed ``ModelService`` in a
       ``ServiceRegistry`` built on the same registry.

    A failure in step 2 is reported but does not prevent the result of
    step 1 from being returned.

    Returns:
        dict: ``{"success", "model_registry", "service_registry"}`` when the
        registration attempt ran, or ``{"success": False, "error": ...}``
        when the required modules cannot be imported or an unexpected
        exception is raised.
    """
    try:
        import sys
        from pathlib import Path

        # This file lives in isa_model/deployment/cloud/modal/, so the
        # directory that CONTAINS the ``isa_model`` package is five levels
        # up. Four levels is the package directory itself, which cannot
        # satisfy ``import isa_model``. Chained ``.parent`` stops at the
        # filesystem root, so a shallow path cannot raise here.
        project_root = Path(__file__).resolve().parent.parent.parent.parent.parent
        project_root_str = str(project_root)
        if project_root_str not in sys.path:
            sys.path.insert(0, project_root_str)

        try:
            # Shared enums and dataclasses live in isa_model.core.types.
            from isa_model.core.types import (
                ModelType,
                ModelCapability,
                ServiceType,
                DeploymentPlatform,
                ServiceStatus,
                ResourceRequirements,
            )
            try:
                # Package layout as of 0.3.7.
                # NOTE(review): confirm that this ModelManager still exposes
                # ``.registry.register_model(...)`` with these keyword args.
                from isa_model.core.models.model_manager import ModelManager
                from isa_model.deployment.services.service_registry import ServiceRegistry
                from isa_model.deployment.services.model_service import ModelService
            except ImportError:
                # Older layout, kept as a fallback.
                from isa_model.core.model_manager import ModelManager
                from isa_model.core.service_registry import ServiceRegistry
                from isa_model.core.model_service import ModelService
        except ImportError as import_error:
            # Fallback if import fails in Modal environment
            print("⚠️ Could not import required modules - registration skipped")
            print(f"⚠️ Import error: {import_error}")
            return {"success": False, "error": "Required modules not available"}

        description = (
            "ISA Vision Document Analysis Service with table detection, "
            "structure recognition, and OCR"
        )
        underlying_models = [
            "microsoft/table-transformer-detection",
            "microsoft/table-transformer-structure-recognition-v1.1-all",
            "PaddleOCR 3.0",
        ]

        # Use ModelManager to register this service
        model_manager = ModelManager()

        # 1. First register the underlying model (backward compatibility)
        model_success = model_manager.registry.register_model(
            model_id="isa-vision-doc-service",
            model_type=ModelType.VISION,
            capabilities=[
                ModelCapability.TABLE_DETECTION,
                ModelCapability.TABLE_STRUCTURE_RECOGNITION,
                ModelCapability.OCR,
                ModelCapability.IMAGE_ANALYSIS,
            ],
            metadata={
                "description": description,
                "service_name": "isa-vision-doc",
                "service_type": "modal",
                "deployment_type": "modal",
                "endpoint": "https://isa-vision-doc.modal.run",
                # Copied so the two metadata dicts never share one list.
                "underlying_models": list(underlying_models),
                "gpu_requirement": "T4",
                "memory_mb": 16384,
                "auto_registered": True,
                "registered_by": "isa_vision_doc_service.py",
                "is_service": True,  # Mark this as a service, not a raw model
                "capabilities_details": {
                    "table_detection": "Microsoft Table Transformer Detection",
                    "table_structure": "Microsoft Table Transformer Structure Recognition v1.1",
                    "ocr": "PaddleOCR 3.0 with Chinese/English support",
                },
            },
        )

        # 2. Register as a deployed service in the ServiceRegistry (MaaS platform)
        service_success = False
        try:
            service_registry = ServiceRegistry(model_manager.registry)

            service = ModelService(
                service_id="isa-vision-doc-modal-001",
                service_name="isa_vision_doc",
                model_id="isa-vision-doc-service",
                deployment_platform=DeploymentPlatform.MODAL,
                service_type=ServiceType.VISION,
                status=ServiceStatus.HEALTHY,
                inference_endpoint="https://isa-vision-doc.modal.run/analyze_document_complete",
                health_endpoint="https://isa-vision-doc.modal.run/health_check",
                capabilities=["table_detection", "table_structure_recognition", "ocr", "image_analysis"],
                resource_requirements=ResourceRequirements(
                    gpu_type="T4",
                    memory_mb=16384,
                    cpu_cores=4,
                    min_replicas=0,
                    max_replicas=5,
                ),
                metadata={
                    "description": description,
                    "underlying_models": list(underlying_models),
                    "auto_scaling": True,
                    "scale_to_zero": True,
                    "platform": "modal",
                    "registered_by": "isa_vision_doc_service.py",
                },
            )

            # Register in ServiceRegistry
            service_success = await service_registry.register_service(service)

            if service_success:
                print("✅ Service registered in MaaS platform ServiceRegistry")
            else:
                print("⚠️ ServiceRegistry registration failed")

        except Exception as e:
            # Best effort: a service-registry failure must not discard the
            # result of the model-registry step above.
            print(f"⚠️ ServiceRegistry registration error: {e}")

        if model_success:
            print("✅ Model registry registration successful")
        else:
            print("⚠️ Model registry registration failed")

        return {
            # Coerced so "success" is always a real bool, even if a registry
            # call returned None or another falsy/truthy object.
            "success": bool(model_success and service_success),
            "model_registry": model_success,
            "service_registry": service_success,
        }

    except Exception as e:
        print(f"❌ Auto-registration error: {e}")
        return {"success": False, "error": str(e)}
|
731
|
+
|
732
|
+
# Quick deployment function
|
733
|
+
@app.function()
def deploy_service():
    """Deploy this service by running ``modal deploy`` on this file.

    Returns:
        dict: ``{"success": True, "output": <stdout>}`` when the command
        exits with status 0; otherwise
        ``{"success": False, "error": <message>, "stderr": <stderr>}``.
        ``stderr`` is an empty string when the ``modal`` executable could
        not be started at all.
    """
    import subprocess

    print("🚀 Deploying ISA Vision Document Service...")
    try:
        # Argument list with no shell, so the file path is passed verbatim.
        result = subprocess.run(
            ["modal", "deploy", __file__],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ Deployment failed: {e}")
        print(f"📝 Error: {e.stderr}")
        return {"success": False, "error": str(e), "stderr": e.stderr}
    except FileNotFoundError as e:
        # The ``modal`` CLI is not on PATH; report it in the same shape as a
        # failed deploy instead of letting the exception escape.
        print(f"❌ Deployment failed: {e}")
        return {"success": False, "error": str(e), "stderr": ""}

    print("✅ Deployment completed successfully!")
    print(f"📝 Output: {result.stdout}")
    return {"success": True, "output": result.stdout}
|
609
760
|
|
610
761
|
if __name__ == "__main__":
    # Running the file directly only prints usage hints; it deploys nothing.
    usage_lines = (
        "🚀 ISA Vision Document Service - Modal Deployment",
        "Deploy with: modal deploy isa_vision_doc_service.py",
        "Or call: modal run isa_vision_doc_service.py::deploy_service",
        "Note: Requires T4 GPU and 16GB+ RAM for optimal performance",
        "\n📝 Service will auto-register in model registry upon deployment",
    )
    for usage_line in usage_lines:
        print(usage_line)
|