isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- isa_model/__init__.py +30 -1
- isa_model/client.py +770 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/models/model_repo.py +343 -0
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/__init__.py +9 -0
- isa_model/deployment/cloud/modal/__init__.py +10 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +187 -387
- isa_model/inference/providers/modal_provider.py +109 -0
- isa_model/inference/providers/yyds_provider.py +108 -0
- isa_model/inference/services/__init__.py +2 -1
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -55
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
- isa_model/inference/services/img/flux_professional_service.py +603 -0
- isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +519 -35
- isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +150 -15
- isa_model/inference/services/llm/openai_llm_service.py +134 -31
- isa_model/inference/services/llm/yyds_llm_service.py +255 -0
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +241 -96
- isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
- isa_model/inference/services/vision/doc_analysis_service.py +640 -0
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +109 -170
- isa_model/inference/services/vision/replicate_vision_service.py +508 -0
- isa_model/inference/services/vision/ui_analysis_service.py +823 -0
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/__init__.py +19 -0
- isa_model/serving/api/__init__.py +10 -0
- isa_model/serving/api/fastapi_server.py +89 -0
- isa_model/serving/api/middleware/__init__.py +9 -0
- isa_model/serving/api/middleware/request_logger.py +88 -0
- isa_model/serving/api/routes/__init__.py +5 -0
- isa_model/serving/api/routes/health.py +82 -0
- isa_model/serving/api/routes/llm.py +19 -0
- isa_model/serving/api/routes/ui_analysis.py +223 -0
- isa_model/serving/api/routes/unified.py +202 -0
- isa_model/serving/api/routes/vision.py +19 -0
- isa_model/serving/api/schemas/__init__.py +17 -0
- isa_model/serving/api/schemas/common.py +33 -0
- isa_model/serving/api/schemas/ui_analysis.py +78 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
- isa_model-0.3.6.dist-info/RECORD +147 -0
- isa_model/core/model_manager.py +0 -208
- isa_model/core/model_registry.py +0 -342
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- isa_model-0.3.4.dist-info/RECORD +0 -91
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
isa_model/deployment/cloud/modal/isa_vision_table_service.py
@@ -0,0 +1,532 @@
+"""
+Qwen2.5-VL-32B Table Data Extraction Service
+
+Specialized service for table data extraction using Qwen2.5-VL-32B-Instruct-AWQ
+"""
+
+import modal
+import torch
+import base64
+import io
+import numpy as np
+from PIL import Image
+from typing import Dict, List, Optional, Any
+import time
+import json
+import os
+import logging
+
+# Define Modal application
+app = modal.App("qwen-vision-table")
+
+# Download Qwen2.5-VL model
+def download_qwen_model():
+    """Download Qwen2.5-VL-32B-Instruct-AWQ model"""
+    from huggingface_hub import snapshot_download
+
+    print("📦 Downloading Qwen2.5-VL-32B-Instruct-AWQ...")
+    os.makedirs("/models", exist_ok=True)
+
+    try:
+        snapshot_download(
+            repo_id="Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
+            local_dir="/models/qwen2.5-vl-32b-awq",
+            allow_patterns=["**/*.safetensors", "**/*.json", "**/*.py", "**/*.txt"],
+            # Use auth token if needed for gated models
+            # token=os.getenv("HF_TOKEN")
+        )
+        print("✅ Qwen2.5-VL-32B-Instruct-AWQ downloaded")
+    except Exception as e:
+        print(f"⚠️ Model download failed: {e}")
+        raise
+
+    print("📦 Model download completed")
+
+# Define Modal container image with AWQ support
+image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .pip_install([
+        # Core AI libraries with AWQ support
+        "torch>=2.1.0",
+        "torchvision",
+        "transformers>=4.37.0",
+        "accelerate>=0.26.0",
+        "autoawq>=0.2.0", # AWQ quantization support
+        "huggingface_hub",
+
+        # Qwen-VL specific dependencies
+        "qwen-vl-utils", # If available
+        "tiktoken",
+        "einops",
+        "timm",
+
+        # Image processing
+        "pillow>=10.0.1",
+        "opencv-python-headless",
+        "numpy>=1.24.3",
+
+        # HTTP libraries
+        "httpx>=0.26.0",
+        "requests",
+
+        # Utilities
+        "pydantic>=2.0.0",
+        "python-dotenv",
+    ])
+    .run_function(download_qwen_model)
+    .env({
+        "TRANSFORMERS_CACHE": "/models",
+        "HF_HOME": "/models",
+        "TORCH_HOME": "/models",
+    })
+)
+
+# Table Extraction Service
+@app.cls(
+    gpu="A100", # A100 recommended for 32B model, H100 if available
+    image=image,
+    memory=32768, # 32GB RAM for 32B model
+    timeout=3600, # 1 hour timeout
+    scaledown_window=60, # 1 minute idle timeout
+    min_containers=0, # Scale to zero to save costs
+    # secrets=[modal.Secret.from_name("huggingface-token")] # If needed
+)
+class QwenTableExtractionService:
+    """
+    Table Data Extraction Service using Qwen2.5-VL-32B-Instruct-AWQ
+
+    Provides high-accuracy table extraction from images
+    """
+
+    @modal.enter()
+    def load_model(self):
+        """Load Qwen2.5-VL model on container startup"""
+        print("🚀 Loading Qwen2.5-VL-32B-Instruct-AWQ...")
+        start_time = time.time()
+
+        # Initialize attributes
+        self.model = None
+        self.processor = None
+        self.logger = logging.getLogger(__name__)
+
+        try:
+            from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+
+            model_path = "Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+
+            # Load processor
+            print("📱 Loading processor...")
+            self.processor = AutoProcessor.from_pretrained(
+                model_path,
+                trust_remote_code=True
+            )
+
+            # Load model with AWQ quantization
+            print("🧠 Loading AWQ quantized model...")
+            self.model = Qwen2VLForConditionalGeneration.from_pretrained(
+                model_path,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                trust_remote_code=True,
+                # AWQ specific settings
+                use_safetensors=True,
+            )
+
+            # Try to import qwen-vl-utils
+            try:
+                from qwen_vl_utils import process_vision_info as qwen_process_vision_info
+                print("✅ qwen-vl-utils imported successfully")
+                # Use the official process_vision_info if available
+                globals()['process_vision_info'] = qwen_process_vision_info
+            except ImportError:
+                print("⚠️ qwen-vl-utils not found, using custom implementation")
+
+            # Set to evaluation mode
+            self.model.eval()
+
+            load_time = time.time() - start_time
+            print(f"✅ Qwen2.5-VL model loaded in {load_time:.2f}s")
+
+        except Exception as e:
+            print(f"❌ Model loading failed: {e}")
+            raise
+
+    @modal.method()
+    def extract_table_data(
+        self,
+        image_b64: str,
+        extraction_format: str = "markdown",
+        custom_prompt: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Extract table data from image
+
+        Args:
+            image_b64: Base64 encoded image
+            extraction_format: Output format ("markdown", "json", "csv", "html")
+            custom_prompt: Custom extraction prompt
+
+        Returns:
+            Extracted table data and metadata
+        """
+        start_time = time.time()
+
+        try:
+            # Decode image
+            image = self._decode_image(image_b64)
+
+            # Prepare prompt based on format
+            if custom_prompt:
+                prompt = custom_prompt
+            else:
+                prompt = self._get_extraction_prompt(extraction_format)
+
+            # Process inputs
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image", "image": image},
+                        {"type": "text", "text": prompt}
+                    ]
+                }
+            ]
+
+            # Prepare inputs for the model
+            text = self.processor.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+
+            image_inputs, video_inputs = process_vision_info(messages)
+            inputs = self.processor(
+                text=[text],
+                images=image_inputs,
+                videos=video_inputs,
+                padding=True,
+                return_tensors="pt"
+            )
+            inputs = inputs.to("cuda")
+
+            # Generate response
+            with torch.no_grad():
+                generated_ids = self.model.generate(
+                    **inputs,
+                    max_new_tokens=2048,
+                    do_sample=False,
+                    temperature=0.0, # Deterministic for table extraction
+                    pad_token_id=self.processor.tokenizer.eos_token_id
+                )
+
+            # Decode response
+            generated_ids_trimmed = [
+                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+            ]
+            output_text = self.processor.batch_decode(
+                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )[0]
+
+            processing_time = time.time() - start_time
+
+            # Post-process extracted data
+            processed_data = self._post_process_extraction(output_text, extraction_format)
+
+            return {
+                'success': True,
+                'service': 'qwen-vision-table',
+                'extracted_data': processed_data,
+                'raw_output': output_text,
+                'format': extraction_format,
+                'processing_time': processing_time,
+                'model_info': {
+                    'model': 'Qwen2.5-VL-32B-Instruct-AWQ',
+                    'gpu': 'A100',
+                    'quantization': 'AWQ',
+                    'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
+                }
+            }
+
+        except Exception as e:
+            self.logger.error(f"Table extraction failed: {e}")
+            return {
+                'success': False,
+                'service': 'qwen-vision-table',
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }
+
+    def _get_extraction_prompt(self, format_type: str) -> str:
+        """Get extraction prompt based on desired format"""
+        base_prompt = "Please extract all the data from this table accurately."
+
+        format_prompts = {
+            "markdown": f"{base_prompt} Format the output as a markdown table with proper alignment.",
+            "json": f"{base_prompt} Format the output as a JSON array where each row is an object with column headers as keys.",
+            "csv": f"{base_prompt} Format the output as CSV with comma-separated values. Include headers in the first row.",
+            "html": f"{base_prompt} Format the output as an HTML table with proper <table>, <tr>, <td>, and <th> tags.",
+        }
+
+        return format_prompts.get(format_type, base_prompt)
+
+    def _post_process_extraction(self, raw_output: str, format_type: str) -> Dict[str, Any]:
+        """Post-process extracted table data"""
+        try:
+            if format_type == "json":
+                # Try to parse JSON
+                import json
+                try:
+                    # Extract JSON from the output if it's wrapped in text
+                    start_idx = raw_output.find('[')
+                    end_idx = raw_output.rfind(']') + 1
+                    if start_idx != -1 and end_idx != 0:
+                        json_str = raw_output[start_idx:end_idx]
+                        parsed_data = json.loads(json_str)
+                        return {"structured_data": parsed_data, "raw_text": raw_output}
+                except json.JSONDecodeError:
+                    pass
+
+            elif format_type == "csv":
+                # Parse CSV-like output
+                lines = raw_output.strip().split('\n')
+                csv_data = [line.split(',') for line in lines if line.strip()]
+                return {"structured_data": csv_data, "raw_text": raw_output}
+
+            # For markdown, html, or unparseable formats, return as text
+            return {"structured_data": raw_output, "raw_text": raw_output}
+
+        except Exception as e:
+            self.logger.warning(f"Post-processing failed: {e}")
+            return {"structured_data": raw_output, "raw_text": raw_output}
+
+    @modal.method()
+    def batch_extract_tables(self, images_b64: List[str], extraction_format: str = "markdown") -> Dict[str, Any]:
+        """
+        Extract tables from multiple images
+
+        Args:
+            images_b64: List of base64 encoded images
+            extraction_format: Output format for all extractions
+
+        Returns:
+            Batch extraction results
+        """
+        start_time = time.time()
+        results = []
+
+        for i, image_b64 in enumerate(images_b64):
+            try:
+                result = self.extract_table_data(image_b64, extraction_format)
+                result['image_index'] = i
+                results.append(result)
+            except Exception as e:
+                results.append({
+                    'success': False,
+                    'image_index': i,
+                    'error': str(e)
+                })
+
+        return {
+            'success': True,
+            'service': 'qwen-vision-table',
+            'batch_results': results,
+            'total_images': len(images_b64),
+            'successful_extractions': sum(1 for r in results if r.get('success', False)),
+            'total_processing_time': time.time() - start_time
+        }
+
+    @modal.method()
+    def health_check(self) -> Dict[str, Any]:
+        """Health check endpoint"""
+        return {
+            'status': 'healthy',
+            'service': 'qwen-vision-table',
+            'model': 'Qwen2.5-VL-32B-Instruct-AWQ',
+            'model_loaded': self.model is not None,
+            'processor_loaded': self.processor is not None,
+            'timestamp': time.time(),
+            'gpu': 'A100'
+        }
+
+    def _decode_image(self, image_b64: str) -> Image.Image:
+        """Decode base64 image"""
+        try:
+            if image_b64.startswith('data:image'):
+                image_b64 = image_b64.split(',')[1]
+
+            image_data = base64.b64decode(image_b64)
+            return Image.open(io.BytesIO(image_data)).convert('RGB')
+        except Exception as e:
+            raise ValueError(f"Failed to decode image: {e}")
+
+# Helper function for vision processing
+def process_vision_info(messages):
+    """Process vision information from messages"""
+    image_inputs = []
+    video_inputs = []
+
+    for message in messages:
+        if isinstance(message.get("content"), list):
+            for content in message["content"]:
+                if content.get("type") == "image":
+                    image_inputs.append(content["image"])
+                elif content.get("type") == "video":
+                    video_inputs.append(content["video"])
+
+    return image_inputs, video_inputs
+
+# Deployment script
+@app.function()
+def deploy_info():
+    """Deployment information"""
+    return {
+        "service": "Qwen2.5-VL-32B Table Extraction",
+        "model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
+        "gpu_requirement": "A100 (minimum), H100 (recommended)",
+        "memory_requirement": "32GB+",
+        "deploy_command": "modal deploy qwen_table_extraction.py"
+    }
+
+# Auto-registration function
+@app.function()
+async def register_service():
+    """Auto-register this service in the model registry"""
+    try:
+        import sys
+        from pathlib import Path
+
+        # Add project root to path for imports
+        project_root = Path(__file__).parent.parent.parent.parent
+        sys.path.insert(0, str(project_root))
+
+        try:
+            from isa_model.core.model_manager import ModelManager
+            from isa_model.core.model_repo import ModelType, ModelCapability
+            from isa_model.core.service_registry import ServiceRegistry
+            from isa_model.core.types import ServiceType, DeploymentPlatform, ServiceStatus, ResourceRequirements
+            from isa_model.core.model_service import ModelService
+        except ImportError:
+            # Fallback if import fails in Modal environment
+            print("⚠️ Could not import required modules - registration skipped")
+            return {"success": False, "error": "Required modules not available"}
+
+        # Use ModelManager to register this service
+        model_manager = ModelManager()
+
+        # 1. First register the underlying model (backward compatibility)
+        model_success = model_manager.registry.register_model(
+            model_id="qwen2.5-vl-32b-table-service",
+            model_type=ModelType.VISION,
+            capabilities=[
+                ModelCapability.TABLE_DETECTION,
+                ModelCapability.TABLE_STRUCTURE_RECOGNITION,
+                ModelCapability.OCR,
+                ModelCapability.IMAGE_ANALYSIS
+            ],
+            metadata={
+                "description": "Qwen2.5-VL-32B table extraction service",
+                "service_name": "qwen-vision-table",
+                "service_type": "modal",
+                "deployment_type": "modal",
+                "endpoint": "https://qwen-vision-table.modal.run",
+                "underlying_model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
+                "gpu_requirement": "A100",
+                "memory_mb": 32768,
+                "auto_registered": True,
+                "registered_by": "isa_vision_table_service.py",
+                "is_service": True # Mark this as a service, not a raw model
+            }
+        )
+
+        # 2. Register as a deployed service in the ServiceRegistry (MaaS platform)
+        service_success = False
+        try:
+            service_registry = ServiceRegistry(model_manager.registry)
+
+            # Create ModelService instance
+            service = ModelService(
+                service_id="qwen-table-modal-001",
+                service_name="isa_vision_table",
+                model_id="qwen2.5-vl-32b-table-service",
+                deployment_platform=DeploymentPlatform.MODAL,
+                service_type=ServiceType.VISION,
+                status=ServiceStatus.HEALTHY,
+                inference_endpoint="https://qwen-vision-table.modal.run/extract_table_data",
+                health_endpoint="https://qwen-vision-table.modal.run/health_check",
+                capabilities=["table_detection", "table_structure_recognition", "ocr", "image_analysis"],
+                resource_requirements=ResourceRequirements(
+                    gpu_type="A100",
+                    memory_mb=32768,
+                    cpu_cores=8,
+                    min_replicas=0,
+                    max_replicas=3
+                ),
+                metadata={
+                    "description": "Qwen2.5-VL-32B table extraction service",
+                    "underlying_model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
+                    "auto_scaling": True,
+                    "scale_to_zero": True,
+                    "platform": "modal",
+                    "registered_by": "isa_vision_table_service.py"
+                }
+            )
+
+            # Register in ServiceRegistry
+            service_success = await service_registry.register_service(service)
+
+            if service_success:
+                print("✅ Service registered in MaaS platform ServiceRegistry")
+            else:
+                print("⚠️ ServiceRegistry registration failed")
+
+        except Exception as e:
+            print(f"⚠️ ServiceRegistry registration error: {e}")
+
+        if model_success:
+            print("✅ Model registry registration successful")
+        else:
+            print("⚠️ Model registry registration failed")
+
+        overall_success = model_success and service_success
+        return {
+            "success": overall_success,
+            "model_registry": model_success,
+            "service_registry": service_success
+        }
+
+    except Exception as e:
+        print(f"❌ Auto-registration error: {e}")
+        return {"success": False, "error": str(e)}
+
+# Quick deployment function
+@app.function()
+def deploy_service():
+    """Deploy this service instantly"""
+    import subprocess
+
+    print("🚀 Deploying Qwen2.5-VL Table Extraction Service...")
+    try:
+        # Get the current file path
+        current_file = __file__
+
+        # Run modal deploy command
+        result = subprocess.run(
+            ["modal", "deploy", current_file],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+
+        print("✅ Deployment completed successfully!")
+        print(f"📝 Output: {result.stdout}")
+        return {"success": True, "output": result.stdout}
+
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Deployment failed: {e}")
+        print(f"📝 Error: {e.stderr}")
+        return {"success": False, "error": str(e), "stderr": e.stderr}
+
+if __name__ == "__main__":
+    print("🚀 Qwen2.5-VL Table Extraction Service - Modal Deployment")
+    print("Deploy with: modal deploy isa_vision_table_service.py")
+    print("Or call: modal run isa_vision_table_service.py::deploy_service")
+    print("Note: Requires A100 GPU and 32GB+ RAM for optimal performance")
+    print("\n📝 Service will auto-register in model registry upon deployment")