isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +30 -1
- isa_model/client.py +770 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/models/model_repo.py +343 -0
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/__init__.py +9 -0
- isa_model/deployment/cloud/modal/__init__.py +10 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +187 -387
- isa_model/inference/providers/modal_provider.py +109 -0
- isa_model/inference/providers/yyds_provider.py +108 -0
- isa_model/inference/services/__init__.py +2 -1
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -55
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
- isa_model/inference/services/img/flux_professional_service.py +603 -0
- isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +519 -35
- isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +150 -15
- isa_model/inference/services/llm/openai_llm_service.py +134 -31
- isa_model/inference/services/llm/yyds_llm_service.py +255 -0
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +241 -96
- isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
- isa_model/inference/services/vision/doc_analysis_service.py +640 -0
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +109 -170
- isa_model/inference/services/vision/replicate_vision_service.py +508 -0
- isa_model/inference/services/vision/ui_analysis_service.py +823 -0
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/__init__.py +19 -0
- isa_model/serving/api/__init__.py +10 -0
- isa_model/serving/api/fastapi_server.py +89 -0
- isa_model/serving/api/middleware/__init__.py +9 -0
- isa_model/serving/api/middleware/request_logger.py +88 -0
- isa_model/serving/api/routes/__init__.py +5 -0
- isa_model/serving/api/routes/health.py +82 -0
- isa_model/serving/api/routes/llm.py +19 -0
- isa_model/serving/api/routes/ui_analysis.py +223 -0
- isa_model/serving/api/routes/unified.py +202 -0
- isa_model/serving/api/routes/vision.py +19 -0
- isa_model/serving/api/schemas/__init__.py +17 -0
- isa_model/serving/api/schemas/common.py +33 -0
- isa_model/serving/api/schemas/ui_analysis.py +78 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
- isa_model-0.3.6.dist-info/RECORD +147 -0
- isa_model/core/model_manager.py +0 -208
- isa_model/core/model_registry.py +0 -342
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- isa_model-0.3.4.dist-info/RECORD +0 -91
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,500 @@
|
|
1
|
+
"""
|
2
|
+
ISA Vision Service
|
3
|
+
|
4
|
+
Connects to self-hosted Modal UI detection service
|
5
|
+
Provides vision capabilities using our deployed models
|
6
|
+
"""
|
7
|
+
|
8
|
+
import modal
|
9
|
+
import base64
|
10
|
+
import io
|
11
|
+
import logging
|
12
|
+
from typing import Dict, Any, List, Union, Optional, BinaryIO
|
13
|
+
from PIL import Image
|
14
|
+
|
15
|
+
from .base_vision_service import BaseVisionService
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
class ISAVisionService(BaseVisionService):
    """ISA Vision Service using self-hosted Modal backends.

    Bridges the local vision-service API to three deployed Modal apps:

    * ``isa-vision-ui``     -- UI element detection
    * ``isa-vision-doc``    -- document analysis / OCR
    * ``qwen-vision-table`` -- table extraction (Qwen2.5-VL)

    Each backend is optional: a lookup that fails at construction time
    leaves the corresponding handle as ``None``, and the public methods
    then return ``{'success': False, ...}`` result dicts instead of
    raising.
    """

    def __init__(self, provider, model_name: str):
        super().__init__(provider, model_name)
        # Handles to the deployed Modal apps; None when a service is
        # not deployed or unreachable.
        self.ui_app = None
        self.doc_app = None
        self.table_app = None
        self._initialize_modal_connections()

    def _initialize_modal_connections(self):
        """Resolve handles to the deployed Modal services (best-effort)."""
        self.ui_app = self._lookup_app("isa-vision-ui", "UI detection service")
        self.doc_app = self._lookup_app("isa-vision-doc", "document analysis service")
        self.table_app = self._lookup_app("qwen-vision-table", "table extraction service")

    @staticmethod
    def _lookup_app(app_name: str, label: str):
        """Look up a deployed Modal app by name.

        Returns the app handle, or ``None`` (with a warning logged) when
        the service is not available. Never raises.
        """
        try:
            app = modal.App.lookup(app_name, create_if_missing=False)
            logger.info(f"✅ Connected to {label}")
            return app
        except Exception as e:
            logger.warning(f"⚠️ {label} not available: {e}")
            return None

    async def invoke(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        task: Optional[str] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """Unified entry point for all vision operations.

        Dispatches on ``task``:

        * ``"detect_ui"`` / ``"ui_analysis"``        -> :meth:`detect_objects`
        * ``"extract_text"`` / ``"ocr"``             -> :meth:`extract_text`
        * ``"analyze_document"``                     -> document analysis
        * ``"extract_table"`` / ``"table_extraction"`` -> :meth:`extract_table_data`
        * anything else                              -> :meth:`analyze_image`

        Extra ``kwargs`` are forwarded to the selected method.
        """
        if task in ("detect_ui", "ui_analysis"):
            return await self.detect_objects(image, **kwargs)
        elif task in ("extract_text", "ocr"):
            return await self.extract_text(image)
        elif task == "analyze_document":
            return await self._analyze_document(image)
        elif task in ("extract_table", "table_extraction"):
            return await self.extract_table_data(image, **kwargs)
        else:
            return await self.analyze_image(image, prompt, **kwargs)

    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000,
        **kwargs
    ) -> Dict[str, Any]:
        """Analyze an image via the UI detection service.

        ``prompt`` and ``max_tokens`` are accepted for interface
        compatibility but are not forwarded to the Modal backend.
        ``**kwargs`` absorbs extra task options passed through
        :meth:`invoke`.
        """
        if not self.ui_app:
            return {
                'error': 'UI detection service not available',
                'success': False
            }

        try:
            image_b64 = self._encode_image(image)

            # Call the Modal UI detection service via the from_name API.
            ui_detector = modal.Cls.from_name("isa-vision-ui", "UIDetectionService")
            result = ui_detector().detect_ui_elements.remote(image_b64)

            if result.get('success'):
                return {
                    'success': True,
                    'service': 'isa-vision',
                    'text': f"Detected {result.get('element_count', 0)} UI elements",
                    'detected_objects': result.get('ui_elements', []),
                    'confidence': 0.9,
                    'metadata': {
                        'processing_time': result.get('processing_time'),
                        'detection_method': result.get('detection_method'),
                        'model_info': result.get('model_info')
                    }
                }
            return {
                'success': False,
                'error': result.get('error', 'Unknown error'),
                'service': 'isa-vision'
            }

        except Exception as e:
            logger.error(f"Image analysis failed: {e}")
            return {
                'success': False,
                'error': str(e),
                'service': 'isa-vision'
            }

    async def analyze_images(
        self,
        images: List[Union[str, BinaryIO]],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> List[Dict[str, Any]]:
        """Analyze multiple images sequentially; one result dict per image."""
        results = []
        for image in images:
            results.append(await self.analyze_image(image, prompt, max_tokens))
        return results

    async def describe_image(
        self,
        image: Union[str, BinaryIO],
        detail_level: str = "medium"
    ) -> Dict[str, Any]:
        """Generate a textual description from detected UI elements.

        ``detail_level`` is accepted for interface compatibility but does
        not currently change the output.
        """
        result = await self.analyze_image(image)
        if not result.get('success'):
            return result

        objects = result.get('detected_objects', [])
        description = f"This appears to be a user interface with {len(objects)} interactive elements. "
        if objects:
            element_types = list(set([obj.get('type', 'element') for obj in objects]))
            description += f"The interface contains: {', '.join(element_types)}."

        return {
            'success': True,
            'description': description,
            'objects': objects,
            'scene': 'User Interface',
            'colors': ['unknown']  # Could be enhanced with color detection
        }

    async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """Extract text (OCR) using the document analysis service.

        Falls back to :meth:`_extract_text_fallback` when the document
        service is not deployed.
        """
        if not self.doc_app:
            return await self._extract_text_fallback(image)

        try:
            image_b64 = self._encode_image(image)

            # Call the Modal document analysis service via the from_name API.
            doc_analyzer = modal.Cls.from_name("isa-vision-doc", "DocumentAnalysisService")
            result = doc_analyzer().extract_text.remote(image_b64)

            if result.get('success'):
                text_results = result.get('text_results', [])
                all_text = ' '.join([item.get('text', '') for item in text_results])
                # Mean per-item confidence; 0 when nothing was detected.
                if text_results:
                    confidence = sum(item.get('confidence', 0) for item in text_results) / len(text_results)
                else:
                    confidence = 0
                return {
                    'success': True,
                    'service': 'isa-vision-doc',
                    'text': all_text,
                    'confidence': confidence,
                    'bounding_boxes': [item.get('bbox') for item in text_results],
                    'language': 'auto-detected',
                    'metadata': {
                        'processing_time': result.get('processing_time'),
                        'text_count': result.get('text_count')
                    }
                }
            return {
                'success': False,
                'error': result.get('error', 'OCR failed'),
                'service': 'isa-vision-doc'
            }

        except Exception as e:
            logger.error(f"Text extraction failed: {e}")
            return {
                'success': False,
                'error': str(e),
                'service': 'isa-vision-doc'
            }

    async def detect_objects(
        self,
        image: Union[str, BinaryIO],
        confidence_threshold: float = 0.5,
        **kwargs
    ) -> Dict[str, Any]:
        """Detect UI elements above ``confidence_threshold``.

        ``**kwargs`` absorbs extra task options passed through
        :meth:`invoke`.
        """
        result = await self.analyze_image(image)
        if not result.get('success'):
            return result

        objects = result.get('detected_objects', [])
        filtered_objects = [obj for obj in objects if obj.get('confidence', 0) >= confidence_threshold]

        return {
            'success': True,
            'service': 'isa-vision-ui',
            'objects': filtered_objects,
            'count': len(filtered_objects),
            'bounding_boxes': [obj.get('bbox') for obj in filtered_objects],
            'metadata': result.get('metadata', {})
        }

    async def get_object_coordinates(
        self,
        image: Union[str, BinaryIO],
        object_name: str
    ) -> Dict[str, Any]:
        """Find the center coordinates of a named UI element.

        Matches ``object_name`` (case-insensitively) against each detected
        element's ``type`` and ``content`` fields; returns the first hit.
        """
        detection_result = await self.detect_objects(image)
        if not detection_result.get('success'):
            return detection_result

        needle = object_name.lower()
        for obj in detection_result.get('objects', []):
            obj_type = obj.get('type', '').lower()
            obj_content = obj.get('content', '').lower()
            if needle in obj_type or needle in obj_content:
                return {
                    'success': True,
                    'found': True,
                    'center_coordinates': obj.get('center', [0, 0]),
                    'confidence': obj.get('confidence', 0),
                    'description': f"Found {obj.get('type')} at center coordinates",
                    'object_info': obj
                }

        return {
            'success': True,
            'found': False,
            'center_coordinates': [0, 0],
            'confidence': 0,
            'description': f"Object '{object_name}' not found in image"
        }

    async def classify_image(
        self,
        image: Union[str, BinaryIO],
        categories: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """Classify the image (currently: user_interface vs unknown).

        ``categories`` is accepted for interface compatibility but is not
        used by the current heuristic.
        """
        result = await self.analyze_image(image)
        if not result.get('success'):
            return result

        # Heuristic: any detected UI element implies a UI screenshot.
        if result.get('detected_objects', []):
            category, confidence = "user_interface", 0.9
        else:
            category, confidence = "unknown", 0.1

        return {
            'success': True,
            'category': category,
            'confidence': confidence,
            'all_predictions': [
                {'category': category, 'confidence': confidence}
            ]
        }

    async def compare_images(
        self,
        image1: Union[str, BinaryIO],
        image2: Union[str, BinaryIO]
    ) -> Dict[str, Any]:
        """Compare two images by the types of UI elements each contains.

        Similarity is the Jaccard index of the element-type sets.
        """
        result1 = await self.analyze_image(image1)
        result2 = await self.analyze_image(image2)

        if not (result1.get('success') and result2.get('success')):
            return {
                'success': False,
                'error': 'Failed to analyze one or both images'
            }

        objects1 = result1.get('detected_objects', [])
        objects2 = result2.get('detected_objects', [])

        count_diff = abs(len(objects1) - len(objects2))
        types1 = set(obj.get('type') for obj in objects1)
        types2 = set(obj.get('type') for obj in objects2)

        common_types = types1.intersection(types2)
        unique_types = types1.symmetric_difference(types2)
        similarity_score = len(common_types) / max(len(types1.union(types2)), 1)

        return {
            'success': True,
            'similarity_score': similarity_score,
            'differences': f"Different element types: {list(unique_types)}",
            'common_elements': f"Common element types: {list(common_types)}",
            'metadata': {
                'elements_count_1': len(objects1),
                'elements_count_2': len(objects2),
                'count_difference': count_diff
            }
        }

    def get_supported_formats(self) -> List[str]:
        """Return the image file extensions this service accepts."""
        return ['jpg', 'jpeg', 'png', 'bmp', 'gif', 'tiff']

    def get_max_image_size(self) -> Dict[str, int]:
        """Return the maximum accepted image dimensions in pixels."""
        return {'width': 4096, 'height': 4096}

    async def close(self):
        """Cleanup resources (Modal handles need no explicit teardown)."""
        pass

    # ------------------------------------------------------------------
    # Helper methods
    # ------------------------------------------------------------------

    def _encode_image(self, image: Union[str, BinaryIO]) -> str:
        """Convert a file path, file-like object, or raw bytes to base64.

        Raises ``OSError`` when ``image`` is a path that cannot be read.
        """
        if isinstance(image, str):
            # Treat a string as a filesystem path.
            with open(image, 'rb') as f:
                image_data = f.read()
        elif hasattr(image, 'read'):
            # File-like object.
            image_data = image.read()
        else:
            # Assume raw bytes (or something convertible to bytes).
            image_data = image if isinstance(image, bytes) else bytes(image)

        return base64.b64encode(image_data).decode('utf-8')

    async def _extract_text_fallback(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """Fallback OCR when the document service is not deployed.

        Currently a placeholder that reports the service as unavailable.
        """
        return {
            'success': False,
            'error': 'OCR service not available, deploy document analysis service',
            'text': '',
            'confidence': 0,
            'service': 'isa-vision-fallback'
        }

    async def _analyze_document(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """Run full document analysis (tables + OCR) on the Modal backend."""
        if not self.doc_app:
            return {
                'success': False,
                'error': 'Document analysis service not deployed',
                'service': 'isa-vision-doc'
            }

        try:
            image_b64 = self._encode_image(image)

            # Call the Modal document analysis service via the from_name API.
            doc_analyzer = modal.Cls.from_name("isa-vision-doc", "DocumentAnalysisService")
            return doc_analyzer().analyze_document_complete.remote(image_b64)

        except Exception as e:
            logger.error(f"Document analysis failed: {e}")
            return {
                'success': False,
                'error': str(e),
                'service': 'isa-vision-doc'
            }

    async def extract_table_data(
        self,
        image: Union[str, BinaryIO],
        extraction_format: str = "markdown",
        custom_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """Extract table data using the Qwen2.5-VL table extraction service.

        ``extraction_format`` selects the output representation (e.g.
        ``"markdown"``); ``custom_prompt`` overrides the backend's default
        extraction prompt when given.
        """
        if not self.table_app:
            return {
                'success': False,
                'error': 'Table extraction service not available',
                'service': 'isa-vision-table'
            }

        try:
            image_b64 = self._encode_image(image)

            # Call the Modal table extraction service.
            table_extractor = modal.Cls.from_name("qwen-vision-table", "QwenTableExtractionService")
            result = table_extractor().extract_table_data.remote(
                image_b64=image_b64,
                extraction_format=extraction_format,
                custom_prompt=custom_prompt
            )

            if result.get('success'):
                return {
                    'success': True,
                    'service': 'isa-vision-table',
                    'extracted_data': result.get('extracted_data'),
                    'raw_output': result.get('raw_output'),
                    'format': result.get('format'),
                    'processing_time': result.get('processing_time'),
                    'model_info': result.get('model_info')
                }
            return {
                'success': False,
                'error': result.get('error', 'Table extraction failed'),
                'service': 'isa-vision-table'
            }

        except Exception as e:
            logger.error(f"Table extraction failed: {e}")
            return {
                'success': False,
                'error': str(e),
                'service': 'isa-vision-table'
            }

    async def batch_extract_tables(
        self,
        images: List[Union[str, BinaryIO]],
        extraction_format: str = "markdown"
    ) -> Dict[str, Any]:
        """Extract tables from multiple images in one backend call."""
        if not self.table_app:
            return {
                'success': False,
                'error': 'Table extraction service not available',
                'service': 'isa-vision-table'
            }

        try:
            images_b64 = [self._encode_image(image) for image in images]

            # Call the Modal batch extraction service.
            table_extractor = modal.Cls.from_name("qwen-vision-table", "QwenTableExtractionService")
            return table_extractor().batch_extract_tables.remote(
                images_b64=images_b64,
                extraction_format=extraction_format
            )

        except Exception as e:
            logger.error(f"Batch table extraction failed: {e}")
            return {
                'success': False,
                'error': str(e),
                'service': 'isa-vision-table'
            }