isa-model 0.3.5-py3-none-any.whl → 0.3.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/eval/__init__.py +80 -44
  25. isa_model/eval/config/__init__.py +10 -0
  26. isa_model/eval/config/evaluation_config.py +108 -0
  27. isa_model/eval/evaluators/__init__.py +18 -0
  28. isa_model/eval/evaluators/base_evaluator.py +503 -0
  29. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  30. isa_model/eval/factory.py +417 -709
  31. isa_model/eval/infrastructure/__init__.py +24 -0
  32. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  33. isa_model/eval/metrics.py +191 -21
  34. isa_model/inference/ai_factory.py +181 -605
  35. isa_model/inference/services/audio/base_stt_service.py +65 -1
  36. isa_model/inference/services/audio/base_tts_service.py +75 -1
  37. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  38. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  39. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  40. isa_model/inference/services/base_service.py +55 -17
  41. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  42. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  43. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  44. isa_model/inference/services/helpers/stacked_config.py +148 -0
  45. isa_model/inference/services/img/__init__.py +18 -0
  46. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  47. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  48. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  49. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  50. isa_model/inference/services/llm/__init__.py +3 -3
  51. isa_model/inference/services/llm/base_llm_service.py +492 -40
  52. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  53. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  54. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  55. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  56. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  57. isa_model/inference/services/vision/__init__.py +38 -4
  58. isa_model/inference/services/vision/base_vision_service.py +218 -117
  59. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  60. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  61. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  62. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  63. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  64. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  65. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  66. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  67. isa_model/scripts/register_models.py +370 -0
  68. isa_model/scripts/register_models_with_embeddings.py +510 -0
  69. isa_model/serving/api/fastapi_server.py +6 -1
  70. isa_model/serving/api/routes/unified.py +202 -0
  71. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/RECORD +77 -53
  73. isa_model/config/__init__.py +0 -9
  74. isa_model/config/config_manager.py +0 -213
  75. isa_model/core/model_manager.py +0 -213
  76. isa_model/core/model_registry.py +0 -375
  77. isa_model/core/vision_models_init.py +0 -116
  78. isa_model/inference/billing_tracker.py +0 -406
  79. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  80. isa_model/inference/services/stacked/__init__.py +0 -26
  81. isa_model/inference/services/stacked/config.py +0 -426
  82. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  83. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  84. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  85. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  86. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
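The rename entries above remove the old isa_model.inference.services.stacked package and split its contents between vision/ (analysis services) and img/ (generation services), with the shared stacked-service base class moving into each package's helpers/ module. A minimal sketch of the import updates a 0.3.5 consumer might need; the paths follow the renames listed above, BaseStackedService is confirmed by the doc_analysis_service hunk further down, and BaseImageGenService is an assumption inferred from its file name:

# 0.3.5 import paths (removed in 0.3.6)
# from isa_model.inference.services.stacked.base_stacked_service import BaseStackedService
# from isa_model.inference.services.vision.base_image_gen_service import BaseImageGenService

# 0.3.6 equivalents implied by the file moves above
from isa_model.inference.services.vision.helpers.base_stacked_service import BaseStackedService
from isa_model.inference.services.img.base_image_gen_service import BaseImageGenService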
isa_model/inference/services/vision/base_vision_service.py

@@ -1,11 +1,18 @@
 from abc import ABC, abstractmethod
 from typing import Dict, Any, List, Union, Optional, BinaryIO
+import logging
+
 from isa_model.inference.services.base_service import BaseService
+from isa_model.inference.services.vision.helpers.image_utils import (
+    get_image_data, prepare_image_base64, prepare_image_data_url,
+    get_image_mime_type, get_image_dimensions, validate_image_format
+)
+
+logger = logging.getLogger(__name__)
 
 class BaseVisionService(BaseService):
-    """Base class for vision understanding services"""
+    """Base class for vision understanding services with common task implementations"""
 
-    @abstractmethod
     async def invoke(
         self,
         image: Union[str, BinaryIO],
@@ -14,20 +21,46 @@ class BaseVisionService(BaseService):
         **kwargs
     ) -> Dict[str, Any]:
         """
-        Unified invoke method for all vision operations
+        Unified task dispatch method - the base class provides a common implementation
 
         Args:
             image: Path to image file or image data
             prompt: Optional text prompt/question about the image
-            task: Task type (analyze, describe, extract_text, detect_objects, etc.)
+            task: Task type - two supported groups: image understanding and detection/extraction
             **kwargs: Additional task-specific parameters
 
         Returns:
             Dict containing task results
         """
-        pass
+        task = task or "analyze"
+
+        # ==================== Image understanding tasks ====================
+        if task == "analyze":
+            return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
+        elif task == "describe":
+            return await self.describe_image(image, kwargs.get("detail_level", "medium"))
+        elif task == "classify":
+            return await self.classify_image(image, kwargs.get("categories"))
+        elif task == "compare":
+            return await self.compare_images(image, kwargs.get("image2"))
+
+        # ==================== Detection / extraction tasks ====================
+        elif task == "extract_text":
+            return await self.extract_text(image)
+        elif task == "detect_objects":
+            return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
+        elif task == "detect_ui_elements":
+            return await self.detect_ui_elements(image, kwargs.get("element_types"), kwargs.get("confidence_threshold", 0.5))
+        elif task == "detect_document_elements":
+            return await self.detect_document_elements(image, kwargs.get("element_types"), kwargs.get("confidence_threshold", 0.5))
+        elif task == "extract_table_data":
+            return await self.extract_table_data(image, kwargs.get("table_format", "json"), kwargs.get("preserve_formatting", True))
+        elif task == "get_coordinates":
+            return await self.get_object_coordinates(image, kwargs.get("object_name", ""))
+
+        else:
+            raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
 
-    @abstractmethod
     async def analyze_image(
         self,
         image: Union[str, BinaryIO],
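In 0.3.5 invoke() was abstract; the hunk above makes it a concrete router that maps a task string onto the per-task methods defined further down. A usage sketch, assuming `service` is any constructed BaseVisionService subclass (construction goes through the provider factories and is not part of this hunk):

import asyncio

async def run_tasks(service):
    # Routed to extract_table_data(image, table_format=..., preserve_formatting=...)
    tables = await service.invoke("invoice.png", task="extract_table_data",
                                  table_format="markdown")

    # Routed to detect_ui_elements(image, element_types=..., confidence_threshold=...)
    ui = await service.invoke("screenshot.png", task="detect_ui_elements",
                              element_types=["button", "input"],
                              confidence_threshold=0.6)

    # Unknown tasks, and tasks the provider has not overridden, raise NotImplementedError
    try:
        await service.invoke("photo.jpg", task="segment")
    except NotImplementedError as exc:
        print(exc)

    return tables, ui

# asyncio.run(run_tasks(my_vision_service))  # my_vision_service: a concrete provider instance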
@@ -35,7 +68,7 @@ class BaseVisionService(BaseService):
         max_tokens: int = 1000
     ) -> Dict[str, Any]:
         """
-        Analyze image and provide description or answer questions
+        General-purpose image analysis - optional provider implementation
 
         Args:
             image: Path to image file or image data
@@ -49,173 +82,241 @@ class BaseVisionService(BaseService):
         - detected_objects: List of detected objects (if available)
         - metadata: Additional metadata about the analysis
         """
-        pass
+        raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_image task")
 
-    @abstractmethod
-    async def analyze_images(
+    # ==================== Image understanding methods ====================
+
+    async def describe_image(
         self,
-        images: List[Union[str, BinaryIO]],
-        prompt: Optional[str] = None,
-        max_tokens: int = 1000
-    ) -> List[Dict[str, Any]]:
+        image: Union[str, BinaryIO],
+        detail_level: str = "medium"
+    ) -> Dict[str, Any]:
         """
-        Analyze multiple images
-
-        Args:
-            images: List of image paths or image data
-            prompt: Optional text prompt/question about the images
-            max_tokens: Maximum tokens in response
-
-        Returns:
-            List of analysis result dictionaries
+        Image description - optional provider implementation
         """
-        pass
+        raise NotImplementedError(f"{self.__class__.__name__} does not support describe_image task")
 
-    @abstractmethod
-    async def describe_image(
+    async def classify_image(
         self,
         image: Union[str, BinaryIO],
-        detail_level: str = "medium"
+        categories: Optional[List[str]] = None
     ) -> Dict[str, Any]:
         """
-        Generate detailed description of image
-
-        Args:
-            image: Path to image file or image data
-            detail_level: Level of detail ("low", "medium", "high")
-
-        Returns:
-            Dict containing description results with keys:
-            - description: Detailed text description
-            - objects: List of detected objects
-            - scene: Scene description
-            - colors: Dominant colors
+        Image classification - optional provider implementation
         """
-        pass
+        raise NotImplementedError(f"{self.__class__.__name__} does not support classify_image task")
+
+    async def compare_images(
+        self,
+        image1: Union[str, BinaryIO],
+        image2: Union[str, BinaryIO]
+    ) -> Dict[str, Any]:
+        """
+        Image comparison - optional provider implementation
+        """
+        raise NotImplementedError(f"{self.__class__.__name__} does not support compare_images task")
+
+    # ==================== Detection / extraction methods ====================
 
-    @abstractmethod
     async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
         """
-        Extract text from image (OCR)
-
-        Args:
-            image: Path to image file or image data
-
-        Returns:
-            Dict containing OCR results with keys:
-            - text: Extracted text
-            - confidence: Overall confidence score
-            - bounding_boxes: Text regions with coordinates (if available)
-            - language: Detected language (if available)
+        Text extraction (OCR) - optional provider implementation
         """
-        pass
+        raise NotImplementedError(f"{self.__class__.__name__} does not support extract_text task")
 
-    @abstractmethod
     async def detect_objects(
         self,
         image: Union[str, BinaryIO],
         confidence_threshold: float = 0.5
     ) -> Dict[str, Any]:
         """
-        Detect objects in image
-
-        Args:
-            image: Path to image file or image data
-            confidence_threshold: Minimum confidence for detections
-
-        Returns:
-            Dict containing detection results with keys:
-            - objects: List of detected objects with labels, confidence, and coordinates
-            - count: Number of objects detected
-            - bounding_boxes: Object locations with coordinates
+        General object detection - optional provider implementation
         """
-        pass
+        raise NotImplementedError(f"{self.__class__.__name__} does not support detect_objects task")
 
-    @abstractmethod
-    async def get_object_coordinates(
+    async def detect_ui_elements(
         self,
         image: Union[str, BinaryIO],
-        object_name: str
+        element_types: Optional[List[str]] = None,
+        confidence_threshold: float = 0.5
     ) -> Dict[str, Any]:
         """
-        Get coordinates of a specific object in the image
+        UI element detection - optional provider implementation
 
         Args:
-            image: Path to image file or image data
-            object_name: Name of the object to locate
+            image: Input image
+            element_types: Element types to detect ['button', 'input', 'text', 'image', 'link', etc.]
+            confidence_threshold: Confidence threshold
 
         Returns:
-            Dict containing coordinate results with keys:
-            - found: Boolean indicating if object was found
-            - center_coordinates: List [x, y] with pixel coordinates of center point
-            - confidence: Confidence score for the detection
-            - description: Description of the object location
+            Dict containing detected UI elements with their bounding boxes and types
         """
-        pass
+        raise NotImplementedError(f"{self.__class__.__name__} does not support detect_ui_elements task")
 
-    @abstractmethod
-    async def classify_image(
-        self,
+    async def detect_document_elements(
+        self,
         image: Union[str, BinaryIO],
-        categories: Optional[List[str]] = None
+        element_types: Optional[List[str]] = None,
+        confidence_threshold: float = 0.5
     ) -> Dict[str, Any]:
         """
-        Classify image into categories
+        Document structure element detection - optional provider implementation
 
         Args:
-            image: Path to image file or image data
-            categories: Optional list of specific categories to consider
+            image: Input image
+            element_types: Element types to detect ['table', 'header', 'paragraph', 'list', etc.]
+            confidence_threshold: Confidence threshold
 
         Returns:
-            Dict containing classification results with keys:
-            - category: Top predicted category
-            - confidence: Confidence score
-            - all_predictions: List of all predictions with scores
+            Dict containing detected document elements with their structure and content
         """
-        pass
+        raise NotImplementedError(f"{self.__class__.__name__} does not support detect_document_elements task")
 
-    @abstractmethod
-    async def compare_images(
-        self,
-        image1: Union[str, BinaryIO],
-        image2: Union[str, BinaryIO]
+    async def get_object_coordinates(
+        self,
+        image: Union[str, BinaryIO],
+        object_name: str
+    ) -> Dict[str, Any]:
+        """
+        Get object coordinates - optional provider implementation
+        """
+        raise NotImplementedError(f"{self.__class__.__name__} does not support get_object_coordinates task")
+
+    async def extract_table_data(
+        self,
+        image: Union[str, BinaryIO],
+        table_format: str = "json",
+        preserve_formatting: bool = True
     ) -> Dict[str, Any]:
         """
-        Compare two images for similarity
+        Structured table data extraction - optional provider implementation
 
         Args:
-            image1: First image path or data
-            image2: Second image path or data
+            image: Input image
+            table_format: Output format ('json', 'csv', 'markdown', 'html')
+            preserve_formatting: Whether to preserve the original formatting (merged cells, styles, etc.)
 
         Returns:
-            Dict containing comparison results with keys:
-            - similarity_score: Numerical similarity score
-            - differences: Description of key differences
-            - common_elements: Description of common elements
+            Dict containing extracted table data in structured format:
+            {
+                "tables": [
+                    {
+                        "table_id": "table_1",
+                        "headers": ["Column1", "Column2", "Column3"],
+                        "rows": [
+                            ["cell1", "cell2", "cell3"],
+                            ["cell4", "cell5", "cell6"]
+                        ],
+                        "metadata": {
+                            "row_count": 2,
+                            "column_count": 3,
+                            "has_headers": true,
+                            "merged_cells": [],
+                            "table_caption": "optional_caption"
+                        }
+                    }
+                ],
+                "raw_data": "original_table_text",
+                "format": "json"
+            }
         """
+        raise NotImplementedError(f"{self.__class__.__name__} does not support extract_table_data task")
+
+    async def close(self):
+        """Cleanup resources - default implementation does nothing"""
         pass
 
-    @abstractmethod
-    def get_supported_formats(self) -> List[str]:
+    def get_supported_tasks(self) -> List[str]:
         """
-        Get list of supported image formats
+        Get the list of tasks this provider supports
 
         Returns:
-            List of supported file extensions (e.g., ['jpg', 'png', 'gif'])
+            List of supported task names
         """
-        pass
+        supported = []
+
+        # Check which methods are actually implemented
+        if hasattr(self, 'analyze_image') and callable(getattr(self, 'analyze_image')):
+            try:
+                # Inspect the source to see whether it only raises NotImplementedError
+                import inspect
+                if not 'NotImplementedError' in inspect.getsource(self.analyze_image):
+                    supported.append('analyze')
+            except:
+                pass
+
+        # Check support for each task category
+        method_task_map = {
+            # Image understanding
+            'describe_image': 'describe',
+            'classify_image': 'classify',
+            'compare_images': 'compare',
+            # Detection / extraction
+            'extract_text': 'extract_text',
+            'detect_objects': 'detect_objects',
+            'detect_ui_elements': 'detect_ui_elements',
+            'detect_document_elements': 'detect_document_elements',
+            'extract_table_data': 'extract_table_data',
+            'get_object_coordinates': 'get_coordinates'
+        }
+
+        for method_name, task_name in method_task_map.items():
+            if hasattr(self, method_name):
+                # May be the default implementation (based on analyze_image) or the provider's own
+                supported.append(task_name)
+
+        return supported
+
+    # ==================== COMMON TASK IMPLEMENTATIONS ====================
+    # Optional default implementations; providers may override these methods
+
+    async def analyze_images(
+        self,
+        images: List[Union[str, BinaryIO]],
+        prompt: Optional[str] = None,
+        max_tokens: int = 1000
+    ) -> List[Dict[str, Any]]:
+        """
+        Batch image analysis - optional provider implementation
+        Default implementation: if the provider supports analyze_image, call it per image
+        """
+        if hasattr(self, 'analyze_image'):
+            results = []
+            for image in images:
+                try:
+                    result = await self.analyze_image(image, prompt, max_tokens)
+                    results.append(result)
+                except NotImplementedError:
+                    raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
+            return results
+        else:
+            raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
+
+
+    def get_supported_formats(self) -> List[str]:
+        """
+        Get supported image formats - providers should implement this
+        """
+        return ['jpg', 'jpeg', 'png', 'gif', 'webp']  # common formats
 
-    @abstractmethod
     def get_max_image_size(self) -> Dict[str, int]:
         """
-        Get maximum supported image dimensions
-
-        Returns:
-            Dict with 'width' and 'height' keys for maximum dimensions
+        Get maximum supported image size - providers should implement this
         """
-        pass
+        return {"width": 2048, "height": 2048, "file_size_mb": 10}  # common limits
 
-    @abstractmethod
-    async def close(self):
-        """Cleanup resources"""
-        pass
+    # ==================== UTILITY METHODS ====================
+
+    def _parse_coordinates_from_text(self, text: str) -> List[Dict[str, Any]]:
+        """
+        Parse object coordinates from a text response - uses the shared parsing utilities
+        """
+        from isa_model.inference.services.vision.helpers.image_utils import parse_coordinates_from_text
+        return parse_coordinates_from_text(text)
+
+    def _parse_center_coordinates_from_text(self, text: str) -> tuple[bool, Optional[List[int]], str]:
+        """
+        Parse center coordinates from a structured text response - uses the shared parsing utilities
+        """
+        from isa_model.inference.services.vision.helpers.image_utils import parse_center_coordinates_from_text
+        return parse_center_coordinates_from_text(text)
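With every @abstractmethod removed, the base class now raises NotImplementedError for unsupported tasks and supplies working defaults for analyze_images, get_supported_formats, get_max_image_size and close, so a provider only overrides the tasks it actually supports. A minimal sketch of such a subclass; the class below is illustrative rather than part of the package, and it assumes BaseService.__init__(provider, model_name) as used by ISAVisionService in the hunks that follow:

from typing import Any, BinaryIO, Dict, Union

from isa_model.inference.services.vision.base_vision_service import BaseVisionService

class OcrOnlyVisionService(BaseVisionService):
    """Hypothetical provider that supports only the OCR task."""

    async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        # A real provider would call its backend here; the stub only shows the contract.
        return {"text": "hello world", "confidence": 0.99}

# invoke(image, task="extract_text") dispatches to the override above;
# invoke(image, task="describe") raises NotImplementedError from the base class;
# get_supported_formats() / get_max_image_size() fall back to the base defaults.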
isa_model/inference/services/vision/disabled/isA_vision_service.py

@@ -23,6 +23,7 @@ class ISAVisionService(BaseVisionService):
         super().__init__(provider, model_name)
         self.ui_app = None
         self.doc_app = None
+        self.table_app = None
         self._initialize_modal_connections()
 
     def _initialize_modal_connections(self):
@@ -42,6 +43,14 @@ class ISAVisionService(BaseVisionService):
         except Exception as e:
             logger.warning(f"⚠️ Document service not available: {e}")
             self.doc_app = None
+
+        try:
+            # Connect to table extraction service
+            self.table_app = modal.App.lookup("qwen-vision-table", create_if_missing=False)
+            logger.info("✅ Connected to table extraction service")
+        except Exception as e:
+            logger.warning(f"⚠️ Table extraction service not available: {e}")
+            self.table_app = None
 
     async def invoke(
         self,
@@ -59,6 +68,8 @@ class ISAVisionService(BaseVisionService):
             return await self.extract_text(image)
         elif task == "analyze_document":
             return await self._analyze_document(image)
+        elif task == "extract_table" or task == "table_extraction":
+            return await self.extract_table_data(image, **kwargs)
         else:
             return await self.analyze_image(image, prompt, **kwargs)
 
@@ -399,4 +410,91 @@ class ISAVisionService(BaseVisionService):
             'success': False,
             'error': str(e),
             'service': 'isa-vision-doc'
+        }
+
+    async def extract_table_data(
+        self,
+        image: Union[str, BinaryIO],
+        extraction_format: str = "markdown",
+        custom_prompt: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Extract table data using Qwen2.5-VL table extraction service"""
+
+        if not self.table_app:
+            return {
+                'success': False,
+                'error': 'Table extraction service not available',
+                'service': 'isa-vision-table'
+            }
+
+        try:
+            # Convert image to base64
+            image_b64 = self._encode_image(image)
+
+            # Call Modal table extraction service
+            table_extractor = modal.Cls.from_name("qwen-vision-table", "QwenTableExtractionService")
+            result = table_extractor().extract_table_data.remote(
+                image_b64=image_b64,
+                extraction_format=extraction_format,
+                custom_prompt=custom_prompt
+            )
+
+            if result.get('success'):
+                return {
+                    'success': True,
+                    'service': 'isa-vision-table',
+                    'extracted_data': result.get('extracted_data'),
+                    'raw_output': result.get('raw_output'),
+                    'format': result.get('format'),
+                    'processing_time': result.get('processing_time'),
+                    'model_info': result.get('model_info')
+                }
+            else:
+                return {
+                    'success': False,
+                    'error': result.get('error', 'Table extraction failed'),
+                    'service': 'isa-vision-table'
+                }
+
+        except Exception as e:
+            logger.error(f"Table extraction failed: {e}")
+            return {
+                'success': False,
+                'error': str(e),
+                'service': 'isa-vision-table'
+            }
+
+    async def batch_extract_tables(
+        self,
+        images: List[Union[str, BinaryIO]],
+        extraction_format: str = "markdown"
+    ) -> Dict[str, Any]:
+        """Extract tables from multiple images"""
+
+        if not self.table_app:
+            return {
+                'success': False,
+                'error': 'Table extraction service not available',
+                'service': 'isa-vision-table'
+            }
+
+        try:
+            # Convert all images to base64
+            images_b64 = [self._encode_image(image) for image in images]
+
+            # Call Modal batch extraction service
+            table_extractor = modal.Cls.from_name("qwen-vision-table", "QwenTableExtractionService")
+            result = table_extractor().batch_extract_tables.remote(
+                images_b64=images_b64,
+                extraction_format=extraction_format
+            )
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Batch table extraction failed: {e}")
+            return {
+                'success': False,
+                'error': str(e),
+                'service': 'isa-vision-table'
         }
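The two methods above route table extraction to the newly added Modal deployment (app "qwen-vision-table", class QwenTableExtractionService, shipped in isa_vision_table_service.py in this release) and return an error dict instead of raising when the app has not been deployed. A usage sketch, assuming an ISAVisionService instance is constructed as in 0.3.5 and that the Modal app is live:

import asyncio

async def extract(service):
    result = await service.extract_table_data("report_page.png",
                                              extraction_format="markdown")
    if result["success"]:
        print(result["extracted_data"])      # table content in the requested format
        print(result["processing_time"])     # timing reported by the Modal service
    else:
        # e.g. 'Table extraction service not available' when qwen-vision-table is not deployed
        print(result["error"])

# asyncio.run(extract(isa_vision_service))  # isa_vision_service: an ISAVisionService instance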
isa_model/inference/services/vision/doc_analysis_service.py

@@ -22,7 +22,7 @@ import logging
 from typing import Dict, Any, List, Union, Optional, BinaryIO
 from datetime import datetime
 
-from isa_model.inference.services.stacked.base_stacked_service import (
+from .helpers.base_stacked_service import (
     BaseStackedService, LayerConfig, LayerType, LayerResult
 )
 