isa-model 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +30 -1
- isa_model/client.py +770 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +181 -605
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -17
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
- isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
- isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +492 -40
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +51 -17
- isa_model/inference/services/llm/openai_llm_service.py +70 -19
- isa_model/inference/services/llm/yyds_llm_service.py +24 -23
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +218 -117
- isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
- isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +104 -307
- isa_model/inference/services/vision/replicate_vision_service.py +140 -325
- isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/api/fastapi_server.py +6 -1
- isa_model/serving/api/routes/unified.py +202 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
- {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/RECORD +77 -53
- isa_model/config/__init__.py +0 -9
- isa_model/config/config_manager.py +0 -213
- isa_model/core/model_manager.py +0 -213
- isa_model/core/model_registry.py +0 -375
- isa_model/core/vision_models_init.py +0 -116
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/stacked/__init__.py +0 -26
- isa_model/inference/services/stacked/config.py +0 -426
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,18 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
from typing import Dict, Any, List, Union, Optional, BinaryIO
|
3
|
+
import logging
|
4
|
+
|
3
5
|
from isa_model.inference.services.base_service import BaseService
|
6
|
+
from isa_model.inference.services.vision.helpers.image_utils import (
|
7
|
+
get_image_data, prepare_image_base64, prepare_image_data_url,
|
8
|
+
get_image_mime_type, get_image_dimensions, validate_image_format
|
9
|
+
)
|
10
|
+
|
11
|
+
logger = logging.getLogger(__name__)
|
4
12
|
|
5
13
|
class BaseVisionService(BaseService):
|
6
|
-
"""Base class for vision understanding services"""
|
14
|
+
"""Base class for vision understanding services with common task implementations"""
|
7
15
|
|
8
|
-
@abstractmethod
|
9
16
|
async def invoke(
|
10
17
|
self,
|
11
18
|
image: Union[str, BinaryIO],
|
@@ -14,20 +21,46 @@ class BaseVisionService(BaseService):
|
|
14
21
|
**kwargs
|
15
22
|
) -> Dict[str, Any]:
|
16
23
|
"""
|
17
|
-
|
24
|
+
统一的任务分发方法 - Base类提供通用实现
|
18
25
|
|
19
26
|
Args:
|
20
27
|
image: Path to image file or image data
|
21
28
|
prompt: Optional text prompt/question about the image
|
22
|
-
task: Task type
|
29
|
+
task: Task type - 支持两大类:图像理解 + 检测抽取
|
23
30
|
**kwargs: Additional task-specific parameters
|
24
31
|
|
25
32
|
Returns:
|
26
33
|
Dict containing task results
|
27
34
|
"""
|
28
|
-
|
35
|
+
task = task or "analyze"
|
36
|
+
|
37
|
+
# ==================== 图像理解类任务 ====================
|
38
|
+
if task == "analyze":
|
39
|
+
return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
|
40
|
+
elif task == "describe":
|
41
|
+
return await self.describe_image(image, kwargs.get("detail_level", "medium"))
|
42
|
+
elif task == "classify":
|
43
|
+
return await self.classify_image(image, kwargs.get("categories"))
|
44
|
+
elif task == "compare":
|
45
|
+
return await self.compare_images(image, kwargs.get("image2"))
|
46
|
+
|
47
|
+
# ==================== 检测抽取类任务 ====================
|
48
|
+
elif task == "extract_text":
|
49
|
+
return await self.extract_text(image)
|
50
|
+
elif task == "detect_objects":
|
51
|
+
return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
|
52
|
+
elif task == "detect_ui_elements":
|
53
|
+
return await self.detect_ui_elements(image, kwargs.get("element_types"), kwargs.get("confidence_threshold", 0.5))
|
54
|
+
elif task == "detect_document_elements":
|
55
|
+
return await self.detect_document_elements(image, kwargs.get("element_types"), kwargs.get("confidence_threshold", 0.5))
|
56
|
+
elif task == "extract_table_data":
|
57
|
+
return await self.extract_table_data(image, kwargs.get("table_format", "json"), kwargs.get("preserve_formatting", True))
|
58
|
+
elif task == "get_coordinates":
|
59
|
+
return await self.get_object_coordinates(image, kwargs.get("object_name", ""))
|
60
|
+
|
61
|
+
else:
|
62
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
|
29
63
|
|
30
|
-
@abstractmethod
|
31
64
|
async def analyze_image(
|
32
65
|
self,
|
33
66
|
image: Union[str, BinaryIO],
|
@@ -35,7 +68,7 @@ class BaseVisionService(BaseService):
|
|
35
68
|
max_tokens: int = 1000
|
36
69
|
) -> Dict[str, Any]:
|
37
70
|
"""
|
38
|
-
|
71
|
+
通用图像分析 - Provider可选实现
|
39
72
|
|
40
73
|
Args:
|
41
74
|
image: Path to image file or image data
|
@@ -49,173 +82,241 @@ class BaseVisionService(BaseService):
|
|
49
82
|
- detected_objects: List of detected objects (if available)
|
50
83
|
- metadata: Additional metadata about the analysis
|
51
84
|
"""
|
52
|
-
|
85
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_image task")
|
53
86
|
|
54
|
-
|
55
|
-
|
87
|
+
# ==================== 图像理解类方法 ====================
|
88
|
+
|
89
|
+
async def describe_image(
|
56
90
|
self,
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
) -> List[Dict[str, Any]]:
|
91
|
+
image: Union[str, BinaryIO],
|
92
|
+
detail_level: str = "medium"
|
93
|
+
) -> Dict[str, Any]:
|
61
94
|
"""
|
62
|
-
|
63
|
-
|
64
|
-
Args:
|
65
|
-
images: List of image paths or image data
|
66
|
-
prompt: Optional text prompt/question about the images
|
67
|
-
max_tokens: Maximum tokens in response
|
68
|
-
|
69
|
-
Returns:
|
70
|
-
List of analysis result dictionaries
|
95
|
+
图像描述 - Provider可选实现
|
71
96
|
"""
|
72
|
-
|
97
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support describe_image task")
|
73
98
|
|
74
|
-
|
75
|
-
async def describe_image(
|
99
|
+
async def classify_image(
|
76
100
|
self,
|
77
101
|
image: Union[str, BinaryIO],
|
78
|
-
|
102
|
+
categories: Optional[List[str]] = None
|
79
103
|
) -> Dict[str, Any]:
|
80
104
|
"""
|
81
|
-
|
82
|
-
|
83
|
-
Args:
|
84
|
-
image: Path to image file or image data
|
85
|
-
detail_level: Level of detail ("low", "medium", "high")
|
86
|
-
|
87
|
-
Returns:
|
88
|
-
Dict containing description results with keys:
|
89
|
-
- description: Detailed text description
|
90
|
-
- objects: List of detected objects
|
91
|
-
- scene: Scene description
|
92
|
-
- colors: Dominant colors
|
105
|
+
图像分类 - Provider可选实现
|
93
106
|
"""
|
94
|
-
|
107
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support classify_image task")
|
108
|
+
|
109
|
+
async def compare_images(
|
110
|
+
self,
|
111
|
+
image1: Union[str, BinaryIO],
|
112
|
+
image2: Union[str, BinaryIO]
|
113
|
+
) -> Dict[str, Any]:
|
114
|
+
"""
|
115
|
+
图像比较 - Provider可选实现
|
116
|
+
"""
|
117
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support compare_images task")
|
118
|
+
|
119
|
+
# ==================== 检测抽取类方法 ====================
|
95
120
|
|
96
|
-
@abstractmethod
|
97
121
|
async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
|
98
122
|
"""
|
99
|
-
|
100
|
-
|
101
|
-
Args:
|
102
|
-
image: Path to image file or image data
|
103
|
-
|
104
|
-
Returns:
|
105
|
-
Dict containing OCR results with keys:
|
106
|
-
- text: Extracted text
|
107
|
-
- confidence: Overall confidence score
|
108
|
-
- bounding_boxes: Text regions with coordinates (if available)
|
109
|
-
- language: Detected language (if available)
|
123
|
+
文本提取(OCR) - Provider可选实现
|
110
124
|
"""
|
111
|
-
|
125
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support extract_text task")
|
112
126
|
|
113
|
-
@abstractmethod
|
114
127
|
async def detect_objects(
|
115
128
|
self,
|
116
129
|
image: Union[str, BinaryIO],
|
117
130
|
confidence_threshold: float = 0.5
|
118
131
|
) -> Dict[str, Any]:
|
119
132
|
"""
|
120
|
-
|
121
|
-
|
122
|
-
Args:
|
123
|
-
image: Path to image file or image data
|
124
|
-
confidence_threshold: Minimum confidence for detections
|
125
|
-
|
126
|
-
Returns:
|
127
|
-
Dict containing detection results with keys:
|
128
|
-
- objects: List of detected objects with labels, confidence, and coordinates
|
129
|
-
- count: Number of objects detected
|
130
|
-
- bounding_boxes: Object locations with coordinates
|
133
|
+
通用物体检测 - Provider可选实现
|
131
134
|
"""
|
132
|
-
|
135
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support detect_objects task")
|
133
136
|
|
134
|
-
|
135
|
-
async def get_object_coordinates(
|
137
|
+
async def detect_ui_elements(
|
136
138
|
self,
|
137
139
|
image: Union[str, BinaryIO],
|
138
|
-
|
140
|
+
element_types: Optional[List[str]] = None,
|
141
|
+
confidence_threshold: float = 0.5
|
139
142
|
) -> Dict[str, Any]:
|
140
143
|
"""
|
141
|
-
|
144
|
+
UI界面元素检测 - Provider可选实现
|
142
145
|
|
143
146
|
Args:
|
144
|
-
image:
|
145
|
-
|
147
|
+
image: 输入图像
|
148
|
+
element_types: 要检测的元素类型 ['button', 'input', 'text', 'image', 'link', etc.]
|
149
|
+
confidence_threshold: 置信度阈值
|
146
150
|
|
147
151
|
Returns:
|
148
|
-
Dict containing
|
149
|
-
- found: Boolean indicating if object was found
|
150
|
-
- center_coordinates: List [x, y] with pixel coordinates of center point
|
151
|
-
- confidence: Confidence score for the detection
|
152
|
-
- description: Description of the object location
|
152
|
+
Dict containing detected UI elements with their bounding boxes and types
|
153
153
|
"""
|
154
|
-
|
154
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support detect_ui_elements task")
|
155
155
|
|
156
|
-
|
157
|
-
|
158
|
-
self,
|
156
|
+
async def detect_document_elements(
|
157
|
+
self,
|
159
158
|
image: Union[str, BinaryIO],
|
160
|
-
|
159
|
+
element_types: Optional[List[str]] = None,
|
160
|
+
confidence_threshold: float = 0.5
|
161
161
|
) -> Dict[str, Any]:
|
162
162
|
"""
|
163
|
-
|
163
|
+
文档结构元素检测 - Provider可选实现
|
164
164
|
|
165
165
|
Args:
|
166
|
-
image:
|
167
|
-
|
166
|
+
image: 输入图像
|
167
|
+
element_types: 要检测的元素类型 ['table', 'header', 'paragraph', 'list', etc.]
|
168
|
+
confidence_threshold: 置信度阈值
|
168
169
|
|
169
170
|
Returns:
|
170
|
-
Dict containing
|
171
|
-
- category: Top predicted category
|
172
|
-
- confidence: Confidence score
|
173
|
-
- all_predictions: List of all predictions with scores
|
171
|
+
Dict containing detected document elements with their structure and content
|
174
172
|
"""
|
175
|
-
|
173
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support detect_document_elements task")
|
176
174
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
175
|
+
async def get_object_coordinates(
|
176
|
+
self,
|
177
|
+
image: Union[str, BinaryIO],
|
178
|
+
object_name: str
|
179
|
+
) -> Dict[str, Any]:
|
180
|
+
"""
|
181
|
+
获取对象坐标 - Provider可选实现
|
182
|
+
"""
|
183
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support get_object_coordinates task")
|
184
|
+
|
185
|
+
async def extract_table_data(
|
186
|
+
self,
|
187
|
+
image: Union[str, BinaryIO],
|
188
|
+
table_format: str = "json",
|
189
|
+
preserve_formatting: bool = True
|
182
190
|
) -> Dict[str, Any]:
|
183
191
|
"""
|
184
|
-
|
192
|
+
表格数据结构化抽取 - Provider可选实现
|
185
193
|
|
186
194
|
Args:
|
187
|
-
|
188
|
-
|
195
|
+
image: 输入图像
|
196
|
+
table_format: 输出格式 ('json', 'csv', 'markdown', 'html')
|
197
|
+
preserve_formatting: 是否保持原始格式(合并单元格、样式等)
|
189
198
|
|
190
199
|
Returns:
|
191
|
-
Dict containing
|
192
|
-
|
193
|
-
|
194
|
-
|
200
|
+
Dict containing extracted table data in structured format:
|
201
|
+
{
|
202
|
+
"tables": [
|
203
|
+
{
|
204
|
+
"table_id": "table_1",
|
205
|
+
"headers": ["Column1", "Column2", "Column3"],
|
206
|
+
"rows": [
|
207
|
+
["cell1", "cell2", "cell3"],
|
208
|
+
["cell4", "cell5", "cell6"]
|
209
|
+
],
|
210
|
+
"metadata": {
|
211
|
+
"row_count": 2,
|
212
|
+
"column_count": 3,
|
213
|
+
"has_headers": true,
|
214
|
+
"merged_cells": [],
|
215
|
+
"table_caption": "optional_caption"
|
216
|
+
}
|
217
|
+
}
|
218
|
+
],
|
219
|
+
"raw_data": "original_table_text",
|
220
|
+
"format": "json"
|
221
|
+
}
|
195
222
|
"""
|
223
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support extract_table_data task")
|
224
|
+
|
225
|
+
async def close(self):
|
226
|
+
"""Cleanup resources - default implementation does nothing"""
|
196
227
|
pass
|
197
228
|
|
198
|
-
|
199
|
-
def get_supported_formats(self) -> List[str]:
|
229
|
+
def get_supported_tasks(self) -> List[str]:
|
200
230
|
"""
|
201
|
-
|
231
|
+
获取provider支持的任务列表
|
202
232
|
|
203
233
|
Returns:
|
204
|
-
List of supported
|
234
|
+
List of supported task names
|
205
235
|
"""
|
206
|
-
|
236
|
+
supported = []
|
237
|
+
|
238
|
+
# 检查哪些方法被实现了
|
239
|
+
if hasattr(self, 'analyze_image') and callable(getattr(self, 'analyze_image')):
|
240
|
+
try:
|
241
|
+
# 尝试调用看是否抛出NotImplementedError
|
242
|
+
import inspect
|
243
|
+
if not 'NotImplementedError' in inspect.getsource(self.analyze_image):
|
244
|
+
supported.append('analyze')
|
245
|
+
except:
|
246
|
+
pass
|
247
|
+
|
248
|
+
# 检查各类任务支持情况
|
249
|
+
method_task_map = {
|
250
|
+
# 图像理解类
|
251
|
+
'describe_image': 'describe',
|
252
|
+
'classify_image': 'classify',
|
253
|
+
'compare_images': 'compare',
|
254
|
+
# 检测抽取类
|
255
|
+
'extract_text': 'extract_text',
|
256
|
+
'detect_objects': 'detect_objects',
|
257
|
+
'detect_ui_elements': 'detect_ui_elements',
|
258
|
+
'detect_document_elements': 'detect_document_elements',
|
259
|
+
'extract_table_data': 'extract_table_data',
|
260
|
+
'get_object_coordinates': 'get_coordinates'
|
261
|
+
}
|
262
|
+
|
263
|
+
for method_name, task_name in method_task_map.items():
|
264
|
+
if hasattr(self, method_name):
|
265
|
+
# 检查是否是默认实现(基于analyze_image)还是provider自己的实现
|
266
|
+
supported.append(task_name)
|
267
|
+
|
268
|
+
return supported
|
269
|
+
|
270
|
+
# ==================== COMMON TASK IMPLEMENTATIONS ====================
|
271
|
+
# 为每个provider提供可选的默认实现,provider可以覆盖这些方法
|
272
|
+
|
273
|
+
async def analyze_images(
|
274
|
+
self,
|
275
|
+
images: List[Union[str, BinaryIO]],
|
276
|
+
prompt: Optional[str] = None,
|
277
|
+
max_tokens: int = 1000
|
278
|
+
) -> List[Dict[str, Any]]:
|
279
|
+
"""
|
280
|
+
批量图像分析 - Provider可选实现
|
281
|
+
默认实现:如果provider支持analyze_image,则逐个调用
|
282
|
+
"""
|
283
|
+
if hasattr(self, 'analyze_image'):
|
284
|
+
results = []
|
285
|
+
for image in images:
|
286
|
+
try:
|
287
|
+
result = await self.analyze_image(image, prompt, max_tokens)
|
288
|
+
results.append(result)
|
289
|
+
except NotImplementedError:
|
290
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
|
291
|
+
return results
|
292
|
+
else:
|
293
|
+
raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
|
294
|
+
|
295
|
+
|
296
|
+
def get_supported_formats(self) -> List[str]:
|
297
|
+
"""
|
298
|
+
获取支持的图像格式 - Provider应该实现
|
299
|
+
"""
|
300
|
+
return ['jpg', 'jpeg', 'png', 'gif', 'webp'] # 通用格式
|
207
301
|
|
208
|
-
@abstractmethod
|
209
302
|
def get_max_image_size(self) -> Dict[str, int]:
|
210
303
|
"""
|
211
|
-
|
212
|
-
|
213
|
-
Returns:
|
214
|
-
Dict with 'width' and 'height' keys for maximum dimensions
|
304
|
+
获取最大图像尺寸 - Provider应该实现
|
215
305
|
"""
|
216
|
-
|
306
|
+
return {"width": 2048, "height": 2048, "file_size_mb": 10} # 通用限制
|
217
307
|
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
308
|
+
# ==================== UTILITY METHODS ====================
|
309
|
+
|
310
|
+
def _parse_coordinates_from_text(self, text: str) -> List[Dict[str, Any]]:
|
311
|
+
"""
|
312
|
+
从文本响应中解析对象坐标 - 使用统一的解析工具
|
313
|
+
"""
|
314
|
+
from isa_model.inference.services.vision.helpers.image_utils import parse_coordinates_from_text
|
315
|
+
return parse_coordinates_from_text(text)
|
316
|
+
|
317
|
+
def _parse_center_coordinates_from_text(self, text: str) -> tuple[bool, Optional[List[int]], str]:
|
318
|
+
"""
|
319
|
+
从结构化文本响应中解析中心坐标 - 使用统一的解析工具
|
320
|
+
"""
|
321
|
+
from isa_model.inference.services.vision.helpers.image_utils import parse_center_coordinates_from_text
|
322
|
+
return parse_center_coordinates_from_text(text)
|
isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py}
RENAMED
@@ -23,6 +23,7 @@ class ISAVisionService(BaseVisionService):
|
|
23
23
|
super().__init__(provider, model_name)
|
24
24
|
self.ui_app = None
|
25
25
|
self.doc_app = None
|
26
|
+
self.table_app = None
|
26
27
|
self._initialize_modal_connections()
|
27
28
|
|
28
29
|
def _initialize_modal_connections(self):
|
@@ -42,6 +43,14 @@ class ISAVisionService(BaseVisionService):
|
|
42
43
|
except Exception as e:
|
43
44
|
logger.warning(f"� Document service not available: {e}")
|
44
45
|
self.doc_app = None
|
46
|
+
|
47
|
+
try:
|
48
|
+
# Connect to table extraction service
|
49
|
+
self.table_app = modal.App.lookup("qwen-vision-table", create_if_missing=False)
|
50
|
+
logger.info("✅ Connected to table extraction service")
|
51
|
+
except Exception as e:
|
52
|
+
logger.warning(f"⚠️ Table extraction service not available: {e}")
|
53
|
+
self.table_app = None
|
45
54
|
|
46
55
|
async def invoke(
|
47
56
|
self,
|
@@ -59,6 +68,8 @@ class ISAVisionService(BaseVisionService):
|
|
59
68
|
return await self.extract_text(image)
|
60
69
|
elif task == "analyze_document":
|
61
70
|
return await self._analyze_document(image)
|
71
|
+
elif task == "extract_table" or task == "table_extraction":
|
72
|
+
return await self.extract_table_data(image, **kwargs)
|
62
73
|
else:
|
63
74
|
return await self.analyze_image(image, prompt, **kwargs)
|
64
75
|
|
@@ -399,4 +410,91 @@ class ISAVisionService(BaseVisionService):
|
|
399
410
|
'success': False,
|
400
411
|
'error': str(e),
|
401
412
|
'service': 'isa-vision-doc'
|
413
|
+
}
|
414
|
+
|
415
|
+
async def extract_table_data(
|
416
|
+
self,
|
417
|
+
image: Union[str, BinaryIO],
|
418
|
+
extraction_format: str = "markdown",
|
419
|
+
custom_prompt: Optional[str] = None
|
420
|
+
) -> Dict[str, Any]:
|
421
|
+
"""Extract table data using Qwen2.5-VL table extraction service"""
|
422
|
+
|
423
|
+
if not self.table_app:
|
424
|
+
return {
|
425
|
+
'success': False,
|
426
|
+
'error': 'Table extraction service not available',
|
427
|
+
'service': 'isa-vision-table'
|
428
|
+
}
|
429
|
+
|
430
|
+
try:
|
431
|
+
# Convert image to base64
|
432
|
+
image_b64 = self._encode_image(image)
|
433
|
+
|
434
|
+
# Call Modal table extraction service
|
435
|
+
table_extractor = modal.Cls.from_name("qwen-vision-table", "QwenTableExtractionService")
|
436
|
+
result = table_extractor().extract_table_data.remote(
|
437
|
+
image_b64=image_b64,
|
438
|
+
extraction_format=extraction_format,
|
439
|
+
custom_prompt=custom_prompt
|
440
|
+
)
|
441
|
+
|
442
|
+
if result.get('success'):
|
443
|
+
return {
|
444
|
+
'success': True,
|
445
|
+
'service': 'isa-vision-table',
|
446
|
+
'extracted_data': result.get('extracted_data'),
|
447
|
+
'raw_output': result.get('raw_output'),
|
448
|
+
'format': result.get('format'),
|
449
|
+
'processing_time': result.get('processing_time'),
|
450
|
+
'model_info': result.get('model_info')
|
451
|
+
}
|
452
|
+
else:
|
453
|
+
return {
|
454
|
+
'success': False,
|
455
|
+
'error': result.get('error', 'Table extraction failed'),
|
456
|
+
'service': 'isa-vision-table'
|
457
|
+
}
|
458
|
+
|
459
|
+
except Exception as e:
|
460
|
+
logger.error(f"Table extraction failed: {e}")
|
461
|
+
return {
|
462
|
+
'success': False,
|
463
|
+
'error': str(e),
|
464
|
+
'service': 'isa-vision-table'
|
465
|
+
}
|
466
|
+
|
467
|
+
async def batch_extract_tables(
|
468
|
+
self,
|
469
|
+
images: List[Union[str, BinaryIO]],
|
470
|
+
extraction_format: str = "markdown"
|
471
|
+
) -> Dict[str, Any]:
|
472
|
+
"""Extract tables from multiple images"""
|
473
|
+
|
474
|
+
if not self.table_app:
|
475
|
+
return {
|
476
|
+
'success': False,
|
477
|
+
'error': 'Table extraction service not available',
|
478
|
+
'service': 'isa-vision-table'
|
479
|
+
}
|
480
|
+
|
481
|
+
try:
|
482
|
+
# Convert all images to base64
|
483
|
+
images_b64 = [self._encode_image(image) for image in images]
|
484
|
+
|
485
|
+
# Call Modal batch extraction service
|
486
|
+
table_extractor = modal.Cls.from_name("qwen-vision-table", "QwenTableExtractionService")
|
487
|
+
result = table_extractor().batch_extract_tables.remote(
|
488
|
+
images_b64=images_b64,
|
489
|
+
extraction_format=extraction_format
|
490
|
+
)
|
491
|
+
|
492
|
+
return result
|
493
|
+
|
494
|
+
except Exception as e:
|
495
|
+
logger.error(f"Batch table extraction failed: {e}")
|
496
|
+
return {
|
497
|
+
'success': False,
|
498
|
+
'error': str(e),
|
499
|
+
'service': 'isa-vision-table'
|
402
500
|
}
|
@@ -22,7 +22,7 @@ import logging
|
|
22
22
|
from typing import Dict, Any, List, Union, Optional, BinaryIO
|
23
23
|
from datetime import datetime
|
24
24
|
|
25
|
-
from
|
25
|
+
from .helpers.base_stacked_service import (
|
26
26
|
BaseStackedService, LayerConfig, LayerType, LayerResult
|
27
27
|
)
|
28
28
|
|