isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/models/model_repo.py +343 -0
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/__init__.py +9 -0
  15. isa_model/deployment/cloud/modal/__init__.py +10 -0
  16. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
  17. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  18. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
  19. isa_model/deployment/cloud/modal/register_models.py +321 -0
  20. isa_model/deployment/runtime/deployed_service.py +338 -0
  21. isa_model/deployment/services/__init__.py +9 -0
  22. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  23. isa_model/deployment/services/model_service.py +332 -0
  24. isa_model/deployment/services/service_monitor.py +356 -0
  25. isa_model/deployment/services/service_registry.py +527 -0
  26. isa_model/eval/__init__.py +80 -44
  27. isa_model/eval/config/__init__.py +10 -0
  28. isa_model/eval/config/evaluation_config.py +108 -0
  29. isa_model/eval/evaluators/__init__.py +18 -0
  30. isa_model/eval/evaluators/base_evaluator.py +503 -0
  31. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  32. isa_model/eval/factory.py +417 -709
  33. isa_model/eval/infrastructure/__init__.py +24 -0
  34. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  35. isa_model/eval/metrics.py +191 -21
  36. isa_model/inference/ai_factory.py +187 -387
  37. isa_model/inference/providers/modal_provider.py +109 -0
  38. isa_model/inference/providers/yyds_provider.py +108 -0
  39. isa_model/inference/services/__init__.py +2 -1
  40. isa_model/inference/services/audio/base_stt_service.py +65 -1
  41. isa_model/inference/services/audio/base_tts_service.py +75 -1
  42. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  43. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  44. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  45. isa_model/inference/services/base_service.py +55 -55
  46. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  47. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  48. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  49. isa_model/inference/services/helpers/stacked_config.py +148 -0
  50. isa_model/inference/services/img/__init__.py +18 -0
  51. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
  52. isa_model/inference/services/img/flux_professional_service.py +603 -0
  53. isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
  54. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
  55. isa_model/inference/services/llm/__init__.py +3 -3
  56. isa_model/inference/services/llm/base_llm_service.py +519 -35
  57. isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
  58. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  59. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  60. isa_model/inference/services/llm/ollama_llm_service.py +150 -15
  61. isa_model/inference/services/llm/openai_llm_service.py +134 -31
  62. isa_model/inference/services/llm/yyds_llm_service.py +255 -0
  63. isa_model/inference/services/vision/__init__.py +38 -4
  64. isa_model/inference/services/vision/base_vision_service.py +241 -96
  65. isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
  66. isa_model/inference/services/vision/doc_analysis_service.py +640 -0
  67. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  68. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  69. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  70. isa_model/inference/services/vision/openai_vision_service.py +109 -170
  71. isa_model/inference/services/vision/replicate_vision_service.py +508 -0
  72. isa_model/inference/services/vision/ui_analysis_service.py +823 -0
  73. isa_model/scripts/register_models.py +370 -0
  74. isa_model/scripts/register_models_with_embeddings.py +510 -0
  75. isa_model/serving/__init__.py +19 -0
  76. isa_model/serving/api/__init__.py +10 -0
  77. isa_model/serving/api/fastapi_server.py +89 -0
  78. isa_model/serving/api/middleware/__init__.py +9 -0
  79. isa_model/serving/api/middleware/request_logger.py +88 -0
  80. isa_model/serving/api/routes/__init__.py +5 -0
  81. isa_model/serving/api/routes/health.py +82 -0
  82. isa_model/serving/api/routes/llm.py +19 -0
  83. isa_model/serving/api/routes/ui_analysis.py +223 -0
  84. isa_model/serving/api/routes/unified.py +202 -0
  85. isa_model/serving/api/routes/vision.py +19 -0
  86. isa_model/serving/api/schemas/__init__.py +17 -0
  87. isa_model/serving/api/schemas/common.py +33 -0
  88. isa_model/serving/api/schemas/ui_analysis.py +78 -0
  89. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  90. isa_model-0.3.6.dist-info/RECORD +147 -0
  91. isa_model/core/model_manager.py +0 -208
  92. isa_model/core/model_registry.py +0 -342
  93. isa_model/inference/billing_tracker.py +0 -406
  94. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  95. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  96. isa_model-0.3.4.dist-info/RECORD +0 -91
  97. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  98. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  99. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  100. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
@@ -1,177 +1,322 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import Dict, Any, List, Union, Optional, BinaryIO
3
+ import logging
4
+
3
5
  from isa_model.inference.services.base_service import BaseService
6
+ from isa_model.inference.services.vision.helpers.image_utils import (
7
+ get_image_data, prepare_image_base64, prepare_image_data_url,
8
+ get_image_mime_type, get_image_dimensions, validate_image_format
9
+ )
10
+
11
+ logger = logging.getLogger(__name__)
4
12
 
5
13
  class BaseVisionService(BaseService):
6
- """Base class for vision understanding services"""
14
+ """Base class for vision understanding services with common task implementations"""
7
15
 
8
- @abstractmethod
9
- async def analyze_image(
16
+ async def invoke(
10
17
  self,
11
18
  image: Union[str, BinaryIO],
12
19
  prompt: Optional[str] = None,
13
- max_tokens: int = 1000
20
+ task: Optional[str] = None,
21
+ **kwargs
14
22
  ) -> Dict[str, Any]:
15
23
  """
16
- Analyze image and provide description or answer questions
24
+ 统一的任务分发方法 - Base类提供通用实现
17
25
 
18
26
  Args:
19
27
  image: Path to image file or image data
20
28
  prompt: Optional text prompt/question about the image
21
- max_tokens: Maximum tokens in response
29
+ task: Task type - 支持两大类:图像理解 + 检测抽取
30
+ **kwargs: Additional task-specific parameters
22
31
 
23
32
  Returns:
24
- Dict containing analysis results with keys:
25
- - text: Description or answer about the image
26
- - confidence: Confidence score (if available)
27
- - detected_objects: List of detected objects (if available)
28
- - metadata: Additional metadata about the analysis
33
+ Dict containing task results
29
34
  """
30
- pass
35
+ task = task or "analyze"
36
+
37
+ # ==================== 图像理解类任务 ====================
38
+ if task == "analyze":
39
+ return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
40
+ elif task == "describe":
41
+ return await self.describe_image(image, kwargs.get("detail_level", "medium"))
42
+ elif task == "classify":
43
+ return await self.classify_image(image, kwargs.get("categories"))
44
+ elif task == "compare":
45
+ return await self.compare_images(image, kwargs.get("image2"))
46
+
47
+ # ==================== 检测抽取类任务 ====================
48
+ elif task == "extract_text":
49
+ return await self.extract_text(image)
50
+ elif task == "detect_objects":
51
+ return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
52
+ elif task == "detect_ui_elements":
53
+ return await self.detect_ui_elements(image, kwargs.get("element_types"), kwargs.get("confidence_threshold", 0.5))
54
+ elif task == "detect_document_elements":
55
+ return await self.detect_document_elements(image, kwargs.get("element_types"), kwargs.get("confidence_threshold", 0.5))
56
+ elif task == "extract_table_data":
57
+ return await self.extract_table_data(image, kwargs.get("table_format", "json"), kwargs.get("preserve_formatting", True))
58
+ elif task == "get_coordinates":
59
+ return await self.get_object_coordinates(image, kwargs.get("object_name", ""))
60
+
61
+ else:
62
+ raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
31
63
 
32
- @abstractmethod
33
- async def analyze_images(
64
+ async def analyze_image(
34
65
  self,
35
- images: List[Union[str, BinaryIO]],
66
+ image: Union[str, BinaryIO],
36
67
  prompt: Optional[str] = None,
37
68
  max_tokens: int = 1000
38
- ) -> List[Dict[str, Any]]:
69
+ ) -> Dict[str, Any]:
39
70
  """
40
- Analyze multiple images
71
+ 通用图像分析 - Provider可选实现
41
72
 
42
73
  Args:
43
- images: List of image paths or image data
44
- prompt: Optional text prompt/question about the images
74
+ image: Path to image file or image data
75
+ prompt: Optional text prompt/question about the image
45
76
  max_tokens: Maximum tokens in response
46
77
 
47
78
  Returns:
48
- List of analysis result dictionaries
79
+ Dict containing analysis results with keys:
80
+ - text: Description or answer about the image
81
+ - confidence: Confidence score (if available)
82
+ - detected_objects: List of detected objects (if available)
83
+ - metadata: Additional metadata about the analysis
49
84
  """
50
- pass
85
+ raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_image task")
86
+
87
+ # ==================== 图像理解类方法 ====================
51
88
 
52
- @abstractmethod
53
89
  async def describe_image(
54
90
  self,
55
91
  image: Union[str, BinaryIO],
56
92
  detail_level: str = "medium"
57
93
  ) -> Dict[str, Any]:
58
94
  """
59
- Generate detailed description of image
60
-
61
- Args:
62
- image: Path to image file or image data
63
- detail_level: Level of detail ("low", "medium", "high")
64
-
65
- Returns:
66
- Dict containing description results with keys:
67
- - description: Detailed text description
68
- - objects: List of detected objects
69
- - scene: Scene description
70
- - colors: Dominant colors
95
+ 图像描述 - Provider可选实现
71
96
  """
72
- pass
97
+ raise NotImplementedError(f"{self.__class__.__name__} does not support describe_image task")
98
+
99
+ async def classify_image(
100
+ self,
101
+ image: Union[str, BinaryIO],
102
+ categories: Optional[List[str]] = None
103
+ ) -> Dict[str, Any]:
104
+ """
105
+ 图像分类 - Provider可选实现
106
+ """
107
+ raise NotImplementedError(f"{self.__class__.__name__} does not support classify_image task")
108
+
109
+ async def compare_images(
110
+ self,
111
+ image1: Union[str, BinaryIO],
112
+ image2: Union[str, BinaryIO]
113
+ ) -> Dict[str, Any]:
114
+ """
115
+ 图像比较 - Provider可选实现
116
+ """
117
+ raise NotImplementedError(f"{self.__class__.__name__} does not support compare_images task")
118
+
119
+ # ==================== 检测抽取类方法 ====================
73
120
 
74
- @abstractmethod
75
121
  async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
76
122
  """
77
- Extract text from image (OCR)
78
-
79
- Args:
80
- image: Path to image file or image data
81
-
82
- Returns:
83
- Dict containing OCR results with keys:
84
- - text: Extracted text
85
- - confidence: Overall confidence score
86
- - bounding_boxes: Text regions with coordinates (if available)
87
- - language: Detected language (if available)
123
+ 文本提取(OCR) - Provider可选实现
88
124
  """
89
- pass
125
+ raise NotImplementedError(f"{self.__class__.__name__} does not support extract_text task")
90
126
 
91
- @abstractmethod
92
127
  async def detect_objects(
93
128
  self,
94
129
  image: Union[str, BinaryIO],
95
130
  confidence_threshold: float = 0.5
96
131
  ) -> Dict[str, Any]:
97
132
  """
98
- Detect objects in image
133
+ 通用物体检测 - Provider可选实现
134
+ """
135
+ raise NotImplementedError(f"{self.__class__.__name__} does not support detect_objects task")
136
+
137
+ async def detect_ui_elements(
138
+ self,
139
+ image: Union[str, BinaryIO],
140
+ element_types: Optional[List[str]] = None,
141
+ confidence_threshold: float = 0.5
142
+ ) -> Dict[str, Any]:
143
+ """
144
+ UI界面元素检测 - Provider可选实现
99
145
 
100
146
  Args:
101
- image: Path to image file or image data
102
- confidence_threshold: Minimum confidence for detections
147
+ image: 输入图像
148
+ element_types: 要检测的元素类型 ['button', 'input', 'text', 'image', 'link', etc.]
149
+ confidence_threshold: 置信度阈值
103
150
 
104
151
  Returns:
105
- Dict containing detection results with keys:
106
- - objects: List of detected objects with labels and confidence
107
- - count: Number of objects detected
108
- - bounding_boxes: Object locations (if available)
152
+ Dict containing detected UI elements with their bounding boxes and types
109
153
  """
110
- pass
154
+ raise NotImplementedError(f"{self.__class__.__name__} does not support detect_ui_elements task")
111
155
 
112
- @abstractmethod
113
- async def classify_image(
114
- self,
156
+ async def detect_document_elements(
157
+ self,
115
158
  image: Union[str, BinaryIO],
116
- categories: Optional[List[str]] = None
159
+ element_types: Optional[List[str]] = None,
160
+ confidence_threshold: float = 0.5
117
161
  ) -> Dict[str, Any]:
118
162
  """
119
- Classify image into categories
163
+ 文档结构元素检测 - Provider可选实现
120
164
 
121
165
  Args:
122
- image: Path to image file or image data
123
- categories: Optional list of specific categories to consider
166
+ image: 输入图像
167
+ element_types: 要检测的元素类型 ['table', 'header', 'paragraph', 'list', etc.]
168
+ confidence_threshold: 置信度阈值
124
169
 
125
170
  Returns:
126
- Dict containing classification results with keys:
127
- - category: Top predicted category
128
- - confidence: Confidence score
129
- - all_predictions: List of all predictions with scores
171
+ Dict containing detected document elements with their structure and content
130
172
  """
131
- pass
173
+ raise NotImplementedError(f"{self.__class__.__name__} does not support detect_document_elements task")
132
174
 
133
- @abstractmethod
134
- async def compare_images(
135
- self,
136
- image1: Union[str, BinaryIO],
137
- image2: Union[str, BinaryIO]
175
+ async def get_object_coordinates(
176
+ self,
177
+ image: Union[str, BinaryIO],
178
+ object_name: str
179
+ ) -> Dict[str, Any]:
180
+ """
181
+ 获取对象坐标 - Provider可选实现
182
+ """
183
+ raise NotImplementedError(f"{self.__class__.__name__} does not support get_object_coordinates task")
184
+
185
+ async def extract_table_data(
186
+ self,
187
+ image: Union[str, BinaryIO],
188
+ table_format: str = "json",
189
+ preserve_formatting: bool = True
138
190
  ) -> Dict[str, Any]:
139
191
  """
140
- Compare two images for similarity
192
+ 表格数据结构化抽取 - Provider可选实现
141
193
 
142
194
  Args:
143
- image1: First image path or data
144
- image2: Second image path or data
195
+ image: 输入图像
196
+ table_format: 输出格式 ('json', 'csv', 'markdown', 'html')
197
+ preserve_formatting: 是否保持原始格式(合并单元格、样式等)
145
198
 
146
199
  Returns:
147
- Dict containing comparison results with keys:
148
- - similarity_score: Numerical similarity score
149
- - differences: Description of key differences
150
- - common_elements: Description of common elements
200
+ Dict containing extracted table data in structured format:
201
+ {
202
+ "tables": [
203
+ {
204
+ "table_id": "table_1",
205
+ "headers": ["Column1", "Column2", "Column3"],
206
+ "rows": [
207
+ ["cell1", "cell2", "cell3"],
208
+ ["cell4", "cell5", "cell6"]
209
+ ],
210
+ "metadata": {
211
+ "row_count": 2,
212
+ "column_count": 3,
213
+ "has_headers": true,
214
+ "merged_cells": [],
215
+ "table_caption": "optional_caption"
216
+ }
217
+ }
218
+ ],
219
+ "raw_data": "original_table_text",
220
+ "format": "json"
221
+ }
151
222
  """
223
+ raise NotImplementedError(f"{self.__class__.__name__} does not support extract_table_data task")
224
+
225
+ async def close(self):
226
+ """Cleanup resources - default implementation does nothing"""
152
227
  pass
153
228
 
154
- @abstractmethod
155
- def get_supported_formats(self) -> List[str]:
229
+ def get_supported_tasks(self) -> List[str]:
156
230
  """
157
- Get list of supported image formats
231
+ 获取provider支持的任务列表
158
232
 
159
233
  Returns:
160
- List of supported file extensions (e.g., ['jpg', 'png', 'gif'])
234
+ List of supported task names
161
235
  """
162
- pass
236
+ supported = []
237
+
238
+ # 检查哪些方法被实现了
239
+ if hasattr(self, 'analyze_image') and callable(getattr(self, 'analyze_image')):
240
+ try:
241
+ # 尝试调用看是否抛出NotImplementedError
242
+ import inspect
243
+ if not 'NotImplementedError' in inspect.getsource(self.analyze_image):
244
+ supported.append('analyze')
245
+ except:
246
+ pass
247
+
248
+ # 检查各类任务支持情况
249
+ method_task_map = {
250
+ # 图像理解类
251
+ 'describe_image': 'describe',
252
+ 'classify_image': 'classify',
253
+ 'compare_images': 'compare',
254
+ # 检测抽取类
255
+ 'extract_text': 'extract_text',
256
+ 'detect_objects': 'detect_objects',
257
+ 'detect_ui_elements': 'detect_ui_elements',
258
+ 'detect_document_elements': 'detect_document_elements',
259
+ 'extract_table_data': 'extract_table_data',
260
+ 'get_object_coordinates': 'get_coordinates'
261
+ }
262
+
263
+ for method_name, task_name in method_task_map.items():
264
+ if hasattr(self, method_name):
265
+ # 检查是否是默认实现(基于analyze_image)还是provider自己的实现
266
+ supported.append(task_name)
267
+
268
+ return supported
269
+
270
+ # ==================== COMMON TASK IMPLEMENTATIONS ====================
271
+ # 为每个provider提供可选的默认实现,provider可以覆盖这些方法
272
+
273
+ async def analyze_images(
274
+ self,
275
+ images: List[Union[str, BinaryIO]],
276
+ prompt: Optional[str] = None,
277
+ max_tokens: int = 1000
278
+ ) -> List[Dict[str, Any]]:
279
+ """
280
+ 批量图像分析 - Provider可选实现
281
+ 默认实现:如果provider支持analyze_image,则逐个调用
282
+ """
283
+ if hasattr(self, 'analyze_image'):
284
+ results = []
285
+ for image in images:
286
+ try:
287
+ result = await self.analyze_image(image, prompt, max_tokens)
288
+ results.append(result)
289
+ except NotImplementedError:
290
+ raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
291
+ return results
292
+ else:
293
+ raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
294
+
295
+
296
+ def get_supported_formats(self) -> List[str]:
297
+ """
298
+ 获取支持的图像格式 - Provider应该实现
299
+ """
300
+ return ['jpg', 'jpeg', 'png', 'gif', 'webp'] # 通用格式
163
301
 
164
- @abstractmethod
165
302
  def get_max_image_size(self) -> Dict[str, int]:
166
303
  """
167
- Get maximum supported image dimensions
168
-
169
- Returns:
170
- Dict with 'width' and 'height' keys for maximum dimensions
304
+ 获取最大图像尺寸 - Provider应该实现
171
305
  """
172
- pass
306
+ return {"width": 2048, "height": 2048, "file_size_mb": 10} # 通用限制
173
307
 
174
- @abstractmethod
175
- async def close(self):
176
- """Cleanup resources"""
177
- pass
308
+ # ==================== UTILITY METHODS ====================
309
+
310
+ def _parse_coordinates_from_text(self, text: str) -> List[Dict[str, Any]]:
311
+ """
312
+ 从文本响应中解析对象坐标 - 使用统一的解析工具
313
+ """
314
+ from isa_model.inference.services.vision.helpers.image_utils import parse_coordinates_from_text
315
+ return parse_coordinates_from_text(text)
316
+
317
+ def _parse_center_coordinates_from_text(self, text: str) -> tuple[bool, Optional[List[int]], str]:
318
+ """
319
+ 从结构化文本响应中解析中心坐标 - 使用统一的解析工具
320
+ """
321
+ from isa_model.inference.services.vision.helpers.image_utils import parse_center_coordinates_from_text
322
+ return parse_center_coordinates_from_text(text)