isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
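The renames above will break old import paths: the stacked services are folded into vision/ and img/ (items 48-49, 61, 67), llm_adapter.py moves under helpers/ (item 86), and ServiceType moves to isa_model.core.types (visible in the diff below). Here is a hypothetical migration sketch; the module paths come from this file list, but the exported symbol name UIAnalysisService is an assumption, not confirmed by this diff:

# Before (0.3.5) - symbol name assumed for illustration
# from isa_model.inference.billing_tracker import ServiceType
# from isa_model.inference.services.stacked.ui_analysis_service import UIAnalysisService

# After (0.3.7)
from isa_model.core.types import ServiceType
from isa_model.inference.services.vision.ui_analysis_service import UIAnalysisService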
isa_model/inference/services/vision/openai_vision_service.py
@@ -1,32 +1,31 @@
 from typing import Dict, Any, Union, List, Optional, BinaryIO
-import base64
-import aiohttp
 from openai import AsyncOpenAI
 from tenacity import retry, stop_after_attempt, wait_exponential
 from isa_model.inference.services.vision.base_vision_service import BaseVisionService
-from isa_model.inference.providers.base_provider import BaseProvider
-from isa_model.inference.billing_tracker import ServiceType
+from isa_model.inference.services.vision.helpers.image_utils import prepare_image_base64
+from isa_model.inference.services.vision.helpers.vision_prompts import VisionPromptMixin
+from isa_model.core.types import ServiceType
 import logging
 
 logger = logging.getLogger(__name__)
 
-class OpenAIVisionService(BaseVisionService):
-    """OpenAI Vision service using gpt-4.1-nano with vision capabilities"""
+class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
+    """OpenAI Vision service using centralized config management"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-4.1-nano"):
-        super().__init__(provider, model_name)
+    def __init__(self, provider_name: str, model_name: str = "gpt-4o-mini", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)
 
-        # Get full configuration from provider (including sensitive data)
-        provider_config = provider.get_full_config()
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()
 
-        # Initialize AsyncOpenAI client with provider configuration
+        # Initialize AsyncOpenAI client with centralized configuration
         try:
             if not provider_config.get("api_key"):
                 raise ValueError("OpenAI API key not found in provider configuration")
 
             self._client = AsyncOpenAI(
                 api_key=provider_config["api_key"],
-                base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
                 organization=provider_config.get("organization")
             )
 
@@ -44,57 +43,7 @@ class OpenAIVisionService(BaseVisionService):
         """Get the underlying OpenAI client"""
         return self._client
 
-    async def _download_image(self, image_url: str) -> bytes:
-        """Download image from URL"""
-        async with aiohttp.ClientSession() as session:
-            async with session.get(image_url) as response:
-                if response.status == 200:
-                    return await response.read()
-                else:
-                    raise ValueError(f"Failed to download image from {image_url}: {response.status}")
 
-    def _encode_image(self, image_path_or_data: Union[str, bytes, BinaryIO]) -> str:
-        """Encode image to base64"""
-        if isinstance(image_path_or_data, str):
-            # If it's a file path
-            with open(image_path_or_data, "rb") as image_file:
-                return base64.b64encode(image_file.read()).decode("utf-8")
-        elif hasattr(image_path_or_data, 'read'):
-            # If it's a file-like object (BinaryIO)
-            data = image_path_or_data.read()  # type: ignore
-            if isinstance(data, bytes):
-                return base64.b64encode(data).decode("utf-8")
-            else:
-                raise ValueError("File-like object did not return bytes")
-        else:
-            # If it's bytes data
-            return base64.b64encode(image_path_or_data).decode("utf-8")  # type: ignore
-
-    async def invoke(
-        self,
-        image: Union[str, BinaryIO],
-        prompt: Optional[str] = None,
-        task: Optional[str] = None,
-        **kwargs
-    ) -> Dict[str, Any]:
-        """
-        Unified invoke method for all vision operations
-        """
-        task = task or "analyze"
-
-        if task == "analyze":
-            return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
-        elif task == "describe":
-            return await self.describe_image(image, kwargs.get("detail_level", "medium"))
-        elif task == "extract_text":
-            return await self.extract_text(image)
-        elif task == "detect_objects":
-            return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
-        elif task == "classify":
-            return await self.classify_image(image, kwargs.get("categories"))
-        else:
-            # Default to analyze_image for unknown tasks
-            return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
 
     @retry(
         stop=stop_after_attempt(3),
@@ -119,22 +68,8 @@ class OpenAIVisionService(BaseVisionService):
             Dict containing analysis results
         """
         try:
-            # Handle different input types
-            if isinstance(image, str):
-                if image.startswith(('http://', 'https://')):
-                    # Download image from URL
-                    image_bytes = await self._download_image(image)
-                    base64_image = self._encode_image(image_bytes)
-                else:
-                    # File path
-                    base64_image = self._encode_image(image)
-            else:
-                # BinaryIO or bytes data
-                if hasattr(image, 'read'):
-                    image_data = image.read()
-                else:
-                    image_data = image
-                base64_image = self._encode_image(image_data)
+            # Use unified image processing from image_utils
+            base64_image = prepare_image_base64(image)
 
             # Use default prompt if none provided
             if prompt is None:
@@ -166,7 +101,7 @@
 
             # Track usage for billing
             if response.usage:
-                self._track_usage(
+                await self._track_usage(
                     service_type=ServiceType.VISION,
                     operation="image_analysis",
                     input_tokens=response.usage.prompt_tokens,
@@ -176,14 +111,36 @@
 
             content = response.choices[0].message.content or ""
 
+            # Try to parse the response as JSON (for structured tasks)
+            try:
+                import json
+                # Check whether the response is JSON-formatted
+                if content.strip().startswith('{') and content.strip().endswith('}'):
+                    parsed_json = json.loads(content)
+                    return {
+                        "text": content,
+                        "parsed_data": parsed_json,
+                        "confidence": 1.0,
+                        "metadata": {
+                            "model": self.model_name,
+                            "prompt": prompt[:100],
+                            "tokens_used": response.usage.total_tokens if response.usage else 0,
+                            "response_format": "json"
+                        }
+                    }
+            except json.JSONDecodeError:
+                pass
+
+            # Standard text response
             return {
                 "text": content,
                 "confidence": 1.0,  # OpenAI doesn't provide confidence scores
-                "detected_objects": [],  # Would need separate object detection
+                "detected_objects": [],  # Populated by specific detection methods
                 "metadata": {
                     "model": self.model_name,
-                    "prompt": prompt,
-                    "tokens_used": response.usage.total_tokens if response.usage else 0
+                    "prompt": prompt[:100],
+                    "tokens_used": response.usage.total_tokens if response.usage else 0,
+                    "response_format": "text"
                 }
             }
 
@@ -191,264 +148,104 @@
             logger.error(f"Error in image analysis: {e}")
             raise
 
-    async def analyze_images(
-        self,
-        images: List[Union[str, BinaryIO]],
-        prompt: Optional[str] = None,
-        max_tokens: int = 1000
-    ) -> List[Dict[str, Any]]:
-        """Analyze multiple images"""
-        results = []
-        for image in images:
-            result = await self.analyze_image(image, prompt, max_tokens)
-            results.append(result)
-        return results
+    # ==================== Prompt-driven intelligent features ====================
+    # OpenAI covers most vision capabilities simply by changing the prompt.
+    # The shared VisionPromptMixin supplies the standard prompts.
 
+    # Override the remaining methods to use the task-specific prompts
     async def describe_image(
         self,
         image: Union[str, BinaryIO],
         detail_level: str = "medium"
     ) -> Dict[str, Any]:
-        """Generate detailed description of image"""
-        detail_prompts = {
-            "low": "Briefly describe what you see in this image.",
-            "medium": "Describe what you see in this image in detail, including objects, colors, and scene.",
-            "high": "Provide a comprehensive and detailed description of this image, including all visible objects, their positions, colors, textures, lighting, composition, and any text or symbols present."
-        }
-
-        prompt = detail_prompts.get(detail_level, detail_prompts["medium"])
-        result = await self.analyze_image(image, prompt, 1500)
-
-        return {
-            "description": result["text"],
-            "objects": [],  # Would need object detection API
-            "scene": result["text"],  # Use same description
-            "colors": [],  # Would need color analysis
-            "detail_level": detail_level,
-            "metadata": result["metadata"]
-        }
+        """
+        Image description - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("describe", detail_level=detail_level)
+        return await self.analyze_image(image, prompt)
 
     async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
-        """Extract text from image (OCR)"""
-        prompt = "Extract all text visible in this image. Provide only the text content, maintaining the original structure and formatting as much as possible."
-        result = await self.analyze_image(image, prompt, 1000)
+        """
+        Text extraction (OCR) - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("extract_text")
 
-        return {
-            "text": result["text"],
-            "confidence": 1.0,
-            "bounding_boxes": [],  # OpenAI vision doesn't provide bounding boxes
-            "language": "unknown",  # Would need language detection
-            "metadata": result["metadata"]
-        }
+        return await self.analyze_image(image, prompt)
 
     async def detect_objects(
         self,
         image: Union[str, BinaryIO],
         confidence_threshold: float = 0.5
     ) -> Dict[str, Any]:
-        """Detect objects in image"""
-        prompt = """List all objects visible in this image. For each object, provide:
-1. Object name
-2. Approximate location as percentages from top-left corner (x%, y%)
-3. Approximate size as percentages of image dimensions (width%, height%)
-4. Brief description
-
-Format each object as: "ObjectName: x=X%, y=Y%, width=W%, height=H% - Description"
-
-Example: "Car: x=25%, y=40%, width=15%, height=12% - Red sedan in the center"
-"""
-        result = await self.analyze_image(image, prompt, 1500)
-
-        # Parse the response to extract object information with coordinates
-        objects = []
-        bounding_boxes = []
-        lines = result["text"].split('\n')
-
-        for line in lines:
-            line = line.strip()
-            if line and ':' in line and ('x=' in line or 'width=' in line):
-                try:
-                    # Extract object name and details
-                    parts = line.split(':', 1)
-                    if len(parts) == 2:
-                        object_name = parts[0].strip()
-                        details = parts[1].strip()
-
-                        # Extract coordinates using regex-like parsing
-                        coords = {}
-                        for param in ['x', 'y', 'width', 'height']:
-                            param_pattern = f"{param}="
-                            if param_pattern in details:
-                                start_idx = details.find(param_pattern) + len(param_pattern)
-                                end_idx = details.find('%', start_idx)
-                                if end_idx > start_idx:
-                                    try:
-                                        value = float(details[start_idx:end_idx])
-                                        coords[param] = value
-                                    except ValueError:
-                                        continue
-
-                        # Extract description (after the coordinates)
-                        desc_start = details.find(' - ')
-                        description = details[desc_start + 3:] if desc_start != -1 else details
-
-                        objects.append({
-                            "label": object_name,
-                            "confidence": 1.0,
-                            "coordinates": coords,
-                            "description": description
-                        })
-
-                        # Add bounding box if we have coordinates
-                        if all(k in coords for k in ['x', 'y', 'width', 'height']):
-                            bounding_boxes.append({
-                                "label": object_name,
-                                "x_percent": coords['x'],
-                                "y_percent": coords['y'],
-                                "width_percent": coords['width'],
-                                "height_percent": coords['height']
-                            })
-
-                except Exception:
-                    # Fallback for objects that don't match expected format
-                    objects.append({
-                        "label": line,
-                        "confidence": 1.0,
-                        "coordinates": {},
-                        "description": line
-                    })
+        """
+        Object detection - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("detect_objects", confidence_threshold=confidence_threshold)
 
-        return {
-            "objects": objects,
-            "count": len(objects),
-            "bounding_boxes": bounding_boxes,
-            "metadata": result["metadata"]
-        }
+        return await self.analyze_image(image, prompt)
 
-    async def get_object_coordinates(
+    async def detect_ui_elements(
         self,
         image: Union[str, BinaryIO],
-        object_name: str
+        element_types: Optional[List[str]] = None,
+        confidence_threshold: float = 0.5
     ) -> Dict[str, Any]:
-        """Get coordinates of a specific object in the image"""
-        prompt = f"""Locate the {object_name} in this image and return its center coordinates as [x, y] pixels.
-
-Respond in this exact format:
-FOUND: YES/NO
-CENTER: [x, y]
-DESCRIPTION: [Brief description]
-
-If found, provide the pixel coordinates of the center point.
-If not found, explain why.
-
-Example:
-FOUND: YES
-CENTER: [640, 360]
-DESCRIPTION: Blue login button in the center-left area
-"""
-
-        result = await self.analyze_image(image, prompt, 300)
-        response_text = result["text"]
-
-        # Parse the structured response
-        found = False
-        center_coords = None
-        description = ""
+        """
+        UI element detection - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("detect_ui_elements", element_types=element_types, confidence_threshold=confidence_threshold)
 
-        lines = response_text.split('\n')
-        for line in lines:
-            line = line.strip()
-            if line.startswith('FOUND:'):
-                found = 'YES' in line.upper()
-            elif line.startswith('CENTER:') and found:
-                # Extract center coordinates [x, y]
-                coords_text = line.replace('CENTER:', '').strip()
-                try:
-                    # Remove brackets and split
-                    coords_text = coords_text.replace('[', '').replace(']', '')
-                    if ',' in coords_text:
-                        x_str, y_str = coords_text.split(',')
-                        x = int(float(x_str.strip()))
-                        y = int(float(y_str.strip()))
-                        center_coords = [x, y]
-                except (ValueError, IndexError):
-                    pass
-            elif line.startswith('DESCRIPTION:'):
-                description = line.replace('DESCRIPTION:', '').strip()
+        return await self.analyze_image(image, prompt)
+
+    async def detect_document_elements(
+        self,
+        image: Union[str, BinaryIO],
+        element_types: Optional[List[str]] = None,
+        confidence_threshold: float = 0.5
+    ) -> Dict[str, Any]:
+        """
+        Document element detection - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("detect_document_elements", element_types=element_types, confidence_threshold=confidence_threshold)
 
-        return {
-            "found": found,
-            "center_coordinates": center_coords,
-            "confidence": 1.0 if found else 0.0,
-            "description": description,
-            "metadata": result["metadata"]
-        }
+        return await self.analyze_image(image, prompt)
 
     async def classify_image(
         self,
         image: Union[str, BinaryIO],
         categories: Optional[List[str]] = None
    ) -> Dict[str, Any]:
-        """Classify image into categories"""
-        if categories:
-            category_list = ", ".join(categories)
-            prompt = f"Classify this image into one of these categories: {category_list}. Respond with only the most appropriate category name."
-        else:
-            prompt = "What category best describes this image? Provide a single category name."
-
-        result = await self.analyze_image(image, prompt, 100)
-        category = result["text"].strip()
+        """
+        Image classification - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("classify", categories=categories)
 
-        return {
-            "category": category,
-            "confidence": 1.0,
-            "all_predictions": [{"category": category, "confidence": 1.0}],
-            "metadata": result["metadata"]
-        }
+        return await self.analyze_image(image, prompt)
 
-    async def compare_images(
-        self,
-        image1: Union[str, BinaryIO],
-        image2: Union[str, BinaryIO]
+    async def get_object_coordinates(
+        self,
+        image: Union[str, BinaryIO],
+        object_name: str
     ) -> Dict[str, Any]:
-        """Compare two images for similarity"""
-        # For now, analyze both images separately and compare descriptions
-        result1 = await self.analyze_image(image1, "Describe this image in detail.")
-        result2 = await self.analyze_image(image2, "Describe this image in detail.")
-
-        # Use LLM to compare the descriptions
-        comparison_prompt = f"Compare these two image descriptions and provide a similarity analysis:\n\nImage 1: {result1['text']}\n\nImage 2: {result2['text']}\n\nProvide: 1) A similarity score from 0.0 to 1.0, 2) Key differences, 3) Common elements."
-
-        comparison_result = await self._client.chat.completions.create(
-            model=self.model_name,
-            messages=[{"role": "user", "content": comparison_prompt}],
-            max_tokens=500,
-            temperature=0.3
-        )
-
-        comparison_text = comparison_result.choices[0].message.content or ""
+        """
+        Get object coordinates - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("get_coordinates", object_name=object_name)
 
-        return {
-            "similarity_score": 0.5,  # Would need better parsing to extract actual score
-            "differences": comparison_text,
-            "common_elements": comparison_text,
-            "metadata": {
-                "model": self.model_name,
-                "comparison_method": "description_based"
-            }
-        }
-
-    def get_supported_formats(self) -> List[str]:
-        """Get list of supported image formats"""
-        return ['jpg', 'jpeg', 'png', 'gif', 'webp']
+        return await self.analyze_image(image, prompt)
 
-    def get_max_image_size(self) -> Dict[str, int]:
-        """Get maximum supported image dimensions"""
-        return {
-            "width": 2048,
-            "height": 2048,
-            "file_size_mb": 20
-        }
+    async def extract_table_data(
+        self,
+        image: Union[str, BinaryIO],
+        table_format: str = "json",
+        preserve_formatting: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Structured table extraction - uses a dedicated table-extraction prompt
+        """
+        prompt = self.get_task_prompt("extract_table_data", table_format=table_format, preserve_formatting=preserve_formatting)
+
+        return await self.analyze_image(image, prompt)
 
     async def close(self):
         """Clean up resources"""