isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -63,16 +63,19 @@ class YydsLLMService(BaseLLMService):
63
63
 
64
64
  return bound_service
65
65
 
66
- async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
66
+ async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> AsyncGenerator[str, None]:
67
67
  """
68
68
  True streaming method - yields tokens one by one as they arrive
69
69
 
70
70
  Args:
71
71
  input_data: Same as ainvoke
72
+ **kwargs: Additional parameters (will filter out unsupported ones)
72
73
 
73
74
  Yields:
74
75
  Individual tokens as they arrive from the API
75
76
  """
77
+ # Remove parameters that yyds doesn't support
78
+ kwargs.pop('show_reasoning', None) # OpenAI-specific parameter
76
79
  try:
77
80
  # Use adapter manager to prepare messages
78
81
  messages = self._prepare_messages(input_data)
@@ -115,8 +118,11 @@ class YydsLLMService(BaseLLMService):
115
118
  logger.error(f"Error in astream: {e}")
116
119
  raise
117
120
 
118
- async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
121
+ async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> Union[str, Any]:
119
122
  """Unified invoke method for all input types"""
123
+ # Remove parameters that yyds doesn't support
124
+ kwargs.pop('show_reasoning', None) # OpenAI-specific parameter
125
+ kwargs.pop('task', None) # Handled internally
120
126
  try:
121
127
  # Use adapter manager to prepare messages
122
128
  messages = self._prepare_messages(input_data)
@@ -252,7 +258,8 @@ class YydsLLMService(BaseLLMService):
252
258
  async def chat(
253
259
  self,
254
260
  input_data: Union[str, List[Dict[str, str]], Any],
255
- max_tokens: Optional[int] = None
261
+ max_tokens: Optional[int] = None,
262
+ show_reasoning: bool = False
256
263
  ) -> Dict[str, Any]:
257
264
  """
258
265
  Chat method that wraps ainvoke for compatibility with base class
@@ -11,9 +11,9 @@ from .base_vision_service import BaseVisionService
11
11
  from .openai_vision_service import OpenAIVisionService
12
12
  from .replicate_vision_service import ReplicateVisionService
13
13
 
14
- # Stacked Vision Services
15
- from .doc_analysis_service import DocAnalysisStackedService
16
- from .ui_analysis_service import UIAnalysisService
14
+ # Stacked Vision Services (disabled - files don't exist)
15
+ # from .doc_analysis_service import DocAnalysisStackedService
16
+ # from .ui_analysis_service import UIAnalysisService
17
17
 
18
18
  # ISA Vision service
19
19
  try:
@@ -31,16 +31,37 @@ except ImportError:
31
31
  OllamaVisionService = None
32
32
  OLLAMA_VISION_AVAILABLE = False
33
33
 
34
+ # Computer Vision specialized services
35
+ try:
36
+ from .vgg16_vision_service import VGG16VisionService
37
+ VGG16_VISION_AVAILABLE = True
38
+ except ImportError:
39
+ VGG16VisionService = None
40
+ VGG16_VISION_AVAILABLE = False
41
+
42
+ try:
43
+ from .blip_vision_service import BLIPVisionService
44
+ BLIP_VISION_AVAILABLE = True
45
+ except ImportError:
46
+ BLIPVisionService = None
47
+ BLIP_VISION_AVAILABLE = False
48
+
34
49
  __all__ = [
35
50
  "BaseVisionService",
36
51
  "OpenAIVisionService",
37
52
  "ReplicateVisionService",
38
- "DocAnalysisStackedService",
39
- "UIAnalysisService"
53
+ # "DocAnalysisStackedService", # Disabled - file doesn't exist
54
+ # "UIAnalysisService" # Disabled - file doesn't exist
40
55
  ]
41
56
 
42
57
  if ISA_VISION_AVAILABLE:
43
58
  __all__.append("ISAVisionService")
44
59
 
45
60
  if OLLAMA_VISION_AVAILABLE:
46
- __all__.append("OllamaVisionService")
61
+ __all__.append("OllamaVisionService")
62
+
63
+ if VGG16_VISION_AVAILABLE:
64
+ __all__.append("VGG16VisionService")
65
+
66
+ if BLIP_VISION_AVAILABLE:
67
+ __all__.append("BLIPVisionService")
@@ -21,43 +21,61 @@ class BaseVisionService(BaseService):
21
21
  **kwargs
22
22
  ) -> Dict[str, Any]:
23
23
  """
24
- 统一的任务分发方法 - Base类提供通用实现
24
+ 统一的任务分发方法 - 基于6个核心任务的设计
25
25
 
26
26
  Args:
27
27
  image: Path to image file or image data
28
28
  prompt: Optional text prompt/question about the image
29
- task: Task type - 支持两大类:图像理解 + 检测抽取
30
- **kwargs: Additional task-specific parameters
29
+ task: Core task type (analyze, describe, extract, detect, classify, compare)
30
+ **kwargs: Additional task-specific parameters including:
31
+ - target: Sub-task specification (e.g., "text"/"table" for extract, "objects"/"ui" for detect)
32
+ - max_tokens: Maximum tokens for text generation
33
+ - confidence_threshold: Confidence threshold for detection
34
+ - categories: Categories for classification
35
+ - image2: Second image for comparison
31
36
 
32
37
  Returns:
33
38
  Dict containing task results
34
39
  """
35
40
  task = task or "analyze"
36
41
 
37
- # ==================== 图像理解类任务 ====================
38
- if task == "analyze":
42
+ # Core task dispatch with parameterized sub-tasks
43
+ if task == "analyze" and hasattr(self, 'analyze_image'):
39
44
  return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
40
- elif task == "describe":
45
+
46
+ elif task == "describe" and hasattr(self, 'describe_image'):
41
47
  return await self.describe_image(image, kwargs.get("detail_level", "medium"))
42
- elif task == "classify":
48
+
49
+ elif task == "extract":
50
+ # Extract with target specification
51
+ target = kwargs.get("target", "text")
52
+ if target == "table" and hasattr(self, 'extract_table_data'):
53
+ return await self.extract_table_data(image, kwargs.get("table_format", "json"))
54
+ elif hasattr(self, 'extract_text'):
55
+ return await self.extract_text(image)
56
+ else:
57
+ raise NotImplementedError(f"{self.__class__.__name__} does not support extract task")
58
+
59
+ elif task == "detect":
60
+ # Detect with target specification
61
+ target = kwargs.get("target", "objects")
62
+ if target == "ui" and hasattr(self, 'detect_ui_elements'):
63
+ return await self.detect_ui_elements(image,
64
+ kwargs.get("element_types"),
65
+ kwargs.get("confidence_threshold", 0.5))
66
+ elif target == "coordinates" and hasattr(self, 'get_object_coordinates'):
67
+ return await self.get_object_coordinates(image, kwargs.get("object_name", ""))
68
+ elif hasattr(self, 'detect_objects'):
69
+ return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
70
+ else:
71
+ raise NotImplementedError(f"{self.__class__.__name__} does not support detect task")
72
+
73
+ elif task == "classify" and hasattr(self, 'classify_image'):
43
74
  return await self.classify_image(image, kwargs.get("categories"))
44
- elif task == "compare":
75
+
76
+ elif task == "compare" and hasattr(self, 'compare_images'):
45
77
  return await self.compare_images(image, kwargs.get("image2"))
46
-
47
- # ==================== 检测抽取类任务 ====================
48
- elif task == "extract_text":
49
- return await self.extract_text(image)
50
- elif task == "detect_objects":
51
- return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
52
- elif task == "detect_ui_elements":
53
- return await self.detect_ui_elements(image, kwargs.get("element_types"), kwargs.get("confidence_threshold", 0.5))
54
- elif task == "detect_document_elements":
55
- return await self.detect_document_elements(image, kwargs.get("element_types"), kwargs.get("confidence_threshold", 0.5))
56
- elif task == "extract_table_data":
57
- return await self.extract_table_data(image, kwargs.get("table_format", "json"), kwargs.get("preserve_formatting", True))
58
- elif task == "get_coordinates":
59
- return await self.get_object_coordinates(image, kwargs.get("object_name", ""))
60
-
78
+
61
79
  else:
62
80
  raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
63
81
 
@@ -83,40 +101,84 @@ class BaseVisionService(BaseService):
83
101
  - metadata: Additional metadata about the analysis
84
102
  """
85
103
  raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_image task")
104
+
86
105
 
87
- # ==================== 图像理解类方法 ====================
88
106
 
89
- async def describe_image(
90
- self,
91
- image: Union[str, BinaryIO],
92
- detail_level: str = "medium"
93
- ) -> Dict[str, Any]:
107
+
108
+ async def close(self):
109
+ """Cleanup resources - default implementation does nothing"""
110
+ pass
111
+
112
+ def get_supported_tasks(self) -> List[str]:
94
113
  """
95
- 图像描述 - Provider可选实现
114
+ 获取provider支持的核心任务列表
115
+
116
+ Returns:
117
+ List of core task names (analyze, describe, extract, detect, classify, compare)
96
118
  """
97
- raise NotImplementedError(f"{self.__class__.__name__} does not support describe_image task")
119
+ supported = []
120
+
121
+ # Check core task support based on implemented methods
122
+ task_method_map = {
123
+ 'analyze': 'analyze_image',
124
+ 'describe': 'describe_image',
125
+ 'extract': 'extract_text', # Basic extract support
126
+ 'detect': 'detect_objects', # Basic detect support
127
+ 'classify': 'classify_image',
128
+ 'compare': 'compare_images'
129
+ }
130
+
131
+ for task_name, method_name in task_method_map.items():
132
+ if hasattr(self, method_name):
133
+ try:
134
+ # Check if method is actually implemented (not just raising NotImplementedError)
135
+ import inspect
136
+ method = getattr(self, method_name)
137
+ if callable(method):
138
+ source = inspect.getsource(method)
139
+ # If it's not just raising NotImplementedError, consider it supported
140
+ if not ('raise NotImplementedError' in source and len(source.split('\n')) < 10):
141
+ supported.append(task_name)
142
+ except:
143
+ # If we can't inspect, assume it's supported if the method exists
144
+ supported.append(task_name)
145
+
146
+ return supported
98
147
 
99
- async def classify_image(
148
+ # ==================== COMMON TASK IMPLEMENTATIONS ====================
149
+ # 为每个provider提供可选的默认实现,provider可以覆盖这些方法
150
+
151
+ async def analyze_images(
100
152
  self,
101
- image: Union[str, BinaryIO],
102
- categories: Optional[List[str]] = None
103
- ) -> Dict[str, Any]:
153
+ images: List[Union[str, BinaryIO]],
154
+ prompt: Optional[str] = None,
155
+ max_tokens: int = 1000
156
+ ) -> List[Dict[str, Any]]:
104
157
  """
105
- 图像分类 - Provider可选实现
158
+ 批量图像分析 - Provider可选实现
159
+ 默认实现:如果provider支持analyze_image,则逐个调用
106
160
  """
107
- raise NotImplementedError(f"{self.__class__.__name__} does not support classify_image task")
161
+ if hasattr(self, 'analyze_image'):
162
+ results = []
163
+ for image in images:
164
+ try:
165
+ result = await self.analyze_image(image, prompt, max_tokens)
166
+ results.append(result)
167
+ except NotImplementedError:
168
+ raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
169
+ return results
170
+ else:
171
+ raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
108
172
 
109
- async def compare_images(
173
+ async def describe_image(
110
174
  self,
111
- image1: Union[str, BinaryIO],
112
- image2: Union[str, BinaryIO]
175
+ image: Union[str, BinaryIO],
176
+ detail_level: str = "medium"
113
177
  ) -> Dict[str, Any]:
114
178
  """
115
- 图像比较 - Provider可选实现
179
+ 图像描述 - Provider可选实现
116
180
  """
117
- raise NotImplementedError(f"{self.__class__.__name__} does not support compare_images task")
118
-
119
- # ==================== 检测抽取类方法 ====================
181
+ raise NotImplementedError(f"{self.__class__.__name__} does not support describe_image task")
120
182
 
121
183
  async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
122
184
  """
@@ -130,48 +192,10 @@ class BaseVisionService(BaseService):
130
192
  confidence_threshold: float = 0.5
131
193
  ) -> Dict[str, Any]:
132
194
  """
133
- 通用物体检测 - Provider可选实现
195
+ 物体检测 - Provider可选实现
134
196
  """
135
197
  raise NotImplementedError(f"{self.__class__.__name__} does not support detect_objects task")
136
198
 
137
- async def detect_ui_elements(
138
- self,
139
- image: Union[str, BinaryIO],
140
- element_types: Optional[List[str]] = None,
141
- confidence_threshold: float = 0.5
142
- ) -> Dict[str, Any]:
143
- """
144
- UI界面元素检测 - Provider可选实现
145
-
146
- Args:
147
- image: 输入图像
148
- element_types: 要检测的元素类型 ['button', 'input', 'text', 'image', 'link', etc.]
149
- confidence_threshold: 置信度阈值
150
-
151
- Returns:
152
- Dict containing detected UI elements with their bounding boxes and types
153
- """
154
- raise NotImplementedError(f"{self.__class__.__name__} does not support detect_ui_elements task")
155
-
156
- async def detect_document_elements(
157
- self,
158
- image: Union[str, BinaryIO],
159
- element_types: Optional[List[str]] = None,
160
- confidence_threshold: float = 0.5
161
- ) -> Dict[str, Any]:
162
- """
163
- 文档结构元素检测 - Provider可选实现
164
-
165
- Args:
166
- image: 输入图像
167
- element_types: 要检测的元素类型 ['table', 'header', 'paragraph', 'list', etc.]
168
- confidence_threshold: 置信度阈值
169
-
170
- Returns:
171
- Dict containing detected document elements with their structure and content
172
- """
173
- raise NotImplementedError(f"{self.__class__.__name__} does not support detect_document_elements task")
174
-
175
199
  async def get_object_coordinates(
176
200
  self,
177
201
  image: Union[str, BinaryIO],
@@ -182,116 +206,25 @@ class BaseVisionService(BaseService):
182
206
  """
183
207
  raise NotImplementedError(f"{self.__class__.__name__} does not support get_object_coordinates task")
184
208
 
185
- async def extract_table_data(
186
- self,
209
+ async def classify_image(
210
+ self,
187
211
  image: Union[str, BinaryIO],
188
- table_format: str = "json",
189
- preserve_formatting: bool = True
212
+ categories: Optional[List[str]] = None
190
213
  ) -> Dict[str, Any]:
191
214
  """
192
- 表格数据结构化抽取 - Provider可选实现
193
-
194
- Args:
195
- image: 输入图像
196
- table_format: 输出格式 ('json', 'csv', 'markdown', 'html')
197
- preserve_formatting: 是否保持原始格式(合并单元格、样式等)
198
-
199
- Returns:
200
- Dict containing extracted table data in structured format:
201
- {
202
- "tables": [
203
- {
204
- "table_id": "table_1",
205
- "headers": ["Column1", "Column2", "Column3"],
206
- "rows": [
207
- ["cell1", "cell2", "cell3"],
208
- ["cell4", "cell5", "cell6"]
209
- ],
210
- "metadata": {
211
- "row_count": 2,
212
- "column_count": 3,
213
- "has_headers": true,
214
- "merged_cells": [],
215
- "table_caption": "optional_caption"
216
- }
217
- }
218
- ],
219
- "raw_data": "original_table_text",
220
- "format": "json"
221
- }
222
- """
223
- raise NotImplementedError(f"{self.__class__.__name__} does not support extract_table_data task")
224
-
225
- async def close(self):
226
- """Cleanup resources - default implementation does nothing"""
227
- pass
228
-
229
- def get_supported_tasks(self) -> List[str]:
230
- """
231
- 获取provider支持的任务列表
232
-
233
- Returns:
234
- List of supported task names
215
+ 图像分类 - Provider可选实现
235
216
  """
236
- supported = []
237
-
238
- # 检查哪些方法被实现了
239
- if hasattr(self, 'analyze_image') and callable(getattr(self, 'analyze_image')):
240
- try:
241
- # 尝试调用看是否抛出NotImplementedError
242
- import inspect
243
- if not 'NotImplementedError' in inspect.getsource(self.analyze_image):
244
- supported.append('analyze')
245
- except:
246
- pass
247
-
248
- # 检查各类任务支持情况
249
- method_task_map = {
250
- # 图像理解类
251
- 'describe_image': 'describe',
252
- 'classify_image': 'classify',
253
- 'compare_images': 'compare',
254
- # 检测抽取类
255
- 'extract_text': 'extract_text',
256
- 'detect_objects': 'detect_objects',
257
- 'detect_ui_elements': 'detect_ui_elements',
258
- 'detect_document_elements': 'detect_document_elements',
259
- 'extract_table_data': 'extract_table_data',
260
- 'get_object_coordinates': 'get_coordinates'
261
- }
262
-
263
- for method_name, task_name in method_task_map.items():
264
- if hasattr(self, method_name):
265
- # 检查是否是默认实现(基于analyze_image)还是provider自己的实现
266
- supported.append(task_name)
267
-
268
- return supported
269
-
270
- # ==================== COMMON TASK IMPLEMENTATIONS ====================
271
- # 为每个provider提供可选的默认实现,provider可以覆盖这些方法
217
+ raise NotImplementedError(f"{self.__class__.__name__} does not support classify_image task")
272
218
 
273
- async def analyze_images(
219
+ async def compare_images(
274
220
  self,
275
- images: List[Union[str, BinaryIO]],
276
- prompt: Optional[str] = None,
277
- max_tokens: int = 1000
278
- ) -> List[Dict[str, Any]]:
221
+ image1: Union[str, BinaryIO],
222
+ image2: Union[str, BinaryIO]
223
+ ) -> Dict[str, Any]:
279
224
  """
280
- 批量图像分析 - Provider可选实现
281
- 默认实现:如果provider支持analyze_image,则逐个调用
225
+ 图像比较 - Provider可选实现
282
226
  """
283
- if hasattr(self, 'analyze_image'):
284
- results = []
285
- for image in images:
286
- try:
287
- result = await self.analyze_image(image, prompt, max_tokens)
288
- results.append(result)
289
- except NotImplementedError:
290
- raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
291
- return results
292
- else:
293
- raise NotImplementedError(f"{self.__class__.__name__} does not support analyze_images task")
294
-
227
+ raise NotImplementedError(f"{self.__class__.__name__} does not support compare_images task")
295
228
 
296
229
  def get_supported_formats(self) -> List[str]:
297
230
  """