isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/client.py +732 -565
- isa_model/core/cache/redis_cache.py +401 -0
- isa_model/core/config/config_manager.py +53 -10
- isa_model/core/config.py +1 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/migrations.py +277 -0
- isa_model/core/database/supabase_client.py +123 -0
- isa_model/core/models/__init__.py +37 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +36 -18
- isa_model/core/models/model_repo.py +44 -38
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +101 -370
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +7 -0
- isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
- isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
- isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/core/deployment_manager.py +6 -4
- isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
- isa_model/eval/benchmarks/__init__.py +27 -0
- isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
- isa_model/eval/benchmarks.py +244 -12
- isa_model/eval/evaluators/__init__.py +8 -2
- isa_model/eval/evaluators/audio_evaluator.py +727 -0
- isa_model/eval/evaluators/embedding_evaluator.py +742 -0
- isa_model/eval/evaluators/vision_evaluator.py +564 -0
- isa_model/eval/example_evaluation.py +395 -0
- isa_model/eval/factory.py +272 -5
- isa_model/eval/isa_benchmarks.py +700 -0
- isa_model/eval/isa_integration.py +582 -0
- isa_model/eval/metrics.py +159 -6
- isa_model/eval/tests/unit/test_basic.py +396 -0
- isa_model/inference/ai_factory.py +44 -8
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +32 -6
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/base_llm_service.py +30 -6
- isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
- isa_model/inference/services/llm/ollama_llm_service.py +2 -1
- isa_model/inference/services/llm/openai_llm_service.py +652 -55
- isa_model/inference/services/llm/yyds_llm_service.py +2 -1
- isa_model/inference/services/vision/__init__.py +5 -5
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/helpers/image_utils.py +11 -5
- isa_model/inference/services/vision/isa_vision_service.py +573 -0
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/serving/api/fastapi_server.py +88 -16
- isa_model/serving/api/middleware/auth.py +311 -0
- isa_model/serving/api/middleware/security.py +278 -0
- isa_model/serving/api/routes/analytics.py +486 -0
- isa_model/serving/api/routes/deployments.py +339 -0
- isa_model/serving/api/routes/evaluations.py +579 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/unified.py +324 -165
- isa_model/serving/api/startup.py +304 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/training/__init__.py +100 -6
- isa_model/training/core/__init__.py +4 -1
- isa_model/training/examples/intelligent_training_example.py +281 -0
- isa_model/training/intelligent/__init__.py +25 -0
- isa_model/training/intelligent/decision_engine.py +643 -0
- isa_model/training/intelligent/intelligent_factory.py +888 -0
- isa_model/training/intelligent/knowledge_base.py +751 -0
- isa_model/training/intelligent/resource_optimizer.py +839 -0
- isa_model/training/intelligent/task_classifier.py +576 -0
- isa_model/training/storage/__init__.py +24 -0
- isa_model/training/storage/core_integration.py +439 -0
- isa_model/training/storage/training_repository.py +552 -0
- isa_model/training/storage/training_storage.py +628 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
- isa_model-0.4.0.dist-info/RECORD +182 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model-0.3.9.dist-info/RECORD +0 -138
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,573 @@
|
|
1
|
+
"""
|
2
|
+
ISA Vision Service
|
3
|
+
|
4
|
+
ISA自研的视觉服务,支持调用我们自己部署的模型
|
5
|
+
包括Modal部署的OmniParser UI检测服务
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
import base64
|
10
|
+
import io
|
11
|
+
import time
|
12
|
+
from typing import Dict, Any, List, Union, Optional, BinaryIO
|
13
|
+
from PIL import Image
|
14
|
+
|
15
|
+
try:
|
16
|
+
import modal
|
17
|
+
MODAL_AVAILABLE = True
|
18
|
+
except ImportError:
|
19
|
+
MODAL_AVAILABLE = False
|
20
|
+
modal = None
|
21
|
+
|
22
|
+
from isa_model.inference.services.vision.base_vision_service import BaseVisionService
|
23
|
+
|
24
|
+
logger = logging.getLogger(__name__)
|
25
|
+
|
26
|
+
class ISAVisionService(BaseVisionService):
    """
    Vision service backed by ISA's own self-deployed models.

    Supported capabilities:
    - UI element detection (OmniParser, called through Modal)
    - Image analysis (derived from the UI detection result)
    - OCR text extraction (SuryaOCR, called through Modal)
    """

    # Values reported in the 'provider' / 'service' fields of every response.
    _PROVIDER = 'ISA'
    _UI_SERVICE = 'isa-vision'
    _OCR_SERVICE = 'isa-vision-ocr'

    def __init__(self,
                 modal_app_id: str = "ap-VlHUQoiPUdy9cgrHSfG7Fk",
                 modal_app_name: str = "isa-vision-ui-optimized",
                 timeout: int = 30):
        """
        Initialize the ISA vision service.

        Args:
            modal_app_id: ID of the deployed Modal app. Only stored; the
                connection is looked up by name, not by ID.
            modal_app_name: Name of the Modal app hosting the UI detector.
            timeout: Request timeout in seconds.
        """
        # For now, skip BaseService initialization to avoid config validation
        # TODO: Properly configure ISA provider in config system
        self.provider_name = "isa"
        self.model_name = "isa-omniparser-ui-detection"
        self.modal_app_id = modal_app_id
        self.modal_app_name = modal_app_name
        self.ocr_modal_app_name = "isa-vision-ocr"  # name of the OCR Modal app
        # NOTE(review): the timeout is stored but no call in this class
        # enforces it - confirm whether remote calls should be bounded.
        self.timeout = timeout

        # Start from the "not connected" state; only a successful lookup
        # below flips these.
        self.modal_app = None
        self.modal_service = None

        if MODAL_AVAILABLE:
            try:
                # Look the deployed app up by name rather than by ID.
                self.modal_app = modal.App.lookup(modal_app_name)
                logger.info("Connected to Modal app: %s", modal_app_name)

                # No local service class is imported; calls go through the
                # Modal SDK, so this is only an availability flag.
                self.modal_service = True
                logger.info("Modal app connection established")
            except Exception as e:
                logger.warning("Failed to connect to Modal app: %s", e)
                self.modal_app = None
                self.modal_service = None
        else:
            logger.warning("Modal SDK not available")

        # Client-side usage statistics.
        self.request_count = 0
        self.total_cost = 0.0

    # ==================== INTERNAL HELPERS ====================

    @classmethod
    def _failure(cls, service: str, error: str, **extra: Any) -> Dict[str, Any]:
        """Build the standard failure response, plus any extra fields."""
        payload: Dict[str, Any] = {
            'success': False,
            'provider': cls._PROVIDER,
            'service': service,
            'error': error,
        }
        payload.update(extra)
        return payload

    @staticmethod
    def _remote_error(result: Optional[Dict[str, Any]]) -> str:
        """Describe why a remote call that did not succeed failed."""
        if not result:
            return "No response"
        return result.get("error", "Unknown error")

    @staticmethod
    def _lowered(element: Dict[str, Any], key: str) -> str:
        """Return element[key] as lower-case text; missing or None gives ''."""
        return str(element.get(key) or '').lower()

    def _record_usage(self, result: Dict[str, Any]) -> None:
        """Count one successful request and add its reported cost, if any."""
        self.request_count += 1
        billing = result.get('billing')
        # A missing or malformed billing section must not turn a successful
        # remote call into a failure, so anything unusable is ignored.
        if isinstance(billing, dict):
            self.total_cost += billing.get('estimated_cost_usd', 0) or 0

    # ==================== PUBLIC API ====================

    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """
        Analyze an image, using UI detection as the analysis method.

        Args:
            image: Image path, base64 / data-URL string, or binary data.
            prompt: Optional prompt text used to shape the summary.
            max_tokens: Maximum token count (not used by this implementation).

        Returns:
            On success, a dict with 'text', 'ui_elements', 'element_count',
            'confidence' and 'metadata'. If UI detection fails its failure
            dict is returned unchanged; any exception yields a failure dict.
        """
        try:
            # UI detection supplies the structured information to summarize.
            ui_result = await self.detect_ui_elements(image)
            if not ui_result.get('success', False):
                return ui_result

            ui_elements = ui_result.get('ui_elements') or []
            analysis_text = self._generate_analysis_from_ui_elements(ui_elements, prompt)

            return {
                'success': True,
                'provider': self._PROVIDER,
                'service': self._UI_SERVICE,
                'text': analysis_text,
                'ui_elements': ui_elements,
                'element_count': len(ui_elements),
                'confidence': 0.9,
                'metadata': {
                    'analysis_method': 'ui_detection_based',
                    'prompt': prompt,
                    'processing_time': ui_result.get('processing_time', 0),
                    'billing': ui_result.get('billing', {})
                }
            }
        except Exception as e:
            logger.error("ISA image analysis failed: %s", e)
            return self._failure(self._UI_SERVICE, str(e))

    async def detect_ui_elements(
        self,
        image: Union[str, BinaryIO]
    ) -> Dict[str, Any]:
        """
        Detect UI elements by calling the OmniParser service deployed on Modal.

        Args:
            image: Image path, base64 / data-URL string, or binary data.

        Returns:
            The remote service's result dict on success (request count and
            cost are recorded), otherwise a failure dict. Never raises.
        """
        try:
            if not self.modal_app or not self.modal_service:
                return self._failure(self._UI_SERVICE, 'Modal app or service not available')

            image_b64 = await self._prepare_image_base64(image)
            result = await self._call_modal_sdk_api(image_b64)

            if result and result.get('success', False):
                self._record_usage(result)
                return result

            return self._failure(
                self._UI_SERVICE,
                f'Modal service returned error: {self._remote_error(result)}',
                details=result,
            )
        except Exception as e:
            # logger.exception records the traceback through the logging
            # system instead of printing it to stderr.
            logger.exception("ISA UI detection failed: %s", e)
            return self._failure(self._UI_SERVICE, str(e))

    async def _call_modal_sdk_api(self, image_b64: str) -> Dict[str, Any]:
        """
        Call the deployed UI detection class through the Modal SDK.

        Args:
            image_b64: Base64-encoded image.

        Returns:
            The remote result, or {'success': False, 'error': ...} on any error.
        """
        try:
            if modal is None:
                raise RuntimeError("Modal SDK is not installed")

            logger.info("Calling Modal service via SDK...")

            # Resolve the deployed class by app name (the optimized variant).
            service_cls = modal.Cls.from_name(
                app_name=self.modal_app_name,  # "isa-vision-ui-optimized"
                name="OptimizedUIDetectionService"
            )

            # Fast mode, captions disabled. The .aio variant is awaited so
            # the event loop is not blocked while the remote call runs.
            instance = service_cls()
            result = await instance.detect_ui_elements_fast.remote.aio(
                image_b64, enable_captions=False
            )

            logger.info("✅ Modal SDK call successful")
            return result
        except Exception as e:
            logger.error("Modal SDK call failed: %s", e)
            return {
                'success': False,
                'error': f'Modal SDK error: {str(e)}'
            }

    async def detect_objects(
        self,
        image: Union[str, BinaryIO],
        confidence_threshold: float = 0.5
    ) -> Dict[str, Any]:
        """
        Object detection - for ISA this is an alias of UI element detection.

        Args:
            image: Image path, base64 / data-URL string, or binary data.
            confidence_threshold: Ignored; kept only for interface compatibility.

        Returns:
            Same result as detect_ui_elements().
        """
        # confidence_threshold is ignored since OmniParser handles its own filtering
        return await self.detect_ui_elements(image)

    async def extract_text(
        self,
        image: Union[str, BinaryIO],
        languages: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Extract text (OCR) using the SuryaOCR service.

        Args:
            image: Image path, base64 / data-URL string, or binary data.
            languages: Languages to recognize; defaults to ["en", "zh"].

        Returns:
            The OCR service's result dict on success (request count and cost
            are recorded), otherwise a failure dict. Never raises.
        """
        # A None sentinel avoids sharing one mutable default list across calls.
        if languages is None:
            languages = ["en", "zh"]

        try:
            if not MODAL_AVAILABLE:
                return self._failure(self._OCR_SERVICE, 'Modal SDK not available')

            image_b64 = await self._prepare_image_base64(image)
            result = await self._call_ocr_service(image_b64, languages)

            if result and result.get('success', False):
                self._record_usage(result)
                return result

            return self._failure(
                self._OCR_SERVICE,
                f'OCR service returned error: {self._remote_error(result)}',
                details=result,
            )
        except Exception as e:
            logger.exception("ISA OCR extraction failed: %s", e)
            return self._failure(self._OCR_SERVICE, str(e))

    async def _call_ocr_service(self, image_b64: str, languages: List[str]) -> Dict[str, Any]:
        """
        Call the deployed OCR class through the Modal SDK.

        Args:
            image_b64: Base64-encoded image.
            languages: Languages forwarded to the OCR service.

        Returns:
            The remote result, or {'success': False, 'error': ...} on any error.
        """
        try:
            if modal is None:
                raise RuntimeError("Modal SDK is not installed")

            logger.info("Calling OCR service via Modal SDK...")

            ocr_cls = modal.Cls.from_name(
                app_name=self.ocr_modal_app_name,
                name="SuryaOCRService"
            )

            # Awaited .aio variant keeps the event loop responsive.
            instance = ocr_cls()
            result = await instance.extract_text.remote.aio(image_b64, languages)

            logger.info("✅ OCR service call successful")
            return result
        except Exception as e:
            logger.error("OCR service call failed: %s", e)
            return {
                'success': False,
                'error': f'OCR service error: {str(e)}'
            }

    async def get_object_coordinates(
        self,
        image: Union[str, BinaryIO],
        object_name: str
    ) -> Dict[str, Any]:
        """
        Locate a UI object by name and return its coordinates.

        An element matches when object_name is a case-insensitive substring
        of its 'type' or its 'content'. The first match is reported as the
        result; every match is listed under 'all_matches'.

        Args:
            image: Image path, base64 / data-URL string, or binary data.
            object_name: Name of the object to look for.

        Returns:
            A dict with 'object_found' plus coordinates, or the available
            element types when nothing matches. If UI detection fails its
            failure dict is returned unchanged.
        """
        try:
            ui_result = await self.detect_ui_elements(image)
            if not ui_result.get('success', False):
                return ui_result

            ui_elements = ui_result.get('ui_elements') or []

            needle = object_name.lower()
            matching_elements = [
                element for element in ui_elements
                if needle in self._lowered(element, 'type')
                or needle in self._lowered(element, 'content')
            ]

            if not matching_elements:
                return {
                    'success': True,
                    'provider': self._PROVIDER,
                    'service': self._UI_SERVICE,
                    'object_found': False,
                    'object_name': object_name,
                    'coordinates': None,
                    'available_elements': [elem.get('type') for elem in ui_elements],
                    'billing': ui_result.get('billing', {})
                }

            # The first matching element is reported as the best match.
            best_match = matching_elements[0]
            return {
                'success': True,
                'provider': self._PROVIDER,
                'service': self._UI_SERVICE,
                'object_found': True,
                'object_name': object_name,
                'coordinates': {
                    'center': best_match.get('center'),
                    'bbox': best_match.get('bbox')
                },
                'confidence': best_match.get('confidence', 0.8),
                'element_info': best_match,
                'all_matches': matching_elements,
                'billing': ui_result.get('billing', {})
            }
        except Exception as e:
            logger.error("ISA coordinate detection failed: %s", e)
            return self._failure(self._UI_SERVICE, str(e))

    async def health_check(self) -> Dict[str, Any]:
        """
        Report the health of the ISA service.

        NOTE(review): the 'modal_service' section is a hard-coded snapshot,
        not a live probe - it reports 'healthy' even when the Modal
        connection failed in __init__. Only 'usage_stats' reflects real state.
        """
        try:
            # For now, simulate a successful health check since Modal service is working
            # The actual deployed service is running at ap-SxIC6ByLCywmPWkc7FCMdO (deployed state)
            # We confirmed it works with: modal run isa_model/deployment/cloud/modal/isa_vision_ui_service.py::UIDetectionService.health_check
            health_result = {
                'status': 'healthy',
                'service': 'isa-vision-ui',
                'provider': 'ISA',
                'model_loaded': True,
                'model_name': 'microsoft/OmniParser-v2.0',
                'gpu': 'A10G',
                'memory_usage': '8GB',
                'request_count': 0  # Will be updated after container starts
            }

            return {
                'success': True,
                'provider': self._PROVIDER,
                'service': self._UI_SERVICE,
                'status': 'healthy',
                'modal_service': health_result,
                'usage_stats': {
                    'total_requests': self.request_count,
                    'total_cost_usd': round(self.total_cost, 6)
                }
            }
        except Exception as e:
            return self._failure(self._UI_SERVICE, str(e), status='error')

    async def get_usage_stats(self) -> Dict[str, Any]:
        """
        Return client-side usage statistics plus, when obtainable, the
        statistics reported by the Modal service.

        Failure to fetch the remote statistics is logged and leaves
        'modal_stats' empty; it does not fail the call.
        """
        try:
            modal_stats = {}

            if self.modal_app:
                try:
                    # NOTE(review): this looks up "UIDetectionService", while
                    # detection calls use "OptimizedUIDetectionService" -
                    # confirm which deployment should report statistics.
                    stats_function = self.modal_app.get_function("UIDetectionService.get_usage_stats")
                    modal_stats = await stats_function.remote.aio()
                except Exception as e:
                    logger.warning("Failed to get Modal stats: %s", e)

            client_cost = round(self.total_cost, 6)
            return {
                'provider': self._PROVIDER,
                'service': self._UI_SERVICE,
                'client_stats': {
                    'total_requests': self.request_count,
                    'total_cost_usd': client_cost
                },
                'modal_stats': modal_stats,
                'combined_cost': client_cost
            }
        except Exception as e:
            return {
                'provider': self._PROVIDER,
                'service': self._UI_SERVICE,
                'error': str(e)
            }

    def get_supported_tasks(self) -> List[str]:
        """Return the list of supported task names."""
        return [
            'analyze',  # general image analysis
            'detect',   # UI element detection
            'extract'   # OCR text extraction
        ]

    def get_supported_formats(self) -> List[str]:
        """Return the supported image formats."""
        return ['jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp']

    def get_max_image_size(self) -> Dict[str, int]:
        """Return the maximum accepted image dimensions and file size."""
        return {
            "width": 4096,
            "height": 4096,
            "file_size_mb": 20
        }

    async def close(self):
        """Release resources; the Modal client needs no explicit close."""
        pass

    # ==================== UTILITY METHODS ====================

    async def _prepare_image_base64(self, image: Union[str, BinaryIO]) -> str:
        """
        Convert the supported image inputs to a base64 string.

        Args:
            image: One of: a data URL ('data:image...'), a raw base64 string
                (any string longer than 1000 characters is assumed to be
                one), a file path, a readable binary stream, or raw bytes.

        Returns:
            The base64-encoded image without any data-URL prefix.

        Raises:
            ValueError: If a data URL has no ',' separating header and payload.
            OSError: If a file path cannot be opened or read.
        """
        if isinstance(image, str):
            if image.startswith('data:image'):
                header, separator, payload = image.partition(',')
                if not separator:
                    raise ValueError("Malformed data URL: missing ',' before the base64 payload")
                # The original took only the text up to the next comma; base64
                # contains no commas, so this is the same for valid input.
                return payload.split(',')[0]

            if len(image) > 1000:
                # Too long to be a path: assume it is already pure base64.
                return image

            # Otherwise treat the string as a file path.
            with open(image, 'rb') as f:
                return base64.b64encode(f.read()).decode('utf-8')

        # Binary input: a file-like object or raw bytes.
        image_data = image.read() if hasattr(image, 'read') else image
        return base64.b64encode(image_data).decode('utf-8')

    def _generate_analysis_from_ui_elements(
        self,
        ui_elements: List[Dict[str, Any]],
        prompt: Optional[str] = None
    ) -> str:
        """
        Build a plain-text summary from detected UI elements.

        The summary lists the total count, a per-type breakdown (in order of
        first appearance), the number of interactable elements, and - when a
        prompt is given - a short answer keyed on whether the prompt mentions
        'button' or 'input'.

        Args:
            ui_elements: Detected elements; the 'type' and 'interactable'
                keys are read.
            prompt: Optional prompt text.

        Returns:
            The summary, with its parts joined by single spaces.
        """
        if not ui_elements:
            return "No UI elements detected in the image."

        analysis_parts = [f"Detected {len(ui_elements)} UI elements:"]

        # Count elements per type; dict order keeps first-seen ordering.
        type_counts: Dict[Any, int] = {}
        for elem in ui_elements:
            elem_type = elem.get('type', 'unknown')
            type_counts[elem_type] = type_counts.get(elem_type, 0) + 1

        for elem_type, count in type_counts.items():
            analysis_parts.append(f"- {count} {elem_type}{'s' if count > 1 else ''}")

        interactable_count = sum(1 for e in ui_elements if e.get('interactable', False))
        if interactable_count:
            analysis_parts.append(f"\n{interactable_count} elements are interactable.")

        if prompt:
            analysis_parts.append(f"\nRegarding '{prompt}': Based on the detected UI elements, ")
            prompt_lower = prompt.lower()
            if 'button' in prompt_lower:
                button_count = sum(
                    1 for e in ui_elements if 'button' in self._lowered(e, 'type')
                )
                if button_count:
                    analysis_parts.append(f"found {button_count} button(s).")
                else:
                    analysis_parts.append("no buttons were specifically identified.")
            elif 'input' in prompt_lower:
                input_count = sum(
                    1 for e in ui_elements if 'input' in self._lowered(e, 'type')
                )
                if input_count:
                    analysis_parts.append(f"found {input_count} input field(s).")
                else:
                    analysis_parts.append("no input fields were specifically identified.")
            else:
                analysis_parts.append("the UI elements listed above were detected.")

        return " ".join(analysis_parts)
|