isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +40 -17
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,564 +0,0 @@
-"""
-Vision Evaluator for ISA Model evaluation framework.
-
-Provides comprehensive evaluation capabilities for vision tasks including:
-- OCR (Optical Character Recognition) evaluation
-- Table extraction evaluation
-- UI detection evaluation
-- Document analysis evaluation
-- Image captioning evaluation
-- Visual question answering evaluation
-
-Supports ISA custom services and standard vision models.
-"""
-
-import asyncio
-import logging
-import base64
-import io
-from typing import Dict, List, Any, Optional, Union, Tuple
-from PIL import Image
-import numpy as np
-from pathlib import Path
-
-from .base_evaluator import BaseEvaluator, EvaluationResult
-from ..metrics import compute_text_metrics, compute_vision_metrics
-
-logger = logging.getLogger(__name__)
-
-
-class VisionEvaluator(BaseEvaluator):
-    """
-    Comprehensive vision model evaluator.
-
-    Supports evaluation of:
-    - OCR accuracy and multilingual capability
-    - Table extraction and structure recognition
-    - UI element detection and classification
-    - Document understanding and analysis
-    - Image captioning quality
-    - Visual question answering accuracy
-    """
-
-    def __init__(self,
-                 config: Optional[Dict[str, Any]] = None,
-                 experiment_tracker: Optional[Any] = None):
-        """
-        Initialize the vision evaluator.
-
-        Args:
-            config: Evaluation configuration
-            experiment_tracker: Optional experiment tracking instance
-        """
-        super().__init__(
-            evaluator_name="vision_evaluator",
-            config=config,
-            experiment_tracker=experiment_tracker
-        )
-
-        # Vision-specific configuration
-        self.supported_formats = self.config.get("supported_formats", ["png", "jpg", "jpeg", "pdf", "webp"])
-        self.max_image_size = self.config.get("max_image_size", (2048, 2048))
-        self.enable_multilingual = self.config.get("enable_multilingual", True)
-
-        # Evaluation task types
-        self.task_type = self.config.get("task_type", "ocr")  # ocr, table, ui, vqa, caption
-
-        logger.info(f"Initialized VisionEvaluator for task: {self.task_type}")
-
-    async def evaluate_sample(self,
-                              sample: Dict[str, Any],
-                              model_interface: Any) -> Dict[str, Any]:
-        """
-        Evaluate a single vision sample.
-
-        Args:
-            sample: Vision sample containing image and expected output
-            model_interface: Vision model interface
-
-        Returns:
-            Evaluation result for the sample
-        """
-        try:
-            # Extract sample data
-            image_data = sample.get("image")
-            expected_output = sample.get("expected_output", "")
-            task_type = sample.get("task_type", self.task_type)
-            prompt = sample.get("prompt", "")
-
-            # Process image
-            processed_image = await self._process_image(image_data)
-
-            # Get model prediction based on task type
-            prediction = await self._get_model_prediction(
-                model_interface, processed_image, prompt, task_type
-            )
-
-            # Compute sample-level metrics
-            sample_metrics = self._compute_sample_metrics(
-                prediction, expected_output, task_type
-            )
-
-            return {
-                "prediction": prediction,
-                "expected_output": expected_output,
-                "task_type": task_type,
-                "sample_metrics": sample_metrics,
-                "image_info": self._get_image_info(processed_image)
-            }
-
-        except Exception as e:
-            logger.error(f"Error evaluating vision sample: {e}")
-            raise
-
-    async def _process_image(self, image_data: Union[str, bytes, Image.Image, Path]) -> Image.Image:
-        """
-        Process and validate image data.
-
-        Args:
-            image_data: Image in various formats
-
-        Returns:
-            Processed PIL Image
-        """
-        try:
-            if isinstance(image_data, str):
-                # Handle base64 encoded images or file paths
-                if image_data.startswith("data:"):
-                    # Base64 data URL
-                    header, encoded = image_data.split(",", 1)
-                    image_bytes = base64.b64decode(encoded)
-                    image = Image.open(io.BytesIO(image_bytes))
-                elif Path(image_data).exists():
-                    # File path
-                    image = Image.open(image_data)
-                else:
-                    # Assume base64 string
-                    image_bytes = base64.b64decode(image_data)
-                    image = Image.open(io.BytesIO(image_bytes))
-
-            elif isinstance(image_data, bytes):
-                # Raw bytes
-                image = Image.open(io.BytesIO(image_data))
-
-            elif isinstance(image_data, Path):
-                # Path object
-                image = Image.open(image_data)
-
-            elif isinstance(image_data, Image.Image):
-                # PIL Image
-                image = image_data
-
-            else:
-                raise ValueError(f"Unsupported image data type: {type(image_data)}")
-
-            # Convert to RGB if needed
-            if image.mode != "RGB":
-                image = image.convert("RGB")
-
-            # Resize if too large
-            if image.size[0] > self.max_image_size[0] or image.size[1] > self.max_image_size[1]:
-                image.thumbnail(self.max_image_size, Image.Resampling.LANCZOS)
-                logger.info(f"Resized image to {image.size}")
-
-            return image
-
-        except Exception as e:
-            logger.error(f"Error processing image: {e}")
-            raise
-
-    async def _get_model_prediction(self,
-                                    model_interface: Any,
-                                    image: Image.Image,
-                                    prompt: str,
-                                    task_type: str) -> str:
-        """
-        Get model prediction for vision task.
-
-        Args:
-            model_interface: Vision model interface
-            image: Processed PIL image
-            prompt: Task-specific prompt
-            task_type: Type of vision task
-
-        Returns:
-            Model prediction as string
-        """
-        try:
-            # Prepare task-specific prompt
-            if not prompt:
-                prompt = self._get_default_prompt(task_type)
-
-            # Convert image to format expected by model
-            if hasattr(model_interface, 'process_image'):
-                # ISA custom vision service
-                result = await model_interface.process_image(image, prompt, task_type)
-                prediction = result.get("text", "") if isinstance(result, dict) else str(result)
-
-            elif hasattr(model_interface, 'vision_completion'):
-                # OpenAI-style vision API
-                # Convert image to base64
-                buffer = io.BytesIO()
-                image.save(buffer, format="PNG")
-                image_base64 = base64.b64encode(buffer.getvalue()).decode()
-
-                result = await model_interface.vision_completion(
-                    prompt=prompt,
-                    image_base64=image_base64
-                )
-                prediction = result.get("content", "") if isinstance(result, dict) else str(result)
-
-            else:
-                # Generic interface
-                prediction = await model_interface.predict(image, prompt)
-                prediction = str(prediction)
-
-            return prediction.strip()
-
-        except Exception as e:
-            logger.error(f"Error getting model prediction: {e}")
-            raise
-
-    def _get_default_prompt(self, task_type: str) -> str:
-        """Get default prompt for task type."""
-        prompts = {
-            "ocr": "Extract all text from this image. Preserve the original formatting and layout.",
-            "table": "Extract the table structure and content from this image. Provide the data in a structured format.",
-            "ui": "Analyze the UI elements in this image. Identify buttons, text fields, labels, and their relationships.",
-            "vqa": "Answer the question about this image accurately and concisely.",
-            "caption": "Generate a detailed and accurate caption describing this image.",
-            "document": "Analyze this document image and extract the key information, structure, and content."
-        }
-        return prompts.get(task_type, "Analyze this image and provide relevant information.")
-
-    def _compute_sample_metrics(self,
-                                prediction: str,
-                                expected_output: str,
-                                task_type: str) -> Dict[str, float]:
-        """
-        Compute metrics for a single sample.
-
-        Args:
-            prediction: Model prediction
-            expected_output: Expected/reference output
-            task_type: Type of vision task
-
-        Returns:
-            Dictionary of sample-level metrics
-        """
-        try:
-            metrics = {}
-
-            # Common text-based metrics
-            text_metrics = compute_text_metrics(prediction, expected_output)
-            metrics.update(text_metrics)
-
-            # Task-specific metrics
-            if task_type == "ocr":
-                metrics.update(self._compute_ocr_metrics(prediction, expected_output))
-            elif task_type == "table":
-                metrics.update(self._compute_table_metrics(prediction, expected_output))
-            elif task_type == "ui":
-                metrics.update(self._compute_ui_metrics(prediction, expected_output))
-            elif task_type in ["vqa", "caption"]:
-                metrics.update(self._compute_semantic_metrics(prediction, expected_output))
-
-            return metrics
-
-        except Exception as e:
-            logger.error(f"Error computing sample metrics: {e}")
-            return {"error": 1.0}
-
-    def _compute_ocr_metrics(self, prediction: str, expected: str) -> Dict[str, float]:
-        """Compute OCR-specific metrics."""
-        try:
-            # Character-level accuracy
-            pred_chars = list(prediction.lower().replace(" ", ""))
-            exp_chars = list(expected.lower().replace(" ", ""))
-
-            char_accuracy = self._compute_sequence_accuracy(pred_chars, exp_chars)
-
-            # Word-level accuracy
-            pred_words = prediction.lower().split()
-            exp_words = expected.lower().split()
-
-            word_accuracy = self._compute_sequence_accuracy(pred_words, exp_words)
-
-            # Line-level accuracy (for formatted text)
-            pred_lines = prediction.strip().split("\n")
-            exp_lines = expected.strip().split("\n")
-
-            line_accuracy = self._compute_sequence_accuracy(pred_lines, exp_lines)
-
-            return {
-                "char_accuracy": char_accuracy,
-                "word_accuracy": word_accuracy,
-                "line_accuracy": line_accuracy,
-                "length_ratio": len(prediction) / max(len(expected), 1)
-            }
-
-        except Exception as e:
-            logger.error(f"Error computing OCR metrics: {e}")
-            return {"ocr_error": 1.0}
-
-    def _compute_table_metrics(self, prediction: str, expected: str) -> Dict[str, float]:
-        """Compute table extraction metrics."""
-        try:
-            # Simple table structure metrics
-            pred_rows = prediction.count("\n") + 1
-            exp_rows = expected.count("\n") + 1
-
-            pred_cells = prediction.count("|") + prediction.count("\t")
-            exp_cells = expected.count("|") + expected.count("\t")
-
-            row_accuracy = 1.0 - abs(pred_rows - exp_rows) / max(exp_rows, 1)
-            cell_count_accuracy = 1.0 - abs(pred_cells - exp_cells) / max(exp_cells, 1)
-
-            return {
-                "row_accuracy": max(0.0, row_accuracy),
-                "cell_count_accuracy": max(0.0, cell_count_accuracy),
-                "structure_similarity": (row_accuracy + cell_count_accuracy) / 2
-            }
-
-        except Exception as e:
-            logger.error(f"Error computing table metrics: {e}")
-            return {"table_error": 1.0}
-
-    def _compute_ui_metrics(self, prediction: str, expected: str) -> Dict[str, float]:
-        """Compute UI detection metrics."""
-        try:
-            # Extract UI elements (simplified approach)
-            ui_keywords = ["button", "text", "input", "label", "image", "link", "menu", "icon"]
-
-            pred_elements = []
-            exp_elements = []
-
-            for keyword in ui_keywords:
-                pred_count = prediction.lower().count(keyword)
-                exp_count = expected.lower().count(keyword)
-                pred_elements.extend([keyword] * pred_count)
-                exp_elements.extend([keyword] * exp_count)
-
-            element_accuracy = self._compute_sequence_accuracy(pred_elements, exp_elements)
-
-            return {
-                "element_detection_accuracy": element_accuracy,
-                "element_count_ratio": len(pred_elements) / max(len(exp_elements), 1)
-            }
-
-        except Exception as e:
-            logger.error(f"Error computing UI metrics: {e}")
-            return {"ui_error": 1.0}
-
-    def _compute_semantic_metrics(self, prediction: str, expected: str) -> Dict[str, float]:
-        """Compute semantic similarity metrics for VQA/captioning."""
-        try:
-            # Simple semantic metrics
-            pred_words = set(prediction.lower().split())
-            exp_words = set(expected.lower().split())
-
-            if not exp_words:
-                return {"semantic_error": 1.0}
-
-            intersection = pred_words.intersection(exp_words)
-            union = pred_words.union(exp_words)
-
-            jaccard_similarity = len(intersection) / len(union) if union else 0.0
-            word_overlap = len(intersection) / len(exp_words)
-
-            return {
-                "jaccard_similarity": jaccard_similarity,
-                "word_overlap": word_overlap,
-                "semantic_score": (jaccard_similarity + word_overlap) / 2
-            }
-
-        except Exception as e:
-            logger.error(f"Error computing semantic metrics: {e}")
-            return {"semantic_error": 1.0}
-
-    def _compute_sequence_accuracy(self, pred_seq: List[str], exp_seq: List[str]) -> float:
-        """Compute sequence-level accuracy using edit distance."""
-        try:
-            if not exp_seq:
-                return 1.0 if not pred_seq else 0.0
-
-            # Simple edit distance computation
-            m, n = len(pred_seq), len(exp_seq)
-            dp = [[0] * (n + 1) for _ in range(m + 1)]
-
-            for i in range(m + 1):
-                dp[i][0] = i
-            for j in range(n + 1):
-                dp[0][j] = j
-
-            for i in range(1, m + 1):
-                for j in range(1, n + 1):
-                    if pred_seq[i-1] == exp_seq[j-1]:
-                        dp[i][j] = dp[i-1][j-1]
-                    else:
-                        dp[i][j] = 1 + min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])
-
-            edit_distance = dp[m][n]
-            accuracy = 1.0 - edit_distance / max(n, 1)
-            return max(0.0, accuracy)
-
-        except Exception as e:
-            logger.error(f"Error computing sequence accuracy: {e}")
-            return 0.0
-
-    def _get_image_info(self, image: Image.Image) -> Dict[str, Any]:
-        """Get image metadata for analysis."""
-        return {
-            "width": image.size[0],
-            "height": image.size[1],
-            "mode": image.mode,
-            "format": getattr(image, "format", "unknown"),
-            "has_transparency": image.mode in ("RGBA", "LA") or "transparency" in image.info
-        }
-
-    def compute_metrics(self,
-                        predictions: List[str],
-                        references: List[str],
-                        **kwargs) -> Dict[str, float]:
-        """
-        Compute aggregate vision evaluation metrics.
-
-        Args:
-            predictions: List of model predictions
-            references: List of reference outputs
-            **kwargs: Additional parameters
-
-        Returns:
-            Dictionary of computed metrics
-        """
-        try:
-            if not predictions or not references:
-                logger.warning("Empty predictions or references provided")
-                return {}
-
-            # Ensure equal lengths
-            min_len = min(len(predictions), len(references))
-            predictions = predictions[:min_len]
-            references = references[:min_len]
-
-            # Compute text-based metrics
-            metrics = compute_text_metrics(predictions, references, aggregate=True)
-
-            # Compute vision-specific metrics
-            vision_metrics = self._compute_vision_aggregate_metrics(predictions, references)
-            metrics.update(vision_metrics)
-
-            # Add evaluation metadata
-            metrics.update({
-                "total_samples": len(predictions),
-                "task_type": self.task_type,
-                "multilingual_enabled": self.enable_multilingual
-            })
-
-            return metrics
-
-        except Exception as e:
-            logger.error(f"Error computing aggregate metrics: {e}")
-            return {"error_rate": 1.0}
-
-    def _compute_vision_aggregate_metrics(self,
-                                          predictions: List[str],
-                                          references: List[str]) -> Dict[str, float]:
-        """Compute aggregate vision-specific metrics."""
-        try:
-            task_type = self.task_type
-
-            if task_type == "ocr":
-                return self._compute_aggregate_ocr_metrics(predictions, references)
-            elif task_type == "table":
-                return self._compute_aggregate_table_metrics(predictions, references)
-            elif task_type == "ui":
-                return self._compute_aggregate_ui_metrics(predictions, references)
-            elif task_type in ["vqa", "caption"]:
-                return self._compute_aggregate_semantic_metrics(predictions, references)
-            else:
-                return {}
-
-        except Exception as e:
-            logger.error(f"Error computing vision aggregate metrics: {e}")
-            return {}
-
-    def _compute_aggregate_ocr_metrics(self,
-                                       predictions: List[str],
-                                       references: List[str]) -> Dict[str, float]:
-        """Compute aggregate OCR metrics."""
-        char_accuracies = []
-        word_accuracies = []
-
-        for pred, ref in zip(predictions, references):
-            sample_metrics = self._compute_ocr_metrics(pred, ref)
-            char_accuracies.append(sample_metrics.get("char_accuracy", 0.0))
-            word_accuracies.append(sample_metrics.get("word_accuracy", 0.0))
-
-        return {
-            "avg_char_accuracy": np.mean(char_accuracies) if char_accuracies else 0.0,
-            "avg_word_accuracy": np.mean(word_accuracies) if word_accuracies else 0.0,
-            "ocr_score": np.mean(char_accuracies + word_accuracies) if char_accuracies else 0.0
-        }
-
-    def _compute_aggregate_table_metrics(self,
-                                         predictions: List[str],
-                                         references: List[str]) -> Dict[str, float]:
-        """Compute aggregate table metrics."""
-        structure_similarities = []
-
-        for pred, ref in zip(predictions, references):
-            sample_metrics = self._compute_table_metrics(pred, ref)
-            structure_similarities.append(sample_metrics.get("structure_similarity", 0.0))
-
-        return {
-            "avg_structure_similarity": np.mean(structure_similarities) if structure_similarities else 0.0,
-            "table_extraction_score": np.mean(structure_similarities) if structure_similarities else 0.0
-        }
-
-    def _compute_aggregate_ui_metrics(self,
-                                      predictions: List[str],
-                                      references: List[str]) -> Dict[str, float]:
-        """Compute aggregate UI metrics."""
-        detection_accuracies = []
-
-        for pred, ref in zip(predictions, references):
-            sample_metrics = self._compute_ui_metrics(pred, ref)
-            detection_accuracies.append(sample_metrics.get("element_detection_accuracy", 0.0))
-
-        return {
-            "avg_element_detection": np.mean(detection_accuracies) if detection_accuracies else 0.0,
-            "ui_detection_score": np.mean(detection_accuracies) if detection_accuracies else 0.0
-        }
-
-    def _compute_aggregate_semantic_metrics(self,
-                                            predictions: List[str],
-                                            references: List[str]) -> Dict[str, float]:
-        """Compute aggregate semantic metrics."""
-        semantic_scores = []
-
-        for pred, ref in zip(predictions, references):
-            sample_metrics = self._compute_semantic_metrics(pred, ref)
-            semantic_scores.append(sample_metrics.get("semantic_score", 0.0))
-
-        return {
-            "avg_semantic_similarity": np.mean(semantic_scores) if semantic_scores else 0.0,
-            "semantic_understanding_score": np.mean(semantic_scores) if semantic_scores else 0.0
-        }
-
-    def get_supported_metrics(self) -> List[str]:
-        """Get list of metrics supported by this evaluator."""
-        base_metrics = [
-            "exact_match", "f1_score", "bleu_score", "rouge_l",
-            "char_accuracy", "word_accuracy", "line_accuracy"
-        ]
-
-        task_specific_metrics = {
-            "ocr": ["char_accuracy", "word_accuracy", "ocr_score"],
-            "table": ["structure_similarity", "table_extraction_score"],
-            "ui": ["element_detection_accuracy", "ui_detection_score"],
-            "vqa": ["semantic_similarity", "semantic_understanding_score"],
-            "caption": ["semantic_similarity", "semantic_understanding_score"]
-        }
-
-        return base_metrics + task_specific_metrics.get(self.task_type, [])