isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +30 -1
- isa_model/client.py +937 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +257 -601
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -17
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
- isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
- isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +492 -40
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +51 -17
- isa_model/inference/services/llm/openai_llm_service.py +70 -19
- isa_model/inference/services/llm/yyds_llm_service.py +24 -23
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +218 -117
- isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
- isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +104 -307
- isa_model/inference/services/vision/replicate_vision_service.py +140 -325
- isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/api/fastapi_server.py +6 -1
- isa_model/serving/api/routes/unified.py +274 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
- isa_model/config/__init__.py +0 -9
- isa_model/config/config_manager.py +0 -213
- isa_model/core/model_manager.py +0 -213
- isa_model/core/model_registry.py +0 -375
- isa_model/core/vision_models_init.py +0 -116
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/stacked/__init__.py +0 -26
- isa_model/inference/services/stacked/config.py +0 -426
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,275 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Simple Auto-Deploy Vision Service Wrapper
|
4
|
+
|
5
|
+
A simplified version that avoids complex import dependencies.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
import subprocess
|
10
|
+
import logging
|
11
|
+
import time
|
12
|
+
from typing import Dict, Any, Optional, Union, List, BinaryIO
|
13
|
+
from pathlib import Path
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
class SimpleAutoDeployVisionService:
    """
    Simplified vision service wrapper that handles automatic deployment
    of Modal services for ISA vision tasks without complex inheritance.

    Every public coroutine first makes sure the Modal app for ``model_name``
    is deployed (via the AIFactory helpers) and then delegates to
    ``underlying_service``. The underlying service is currently a
    ``MockModalVisionService``, i.e. results are canned test data.

    Failures are never raised to the caller: they are logged and reported as
    ``{'success': False, 'error': ..., 'service': model_name}``.
    """

    def __init__(self, model_name: str = "isa_vision_ui", config: Optional[dict] = None):
        """
        Args:
            model_name: Name of the vision model/service to wrap.
            config: Optional configuration mapping; stored as ``{}`` when omitted.
        """
        self.model_name = model_name
        self.config = config or {}
        self.underlying_service = None
        self._factory = None
        self._modal_deployed = False

        logger.info(f"Initialized SimpleAutoDeployVisionService for {model_name}")

    def _get_factory(self):
        """Get AIFactory instance for service management (created on first use)."""
        if not self._factory:
            # Imported here rather than at module import time so that loading
            # this module does not pull in the factory's dependencies.
            from isa_model.inference.ai_factory import AIFactory
            self._factory = AIFactory()
        return self._factory

    def _failure(self, error: str) -> Dict[str, Any]:
        """Build the failure payload shared by all public entry points."""
        return {
            'success': False,
            'error': error,
            'service': self.model_name
        }

    async def _ensure_service_deployed(self) -> bool:
        """
        Ensure the Modal service is deployed before use.

        Returns:
            True when the service is deployed and ``underlying_service`` is
            set; False when deployment failed or any step raised.
        """
        if self._modal_deployed and self.underlying_service:
            logger.info(f"Service {self.model_name} already deployed")
            return True

        try:
            if not self._modal_deployed:
                factory = self._get_factory()

                # Check if service is available
                app_name = factory._get_modal_app_name(self.model_name)
                if not factory._check_modal_service_availability(app_name):
                    logger.info(f"Deploying {self.model_name} service...")
                    success = factory._auto_deploy_modal_service(self.model_name)
                    if not success:
                        logger.error(f"Failed to deploy {self.model_name}")
                        return False

                    # Only a fresh deployment needs the warm-up wait; an app
                    # that was already available is used immediately.
                    logger.info(f"Waiting for {self.model_name} service to be ready...")
                    await self._wait_for_service_ready(app_name)

                # Mark as deployed
                self._modal_deployed = True

            # (Re)create the underlying service when it is missing, e.g. on
            # first use or after close().
            if not self.underlying_service:
                # Create a simple mock service for testing
                self.underlying_service = MockModalVisionService(self.model_name)

            return True

        except Exception as e:
            logger.error(f"Failed to ensure service deployment: {e}")
            return False

    async def _wait_for_service_ready(
        self,
        app_name: str,
        max_wait_time: int = 300,
        poll_interval: float = 5,
        assume_ready_after: float = 10
    ):
        """
        Wait for Modal service to be ready.

        NOTE: this is a simulation. It does not probe the service; it simply
        assumes readiness once more than ``assume_ready_after`` seconds have
        elapsed.

        Args:
            app_name: Modal app name, used for log messages only.
            max_wait_time: Upper bound, in seconds, on the total wait.
            poll_interval: Seconds to sleep between progress checks.
            assume_ready_after: Elapsed seconds after which the service is
                treated as ready.
        """
        logger.info(f"Waiting up to {max_wait_time} seconds for {app_name} to be ready...")
        # Monotonic clock: elapsed time must not jump if the wall clock is adjusted.
        start_time = time.monotonic()

        while True:
            remaining = max_wait_time - (time.monotonic() - start_time)
            if remaining <= 0:
                break

            # Never sleep past the deadline, so a short max_wait_time is honoured.
            await asyncio.sleep(min(poll_interval, remaining))
            elapsed = time.monotonic() - start_time
            logger.info(f"Still waiting for {app_name}... ({int(elapsed)}s elapsed)")

            # For testing, assume service is ready after the configured delay
            if elapsed > assume_ready_after:
                logger.info(f"Service {app_name} assumed ready for testing!")
                return

        logger.warning(f"Service {app_name} may not be fully ready after {max_wait_time}s")

    async def detect_ui_elements(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """
        Detect UI elements with auto-deploy.

        Args:
            image: Image reference passed through unchanged to the underlying service.

        Returns:
            The underlying service's result, or a failure payload.
        """
        # Ensure service is deployed
        if not await self._ensure_service_deployed():
            return self._failure(f'Failed to deploy {self.model_name} service')

        try:
            # Call the underlying service (mock for testing)
            logger.info(f"Calling UI detection service for {self.model_name}")
            return await self.underlying_service.detect_ui_elements(image)

        except Exception as e:
            logger.error(f"UI detection failed: {e}")
            return self._failure(str(e))

    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """
        Analyze image with auto-deploy.

        Args:
            image: Image reference passed through unchanged to the underlying service.
            prompt: Optional instruction forwarded to the underlying service.
            max_tokens: Token budget forwarded to the underlying service.

        Returns:
            The underlying service's result, or a failure payload.
        """
        if not await self._ensure_service_deployed():
            return self._failure(f'Failed to deploy {self.model_name} service')

        try:
            return await self.underlying_service.analyze_image(image, prompt, max_tokens)
        except Exception as e:
            logger.error(f"Image analysis failed: {e}")
            return self._failure(str(e))

    async def invoke(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        task: Optional[str] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Unified invoke method for all vision operations.

        Routing:
            - ``"detect_ui_elements"`` / ``"ui_detection"`` -> detect_ui_elements
            - ``"analyze"`` or no task -> analyze_image (``max_tokens`` is read
              from ``kwargs``, default 1000)
            - anything else -> forwarded to the underlying service's ``invoke``

        Returns:
            The routed call's result, or a failure payload.
        """
        if not await self._ensure_service_deployed():
            return self._failure(f'Failed to deploy {self.model_name} service')

        try:
            # Route to appropriate method based on task
            if task in ("detect_ui_elements", "ui_detection"):
                return await self.detect_ui_elements(image)
            if task is None or task == "analyze":
                return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
            return await self.underlying_service.invoke(image, prompt, task, **kwargs)
        except Exception as e:
            logger.error(f"Vision invoke failed: {e}")
            return self._failure(str(e))

    def get_supported_formats(self) -> List[str]:
        """Get list of supported image formats"""
        return ['jpg', 'jpeg', 'png', 'gif', 'webp']

    def get_max_image_size(self) -> Dict[str, int]:
        """Get maximum supported image dimensions"""
        return {"width": 2048, "height": 2048, "file_size_mb": 10}

    async def close(self):
        """Cleanup resources. Safe to call more than once."""
        service = self.underlying_service
        if service:
            # Detach first so a repeated close() cannot close the same service twice.
            self.underlying_service = None
            await service.close()
            logger.info(f"Closed {self.model_name} service")
|
191
|
+
|
192
|
+
|
193
|
+
class MockModalVisionService:
    """
    Mock Modal vision service for testing.

    Returns canned results after a short simulated delay; the ``image``
    argument is accepted but never read.
    """

    def __init__(self, model_name: str, simulated_latency: float = 0.1):
        """
        Args:
            model_name: Service name echoed back in every result. UI elements
                are only produced when it contains ``"ui"``.
            simulated_latency: Seconds to sleep per call; also reported as
                ``processing_time``.
        """
        self.model_name = model_name
        self.simulated_latency = simulated_latency
        logger.info(f"Initialized mock service for {model_name}")

    async def detect_ui_elements(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """
        Mock UI element detection.

        Returns:
            A success payload with two fixed elements for "ui" models and an
            empty element list for every other model name.
        """
        await asyncio.sleep(self.simulated_latency)  # Simulate processing time

        # Return mock UI elements based on model type
        if "ui" in self.model_name:
            ui_elements = [
                {
                    'id': 'ui_0',
                    'type': 'button',
                    'content': 'Search Button',
                    'center': [400, 200],
                    'bbox': [350, 180, 450, 220],
                    'confidence': 0.95,
                    'interactable': True
                },
                {
                    'id': 'ui_1',
                    'type': 'input',
                    'content': 'Search Input',
                    'center': [300, 150],
                    'bbox': [200, 130, 400, 170],
                    'confidence': 0.88,
                    'interactable': True
                }
            ]
        else:
            ui_elements = []

        return {
            'success': True,
            'service': self.model_name,
            'ui_elements': ui_elements,
            'element_count': len(ui_elements),
            'processing_time': self.simulated_latency,
            'detection_method': 'mock_omniparser',
            'model_info': {
                'primary': 'Mock OmniParser v2.0',
                'gpu': 'T4',
                'container_id': 'mock-container'
            }
        }

    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """
        Mock image analysis.

        Returns:
            A success payload whose ``text`` echoes ``prompt``; ``max_tokens``
            is accepted for signature compatibility and not used.
        """
        await asyncio.sleep(self.simulated_latency)

        return {
            'success': True,
            'service': self.model_name,
            'text': f'Mock analysis of image with prompt: {prompt}',
            'confidence': 0.9,
            'processing_time': self.simulated_latency
        }

    async def invoke(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        task: Optional[str] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Mock invoke method.

        ``"detect_ui_elements"`` and its alias ``"ui_detection"`` run UI
        detection; every other task (including none) runs image analysis with
        ``max_tokens`` taken from ``kwargs`` (default 1000).
        """
        # Accept the same task aliases as SimpleAutoDeployVisionService.invoke.
        if task in ("detect_ui_elements", "ui_detection"):
            return await self.detect_ui_elements(image)
        return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))

    async def close(self):
        """Mock cleanup"""
        pass
|
isa_model/eval/__init__.py
CHANGED
@@ -1,56 +1,92 @@
|
|
1
1
|
"""
|
2
2
|
ISA Model Evaluation Framework
|
3
3
|
|
4
|
-
|
5
|
-
- LLM evaluation (perplexity, BLEU, ROUGE, custom metrics)
|
6
|
-
- Image model evaluation (FID, IS, LPIPS)
|
7
|
-
- Benchmark testing (MMLU, HellaSwag, ARC, etc.)
|
8
|
-
- Custom evaluation pipelines
|
4
|
+
Enterprise-grade evaluation framework implementing MLOps best practices:
|
9
5
|
|
10
|
-
|
6
|
+
Key Features:
|
7
|
+
- Multi-modal evaluation (LLM, Vision, Multimodal)
|
8
|
+
- Async evaluation with smart concurrency management
|
9
|
+
- Comprehensive experiment tracking (W&B, MLflow)
|
10
|
+
- Production-ready error handling and monitoring
|
11
|
+
- Distributed evaluation support
|
12
|
+
- Cost tracking and optimization
|
13
|
+
- Reproducible evaluation pipelines
|
14
|
+
|
15
|
+
Quick Start:
|
16
|
+
```python
|
17
|
+
import asyncio
|
11
18
|
from isa_model.eval import EvaluationFactory
|
12
19
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
async def main():
|
21
|
+
# Initialize factory with experiment tracking
|
22
|
+
factory = EvaluationFactory(
|
23
|
+
experiment_tracking={
|
24
|
+
"type": "wandb",
|
25
|
+
"project": "model-evaluation"
|
26
|
+
}
|
27
|
+
)
|
28
|
+
|
29
|
+
# Evaluate LLM
|
30
|
+
result = await factory.evaluate_llm(
|
31
|
+
model_name="gpt-4.1-mini",
|
32
|
+
provider="openai",
|
33
|
+
dataset_path="eval_data.json",
|
34
|
+
save_results=True
|
35
|
+
)
|
36
|
+
|
37
|
+
print(f"Accuracy: {result.metrics['exact_match']:.3f}")
|
38
|
+
|
39
|
+
# Cleanup
|
40
|
+
await factory.cleanup()
|
22
41
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
42
|
+
asyncio.run(main())
|
43
|
+
```
|
44
|
+
|
45
|
+
Architecture:
|
46
|
+
- evaluators/: Specialized evaluators by modality
|
47
|
+
- infrastructure/: Experiment tracking, async runners, storage
|
48
|
+
- config/: Configuration management
|
49
|
+
- metrics/: Metric computation by type
|
50
|
+
- benchmarks/: Standard benchmark implementations
|
51
|
+
- utils/: Data processing and visualization utilities
|
28
52
|
"""
|
29
53
|
|
30
|
-
|
31
|
-
from .
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
)
|
54
|
+
# Main interfaces
|
55
|
+
from .factory import EvaluationFactory, evaluate_llm_quick, run_benchmark_quick
|
56
|
+
|
57
|
+
# Core components
|
58
|
+
from .evaluators import BaseEvaluator, EvaluationResult, LLMEvaluator
|
59
|
+
from .config import EvaluationConfig, ConfigManager
|
60
|
+
|
61
|
+
# Infrastructure (optional imports)
|
62
|
+
try:
|
63
|
+
from .infrastructure import ExperimentTracker, WandBTracker, MLflowTracker
|
64
|
+
INFRASTRUCTURE_AVAILABLE = True
|
65
|
+
except ImportError:
|
66
|
+
INFRASTRUCTURE_AVAILABLE = False
|
44
67
|
|
45
68
|
__all__ = [
|
69
|
+
# Main interfaces
|
46
70
|
"EvaluationFactory",
|
47
|
-
"
|
48
|
-
"
|
49
|
-
|
50
|
-
|
51
|
-
"
|
52
|
-
"
|
53
|
-
"
|
54
|
-
"
|
55
|
-
"
|
56
|
-
]
|
71
|
+
"evaluate_llm_quick",
|
72
|
+
"run_benchmark_quick",
|
73
|
+
|
74
|
+
# Core components
|
75
|
+
"BaseEvaluator",
|
76
|
+
"EvaluationResult",
|
77
|
+
"LLMEvaluator",
|
78
|
+
"EvaluationConfig",
|
79
|
+
"ConfigManager"
|
80
|
+
]
|
81
|
+
|
82
|
+
# Add infrastructure components if available
|
83
|
+
if INFRASTRUCTURE_AVAILABLE:
|
84
|
+
__all__.extend([
|
85
|
+
"ExperimentTracker",
|
86
|
+
"WandBTracker",
|
87
|
+
"MLflowTracker"
|
88
|
+
])
|
89
|
+
|
90
|
+
# Version info
|
91
|
+
__version__ = "1.0.0"
|
92
|
+
__author__ = "ISA Model Team"
|
@@ -0,0 +1,108 @@
|
|
1
|
+
"""
|
2
|
+
Configuration management for evaluation framework
|
3
|
+
"""
|
4
|
+
|
5
|
+
import os
|
6
|
+
import json
|
7
|
+
import logging
|
8
|
+
from typing import Dict, Any, Optional, List
|
9
|
+
from dataclasses import dataclass, asdict
|
10
|
+
from pathlib import Path
|
11
|
+
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass
class EvaluationConfig:
    """
    Configuration class for evaluation settings.

    Constructing an instance creates ``output_dir`` on disk when it is a
    non-empty path (see ``__post_init__``).
    """

    # General settings
    output_dir: str = "evaluation_results"
    max_concurrent_evaluations: int = 3
    timeout_seconds: int = 600

    # Model settings
    default_provider: str = "openai"
    default_max_tokens: int = 150
    default_temperature: float = 0.1
    batch_size: int = 8

    # Metrics settings
    compute_all_metrics: bool = False
    # None means "no custom metrics"; normalised to a list in __post_init__.
    custom_metrics: Optional[List[str]] = None

    # Benchmark settings
    max_samples_per_benchmark: Optional[int] = None
    enable_few_shot: bool = True
    num_shots: int = 5

    # Experiment tracking
    use_wandb: bool = False
    wandb_project: Optional[str] = None
    wandb_entity: Optional[str] = None
    use_mlflow: bool = False
    mlflow_tracking_uri: Optional[str] = None

    # Results settings
    save_predictions: bool = True
    save_detailed_results: bool = True
    export_format: str = "json"  # json, csv, html

    def __post_init__(self):
        """Initialize default values after creation."""
        if self.custom_metrics is None:
            self.custom_metrics = []
        elif isinstance(self.custom_metrics, str):
            # A bare string is one metric name, not a sequence of characters.
            self.custom_metrics = [self.custom_metrics]
        else:
            # Copy so later mutation of the caller's list cannot change this config.
            self.custom_metrics = list(self.custom_metrics)

        # Ensure output directory exists. os.makedirs("") raises, so an empty
        # path is treated as "no directory to create".
        if self.output_dir:
            os.makedirs(self.output_dir, exist_ok=True)

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> 'EvaluationConfig':
        """
        Create configuration from dictionary.

        Keys that are not fields of this dataclass are ignored.

        Args:
            config_dict: Configuration dictionary (``None`` is treated as empty)

        Returns:
            EvaluationConfig instance
        """
        # Filter out unknown keys
        valid_keys = set(cls.__dataclass_fields__)
        filtered_dict = {k: v for k, v in (config_dict or {}).items() if k in valid_keys}

        return cls(**filtered_dict)

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert configuration to dictionary.

        Returns:
            Configuration as dictionary
        """
        return asdict(self)
|
86
|
+
|
87
|
+
|
88
|
+
class ConfigManager:
    """
    Manager for handling multiple evaluation configurations.

    Holds named ``EvaluationConfig`` objects in ``configs`` plus a default
    configuration that is returned whenever no (known) name is requested.
    """

    def __init__(self, config_dir: str = "configs"):
        """
        Initialize configuration manager.

        Args:
            config_dir: Directory for configuration files; created if missing.
        """
        self.config_dir = config_dir
        self.configs: Dict[str, "EvaluationConfig"] = {}
        self.default_config = EvaluationConfig()

        # Ensure config directory exists
        os.makedirs(config_dir, exist_ok=True)

    def get_config(self, config_name: Optional[str] = None) -> "EvaluationConfig":
        """
        Get configuration by name.

        Args:
            config_name: Name of a registered configuration, or None.

        Returns:
            The named configuration, or the default configuration when
            ``config_name`` is None or not registered.
        """
        if config_name is None:
            return self.default_config

        try:
            return self.configs[config_name]
        except KeyError:
            # Keep the fall-back behaviour, but make a mistyped or missing
            # name visible instead of silently using the defaults.
            logger.warning(
                "Unknown evaluation config %r; using default configuration",
                config_name,
            )
            return self.default_config
|
@@ -0,0 +1,18 @@
|
|
1
|
+
"""
Evaluators module for ISA Model Framework

Provides specialized evaluators for different model types and evaluation tasks.

``BaseEvaluator``, ``EvaluationResult`` and ``LLMEvaluator`` are always
exported. ``VisionEvaluator`` and ``MultimodalEvaluator`` are exported only
when their modules can be imported.
"""

from .base_evaluator import BaseEvaluator, EvaluationResult
from .llm_evaluator import LLMEvaluator

__all__ = [
    "BaseEvaluator",
    "EvaluationResult",
    "LLMEvaluator",
]

# Optional evaluators: a missing module must not make the whole package (and
# everything that imports it) fail to import. When one is absent its name is
# simply not exported, so a direct import of it still fails loudly.
try:
    from .vision_evaluator import VisionEvaluator
except ImportError:
    pass
else:
    __all__.append("VisionEvaluator")

try:
    from .multimodal_evaluator import MultimodalEvaluator
except ImportError:
    pass
else:
    __all__.append("MultimodalEvaluator")
|