isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,396 +0,0 @@
-"""
-Unit tests for basic ISA Model evaluation framework functionality.
-
-This test file focuses on core functionality without complex dependencies.
-"""
-
-import pytest
-import asyncio
-from dataclasses import dataclass, field
-from typing import Dict, List, Any
-from abc import ABC, abstractmethod
-
-
-@dataclass
-class MockEvaluationResult:
-    """Mock evaluation result for testing."""
-    metrics: Dict[str, float] = field(default_factory=dict)
-    predictions: List[Any] = field(default_factory=list)
-    references: List[Any] = field(default_factory=list)
-
-    def to_dict(self):
-        """Convert to dictionary."""
-        return {
-            "metrics": self.metrics,
-            "predictions": self.predictions,
-            "references": self.references
-        }
-
-
-class MockBaseEvaluator(ABC):
-    """Mock base evaluator for testing."""
-
-    def __init__(self, config: Dict = None):
-        self.config = config or {}
-
-    @abstractmethod
-    async def evaluate(self, model_interface, dataset, **kwargs):
-        pass
-
-
-class TestEvaluationResult:
-    """Test the EvaluationResult data structure."""
-
-    def test_evaluation_result_creation(self):
-        """Test basic EvaluationResult creation and properties."""
-        result = MockEvaluationResult(
-            metrics={"accuracy": 0.85, "f1_score": 0.78},
-            predictions=["response1", "response2"],
-            references=["expected1", "expected2"]
-        )
-
-        assert result.metrics["accuracy"] == 0.85
-        assert result.metrics["f1_score"] == 0.78
-        assert len(result.predictions) == 2
-        assert len(result.references) == 2
-
-    def test_evaluation_result_default_values(self):
-        """Test EvaluationResult with default values."""
-        result = MockEvaluationResult()
-
-        assert isinstance(result.metrics, dict)
-        assert isinstance(result.predictions, list)
-        assert isinstance(result.references, list)
-        assert len(result.metrics) == 0
-        assert len(result.predictions) == 0
-        assert len(result.references) == 0
-
-    def test_evaluation_result_to_dict(self):
-        """Test EvaluationResult serialization."""
-        result = MockEvaluationResult(
-            metrics={"accuracy": 0.9},
-            predictions=["test"],
-            references=["expected"]
-        )
-
-        result_dict = result.to_dict()
-        assert isinstance(result_dict, dict)
-        assert "metrics" in result_dict
-        assert result_dict["metrics"]["accuracy"] == 0.9
-
-
-class MockModelInterface:
-    """Mock model interface for testing."""
-
-    def __init__(self, responses: List[str] = None):
-        self.responses = responses or ["mock response"]
-        self.call_count = 0
-
-    async def generate_response(self, prompt: str, **kwargs) -> str:
-        """Mock response generation."""
-        response = self.responses[self.call_count % len(self.responses)]
-        self.call_count += 1
-        await asyncio.sleep(0.01)  # Simulate async processing
-        return response
-
-
-class TestBasicMetrics:
-    """Test basic metric calculation functions."""
-
-    def test_exact_match_metric(self):
-        """Test exact match calculation."""
-        predictions = ["Paris", "London", "Berlin"]
-        references = ["Paris", "Madrid", "Berlin"]
-
-        def calculate_exact_match(pred_list, ref_list):
-            """Simple exact match implementation."""
-            matches = sum(1 for p, r in zip(pred_list, ref_list)
-                          if p.strip().lower() == r.strip().lower())
-            return matches / len(pred_list)
-
-        accuracy = calculate_exact_match(predictions, references)
-        assert accuracy == 2/3  # 2 out of 3 matches
-
-    def test_f1_score_calculation(self):
-        """Test F1 score calculation."""
-        predictions = ["The cat sits", "A dog runs"]
-        references = ["The cat sits on mat", "The dog runs fast"]
-
-        def calculate_f1_score(pred_list, ref_list):
-            """Simple token-based F1 calculation."""
-            total_f1 = 0
-            for pred, ref in zip(pred_list, ref_list):
-                pred_tokens = set(pred.lower().split())
-                ref_tokens = set(ref.lower().split())
-
-                if len(pred_tokens) == 0 and len(ref_tokens) == 0:
-                    f1 = 1.0
-                elif len(pred_tokens) == 0 or len(ref_tokens) == 0:
-                    f1 = 0.0
-                else:
-                    intersection = len(pred_tokens & ref_tokens)
-                    precision = intersection / len(pred_tokens)
-                    recall = intersection / len(ref_tokens)
-                    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
-
-                total_f1 += f1
-
-            return total_f1 / len(pred_list)
-
-        f1 = calculate_f1_score(predictions, references)
-        assert isinstance(f1, float)
-        assert 0 <= f1 <= 1
-
-
-class TestBasicEvaluator:
-    """Test basic evaluator functionality."""
-
-    @pytest.fixture
-    def mock_config(self):
-        """Create a mock evaluation config."""
-        return {
-            "batch_size": 2,
-            "max_concurrent_requests": 3,
-            "timeout_seconds": 30
-        }
-
-    @pytest.fixture
-    def sample_dataset(self):
-        """Create a sample dataset for testing."""
-        return [
-            {
-                "id": "test_1",
-                "prompt": "What is 2+2?",
-                "expected_output": "4",
-                "metadata": {"category": "math"}
-            },
-            {
-                "id": "test_2",
-                "input": "Name the capital of France",
-                "expected_output": "Paris",
-                "metadata": {"category": "geography"}
-            },
-            {
-                "id": "test_3",
-                "prompt": "What color is the sky?",
-                "expected_output": "blue",
-                "metadata": {"category": "general"}
-            }
-        ]
-
-    def test_evaluator_initialization(self, mock_config):
-        """Test basic evaluator initialization."""
-        class TestEvaluator(MockBaseEvaluator):
-            async def evaluate(self, model_interface, dataset, **kwargs):
-                return MockEvaluationResult()
-
-        evaluator = TestEvaluator(config=mock_config)
-        assert evaluator.config["batch_size"] == 2
-        assert evaluator.config["max_concurrent_requests"] == 3
-
-    @pytest.mark.asyncio
-    async def test_mock_evaluation_workflow(self, sample_dataset, mock_config):
-        """Test basic evaluation workflow with mocked components."""
-
-        class TestEvaluator(MockBaseEvaluator):
-            async def evaluate(self, model_interface, dataset, **kwargs):
-                """Simple evaluation implementation for testing."""
-                predictions = []
-                references = []
-
-                for item in dataset:
-                    # Mock model call
-                    response = await model_interface.generate_response(
-                        item.get("prompt", item.get("input", ""))
-                    )
-                    predictions.append(response)
-                    references.append(item["expected_output"])
-
-                # Calculate simple accuracy
-                matches = sum(1 for p, r in zip(predictions, references)
-                              if p.strip().lower() == r.strip().lower())
-                accuracy = matches / len(predictions) if predictions else 0
-
-                return MockEvaluationResult(
-                    metrics={"accuracy": accuracy, "total_samples": len(dataset)},
-                    predictions=predictions,
-                    references=references
-                )
-
-        # Create evaluator and mock model
-        evaluator = TestEvaluator(config=mock_config)
-        model_interface = MockModelInterface(responses=["4", "Paris", "blue"])
-
-        # Run evaluation
-        result = await evaluator.evaluate(
-            model_interface=model_interface,
-            dataset=sample_dataset,
-            dataset_name="test_dataset"
-        )
-
-        # Verify results
-        assert isinstance(result, MockEvaluationResult)
-        assert "accuracy" in result.metrics
-        assert "total_samples" in result.metrics
-        assert result.metrics["total_samples"] == 3
-        assert result.metrics["accuracy"] == 1.0  # All mock responses match expected
-        assert len(result.predictions) == 3
-        assert len(result.references) == 3
-
-
-class TestEvaluationConfig:
-    """Test evaluation configuration functionality."""
-
-    def test_config_creation(self):
-        """Test basic config creation."""
-        config_data = {
-            "batch_size": 16,
-            "max_concurrent_requests": 5,
-            "timeout_seconds": 60,
-            "output_dir": "test_results"
-        }
-
-        class MockConfig:
-            def __init__(self, **kwargs):
-                for k, v in kwargs.items():
-                    setattr(self, k, v)
-
-        config = MockConfig(**config_data)
-        assert config.batch_size == 16
-        assert config.output_dir == "test_results"
-
-    def test_config_validation(self):
-        """Test config validation logic."""
-        def validate_config(config_dict):
-            """Validate configuration values."""
-            if config_dict.get("batch_size", 1) <= 0:
-                raise ValueError("batch_size must be positive")
-            if config_dict.get("max_concurrent_requests", 1) <= 0:
-                raise ValueError("max_concurrent_requests must be positive")
-            if config_dict.get("timeout_seconds", 1) <= 0:
-                raise ValueError("timeout_seconds must be positive")
-            return True
-
-        # Test valid config
-        valid_config = {"batch_size": 10, "max_concurrent_requests": 5, "timeout_seconds": 60}
-        assert validate_config(valid_config) is True
-
-        # Test invalid configs
-        invalid_configs = [
-            {"batch_size": -1},
-            {"max_concurrent_requests": 0},
-            {"timeout_seconds": -5}
-        ]
-
-        for invalid_config in invalid_configs:
-            with pytest.raises(ValueError):
-                validate_config(invalid_config)
-
-
-class TestAsyncEvaluation:
-    """Test asynchronous evaluation capabilities."""
-
-    @pytest.mark.asyncio
-    async def test_concurrent_evaluation(self):
-        """Test that evaluations can run concurrently."""
-        async def mock_evaluation_task(task_id: int, delay: float = 0.1):
-            """Mock evaluation task with delay."""
-            await asyncio.sleep(delay)
-            return {"task_id": task_id, "result": f"completed_{task_id}"}
-
-        # Run multiple evaluations concurrently
-        start_time = asyncio.get_event_loop().time()
-
-        tasks = [mock_evaluation_task(i, 0.1) for i in range(3)]
-        results = await asyncio.gather(*tasks)
-
-        end_time = asyncio.get_event_loop().time()
-
-        # Should complete in roughly 0.1 seconds (concurrent) rather than 0.3 (sequential)
-        assert end_time - start_time < 0.2
-        assert len(results) == 3
-        assert all(r["result"].startswith("completed_") for r in results)
-
-    @pytest.mark.asyncio
-    async def test_batch_processing(self):
-        """Test batch processing functionality."""
-        async def process_batch(batch: List[Dict], batch_size: int = 2):
-            """Process items in batches."""
-            results = []
-            for i in range(0, len(batch), batch_size):
-                batch_items = batch[i:i + batch_size]
-                # Simulate processing time proportional to batch size
-                await asyncio.sleep(0.01 * len(batch_items))
-                batch_results = [{"processed": item["id"]} for item in batch_items]
-                results.extend(batch_results)
-            return results
-
-        # Test data
-        test_items = [{"id": f"item_{i}"} for i in range(5)]
-
-        # Process in batches
-        results = await process_batch(test_items, batch_size=2)
-
-        assert len(results) == 5
-        assert all("processed" in r for r in results)
-
-
-class TestErrorHandling:
-    """Test error handling and edge cases."""
-
-    @pytest.mark.asyncio
-    async def test_timeout_handling(self):
-        """Test timeout handling in async operations."""
-        async def slow_operation():
-            """Simulate a slow operation."""
-            await asyncio.sleep(1.0)
-            return "completed"
-
-        # Test timeout
-        with pytest.raises(asyncio.TimeoutError):
-            await asyncio.wait_for(slow_operation(), timeout=0.1)

-    def test_empty_dataset_handling(self):
-        """Test handling of empty datasets."""
-        def calculate_metrics(predictions, references):
-            """Calculate metrics with empty data handling."""
-            if not predictions or not references:
-                return {"accuracy": 0.0, "count": 0}
-
-            matches = sum(1 for p, r in zip(predictions, references) if p == r)
-            return {
-                "accuracy": matches / len(predictions),
-                "count": len(predictions)
-            }
-
-        # Test empty data
-        empty_metrics = calculate_metrics([], [])
-        assert empty_metrics["accuracy"] == 0.0
-        assert empty_metrics["count"] == 0
-
-    def test_mismatched_data_lengths(self):
-        """Test handling of mismatched prediction and reference lengths."""
-        def safe_calculate_accuracy(predictions, references):
-            """Safely calculate accuracy with length mismatch handling."""
-            if len(predictions) != len(references):
-                min_len = min(len(predictions), len(references))
-                predictions = predictions[:min_len]
-                references = references[:min_len]
-
-            if not predictions:
-                return 0.0
-
-            matches = sum(1 for p, r in zip(predictions, references) if p == r)
-            return matches / len(predictions)
-
-        # Test mismatched lengths
-        predictions = ["a", "b", "c"]
-        references = ["a", "b"]  # Shorter
-
-        accuracy = safe_calculate_accuracy(predictions, references)
-        assert accuracy == 1.0  # Both "a" and "b" match
-
-
-if __name__ == "__main__":
-    # Allow running tests directly
-    pytest.main([__file__, "-v"])
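For context, the removed `test_basic.py` module was self-contained and self-executable via its `__main__` block. A minimal sketch of how such a module would be invoked (assuming `pytest` and, for the `@pytest.mark.asyncio` tests, the `pytest-asyncio` plugin are installed) is:

```python
# Hypothetical invocation sketch; not part of the package diff above.
# Assumes pytest and pytest-asyncio are installed in the environment.
import pytest

# Run the (now removed) unit test module verbosely, mirroring its __main__ block.
exit_code = pytest.main(["isa_model/eval/tests/unit/test_basic.py", "-v"])
```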