isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
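Entries 176-191 show the Modal service modules moving from `isa_model/deployment/cloud/modal/` (and `deployment/services/`) into the new `isa_model/deployment/modal/services/<modality>/` packages. Assuming importable module paths track this file layout (an inference from the moves above, not something the diff states), code pinned to the 0.4.0 paths would migrate roughly as sketched here, using the OCR service from entry 182 as the example:

# Hypothetical import-path migration implied by the file moves above;
# the module names come from the diff, but whether these are supported
# public import paths is an assumption.
try:
    # 0.4.4 layout: deployment/modal/services/vision/
    from isa_model.deployment.modal.services.vision import isa_vision_ocr_service
except ImportError:
    # 0.4.0 layout: deployment/cloud/modal/
    from isa_model.deployment.cloud.modal import isa_vision_ocr_service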
isa_model/eval/evaluators/base_evaluator.py (deleted; entry 132)
@@ -1,503 +0,0 @@
- """
- Base evaluator class implementing industry best practices for AI model evaluation.
-
- Features:
- - Async/await support for concurrent evaluation
- - Comprehensive error handling and retry logic
- - Experiment tracking integration (W&B, MLflow)
- - Distributed evaluation support
- - Memory-efficient batch processing
- - Comprehensive logging and metrics
- """
-
- import asyncio
- import logging
- import time
- import traceback
- from abc import ABC, abstractmethod
- from dataclasses import dataclass, field
- from typing import Dict, List, Any, Optional, Union, Callable, AsyncGenerator
- from datetime import datetime
- from pathlib import Path
- import json
-
- try:
-     import wandb
-     WANDB_AVAILABLE = True
- except ImportError:
-     WANDB_AVAILABLE = False
-
- try:
-     import mlflow
-     MLFLOW_AVAILABLE = True
- except ImportError:
-     MLFLOW_AVAILABLE = False
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class EvaluationResult:
-     """
-     Standardized evaluation result container.
-
-     Follows MLOps best practices for result tracking and reproducibility.
-     """
-
-     # Core results
-     metrics: Dict[str, float] = field(default_factory=dict)
-     predictions: List[Any] = field(default_factory=list)
-     references: List[Any] = field(default_factory=list)
-
-     # Metadata
-     model_name: str = ""
-     dataset_name: str = ""
-     evaluation_type: str = ""
-     timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
-
-     # Performance metrics
-     total_samples: int = 0
-     successful_samples: int = 0
-     failed_samples: int = 0
-     evaluation_time_seconds: float = 0.0
-     throughput_samples_per_second: float = 0.0
-
-     # Cost and resource tracking
-     total_tokens_used: int = 0
-     estimated_cost_usd: float = 0.0
-     memory_peak_mb: float = 0.0
-
-     # Configuration
-     config: Dict[str, Any] = field(default_factory=dict)
-     environment_info: Dict[str, Any] = field(default_factory=dict)
-
-     # Error tracking
-     errors: List[Dict[str, Any]] = field(default_factory=list)
-     warnings: List[str] = field(default_factory=list)
-
-     # Detailed results
-     sample_results: List[Dict[str, Any]] = field(default_factory=list)
-
-     def to_dict(self) -> Dict[str, Any]:
-         """Convert to dictionary for serialization."""
-         return {
-             "metrics": self.metrics,
-             "predictions": self.predictions,
-             "references": self.references,
-             "model_name": self.model_name,
-             "dataset_name": self.dataset_name,
-             "evaluation_type": self.evaluation_type,
-             "timestamp": self.timestamp,
-             "total_samples": self.total_samples,
-             "successful_samples": self.successful_samples,
-             "failed_samples": self.failed_samples,
-             "evaluation_time_seconds": self.evaluation_time_seconds,
-             "throughput_samples_per_second": self.throughput_samples_per_second,
-             "total_tokens_used": self.total_tokens_used,
-             "estimated_cost_usd": self.estimated_cost_usd,
-             "memory_peak_mb": self.memory_peak_mb,
-             "config": self.config,
-             "environment_info": self.environment_info,
-             "errors": self.errors,
-             "warnings": self.warnings,
-             "sample_results": self.sample_results
-         }
-
-     def save_to_file(self, file_path: Union[str, Path]) -> None:
-         """Save results to JSON file."""
-         with open(file_path, 'w', encoding='utf-8') as f:
-             json.dump(self.to_dict(), f, indent=2, ensure_ascii=False)
-
-     @classmethod
-     def load_from_file(cls, file_path: Union[str, Path]) -> 'EvaluationResult':
-         """Load results from JSON file."""
-         with open(file_path, 'r', encoding='utf-8') as f:
-             data = json.load(f)
-
-         result = cls()
-         for key, value in data.items():
-             if hasattr(result, key):
-                 setattr(result, key, value)
-
-         return result
-
-     def get_summary(self) -> Dict[str, Any]:
-         """Get evaluation summary."""
-         success_rate = self.successful_samples / self.total_samples if self.total_samples > 0 else 0.0
-
-         return {
-             "model_name": self.model_name,
-             "dataset_name": self.dataset_name,
-             "evaluation_type": self.evaluation_type,
-             "timestamp": self.timestamp,
-             "success_rate": success_rate,
-             "total_samples": self.total_samples,
-             "evaluation_time_seconds": self.evaluation_time_seconds,
-             "throughput_samples_per_second": self.throughput_samples_per_second,
-             "estimated_cost_usd": self.estimated_cost_usd,
-             "key_metrics": self.metrics,
-             "error_count": len(self.errors),
-             "warning_count": len(self.warnings)
-         }
-
-
- class BaseEvaluator(ABC):
-     """
-     Abstract base evaluator implementing industry best practices.
-
-     Features:
-     - Async evaluation with concurrency control
-     - Comprehensive error handling and retry logic
-     - Experiment tracking integration
-     - Memory-efficient batch processing
-     - Progress monitoring and cancellation support
-     """
-
-     def __init__(self,
-                  evaluator_name: str,
-                  config: Optional[Dict[str, Any]] = None,
-                  experiment_tracker: Optional[Any] = None):
-         """
-         Initialize the base evaluator.
-
-         Args:
-             evaluator_name: Name identifier for this evaluator
-             config: Evaluation configuration
-             experiment_tracker: Optional experiment tracking instance
-         """
-         self.evaluator_name = evaluator_name
-         self.config = config or {}
-         self.experiment_tracker = experiment_tracker
-
-         # State management
-         self._is_running = False
-         self._should_stop = False
-         self._current_result: Optional[EvaluationResult] = None
-
-         # Performance monitoring
-         self._start_time: Optional[float] = None
-         self._peak_memory_mb: float = 0.0
-
-         # Concurrency control
-         self.max_concurrent_requests = self.config.get("max_concurrent_requests", 10)
-         self.semaphore = asyncio.Semaphore(self.max_concurrent_requests)
-
-         # Retry configuration
-         self.max_retries = self.config.get("max_retries", 3)
-         self.retry_delay = self.config.get("retry_delay_seconds", 1.0)
-
-         logger.info(f"Initialized {evaluator_name} evaluator with config: {self.config}")
-
-     @abstractmethod
-     async def evaluate_sample(self,
-                               sample: Dict[str, Any],
-                               model_interface: Any) -> Dict[str, Any]:
-         """
-         Evaluate a single sample.
-
-         Args:
-             sample: Data sample to evaluate
-             model_interface: Model interface for inference
-
-         Returns:
-             Evaluation result for the sample
-         """
-         pass
-
-     @abstractmethod
-     def compute_metrics(self,
-                         predictions: List[Any],
-                         references: List[Any],
-                         **kwargs) -> Dict[str, float]:
-         """
-         Compute evaluation metrics.
-
-         Args:
-             predictions: Model predictions
-             references: Ground truth references
-             **kwargs: Additional parameters
-
-         Returns:
-             Dictionary of computed metrics
-         """
-         pass
-
-     async def evaluate(self,
-                        model_interface: Any,
-                        dataset: List[Dict[str, Any]],
-                        dataset_name: str = "unknown",
-                        model_name: str = "unknown",
-                        batch_size: Optional[int] = None,
-                        save_predictions: bool = True,
-                        progress_callback: Optional[Callable] = None) -> EvaluationResult:
-         """
-         Perform comprehensive evaluation with industry best practices.
-
-         Args:
-             model_interface: Model interface for inference
-             dataset: Dataset to evaluate on
-             dataset_name: Name of the dataset
-             model_name: Name of the model
-             batch_size: Batch size for processing
-             save_predictions: Whether to save individual predictions
-             progress_callback: Optional callback for progress updates
-
-         Returns:
-             Comprehensive evaluation results
-         """
-
-         # Initialize evaluation
-         self._start_evaluation()
-         result = EvaluationResult(
-             model_name=model_name,
-             dataset_name=dataset_name,
-             evaluation_type=self.evaluator_name,
-             config=self.config.copy(),
-             environment_info=self._get_environment_info()
-         )
-
-         try:
-             # Start experiment tracking
-             await self._start_experiment_tracking(model_name, dataset_name)
-
-             # Process dataset in batches
-             batch_size = batch_size or self.config.get("batch_size", 32)
-             total_batches = (len(dataset) + batch_size - 1) // batch_size
-
-             all_predictions = []
-             all_references = []
-             all_sample_results = []
-
-             for batch_idx in range(total_batches):
-                 if self._should_stop:
-                     logger.info("Evaluation stopped by user request")
-                     break
-
-                 # Get batch
-                 start_idx = batch_idx * batch_size
-                 end_idx = min(start_idx + batch_size, len(dataset))
-                 batch = dataset[start_idx:end_idx]
-
-                 # Process batch
-                 batch_results = await self._process_batch(batch, model_interface)
-
-                 # Collect results
-                 for sample_result in batch_results:
-                     if sample_result.get("success", False):
-                         all_predictions.append(sample_result.get("prediction"))
-                         all_references.append(sample_result.get("reference"))
-                         result.successful_samples += 1
-                     else:
-                         result.failed_samples += 1
-                         result.errors.append({
-                             "sample_id": sample_result.get("sample_id"),
-                             "error": sample_result.get("error"),
-                             "timestamp": datetime.now().isoformat()
-                         })
-
-                     if save_predictions:
-                         all_sample_results.append(sample_result)
-
-                 # Update progress
-                 progress = (batch_idx + 1) / total_batches
-                 if progress_callback:
-                     await progress_callback(progress, batch_idx + 1, total_batches)
-
-                 # Log progress
-                 if (batch_idx + 1) % 10 == 0 or batch_idx == total_batches - 1:
-                     logger.info(f"Processed {batch_idx + 1}/{total_batches} batches "
-                                 f"({result.successful_samples} successful, {result.failed_samples} failed)")
-
-             # Compute final metrics
-             if all_predictions and all_references:
-                 result.metrics = self.compute_metrics(all_predictions, all_references)
-                 logger.info(f"Computed metrics: {result.metrics}")
-             else:
-                 logger.warning("No valid predictions available for metric computation")
-                 result.warnings.append("No valid predictions available for metric computation")
-
-             # Finalize results
-             result.predictions = all_predictions
-             result.references = all_references
-             result.sample_results = all_sample_results
-             result.total_samples = len(dataset)
-
-             # Log experiment results
-             await self._log_experiment_results(result)
-
-         except Exception as e:
-             logger.error(f"Evaluation failed: {e}")
-             logger.error(traceback.format_exc())
-             result.errors.append({
-                 "error": str(e),
-                 "error_type": type(e).__name__,
-                 "traceback": traceback.format_exc(),
-                 "timestamp": datetime.now().isoformat()
-             })
-
-         finally:
-             # Finalize evaluation
-             self._end_evaluation(result)
-             await self._end_experiment_tracking()
-             self._current_result = result
-
-         return result
-
-     async def _process_batch(self,
-                              batch: List[Dict[str, Any]],
-                              model_interface: Any) -> List[Dict[str, Any]]:
-         """Process a batch of samples with concurrency control."""
-         tasks = []
-
-         for sample in batch:
-             task = asyncio.create_task(
-                 self._process_sample_with_retry(sample, model_interface)
-             )
-             tasks.append(task)
-
-         # Wait for all tasks in batch to complete
-         results = await asyncio.gather(*tasks, return_exceptions=True)
-
-         # Process results and handle exceptions
-         processed_results = []
-         for i, result in enumerate(results):
-             if isinstance(result, Exception):
-                 processed_results.append({
-                     "sample_id": batch[i].get("id", f"sample_{i}"),
-                     "success": False,
-                     "error": str(result),
-                     "prediction": None,
-                     "reference": batch[i].get("reference")
-                 })
-             else:
-                 processed_results.append(result)
-
-         return processed_results
-
-     async def _process_sample_with_retry(self,
-                                          sample: Dict[str, Any],
-                                          model_interface: Any) -> Dict[str, Any]:
-         """Process a single sample with retry logic and concurrency control."""
-         async with self.semaphore:  # Limit concurrent requests
-             for attempt in range(self.max_retries + 1):
-                 try:
-                     result = await self.evaluate_sample(sample, model_interface)
-                     result["success"] = True
-                     result["sample_id"] = sample.get("id", "unknown")
-                     result["reference"] = sample.get("reference")
-                     return result
-
-                 except Exception as e:
-                     if attempt == self.max_retries:
-                         # Final attempt failed
-                         logger.error(f"Sample evaluation failed after {self.max_retries + 1} attempts: {e}")
-                         return {
-                             "sample_id": sample.get("id", "unknown"),
-                             "success": False,
-                             "error": str(e),
-                             "prediction": None,
-                             "reference": sample.get("reference")
-                         }
-                     else:
-                         # Retry with exponential backoff
-                         delay = self.retry_delay * (2 ** attempt)
-                         logger.warning(f"Sample evaluation failed (attempt {attempt + 1}), retrying in {delay}s: {e}")
-                         await asyncio.sleep(delay)
-
-     def _start_evaluation(self) -> None:
-         """Mark the start of evaluation."""
-         self._is_running = True
-         self._should_stop = False
-         self._start_time = time.time()
-
-         # Monitor memory usage
-         try:
-             import psutil
-             process = psutil.Process()
-             self._peak_memory_mb = process.memory_info().rss / 1024 / 1024
-         except ImportError:
-             pass
-
-     def _end_evaluation(self, result: EvaluationResult) -> None:
-         """Finalize evaluation with performance metrics."""
-         self._is_running = False
-         end_time = time.time()
-
-         if self._start_time:
-             result.evaluation_time_seconds = end_time - self._start_time
-             if result.total_samples > 0:
-                 result.throughput_samples_per_second = result.total_samples / result.evaluation_time_seconds
-
-         result.memory_peak_mb = self._peak_memory_mb
-
-         logger.info(f"Evaluation completed in {result.evaluation_time_seconds:.2f}s "
-                     f"({result.throughput_samples_per_second:.2f} samples/sec)")
-
-     def _get_environment_info(self) -> Dict[str, Any]:
-         """Get environment information for reproducibility."""
-         import platform
-         import sys
-
-         env_info = {
-             "python_version": sys.version,
-             "platform": platform.platform(),
-             "hostname": platform.node(),
-             "timestamp": datetime.now().isoformat()
-         }
-
-         try:
-             import torch
-             env_info["torch_version"] = torch.__version__
-             env_info["cuda_available"] = torch.cuda.is_available()
-             if torch.cuda.is_available():
-                 env_info["cuda_device_count"] = torch.cuda.device_count()
-                 env_info["cuda_device_name"] = torch.cuda.get_device_name()
-         except ImportError:
-             pass
-
-         return env_info
-
-     async def _start_experiment_tracking(self, model_name: str, dataset_name: str) -> None:
-         """Start experiment tracking if available."""
-         if self.experiment_tracker:
-             try:
-                 await self.experiment_tracker.start_run(
-                     name=f"{self.evaluator_name}_{model_name}_{dataset_name}",
-                     config=self.config
-                 )
-             except Exception as e:
-                 logger.warning(f"Failed to start experiment tracking: {e}")
-
-     async def _log_experiment_results(self, result: EvaluationResult) -> None:
-         """Log results to experiment tracker."""
-         if self.experiment_tracker:
-             try:
-                 await self.experiment_tracker.log_metrics(result.metrics)
-                 await self.experiment_tracker.log_params(result.config)
-             except Exception as e:
-                 logger.warning(f"Failed to log experiment results: {e}")
-
-     async def _end_experiment_tracking(self) -> None:
-         """End experiment tracking."""
-         if self.experiment_tracker:
-             try:
-                 await self.experiment_tracker.end_run()
-             except Exception as e:
-                 logger.warning(f"Failed to end experiment tracking: {e}")
-
-     def stop_evaluation(self) -> None:
-         """Request evaluation to stop gracefully."""
-         self._should_stop = True
-         logger.info("Evaluation stop requested")
-
-     def is_running(self) -> bool:
-         """Check if evaluation is currently running."""
-         return self._is_running
-
-     def get_current_result(self) -> Optional[EvaluationResult]:
-         """Get the current/latest evaluation result."""
-         return self._current_result
-
-     def get_supported_metrics(self) -> List[str]:
-         """Get list of metrics supported by this evaluator."""
-         return []  # To be overridden by subclasses
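
The hunk above removes the evaluator framework in its entirety (the whole `isa_model/eval/` tree is deleted in this release, entries 124-144). For reference, the removed `BaseEvaluator` was driven by subclassing its two abstract methods; the sketch below reconstructs that usage from the deleted code alone. `ExactMatchEvaluator` and `EchoModel` are hypothetical stand-ins, and the import path only exists in 0.4.0:

import asyncio
from typing import Any, Dict, List

# 0.4.0-only import; this module is the one deleted in the hunk above.
from isa_model.eval.evaluators.base_evaluator import BaseEvaluator

class ExactMatchEvaluator(BaseEvaluator):
    """Hypothetical subclass illustrating the removed API."""

    async def evaluate_sample(self, sample: Dict[str, Any], model_interface: Any) -> Dict[str, Any]:
        # model_interface is assumed to expose an async generate() call;
        # the base class fills in "success", "sample_id", and "reference".
        prediction = await model_interface.generate(sample["input"])
        return {"prediction": prediction}

    def compute_metrics(self, predictions: List[Any], references: List[Any], **kwargs) -> Dict[str, float]:
        matches = sum(p == r for p, r in zip(predictions, references))
        return {"exact_match": matches / len(predictions) if predictions else 0.0}

class EchoModel:
    """Hypothetical stub model that returns its prompt unchanged."""
    async def generate(self, prompt: str) -> str:
        return prompt

async def main() -> None:
    evaluator = ExactMatchEvaluator("exact_match", config={"batch_size": 2})
    dataset = [
        {"id": "s1", "input": "hello", "reference": "hello"},
        {"id": "s2", "input": "world", "reference": "world!"},
    ]
    result = await evaluator.evaluate(EchoModel(), dataset, dataset_name="toy", model_name="echo")
    print(result.get_summary())  # expected exact_match: 0.5

asyncio.run(main())

Note the failure handling in the deleted class: each sample is retried with exponential backoff (`retry_delay_seconds * 2 ** attempt`) under a shared semaphore, and samples that exhaust their retries are recorded in `result.errors` rather than aborting the run.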