isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
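Among the changes above, the Modal services move from `isa_model/deployment/cloud/modal/` to `isa_model/deployment/modal/services/`, and the `isa_model/eval/` and `isa_model/training/` trees are removed outright. The sketch below ships in neither wheel; it is a minimal illustration, assuming downstream code imported these modules by their full dotted paths, of resolving a relocated module across both layouts.

```python
# Illustrative sketch, not part of either wheel: resolve the vision table
# service module under whichever package layout (0.3.91 or 0.4.3) is installed.
import importlib
from types import ModuleType
from typing import Optional


def load_vision_table_service() -> Optional[ModuleType]:
    candidates = [
        # 0.4.3 layout (isa_model/deployment/modal/services/vision/...)
        "isa_model.deployment.modal.services.vision.isa_vision_table_service",
        # 0.3.91 layout (isa_model/deployment/cloud/modal/..., deleted in 0.4.3)
        "isa_model.deployment.cloud.modal.isa_vision_table_service",
    ]
    for name in candidates:
        try:
            return importlib.import_module(name)
        except ImportError:
            continue
    return None
```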
isa_model/eval/factory.py DELETED
@@ -1,531 +0,0 @@
- """
- Enterprise-Grade Evaluation Factory for ISA Model Framework
-
- Implements industry best practices for AI model evaluation at scale:
- - Async evaluation with concurrency control
- - Comprehensive experiment tracking (W&B, MLflow)
- - Distributed evaluation support
- - Production-ready monitoring and alerting
- - Cost tracking and optimization
- - Reproducible evaluation pipelines
- """
-
- import asyncio
- import logging
- from typing import Optional, Dict, Any, List, Union, Callable
- from pathlib import Path
- import json
-
- from .evaluators import LLMEvaluator, VisionEvaluator, MultimodalEvaluator, EvaluationResult
- from .infrastructure import ExperimentTracker, create_experiment_tracker
- from .config import EvaluationConfig
-
- logger = logging.getLogger(__name__)
-
-
- class EvaluationFactory:
-     """
-     Enterprise-grade evaluation factory implementing MLOps best practices.
-
-     Features:
-     - Multi-modal evaluation support (LLM, Vision, Multimodal)
-     - Async evaluation with smart concurrency management
-     - Comprehensive experiment tracking and visualization
-     - Cost optimization and resource monitoring
-     - Distributed evaluation across multiple GPUs/nodes
-     - Production-ready error handling and retry logic
-     - Automated result storage and comparison
-
-     Example usage:
-     ```python
-     from isa_model.eval import EvaluationFactory
-
-     # Initialize with experiment tracking
-     factory = EvaluationFactory(
-         experiment_tracking={
-             "type": "wandb",
-             "project": "model-evaluation",
-             "entity": "my-team"
-         }
-     )
-
-     # Evaluate LLM on dataset
-     result = await factory.evaluate_llm(
-         model_name="gpt-4.1-mini",
-         provider="openai",
-         dataset_path="path/to/evaluation_data.json",
-         metrics=["accuracy", "f1_score", "bleu_score"],
-         save_results=True
-     )
-
-     # Run benchmark evaluation
-     benchmark_result = await factory.run_benchmark(
-         model_name="claude-sonnet-4",
-         provider="yyds",
-         benchmark_name="mmlu",
-         subjects=["math", "physics", "chemistry"]
-     )
-
-     # Compare multiple models
-     comparison = await factory.compare_models(
-         models=[
-             {"name": "gpt-4.1-mini", "provider": "openai"},
-             {"name": "claude-sonnet-4", "provider": "yyds"}
-         ],
-         dataset_path="comparison_dataset.json"
-     )
-     ```
-     """
-
-     def __init__(self,
-                  config: Optional[Union[Dict[str, Any], EvaluationConfig]] = None,
-                  experiment_tracking: Optional[Dict[str, Any]] = None,
-                  output_dir: Optional[str] = None):
-         """
-         Initialize the enterprise evaluation factory.
-
-         Args:
-             config: Evaluation configuration (dict or EvaluationConfig object)
-             experiment_tracking: Experiment tracking configuration
-             output_dir: Output directory for results
-         """
-         # Initialize configuration
-         if isinstance(config, dict):
-             self.config = EvaluationConfig.from_dict(config)
-         elif isinstance(config, EvaluationConfig):
-             self.config = config
-         else:
-             self.config = EvaluationConfig()
-
-         # Override output directory if provided
-         if output_dir:
-             self.config.output_dir = output_dir
-
-         # Initialize experiment tracker
-         self.experiment_tracker = None
-         if experiment_tracking:
-             try:
-                 self.experiment_tracker = create_experiment_tracker(**experiment_tracking)
-                 logger.info(f"Initialized experiment tracking: {experiment_tracking['type']}")
-             except Exception as e:
-                 logger.warning(f"Failed to initialize experiment tracking: {e}")
-
-         # Initialize evaluators
-         self.llm_evaluator = LLMEvaluator(
-             config=self.config.to_dict(),
-             experiment_tracker=self.experiment_tracker
-         )
-
-         # State tracking
-         self._active_evaluations: Dict[str, asyncio.Task] = {}
-
-         logger.info(f"EvaluationFactory initialized with output dir: {self.config.output_dir}")
-
-     async def evaluate_llm(self,
-                            model_name: str,
-                            provider: str = "openai",
-                            dataset_path: Optional[str] = None,
-                            dataset: Optional[List[Dict[str, Any]]] = None,
-                            metrics: Optional[List[str]] = None,
-                            batch_size: Optional[int] = None,
-                            save_results: bool = True,
-                            experiment_name: Optional[str] = None,
-                            progress_callback: Optional[Callable] = None) -> EvaluationResult:
-         """
-         Evaluate LLM with comprehensive metrics and tracking.
-
-         Args:
-             model_name: Name of the model to evaluate
-             provider: Model provider (openai, yyds, ollama, etc.)
-             dataset_path: Path to evaluation dataset JSON file
-             dataset: Direct dataset input (alternative to dataset_path)
-             metrics: List of metrics to compute
-             batch_size: Batch size for evaluation
-             save_results: Whether to save results to disk
-             experiment_name: Custom experiment name
-             progress_callback: Optional progress callback function
-
-         Returns:
-             Comprehensive evaluation results
-         """
-         # Load dataset
-         if dataset is None:
-             if dataset_path is None:
-                 raise ValueError("Either dataset_path or dataset must be provided")
-             dataset = self._load_dataset(dataset_path)
-
-         # Configure LLM evaluator
-         llm_config = {
-             "provider": provider,
-             "model_name": model_name,
-             "batch_size": batch_size or self.config.batch_size,
-             "temperature": self.config.default_temperature,
-             "max_tokens": self.config.default_max_tokens
-         }
-
-         self.llm_evaluator.config.update(llm_config)
-
-         # Generate experiment name
-         dataset_name = Path(dataset_path).stem if dataset_path else "custom_dataset"
-         experiment_name = experiment_name or f"llm_eval_{model_name}_{dataset_name}"
-
-         # Run evaluation
-         result = await self.llm_evaluator.evaluate(
-             model_interface=None, # Will use AI factory
-             dataset=dataset,
-             dataset_name=dataset_name,
-             model_name=f"{provider}:{model_name}",
-             batch_size=batch_size,
-             progress_callback=progress_callback
-         )
-
-         # Save results if requested
-         if save_results:
-             await self._save_results(result, experiment_name)
-
-         return result
-
-     async def run_benchmark(self,
-                             model_name: str,
-                             provider: str,
-                             benchmark_name: str,
-                             subjects: Optional[List[str]] = None,
-                             max_samples: Optional[int] = None,
-                             few_shot: bool = True,
-                             num_shots: int = 5,
-                             save_results: bool = True,
-                             experiment_name: Optional[str] = None) -> EvaluationResult:
-         """
-         Run standardized benchmark evaluation.
-
-         Args:
-             model_name: Name of the model to evaluate
-             provider: Model provider
-             benchmark_name: Name of benchmark (mmlu, hellaswag, arc, gsm8k, etc.)
-             subjects: List of subjects to evaluate (for MMLU)
-             max_samples: Maximum number of samples to evaluate
-             few_shot: Whether to use few-shot examples
-             num_shots: Number of few-shot examples
-             save_results: Whether to save results
-             experiment_name: Custom experiment name
-
-         Returns:
-             Benchmark evaluation results
-         """
-         # Load benchmark dataset
-         benchmark_dataset = await self._load_benchmark(
-             benchmark_name,
-             subjects=subjects,
-             max_samples=max_samples,
-             few_shot=few_shot,
-             num_shots=num_shots
-         )
-
-         # Configure for benchmark evaluation
-         benchmark_config = {
-             "provider": provider,
-             "model_name": model_name,
-             "temperature": 0.0, # Deterministic for benchmarks
-             "max_tokens": 50, # Short answers for most benchmarks
-             "task_type": "benchmark",
-             "benchmark_name": benchmark_name
-         }
-
-         self.llm_evaluator.config.update(benchmark_config)
-
-         # Generate experiment name
-         experiment_name = experiment_name or f"benchmark_{benchmark_name}_{model_name}"
-
-         # Run evaluation
-         result = await self.llm_evaluator.evaluate(
-             model_interface=None,
-             dataset=benchmark_dataset,
-             dataset_name=benchmark_name,
-             model_name=f"{provider}:{model_name}",
-             batch_size=self.config.batch_size
-         )
-
-         # Add benchmark-specific metadata
-         result.config.update({
-             "benchmark_name": benchmark_name,
-             "subjects": subjects,
-             "few_shot": few_shot,
-             "num_shots": num_shots
-         })
-
-         # Save results if requested
-         if save_results:
-             await self._save_results(result, experiment_name)
-
-         return result
-
-     async def compare_models(self,
-                              models: List[Dict[str, str]],
-                              dataset_path: Optional[str] = None,
-                              dataset: Optional[List[Dict[str, Any]]] = None,
-                              benchmark_name: Optional[str] = None,
-                              metrics: Optional[List[str]] = None,
-                              save_results: bool = True,
-                              experiment_name: Optional[str] = None) -> Dict[str, EvaluationResult]:
-         """
-         Compare multiple models on the same evaluation task.
-
-         Args:
-             models: List of model configs [{"name": "gpt-4", "provider": "openai"}, ...]
-             dataset_path: Path to evaluation dataset
-             dataset: Direct dataset input
-             benchmark_name: Benchmark name (alternative to dataset)
-             metrics: Metrics to compute
-             save_results: Whether to save comparison results
-             experiment_name: Custom experiment name
-
-         Returns:
-             Dictionary mapping model names to evaluation results
-         """
-         results = {}
-
-         # Run evaluations concurrently (with concurrency limits)
-         semaphore = asyncio.Semaphore(self.config.max_concurrent_evaluations)
-
-         async def evaluate_single_model(model_config: Dict[str, str]) -> tuple:
-             async with semaphore:
-                 model_name = model_config["name"]
-                 provider = model_config["provider"]
-
-                 if benchmark_name:
-                     result = await self.run_benchmark(
-                         model_name=model_name,
-                         provider=provider,
-                         benchmark_name=benchmark_name,
-                         save_results=False # Save comparison results together
-                     )
-                 else:
-                     result = await self.evaluate_llm(
-                         model_name=model_name,
-                         provider=provider,
-                         dataset_path=dataset_path,
-                         dataset=dataset,
-                         metrics=metrics,
-                         save_results=False
-                     )
-
-                 return f"{provider}:{model_name}", result
-
-         # Execute all evaluations
-         tasks = [evaluate_single_model(model) for model in models]
-         evaluation_results = await asyncio.gather(*tasks)
-
-         # Collect results
-         for model_id, result in evaluation_results:
-             results[model_id] = result
-
-         # Generate comparison report
-         comparison_report = self._generate_comparison_report(results)
-
-         # Save results if requested
-         if save_results:
-             experiment_name = experiment_name or f"model_comparison_{len(models)}_models"
-             await self._save_comparison_results(results, comparison_report, experiment_name)
-
-         return results
-
-     def _load_dataset(self, dataset_path: str) -> List[Dict[str, Any]]:
-         """Load dataset from file."""
-         with open(dataset_path, 'r', encoding='utf-8') as f:
-             if dataset_path.endswith('.json'):
-                 dataset = json.load(f)
-             elif dataset_path.endswith('.jsonl'):
-                 dataset = [json.loads(line) for line in f]
-             else:
-                 raise ValueError(f"Unsupported dataset format: {dataset_path}")
-
-         logger.info(f"Loaded dataset with {len(dataset)} samples from {dataset_path}")
-         return dataset
-
-     async def _load_benchmark(self,
-                               benchmark_name: str,
-                               subjects: Optional[List[str]] = None,
-                               max_samples: Optional[int] = None,
-                               few_shot: bool = True,
-                               num_shots: int = 5) -> List[Dict[str, Any]]:
-         """Load benchmark dataset."""
-         # This would integrate with the benchmark loaders
-         # For now, return a placeholder
-         logger.warning(f"Benchmark {benchmark_name} loading not yet implemented")
-
-         # Placeholder benchmark data
-         return [
-             {
-                 "id": f"sample_{i}",
-                 "prompt": f"Sample question {i} for {benchmark_name}",
-                 "reference": "A",
-                 "choices": ["A", "B", "C", "D"] if benchmark_name != "gsm8k" else None
-             }
-             for i in range(min(max_samples or 10, 10))
-         ]
-
-     async def _save_results(self, result: EvaluationResult, experiment_name: str) -> None:
-         """Save evaluation results to disk."""
-         # Create output directory
-         output_dir = Path(self.config.output_dir) / experiment_name
-         output_dir.mkdir(parents=True, exist_ok=True)
-
-         # Save main results
-         results_path = output_dir / "results.json"
-         result.save_to_file(results_path)
-
-         # Save detailed predictions if available
-         if result.sample_results:
-             predictions_path = output_dir / "predictions.json"
-             with open(predictions_path, 'w', encoding='utf-8') as f:
-                 json.dump(result.sample_results, f, indent=2, ensure_ascii=False)
-
-         # Save summary
-         summary_path = output_dir / "summary.json"
-         with open(summary_path, 'w', encoding='utf-8') as f:
-             json.dump(result.get_summary(), f, indent=2, ensure_ascii=False)
-
-         logger.info(f"Saved evaluation results to {output_dir}")
-
-     async def _save_comparison_results(self,
-                                        results: Dict[str, EvaluationResult],
-                                        comparison_report: Dict[str, Any],
-                                        experiment_name: str) -> None:
-         """Save model comparison results."""
-         output_dir = Path(self.config.output_dir) / experiment_name
-         output_dir.mkdir(parents=True, exist_ok=True)
-
-         # Save individual results
-         for model_id, result in results.items():
-             model_dir = output_dir / model_id.replace(":", "_")
-             model_dir.mkdir(exist_ok=True)
-             result.save_to_file(model_dir / "results.json")
-
-         # Save comparison report
-         comparison_path = output_dir / "comparison_report.json"
-         with open(comparison_path, 'w', encoding='utf-8') as f:
-             json.dump(comparison_report, f, indent=2, ensure_ascii=False)
-
-         logger.info(f"Saved comparison results to {output_dir}")
-
-     def _generate_comparison_report(self, results: Dict[str, EvaluationResult]) -> Dict[str, Any]:
-         """Generate comparison report from multiple model results."""
-         report = {
-             "models_compared": list(results.keys()),
-             "comparison_timestamp": results[list(results.keys())[0]].timestamp,
-             "metric_comparison": {},
-             "rankings": {},
-             "best_model_per_metric": {}
-         }
-
-         # Extract all metrics
-         all_metrics = set()
-         for result in results.values():
-             all_metrics.update(result.metrics.keys())
-
-         # Compare each metric
-         for metric in all_metrics:
-             metric_values = {}
-             for model_id, result in results.items():
-                 if metric in result.metrics:
-                     metric_values[model_id] = result.metrics[metric]
-
-             if metric_values:
-                 # Determine if higher is better
-                 higher_is_better = metric not in ["perplexity", "loss", "error_rate"]
-
-                 # Find best model
-                 best_model = max(metric_values.items(), key=lambda x: x[1]) if higher_is_better else min(metric_values.items(), key=lambda x: x[1])
-
-                 # Create ranking
-                 sorted_models = sorted(metric_values.items(), key=lambda x: x[1], reverse=higher_is_better)
-
-                 report["metric_comparison"][metric] = metric_values
-                 report["rankings"][metric] = [{"model": model, "value": value} for model, value in sorted_models]
-                 report["best_model_per_metric"][metric] = {"model": best_model[0], "value": best_model[1]}
-
-         return report
-
-     def get_configuration(self) -> Dict[str, Any]:
-         """Get current factory configuration."""
-         return self.config.to_dict()
-
-     def get_active_evaluations(self) -> List[str]:
-         """Get list of currently running evaluations."""
-         return list(self._active_evaluations.keys())
-
-     async def stop_evaluation(self, evaluation_id: str) -> bool:
-         """Stop a running evaluation."""
-         if evaluation_id in self._active_evaluations:
-             task = self._active_evaluations[evaluation_id]
-             task.cancel()
-             del self._active_evaluations[evaluation_id]
-             logger.info(f"Stopped evaluation: {evaluation_id}")
-             return True
-         return False
-
-     async def cleanup(self) -> None:
-         """Cleanup resources and stop all running evaluations."""
-         # Cancel all active evaluations
-         for evaluation_id in list(self._active_evaluations.keys()):
-             await self.stop_evaluation(evaluation_id)
-
-         # Close experiment tracker
-         if self.experiment_tracker and self.experiment_tracker.is_running:
-             await self.experiment_tracker.end_run()
-
-         logger.info("EvaluationFactory cleanup completed")
-
-
- # Convenience functions for quick evaluation
- async def evaluate_llm_quick(model_name: str,
-                              provider: str,
-                              dataset_path: str,
-                              metrics: Optional[List[str]] = None) -> EvaluationResult:
-     """
-     Quick LLM evaluation function.
-
-     Args:
-         model_name: Name of the model
-         provider: Model provider
-         dataset_path: Path to dataset
-         metrics: Metrics to compute
-
-     Returns:
-         Evaluation results
-     """
-     factory = EvaluationFactory()
-     try:
-         return await factory.evaluate_llm(
-             model_name=model_name,
-             provider=provider,
-             dataset_path=dataset_path,
-             metrics=metrics
-         )
-     finally:
-         await factory.cleanup()
-
-
- async def run_benchmark_quick(model_name: str,
-                               provider: str,
-                               benchmark_name: str) -> EvaluationResult:
-     """
-     Quick benchmark evaluation function.
-
-     Args:
-         model_name: Name of the model
-         provider: Model provider
-         benchmark_name: Benchmark name
-
-     Returns:
-         Benchmark results
-     """
-     factory = EvaluationFactory()
-     try:
-         return await factory.run_benchmark(
-             model_name=model_name,
-             provider=provider,
-             benchmark_name=benchmark_name
-         )
-     finally:
-         await factory.cleanup()
isa_model/eval/infrastructure/__init__.py DELETED
@@ -1,24 +0,0 @@
- """
- Infrastructure components for evaluation framework.
-
- Provides robust infrastructure for production-scale evaluation:
- - Async execution and concurrency management
- - Distributed evaluation support
- - Experiment tracking integration
- - Result storage and caching
- - Resource monitoring
- """
-
- from .experiment_tracker import ExperimentTracker, WandBTracker, MLflowTracker
- from .async_runner import AsyncEvaluationRunner
- from .result_storage import ResultStorage
- from .cache_manager import CacheManager
-
- __all__ = [
-     "ExperimentTracker",
-     "WandBTracker",
-     "MLflowTracker",
-     "AsyncEvaluationRunner",
-     "ResultStorage",
-     "CacheManager"
- ]
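Because the whole `isa_model.eval` package (including the `EvaluationFactory` shown above) is removed in 0.4.3, importing it now fails at import time. A minimal sketch of a version-tolerant guard, assuming the caller can simply disable evaluation features when the package is absent:

```python
# Sketch only: tolerate the removal of isa_model.eval when supporting both
# 0.3.91 and 0.4.3. EvaluationFactory exists only in the 0.3.91 wheel.
try:
    from isa_model.eval.factory import EvaluationFactory
except ImportError:  # removed as of the 0.4.3 wheel
    EvaluationFactory = None
```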