PyPI - isa-model - Versions diffs - 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

isa_model/client.py +1166 -584
isa_model/core/cache/redis_cache.py +410 -0
isa_model/core/config/config_manager.py +282 -12
isa_model/core/config.py +91 -1
isa_model/core/database/__init__.py +1 -0
isa_model/core/database/direct_db_client.py +114 -0
isa_model/core/database/migration_manager.py +563 -0
isa_model/core/database/migrations.py +297 -0
isa_model/core/database/supabase_client.py +258 -0
isa_model/core/dependencies.py +316 -0
isa_model/core/discovery/__init__.py +19 -0
isa_model/core/discovery/consul_discovery.py +190 -0
isa_model/core/logging/__init__.py +54 -0
isa_model/core/logging/influx_logger.py +523 -0
isa_model/core/logging/loki_logger.py +160 -0
isa_model/core/models/__init__.py +46 -0
isa_model/core/models/config_models.py +625 -0
isa_model/core/models/deployment_billing_tracker.py +430 -0
isa_model/core/models/model_billing_tracker.py +60 -88
isa_model/core/models/model_manager.py +66 -25
isa_model/core/models/model_metadata.py +690 -0
isa_model/core/models/model_repo.py +217 -55
isa_model/core/models/model_statistics_tracker.py +234 -0
isa_model/core/models/model_storage.py +0 -1
isa_model/core/models/model_version_manager.py +959 -0
isa_model/core/models/system_models.py +857 -0
isa_model/core/pricing_manager.py +2 -249
isa_model/core/repositories/__init__.py +9 -0
isa_model/core/repositories/config_repository.py +912 -0
isa_model/core/resilience/circuit_breaker.py +366 -0
isa_model/core/security/secrets.py +358 -0
isa_model/core/services/__init__.py +2 -4
isa_model/core/services/intelligent_model_selector.py +479 -370
isa_model/core/storage/hf_storage.py +2 -2
isa_model/core/types.py +8 -0
isa_model/deployment/__init__.py +5 -48
isa_model/deployment/core/__init__.py +2 -31
isa_model/deployment/core/deployment_manager.py +1278 -368
isa_model/deployment/local/__init__.py +31 -0
isa_model/deployment/local/config.py +248 -0
isa_model/deployment/local/gpu_gateway.py +607 -0
isa_model/deployment/local/health_checker.py +428 -0
isa_model/deployment/local/provider.py +586 -0
isa_model/deployment/local/tensorrt_service.py +621 -0
isa_model/deployment/local/transformers_service.py +644 -0
isa_model/deployment/local/vllm_service.py +527 -0
isa_model/deployment/modal/__init__.py +8 -0
isa_model/deployment/modal/config.py +136 -0
isa_model/deployment/modal/deployer.py +894 -0
isa_model/deployment/modal/services/__init__.py +3 -0
isa_model/deployment/modal/services/audio/__init__.py +1 -0
isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
isa_model/deployment/modal/services/embedding/__init__.py +1 -0
isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
isa_model/deployment/modal/services/llm/__init__.py +1 -0
isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
isa_model/deployment/modal/services/video/__init__.py +1 -0
isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
isa_model/deployment/modal/services/vision/__init__.py +1 -0
isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/storage/__init__.py +5 -0
isa_model/deployment/storage/deployment_repository.py +824 -0
isa_model/deployment/triton/__init__.py +10 -0
isa_model/deployment/triton/config.py +196 -0
isa_model/deployment/triton/configs/__init__.py +1 -0
isa_model/deployment/triton/provider.py +512 -0
isa_model/deployment/triton/scripts/__init__.py +1 -0
isa_model/deployment/triton/templates/__init__.py +1 -0
isa_model/inference/__init__.py +47 -1
isa_model/inference/ai_factory.py +179 -16
isa_model/inference/legacy_services/__init__.py +21 -0
isa_model/inference/legacy_services/model_evaluation.py +637 -0
isa_model/inference/legacy_services/model_service.py +573 -0
isa_model/inference/legacy_services/model_serving.py +717 -0
isa_model/inference/legacy_services/model_training.py +561 -0
isa_model/inference/models/__init__.py +21 -0
isa_model/inference/models/inference_config.py +551 -0
isa_model/inference/models/inference_record.py +675 -0
isa_model/inference/models/performance_models.py +714 -0
isa_model/inference/repositories/__init__.py +9 -0
isa_model/inference/repositories/inference_repository.py +828 -0
isa_model/inference/services/audio/__init__.py +21 -0
isa_model/inference/services/audio/base_realtime_service.py +225 -0
isa_model/inference/services/audio/base_stt_service.py +184 -11
isa_model/inference/services/audio/isa_tts_service.py +0 -0
isa_model/inference/services/audio/openai_realtime_service.py +320 -124
isa_model/inference/services/audio/openai_stt_service.py +53 -11
isa_model/inference/services/base_service.py +17 -1
isa_model/inference/services/custom_model_manager.py +277 -0
isa_model/inference/services/embedding/__init__.py +13 -0
isa_model/inference/services/embedding/base_embed_service.py +111 -8
isa_model/inference/services/embedding/isa_embed_service.py +305 -0
isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
isa_model/inference/services/embedding/openai_embed_service.py +2 -4
isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
isa_model/inference/services/img/__init__.py +2 -2
isa_model/inference/services/img/base_image_gen_service.py +24 -7
isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
isa_model/inference/services/img/services/replicate_flux.py +226 -0
isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
isa_model/inference/services/img/tests/test_img_client.py +297 -0
isa_model/inference/services/llm/__init__.py +10 -2
isa_model/inference/services/llm/base_llm_service.py +361 -26
isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
isa_model/inference/services/llm/local_llm_service.py +747 -0
isa_model/inference/services/llm/ollama_llm_service.py +11 -3
isa_model/inference/services/llm/openai_llm_service.py +670 -56
isa_model/inference/services/llm/yyds_llm_service.py +10 -3
isa_model/inference/services/vision/__init__.py +27 -6
isa_model/inference/services/vision/base_vision_service.py +118 -185
isa_model/inference/services/vision/blip_vision_service.py +359 -0
isa_model/inference/services/vision/helpers/image_utils.py +19 -10
isa_model/inference/services/vision/isa_vision_service.py +634 -0
isa_model/inference/services/vision/openai_vision_service.py +19 -10
isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
isa_model/serving/api/cache_manager.py +245 -0
isa_model/serving/api/dependencies/__init__.py +1 -0
isa_model/serving/api/dependencies/auth.py +194 -0
isa_model/serving/api/dependencies/database.py +139 -0
isa_model/serving/api/error_handlers.py +284 -0
isa_model/serving/api/fastapi_server.py +240 -18
isa_model/serving/api/middleware/auth.py +317 -0
isa_model/serving/api/middleware/security.py +268 -0
isa_model/serving/api/middleware/tenant_context.py +414 -0
isa_model/serving/api/routes/analytics.py +489 -0
isa_model/serving/api/routes/config.py +645 -0
isa_model/serving/api/routes/deployment_billing.py +315 -0
isa_model/serving/api/routes/deployments.py +475 -0
isa_model/serving/api/routes/gpu_gateway.py +440 -0
isa_model/serving/api/routes/health.py +32 -12
isa_model/serving/api/routes/inference_monitoring.py +486 -0
isa_model/serving/api/routes/local_deployments.py +448 -0
isa_model/serving/api/routes/logs.py +430 -0
isa_model/serving/api/routes/settings.py +582 -0
isa_model/serving/api/routes/tenants.py +575 -0
isa_model/serving/api/routes/unified.py +992 -171
isa_model/serving/api/routes/webhooks.py +479 -0
isa_model/serving/api/startup.py +318 -0
isa_model/serving/modal_proxy_server.py +249 -0
isa_model/utils/gpu_utils.py +311 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
isa_model-0.4.3.dist-info/RECORD +193 -0
isa_model/deployment/cloud/__init__.py +0 -9
isa_model/deployment/cloud/modal/__init__.py +0 -10
isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
isa_model/deployment/cloud/modal/register_models.py +0 -321
isa_model/deployment/core/deployment_config.py +0 -356
isa_model/deployment/core/isa_deployment_service.py +0 -401
isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
isa_model/deployment/runtime/deployed_service.py +0 -338
isa_model/deployment/services/__init__.py +0 -9
isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
isa_model/deployment/services/model_service.py +0 -332
isa_model/deployment/services/service_monitor.py +0 -356
isa_model/deployment/services/service_registry.py +0 -527
isa_model/eval/__init__.py +0 -92
isa_model/eval/benchmarks.py +0 -469
isa_model/eval/config/__init__.py +0 -10
isa_model/eval/config/evaluation_config.py +0 -108
isa_model/eval/evaluators/__init__.py +0 -18
isa_model/eval/evaluators/base_evaluator.py +0 -503
isa_model/eval/evaluators/llm_evaluator.py +0 -472
isa_model/eval/factory.py +0 -531
isa_model/eval/infrastructure/__init__.py +0 -24
isa_model/eval/infrastructure/experiment_tracker.py +0 -466
isa_model/eval/metrics.py +0 -798
isa_model/inference/adapter/unified_api.py +0 -248
isa_model/inference/services/helpers/stacked_config.py +0 -148
isa_model/inference/services/img/flux_professional_service.py +0 -603
isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
isa_model/inference/services/others/table_transformer_service.py +0 -61
isa_model/inference/services/vision/doc_analysis_service.py +0 -640
isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
isa_model/inference/services/vision/ui_analysis_service.py +0 -823
isa_model/scripts/inference_tracker.py +0 -283
isa_model/scripts/mlflow_manager.py +0 -379
isa_model/scripts/model_registry.py +0 -465
isa_model/scripts/register_models.py +0 -370
isa_model/scripts/register_models_with_embeddings.py +0 -510
isa_model/scripts/start_mlflow.py +0 -95
isa_model/scripts/training_tracker.py +0 -257
isa_model/training/__init__.py +0 -74
isa_model/training/annotation/annotation_schema.py +0 -47
isa_model/training/annotation/processors/annotation_processor.py +0 -126
isa_model/training/annotation/storage/dataset_manager.py +0 -131
isa_model/training/annotation/storage/dataset_schema.py +0 -44
isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
isa_model/training/annotation/tests/test_minio copy.py +0 -113
isa_model/training/annotation/tests/test_minio_upload.py +0 -43
isa_model/training/annotation/views/annotation_controller.py +0 -158
isa_model/training/cloud/__init__.py +0 -22
isa_model/training/cloud/job_orchestrator.py +0 -402
isa_model/training/cloud/runpod_trainer.py +0 -454
isa_model/training/cloud/storage_manager.py +0 -482
isa_model/training/core/__init__.py +0 -23
isa_model/training/core/config.py +0 -181
isa_model/training/core/dataset.py +0 -222
isa_model/training/core/trainer.py +0 -720
isa_model/training/core/utils.py +0 -213
isa_model/training/factory.py +0 -424
isa_model-0.3.91.dist-info/RECORD +0 -138
/isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
/isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0

isa_model/eval/benchmarks.py DELETED Viewed

@@ -1,469 +0,0 @@
-"""
-Standard AI Benchmarks for ISA Model Framework
-This module provides implementations of standard AI benchmarks:
-- MMLU (Massive Multitask Language Understanding)
-- HellaSwag (Commonsense Reasoning)
-- ARC (AI2 Reasoning Challenge)
-- GSM8K (Grade School Math)
-"""
-import os
-import json
-import logging
-from typing import Dict, List, Any, Optional
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-logger = logging.getLogger(__name__)
-@dataclass
-class BenchmarkConfig:
-    """Configuration for benchmark evaluation."""
-    name: str
-    description: str
-    num_choices: int = 4
-    few_shot_examples: int = 5
-    max_samples: Optional[int] = None
-    subjects: Optional[List[str]] = None
-class BaseBenchmark(ABC):
-    """Base class for all benchmarks."""
-    def __init__(self, config: BenchmarkConfig):
-        self.config = config
-        self.name = config.name
-        self.data = None
-    @abstractmethod
-    def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
-        """Load benchmark data."""
-        pass
-    @abstractmethod
-    def evaluate_sample(self, sample: Dict[str, Any], prediction: str) -> bool:
-        """Evaluate a single sample."""
-        pass
-    def format_prompt(self, sample: Dict[str, Any], few_shot_examples: Optional[List[Dict[str, Any]]] = None) -> str:
-        """Format prompt for the sample."""
-        prompt = ""
-        # Add few-shot examples if provided
-        if few_shot_examples:
-            for example in few_shot_examples:
-                prompt += self._format_single_example(example, include_answer=True) + "\n\n"
-        # Add the actual question
-        prompt += self._format_single_example(sample, include_answer=False)
-        return prompt
-    @abstractmethod
-    def _format_single_example(self, sample: Dict[str, Any], include_answer: bool = False) -> str:
-        """Format a single example."""
-        pass
-class MMLU(BaseBenchmark):
-    """
-    MMLU (Massive Multitask Language Understanding) Benchmark
-    Tests knowledge across 57 subjects including mathematics, history,
-    computer science, law, and more.
-    """
-    def __init__(self, subjects: Optional[List[str]] = None):
-        config = BenchmarkConfig(
-            name="MMLU",
-            description="Massive Multitask Language Understanding",
-            num_choices=4,
-            few_shot_examples=5,
-            subjects=subjects
-        )
-        super().__init__(config)
-        # MMLU subjects
-        self.all_subjects = [
-            "abstract_algebra", "anatomy", "astronomy", "business_ethics",
-            "clinical_knowledge", "college_biology", "college_chemistry",
-            "college_computer_science", "college_mathematics", "college_medicine",
-            "college_physics", "computer_security", "conceptual_physics",
-            "econometrics", "electrical_engineering", "elementary_mathematics",
-            "formal_logic", "global_facts", "high_school_biology",
-            "high_school_chemistry", "high_school_computer_science",
-            "high_school_european_history", "high_school_geography",
-            "high_school_government_and_politics", "high_school_macroeconomics",
-            "high_school_mathematics", "high_school_microeconomics",
-            "high_school_physics", "high_school_psychology", "high_school_statistics",
-            "high_school_us_history", "high_school_world_history", "human_aging",
-            "human_sexuality", "international_law", "jurisprudence",
-            "logical_fallacies", "machine_learning", "management", "marketing",
-            "medical_genetics", "miscellaneous", "moral_disputes", "moral_scenarios",
-            "nutrition", "philosophy", "prehistory", "professional_accounting",
-            "professional_law", "professional_medicine", "professional_psychology",
-            "public_relations", "security_studies", "sociology", "us_foreign_policy",
-            "virology", "world_religions"
-        ]
-        self.subjects = subjects or self.all_subjects[:10]  # Use first 10 subjects by default
-    def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
-        """Load MMLU data (simplified implementation)."""
-        # This is a simplified implementation
-        # In practice, you'd load from the actual MMLU dataset
-        data = []
-        for subject in self.subjects:
-            # Generate sample questions for each subject
-            for i in range(min(10, max_samples // len(self.subjects) if max_samples else 10)):
-                sample = {
-                    "subject": subject,
-                    "question": f"Sample {subject} question {i+1}",
-                    "choices": [
-                        f"Option A for {subject}",
-                        f"Option B for {subject}",
-                        f"Option C for {subject}",
-                        f"Option D for {subject}"
-                    ],
-                    "answer": "A",  # Simplified
-                    "id": f"{subject}_{i}"
-                }
-                data.append(sample)
-        if max_samples:
-            data = data[:max_samples]
-        logger.info(f"Loaded {len(data)} MMLU samples across {len(self.subjects)} subjects")
-        return data
-    def evaluate_sample(self, sample: Dict[str, Any], prediction: str) -> bool:
-        """Evaluate a single MMLU sample."""
-        # Extract the letter choice from prediction
-        prediction = prediction.strip().upper()
-        # Handle various response formats
-        if prediction in ["A", "B", "C", "D"]:
-            return prediction == sample["answer"]
-        elif prediction.startswith("(") and prediction.endswith(")"):
-            letter = prediction[1]
-            return letter == sample["answer"]
-        else:
-            # Try to find A, B, C, or D in the response
-            for choice in ["A", "B", "C", "D"]:
-                if choice in prediction:
-                    return choice == sample["answer"]
-        return False
-    def _format_single_example(self, sample: Dict[str, Any], include_answer: bool = False) -> str:
-        """Format a single MMLU example."""
-        prompt = f"Subject: {sample['subject'].replace('_', ' ').title()}\n"
-        prompt += f"Question: {sample['question']}\n"
-        choices = sample['choices']
-        for i, choice in enumerate(choices):
-            letter = chr(65 + i)  # A, B, C, D
-            prompt += f"{letter}. {choice}\n"
-        if include_answer:
-            prompt += f"Answer: {sample['answer']}"
-        else:
-            prompt += "Answer:"
-        return prompt
-class HellaSwag(BaseBenchmark):
-    """
-    HellaSwag Benchmark
-    Tests commonsense reasoning about physical situations.
-    """
-    def __init__(self):
-        config = BenchmarkConfig(
-            name="HellaSwag",
-            description="Commonsense Reasoning about Physical Situations",
-            num_choices=4,
-            few_shot_examples=10
-        )
-        super().__init__(config)
-    def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
-        """Load HellaSwag data (simplified implementation)."""
-        # This is a simplified implementation
-        # In practice, you'd load from the actual HellaSwag dataset
-        data = []
-        sample_contexts = [
-            "A person is washing dishes in the kitchen",
-            "Someone is riding a bicycle down a hill",
-            "A chef is preparing ingredients for cooking",
-            "A student is taking notes in class",
-            "A gardener is planting flowers"
-        ]
-        for i, context in enumerate(sample_contexts):
-            if max_samples and i >= max_samples:
-                break
-            sample = {
-                "context": context,
-                "question": "What happens next?",
-                "choices": [
-                    f"They continue with the logical next step for scenario {i+1}",
-                    f"They do something completely unrelated to scenario {i+1}",
-                    f"They stop and do something random in scenario {i+1}",
-                    f"They repeat the same action in scenario {i+1}"
-                ],
-                "answer": "A",  # First choice is usually most logical
-                "id": f"hellaswag_{i}"
-            }
-            data.append(sample)
-        logger.info(f"Loaded {len(data)} HellaSwag samples")
-        return data
-    def evaluate_sample(self, sample: Dict[str, Any], prediction: str) -> bool:
-        """Evaluate a single HellaSwag sample."""
-        prediction = prediction.strip().upper()
-        if prediction in ["A", "B", "C", "D"]:
-            return prediction == sample["answer"]
-        # Try to extract choice from longer response
-        for choice in ["A", "B", "C", "D"]:
-            if choice in prediction:
-                return choice == sample["answer"]
-        return False
-    def _format_single_example(self, sample: Dict[str, Any], include_answer: bool = False) -> str:
-        """Format a single HellaSwag example."""
-        prompt = f"Context: {sample['context']}\n"
-        prompt += f"Question: {sample['question']}\n"
-        choices = sample['choices']
-        for i, choice in enumerate(choices):
-            letter = chr(65 + i)  # A, B, C, D
-            prompt += f"{letter}. {choice}\n"
-        if include_answer:
-            prompt += f"Answer: {sample['answer']}"
-        else:
-            prompt += "Answer:"
-        return prompt
-class ARC(BaseBenchmark):
-    """
-    ARC (AI2 Reasoning Challenge) Benchmark
-    Tests scientific reasoning with grade-school level science questions.
-    """
-    def __init__(self, challenge_set: str = "easy"):
-        config = BenchmarkConfig(
-            name=f"ARC-{challenge_set}",
-            description=f"AI2 Reasoning Challenge ({challenge_set})",
-            num_choices=4,
-            few_shot_examples=25
-        )
-        super().__init__(config)
-        self.challenge_set = challenge_set  # "easy" or "challenge"
-    def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
-        """Load ARC data (simplified implementation)."""
-        # This is a simplified implementation
-        # In practice, you'd load from the actual ARC dataset
-        data = []
-        sample_questions = [
-            {
-                "question": "What happens to water when it freezes?",
-                "choices": ["It becomes ice", "It becomes gas", "It disappears", "It becomes hot"],
-                "answer": "A"
-            },
-            {
-                "question": "Which planet is closest to the Sun?",
-                "choices": ["Earth", "Mars", "Mercury", "Venus"],
-                "answer": "C"
-            },
-            {
-                "question": "What do plants need to make their own food?",
-                "choices": ["Sunlight and water", "Only water", "Only sunlight", "Soil only"],
-                "answer": "A"
-            },
-            {
-                "question": "What is the main gas in Earth's atmosphere?",
-                "choices": ["Oxygen", "Carbon dioxide", "Nitrogen", "Hydrogen"],
-                "answer": "C"
-            },
-            {
-                "question": "How many legs does a spider have?",
-                "choices": ["6", "8", "10", "12"],
-                "answer": "B"
-            }
-        ]
-        for i, q in enumerate(sample_questions):
-            if max_samples and i >= max_samples:
-                break
-            sample = {
-                "question": q["question"],
-                "choices": q["choices"],
-                "answer": q["answer"],
-                "challenge_set": self.challenge_set,
-                "id": f"arc_{self.challenge_set}_{i}"
-            }
-            data.append(sample)
-        logger.info(f"Loaded {len(data)} ARC-{self.challenge_set} samples")
-        return data
-    def evaluate_sample(self, sample: Dict[str, Any], prediction: str) -> bool:
-        """Evaluate a single ARC sample."""
-        prediction = prediction.strip().upper()
-        if prediction in ["A", "B", "C", "D"]:
-            return prediction == sample["answer"]
-        # Try to extract choice from longer response
-        for choice in ["A", "B", "C", "D"]:
-            if choice in prediction:
-                return choice == sample["answer"]
-        return False
-    def _format_single_example(self, sample: Dict[str, Any], include_answer: bool = False) -> str:
-        """Format a single ARC example."""
-        prompt = f"Question: {sample['question']}\n"
-        choices = sample['choices']
-        for i, choice in enumerate(choices):
-            letter = chr(65 + i)  # A, B, C, D
-            prompt += f"{letter}. {choice}\n"
-        if include_answer:
-            prompt += f"Answer: {sample['answer']}"
-        else:
-            prompt += "Answer:"
-        return prompt
-class GSM8K(BaseBenchmark):
-    """
-    GSM8K Benchmark
-    Tests mathematical reasoning with grade school math word problems.
-    """
-    def __init__(self):
-        config = BenchmarkConfig(
-            name="GSM8K",
-            description="Grade School Math 8K",
-            num_choices=1,  # Open-ended numerical answers
-            few_shot_examples=8
-        )
-        super().__init__(config)
-    def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
-        """Load GSM8K data (simplified implementation)."""
-        # This is a simplified implementation
-        # In practice, you'd load from the actual GSM8K dataset
-        data = []
-        sample_problems = [
-            {
-                "question": "Janet has 12 apples. She gives 3 apples to her friend and eats 2 apples. How many apples does Janet have left?",
-                "answer": "7"
-            },
-            {
-                "question": "A school has 24 students in each class. If there are 5 classes, how many students are there in total?",
-                "answer": "120"
-            },
-            {
-                "question": "Tom buys 4 books for $8 each. How much money does Tom spend in total?",
-                "answer": "32"
-            },
-            {
-                "question": "Sarah has 36 stickers. She wants to put them equally into 6 albums. How many stickers will be in each album?",
-                "answer": "6"
-            },
-            {
-                "question": "A rectangle has a length of 15 cm and a width of 8 cm. What is the area of the rectangle?",
-                "answer": "120"
-            }
-        ]
-        for i, problem in enumerate(sample_problems):
-            if max_samples and i >= max_samples:
-                break
-            sample = {
-                "question": problem["question"],
-                "answer": problem["answer"],
-                "id": f"gsm8k_{i}"
-            }
-            data.append(sample)
-        logger.info(f"Loaded {len(data)} GSM8K samples")
-        return data
-    def evaluate_sample(self, sample: Dict[str, Any], prediction: str) -> bool:
-        """Evaluate a single GSM8K sample."""
-        # Extract numerical answer from prediction
-        prediction = prediction.strip()
-        # Try to find the numerical answer
-        import re
-        numbers = re.findall(r'\d+', prediction)
-        if numbers:
-            # Take the last number found (often the final answer)
-            predicted_answer = numbers[-1]
-            return predicted_answer == sample["answer"]
-        return False
-    def _format_single_example(self, sample: Dict[str, Any], include_answer: bool = False) -> str:
-        """Format a single GSM8K example."""
-        prompt = f"Problem: {sample['question']}\n"
-        if include_answer:
-            prompt += f"Answer: {sample['answer']}"
-        else:
-            prompt += "Answer:"
-        return prompt
-# Convenience functions for creating benchmark instances
-def create_mmlu_benchmark(subjects: Optional[List[str]] = None) -> MMLU:
-    """Create MMLU benchmark instance."""
-    return MMLU(subjects=subjects)
-def create_hellaswag_benchmark() -> HellaSwag:
-    """Create HellaSwag benchmark instance."""
-    return HellaSwag()
-def create_arc_benchmark(challenge_set: str = "easy") -> ARC:
-    """Create ARC benchmark instance."""
-    return ARC(challenge_set=challenge_set)
-def create_gsm8k_benchmark() -> GSM8K:
-    """Create GSM8K benchmark instance."""
-    return GSM8K()

isa_model/eval/config/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-"""
-Configuration management for evaluation framework.
-"""
-from .evaluation_config import EvaluationConfig, ConfigManager
-__all__ = [
-    "EvaluationConfig",
-    "ConfigManager"
-]

isa_model/eval/config/evaluation_config.py DELETED Viewed

@@ -1,108 +0,0 @@
-"""
-Configuration management for evaluation framework
-"""
-import os
-import json
-import logging
-from typing import Dict, Any, Optional, List
-from dataclasses import dataclass, asdict
-from pathlib import Path
-logger = logging.getLogger(__name__)
-@dataclass
-class EvaluationConfig:
-    """
-    Configuration class for evaluation settings.
-    """
-    # General settings
-    output_dir: str = "evaluation_results"
-    max_concurrent_evaluations: int = 3
-    timeout_seconds: int = 600
-    # Model settings
-    default_provider: str = "openai"
-    default_max_tokens: int = 150
-    default_temperature: float = 0.1
-    batch_size: int = 8
-    # Metrics settings
-    compute_all_metrics: bool = False
-    custom_metrics: List[str] = None
-    # Benchmark settings
-    max_samples_per_benchmark: Optional[int] = None
-    enable_few_shot: bool = True
-    num_shots: int = 5
-    # Experiment tracking
-    use_wandb: bool = False
-    wandb_project: Optional[str] = None
-    wandb_entity: Optional[str] = None
-    use_mlflow: bool = False
-    mlflow_tracking_uri: Optional[str] = None
-    # Results settings
-    save_predictions: bool = True
-    save_detailed_results: bool = True
-    export_format: str = "json"  # json, csv, html
-    def __post_init__(self):
-        """Initialize default values after creation."""
-        if self.custom_metrics is None:
-            self.custom_metrics = []
-        # Ensure output directory exists
-        os.makedirs(self.output_dir, exist_ok=True)
-    @classmethod
-    def from_dict(cls, config_dict: Dict[str, Any]) -> 'EvaluationConfig':
-        """
-        Create configuration from dictionary.
-        Args:
-            config_dict: Configuration dictionary
-        Returns:
-            EvaluationConfig instance
-        """
-        # Filter out unknown keys
-        valid_keys = {field.name for field in cls.__dataclass_fields__.values()}
-        filtered_dict = {k: v for k, v in config_dict.items() if k in valid_keys}
-        return cls(**filtered_dict)
-    def to_dict(self) -> Dict[str, Any]:
-        """
-        Convert configuration to dictionary.
-        Returns:
-            Configuration as dictionary
-        """
-        return asdict(self)
-class ConfigManager:
-    """Manager for handling multiple evaluation configurations."""
-    def __init__(self, config_dir: str = "configs"):
-        """Initialize configuration manager."""
-        self.config_dir = config_dir
-        self.configs: Dict[str, EvaluationConfig] = {}
-        self.default_config = EvaluationConfig()
-        # Ensure config directory exists
-        os.makedirs(config_dir, exist_ok=True)
-    def get_config(self, config_name: Optional[str] = None) -> EvaluationConfig:
-        """Get configuration by name."""
-        if config_name is None:
-            return self.default_config
-        if config_name in self.configs:
-            return self.configs[config_name]
-        return self.default_config

isa_model/eval/evaluators/__init__.py DELETED Viewed

@@ -1,18 +0,0 @@
-"""
-Evaluators module for ISA Model Framework
-Provides specialized evaluators for different model types and evaluation tasks.
-"""
-from .base_evaluator import BaseEvaluator, EvaluationResult
-from .llm_evaluator import LLMEvaluator
-from .vision_evaluator import VisionEvaluator
-from .multimodal_evaluator import MultimodalEvaluator
-__all__ = [
-    "BaseEvaluator",
-    "EvaluationResult",
-    "LLMEvaluator",
-    "VisionEvaluator",
-    "MultimodalEvaluator"
-]

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl