PyPI - isa-model - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (199) hide show

isa_model/client.py +466 -43
isa_model/core/cache/redis_cache.py +12 -3
isa_model/core/config/config_manager.py +230 -3
isa_model/core/config.py +90 -0
isa_model/core/database/direct_db_client.py +114 -0
isa_model/core/database/migration_manager.py +563 -0
isa_model/core/database/migrations.py +21 -1
isa_model/core/database/supabase_client.py +154 -19
isa_model/core/dependencies.py +316 -0
isa_model/core/discovery/__init__.py +19 -0
isa_model/core/discovery/consul_discovery.py +190 -0
isa_model/core/logging/__init__.py +54 -0
isa_model/core/logging/influx_logger.py +523 -0
isa_model/core/logging/loki_logger.py +160 -0
isa_model/core/models/__init__.py +27 -18
isa_model/core/models/config_models.py +625 -0
isa_model/core/models/deployment_billing_tracker.py +430 -0
isa_model/core/models/model_manager.py +40 -17
isa_model/core/models/model_metadata.py +690 -0
isa_model/core/models/model_repo.py +174 -18
isa_model/core/models/system_models.py +857 -0
isa_model/core/repositories/__init__.py +9 -0
isa_model/core/repositories/config_repository.py +912 -0
isa_model/core/services/intelligent_model_selector.py +399 -21
isa_model/core/storage/hf_storage.py +1 -1
isa_model/core/types.py +1 -0
isa_model/deployment/__init__.py +5 -48
isa_model/deployment/core/__init__.py +2 -31
isa_model/deployment/core/deployment_manager.py +1278 -370
isa_model/deployment/local/__init__.py +31 -0
isa_model/deployment/local/config.py +248 -0
isa_model/deployment/local/gpu_gateway.py +607 -0
isa_model/deployment/local/health_checker.py +428 -0
isa_model/deployment/local/provider.py +586 -0
isa_model/deployment/local/tensorrt_service.py +621 -0
isa_model/deployment/local/transformers_service.py +644 -0
isa_model/deployment/local/vllm_service.py +527 -0
isa_model/deployment/modal/__init__.py +8 -0
isa_model/deployment/modal/config.py +136 -0
isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
isa_model/deployment/modal/services/__init__.py +3 -0
isa_model/deployment/modal/services/audio/__init__.py +1 -0
isa_model/deployment/modal/services/embedding/__init__.py +1 -0
isa_model/deployment/modal/services/llm/__init__.py +1 -0
isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
isa_model/deployment/modal/services/video/__init__.py +1 -0
isa_model/deployment/modal/services/vision/__init__.py +1 -0
isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/storage/__init__.py +5 -0
isa_model/deployment/storage/deployment_repository.py +824 -0
isa_model/deployment/triton/__init__.py +10 -0
isa_model/deployment/triton/config.py +196 -0
isa_model/deployment/triton/configs/__init__.py +1 -0
isa_model/deployment/triton/provider.py +512 -0
isa_model/deployment/triton/scripts/__init__.py +1 -0
isa_model/deployment/triton/templates/__init__.py +1 -0
isa_model/inference/__init__.py +47 -1
isa_model/inference/ai_factory.py +137 -10
isa_model/inference/legacy_services/__init__.py +21 -0
isa_model/inference/legacy_services/model_evaluation.py +637 -0
isa_model/inference/legacy_services/model_service.py +573 -0
isa_model/inference/legacy_services/model_serving.py +717 -0
isa_model/inference/legacy_services/model_training.py +561 -0
isa_model/inference/models/__init__.py +21 -0
isa_model/inference/models/inference_config.py +551 -0
isa_model/inference/models/inference_record.py +675 -0
isa_model/inference/models/performance_models.py +714 -0
isa_model/inference/repositories/__init__.py +9 -0
isa_model/inference/repositories/inference_repository.py +828 -0
isa_model/inference/services/audio/base_stt_service.py +184 -11
isa_model/inference/services/audio/openai_stt_service.py +22 -6
isa_model/inference/services/custom_model_manager.py +277 -0
isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
isa_model/inference/services/llm/__init__.py +10 -2
isa_model/inference/services/llm/base_llm_service.py +335 -24
isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
isa_model/inference/services/llm/local_llm_service.py +747 -0
isa_model/inference/services/llm/ollama_llm_service.py +9 -2
isa_model/inference/services/llm/openai_llm_service.py +33 -16
isa_model/inference/services/llm/yyds_llm_service.py +8 -2
isa_model/inference/services/vision/__init__.py +22 -1
isa_model/inference/services/vision/blip_vision_service.py +359 -0
isa_model/inference/services/vision/helpers/image_utils.py +8 -5
isa_model/inference/services/vision/isa_vision_service.py +65 -4
isa_model/inference/services/vision/openai_vision_service.py +19 -10
isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
isa_model/serving/api/cache_manager.py +245 -0
isa_model/serving/api/dependencies/__init__.py +1 -0
isa_model/serving/api/dependencies/auth.py +194 -0
isa_model/serving/api/dependencies/database.py +139 -0
isa_model/serving/api/error_handlers.py +284 -0
isa_model/serving/api/fastapi_server.py +172 -22
isa_model/serving/api/middleware/auth.py +8 -2
isa_model/serving/api/middleware/security.py +23 -33
isa_model/serving/api/middleware/tenant_context.py +414 -0
isa_model/serving/api/routes/analytics.py +4 -1
isa_model/serving/api/routes/config.py +645 -0
isa_model/serving/api/routes/deployment_billing.py +315 -0
isa_model/serving/api/routes/deployments.py +138 -2
isa_model/serving/api/routes/gpu_gateway.py +440 -0
isa_model/serving/api/routes/health.py +32 -12
isa_model/serving/api/routes/inference_monitoring.py +486 -0
isa_model/serving/api/routes/local_deployments.py +448 -0
isa_model/serving/api/routes/tenants.py +575 -0
isa_model/serving/api/routes/unified.py +680 -18
isa_model/serving/api/routes/webhooks.py +479 -0
isa_model/serving/api/startup.py +68 -54
isa_model/utils/gpu_utils.py +311 -0
{isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
isa_model-0.4.3.dist-info/RECORD +193 -0
isa_model/core/storage/minio_storage.py +0 -0
isa_model/deployment/cloud/__init__.py +0 -9
isa_model/deployment/cloud/modal/__init__.py +0 -10
isa_model/deployment/core/deployment_config.py +0 -356
isa_model/deployment/core/isa_deployment_service.py +0 -401
isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
isa_model/deployment/runtime/deployed_service.py +0 -338
isa_model/deployment/services/__init__.py +0 -9
isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
isa_model/deployment/services/model_service.py +0 -332
isa_model/deployment/services/service_monitor.py +0 -356
isa_model/deployment/services/service_registry.py +0 -527
isa_model/eval/__init__.py +0 -92
isa_model/eval/benchmarks/__init__.py +0 -27
isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
isa_model/eval/benchmarks.py +0 -701
isa_model/eval/config/__init__.py +0 -10
isa_model/eval/config/evaluation_config.py +0 -108
isa_model/eval/evaluators/__init__.py +0 -24
isa_model/eval/evaluators/audio_evaluator.py +0 -727
isa_model/eval/evaluators/base_evaluator.py +0 -503
isa_model/eval/evaluators/embedding_evaluator.py +0 -742
isa_model/eval/evaluators/llm_evaluator.py +0 -472
isa_model/eval/evaluators/vision_evaluator.py +0 -564
isa_model/eval/example_evaluation.py +0 -395
isa_model/eval/factory.py +0 -798
isa_model/eval/infrastructure/__init__.py +0 -24
isa_model/eval/infrastructure/experiment_tracker.py +0 -466
isa_model/eval/isa_benchmarks.py +0 -700
isa_model/eval/isa_integration.py +0 -582
isa_model/eval/metrics.py +0 -951
isa_model/eval/tests/unit/test_basic.py +0 -396
isa_model/serving/api/routes/evaluations.py +0 -579
isa_model/training/__init__.py +0 -168
isa_model/training/annotation/annotation_schema.py +0 -47
isa_model/training/annotation/processors/annotation_processor.py +0 -126
isa_model/training/annotation/storage/dataset_manager.py +0 -131
isa_model/training/annotation/storage/dataset_schema.py +0 -44
isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
isa_model/training/annotation/tests/test_minio copy.py +0 -113
isa_model/training/annotation/tests/test_minio_upload.py +0 -43
isa_model/training/annotation/views/annotation_controller.py +0 -158
isa_model/training/cloud/__init__.py +0 -22
isa_model/training/cloud/job_orchestrator.py +0 -402
isa_model/training/cloud/runpod_trainer.py +0 -454
isa_model/training/cloud/storage_manager.py +0 -482
isa_model/training/core/__init__.py +0 -26
isa_model/training/core/config.py +0 -181
isa_model/training/core/dataset.py +0 -222
isa_model/training/core/trainer.py +0 -720
isa_model/training/core/utils.py +0 -213
isa_model/training/examples/intelligent_training_example.py +0 -281
isa_model/training/factory.py +0 -424
isa_model/training/intelligent/__init__.py +0 -25
isa_model/training/intelligent/decision_engine.py +0 -643
isa_model/training/intelligent/intelligent_factory.py +0 -888
isa_model/training/intelligent/knowledge_base.py +0 -751
isa_model/training/intelligent/resource_optimizer.py +0 -839
isa_model/training/intelligent/task_classifier.py +0 -576
isa_model/training/storage/__init__.py +0 -24
isa_model/training/storage/core_integration.py +0 -439
isa_model/training/storage/training_repository.py +0 -552
isa_model/training/storage/training_storage.py +0 -628
isa_model-0.4.0.dist-info/RECORD +0 -182
/isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
/isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
{isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
{isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0

isa_model/training/__init__.py DELETED Viewed

@@ -1,168 +0,0 @@
-"""
-ISA Model Training Module
-Provides unified training capabilities for AI models including:
-- Local training with SFT (Supervised Fine-Tuning)
-- Cloud training on RunPod
-- Model evaluation and management
-- HuggingFace integration
-- 🧠 Intelligent training with AI-powered optimization
-Example usage:
-    ```python
-    from isa_model.training import TrainingFactory, train_gemma
-    # Quick Gemma training
-    model_path = train_gemma(
-        dataset_path="tatsu-lab/alpaca",
-        model_size="4b",
-        num_epochs=3
-    )
-    # Advanced training with custom configuration
-    factory = TrainingFactory()
-    model_path = factory.train_model(
-        model_name="google/gemma-2-4b-it",
-        dataset_path="your-dataset.json",
-        use_lora=True,
-        batch_size=4,
-        num_epochs=3
-    )
-    # 🧠 Intelligent training with natural language
-    from isa_model.training import IntelligentTrainingFactory
-    intelligent_factory = IntelligentTrainingFactory()
-    recommendation = intelligent_factory.analyze_training_request(
-        "Train a customer service chatbot for medical domain",
-        dataset_path="medical_dialogues.json",
-        quality_target="high",
-        budget_limit=200.0
-    )
-    model_path = intelligent_factory.train_with_recommendation(recommendation)
-    ```
-"""
-# Import the new clean factory
-from .factory import TrainingFactory, train_gemma
-# Import core components
-from .core import (
-    TrainingConfig,
-    LoRAConfig,
-    DatasetConfig,
-    BaseTrainer,
-    SFTTrainer,
-    TrainingUtils,
-    DatasetManager,
-    RunPodConfig,
-    StorageConfig,
-    JobConfig
-)
-# Import cloud training components
-from .cloud import (
-    TrainingJobOrchestrator
-)
-# Import intelligent training components (optional)
-try:
-    from .intelligent import (
-        IntelligentTrainingFactory,
-        IntelligentDecisionEngine,
-        TaskClassifier,
-        KnowledgeBase,
-        ResourceOptimizer,
-        TrainingRequest,
-        TrainingRecommendation
-    )
-    INTELLIGENT_AVAILABLE = True
-except ImportError as e:
-    INTELLIGENT_AVAILABLE = False
-    # Create placeholder classes for graceful degradation
-    class IntelligentTrainingFactory:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Intelligent training features not available. Please install required dependencies.")
-    class IntelligentDecisionEngine:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Intelligent training features not available.")
-    class TaskClassifier:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Intelligent training features not available.")
-    class KnowledgeBase:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Intelligent training features not available.")
-    class ResourceOptimizer:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Intelligent training features not available.")
-    class TrainingRequest:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Intelligent training features not available.")
-    class TrainingRecommendation:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Intelligent training features not available.")
-__all__ = [
-    # Main factory
-    'TrainingFactory',
-    'train_gemma',
-    # Core components
-    'TrainingConfig',
-    'LoRAConfig',
-    'DatasetConfig',
-    'BaseTrainer',
-    'SFTTrainer',
-    'TrainingUtils',
-    'DatasetManager',
-    # Cloud components
-    'RunPodConfig',
-    'StorageConfig',
-    'JobConfig',
-    'TrainingJobOrchestrator',
-    # Intelligent training components
-    'IntelligentTrainingFactory',
-    'IntelligentDecisionEngine',
-    'TaskClassifier',
-    'KnowledgeBase',
-    'ResourceOptimizer',
-    'TrainingRequest',
-    'TrainingRecommendation',
-    'INTELLIGENT_AVAILABLE',
-    # Training storage components (optional)
-    'TrainingStorage',
-    'TrainingRepository',
-    'CoreModelIntegration'
-]
-# Import training storage components (optional)
-try:
-    from .storage import (
-        TrainingStorage,
-        TrainingRepository,
-        CoreModelIntegration
-    )
-    STORAGE_AVAILABLE = True
-except ImportError:
-    STORAGE_AVAILABLE = False
-    # Create placeholder classes for graceful degradation
-    class TrainingStorage:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Training storage features not available.")
-    class TrainingRepository:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Training repository features not available.")
-    class CoreModelIntegration:
-        def __init__(self, *args, **kwargs):
-            raise ImportError("Core model integration features not available.")

isa_model/training/annotation/annotation_schema.py DELETED Viewed

@@ -1,47 +0,0 @@
-# app/services/llm_model/tracing/annotation/annotation_schema.py
-from enum import Enum
-from pydantic import BaseModel, Field
-from typing import Dict, Any, List, Optional
-from datetime import datetime
-class AnnotationType(str, Enum):
-    ACCURACY = "accuracy"
-    HELPFULNESS = "helpfulness"
-    TOXICITY = "toxicity"
-    CUSTOM = "custom"
-class RatingScale(int, Enum):
-    POOR = 1
-    FAIR = 2
-    GOOD = 3
-    EXCELLENT = 4
-class AnnotationAspects(BaseModel):
-    factually_correct: bool = True
-    relevant: bool = True
-    harmful: bool = False
-    biased: bool = False
-    complete: bool = True
-    efficient: bool = True
-class BetterResponse(BaseModel):
-    content: str
-    reason: Optional[str]
-    metadata: Optional[Dict[str, Any]] = {}
-class AnnotationFeedback(BaseModel):
-    rating: RatingScale
-    category: AnnotationType
-    aspects: AnnotationAspects
-    better_response: Optional[BetterResponse]
-    comment: Optional[str]
-    metadata: Optional[Dict[str, Any]] = {}
-    is_selected_for_training: bool = False
-class ItemAnnotation(BaseModel):
-    item_id: str
-    feedback: Optional[AnnotationFeedback]
-    status: str = "pending"
-    annotated_at: Optional[datetime]
-    annotator_id: Optional[str]
-    training_status: Optional[str] = None

isa_model/training/annotation/processors/annotation_processor.py DELETED Viewed

@@ -1,126 +0,0 @@
-from typing import Dict, Any, List
-from datetime import datetime
-from app.config.config_manager import config_manager
-from app.services.training.llm_model.annotation.annotation_schema import AnnotationFeedback, RatingScale, AnnotationAspects
-from bson.objectid import ObjectId
-from app.services.training.llm_model.annotation.storage.dataset_manager import DatasetManager
-class AnnotationProcessor:
-    def __init__(self):
-        self.logger = config_manager.get_logger(__name__)
-        self.dataset_manager = DatasetManager()
-        self.batch_size = 1000  # Configure as needed
-    async def process_queue(self) -> None:
-        """Process pending items and create datasets"""
-        db = await config_manager.get_db('mongodb')
-        queue = db['training_queue']
-        # Process SFT items
-        sft_items = await self._get_pending_items("sft")
-        if len(sft_items) >= self.batch_size:
-            await self._create_sft_dataset(sft_items)
-        # Process RLHF items
-        rlhf_items = await self._get_pending_items("rlhf")
-        if len(rlhf_items) >= self.batch_size:
-            await self._create_rlhf_dataset(rlhf_items)
-    async def _create_sft_dataset(self, items: List[Dict[str, Any]]):
-        """Create and upload SFT dataset"""
-        dataset = await self.dataset_manager.create_dataset(
-            name=f"sft_dataset_v{datetime.now().strftime('%Y%m%d')}",
-            type="sft",
-            version=datetime.now().strftime("%Y%m%d"),
-            source_annotations=[item["annotation_id"] for item in items]
-        )
-        formatted_data = [
-            await self._process_sft_item(item)
-            for item in items
-        ]
-        await self.dataset_manager.upload_dataset_file(
-            dataset.id,
-            formatted_data
-        )
-    async def _process_sft_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
-        """Process item for SFT dataset generation
-        Format follows HF conversation format for SFT training
-        """
-        db = await config_manager.get_db('mongodb')
-        annotations = db['annotations']
-        # Get full annotation context
-        annotation = await annotations.find_one({"_id": ObjectId(item["annotation_id"])})
-        target_item = next(i for i in annotation["items"] if i["item_id"] == item["item_id"])
-        # Format as conversation
-        messages = [
-            {
-                "role": "system",
-                "content": "You are a helpful AI assistant that provides accurate and relevant information."
-            },
-            {
-                "role": "user",
-                "content": target_item["input"]["messages"][0]["content"]
-            },
-            {
-                "role": "assistant",
-                "content": target_item["output"]["content"]
-            }
-        ]
-        return {
-            "messages": messages,
-            "metadata": {
-                "rating": item["feedback"]["rating"],
-                "aspects": item["feedback"]["aspects"],
-                "category": item["feedback"]["category"]
-            }
-        }
-    async def _process_rlhf_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
-        """Process item for RLHF dataset generation
-        Format follows preference pairs structure for RLHF training
-        """
-        db = await config_manager.get_db('mongodb')
-        annotations = db['annotations']
-        # Get full annotation context
-        annotation = await annotations.find_one({"_id": ObjectId(item["annotation_id"])})
-        target_item = next(i for i in annotation["items"] if i["item_id"] == item["item_id"])
-        # Format as preference pairs
-        return {
-            "prompt": target_item["input"]["messages"][0]["content"],
-            "chosen": item["feedback"]["better_response"]["content"],
-            "rejected": target_item["output"]["content"],
-            "metadata": {
-                "reason": item["feedback"]["better_response"]["reason"],
-                "category": item["feedback"]["category"]
-            }
-        }
-    async def get_training_data(
-        self,
-        data_type: str,
-        limit: int = 1000
-    ) -> List[Dict[str, Any]]:
-        """Retrieve formatted training data"""
-        db = await config_manager.get_db('mongodb')
-        training_data = db['training_data']
-        data = await training_data.find(
-            {"type": data_type}
-        ).limit(limit).to_list(length=limit)
-        if data_type == "sft":
-            return [item["data"]["messages"] for item in data]
-        else:  # rlhf
-            return [{
-                "prompt": item["data"]["prompt"],
-                "chosen": item["data"]["chosen"],
-                "rejected": item["data"]["rejected"]
-            } for item in data]

isa_model/training/annotation/storage/dataset_manager.py DELETED Viewed

@@ -1,131 +0,0 @@
-# app/services/llm_model/annotation/dataset/dataset_manager.py
-from typing import Dict, Any, List
-from datetime import datetime
-import json
-import io
-from app.config.config_manager import config_manager
-from .dataset_schema import Dataset, DatasetType, DatasetStatus, DatasetFiles, DatasetStats
-from bson import ObjectId
-class DatasetManager:
-    def __init__(self):
-        self.logger = config_manager.get_logger(__name__)
-        self.minio_client = None
-        self.bucket_name = "training-datasets"
-    async def _ensure_minio_client(self):
-        if not self.minio_client:
-            self.minio_client = await config_manager.get_storage_client()
-    async def create_dataset(
-        self,
-        name: str,
-        type: DatasetType,
-        version: str,
-        source_annotations: List[str]
-    ) -> Dataset:
-        """Create a new dataset record"""
-        db = await config_manager.get_db('mongodb')
-        collection = db['training_datasets']
-        dataset = Dataset(
-            name=name,
-            type=type,
-            version=version,
-            storage_path=f"datasets/{type.value}/{version}",
-            files=DatasetFiles(
-                train="train.jsonl",
-                eval=None,
-                test=None
-            ),
-            stats=DatasetStats(
-                total_examples=0,
-                avg_length=0.0,
-                num_conversations=0,
-                additional_metrics={}
-            ),
-            source_annotations=source_annotations,
-            created_at=datetime.utcnow(),
-            status=DatasetStatus.PENDING,
-            metadata={}
-        )
-        result = await collection.insert_one(dataset.dict(exclude={'id'}))
-        return Dataset(**{**dataset.dict(), '_id': result.inserted_id})
-    async def upload_dataset_file(
-        self,
-        dataset_id: str,
-        data: List[Dict[str, Any]],
-        file_type: str = "train"
-    ) -> bool:
-        """Upload dataset to MinIO"""
-        try:
-            await self._ensure_minio_client()
-            db = await config_manager.get_db('mongodb')
-            object_id = ObjectId(dataset_id)
-            dataset = await db['training_datasets'].find_one({"_id": object_id})
-            if not dataset:
-                self.logger.error(f"Dataset not found with id: {dataset_id}")
-                return False
-            # Convert to JSONL
-            buffer = io.StringIO()
-            for item in data:
-                buffer.write(json.dumps(item) + "\n")
-            storage_path = dataset['storage_path'].rstrip('/')
-            file_path = f"{storage_path}/{file_type}.jsonl"
-            buffer_value = buffer.getvalue().encode()
-            self.logger.debug(f"Uploading to MinIO path: {file_path}")
-            self.minio_client.put_object(
-                self.bucket_name,
-                file_path,
-                io.BytesIO(buffer_value),
-                len(buffer_value)
-            )
-            avg_length = sum(len(str(item)) for item in data) / len(data) if data else 0
-            await db['training_datasets'].update_one(
-                {"_id": object_id},
-                {
-                    "$set": {
-                        f"files.{file_type}": f"{file_type}.jsonl",
-                        "stats.total_examples": len(data),
-                        "stats.avg_length": avg_length,
-                        "stats.num_conversations": len(data),
-                        "status": DatasetStatus.READY
-                    }
-                }
-            )
-            return True
-        except Exception as e:
-            self.logger.error(f"Failed to upload dataset: {e}")
-            return False
-    async def get_dataset_info(self, dataset_id: str) -> Dict[str, Any]:
-        """Get dataset information"""
-        try:
-            db = await config_manager.get_db('mongodb')
-            object_id = ObjectId(dataset_id)  # Convert string ID to ObjectId
-            dataset = await db['training_datasets'].find_one({"_id": object_id})
-            if not dataset:
-                self.logger.error(f"Dataset not found with id: {dataset_id}")
-                return None
-            # Convert ObjectId to string for JSON serialization
-            dataset['_id'] = str(dataset['_id'])
-            return dataset
-        except Exception as e:
-            self.logger.error(f"Failed to get dataset info: {e}")
-            return None

isa_model/training/annotation/storage/dataset_schema.py DELETED Viewed

@@ -1,44 +0,0 @@
-# app/services/llm_model/annotation/dataset/dataset_schema.py
-from enum import Enum
-from pydantic import BaseModel, Field
-from typing import Dict, List, Optional
-from datetime import datetime
-from bson import ObjectId
-class DatasetType(str, Enum):
-    SFT = "sft"
-    RLHF = "rlhf"
-class DatasetStatus(str, Enum):
-    PENDING = "pending"
-    PROCESSING = "processing"
-    READY = "ready"
-    ERROR = "error"
-class DatasetFiles(BaseModel):
-    train: str
-    eval: Optional[str]
-    test: Optional[str]
-class DatasetStats(BaseModel):
-    total_examples: int
-    avg_length: Optional[float]
-    num_conversations: Optional[int]
-    additional_metrics: Optional[Dict] = {}
-class Dataset(BaseModel):
-    id: Optional[ObjectId] = Field(None, alias="_id")
-    name: str
-    type: DatasetType
-    version: str
-    storage_path: str
-    files: DatasetFiles
-    stats: DatasetStats
-    source_annotations: List[str]
-    created_at: datetime
-    status: DatasetStatus
-    metadata: Optional[Dict] = {}
-    class Config:
-        arbitrary_types_allowed = True
-        populate_by_name = True

isa_model/training/annotation/tests/test_annotation_flow.py DELETED Viewed

@@ -1,109 +0,0 @@
-# test_annotation_flow.py
-import os
-os.environ["ENV"] = "local"
-import asyncio
-from datetime import datetime
-from bson import ObjectId
-from app.services.llm_model.annotation.views.annotation_controller import AnnotationController
-from app.services.llm_model.annotation.processors.annotation_processor import AnnotationProcessor
-from app.services.llm_model.annotation.annotation_schema import (
-    AnnotationFeedback,
-    RatingScale,
-    AnnotationType,
-    AnnotationAspects,
-    BetterResponse
-)
-from app.config.config_manager import config_manager
-async def setup_test_data():
-    """Setup initial test data in MongoDB"""
-    db = await config_manager.get_db('mongodb')
-    # Create a test annotation
-    test_annotation = {
-        "_id": ObjectId(),
-        "project_name": "test_project",
-        "items": [{
-            "item_id": "test_item_1",
-            "input": {
-                "messages": [{
-                    "role": "user",
-                    "content": "What is the capital of France?"
-                }]
-            },
-            "output": {
-                "content": "The capital of France is Paris."
-            },
-            "status": "pending"
-        }],
-        "created_at": datetime.utcnow().isoformat()
-    }
-    await db['annotations'].insert_one(test_annotation)
-    return test_annotation
-async def test_annotation_flow():
-    """Test the complete annotation flow"""
-    try:
-        # Initialize controllers
-        annotation_controller = AnnotationController()
-        annotation_processor = AnnotationProcessor()
-        # Setup test data
-        test_data = await setup_test_data()
-        annotation_id = str(test_data["_id"])
-        item_id = test_data["items"][0]["item_id"]
-        print("1. Created test annotation")
-        # Create test feedback
-        feedback = AnnotationFeedback(
-            rating=RatingScale.EXCELLENT,
-            category=AnnotationType.ACCURACY,
-            aspects=AnnotationAspects(
-                factually_correct=True,
-                relevant=True,
-                harmful=False,
-                biased=False,
-                complete=True,
-                efficient=True
-            ),
-            better_response=BetterResponse(
-                content="Paris is the capital city of France, known for its iconic Eiffel Tower.",
-                reason="Added more context and detail"
-            ),
-            comment="Good response, but could be more detailed"
-        )
-        # Submit annotation
-        result = await annotation_controller.submit_annotation(
-            annotation_id=annotation_id,
-            item_id=item_id,
-            feedback=feedback,
-            annotator_id="test_annotator"
-        )
-        print("2. Submitted annotation:", result)
-        # Process annotation queue
-        await annotation_processor.process_queue()
-        print("3. Processed annotation queue")
-        # Verify dataset creation
-        db = await config_manager.get_db('mongodb')
-        datasets = await db['training_datasets'].find().to_list(length=10)
-        print("\nCreated Datasets:")
-        for dataset in datasets:
-            print(f"- {dataset['name']} ({dataset['type']})")
-            print(f"  Status: {dataset['status']}")
-            print(f"  Examples: {dataset['stats']['total_examples']}")
-    except Exception as e:
-        print(f"Error during test: {e}")
-if __name__ == "__main__":
-    # Run the test
-    print("Starting annotation flow test...")
-    asyncio.run(test_annotation_flow())

isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl