PyPI - isa-model - Versions diffs - 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

isa-model 0.3.91 (py3-none-any.whl) → 0.4.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

isa_model/client.py +1166 -584
isa_model/core/cache/redis_cache.py +410 -0
isa_model/core/config/config_manager.py +282 -12
isa_model/core/config.py +91 -1
isa_model/core/database/__init__.py +1 -0
isa_model/core/database/direct_db_client.py +114 -0
isa_model/core/database/migration_manager.py +563 -0
isa_model/core/database/migrations.py +297 -0
isa_model/core/database/supabase_client.py +258 -0
isa_model/core/dependencies.py +316 -0
isa_model/core/discovery/__init__.py +19 -0
isa_model/core/discovery/consul_discovery.py +190 -0
isa_model/core/logging/__init__.py +54 -0
isa_model/core/logging/influx_logger.py +523 -0
isa_model/core/logging/loki_logger.py +160 -0
isa_model/core/models/__init__.py +46 -0
isa_model/core/models/config_models.py +625 -0
isa_model/core/models/deployment_billing_tracker.py +430 -0
isa_model/core/models/model_billing_tracker.py +60 -88
isa_model/core/models/model_manager.py +66 -25
isa_model/core/models/model_metadata.py +690 -0
isa_model/core/models/model_repo.py +217 -55
isa_model/core/models/model_statistics_tracker.py +234 -0
isa_model/core/models/model_storage.py +0 -1
isa_model/core/models/model_version_manager.py +959 -0
isa_model/core/models/system_models.py +857 -0
isa_model/core/pricing_manager.py +2 -249
isa_model/core/repositories/__init__.py +9 -0
isa_model/core/repositories/config_repository.py +912 -0
isa_model/core/resilience/circuit_breaker.py +366 -0
isa_model/core/security/secrets.py +358 -0
isa_model/core/services/__init__.py +2 -4
isa_model/core/services/intelligent_model_selector.py +479 -370
isa_model/core/storage/hf_storage.py +2 -2
isa_model/core/types.py +8 -0
isa_model/deployment/__init__.py +5 -48
isa_model/deployment/core/__init__.py +2 -31
isa_model/deployment/core/deployment_manager.py +1278 -368
isa_model/deployment/local/__init__.py +31 -0
isa_model/deployment/local/config.py +248 -0
isa_model/deployment/local/gpu_gateway.py +607 -0
isa_model/deployment/local/health_checker.py +428 -0
isa_model/deployment/local/provider.py +586 -0
isa_model/deployment/local/tensorrt_service.py +621 -0
isa_model/deployment/local/transformers_service.py +644 -0
isa_model/deployment/local/vllm_service.py +527 -0
isa_model/deployment/modal/__init__.py +8 -0
isa_model/deployment/modal/config.py +136 -0
isa_model/deployment/modal/deployer.py +894 -0
isa_model/deployment/modal/services/__init__.py +3 -0
isa_model/deployment/modal/services/audio/__init__.py +1 -0
isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
isa_model/deployment/modal/services/embedding/__init__.py +1 -0
isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
isa_model/deployment/modal/services/llm/__init__.py +1 -0
isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
isa_model/deployment/modal/services/video/__init__.py +1 -0
isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
isa_model/deployment/modal/services/vision/__init__.py +1 -0
isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/storage/__init__.py +5 -0
isa_model/deployment/storage/deployment_repository.py +824 -0
isa_model/deployment/triton/__init__.py +10 -0
isa_model/deployment/triton/config.py +196 -0
isa_model/deployment/triton/configs/__init__.py +1 -0
isa_model/deployment/triton/provider.py +512 -0
isa_model/deployment/triton/scripts/__init__.py +1 -0
isa_model/deployment/triton/templates/__init__.py +1 -0
isa_model/inference/__init__.py +47 -1
isa_model/inference/ai_factory.py +179 -16
isa_model/inference/legacy_services/__init__.py +21 -0
isa_model/inference/legacy_services/model_evaluation.py +637 -0
isa_model/inference/legacy_services/model_service.py +573 -0
isa_model/inference/legacy_services/model_serving.py +717 -0
isa_model/inference/legacy_services/model_training.py +561 -0
isa_model/inference/models/__init__.py +21 -0
isa_model/inference/models/inference_config.py +551 -0
isa_model/inference/models/inference_record.py +675 -0
isa_model/inference/models/performance_models.py +714 -0
isa_model/inference/repositories/__init__.py +9 -0
isa_model/inference/repositories/inference_repository.py +828 -0
isa_model/inference/services/audio/__init__.py +21 -0
isa_model/inference/services/audio/base_realtime_service.py +225 -0
isa_model/inference/services/audio/base_stt_service.py +184 -11
isa_model/inference/services/audio/isa_tts_service.py +0 -0
isa_model/inference/services/audio/openai_realtime_service.py +320 -124
isa_model/inference/services/audio/openai_stt_service.py +53 -11
isa_model/inference/services/base_service.py +17 -1
isa_model/inference/services/custom_model_manager.py +277 -0
isa_model/inference/services/embedding/__init__.py +13 -0
isa_model/inference/services/embedding/base_embed_service.py +111 -8
isa_model/inference/services/embedding/isa_embed_service.py +305 -0
isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
isa_model/inference/services/embedding/openai_embed_service.py +2 -4
isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
isa_model/inference/services/img/__init__.py +2 -2
isa_model/inference/services/img/base_image_gen_service.py +24 -7
isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
isa_model/inference/services/img/services/replicate_flux.py +226 -0
isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
isa_model/inference/services/img/tests/test_img_client.py +297 -0
isa_model/inference/services/llm/__init__.py +10 -2
isa_model/inference/services/llm/base_llm_service.py +361 -26
isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
isa_model/inference/services/llm/local_llm_service.py +747 -0
isa_model/inference/services/llm/ollama_llm_service.py +11 -3
isa_model/inference/services/llm/openai_llm_service.py +670 -56
isa_model/inference/services/llm/yyds_llm_service.py +10 -3
isa_model/inference/services/vision/__init__.py +27 -6
isa_model/inference/services/vision/base_vision_service.py +118 -185
isa_model/inference/services/vision/blip_vision_service.py +359 -0
isa_model/inference/services/vision/helpers/image_utils.py +19 -10
isa_model/inference/services/vision/isa_vision_service.py +634 -0
isa_model/inference/services/vision/openai_vision_service.py +19 -10
isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
isa_model/serving/api/cache_manager.py +245 -0
isa_model/serving/api/dependencies/__init__.py +1 -0
isa_model/serving/api/dependencies/auth.py +194 -0
isa_model/serving/api/dependencies/database.py +139 -0
isa_model/serving/api/error_handlers.py +284 -0
isa_model/serving/api/fastapi_server.py +240 -18
isa_model/serving/api/middleware/auth.py +317 -0
isa_model/serving/api/middleware/security.py +268 -0
isa_model/serving/api/middleware/tenant_context.py +414 -0
isa_model/serving/api/routes/analytics.py +489 -0
isa_model/serving/api/routes/config.py +645 -0
isa_model/serving/api/routes/deployment_billing.py +315 -0
isa_model/serving/api/routes/deployments.py +475 -0
isa_model/serving/api/routes/gpu_gateway.py +440 -0
isa_model/serving/api/routes/health.py +32 -12
isa_model/serving/api/routes/inference_monitoring.py +486 -0
isa_model/serving/api/routes/local_deployments.py +448 -0
isa_model/serving/api/routes/logs.py +430 -0
isa_model/serving/api/routes/settings.py +582 -0
isa_model/serving/api/routes/tenants.py +575 -0
isa_model/serving/api/routes/unified.py +992 -171
isa_model/serving/api/routes/webhooks.py +479 -0
isa_model/serving/api/startup.py +318 -0
isa_model/serving/modal_proxy_server.py +249 -0
isa_model/utils/gpu_utils.py +311 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
isa_model-0.4.3.dist-info/RECORD +193 -0
isa_model/deployment/cloud/__init__.py +0 -9
isa_model/deployment/cloud/modal/__init__.py +0 -10
isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
isa_model/deployment/cloud/modal/register_models.py +0 -321
isa_model/deployment/core/deployment_config.py +0 -356
isa_model/deployment/core/isa_deployment_service.py +0 -401
isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
isa_model/deployment/runtime/deployed_service.py +0 -338
isa_model/deployment/services/__init__.py +0 -9
isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
isa_model/deployment/services/model_service.py +0 -332
isa_model/deployment/services/service_monitor.py +0 -356
isa_model/deployment/services/service_registry.py +0 -527
isa_model/eval/__init__.py +0 -92
isa_model/eval/benchmarks.py +0 -469
isa_model/eval/config/__init__.py +0 -10
isa_model/eval/config/evaluation_config.py +0 -108
isa_model/eval/evaluators/__init__.py +0 -18
isa_model/eval/evaluators/base_evaluator.py +0 -503
isa_model/eval/evaluators/llm_evaluator.py +0 -472
isa_model/eval/factory.py +0 -531
isa_model/eval/infrastructure/__init__.py +0 -24
isa_model/eval/infrastructure/experiment_tracker.py +0 -466
isa_model/eval/metrics.py +0 -798
isa_model/inference/adapter/unified_api.py +0 -248
isa_model/inference/services/helpers/stacked_config.py +0 -148
isa_model/inference/services/img/flux_professional_service.py +0 -603
isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
isa_model/inference/services/others/table_transformer_service.py +0 -61
isa_model/inference/services/vision/doc_analysis_service.py +0 -640
isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
isa_model/inference/services/vision/ui_analysis_service.py +0 -823
isa_model/scripts/inference_tracker.py +0 -283
isa_model/scripts/mlflow_manager.py +0 -379
isa_model/scripts/model_registry.py +0 -465
isa_model/scripts/register_models.py +0 -370
isa_model/scripts/register_models_with_embeddings.py +0 -510
isa_model/scripts/start_mlflow.py +0 -95
isa_model/scripts/training_tracker.py +0 -257
isa_model/training/__init__.py +0 -74
isa_model/training/annotation/annotation_schema.py +0 -47
isa_model/training/annotation/processors/annotation_processor.py +0 -126
isa_model/training/annotation/storage/dataset_manager.py +0 -131
isa_model/training/annotation/storage/dataset_schema.py +0 -44
isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
isa_model/training/annotation/tests/test_minio copy.py +0 -113
isa_model/training/annotation/tests/test_minio_upload.py +0 -43
isa_model/training/annotation/views/annotation_controller.py +0 -158
isa_model/training/cloud/__init__.py +0 -22
isa_model/training/cloud/job_orchestrator.py +0 -402
isa_model/training/cloud/runpod_trainer.py +0 -454
isa_model/training/cloud/storage_manager.py +0 -482
isa_model/training/core/__init__.py +0 -23
isa_model/training/core/config.py +0 -181
isa_model/training/core/dataset.py +0 -222
isa_model/training/core/trainer.py +0 -720
isa_model/training/core/utils.py +0 -213
isa_model/training/factory.py +0 -424
isa_model-0.3.91.dist-info/RECORD +0 -138
/isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
/isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0

isa_model/inference/models/inference_record.py ADDED Viewed

@@ -0,0 +1,675 @@
+"""
+Inference Record Models
+Core data models for inference requests, usage statistics, and model snapshots,
+extracted from repository layer to follow the standard ISA Model architecture pattern.
+"""
+import logging
+import hashlib
+from datetime import datetime, timezone, timedelta
+from typing import Dict, List, Optional, Any, Union
+from dataclasses import dataclass, field
+from enum import Enum
+logger = logging.getLogger(__name__)
+class InferenceStatus(str, Enum):
+    """Lifecycle states of an inference request.
+    Members subclass ``str``, so each one compares equal to its plain string value.
+    """
+    # In-flight states (the ones InferenceRequest.is_active checks for)
+    PENDING = "pending"
+    PROCESSING = "processing"
+    # Terminal states (the ones InferenceRequest.is_completed checks for)
+    COMPLETED = "completed"
+    FAILED = "failed"
+    TIMEOUT = "timeout"
+    CANCELLED = "cancelled"
+    # Further in-flight states, also listed by InferenceRequest.is_active
+    QUEUED = "queued"
+    RETRYING = "retrying"
+class ServiceType(str, Enum):
+    """String-valued identifiers for the kinds of inference service.
+    Members subclass ``str``, so each one compares equal to its plain string value.
+    NOTE(review): presumably these are the values stored in the ``service_type``
+    fields of the record classes in this module -- confirm against callers.
+    """
+    LLM = "llm"
+    VISION = "vision"
+    EMBEDDING = "embedding"
+    TTS = "tts"
+    STT = "stt"
+    IMAGE_GEN = "image_gen"
+    AUDIO = "audio"
+    RERANK = "rerank"
+    OCR = "ocr"
+    TRANSLATION = "translation"
+    SUMMARIZATION = "summarization"
+    CLASSIFICATION = "classification"
+class ErrorCategory(str, Enum):
+    """Coarse classification of why an inference request failed.
+    Members subclass ``str``, so each one compares equal to its plain string value.
+    """
+    TIMEOUT = "timeout"
+    RATE_LIMIT = "rate_limit"
+    AUTHENTICATION = "authentication"
+    VALIDATION = "validation"
+    MODEL_ERROR = "model_error"
+    NETWORK_ERROR = "network_error"
+    SERVER_ERROR = "server_error"
+    QUOTA_EXCEEDED = "quota_exceeded"
+    # Default category used by InferenceRequest.fail_request when none is given
+    UNKNOWN = "unknown"
+@dataclass
+class InferenceRequest:
+    """
+    Core inference request record
+    Represents a single inference request with its input, output, performance metrics,
+    and tracking information for analytics and billing purposes.
+    """
+    request_id: str
+    service_type: str
+    model_id: str
+    provider: str
+    endpoint: str
+    request_data: Dict[str, Any]
+    status: str = InferenceStatus.PENDING
+    created_at: datetime = None
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+    user_id: Optional[str] = None
+    session_id: Optional[str] = None
+    client_id: Optional[str] = None
+    ip_address: Optional[str] = None
+    user_agent: Optional[str] = None
+    response_data: Optional[Dict[str, Any]] = None
+    error_message: Optional[str] = None
+    error_category: Optional[str] = None
+    execution_time_ms: Optional[int] = None
+    queue_time_ms: Optional[int] = None
+    tokens_used: Optional[int] = None
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    cost_usd: Optional[float] = None
+    request_size_bytes: Optional[int] = None
+    response_size_bytes: Optional[int] = None
+    cache_hit: bool = False
+    retry_count: int = 0
+    priority: int = 5  # 1-10 scale
+    request_hash: Optional[str] = None
+    response_hash: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
+    def __post_init__(self):
+        if self.created_at is None:
+            self.created_at = datetime.now(timezone.utc)
+        if self.metadata is None:
+            self.metadata = {}
+        # Generate request hash for deduplication
+        if self.request_hash is None and self.request_data:
+            self.request_hash = self._generate_content_hash(self.request_data)
+    def _generate_content_hash(self, content: Any) -> str:
+        """Generate consistent hash for content"""
+        import json
+        try:
+            content_str = json.dumps(content, sort_keys=True, ensure_ascii=True)
+            return hashlib.sha256(content_str.encode()).hexdigest()[:16]
+        except Exception:
+            return hashlib.sha256(str(content).encode()).hexdigest()[:16]
+    @property
+    def is_active(self) -> bool:
+        """Check if request is in active processing state"""
+        return self.status in [InferenceStatus.PENDING, InferenceStatus.PROCESSING,
+                              InferenceStatus.QUEUED, InferenceStatus.RETRYING]
+    @property
+    def is_completed(self) -> bool:
+        """Check if request is completed (successfully or not)"""
+        return self.status in [InferenceStatus.COMPLETED, InferenceStatus.FAILED,
+                              InferenceStatus.TIMEOUT, InferenceStatus.CANCELLED]
+    @property
+    def was_successful(self) -> bool:
+        """Check if request completed successfully"""
+        return self.status == InferenceStatus.COMPLETED
+    @property
+    def total_duration_ms(self) -> Optional[int]:
+        """Calculate total request duration including queue time"""
+        if self.created_at and self.completed_at:
+            return int((self.completed_at - self.created_at).total_seconds() * 1000)
+        return None
+    @property
+    def total_tokens(self) -> Optional[int]:
+        """Get total tokens used (input + output)"""
+        if self.input_tokens is not None and self.output_tokens is not None:
+            return self.input_tokens + self.output_tokens
+        return self.tokens_used
+    @property
+    def cost_per_token(self) -> Optional[float]:
+        """Calculate cost per token"""
+        total = self.total_tokens
+        if self.cost_usd and total and total > 0:
+            return self.cost_usd / total
+        return None
+    @property
+    def throughput_tokens_per_second(self) -> Optional[float]:
+        """Calculate token throughput"""
+        total = self.total_tokens
+        if total and self.execution_time_ms and self.execution_time_ms > 0:
+            return (total * 1000) / self.execution_time_ms
+        return None
+    def update_status(self, new_status: str, error_message: Optional[str] = None,
+                     error_category: Optional[str] = None):
+        """Update request status with timestamp tracking"""
+        old_status = self.status
+        self.status = new_status
+        now = datetime.now(timezone.utc)
+        if new_status == InferenceStatus.PROCESSING and old_status in [InferenceStatus.PENDING, InferenceStatus.QUEUED]:
+            self.started_at = now
+            if self.created_at:
+                self.queue_time_ms = int((now - self.created_at).total_seconds() * 1000)
+        elif new_status in [InferenceStatus.COMPLETED, InferenceStatus.FAILED,
+                           InferenceStatus.TIMEOUT, InferenceStatus.CANCELLED]:
+            if not self.completed_at:
+                self.completed_at = now
+            if self.started_at:
+                self.execution_time_ms = int((self.completed_at - self.started_at).total_seconds() * 1000)
+        if error_message:
+            self.error_message = error_message
+        if error_category:
+            self.error_category = error_category
+        logger.debug(f"Request {self.request_id} status: {old_status} -> {new_status}")
+    def complete_request(self, response_data: Dict[str, Any], tokens_used: Optional[int] = None,
+                        cost_usd: Optional[float] = None, **kwargs):
+        """Mark request as completed with response data"""
+        self.response_data = response_data
+        self.response_hash = self._generate_content_hash(response_data)
+        if tokens_used:
+            self.tokens_used = tokens_used
+        if cost_usd:
+            self.cost_usd = cost_usd
+        # Update any additional metrics
+        for key, value in kwargs.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+        self.update_status(InferenceStatus.COMPLETED)
+    def fail_request(self, error_message: str, error_category: str = ErrorCategory.UNKNOWN,
+                    **kwargs):
+        """Mark request as failed with error details"""
+        self.error_message = error_message
+        self.error_category = error_category
+        # Update any additional error metrics
+        for key, value in kwargs.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+        self.update_status(InferenceStatus.FAILED, error_message, error_category)
+    def increment_retry(self):
+        """Increment retry count and reset to retrying status"""
+        self.retry_count += 1
+        self.update_status(InferenceStatus.RETRYING)
+    def add_metadata(self, key: str, value: Any):
+        """Add metadata entry"""
+        self.metadata[key] = value
+    def get_metadata(self, key: str, default: Any = None) -> Any:
+        """Get metadata entry"""
+        return self.metadata.get(key, default)
+@dataclass
+class UsageStatistics:
+    """
+    Aggregated usage statistics for analytics and billing
+    Contains summarized metrics for a specific time period, service type,
+    model, or user for reporting and analysis purposes.
+    """
+    stat_id: str
+    period_start: datetime
+    period_end: datetime
+    service_type: str
+    model_id: Optional[str] = None
+    provider: Optional[str] = None
+    user_id: Optional[str] = None
+    client_id: Optional[str] = None
+    total_requests: int = 0
+    successful_requests: int = 0
+    failed_requests: int = 0
+    timeout_requests: int = 0
+    retry_requests: int = 0
+    cache_hits: int = 0
+    total_tokens: int = 0
+    input_tokens: int = 0
+    output_tokens: int = 0
+    total_cost_usd: float = 0.0
+    avg_response_time_ms: float = 0.0
+    p50_response_time_ms: float = 0.0
+    p95_response_time_ms: float = 0.0
+    p99_response_time_ms: float = 0.0
+    avg_queue_time_ms: float = 0.0
+    requests_per_hour: float = 0.0
+    tokens_per_hour: float = 0.0
+    error_rate: float = 0.0
+    timeout_rate: float = 0.0
+    cache_hit_rate: float = 0.0
+    avg_tokens_per_request: float = 0.0
+    cost_per_token: float = 0.0
+    cost_per_request: float = 0.0
+    throughput_tokens_per_second: float = 0.0
+    created_at: datetime = None
+    def __post_init__(self):
+        if self.created_at is None:
+            self.created_at = datetime.now(timezone.utc)
+        # Calculate derived metrics
+        self._calculate_derived_metrics()
+    def _calculate_derived_metrics(self):
+        """Calculate derived metrics from base counts"""
+        # Error and success rates
+        if self.total_requests > 0:
+            self.error_rate = (self.failed_requests / self.total_requests) * 100
+            self.timeout_rate = (self.timeout_requests / self.total_requests) * 100
+            self.cache_hit_rate = (self.cache_hits / self.total_requests) * 100
+            self.cost_per_request = self.total_cost_usd / self.total_requests
+        # Token metrics
+        if self.total_tokens > 0:
+            self.cost_per_token = self.total_cost_usd / self.total_tokens
+        if self.successful_requests > 0:
+            self.avg_tokens_per_request = self.total_tokens / self.successful_requests
+        # Time-based metrics
+        period_hours = (self.period_end - self.period_start).total_seconds() / 3600
+        if period_hours > 0:
+            self.requests_per_hour = self.total_requests / period_hours
+            self.tokens_per_hour = self.total_tokens / period_hours
+        # Throughput
+        if self.avg_response_time_ms > 0:
+            self.throughput_tokens_per_second = (self.avg_tokens_per_request * 1000) / self.avg_response_time_ms
+    @property
+    def success_rate(self) -> float:
+        """Calculate success rate percentage"""
+        return 100.0 - self.error_rate
+    @property
+    def period_duration_hours(self) -> float:
+        """Get period duration in hours"""
+        return (self.period_end - self.period_start).total_seconds() / 3600
+    @property
+    def efficiency_score(self) -> float:
+        """Calculate efficiency score (0-100) based on performance metrics"""
+        score = 100.0
+        # Penalty for high error rates
+        score -= self.error_rate
+        # Penalty for high timeout rates
+        score -= self.timeout_rate * 2  # Timeouts are worse than regular errors
+        # Bonus for cache hits
+        score += self.cache_hit_rate * 0.1
+        # Penalty for slow responses (relative to service type)
+        if self.avg_response_time_ms > 5000:  # 5+ seconds
+            score -= 20
+        elif self.avg_response_time_ms > 2000:  # 2+ seconds
+            score -= 10
+        elif self.avg_response_time_ms > 1000:  # 1+ seconds
+            score -= 5
+        return max(0.0, min(100.0, score))
+    @property
+    def performance_tier(self) -> str:
+        """Get performance tier classification"""
+        efficiency = self.efficiency_score
+        if efficiency >= 90:
+            return "excellent"
+        elif efficiency >= 75:
+            return "good"
+        elif efficiency >= 60:
+            return "average"
+        elif efficiency >= 40:
+            return "poor"
+        else:
+            return "critical"
+    def add_request_data(self, request: InferenceRequest):
+        """Add data from an individual request to the statistics"""
+        self.total_requests += 1
+        if request.was_successful:
+            self.successful_requests += 1
+            if request.total_tokens:
+                self.total_tokens += request.total_tokens
+            if request.input_tokens:
+                self.input_tokens += request.input_tokens
+            if request.output_tokens:
+                self.output_tokens += request.output_tokens
+            if request.cost_usd:
+                self.total_cost_usd += request.cost_usd
+        elif request.status == InferenceStatus.FAILED:
+            self.failed_requests += 1
+        elif request.status == InferenceStatus.TIMEOUT:
+            self.timeout_requests += 1
+        if request.retry_count > 0:
+            self.retry_requests += 1
+        if request.cache_hit:
+            self.cache_hits += 1
+        # Recalculate derived metrics
+        self._calculate_derived_metrics()
+    def merge_with(self, other: 'UsageStatistics') -> 'UsageStatistics':
+        """Merge this statistics with another to create combined stats"""
+        # This would implement proper statistical aggregation
+        # For now, just sum the counts and recalculate
+        merged = UsageStatistics(
+            stat_id=f"merged_{self.stat_id}_{other.stat_id}",
+            period_start=min(self.period_start, other.period_start),
+            period_end=max(self.period_end, other.period_end),
+            service_type="combined" if self.service_type != other.service_type else self.service_type,
+            total_requests=self.total_requests + other.total_requests,
+            successful_requests=self.successful_requests + other.successful_requests,
+            failed_requests=self.failed_requests + other.failed_requests,
+            timeout_requests=self.timeout_requests + other.timeout_requests,
+            retry_requests=self.retry_requests + other.retry_requests,
+            cache_hits=self.cache_hits + other.cache_hits,
+            total_tokens=self.total_tokens + other.total_tokens,
+            input_tokens=self.input_tokens + other.input_tokens,
+            output_tokens=self.output_tokens + other.output_tokens,
+            total_cost_usd=self.total_cost_usd + other.total_cost_usd
+        )
+        # Calculate weighted averages for timing metrics
+        if merged.total_requests > 0:
+            weight_self = self.total_requests / merged.total_requests
+            weight_other = other.total_requests / merged.total_requests
+            merged.avg_response_time_ms = (self.avg_response_time_ms * weight_self +
+                                         other.avg_response_time_ms * weight_other)
+            merged.avg_queue_time_ms = (self.avg_queue_time_ms * weight_self +
+                                      other.avg_queue_time_ms * weight_other)
+        return merged
+@dataclass
+class ModelUsageSnapshot:
+    """
+    Point-in-time usage snapshot for quick analytics
+    Provides a snapshot view of model usage at different time granularities
+    for real-time monitoring and dashboard displays.
+    """
+    snapshot_id: str
+    model_id: str
+    provider: str
+    snapshot_time: datetime
+    hourly_requests: int = 0
+    daily_requests: int = 0
+    weekly_requests: int = 0
+    monthly_requests: int = 0
+    total_tokens_hour: int = 0
+    total_tokens_day: int = 0
+    total_tokens_week: int = 0
+    total_tokens_month: int = 0
+    total_cost_hour: float = 0.0
+    total_cost_day: float = 0.0
+    total_cost_week: float = 0.0
+    total_cost_month: float = 0.0
+    avg_response_time_hour: float = 0.0
+    avg_response_time_day: float = 0.0
+    success_rate_hour: float = 100.0
+    success_rate_day: float = 100.0
+    cache_hit_rate_hour: float = 0.0
+    cache_hit_rate_day: float = 0.0
+    unique_users_hour: int = 0
+    unique_users_day: int = 0
+    peak_requests_per_minute: int = 0
+    current_queue_size: int = 0
+    last_used: Optional[datetime] = None
+    health_status: str = "healthy"  # healthy, degraded, critical, offline
+    def __post_init__(self):
+        if self.snapshot_time is None:
+            self.snapshot_time = datetime.now(timezone.utc)
+    @property
+    def is_active(self) -> bool:
+        """Check if model has been used recently"""
+        if not self.last_used:
+            return False
+        time_since_use = datetime.now(timezone.utc) - self.last_used
+        return time_since_use.total_seconds() < 3600  # Active if used in last hour
+    @property
+    def utilization_trend(self) -> str:
+        """Analyze utilization trend"""
+        if self.weekly_requests == 0:
+            return "unused"
+        daily_avg = self.weekly_requests / 7
+        hourly_avg = self.daily_requests / 24
+        if self.hourly_requests > hourly_avg * 2:
+            return "surge"
+        elif self.hourly_requests > hourly_avg * 1.5:
+            return "high"
+        elif self.hourly_requests > hourly_avg * 0.8:
+            return "normal"
+        elif self.hourly_requests > hourly_avg * 0.3:
+            return "low"
+        else:
+            return "minimal"
+    @property
+    def cost_trend(self) -> str:
+        """Analyze cost trend"""
+        if self.total_cost_week == 0:
+            return "no_cost"
+        daily_avg = self.total_cost_week / 7
+        hourly_avg = self.total_cost_day / 24
+        if self.total_cost_hour > hourly_avg * 3:
+            return "expensive_spike"
+        elif self.total_cost_hour > hourly_avg * 1.5:
+            return "above_average"
+        elif self.total_cost_hour > hourly_avg * 0.8:
+            return "normal"
+        else:
+            return "below_average"
+    @property
+    def efficiency_metrics(self) -> Dict[str, float]:
+        """Get efficiency metrics"""
+        return {
+            "requests_per_dollar_hour": self.hourly_requests / max(self.total_cost_hour, 0.01),
+            "tokens_per_dollar_hour": self.total_tokens_hour / max(self.total_cost_hour, 0.01),
+            "requests_per_dollar_day": self.daily_requests / max(self.total_cost_day, 0.01),
+            "tokens_per_dollar_day": self.total_tokens_day / max(self.total_cost_day, 0.01),
+            "avg_cost_per_request_hour": self.total_cost_hour / max(self.hourly_requests, 1),
+            "avg_cost_per_request_day": self.total_cost_day / max(self.daily_requests, 1)
+        }
+    @property
+    def performance_score(self) -> float:
+        """Calculate overall performance score (0-100)"""
+        score = 100.0
+        # Response time penalty
+        if self.avg_response_time_day > 5000:
+            score -= 30
+        elif self.avg_response_time_day > 2000:
+            score -= 15
+        elif self.avg_response_time_day > 1000:
+            score -= 5
+        # Success rate bonus/penalty
+        score = score * (self.success_rate_day / 100)
+        # Cache hit bonus
+        score += self.cache_hit_rate_day * 0.1
+        # Health status penalty
+        if self.health_status == "critical":
+            score *= 0.5
+        elif self.health_status == "degraded":
+            score *= 0.8
+        elif self.health_status == "offline":
+            score = 0
+        return max(0.0, min(100.0, score))
+    def update_health_status(self, new_status: str):
+        """Update health status"""
+        self.health_status = new_status
+        self.snapshot_time = datetime.now(timezone.utc)
+    def record_usage(self, requests: int = 1, tokens: int = 0, cost: float = 0.0,
+                    response_time_ms: float = 0.0, success: bool = True, cache_hit: bool = False):
+        """Record usage activity"""
+        self.hourly_requests += requests
+        self.daily_requests += requests
+        self.weekly_requests += requests
+        self.monthly_requests += requests
+        self.total_tokens_hour += tokens
+        self.total_tokens_day += tokens
+        self.total_tokens_week += tokens
+        self.total_tokens_month += tokens
+        self.total_cost_hour += cost
+        self.total_cost_day += cost
+        self.total_cost_week += cost
+        self.total_cost_month += cost
+        # Update averages (simplified - would use proper moving averages in production)
+        if self.hourly_requests > 0:
+            self.avg_response_time_hour = ((self.avg_response_time_hour * (self.hourly_requests - requests)) +
+                                         (response_time_ms * requests)) / self.hourly_requests
+        if self.daily_requests > 0:
+            self.avg_response_time_day = ((self.avg_response_time_day * (self.daily_requests - requests)) +
+                                        (response_time_ms * requests)) / self.daily_requests
+        self.last_used = datetime.now(timezone.utc)
+# Utility functions for working with inference models
+def create_inference_request(
+    service_type: str,
+    model_id: str,
+    provider: str,
+    endpoint: str,
+    request_data: Dict[str, Any],
+    user_id: Optional[str] = None,
+    **kwargs
+) -> InferenceRequest:
+    """Factory function to create a new inference request"""
+    import uuid
+    request_id = f"inf_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+    return InferenceRequest(
+        request_id=request_id,
+        service_type=service_type,
+        model_id=model_id,
+        provider=provider,
+        endpoint=endpoint,
+        request_data=request_data,
+        user_id=user_id,
+        **kwargs
+    )
+def create_usage_statistics(
+    period_start: datetime,
+    period_end: datetime,
+    service_type: str,
+    model_id: Optional[str] = None,
+    provider: Optional[str] = None,
+    user_id: Optional[str] = None
+) -> UsageStatistics:
+    """Factory function to create usage statistics"""
+    import uuid
+    stat_id = f"stat_{period_start.strftime('%Y%m%d_%H')}_{uuid.uuid4().hex[:6]}"
+    return UsageStatistics(
+        stat_id=stat_id,
+        period_start=period_start,
+        period_end=period_end,
+        service_type=service_type,
+        model_id=model_id,
+        provider=provider,
+        user_id=user_id
+    )
+def create_model_snapshot(
+    model_id: str,
+    provider: str
+) -> ModelUsageSnapshot:
+    """Factory function to create model usage snapshot"""
+    snapshot_id = f"snap_{model_id}_{provider}_{datetime.now().strftime('%Y%m%d_%H')}"
+    return ModelUsageSnapshot(
+        snapshot_id=snapshot_id,
+        model_id=model_id,
+        provider=provider
+    )
+def calculate_usage_summary(requests: List[InferenceRequest]) -> Dict[str, Any]:
+    """Calculate usage summary from list of requests"""
+    if not requests:
+        return {"total_requests": 0}
+    total_requests = len(requests)
+    successful = sum(1 for r in requests if r.was_successful)
+    failed = sum(1 for r in requests if r.status == InferenceStatus.FAILED)
+    timeouts = sum(1 for r in requests if r.status == InferenceStatus.TIMEOUT)
+    total_cost = sum(r.cost_usd or 0 for r in requests)
+    total_tokens = sum(r.total_tokens or 0 for r in requests)
+    execution_times = [r.execution_time_ms for r in requests if r.execution_time_ms]
+    avg_execution_time = sum(execution_times) / len(execution_times) if execution_times else 0
+    return {
+        "total_requests": total_requests,
+        "successful_requests": successful,
+        "failed_requests": failed,
+        "timeout_requests": timeouts,
+        "success_rate": (successful / total_requests) * 100 if total_requests > 0 else 0,
+        "total_cost_usd": round(total_cost, 4),
+        "total_tokens": total_tokens,
+        "avg_execution_time_ms": round(avg_execution_time, 2),
+        "cost_per_request": round(total_cost / total_requests, 6) if total_requests > 0 else 0,
+        "cost_per_token": round(total_cost / total_tokens, 8) if total_tokens > 0 else 0
+    }

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl