isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/core/models/deployment_billing_tracker.py (new file)
@@ -0,0 +1,430 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Deployment Billing Tracker - Specialized billing for deployment and training operations
+
+Extends the core ModelBillingTracker with deployment-specific metrics:
+- GPU runtime hours
+- Instance type costs
+- Training epochs/steps billing
+- Deployment lifecycle costs
+"""
+
+from typing import Dict, List, Optional, Any, Union
+from datetime import datetime, timezone, timedelta
+from dataclasses import dataclass, asdict
+import json
+import logging
+from enum import Enum
+from .model_billing_tracker import ModelBillingTracker, ModelUsageRecord, ModelOperationType
+
+logger = logging.getLogger(__name__)
+
+class DeploymentProvider(Enum):
+    """Deployment providers"""
+    MODAL = "modal"
+    TRITON_LOCAL = "triton_local"
+    TRITON_CLOUD = "triton_cloud"
+    RUNPOD = "runpod"
+    LAMBDA_LABS = "lambda_labs"
+    COREWEAVE = "coreweave"
+
+class GPUType(Enum):
+    """GPU types for cost calculation"""
+    RTX_4090 = "rtx_4090"
+    RTX_A6000 = "rtx_a6000"
+    A100_40GB = "a100_40gb"
+    A100_80GB = "a100_80gb"
+    H100 = "h100"
+    T4 = "t4"
+    V100 = "v100"
+
+@dataclass
+class DeploymentUsageRecord(ModelUsageRecord):
+    """Extended usage record for deployment operations"""
+    # GPU/Infrastructure metrics
+    gpu_type: Optional[str] = None
+    gpu_count: Optional[int] = None
+    runtime_hours: Optional[float] = None
+    cpu_cores: Optional[int] = None
+    memory_gb: Optional[int] = None
+
+    # Training-specific metrics
+    training_epochs: Optional[int] = None
+    training_steps: Optional[int] = None
+    dataset_size: Optional[int] = None
+
+    # Deployment-specific metrics
+    deployment_duration_hours: Optional[float] = None
+    requests_served: Optional[int] = None
+    avg_latency_ms: Optional[float] = None
+
+    # Infrastructure costs
+    compute_cost_usd: Optional[float] = None
+    storage_cost_usd: Optional[float] = None
+    network_cost_usd: Optional[float] = None
+
+class DeploymentBillingTracker(ModelBillingTracker):
+    """
+    Specialized billing tracker for deployment and training operations
+
+    Extends ModelBillingTracker with deployment-specific cost calculations
+    and metrics tracking for GPU-based operations.
+    """
+
+    def __init__(self, model_registry=None, storage_path: Optional[str] = None):
+        super().__init__(model_registry, storage_path)
+
+        # Load pricing data for deployment providers
+        self.pricing_data = self._load_deployment_pricing()
+
+    def _load_deployment_pricing(self) -> Dict[str, Dict[str, float]]:
+        """Load pricing data for different deployment providers and GPU types"""
+        return {
+            "modal": {
+                "t4": 0.50,  # $/hour
+                "rtx_4090": 0.80,
+                "a100_40gb": 2.50,
+                "a100_80gb": 4.00,
+                "h100": 8.00,
+                "base_compute": 0.10  # $/hour base compute
+            },
+            "triton_local": {
+                "electricity": 0.12,  # $/kWh
+                "gpu_tdp": {
+                    "rtx_4090": 450,  # Watts
+                    "a100_40gb": 400,
+                    "a100_80gb": 400,
+                    "h100": 700
+                }
+            },
+            "runpod": {
+                "rtx_4090": 0.44,
+                "rtx_a6000": 0.79,
+                "a100_40gb": 1.69,
+                "a100_80gb": 2.89,
+                "h100": 4.89
+            },
+            "lambda_labs": {
+                "rtx_4090": 0.50,
+                "a100_40gb": 1.50,
+                "a100_80gb": 2.50,
+                "h100": 4.50
+            },
+            "coreweave": {
+                "rtx_4090": 0.57,
+                "a100_40gb": 2.06,
+                "a100_80gb": 2.23,
+                "h100": 4.76
+            }
+        }
+
+    def track_deployment_usage(
+        self,
+        model_id: str,
+        provider: Union[str, DeploymentProvider],
+        operation_type: Union[str, ModelOperationType],
+        service_type: str,
+        operation: str,
+
+        # GPU/Infrastructure metrics
+        gpu_type: Optional[Union[str, GPUType]] = None,
+        gpu_count: Optional[int] = None,
+        runtime_hours: Optional[float] = None,
+        cpu_cores: Optional[int] = None,
+        memory_gb: Optional[int] = None,
+
+        # Training-specific
+        training_epochs: Optional[int] = None,
+        training_steps: Optional[int] = None,
+        dataset_size: Optional[int] = None,
+
+        # Deployment-specific
+        deployment_duration_hours: Optional[float] = None,
+        requests_served: Optional[int] = None,
+        avg_latency_ms: Optional[float] = None,
+
+        # Standard billing
+        input_tokens: Optional[int] = None,
+        output_tokens: Optional[int] = None,
+        cost_usd: Optional[float] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> DeploymentUsageRecord:
+        """
+        Track deployment/training usage with specialized metrics
+
+        Args:
+            model_id: Model identifier
+            provider: Deployment provider
+            operation_type: Type of operation (training, deployment, inference)
+            service_type: Service type (llm, vision, etc.)
+            operation: Specific operation
+            gpu_type: Type of GPU used
+            gpu_count: Number of GPUs
+            runtime_hours: Hours of runtime
+            training_epochs: Number of training epochs
+            deployment_duration_hours: Hours deployment was active
+            ... (other parameters as documented)
+
+        Returns:
+            DeploymentUsageRecord with calculated costs
+        """
+        # Convert enums to strings
+        if isinstance(provider, DeploymentProvider):
+            provider = provider.value
+        if isinstance(operation_type, ModelOperationType):
+            operation_type = operation_type.value
+        if isinstance(gpu_type, GPUType):
+            gpu_type = gpu_type.value
+
+        # Calculate deployment-specific costs
+        if cost_usd is None:
+            cost_breakdown = self._calculate_deployment_cost(
+                provider, gpu_type, gpu_count, runtime_hours,
+                deployment_duration_hours, training_epochs, training_steps
+            )
+            cost_usd = cost_breakdown["total_cost"]
+            compute_cost = cost_breakdown["compute_cost"]
+            storage_cost = cost_breakdown["storage_cost"]
+            network_cost = cost_breakdown["network_cost"]
+        else:
+            compute_cost = cost_usd  # If provided, assume it's compute cost
+            storage_cost = 0.0
+            network_cost = 0.0
+
+        # Create deployment usage record
+        record = DeploymentUsageRecord(
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            model_id=model_id,
+            operation_type=operation_type,
+            provider=provider,
+            service_type=service_type,
+            operation=operation,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            total_tokens=(input_tokens or 0) + (output_tokens or 0) if input_tokens or output_tokens else None,
+            cost_usd=cost_usd,
+            metadata=metadata or {},
+
+            # Deployment-specific fields
+            gpu_type=gpu_type,
+            gpu_count=gpu_count,
+            runtime_hours=runtime_hours,
+            cpu_cores=cpu_cores,
+            memory_gb=memory_gb,
+            training_epochs=training_epochs,
+            training_steps=training_steps,
+            dataset_size=dataset_size,
+            deployment_duration_hours=deployment_duration_hours,
+            requests_served=requests_served,
+            avg_latency_ms=avg_latency_ms,
+            compute_cost_usd=compute_cost,
+            storage_cost_usd=storage_cost,
+            network_cost_usd=network_cost
+        )
+
+        # Add to records and save
+        self.usage_records.append(record)
+        self._save_data()
+
+        logger.info(f"Tracked deployment usage: {model_id} - {provider} - {gpu_type} - ${cost_usd:.4f}")
+        return record
+
+    def _calculate_deployment_cost(
+        self,
+        provider: str,
+        gpu_type: Optional[str],
+        gpu_count: Optional[int],
+        runtime_hours: Optional[float],
+        deployment_duration_hours: Optional[float],
+        training_epochs: Optional[int],
+        training_steps: Optional[int]
+    ) -> Dict[str, float]:
+        """Calculate deployment costs based on provider and usage"""
+
+        gpu_count = gpu_count or 1
+        runtime_hours = runtime_hours or deployment_duration_hours or 1.0
+
+        compute_cost = 0.0
+        storage_cost = 0.0
+        network_cost = 0.0
+
+        try:
+            if provider in self.pricing_data:
+                pricing = self.pricing_data[provider]
+
+                if provider == "modal":
+                    # Modal pricing: per-GPU hourly rate
+                    if gpu_type and gpu_type in pricing:
+                        compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
+                    else:
+                        compute_cost = pricing.get("base_compute", 0.10) * runtime_hours
+
+                elif provider == "triton_local":
+                    # Local deployment: electricity costs
+                    if gpu_type and gpu_type in pricing["gpu_tdp"]:
+                        power_watts = pricing["gpu_tdp"][gpu_type] * gpu_count
+                        kwh_used = (power_watts / 1000) * runtime_hours
+                        compute_cost = kwh_used * pricing["electricity"]
+
+                elif provider in ["runpod", "lambda_labs", "coreweave"]:
+                    # Cloud GPU providers: per-GPU hourly rates
+                    if gpu_type and gpu_type in pricing:
+                        compute_cost = pricing[gpu_type] * gpu_count * runtime_hours
+
+            # Add storage costs (simplified)
+            storage_cost = runtime_hours * 0.01  # $0.01/hour for storage
+
+            # Add network costs for training (data transfer)
+            if training_epochs and training_epochs > 0:
+                network_cost = training_epochs * 0.05  # $0.05 per epoch for data
+
+        except Exception as e:
+            logger.error(f"Error calculating deployment cost: {e}")
+            compute_cost = 0.0
+
+        total_cost = compute_cost + storage_cost + network_cost
+
+        return {
+            "total_cost": round(total_cost, 6),
+            "compute_cost": round(compute_cost, 6),
+            "storage_cost": round(storage_cost, 6),
+            "network_cost": round(network_cost, 6)
+        }
+
+    def estimate_deployment_cost(
+        self,
+        provider: str,
+        gpu_type: str,
+        gpu_count: int = 1,
+        estimated_hours: float = 1.0,
+        operation_type: str = "deployment"
+    ) -> Dict[str, float]:
+        """
+        Estimate deployment costs before starting deployment
+
+        Args:
+            provider: Deployment provider
+            gpu_type: GPU type to use
+            gpu_count: Number of GPUs
+            estimated_hours: Estimated runtime hours
+            operation_type: Type of operation
+
+        Returns:
+            Cost breakdown dictionary
+        """
+        return self._calculate_deployment_cost(
+            provider, gpu_type, gpu_count, estimated_hours,
+            estimated_hours, None, None
+        )
+
+    def get_deployment_summary(
+        self,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        provider: Optional[str] = None,
+        gpu_type: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Get deployment cost summary with filters"""
+
+        # Filter records
+        filtered_records = []
+        for record in self.usage_records:
+            # Check if it's a deployment record
+            if not isinstance(record, DeploymentUsageRecord):
+                continue
+
+            # Apply filters
+            if start_date and datetime.fromisoformat(record.timestamp.replace('Z', '+00:00')) < start_date:
+                continue
+            if end_date and datetime.fromisoformat(record.timestamp.replace('Z', '+00:00')) > end_date:
+                continue
+            if provider and record.provider != provider:
+                continue
+            if gpu_type and record.gpu_type != gpu_type:
+                continue
+
+            filtered_records.append(record)
+
+        if not filtered_records:
+            return {
+                "total_cost": 0.0,
+                "total_gpu_hours": 0.0,
+                "deployments": 0,
+                "by_provider": {},
+                "by_gpu_type": {},
+                "by_operation": {}
+            }
+
+        # Calculate summary
+        total_cost = sum(record.cost_usd or 0 for record in filtered_records)
+        total_gpu_hours = sum((record.runtime_hours or 0) * (record.gpu_count or 1) for record in filtered_records)
+        total_deployments = len(filtered_records)
+
+        # Group by provider
+        by_provider = {}
+        for record in filtered_records:
+            if record.provider not in by_provider:
+                by_provider[record.provider] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_provider[record.provider]["cost"] += record.cost_usd or 0
+            by_provider[record.provider]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_provider[record.provider]["count"] += 1
+
+        # Group by GPU type
+        by_gpu_type = {}
+        for record in filtered_records:
+            gpu = record.gpu_type or "unknown"
+            if gpu not in by_gpu_type:
+                by_gpu_type[gpu] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_gpu_type[gpu]["cost"] += record.cost_usd or 0
+            by_gpu_type[gpu]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_gpu_type[gpu]["count"] += 1
+
+        # Group by operation
+        by_operation = {}
+        for record in filtered_records:
+            op = record.operation_type
+            if op not in by_operation:
+                by_operation[op] = {"cost": 0.0, "gpu_hours": 0.0, "count": 0}
+            by_operation[op]["cost"] += record.cost_usd or 0
+            by_operation[op]["gpu_hours"] += (record.runtime_hours or 0) * (record.gpu_count or 1)
+            by_operation[op]["count"] += 1
+
+        return {
+            "total_cost": round(total_cost, 6),
+            "total_gpu_hours": round(total_gpu_hours, 2),
+            "deployments": total_deployments,
+            "avg_cost_per_deployment": round(total_cost / total_deployments, 6) if total_deployments > 0 else 0,
+            "avg_cost_per_gpu_hour": round(total_cost / total_gpu_hours, 6) if total_gpu_hours > 0 else 0,
+            "by_provider": by_provider,
+            "by_gpu_type": by_gpu_type,
+            "by_operation": by_operation,
+            "period": {
+                "start": filtered_records[0].timestamp if filtered_records else None,
+                "end": filtered_records[-1].timestamp if filtered_records else None
+            }
+        }
+
+# Global deployment billing tracker instance
+_global_deployment_tracker: Optional[DeploymentBillingTracker] = None
+
+def get_deployment_billing_tracker() -> DeploymentBillingTracker:
+    """Get the global deployment billing tracker instance"""
+    global _global_deployment_tracker
+    if _global_deployment_tracker is None:
+        try:
+            from .model_repo import ModelRegistry
+            registry = ModelRegistry()
+            _global_deployment_tracker = DeploymentBillingTracker(model_registry=registry)
+        except Exception:
+            _global_deployment_tracker = DeploymentBillingTracker()
+    return _global_deployment_tracker
+
+def track_deployment_usage(**kwargs) -> DeploymentUsageRecord:
+    """Convenience function to track deployment usage"""
+    return get_deployment_billing_tracker().track_deployment_usage(**kwargs)
+
+def estimate_deployment_cost(**kwargs) -> Dict[str, float]:
+    """Convenience function to estimate deployment cost"""
+    return get_deployment_billing_tracker().estimate_deployment_cost(**kwargs)
isa_model/core/models/model_manager.py
@@ -2,8 +2,6 @@ from typing import Dict, Optional, List, Any
 import logging
 from pathlib import Path
 from datetime import datetime
-from huggingface_hub import hf_hub_download, snapshot_download
-from huggingface_hub.errors import HfHubHTTPError
 from .model_storage import ModelStorage, LocalModelStorage
 from .model_repo import ModelRegistry, ModelType, ModelCapability
 from .model_billing_tracker import ModelBillingTracker, ModelOperationType
@@ -37,20 +35,43 @@ class ModelManager:
         self.config_manager = config_manager or ConfigManager()
 
     def get_model_pricing(self, provider: str, model_name: str) -> Dict[str, float]:
-        """
+        """Get model pricing information (from the database)"""
         try:
-
-
-
-
-
-
-
-
-
-
+            if not self.registry or not hasattr(self.registry, 'supabase_client'):
+                logger.warning("No database connection for pricing lookup")
+                return {"input": 0.0, "output": 0.0}
+
+            # Query the unified pricing table
+            result = self.registry.supabase_client.table('current_model_pricing').select('*').eq(
+                'model_id', model_name
+            ).eq('provider', provider).execute()
+
+            if result.data and len(result.data) > 0:
+                pricing = result.data[0]
+
+                # Convert to a unified format based on the pricing model
+                pricing_model = pricing.get('pricing_model')
+                unit_size = pricing.get('unit_size', 1)
+
+                if pricing_model == 'per_token':
+                    # Convert to per-token cost
+                    input_cost = float(pricing.get('input_cost_per_unit', 0)) * unit_size
+                    output_cost = float(pricing.get('output_cost_per_unit', 0)) * unit_size
+                elif pricing_model in ['per_character', 'per_minute', 'per_request']:
+                    # These are billed in their original units
+                    input_cost = float(pricing.get('input_cost_per_unit', 0))
+                    output_cost = float(pricing.get('output_cost_per_unit', 0))
+                    # If there is a base per-request fee, add it to the input cost
+                    if pricing.get('base_cost_per_request', 0) > 0:
+                        input_cost += float(pricing.get('base_cost_per_request', 0))
+                else:
+                    input_cost = output_cost = 0.0
+
+                return {"input": input_cost, "output": output_cost}
+
         except Exception as e:
             logger.warning(f"Failed to get pricing for {provider}/{model_name}: {e}")
+
         return {"input": 0.0, "output": 0.0}
 
     def calculate_cost(self, provider: str, model_name: str, input_tokens: int, output_tokens: int) -> float:
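To make the unit conversion concrete, here is a worked example against an assumed `current_model_pricing` row. The column names are taken from the lookup above; the values (a per-token price corresponding to $0.15/$0.60 per million tokens) are illustrative only.

```python
# Illustrative row; columns match what get_model_pricing reads above,
# values are assumptions for the sake of the arithmetic.
row = {
    "pricing_model": "per_token",
    "unit_size": 1,
    "input_cost_per_unit": 0.00000015,   # $ per token ($0.15 / 1M tokens)
    "output_cost_per_unit": 0.00000060,  # $ per token ($0.60 / 1M tokens)
    "base_cost_per_request": 0,
}

# per_token branch: per-unit cost scaled by unit_size.
input_cost = float(row["input_cost_per_unit"]) * row["unit_size"]
output_cost = float(row["output_cost_per_unit"]) * row["unit_size"]
print({"input": input_cost, "output": output_cost})
# -> {'input': 1.5e-07, 'output': 6e-07}
```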
@@ -79,73 +100,7 @@ class ModelManager:
             logger.warning(f"Failed to find cheapest model for {provider}: {e}")
             return None
 
-
-                        model_id: str,
-                        repo_id: str,
-                        model_type: ModelType,
-                        capabilities: List[ModelCapability],
-                        revision: Optional[str] = None,
-                        force_download: bool = False) -> Optional[Path]:
-        """
-        Get model files, downloading if necessary
-
-        Args:
-            model_id: Unique identifier for the model
-            repo_id: Hugging Face repository ID
-            model_type: Type of model (LLM, embedding, etc.)
-            capabilities: List of model capabilities
-            revision: Specific model version/tag
-            force_download: Force re-download even if cached
-
-        Returns:
-            Path to the model files or None if failed
-        """
-        # Check if model is already downloaded
-        if not force_download:
-            model_path = await self.storage.load_model(model_id)
-            if model_path:
-                logger.info(f"Using cached model {model_id}")
-                return model_path
-
-        try:
-            # Download model files
-            logger.info(f"Downloading model {model_id} from {repo_id}")
-            model_dir = Path(f"./models/temp/{model_id}")
-            model_dir.mkdir(parents=True, exist_ok=True)
-
-            snapshot_download(
-                repo_id=repo_id,
-                revision=revision,
-                local_dir=model_dir,
-                local_dir_use_symlinks=False
-            )
-
-            # Save model and metadata
-            metadata = {
-                "repo_id": repo_id,
-                "revision": revision,
-                "downloaded_at": str(Path(model_dir).stat().st_mtime)
-            }
-
-            # Register model
-            self.registry.register_model(
-                model_id=model_id,
-                model_type=model_type,
-                capabilities=capabilities,
-                metadata=metadata
-            )
-
-            # Save model files
-            await self.storage.save_model(model_id, str(model_dir), metadata)
-
-            return await self.storage.load_model(model_id)
-
-        except HfHubHTTPError as e:
-            logger.error(f"Failed to download model {model_id}: {e}")
-            return None
-        except Exception as e:
-            logger.error(f"Unexpected error downloading model {model_id}: {e}")
-            return None
+    # Local model download functionality removed - use cloud API services only
 
     async def list_models(self) -> List[Dict[str, Any]]:
         """List all downloaded models with their metadata"""