isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff compares publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/serving/api/routes/deployment_billing.py (new file)
@@ -0,0 +1,315 @@
+"""
+Deployment Billing API Routes
+
+API endpoints for deployment cost estimation, tracking, and billing information.
+"""
+
+from fastapi import APIRouter, HTTPException, Query, Depends
+from typing import Dict, Any, Optional, List
+from datetime import datetime, timedelta
+import logging
+from pydantic import BaseModel
+
+from ..auth import optional_auth
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/deployment", tags=["deployment-billing"])
+
+
+class CostEstimationRequest(BaseModel):
+    """Request model for deployment cost estimation"""
+    provider: str
+    gpu_type: str
+    gpu_count: int = 1
+    estimated_hours: float = 1.0
+    operation_type: str = "deployment"
+
+
+class DeploymentBillingQuery(BaseModel):
+    """Query parameters for deployment billing"""
+    start_date: Optional[str] = None
+    end_date: Optional[str] = None
+    provider: Optional[str] = None
+    gpu_type: Optional[str] = None
+    model_id: Optional[str] = None
+
+
+@router.post("/estimate-cost")
+async def estimate_deployment_cost(
+    request: CostEstimationRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Estimate deployment costs before starting deployment
+
+    Returns cost breakdown for specified provider, GPU type, and duration.
+    """
+    try:
+        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+        billing_tracker = get_deployment_billing_tracker()
+        cost_estimate = billing_tracker.estimate_deployment_cost(
+            provider=request.provider,
+            gpu_type=request.gpu_type,
+            gpu_count=request.gpu_count,
+            estimated_hours=request.estimated_hours,
+            operation_type=request.operation_type
+        )
+
+        # Add additional cost breakdown details
+        hourly_rate = cost_estimate["compute_cost"] / request.estimated_hours if request.estimated_hours > 0 else 0
+
+        return {
+            "success": True,
+            "estimation": {
+                "provider": request.provider,
+                "gpu_type": request.gpu_type,
+                "gpu_count": request.gpu_count,
+                "estimated_hours": request.estimated_hours,
+                "cost_breakdown": cost_estimate,
+                "hourly_rate": round(hourly_rate, 6),
+                "recommendations": _get_cost_recommendations(request.provider, request.gpu_type, cost_estimate)
+            }
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to estimate deployment cost: {e}")
+        raise HTTPException(status_code=500, detail=f"Cost estimation failed: {str(e)}")
+
+
+@router.get("/billing/summary")
+async def get_deployment_billing_summary(
+    start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
+    end_date: Optional[str] = Query(None, description="End date (ISO format)"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    gpu_type: Optional[str] = Query(None, description="Filter by GPU type"),
+    model_id: Optional[str] = Query(None, description="Filter by model ID"),
+    user = Depends(optional_auth)
+):
+    """
+    Get deployment billing summary with optional filters
+
+    Returns comprehensive billing information for deployments within specified period.
+    """
+    try:
+        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+        # Parse dates
+        start_dt = None
+        end_dt = None
+        if start_date:
+            start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
+        if end_date:
+            end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
+
+        billing_tracker = get_deployment_billing_tracker()
+
+        # Get deployment summary
+        deployment_summary = billing_tracker.get_deployment_summary(
+            start_date=start_dt,
+            end_date=end_dt,
+            provider=provider,
+            gpu_type=gpu_type
+        )
+
+        # If model_id filter is specified, get model-specific data
+        model_summary = None
+        if model_id:
+            model_summary = billing_tracker.get_model_usage_summary(model_id)
+
+        return {
+            "success": True,
+            "filters": {
+                "start_date": start_date,
+                "end_date": end_date,
+                "provider": provider,
+                "gpu_type": gpu_type,
+                "model_id": model_id
+            },
+            "deployment_summary": deployment_summary,
+            "model_summary": model_summary,
+            "recommendations": _get_billing_recommendations(deployment_summary)
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get deployment billing summary: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get billing summary: {str(e)}")
+
+
+@router.get("/pricing")
+async def get_deployment_pricing(
+    user = Depends(optional_auth)
+):
+    """
+    Get current deployment pricing for all providers and GPU types
+
+    Returns up-to-date pricing information for cost planning.
+    """
+    try:
+        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+        billing_tracker = get_deployment_billing_tracker()
+        pricing_data = billing_tracker.pricing_data
+
+        # Add provider descriptions and recommendations
+        enhanced_pricing = {}
+        for provider, pricing in pricing_data.items():
+            enhanced_pricing[provider] = {
+                "pricing": pricing,
+                "description": _get_provider_description(provider),
+                "best_for": _get_provider_recommendations(provider),
+                "availability": _check_provider_availability(provider)
+            }
+
+        return {
+            "success": True,
+            "pricing": enhanced_pricing,
+            "currency": "USD",
+            "unit": "per hour",
+            "last_updated": datetime.now().isoformat()
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get deployment pricing: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get pricing: {str(e)}")
+
+
+@router.get("/providers/compare")
+async def compare_providers(
+    gpu_type: str = Query(..., description="GPU type to compare"),
+    hours: float = Query(1.0, description="Number of hours for comparison"),
+    user = Depends(optional_auth)
+):
+    """
+    Compare costs across different providers for the same GPU type
+
+    Helps users choose the most cost-effective deployment option.
+    """
+    try:
+        from isa_model.core.models.deployment_billing_tracker import get_deployment_billing_tracker
+
+        billing_tracker = get_deployment_billing_tracker()
+        comparisons = []
+
+        providers = ["modal", "runpod", "lambda_labs", "coreweave"]
+
+        for provider in providers:
+            try:
+                cost_estimate = billing_tracker.estimate_deployment_cost(
+                    provider=provider,
+                    gpu_type=gpu_type,
+                    gpu_count=1,
+                    estimated_hours=hours
+                )
+
+                comparisons.append({
+                    "provider": provider,
+                    "total_cost": cost_estimate["total_cost"],
+                    "hourly_rate": cost_estimate["compute_cost"] / hours if hours > 0 else 0,
+                    "breakdown": cost_estimate,
+                    "description": _get_provider_description(provider),
+                    "availability": _check_provider_availability(provider)
+                })
+            except Exception as e:
+                logger.warning(f"Could not get pricing for {provider}: {e}")
+
+        # Sort by total cost
+        comparisons.sort(key=lambda x: x["total_cost"])
+
+        return {
+            "success": True,
+            "comparison": {
+                "gpu_type": gpu_type,
+                "duration_hours": hours,
+                "providers": comparisons,
+                "cheapest": comparisons[0] if comparisons else None,
+                "savings": {
+                    "max_savings": comparisons[-1]["total_cost"] - comparisons[0]["total_cost"] if len(comparisons) > 1 else 0,
+                    "percentage": ((comparisons[-1]["total_cost"] - comparisons[0]["total_cost"]) / comparisons[-1]["total_cost"] * 100) if len(comparisons) > 1 and comparisons[-1]["total_cost"] > 0 else 0
+                }
+            }
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to compare providers: {e}")
+        raise HTTPException(status_code=500, detail=f"Provider comparison failed: {str(e)}")
+
+
+def _get_cost_recommendations(provider: str, gpu_type: str, cost_estimate: Dict[str, float]) -> List[str]:
+    """Generate cost optimization recommendations"""
+    recommendations = []
+
+    if cost_estimate["total_cost"] > 10.0:
+        recommendations.append("Consider using spot instances if available for significant savings")
+
+    if gpu_type in ["h100", "a100_80gb"]:
+        recommendations.append("High-end GPU selected - ensure workload requires this performance")
+
+    if provider == "modal":
+        recommendations.append("Modal offers automatic scaling - costs only incurred during active use")
+
+    if provider in ["runpod", "lambda_labs"]:
+        recommendations.append("Consider longer-term contracts for better rates on extended deployments")
+
+    return recommendations
+
+
+def _get_billing_recommendations(summary: Dict[str, Any]) -> List[str]:
+    """Generate billing optimization recommendations based on usage patterns"""
+    recommendations = []
+
+    if summary["total_cost"] > 100.0:
+        recommendations.append("High usage detected - consider reserved instances for cost savings")
+
+    # Analyze provider distribution
+    providers = summary.get("by_provider", {})
+    if len(providers) > 1:
+        costs = [(p, data["cost"]) for p, data in providers.items()]
+        costs.sort(key=lambda x: x[1])
+        if len(costs) > 1 and costs[-1][1] > costs[0][1] * 2:
+            recommendations.append(f"Consider migrating from {costs[-1][0]} to {costs[0][0]} for potential savings")
+
+    # Analyze GPU usage
+    gpu_types = summary.get("by_gpu_type", {})
+    if "h100" in gpu_types and gpu_types["h100"]["gpu_hours"] < 10:
+        recommendations.append("Low H100 usage - consider A100 for similar performance at lower cost")
+
+    return recommendations
+
+
+def _get_provider_description(provider: str) -> str:
+    """Get description for deployment provider"""
+    descriptions = {
+        "modal": "Serverless GPU platform with automatic scaling and pay-per-use billing",
+        "triton_local": "Local deployment using your own hardware with electricity costs",
+        "runpod": "Cloud GPU rental with competitive pricing and flexible instances",
+        "lambda_labs": "Professional GPU cloud with reliable infrastructure and support",
+        "coreweave": "High-performance GPU infrastructure optimized for AI workloads"
+    }
+    return descriptions.get(provider, "Unknown provider")
+
+
+def _get_provider_recommendations(provider: str) -> List[str]:
+    """Get recommendations for when to use each provider"""
+    recommendations = {
+        "modal": ["Development and testing", "Variable workloads", "Automatic scaling needs"],
+        "triton_local": ["Long-term deployments", "Data privacy requirements", "Cost optimization"],
+        "runpod": ["Budget-conscious deployments", "Flexible scaling", "Spot instance savings"],
+        "lambda_labs": ["Production workloads", "Reliable performance", "Enterprise support"],
+        "coreweave": ["High-performance requirements", "Large-scale deployments", "Bare metal access"]
+    }
+    return recommendations.get(provider, [])
+
+
+def _check_provider_availability(provider: str) -> str:
+    """Check if provider is currently available"""
+    # This would implement actual availability checking
+    # For now, return static status
+    availability = {
+        "modal": "Available",
+        "triton_local": "Available (requires local setup)",
+        "runpod": "Available",
+        "lambda_labs": "Available",
+        "coreweave": "Available (requires signup)"
+    }
+    return availability.get(provider, "Unknown")
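
As a rough illustration of how these new billing routes could be exercised from a client, here is a minimal sketch using the requests library. The base URL, any API mount prefix, and the auth header are assumptions not shown in this diff; the request fields and response keys follow CostEstimationRequest and the route handlers above, and "a100_80gb" is one of the GPU types referenced in _get_cost_recommendations.

# Hypothetical client-side sketch; BASE and auth are placeholders, not part of the package.
import requests

BASE = "http://localhost:8000"  # assumed address of a running isa_model API server

# Estimate the cost of a single-GPU deployment on Modal for two hours.
resp = requests.post(
    f"{BASE}/deployment/estimate-cost",
    json={
        "provider": "modal",
        "gpu_type": "a100_80gb",
        "gpu_count": 1,
        "estimated_hours": 2.0,
        "operation_type": "deployment",
    },
)
print(resp.json()["estimation"]["cost_breakdown"])

# Compare providers for the same GPU type and duration; cheapest option comes first.
resp = requests.get(
    f"{BASE}/deployment/providers/compare",
    params={"gpu_type": "a100_80gb", "hours": 2.0},
)
print(resp.json()["comparison"]["cheapest"])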
isa_model/serving/api/routes/deployments.py
@@ -4,15 +4,17 @@ Deployments API Routes
 Handles automated HuggingFace model deployment to Modal
 """
 
-from fastapi import APIRouter, HTTPException, BackgroundTasks
+from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
 from pydantic import BaseModel
 from typing import Optional, List, Dict, Any
 import logging
 import asyncio
 import json
+import time
+from datetime import datetime
 from pathlib import Path
 
-from isa_model.deployment.services.auto_hf_modal_deployer import HuggingFaceModalDeployer
+from isa_model.deployment.modal.deployer import ModalDeployer as HuggingFaceModalDeployer
 
 logger = logging.getLogger(__name__)
 
@@ -233,6 +235,140 @@ async def get_deployment(deployment_id: str):
         logger.error(f"Failed to get deployment {deployment_id}: {e}")
         raise HTTPException(status_code=500, detail=f"Failed to get deployment: {str(e)}")
 
+@router.get("/{deployment_id}/status")
+async def get_deployment_status(deployment_id: str, request: Request):
+    """
+    Get real-time deployment status and monitoring information with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        # Initialize deployment manager
+        manager = DeploymentManager()
+
+        # Verify tenant access to deployment first
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        # Get deployment status
+        status_info = await manager.get_modal_service_status(deployment_id)
+
+        return {
+            "success": True,
+            "deployment_status": status_info
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get deployment status {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get deployment status: {str(e)}")
+
+@router.get("/{deployment_id}/monitoring")
+async def get_deployment_monitoring(deployment_id: str, request: Request):
+    """
+    Get detailed monitoring metrics for Modal deployment with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        manager = DeploymentManager()
+
+        # Verify tenant access to deployment first
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        status_info = await manager.get_modal_service_status(deployment_id)
+
+        if status_info.get("status") == "not_found":
+            raise HTTPException(status_code=404, detail="Deployment not found")
+
+        # Extract detailed monitoring data
+        monitoring_data = status_info.get("monitoring", {})
+
+        return {
+            "success": True,
+            "deployment_id": deployment_id,
+            "monitoring": {
+                "health_check": monitoring_data.get("health_check"),
+                "resource_usage": monitoring_data.get("resource_usage"),
+                "request_metrics": monitoring_data.get("request_metrics"),
+                "cost_tracking": monitoring_data.get("cost_tracking"),
+                "last_updated": datetime.now().isoformat()
+            }
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get monitoring data {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get monitoring data: {str(e)}")
+
+@router.post("/{deployment_id}/restart")
+async def restart_deployment(deployment_id: str, request: Request):
+    """
+    Restart a Modal deployment with tenant isolation
+    """
+    try:
+        from isa_model.deployment.core.deployment_manager import DeploymentManager
+        from isa_model.serving.api.middleware.tenant_context import get_tenant_context
+
+        # Get tenant context for isolation
+        tenant_context = get_tenant_context()
+        tenant_dict = {
+            "organization_id": tenant_context.organization_id,
+            "user_id": tenant_context.user_id,
+            "role": tenant_context.role
+        } if tenant_context else None
+
+        manager = DeploymentManager()
+
+        # Check if deployment exists and user has access
+        deployment = await manager.get_deployment(deployment_id, tenant_dict)
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found or access denied")
+
+        # Update status to restarting
+        await manager.update_deployment_status(deployment_id, "restarting")
+
+        # TODO: Implement actual Modal service restart
+        # For now, simulate restart process
+        await asyncio.sleep(1)
+
+        # Update status to running
+        await manager.update_deployment_status(deployment_id, "running")
+
+        return {
+            "success": True,
+            "message": f"Deployment {deployment_id} restarted successfully",
+            "deployment_id": deployment_id,
+            "status": "running"
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to restart deployment {deployment_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to restart deployment: {str(e)}")
+
 @router.delete("/{deployment_id}")
 async def cancel_deployment(deployment_id: str):
     """