isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/serving/api/routes/inference_monitoring.py (new file)
@@ -0,0 +1,486 @@
+"""
+Inference Monitoring API Routes
+
+Provides comprehensive monitoring and analytics for model inference activities.
+Uses InfluxDB as the backend for time-series data analysis.
+
+Features:
+- Real-time inference metrics
+- Cost analysis and tracking
+- Performance monitoring
+- Usage statistics by provider/model
+- Error tracking and alerting
+- Token usage analytics
+"""
+
+from fastapi import APIRouter, Query, HTTPException, Depends
+from pydantic import BaseModel, Field
+from typing import Optional, List, Dict, Any, Union
+import logging
+from datetime import datetime, timedelta
+import json
+
+from isa_model.core.logging import get_inference_logger
+from ..middleware.auth import optional_auth, require_read_access
+
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+# Request/Response Models
+class InferenceMetricsResponse(BaseModel):
+    """Response model for inference metrics"""
+    success: bool
+    data: Any  # More flexible data field
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+class UsageStatsRequest(BaseModel):
+    """Request model for usage statistics"""
+    hours: int = Field(24, ge=1, le=168, description="Time range in hours (1-168)")
+    group_by: str = Field("provider", description="Group by: provider, model_name, service_type")
+    include_costs: bool = Field(True, description="Include cost analysis")
+
+class ErrorAnalysisRequest(BaseModel):
+    """Request model for error analysis"""
+    hours: int = Field(24, ge=1, le=168, description="Time range in hours")
+    error_types: Optional[List[str]] = Field(None, description="Filter by error types")
+    providers: Optional[List[str]] = Field(None, description="Filter by providers")
+
+@router.get("/health")
+async def monitoring_health():
+    """Health check for inference monitoring service"""
+    inference_logger = get_inference_logger()
+
+    return {
+        "status": "healthy" if inference_logger.enabled else "disabled",
+        "service": "inference_monitoring",
+        "influxdb_enabled": inference_logger.enabled,
+        "influxdb_url": inference_logger.url if inference_logger.enabled else None,
+        "bucket": inference_logger.bucket if inference_logger.enabled else None
+    }
+
+@router.get("/recent-requests", response_model=InferenceMetricsResponse)
+async def get_recent_requests(
+    limit: int = Query(50, ge=1, le=500, description="Number of recent requests to fetch"),
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    service_type: Optional[str] = Query(None, description="Filter by service type"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    status: Optional[str] = Query(None, description="Filter by status (completed, failed)"),
+    user = Depends(optional_auth)
+):
+    """
+    Get recent inference requests with optional filtering
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(
+                status_code=503,
+                detail="Inference logging is disabled. Enable ENABLE_INFERENCE_LOGGING."
+            )
+
+        # Fetch recent requests
+        requests = inference_logger.get_recent_requests(
+            limit=limit,
+            hours=hours,
+            service_type=service_type,
+            provider=provider,
+            status=status
+        )
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=requests,
+            metadata={
+                "total_requests": len(requests),
+                "time_range_hours": hours,
+                "filters": {
+                    "service_type": service_type,
+                    "provider": provider,
+                    "status": status
+                }
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching recent requests: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch recent requests: {str(e)}")
+
+@router.post("/usage-stats", response_model=InferenceMetricsResponse)
+async def get_usage_statistics(
+    request: UsageStatsRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Get usage statistics and analytics
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(
+                status_code=503,
+                detail="Inference logging is disabled"
+            )
+
+        # Get usage statistics
+        stats = inference_logger.get_usage_statistics(
+            hours=request.hours,
+            group_by=request.group_by
+        )
+
+        # Calculate totals and summaries
+        total_requests = sum(data.get('total_requests', 0) for data in stats.values())
+
+        metadata = {
+            "time_range_hours": request.hours,
+            "group_by": request.group_by,
+            "total_requests": total_requests,
+            "unique_groups": len(stats),
+            "include_costs": request.include_costs
+        }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=stats,
+            metadata=metadata
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching usage statistics: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch usage statistics: {str(e)}")
+
+@router.get("/cost-analysis", response_model=InferenceMetricsResponse)
+async def get_cost_analysis(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    group_by: str = Query("provider", description="Group by: provider, model_name, service_type"),
+    user = Depends(optional_auth)
+):
+    """
+    Get cost analysis and spending breakdown
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # This would typically involve more complex InfluxDB queries
+        # For now, we'll use the existing usage statistics method
+        stats = inference_logger.get_usage_statistics(hours=hours, group_by=group_by)
+
+        # Calculate cost summaries (this would be enhanced with actual cost queries)
+        cost_analysis = {}
+        total_cost = 0.0
+        total_requests = 0
+
+        for group, data in stats.items():
+            requests = data.get('total_requests', 0)
+            # Estimate costs (in a real implementation, this would come from the database)
+            estimated_cost = requests * 0.002  # Rough estimate
+
+            cost_analysis[group] = {
+                "requests": requests,
+                "estimated_cost_usd": estimated_cost,
+                "cost_per_request": estimated_cost / requests if requests > 0 else 0,
+                "hourly_data": data.get('hourly_data', [])
+            }
+
+            total_cost += estimated_cost
+            total_requests += requests
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=cost_analysis,
+            metadata={
+                "time_range_hours": hours,
+                "group_by": group_by,
+                "total_cost_usd": total_cost,
+                "total_requests": total_requests,
+                "average_cost_per_request": total_cost / total_requests if total_requests > 0 else 0
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error performing cost analysis: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to perform cost analysis: {str(e)}")
+
+@router.get("/performance-metrics", response_model=InferenceMetricsResponse)
+async def get_performance_metrics(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    model_name: Optional[str] = Query(None, description="Filter by model"),
+    user = Depends(optional_auth)
+):
+    """
+    Get performance metrics including response times, success rates, etc.
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent requests for performance analysis
+        requests = inference_logger.get_recent_requests(
+            limit=1000,  # Large sample for accurate metrics
+            hours=hours,
+            provider=provider
+        )
+
+        if not requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={},
+                metadata={"message": "No data found for the specified criteria"}
+            )
+
+        # Calculate performance metrics
+        total_requests = len(requests)
+        successful_requests = len([r for r in requests if r.get('status') == 'completed'])
+        failed_requests = total_requests - successful_requests
+
+        execution_times = [r.get('execution_time_ms', 0) for r in requests if r.get('execution_time_ms')]
+
+        performance_data = {
+            "request_counts": {
+                "total": total_requests,
+                "successful": successful_requests,
+                "failed": failed_requests,
+                "success_rate": (successful_requests / total_requests) * 100 if total_requests > 0 else 0
+            },
+            "response_times": {
+                "count": len(execution_times),
+                "average_ms": sum(execution_times) / len(execution_times) if execution_times else 0,
+                "min_ms": min(execution_times) if execution_times else 0,
+                "max_ms": max(execution_times) if execution_times else 0,
+            } if execution_times else {}
+        }
+
+        # Group by provider if not filtered
+        if not provider:
+            provider_stats = {}
+            for req in requests:
+                prov = req.get('provider', 'unknown')
+                if prov not in provider_stats:
+                    provider_stats[prov] = {"requests": 0, "successful": 0, "total_time": 0}
+
+                provider_stats[prov]["requests"] += 1
+                if req.get('status') == 'completed':
+                    provider_stats[prov]["successful"] += 1
+                if req.get('execution_time_ms'):
+                    provider_stats[prov]["total_time"] += req.get('execution_time_ms', 0)
+
+            performance_data["by_provider"] = {
+                prov: {
+                    "requests": stats["requests"],
+                    "success_rate": (stats["successful"] / stats["requests"]) * 100,
+                    "avg_response_time_ms": stats["total_time"] / stats["requests"] if stats["requests"] > 0 else 0
+                }
+                for prov, stats in provider_stats.items()
+            }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=performance_data,
+            metadata={
+                "time_range_hours": hours,
+                "provider": provider,
+                "model_name": model_name,
+                "sample_size": total_requests
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching performance metrics: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch performance metrics: {str(e)}")
+
+@router.post("/error-analysis", response_model=InferenceMetricsResponse)
+async def get_error_analysis(
+    request: ErrorAnalysisRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Analyze errors and failure patterns
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent failed requests
+        failed_requests = inference_logger.get_recent_requests(
+            limit=500,
+            hours=request.hours,
+            status="failed"
+        )
+
+        if not failed_requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={"message": "No errors found in the specified time range"},
+                metadata={"error_count": 0}
+            )
+
+        # Analyze error patterns
+        error_analysis = {
+            "total_errors": len(failed_requests),
+            "error_rate": 0,  # Would calculate from total requests
+            "by_provider": {},
+            "by_model": {},
+            "by_service_type": {},
+            "recent_errors": failed_requests[:10]  # Most recent 10 errors
+        }
+
+        # Group errors by different dimensions
+        for req in failed_requests:
+            provider = req.get('provider', 'unknown')
+            model = req.get('model_name', 'unknown')
+            service_type = req.get('service_type', 'unknown')
+
+            # Count by provider
+            if provider not in error_analysis["by_provider"]:
+                error_analysis["by_provider"][provider] = 0
+            error_analysis["by_provider"][provider] += 1
+
+            # Count by model
+            if model not in error_analysis["by_model"]:
+                error_analysis["by_model"][model] = 0
+            error_analysis["by_model"][model] += 1
+
+            # Count by service type
+            if service_type not in error_analysis["by_service_type"]:
+                error_analysis["by_service_type"][service_type] = 0
+            error_analysis["by_service_type"][service_type] += 1
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=error_analysis,
+            metadata={
+                "time_range_hours": request.hours,
+                "filters": {
+                    "error_types": request.error_types,
+                    "providers": request.providers
+                }
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error performing error analysis: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to perform error analysis: {str(e)}")
+
+@router.get("/dashboard-summary", response_model=InferenceMetricsResponse)
+async def get_dashboard_summary(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    user = Depends(optional_auth)
+):
+    """
+    Get summary metrics for the monitoring dashboard
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent requests for summary
+        recent_requests = inference_logger.get_recent_requests(limit=1000, hours=hours)
+
+        if not recent_requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={"message": "No data available"},
+                metadata={"hours": hours}
+            )
+
+        # Calculate summary metrics
+        total_requests = len(recent_requests)
+        successful_requests = len([r for r in recent_requests if r.get('status') == 'completed'])
+        failed_requests = total_requests - successful_requests
+
+        # Cost summary
+        total_cost = sum(r.get('cost_usd', 0) or 0 for r in recent_requests)
+        avg_cost = total_cost / total_requests if total_requests > 0 else 0
+
+        # Token summary
+        total_tokens = sum(r.get('tokens', 0) or 0 for r in recent_requests)
+        avg_tokens = total_tokens / total_requests if total_requests > 0 else 0
+
+        # Top providers
+        provider_counts = {}
+        for req in recent_requests:
+            provider = req.get('provider', 'unknown')
+            provider_counts[provider] = provider_counts.get(provider, 0) + 1
+
+        # Top models
+        model_counts = {}
+        for req in recent_requests:
+            model = req.get('model_name', 'unknown')
+            model_counts[model] = model_counts.get(model, 0) + 1
+
+        summary = {
+            "overview": {
+                "total_requests": total_requests,
+                "successful_requests": successful_requests,
+                "failed_requests": failed_requests,
+                "success_rate": (successful_requests / total_requests) * 100 if total_requests > 0 else 0,
+                "total_cost_usd": total_cost,
+                "average_cost_per_request": avg_cost,
+                "total_tokens": total_tokens,
+                "average_tokens_per_request": avg_tokens
+            },
+            "top_providers": dict(sorted(provider_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
+            "top_models": dict(sorted(model_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
+            "time_range": {
+                "hours": hours,
+                "start_time": (datetime.now() - timedelta(hours=hours)).isoformat(),
+                "end_time": datetime.now().isoformat()
+            }
+        }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=summary,
+            metadata={"generated_at": datetime.now()}
+        )
+
+    except Exception as e:
+        logger.error(f"Error generating dashboard summary: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to generate dashboard summary: {str(e)}")
+
+@router.delete("/clear-logs")
+async def clear_inference_logs(
+    confirm: bool = Query(False, description="Confirmation required to clear logs"),
+    user = Depends(require_read_access)  # Require authentication for destructive operations
+):
+    """
+    Clear all inference logs (DANGEROUS - requires confirmation)
+    """
+    if not confirm:
+        raise HTTPException(
+            status_code=400,
+            detail="Confirmation required. Set confirm=true to clear all logs."
+        )
+
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # This would implement log clearing in InfluxDB
+        # For safety, we'll just return a warning for now
+
+        logger.warning("Log clearing requested but not implemented for safety")
+
+        return {
+            "success": False,
+            "message": "Log clearing not implemented for safety. Contact administrator.",
+            "timestamp": datetime.now()
+        }
+
+    except Exception as e:
+        logger.error(f"Error clearing logs: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to clear logs: {str(e)}")
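Usage note: the sketch below shows one way a client might exercise the new monitoring routes above. It is illustrative only, not part of the package: the mount prefix (/api/v1/inference-monitoring here) is not visible in this diff, the server address is assumed, and optional_auth is assumed to permit anonymous reads; adjust all three to match the actual fastapi_server.py wiring.

# Hypothetical client sketch (assumptions: base URL, router prefix, anonymous access).
import requests

BASE = "http://localhost:8000/api/v1/inference-monitoring"  # hypothetical mount prefix

# 1) Health probe: reports whether InfluxDB-backed logging is enabled.
health = requests.get(f"{BASE}/health").json()
print(health["status"], health.get("influxdb_enabled"))

# 2) Recent failed OpenAI requests from the last 6 hours; the query params
#    mirror the Query(...) parameters of get_recent_requests.
recent = requests.get(
    f"{BASE}/recent-requests",
    params={"limit": 20, "hours": 6, "provider": "openai", "status": "failed"},
)
recent.raise_for_status()
for item in recent.json()["data"]:
    print(item.get("model_name"), item.get("execution_time_ms"))

# 3) Usage statistics grouped by model; the JSON body matches UsageStatsRequest.
stats = requests.post(
    f"{BASE}/usage-stats",
    json={"hours": 48, "group_by": "model_name", "include_costs": True},
).json()
print(stats["metadata"]["total_requests"], "requests across",
      stats["metadata"]["unique_groups"], "models")

One behavioral detail for callers: in most handlers the 503 raised when inference logging is disabled sits inside the same try block as everything else, and FastAPI's HTTPException subclasses Exception, so the blanket except clause re-wraps it as a 500; only /health reports the disabled state cleanly.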