isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
```diff
--- /dev/null
+++ b/isa_model/serving/api/routes/inference_monitoring.py
@@ -0,0 +1,486 @@
+"""
+Inference Monitoring API Routes
+
+Provides comprehensive monitoring and analytics for model inference activities.
+Uses InfluxDB as the backend for time-series data analysis.
+
+Features:
+- Real-time inference metrics
+- Cost analysis and tracking
+- Performance monitoring
+- Usage statistics by provider/model
+- Error tracking and alerting
+- Token usage analytics
+"""
+
+from fastapi import APIRouter, Query, HTTPException, Depends
+from pydantic import BaseModel, Field
+from typing import Optional, List, Dict, Any, Union
+import logging
+from datetime import datetime, timedelta
+import json
+
+from isa_model.core.logging import get_inference_logger
+from ..middleware.auth import optional_auth, require_read_access
+
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+# Request/Response Models
+class InferenceMetricsResponse(BaseModel):
+    """Response model for inference metrics"""
+    success: bool
+    data: Any  # More flexible data field
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+class UsageStatsRequest(BaseModel):
+    """Request model for usage statistics"""
+    hours: int = Field(24, ge=1, le=168, description="Time range in hours (1-168)")
+    group_by: str = Field("provider", description="Group by: provider, model_name, service_type")
+    include_costs: bool = Field(True, description="Include cost analysis")
+
+class ErrorAnalysisRequest(BaseModel):
+    """Request model for error analysis"""
+    hours: int = Field(24, ge=1, le=168, description="Time range in hours")
+    error_types: Optional[List[str]] = Field(None, description="Filter by error types")
+    providers: Optional[List[str]] = Field(None, description="Filter by providers")
+
+@router.get("/health")
+async def monitoring_health():
+    """Health check for inference monitoring service"""
+    inference_logger = get_inference_logger()
+
+    return {
+        "status": "healthy" if inference_logger.enabled else "disabled",
+        "service": "inference_monitoring",
+        "influxdb_enabled": inference_logger.enabled,
+        "influxdb_url": inference_logger.url if inference_logger.enabled else None,
+        "bucket": inference_logger.bucket if inference_logger.enabled else None
+    }
+
+@router.get("/recent-requests", response_model=InferenceMetricsResponse)
+async def get_recent_requests(
+    limit: int = Query(50, ge=1, le=500, description="Number of recent requests to fetch"),
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    service_type: Optional[str] = Query(None, description="Filter by service type"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    status: Optional[str] = Query(None, description="Filter by status (completed, failed)"),
+    user = Depends(optional_auth)
+):
+    """
+    Get recent inference requests with optional filtering
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(
+                status_code=503,
+                detail="Inference logging is disabled. Enable ENABLE_INFERENCE_LOGGING."
+            )
+
+        # Fetch recent requests
+        requests = inference_logger.get_recent_requests(
+            limit=limit,
+            hours=hours,
+            service_type=service_type,
+            provider=provider,
+            status=status
+        )
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=requests,
+            metadata={
+                "total_requests": len(requests),
+                "time_range_hours": hours,
+                "filters": {
+                    "service_type": service_type,
+                    "provider": provider,
+                    "status": status
+                }
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching recent requests: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch recent requests: {str(e)}")
+
+@router.post("/usage-stats", response_model=InferenceMetricsResponse)
+async def get_usage_statistics(
+    request: UsageStatsRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Get usage statistics and analytics
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(
+                status_code=503,
+                detail="Inference logging is disabled"
+            )
+
+        # Get usage statistics
+        stats = inference_logger.get_usage_statistics(
+            hours=request.hours,
+            group_by=request.group_by
+        )
+
+        # Calculate totals and summaries
+        total_requests = sum(data.get('total_requests', 0) for data in stats.values())
+
+        metadata = {
+            "time_range_hours": request.hours,
+            "group_by": request.group_by,
+            "total_requests": total_requests,
+            "unique_groups": len(stats),
+            "include_costs": request.include_costs
+        }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=stats,
+            metadata=metadata
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching usage statistics: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch usage statistics: {str(e)}")
+
+@router.get("/cost-analysis", response_model=InferenceMetricsResponse)
+async def get_cost_analysis(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    group_by: str = Query("provider", description="Group by: provider, model_name, service_type"),
+    user = Depends(optional_auth)
+):
+    """
+    Get cost analysis and spending breakdown
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # This would typically involve more complex InfluxDB queries
+        # For now, we'll use the existing usage statistics method
+        stats = inference_logger.get_usage_statistics(hours=hours, group_by=group_by)
+
+        # Calculate cost summaries (this would be enhanced with actual cost queries)
+        cost_analysis = {}
+        total_cost = 0.0
+        total_requests = 0
+
+        for group, data in stats.items():
+            requests = data.get('total_requests', 0)
+            # Estimate costs (in a real implementation, this would come from the database)
+            estimated_cost = requests * 0.002  # Rough estimate
+
+            cost_analysis[group] = {
+                "requests": requests,
+                "estimated_cost_usd": estimated_cost,
+                "cost_per_request": estimated_cost / requests if requests > 0 else 0,
+                "hourly_data": data.get('hourly_data', [])
+            }
+
+            total_cost += estimated_cost
+            total_requests += requests
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=cost_analysis,
+            metadata={
+                "time_range_hours": hours,
+                "group_by": group_by,
+                "total_cost_usd": total_cost,
+                "total_requests": total_requests,
+                "average_cost_per_request": total_cost / total_requests if total_requests > 0 else 0
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error performing cost analysis: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to perform cost analysis: {str(e)}")
+
+@router.get("/performance-metrics", response_model=InferenceMetricsResponse)
+async def get_performance_metrics(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    model_name: Optional[str] = Query(None, description="Filter by model"),
+    user = Depends(optional_auth)
+):
+    """
+    Get performance metrics including response times, success rates, etc.
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent requests for performance analysis
+        requests = inference_logger.get_recent_requests(
+            limit=1000,  # Large sample for accurate metrics
+            hours=hours,
+            provider=provider
+        )
+
+        if not requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={},
+                metadata={"message": "No data found for the specified criteria"}
+            )
+
+        # Calculate performance metrics
+        total_requests = len(requests)
+        successful_requests = len([r for r in requests if r.get('status') == 'completed'])
+        failed_requests = total_requests - successful_requests
+
+        execution_times = [r.get('execution_time_ms', 0) for r in requests if r.get('execution_time_ms')]
+
+        performance_data = {
+            "request_counts": {
+                "total": total_requests,
+                "successful": successful_requests,
+                "failed": failed_requests,
+                "success_rate": (successful_requests / total_requests) * 100 if total_requests > 0 else 0
+            },
+            "response_times": {
+                "count": len(execution_times),
+                "average_ms": sum(execution_times) / len(execution_times) if execution_times else 0,
+                "min_ms": min(execution_times) if execution_times else 0,
+                "max_ms": max(execution_times) if execution_times else 0,
+            } if execution_times else {}
+        }
+
+        # Group by provider if not filtered
+        if not provider:
+            provider_stats = {}
+            for req in requests:
+                prov = req.get('provider', 'unknown')
+                if prov not in provider_stats:
+                    provider_stats[prov] = {"requests": 0, "successful": 0, "total_time": 0}
+
+                provider_stats[prov]["requests"] += 1
+                if req.get('status') == 'completed':
+                    provider_stats[prov]["successful"] += 1
+                if req.get('execution_time_ms'):
+                    provider_stats[prov]["total_time"] += req.get('execution_time_ms', 0)
+
+            performance_data["by_provider"] = {
+                prov: {
+                    "requests": stats["requests"],
+                    "success_rate": (stats["successful"] / stats["requests"]) * 100,
+                    "avg_response_time_ms": stats["total_time"] / stats["requests"] if stats["requests"] > 0 else 0
+                }
+                for prov, stats in provider_stats.items()
+            }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=performance_data,
+            metadata={
+                "time_range_hours": hours,
+                "provider": provider,
+                "model_name": model_name,
+                "sample_size": total_requests
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching performance metrics: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch performance metrics: {str(e)}")
+
+@router.post("/error-analysis", response_model=InferenceMetricsResponse)
+async def get_error_analysis(
+    request: ErrorAnalysisRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Analyze errors and failure patterns
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent failed requests
+        failed_requests = inference_logger.get_recent_requests(
+            limit=500,
+            hours=request.hours,
+            status="failed"
+        )
+
+        if not failed_requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={"message": "No errors found in the specified time range"},
+                metadata={"error_count": 0}
+            )
+
+        # Analyze error patterns
+        error_analysis = {
+            "total_errors": len(failed_requests),
+            "error_rate": 0,  # Would calculate from total requests
+            "by_provider": {},
+            "by_model": {},
+            "by_service_type": {},
+            "recent_errors": failed_requests[:10]  # Most recent 10 errors
+        }
+
+        # Group errors by different dimensions
+        for req in failed_requests:
+            provider = req.get('provider', 'unknown')
+            model = req.get('model_name', 'unknown')
+            service_type = req.get('service_type', 'unknown')
+
+            # Count by provider
+            if provider not in error_analysis["by_provider"]:
+                error_analysis["by_provider"][provider] = 0
+            error_analysis["by_provider"][provider] += 1
+
+            # Count by model
+            if model not in error_analysis["by_model"]:
+                error_analysis["by_model"][model] = 0
+            error_analysis["by_model"][model] += 1
+
+            # Count by service type
+            if service_type not in error_analysis["by_service_type"]:
+                error_analysis["by_service_type"][service_type] = 0
+            error_analysis["by_service_type"][service_type] += 1
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=error_analysis,
+            metadata={
+                "time_range_hours": request.hours,
+                "filters": {
+                    "error_types": request.error_types,
+                    "providers": request.providers
+                }
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error performing error analysis: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to perform error analysis: {str(e)}")
+
+@router.get("/dashboard-summary", response_model=InferenceMetricsResponse)
+async def get_dashboard_summary(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    user = Depends(optional_auth)
+):
+    """
+    Get summary metrics for the monitoring dashboard
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent requests for summary
+        recent_requests = inference_logger.get_recent_requests(limit=1000, hours=hours)
+
+        if not recent_requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={"message": "No data available"},
+                metadata={"hours": hours}
+            )
+
+        # Calculate summary metrics
+        total_requests = len(recent_requests)
+        successful_requests = len([r for r in recent_requests if r.get('status') == 'completed'])
+        failed_requests = total_requests - successful_requests
+
+        # Cost summary
+        total_cost = sum(r.get('cost_usd', 0) or 0 for r in recent_requests)
+        avg_cost = total_cost / total_requests if total_requests > 0 else 0
+
+        # Token summary
+        total_tokens = sum(r.get('tokens', 0) or 0 for r in recent_requests)
+        avg_tokens = total_tokens / total_requests if total_requests > 0 else 0
+
+        # Top providers
+        provider_counts = {}
+        for req in recent_requests:
+            provider = req.get('provider', 'unknown')
+            provider_counts[provider] = provider_counts.get(provider, 0) + 1
+
+        # Top models
+        model_counts = {}
+        for req in recent_requests:
+            model = req.get('model_name', 'unknown')
+            model_counts[model] = model_counts.get(model, 0) + 1
+
+        summary = {
+            "overview": {
+                "total_requests": total_requests,
+                "successful_requests": successful_requests,
+                "failed_requests": failed_requests,
+                "success_rate": (successful_requests / total_requests) * 100 if total_requests > 0 else 0,
+                "total_cost_usd": total_cost,
+                "average_cost_per_request": avg_cost,
+                "total_tokens": total_tokens,
+                "average_tokens_per_request": avg_tokens
+            },
+            "top_providers": dict(sorted(provider_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
+            "top_models": dict(sorted(model_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
+            "time_range": {
+                "hours": hours,
+                "start_time": (datetime.now() - timedelta(hours=hours)).isoformat(),
+                "end_time": datetime.now().isoformat()
+            }
+        }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=summary,
+            metadata={"generated_at": datetime.now()}
+        )
+
+    except Exception as e:
+        logger.error(f"Error generating dashboard summary: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to generate dashboard summary: {str(e)}")
+
+@router.delete("/clear-logs")
+async def clear_inference_logs(
+    confirm: bool = Query(False, description="Confirmation required to clear logs"),
+    user = Depends(require_read_access)  # Require authentication for destructive operations
+):
+    """
+    Clear all inference logs (DANGEROUS - requires confirmation)
+    """
+    if not confirm:
+        raise HTTPException(
+            status_code=400,
+            detail="Confirmation required. Set confirm=true to clear all logs."
+        )
+
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # This would implement log clearing in InfluxDB
+        # For safety, we'll just return a warning for now
+
+        logger.warning("Log clearing requested but not implemented for safety")
+
+        return {
+            "success": False,
+            "message": "Log clearing not implemented for safety. Contact administrator.",
+            "timestamp": datetime.now()
+        }
+
+    except Exception as e:
+        logger.error(f"Error clearing logs: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to clear logs: {str(e)}")
```
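For a quick sanity check of the new monitoring surface, the sketch below calls a few of these endpoints from a client. It is a minimal sketch, not part of the package: it assumes the server runs at `http://localhost:8000` and that this router is mounted under an `/api/v1/monitoring` prefix (the real prefix is set where `fastapi_server.py` includes the router, which this diff does not show). The endpoint paths, query parameters, and the `UsageStatsRequest` body fields come from the route definitions above.

```python
# Minimal client sketch (not part of the package). Assumptions: the API
# runs at http://localhost:8000 and this router is mounted under
# /api/v1/monitoring -- the actual prefix is configured in fastapi_server.py,
# which this diff does not show.
import requests

BASE = "http://localhost:8000/api/v1/monitoring"  # hypothetical mount point

# Health check: reports whether InfluxDB-backed inference logging is enabled.
print(requests.get(f"{BASE}/health").json())

# Ten most recent completed requests from the past 6 hours, one provider.
recent = requests.get(
    f"{BASE}/recent-requests",
    params={"limit": 10, "hours": 6, "provider": "openai", "status": "completed"},
)
print(recent.json()["metadata"]["total_requests"])

# Usage statistics grouped by model over the past day; the JSON body
# mirrors the UsageStatsRequest model defined in the route module.
stats = requests.post(
    f"{BASE}/usage-stats",
    json={"hours": 24, "group_by": "model_name", "include_costs": True},
)
print(stats.json()["data"])
```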