isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/serving/api/routes/inference_monitoring.py (new file)
@@ -0,0 +1,486 @@
+"""
+Inference Monitoring API Routes
+
+Provides comprehensive monitoring and analytics for model inference activities.
+Uses InfluxDB as the backend for time-series data analysis.
+
+Features:
+- Real-time inference metrics
+- Cost analysis and tracking
+- Performance monitoring
+- Usage statistics by provider/model
+- Error tracking and alerting
+- Token usage analytics
+"""
+
+from fastapi import APIRouter, Query, HTTPException, Depends
+from pydantic import BaseModel, Field
+from typing import Optional, List, Dict, Any, Union
+import logging
+from datetime import datetime, timedelta
+import json
+
+from isa_model.core.logging import get_inference_logger
+from ..middleware.auth import optional_auth, require_read_access
+
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+# Request/Response Models
+class InferenceMetricsResponse(BaseModel):
+    """Response model for inference metrics"""
+    success: bool
+    data: Any  # More flexible data field
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+class UsageStatsRequest(BaseModel):
+    """Request model for usage statistics"""
+    hours: int = Field(24, ge=1, le=168, description="Time range in hours (1-168)")
+    group_by: str = Field("provider", description="Group by: provider, model_name, service_type")
+    include_costs: bool = Field(True, description="Include cost analysis")
+
+class ErrorAnalysisRequest(BaseModel):
+    """Request model for error analysis"""
+    hours: int = Field(24, ge=1, le=168, description="Time range in hours")
+    error_types: Optional[List[str]] = Field(None, description="Filter by error types")
+    providers: Optional[List[str]] = Field(None, description="Filter by providers")
+
+@router.get("/health")
+async def monitoring_health():
+    """Health check for inference monitoring service"""
+    inference_logger = get_inference_logger()
+
+    return {
+        "status": "healthy" if inference_logger.enabled else "disabled",
+        "service": "inference_monitoring",
+        "influxdb_enabled": inference_logger.enabled,
+        "influxdb_url": inference_logger.url if inference_logger.enabled else None,
+        "bucket": inference_logger.bucket if inference_logger.enabled else None
+    }
+
+@router.get("/recent-requests", response_model=InferenceMetricsResponse)
+async def get_recent_requests(
+    limit: int = Query(50, ge=1, le=500, description="Number of recent requests to fetch"),
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    service_type: Optional[str] = Query(None, description="Filter by service type"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    status: Optional[str] = Query(None, description="Filter by status (completed, failed)"),
+    user = Depends(optional_auth)
+):
+    """
+    Get recent inference requests with optional filtering
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(
+                status_code=503,
+                detail="Inference logging is disabled. Enable ENABLE_INFERENCE_LOGGING."
+            )
+
+        # Fetch recent requests
+        requests = inference_logger.get_recent_requests(
+            limit=limit,
+            hours=hours,
+            service_type=service_type,
+            provider=provider,
+            status=status
+        )
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=requests,
+            metadata={
+                "total_requests": len(requests),
+                "time_range_hours": hours,
+                "filters": {
+                    "service_type": service_type,
+                    "provider": provider,
+                    "status": status
+                }
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching recent requests: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch recent requests: {str(e)}")
+
+@router.post("/usage-stats", response_model=InferenceMetricsResponse)
+async def get_usage_statistics(
+    request: UsageStatsRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Get usage statistics and analytics
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(
+                status_code=503,
+                detail="Inference logging is disabled"
+            )
+
+        # Get usage statistics
+        stats = inference_logger.get_usage_statistics(
+            hours=request.hours,
+            group_by=request.group_by
+        )
+
+        # Calculate totals and summaries
+        total_requests = sum(data.get('total_requests', 0) for data in stats.values())
+
+        metadata = {
+            "time_range_hours": request.hours,
+            "group_by": request.group_by,
+            "total_requests": total_requests,
+            "unique_groups": len(stats),
+            "include_costs": request.include_costs
+        }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=stats,
+            metadata=metadata
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching usage statistics: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch usage statistics: {str(e)}")
+
+@router.get("/cost-analysis", response_model=InferenceMetricsResponse)
+async def get_cost_analysis(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    group_by: str = Query("provider", description="Group by: provider, model_name, service_type"),
+    user = Depends(optional_auth)
+):
+    """
+    Get cost analysis and spending breakdown
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # This would typically involve more complex InfluxDB queries
+        # For now, we'll use the existing usage statistics method
+        stats = inference_logger.get_usage_statistics(hours=hours, group_by=group_by)
+
+        # Calculate cost summaries (this would be enhanced with actual cost queries)
+        cost_analysis = {}
+        total_cost = 0.0
+        total_requests = 0
+
+        for group, data in stats.items():
+            requests = data.get('total_requests', 0)
+            # Estimate costs (in a real implementation, this would come from the database)
+            estimated_cost = requests * 0.002  # Rough estimate
+
+            cost_analysis[group] = {
+                "requests": requests,
+                "estimated_cost_usd": estimated_cost,
+                "cost_per_request": estimated_cost / requests if requests > 0 else 0,
+                "hourly_data": data.get('hourly_data', [])
+            }
+
+            total_cost += estimated_cost
+            total_requests += requests
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=cost_analysis,
+            metadata={
+                "time_range_hours": hours,
+                "group_by": group_by,
+                "total_cost_usd": total_cost,
+                "total_requests": total_requests,
+                "average_cost_per_request": total_cost / total_requests if total_requests > 0 else 0
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error performing cost analysis: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to perform cost analysis: {str(e)}")
+
+@router.get("/performance-metrics", response_model=InferenceMetricsResponse)
+async def get_performance_metrics(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    model_name: Optional[str] = Query(None, description="Filter by model"),
+    user = Depends(optional_auth)
+):
+    """
+    Get performance metrics including response times, success rates, etc.
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent requests for performance analysis
+        requests = inference_logger.get_recent_requests(
+            limit=1000,  # Large sample for accurate metrics
+            hours=hours,
+            provider=provider
+        )
+
+        if not requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={},
+                metadata={"message": "No data found for the specified criteria"}
+            )
+
+        # Calculate performance metrics
+        total_requests = len(requests)
+        successful_requests = len([r for r in requests if r.get('status') == 'completed'])
+        failed_requests = total_requests - successful_requests
+
+        execution_times = [r.get('execution_time_ms', 0) for r in requests if r.get('execution_time_ms')]
+
+        performance_data = {
+            "request_counts": {
+                "total": total_requests,
+                "successful": successful_requests,
+                "failed": failed_requests,
+                "success_rate": (successful_requests / total_requests) * 100 if total_requests > 0 else 0
+            },
+            "response_times": {
+                "count": len(execution_times),
+                "average_ms": sum(execution_times) / len(execution_times) if execution_times else 0,
+                "min_ms": min(execution_times) if execution_times else 0,
+                "max_ms": max(execution_times) if execution_times else 0,
+            } if execution_times else {}
+        }
+
+        # Group by provider if not filtered
+        if not provider:
+            provider_stats = {}
+            for req in requests:
+                prov = req.get('provider', 'unknown')
+                if prov not in provider_stats:
+                    provider_stats[prov] = {"requests": 0, "successful": 0, "total_time": 0}
+
+                provider_stats[prov]["requests"] += 1
+                if req.get('status') == 'completed':
+                    provider_stats[prov]["successful"] += 1
+                if req.get('execution_time_ms'):
+                    provider_stats[prov]["total_time"] += req.get('execution_time_ms', 0)
+
+            performance_data["by_provider"] = {
+                prov: {
+                    "requests": stats["requests"],
+                    "success_rate": (stats["successful"] / stats["requests"]) * 100,
+                    "avg_response_time_ms": stats["total_time"] / stats["requests"] if stats["requests"] > 0 else 0
+                }
+                for prov, stats in provider_stats.items()
+            }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=performance_data,
+            metadata={
+                "time_range_hours": hours,
+                "provider": provider,
+                "model_name": model_name,
+                "sample_size": total_requests
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching performance metrics: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch performance metrics: {str(e)}")
+
+@router.post("/error-analysis", response_model=InferenceMetricsResponse)
+async def get_error_analysis(
+    request: ErrorAnalysisRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Analyze errors and failure patterns
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent failed requests
+        failed_requests = inference_logger.get_recent_requests(
+            limit=500,
+            hours=request.hours,
+            status="failed"
+        )
+
+        if not failed_requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={"message": "No errors found in the specified time range"},
+                metadata={"error_count": 0}
+            )
+
+        # Analyze error patterns
+        error_analysis = {
+            "total_errors": len(failed_requests),
+            "error_rate": 0,  # Would calculate from total requests
+            "by_provider": {},
+            "by_model": {},
+            "by_service_type": {},
+            "recent_errors": failed_requests[:10]  # Most recent 10 errors
+        }
+
+        # Group errors by different dimensions
+        for req in failed_requests:
+            provider = req.get('provider', 'unknown')
+            model = req.get('model_name', 'unknown')
+            service_type = req.get('service_type', 'unknown')
+
+            # Count by provider
+            if provider not in error_analysis["by_provider"]:
+                error_analysis["by_provider"][provider] = 0
+            error_analysis["by_provider"][provider] += 1
+
+            # Count by model
+            if model not in error_analysis["by_model"]:
+                error_analysis["by_model"][model] = 0
+            error_analysis["by_model"][model] += 1
+
+            # Count by service type
+            if service_type not in error_analysis["by_service_type"]:
+                error_analysis["by_service_type"][service_type] = 0
+            error_analysis["by_service_type"][service_type] += 1
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=error_analysis,
+            metadata={
+                "time_range_hours": request.hours,
+                "filters": {
+                    "error_types": request.error_types,
+                    "providers": request.providers
+                }
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error performing error analysis: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to perform error analysis: {str(e)}")
+
+@router.get("/dashboard-summary", response_model=InferenceMetricsResponse)
+async def get_dashboard_summary(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    user = Depends(optional_auth)
+):
+    """
+    Get summary metrics for the monitoring dashboard
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent requests for summary
+        recent_requests = inference_logger.get_recent_requests(limit=1000, hours=hours)
+
+        if not recent_requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={"message": "No data available"},
+                metadata={"hours": hours}
+            )
+
+        # Calculate summary metrics
+        total_requests = len(recent_requests)
+        successful_requests = len([r for r in recent_requests if r.get('status') == 'completed'])
+        failed_requests = total_requests - successful_requests
+
+        # Cost summary
+        total_cost = sum(r.get('cost_usd', 0) or 0 for r in recent_requests)
+        avg_cost = total_cost / total_requests if total_requests > 0 else 0
+
+        # Token summary
+        total_tokens = sum(r.get('tokens', 0) or 0 for r in recent_requests)
+        avg_tokens = total_tokens / total_requests if total_requests > 0 else 0
+
+        # Top providers
+        provider_counts = {}
+        for req in recent_requests:
+            provider = req.get('provider', 'unknown')
+            provider_counts[provider] = provider_counts.get(provider, 0) + 1
+
+        # Top models
+        model_counts = {}
+        for req in recent_requests:
+            model = req.get('model_name', 'unknown')
+            model_counts[model] = model_counts.get(model, 0) + 1
+
+        summary = {
+            "overview": {
+                "total_requests": total_requests,
+                "successful_requests": successful_requests,
+                "failed_requests": failed_requests,
+                "success_rate": (successful_requests / total_requests) * 100 if total_requests > 0 else 0,
+                "total_cost_usd": total_cost,
+                "average_cost_per_request": avg_cost,
+                "total_tokens": total_tokens,
+                "average_tokens_per_request": avg_tokens
+            },
+            "top_providers": dict(sorted(provider_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
+            "top_models": dict(sorted(model_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
+            "time_range": {
+                "hours": hours,
+                "start_time": (datetime.now() - timedelta(hours=hours)).isoformat(),
+                "end_time": datetime.now().isoformat()
+            }
+        }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=summary,
+            metadata={"generated_at": datetime.now()}
+        )
+
+    except Exception as e:
+        logger.error(f"Error generating dashboard summary: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to generate dashboard summary: {str(e)}")
+
+@router.delete("/clear-logs")
+async def clear_inference_logs(
+    confirm: bool = Query(False, description="Confirmation required to clear logs"),
+    user = Depends(require_read_access)  # Require authentication for destructive operations
+):
+    """
+    Clear all inference logs (DANGEROUS - requires confirmation)
+    """
+    if not confirm:
+        raise HTTPException(
+            status_code=400,
+            detail="Confirmation required. Set confirm=true to clear all logs."
+        )
+
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # This would implement log clearing in InfluxDB
+        # For safety, we'll just return a warning for now
+
+        logger.warning("Log clearing requested but not implemented for safety")
+
+        return {
+            "success": False,
+            "message": "Log clearing not implemented for safety. Contact administrator.",
+            "timestamp": datetime.now()
+        }
+
+    except Exception as e:
+        logger.error(f"Error clearing logs: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to clear logs: {str(e)}")
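Usage note: the sketch below shows one way a client might exercise the new monitoring routes above. It is illustrative only, not part of the package: the mount prefix (/api/v1/inference-monitoring here) is not visible in this diff, the server address is assumed, and optional_auth is assumed to permit anonymous reads; adjust all three to match the actual fastapi_server.py wiring.

# Hypothetical client sketch (assumptions: base URL, router prefix, anonymous access).
import requests

BASE = "http://localhost:8000/api/v1/inference-monitoring"  # hypothetical mount prefix

# 1) Health probe: reports whether InfluxDB-backed logging is enabled.
health = requests.get(f"{BASE}/health").json()
print(health["status"], health.get("influxdb_enabled"))

# 2) Recent failed OpenAI requests from the last 6 hours; the query params
#    mirror the Query(...) parameters of get_recent_requests.
recent = requests.get(
    f"{BASE}/recent-requests",
    params={"limit": 20, "hours": 6, "provider": "openai", "status": "failed"},
)
recent.raise_for_status()
for item in recent.json()["data"]:
    print(item.get("model_name"), item.get("execution_time_ms"))

# 3) Usage statistics grouped by model; the JSON body matches UsageStatsRequest.
stats = requests.post(
    f"{BASE}/usage-stats",
    json={"hours": 48, "group_by": "model_name", "include_costs": True},
).json()
print(stats["metadata"]["total_requests"], "requests across",
      stats["metadata"]["unique_groups"], "models")

One behavioral detail for callers: in most handlers the 503 raised when inference logging is disabled sits inside the same try block as everything else, and FastAPI's HTTPException subclasses Exception, so the blanket except clause re-wraps it as a 500; only /health reports the disabled state cleanly.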