isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/serving/api/routes/inference_monitoring.py
@@ -0,0 +1,486 @@
+"""
+Inference Monitoring API Routes
+
+Provides comprehensive monitoring and analytics for model inference activities.
+Uses InfluxDB as the backend for time-series data analysis.
+
+Features:
+- Real-time inference metrics
+- Cost analysis and tracking
+- Performance monitoring
+- Usage statistics by provider/model
+- Error tracking and alerting
+- Token usage analytics
+"""
+
+from fastapi import APIRouter, Query, HTTPException, Depends
+from pydantic import BaseModel, Field
+from typing import Optional, List, Dict, Any, Union
+import logging
+from datetime import datetime, timedelta
+import json
+
+from isa_model.core.logging import get_inference_logger
+from ..middleware.auth import optional_auth, require_read_access
+
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+# Request/Response Models
+class InferenceMetricsResponse(BaseModel):
+    """Response model for inference metrics"""
+    success: bool
+    data: Any  # More flexible data field
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+class UsageStatsRequest(BaseModel):
+    """Request model for usage statistics"""
+    hours: int = Field(24, ge=1, le=168, description="Time range in hours (1-168)")
+    group_by: str = Field("provider", description="Group by: provider, model_name, service_type")
+    include_costs: bool = Field(True, description="Include cost analysis")
+
+class ErrorAnalysisRequest(BaseModel):
+    """Request model for error analysis"""
+    hours: int = Field(24, ge=1, le=168, description="Time range in hours")
+    error_types: Optional[List[str]] = Field(None, description="Filter by error types")
+    providers: Optional[List[str]] = Field(None, description="Filter by providers")
+
+@router.get("/health")
+async def monitoring_health():
+    """Health check for inference monitoring service"""
+    inference_logger = get_inference_logger()
+
+    return {
+        "status": "healthy" if inference_logger.enabled else "disabled",
+        "service": "inference_monitoring",
+        "influxdb_enabled": inference_logger.enabled,
+        "influxdb_url": inference_logger.url if inference_logger.enabled else None,
+        "bucket": inference_logger.bucket if inference_logger.enabled else None
+    }
+
+@router.get("/recent-requests", response_model=InferenceMetricsResponse)
+async def get_recent_requests(
+    limit: int = Query(50, ge=1, le=500, description="Number of recent requests to fetch"),
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    service_type: Optional[str] = Query(None, description="Filter by service type"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    status: Optional[str] = Query(None, description="Filter by status (completed, failed)"),
+    user = Depends(optional_auth)
+):
+    """
+    Get recent inference requests with optional filtering
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(
+                status_code=503,
+                detail="Inference logging is disabled. Enable ENABLE_INFERENCE_LOGGING."
+            )
+
+        # Fetch recent requests
+        requests = inference_logger.get_recent_requests(
+            limit=limit,
+            hours=hours,
+            service_type=service_type,
+            provider=provider,
+            status=status
+        )
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=requests,
+            metadata={
+                "total_requests": len(requests),
+                "time_range_hours": hours,
+                "filters": {
+                    "service_type": service_type,
+                    "provider": provider,
+                    "status": status
+                }
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching recent requests: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch recent requests: {str(e)}")
+
+@router.post("/usage-stats", response_model=InferenceMetricsResponse)
+async def get_usage_statistics(
+    request: UsageStatsRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Get usage statistics and analytics
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(
+                status_code=503,
+                detail="Inference logging is disabled"
+            )
+
+        # Get usage statistics
+        stats = inference_logger.get_usage_statistics(
+            hours=request.hours,
+            group_by=request.group_by
+        )
+
+        # Calculate totals and summaries
+        total_requests = sum(data.get('total_requests', 0) for data in stats.values())
+
+        metadata = {
+            "time_range_hours": request.hours,
+            "group_by": request.group_by,
+            "total_requests": total_requests,
+            "unique_groups": len(stats),
+            "include_costs": request.include_costs
+        }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=stats,
+            metadata=metadata
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching usage statistics: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch usage statistics: {str(e)}")
+
+@router.get("/cost-analysis", response_model=InferenceMetricsResponse)
+async def get_cost_analysis(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    group_by: str = Query("provider", description="Group by: provider, model_name, service_type"),
+    user = Depends(optional_auth)
+):
+    """
+    Get cost analysis and spending breakdown
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # This would typically involve more complex InfluxDB queries
+        # For now, we'll use the existing usage statistics method
+        stats = inference_logger.get_usage_statistics(hours=hours, group_by=group_by)
+
+        # Calculate cost summaries (this would be enhanced with actual cost queries)
+        cost_analysis = {}
+        total_cost = 0.0
+        total_requests = 0
+
+        for group, data in stats.items():
+            requests = data.get('total_requests', 0)
+            # Estimate costs (in a real implementation, this would come from the database)
+            estimated_cost = requests * 0.002  # Rough estimate
+
+            cost_analysis[group] = {
+                "requests": requests,
+                "estimated_cost_usd": estimated_cost,
+                "cost_per_request": estimated_cost / requests if requests > 0 else 0,
+                "hourly_data": data.get('hourly_data', [])
+            }
+
+            total_cost += estimated_cost
+            total_requests += requests
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=cost_analysis,
+            metadata={
+                "time_range_hours": hours,
+                "group_by": group_by,
+                "total_cost_usd": total_cost,
+                "total_requests": total_requests,
+                "average_cost_per_request": total_cost / total_requests if total_requests > 0 else 0
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error performing cost analysis: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to perform cost analysis: {str(e)}")
+
+@router.get("/performance-metrics", response_model=InferenceMetricsResponse)
+async def get_performance_metrics(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    model_name: Optional[str] = Query(None, description="Filter by model"),
+    user = Depends(optional_auth)
+):
+    """
+    Get performance metrics including response times, success rates, etc.
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent requests for performance analysis
+        requests = inference_logger.get_recent_requests(
+            limit=1000,  # Large sample for accurate metrics
+            hours=hours,
+            provider=provider
+        )
+
+        if not requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={},
+                metadata={"message": "No data found for the specified criteria"}
+            )
+
+        # Calculate performance metrics
+        total_requests = len(requests)
+        successful_requests = len([r for r in requests if r.get('status') == 'completed'])
+        failed_requests = total_requests - successful_requests
+
+        execution_times = [r.get('execution_time_ms', 0) for r in requests if r.get('execution_time_ms')]
+
+        performance_data = {
+            "request_counts": {
+                "total": total_requests,
+                "successful": successful_requests,
+                "failed": failed_requests,
+                "success_rate": (successful_requests / total_requests) * 100 if total_requests > 0 else 0
+            },
+            "response_times": {
+                "count": len(execution_times),
+                "average_ms": sum(execution_times) / len(execution_times) if execution_times else 0,
+                "min_ms": min(execution_times) if execution_times else 0,
+                "max_ms": max(execution_times) if execution_times else 0,
+            } if execution_times else {}
+        }
+
+        # Group by provider if not filtered
+        if not provider:
+            provider_stats = {}
+            for req in requests:
+                prov = req.get('provider', 'unknown')
+                if prov not in provider_stats:
+                    provider_stats[prov] = {"requests": 0, "successful": 0, "total_time": 0}
+
+                provider_stats[prov]["requests"] += 1
+                if req.get('status') == 'completed':
+                    provider_stats[prov]["successful"] += 1
+                if req.get('execution_time_ms'):
+                    provider_stats[prov]["total_time"] += req.get('execution_time_ms', 0)
+
+            performance_data["by_provider"] = {
+                prov: {
+                    "requests": stats["requests"],
+                    "success_rate": (stats["successful"] / stats["requests"]) * 100,
+                    "avg_response_time_ms": stats["total_time"] / stats["requests"] if stats["requests"] > 0 else 0
+                }
+                for prov, stats in provider_stats.items()
+            }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=performance_data,
+            metadata={
+                "time_range_hours": hours,
+                "provider": provider,
+                "model_name": model_name,
+                "sample_size": total_requests
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error fetching performance metrics: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch performance metrics: {str(e)}")
+
+@router.post("/error-analysis", response_model=InferenceMetricsResponse)
+async def get_error_analysis(
+    request: ErrorAnalysisRequest,
+    user = Depends(optional_auth)
+):
+    """
+    Analyze errors and failure patterns
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent failed requests
+        failed_requests = inference_logger.get_recent_requests(
+            limit=500,
+            hours=request.hours,
+            status="failed"
+        )
+
+        if not failed_requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={"message": "No errors found in the specified time range"},
+                metadata={"error_count": 0}
+            )
+
+        # Analyze error patterns
+        error_analysis = {
+            "total_errors": len(failed_requests),
+            "error_rate": 0,  # Would calculate from total requests
+            "by_provider": {},
+            "by_model": {},
+            "by_service_type": {},
+            "recent_errors": failed_requests[:10]  # Most recent 10 errors
+        }
+
+        # Group errors by different dimensions
+        for req in failed_requests:
+            provider = req.get('provider', 'unknown')
+            model = req.get('model_name', 'unknown')
+            service_type = req.get('service_type', 'unknown')
+
+            # Count by provider
+            if provider not in error_analysis["by_provider"]:
+                error_analysis["by_provider"][provider] = 0
+            error_analysis["by_provider"][provider] += 1
+
+            # Count by model
+            if model not in error_analysis["by_model"]:
+                error_analysis["by_model"][model] = 0
+            error_analysis["by_model"][model] += 1
+
+            # Count by service type
+            if service_type not in error_analysis["by_service_type"]:
+                error_analysis["by_service_type"][service_type] = 0
+            error_analysis["by_service_type"][service_type] += 1
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=error_analysis,
+            metadata={
+                "time_range_hours": request.hours,
+                "filters": {
+                    "error_types": request.error_types,
+                    "providers": request.providers
+                }
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error performing error analysis: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to perform error analysis: {str(e)}")
+
+@router.get("/dashboard-summary", response_model=InferenceMetricsResponse)
+async def get_dashboard_summary(
+    hours: int = Query(24, ge=1, le=168, description="Time range in hours"),
+    user = Depends(optional_auth)
+):
+    """
+    Get summary metrics for the monitoring dashboard
+    """
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # Get recent requests for summary
+        recent_requests = inference_logger.get_recent_requests(limit=1000, hours=hours)
+
+        if not recent_requests:
+            return InferenceMetricsResponse(
+                success=True,
+                data={"message": "No data available"},
+                metadata={"hours": hours}
+            )
+
+        # Calculate summary metrics
+        total_requests = len(recent_requests)
+        successful_requests = len([r for r in recent_requests if r.get('status') == 'completed'])
+        failed_requests = total_requests - successful_requests
+
+        # Cost summary
+        total_cost = sum(r.get('cost_usd', 0) or 0 for r in recent_requests)
+        avg_cost = total_cost / total_requests if total_requests > 0 else 0
+
+        # Token summary
+        total_tokens = sum(r.get('tokens', 0) or 0 for r in recent_requests)
+        avg_tokens = total_tokens / total_requests if total_requests > 0 else 0
+
+        # Top providers
+        provider_counts = {}
+        for req in recent_requests:
+            provider = req.get('provider', 'unknown')
+            provider_counts[provider] = provider_counts.get(provider, 0) + 1
+
+        # Top models
+        model_counts = {}
+        for req in recent_requests:
+            model = req.get('model_name', 'unknown')
+            model_counts[model] = model_counts.get(model, 0) + 1
+
+        summary = {
+            "overview": {
+                "total_requests": total_requests,
+                "successful_requests": successful_requests,
+                "failed_requests": failed_requests,
+                "success_rate": (successful_requests / total_requests) * 100 if total_requests > 0 else 0,
+                "total_cost_usd": total_cost,
+                "average_cost_per_request": avg_cost,
+                "total_tokens": total_tokens,
+                "average_tokens_per_request": avg_tokens
+            },
+            "top_providers": dict(sorted(provider_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
+            "top_models": dict(sorted(model_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
+            "time_range": {
+                "hours": hours,
+                "start_time": (datetime.now() - timedelta(hours=hours)).isoformat(),
+                "end_time": datetime.now().isoformat()
+            }
+        }
+
+        return InferenceMetricsResponse(
+            success=True,
+            data=summary,
+            metadata={"generated_at": datetime.now()}
+        )
+
+    except Exception as e:
+        logger.error(f"Error generating dashboard summary: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to generate dashboard summary: {str(e)}")
+
+@router.delete("/clear-logs")
+async def clear_inference_logs(
+    confirm: bool = Query(False, description="Confirmation required to clear logs"),
+    user = Depends(require_read_access)  # Require authentication for destructive operations
+):
+    """
+    Clear all inference logs (DANGEROUS - requires confirmation)
+    """
+    if not confirm:
+        raise HTTPException(
+            status_code=400,
+            detail="Confirmation required. Set confirm=true to clear all logs."
+        )
+
+    try:
+        inference_logger = get_inference_logger()
+
+        if not inference_logger.enabled:
+            raise HTTPException(status_code=503, detail="Inference logging is disabled")
+
+        # This would implement log clearing in InfluxDB
+        # For safety, we'll just return a warning for now
+
+        logger.warning("Log clearing requested but not implemented for safety")
+
+        return {
+            "success": False,
+            "message": "Log clearing not implemented for safety. Contact administrator.",
+            "timestamp": datetime.now()
+        }
+
+    except Exception as e:
+        logger.error(f"Error clearing logs: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to clear logs: {str(e)}")