isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/inference/repositories/inference_repository.py
@@ -0,0 +1,828 @@
+"""
+Inference Repository - Data persistence layer for inference operations
+
+Provides standardized data access for inference requests, usage statistics, and history
+following the ISA Model architecture pattern.
+"""
+
+import logging
+import json
+import uuid
+from datetime import datetime, timezone, timedelta
+from typing import Dict, List, Optional, Any, Union
+from pathlib import Path
+from dataclasses import dataclass, asdict
+from enum import Enum
+
+try:
+    # Try to import Supabase for centralized data storage
+    from ...core.database.supabase_client import get_supabase_client
+    SUPABASE_AVAILABLE = True
+except ImportError:
+    SUPABASE_AVAILABLE = False
+
+logger = logging.getLogger(__name__)
+
+class InferenceStatus(str, Enum):
+    """Inference status enumeration"""
+    PENDING = "pending"
+    PROCESSING = "processing"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    TIMEOUT = "timeout"
+    CANCELLED = "cancelled"
+
+class ServiceType(str, Enum):
+    """Service type enumeration"""
+    LLM = "llm"
+    VISION = "vision"
+    EMBEDDING = "embedding"
+    TTS = "tts"
+    STT = "stt"
+    IMAGE_GEN = "image_gen"
+    AUDIO = "audio"
+    RERANK = "rerank"
+    OCR = "ocr"
+
+@dataclass
+class InferenceRequest:
+    """Inference request record"""
+    request_id: str
+    service_type: str
+    model_id: str
+    provider: str
+    endpoint: str
+    request_data: Dict[str, Any]
+    status: str = InferenceStatus.PENDING
+    created_at: datetime = None
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+    user_id: Optional[str] = None
+    session_id: Optional[str] = None
+    ip_address: Optional[str] = None
+    user_agent: Optional[str] = None
+    response_data: Optional[Dict[str, Any]] = None
+    error_message: Optional[str] = None
+    execution_time_ms: Optional[int] = None
+    tokens_used: Optional[int] = None
+    cost_usd: Optional[float] = None
+    metadata: Optional[Dict[str, Any]] = None
+
+    def __post_init__(self):
+        if self.created_at is None:
+            self.created_at = datetime.now(timezone.utc)
+
+@dataclass
+class UsageStatistics:
+    """Usage statistics record"""
+    stat_id: str
+    period_start: datetime
+    period_end: datetime
+    service_type: str
+    model_id: Optional[str] = None
+    provider: Optional[str] = None
+    user_id: Optional[str] = None
+    total_requests: int = 0
+    successful_requests: int = 0
+    failed_requests: int = 0
+    total_tokens: int = 0
+    total_cost_usd: float = 0.0
+    avg_response_time_ms: float = 0.0
+    p95_response_time_ms: float = 0.0
+    requests_per_hour: float = 0.0
+    error_rate: float = 0.0
+    created_at: datetime = None
+
+    def __post_init__(self):
+        if self.created_at is None:
+            self.created_at = datetime.now(timezone.utc)
+
+@dataclass
+class ModelUsageSnapshot:
+    """Model usage snapshot for quick analytics"""
+    snapshot_id: str
+    model_id: str
+    provider: str
+    snapshot_time: datetime
+    hourly_requests: int = 0
+    daily_requests: int = 0
+    weekly_requests: int = 0
+    monthly_requests: int = 0
+    total_tokens_today: int = 0
+    total_cost_today: float = 0.0
+    avg_response_time_today: float = 0.0
+    success_rate_today: float = 100.0
+    last_used: Optional[datetime] = None
+
+    def __post_init__(self):
+        if self.snapshot_time is None:
+            self.snapshot_time = datetime.now(timezone.utc)
+
+class InferenceRepository:
+    """
+    Repository for inference data persistence
+
+    Supports multiple backend storage options:
+    1. Supabase (preferred for centralized storage)
+    2. Local file system (fallback for development)
+    3. In-memory storage (for testing)
+    """
+
+    def __init__(self, storage_backend: str = "auto", **kwargs):
+        """
+        Initialize inference repository
+
+        Args:
+            storage_backend: "supabase", "file", "memory", or "auto"
+            **kwargs: Backend-specific configuration
+        """
+        self.storage_backend = self._determine_backend(storage_backend)
+        self.config = kwargs
+
+        # Initialize storage backend
+        if self.storage_backend == "supabase":
+            self._init_supabase()
+        elif self.storage_backend == "memory":
+            self._init_memory()
+        else:  # file system fallback
+            self._init_file_system()
+
+        logger.info(f"Inference repository initialized with {self.storage_backend} backend")
+
+    def _determine_backend(self, preference: str) -> str:
+        """Determine the best available storage backend"""
+        if preference == "supabase" and SUPABASE_AVAILABLE:
+            return "supabase"
+        elif preference in ["supabase", "file", "memory"]:
+            return preference
+
+        # Auto-select best available backend
+        if SUPABASE_AVAILABLE:
+            return "supabase"
+        else:
+            return "file"
+
+    def _init_supabase(self):
+        """Initialize Supabase backend"""
+        try:
+            self.supabase_client = get_supabase_client()
+            self._ensure_supabase_tables()
+            logger.info("Supabase backend initialized for inference")
+        except Exception as e:
+            logger.error(f"Failed to initialize Supabase backend: {e}")
+            self.storage_backend = "file"
+            self._init_file_system()
+
+    def _init_file_system(self):
+        """Initialize file system backend"""
+        self.data_dir = Path(self.config.get("data_dir", "./inference_data"))
+        self.data_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create subdirectories
+        (self.data_dir / "requests").mkdir(exist_ok=True)
+        (self.data_dir / "statistics").mkdir(exist_ok=True)
+        (self.data_dir / "snapshots").mkdir(exist_ok=True)
+
+        logger.info(f"File system backend initialized: {self.data_dir}")
+
+    def _init_memory(self):
+        """Initialize in-memory backend for testing"""
+        self.requests = {}
+        self.statistics = {}
+        self.snapshots = {}
+        logger.info("In-memory backend initialized for inference")
+
+    def _ensure_supabase_tables(self):
+        """Ensure required Supabase tables exist"""
+        try:
+            self.supabase_client.table("inference_requests").select("request_id").limit(1).execute()
+            self.supabase_client.table("usage_statistics").select("stat_id").limit(1).execute()
+            self.supabase_client.table("model_usage_snapshots").select("snapshot_id").limit(1).execute()
+        except Exception as e:
+            logger.warning(f"Some inference tables may not exist in Supabase: {e}")
+
+    # Request Management Methods
+
+    def create_inference_request(
+        self,
+        service_type: str,
+        model_id: str,
+        provider: str,
+        endpoint: str,
+        request_data: Dict[str, Any],
+        user_id: Optional[str] = None,
+        session_id: Optional[str] = None,
+        ip_address: Optional[str] = None,
+        user_agent: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> str:
+        """Create a new inference request record"""
+        request_id = f"inf_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+
+        request = InferenceRequest(
+            request_id=request_id,
+            service_type=service_type,
+            model_id=model_id,
+            provider=provider,
+            endpoint=endpoint,
+            request_data=request_data,
+            user_id=user_id,
+            session_id=session_id,
+            ip_address=ip_address,
+            user_agent=user_agent,
+            metadata=metadata
+        )
+
+        if self.storage_backend == "supabase":
+            return self._create_request_supabase(request)
+        elif self.storage_backend == "memory":
+            return self._create_request_memory(request)
+        else:
+            return self._create_request_file(request)
+
+    def update_inference_status(
+        self,
+        request_id: str,
+        status: str,
+        response_data: Optional[Dict[str, Any]] = None,
+        error_message: Optional[str] = None,
+        execution_time_ms: Optional[int] = None,
+        tokens_used: Optional[int] = None,
+        cost_usd: Optional[float] = None,
+        additional_updates: Optional[Dict[str, Any]] = None
+    ) -> bool:
+        """Update inference request status and results"""
+        updates = {"status": status}
+
+        if status == InferenceStatus.PROCESSING:
+            updates["started_at"] = datetime.now(timezone.utc).isoformat()
+        elif status in [InferenceStatus.COMPLETED, InferenceStatus.FAILED, InferenceStatus.TIMEOUT]:
+            updates["completed_at"] = datetime.now(timezone.utc).isoformat()
+
+        if response_data:
+            updates["response_data"] = response_data
+        if error_message:
+            updates["error_message"] = error_message
+        if execution_time_ms:
+            updates["execution_time_ms"] = execution_time_ms
+        if tokens_used:
+            updates["tokens_used"] = tokens_used
+        if cost_usd:
+            updates["cost_usd"] = cost_usd
+
+        if additional_updates:
+            updates.update(additional_updates)
+
+        if self.storage_backend == "supabase":
+            return self._update_request_supabase(request_id, updates)
+        elif self.storage_backend == "memory":
+            return self._update_request_memory(request_id, updates)
+        else:
+            return self._update_request_file(request_id, updates)
+
+    def get_inference_request(self, request_id: str) -> Optional[InferenceRequest]:
+        """Get inference request by ID"""
+        if self.storage_backend == "supabase":
+            return self._get_request_supabase(request_id)
+        elif self.storage_backend == "memory":
+            return self._get_request_memory(request_id)
+        else:
+            return self._get_request_file(request_id)
+
+    def list_recent_requests(
+        self,
+        service_type: Optional[str] = None,
+        model_id: Optional[str] = None,
+        user_id: Optional[str] = None,
+        status: Optional[str] = None,
+        hours: int = 24,
+        limit: int = 100
+    ) -> List[InferenceRequest]:
+        """List recent inference requests with optional filtering"""
+        if self.storage_backend == "supabase":
+            return self._list_requests_supabase(service_type, model_id, user_id, status, hours, limit)
+        elif self.storage_backend == "memory":
+            return self._list_requests_memory(service_type, model_id, user_id, status, hours, limit)
+        else:
+            return self._list_requests_file(service_type, model_id, user_id, status, hours, limit)
+
+    # Usage Statistics Methods
+
+    def record_usage_statistics(
+        self,
+        period_start: datetime,
+        period_end: datetime,
+        service_type: str,
+        model_id: Optional[str] = None,
+        provider: Optional[str] = None,
+        user_id: Optional[str] = None,
+        total_requests: int = 0,
+        successful_requests: int = 0,
+        failed_requests: int = 0,
+        total_tokens: int = 0,
+        total_cost_usd: float = 0.0,
+        avg_response_time_ms: float = 0.0,
+        p95_response_time_ms: float = 0.0,
+        requests_per_hour: float = 0.0,
+        error_rate: float = 0.0
+    ) -> str:
+        """Record usage statistics for a time period"""
+        stat_id = f"stat_{period_start.strftime('%Y%m%d_%H')}_{uuid.uuid4().hex[:6]}"
+
+        stats = UsageStatistics(
+            stat_id=stat_id,
+            period_start=period_start,
+            period_end=period_end,
+            service_type=service_type,
+            model_id=model_id,
+            provider=provider,
+            user_id=user_id,
+            total_requests=total_requests,
+            successful_requests=successful_requests,
+            failed_requests=failed_requests,
+            total_tokens=total_tokens,
+            total_cost_usd=total_cost_usd,
+            avg_response_time_ms=avg_response_time_ms,
+            p95_response_time_ms=p95_response_time_ms,
+            requests_per_hour=requests_per_hour,
+            error_rate=error_rate
+        )
+
+        if self.storage_backend == "supabase":
+            return self._record_stats_supabase(stats)
+        elif self.storage_backend == "memory":
+            return self._record_stats_memory(stats)
+        else:
+            return self._record_stats_file(stats)
+
+    def get_usage_statistics(
+        self,
+        service_type: Optional[str] = None,
+        model_id: Optional[str] = None,
+        user_id: Optional[str] = None,
+        days: int = 7,
+        limit: int = 100
+    ) -> List[UsageStatistics]:
+        """Get usage statistics for specified period"""
+        if self.storage_backend == "supabase":
+            return self._get_stats_supabase(service_type, model_id, user_id, days, limit)
+        elif self.storage_backend == "memory":
+            return self._get_stats_memory(service_type, model_id, user_id, days, limit)
+        else:
+            return self._get_stats_file(service_type, model_id, user_id, days, limit)
+
+    def get_aggregated_usage(
+        self,
+        service_type: Optional[str] = None,
+        model_id: Optional[str] = None,
+        user_id: Optional[str] = None,
+        days: int = 30
+    ) -> Dict[str, Any]:
+        """Get aggregated usage statistics"""
+        stats = self.get_usage_statistics(service_type, model_id, user_id, days, 1000)
+
+        if not stats:
+            return {
+                "total_requests": 0,
+                "total_cost_usd": 0.0,
+                "total_tokens": 0,
+                "avg_response_time_ms": 0.0,
+                "success_rate": 100.0,
+                "period_days": days
+            }
+
+        total_requests = sum(s.total_requests for s in stats)
+        total_successful = sum(s.successful_requests for s in stats)
+        total_cost = sum(s.total_cost_usd for s in stats)
+        total_tokens = sum(s.total_tokens for s in stats)
+
+        # Weighted average for response time
+        weighted_response_times = [s.avg_response_time_ms * s.total_requests for s in stats if s.total_requests > 0]
+        avg_response_time = sum(weighted_response_times) / total_requests if total_requests > 0 else 0.0
+
+        success_rate = (total_successful / total_requests * 100) if total_requests > 0 else 100.0
+
+        return {
+            "total_requests": total_requests,
+            "successful_requests": total_successful,
+            "total_cost_usd": round(total_cost, 4),
+            "total_tokens": total_tokens,
+            "avg_response_time_ms": round(avg_response_time, 2),
+            "success_rate": round(success_rate, 2),
+            "period_days": days,
+            "stats_count": len(stats)
+        }
+
+    # Model Usage Snapshots Methods
+
+    def update_model_snapshot(
+        self,
+        model_id: str,
+        provider: str,
+        hourly_requests: int = 0,
+        daily_requests: int = 0,
+        weekly_requests: int = 0,
+        monthly_requests: int = 0,
+        total_tokens_today: int = 0,
+        total_cost_today: float = 0.0,
+        avg_response_time_today: float = 0.0,
+        success_rate_today: float = 100.0
+    ) -> str:
+        """Update or create model usage snapshot"""
+        snapshot_id = f"snap_{model_id}_{provider}_{datetime.now().strftime('%Y%m%d')}"
+
+        snapshot = ModelUsageSnapshot(
+            snapshot_id=snapshot_id,
+            model_id=model_id,
+            provider=provider,
+            snapshot_time=datetime.now(timezone.utc),
+            hourly_requests=hourly_requests,
+            daily_requests=daily_requests,
+            weekly_requests=weekly_requests,
+            monthly_requests=monthly_requests,
+            total_tokens_today=total_tokens_today,
+            total_cost_today=total_cost_today,
+            avg_response_time_today=avg_response_time_today,
+            success_rate_today=success_rate_today,
+            last_used=datetime.now(timezone.utc)
+        )
+
+        if self.storage_backend == "supabase":
+            return self._update_snapshot_supabase(snapshot)
+        elif self.storage_backend == "memory":
+            return self._update_snapshot_memory(snapshot)
+        else:
+            return self._update_snapshot_file(snapshot)
+
+    def get_model_snapshots(
+        self,
+        model_id: Optional[str] = None,
+        provider: Optional[str] = None,
+        days: int = 7
+    ) -> List[ModelUsageSnapshot]:
+        """Get model usage snapshots"""
+        if self.storage_backend == "supabase":
+            return self._get_snapshots_supabase(model_id, provider, days)
+        elif self.storage_backend == "memory":
+            return self._get_snapshots_memory(model_id, provider, days)
+        else:
+            return self._get_snapshots_file(model_id, provider, days)
+
+    def get_top_models(self, metric: str = "daily_requests", limit: int = 10) -> List[Dict[str, Any]]:
+        """Get top models by specified metric"""
+        snapshots = self.get_model_snapshots(days=1)  # Get latest snapshots
+
+        if not snapshots:
+            return []
+
+        # Sort by the specified metric
+        valid_metrics = ["hourly_requests", "daily_requests", "weekly_requests", "monthly_requests",
+                         "total_tokens_today", "total_cost_today"]
+
+        if metric not in valid_metrics:
+            metric = "daily_requests"
+
+        sorted_snapshots = sorted(
+            snapshots,
+            key=lambda x: getattr(x, metric, 0),
+            reverse=True
+        )[:limit]
+
+        return [
+            {
+                "model_id": s.model_id,
+                "provider": s.provider,
+                "metric_value": getattr(s, metric, 0),
+                "daily_requests": s.daily_requests,
+                "total_cost_today": s.total_cost_today,
+                "success_rate_today": s.success_rate_today,
+                "last_used": s.last_used.isoformat() if s.last_used else None
+            }
+            for s in sorted_snapshots
+        ]
+
+    # Cleanup and Maintenance Methods
+
+    def cleanup_old_requests(self, days: int = 30) -> int:
+        """Clean up old inference requests"""
+        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
+
+        if self.storage_backend == "supabase":
+            return self._cleanup_requests_supabase(cutoff_date)
+        elif self.storage_backend == "memory":
+            return self._cleanup_requests_memory(cutoff_date)
+        else:
+            return self._cleanup_requests_file(cutoff_date)
+
+    def cleanup_old_statistics(self, days: int = 90) -> int:
+        """Clean up old usage statistics"""
+        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
+
+        if self.storage_backend == "supabase":
+            return self._cleanup_stats_supabase(cutoff_date)
+        elif self.storage_backend == "memory":
+            return self._cleanup_stats_memory(cutoff_date)
+        else:
+            return self._cleanup_stats_file(cutoff_date)
+
+    # Backend-specific implementations
+
+    def _create_request_file(self, request: InferenceRequest) -> str:
+        """Create request in file system"""
+        try:
+            request_file = self.data_dir / "requests" / f"{request.request_id}.json"
+            request_data = asdict(request)
+
+            # Convert datetime objects to ISO strings
+            for key in ['created_at', 'started_at', 'completed_at']:
+                if request_data[key] and isinstance(request_data[key], datetime):
+                    request_data[key] = request_data[key].isoformat()
+
+            with open(request_file, 'w') as f:
+                json.dump(request_data, f, indent=2, ensure_ascii=False)
+
+            return request.request_id
+        except Exception as e:
+            logger.error(f"Failed to create request in file system: {e}")
+            raise
+
+    def _create_request_memory(self, request: InferenceRequest) -> str:
+        """Create request in memory"""
+        self.requests[request.request_id] = request
+        return request.request_id
+
+    def _update_request_file(self, request_id: str, updates: Dict[str, Any]) -> bool:
+        """Update request in file system"""
+        try:
+            request_file = self.data_dir / "requests" / f"{request_id}.json"
+            if not request_file.exists():
+                return False
+
+            with open(request_file, 'r') as f:
+                request_data = json.load(f)
+
+            request_data.update(updates)
+
+            with open(request_file, 'w') as f:
+                json.dump(request_data, f, indent=2, ensure_ascii=False)
+
+            return True
+        except Exception as e:
+            logger.error(f"Failed to update request in file system: {e}")
+            return False
+
+    def _update_request_memory(self, request_id: str, updates: Dict[str, Any]) -> bool:
+        """Update request in memory"""
+        if request_id not in self.requests:
+            return False
+
+        request_dict = asdict(self.requests[request_id])
+        request_dict.update(updates)
+
+        # Convert datetime strings back to datetime objects if needed
+        for key in ['created_at', 'started_at', 'completed_at']:
+            if key in request_dict and isinstance(request_dict[key], str):
+                request_dict[key] = datetime.fromisoformat(request_dict[key])
+
+        self.requests[request_id] = InferenceRequest(**request_dict)
+        return True
+
+    def _get_request_file(self, request_id: str) -> Optional[InferenceRequest]:
+        """Get request from file system"""
+        try:
+            request_file = self.data_dir / "requests" / f"{request_id}.json"
+            if not request_file.exists():
+                return None
+
+            with open(request_file, 'r') as f:
+                request_data = json.load(f)
+
+            # Convert ISO strings back to datetime objects
+            for key in ['created_at', 'started_at', 'completed_at']:
+                if request_data[key]:
+                    request_data[key] = datetime.fromisoformat(request_data[key])
+
+            return InferenceRequest(**request_data)
+        except Exception as e:
+            logger.error(f"Failed to get request from file system: {e}")
+            return None
+
+    def _get_request_memory(self, request_id: str) -> Optional[InferenceRequest]:
+        """Get request from memory"""
+        return self.requests.get(request_id)
+
+    def _list_requests_file(
+        self, service_type: Optional[str], model_id: Optional[str],
+        user_id: Optional[str], status: Optional[str], hours: int, limit: int
+    ) -> List[InferenceRequest]:
+        """List requests from file system"""
+        try:
+            requests = []
+            requests_dir = self.data_dir / "requests"
+            cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
+
+            for request_file in requests_dir.glob("*.json"):
+                with open(request_file, 'r') as f:
+                    request_data = json.load(f)
+
+                # Convert datetime fields
+                for key in ['created_at', 'started_at', 'completed_at']:
+                    if request_data[key]:
+                        request_data[key] = datetime.fromisoformat(request_data[key])
+
+                request = InferenceRequest(**request_data)
+
+                # Apply filters
+                if request.created_at < cutoff_time:
+                    continue
+                if service_type and request.service_type != service_type:
+                    continue
+                if model_id and request.model_id != model_id:
+                    continue
+                if user_id and request.user_id != user_id:
+                    continue
+                if status and request.status != status:
+                    continue
+
+                requests.append(request)
+
+                if len(requests) >= limit:
+                    break
+
+            return sorted(requests, key=lambda x: x.created_at, reverse=True)
+        except Exception as e:
+            logger.error(f"Failed to list requests from file system: {e}")
+            return []
+
+    def _list_requests_memory(
+        self, service_type: Optional[str], model_id: Optional[str],
+        user_id: Optional[str], status: Optional[str], hours: int, limit: int
+    ) -> List[InferenceRequest]:
+        """List requests from memory"""
+        cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
+        requests = []
+
+        for request in self.requests.values():
+            # Apply filters
+            if request.created_at < cutoff_time:
+                continue
+            if service_type and request.service_type != service_type:
+                continue
+            if model_id and request.model_id != model_id:
+                continue
+            if user_id and request.user_id != user_id:
+                continue
+            if status and request.status != status:
+                continue
+
+            requests.append(request)
+
+            if len(requests) >= limit:
+                break
+
+        return sorted(requests, key=lambda x: x.created_at, reverse=True)
+
+    # Simplified placeholder implementations for statistics and snapshots
+    def _record_stats_file(self, stats: UsageStatistics) -> str:
+        """Record statistics in file system"""
+        try:
+            stats_file = self.data_dir / "statistics" / f"{stats.stat_id}.json"
+            stats_data = asdict(stats)
+
+            # Convert datetime objects to ISO strings
+            for key in ['period_start', 'period_end', 'created_at']:
+                if stats_data[key] and isinstance(stats_data[key], datetime):
+                    stats_data[key] = stats_data[key].isoformat()
+
+            with open(stats_file, 'w') as f:
+                json.dump(stats_data, f, indent=2, ensure_ascii=False)
+
+            return stats.stat_id
+        except Exception as e:
+            logger.error(f"Failed to record statistics in file system: {e}")
+            raise
+
+    def _record_stats_memory(self, stats: UsageStatistics) -> str:
+        """Record statistics in memory"""
+        self.statistics[stats.stat_id] = stats
+        return stats.stat_id
+
+    def _update_snapshot_file(self, snapshot: ModelUsageSnapshot) -> str:
+        """Update snapshot in file system"""
+        try:
+            snapshot_file = self.data_dir / "snapshots" / f"{snapshot.snapshot_id}.json"
+            snapshot_data = asdict(snapshot)
+
+            # Convert datetime objects to ISO strings
+            for key in ['snapshot_time', 'last_used']:
+                if snapshot_data[key] and isinstance(snapshot_data[key], datetime):
+                    snapshot_data[key] = snapshot_data[key].isoformat()
+
+            with open(snapshot_file, 'w') as f:
+                json.dump(snapshot_data, f, indent=2, ensure_ascii=False)
+
+            return snapshot.snapshot_id
+        except Exception as e:
+            logger.error(f"Failed to update snapshot in file system: {e}")
+            raise
+
+    def _update_snapshot_memory(self, snapshot: ModelUsageSnapshot) -> str:
+        """Update snapshot in memory"""
+        self.snapshots[snapshot.snapshot_id] = snapshot
+        return snapshot.snapshot_id
+
+    # Cleanup implementations
+    def _cleanup_requests_file(self, cutoff_date: datetime) -> int:
+        """Cleanup old requests from file system"""
+        count = 0
+        try:
+            requests_dir = self.data_dir / "requests"
+            for request_file in requests_dir.glob("*.json"):
+                if request_file.stat().st_mtime < cutoff_date.timestamp():
+                    request_file.unlink()
+                    count += 1
+        except Exception as e:
+            logger.error(f"Failed to cleanup requests from file system: {e}")
+        return count
+
+    def _cleanup_requests_memory(self, cutoff_date: datetime) -> int:
+        """Cleanup old requests from memory"""
+        count = 0
+        to_remove = []
+        for request_id, request in self.requests.items():
+            if request.created_at < cutoff_date:
+                to_remove.append(request_id)
+
+        for request_id in to_remove:
+            del self.requests[request_id]
+            count += 1
+
+        return count
+
+    # Placeholder implementations for Supabase backend
+    def _create_request_supabase(self, request: InferenceRequest) -> str:
+        return request.request_id  # Implementation needed
+
+    def _update_request_supabase(self, request_id: str, updates: Dict[str, Any]) -> bool:
+        return False  # Implementation needed
+
+    def _get_request_supabase(self, request_id: str) -> Optional[InferenceRequest]:
+        return None  # Implementation needed
+
+    def _list_requests_supabase(self, service_type, model_id, user_id, status, hours, limit) -> List[InferenceRequest]:
+        return []  # Implementation needed
+
+    def _record_stats_supabase(self, stats: UsageStatistics) -> str:
+        return stats.stat_id  # Implementation needed
+
+    def _get_stats_supabase(self, service_type, model_id, user_id, days, limit) -> List[UsageStatistics]:
+        return []  # Implementation needed
+
+    def _get_stats_memory(self, service_type, model_id, user_id, days, limit) -> List[UsageStatistics]:
+        return list(self.statistics.values())[:limit]
+
+    def _get_stats_file(self, service_type, model_id, user_id, days, limit) -> List[UsageStatistics]:
+        return []  # Implementation needed
+
+    def _update_snapshot_supabase(self, snapshot: ModelUsageSnapshot) -> str:
+        return snapshot.snapshot_id  # Implementation needed
+
+    def _get_snapshots_supabase(self, model_id, provider, days) -> List[ModelUsageSnapshot]:
+        return []  # Implementation needed
+
+    def _get_snapshots_memory(self, model_id, provider, days) -> List[ModelUsageSnapshot]:
+        return list(self.snapshots.values())
+
+    def _get_snapshots_file(self, model_id, provider, days) -> List[ModelUsageSnapshot]:
+        return []  # Implementation needed
+
+    def _cleanup_requests_supabase(self, cutoff_date: datetime) -> int:
+        return 0  # Implementation needed
+
+    def _cleanup_stats_supabase(self, cutoff_date: datetime) -> int:
+        return 0  # Implementation needed
+
+    def _cleanup_stats_memory(self, cutoff_date: datetime) -> int:
+        count = 0
+        to_remove = []
+        for stat_id, stat in self.statistics.items():
+            if stat.created_at < cutoff_date:
+                to_remove.append(stat_id)
+
+        for stat_id in to_remove:
+            del self.statistics[stat_id]
+            count += 1
+
+        return count
+
+    def _cleanup_stats_file(self, cutoff_date: datetime) -> int:
+        count = 0
+        try:
+            stats_dir = self.data_dir / "statistics"
+            for stats_file in stats_dir.glob("*.json"):
+                if stats_file.stat().st_mtime < cutoff_date.timestamp():
+                    stats_file.unlink()
+                    count += 1
+        except Exception as e:
+            logger.error(f"Failed to cleanup statistics from file system: {e}")
+        return count
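
The new isa_model/inference/repositories/inference_repository.py module above exposes a small repository API. As orientation only, a minimal usage sketch against the file backend might look like the following; the model id, endpoint value, and payloads are illustrative and not taken from the diff.

    # Minimal sketch, assuming the package is installed and using the file backend.
    from isa_model.inference.repositories.inference_repository import (
        InferenceRepository,
        InferenceStatus,
    )

    # data_dir is forwarded via **kwargs to the file-system backend.
    repo = InferenceRepository(storage_backend="file", data_dir="./inference_data")

    # Record a request, mark it completed, then read it back.
    request_id = repo.create_inference_request(
        service_type="llm",
        model_id="gpt-4o-mini",            # hypothetical model id
        provider="openai",
        endpoint="/v1/chat/completions",   # hypothetical endpoint
        request_data={"prompt": "hello"},
    )
    repo.update_inference_status(
        request_id,
        status=InferenceStatus.COMPLETED,
        response_data={"text": "hi"},
        execution_time_ms=120,
        tokens_used=42,
        cost_usd=0.0001,
    )
    print(repo.get_inference_request(request_id))
    print(repo.get_aggregated_usage(days=1))

Note that in 0.4.4 the Supabase paths and the file-backend statistics queries are still placeholders (they return empty or constant values), so aggregated usage is only meaningful once statistics are recorded through record_usage_statistics.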