isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/inference/repositories/inference_repository.py (new file, 828 lines)
@@ -0,0 +1,828 @@
"""
Inference Repository - Data persistence layer for inference operations

Provides standardized data access for inference requests, usage statistics, and history
following the ISA Model architecture pattern.
"""

import logging
import json
import uuid
from datetime import datetime, timezone, timedelta
from typing import Dict, List, Optional, Any, Union
from pathlib import Path
from dataclasses import dataclass, asdict
from enum import Enum

try:
    # Try to import Supabase for centralized data storage
    from ...core.database.supabase_client import get_supabase_client
    SUPABASE_AVAILABLE = True
except ImportError:
    SUPABASE_AVAILABLE = False

logger = logging.getLogger(__name__)

class InferenceStatus(str, Enum):
    """Inference status enumeration"""
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    TIMEOUT = "timeout"
    CANCELLED = "cancelled"

class ServiceType(str, Enum):
    """Service type enumeration"""
    LLM = "llm"
    VISION = "vision"
    EMBEDDING = "embedding"
    TTS = "tts"
    STT = "stt"
    IMAGE_GEN = "image_gen"
    AUDIO = "audio"
    RERANK = "rerank"
    OCR = "ocr"

@dataclass
class InferenceRequest:
    """Inference request record"""
    request_id: str
    service_type: str
    model_id: str
    provider: str
    endpoint: str
    request_data: Dict[str, Any]
    status: str = InferenceStatus.PENDING
    created_at: datetime = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    user_id: Optional[str] = None
    session_id: Optional[str] = None
    ip_address: Optional[str] = None
    user_agent: Optional[str] = None
    response_data: Optional[Dict[str, Any]] = None
    error_message: Optional[str] = None
    execution_time_ms: Optional[int] = None
    tokens_used: Optional[int] = None
    cost_usd: Optional[float] = None
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        if self.created_at is None:
            self.created_at = datetime.now(timezone.utc)

@dataclass
class UsageStatistics:
    """Usage statistics record"""
    stat_id: str
    period_start: datetime
    period_end: datetime
    service_type: str
    model_id: Optional[str] = None
    provider: Optional[str] = None
    user_id: Optional[str] = None
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    total_tokens: int = 0
    total_cost_usd: float = 0.0
    avg_response_time_ms: float = 0.0
    p95_response_time_ms: float = 0.0
    requests_per_hour: float = 0.0
    error_rate: float = 0.0
    created_at: datetime = None

    def __post_init__(self):
        if self.created_at is None:
            self.created_at = datetime.now(timezone.utc)

@dataclass
class ModelUsageSnapshot:
    """Model usage snapshot for quick analytics"""
    snapshot_id: str
    model_id: str
    provider: str
    snapshot_time: datetime
    hourly_requests: int = 0
    daily_requests: int = 0
    weekly_requests: int = 0
    monthly_requests: int = 0
    total_tokens_today: int = 0
    total_cost_today: float = 0.0
    avg_response_time_today: float = 0.0
    success_rate_today: float = 100.0
    last_used: Optional[datetime] = None

    def __post_init__(self):
        if self.snapshot_time is None:
            self.snapshot_time = datetime.now(timezone.utc)

class InferenceRepository:
    """
    Repository for inference data persistence

    Supports multiple backend storage options:
    1. Supabase (preferred for centralized storage)
    2. Local file system (fallback for development)
    3. In-memory storage (for testing)
    """

    def __init__(self, storage_backend: str = "auto", **kwargs):
        """
        Initialize inference repository

        Args:
            storage_backend: "supabase", "file", "memory", or "auto"
            **kwargs: Backend-specific configuration
        """
        self.storage_backend = self._determine_backend(storage_backend)
        self.config = kwargs

        # Initialize storage backend
        if self.storage_backend == "supabase":
            self._init_supabase()
        elif self.storage_backend == "memory":
            self._init_memory()
        else:  # file system fallback
            self._init_file_system()

        logger.info(f"Inference repository initialized with {self.storage_backend} backend")

    def _determine_backend(self, preference: str) -> str:
        """Determine the best available storage backend"""
        if preference == "supabase" and SUPABASE_AVAILABLE:
            return "supabase"
        elif preference in ["supabase", "file", "memory"]:
            return preference

        # Auto-select best available backend
        if SUPABASE_AVAILABLE:
            return "supabase"
        else:
            return "file"

    def _init_supabase(self):
        """Initialize Supabase backend"""
        try:
            self.supabase_client = get_supabase_client()
            self._ensure_supabase_tables()
            logger.info("Supabase backend initialized for inference")
        except Exception as e:
            logger.error(f"Failed to initialize Supabase backend: {e}")
            self.storage_backend = "file"
            self._init_file_system()

    def _init_file_system(self):
        """Initialize file system backend"""
        self.data_dir = Path(self.config.get("data_dir", "./inference_data"))
        self.data_dir.mkdir(parents=True, exist_ok=True)

        # Create subdirectories
        (self.data_dir / "requests").mkdir(exist_ok=True)
        (self.data_dir / "statistics").mkdir(exist_ok=True)
        (self.data_dir / "snapshots").mkdir(exist_ok=True)

        logger.info(f"File system backend initialized: {self.data_dir}")

    def _init_memory(self):
        """Initialize in-memory backend for testing"""
        self.requests = {}
        self.statistics = {}
        self.snapshots = {}
        logger.info("In-memory backend initialized for inference")

    def _ensure_supabase_tables(self):
        """Ensure required Supabase tables exist"""
        try:
            self.supabase_client.table("inference_requests").select("request_id").limit(1).execute()
            self.supabase_client.table("usage_statistics").select("stat_id").limit(1).execute()
            self.supabase_client.table("model_usage_snapshots").select("snapshot_id").limit(1).execute()
        except Exception as e:
            logger.warning(f"Some inference tables may not exist in Supabase: {e}")

    # Request Management Methods

    def create_inference_request(
        self,
        service_type: str,
        model_id: str,
        provider: str,
        endpoint: str,
        request_data: Dict[str, Any],
        user_id: Optional[str] = None,
        session_id: Optional[str] = None,
        ip_address: Optional[str] = None,
        user_agent: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Create a new inference request record"""
        request_id = f"inf_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"

        request = InferenceRequest(
            request_id=request_id,
            service_type=service_type,
            model_id=model_id,
            provider=provider,
            endpoint=endpoint,
            request_data=request_data,
            user_id=user_id,
            session_id=session_id,
            ip_address=ip_address,
            user_agent=user_agent,
            metadata=metadata
        )

        if self.storage_backend == "supabase":
            return self._create_request_supabase(request)
        elif self.storage_backend == "memory":
            return self._create_request_memory(request)
        else:
            return self._create_request_file(request)

    def update_inference_status(
        self,
        request_id: str,
        status: str,
        response_data: Optional[Dict[str, Any]] = None,
        error_message: Optional[str] = None,
        execution_time_ms: Optional[int] = None,
        tokens_used: Optional[int] = None,
        cost_usd: Optional[float] = None,
        additional_updates: Optional[Dict[str, Any]] = None
    ) -> bool:
        """Update inference request status and results"""
        updates = {"status": status}

        if status == InferenceStatus.PROCESSING:
            updates["started_at"] = datetime.now(timezone.utc).isoformat()
        elif status in [InferenceStatus.COMPLETED, InferenceStatus.FAILED, InferenceStatus.TIMEOUT]:
            updates["completed_at"] = datetime.now(timezone.utc).isoformat()

        if response_data:
            updates["response_data"] = response_data
        if error_message:
            updates["error_message"] = error_message
        if execution_time_ms:
            updates["execution_time_ms"] = execution_time_ms
        if tokens_used:
            updates["tokens_used"] = tokens_used
        if cost_usd:
            updates["cost_usd"] = cost_usd

        if additional_updates:
            updates.update(additional_updates)

        if self.storage_backend == "supabase":
            return self._update_request_supabase(request_id, updates)
        elif self.storage_backend == "memory":
            return self._update_request_memory(request_id, updates)
        else:
            return self._update_request_file(request_id, updates)

    def get_inference_request(self, request_id: str) -> Optional[InferenceRequest]:
        """Get inference request by ID"""
        if self.storage_backend == "supabase":
            return self._get_request_supabase(request_id)
        elif self.storage_backend == "memory":
            return self._get_request_memory(request_id)
        else:
            return self._get_request_file(request_id)

    def list_recent_requests(
        self,
        service_type: Optional[str] = None,
        model_id: Optional[str] = None,
        user_id: Optional[str] = None,
        status: Optional[str] = None,
        hours: int = 24,
        limit: int = 100
    ) -> List[InferenceRequest]:
        """List recent inference requests with optional filtering"""
        if self.storage_backend == "supabase":
            return self._list_requests_supabase(service_type, model_id, user_id, status, hours, limit)
        elif self.storage_backend == "memory":
            return self._list_requests_memory(service_type, model_id, user_id, status, hours, limit)
        else:
            return self._list_requests_file(service_type, model_id, user_id, status, hours, limit)

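    # --- Reviewer's illustration (not part of the diff): the intended request
    # lifecycle, stitched together from the three methods above. Model and
    # provider names are made up.
    #
    #   repo = InferenceRepository(storage_backend="memory")
    #   req_id = repo.create_inference_request(
    #       service_type=ServiceType.LLM,
    #       model_id="example-model",        # hypothetical
    #       provider="example-provider",     # hypothetical
    #       endpoint="/v1/chat",
    #       request_data={"prompt": "ping"},
    #   )
    #   repo.update_inference_status(req_id, InferenceStatus.PROCESSING)
    #   repo.update_inference_status(
    #       req_id, InferenceStatus.COMPLETED,
    #       execution_time_ms=350, tokens_used=8, cost_usd=0.0002,
    #   )
    #
    # Note that update_inference_status() guards each optional field with a
    # truthiness check, so zero values for execution_time_ms, tokens_used,
    # and cost_usd are silently dropped from the update.
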
    # Usage Statistics Methods

    def record_usage_statistics(
        self,
        period_start: datetime,
        period_end: datetime,
        service_type: str,
        model_id: Optional[str] = None,
        provider: Optional[str] = None,
        user_id: Optional[str] = None,
        total_requests: int = 0,
        successful_requests: int = 0,
        failed_requests: int = 0,
        total_tokens: int = 0,
        total_cost_usd: float = 0.0,
        avg_response_time_ms: float = 0.0,
        p95_response_time_ms: float = 0.0,
        requests_per_hour: float = 0.0,
        error_rate: float = 0.0
    ) -> str:
        """Record usage statistics for a time period"""
        stat_id = f"stat_{period_start.strftime('%Y%m%d_%H')}_{uuid.uuid4().hex[:6]}"

        stats = UsageStatistics(
            stat_id=stat_id,
            period_start=period_start,
            period_end=period_end,
            service_type=service_type,
            model_id=model_id,
            provider=provider,
            user_id=user_id,
            total_requests=total_requests,
            successful_requests=successful_requests,
            failed_requests=failed_requests,
            total_tokens=total_tokens,
            total_cost_usd=total_cost_usd,
            avg_response_time_ms=avg_response_time_ms,
            p95_response_time_ms=p95_response_time_ms,
            requests_per_hour=requests_per_hour,
            error_rate=error_rate
        )

        if self.storage_backend == "supabase":
            return self._record_stats_supabase(stats)
        elif self.storage_backend == "memory":
            return self._record_stats_memory(stats)
        else:
            return self._record_stats_file(stats)

    def get_usage_statistics(
        self,
        service_type: Optional[str] = None,
        model_id: Optional[str] = None,
        user_id: Optional[str] = None,
        days: int = 7,
        limit: int = 100
    ) -> List[UsageStatistics]:
        """Get usage statistics for specified period"""
        if self.storage_backend == "supabase":
            return self._get_stats_supabase(service_type, model_id, user_id, days, limit)
        elif self.storage_backend == "memory":
            return self._get_stats_memory(service_type, model_id, user_id, days, limit)
        else:
            return self._get_stats_file(service_type, model_id, user_id, days, limit)

    def get_aggregated_usage(
        self,
        service_type: Optional[str] = None,
        model_id: Optional[str] = None,
        user_id: Optional[str] = None,
        days: int = 30
    ) -> Dict[str, Any]:
        """Get aggregated usage statistics"""
        stats = self.get_usage_statistics(service_type, model_id, user_id, days, 1000)

        if not stats:
            return {
                "total_requests": 0,
                "total_cost_usd": 0.0,
                "total_tokens": 0,
                "avg_response_time_ms": 0.0,
                "success_rate": 100.0,
                "period_days": days
            }

        total_requests = sum(s.total_requests for s in stats)
        total_successful = sum(s.successful_requests for s in stats)
        total_cost = sum(s.total_cost_usd for s in stats)
        total_tokens = sum(s.total_tokens for s in stats)

        # Weighted average for response time
        weighted_response_times = [s.avg_response_time_ms * s.total_requests for s in stats if s.total_requests > 0]
        avg_response_time = sum(weighted_response_times) / total_requests if total_requests > 0 else 0.0

        success_rate = (total_successful / total_requests * 100) if total_requests > 0 else 100.0

        return {
            "total_requests": total_requests,
            "successful_requests": total_successful,
            "total_cost_usd": round(total_cost, 4),
            "total_tokens": total_tokens,
            "avg_response_time_ms": round(avg_response_time, 2),
            "success_rate": round(success_rate, 2),
            "period_days": days,
            "stats_count": len(stats)
        }

    # Model Usage Snapshots Methods

    def update_model_snapshot(
        self,
        model_id: str,
        provider: str,
        hourly_requests: int = 0,
        daily_requests: int = 0,
        weekly_requests: int = 0,
        monthly_requests: int = 0,
        total_tokens_today: int = 0,
        total_cost_today: float = 0.0,
        avg_response_time_today: float = 0.0,
        success_rate_today: float = 100.0
    ) -> str:
        """Update or create model usage snapshot"""
        snapshot_id = f"snap_{model_id}_{provider}_{datetime.now().strftime('%Y%m%d')}"

        snapshot = ModelUsageSnapshot(
            snapshot_id=snapshot_id,
            model_id=model_id,
            provider=provider,
            snapshot_time=datetime.now(timezone.utc),
            hourly_requests=hourly_requests,
            daily_requests=daily_requests,
            weekly_requests=weekly_requests,
            monthly_requests=monthly_requests,
            total_tokens_today=total_tokens_today,
            total_cost_today=total_cost_today,
            avg_response_time_today=avg_response_time_today,
            success_rate_today=success_rate_today,
            last_used=datetime.now(timezone.utc)
        )

        if self.storage_backend == "supabase":
            return self._update_snapshot_supabase(snapshot)
        elif self.storage_backend == "memory":
            return self._update_snapshot_memory(snapshot)
        else:
            return self._update_snapshot_file(snapshot)

    def get_model_snapshots(
        self,
        model_id: Optional[str] = None,
        provider: Optional[str] = None,
        days: int = 7
    ) -> List[ModelUsageSnapshot]:
        """Get model usage snapshots"""
        if self.storage_backend == "supabase":
            return self._get_snapshots_supabase(model_id, provider, days)
        elif self.storage_backend == "memory":
            return self._get_snapshots_memory(model_id, provider, days)
        else:
            return self._get_snapshots_file(model_id, provider, days)

    def get_top_models(self, metric: str = "daily_requests", limit: int = 10) -> List[Dict[str, Any]]:
        """Get top models by specified metric"""
        snapshots = self.get_model_snapshots(days=1)  # Get latest snapshots

        if not snapshots:
            return []

        # Sort by the specified metric
        valid_metrics = ["hourly_requests", "daily_requests", "weekly_requests", "monthly_requests",
                         "total_tokens_today", "total_cost_today"]

        if metric not in valid_metrics:
            metric = "daily_requests"

        sorted_snapshots = sorted(
            snapshots,
            key=lambda x: getattr(x, metric, 0),
            reverse=True
        )[:limit]

        return [
            {
                "model_id": s.model_id,
                "provider": s.provider,
                "metric_value": getattr(s, metric, 0),
                "daily_requests": s.daily_requests,
                "total_cost_today": s.total_cost_today,
                "success_rate_today": s.success_rate_today,
                "last_used": s.last_used.isoformat() if s.last_used else None
            }
            for s in sorted_snapshots
        ]

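    # --- Reviewer's illustration (not part of the diff): ranking models ---
    #
    #   leaders = repo.get_top_models(metric="total_cost_today", limit=5)
    #   # -> [{"model_id": ..., "provider": ..., "metric_value": ..., ...}, ...]
    #
    # An unrecognized metric silently falls back to "daily_requests" rather
    # than raising, and only the current day's snapshots (days=1) are ranked.
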
    # Cleanup and Maintenance Methods

    def cleanup_old_requests(self, days: int = 30) -> int:
        """Clean up old inference requests"""
        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)

        if self.storage_backend == "supabase":
            return self._cleanup_requests_supabase(cutoff_date)
        elif self.storage_backend == "memory":
            return self._cleanup_requests_memory(cutoff_date)
        else:
            return self._cleanup_requests_file(cutoff_date)

    def cleanup_old_statistics(self, days: int = 90) -> int:
        """Clean up old usage statistics"""
        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)

        if self.storage_backend == "supabase":
            return self._cleanup_stats_supabase(cutoff_date)
        elif self.storage_backend == "memory":
            return self._cleanup_stats_memory(cutoff_date)
        else:
            return self._cleanup_stats_file(cutoff_date)

    # Backend-specific implementations

    def _create_request_file(self, request: InferenceRequest) -> str:
        """Create request in file system"""
        try:
            request_file = self.data_dir / "requests" / f"{request.request_id}.json"
            request_data = asdict(request)

            # Convert datetime objects to ISO strings
            for key in ['created_at', 'started_at', 'completed_at']:
                if request_data[key] and isinstance(request_data[key], datetime):
                    request_data[key] = request_data[key].isoformat()

            with open(request_file, 'w') as f:
                json.dump(request_data, f, indent=2, ensure_ascii=False)

            return request.request_id
        except Exception as e:
            logger.error(f"Failed to create request in file system: {e}")
            raise

    def _create_request_memory(self, request: InferenceRequest) -> str:
        """Create request in memory"""
        self.requests[request.request_id] = request
        return request.request_id

    def _update_request_file(self, request_id: str, updates: Dict[str, Any]) -> bool:
        """Update request in file system"""
        try:
            request_file = self.data_dir / "requests" / f"{request_id}.json"
            if not request_file.exists():
                return False

            with open(request_file, 'r') as f:
                request_data = json.load(f)

            request_data.update(updates)

            with open(request_file, 'w') as f:
                json.dump(request_data, f, indent=2, ensure_ascii=False)

            return True
        except Exception as e:
            logger.error(f"Failed to update request in file system: {e}")
            return False

    def _update_request_memory(self, request_id: str, updates: Dict[str, Any]) -> bool:
        """Update request in memory"""
        if request_id not in self.requests:
            return False

        request_dict = asdict(self.requests[request_id])
        request_dict.update(updates)

        # Convert datetime strings back to datetime objects if needed
        for key in ['created_at', 'started_at', 'completed_at']:
            if key in request_dict and isinstance(request_dict[key], str):
                request_dict[key] = datetime.fromisoformat(request_dict[key])

        self.requests[request_id] = InferenceRequest(**request_dict)
        return True

    def _get_request_file(self, request_id: str) -> Optional[InferenceRequest]:
        """Get request from file system"""
        try:
            request_file = self.data_dir / "requests" / f"{request_id}.json"
            if not request_file.exists():
                return None

            with open(request_file, 'r') as f:
                request_data = json.load(f)

            # Convert ISO strings back to datetime objects
            for key in ['created_at', 'started_at', 'completed_at']:
                if request_data[key]:
                    request_data[key] = datetime.fromisoformat(request_data[key])

            return InferenceRequest(**request_data)
        except Exception as e:
            logger.error(f"Failed to get request from file system: {e}")
            return None

    def _get_request_memory(self, request_id: str) -> Optional[InferenceRequest]:
        """Get request from memory"""
        return self.requests.get(request_id)

    def _list_requests_file(
        self, service_type: Optional[str], model_id: Optional[str],
        user_id: Optional[str], status: Optional[str], hours: int, limit: int
    ) -> List[InferenceRequest]:
        """List requests from file system"""
        try:
            requests = []
            requests_dir = self.data_dir / "requests"
            cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)

            for request_file in requests_dir.glob("*.json"):
                with open(request_file, 'r') as f:
                    request_data = json.load(f)

                # Convert datetime fields
                for key in ['created_at', 'started_at', 'completed_at']:
                    if request_data[key]:
                        request_data[key] = datetime.fromisoformat(request_data[key])

                request = InferenceRequest(**request_data)

                # Apply filters
                if request.created_at < cutoff_time:
                    continue
                if service_type and request.service_type != service_type:
                    continue
                if model_id and request.model_id != model_id:
                    continue
                if user_id and request.user_id != user_id:
                    continue
                if status and request.status != status:
                    continue

                requests.append(request)

                if len(requests) >= limit:
                    break

            return sorted(requests, key=lambda x: x.created_at, reverse=True)
        except Exception as e:
            logger.error(f"Failed to list requests from file system: {e}")
            return []

    def _list_requests_memory(
        self, service_type: Optional[str], model_id: Optional[str],
        user_id: Optional[str], status: Optional[str], hours: int, limit: int
    ) -> List[InferenceRequest]:
        """List requests from memory"""
        cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
        requests = []

        for request in self.requests.values():
            # Apply filters
            if request.created_at < cutoff_time:
                continue
            if service_type and request.service_type != service_type:
                continue
            if model_id and request.model_id != model_id:
                continue
            if user_id and request.user_id != user_id:
                continue
            if status and request.status != status:
                continue

            requests.append(request)

            if len(requests) >= limit:
                break

        return sorted(requests, key=lambda x: x.created_at, reverse=True)

    # Simplified placeholder implementations for statistics and snapshots
    def _record_stats_file(self, stats: UsageStatistics) -> str:
        """Record statistics in file system"""
        try:
            stats_file = self.data_dir / "statistics" / f"{stats.stat_id}.json"
            stats_data = asdict(stats)

            # Convert datetime objects to ISO strings
            for key in ['period_start', 'period_end', 'created_at']:
                if stats_data[key] and isinstance(stats_data[key], datetime):
                    stats_data[key] = stats_data[key].isoformat()

            with open(stats_file, 'w') as f:
                json.dump(stats_data, f, indent=2, ensure_ascii=False)

            return stats.stat_id
        except Exception as e:
            logger.error(f"Failed to record statistics in file system: {e}")
            raise

    def _record_stats_memory(self, stats: UsageStatistics) -> str:
        """Record statistics in memory"""
        self.statistics[stats.stat_id] = stats
        return stats.stat_id

    def _update_snapshot_file(self, snapshot: ModelUsageSnapshot) -> str:
        """Update snapshot in file system"""
        try:
            snapshot_file = self.data_dir / "snapshots" / f"{snapshot.snapshot_id}.json"
            snapshot_data = asdict(snapshot)

            # Convert datetime objects to ISO strings
            for key in ['snapshot_time', 'last_used']:
                if snapshot_data[key] and isinstance(snapshot_data[key], datetime):
                    snapshot_data[key] = snapshot_data[key].isoformat()

            with open(snapshot_file, 'w') as f:
                json.dump(snapshot_data, f, indent=2, ensure_ascii=False)

            return snapshot.snapshot_id
        except Exception as e:
            logger.error(f"Failed to update snapshot in file system: {e}")
            raise

    def _update_snapshot_memory(self, snapshot: ModelUsageSnapshot) -> str:
        """Update snapshot in memory"""
        self.snapshots[snapshot.snapshot_id] = snapshot
        return snapshot.snapshot_id

    # Cleanup implementations
    def _cleanup_requests_file(self, cutoff_date: datetime) -> int:
        """Cleanup old requests from file system"""
        count = 0
        try:
            requests_dir = self.data_dir / "requests"
            for request_file in requests_dir.glob("*.json"):
                if request_file.stat().st_mtime < cutoff_date.timestamp():
                    request_file.unlink()
                    count += 1
        except Exception as e:
            logger.error(f"Failed to cleanup requests from file system: {e}")
        return count

    def _cleanup_requests_memory(self, cutoff_date: datetime) -> int:
        """Cleanup old requests from memory"""
        count = 0
        to_remove = []
        for request_id, request in self.requests.items():
            if request.created_at < cutoff_date:
                to_remove.append(request_id)

        for request_id in to_remove:
            del self.requests[request_id]
            count += 1

        return count

    # Placeholder implementations for Supabase backend
    def _create_request_supabase(self, request: InferenceRequest) -> str:
        return request.request_id  # Implementation needed

    def _update_request_supabase(self, request_id: str, updates: Dict[str, Any]) -> bool:
        return False  # Implementation needed

    def _get_request_supabase(self, request_id: str) -> Optional[InferenceRequest]:
        return None  # Implementation needed

    def _list_requests_supabase(self, service_type, model_id, user_id, status, hours, limit) -> List[InferenceRequest]:
        return []  # Implementation needed

    def _record_stats_supabase(self, stats: UsageStatistics) -> str:
        return stats.stat_id  # Implementation needed

    def _get_stats_supabase(self, service_type, model_id, user_id, days, limit) -> List[UsageStatistics]:
        return []  # Implementation needed

    def _get_stats_memory(self, service_type, model_id, user_id, days, limit) -> List[UsageStatistics]:
        return list(self.statistics.values())[:limit]

    def _get_stats_file(self, service_type, model_id, user_id, days, limit) -> List[UsageStatistics]:
        return []  # Implementation needed

    def _update_snapshot_supabase(self, snapshot: ModelUsageSnapshot) -> str:
        return snapshot.snapshot_id  # Implementation needed

    def _get_snapshots_supabase(self, model_id, provider, days) -> List[ModelUsageSnapshot]:
        return []  # Implementation needed

    def _get_snapshots_memory(self, model_id, provider, days) -> List[ModelUsageSnapshot]:
        return list(self.snapshots.values())

    def _get_snapshots_file(self, model_id, provider, days) -> List[ModelUsageSnapshot]:
        return []  # Implementation needed

    def _cleanup_requests_supabase(self, cutoff_date: datetime) -> int:
        return 0  # Implementation needed

    def _cleanup_stats_supabase(self, cutoff_date: datetime) -> int:
        return 0  # Implementation needed

    def _cleanup_stats_memory(self, cutoff_date: datetime) -> int:
        count = 0
        to_remove = []
        for stat_id, stat in self.statistics.items():
            if stat.created_at < cutoff_date:
                to_remove.append(stat_id)

        for stat_id in to_remove:
            del self.statistics[stat_id]
            count += 1

        return count

    def _cleanup_stats_file(self, cutoff_date: datetime) -> int:
        count = 0
        try:
            stats_dir = self.data_dir / "statistics"
            for stats_file in stats_dir.glob("*.json"):
                if stats_file.stat().st_mtime < cutoff_date.timestamp():
                    stats_file.unlink()
                    count += 1
        except Exception as e:
            logger.error(f"Failed to cleanup statistics from file system: {e}")
        return count
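A minimal end-to-end sketch of the new repository (reviewer's illustration, not shipped in the wheel): the import path follows the file's location in this diff, the in-memory backend and method signatures come from the code above, and the model and provider names are made up.

    from datetime import datetime, timedelta, timezone

    from isa_model.inference.repositories.inference_repository import (
        InferenceRepository, InferenceStatus, ServiceType,
    )

    # In-memory backend keeps everything in plain dicts -- handy for tests.
    repo = InferenceRepository(storage_backend="memory")

    # 1. Record a request and walk it through its lifecycle.
    req_id = repo.create_inference_request(
        service_type=ServiceType.LLM,
        model_id="example-model",         # hypothetical
        provider="example-provider",      # hypothetical
        endpoint="/v1/chat",
        request_data={"prompt": "ping"},
    )
    repo.update_inference_status(req_id, InferenceStatus.PROCESSING)
    repo.update_inference_status(
        req_id, InferenceStatus.COMPLETED,
        response_data={"text": "pong"},
        execution_time_ms=350, tokens_used=8, cost_usd=0.0002,
    )

    # 2. Roll the hour up into a statistics row, then aggregate it.
    now = datetime.now(timezone.utc)
    repo.record_usage_statistics(
        period_start=now - timedelta(hours=1), period_end=now,
        service_type=ServiceType.LLM,
        model_id="example-model", provider="example-provider",
        total_requests=1, successful_requests=1,
        total_tokens=8, total_cost_usd=0.0002, avg_response_time_ms=350.0,
    )
    print(repo.get_aggregated_usage(days=1))

Note that the Supabase code paths are still stubs in this release ("Implementation needed"), so only the file and memory backends round-trip data end to end.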