isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,690 @@
|
|
1
|
+
"""
|
2
|
+
Model Metadata Models
|
3
|
+
|
4
|
+
Data models for model metadata, versioning, and billing information,
|
5
|
+
following the ISA Model architecture pattern.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
from datetime import datetime, timezone
|
10
|
+
from typing import Dict, List, Optional, Any, Union
|
11
|
+
from dataclasses import dataclass, field
|
12
|
+
from enum import Enum
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
class ModelStatus(str, Enum):
    """
    Status values a model record can carry.

    The class mixes in ``str``, so every member compares equal to its plain
    string value (for example ``ModelStatus.ACTIVE == "active"``).
    """

    DRAFT = "draft"
    TRAINING = "training"
    VALIDATING = "validating"
    ACTIVE = "active"
    DEPRECATED = "deprecated"
    ARCHIVED = "archived"
    FAILED = "failed"
|
25
|
+
|
26
|
+
class ModelType(str, Enum):
    """
    Kinds of model a metadata record can describe.

    The class mixes in ``str``, so every member compares equal to its plain
    string value (for example ``ModelType.LLM == "llm"``).
    """

    LLM = "llm"
    VISION = "vision"
    AUDIO = "audio"
    EMBEDDING = "embedding"
    MULTIMODAL = "multimodal"
    CLASSIFICATION = "classification"
    REGRESSION = "regression"
    GENERATIVE = "generative"
    CUSTOM = "custom"
|
37
|
+
|
38
|
+
class BillingModel(str, Enum):
    """
    Billing schemes a model can be offered under.

    The class mixes in ``str``, so every member compares equal to its plain
    string value (for example ``BillingModel.FREE == "free"``).
    """

    PAY_PER_USE = "pay_per_use"
    SUBSCRIPTION = "subscription"
    TIER_BASED = "tier_based"
    FREE = "free"
    CUSTOM = "custom"
|
45
|
+
|
46
|
+
class LicenseType(str, Enum):
    """
    License categories a model can be published under.

    The class mixes in ``str``, so every member compares equal to its plain
    string value (for example ``LicenseType.COMMERCIAL == "commercial"``).
    """

    OPEN_SOURCE = "open_source"
    COMMERCIAL = "commercial"
    ACADEMIC = "academic"
    PROPRIETARY = "proprietary"
    CUSTOM = "custom"
|
53
|
+
|
54
|
+
@dataclass
class ModelMetadata:
    """
    Comprehensive model metadata

    Contains all metadata about a model including its capabilities,
    performance characteristics, usage information, and billing details.

    Every list- or dict-valued field defaults to ``None`` and is replaced by
    a fresh empty container in ``__post_init__``, so instances never share
    mutable state and the helper methods can rely on real containers.
    ``created_at`` defaults to the current UTC time and ``updated_at``
    defaults to ``created_at``.
    """
    model_id: str
    model_name: str
    model_type: str
    provider: str
    status: str = ModelStatus.DRAFT
    version: str = "1.0.0"
    description: Optional[str] = None

    # Model characteristics
    capabilities: Optional[List[str]] = None
    supported_languages: Optional[List[str]] = None
    context_length: Optional[int] = None
    parameter_count: Optional[int] = None
    training_data_size: Optional[str] = None
    training_completion_date: Optional[datetime] = None

    # Performance metrics
    benchmark_scores: Optional[Dict[str, float]] = None
    latency_ms: Optional[float] = None
    throughput_tokens_per_second: Optional[float] = None
    accuracy_metrics: Optional[Dict[str, float]] = None

    # Usage and availability
    is_public: bool = False
    requires_approval: bool = False
    max_concurrent_requests: Optional[int] = None
    rate_limits: Optional[Dict[str, Any]] = None
    geographic_restrictions: Optional[List[str]] = None

    # Billing and cost
    billing_model: str = BillingModel.PAY_PER_USE
    cost_per_1k_input_tokens: Optional[float] = None
    cost_per_1k_output_tokens: Optional[float] = None
    cost_per_request: Optional[float] = None
    monthly_subscription_cost: Optional[float] = None
    free_tier_limits: Optional[Dict[str, Any]] = None

    # Legal and compliance
    license_type: str = LicenseType.PROPRIETARY
    license_url: Optional[str] = None
    terms_of_service_url: Optional[str] = None
    privacy_policy_url: Optional[str] = None
    compliance_certifications: Optional[List[str]] = None
    data_residency_requirements: Optional[List[str]] = None

    # Technical details
    supported_formats: Optional[List[str]] = None
    input_modalities: Optional[List[str]] = None
    output_modalities: Optional[List[str]] = None
    api_endpoints: Optional[Dict[str, str]] = None
    sdk_availability: Optional[Dict[str, str]] = None
    documentation_url: Optional[str] = None

    # Metadata (timestamps are filled in by __post_init__ when left as None)
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    created_by: Optional[str] = None
    updated_by: Optional[str] = None
    tags: Optional[Dict[str, str]] = None
    custom_metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        """Fill in default timestamps and replace ``None`` collections."""
        if self.created_at is None:
            self.created_at = datetime.now(timezone.utc)
        if self.updated_at is None:
            self.updated_at = self.created_at

        list_fields = (
            "capabilities",
            "supported_languages",
            "geographic_restrictions",
            "compliance_certifications",
            "data_residency_requirements",
            "supported_formats",
            "input_modalities",
            "output_modalities",
        )
        dict_fields = (
            "benchmark_scores",
            "accuracy_metrics",
            "rate_limits",
            "free_tier_limits",
            "api_endpoints",
            "sdk_availability",
            "tags",
            "custom_metadata",
        )
        # A new container is created per field so nothing is shared between
        # fields or between instances.
        for name in list_fields:
            if getattr(self, name) is None:
                setattr(self, name, [])
        for name in dict_fields:
            if getattr(self, name) is None:
                setattr(self, name, {})

    @property
    def is_active(self) -> bool:
        """Check if model is active and available"""
        return self.status == ModelStatus.ACTIVE

    @property
    def is_multimodal(self) -> bool:
        """Check if model supports multiple modalities"""
        return len(self.input_modalities) > 1 or len(self.output_modalities) > 1

    @property
    def supports_streaming(self) -> bool:
        """Check if model supports streaming responses"""
        return "streaming" in self.capabilities

    @property
    def supports_function_calling(self) -> bool:
        """Check if model supports function calling"""
        return "function_calling" in self.capabilities or "tools" in self.capabilities

    @property
    def model_size_category(self) -> str:
        """
        Categorize model size based on parameter count

        Returns "unknown" when the count is missing or zero, otherwise one of
        "small" (< 1M), "medium" (< 10M), "large" (< 100M),
        "very_large" (< 1B) or "massive" (>= 1B).
        """
        if not self.parameter_count:
            return "unknown"

        if self.parameter_count < 1_000_000:  # < 1M
            return "small"
        elif self.parameter_count < 10_000_000:  # < 10M
            return "medium"
        elif self.parameter_count < 100_000_000:  # < 100M
            return "large"
        elif self.parameter_count < 1_000_000_000:  # < 1B
            return "very_large"
        else:  # >= 1B
            return "massive"

    @property
    def estimated_cost_per_request(self) -> float:
        """
        Estimate cost per request based on billing model

        Resolution order: free billing costs 0.0; an explicit non-zero
        ``cost_per_request`` wins; otherwise the per-1k-token rates are used;
        with no usable pricing at all a default of 0.01 is returned.
        """
        if self.billing_model == BillingModel.FREE:
            return 0.0

        if self.cost_per_request:
            return self.cost_per_request

        # Estimate based on token costs (assume average 1000 tokens per request)
        input_rate = self.cost_per_1k_input_tokens
        output_rate = self.cost_per_1k_output_tokens
        if input_rate and output_rate:
            return (input_rate + output_rate) / 2
        if input_rate:
            return input_rate
        if output_rate:
            # Output-only pricing is honoured the same way as input-only
            # pricing instead of falling through to the generic default.
            return output_rate

        return 0.01  # Default estimate

    @property
    def performance_tier(self) -> str:
        """
        Classify model performance tier

        Averages the numeric benchmark scores and maps the mean to
        "excellent" (>= 90), "good" (>= 80), "average" (>= 70),
        "below_average" (>= 60) or "poor"; returns "unknown" when there are
        no numeric scores.
        """
        if not self.benchmark_scores:
            return "unknown"

        # Calculate average benchmark score
        scores = [score for score in self.benchmark_scores.values() if isinstance(score, (int, float))]
        if not scores:
            return "unknown"

        avg_score = sum(scores) / len(scores)

        if avg_score >= 90:
            return "excellent"
        elif avg_score >= 80:
            return "good"
        elif avg_score >= 70:
            return "average"
        elif avg_score >= 60:
            return "below_average"
        else:
            return "poor"

    def add_capability(self, capability: str):
        """Add a model capability (no-op if it is already listed)"""
        if capability not in self.capabilities:
            self.capabilities.append(capability)
            self.updated_at = datetime.now(timezone.utc)

    def add_benchmark_score(self, benchmark_name: str, score: float):
        """Add or update a benchmark score"""
        self.benchmark_scores[benchmark_name] = score
        self.updated_at = datetime.now(timezone.utc)

    def update_status(self, new_status: str, updated_by: Optional[str] = None):
        """Update model status and, when given, record who changed it"""
        self.status = new_status
        self.updated_at = datetime.now(timezone.utc)
        if updated_by:
            self.updated_by = updated_by

    def calculate_usage_cost(self, input_tokens: int, output_tokens: int) -> float:
        """
        Calculate cost for specific token usage

        Only the per-1k-token rates are applied; a rate that is unset or
        zero contributes nothing.
        """
        total_cost = 0.0

        if self.cost_per_1k_input_tokens:
            total_cost += (input_tokens / 1000) * self.cost_per_1k_input_tokens

        if self.cost_per_1k_output_tokens:
            total_cost += (output_tokens / 1000) * self.cost_per_1k_output_tokens

        return total_cost

    def check_rate_limit_compliance(self, requests_per_minute: int, tokens_per_minute: int) -> bool:
        """
        Check if usage complies with rate limits

        Only the "requests_per_minute" and "tokens_per_minute" keys of
        ``rate_limits`` are consulted; a missing key means no limit.
        """
        if "requests_per_minute" in self.rate_limits:
            if requests_per_minute > self.rate_limits["requests_per_minute"]:
                return False

        if "tokens_per_minute" in self.rate_limits:
            if tokens_per_minute > self.rate_limits["tokens_per_minute"]:
                return False

        return True

    def validate_metadata(self) -> List[str]:
        """
        Validate model metadata completeness and consistency

        Returns a list of human-readable issue descriptions; an empty list
        means no problem was found.
        """
        issues = []

        if not self.model_id:
            issues.append("Model ID is required")

        if not self.model_name:
            issues.append("Model name is required")

        if not self.provider:
            issues.append("Provider is required")

        # Billing validation
        if self.billing_model == BillingModel.PAY_PER_USE:
            if not (self.cost_per_1k_input_tokens or self.cost_per_1k_output_tokens or self.cost_per_request):
                issues.append("Pay-per-use billing requires cost information")

        if self.billing_model == BillingModel.SUBSCRIPTION:
            if not self.monthly_subscription_cost:
                issues.append("Subscription billing requires monthly cost")

        # Performance validation
        if self.latency_ms is not None and self.latency_ms < 0:
            issues.append("Latency cannot be negative")

        if self.parameter_count is not None and self.parameter_count < 0:
            issues.append("Parameter count cannot be negative")

        # Context length validation: compare against None explicitly so that
        # a context length of 0 is reported rather than treated as "unset".
        if self.context_length is not None and self.context_length < 1:
            issues.append("Context length must be positive")

        return issues
|
315
|
+
|
316
|
+
@dataclass
class ModelVersion:
    """
    Model version information

    Tracks different versions of a model with their specific characteristics
    and deployment information.

    List- and dict-valued fields default to ``None`` and are replaced by
    fresh empty containers in ``__post_init__``; ``created_at`` defaults to
    the current UTC time.
    """
    version_id: str
    model_id: str
    version_number: str
    is_current: bool = False
    is_deprecated: bool = False
    release_date: Optional[datetime] = None
    deprecation_date: Optional[datetime] = None
    end_of_life_date: Optional[datetime] = None

    # Version-specific details
    changes_from_previous: Optional[List[str]] = None
    performance_improvements: Optional[Dict[str, float]] = None
    bug_fixes: Optional[List[str]] = None
    new_features: Optional[List[str]] = None
    breaking_changes: Optional[List[str]] = None

    # Technical specifications for this version
    model_file_url: Optional[str] = None
    model_file_size_gb: Optional[float] = None
    model_file_checksum: Optional[str] = None
    docker_image: Optional[str] = None
    deployment_config: Optional[Dict[str, Any]] = None

    # Compatibility and requirements
    minimum_hardware_requirements: Optional[Dict[str, Any]] = None
    supported_frameworks: Optional[List[str]] = None
    python_version_requirements: Optional[str] = None
    dependencies: Optional[List[str]] = None

    # Filled in by __post_init__ when left as None
    created_at: Optional[datetime] = None
    created_by: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        """Fill in the creation timestamp and replace ``None`` collections."""
        if self.created_at is None:
            self.created_at = datetime.now(timezone.utc)

        list_fields = (
            "changes_from_previous",
            "bug_fixes",
            "new_features",
            "breaking_changes",
            "supported_frameworks",
            "dependencies",
        )
        for name in list_fields:
            if getattr(self, name) is None:
                setattr(self, name, [])
        for name in ("performance_improvements", "metadata"):
            if getattr(self, name) is None:
                setattr(self, name, {})

    @staticmethod
    def _as_utc(moment: datetime) -> datetime:
        """Return *moment* as an aware datetime so it can be compared to "now".

        Naive values are assumed to already be expressed in UTC (this class
        generates all of its own timestamps in UTC) -- NOTE(review): confirm
        callers never pass naive local times.
        """
        if moment.tzinfo is None or moment.utcoffset() is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    def _has_passed(self, moment: Optional[datetime]) -> bool:
        """Return True when *moment* is set and lies strictly in the past."""
        if moment is None:
            return False
        return datetime.now(timezone.utc) > self._as_utc(moment)

    @property
    def is_active(self) -> bool:
        """Check if version is active (not deprecated and within lifecycle)"""
        if self.is_deprecated:
            return False

        return not self._has_passed(self.end_of_life_date)

    @property
    def lifecycle_stage(self) -> str:
        """
        Get current lifecycle stage

        Returns "end_of_life", "deprecated", "current" or "legacy", checked
        in that order.
        """
        if self._has_passed(self.end_of_life_date):
            return "end_of_life"
        # The explicit flag counts as deprecated even when the deprecation
        # date is missing or still in the future, matching ``is_active``.
        if self.is_deprecated or self._has_passed(self.deprecation_date):
            return "deprecated"
        if self.is_current:
            return "current"
        return "legacy"

    @property
    def has_breaking_changes(self) -> bool:
        """Check if version introduces breaking changes"""
        return len(self.breaking_changes) > 0

    def add_change(self, change_type: str, description: str):
        """
        Add a change description

        ``change_type`` selects the target list: "improvement" (stored in
        ``changes_from_previous`` with an "Improvement: " prefix), "bug_fix",
        "new_feature" or "breaking_change". Any other value is ignored.
        """
        if change_type == "improvement":
            self.changes_from_previous.append(f"Improvement: {description}")
        elif change_type == "bug_fix":
            self.bug_fixes.append(description)
        elif change_type == "new_feature":
            self.new_features.append(description)
        elif change_type == "breaking_change":
            self.breaking_changes.append(description)

    def deprecate(self, deprecation_date: Optional[datetime] = None,
                  end_of_life_date: Optional[datetime] = None):
        """
        Mark version as deprecated

        The deprecation date defaults to the current UTC time; the
        end-of-life date is only overwritten when one is supplied.
        """
        self.is_deprecated = True
        self.deprecation_date = deprecation_date or datetime.now(timezone.utc)

        if end_of_life_date:
            self.end_of_life_date = end_of_life_date
|
428
|
+
|
429
|
+
@dataclass
|
430
|
+
class ModelBilling:
|
431
|
+
"""
|
432
|
+
Model billing and usage tracking
|
433
|
+
|
434
|
+
Tracks billing information, usage patterns, and cost analytics
|
435
|
+
for model usage across different time periods and users.
|
436
|
+
"""
|
437
|
+
billing_id: str
|
438
|
+
model_id: str
|
439
|
+
user_id: Optional[str] = None
|
440
|
+
organization_id: Optional[str] = None
|
441
|
+
billing_period_start: datetime = None
|
442
|
+
billing_period_end: datetime = None
|
443
|
+
|
444
|
+
# Usage metrics
|
445
|
+
total_requests: int = 0
|
446
|
+
total_input_tokens: int = 0
|
447
|
+
total_output_tokens: int = 0
|
448
|
+
total_processing_time_ms: int = 0
|
449
|
+
unique_users: int = 0
|
450
|
+
|
451
|
+
# Cost breakdown
|
452
|
+
input_token_cost: float = 0.0
|
453
|
+
output_token_cost: float = 0.0
|
454
|
+
request_cost: float = 0.0
|
455
|
+
subscription_cost: float = 0.0
|
456
|
+
overage_charges: float = 0.0
|
457
|
+
discounts_applied: float = 0.0
|
458
|
+
taxes: float = 0.0
|
459
|
+
total_cost: float = 0.0
|
460
|
+
|
461
|
+
# Billing status
|
462
|
+
billing_status: str = "active" # active, suspended, overdue, paid
|
463
|
+
last_payment_date: Optional[datetime] = None
|
464
|
+
next_billing_date: Optional[datetime] = None
|
465
|
+
payment_method: Optional[str] = None
|
466
|
+
|
467
|
+
# Cost analytics
|
468
|
+
cost_per_request: float = 0.0
|
469
|
+
cost_per_token: float = 0.0
|
470
|
+
cost_per_minute: float = 0.0
|
471
|
+
daily_average_cost: float = 0.0
|
472
|
+
projected_monthly_cost: float = 0.0
|
473
|
+
|
474
|
+
# Usage analytics
|
475
|
+
avg_requests_per_day: float = 0.0
|
476
|
+
avg_tokens_per_request: float = 0.0
|
477
|
+
peak_usage_hour: Optional[int] = None
|
478
|
+
usage_trend: str = "stable" # growing, stable, declining
|
479
|
+
|
480
|
+
created_at: datetime = None
|
481
|
+
updated_at: datetime = None
|
482
|
+
|
483
|
+
def __post_init__(self):
|
484
|
+
if self.created_at is None:
|
485
|
+
self.created_at = datetime.now(timezone.utc)
|
486
|
+
if self.updated_at is None:
|
487
|
+
self.updated_at = self.created_at
|
488
|
+
|
489
|
+
# Calculate derived metrics
|
490
|
+
self._calculate_derived_metrics()
|
491
|
+
|
492
|
+
def _calculate_derived_metrics(self):
|
493
|
+
"""Calculate derived billing and usage metrics"""
|
494
|
+
# Cost analytics
|
495
|
+
if self.total_requests > 0:
|
496
|
+
self.cost_per_request = self.total_cost / self.total_requests
|
497
|
+
|
498
|
+
total_tokens = self.total_input_tokens + self.total_output_tokens
|
499
|
+
if total_tokens > 0:
|
500
|
+
self.cost_per_token = self.total_cost / total_tokens
|
501
|
+
self.avg_tokens_per_request = total_tokens / self.total_requests if self.total_requests > 0 else 0
|
502
|
+
|
503
|
+
if self.total_processing_time_ms > 0:
|
504
|
+
processing_minutes = self.total_processing_time_ms / (1000 * 60)
|
505
|
+
self.cost_per_minute = self.total_cost / processing_minutes
|
506
|
+
|
507
|
+
# Period-based analytics
|
508
|
+
if self.billing_period_start and self.billing_period_end:
|
509
|
+
period_days = (self.billing_period_end - self.billing_period_start).days
|
510
|
+
if period_days > 0:
|
511
|
+
self.daily_average_cost = self.total_cost / period_days
|
512
|
+
self.avg_requests_per_day = self.total_requests / period_days
|
513
|
+
|
514
|
+
# Project monthly cost based on current usage
|
515
|
+
self.projected_monthly_cost = self.daily_average_cost * 30
|
516
|
+
|
517
|
+
@property
|
518
|
+
def billing_period_days(self) -> int:
|
519
|
+
"""Get billing period duration in days"""
|
520
|
+
if self.billing_period_start and self.billing_period_end:
|
521
|
+
return (self.billing_period_end - self.billing_period_start).days
|
522
|
+
return 0
|
523
|
+
|
524
|
+
@property
def is_over_budget(self) -> bool:
    """Tell whether the projected monthly cost is above the example limit.

    The limit is a hard-coded placeholder; it is meant to be replaced by
    user-defined budgets.
    """
    example_threshold = 1000  # Example threshold
    projected = self.projected_monthly_cost
    return projected > example_threshold
|
529
|
+
|
530
|
+
@property
def efficiency_score(self) -> float:
    """Calculate a cost efficiency score in the range 0-100.

    The score is the ratio of an example benchmark price to this record's
    ``cost_per_token``, expressed as a percentage and clamped to [0, 100].
    A ``cost_per_token`` of zero scores 100.

    Returns:
        Always a ``float``, including when the value is clamped to a bound.
    """
    if self.cost_per_token == 0:
        return 100.0

    # Compare to industry benchmarks (simplified)
    benchmark_cost_per_token = 0.002  # Example benchmark
    ratio_pct = (benchmark_cost_per_token / self.cost_per_token) * 100
    # Clamp with float bounds so the capped cases do not leak an int.
    return float(max(0.0, min(100.0, ratio_pct)))
|
540
|
+
|
541
|
+
@property
def usage_intensity(self) -> str:
    """Classify usage by average requests per day.

    Returns:
        ``"light"`` below 10 requests/day, ``"moderate"`` below 100,
        ``"heavy"`` below 1000, and ``"intensive"`` otherwise.
    """
    daily_requests = self.avg_requests_per_day
    # Ascending exclusive upper bounds; the first one that fits wins.
    bands = ((10, "light"), (100, "moderate"), (1000, "heavy"))
    for upper_bound, label in bands:
        if daily_requests < upper_bound:
            return label
    return "intensive"
|
552
|
+
|
553
|
+
def add_usage(self, requests: int = 0, input_tokens: int = 0, output_tokens: int = 0,
              processing_time_ms: int = 0, cost: float = 0.0):
    """Accumulate new usage into the running totals of this record.

    Args:
        requests: Number of requests to add to ``total_requests``.
        input_tokens: Tokens to add to ``total_input_tokens``.
        output_tokens: Tokens to add to ``total_output_tokens``.
        processing_time_ms: Milliseconds to add to
            ``total_processing_time_ms``.
        cost: Amount to add to ``total_cost``.

    After the totals are updated the derived metrics are recomputed and
    ``updated_at`` is set to the current UTC time.
    """
    increments = (
        ("total_requests", requests),
        ("total_input_tokens", input_tokens),
        ("total_output_tokens", output_tokens),
        ("total_processing_time_ms", processing_time_ms),
        ("total_cost", cost),
    )
    for attr_name, amount in increments:
        setattr(self, attr_name, getattr(self, attr_name) + amount)

    # Derived figures depend on every total changed above.
    self._calculate_derived_metrics()
    self.updated_at = datetime.now(timezone.utc)
|
565
|
+
|
566
|
+
def apply_discount(self, discount_amount: float, reason: str = ""):
    """Apply a discount to the billed total and refresh dependent metrics.

    Args:
        discount_amount: Amount subtracted from ``total_cost``; it is also
            added in full to ``discounts_applied``.
        reason: Currently unused by this method.
    """
    self.discounts_applied += discount_amount
    # A discount can never push the total below zero.
    self.total_cost = max(0, self.total_cost - discount_amount)

    # total_cost feeds the per-request, per-token and daily figures, so
    # recompute them here; otherwise the summary would report the new total
    # next to averages derived from the old one.
    self._calculate_derived_metrics()
    self.updated_at = datetime.now(timezone.utc)
|
571
|
+
|
572
|
+
def process_payment(self, payment_amount: float, payment_method: str):
    """Record a payment against this billing record.

    Args:
        payment_amount: Amount paid; when it is at least ``total_cost`` the
            ``billing_status`` becomes ``"paid"``, otherwise the status is
            left untouched.
        payment_method: Stored in ``payment_method``.

    ``last_payment_date`` and ``updated_at`` are set to the current UTC
    time.
    """
    self.payment_method = payment_method
    self.last_payment_date = datetime.now(timezone.utc)

    covers_total = payment_amount >= self.total_cost
    if covers_total:
        self.billing_status = "paid"

    self.updated_at = datetime.now(timezone.utc)
|
581
|
+
|
582
|
+
def generate_billing_summary(self) -> Dict[str, Any]:
    """Assemble a nested, report-style summary of this billing record.

    Returns:
        A dict with five sections: ``billing_period``, ``usage_summary``,
        ``cost_breakdown``, ``analytics`` and ``billing_status``. Cost
        breakdown values are rounded to 4 places, ``cost_per_request`` to 6,
        ``cost_per_token`` to 8 and the remaining rounded figures to 2.
        Timestamps are ISO-format strings, or ``None`` when unset.
    """
    def iso_or_none(moment):
        # Unset (falsy) timestamps are reported as None.
        return moment.isoformat() if moment else None

    period_section = {
        "start": iso_or_none(self.billing_period_start),
        "end": iso_or_none(self.billing_period_end),
        "days": self.billing_period_days,
    }

    usage_section = {
        "total_requests": self.total_requests,
        "total_tokens": self.total_input_tokens + self.total_output_tokens,
        "avg_requests_per_day": round(self.avg_requests_per_day, 2),
        "avg_tokens_per_request": round(self.avg_tokens_per_request, 2),
        "usage_intensity": self.usage_intensity,
    }

    # Every cost component is reported under its attribute name, 4 places.
    cost_fields = (
        "input_token_cost",
        "output_token_cost",
        "request_cost",
        "subscription_cost",
        "overage_charges",
        "discounts_applied",
        "taxes",
        "total_cost",
    )
    cost_section = {name: round(getattr(self, name), 4) for name in cost_fields}

    analytics_section = {
        "cost_per_request": round(self.cost_per_request, 6),
        "cost_per_token": round(self.cost_per_token, 8),
        "efficiency_score": round(self.efficiency_score, 2),
        "projected_monthly_cost": round(self.projected_monthly_cost, 2),
        "usage_trend": self.usage_trend,
    }

    status_section = {
        "status": self.billing_status,
        "last_payment_date": iso_or_none(self.last_payment_date),
        "next_billing_date": iso_or_none(self.next_billing_date),
        "is_over_budget": self.is_over_budget,
    }

    return {
        "billing_period": period_section,
        "usage_summary": usage_section,
        "cost_breakdown": cost_section,
        "analytics": analytics_section,
        "billing_status": status_section,
    }
|
621
|
+
|
622
|
+
# Factory helpers for creating model-related records
|
623
|
+
|
624
|
+
def create_model_metadata(
    model_id: str,
    model_name: str,
    model_type: str,
    provider: str,
    created_by: Optional[str] = None
) -> ModelMetadata:
    """Build a ``ModelMetadata`` record from its identifying fields.

    Args:
        model_id: Identifier passed through as ``model_id``.
        model_name: Passed through as ``model_name``.
        model_type: Passed through as ``model_type``.
        provider: Passed through as ``provider``.
        created_by: Optional creator, passed through as ``created_by``.

    Returns:
        The newly constructed ``ModelMetadata``.
    """
    metadata_fields = {
        "model_id": model_id,
        "model_name": model_name,
        "model_type": model_type,
        "provider": provider,
        "created_by": created_by,
    }
    return ModelMetadata(**metadata_fields)
|
639
|
+
|
640
|
+
def create_model_version(
    model_id: str,
    version_number: str,
    is_current: bool = False,
    created_by: Optional[str] = None
) -> ModelVersion:
    """Build a ``ModelVersion`` record with a freshly generated id.

    The id has the form ``version_<model_id>_<version_number>_<suffix>``
    where the suffix is the first 8 hex characters of a random UUID4.

    Args:
        model_id: Model the version belongs to.
        version_number: Version label, embedded in the generated id.
        is_current: Passed through as ``is_current``.
        created_by: Optional creator, passed through as ``created_by``.

    Returns:
        The newly constructed ``ModelVersion``.
    """
    import uuid

    random_suffix = uuid.uuid4().hex[:8]
    version_id = f"version_{model_id}_{version_number}_{random_suffix}"

    version_fields = {
        "version_id": version_id,
        "model_id": model_id,
        "version_number": version_number,
        "is_current": is_current,
        "created_by": created_by,
    }
    return ModelVersion(**version_fields)
|
658
|
+
|
659
|
+
def create_model_billing(
    model_id: str,
    user_id: Optional[str] = None,
    organization_id: Optional[str] = None,
    billing_period_start: Optional[datetime] = None,
    billing_period_end: Optional[datetime] = None
) -> ModelBilling:
    """Factory function to create a model billing record.

    The billing id has the form ``bill_<model_id>_<YYYYMM>_<suffix>`` where
    ``YYYYMM`` is the current UTC month and the suffix is the first 8 hex
    characters of a random UUID4.

    Args:
        model_id: Model being billed.
        user_id: Optional user, passed through as ``user_id``.
        organization_id: Optional organization, passed through as
            ``organization_id``.
        billing_period_start: Start of the period; defaults to the first
            instant of the current UTC month.
        billing_period_end: End of the period; defaults to the last
            microsecond of the month in which the period starts.

    Returns:
        The newly constructed ``ModelBilling``.
    """
    import calendar
    import uuid

    # Read the clock once, in UTC, so the id and the default period cannot
    # straddle a month boundary or disagree with the UTC timestamps used
    # elsewhere on the record.
    now = datetime.now(timezone.utc)
    period_tag = now.strftime('%Y%m')
    billing_id = f"bill_{model_id}_{period_tag}_{uuid.uuid4().hex[:8]}"

    # Default to current month if no period specified
    if not billing_period_start:
        billing_period_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

    if not billing_period_end:
        # Derive the end from the start rather than from "now": the end then
        # always lands in the starting month, never precedes the start, and
        # inherits the start's tzinfo, so the two can be subtracted safely.
        last_day = calendar.monthrange(billing_period_start.year, billing_period_start.month)[1]
        billing_period_end = billing_period_start.replace(
            day=last_day, hour=23, minute=59, second=59, microsecond=999999
        )

    return ModelBilling(
        billing_id=billing_id,
        model_id=model_id,
        user_id=user_id,
        organization_id=organization_id,
        billing_period_start=billing_period_start,
        billing_period_end=billing_period_end
    )
|