isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/client.py +732 -565
- isa_model/core/cache/redis_cache.py +401 -0
- isa_model/core/config/config_manager.py +53 -10
- isa_model/core/config.py +1 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/migrations.py +277 -0
- isa_model/core/database/supabase_client.py +123 -0
- isa_model/core/models/__init__.py +37 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +36 -18
- isa_model/core/models/model_repo.py +44 -38
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +101 -370
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +7 -0
- isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
- isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
- isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/core/deployment_manager.py +6 -4
- isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
- isa_model/eval/benchmarks/__init__.py +27 -0
- isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
- isa_model/eval/benchmarks.py +244 -12
- isa_model/eval/evaluators/__init__.py +8 -2
- isa_model/eval/evaluators/audio_evaluator.py +727 -0
- isa_model/eval/evaluators/embedding_evaluator.py +742 -0
- isa_model/eval/evaluators/vision_evaluator.py +564 -0
- isa_model/eval/example_evaluation.py +395 -0
- isa_model/eval/factory.py +272 -5
- isa_model/eval/isa_benchmarks.py +700 -0
- isa_model/eval/isa_integration.py +582 -0
- isa_model/eval/metrics.py +159 -6
- isa_model/eval/tests/unit/test_basic.py +396 -0
- isa_model/inference/ai_factory.py +44 -8
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +32 -6
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/base_llm_service.py +30 -6
- isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
- isa_model/inference/services/llm/ollama_llm_service.py +2 -1
- isa_model/inference/services/llm/openai_llm_service.py +652 -55
- isa_model/inference/services/llm/yyds_llm_service.py +2 -1
- isa_model/inference/services/vision/__init__.py +5 -5
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/helpers/image_utils.py +11 -5
- isa_model/inference/services/vision/isa_vision_service.py +573 -0
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/serving/api/fastapi_server.py +88 -16
- isa_model/serving/api/middleware/auth.py +311 -0
- isa_model/serving/api/middleware/security.py +278 -0
- isa_model/serving/api/routes/analytics.py +486 -0
- isa_model/serving/api/routes/deployments.py +339 -0
- isa_model/serving/api/routes/evaluations.py +579 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/unified.py +324 -165
- isa_model/serving/api/startup.py +304 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/training/__init__.py +100 -6
- isa_model/training/core/__init__.py +4 -1
- isa_model/training/examples/intelligent_training_example.py +281 -0
- isa_model/training/intelligent/__init__.py +25 -0
- isa_model/training/intelligent/decision_engine.py +643 -0
- isa_model/training/intelligent/intelligent_factory.py +888 -0
- isa_model/training/intelligent/knowledge_base.py +751 -0
- isa_model/training/intelligent/resource_optimizer.py +839 -0
- isa_model/training/intelligent/task_classifier.py +576 -0
- isa_model/training/storage/__init__.py +24 -0
- isa_model/training/storage/core_integration.py +439 -0
- isa_model/training/storage/training_repository.py +552 -0
- isa_model/training/storage/training_storage.py +628 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
- isa_model-0.4.0.dist-info/RECORD +182 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model-0.3.9.dist-info/RECORD +0 -138
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,628 @@
|
|
1
|
+
"""
|
2
|
+
Training Data Storage System
|
3
|
+
|
4
|
+
Provides persistent storage for training jobs, metrics, and model lifecycle data.
|
5
|
+
Integrates with the core database system while maintaining training module independence.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import json
|
9
|
+
import logging
|
10
|
+
from typing import Dict, List, Optional, Any, Union
|
11
|
+
from dataclasses import dataclass, field, asdict
|
12
|
+
from datetime import datetime
|
13
|
+
from pathlib import Path
|
14
|
+
import uuid
|
15
|
+
|
16
|
+
try:
|
17
|
+
from ...core.database.supabase_client import SupabaseClient
|
18
|
+
SUPABASE_AVAILABLE = True
|
19
|
+
except ImportError:
|
20
|
+
SUPABASE_AVAILABLE = False
|
21
|
+
|
22
|
+
logger = logging.getLogger(__name__)
|
23
|
+
|
24
|
+
|
25
|
+
@dataclass
class TrainingJobRecord:
    """Training job record for persistent storage.

    The first nine fields are required; everything else is optional and
    defaults to ``None`` (or an empty dict / the creation time).
    Use :meth:`to_dict` / :meth:`from_dict` to round-trip a record through
    a JSON-friendly dictionary.
    """

    # Basic information
    job_id: str
    job_name: str
    status: str  # "pending", "running", "completed", "failed", "cancelled"

    # Model and task information
    base_model: str
    task_type: str
    domain: str
    dataset_source: str

    # Training configuration
    training_config: Dict[str, Any]
    resource_config: Dict[str, Any]

    # Results and metrics
    output_model_path: Optional[str] = None
    training_metrics: Optional[Dict[str, Any]] = None
    cost_breakdown: Optional[Dict[str, float]] = None

    # Timing information (created_at defaults to the local, naive "now")
    created_at: datetime = field(default_factory=datetime.now)
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    # User and project information
    user_id: Optional[str] = None
    project_name: Optional[str] = None
    tags: Dict[str, str] = field(default_factory=dict)

    # Error information
    error_message: Optional[str] = None
    error_details: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary for storage.

        Returns:
            A deep copy of the record (via ``asdict``) in which every
            top-level ``datetime`` value is replaced by its ISO-8601 string.
        """
        return {
            key: value.isoformat() if isinstance(value, datetime) else value
            for key, value in asdict(self).items()
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'TrainingJobRecord':
        """Create a record from a dictionary such as one made by ``to_dict``.

        The input mapping is never modified. Keys that are not fields of
        this dataclass are ignored, so a stored row carrying extra columns
        can still be loaded.

        Args:
            data: Mapping of field names to values. The timestamp fields
                may be ``datetime`` objects or ISO-8601 strings.

        Returns:
            The reconstructed record.

        Raises:
            TypeError: If a required field is missing.
            ValueError: If a timestamp string is not valid ISO-8601.
        """
        # Local import: the module only imports dataclass/field/asdict.
        from dataclasses import fields

        accepted = {f.name for f in fields(cls) if f.init}
        # Build a fresh dict so the caller's mapping is left untouched.
        values = {key: value for key, value in data.items() if key in accepted}

        # Convert ISO strings back to datetime objects
        for field_name in ('created_at', 'started_at', 'completed_at'):
            raw = values.get(field_name)
            if raw and isinstance(raw, str):
                values[field_name] = datetime.fromisoformat(raw)

        return cls(**values)
|
83
|
+
|
84
|
+
|
85
|
+
@dataclass
class TrainingMetrics:
    """One sample of training metrics recorded for a job.

    Only ``job_id`` and the progress counters are required; every
    measurement is optional so a sample can carry whatever was available
    when it was taken.
    """

    job_id: str

    # Progress counters
    epoch: int
    step: int
    total_steps: int

    # Losses
    training_loss: Optional[float] = None
    validation_loss: Optional[float] = None
    perplexity: Optional[float] = None

    # Quality scores
    accuracy: Optional[float] = None
    f1_score: Optional[float] = None
    bleu_score: Optional[float] = None
    rouge_score: Optional[Dict[str, float]] = None

    # Hardware usage
    gpu_utilization: Optional[float] = None
    memory_usage: Optional[float] = None

    # Throughput / timing
    epoch_time: Optional[float] = None
    samples_per_second: Optional[float] = None

    # Free-form extras
    custom_metrics: Dict[str, Any] = field(default_factory=dict)

    # When the sample was taken (defaults to the local, naive "now")
    recorded_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict[str, Any]:
        """Return the sample as a dictionary ready for storage.

        ``recorded_at`` is emitted as an ISO-8601 string when it holds a
        ``datetime``; all other values are passed through from ``asdict``.
        """
        payload = asdict(self)
        stamp = payload['recorded_at']
        if isinstance(stamp, datetime):
            payload['recorded_at'] = stamp.isoformat()
        return payload
|
127
|
+
|
128
|
+
|
129
|
+
class TrainingStorage:
|
130
|
+
"""
|
131
|
+
Training data storage system.
|
132
|
+
|
133
|
+
Provides persistent storage for training jobs, metrics, and related data.
|
134
|
+
Uses Supabase when available, falls back to local JSON storage.
|
135
|
+
|
136
|
+
Example:
|
137
|
+
```python
|
138
|
+
storage = TrainingStorage()
|
139
|
+
|
140
|
+
# Store training job
|
141
|
+
job_record = TrainingJobRecord(
|
142
|
+
job_id="training_123",
|
143
|
+
job_name="medical_chatbot_training",
|
144
|
+
status="running",
|
145
|
+
base_model="google/gemma-2-4b-it",
|
146
|
+
task_type="chat",
|
147
|
+
domain="medical",
|
148
|
+
dataset_source="medical_qa.json",
|
149
|
+
training_config={"epochs": 3, "lr": 2e-5},
|
150
|
+
resource_config={"gpu": "RTX 4090", "provider": "runpod"}
|
151
|
+
)
|
152
|
+
|
153
|
+
storage.save_training_job(job_record)
|
154
|
+
|
155
|
+
# Store metrics
|
156
|
+
metrics = TrainingMetrics(
|
157
|
+
job_id="training_123",
|
158
|
+
epoch=1,
|
159
|
+
step=100,
|
160
|
+
total_steps=1000,
|
161
|
+
training_loss=0.5,
|
162
|
+
validation_loss=0.6
|
163
|
+
)
|
164
|
+
|
165
|
+
storage.save_training_metrics(metrics)
|
166
|
+
```
|
167
|
+
"""
|
168
|
+
|
169
|
+
def __init__(self, storage_dir: Optional[str] = None, use_database: bool = True):
|
170
|
+
"""
|
171
|
+
Initialize training storage.
|
172
|
+
|
173
|
+
Args:
|
174
|
+
storage_dir: Local storage directory (fallback)
|
175
|
+
use_database: Whether to use database storage
|
176
|
+
"""
|
177
|
+
self.use_database = use_database and SUPABASE_AVAILABLE
|
178
|
+
self.storage_dir = Path(storage_dir or "./training_data")
|
179
|
+
self.storage_dir.mkdir(exist_ok=True)
|
180
|
+
|
181
|
+
if self.use_database:
|
182
|
+
try:
|
183
|
+
self.db_client = SupabaseClient()
|
184
|
+
logger.info("Training storage initialized with database backend")
|
185
|
+
except Exception as e:
|
186
|
+
logger.warning(f"Failed to initialize database client: {e}")
|
187
|
+
self.use_database = False
|
188
|
+
|
189
|
+
if not self.use_database:
|
190
|
+
logger.info("Training storage initialized with local file backend")
|
191
|
+
|
192
|
+
def save_training_job(self, job_record: TrainingJobRecord) -> bool:
|
193
|
+
"""
|
194
|
+
Save training job record.
|
195
|
+
|
196
|
+
Args:
|
197
|
+
job_record: Training job record to save
|
198
|
+
|
199
|
+
Returns:
|
200
|
+
True if successful
|
201
|
+
"""
|
202
|
+
try:
|
203
|
+
if self.use_database:
|
204
|
+
return self._save_job_to_database(job_record)
|
205
|
+
else:
|
206
|
+
return self._save_job_to_file(job_record)
|
207
|
+
except Exception as e:
|
208
|
+
logger.error(f"Failed to save training job {job_record.job_id}: {e}")
|
209
|
+
return False
|
210
|
+
|
211
|
+
def get_training_job(self, job_id: str) -> Optional[TrainingJobRecord]:
|
212
|
+
"""
|
213
|
+
Get training job record by ID.
|
214
|
+
|
215
|
+
Args:
|
216
|
+
job_id: Job ID to retrieve
|
217
|
+
|
218
|
+
Returns:
|
219
|
+
Training job record or None if not found
|
220
|
+
"""
|
221
|
+
try:
|
222
|
+
if self.use_database:
|
223
|
+
return self._get_job_from_database(job_id)
|
224
|
+
else:
|
225
|
+
return self._get_job_from_file(job_id)
|
226
|
+
except Exception as e:
|
227
|
+
logger.error(f"Failed to get training job {job_id}: {e}")
|
228
|
+
return None
|
229
|
+
|
230
|
+
def update_training_job(self, job_id: str, updates: Dict[str, Any]) -> bool:
|
231
|
+
"""
|
232
|
+
Update training job record.
|
233
|
+
|
234
|
+
Args:
|
235
|
+
job_id: Job ID to update
|
236
|
+
updates: Fields to update
|
237
|
+
|
238
|
+
Returns:
|
239
|
+
True if successful
|
240
|
+
"""
|
241
|
+
try:
|
242
|
+
if self.use_database:
|
243
|
+
return self._update_job_in_database(job_id, updates)
|
244
|
+
else:
|
245
|
+
return self._update_job_in_file(job_id, updates)
|
246
|
+
except Exception as e:
|
247
|
+
logger.error(f"Failed to update training job {job_id}: {e}")
|
248
|
+
return False
|
249
|
+
|
250
|
+
def list_training_jobs(
|
251
|
+
self,
|
252
|
+
status: Optional[str] = None,
|
253
|
+
user_id: Optional[str] = None,
|
254
|
+
limit: int = 100
|
255
|
+
) -> List[TrainingJobRecord]:
|
256
|
+
"""
|
257
|
+
List training jobs with optional filtering.
|
258
|
+
|
259
|
+
Args:
|
260
|
+
status: Filter by job status
|
261
|
+
user_id: Filter by user ID
|
262
|
+
limit: Maximum number of jobs to return
|
263
|
+
|
264
|
+
Returns:
|
265
|
+
List of training job records
|
266
|
+
"""
|
267
|
+
try:
|
268
|
+
if self.use_database:
|
269
|
+
return self._list_jobs_from_database(status, user_id, limit)
|
270
|
+
else:
|
271
|
+
return self._list_jobs_from_files(status, user_id, limit)
|
272
|
+
except Exception as e:
|
273
|
+
logger.error(f"Failed to list training jobs: {e}")
|
274
|
+
return []
|
275
|
+
|
276
|
+
def save_training_metrics(self, metrics: TrainingMetrics) -> bool:
|
277
|
+
"""
|
278
|
+
Save training metrics.
|
279
|
+
|
280
|
+
Args:
|
281
|
+
metrics: Training metrics to save
|
282
|
+
|
283
|
+
Returns:
|
284
|
+
True if successful
|
285
|
+
"""
|
286
|
+
try:
|
287
|
+
if self.use_database:
|
288
|
+
return self._save_metrics_to_database(metrics)
|
289
|
+
else:
|
290
|
+
return self._save_metrics_to_file(metrics)
|
291
|
+
except Exception as e:
|
292
|
+
logger.error(f"Failed to save training metrics for job {metrics.job_id}: {e}")
|
293
|
+
return False
|
294
|
+
|
295
|
+
def get_training_metrics(self, job_id: str) -> List[TrainingMetrics]:
|
296
|
+
"""
|
297
|
+
Get training metrics for a job.
|
298
|
+
|
299
|
+
Args:
|
300
|
+
job_id: Job ID to get metrics for
|
301
|
+
|
302
|
+
Returns:
|
303
|
+
List of training metrics
|
304
|
+
"""
|
305
|
+
try:
|
306
|
+
if self.use_database:
|
307
|
+
return self._get_metrics_from_database(job_id)
|
308
|
+
else:
|
309
|
+
return self._get_metrics_from_files(job_id)
|
310
|
+
except Exception as e:
|
311
|
+
logger.error(f"Failed to get training metrics for job {job_id}: {e}")
|
312
|
+
return []
|
313
|
+
|
314
|
+
def delete_training_job(self, job_id: str) -> bool:
|
315
|
+
"""
|
316
|
+
Delete training job and associated data.
|
317
|
+
|
318
|
+
Args:
|
319
|
+
job_id: Job ID to delete
|
320
|
+
|
321
|
+
Returns:
|
322
|
+
True if successful
|
323
|
+
"""
|
324
|
+
try:
|
325
|
+
if self.use_database:
|
326
|
+
return self._delete_job_from_database(job_id)
|
327
|
+
else:
|
328
|
+
return self._delete_job_from_files(job_id)
|
329
|
+
except Exception as e:
|
330
|
+
logger.error(f"Failed to delete training job {job_id}: {e}")
|
331
|
+
return False
|
332
|
+
|
333
|
+
# Database backend methods
|
334
|
+
def _save_job_to_database(self, job_record: TrainingJobRecord) -> bool:
|
335
|
+
"""Save job record to database."""
|
336
|
+
if not self.use_database:
|
337
|
+
return False
|
338
|
+
|
339
|
+
try:
|
340
|
+
client = self.db_client.get_client()
|
341
|
+
data = job_record.to_dict()
|
342
|
+
|
343
|
+
result = client.table("training_jobs").insert(data).execute()
|
344
|
+
return len(result.data) > 0
|
345
|
+
except Exception as e:
|
346
|
+
logger.error(f"Database save failed: {e}")
|
347
|
+
return False
|
348
|
+
|
349
|
+
def _get_job_from_database(self, job_id: str) -> Optional[TrainingJobRecord]:
|
350
|
+
"""Get job record from database."""
|
351
|
+
if not self.use_database:
|
352
|
+
return None
|
353
|
+
|
354
|
+
try:
|
355
|
+
client = self.db_client.get_client()
|
356
|
+
result = client.table("training_jobs").select("*").eq("job_id", job_id).execute()
|
357
|
+
|
358
|
+
if result.data:
|
359
|
+
return TrainingJobRecord.from_dict(result.data[0])
|
360
|
+
return None
|
361
|
+
except Exception as e:
|
362
|
+
logger.error(f"Database get failed: {e}")
|
363
|
+
return None
|
364
|
+
|
365
|
+
def _update_job_in_database(self, job_id: str, updates: Dict[str, Any]) -> bool:
|
366
|
+
"""Update job record in database."""
|
367
|
+
if not self.use_database:
|
368
|
+
return False
|
369
|
+
|
370
|
+
try:
|
371
|
+
client = self.db_client.get_client()
|
372
|
+
result = client.table("training_jobs").update(updates).eq("job_id", job_id).execute()
|
373
|
+
return len(result.data) > 0
|
374
|
+
except Exception as e:
|
375
|
+
logger.error(f"Database update failed: {e}")
|
376
|
+
return False
|
377
|
+
|
378
|
+
def _list_jobs_from_database(
|
379
|
+
self,
|
380
|
+
status: Optional[str],
|
381
|
+
user_id: Optional[str],
|
382
|
+
limit: int
|
383
|
+
) -> List[TrainingJobRecord]:
|
384
|
+
"""List job records from database."""
|
385
|
+
if not self.use_database:
|
386
|
+
return []
|
387
|
+
|
388
|
+
try:
|
389
|
+
client = self.db_client.get_client()
|
390
|
+
query = client.table("training_jobs").select("*")
|
391
|
+
|
392
|
+
if status:
|
393
|
+
query = query.eq("status", status)
|
394
|
+
if user_id:
|
395
|
+
query = query.eq("user_id", user_id)
|
396
|
+
|
397
|
+
query = query.order("created_at", desc=True).limit(limit)
|
398
|
+
result = query.execute()
|
399
|
+
|
400
|
+
return [TrainingJobRecord.from_dict(record) for record in result.data]
|
401
|
+
except Exception as e:
|
402
|
+
logger.error(f"Database list failed: {e}")
|
403
|
+
return []
|
404
|
+
|
405
|
+
def _save_metrics_to_database(self, metrics: TrainingMetrics) -> bool:
|
406
|
+
"""Save metrics to database."""
|
407
|
+
if not self.use_database:
|
408
|
+
return False
|
409
|
+
|
410
|
+
try:
|
411
|
+
client = self.db_client.get_client()
|
412
|
+
data = metrics.to_dict()
|
413
|
+
|
414
|
+
result = client.table("training_metrics").insert(data).execute()
|
415
|
+
return len(result.data) > 0
|
416
|
+
except Exception as e:
|
417
|
+
logger.error(f"Database metrics save failed: {e}")
|
418
|
+
return False
|
419
|
+
|
420
|
+
def _get_metrics_from_database(self, job_id: str) -> List[TrainingMetrics]:
|
421
|
+
"""Get metrics from database."""
|
422
|
+
if not self.use_database:
|
423
|
+
return []
|
424
|
+
|
425
|
+
try:
|
426
|
+
client = self.db_client.get_client()
|
427
|
+
result = client.table("training_metrics").select("*").eq("job_id", job_id).order("recorded_at").execute()
|
428
|
+
|
429
|
+
metrics_list = []
|
430
|
+
for record in result.data:
|
431
|
+
if isinstance(record['recorded_at'], str):
|
432
|
+
record['recorded_at'] = datetime.fromisoformat(record['recorded_at'])
|
433
|
+
metrics_list.append(TrainingMetrics(**record))
|
434
|
+
|
435
|
+
return metrics_list
|
436
|
+
except Exception as e:
|
437
|
+
logger.error(f"Database metrics get failed: {e}")
|
438
|
+
return []
|
439
|
+
|
440
|
+
def _delete_job_from_database(self, job_id: str) -> bool:
|
441
|
+
"""Delete job from database."""
|
442
|
+
if not self.use_database:
|
443
|
+
return False
|
444
|
+
|
445
|
+
try:
|
446
|
+
client = self.db_client.get_client()
|
447
|
+
|
448
|
+
# Delete metrics first
|
449
|
+
client.table("training_metrics").delete().eq("job_id", job_id).execute()
|
450
|
+
|
451
|
+
# Delete job record
|
452
|
+
result = client.table("training_jobs").delete().eq("job_id", job_id).execute()
|
453
|
+
return len(result.data) > 0
|
454
|
+
except Exception as e:
|
455
|
+
logger.error(f"Database delete failed: {e}")
|
456
|
+
return False
|
457
|
+
|
458
|
+
# File backend methods (fallback)
|
459
|
+
def _save_job_to_file(self, job_record: TrainingJobRecord) -> bool:
|
460
|
+
"""Save job record to local file."""
|
461
|
+
try:
|
462
|
+
job_file = self.storage_dir / "jobs" / f"{job_record.job_id}.json"
|
463
|
+
job_file.parent.mkdir(exist_ok=True)
|
464
|
+
|
465
|
+
with open(job_file, 'w') as f:
|
466
|
+
json.dump(job_record.to_dict(), f, indent=2, default=str)
|
467
|
+
|
468
|
+
return True
|
469
|
+
except Exception as e:
|
470
|
+
logger.error(f"File save failed: {e}")
|
471
|
+
return False
|
472
|
+
|
473
|
+
def _get_job_from_file(self, job_id: str) -> Optional[TrainingJobRecord]:
|
474
|
+
"""Get job record from local file."""
|
475
|
+
try:
|
476
|
+
job_file = self.storage_dir / "jobs" / f"{job_id}.json"
|
477
|
+
if not job_file.exists():
|
478
|
+
return None
|
479
|
+
|
480
|
+
with open(job_file, 'r') as f:
|
481
|
+
data = json.load(f)
|
482
|
+
|
483
|
+
return TrainingJobRecord.from_dict(data)
|
484
|
+
except Exception as e:
|
485
|
+
logger.error(f"File get failed: {e}")
|
486
|
+
return None
|
487
|
+
|
488
|
+
def _update_job_in_file(self, job_id: str, updates: Dict[str, Any]) -> bool:
|
489
|
+
"""Update job record in local file."""
|
490
|
+
try:
|
491
|
+
job_record = self._get_job_from_file(job_id)
|
492
|
+
if not job_record:
|
493
|
+
return False
|
494
|
+
|
495
|
+
# Update fields
|
496
|
+
for key, value in updates.items():
|
497
|
+
if hasattr(job_record, key):
|
498
|
+
setattr(job_record, key, value)
|
499
|
+
|
500
|
+
return self._save_job_to_file(job_record)
|
501
|
+
except Exception as e:
|
502
|
+
logger.error(f"File update failed: {e}")
|
503
|
+
return False
|
504
|
+
|
505
|
+
def _list_jobs_from_files(
|
506
|
+
self,
|
507
|
+
status: Optional[str],
|
508
|
+
user_id: Optional[str],
|
509
|
+
limit: int
|
510
|
+
) -> List[TrainingJobRecord]:
|
511
|
+
"""List job records from local files."""
|
512
|
+
try:
|
513
|
+
jobs_dir = self.storage_dir / "jobs"
|
514
|
+
if not jobs_dir.exists():
|
515
|
+
return []
|
516
|
+
|
517
|
+
jobs = []
|
518
|
+
for job_file in jobs_dir.glob("*.json"):
|
519
|
+
try:
|
520
|
+
with open(job_file, 'r') as f:
|
521
|
+
data = json.load(f)
|
522
|
+
|
523
|
+
job_record = TrainingJobRecord.from_dict(data)
|
524
|
+
|
525
|
+
# Apply filters
|
526
|
+
if status and job_record.status != status:
|
527
|
+
continue
|
528
|
+
if user_id and job_record.user_id != user_id:
|
529
|
+
continue
|
530
|
+
|
531
|
+
jobs.append(job_record)
|
532
|
+
except Exception as e:
|
533
|
+
logger.warning(f"Failed to load job file {job_file}: {e}")
|
534
|
+
continue
|
535
|
+
|
536
|
+
# Sort by creation time (newest first)
|
537
|
+
jobs.sort(key=lambda x: x.created_at, reverse=True)
|
538
|
+
|
539
|
+
return jobs[:limit]
|
540
|
+
except Exception as e:
|
541
|
+
logger.error(f"File list failed: {e}")
|
542
|
+
return []
|
543
|
+
|
544
|
+
def _save_metrics_to_file(self, metrics: TrainingMetrics) -> bool:
|
545
|
+
"""Save metrics to local file."""
|
546
|
+
try:
|
547
|
+
metrics_dir = self.storage_dir / "metrics" / metrics.job_id
|
548
|
+
metrics_dir.mkdir(parents=True, exist_ok=True)
|
549
|
+
|
550
|
+
# Use timestamp for unique filename
|
551
|
+
timestamp = metrics.recorded_at.strftime("%Y%m%d_%H%M%S_%f")
|
552
|
+
metrics_file = metrics_dir / f"metrics_{timestamp}.json"
|
553
|
+
|
554
|
+
with open(metrics_file, 'w') as f:
|
555
|
+
json.dump(metrics.to_dict(), f, indent=2, default=str)
|
556
|
+
|
557
|
+
return True
|
558
|
+
except Exception as e:
|
559
|
+
logger.error(f"File metrics save failed: {e}")
|
560
|
+
return False
|
561
|
+
|
562
|
+
def _get_metrics_from_files(self, job_id: str) -> List[TrainingMetrics]:
|
563
|
+
"""Get metrics from local files."""
|
564
|
+
try:
|
565
|
+
metrics_dir = self.storage_dir / "metrics" / job_id
|
566
|
+
if not metrics_dir.exists():
|
567
|
+
return []
|
568
|
+
|
569
|
+
metrics_list = []
|
570
|
+
for metrics_file in metrics_dir.glob("metrics_*.json"):
|
571
|
+
try:
|
572
|
+
with open(metrics_file, 'r') as f:
|
573
|
+
data = json.load(f)
|
574
|
+
|
575
|
+
if isinstance(data['recorded_at'], str):
|
576
|
+
data['recorded_at'] = datetime.fromisoformat(data['recorded_at'])
|
577
|
+
|
578
|
+
metrics_list.append(TrainingMetrics(**data))
|
579
|
+
except Exception as e:
|
580
|
+
logger.warning(f"Failed to load metrics file {metrics_file}: {e}")
|
581
|
+
continue
|
582
|
+
|
583
|
+
# Sort by recording time
|
584
|
+
metrics_list.sort(key=lambda x: x.recorded_at)
|
585
|
+
|
586
|
+
return metrics_list
|
587
|
+
except Exception as e:
|
588
|
+
logger.error(f"File metrics get failed: {e}")
|
589
|
+
return []
|
590
|
+
|
591
|
+
def _delete_job_from_files(self, job_id: str) -> bool:
|
592
|
+
"""Delete job from local files."""
|
593
|
+
try:
|
594
|
+
# Delete job file
|
595
|
+
job_file = self.storage_dir / "jobs" / f"{job_id}.json"
|
596
|
+
if job_file.exists():
|
597
|
+
job_file.unlink()
|
598
|
+
|
599
|
+
# Delete metrics directory
|
600
|
+
metrics_dir = self.storage_dir / "metrics" / job_id
|
601
|
+
if metrics_dir.exists():
|
602
|
+
import shutil
|
603
|
+
shutil.rmtree(metrics_dir)
|
604
|
+
|
605
|
+
return True
|
606
|
+
except Exception as e:
|
607
|
+
logger.error(f"File delete failed: {e}")
|
608
|
+
return False
|
609
|
+
|
610
|
+
def get_statistics(self) -> Dict[str, Any]:
|
611
|
+
"""Get storage statistics."""
|
612
|
+
try:
|
613
|
+
all_jobs = self.list_training_jobs(limit=1000)
|
614
|
+
|
615
|
+
total_jobs = len(all_jobs)
|
616
|
+
status_counts = {}
|
617
|
+
for job in all_jobs:
|
618
|
+
status_counts[job.status] = status_counts.get(job.status, 0) + 1
|
619
|
+
|
620
|
+
return {
|
621
|
+
"total_jobs": total_jobs,
|
622
|
+
"status_breakdown": status_counts,
|
623
|
+
"backend": "database" if self.use_database else "file",
|
624
|
+
"storage_available": SUPABASE_AVAILABLE
|
625
|
+
}
|
626
|
+
except Exception as e:
|
627
|
+
logger.error(f"Failed to get statistics: {e}")
|
628
|
+
return {"error": str(e)}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: isa_model
|
3
|
-
Version: 0.3.9
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Unified AI model serving framework
|
5
5
|
Author: isA_Model Contributors
|
6
6
|
Classifier: Development Status :: 3 - Alpha
|
@@ -37,6 +37,18 @@ Requires-Dist: trl>=0.4.0
|
|
37
37
|
Requires-Dist: supabase>=2.0.0
|
38
38
|
Requires-Dist: pgvector>=0.2.0
|
39
39
|
Requires-Dist: psycopg2-binary>=2.9.0
|
40
|
+
Requires-Dist: asyncpg>=0.28.0
|
41
|
+
Requires-Dist: slowapi>=0.1.8
|
42
|
+
Requires-Dist: redis>=4.5.0
|
43
|
+
Requires-Dist: circuitbreaker>=1.3.2
|
44
|
+
Requires-Dist: prometheus-fastapi-instrumentator>=6.1.0
|
45
|
+
Requires-Dist: structlog>=23.1.0
|
46
|
+
Provides-Extra: dev
|
47
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
48
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
49
|
+
Requires-Dist: flake8>=4.0.0; extra == "dev"
|
50
|
+
Requires-Dist: mypy>=0.991; extra == "dev"
|
51
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
40
52
|
|
41
53
|
# isa_model_sdk - Unified AI Model Serving Framework
|
42
54
|
|