isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/client.py +732 -565
- isa_model/core/cache/redis_cache.py +401 -0
- isa_model/core/config/config_manager.py +53 -10
- isa_model/core/config.py +1 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/migrations.py +277 -0
- isa_model/core/database/supabase_client.py +123 -0
- isa_model/core/models/__init__.py +37 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +36 -18
- isa_model/core/models/model_repo.py +44 -38
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +101 -370
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +7 -0
- isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
- isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
- isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/core/deployment_manager.py +6 -4
- isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
- isa_model/eval/benchmarks/__init__.py +27 -0
- isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
- isa_model/eval/benchmarks.py +244 -12
- isa_model/eval/evaluators/__init__.py +8 -2
- isa_model/eval/evaluators/audio_evaluator.py +727 -0
- isa_model/eval/evaluators/embedding_evaluator.py +742 -0
- isa_model/eval/evaluators/vision_evaluator.py +564 -0
- isa_model/eval/example_evaluation.py +395 -0
- isa_model/eval/factory.py +272 -5
- isa_model/eval/isa_benchmarks.py +700 -0
- isa_model/eval/isa_integration.py +582 -0
- isa_model/eval/metrics.py +159 -6
- isa_model/eval/tests/unit/test_basic.py +396 -0
- isa_model/inference/ai_factory.py +44 -8
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +32 -6
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/base_llm_service.py +30 -6
- isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
- isa_model/inference/services/llm/ollama_llm_service.py +2 -1
- isa_model/inference/services/llm/openai_llm_service.py +652 -55
- isa_model/inference/services/llm/yyds_llm_service.py +2 -1
- isa_model/inference/services/vision/__init__.py +5 -5
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/helpers/image_utils.py +11 -5
- isa_model/inference/services/vision/isa_vision_service.py +573 -0
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/serving/api/fastapi_server.py +88 -16
- isa_model/serving/api/middleware/auth.py +311 -0
- isa_model/serving/api/middleware/security.py +278 -0
- isa_model/serving/api/routes/analytics.py +486 -0
- isa_model/serving/api/routes/deployments.py +339 -0
- isa_model/serving/api/routes/evaluations.py +579 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/unified.py +324 -165
- isa_model/serving/api/startup.py +304 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/training/__init__.py +100 -6
- isa_model/training/core/__init__.py +4 -1
- isa_model/training/examples/intelligent_training_example.py +281 -0
- isa_model/training/intelligent/__init__.py +25 -0
- isa_model/training/intelligent/decision_engine.py +643 -0
- isa_model/training/intelligent/intelligent_factory.py +888 -0
- isa_model/training/intelligent/knowledge_base.py +751 -0
- isa_model/training/intelligent/resource_optimizer.py +839 -0
- isa_model/training/intelligent/task_classifier.py +576 -0
- isa_model/training/storage/__init__.py +24 -0
- isa_model/training/storage/core_integration.py +439 -0
- isa_model/training/storage/training_repository.py +552 -0
- isa_model/training/storage/training_storage.py +628 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
- isa_model-0.4.0.dist-info/RECORD +182 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model-0.3.9.dist-info/RECORD +0 -138
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,552 @@
|
|
1
|
+
"""
|
2
|
+
Training Repository
|
3
|
+
|
4
|
+
High-level repository pattern for training data access.
|
5
|
+
Provides a clean, unified interface for training data operations
|
6
|
+
with automatic core integration.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import logging
|
10
|
+
from typing import Dict, List, Optional, Any, Union
|
11
|
+
from datetime import datetime, timedelta
|
12
|
+
|
13
|
+
from .training_storage import TrainingStorage, TrainingJobRecord, TrainingMetrics
|
14
|
+
from .core_integration import CoreModelIntegration
|
15
|
+
|
16
|
+
# Module-level logger named after this module; TrainingRepository reports
# its info/debug/error messages through it.
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
class TrainingRepository:
    """
    High-level repository for training data management.

    Provides a unified interface for all training data operations
    with automatic core integration and intelligent features.

    Most methods follow a "log and report" error style: failures are logged
    through the module logger and signalled via the return value (``False``,
    ``None``, ``[]`` or ``{"error": ...}``) instead of raising. The exception
    is :meth:`create_training_job`, which logs and re-raises.

    Example:
        ```python
        repo = TrainingRepository()

        # Create and track training job
        job_id = repo.create_training_job(
            job_name="medical_chatbot_training",
            base_model="google/gemma-2-4b-it",
            task_type="chat",
            domain="medical",
            dataset_source="medical_qa.json",
            training_config={"epochs": 3},
            user_id="user_123"
        )

        # Update job status
        repo.update_job_status(job_id, "running")

        # Record training metrics
        repo.record_metrics(job_id, {
            "epoch": 1,
            "training_loss": 0.5,
            "validation_loss": 0.6
        })

        # Complete training and register model
        repo.complete_training(
            job_id,
            model_path="/path/to/model",
            final_metrics={"accuracy": 0.95}
        )
        ```
    """

    # Statuses that end a job: they stamp ``completed_at`` in
    # update_job_status() and make a job eligible for cleanup_old_jobs().
    _TERMINAL_STATUSES = ("completed", "failed", "cancelled")

    def __init__(self,
                 storage: Optional["TrainingStorage"] = None,
                 core_integration: Optional["CoreModelIntegration"] = None):
        """
        Initialize training repository.

        Args:
            storage: Training storage backend. A default ``TrainingStorage()``
                is created when omitted.
            core_integration: Core model integration. When omitted, one is
                created that shares this repository's storage backend.
        """
        self.storage = storage or TrainingStorage()
        self.core_integration = core_integration or CoreModelIntegration(
            training_storage=self.storage
        )

        logger.info("Training repository initialized")

    def create_training_job(
        self,
        job_name: str,
        base_model: str,
        task_type: str,
        domain: str,
        dataset_source: str,
        training_config: Dict[str, Any],
        resource_config: Optional[Dict[str, Any]] = None,
        user_id: Optional[str] = None,
        project_name: Optional[str] = None,
        tags: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Create a new training job record.

        The job is stored with status ``"pending"`` and an ID of the form
        ``training_<8 hex chars>`` taken from a random UUID.

        Args:
            job_name: Human-readable job name
            base_model: Base model identifier
            task_type: Type of training task
            domain: Application domain
            dataset_source: Dataset source path or identifier
            training_config: Training configuration parameters
            resource_config: Resource configuration (GPU, cloud provider, etc.);
                stored as an empty dict when omitted
            user_id: User identifier
            project_name: Project name
            tags: Additional tags; stored as an empty dict when omitted

        Returns:
            Job ID of created training job

        Raises:
            RuntimeError: If the storage backend reports that the save failed.
            Exception: Any error raised while building or saving the record
                is logged and re-raised unchanged.
        """
        try:
            import uuid

            job_id = f"training_{uuid.uuid4().hex[:8]}"

            job_record = TrainingJobRecord(
                job_id=job_id,
                job_name=job_name,
                status="pending",
                base_model=base_model,
                task_type=task_type,
                domain=domain,
                dataset_source=dataset_source,
                training_config=training_config,
                resource_config=resource_config or {},
                user_id=user_id,
                project_name=project_name,
                tags=tags or {}
            )

            if not self.storage.save_training_job(job_record):
                # RuntimeError is still an Exception, so existing
                # ``except Exception`` callers keep working.
                raise RuntimeError("Failed to save training job")

            logger.info(f"Created training job: {job_id} ({job_name})")
            return job_id

        except Exception as e:
            logger.error(f"Failed to create training job: {e}")
            raise

    def update_job_status(
        self,
        job_id: str,
        status: str,
        error_message: Optional[str] = None,
        additional_updates: Optional[Dict[str, Any]] = None
    ) -> bool:
        """
        Update training job status.

        ``started_at`` is stamped when the job moves to ``"running"`` (unless
        the caller supplies its own value), and ``completed_at`` is stamped
        for any terminal status. Entries in ``additional_updates`` are applied
        last and therefore override the automatic fields.

        Args:
            job_id: Job ID to update
            status: New status ("pending", "running", "completed", "failed", "cancelled")
            error_message: Error message if failed
            additional_updates: Additional fields to update

        Returns:
            True if successful, False if storage rejected the update or an
            error occurred (the error is logged, not raised)
        """
        try:
            updates: Dict[str, Any] = {"status": status}

            # The parentheses matter: ``and`` binds tighter than ``or``, so
            # without them every non-running status would evaluate
            # ``"started_at" not in additional_updates`` -- a TypeError when
            # additional_updates is None -- or wrongly stamp ``started_at``
            # instead of ``completed_at``.
            caller_sets_start = bool(additional_updates) and "started_at" in additional_updates
            if status == "running" and not caller_sets_start:
                updates["started_at"] = datetime.now()
            elif status in self._TERMINAL_STATUSES:
                updates["completed_at"] = datetime.now()

            if error_message:
                updates["error_message"] = error_message

            if additional_updates:
                updates.update(additional_updates)

            success = self.storage.update_training_job(job_id, updates)

            if success:
                logger.info(f"Updated job {job_id} status to: {status}")

            return success

        except Exception as e:
            logger.error(f"Failed to update job status for {job_id}: {e}")
            return False

    def record_metrics(
        self,
        job_id: str,
        metrics_data: Dict[str, Any]
    ) -> bool:
        """
        Record training metrics for a job.

        Missing ``epoch``/``step``/``total_steps`` default to 0, missing
        ``custom_metrics`` to an empty dict, and every other missing metric
        to ``None``.

        Args:
            job_id: Job ID
            metrics_data: Metrics data dictionary

        Returns:
            True if successful, False otherwise (errors are logged)
        """
        try:
            # Create TrainingMetrics object
            metrics = TrainingMetrics(
                job_id=job_id,
                epoch=metrics_data.get("epoch", 0),
                step=metrics_data.get("step", 0),
                total_steps=metrics_data.get("total_steps", 0),
                training_loss=metrics_data.get("training_loss"),
                validation_loss=metrics_data.get("validation_loss"),
                perplexity=metrics_data.get("perplexity"),
                accuracy=metrics_data.get("accuracy"),
                f1_score=metrics_data.get("f1_score"),
                bleu_score=metrics_data.get("bleu_score"),
                rouge_score=metrics_data.get("rouge_score"),
                gpu_utilization=metrics_data.get("gpu_utilization"),
                memory_usage=metrics_data.get("memory_usage"),
                epoch_time=metrics_data.get("epoch_time"),
                samples_per_second=metrics_data.get("samples_per_second"),
                custom_metrics=metrics_data.get("custom_metrics", {})
            )

            success = self.storage.save_training_metrics(metrics)

            if success:
                logger.debug(f"Recorded metrics for job {job_id}")

            return success

        except Exception as e:
            logger.error(f"Failed to record metrics for job {job_id}: {e}")
            return False

    def complete_training(
        self,
        job_id: str,
        model_path: str,
        final_metrics: Optional[Dict[str, Any]] = None,
        cost_breakdown: Optional[Dict[str, float]] = None
    ) -> Optional[str]:
        """
        Complete training and register the trained model.

        The job is first marked ``"completed"`` in storage, then re-read and
        handed to the core integration for model registration.

        Args:
            job_id: Job ID
            model_path: Path to the trained model
            final_metrics: Final performance metrics
            cost_breakdown: Training cost breakdown

        Returns:
            Core model ID if successful, None otherwise
        """
        try:
            # Update job status to completed
            updates: Dict[str, Any] = {
                "status": "completed",
                "completed_at": datetime.now(),
                "output_model_path": model_path
            }

            if final_metrics:
                updates["training_metrics"] = final_metrics

            if cost_breakdown:
                updates["cost_breakdown"] = cost_breakdown

            if not self.storage.update_training_job(job_id, updates):
                logger.error(f"Failed to update job {job_id} as completed")
                return None

            # Get updated job record
            job_record = self.storage.get_training_job(job_id)
            if not job_record:
                logger.error(f"Failed to retrieve completed job record {job_id}")
                return None

            # Register model in core system
            core_model_id = self.core_integration.register_trained_model(
                job_record=job_record,
                model_path=model_path,
                performance_metrics=final_metrics
            )

            if core_model_id:
                logger.info(f"Training completed and model registered: {core_model_id}")

            return core_model_id

        except Exception as e:
            logger.error(f"Failed to complete training for job {job_id}: {e}")
            return None

    def get_job(self, job_id: str) -> Optional["TrainingJobRecord"]:
        """Get training job by ID."""
        return self.storage.get_training_job(job_id)

    def list_jobs(
        self,
        status: Optional[str] = None,
        user_id: Optional[str] = None,
        project_name: Optional[str] = None,
        limit: int = 100
    ) -> List["TrainingJobRecord"]:
        """
        List training jobs with filtering.

        ``status``, ``user_id`` and ``limit`` are passed to the storage
        backend. ``project_name`` is filtered here afterwards, so fewer than
        ``limit`` jobs may be returned even when more matches exist.

        Args:
            status: Filter by status
            user_id: Filter by user ID
            project_name: Filter by project name
            limit: Maximum number of jobs

        Returns:
            List of training job records
        """
        jobs = self.storage.list_training_jobs(status=status, user_id=user_id, limit=limit)

        # Additional filtering for project_name
        if project_name:
            jobs = [job for job in jobs if job.project_name == project_name]

        return jobs

    def get_job_metrics(self, job_id: str) -> List["TrainingMetrics"]:
        """Get all metrics for a training job."""
        return self.storage.get_training_metrics(job_id)

    def get_job_progress(self, job_id: str) -> Optional[Dict[str, Any]]:
        """
        Get training job progress summary.

        Args:
            job_id: Job ID

        Returns:
            Progress summary with latest metrics, or None when the job does
            not exist or an error occurred (errors are logged)
        """
        try:
            job = self.get_job(job_id)
            if not job:
                return None

            metrics_list = self.get_job_metrics(job_id)
            # The last entry of the stored metrics list is treated as the latest.
            latest_metrics = metrics_list[-1] if metrics_list else None

            progress: Dict[str, Any] = {
                "job_id": job_id,
                "job_name": job.job_name,
                "status": job.status,
                "created_at": job.created_at.isoformat(),
                "started_at": job.started_at.isoformat() if job.started_at else None,
                "completed_at": job.completed_at.isoformat() if job.completed_at else None
            }

            if latest_metrics:
                total_steps = latest_metrics.total_steps
                # Guard against division by zero when total_steps is unknown.
                percentage = (latest_metrics.step / total_steps * 100) if total_steps > 0 else 0
                progress.update({
                    "current_epoch": latest_metrics.epoch,
                    "current_step": latest_metrics.step,
                    "total_steps": total_steps,
                    "progress_percentage": percentage,
                    "latest_loss": latest_metrics.training_loss,
                    "latest_validation_loss": latest_metrics.validation_loss
                })

            # Calculate duration; a job that has not finished is measured up to now.
            if job.started_at:
                end_time = job.completed_at or datetime.now()
                duration = end_time - job.started_at
                progress["duration_seconds"] = duration.total_seconds()
                progress["duration_formatted"] = str(duration).split(".")[0]  # Remove microseconds

            return progress

        except Exception as e:
            logger.error(f"Failed to get job progress for {job_id}: {e}")
            return None

    def delete_job(self, job_id: str) -> bool:
        """Delete a training job via the storage backend; returns False on failure."""
        try:
            success = self.storage.delete_training_job(job_id)

            if success:
                logger.info(f"Deleted training job: {job_id}")

            return success

        except Exception as e:
            logger.error(f"Failed to delete job {job_id}: {e}")
            return False

    def get_user_statistics(self, user_id: str) -> Dict[str, Any]:
        """
        Get training statistics for a specific user.

        Only the first 1000 jobs returned for the user are considered.

        Args:
            user_id: User identifier

        Returns:
            User training statistics, or ``{"error": <message>}`` on failure
        """
        try:
            user_jobs = self.list_jobs(user_id=user_id, limit=1000)

            total_jobs = len(user_jobs)
            status_counts: Dict[str, int] = {}
            total_cost = 0.0
            total_duration = timedelta()

            for job in user_jobs:
                # Count by status
                status_counts[job.status] = status_counts.get(job.status, 0) + 1

                # Sum costs
                if job.cost_breakdown:
                    total_cost += sum(job.cost_breakdown.values())

                # Sum duration (only jobs with both timestamps contribute)
                if job.started_at and job.completed_at:
                    total_duration += (job.completed_at - job.started_at)

            return {
                "user_id": user_id,
                "total_jobs": total_jobs,
                "status_breakdown": status_counts,
                "total_cost_usd": total_cost,
                "total_training_time": str(total_duration).split(".")[0],
                "average_cost_per_job": total_cost / total_jobs if total_jobs > 0 else 0,
                "success_rate": status_counts.get("completed", 0) / total_jobs if total_jobs > 0 else 0
            }

        except Exception as e:
            logger.error(f"Failed to get user statistics for {user_id}: {e}")
            return {"error": str(e)}

    def get_recent_activity(self, days: int = 7, limit: int = 50) -> List[Dict[str, Any]]:
        """
        Get recent training activity.

        Fetches ``limit * 2`` jobs from storage and keeps those created within
        the last ``days`` days, so the date filter only sees that window.

        Args:
            days: Number of days to look back
            limit: Maximum number of activities

        Returns:
            List of recent activities (empty on error)
        """
        try:
            cutoff_date = datetime.now() - timedelta(days=days)

            all_jobs = self.list_jobs(limit=limit * 2)  # Get more to filter by date

            recent_jobs = [
                job for job in all_jobs
                if job.created_at >= cutoff_date
            ][:limit]

            activities = []
            for job in recent_jobs:
                activity = {
                    "job_id": job.job_id,
                    "job_name": job.job_name,
                    "status": job.status,
                    "base_model": job.base_model,
                    "task_type": job.task_type,
                    "domain": job.domain,
                    "created_at": job.created_at.isoformat(),
                    "user_id": job.user_id
                }

                if job.completed_at:
                    activity["completed_at"] = job.completed_at.isoformat()

                if job.cost_breakdown:
                    activity["total_cost"] = sum(job.cost_breakdown.values())

                activities.append(activity)

            return activities

        except Exception as e:
            logger.error(f"Failed to get recent activity: {e}")
            return []

    @staticmethod
    def _job_summary(job: Any) -> Dict[str, Any]:
        """Return the short job description used in cleanup reports."""
        return {
            "job_id": job.job_id,
            "job_name": job.job_name,
            "created_at": job.created_at.isoformat()
        }

    def cleanup_old_jobs(self, days: int = 30, dry_run: bool = True) -> Dict[str, Any]:
        """
        Cleanup old training jobs.

        Only jobs in a terminal status ("completed", "failed", "cancelled")
        that were created before the cutoff are considered, and only the
        first 1000 jobs returned by storage are inspected per call.

        Args:
            days: Delete jobs older than this many days
            dry_run: If True, only return what would be deleted

        Returns:
            Cleanup summary, or ``{"error": <message>}`` on failure
        """
        try:
            cutoff_date = datetime.now() - timedelta(days=days)

            all_jobs = self.list_jobs(limit=1000)
            old_jobs = [
                job for job in all_jobs
                if job.created_at < cutoff_date and job.status in self._TERMINAL_STATUSES
            ]

            summary: Dict[str, Any] = {
                "total_jobs_found": len(old_jobs),
                "cutoff_date": cutoff_date.isoformat(),
                "dry_run": dry_run,
                "deleted_jobs": []
            }

            if dry_run:
                summary["would_delete"] = [self._job_summary(job) for job in old_jobs]
            else:
                # Report only the jobs whose deletion actually succeeded.
                for job in old_jobs:
                    if self.delete_job(job.job_id):
                        summary["deleted_jobs"].append(self._job_summary(job))

                summary["deleted_count"] = len(summary["deleted_jobs"])

            return summary

        except Exception as e:
            logger.error(f"Failed to cleanup old jobs: {e}")
            return {"error": str(e)}

    def get_repository_statistics(self) -> Dict[str, Any]:
        """Get overall repository statistics (storage stats plus core integration status)."""
        try:
            storage_stats = self.storage.get_statistics()
            integration_status = self.core_integration.get_integration_status()

            return {
                "storage": storage_stats,
                "core_integration": integration_status,
                "repository_version": "1.0.0"
            }

        except Exception as e:
            logger.error(f"Failed to get repository statistics: {e}")
            return {"error": str(e)}
|