isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,552 @@
1
+ """
2
+ Training Repository
3
+
4
+ High-level repository pattern for training data access.
5
+ Provides a clean, unified interface for training data operations
6
+ with automatic core integration.
7
+ """
8
+
9
+ import logging
10
+ from typing import Dict, List, Optional, Any, Union
11
+ from datetime import datetime, timedelta
12
+
13
+ from .training_storage import TrainingStorage, TrainingJobRecord, TrainingMetrics
14
+ from .core_integration import CoreModelIntegration
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class TrainingRepository:
20
+ """
21
+ High-level repository for training data management.
22
+
23
+ Provides a unified interface for all training data operations
24
+ with automatic core integration and intelligent features.
25
+
26
+ Example:
27
+ ```python
28
+ repo = TrainingRepository()
29
+
30
+ # Create and track training job
31
+ job_id = repo.create_training_job(
32
+ job_name="medical_chatbot_training",
33
+ base_model="google/gemma-2-4b-it",
34
+ task_type="chat",
35
+ domain="medical",
36
+ dataset_source="medical_qa.json",
37
+ training_config={"epochs": 3},
38
+ user_id="user_123"
39
+ )
40
+
41
+ # Update job status
42
+ repo.update_job_status(job_id, "running")
43
+
44
+ # Record training metrics
45
+ repo.record_metrics(job_id, {
46
+ "epoch": 1,
47
+ "training_loss": 0.5,
48
+ "validation_loss": 0.6
49
+ })
50
+
51
+ # Complete training and register model
52
+ repo.complete_training(
53
+ job_id,
54
+ model_path="/path/to/model",
55
+ final_metrics={"accuracy": 0.95}
56
+ )
57
+ ```
58
+ """
59
+
60
def __init__(self,
             storage: Optional[TrainingStorage] = None,
             core_integration: Optional[CoreModelIntegration] = None):
    """
    Wire the repository to its storage backend and core integration.

    Args:
        storage: Training storage backend. When a falsy value is given,
            a fresh ``TrainingStorage()`` is created.
        core_integration: Core model integration. When a falsy value is
            given, a ``CoreModelIntegration`` bound to the selected
            storage is created.
    """
    if not storage:
        storage = TrainingStorage()
    self.storage = storage

    if not core_integration:
        # The default integration shares the storage chosen above.
        core_integration = CoreModelIntegration(training_storage=self.storage)
    self.core_integration = core_integration

    logger.info("Training repository initialized")
76
+
77
def create_training_job(
    self,
    job_name: str,
    base_model: str,
    task_type: str,
    domain: str,
    dataset_source: str,
    training_config: Dict[str, Any],
    resource_config: Optional[Dict[str, Any]] = None,
    user_id: Optional[str] = None,
    project_name: Optional[str] = None,
    tags: Optional[Dict[str, str]] = None
) -> str:
    """
    Persist a new training job in the "pending" state.

    Args:
        job_name: Human-readable job name
        base_model: Base model identifier
        task_type: Type of training task
        domain: Application domain
        dataset_source: Dataset source path or identifier
        training_config: Training configuration parameters
        resource_config: Resource configuration; stored as ``{}`` when falsy
        user_id: User identifier
        project_name: Project name
        tags: Additional tags; stored as ``{}`` when falsy

    Returns:
        The generated job ID (``"training_"`` followed by 8 hex characters)

    Raises:
        Exception: If the storage backend reports the save as unsuccessful,
            or if anything else fails; the error is logged and re-raised.
    """
    try:
        import uuid

        new_job_id = "training_" + uuid.uuid4().hex[:8]

        record_fields = {
            "job_id": new_job_id,
            "job_name": job_name,
            "status": "pending",
            "base_model": base_model,
            "task_type": task_type,
            "domain": domain,
            "dataset_source": dataset_source,
            "training_config": training_config,
            "resource_config": resource_config or {},
            "user_id": user_id,
            "project_name": project_name,
            "tags": tags or {},
        }
        saved = self.storage.save_training_job(TrainingJobRecord(**record_fields))

        # Guard clause: a falsy save result is treated as a failure.
        if not saved:
            raise Exception("Failed to save training job")

        logger.info(f"Created training job: {new_job_id} ({job_name})")
        return new_job_id

    except Exception as e:
        logger.error(f"Failed to create training job: {e}")
        raise
139
+
140
def update_job_status(
    self,
    job_id: str,
    status: str,
    error_message: Optional[str] = None,
    additional_updates: Optional[Dict[str, Any]] = None
) -> bool:
    """
    Update a training job's status and the matching lifecycle timestamp.

    A "running" status stamps ``started_at`` with the current time unless
    the caller supplies its own ``started_at`` in ``additional_updates``.
    A terminal status ("completed", "failed", "cancelled") stamps
    ``completed_at``. Entries in ``additional_updates`` are applied last
    and therefore override any field computed here.

    Args:
        job_id: Job ID to update
        status: New status ("pending", "running", "completed", "failed", "cancelled")
        error_message: Error message if failed; ignored when empty
        additional_updates: Additional fields to update

    Returns:
        The result of the storage update; False if an exception occurred
        (the exception is logged, not propagated).
    """
    try:
        updates = {"status": status}
        extra = additional_updates or {}

        # The previous single condition mixed `and`/`or` without
        # parentheses, so it evaluated `"started_at" not in None` (a
        # TypeError) for non-running statuses and stamped started_at
        # instead of completed_at on terminal ones. The branches are
        # split explicitly to avoid both problems.
        if status == "running":
            if "started_at" not in extra:
                updates["started_at"] = datetime.now()
        elif status in ("completed", "failed", "cancelled"):
            updates["completed_at"] = datetime.now()

        if error_message:
            updates["error_message"] = error_message

        # Caller-supplied fields win over the computed ones.
        updates.update(extra)

        success = self.storage.update_training_job(job_id, updates)

        if success:
            logger.info(f"Updated job {job_id} status to: {status}")

        return success

    except Exception as e:
        logger.error(f"Failed to update job status for {job_id}: {e}")
        return False
183
+
184
def record_metrics(
    self,
    job_id: str,
    metrics_data: Dict[str, Any]
) -> bool:
    """
    Store one metrics snapshot for a training job.

    Counters (``epoch``, ``step``, ``total_steps``) default to 0 and
    ``custom_metrics`` defaults to ``{}`` when absent from
    ``metrics_data``; every other known metric defaults to None.

    Args:
        job_id: Job ID
        metrics_data: Metrics data dictionary

    Returns:
        The result of the storage save; False if an exception occurred
        (the exception is logged, not propagated).
    """
    # Metrics that are passed through as None when missing.
    nullable_fields = (
        "training_loss",
        "validation_loss",
        "perplexity",
        "accuracy",
        "f1_score",
        "bleu_score",
        "rouge_score",
        "gpu_utilization",
        "memory_usage",
        "epoch_time",
        "samples_per_second",
    )

    try:
        read = metrics_data.get
        nullable_values = {field: read(field) for field in nullable_fields}

        snapshot = TrainingMetrics(
            job_id=job_id,
            epoch=read("epoch", 0),
            step=read("step", 0),
            total_steps=read("total_steps", 0),
            custom_metrics=read("custom_metrics", {}),
            **nullable_values
        )

        saved = self.storage.save_training_metrics(snapshot)

        if saved:
            logger.debug(f"Recorded metrics for job {job_id}")

        return saved

    except Exception as e:
        logger.error(f"Failed to record metrics for job {job_id}: {e}")
        return False
230
+
231
def complete_training(
    self,
    job_id: str,
    model_path: str,
    final_metrics: Optional[Dict[str, Any]] = None,
    cost_breakdown: Optional[Dict[str, float]] = None
) -> Optional[str]:
    """
    Mark a job as completed and register its model with the core system.

    The job is first updated in storage (status, completion time, output
    path, and optionally metrics and costs), then re-read and handed to
    the core integration for registration.

    Args:
        job_id: Job ID
        model_path: Path to the trained model
        final_metrics: Final performance metrics; stored only when truthy
        cost_breakdown: Training cost breakdown; stored only when truthy

    Returns:
        The value returned by the core integration's registration call,
        or None if the storage update fails, the job cannot be re-read,
        or an exception occurs (logged, not propagated).
    """
    try:
        job_updates = {
            "status": "completed",
            "completed_at": datetime.now(),
            "output_model_path": model_path
        }
        if final_metrics:
            job_updates["training_metrics"] = final_metrics
        if cost_breakdown:
            job_updates["cost_breakdown"] = cost_breakdown

        if not self.storage.update_training_job(job_id, job_updates):
            logger.error(f"Failed to update job {job_id} as completed")
            return None

        # Re-read the job so registration sees the stored record.
        finished_job = self.storage.get_training_job(job_id)
        if not finished_job:
            logger.error(f"Failed to retrieve completed job record {job_id}")
            return None

        registered_id = self.core_integration.register_trained_model(
            job_record=finished_job,
            model_path=model_path,
            performance_metrics=final_metrics
        )

        if registered_id:
            logger.info(f"Training completed and model registered: {registered_id}")

        return registered_id

    except Exception as e:
        logger.error(f"Failed to complete training for job {job_id}: {e}")
        return None
291
+
292
def get_job(self, job_id: str) -> Optional[TrainingJobRecord]:
    """Return whatever the storage backend holds for the given job ID."""
    record = self.storage.get_training_job(job_id)
    return record
295
+
296
def list_jobs(
    self,
    status: Optional[str] = None,
    user_id: Optional[str] = None,
    project_name: Optional[str] = None,
    limit: int = 100
) -> List[TrainingJobRecord]:
    """
    Fetch training jobs from storage, optionally narrowed by project.

    ``status``, ``user_id`` and ``limit`` are forwarded to the storage
    backend. ``project_name`` is applied afterwards to the returned jobs,
    so fewer than ``limit`` jobs may come back when it is set.

    Args:
        status: Filter by status
        user_id: Filter by user ID
        project_name: Filter by project name
        limit: Maximum number of jobs requested from storage

    Returns:
        List of training job records
    """
    found = self.storage.list_training_jobs(status=status, user_id=user_id, limit=limit)

    if not project_name:
        return found

    # Project filtering is done here, not by the storage backend.
    return [entry for entry in found if entry.project_name == project_name]
322
+
323
def get_job_metrics(self, job_id: str) -> List[TrainingMetrics]:
    """Return the metrics the storage backend holds for the given job."""
    recorded = self.storage.get_training_metrics(job_id)
    return recorded
326
+
327
def get_job_progress(self, job_id: str) -> Optional[Dict[str, Any]]:
    """
    Build a progress summary for a training job.

    The summary always carries the job's identity, status and ISO
    timestamps. When metrics exist, the last entry contributes the
    current epoch/step, a completion percentage and the latest losses.
    When the job has a start time, the elapsed duration is included,
    measured up to the completion time or, if unset, the current time.

    Args:
        job_id: Job ID

    Returns:
        Progress summary dictionary, or None if the job is not found or
        an exception occurs (logged, not propagated).
    """
    try:
        record = self.get_job(job_id)
        if not record:
            return None

        history = self.get_job_metrics(job_id)
        newest = history[-1] if history else None

        started = record.started_at
        finished = record.completed_at

        summary = {
            "job_id": job_id,
            "job_name": record.job_name,
            "status": record.status,
            "created_at": record.created_at.isoformat(),
            "started_at": started.isoformat() if started else None,
            "completed_at": finished.isoformat() if finished else None
        }

        if newest:
            # Guard against division by zero when total_steps is unset.
            if newest.total_steps > 0:
                percent_done = newest.step / newest.total_steps * 100
            else:
                percent_done = 0

            summary["current_epoch"] = newest.epoch
            summary["current_step"] = newest.step
            summary["total_steps"] = newest.total_steps
            summary["progress_percentage"] = percent_done
            summary["latest_loss"] = newest.training_loss
            summary["latest_validation_loss"] = newest.validation_loss

        if started:
            elapsed = (finished or datetime.now()) - started
            summary["duration_seconds"] = elapsed.total_seconds()
            # Keep only the part before the fractional seconds.
            summary["duration_formatted"] = str(elapsed).split(".")[0]

        return summary

    except Exception as e:
        logger.error(f"Failed to get job progress for {job_id}: {e}")
        return None
376
+
377
def delete_job(self, job_id: str) -> bool:
    """
    Ask the storage backend to delete a training job.

    Returns:
        The storage backend's result; False if an exception occurred
        (the exception is logged, not propagated).
    """
    try:
        removed = self.storage.delete_training_job(job_id)

        if not removed:
            return removed

        logger.info(f"Deleted training job: {job_id}")
        return removed

    except Exception as e:
        logger.error(f"Failed to delete job {job_id}: {e}")
        return False
390
+
391
def get_user_statistics(self, user_id: str) -> Dict[str, Any]:
    """
    Aggregate training statistics over a user's jobs.

    Up to 1000 jobs are requested for the user. Costs are summed over
    every job with a cost breakdown; training time is summed over jobs
    that have both a start and a completion time.

    Args:
        user_id: User identifier

    Returns:
        Dictionary with job count, per-status counts, total and average
        cost, total training time and success rate; on failure, a
        dictionary with a single "error" key (the exception is logged).
    """
    try:
        jobs = self.list_jobs(user_id=user_id, limit=1000)
        job_count = len(jobs)

        per_status = {}
        cost_sum = 0.0
        time_sum = timedelta()

        for entry in jobs:
            per_status.setdefault(entry.status, 0)
            per_status[entry.status] += 1

            if entry.cost_breakdown:
                cost_sum += sum(entry.cost_breakdown.values())

            if entry.started_at and entry.completed_at:
                time_sum += (entry.completed_at - entry.started_at)

        # Ratios fall back to 0 when the user has no jobs.
        if job_count > 0:
            mean_cost = cost_sum / job_count
            completed_share = per_status.get("completed", 0) / job_count
        else:
            mean_cost = 0
            completed_share = 0

        return {
            "user_id": user_id,
            "total_jobs": job_count,
            "status_breakdown": per_status,
            "total_cost_usd": cost_sum,
            "total_training_time": str(time_sum).split(".")[0],
            "average_cost_per_job": mean_cost,
            "success_rate": completed_share
        }

    except Exception as e:
        logger.error(f"Failed to get user statistics for {user_id}: {e}")
        return {"error": str(e)}
434
+
435
def get_recent_activity(self, days: int = 7, limit: int = 50) -> List[Dict[str, Any]]:
    """
    Summarise jobs created within the last ``days`` days.

    Twice ``limit`` jobs are requested so that date filtering still has
    a chance of filling ``limit`` entries.

    Args:
        days: Number of days to look back
        limit: Maximum number of activities

    Returns:
        List of activity dictionaries (at most ``limit``); an empty list
        if an exception occurs (logged, not propagated).
    """
    try:
        oldest_allowed = datetime.now() - timedelta(days=days)

        candidates = self.list_jobs(limit=limit * 2)
        fresh = [item for item in candidates if item.created_at >= oldest_allowed]

        feed = []
        for item in fresh[:limit]:
            entry = {
                "job_id": item.job_id,
                "job_name": item.job_name,
                "status": item.status,
                "base_model": item.base_model,
                "task_type": item.task_type,
                "domain": item.domain,
                "created_at": item.created_at.isoformat(),
                "user_id": item.user_id
            }

            # Optional fields are only present when the job has them.
            if item.completed_at:
                entry["completed_at"] = item.completed_at.isoformat()

            if item.cost_breakdown:
                entry["total_cost"] = sum(item.cost_breakdown.values())

            feed.append(entry)

        return feed

    except Exception as e:
        logger.error(f"Failed to get recent activity: {e}")
        return []
482
+
483
def cleanup_old_jobs(self, days: int = 30, dry_run: bool = True) -> Dict[str, Any]:
    """
    Find, and optionally delete, old jobs that are no longer active.

    Only jobs created before the cutoff whose status is "completed",
    "failed" or "cancelled" are considered; at most 1000 jobs are
    requested for inspection.

    Args:
        days: Delete jobs older than this many days
        dry_run: If True, only return what would be deleted

    Returns:
        Cleanup summary. A dry run lists candidates under "would_delete";
        a real run lists removed jobs under "deleted_jobs" and their
        count under "deleted_count". On failure, a dictionary with a
        single "error" key is returned (the exception is logged).
    """
    try:
        threshold = datetime.now() - timedelta(days=days)
        finished_states = ["completed", "failed", "cancelled"]

        stale = [
            candidate for candidate in self.list_jobs(limit=1000)
            if candidate.created_at < threshold and candidate.status in finished_states
        ]

        def describe(candidate):
            # Compact description used for both report variants.
            return {
                "job_id": candidate.job_id,
                "job_name": candidate.job_name,
                "created_at": candidate.created_at.isoformat()
            }

        report = {
            "total_jobs_found": len(stale),
            "cutoff_date": threshold.isoformat(),
            "dry_run": dry_run,
            "deleted_jobs": []
        }

        if dry_run:
            report["would_delete"] = [describe(candidate) for candidate in stale]
            return report

        removed = 0
        for candidate in stale:
            if not self.delete_job(candidate.job_id):
                continue
            removed += 1
            report["deleted_jobs"].append(describe(candidate))

        report["deleted_count"] = removed
        return report

    except Exception as e:
        logger.error(f"Failed to cleanup old jobs: {e}")
        return {"error": str(e)}
537
+
538
def get_repository_statistics(self) -> Dict[str, Any]:
    """
    Combine storage statistics and core integration status in one report.

    Returns:
        Dictionary with "storage", "core_integration" and
        "repository_version" keys; on failure, a dictionary with a
        single "error" key (the exception is logged).
    """
    try:
        report = {"storage": self.storage.get_statistics()}
        report["core_integration"] = self.core_integration.get_integration_status()
        report["repository_version"] = "1.0.0"
        return report

    except Exception as e:
        logger.error(f"Failed to get repository statistics: {e}")
        return {"error": str(e)}