isa-model 0.0.3__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. isa_model/__init__.py +1 -1
  2. isa_model/core/model_registry.py +273 -46
  3. isa_model/core/storage/hf_storage.py +419 -0
  4. isa_model/deployment/__init__.py +52 -0
  5. isa_model/deployment/core/__init__.py +34 -0
  6. isa_model/deployment/core/deployment_config.py +356 -0
  7. isa_model/deployment/core/deployment_manager.py +549 -0
  8. isa_model/deployment/core/isa_deployment_service.py +401 -0
  9. isa_model/eval/factory.py +381 -140
  10. isa_model/inference/ai_factory.py +142 -240
  11. isa_model/inference/providers/ml_provider.py +50 -0
  12. isa_model/inference/services/audio/openai_tts_service.py +104 -3
  13. isa_model/inference/services/embedding/base_embed_service.py +112 -0
  14. isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
  15. isa_model/inference/services/llm/__init__.py +2 -0
  16. isa_model/inference/services/llm/base_llm_service.py +111 -1
  17. isa_model/inference/services/llm/ollama_llm_service.py +234 -26
  18. isa_model/inference/services/llm/openai_llm_service.py +180 -26
  19. isa_model/inference/services/llm/triton_llm_service.py +481 -0
  20. isa_model/inference/services/ml/base_ml_service.py +78 -0
  21. isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
  22. isa_model/inference/services/vision/__init__.py +3 -3
  23. isa_model/inference/services/vision/base_image_gen_service.py +161 -0
  24. isa_model/inference/services/vision/base_vision_service.py +177 -0
  25. isa_model/inference/services/vision/ollama_vision_service.py +143 -17
  26. isa_model/inference/services/vision/replicate_image_gen_service.py +139 -7
  27. isa_model/training/__init__.py +62 -32
  28. isa_model/training/cloud/__init__.py +22 -0
  29. isa_model/training/cloud/job_orchestrator.py +402 -0
  30. isa_model/training/cloud/runpod_trainer.py +454 -0
  31. isa_model/training/cloud/storage_manager.py +482 -0
  32. isa_model/training/core/__init__.py +23 -0
  33. isa_model/training/core/config.py +181 -0
  34. isa_model/training/core/dataset.py +222 -0
  35. isa_model/training/core/trainer.py +720 -0
  36. isa_model/training/core/utils.py +213 -0
  37. isa_model/training/factory.py +229 -198
  38. isa_model-0.0.8.dist-info/METADATA +465 -0
  39. isa_model-0.0.8.dist-info/RECORD +86 -0
  40. isa_model/core/model_router.py +0 -226
  41. isa_model/core/model_version.py +0 -0
  42. isa_model/core/resource_manager.py +0 -202
  43. isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
  44. isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
  45. isa_model/training/engine/llama_factory/__init__.py +0 -39
  46. isa_model/training/engine/llama_factory/config.py +0 -115
  47. isa_model/training/engine/llama_factory/data_adapter.py +0 -284
  48. isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
  49. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
  50. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
  51. isa_model/training/engine/llama_factory/factory.py +0 -331
  52. isa_model/training/engine/llama_factory/rl.py +0 -254
  53. isa_model/training/engine/llama_factory/trainer.py +0 -171
  54. isa_model/training/image_model/configs/create_config.py +0 -37
  55. isa_model/training/image_model/configs/create_flux_config.py +0 -26
  56. isa_model/training/image_model/configs/create_lora_config.py +0 -21
  57. isa_model/training/image_model/prepare_massed_compute.py +0 -97
  58. isa_model/training/image_model/prepare_upload.py +0 -17
  59. isa_model/training/image_model/raw_data/create_captions.py +0 -16
  60. isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
  61. isa_model/training/image_model/raw_data/pre_processing.py +0 -200
  62. isa_model/training/image_model/train/train.py +0 -42
  63. isa_model/training/image_model/train/train_flux.py +0 -41
  64. isa_model/training/image_model/train/train_lora.py +0 -57
  65. isa_model/training/image_model/train_main.py +0 -25
  66. isa_model-0.0.3.dist-info/METADATA +0 -327
  67. isa_model-0.0.3.dist-info/RECORD +0 -92
  68. isa_model-0.0.3.dist-info/licenses/LICENSE +0 -21
  69. /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
  70. /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
  71. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
  72. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
  73. /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
  74. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
  75. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
  76. /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
  77. {isa_model-0.0.3.dist-info → isa_model-0.0.8.dist-info}/WHEEL +0 -0
  78. {isa_model-0.0.3.dist-info → isa_model-0.0.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,213 @@
1
+ """
2
+ Training Utilities
3
+
4
+ Helper functions and utilities for training operations.
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import logging
10
+ import datetime
11
+ from typing import Dict, Any, Optional, List
12
+ from pathlib import Path
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class TrainingUtils:
    """Utility functions for training operations.

    All members are static methods; the class is used as a namespace and
    holds no state. Methods that log do so through the module-level
    ``logger``.
    """

    @staticmethod
    def generate_output_dir(model_name: str, training_type: str, base_dir: str = "training_outputs") -> str:
        """Build a timestamped output directory path.

        The path is only computed, not created on disk.

        Args:
            model_name: Model identifier; ``/`` and ``:`` are replaced by ``_``
                so the name can be used as a single path component.
            training_type: Label inserted between the model name and timestamp.
            base_dir: Parent directory of the generated path.

        Returns:
            ``<base_dir>/<safe_model_name>_<training_type>_<YYYYmmdd_HHMMSS>``.
        """
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_model_name = model_name.replace("/", "_").replace(":", "_")
        return os.path.join(base_dir, f"{safe_model_name}_{training_type}_{timestamp}")

    @staticmethod
    def save_training_args(args: Dict[str, Any], output_dir: str) -> None:
        """Write training arguments to ``<output_dir>/training_args.json``.

        The directory is created if missing. Values that are not JSON
        serializable are stringified via ``default=str``.

        Args:
            args: Training arguments to persist.
            output_dir: Directory that receives ``training_args.json``.
        """
        args_path = Path(output_dir) / "training_args.json"
        args_path.parent.mkdir(parents=True, exist_ok=True)

        # Explicit encoding so the file does not depend on the platform locale.
        with open(args_path, "w", encoding="utf-8") as f:
            json.dump(args, f, indent=2, default=str)

        logger.info("Training arguments saved to: %s", args_path)

    @staticmethod
    def load_training_args(output_dir: str) -> Dict[str, Any]:
        """Load training arguments from ``<output_dir>/training_args.json``.

        Args:
            output_dir: Directory containing ``training_args.json``.

        Returns:
            The parsed JSON content.

        Raises:
            FileNotFoundError: If ``training_args.json`` does not exist.
        """
        args_path = Path(output_dir) / "training_args.json"

        if not args_path.exists():
            raise FileNotFoundError(f"Training args not found: {args_path}")

        with open(args_path, "r", encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def get_model_info(model_name: str) -> Dict[str, Any]:
        """Get information about a model from its ``transformers`` config.

        This is best-effort: any failure (missing ``transformers`` package,
        config that cannot be loaded, ...) is logged as a warning and reported
        in the returned dict instead of being raised.

        Args:
            model_name: Name or path passed to ``AutoConfig.from_pretrained``.

        Returns:
            On success, a dict with ``model_name``, ``model_type``,
            ``vocab_size``, ``hidden_size``, ``num_layers``,
            ``num_attention_heads`` and ``max_position_embeddings``; attributes
            missing from the config are ``None``. On failure,
            ``{"model_name": ..., "error": ...}``.
        """
        try:
            # Imported lazily so the module can be used without transformers.
            from transformers import AutoConfig

            # SECURITY NOTE(review): trust_remote_code=True allows code shipped
            # with the model repository to run locally. Kept for compatibility;
            # confirm that model_name only ever comes from trusted sources.
            config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

            return {
                "model_name": model_name,
                "model_type": config.model_type,
                "vocab_size": getattr(config, "vocab_size", None),
                "hidden_size": getattr(config, "hidden_size", None),
                "num_layers": getattr(config, "num_hidden_layers", None),
                "num_attention_heads": getattr(config, "num_attention_heads", None),
                "max_position_embeddings": getattr(config, "max_position_embeddings", None),
            }

        except Exception as e:
            logger.warning(f"Could not get model info for {model_name}: {e}")
            return {"model_name": model_name, "error": str(e)}

    @staticmethod
    def estimate_memory_usage(
        model_name: str,
        batch_size: int = 1,
        max_length: int = 1024,
        use_lora: bool = True
    ) -> Dict[str, Any]:
        """Estimate memory usage for training.

        Model dimensions come from :meth:`get_model_info`. Dimensions that are
        unavailable (lookup failed, or the config lacks the attribute) fall
        back to ``hidden_size=4096``, ``num_layers=32``, ``vocab_size=32000``.

        Args:
            model_name: Model whose config is inspected.
            batch_size: Training batch size.
            max_length: Sequence length.
            use_lora: Whether LoRA is used (lower overhead multiplier).

        Returns:
            A dict with ``estimated_params_millions``, ``model_memory_gb``,
            ``total_training_memory_gb``, ``recommended_gpu``, ``use_lora``,
            ``batch_size`` and ``max_length``; or ``{"error": ...}`` if the
            estimate could not be computed.
        """
        try:
            model_info = TrainingUtils.get_model_info(model_name)
            return TrainingUtils._estimate_from_model_info(model_info, batch_size, max_length, use_lora)

        except Exception as e:
            logger.warning(f"Could not estimate memory usage: {e}")
            return {"error": str(e)}

    @staticmethod
    def _estimate_from_model_info(
        model_info: Dict[str, Any],
        batch_size: int,
        max_length: int,
        use_lora: bool
    ) -> Dict[str, Any]:
        """Compute the rough memory estimate from an already-fetched model info dict."""
        # `or` (not a .get default) because get_model_info stores None for
        # attributes the config does not define; a .get default would not
        # apply in that case and the arithmetic below would fail on None.
        hidden_size = model_info.get("hidden_size") or 4096
        num_layers = model_info.get("num_layers") or 32
        vocab_size = model_info.get("vocab_size") or 32000

        # Estimate model parameters (in millions)
        param_count = (hidden_size * hidden_size * 12 * num_layers + vocab_size * hidden_size) / 1e6

        # Base memory for model (assuming fp16): millions of params * 2 bytes
        # gives megabytes, divided by 1024 to get gigabytes.
        model_memory_gb = param_count * 2 / 1024

        # Training memory overhead (gradients, optimizer states, activations):
        # LoRA uses the smaller multiplier, full fine-tuning the larger one.
        training_overhead = 2.0 if use_lora else 4.0

        # Batch and sequence length impact (activation memory, in GB)
        sequence_memory = batch_size * max_length * hidden_size * 2 / (1024 ** 3)

        total_memory_gb = model_memory_gb * training_overhead + sequence_memory

        return {
            "estimated_params_millions": param_count,
            "model_memory_gb": model_memory_gb,
            "total_training_memory_gb": total_memory_gb,
            "recommended_gpu": TrainingUtils._recommend_gpu(total_memory_gb),
            "use_lora": use_lora,
            "batch_size": batch_size,
            "max_length": max_length
        }

    @staticmethod
    def _recommend_gpu(memory_gb: float) -> str:
        """Recommend GPU based on memory requirements.

        Args:
            memory_gb: Required memory in gigabytes.

        Returns:
            The label of the first tier whose upper bound is at least
            ``memory_gb``, or the multi-GPU label above 80 GB.
        """
        # Ordered (inclusive upper bound in GB, label) tiers.
        tiers = (
            (8, "RTX 3080/4070 (8-12GB)"),
            (16, "RTX 4080/4090 (16GB)"),
            (24, "RTX A6000/4090 (24GB)"),
            (40, "A100 40GB"),
            (80, "A100 80GB"),
        )
        for limit_gb, label in tiers:
            if memory_gb <= limit_gb:
                return label
        return "Multiple A100 80GB (Multi-GPU required)"

    @staticmethod
    def _in_range(value: Any, low: float, high: Optional[float] = None) -> bool:
        """Return True if ``low < value`` (and ``value <= high`` when given).

        Values that cannot be compared with numbers (``None``, strings, ...)
        and NaN yield False instead of raising.
        """
        try:
            return bool(value > low and (high is None or value <= high))
        except (TypeError, ValueError):
            return False

    @staticmethod
    def validate_training_config(config: Dict[str, Any]) -> List[str]:
        """Validate training configuration and return any issues.

        Missing ``batch_size``, ``learning_rate`` and ``num_epochs`` are
        treated as 0 and therefore reported. Values of the wrong type are
        reported with the same message rather than raising.

        Args:
            config: Training configuration to check.

        Returns:
            Human-readable issue descriptions; empty when no issue was found.
        """
        # Check required fields
        issues = [
            f"Missing required field: {field}"
            for field in ("model_name", "output_dir")
            if field not in config
        ]

        # Check batch size
        if not TrainingUtils._in_range(config.get("batch_size", 0), 0):
            issues.append("batch_size must be positive")

        # Check learning rate
        if not TrainingUtils._in_range(config.get("learning_rate", 0), 0, 1):
            issues.append("learning_rate should be between 0 and 1")

        # Check epochs
        if not TrainingUtils._in_range(config.get("num_epochs", 0), 0):
            issues.append("num_epochs must be positive")

        # Check LoRA config
        # NOTE(review): use_lora defaults to False here but to True in
        # format_training_summary and estimate_memory_usage - confirm which
        # default is intended.
        if config.get("use_lora", False):
            if not TrainingUtils._in_range(config.get("lora_rank", 8), 0, 256):
                issues.append("lora_rank should be between 1 and 256")

        return issues

    @staticmethod
    def format_training_summary(
        config: Dict[str, Any],
        model_info: Dict[str, Any],
        memory_estimate: Dict[str, Any]
    ) -> str:
        """Format a training summary for display.

        Args:
            config: Training configuration; missing keys are shown with the
                defaults used in the body below.
            model_info: Dict as returned by :meth:`get_model_info`.
            memory_estimate: Dict as returned by :meth:`estimate_memory_usage`.

        Returns:
            A multi-line, human-readable summary string.
        """
        rule = "=" * 60

        summary = [
            rule,
            "TRAINING CONFIGURATION SUMMARY",
            rule,
            # Model information
            f"Model: {config.get('model_name', 'Unknown')}",
            f"Model Type: {model_info.get('model_type', 'Unknown')}",
            f"Parameters: ~{memory_estimate.get('estimated_params_millions', 0):.1f}M",
            # Training configuration
            "\nTraining Configuration:",
            f" Training Type: {config.get('training_type', 'sft')}",
            f" Epochs: {config.get('num_epochs', 3)}",
            f" Batch Size: {config.get('batch_size', 4)}",
            f" Learning Rate: {config.get('learning_rate', 2e-5)}",
            f" Max Length: {config.get('max_length', 1024)}",
        ]

        # LoRA configuration (shown unless use_lora is explicitly falsy)
        if config.get("use_lora", True):
            summary += [
                "\nLoRA Configuration:",
                f" LoRA Rank: {config.get('lora_rank', 8)}",
                f" LoRA Alpha: {config.get('lora_alpha', 16)}",
                f" LoRA Dropout: {config.get('lora_dropout', 0.05)}",
            ]

        summary += [
            # Memory estimation
            "\nMemory Estimation:",
            f" Estimated Memory: ~{memory_estimate.get('total_training_memory_gb', 0):.1f}GB",
            f" Recommended GPU: {memory_estimate.get('recommended_gpu', 'Unknown')}",
            # Output
            f"\nOutput Directory: {config.get('output_dir', 'Unknown')}",
            rule,
        ]

        return "\n".join(summary)