isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/training/factory.py +0 -424
@@ -1,424 +0,0 @@
1
- """
2
- ISA Model Training Factory
3
-
4
- A clean, simplified training factory that uses HuggingFace Transformers directly
5
- without external dependencies like LlamaFactory.
6
- """
7
-
8
- import os
9
- import logging
10
- from typing import Optional, Dict, Any, Union, List
11
- from pathlib import Path
12
- import datetime
13
-
14
- from .core import (
15
- TrainingConfig,
16
- LoRAConfig,
17
- DatasetConfig,
18
- BaseTrainer,
19
- SFTTrainer,
20
- TrainingUtils,
21
- DatasetManager,
22
- )
23
- from .cloud import TrainingJobOrchestrator
24
-
25
- logger = logging.getLogger(__name__)
26
-
27
-
28
- class TrainingFactory:
29
- """
30
- Unified Training Factory for ISA Model SDK
31
-
32
- Provides a clean interface for:
33
- - Local training with SFT (Supervised Fine-Tuning)
34
- - Cloud training on RunPod
35
- - Model evaluation and management
36
-
37
- Example usage:
38
- ```python
39
- from isa_model.training import TrainingFactory
40
-
41
- factory = TrainingFactory()
42
-
43
- # Local training
44
- model_path = factory.train_model(
45
- model_name="google/gemma-2-4b-it",
46
- dataset_path="tatsu-lab/alpaca",
47
- use_lora=True,
48
- num_epochs=3
49
- )
50
-
51
- # Cloud training on RunPod
52
- result = factory.train_on_runpod(
53
- model_name="google/gemma-2-4b-it",
54
- dataset_path="tatsu-lab/alpaca",
55
- runpod_api_key="your-api-key",
56
- template_id="your-template-id"
57
- )
58
- ```
59
- """
60
-
61
- def __init__(self, base_output_dir: Optional[str] = None):
62
- """
63
- Initialize the training factory.
64
-
65
- Args:
66
- base_output_dir: Base directory for training outputs
67
- """
68
- self.base_output_dir = base_output_dir or os.path.join(os.getcwd(), "training_outputs")
69
- os.makedirs(self.base_output_dir, exist_ok=True)
70
-
71
- logger.info(f"TrainingFactory initialized with output dir: {self.base_output_dir}")
72
-
73
- def train_model(
74
- self,
75
- model_name: str,
76
- dataset_path: str,
77
- output_dir: Optional[str] = None,
78
- training_type: str = "sft",
79
- dataset_format: str = "alpaca",
80
- use_lora: bool = True,
81
- batch_size: int = 4,
82
- num_epochs: int = 3,
83
- learning_rate: float = 2e-5,
84
- max_length: int = 1024,
85
- lora_rank: int = 8,
86
- lora_alpha: int = 16,
87
- validation_split: float = 0.1,
88
- **kwargs
89
- ) -> str:
90
- """
91
- Train a model locally.
92
-
93
- Args:
94
- model_name: Model identifier (e.g., "google/gemma-2-4b-it")
95
- dataset_path: Path to dataset or HuggingFace dataset name
96
- output_dir: Custom output directory
97
- training_type: Type of training ("sft" supported)
98
- dataset_format: Dataset format ("alpaca", "sharegpt", "custom")
99
- use_lora: Whether to use LoRA for efficient training
100
- batch_size: Training batch size
101
- num_epochs: Number of training epochs
102
- learning_rate: Learning rate
103
- max_length: Maximum sequence length
104
- lora_rank: LoRA rank parameter
105
- lora_alpha: LoRA alpha parameter
106
- validation_split: Fraction of data for validation
107
- **kwargs: Additional training parameters
108
-
109
- Returns:
110
- Path to the trained model
111
-
112
- Example:
113
- ```python
114
- model_path = factory.train_model(
115
- model_name="google/gemma-2-4b-it",
116
- dataset_path="tatsu-lab/alpaca",
117
- use_lora=True,
118
- num_epochs=3,
119
- batch_size=4
120
- )
121
- ```
122
- """
123
- # Generate output directory if not provided
124
- if not output_dir:
125
- output_dir = TrainingUtils.generate_output_dir(
126
- model_name, training_type, self.base_output_dir
127
- )
128
-
129
- # Create configurations
130
- lora_config = LoRAConfig(
131
- use_lora=use_lora,
132
- lora_rank=lora_rank,
133
- lora_alpha=lora_alpha
134
- ) if use_lora else None
135
-
136
- dataset_config = DatasetConfig(
137
- dataset_path=dataset_path,
138
- dataset_format=dataset_format,
139
- max_length=max_length,
140
- validation_split=validation_split
141
- )
142
-
143
- training_config = TrainingConfig(
144
- model_name=model_name,
145
- output_dir=output_dir,
146
- training_type=training_type,
147
- num_epochs=num_epochs,
148
- batch_size=batch_size,
149
- learning_rate=learning_rate,
150
- lora_config=lora_config,
151
- dataset_config=dataset_config,
152
- **kwargs
153
- )
154
-
155
- # Print training summary
156
- model_info = TrainingUtils.get_model_info(model_name)
157
- memory_estimate = TrainingUtils.estimate_memory_usage(
158
- model_name, batch_size, max_length, use_lora
159
- )
160
-
161
- summary = TrainingUtils.format_training_summary(
162
- training_config.to_dict(), model_info, memory_estimate
163
- )
164
- print(summary)
165
-
166
- # Validate configuration
167
- issues = TrainingUtils.validate_training_config(training_config.to_dict())
168
- if issues:
169
- raise ValueError(f"Training configuration issues: {issues}")
170
-
171
- # Initialize trainer based on training type
172
- if training_type.lower() == "sft":
173
- trainer = SFTTrainer(training_config)
174
- else:
175
- raise ValueError(f"Training type '{training_type}' not supported yet")
176
-
177
- # Execute training
178
- logger.info(f"Starting {training_type.upper()} training...")
179
- result_path = trainer.train()
180
-
181
- logger.info(f"Training completed! Model saved to: {result_path}")
182
- return result_path
183
-
184
- def train_on_runpod(
185
- self,
186
- model_name: str,
187
- dataset_path: str,
188
- runpod_api_key: str,
189
- template_id: str,
190
- gpu_type: str = "NVIDIA RTX A6000",
191
- storage_config: Optional[Dict[str, Any]] = None,
192
- job_name: Optional[str] = None,
193
- **training_params
194
- ) -> Dict[str, Any]:
195
- """
196
- Train a model on RunPod cloud infrastructure.
197
-
198
- Args:
199
- model_name: Model identifier
200
- dataset_path: Dataset path or HuggingFace dataset name
201
- runpod_api_key: RunPod API key
202
- template_id: RunPod template ID
203
- gpu_type: GPU type to use
204
- storage_config: Optional cloud storage configuration
205
- job_name: Optional job name
206
- **training_params: Additional training parameters
207
-
208
- Returns:
209
- Training job results
210
-
211
- Example:
212
- ```python
213
- result = factory.train_on_runpod(
214
- model_name="google/gemma-2-4b-it",
215
- dataset_path="tatsu-lab/alpaca",
216
- runpod_api_key="your-api-key",
217
- template_id="your-template-id",
218
- use_lora=True,
219
- num_epochs=3
220
- )
221
- ```
222
- """
223
- # Import cloud components
224
- from .cloud import TrainingJobOrchestrator
225
- from .cloud.runpod_trainer import RunPodConfig
226
- from .cloud.storage_manager import StorageConfig
227
- from .cloud.job_orchestrator import JobConfig
228
-
229
- # Create RunPod configuration
230
- runpod_config = RunPodConfig(
231
- api_key=runpod_api_key,
232
- template_id=template_id,
233
- gpu_type=gpu_type
234
- )
235
-
236
- # Create storage configuration if provided
237
- storage_cfg = None
238
- if storage_config:
239
- storage_cfg = StorageConfig(**storage_config)
240
-
241
- # Create job configuration
242
- job_config = JobConfig(
243
- model_name=model_name,
244
- dataset_source=dataset_path,
245
- job_name=job_name or f"gemma-training-{int(datetime.datetime.now().timestamp())}",
246
- **training_params
247
- )
248
-
249
- # Initialize orchestrator and execute training
250
- orchestrator = TrainingJobOrchestrator(
251
- runpod_config=runpod_config,
252
- storage_config=storage_cfg
253
- )
254
-
255
- logger.info(f"Starting RunPod training for {model_name}")
256
- result = orchestrator.execute_training_workflow(job_config)
257
-
258
- return result
259
-
260
- async def upload_to_huggingface(
261
- self,
262
- model_path: str,
263
- hf_model_name: str,
264
- hf_token: Optional[str] = None,
265
- metadata: Optional[Dict[str, Any]] = None
266
- ) -> str:
267
- """
268
- Upload a trained model to HuggingFace Hub using HuggingFaceStorage.
269
-
270
- Args:
271
- model_path: Path to the trained model
272
- hf_model_name: Name for the model on HuggingFace Hub
273
- hf_token: HuggingFace token
274
- metadata: Additional metadata for the model
275
-
276
- Returns:
277
- URL of the uploaded model
278
- """
279
- try:
280
- from ..core.storage.hf_storage import HuggingFaceStorage
281
-
282
- logger.info(f"Uploading model to HuggingFace: {hf_model_name}")
283
-
284
- # Initialize HuggingFace storage
285
- storage = HuggingFaceStorage(
286
- username="xenobordom",
287
- token=hf_token
288
- )
289
-
290
- # Prepare metadata
291
- upload_metadata = metadata or {}
292
- upload_metadata.update({
293
- "description": f"Fine-tuned model: {hf_model_name}",
294
- "training_framework": "ISA Model SDK",
295
- "uploaded_from": "training_factory"
296
- })
297
-
298
- # Upload model
299
- success = await storage.save_model(
300
- model_id=hf_model_name,
301
- model_path=model_path,
302
- metadata=upload_metadata
303
- )
304
-
305
- if success:
306
- model_url = storage.get_public_url(hf_model_name)
307
- logger.info(f"Model uploaded successfully: {model_url}")
308
- return model_url
309
- else:
310
- raise Exception("Failed to upload model")
311
-
312
- except Exception as e:
313
- logger.error(f"Failed to upload to HuggingFace: {e}")
314
- raise
315
-
316
- def get_training_status(self, output_dir: str) -> Dict[str, Any]:
317
- """
318
- Get training status from output directory.
319
-
320
- Args:
321
- output_dir: Training output directory
322
-
323
- Returns:
324
- Dictionary with training status information
325
- """
326
- status = {
327
- "output_dir": output_dir,
328
- "exists": os.path.exists(output_dir),
329
- "files": []
330
- }
331
-
332
- if status["exists"]:
333
- status["files"] = os.listdir(output_dir)
334
-
335
- # Check for specific files
336
- config_path = os.path.join(output_dir, "training_config.json")
337
- metrics_path = os.path.join(output_dir, "training_metrics.json")
338
- model_path = os.path.join(output_dir, "pytorch_model.bin")
339
-
340
- status["has_config"] = os.path.exists(config_path)
341
- status["has_metrics"] = os.path.exists(metrics_path)
342
- status["has_model"] = os.path.exists(model_path) or os.path.exists(os.path.join(output_dir, "adapter_model.bin"))
343
-
344
- if status["has_config"]:
345
- try:
346
- status["config"] = TrainingUtils.load_training_args(output_dir)
347
- except:
348
- pass
349
-
350
- return status
351
-
352
- def list_trained_models(self) -> List[Dict[str, Any]]:
353
- """
354
- List all trained models in the output directory.
355
-
356
- Returns:
357
- List of model information dictionaries
358
- """
359
- models = []
360
-
361
- if os.path.exists(self.base_output_dir):
362
- for item in os.listdir(self.base_output_dir):
363
- item_path = os.path.join(self.base_output_dir, item)
364
- if os.path.isdir(item_path):
365
- status = self.get_training_status(item_path)
366
- models.append({
367
- "name": item,
368
- "path": item_path,
369
- "created": datetime.datetime.fromtimestamp(
370
- os.path.getctime(item_path)
371
- ).isoformat(),
372
- "status": status
373
- })
374
-
375
- return sorted(models, key=lambda x: x["created"], reverse=True)
376
-
377
-
378
- # Convenience functions for quick access
379
- def train_gemma(
380
- dataset_path: str,
381
- model_size: str = "4b",
382
- output_dir: Optional[str] = None,
383
- **kwargs
384
- ) -> str:
385
- """
386
- Quick function to train Gemma models.
387
-
388
- Args:
389
- dataset_path: Path to training dataset
390
- model_size: Model size ("2b", "4b", "7b")
391
- output_dir: Output directory
392
- **kwargs: Additional training parameters
393
-
394
- Returns:
395
- Path to trained model
396
-
397
- Example:
398
- ```python
399
- from isa_model.training import train_gemma
400
-
401
- model_path = train_gemma(
402
- dataset_path="tatsu-lab/alpaca",
403
- model_size="4b",
404
- num_epochs=3,
405
- batch_size=4
406
- )
407
- ```
408
- """
409
- factory = TrainingFactory()
410
-
411
- model_map = {
412
- "2b": "google/gemma-2-2b-it",
413
- "4b": "google/gemma-2-4b-it",
414
- "7b": "google/gemma-2-7b-it"
415
- }
416
-
417
- model_name = model_map.get(model_size, "google/gemma-2-4b-it")
418
-
419
- return factory.train_model(
420
- model_name=model_name,
421
- dataset_path=dataset_path,
422
- output_dir=output_dir,
423
- **kwargs
424
- )
isa_model/training/intelligent/__init__.py +0 -25
@@ -1,25 +0,0 @@
1
- """
2
- Intelligent Training Service Components
3
-
4
- This module provides AI-powered training optimization and automation:
5
- - Intelligent decision engine for configuration recommendations
6
- - Task classification and model selection
7
- - Resource optimization and cost estimation
8
- - Natural language interface for training requests
9
- """
10
-
11
- from .decision_engine import IntelligentDecisionEngine, TrainingRequest, TrainingRecommendation
12
- from .task_classifier import TaskClassifier
13
- from .knowledge_base import KnowledgeBase
14
- from .resource_optimizer import ResourceOptimizer
15
- from .intelligent_factory import IntelligentTrainingFactory
16
-
17
- __all__ = [
18
- 'IntelligentDecisionEngine',
19
- 'TaskClassifier',
20
- 'KnowledgeBase',
21
- 'ResourceOptimizer',
22
- 'IntelligentTrainingFactory',
23
- 'TrainingRequest',
24
- 'TrainingRecommendation'
25
- ]