isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,424 +0,0 @@
1
- """
2
- ISA Model Training Factory
3
-
4
- A clean, simplified training factory that uses HuggingFace Transformers directly
5
- without external dependencies like LlamaFactory.
6
- """
7
-
8
- import os
9
- import logging
10
- from typing import Optional, Dict, Any, Union, List
11
- from pathlib import Path
12
- import datetime
13
-
14
- from .core import (
15
- TrainingConfig,
16
- LoRAConfig,
17
- DatasetConfig,
18
- BaseTrainer,
19
- SFTTrainer,
20
- TrainingUtils,
21
- DatasetManager,
22
- )
23
- from .cloud import TrainingJobOrchestrator
24
-
25
- logger = logging.getLogger(__name__)
26
-
27
-
28
- class TrainingFactory:
29
- """
30
- Unified Training Factory for ISA Model SDK
31
-
32
- Provides a clean interface for:
33
- - Local training with SFT (Supervised Fine-Tuning)
34
- - Cloud training on RunPod
35
- - Model evaluation and management
36
-
37
- Example usage:
38
- ```python
39
- from isa_model.training import TrainingFactory
40
-
41
- factory = TrainingFactory()
42
-
43
- # Local training
44
- model_path = factory.train_model(
45
- model_name="google/gemma-2-4b-it",
46
- dataset_path="tatsu-lab/alpaca",
47
- use_lora=True,
48
- num_epochs=3
49
- )
50
-
51
- # Cloud training on RunPod
52
- result = factory.train_on_runpod(
53
- model_name="google/gemma-2-4b-it",
54
- dataset_path="tatsu-lab/alpaca",
55
- runpod_api_key="your-api-key",
56
- template_id="your-template-id"
57
- )
58
- ```
59
- """
60
-
61
- def __init__(self, base_output_dir: Optional[str] = None):
62
- """
63
- Initialize the training factory.
64
-
65
- Args:
66
- base_output_dir: Base directory for training outputs
67
- """
68
- self.base_output_dir = base_output_dir or os.path.join(os.getcwd(), "training_outputs")
69
- os.makedirs(self.base_output_dir, exist_ok=True)
70
-
71
- logger.info(f"TrainingFactory initialized with output dir: {self.base_output_dir}")
72
-
73
- def train_model(
74
- self,
75
- model_name: str,
76
- dataset_path: str,
77
- output_dir: Optional[str] = None,
78
- training_type: str = "sft",
79
- dataset_format: str = "alpaca",
80
- use_lora: bool = True,
81
- batch_size: int = 4,
82
- num_epochs: int = 3,
83
- learning_rate: float = 2e-5,
84
- max_length: int = 1024,
85
- lora_rank: int = 8,
86
- lora_alpha: int = 16,
87
- validation_split: float = 0.1,
88
- **kwargs
89
- ) -> str:
90
- """
91
- Train a model locally.
92
-
93
- Args:
94
- model_name: Model identifier (e.g., "google/gemma-2-4b-it")
95
- dataset_path: Path to dataset or HuggingFace dataset name
96
- output_dir: Custom output directory
97
- training_type: Type of training ("sft" supported)
98
- dataset_format: Dataset format ("alpaca", "sharegpt", "custom")
99
- use_lora: Whether to use LoRA for efficient training
100
- batch_size: Training batch size
101
- num_epochs: Number of training epochs
102
- learning_rate: Learning rate
103
- max_length: Maximum sequence length
104
- lora_rank: LoRA rank parameter
105
- lora_alpha: LoRA alpha parameter
106
- validation_split: Fraction of data for validation
107
- **kwargs: Additional training parameters
108
-
109
- Returns:
110
- Path to the trained model
111
-
112
- Example:
113
- ```python
114
- model_path = factory.train_model(
115
- model_name="google/gemma-2-4b-it",
116
- dataset_path="tatsu-lab/alpaca",
117
- use_lora=True,
118
- num_epochs=3,
119
- batch_size=4
120
- )
121
- ```
122
- """
123
- # Generate output directory if not provided
124
- if not output_dir:
125
- output_dir = TrainingUtils.generate_output_dir(
126
- model_name, training_type, self.base_output_dir
127
- )
128
-
129
- # Create configurations
130
- lora_config = LoRAConfig(
131
- use_lora=use_lora,
132
- lora_rank=lora_rank,
133
- lora_alpha=lora_alpha
134
- ) if use_lora else None
135
-
136
- dataset_config = DatasetConfig(
137
- dataset_path=dataset_path,
138
- dataset_format=dataset_format,
139
- max_length=max_length,
140
- validation_split=validation_split
141
- )
142
-
143
- training_config = TrainingConfig(
144
- model_name=model_name,
145
- output_dir=output_dir,
146
- training_type=training_type,
147
- num_epochs=num_epochs,
148
- batch_size=batch_size,
149
- learning_rate=learning_rate,
150
- lora_config=lora_config,
151
- dataset_config=dataset_config,
152
- **kwargs
153
- )
154
-
155
- # Print training summary
156
- model_info = TrainingUtils.get_model_info(model_name)
157
- memory_estimate = TrainingUtils.estimate_memory_usage(
158
- model_name, batch_size, max_length, use_lora
159
- )
160
-
161
- summary = TrainingUtils.format_training_summary(
162
- training_config.to_dict(), model_info, memory_estimate
163
- )
164
- print(summary)
165
-
166
- # Validate configuration
167
- issues = TrainingUtils.validate_training_config(training_config.to_dict())
168
- if issues:
169
- raise ValueError(f"Training configuration issues: {issues}")
170
-
171
- # Initialize trainer based on training type
172
- if training_type.lower() == "sft":
173
- trainer = SFTTrainer(training_config)
174
- else:
175
- raise ValueError(f"Training type '{training_type}' not supported yet")
176
-
177
- # Execute training
178
- logger.info(f"Starting {training_type.upper()} training...")
179
- result_path = trainer.train()
180
-
181
- logger.info(f"Training completed! Model saved to: {result_path}")
182
- return result_path
183
-
184
- def train_on_runpod(
185
- self,
186
- model_name: str,
187
- dataset_path: str,
188
- runpod_api_key: str,
189
- template_id: str,
190
- gpu_type: str = "NVIDIA RTX A6000",
191
- storage_config: Optional[Dict[str, Any]] = None,
192
- job_name: Optional[str] = None,
193
- **training_params
194
- ) -> Dict[str, Any]:
195
- """
196
- Train a model on RunPod cloud infrastructure.
197
-
198
- Args:
199
- model_name: Model identifier
200
- dataset_path: Dataset path or HuggingFace dataset name
201
- runpod_api_key: RunPod API key
202
- template_id: RunPod template ID
203
- gpu_type: GPU type to use
204
- storage_config: Optional cloud storage configuration
205
- job_name: Optional job name
206
- **training_params: Additional training parameters
207
-
208
- Returns:
209
- Training job results
210
-
211
- Example:
212
- ```python
213
- result = factory.train_on_runpod(
214
- model_name="google/gemma-2-4b-it",
215
- dataset_path="tatsu-lab/alpaca",
216
- runpod_api_key="your-api-key",
217
- template_id="your-template-id",
218
- use_lora=True,
219
- num_epochs=3
220
- )
221
- ```
222
- """
223
- # Import cloud components
224
- from .cloud import TrainingJobOrchestrator
225
- from .cloud.runpod_trainer import RunPodConfig
226
- from .cloud.storage_manager import StorageConfig
227
- from .cloud.job_orchestrator import JobConfig
228
-
229
- # Create RunPod configuration
230
- runpod_config = RunPodConfig(
231
- api_key=runpod_api_key,
232
- template_id=template_id,
233
- gpu_type=gpu_type
234
- )
235
-
236
- # Create storage configuration if provided
237
- storage_cfg = None
238
- if storage_config:
239
- storage_cfg = StorageConfig(**storage_config)
240
-
241
- # Create job configuration
242
- job_config = JobConfig(
243
- model_name=model_name,
244
- dataset_source=dataset_path,
245
- job_name=job_name or f"gemma-training-{int(datetime.datetime.now().timestamp())}",
246
- **training_params
247
- )
248
-
249
- # Initialize orchestrator and execute training
250
- orchestrator = TrainingJobOrchestrator(
251
- runpod_config=runpod_config,
252
- storage_config=storage_cfg
253
- )
254
-
255
- logger.info(f"Starting RunPod training for {model_name}")
256
- result = orchestrator.execute_training_workflow(job_config)
257
-
258
- return result
259
-
260
- async def upload_to_huggingface(
261
- self,
262
- model_path: str,
263
- hf_model_name: str,
264
- hf_token: Optional[str] = None,
265
- metadata: Optional[Dict[str, Any]] = None
266
- ) -> str:
267
- """
268
- Upload a trained model to HuggingFace Hub using HuggingFaceStorage.
269
-
270
- Args:
271
- model_path: Path to the trained model
272
- hf_model_name: Name for the model on HuggingFace Hub
273
- hf_token: HuggingFace token
274
- metadata: Additional metadata for the model
275
-
276
- Returns:
277
- URL of the uploaded model
278
- """
279
- try:
280
- from ..core.storage.hf_storage import HuggingFaceStorage
281
-
282
- logger.info(f"Uploading model to HuggingFace: {hf_model_name}")
283
-
284
- # Initialize HuggingFace storage
285
- storage = HuggingFaceStorage(
286
- username="xenobordom",
287
- token=hf_token
288
- )
289
-
290
- # Prepare metadata
291
- upload_metadata = metadata or {}
292
- upload_metadata.update({
293
- "description": f"Fine-tuned model: {hf_model_name}",
294
- "training_framework": "ISA Model SDK",
295
- "uploaded_from": "training_factory"
296
- })
297
-
298
- # Upload model
299
- success = await storage.save_model(
300
- model_id=hf_model_name,
301
- model_path=model_path,
302
- metadata=upload_metadata
303
- )
304
-
305
- if success:
306
- model_url = storage.get_public_url(hf_model_name)
307
- logger.info(f"Model uploaded successfully: {model_url}")
308
- return model_url
309
- else:
310
- raise Exception("Failed to upload model")
311
-
312
- except Exception as e:
313
- logger.error(f"Failed to upload to HuggingFace: {e}")
314
- raise
315
-
316
- def get_training_status(self, output_dir: str) -> Dict[str, Any]:
317
- """
318
- Get training status from output directory.
319
-
320
- Args:
321
- output_dir: Training output directory
322
-
323
- Returns:
324
- Dictionary with training status information
325
- """
326
- status = {
327
- "output_dir": output_dir,
328
- "exists": os.path.exists(output_dir),
329
- "files": []
330
- }
331
-
332
- if status["exists"]:
333
- status["files"] = os.listdir(output_dir)
334
-
335
- # Check for specific files
336
- config_path = os.path.join(output_dir, "training_config.json")
337
- metrics_path = os.path.join(output_dir, "training_metrics.json")
338
- model_path = os.path.join(output_dir, "pytorch_model.bin")
339
-
340
- status["has_config"] = os.path.exists(config_path)
341
- status["has_metrics"] = os.path.exists(metrics_path)
342
- status["has_model"] = os.path.exists(model_path) or os.path.exists(os.path.join(output_dir, "adapter_model.bin"))
343
-
344
- if status["has_config"]:
345
- try:
346
- status["config"] = TrainingUtils.load_training_args(output_dir)
347
- except:
348
- pass
349
-
350
- return status
351
-
352
- def list_trained_models(self) -> List[Dict[str, Any]]:
353
- """
354
- List all trained models in the output directory.
355
-
356
- Returns:
357
- List of model information dictionaries
358
- """
359
- models = []
360
-
361
- if os.path.exists(self.base_output_dir):
362
- for item in os.listdir(self.base_output_dir):
363
- item_path = os.path.join(self.base_output_dir, item)
364
- if os.path.isdir(item_path):
365
- status = self.get_training_status(item_path)
366
- models.append({
367
- "name": item,
368
- "path": item_path,
369
- "created": datetime.datetime.fromtimestamp(
370
- os.path.getctime(item_path)
371
- ).isoformat(),
372
- "status": status
373
- })
374
-
375
- return sorted(models, key=lambda x: x["created"], reverse=True)
376
-
377
-
378
- # Convenience functions for quick access
379
- def train_gemma(
380
- dataset_path: str,
381
- model_size: str = "4b",
382
- output_dir: Optional[str] = None,
383
- **kwargs
384
- ) -> str:
385
- """
386
- Quick function to train Gemma models.
387
-
388
- Args:
389
- dataset_path: Path to training dataset
390
- model_size: Model size ("2b", "4b", "7b")
391
- output_dir: Output directory
392
- **kwargs: Additional training parameters
393
-
394
- Returns:
395
- Path to trained model
396
-
397
- Example:
398
- ```python
399
- from isa_model.training import train_gemma
400
-
401
- model_path = train_gemma(
402
- dataset_path="tatsu-lab/alpaca",
403
- model_size="4b",
404
- num_epochs=3,
405
- batch_size=4
406
- )
407
- ```
408
- """
409
- factory = TrainingFactory()
410
-
411
- model_map = {
412
- "2b": "google/gemma-2-2b-it",
413
- "4b": "google/gemma-2-4b-it",
414
- "7b": "google/gemma-2-7b-it"
415
- }
416
-
417
- model_name = model_map.get(model_size, "google/gemma-2-4b-it")
418
-
419
- return factory.train_model(
420
- model_name=model_name,
421
- dataset_path=dataset_path,
422
- output_dir=output_dir,
423
- **kwargs
424
- )
@@ -1,25 +0,0 @@
1
- """
2
- Intelligent Training Service Components
3
-
4
- This module provides AI-powered training optimization and automation:
5
- - Intelligent decision engine for configuration recommendations
6
- - Task classification and model selection
7
- - Resource optimization and cost estimation
8
- - Natural language interface for training requests
9
- """
10
-
11
- from .decision_engine import IntelligentDecisionEngine, TrainingRequest, TrainingRecommendation
12
- from .task_classifier import TaskClassifier
13
- from .knowledge_base import KnowledgeBase
14
- from .resource_optimizer import ResourceOptimizer
15
- from .intelligent_factory import IntelligentTrainingFactory
16
-
17
- __all__ = [
18
- 'IntelligentDecisionEngine',
19
- 'TaskClassifier',
20
- 'KnowledgeBase',
21
- 'ResourceOptimizer',
22
- 'IntelligentTrainingFactory',
23
- 'TrainingRequest',
24
- 'TrainingRecommendation'
25
- ]