gptmed 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gptmed/api.py CHANGED
@@ -39,6 +39,8 @@ from gptmed.configs.train_config import TrainingConfig
39
39
  from gptmed.training.dataset import create_dataloaders
40
40
  from gptmed.training.trainer import Trainer
41
41
  from gptmed.inference.generator import TextGenerator
42
+ from gptmed.services.device_manager import DeviceManager
43
+ from gptmed.services.training_service import TrainingService
42
44
 
43
45
 
44
46
  def create_config(output_path: str = 'training_config.yaml') -> None:
@@ -58,7 +60,11 @@ def create_config(output_path: str = 'training_config.yaml') -> None:
58
60
  create_default_config_file(output_path)
59
61
 
60
62
 
61
- def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
63
+ def train_from_config(
64
+ config_path: str,
65
+ verbose: bool = True,
66
+ device: Optional[str] = None
67
+ ) -> Dict[str, Any]:
62
68
  """
63
69
  Train a GPT model using a YAML configuration file.
64
70
 
@@ -68,6 +74,8 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
68
74
  Args:
69
75
  config_path: Path to YAML configuration file
70
76
  verbose: Whether to print training progress (default: True)
77
+ device: Device to use ('cuda', 'cpu', or 'auto'). If None, uses config value.
78
+ 'auto' will select best available device.
71
79
 
72
80
  Returns:
73
81
  Dictionary with training results:
@@ -82,13 +90,16 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
82
90
  >>> gptmed.create_config('config.yaml')
83
91
  >>> # ... edit config.yaml ...
84
92
  >>>
85
- >>> # Train the model
86
- >>> results = gptmed.train_from_config('config.yaml')
93
+ >>> # Train the model on CPU
94
+ >>> results = gptmed.train_from_config('config.yaml', device='cpu')
87
95
  >>> print(f"Best model: {results['best_checkpoint']}")
96
+ >>>
97
+ >>> # Train with auto device selection
98
+ >>> results = gptmed.train_from_config('config.yaml', device='auto')
88
99
 
89
100
  Raises:
90
101
  FileNotFoundError: If config file or data files don't exist
91
- ValueError: If configuration is invalid
102
+ ValueError: If configuration is invalid or device is invalid
92
103
  """
93
104
  if verbose:
94
105
  print("=" * 60)
@@ -111,47 +122,42 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
111
122
  # Convert to arguments
112
123
  args = config_to_args(config)
113
124
 
114
- # Import here to avoid circular imports
115
- import random
116
- import numpy as np
125
+ # Override device if provided as parameter
126
+ if device is not None:
127
+ # Validate and normalize device
128
+ device = DeviceManager.validate_device(device)
129
+ if verbose:
130
+ print(f"\n⚙️ Device override: {device} (from parameter)")
131
+ args['device'] = device
117
132
 
118
- # Set random seed
119
- def set_seed(seed: int):
120
- random.seed(seed)
121
- np.random.seed(seed)
122
- torch.manual_seed(seed)
123
- if torch.cuda.is_available():
124
- torch.cuda.manual_seed(seed)
125
- torch.cuda.manual_seed_all(seed)
126
- torch.backends.cudnn.deterministic = True
127
- torch.backends.cudnn.benchmark = False
133
+ # Create DeviceManager with the selected device
134
+ device_manager = DeviceManager(
135
+ preferred_device=args['device'],
136
+ allow_fallback=True
137
+ )
138
+
139
+ # Print device information
140
+ device_manager.print_device_info(verbose=verbose)
141
+
142
+ # Create TrainingService with DeviceManager
143
+ training_service = TrainingService(
144
+ device_manager=device_manager,
145
+ verbose=verbose
146
+ )
128
147
 
148
+ # Set random seed
129
149
  if verbose:
130
150
  print(f"\n🎲 Setting random seed: {args['seed']}")
131
- set_seed(args['seed'])
151
+ training_service.set_seed(args['seed'])
132
152
 
133
- # Check device
134
- device = args['device']
135
- if device == 'cuda' and not torch.cuda.is_available():
136
- if verbose:
137
- print("⚠️ CUDA not available, using CPU")
138
- device = 'cpu'
153
+ # Get actual device to use
154
+ actual_device = device_manager.get_device()
139
155
 
140
156
  # Load model config
141
157
  if verbose:
142
158
  print(f"\n🧠 Creating model: {args['model_size']}")
143
159
 
144
- if args['model_size'] == 'tiny':
145
- model_config = get_tiny_config()
146
- elif args['model_size'] == 'small':
147
- model_config = get_small_config()
148
- elif args['model_size'] == 'medium':
149
- model_config = get_medium_config()
150
- else:
151
- raise ValueError(f"Unknown model size: {args['model_size']}")
152
-
153
- # Create model
154
- model = GPTTransformer(model_config)
160
+ model = training_service.create_model(args['model_size'])
155
161
  total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
156
162
 
157
163
  if verbose:
@@ -159,7 +165,7 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
159
165
  print(f" Parameters: {total_params:,}")
160
166
  print(f" Memory: ~{total_params * 4 / 1024 / 1024:.2f} MB")
161
167
 
162
- # Load data
168
+ # Load data using TrainingService
163
169
  if verbose:
164
170
  print(f"\n📊 Loading data...")
165
171
  print(f" Train: {args['train_data']}")
@@ -176,7 +182,7 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
176
182
  print(f" Train batches: {len(train_loader)}")
177
183
  print(f" Val batches: {len(val_loader)}")
178
184
 
179
- # Create training config
185
+ # Create training config with actual device
180
186
  train_config = TrainingConfig(
181
187
  batch_size=args['batch_size'],
182
188
  learning_rate=args['learning_rate'],
@@ -195,7 +201,7 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
195
201
  val_data_path=args['val_data'],
196
202
  checkpoint_dir=args['checkpoint_dir'],
197
203
  log_dir=args['log_dir'],
198
- device=device,
204
+ device=actual_device, # Use actual device from DeviceManager
199
205
  seed=args['seed'],
200
206
  )
201
207
 
@@ -213,70 +219,17 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
213
219
  weight_decay=args['weight_decay'],
214
220
  )
215
221
 
216
- # Create trainer
217
- if verbose:
218
- print(f"\n🎯 Initializing trainer...")
219
-
220
- trainer = Trainer(
222
+ # Execute training using TrainingService
223
+ results = training_service.execute_training(
221
224
  model=model,
222
225
  train_loader=train_loader,
223
226
  val_loader=val_loader,
224
227
  optimizer=optimizer,
225
- config=train_config,
226
- device=device,
228
+ train_config=train_config,
229
+ device=actual_device,
230
+ model_config_dict=model.config.to_dict()
227
231
  )
228
232
 
229
- # Resume if requested
230
- if args['resume_from'] is not None:
231
- if verbose:
232
- print(f"\n📥 Resuming from checkpoint: {args['resume_from']}")
233
- trainer.resume_from_checkpoint(Path(args['resume_from']))
234
-
235
- # Start training
236
- if verbose:
237
- print(f"\n{'='*60}")
238
- print("🚀 Starting Training!")
239
- print(f"{'='*60}\n")
240
-
241
- try:
242
- trainer.train()
243
- except KeyboardInterrupt:
244
- if verbose:
245
- print("\n\n⏸️ Training interrupted by user")
246
- print("💾 Saving checkpoint...")
247
- trainer.checkpoint_manager.save_checkpoint(
248
- model=model,
249
- optimizer=optimizer,
250
- step=trainer.global_step,
251
- epoch=trainer.current_epoch,
252
- val_loss=trainer.best_val_loss,
253
- model_config=model_config.to_dict(),
254
- train_config=train_config.to_dict(),
255
- )
256
- if verbose:
257
- print("✓ Checkpoint saved. Resume with resume_from in config.")
258
-
259
- # Return results
260
- best_checkpoint = Path(train_config.checkpoint_dir) / "best_model.pt"
261
-
262
- results = {
263
- 'best_checkpoint': str(best_checkpoint),
264
- 'final_val_loss': trainer.best_val_loss,
265
- 'total_epochs': trainer.current_epoch,
266
- 'checkpoint_dir': train_config.checkpoint_dir,
267
- 'log_dir': train_config.log_dir,
268
- }
269
-
270
- if verbose:
271
- print(f"\n{'='*60}")
272
- print("✅ Training Complete!")
273
- print(f"{'='*60}")
274
- print(f"\n📁 Results:")
275
- print(f" Best checkpoint: {results['best_checkpoint']}")
276
- print(f" Best val loss: {results['final_val_loss']:.4f}")
277
- print(f" Total epochs: {results['total_epochs']}")
278
- print(f" Logs: {results['log_dir']}")
279
-
280
233
  return results
281
234
 
282
235
 
@@ -76,6 +76,15 @@ def validate_config(config: Dict[str, Any]) -> None:
76
76
  raise ValueError("batch_size must be positive")
77
77
  if config['training']['learning_rate'] <= 0:
78
78
  raise ValueError("learning_rate must be positive")
79
+
80
+ # Validate device
81
+ valid_devices = ['cuda', 'cpu', 'auto']
82
+ device_value = config.get('device', {}).get('device', 'cuda').lower()
83
+ if device_value not in valid_devices:
84
+ raise ValueError(
85
+ f"Invalid device: {device_value}. "
86
+ f"Must be one of {valid_devices}"
87
+ )
79
88
 
80
89
 
81
90
  def config_to_args(config: Dict[str, Any]) -> Dict[str, Any]:
@@ -169,7 +178,7 @@ def create_default_config_file(output_path: str = 'training_config.yaml') -> Non
169
178
  'log_interval': 10
170
179
  },
171
180
  'device': {
172
- 'device': 'cuda',
181
+ 'device': 'cuda', # Options: 'cuda', 'cpu', or 'auto'
173
182
  'seed': 42
174
183
  },
175
184
  'advanced': {
@@ -0,0 +1,15 @@
1
+ """
2
+ Services Layer
3
+
4
+ Business logic services following SOLID principles.
5
+ This layer implements the service pattern to encapsulate complex operations.
6
+ """
7
+
8
+ from gptmed.services.device_manager import DeviceManager, DeviceStrategy
9
+ from gptmed.services.training_service import TrainingService
10
+
11
+ __all__ = [
12
+ 'DeviceManager',
13
+ 'DeviceStrategy',
14
+ 'TrainingService',
15
+ ]
@@ -0,0 +1,252 @@
1
+ """
2
+ Device Manager Service
3
+
4
+ PURPOSE:
5
+ Manages device selection and configuration for model training and inference.
6
+ Implements Strategy Pattern for flexible device handling.
7
+
8
+ DESIGN PATTERNS:
9
+ - Strategy Pattern: Different strategies for CPU vs GPU
10
+ - Dependency Injection: DeviceManager can be injected into services
11
+ - Single Responsibility: Only handles device-related concerns
12
+
13
+ WHAT THIS FILE DOES:
14
+ 1. Validates device availability (CUDA check)
15
+ 2. Provides device selection logic with fallback
16
+ 3. Manages device-specific configurations
17
+ 4. Ensures consistent device handling across the codebase
18
+
19
+ PACKAGES USED:
20
+ - torch: Device detection and management
21
+ - abc: Abstract base classes for strategy pattern
22
+ """
23
+
24
+ from abc import ABC, abstractmethod
25
+ from typing import Optional
26
+ import torch
27
+
28
+
29
class DeviceStrategy(ABC):
    """
    Common interface for device-selection strategies.

    Concrete subclasses (CPU, CUDA, ...) provide the actual detection
    logic; callers interact only through this interface (Strategy Pattern).
    """

    @abstractmethod
    def get_device(self) -> str:
        """Return the PyTorch device string ('cuda' or 'cpu')."""
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Return True when the underlying device can actually be used."""
        ...

    @abstractmethod
    def get_device_info(self) -> dict:
        """Return a dictionary describing the device."""
        ...
class CUDAStrategy(DeviceStrategy):
    """Device strategy backed by CUDA-capable GPUs."""

    def is_available(self) -> bool:
        """True when PyTorch can see at least one CUDA device."""
        return torch.cuda.is_available()

    def get_device(self) -> str:
        """Prefer 'cuda'; degrade to 'cpu' when no GPU is visible."""
        if self.is_available():
            return 'cuda'
        return 'cpu'

    def get_device_info(self) -> dict:
        """Describe the CUDA environment (or report its absence)."""
        if not self.is_available():
            return {
                'device': 'cuda',
                'available': False,
                'message': 'CUDA not available'
            }

        cuda_version = torch.version.cuda if torch.version.cuda else 'N/A'
        return {
            'device': 'cuda',
            'available': True,
            'device_name': torch.cuda.get_device_name(0),
            'device_count': torch.cuda.device_count(),
            'cuda_version': cuda_version,
        }
class CPUStrategy(DeviceStrategy):
    """Device strategy for plain CPU execution."""

    def get_device(self) -> str:
        """The CPU device string is always 'cpu'."""
        return 'cpu'

    def is_available(self) -> bool:
        """The CPU can never be absent."""
        return True

    def get_device_info(self) -> dict:
        """Describe the CPU backend, including the worker thread count."""
        info = {
            'device': 'cpu',
            'available': True,
            'num_threads': torch.get_num_threads(),
        }
        return info
class DeviceManager:
    """
    Manages device selection and configuration.

    Follows Single Responsibility Principle - only handles device concerns.
    Uses Strategy Pattern for different device types.

    Example:
        >>> device_manager = DeviceManager(preferred_device='cuda')
        >>> device = device_manager.get_device()
        >>> print(f"Using device: {device}")
    """

    def __init__(self, preferred_device: str = 'cuda', allow_fallback: bool = True):
        """
        Initialize DeviceManager.

        Args:
            preferred_device: Preferred device ('cuda', 'cpu', or 'auto').
                'auto' resolves immediately to the best available device,
                matching what validate_device() already accepted.
            allow_fallback: If True, fallback to CPU if CUDA unavailable

        Raises:
            ValueError: If preferred_device is not a recognized device name.
        """
        self.preferred_device = preferred_device.lower()
        self.allow_fallback = allow_fallback

        # Generalization: accept 'auto' here as well (validate_device did,
        # but the constructor previously rejected it).  Resolve it up front
        # so the rest of the class only deals with concrete device names.
        if self.preferred_device == 'auto':
            self.preferred_device = self.get_optimal_device()

        # Validate device input
        if self.preferred_device not in ['cuda', 'cpu']:
            raise ValueError(
                f"Invalid device: {preferred_device}. "
                f"Must be 'cuda', 'cpu', or 'auto'"
            )

        # Select strategy based on preferred device
        if self.preferred_device == 'cuda':
            self.strategy = CUDAStrategy()
        else:
            self.strategy = CPUStrategy()

    def get_device(self) -> str:
        """
        Get the actual device to use.

        Returns fallback device if preferred is unavailable and fallback is allowed.

        Returns:
            Device string ('cuda' or 'cpu')

        Raises:
            RuntimeError: If preferred device unavailable and fallback disabled
        """
        if self.strategy.is_available():
            return self.strategy.get_device()

        # Preferred device is unavailable: fall back to CPU only when the
        # caller opted in and the preferred device was CUDA.
        if self.allow_fallback and self.preferred_device == 'cuda':
            return 'cpu'

        raise RuntimeError(
            f"Device '{self.preferred_device}' is not available and "
            f"fallback is {'disabled' if not self.allow_fallback else 'not applicable'}"
        )

    def get_device_info(self) -> dict:
        """
        Get information about the current device.

        Returns:
            Dictionary with device information

        Raises:
            RuntimeError: Propagated from get_device() when the preferred
                device is unavailable and fallback is disabled.
        """
        info = self.strategy.get_device_info()
        info['preferred_device'] = self.preferred_device
        info['actual_device'] = self.get_device()
        info['allow_fallback'] = self.allow_fallback
        return info

    def print_device_info(self, verbose: bool = True) -> None:
        """
        Print device information.

        Args:
            verbose: If True, print detailed information
        """
        if not verbose:
            return

        info = self.get_device_info()
        actual = info['actual_device']
        preferred = info['preferred_device']

        print(f"\n💻 Device Configuration:")
        print(f" Preferred: {preferred}")
        print(f" Using: {actual}")

        if preferred != actual:
            print(f" ⚠️ Fallback to CPU (CUDA not available)")

        if actual == 'cuda' and info.get('available'):
            print(f" GPU: {info.get('device_name', 'Unknown')}")
            print(f" CUDA Version: {info.get('cuda_version', 'N/A')}")
            print(f" GPU Count: {info.get('device_count', 0)}")
        elif actual == 'cpu':
            print(f" CPU Threads: {info.get('num_threads', 'N/A')}")

    @staticmethod
    def validate_device(device: str) -> str:
        """
        Validate and normalize device string.

        Args:
            device: Device string to validate

        Returns:
            Normalized device string

        Raises:
            ValueError: If device is invalid
        """
        device = device.lower().strip()

        if device not in ['cuda', 'cpu', 'auto']:
            raise ValueError(
                f"Invalid device: '{device}'. Must be 'cuda', 'cpu', or 'auto'"
            )

        # Auto-select best available device
        if device == 'auto':
            return 'cuda' if torch.cuda.is_available() else 'cpu'

        return device

    @staticmethod
    def get_optimal_device() -> str:
        """
        Get the optimal device for the current environment.

        Returns:
            'cuda' if available, otherwise 'cpu'
        """
        return 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -0,0 +1,335 @@
1
+ """
2
+ Training Service
3
+
4
+ PURPOSE:
5
+ Encapsulates training logic following Service Layer Pattern.
6
+ Provides a high-level interface for model training with device flexibility.
7
+
8
+ DESIGN PATTERNS:
9
+ - Service Layer Pattern: Business logic separated from API layer
10
+ - Dependency Injection: DeviceManager injected for flexibility
11
+ - Single Responsibility: Only handles training orchestration
12
+ - Open/Closed Principle: Extensible without modification
13
+
14
+ WHAT THIS FILE DOES:
15
+ 1. Orchestrates the training process
16
+ 2. Manages device configuration via DeviceManager
17
+ 3. Coordinates model, data, optimizer, and trainer
18
+ 4. Provides clean interface for training operations
19
+
20
+ PACKAGES USED:
21
+ - torch: PyTorch training
22
+ - pathlib: Path handling
23
+ """
24
+
25
+ import torch
26
+ import random
27
+ import numpy as np
28
+ from pathlib import Path
29
+ from typing import Dict, Any, Optional
30
+
31
+ from gptmed.services.device_manager import DeviceManager
32
+ from gptmed.model.architecture import GPTTransformer
33
+ from gptmed.model.configs.model_config import get_tiny_config, get_small_config, get_medium_config
34
+ from gptmed.configs.train_config import TrainingConfig
35
+ from gptmed.training.dataset import create_dataloaders
36
+ from gptmed.training.trainer import Trainer
37
+
38
+
39
class TrainingService:
    """
    High-level service for model training.

    Implements Service Layer Pattern to encapsulate training logic.
    Uses Dependency Injection for DeviceManager.

    Example:
        >>> device_manager = DeviceManager(preferred_device='cpu')
        >>> service = TrainingService(device_manager=device_manager)
        >>> results = service.train_from_config('config.yaml', verbose=True)
    """

    def __init__(
        self,
        device_manager: Optional[DeviceManager] = None,
        verbose: bool = True
    ):
        """
        Initialize TrainingService.

        Args:
            device_manager: DeviceManager instance (if None, creates default)
            verbose: Whether to print training information
        """
        # Dependency injection with a sensible default; DeviceManager
        # itself handles CPU fallback when CUDA is unavailable.
        self.device_manager = device_manager or DeviceManager(preferred_device='cuda')
        self.verbose = verbose

    def set_seed(self, seed: int) -> None:
        """
        Set random seeds for reproducibility.

        Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA
        devices), and forces deterministic cuDNN behavior.

        Args:
            seed: Random seed value
        """
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            # Deterministic cuDNN trades speed for reproducibility.
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False

    def create_model(self, model_size: str) -> GPTTransformer:
        """
        Create model based on size specification.

        Args:
            model_size: Model size ('tiny', 'small', or 'medium')

        Returns:
            GPTTransformer model instance

        Raises:
            ValueError: If model_size is invalid
        """
        # Dispatch table keeps the size -> config mapping in one place.
        config_factories = {
            'tiny': get_tiny_config,
            'small': get_small_config,
            'medium': get_medium_config,
        }
        try:
            model_config = config_factories[model_size]()
        except KeyError:
            raise ValueError(f"Unknown model size: {model_size}") from None

        return GPTTransformer(model_config)

    def prepare_training(
        self,
        model: GPTTransformer,
        train_config: TrainingConfig,
        device: str
    ) -> tuple:
        """
        Prepare components for training.

        Builds train/val dataloaders from the paths on ``train_config``
        and an AdamW optimizer over the model's parameters.

        Args:
            model: Model to train
            train_config: Training configuration
            device: Device to use (kept for interface stability; the
                loaders and optimizer built here are device-agnostic)

        Returns:
            Tuple of (train_loader, val_loader, optimizer)
        """
        # Load data
        if self.verbose:
            print(f"\n📊 Loading data...")
            print(f" Train: {train_config.train_data_path}")
            print(f" Val: {train_config.val_data_path}")

        train_loader, val_loader = create_dataloaders(
            train_path=Path(train_config.train_data_path),
            val_path=Path(train_config.val_data_path),
            batch_size=train_config.batch_size,
            num_workers=0,
        )

        if self.verbose:
            print(f" Train batches: {len(train_loader)}")
            print(f" Val batches: {len(val_loader)}")

        # Create optimizer
        if self.verbose:
            print(f"\n⚙️ Setting up optimizer...")
            print(f" Learning rate: {train_config.learning_rate}")
            print(f" Weight decay: {train_config.weight_decay}")

        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=train_config.learning_rate,
            betas=train_config.betas,
            eps=train_config.eps,
            weight_decay=train_config.weight_decay,
        )

        return train_loader, val_loader, optimizer

    def execute_training(
        self,
        model: GPTTransformer,
        train_loader,
        val_loader,
        optimizer,
        train_config: TrainingConfig,
        device: str,
        model_config_dict: dict
    ) -> Dict[str, Any]:
        """
        Execute the training process.

        Creates the Trainer, optionally resumes from a checkpoint, runs the
        training loop (saving a checkpoint on Ctrl-C), and returns a summary.

        Args:
            model: Model to train
            train_loader: Training data loader
            val_loader: Validation data loader
            optimizer: Optimizer
            train_config: Training configuration
            device: Device to use
            model_config_dict: Model configuration as dictionary

        Returns:
            Dictionary with training results:
            best_checkpoint, final_val_loss, total_epochs,
            checkpoint_dir, log_dir
        """
        # Create trainer
        if self.verbose:
            print(f"\n🎯 Initializing trainer...")

        trainer = Trainer(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            optimizer=optimizer,
            config=train_config,
            device=device,
        )

        # Resume if requested.  ``resume_from`` may not exist on every
        # TrainingConfig, so probe it with getattr.
        resume_from = getattr(train_config, 'resume_from', None)
        if resume_from is not None:
            if self.verbose:
                print(f"\n📥 Resuming from checkpoint: {resume_from}")
            trainer.resume_from_checkpoint(Path(resume_from))
        elif getattr(train_config, 'checkpoint_dir', None):
            # BUGFIX: the original accessed ``train_config.checkpoint_dir``
            # *before* its hasattr guard, so the guard could never help;
            # getattr performs the safe lookup in one step.
            resume_path = Path(train_config.checkpoint_dir) / "resume_from.pt"
            if resume_path.exists() and self.verbose:
                # NOTE(review): this branch only announces the checkpoint;
                # it does not resume from it. Behavior preserved — confirm
                # whether a resume was intended here.
                print(f"\n📥 Found checkpoint to resume: {resume_path}")

        # Start training
        if self.verbose:
            print(f"\n{'='*60}")
            print("🚀 Starting Training!")
            print(f"{'='*60}\n")

        try:
            trainer.train()
        except KeyboardInterrupt:
            # Graceful interrupt: persist progress so the run can resume.
            if self.verbose:
                print("\n\n⏸️ Training interrupted by user")
                print("💾 Saving checkpoint...")
            trainer.checkpoint_manager.save_checkpoint(
                model=model,
                optimizer=optimizer,
                step=trainer.global_step,
                epoch=trainer.current_epoch,
                val_loss=trainer.best_val_loss,
                model_config=model_config_dict,
                train_config=train_config.to_dict(),
            )
            if self.verbose:
                print("✓ Checkpoint saved. Resume with resume_from in config.")

        # Return results
        best_checkpoint = Path(train_config.checkpoint_dir) / "best_model.pt"

        results = {
            'best_checkpoint': str(best_checkpoint),
            'final_val_loss': trainer.best_val_loss,
            'total_epochs': trainer.current_epoch,
            'checkpoint_dir': train_config.checkpoint_dir,
            'log_dir': train_config.log_dir,
        }

        if self.verbose:
            print(f"\n{'='*60}")
            print("✅ Training Complete!")
            print(f"{'='*60}")
            print(f"\n📁 Results:")
            print(f" Best checkpoint: {results['best_checkpoint']}")
            print(f" Best val loss: {results['final_val_loss']:.4f}")
            print(f" Total epochs: {results['total_epochs']}")
            print(f" Logs: {results['log_dir']}")

        return results

    def train(
        self,
        model_size: str,
        train_data_path: str,
        val_data_path: str,
        batch_size: int = 16,
        learning_rate: float = 3e-4,
        num_epochs: int = 10,
        checkpoint_dir: str = "./model/checkpoints",
        log_dir: str = "./logs",
        seed: int = 42,
        **kwargs
    ) -> Dict[str, Any]:
        """
        High-level training interface.

        Args:
            model_size: Model size ('tiny', 'small', 'medium')
            train_data_path: Path to training data
            val_data_path: Path to validation data
            batch_size: Training batch size
            learning_rate: Learning rate
            num_epochs: Number of training epochs
            checkpoint_dir: Directory for checkpoints
            log_dir: Directory for logs
            seed: Random seed
            **kwargs: Additional training config parameters

        Returns:
            Dictionary with training results
        """
        # Set seed
        if self.verbose:
            print(f"\n🎲 Setting random seed: {seed}")
        self.set_seed(seed)

        # Get device
        device = self.device_manager.get_device()
        self.device_manager.print_device_info(verbose=self.verbose)

        # Create model
        if self.verbose:
            print(f"\n🧠 Creating model: {model_size}")

        model = self.create_model(model_size)
        total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

        if self.verbose:
            print(f" Model size: {model_size}")
            print(f" Parameters: {total_params:,}")
            print(f" Memory: ~{total_params * 4 / 1024 / 1024:.2f} MB")

        # Create training config.
        # NOTE(review): hasattr(TrainingConfig, k) filters kwargs against
        # *class* attributes; dataclass fields without class-level defaults
        # would be dropped by this filter — confirm against TrainingConfig.
        train_config = TrainingConfig(
            train_data_path=train_data_path,
            val_data_path=val_data_path,
            batch_size=batch_size,
            learning_rate=learning_rate,
            num_epochs=num_epochs,
            checkpoint_dir=checkpoint_dir,
            log_dir=log_dir,
            device=device,
            seed=seed,
            **{k: v for k, v in kwargs.items() if hasattr(TrainingConfig, k)}
        )

        # Prepare training components
        train_loader, val_loader, optimizer = self.prepare_training(
            model, train_config, device
        )

        # Execute training
        results = self.execute_training(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            optimizer=optimizer,
            train_config=train_config,
            device=device,
            model_config_dict=model.config.to_dict()
        )

        return results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gptmed
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: A lightweight GPT-based language model framework for training custom question-answering models on any domain
5
5
  Author-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
6
6
  Maintainer-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
@@ -61,27 +61,6 @@ A lightweight GPT-based language model framework for training custom question-an
61
61
  - 📦 **Lightweight**: Small model size suitable for edge deployment
62
62
  - 🛠️ **Complete Toolkit**: Includes tokenizer training, model training, and inference utilities
63
63
 
64
- ## Table of Contents
65
-
66
- - [Features](#features)
67
- - [Installation](#installation)
68
- - [Quick Start](#quick-start)
69
- - [Package Structure](#package-structure)
70
- - [Core Modules](#core-modules)
71
- - [Model Components](#model-components)
72
- - [Training Components](#training-components)
73
- - [Inference Components](#inference-components)
74
- - [Data Processing](#data-processing)
75
- - [Utilities](#utilities)
76
- - [Model Architecture](#model-architecture)
77
- - [Configuration](#configuration)
78
- - [Documentation](#documentation)
79
- - [Performance](#performance)
80
- - [Examples](#examples)
81
- - [Contributing](#contributing)
82
- - [License](#license)
83
- - [Support](#support)
84
-
85
64
  ## Installation
86
65
 
87
66
  ### From PyPI (Recommended)
@@ -208,134 +187,27 @@ config = TrainingConfig(
208
187
  )
209
188
  ```
210
189
 
211
- ## Package Structure
212
-
213
- ### Core Modules
214
-
215
- The `gptmed` package contains the following main modules:
216
-
217
- ```
218
- gptmed/
219
- ├── model/ # Model architecture and configurations
220
- ├── inference/ # Text generation and sampling
221
- ├── training/ # Training loops and datasets
222
- ├── tokenizer/ # Tokenizer training and data processing
223
- ├── data/ # Data parsers and formatters
224
- ├── configs/ # Training configurations
225
- └── utils/ # Utilities (checkpoints, logging)
226
- ```
227
-
228
- ### Model Components
229
-
230
- **`gptmed.model.architecture`** - GPT Transformer Implementation
231
-
232
- - `GPTTransformer` - Main model class
233
- - `TransformerBlock` - Individual transformer layers
234
- - `MultiHeadAttention` - Attention mechanism
235
- - `FeedForward` - Feed-forward networks
236
- - `RoPEPositionalEncoding` - Rotary position embeddings
237
-
238
- **`gptmed.model.configs`** - Model Configurations
239
-
240
- - `get_tiny_config()` - ~2M parameters (testing)
241
- - `get_small_config()` - ~10M parameters (recommended)
242
- - `get_medium_config()` - ~50M parameters (high quality)
243
- - `ModelConfig` - Custom configuration class
244
-
245
- ### Training Components
246
-
247
- **`gptmed.training`** - Training Pipeline
248
-
249
- - `train.py` - Main training script (CLI: `gptmed-train`)
250
- - `Trainer` - Training loop with checkpointing
251
- - `TokenizedDataset` - PyTorch dataset for tokenized data
252
- - `create_dataloaders()` - DataLoader creation utilities
253
-
254
- **`gptmed.configs`** - Training Configurations
255
-
256
- - `TrainingConfig` - Training hyperparameters
257
- - `get_default_config()` - Default training settings
258
- - `get_quick_test_config()` - Fast testing configuration
259
-
260
- ### Inference Components
261
-
262
- **`gptmed.inference`** - Text Generation
263
-
264
- - `TextGenerator` - Main generation class
265
- - `generator.py` - CLI command (CLI: `gptmed-generate`)
266
- - `sampling.py` - Sampling strategies (top-k, top-p, temperature)
267
- - `decoding_utils.py` - Decoding utilities
268
- - `GenerationConfig` - Generation parameters
269
-
270
- ### Data Processing
271
-
272
- **`gptmed.tokenizer`** - Tokenizer Training & Data Processing
273
-
274
- - `train_tokenizer.py` - Train SentencePiece tokenizer
275
- - `tokenize_data.py` - Convert text to token sequences
276
- - SentencePiece BPE tokenizer support
277
-
278
- **`gptmed.data.parsers`** - Data Parsing & Formatting
279
-
280
- - `MedQuADParser` - XML Q&A parser (example)
281
- - `CausalTextFormatter` - Format Q&A pairs for training
282
- - `FormatConfig` - Formatting configuration
283
-
284
- ### Utilities
285
-
286
- **`gptmed.utils`** - Helper Functions
287
-
288
- - `checkpoints.py` - Model checkpoint management
289
- - `logging.py` - Training metrics logging
290
-
291
- ---
292
-
293
- ## Detailed Project Structure
190
+ ## Project Structure
294
191
 
295
192
  ```
296
193
  gptmed/
297
194
  ├── model/
298
- │ ├── architecture/
299
- │ ├── gpt.py # GPT transformer model
300
- │ │ ├── attention.py # Multi-head attention
301
- │ │ ├── feedforward.py # Feed-forward networks
302
- │ │ └── embeddings.py # Token + positional embeddings
303
- │ └── configs/
304
- │ └── model_config.py # Model size configurations
195
+ │ ├── architecture/ # GPT transformer implementation
196
+ └── configs/ # Model configurations
305
197
  ├── inference/
306
- │ ├── generator.py # Text generation (CLI command)
307
- ├── sampling.py # Sampling strategies
308
- │ ├── decoding_utils.py # Decoding utilities
309
- │ └── generation_config.py # Generation parameters
198
+ │ ├── generator.py # Text generation
199
+ └── sampling.py # Sampling strategies
310
200
  ├── training/
311
- │ ├── train.py # Main training script (CLI command)
312
- │ ├── trainer.py # Training loop
313
- ├── dataset.py # PyTorch dataset
314
- │ └── utils.py # Training utilities
201
+ │ ├── train.py # Training script
202
+ │ ├── trainer.py # Training loop
203
+ └── dataset.py # Data loading
315
204
  ├── tokenizer/
316
- ├── train_tokenizer.py # Train SentencePiece tokenizer
317
- │ └── tokenize_data.py # Tokenize text data
318
- ├── data/
319
- │ └── parsers/
320
- │ ├── medquad_parser.py # Example XML parser
321
- │ └── text_formatter.py # Q&A text formatter
205
+ └── train_tokenizer.py # SentencePiece tokenizer
322
206
  ├── configs/
323
- │ └── train_config.py # Training configurations
207
+ │ └── train_config.py # Training configurations
324
208
  └── utils/
325
- ├── checkpoints.py # Model checkpointing
326
- └── logging.py # Training logging
327
- ```
328
-
329
- ### Command-Line Interface
330
-
331
- The package provides two main CLI commands:
332
-
333
- ```bash
334
- # Train a model
335
- gptmed-train --model-size small --num-epochs 10 --batch-size 16
336
-
337
- # Generate text
338
- gptmed-generate --prompt "Your question?" --max-length 100
209
+ ├── checkpoints.py # Model checkpointing
210
+ └── logging.py # Training logging
339
211
  ```
340
212
 
341
213
  ## Requirements
@@ -1,7 +1,7 @@
1
1
  gptmed/__init__.py,sha256=mwzeW2Qc6j1z5f6HOvZ_BNOnFSncWEK2KEkdqq91yYY,1676
2
- gptmed/api.py,sha256=gUWooWsXDaGb1r22YnzS3w-sU-n-b4gB4-gh0fMsT4A,11109
2
+ gptmed/api.py,sha256=k9a_1F2h__xgKnH2l0FaJqAqu-iTYt5tu_VfVO0UhrA,9806
3
3
  gptmed/configs/__init__.py,sha256=yRa-zgPQ-OCzu8fvCrfWMG-CjF3dru3PZzknzm0oUaQ,23
4
- gptmed/configs/config_loader.py,sha256=ZWdH63XOOu0T8seWBiJFZtzlyFmzHzKmMxon6ZgZHlg,6000
4
+ gptmed/configs/config_loader.py,sha256=3GQ1iCNpdJ5yALWXA3SPPHRkaUO-117vdArEL6u7sK8,6354
5
5
  gptmed/configs/train_config.py,sha256=KqfNBh9hdTTd_6gEAlrClU8sVFSlVDmZJOrf3cPwFe8,4657
6
6
  gptmed/configs/training_config.yaml,sha256=EEZZa3kcsZr3g-_fKDPYZt4_NTpmS-3NvJrTYSWNc8g,2874
7
7
  gptmed/data/__init__.py,sha256=iAHeakB5pBAd7MkmarPPY0UKS9bTaO_winLZ23Y2O90,54
@@ -22,6 +22,9 @@ gptmed/model/architecture/feedforward.py,sha256=uJ5QOlWX0ritKDQLUE7GPmMojelR9-sT
22
22
  gptmed/model/architecture/transformer.py,sha256=H1njPoy0Uam59JbA24C0olEDwPfhh3ev4HsUFRIC_0Y,6626
23
23
  gptmed/model/configs/__init__.py,sha256=LDCWhlCDOU7490wcfSId_jXBPfQrtYQEw8FoD67rqBs,275
24
24
  gptmed/model/configs/model_config.py,sha256=wI-i2Dw_pTdIKCDe1pqLvP3ky3YedEy7DwZYN5lwmKE,4673
25
+ gptmed/services/__init__.py,sha256=FtM7NQ_S4VOfl2n6A6cLcOxG9-w7BK7DicQsUvOMmGE,369
26
+ gptmed/services/device_manager.py,sha256=RSsu0RlsexCIO-p4eejOZAPLgpaVA0y9niTg8wf1luY,7513
27
+ gptmed/services/training_service.py,sha256=o9Kxxoi6CVDvvM9rwGYNX426qTnmqxLXLt_bVi1ZSK4,11253
25
28
  gptmed/tokenizer/__init__.py,sha256=KhLAHPmQyoWhnKDenyIJRxgFflKI7xklip28j4cKfKw,157
26
29
  gptmed/tokenizer/tokenize_data.py,sha256=KgMtMfaz_RtOhN_CrvC267k9ujxRdO89rToVJ6nzdwg,9139
27
30
  gptmed/tokenizer/train_tokenizer.py,sha256=f0Hucyft9e8LU2RtpTqg8h_0SpOC_oMABl0_me-wfL8,7068
@@ -33,9 +36,9 @@ gptmed/training/utils.py,sha256=pJxCwneNr2STITIYwIDCxRzIICDFOxOMzK8DT7ck2oQ,5651
33
36
  gptmed/utils/__init__.py,sha256=XuMhIqOXF7mjnog_6Iky-hSbwvFb0iK42B4iDUpgi0U,44
34
37
  gptmed/utils/checkpoints.py,sha256=L4q1-_4GbHCoD7QuEKYeQ-xXDTF-6sqZOxKQ_LT8YmQ,7112
35
38
  gptmed/utils/logging.py,sha256=7dJc1tayMxCBjFSDXe4r9ACUTpoPTTGsJ0UZMTqZIDY,5303
36
- gptmed-0.3.3.dist-info/licenses/LICENSE,sha256=v2spsd7N1pKFFh2G8wGP_45iwe5S0DYiJzG4im8Rupc,1066
37
- gptmed-0.3.3.dist-info/METADATA,sha256=0ohKwsi3802GMhVUIx2n76i4QHhY0dkzdG4a_g1p_Hw,13605
38
- gptmed-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
- gptmed-0.3.3.dist-info/entry_points.txt,sha256=ATqOzTtPVdUiFX5ZSeo3n9JkUCqocUxEXTgy1CfNRZE,110
40
- gptmed-0.3.3.dist-info/top_level.txt,sha256=mhyEq3rG33t21ziJz5w3TPgx0RjPf4zXMNUx2JTiNmE,7
41
- gptmed-0.3.3.dist-info/RECORD,,
39
+ gptmed-0.3.5.dist-info/licenses/LICENSE,sha256=v2spsd7N1pKFFh2G8wGP_45iwe5S0DYiJzG4im8Rupc,1066
40
+ gptmed-0.3.5.dist-info/METADATA,sha256=Zx3kFlZiBdkXco_VkEqOnIeasCYrgWl2XP21D2QcmuA,9382
41
+ gptmed-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
42
+ gptmed-0.3.5.dist-info/entry_points.txt,sha256=ATqOzTtPVdUiFX5ZSeo3n9JkUCqocUxEXTgy1CfNRZE,110
43
+ gptmed-0.3.5.dist-info/top_level.txt,sha256=mhyEq3rG33t21ziJz5w3TPgx0RjPf4zXMNUx2JTiNmE,7
44
+ gptmed-0.3.5.dist-info/RECORD,,
File without changes