PyPI - gptmed - Versions diffs - 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

gptmed 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

gptmed/api.py CHANGED Viewed

@@ -39,6 +39,8 @@ from gptmed.configs.train_config import TrainingConfig
 from gptmed.training.dataset import create_dataloaders
 from gptmed.training.trainer import Trainer
 from gptmed.inference.generator import TextGenerator
+from gptmed.services.device_manager import DeviceManager
+from gptmed.services.training_service import TrainingService
 def create_config(output_path: str = 'training_config.yaml') -> None:
@@ -58,7 +60,11 @@ def create_config(output_path: str = 'training_config.yaml') -> None:
     create_default_config_file(output_path)
-def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
+def train_from_config(
+    config_path: str,
+    verbose: bool = True,
+    device: Optional[str] = None
+) -> Dict[str, Any]:
     """
     Train a GPT model using a YAML configuration file.
@@ -68,6 +74,8 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
     Args:
         config_path: Path to YAML configuration file
         verbose: Whether to print training progress (default: True)
+        device: Device to use ('cuda', 'cpu', or 'auto'). If None, uses config value.
+                'auto' will select best available device.
     Returns:
         Dictionary with training results:
@@ -82,13 +90,16 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
         >>> gptmed.create_config('config.yaml')
         >>> # ... edit config.yaml ...
         >>>
-        >>> # Train the model
-        >>> results = gptmed.train_from_config('config.yaml')
+        >>> # Train the model on CPU
+        >>> results = gptmed.train_from_config('config.yaml', device='cpu')
         >>> print(f"Best model: {results['best_checkpoint']}")
+        >>>
+        >>> # Train with auto device selection
+        >>> results = gptmed.train_from_config('config.yaml', device='auto')
     Raises:
         FileNotFoundError: If config file or data files don't exist
-        ValueError: If configuration is invalid
+        ValueError: If configuration is invalid or device is invalid
     """
     if verbose:
         print("=" * 60)
@@ -111,47 +122,42 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
     # Convert to arguments
     args = config_to_args(config)
-    # Import here to avoid circular imports
-    import random
-    import numpy as np
+    # Override device if provided as parameter
+    if device is not None:
+        # Validate and normalize device
+        device = DeviceManager.validate_device(device)
+        if verbose:
+            print(f"\n⚙️  Device override: {device} (from parameter)")
+        args['device'] = device
-    # Set random seed
-    def set_seed(seed: int):
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-        if torch.cuda.is_available():
-            torch.cuda.manual_seed(seed)
-            torch.cuda.manual_seed_all(seed)
-        torch.backends.cudnn.deterministic = True
-        torch.backends.cudnn.benchmark = False
+    # Create DeviceManager with the selected device
+    device_manager = DeviceManager(
+        preferred_device=args['device'],
+        allow_fallback=True
+    )
+    # Print device information
+    device_manager.print_device_info(verbose=verbose)
+    # Create TrainingService with DeviceManager
+    training_service = TrainingService(
+        device_manager=device_manager,
+        verbose=verbose
+    )
+    # Set random seed
     if verbose:
         print(f"\n🎲 Setting random seed: {args['seed']}")
-    set_seed(args['seed'])
+    training_service.set_seed(args['seed'])
-    # Check device
-    device = args['device']
-    if device == 'cuda' and not torch.cuda.is_available():
-        if verbose:
-            print("⚠️  CUDA not available, using CPU")
-        device = 'cpu'
+    # Get actual device to use
+    actual_device = device_manager.get_device()
     # Load model config
     if verbose:
         print(f"\n🧠 Creating model: {args['model_size']}")
-    if args['model_size'] == 'tiny':
-        model_config = get_tiny_config()
-    elif args['model_size'] == 'small':
-        model_config = get_small_config()
-    elif args['model_size'] == 'medium':
-        model_config = get_medium_config()
-    else:
-        raise ValueError(f"Unknown model size: {args['model_size']}")
-    # Create model
-    model = GPTTransformer(model_config)
+    model = training_service.create_model(args['model_size'])
     total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
     if verbose:
@@ -159,7 +165,7 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
         print(f"  Parameters: {total_params:,}")
         print(f"  Memory: ~{total_params * 4 / 1024 / 1024:.2f} MB")
-    # Load data
+    # Load data using TrainingService
     if verbose:
         print(f"\n📊 Loading data...")
         print(f"  Train: {args['train_data']}")
@@ -176,7 +182,7 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
         print(f"  Train batches: {len(train_loader)}")
         print(f"  Val batches: {len(val_loader)}")
-    # Create training config
+    # Create training config with actual device
     train_config = TrainingConfig(
         batch_size=args['batch_size'],
         learning_rate=args['learning_rate'],
@@ -195,7 +201,7 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
         val_data_path=args['val_data'],
         checkpoint_dir=args['checkpoint_dir'],
         log_dir=args['log_dir'],
-        device=device,
+        device=actual_device,  # Use actual device from DeviceManager
         seed=args['seed'],
     )
@@ -213,70 +219,17 @@ def train_from_config(config_path: str, verbose: bool = True) -> Dict[str, Any]:
         weight_decay=args['weight_decay'],
     )
-    # Create trainer
-    if verbose:
-        print(f"\n🎯 Initializing trainer...")
-    trainer = Trainer(
+    # Execute training using TrainingService
+    results = training_service.execute_training(
         model=model,
         train_loader=train_loader,
         val_loader=val_loader,
         optimizer=optimizer,
-        config=train_config,
-        device=device,
+        train_config=train_config,
+        device=actual_device,
+        model_config_dict=model.config.to_dict()
     )
-    # Resume if requested
-    if args['resume_from'] is not None:
-        if verbose:
-            print(f"\n📥 Resuming from checkpoint: {args['resume_from']}")
-        trainer.resume_from_checkpoint(Path(args['resume_from']))
-    # Start training
-    if verbose:
-        print(f"\n{'='*60}")
-        print("🚀 Starting Training!")
-        print(f"{'='*60}\n")
-    try:
-        trainer.train()
-    except KeyboardInterrupt:
-        if verbose:
-            print("\n\n⏸️  Training interrupted by user")
-            print("💾 Saving checkpoint...")
-        trainer.checkpoint_manager.save_checkpoint(
-            model=model,
-            optimizer=optimizer,
-            step=trainer.global_step,
-            epoch=trainer.current_epoch,
-            val_loss=trainer.best_val_loss,
-            model_config=model_config.to_dict(),
-            train_config=train_config.to_dict(),
-        )
-        if verbose:
-            print("✓ Checkpoint saved. Resume with resume_from in config.")
-    # Return results
-    best_checkpoint = Path(train_config.checkpoint_dir) / "best_model.pt"
-    results = {
-        'best_checkpoint': str(best_checkpoint),
-        'final_val_loss': trainer.best_val_loss,
-        'total_epochs': trainer.current_epoch,
-        'checkpoint_dir': train_config.checkpoint_dir,
-        'log_dir': train_config.log_dir,
-    }
-    if verbose:
-        print(f"\n{'='*60}")
-        print("✅ Training Complete!")
-        print(f"{'='*60}")
-        print(f"\n📁 Results:")
-        print(f"  Best checkpoint: {results['best_checkpoint']}")
-        print(f"  Best val loss: {results['final_val_loss']:.4f}")
-        print(f"  Total epochs: {results['total_epochs']}")
-        print(f"  Logs: {results['log_dir']}")
     return results

gptmed/configs/config_loader.py CHANGED Viewed

@@ -76,6 +76,15 @@ def validate_config(config: Dict[str, Any]) -> None:
         raise ValueError("batch_size must be positive")
     if config['training']['learning_rate'] <= 0:
         raise ValueError("learning_rate must be positive")
+    # Validate device
+    valid_devices = ['cuda', 'cpu', 'auto']
+    device_value = config.get('device', {}).get('device', 'cuda').lower()
+    if device_value not in valid_devices:
+        raise ValueError(
+            f"Invalid device: {device_value}. "
+            f"Must be one of {valid_devices}"
+        )
 def config_to_args(config: Dict[str, Any]) -> Dict[str, Any]:
@@ -169,7 +178,7 @@ def create_default_config_file(output_path: str = 'training_config.yaml') -> Non
             'log_interval': 10
         },
         'device': {
-            'device': 'cuda',
+            'device': 'cuda',  # Options: 'cuda', 'cpu', or 'auto'
             'seed': 42
         },
         'advanced': {

{gptmed-0.3.3.dist-info → gptmed-0.3.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gptmed
-Version: 0.3.3
+Version: 0.3.4
 Summary: A lightweight GPT-based language model framework for training custom question-answering models on any domain
 Author-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
 Maintainer-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
@@ -61,27 +61,6 @@ A lightweight GPT-based language model framework for training custom question-an
 - 📦 **Lightweight**: Small model size suitable for edge deployment
 - 🛠️ **Complete Toolkit**: Includes tokenizer training, model training, and inference utilities
-## Table of Contents
-- [Features](#features)
-- [Installation](#installation)
-- [Quick Start](#quick-start)
-- [Package Structure](#package-structure)
-  - [Core Modules](#core-modules)
-  - [Model Components](#model-components)
-  - [Training Components](#training-components)
-  - [Inference Components](#inference-components)
-  - [Data Processing](#data-processing)
-  - [Utilities](#utilities)
-- [Model Architecture](#model-architecture)
-- [Configuration](#configuration)
-- [Documentation](#documentation)
-- [Performance](#performance)
-- [Examples](#examples)
-- [Contributing](#contributing)
-- [License](#license)
-- [Support](#support)
 ## Installation
 ### From PyPI (Recommended)
@@ -208,134 +187,27 @@ config = TrainingConfig(
 )
 ```
-## Package Structure
-### Core Modules
-The `gptmed` package contains the following main modules:
-```
-gptmed/
-├── model/                  # Model architecture and configurations
-├── inference/              # Text generation and sampling
-├── training/               # Training loops and datasets
-├── tokenizer/              # Tokenizer training and data processing
-├── data/                   # Data parsers and formatters
-├── configs/                # Training configurations
-└── utils/                  # Utilities (checkpoints, logging)
-```
-### Model Components
-**`gptmed.model.architecture`** - GPT Transformer Implementation
-- `GPTTransformer` - Main model class
-- `TransformerBlock` - Individual transformer layers
-- `MultiHeadAttention` - Attention mechanism
-- `FeedForward` - Feed-forward networks
-- `RoPEPositionalEncoding` - Rotary position embeddings
-**`gptmed.model.configs`** - Model Configurations
-- `get_tiny_config()` - ~2M parameters (testing)
-- `get_small_config()` - ~10M parameters (recommended)
-- `get_medium_config()` - ~50M parameters (high quality)
-- `ModelConfig` - Custom configuration class
-### Training Components
-**`gptmed.training`** - Training Pipeline
-- `train.py` - Main training script (CLI: `gptmed-train`)
-- `Trainer` - Training loop with checkpointing
-- `TokenizedDataset` - PyTorch dataset for tokenized data
-- `create_dataloaders()` - DataLoader creation utilities
-**`gptmed.configs`** - Training Configurations
-- `TrainingConfig` - Training hyperparameters
-- `get_default_config()` - Default training settings
-- `get_quick_test_config()` - Fast testing configuration
-### Inference Components
-**`gptmed.inference`** - Text Generation
-- `TextGenerator` - Main generation class
-- `generator.py` - CLI command (CLI: `gptmed-generate`)
-- `sampling.py` - Sampling strategies (top-k, top-p, temperature)
-- `decoding_utils.py` - Decoding utilities
-- `GenerationConfig` - Generation parameters
-### Data Processing
-**`gptmed.tokenizer`** - Tokenizer Training & Data Processing
-- `train_tokenizer.py` - Train SentencePiece tokenizer
-- `tokenize_data.py` - Convert text to token sequences
-- SentencePiece BPE tokenizer support
-**`gptmed.data.parsers`** - Data Parsing & Formatting
-- `MedQuADParser` - XML Q&A parser (example)
-- `CausalTextFormatter` - Format Q&A pairs for training
-- `FormatConfig` - Formatting configuration
-### Utilities
-**`gptmed.utils`** - Helper Functions
-- `checkpoints.py` - Model checkpoint management
-- `logging.py` - Training metrics logging
----
-## Detailed Project Structure
+## Project Structure
 ```
 gptmed/
 ├── model/
-│   ├── architecture/
-│   │   ├── gpt.py              # GPT transformer model
-│   │   ├── attention.py        # Multi-head attention
-│   │   ├── feedforward.py      # Feed-forward networks
-│   │   └── embeddings.py       # Token + positional embeddings
-│   └── configs/
-│       └── model_config.py     # Model size configurations
+│   ├── architecture/      # GPT transformer implementation
+│   └── configs/           # Model configurations
 ├── inference/
-│   ├── generator.py            # Text generation (CLI command)
-│   ├── sampling.py             # Sampling strategies
-│   ├── decoding_utils.py       # Decoding utilities
-│   └── generation_config.py    # Generation parameters
+│   ├── generator.py       # Text generation
+│   └── sampling.py        # Sampling strategies
 ├── training/
-│   ├── train.py                # Main training script (CLI command)
-│   ├── trainer.py              # Training loop
-│   ├── dataset.py              # PyTorch dataset
-│   └── utils.py                # Training utilities
+│   ├── train.py          # Training script
+│   ├── trainer.py        # Training loop
+│   └── dataset.py        # Data loading
 ├── tokenizer/
-│   ├── train_tokenizer.py      # Train SentencePiece tokenizer
-│   └── tokenize_data.py        # Tokenize text data
-├── data/
-│   └── parsers/
-│       ├── medquad_parser.py   # Example XML parser
-│       └── text_formatter.py   # Q&A text formatter
+│   └── train_tokenizer.py # SentencePiece tokenizer
 ├── configs/
-│   └── train_config.py         # Training configurations
+│   └── train_config.py   # Training configurations
 └── utils/
-    ├── checkpoints.py          # Model checkpointing
-    └── logging.py              # Training logging
-```
-### Command-Line Interface
-The package provides two main CLI commands:
-```bash
-# Train a model
-gptmed-train --model-size small --num-epochs 10 --batch-size 16
-# Generate text
-gptmed-generate --prompt "Your question?" --max-length 100
+    ├── checkpoints.py    # Model checkpointing
+    └── logging.py        # Training logging
 ```
 ## Requirements

{gptmed-0.3.3.dist-info → gptmed-0.3.4.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 gptmed/__init__.py,sha256=mwzeW2Qc6j1z5f6HOvZ_BNOnFSncWEK2KEkdqq91yYY,1676
-gptmed/api.py,sha256=gUWooWsXDaGb1r22YnzS3w-sU-n-b4gB4-gh0fMsT4A,11109
+gptmed/api.py,sha256=k9a_1F2h__xgKnH2l0FaJqAqu-iTYt5tu_VfVO0UhrA,9806
 gptmed/configs/__init__.py,sha256=yRa-zgPQ-OCzu8fvCrfWMG-CjF3dru3PZzknzm0oUaQ,23
-gptmed/configs/config_loader.py,sha256=ZWdH63XOOu0T8seWBiJFZtzlyFmzHzKmMxon6ZgZHlg,6000
+gptmed/configs/config_loader.py,sha256=3GQ1iCNpdJ5yALWXA3SPPHRkaUO-117vdArEL6u7sK8,6354
 gptmed/configs/train_config.py,sha256=KqfNBh9hdTTd_6gEAlrClU8sVFSlVDmZJOrf3cPwFe8,4657
 gptmed/configs/training_config.yaml,sha256=EEZZa3kcsZr3g-_fKDPYZt4_NTpmS-3NvJrTYSWNc8g,2874
 gptmed/data/__init__.py,sha256=iAHeakB5pBAd7MkmarPPY0UKS9bTaO_winLZ23Y2O90,54
@@ -33,9 +33,9 @@ gptmed/training/utils.py,sha256=pJxCwneNr2STITIYwIDCxRzIICDFOxOMzK8DT7ck2oQ,5651
 gptmed/utils/__init__.py,sha256=XuMhIqOXF7mjnog_6Iky-hSbwvFb0iK42B4iDUpgi0U,44
 gptmed/utils/checkpoints.py,sha256=L4q1-_4GbHCoD7QuEKYeQ-xXDTF-6sqZOxKQ_LT8YmQ,7112
 gptmed/utils/logging.py,sha256=7dJc1tayMxCBjFSDXe4r9ACUTpoPTTGsJ0UZMTqZIDY,5303
-gptmed-0.3.3.dist-info/licenses/LICENSE,sha256=v2spsd7N1pKFFh2G8wGP_45iwe5S0DYiJzG4im8Rupc,1066
-gptmed-0.3.3.dist-info/METADATA,sha256=0ohKwsi3802GMhVUIx2n76i4QHhY0dkzdG4a_g1p_Hw,13605
-gptmed-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-gptmed-0.3.3.dist-info/entry_points.txt,sha256=ATqOzTtPVdUiFX5ZSeo3n9JkUCqocUxEXTgy1CfNRZE,110
-gptmed-0.3.3.dist-info/top_level.txt,sha256=mhyEq3rG33t21ziJz5w3TPgx0RjPf4zXMNUx2JTiNmE,7
-gptmed-0.3.3.dist-info/RECORD,,
+gptmed-0.3.4.dist-info/licenses/LICENSE,sha256=v2spsd7N1pKFFh2G8wGP_45iwe5S0DYiJzG4im8Rupc,1066
+gptmed-0.3.4.dist-info/METADATA,sha256=G86yfOKlnK4YfNvC6HAAY_z2Z_rhWoSF2_3508mebKA,9382
+gptmed-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+gptmed-0.3.4.dist-info/entry_points.txt,sha256=ATqOzTtPVdUiFX5ZSeo3n9JkUCqocUxEXTgy1CfNRZE,110
+gptmed-0.3.4.dist-info/top_level.txt,sha256=mhyEq3rG33t21ziJz5w3TPgx0RjPf4zXMNUx2JTiNmE,7
+gptmed-0.3.4.dist-info/RECORD,,

{gptmed-0.3.3.dist-info → gptmed-0.3.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{gptmed-0.3.3.dist-info → gptmed-0.3.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{gptmed-0.3.3.dist-info → gptmed-0.3.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{gptmed-0.3.3.dist-info → gptmed-0.3.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

gptmed 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

gptmed 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl