cifar10-tools 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
'''PyTorch utilities for CIFAR-10 classification.'''

# Data loading, evaluation, and training entry points.
from cifar10_tools.pytorch.data import download_cifar10_data
from cifar10_tools.pytorch.evaluation import evaluate_model
from cifar10_tools.pytorch.training import train_model

# Visualization helpers for datasets, training runs, and Optuna studies.
from cifar10_tools.pytorch.plotting import (
    plot_sample_images,
    plot_learning_curves,
    plot_confusion_matrix,
    plot_class_probability_distributions,
    plot_evaluation_curves,
    plot_optimization_results
)

# Optuna-based hyperparameter search utilities.
from cifar10_tools.pytorch.hyperparameter_optimization import (
    create_cnn,
    train_trial,
    create_objective
)

# Public API re-exported at the subpackage level.
__all__ = [
    'download_cifar10_data',
    'evaluate_model',
    'train_model',
    'plot_sample_images',
    'plot_learning_curves',
    'plot_confusion_matrix',
    'plot_class_probability_distributions',
    'plot_evaluation_curves',
    'plot_optimization_results',
    'create_cnn',
    'train_trial',
    'create_objective'
]
@@ -0,0 +1,236 @@
1
+ '''Hyperparameter optimization utilities for CNN models using Optuna.
2
+
3
+ This module provides functions for building configurable CNN architectures
4
+ and running hyperparameter optimization with Optuna.
5
+ '''
6
+
7
+ from typing import Callable
8
+
9
+ import optuna
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.optim as optim
13
+ from torch.utils.data import DataLoader
14
+
15
+
16
def create_cnn(
    n_conv_blocks: int,
    initial_filters: int,
    fc_units_1: int,
    fc_units_2: int,
    dropout_rate: float,
    use_batch_norm: bool,
    num_classes: int = 10,
    in_channels: int = 3,
    input_size: int = 32
) -> nn.Sequential:
    '''Build a configurable VGG-style CNN as a flat ``nn.Sequential``.

    Each block is two 3x3 convolutions (optionally batch-normalized),
    each followed by ReLU, then 2x2 max pooling and dropout. The filter
    count doubles per block; spatial size halves per block. A three-layer
    fully connected classifier follows.

    Args:
        n_conv_blocks: Number of convolutional blocks (1-5)
        initial_filters: Filters in the first block (doubles each block)
        fc_units_1: Units in the first fully connected layer
        fc_units_2: Units in the second fully connected layer
        dropout_rate: Dropout probability
        use_batch_norm: Whether to insert BatchNorm2d after each conv
        num_classes: Number of output classes (default: 10 for CIFAR-10)
        in_channels: Number of input channels (default: 3 for RGB)
        input_size: Square input image size (default: 32 for CIFAR-10)

    Returns:
        nn.Sequential model
    '''

    def conv_unit(in_c: int, out_c: int) -> list:
        # One conv -> (optional BN) -> ReLU triplet.
        unit = [nn.Conv2d(in_c, out_c, kernel_size=3, padding=1)]
        if use_batch_norm:
            unit.append(nn.BatchNorm2d(out_c))
        unit.append(nn.ReLU())
        return unit

    modules = []
    channels = in_channels
    size = input_size

    for idx in range(n_conv_blocks):
        filters = initial_filters << idx  # doubles every block

        modules += conv_unit(channels, filters)
        modules += conv_unit(filters, filters)
        modules += [nn.MaxPool2d(2, 2), nn.Dropout(dropout_rate)]

        channels = filters
        size //= 2

    # Classifier head: flatten, then three fully connected layers.
    flattened = channels * size * size
    modules += [
        nn.Flatten(),
        nn.Linear(flattened, fc_units_1),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
        nn.Linear(fc_units_1, fc_units_2),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
        nn.Linear(fc_units_2, num_classes),
    ]

    return nn.Sequential(*modules)
88
+
89
+
90
def train_trial(
    model: nn.Module,
    optimizer: optim.Optimizer,
    criterion: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    n_epochs: int,
    trial: optuna.Trial,
    device: torch.device | str | None = None
) -> float:
    '''Train a model for a single Optuna trial with pruning support.

    Args:
        model: PyTorch model to train
        optimizer: Optimizer for training
        criterion: Loss function
        train_loader: DataLoader for training data
        val_loader: DataLoader for validation data
        n_epochs: Number of epochs to train
        trial: Optuna trial object for reporting and pruning
        device: Device to move data to per-batch. If None, assumes data is
            already on the correct device (matches train_model's lazy-loading
            behavior).

    Returns:
        Best validation accuracy achieved during training

    Raises:
        optuna.TrialPruned: If the trial is pruned by Optuna's scheduler.
    '''
    best_val_accuracy = 0.0

    for epoch in range(n_epochs):
        # Training phase
        model.train()

        for images, labels in train_loader:
            # Move to device if lazy loading
            if device is not None:
                images = images.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation phase
        model.eval()
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                # Move to device if lazy loading
                if device is not None:
                    images = images.to(device, non_blocking=True)
                    labels = labels.to(device, non_blocking=True)

                outputs = model(images)
                # no_grad is active, so no .data access is needed
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        # Guard against an empty validation loader (avoids ZeroDivisionError)
        val_accuracy = 100 * val_correct / val_total if val_total else 0.0
        best_val_accuracy = max(best_val_accuracy, val_accuracy)

        # Report intermediate value for pruning
        trial.report(val_accuracy, epoch)

        # Prune unpromising trials
        if trial.should_prune():
            raise optuna.TrialPruned()

    return best_val_accuracy
149
+
150
+
151
def create_objective(
    train_loader: DataLoader,
    val_loader: DataLoader,
    n_epochs: int,
    device: torch.device,
    num_classes: int = 10,
    in_channels: int = 3
) -> Callable[[optuna.Trial], float]:
    '''Build an Optuna objective for CNN hyperparameter search.

    The returned closure captures the loaders and training configuration;
    each invocation samples an architecture and optimizer, trains it, and
    returns the best validation accuracy. CUDA out-of-memory errors are
    converted into pruned trials.

    Args:
        train_loader: DataLoader for training data
        val_loader: DataLoader for validation data
        n_epochs: Number of epochs per trial
        device: Device to train on (cuda or cpu)
        num_classes: Number of output classes (default: 10)
        in_channels: Number of input channels (default: 3 for RGB)

    Returns:
        Objective function for optuna.Study.optimize()

    Example:
        >>> objective = create_objective(train_loader, val_loader, n_epochs=50, device=device)
        >>> study = optuna.create_study(direction='maximize')
        >>> study.optimize(objective, n_trials=100)
    '''

    def objective(trial: optuna.Trial) -> float:
        '''Sample hyperparameters, train once, return best val accuracy.'''

        # Sample the search space (names and ranges are part of the study schema).
        n_blocks = trial.suggest_int('n_conv_blocks', 1, 5)
        base_filters = trial.suggest_categorical('initial_filters', [8, 16, 32, 64, 128])
        hidden_1 = trial.suggest_categorical('fc_units_1', [128, 256, 512, 1024, 2048])
        hidden_2 = trial.suggest_categorical('fc_units_2', [32, 64, 128, 256, 512])
        p_drop = trial.suggest_float('dropout_rate', 0.2, 0.75)
        batch_norm = trial.suggest_categorical('use_batch_norm', [True, False])
        lr = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
        opt_choice = trial.suggest_categorical('optimizer', ['Adam', 'SGD', 'RMSprop'])

        # Instantiate the sampled architecture on the target device.
        net = create_cnn(
            n_conv_blocks=n_blocks,
            initial_filters=base_filters,
            fc_units_1=hidden_1,
            fc_units_2=hidden_2,
            dropout_rate=p_drop,
            use_batch_norm=batch_norm,
            num_classes=num_classes,
            in_channels=in_channels
        ).to(device)

        # Build the sampled optimizer; SGD additionally samples momentum.
        if opt_choice == 'Adam':
            chosen_optimizer = optim.Adam(net.parameters(), lr=lr)
        elif opt_choice == 'SGD':
            momentum = trial.suggest_float('sgd_momentum', 0.8, 0.99)
            chosen_optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
        else:  # RMSprop
            chosen_optimizer = optim.RMSprop(net.parameters(), lr=lr)

        # Train; an OOM trial is pruned rather than failing the study.
        try:
            return train_trial(
                model=net,
                optimizer=chosen_optimizer,
                criterion=nn.CrossEntropyLoss(),
                train_loader=train_loader,
                val_loader=val_loader,
                n_epochs=n_epochs,
                trial=trial
            )

        except torch.cuda.OutOfMemoryError:
            # Clear CUDA cache and skip this trial
            torch.cuda.empty_cache()
            raise optuna.TrialPruned(f'CUDA OOM with params: {trial.params}')

    return objective
@@ -10,18 +10,18 @@ def plot_sample_images(
10
10
  class_names: list[str],
11
11
  nrows: int = 2,
12
12
  ncols: int = 5,
13
- figsize: tuple[float, float] | None = None,
14
- cmap: str = 'gray'
13
+ figsize: tuple[float, float] | None = None
15
14
  ) -> tuple[plt.Figure, np.ndarray]:
16
15
  '''Plot sample images from a dataset.
17
16
 
17
+ Automatically handles both grayscale (1 channel) and RGB (3 channel) images.
18
+
18
19
  Args:
19
20
  dataset: PyTorch dataset containing (image, label) tuples.
20
21
  class_names: List of class names for labeling.
21
22
  nrows: Number of rows in the grid.
22
23
  ncols: Number of columns in the grid.
23
24
  figsize: Figure size (width, height). Defaults to (ncols*1.5, nrows*1.5).
24
- cmap: Colormap for displaying images.
25
25
 
26
26
  Returns:
27
27
  Tuple of (figure, axes array).
@@ -36,11 +36,24 @@ def plot_sample_images(
36
36
  # Get image and label from dataset
37
37
  img, label = dataset[i]
38
38
 
39
- # Unnormalize and squeeze for plotting
39
+ # Unnormalize for plotting
40
40
  img = img * 0.5 + 0.5
41
- img = img.numpy().squeeze()
41
+ img = img.numpy()
42
+
43
+ # Handle grayscale vs RGB images
44
+ if img.shape[0] == 1:
45
+
46
+ # Grayscale: squeeze channel dimension
47
+ img = img.squeeze()
48
+ ax.imshow(img, cmap='gray')
49
+
50
+ else:
51
+
52
+ # RGB: transpose from (C, H, W) to (H, W, C)
53
+ img = np.transpose(img, (1, 2, 0))
54
+ ax.imshow(img)
55
+
42
56
  ax.set_title(class_names[label])
43
- ax.imshow(img, cmap=cmap)
44
57
  ax.axis('off')
45
58
 
46
59
  plt.tight_layout()
@@ -235,4 +248,60 @@ def plot_evaluation_curves(
235
248
 
236
249
  plt.tight_layout()
237
250
 
238
- return fig, (ax1, ax2)
251
+ return fig, (ax1, ax2)
252
+
253
+
254
def plot_optimization_results(
    study,
    figsize: tuple[float, float] = (12, 4)
) -> tuple[plt.Figure, np.ndarray]:
    '''Plot Optuna optimization history and hyperparameter importance.

    Args:
        study: Optuna study object with completed trials.
        figsize: Figure size (width, height).

    Returns:
        Tuple of (figure, axes array).
    '''
    import optuna

    fig, axes = plt.subplots(1, 2, figsize=figsize)
    history_ax, importance_ax = axes

    # Left panel: per-trial objective values with a best-so-far reference line.
    history_ax.set_title('Optimization History')

    finished = [t for t in study.trials if t.value is not None]

    history_ax.plot(
        [t.number for t in finished],
        [t.value for t in finished],
        'ko-', alpha=0.6
    )
    history_ax.axhline(
        y=study.best_value,
        color='r', linestyle='--', label=f'Best: {study.best_value:.2f}%'
    )
    history_ax.set_xlabel('Trial')
    history_ax.set_ylabel('Validation Accuracy (%)')
    history_ax.legend()

    # Right panel: parameter importance, shown only with enough completed trials.
    importance_ax.set_title('Hyperparameter Importance')
    n_complete = sum(
        1 for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE
    )

    if n_complete >= 5:
        importance = optuna.importance.get_param_importances(study)

        importance_ax.set_xlabel('Importance')
        importance_ax.barh(
            list(importance.keys()), list(importance.values()), color='black'
        )

    else:
        importance_ax.text(
            0.5, 0.5,
            'Not enough completed trials\nfor importance analysis',
            ha='center', va='center', transform=importance_ax.transAxes
        )

    plt.tight_layout()

    return fig, axes
@@ -12,11 +12,28 @@ def train_model(
12
12
  criterion: nn.Module,
13
13
  optimizer: optim.Optimizer,
14
14
  epochs: int = 10,
15
- print_every: int = 1
15
+ print_every: int = 1,
16
+ device: torch.device | str | None = None
16
17
  ) -> dict[str, list[float]]:
17
18
  '''Training loop for PyTorch classification model.
18
19
 
19
- Note: Assumes data is already on the correct device.
20
+ Handles both pre-loaded GPU data and lazy-loading (CPU data moved per-batch).
21
+
22
+ Args:
23
+ model: PyTorch model to train.
24
+ train_loader: DataLoader for training data.
25
+ val_loader: DataLoader for validation data.
26
+ criterion: Loss function.
27
+ optimizer: Optimizer.
28
+ epochs: Number of training epochs.
29
+ print_every: Print progress every n epochs.
30
+ device: Device to move data to per-batch. If None, assumes data is
31
+ already on the correct device (GPU pre-loading). If specified,
32
+ data will be moved to this device per-batch (lazy loading).
33
+
34
+ Returns:
35
+ Dictionary containing training history with keys:
36
+ 'train_loss', 'val_loss', 'train_accuracy', 'val_accuracy'.
20
37
  '''
21
38
 
22
39
  history = {'train_loss': [], 'val_loss': [], 'train_accuracy': [], 'val_accuracy': []}
@@ -30,6 +47,11 @@ def train_model(
30
47
  total = 0
31
48
 
32
49
  for images, labels in train_loader:
50
+
51
+ # Move to device if lazy loading
52
+ if device is not None:
53
+ images = images.to(device, non_blocking=True)
54
+ labels = labels.to(device, non_blocking=True)
33
55
 
34
56
  # Forward pass
35
57
  optimizer.zero_grad()
@@ -59,6 +81,11 @@ def train_model(
59
81
  with torch.no_grad():
60
82
 
61
83
  for images, labels in val_loader:
84
+
85
+ # Move to device if lazy loading
86
+ if device is not None:
87
+ images = images.to(device, non_blocking=True)
88
+ labels = labels.to(device, non_blocking=True)
62
89
 
63
90
  outputs = model(images)
64
91
  loss = criterion(outputs, labels)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cifar10_tools
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Tools for training neural networks on the CIFAR-10 task with PyTorch and TensorFlow
5
5
  License: GPLv3
6
6
  License-File: LICENSE
@@ -0,0 +1,12 @@
1
+ cifar10_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ cifar10_tools/pytorch/__init__.py,sha256=4er-aMGK-MZTlkH3Owz3x-Pz_Gl_NjplKwOBYdBA1p0,909
3
+ cifar10_tools/pytorch/data.py,sha256=09zodpjto0xLq95tDAyq57CFh6MSYRuUBPcMmQcyKZM,626
4
+ cifar10_tools/pytorch/evaluation.py,sha256=i4tRYOqWATVqQVkWT_fATWRbzo9ziX2DDkXKPaiQlFE,923
5
+ cifar10_tools/pytorch/hyperparameter_optimization.py,sha256=92MwDp6CarFp6O-tkJqeVqDyn0Az15gu3pluAvnO2mw,8056
6
+ cifar10_tools/pytorch/plotting.py,sha256=9kRDt9ZEX0uOUlt-9wzJHrx4WELuFYMeeQiJrmwyXNs,9550
7
+ cifar10_tools/pytorch/training.py,sha256=KNaH-Q9u61o3DIcTfBhjnOvOD7yExZeXwBm6qvMGL9I,3859
8
+ cifar10_tools/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ cifar10_tools-0.3.0.dist-info/METADATA,sha256=Ll6YMa77t9ubJLaiFF8BsMmDuj_pzTLejL6Wlje2Qwo,1580
10
+ cifar10_tools-0.3.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
11
+ cifar10_tools-0.3.0.dist-info/licenses/LICENSE,sha256=wtHfRwmCF5-_XUmYwrBKwJkGipvHVmh7GXJOKKeOe2U,1073
12
+ cifar10_tools-0.3.0.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- cifar10_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- cifar10_tools/pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- cifar10_tools/pytorch/data.py,sha256=09zodpjto0xLq95tDAyq57CFh6MSYRuUBPcMmQcyKZM,626
4
- cifar10_tools/pytorch/evaluation.py,sha256=i4tRYOqWATVqQVkWT_fATWRbzo9ziX2DDkXKPaiQlFE,923
5
- cifar10_tools/pytorch/plotting.py,sha256=B1ifJxbSEDpInnVk9c3o1fjVx534TPPKTWM5iusyzrE,7494
6
- cifar10_tools/pytorch/training.py,sha256=Sg6NlBT_DTyLzf-Ls3bYI8-8AwGFJblRj0MDnUmGP3Q,2642
7
- cifar10_tools/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- cifar10_tools-0.2.0.dist-info/METADATA,sha256=3s6_5lP8rAnEu5F9r5YKU-EqUi9UO3mNUFK1ikVgUfc,1580
9
- cifar10_tools-0.2.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
10
- cifar10_tools-0.2.0.dist-info/licenses/LICENSE,sha256=wtHfRwmCF5-_XUmYwrBKwJkGipvHVmh7GXJOKKeOe2U,1073
11
- cifar10_tools-0.2.0.dist-info/RECORD,,