PyPI - SURE-tools - Versions diffs - 2.4.5__py3-none-any.whl → 2.4.13__py3-none-any.whl - Mend - Supply Chain Defender

SURE-tools 2.4.5 (py3-none-any.whl) → 2.4.13 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of SURE-tools might be problematic. Click here for more details.

Files changed (10)

SURE/TranscriptomeDecoder.py CHANGED Viewed

@@ -1,28 +1,28 @@
 import torch
 import torch.nn as nn
-import torch.optim as optim
 import torch.nn.functional as F
+import torch.optim as optim
 from torch.utils.data import Dataset, DataLoader
 import numpy as np
-from typing import Dict, List, Tuple, Optional
-import matplotlib.pyplot as plt
-from tqdm import tqdm
+from typing import Dict, Optional
 import warnings
 warnings.filterwarnings('ignore')
 class TranscriptomeDecoder:
+    """Transcriptome decoder"""
     def __init__(self,
                  latent_dim: int = 100,
                  gene_dim: int = 60000,
-                 hidden_dim: int = 512,  # Reduced for memory efficiency
+                 hidden_dim: int = 512,
                  device: str = None):
         """
-        Whole-transcriptome decoder
+        Simple but powerful decoder for latent to transcriptome mapping
         Args:
             latent_dim: Latent variable dimension (typically 50-100)
             gene_dim: Number of genes (full transcriptome ~60,000)
-            hidden_dim: Hidden dimension (reduced for memory efficiency)
+            hidden_dim: Hidden dimension optimized
             device: Computation device
         """
         self.latent_dim = latent_dim
@@ -30,10 +30,6 @@ class TranscriptomeDecoder:
         self.hidden_dim = hidden_dim
         self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
-        # Memory optimization settings
-        self.gradient_checkpointing = True
-        self.mixed_precision = True
         # Initialize model
         self.model = self._build_model()
         self.model.to(self.device)
@@ -43,259 +39,163 @@ class TranscriptomeDecoder:
         self.training_history = None
         self.best_val_loss = float('inf')
-        print(f"🚀 TranscriptomeDecoder Initialized:")
+        print(f"🚀 SimpleTranscriptomeDecoder Initialized:")
         print(f"   - Latent Dimension: {latent_dim}")
         print(f"   - Gene Dimension: {gene_dim}")
         print(f"   - Hidden Dimension: {hidden_dim}")
         print(f"   - Device: {self.device}")
         print(f"   - Parameters: {sum(p.numel() for p in self.model.parameters()):,}")
-    class MemoryEfficientBlock(nn.Module):
-        """Memory-efficient building block with gradient checkpointing"""
-        def __init__(self, input_dim, output_dim, use_checkpointing=True):
-            super().__init__()
-            self.use_checkpointing = use_checkpointing
-            self.net = nn.Sequential(
-                nn.Linear(input_dim, output_dim),
-                nn.BatchNorm1d(output_dim),
-                nn.GELU(),
-                nn.Dropout(0.1)
-            )
-        def forward(self, x):
-            if self.use_checkpointing and self.training:
-                return torch.utils.checkpoint.checkpoint(self.net, x)
-            return self.net(x)
-    class SparseGeneProjection(nn.Module):
-        """Sparse gene projection to reduce memory usage"""
-        def __init__(self, latent_dim, gene_dim, projection_dim=256):
-            super().__init__()
-            self.projection_dim = projection_dim
-            self.gene_embeddings = nn.Parameter(torch.randn(gene_dim, projection_dim) * 0.02)
-            self.latent_projection = nn.Linear(latent_dim, projection_dim)
-            self.activation = nn.GELU()
-        def forward(self, latent):
-            # Project latent to gene space efficiently
-            batch_size = latent.shape[0]
-            latent_proj = self.latent_projection(latent)  # [batch, projection_dim]
-            # Efficient matrix multiplication
-            gene_embeds = self.gene_embeddings.T  # [projection_dim, gene_dim]
-            output = torch.matmul(latent_proj, gene_embeds)  # [batch, gene_dim]
-            return self.activation(output)
-    class ChunkedTransformer(nn.Module):
-        def __init__(self, gene_dim, hidden_dim=512, chunk_size=2000, num_layers=3):
-            super().__init__()
-            self.chunk_size = chunk_size
-            self.hidden_dim = hidden_dim
-            self.num_chunks = (gene_dim + chunk_size - 1) // chunk_size
-            # 共享的Transformer层
-            self.transformer_layers = nn.ModuleList([
-                nn.Sequential(
-                    nn.Linear(hidden_dim, hidden_dim),
-                    nn.GELU(),
-                    nn.Dropout(0.1),
-                    nn.Linear(hidden_dim, hidden_dim),
-                ) for _ in range(num_layers)
-            ])
-            # 每个chunk独立的投影层
-            self.input_projections = nn.ModuleList([
-                nn.Linear(min(chunk_size, gene_dim - i * chunk_size), hidden_dim)
-                for i in range(self.num_chunks)
-            ])
-            self.output_projections = nn.ModuleList([
-                nn.Linear(hidden_dim, min(chunk_size, gene_dim - i * chunk_size))
-                for i in range(self.num_chunks)
-            ])
-        def forward(self, x):
-            batch_size, gene_dim = x.shape
-            output = torch.zeros_like(x)
-            for i in range(self.num_chunks):
-                start_idx = i * self.chunk_size
-                end_idx = min((i + 1) * self.chunk_size, gene_dim)
-                current_chunk_size = end_idx - start_idx
-                chunk = x[:, start_idx:end_idx]  # [batch_size, current_chunk_size]
-                # 投影到hidden_dim
-                chunk_proj = self.input_projections[i](chunk)  # [batch_size, hidden_dim]
-                # Transformer处理
-                for layer in self.transformer_layers:
-                    chunk_proj = layer(chunk_proj) + chunk_proj
-                # 投影回原始维度
-                chunk_out = self.output_projections[i](chunk_proj)  # [batch_size, current_chunk_size]
-                output[:, start_idx:end_idx] = chunk_out
-            return output
     class Decoder(nn.Module):
-        """Decoder model"""
-        def __init__(self, latent_dim, gene_dim, hidden_dim):
+        """Memory-efficient decoder architecture with dimension handling"""
+        def __init__(self, latent_dim: int, gene_dim: int, hidden_dim: int):
             super().__init__()
             self.latent_dim = latent_dim
             self.gene_dim = gene_dim
             self.hidden_dim = hidden_dim
-            # Stage 1: Latent expansion (memory efficient)
+            # Stage 1: Latent variable expansion
             self.latent_expansion = nn.Sequential(
                 nn.Linear(latent_dim, hidden_dim * 2),
+                nn.BatchNorm1d(hidden_dim * 2),
                 nn.GELU(),
                 nn.Dropout(0.1),
                 nn.Linear(hidden_dim * 2, hidden_dim),
+                nn.BatchNorm1d(hidden_dim),
+                nn.GELU(),
             )
-            # Stage 2: Sparse gene projection
-            self.gene_projection = TranscriptomeDecoder.SparseGeneProjection(
-                latent_dim, gene_dim, hidden_dim
-            )
-            # Stage 3: Chunked processing
-            self.chunked_processor = TranscriptomeDecoder.ChunkedTransformer(
-                gene_dim, hidden_dim, chunk_size=2000, num_layers=3
+            # Stage 2: Direct projection to gene dimension (simpler approach)
+            self.gene_projector = nn.Sequential(
+                nn.Linear(hidden_dim, hidden_dim * 2),
+                nn.GELU(),
+                nn.Dropout(0.1),
+                nn.Linear(hidden_dim * 2, gene_dim),  # Direct projection to gene_dim
             )
-            # Stage 4: Multi-head output with memory efficiency
-            self.output_heads = nn.ModuleList([
-                nn.Sequential(
-                    nn.Linear(hidden_dim, hidden_dim // 2),
-                    nn.GELU(),
-                    nn.Linear(hidden_dim // 2, 1)
-                ) for _ in range(2)  # Reduced from 3 to 2 heads
-            ])
-            # Adaptive fusion
-            self.fusion_gate = nn.Sequential(
-                nn.Linear(hidden_dim, hidden_dim // 4),
+            # Stage 3: Lightweight gene interaction
+            self.gene_interaction = nn.Sequential(
+                nn.Conv1d(1, 32, kernel_size=3, padding=1),
                 nn.GELU(),
-                nn.Linear(hidden_dim // 4, len(self.output_heads)),
-                nn.Softmax(dim=-1)
+                nn.Dropout1d(0.1),
+                nn.Conv1d(32, 1, kernel_size=3, padding=1),
             )
             # Output scaling
             self.output_scale = nn.Parameter(torch.ones(1))
             self.output_bias = nn.Parameter(torch.zeros(1))
-            self.latent_to_gene = nn.Linear(hidden_dim, gene_dim)
             self._init_weights()
         def _init_weights(self):
+            """Weight initialization"""
             for module in self.modules():
                 if isinstance(module, nn.Linear):
                     nn.init.xavier_uniform_(module.weight)
                     if module.bias is not None:
                         nn.init.zeros_(module.bias)
+                elif isinstance(module, nn.Conv1d):
+                    nn.init.kaiming_uniform_(module.weight)
+                    if module.bias is not None:
+                        nn.init.zeros_(module.bias)
-        def forward(self, latent):
+        def forward(self, latent: torch.Tensor) -> torch.Tensor:
             batch_size = latent.shape[0]
-            # 1. Latent expansion
-            latent_expanded = self.latent_expansion(latent)
-            # 2. Gene projection (memory efficient)
-            gene_features = self.gene_projection(latent)
+            # 1. Expand latent variables
+            latent_features = self.latent_expansion(latent)  # [batch_size, hidden_dim]
-            # 3. Add latent information
-            latent_gene_injection = self.latent_to_gene(latent_expanded)
-            gene_features = gene_features + latent_gene_injection
+            # 2. Direct projection to gene dimension
+            gene_output = self.gene_projector(latent_features)  # [batch_size, gene_dim]
-            # 4. Chunked processing (memory efficient)
-            gene_features = self.chunked_processor(gene_features)
+            # 3. Gene interaction with dimension safety
+            if self.gene_dim > 1:  # Only apply if gene_dim > 1
+                gene_output = gene_output.unsqueeze(1)  # [batch_size, 1, gene_dim]
+                interaction_output = self.gene_interaction(gene_output)  # [batch_size, 1, gene_dim]
+                gene_output = gene_output + interaction_output  # Residual connection
+                gene_output = gene_output.squeeze(1)  # [batch_size, gene_dim]
-            # 5. Multi-head output with chunking
-            final_output = torch.zeros(batch_size, self.gene_dim, device=latent.device)
+            # 4. Final activation (ensure non-negative)
+            gene_output = F.softplus(gene_output * self.output_scale + self.output_bias)
-            # Process output in chunks
-            chunk_size = 5000
-            for i in range(0, self.gene_dim, chunk_size):
-                end_idx = min(i + chunk_size, self.gene_dim)
-                chunk = gene_features[:, i:end_idx]
-                head_outputs = []
-                for head in self.output_heads:
-                    head_out = head(chunk).squeeze(-1)
-                    head_outputs.append(head_out)
-                # Adaptive fusion
-                gate_weights = self.fusion_gate(chunk.mean(dim=1, keepdim=True))
-                gate_weights = gate_weights.unsqueeze(1)
-                # Weighted fusion
-                chunk_output = torch.zeros_like(head_outputs[0])
-                for j, head_out in enumerate(head_outputs):
-                    chunk_output = chunk_output + gate_weights[:, :, j] * head_out
-                final_output[:, i:end_idx] = chunk_output
-            # Final activation
-            final_output = F.softplus(final_output * self.output_scale + self.output_bias)
-            return final_output
+            return gene_output
     def _build_model(self):
-        """Build model"""
+        """Build the decoder model"""
         return self.Decoder(self.latent_dim, self.gene_dim, self.hidden_dim)
+    def _create_dataset(self, latent_data, expression_data):
+        """Create dataset with dimension validation"""
+        class SimpleDataset(Dataset):
+            def __init__(self, latent, expression):
+                # Ensure dimensions match
+                assert latent.shape[0] == expression.shape[0], "Sample count mismatch"
+                assert latent.shape[1] == self.latent_dim, f"Latent dim mismatch: expected {self.latent_dim}, got {latent.shape[1]}"
+                assert expression.shape[1] == self.gene_dim, f"Gene dim mismatch: expected {self.gene_dim}, got {expression.shape[1]}"
+                self.latent = torch.FloatTensor(latent)
+                self.expression = torch.FloatTensor(expression)
+            def __len__(self):
+                return len(self.latent)
+            def __getitem__(self, idx):
+                return self.latent[idx], self.expression[idx]
+        return SimpleDataset(latent_data, expression_data)
     def train(self,
               train_latent: np.ndarray,
               train_expression: np.ndarray,
               val_latent: np.ndarray = None,
               val_expression: np.ndarray = None,
-              batch_size: int = 16,  # Reduced batch size for memory
+              batch_size: int = 32,
               num_epochs: int = 100,
               learning_rate: float = 1e-4,
               checkpoint_path: str = 'transcriptome_decoder.pth'):
         """
-        Memory-efficient training with optimizations
+        Train the decoder model with dimension safety
         Args:
-            train_latent: Training latent variables
-            train_expression: Training expression data
-            val_latent: Validation latent variables
-            val_expression: Validation expression data
-            batch_size: Reduced batch size for memory constraints
+            train_latent: Training latent variables [n_samples, latent_dim]
+            train_expression: Training expression data [n_samples, gene_dim]
+            val_latent: Validation latent variables (optional)
+            val_expression: Validation expression data (optional)
+            batch_size: Batch size optimized for memory
             num_epochs: Number of training epochs
             learning_rate: Learning rate
-            checkpoint_path: Model save path
+            checkpoint_path: Path to save the best model
         """
-        print("🚀 Starting Training...")
-        print(f"📊 Batch size: {batch_size}")
+        print("🚀 Starting training...")
-        # Enable memory optimizations
-        torch.backends.cudnn.benchmark = True
-        if self.mixed_precision:
-            scaler = torch.cuda.amp.GradScaler()
+        # Dimension validation
+        self._validate_data_dimensions(train_latent, train_expression, "Training")
+        if val_latent is not None and val_expression is not None:
+            self._validate_data_dimensions(val_latent, val_expression, "Validation")
         # Data preparation
-        train_dataset = self._create_dataset(train_latent, train_expression)
+        train_dataset = self._create_safe_dataset(train_latent, train_expression)
         if val_latent is not None and val_expression is not None:
-            val_dataset = self._create_dataset(val_latent, val_expression)
+            val_dataset = self._create_safe_dataset(val_latent, val_expression)
+            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
+            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
+            print(f"📈 Using provided validation data: {len(val_dataset)} samples")
         else:
             # Auto split
             train_size = int(0.9 * len(train_dataset))
             val_size = len(train_dataset) - train_size
-            train_dataset, val_dataset = torch.utils.data.random_split(
-                train_dataset, [train_size, val_size]
-            )
+            train_subset, val_subset = torch.utils.data.random_split(train_dataset, [train_size, val_size])
+            train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
+            val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)
+            print(f"📈 Auto-split validation: {val_size} samples")
-        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
-                                 pin_memory=True, num_workers=2)
-        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
-                               pin_memory=True, num_workers=2)
+        print(f"📊 Training samples: {len(train_loader.dataset)}")
+        print(f"📊 Validation samples: {len(val_loader.dataset)}")
+        print(f"📊 Batch size: {batch_size}")
-        # Optimizer with memory-friendly settings
+        # Optimizer configuration
         optimizer = optim.AdamW(
             self.model.parameters(),
             lr=learning_rate,
@@ -306,130 +206,181 @@ class TranscriptomeDecoder:
         # Learning rate scheduler
         scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
-        # Loss function
-        criterion = nn.MSELoss()
+        # Loss function with dimension safety
+        def safe_loss(pred, target):
+            # Ensure dimensions match
+            if pred.shape != target.shape:
+                print(f"⚠️ Dimension mismatch: pred {pred.shape}, target {target.shape}")
+                # Truncate to minimum dimension (safety measure)
+                min_dim = min(pred.shape[1], target.shape[1])
+                pred = pred[:, :min_dim]
+                target = target[:, :min_dim]
+            def correlation_loss(pred, target):
+                pred_centered = pred - pred.mean(dim=1, keepdim=True)
+                target_centered = target - target.mean(dim=1, keepdim=True)
+                correlation = (pred_centered * target_centered).sum(dim=1) / (
+                    torch.sqrt(torch.sum(pred_centered ** 2, dim=1)) *
+                    torch.sqrt(torch.sum(target_centered ** 2, dim=1)) + 1e-8
+                )
+                return 1 - correlation.mean()
+            mse_loss = F.mse_loss(pred, target)
+            poisson_loss = (pred - target * torch.log(pred + 1e-8)).mean()
+            corr_loss = correlation_loss(pred, target)
+            return mse_loss + 0.5 * poisson_loss + 0.3 * corr_loss
         # Training history
         history = {
-            'train_loss': [], 'val_loss': [],
-            'learning_rate': [], 'memory_usage': []
+            'train_loss': [],
+            'val_loss': [],
+            'learning_rate': []
         }
         best_val_loss = float('inf')
+        patience = 15
+        patience_counter = 0
+        print("\n📈 Starting training loop...")
         for epoch in range(1, num_epochs + 1):
-            print(f"\n📍 Epoch {epoch}/{num_epochs}")
-            # Training phase with memory monitoring
-            train_loss = self._train_epoch(
-                train_loader, optimizer, criterion, scaler if self.mixed_precision else None
-            )
+            # Training phase
+            train_loss = self._train_epoch(train_loader, optimizer, safe_loss)
             # Validation phase
-            val_loss = self._validate_epoch(val_loader, criterion)
+            val_loss = self._validate_epoch(val_loader, safe_loss)
             # Update scheduler
             scheduler.step()
+            current_lr = scheduler.get_last_lr()[0]
             # Record history
             history['train_loss'].append(train_loss)
             history['val_loss'].append(val_loss)
-            history['learning_rate'].append(optimizer.param_groups[0]['lr'])
-            # Memory usage tracking
-            if torch.cuda.is_available():
-                memory_used = torch.cuda.memory_allocated() / 1024**3  # GB
-                history['memory_usage'].append(memory_used)
-                print(f"💾 GPU Memory: {memory_used:.1f}GB / 20GB")
+            history['learning_rate'].append(current_lr)
-            print(f"📊 Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
-            print(f"⚡ Learning Rate: {optimizer.param_groups[0]['lr']:.2e}")
+            # Print progress
+            if epoch % 5 == 0 or epoch == 1:
+                print(f"📍 Epoch {epoch:3d}/{num_epochs} | "
+                      f"Train Loss: {train_loss:.4f} | "
+                      f"Val Loss: {val_loss:.4f} | "
+                      f"LR: {current_lr:.2e}")
-            # Save best model
+            # Early stopping and model saving
             if val_loss < best_val_loss:
                 best_val_loss = val_loss
+                patience_counter = 0
                 self._save_checkpoint(epoch, optimizer, scheduler, best_val_loss, history, checkpoint_path)
-                print("💾 Best model saved!")
+                if epoch % 10 == 0:
+                    print(f"💾 Best model saved (Val Loss: {best_val_loss:.4f})")
+            else:
+                patience_counter += 1
+                if patience_counter >= patience:
+                    print(f"🛑 Early stopping at epoch {epoch}")
+                    break
+        # Training completed
         self.is_trained = True
         self.training_history = history
         self.best_val_loss = best_val_loss
-        print(f"\n🎉 Training completed! Best validation loss: {best_val_loss:.4f}")
+        print(f"\n🎉 Training completed!")
+        print(f"🏆 Best validation loss: {best_val_loss:.4f}")
+        print(f"📊 Final training loss: {history['train_loss'][-1]:.4f}")
         return history
-    def _train_epoch(self, train_loader, optimizer, criterion, scaler=None):
-        """Training epoch"""
+    def _validate_data_dimensions(self, latent_data, expression_data, data_type):
+        """Validate input data dimensions"""
+        assert latent_data.shape[1] == self.latent_dim, (
+            f"{data_type} latent dimension mismatch: expected {self.latent_dim}, got {latent_data.shape[1]}")
+        assert expression_data.shape[1] == self.gene_dim, (
+            f"{data_type} gene dimension mismatch: expected {self.gene_dim}, got {expression_data.shape[1]}")
+        assert latent_data.shape[0] == expression_data.shape[0], (
+            f"{data_type} sample count mismatch: latent {latent_data.shape[0]}, expression {expression_data.shape[0]}")
+        print(f"✅ {data_type} data dimensions validated")
+    def _create_safe_dataset(self, latent_data, expression_data):
+        """Create dataset with safety checks"""
+        class SafeDataset(Dataset):
+            def __init__(self, latent, expression):
+                self.latent = torch.FloatTensor(latent)
+                self.expression = torch.FloatTensor(expression)
+                # Safety check
+                if self.latent.shape[0] != self.expression.shape[0]:
+                    raise ValueError(f"Sample count mismatch: latent {self.latent.shape[0]}, expression {self.expression.shape[0]}")
+            def __len__(self):
+                return len(self.latent)
+            def __getitem__(self, idx):
+                return self.latent[idx], self.expression[idx]
+        return SafeDataset(latent_data, expression_data)
+    def _train_epoch(self, train_loader, optimizer, loss_fn):
+        """Train for one epoch with dimension safety"""
         self.model.train()
         total_loss = 0
-        pbar = tqdm(train_loader, desc='Training')
-        for latent, target in pbar:
-            latent = latent.to(self.device, non_blocking=True)
-            target = target.to(self.device, non_blocking=True)
-            optimizer.zero_grad(set_to_none=True)  # Memory optimization
+        for batch_idx, (latent, target) in enumerate(train_loader):
+            latent = latent.to(self.device)
+            target = target.to(self.device)
-            if scaler:  # Mixed precision training
-                with torch.cuda.amp.autocast():
-                    pred = self.model(latent)
-                    loss = criterion(pred, target)
+            # Dimension check
+            if latent.shape[1] != self.latent_dim:
+                print(f"⚠️ Batch {batch_idx}: Latent dim mismatch {latent.shape[1]} != {self.latent_dim}")
+                continue
-                scaler.scale(loss).backward()
-                scaler.step(optimizer)
-                scaler.update()
-            else:
-                pred = self.model(latent)
-                loss = criterion(pred, target)
-                loss.backward()
-                optimizer.step()
+            optimizer.zero_grad()
+            pred = self.model(latent)
-            total_loss += loss.item()
-            pbar.set_postfix({'Loss': f'{loss.item():.4f}'})
+            # Final dimension check before loss calculation
+            if pred.shape[1] != target.shape[1]:
+                min_dim = min(pred.shape[1], target.shape[1])
+                pred = pred[:, :min_dim]
+                target = target[:, :min_dim]
+                if batch_idx == 0:  # Only warn once
+                    print(f"⚠️ Truncating to min dimension: {min_dim}")
-            # Clear memory
-            del pred, loss
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
+            loss = loss_fn(pred, target)
+            loss.backward()
+            # Gradient clipping for stability
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+            optimizer.step()
+            total_loss += loss.item()
         return total_loss / len(train_loader)
-    def _validate_epoch(self, val_loader, criterion):
-        """Validation"""
+    def _validate_epoch(self, val_loader, loss_fn):
+        """Validate for one epoch with dimension safety"""
         self.model.eval()
         total_loss = 0
         with torch.no_grad():
-            for latent, target in val_loader:
-                latent = latent.to(self.device, non_blocking=True)
-                target = target.to(self.device, non_blocking=True)
+            for batch_idx, (latent, target) in enumerate(val_loader):
+                latent = latent.to(self.device)
+                target = target.to(self.device)
                 pred = self.model(latent)
-                loss = criterion(pred, target)
-                total_loss += loss.item()
-                # Clear memory
-                del pred, loss
+                # Dimension safety
+                if pred.shape[1] != target.shape[1]:
+                    min_dim = min(pred.shape[1], target.shape[1])
+                    pred = pred[:, :min_dim]
+                    target = target[:, :min_dim]
+                loss = loss_fn(pred, target)
+                total_loss += loss.item()
         return total_loss / len(val_loader)
-    def _create_dataset(self, latent_data, expression_data):
-        """Create dataset"""
-        class EfficientDataset(Dataset):
-            def __init__(self, latent, expression):
-                self.latent = torch.FloatTensor(latent)
-                self.expression = torch.FloatTensor(expression)
-            def __len__(self):
-                return len(self.latent)
-            def __getitem__(self, idx):
-                return self.latent[idx], self.expression[idx]
-        return EfficientDataset(latent_data, expression_data)
     def _save_checkpoint(self, epoch, optimizer, scheduler, best_loss, history, path):
-        """Save checkpoint"""
+        """Save model checkpoint"""
         torch.save({
             'epoch': epoch,
             'model_state_dict': self.model.state_dict(),
@@ -444,22 +395,26 @@ class TranscriptomeDecoder:
             }
         }, path)
-    def predict(self, latent_data: np.ndarray, batch_size: int = 8) -> np.ndarray:
+    def predict(self, latent_data: np.ndarray, batch_size: int = 32) -> np.ndarray:
         """
-        Prediction
+        Predict gene expression from latent variables
         Args:
             latent_data: Latent variables [n_samples, latent_dim]
-            batch_size: Prediction batch size for memory control
+            batch_size: Prediction batch size
         Returns:
             expression: Predicted expression [n_samples, gene_dim]
         """
         if not self.is_trained:
-            warnings.warn("Model not trained. Predictions may be inaccurate.")
+            warnings.warn("⚠️ Model not trained. Predictions may be inaccurate.")
         self.model.eval()
+        # Input validation
+        if latent_data.shape[1] != self.latent_dim:
+            raise ValueError(f"Latent dimension mismatch: expected {self.latent_dim}, got {latent_data.shape[1]}")
         if isinstance(latent_data, np.ndarray):
             latent_data = torch.FloatTensor(latent_data)
@@ -470,76 +425,83 @@ class TranscriptomeDecoder:
                 batch_latent = latent_data[i:i+batch_size].to(self.device)
                 batch_pred = self.model(batch_latent)
                 predictions.append(batch_pred.cpu())
-                # Clear memory
-                del batch_pred
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
         return torch.cat(predictions).numpy()
     def load_model(self, model_path: str):
         """Load pre-trained model"""
         checkpoint = torch.load(model_path, map_location=self.device)
+        # Check model configuration
+        if 'model_config' in checkpoint:
+            config = checkpoint['model_config']
+            if (config['latent_dim'] != self.latent_dim or
+                config['gene_dim'] != self.gene_dim):
+                print("⚠️ Model configuration mismatch. Reinitializing model.")
+                self.model = self._build_model()
+                self.model.to(self.device)
         self.model.load_state_dict(checkpoint['model_state_dict'])
         self.is_trained = True
         self.training_history = checkpoint.get('training_history')
         self.best_val_loss = checkpoint.get('best_val_loss', float('inf'))
-        print(f"✅ Model loaded! Best validation loss: {self.best_val_loss:.4f}")
-    def get_memory_info(self) -> Dict:
-        """Get memory usage information"""
-        if torch.cuda.is_available():
-            memory_allocated = torch.cuda.memory_allocated() / 1024**3
-            memory_reserved = torch.cuda.memory_reserved() / 1024**3
-            return {
-                'allocated_gb': memory_allocated,
-                'reserved_gb': memory_reserved,
-                'available_gb': 20 - memory_allocated,
-                'utilization_percent': (memory_allocated / 20) * 100
-            }
-        return {'available_gb': 'N/A (CPU mode)'}
+        print(f"✅ Model loaded successfully!")
+        print(f"🏆 Best validation loss: {self.best_val_loss:.4f}")
+    def get_model_info(self) -> Dict:
+        """Get model information"""
+        return {
+            'is_trained': self.is_trained,
+            'best_val_loss': self.best_val_loss,
+            'parameters': sum(p.numel() for p in self.model.parameters()),
+            'latent_dim': self.latent_dim,
+            'gene_dim': self.gene_dim,
+            'hidden_dim': self.hidden_dim,
+            'device': str(self.device)
+        }
 '''
-# Example usage with memory monitoring
+# Example usage
 def example_usage():
-    """Memory-efficient example"""
+    """Example demonstration with dimension safety"""
-    # 1. Initialize memory-efficient decoder
-    decoder = TranscriptomeDecoder(
+    # 1. Initialize decoder
+    decoder = SimpleTranscriptomeDecoder(
         latent_dim=100,
         gene_dim=2000,  # Reduced for example
-        hidden_dim=256   # Reduced for memory
+        hidden_dim=256
     )
-    # Check memory info
-    memory_info = decoder.get_memory_info()
-    print(f"📊 Memory Info: {memory_info}")
-    # 2. Generate example data
-    n_samples = 500  # Reduced for memory
+    # 2. Generate example data with correct dimensions
+    n_samples = 1000
     latent_data = np.random.randn(n_samples, 100).astype(np.float32)
-    expression_data = np.random.randn(n_samples, 2000).astype(np.float32)
+    # Create simulated expression data
+    weights = np.random.randn(100, 2000) * 0.1
+    expression_data = np.tanh(latent_data.dot(weights))
     expression_data = np.maximum(expression_data, 0)  # Non-negative
-    print(f"📈 Data shapes: Latent {latent_data.shape}, Expression {expression_data.shape}")
+    print(f"📊 Data shapes: Latent {latent_data.shape}, Expression {expression_data.shape}")
-    # 3. Train with memory monitoring
+    # 3. Train the model
     history = decoder.train(
         train_latent=latent_data,
         train_expression=expression_data,
-        batch_size=8,  # Small batch for memory
-        num_epochs=20   # Reduced for example
+        batch_size=32,
+        num_epochs=50,
+        learning_rate=1e-4
     )
-    # 4. Memory-efficient prediction
-    test_latent = np.random.randn(5, 100).astype(np.float32)
-    predictions = decoder.predict(test_latent, batch_size=2)
+    # 4. Make predictions
+    test_latent = np.random.randn(10, 100).astype(np.float32)
+    predictions = decoder.predict(test_latent)
     print(f"🔮 Prediction shape: {predictions.shape}")
-    # 5. Final memory check
-    final_memory = decoder.get_memory_info()
-    print(f"💾 Final memory usage: {final_memory}")
+    # 5. Get model info
+    info = decoder.get_model_info()
+    print(f"\n📋 Model Info:")
+    for key, value in info.items():
+        print(f"   {key}: {value}")
     return decoder