PyPI - gridfm-graphkit - Versions diffs - 0.0.1__py3-none-any.whl - Mend

gridfm-graphkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

gridfm_graphkit/__init__.py +0 -0
gridfm_graphkit/__main__.py +62 -0
gridfm_graphkit/cli.py +530 -0
gridfm_graphkit/datasets/__init__.py +0 -0
gridfm_graphkit/datasets/data_normalization.py +227 -0
gridfm_graphkit/datasets/globals.py +19 -0
gridfm_graphkit/datasets/powergrid.py +192 -0
gridfm_graphkit/datasets/transforms.py +223 -0
gridfm_graphkit/datasets/utils.py +65 -0
gridfm_graphkit/io/__init__.py +0 -0
gridfm_graphkit/io/param_handler.py +293 -0
gridfm_graphkit/models/__init__.py +0 -0
gridfm_graphkit/models/gps_transformer.py +143 -0
gridfm_graphkit/models/graphTransformer.py +96 -0
gridfm_graphkit/training/__init__.py +0 -0
gridfm_graphkit/training/callbacks.py +47 -0
gridfm_graphkit/training/plugins.py +218 -0
gridfm_graphkit/training/trainer.py +156 -0
gridfm_graphkit/utils/__init__.py +0 -0
gridfm_graphkit/utils/loss.py +198 -0
gridfm_graphkit/utils/visualization.py +324 -0
gridfm_graphkit-0.0.1.dist-info/METADATA +163 -0
gridfm_graphkit-0.0.1.dist-info/RECORD +27 -0
gridfm_graphkit-0.0.1.dist-info/WHEEL +5 -0
gridfm_graphkit-0.0.1.dist-info/entry_points.txt +2 -0
gridfm_graphkit-0.0.1.dist-info/licenses/LICENSE +201 -0
gridfm_graphkit-0.0.1.dist-info/top_level.txt +1 -0

gridfm_graphkit/training/plugins.py ADDED Viewed

@@ -0,0 +1,218 @@
+from abc import abstractmethod
+from typing import Dict, Optional
+import mlflow
+import os
+import torch
+import torch.nn as nn
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import LRScheduler
class TrainerPlugin:
    """
    Base class for training plugins.

    A `TrainerPlugin` is invoked during the training process either at regular step intervals,
    at the end of each epoch, or both. It can be extended to perform actions like logging,
    checkpointing, or validation.

    Args:
        steps (int, optional): Interval (in steps) to run the plugin. If `None`, only runs at end of epoch
    """

    def __init__(self, steps: Optional[int] = None):
        self.steps = steps

    def run(self, step: int, end_of_epoch: bool) -> bool:
        """
        Determines whether to execute the plugin at the current step.

        Args:
            step (int): The current step number.
            end_of_epoch (bool): Whether this is the end of the epoch.

        Returns:
            bool: True if the plugin should run; False otherwise.
        """
        # Epoch ends always trigger the plugin.
        if end_of_epoch:
            return True
        # Without a step interval only epoch ends are recorded, and this isn't one.
        if self.steps is None:
            return False
        # Fire whenever (step + 1) is a multiple of `self.steps`, i.e. on steps
        # steps-1, 2*steps-1, ...; step 0 itself never fires.
        return step != 0 and (step + 1) % self.steps == 0

    # NOTE: the class does not derive from `abc.ABC`, so `@abstractmethod` is
    # only a marker here; instantiating `TrainerPlugin` directly is not blocked.
    @abstractmethod
    def step(
        self,
        epoch: int,
        step: int,
        metrics: Optional[Dict] = None,
        end_of_epoch: bool = False,
        **kwargs,
    ):
        """
        Hook invoked by the trainer; once more with `end_of_epoch=True` when an
        epoch finishes. Implementations can use the passed in parameters for
        validation, checkpointing, logging, etc.

        Args:
            epoch (int): The current epoch number.
            step (int): The current step number.
            metrics (dict, optional): Dictionary of training metrics (e.g., loss).
                `None` (the default) means that no metrics were supplied.
            end_of_epoch (bool): Indicates if this call is at the end of an epoch.
            **kwargs (Any): Additional parameters such as model, optimizer, scheduler.
        """
        pass
class MLflowLoggerPlugin(TrainerPlugin):
    """
    Plugin to log training metrics to MLflow.

    Every metric passed to `step` is accumulated, and the mean of each metric
    over the epoch is logged to MLflow when `step` is called with
    `end_of_epoch=True`. Initial training parameters are logged once at
    construction time.

    Args:
        steps (int, optional): Step interval stored on the plugin. `step` does
            not consult it; metrics are only written at epoch ends.
        params (dict, optional): Parameters to log to MLflow at the start.
    """

    def __init__(self, steps: Optional[int] = None, params: Optional[dict] = None):
        # The base class stores `steps`; no need to assign it again here.
        super().__init__(steps=steps)
        # Maps metric name -> list of values seen since the last epoch end.
        self.metrics_history = {}
        if params:
            # Log parameters to MLflow at the beginning of training
            mlflow.log_params(params)

    def step(
        self,
        epoch: int,
        step: int,
        metrics: Optional[Dict] = None,
        end_of_epoch: bool = False,
        **kwargs,
    ):
        """
        Accumulates the given metrics and, at the end of an epoch, logs their means to MLflow.

        Args:
            epoch (int): The current epoch number; used as the MLflow step of the logged means.
            step (int): The current step number (unused here).
            metrics (Dict, optional): Dictionary of metrics to record, e.g., {'train_loss': value}.
            end_of_epoch (bool): Flag indicating whether this is the end of the epoch.
            **kwargs (Any): Ignored; accepted for compatibility with the trainer call.
        """
        for metric_name, metric_value in (metrics or {}).items():
            self.metrics_history.setdefault(metric_name, []).append(metric_value)

        if not end_of_epoch:
            return

        for metric_name, values in self.metrics_history.items():
            if values:  # Avoid division by zero on empty lists
                mlflow.log_metric(str(metric_name), sum(values) / len(values), step=epoch)
        # Start the next epoch with a clean history
        self.metrics_history = {}
class CheckpointerPlugin(TrainerPlugin):
    """
    Plugin to periodically save model checkpoints.

    Stores the model, optimizer, and scheduler states to a given directory
    at specified step intervals or at the end of each epoch. Every save writes
    to the same file (`checkpoint_last_epoch.pth`), so only the most recent
    checkpoint is kept.

    Args:
        checkpoint_dir (str): Directory where checkpoints will be saved.
            It is created if it does not exist.
        steps (int, optional): Interval in steps for checkpointing.
    """

    def __init__(
        self,
        checkpoint_dir: str,
        steps: Optional[int] = None,
    ):
        super().__init__(steps=steps)
        self.checkpoint_dir = checkpoint_dir
        os.makedirs(self.checkpoint_dir, exist_ok=True)

    def step(
        self,
        epoch: int,
        step: int,
        metrics: Optional[Dict] = None,
        end_of_epoch: bool = False,
        model: Optional[nn.Module] = None,
        optimizer: Optional[Optimizer] = None,
        scheduler: Optional[LRScheduler] = None,
        **kwargs,
    ):
        """
        Saves a checkpoint if the conditions to run the plugin are met.

        Args:
            epoch (int): Current epoch number.
            step (int): Current training step.
            metrics (dict, optional): Metrics dictionary (unused here).
            end_of_epoch (bool): Whether this is the end of the epoch.
            model (nn.Module, optional): Model to be checkpointed.
            optimizer (Optimizer, optional): Optimizer to save.
            scheduler (LRScheduler, optional): Scheduler to save.
            **kwargs (Any): Ignored; accepted so the signature matches the
                base class and extra trainer arguments do not raise.
        """
        # Check if we should save at this step or end of epoch
        if not self.run(step, end_of_epoch):
            return

        # Compare against None explicitly: containers such as an empty
        # nn.Sequential define __len__ and would be falsy in a truth test.
        checkpoint = {
            "epoch": epoch,
            "model_state_dict": model.state_dict() if model is not None else None,
            "optimizer_state_dict": (
                optimizer.state_dict() if optimizer is not None else None
            ),
            "scheduler_state_dict": (
                scheduler.state_dict() if scheduler is not None else None
            ),
        }
        checkpoint_path = os.path.join(
            self.checkpoint_dir,
            "checkpoint_last_epoch.pth",
        )
        torch.save(checkpoint, checkpoint_path)
class MetricsTrackerPlugin(TrainerPlugin):
    """
    Tracks metrics and records the mean validation loss of every epoch.

    Values reported under the key "Validation Loss" are averaged at the end of
    each epoch and appended to `validation_losses`. The per-epoch history is
    reset afterwards so that each entry reflects a single epoch only.
    """

    def __init__(self):
        super().__init__()
        # One mean validation loss per completed epoch.
        self.validation_losses = []
        # Maps metric name -> list of values seen during the current epoch.
        self.metrics_history = {}

    def step(
        self,
        epoch: int,
        step: int,
        metrics: Optional[Dict] = None,
        end_of_epoch: bool = False,
        **kwargs,
    ):
        """
        Accumulates the given metrics and, at the end of an epoch, stores the mean validation loss.

        Args:
            epoch (int): The current epoch number (unused here).
            step (int): The current step number (unused here).
            metrics (Dict, optional): Dictionary of metrics to record.
            end_of_epoch (bool): Flag indicating whether this is the end of the epoch.
            **kwargs (Any): Ignored; accepted for compatibility with the trainer call.
        """
        for metric_name, metric_value in (metrics or {}).items():
            self.metrics_history.setdefault(metric_name, []).append(metric_value)

        if not end_of_epoch:
            return

        values = self.metrics_history.get("Validation Loss")
        if values:  # Avoid division by zero on empty lists
            self.validation_losses.append(sum(values) / len(values))
        # Drop this epoch's values; otherwise later means would be running
        # averages over all previous epochs instead of per-epoch means.
        self.metrics_history = {}

    def get_losses(self):
        """Returns the list of per-epoch mean validation losses recorded so far."""
        return self.validation_losses

gridfm_graphkit/training/trainer.py ADDED Viewed

@@ -0,0 +1,156 @@
from typing import List, Optional

import torch
from torch import nn
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from tqdm import tqdm

from gridfm_graphkit.training.callbacks import EarlyStopper
from gridfm_graphkit.training.plugins import TrainerPlugin
class Trainer:
    """
    A flexible training loop for GridFM models with validation, optional learning rate scheduling,
    and plugin callbacks for logging or custom behavior.

    Attributes:
        model (nn.Module): The PyTorch model to train. The trainer reads its
            `mask_value` and `learn_mask` attributes.
        optimizer (Optimizer): The optimizer used for updating model parameters.
        device: The device to train on (CPU or CUDA).
        loss_fn (nn.Module): Loss function that returns a loss dictionary with a "loss" entry.
        early_stopper (EarlyStopper): Callback for early stopping based on validation loss.
        train_dataloader (DataLoader): Dataloader for training data.
        val_dataloader (DataLoader): Dataloader for validation data.
        lr_scheduler (optional): Learning rate scheduler; its `step` is called
            with the mean validation loss after every epoch.
        plugins (List[TrainerPlugin]): List of plugin callbacks.
    """

    def __init__(
        self,
        model: nn.Module,
        optimizer: Optimizer,
        device,
        loss_fn: nn.Module,
        early_stopper: EarlyStopper,
        train_dataloader: DataLoader,
        val_dataloader: DataLoader,
        lr_scheduler=None,
        plugins: Optional[List[TrainerPlugin]] = None,
    ):
        self.model = model
        self.optimizer = optimizer
        self.device = device
        self.early_stopper = early_stopper
        self.loss_fn = loss_fn
        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.lr_scheduler = lr_scheduler
        # A fresh list per instance; a mutable default would be shared by all trainers.
        self.plugins = plugins if plugins is not None else []

    def __one_step(
        self,
        input: torch.Tensor,
        edge_index: torch.Tensor,
        label: torch.Tensor,
        edge_attr: torch.Tensor,
        mask: torch.Tensor = None,
        batch: torch.Tensor = None,
        pe: torch.Tensor = None,
        val: bool = False,
    ):
        """
        Runs the model on one batch and, unless `val` is set, performs an optimizer update.

        Args:
            input (torch.Tensor): Node features; masked entries are overwritten in place.
            edge_index (torch.Tensor): Edge index passed to the model and the loss.
            label (torch.Tensor): Targets passed to the loss.
            edge_attr (torch.Tensor): Edge attributes passed to the model and the loss.
            mask (torch.Tensor, optional): Boolean mask of the entries to replace
                by the model's mask value. Skipped when `None`.
            batch (torch.Tensor, optional): Batch assignment passed to the model.
            pe (torch.Tensor, optional): Positional encoding passed to the model.
            val (bool): If True, no backward pass or optimizer step is done.

        Returns:
            dict: The dictionary returned by the loss function.
        """
        if mask is not None:
            # expand the learnable mask to the input shape
            mask_value_expanded = self.model.mask_value.expand(input.shape[0], -1)
            # The line below will overwrite the last mask values, which is fine as long as
            # the features which are masked do not change between batches.
            # Set the learnable mask into the input where it should be masked.
            input[:, : mask.shape[1]][mask] = mask_value_expanded[mask]

        output = self.model(input, pe, edge_index, edge_attr, batch)
        loss_dict = self.loss_fn(output, label, edge_index, edge_attr, mask)

        if not val:
            self.optimizer.zero_grad()
            loss_dict["loss"].backward()
            self.optimizer.step()
        return loss_dict

    def __one_epoch(self, epoch: int, prev_step: int):
        """
        Trains for one epoch, then validates and notifies the plugins.

        Args:
            epoch (int): The current epoch number.
            prev_step (int): Step number preceding the first step of this epoch.

        Returns:
            float: The validation loss averaged over the validation batches.
        """
        self.model.train()
        highest_step = prev_step
        for batch_idx, batch in enumerate(self.train_dataloader):
            step = prev_step + batch_idx + 1
            highest_step = step
            batch = batch.to(self.device)
            mask = getattr(batch, "mask", None)
            loss_dict = self.__one_step(
                batch.x,
                batch.edge_index,
                batch.y,
                batch.edge_attr,
                mask,
                batch.batch,
                batch.pe,
            )
            metrics = {
                "Training Loss": loss_dict["loss"].item(),
                "Learning Rate": self.optimizer.param_groups[0]["lr"],
            }
            if self.model.learn_mask:
                metrics["Mask Gradient Norm"] = self.model.mask_value.grad.norm().item()
            for plugin in self.plugins:
                plugin.step(epoch, step, metrics=metrics)

        self.model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in self.val_dataloader:
                batch = batch.to(self.device)
                mask = getattr(batch, "mask", None)
                metrics = self.__one_step(
                    batch.x,
                    batch.edge_index,
                    batch.y,
                    batch.edge_attr,
                    mask,
                    batch.batch,
                    batch.pe,
                    True,
                )
                batch_val_loss = metrics.pop("loss").item()
                val_loss += batch_val_loss
                metrics["Validation Loss"] = batch_val_loss
                for plugin in self.plugins:
                    # Report against the last training step. `highest_step` is
                    # always defined, even when the training loader is empty.
                    plugin.step(epoch, highest_step, metrics=metrics)
        val_loss /= len(self.val_dataloader)

        if self.lr_scheduler is not None:
            self.lr_scheduler.step(val_loss)

        for plugin in self.plugins:
            plugin.step(
                epoch,
                step=highest_step,
                end_of_epoch=True,
                model=self.model,
                optimizer=self.optimizer,
                scheduler=self.lr_scheduler,
            )
        return val_loss

    def train(self, start_epoch: int = 0, epochs: int = 1, prev_step: int = -1):
        """
        Main training loop.

        Args:
            start_epoch (int): Epoch to start training from.
            epochs (int): Number of epochs to train, counted from `start_epoch`.
            prev_step (int): Previous training step (for logging continuity).
        """
        # NOTE: `prev_step` is handed unchanged to every epoch, so the step
        # counter restarts at `prev_step + 1` at the beginning of each epoch.
        for epoch in tqdm(range(start_epoch, start_epoch + epochs), desc="Epochs"):
            val_loss = self.__one_epoch(epoch, prev_step)
            if self.early_stopper.early_stop(val_loss, self.model):
                break

gridfm_graphkit/utils/__init__.py ADDED Viewed

File without changes

gridfm_graphkit/utils/loss.py ADDED Viewed

@@ -0,0 +1,198 @@
+from gridfm_graphkit.datasets.globals import PD, QD, PG, QG, VM, VA, G, B
+import torch.nn.functional as F
+import torch
+from torch_geometric.utils import to_torch_coo_tensor
+import torch.nn as nn
class MaskedMSELoss(nn.Module):
    """
    Mean Squared Error evaluated only on the entries selected by `mask`.
    """

    def __init__(self, reduction="mean"):
        super().__init__()
        self.reduction = reduction

    def forward(self, pred, target, edge_index=None, edge_attr=None, mask=None):
        """
        Returns a dict holding the loss tensor ("loss") and its Python float
        value ("Masked MSE loss"). `edge_index` and `edge_attr` are unused.
        """
        selected_pred = pred[mask]
        selected_target = target[mask]
        masked_mse = F.mse_loss(
            selected_pred,
            selected_target,
            reduction=self.reduction,
        )
        return {"loss": masked_mse, "Masked MSE loss": masked_mse.item()}
class MSELoss(nn.Module):
    """Plain Mean Squared Error over all entries."""

    def __init__(self, reduction="mean"):
        super().__init__()
        self.reduction = reduction

    def forward(self, pred, target, edge_index=None, edge_attr=None, mask=None):
        """
        Returns a dict holding the loss tensor ("loss") and its Python float
        value ("MSE loss"). `edge_index`, `edge_attr` and `mask` are unused.
        """
        mse = F.mse_loss(pred, target, reduction=self.reduction)
        result = {"loss": mse}
        result["MSE loss"] = mse.item()
        return result
class SCELoss(nn.Module):
    """Scaled Cosine Error loss, optionally restricted to masked entries."""

    def __init__(self, alpha=3):
        super().__init__()
        self.alpha = alpha

    def forward(self, pred, target, edge_index=None, edge_attr=None, mask=None):
        """
        Returns a dict holding the loss tensor ("loss") and its Python float
        value ("SCE loss"). `edge_index` and `edge_attr` are unused.
        """
        # Restrict both tensors to the masked entries first, when a mask is given.
        if mask is not None:
            pred, target = pred[mask], target[mask]

        # L2-normalise along the last dimension so the dot product is a cosine.
        unit_pred = F.normalize(pred, p=2, dim=-1)
        unit_target = F.normalize(target, p=2, dim=-1)
        cosine = (unit_pred * unit_target).sum(dim=-1)

        sce = (1 - cosine).pow(self.alpha).mean()
        return {"loss": sce, "SCE loss": sce.item()}
class PBELoss(nn.Module):
    """
    Loss based on the Power Balance Equations.

    Args:
        visualization (bool): If True, the returned dictionary additionally
            contains the per-node absolute active and reactive mismatches.
    """

    def __init__(self, visualization=False):
        super().__init__()
        self.visualization = visualization

    def forward(self, pred, target, edge_index, edge_attr, mask):
        """
        Computes the mean magnitude of the complex power-balance mismatch.

        Args:
            pred: Predicted node features; columns are addressed via the
                PD, QD, PG, QG, VM and VA indices.
            target: Ground-truth node features with the same column layout.
            edge_index: Edge index used to place the admittance entries.
            edge_attr: Edge attributes; columns G and B are combined into complex values.
            mask: Boolean mask; entries that are not masked are taken from `target`.

        Returns:
            dict: The loss tensor under "loss", float summaries of the total,
            active and reactive mismatch, and (when `visualization` is set)
            the nodal mismatch tensors.
        """
        # Work on a copy so that `pred` itself is left untouched.
        merged = pred.clone()
        # Wherever a value was not masked, fall back to the ground truth.
        keep_original = ~mask
        merged[keep_original] = target[keep_original]

        # Complex bus voltages from magnitude and angle, and their conjugate.
        V = merged[:, VM] * torch.exp(1j * merged[:, VA])
        V_conj = torch.conj(V)

        # Complex edge values G + jB, assembled into a sparse COO matrix.
        num_nodes = target.size(0)
        edge_complex = edge_attr[:, G] + 1j * edge_attr[:, B]
        Y_bus_sparse = to_torch_coo_tensor(
            edge_index,
            edge_complex,
            size=(num_nodes, num_nodes),
        )
        Y_bus_conj = torch.conj(Y_bus_sparse)

        # Complex power injection: diag(V) * conj(Y) * conj(V)
        S_injection = torch.diag(V) @ Y_bus_conj @ V_conj

        # Net complex power: (PG - PD) + j(QG - QD)
        S_net_power_balance = (merged[:, PG] - merged[:, PD]) + 1j * (
            merged[:, QG] - merged[:, QD]
        )

        # Mismatch between net power and injected power, computed once.
        mismatch = S_net_power_balance - S_injection
        nodal_active = torch.abs(torch.real(mismatch))
        nodal_reactive = torch.abs(torch.imag(mismatch))

        loss = torch.mean(torch.abs(mismatch))  # Mean of absolute complex power value
        result = {
            "loss": loss,
            "Power power loss in p.u.": loss.item(),
            "Active Power Loss in p.u.": torch.mean(nodal_active).item(),
            "Reactive Power Loss in p.u.": torch.mean(nodal_reactive).item(),
        }
        if self.visualization:
            result["Nodal Active Power Loss in p.u."] = nodal_active
            result["Nodal Reactive Power Loss in p.u."] = nodal_reactive
        return result
class MixedLoss(nn.Module):
    """
    Weighted sum of several loss functions.

    Args:
        loss_functions (list[nn.Module]): List of loss functions.
        weights (list[float]): Corresponding weights for each loss function.

    Raises:
        ValueError: If the number of loss functions and weights differ.
    """

    def __init__(self, loss_functions, weights):
        super().__init__()
        if len(weights) != len(loss_functions):
            raise ValueError(
                "The number of loss functions must match the number of weights.",
            )
        self.loss_functions = nn.ModuleList(loss_functions)
        self.weights = weights

    def forward(self, pred, target, edge_index=None, edge_attr=None, mask=None):
        """
        Compute the weighted sum of all specified losses.

        Parameters:
        - pred: Predictions.
        - target: Ground truth.
        - edge_index: Optional edge index for graph-based losses.
        - edge_attr: Optional edge attributes for graph-based losses.
        - mask: Optional mask to filter the inputs for certain losses.

        Returns:
        - A dictionary with the individual loss entries and the total under "loss".
        """
        combined = {}
        total_loss = 0.0
        for index, criterion in enumerate(self.loss_functions):
            partial = criterion(
                pred,
                target,
                edge_index=edge_index,
                edge_attr=edge_attr,
                mask=mask,
            )
            # Every loss function is expected to return a dict with a "loss" key;
            # it is removed so that only the extra entries are merged below.
            total_loss += self.weights[index] * partial.pop("loss")
            combined.update(partial)

        combined["loss"] = total_loss
        return combined