SWoTTeD 1.0.2a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
swotted/loss_metrics.py ADDED
@@ -0,0 +1,39 @@
+ # -*- coding: utf-8 -*-
+ """This module contains the alternative losses that can be used
+ in tensor decomposition tasks.
+ """
+
+ import torch
+
+
+ class Loss:
+     """Difference loss"""
+
+     def compute(self, X, Y):
+         return (X - Y).sum()
+
+
+ class Frobenius(Loss):
+     """Frobenius loss to be used with data assuming a Gaussian distribution
+     of their values."""
+
+     def compute(self, X, Y):
+         return torch.norm((X - Y), p="fro").sum()
+
+
+ class Poisson(Loss):
+     """Poisson loss to be used with data assuming a Poisson distribution
+     of their values (count data)."""
+
+     def compute(self, X, Y):
+         return Y.sum() - (X * torch.log(Y.clamp(min=1e-10))).sum()
+
+
+ class Bernoulli(Loss):
+     """Bernoulli loss to be used with data assuming a Bernoulli distribution
+     of their values (binary values)."""
+
+     def compute(self, X, Y):
+         return (torch.log(1 + Y.clamp(min=1e-10))).sum() - (
+             X * torch.log(Y.clamp(min=1e-10))
+         ).sum()
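All four classes expose the same `compute(X, Y)` interface, where `X` is the observed tensor and `Y` its reconstruction, so they can be swapped freely when configuring a model. Below is a minimal sketch (arbitrary illustrative shapes and values, not part of the package) of how the losses might be compared on the same data:

    import torch
    from swotted.loss_metrics import Frobenius, Poisson, Bernoulli

    X = torch.randint(0, 2, (10, 7)).float()  # observed binary matrix (e.g. drugs x time)
    Y = torch.rand((10, 7))                   # candidate reconstruction

    # each loss returns a scalar tensor; lower means a better reconstruction
    for loss in (Frobenius(), Poisson(), Bernoulli()):
        print(type(loss).__name__, loss.compute(X, Y).item())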
swotted/slidingWindow_model.py ADDED
@@ -0,0 +1,161 @@
+ # -*- coding: utf-8 -*-
+ """Sliding Windows reconstruction module
+
+ This module implements the SWoTTeD reconstruction of tensors based
+ on the temporal convolution of the temporal phenotypes with a pathway.
+
+ Example
+ --------
+ .. code-block:: python
+
+     from swotted.slidingWindow_model import SlidingWindow
+     from swotted.loss_metrics import Bernoulli
+     import torch
+
+     Ph = torch.rand((5, 10, 3))  # generation of 5 phenotypes with 10 features and 3 timestamps
+     Wp = torch.rand((5, 12))  # generation of a pathway describing the occurrences of the 5 phenotypes across time
+
+     sw = SlidingWindow()
+     sw.setMetric(Bernoulli())
+
+     Yp = sw.reconstruct(Wp, Ph)
+
+ """
+ from functools import reduce
+
+ import torch
+ import torch.nn as nn
+
+ from swotted.loss_metrics import *
+
+
+ class SlidingWindow(nn.Module):
+     """Torch module for the computation of the reconstruction error
+     by sliding phenotypes.
+     """
+
+     def setMetric(self, dist=Loss()):
+         """
+         Define the loss used to evaluate the tensor reconstruction.
+
+         Parameters
+         ----------
+         dist: Loss
+             one of the loss metrics available in the loss_metrics module.
+         """
+         self.metric = dist
+
+     def reconstruct(self, Wp, Ph):
+         """
+         Implementation of the SWoTTeD reconstruction scheme (convolutional reconstruction).
+
+         Notes
+         -----
+         The function does not ensure that the output values belong to [0,1]
+
+
+         Parameters
+         ----------
+         Ph: torch.Tensor
+             Phenotypes of size :math:`R * N * \\omega`, where :math:`R` is the
+             number of phenotypes and :math:`\\omega` the length of the temporal window
+         Wp: torch.Tensor
+             Assignment tensor of size :math:`R * (Tp-\\omega+1)` for patient :math:`p`
+
+         Returns
+         -------
+         torch.Tensor
+             the **SWoTTeD** reconstruction of a pathway from :math:`Wp` and :math:`Ph`.
+         """
+         # convolve the time-reversed phenotypes over the pathway (full padding)
+         Yp = torch.conv1d(
+             Wp.squeeze(dim=0), Ph.transpose(0, 1).flip(2), padding=Ph.shape[2] - 1
+         )
+         return Yp
+
+     def loss(self, Xp, Wp, Ph, padding=None):
+         """Evaluation of the SWoTTeD reconstruction loss (see reconstruct method).
+
+         Parameters
+         ----------
+         Xp: torch.Tensor
+             A 2nd-order tensor of size :math:`N * Tp`, where :math:`N` is the number
+             of drugs and :math:`Tp` is the duration of the patient's stay
+         Ph: torch.Tensor
+             Phenotypes of size :math:`R * N * \\omega`, where :math:`R` is the
+             number of phenotypes and :math:`\\omega` the length of the temporal window
+         Wp: torch.Tensor
+             Assignment tensor of size :math:`R * (Tp-\\omega+1)` for patient :math:`p`
+         padding: None, bool or tuple
+             If `padding` is True then the loss is evaluated on the interval
+             :math:`[\\omega, L-\\omega]` of the pathway.
+             If `padding` is a tuple `(a,b)`, then the loss is evaluated on the
+             interval :math:`[a, L-b]`.
+             Default is None (no padding)
+
+         Returns
+         -------
+         float
+             the SWoTTeD reconstruction loss of one patient.
+         """
+         Yp = self.reconstruct(Wp, Ph)
+         Twindow = Ph.shape[2]
+
+         if padding is not None:
+             if isinstance(padding, bool) and padding:
+                 Yp = torch.split(
+                     Yp,
+                     [Twindow - 1, Yp.shape[1] - 2 * (Twindow - 1), Twindow - 1],
+                     dim=1,
+                 )[1]
+                 Xp = torch.split(
+                     Xp,
+                     [Twindow - 1, Xp.shape[1] - 2 * (Twindow - 1), Twindow - 1],
+                     dim=1,
+                 )[1]
+             elif isinstance(padding, tuple) and len(padding) == 2:
+                 Yp = torch.split(
+                     Yp,
+                     [padding[0], Yp.shape[1] - padding[0] - padding[1], padding[1]],
+                     dim=1,
+                 )[1]
+                 Xp = torch.split(
+                     Xp,
+                     [padding[0], Xp.shape[1] - padding[0] - padding[1], padding[1]],
+                     dim=1,
+                 )[1]
+
+         # evaluate the loss
+         return self.metric.compute(Xp, Yp)
+
+     def forward(self, X, W, Ph, padding=None):
+         """Evaluation of the SWoTTeD reconstruction loss for a collection of patients
+         (see reconstruct method).
+
+         Parameters
+         ----------
+         X: list[torch.Tensor]
+             A collection of :math:`K` 2nd-order tensors of size :math:`N * Tp`, where :math:`K` is the number
+             of patients, :math:`N` is the number of drugs and :math:`Tp` is the duration of the
+             patient's stay
+         Ph: torch.Tensor
+             Phenotypes of size :math:`R * N * \\omega`, where :math:`R` is the
+             number of phenotypes and :math:`\\omega` the length of the temporal window
+         W: list[torch.Tensor]
+             A collection of :math:`K` assignment tensors of size :math:`R * (Tp-\\omega+1)`, one per patient
+         padding: None, bool or tuple
+             If `padding` is True then the loss is evaluated on the interval
+             :math:`[\\omega, L-\\omega]` of the pathway.
+             If `padding` is a tuple `(a,b)`, then the loss is evaluated on the interval
+             :math:`[a, L-b]`.
+             Default is `None` (no padding)
+
+         Returns
+         -------
+         float
+             The SWoTTeD reconstruction loss of a collection of patients, that is the sum of
+             the losses for all patients.
+         """
+         return reduce(
+             torch.add, [self.loss(Xp, Wp, Ph, padding) for Xp, Wp in zip(X, W)]
+         )
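In addition to the reconstruction example in the module docstring, the `padding` options of `loss` can be exercised directly. The following is a minimal sketch with arbitrary shapes (N=10 features, Tp=12 time steps, R=5 phenotypes, omega=3), not taken from the package; the `padding=(0, 3)` call mirrors the way `forecast` masks the predicted region:

    import torch
    from swotted.slidingWindow_model import SlidingWindow
    from swotted.loss_metrics import Frobenius

    sw = SlidingWindow()
    sw.setMetric(Frobenius())

    Xp = torch.rand((10, 12))    # one patient: N x Tp
    Ph = torch.rand((5, 10, 3))  # R x N x omega
    Wp = torch.rand((5, 10))     # R x (Tp - omega + 1)

    full = sw.loss(Xp, Wp, Ph)                  # loss over the whole stay
    inner = sw.loss(Xp, Wp, Ph, padding=True)   # drop omega-1 time steps at each end
    head = sw.loss(Xp, Wp, Ph, padding=(0, 3))  # drop the last 3 time steps only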
swotted/swotted.py ADDED
@@ -0,0 +1,417 @@
+ # -*- coding: utf-8 -*-
+ """The SWoTTeD module
+ """
+
+ import numpy as np
+ import torch
+ import lightning.pytorch as pl
+ import torch.optim as optim
+ from torch.autograd import Variable
+ from munkres import Munkres
+
+ from omegaconf import DictConfig
+
+
+ from swotted.slidingWindow_model import SlidingWindow
+ from swotted.loss_metrics import *
+ from swotted.decomposition_contraints import *
+ from swotted.temporal_regularization import *
+ from swotted.utils import *
+
+
+ class swottedModule(pl.LightningModule):
+     """SWoTTeD module (Lightning module)"""
+
+     def __init__(self, config: DictConfig):
+         super().__init__()
+
+         # use config as parameter
+         self.params = config
+
+         self.model = SlidingWindow()
+         self.model.setMetric(eval(self.params.model.metric)())
+
+         self.alpha = self.params.model.sparsity  # sparsity
+         self.beta = self.params.model.non_succession  # non-succession
+         self.adam = True
+
+         self.sparsity = self.params.model.sparsity > 0
+         self.pheno_succession = self.params.model.non_succession > 0
+         self.non_negativity = True
+         self.normalization = True
+
+         self.rank = self.params.model.rank
+         self.N = self.params.model.N
+         self.twl = self.params.model.twl
+
+         self.Ph = torch.nn.Parameter(
+             torch.rand(
+                 (self.params.model.rank, self.params.model.N, self.params.model.twl)
+             )
+         )
+
+         # Important: Wk is not directly part of the model
+         self.Wk = None
+
+         # Important: This property activates manual optimization.
+         self.automatic_optimization = False
+
+     def configure_optimizers(self):
+         """
+         Parent override.
+         """
+
+         if self.adam:
+             optimizerPh = optim.Adam([self.Ph], lr=self.params.training.lr)
+         else:
+             optimizerPh = optim.SGD([self.Ph], lr=self.params.training.lr, momentum=0.9)
+
+         if self.adam:
+             optimizerW = optim.Adam(self.Wk, lr=self.params.training.lr)
+         else:
+             optimizerW = optim.SGD(self.Wk, lr=self.params.training.lr, momentum=0.9)
+
+         return optimizerPh, optimizerW
+
+     def forward(self, X):
+         """
+         This forward function computes the decomposition of the tensor `X`.
+         It contains an optimisation stage to find the best decomposition.
+         The optimisation does not modify the phenotypes of the model.
+
+         Parameters
+         ----------
+         X: torch.Tensor
+             tensor of dimension :math:`K * N * T` to decompose according to
+             the phenotypes of the model
+
+         Returns
+         -------
+         list[torch.Tensor]
+             A collection of :math:`K` tensors of dimension :math:`R * (T-\\omega+1)` that is the
+             decomposition of X according to the :math:`R` phenotypes of the model
+         """
+         # self.unfreeze()
+         K = len(X)  # number of patients
+         if self.N != X[0].shape[0]:  # number of medical events
+             # TODO throw an error
+             return None
+
+         with torch.inference_mode(False):
+             # PyTorch Lightning enables inference mode, which disables gradient computation
+             # more deeply than no_grad(); calling enable_grad() alone is not sufficient.
+
+             Wk_batch = [
+                 Variable(
+                     torch.rand(self.rank, X[Tp].shape[1] - self.twl + 1),
+                     requires_grad=True,
+                 )
+                 for Tp in range(K)
+             ]
+             optimizerW = optim.Adam(Wk_batch, lr=self.params["predict"]["lr"])
+
+             n_epochs = self.params["predict"]["nepochs"]
+             for _ in range(n_epochs):
+
+                 def closure():
+                     optimizerW.zero_grad()
+                     loss = self.model(X, Wk_batch, self.Ph.data)
+                     if self.pheno_succession:
+                         loss += self.beta * phenotypeSuccession_constraint(
+                             Wk_batch, self.twl
+                         )
+                     loss.backward()
+                     return loss
+
+                 optimizerW.step(closure)
+                 if self.non_negativity:
+                     nonnegative_projection(*Wk_batch)
+                 if self.normalization:
+                     normalization_constraint(*Wk_batch)
+         # self.freeze()
+         return Wk_batch
+
+     def predict_step(self, batch, batch_idx, dataloader_idx=0):
+         """
+         Parent override.
+         """
+         return self(batch)  # it only calls the forward function
+
+     def training_step(self, batch, idx):
+         """
+         Parent override.
+         """
+
+         optimizerPh, optimizerW = self.optimizers()
+
+         D, indices = zip(*batch)
+         X = D
+         Wk_batch = [self.Wk[p] for p in indices]
+         Wk_batch_nograd = [self.Wk[p].data for p in indices]
+
+         def closure():
+             optimizerPh.zero_grad()
+             loss = self.model(X, Wk_batch_nograd, self.Ph)
+             self.log(
+                 "train_reconstr_Ph",
+                 loss,
+                 on_step=True,
+                 on_epoch=False,
+                 prog_bar=False,
+                 logger=True,
+             )
+             if self.sparsity:
+                 sparsity_loss = sparsity_constraint(self.Ph)
+                 self.log(
+                     "train_sparsity_Ph",
+                     sparsity_loss,
+                     on_step=True,
+                     on_epoch=False,
+                     prog_bar=False,
+                     logger=True,
+                 )
+                 loss += self.alpha * sparsity_loss
+             loss.backward()
+             self.log(
+                 "train_loss_Ph",
+                 loss,
+                 on_step=True,
+                 on_epoch=False,
+                 prog_bar=False,
+                 logger=True,
+                 batch_size=len(indices),
+             )
+             return loss
+
+         optimizerPh.step(closure)
+
+         if self.non_negativity:
+             nonnegative_projection(*self.Ph)  # non-negativity constraint
+         if self.normalization:
+             normalization_constraint(*self.Ph)  # normalization constraint
+
+         # update W
+         def closure():
+             optimizerW.zero_grad()
+             loss = self.model(X, Wk_batch, self.Ph.data)
+             self.log(
+                 "train_reconstr_W",
+                 loss,
+                 on_step=True,
+                 on_epoch=False,
+                 prog_bar=False,
+                 logger=True,
+             )
+             if self.pheno_succession:
+                 nonsucc_loss = phenotypeSuccession_constraint(Wk_batch, self.twl)
+                 self.log(
+                     "train_nonsucc_W",
+                     nonsucc_loss,
+                     on_step=True,
+                     on_epoch=False,
+                     prog_bar=False,
+                     logger=True,
+                 )
+                 loss += self.beta * nonsucc_loss
+             loss.backward()
+             self.log(
+                 "train_loss_W",
+                 loss,
+                 on_step=True,
+                 on_epoch=False,
+                 prog_bar=False,
+                 logger=True,
+                 batch_size=len(indices),
+             )
+             return loss
+
+         optimizerW.step(closure)
+         if self.non_negativity:
+             nonnegative_projection(*Wk_batch)
+         if self.normalization:
+             normalization_constraint(*Wk_batch)
+
+     def test_step(self, batch, batch_idx):
+         """test step"""
+         X, _ = zip(*batch)
+         W_hat = self(X)
+         loss = self.model(X, W_hat, self.Ph)
+         self.log("test_loss", loss)
+         return loss
+
+     def validation_step(self, batch, batch_idx):
+         """
+         Parent override.
+
+         ***This function has not been tested***
+         """
+         X, y = zip(*batch)
+         W_hat = self(
+             X
+         )  # Apply the model on the data (requires optimisation of local W)
+         loss = self.model(X, W_hat, self.Ph)
+         # self.log("val_loss", loss)
+         return loss
+
+     def forecast(self, X):
+         """
+         This function forecasts the next time step using the trained phenotypes.
+         This function can be used only with the parameter :math:`\\omega\\geq 2` (`twl>=2`)
+         (phenotypes spanning at least two time instants).
+
+         This function makes a projection of the data on the phenotypes of the model.
+
+         For computational efficiency, the time dimension of :math:`X` is reduced to its last
+         :math:`\\omega-1` time steps, and then extended on the right with :math:`\\omega` time steps of
+         empty values.
+
+         Parameters
+         ----------
+         X: torch.Tensor
+             tensor of dimension :math:`K * N * T` to decompose
+             according to the phenotypes of the model.
+
+         Returns
+         -------
+         torch.Tensor
+             A tensor of dimension :math:`K * N` that is the forecast of the
+             next time step of :math:`X`.
+         """
+
+         if self.twl < 2:
+             # trained with daily phenotypes
+             # TODO throw an error
+             return None
+
+         K = len(X)  # number of patients
+         if self.N != X[0].shape[0]:  # number of medical events
+             # TODO throw an error
+             return None
+
+         # keep only the last twl-1 time steps of the data and append twl empty time steps
+         # (the region to predict)
+         X = [
+             torch.cat(
+                 (xi[:, -(self.twl - 1) :], torch.zeros((self.N, self.twl))), axis=1
+             )
+             for xi in X
+         ]
+
+         # now, we decompose the tensor ... without considering the last part of the
+         # reconstruction, i.e. the predicted part
+         with torch.inference_mode(False):
+             # PyTorch Lightning enables inference mode, which disables gradient computation
+             # more deeply than no_grad(); calling enable_grad() alone is not sufficient.
+
+             Wk_batch = [
+                 Variable(
+                     torch.rand(self.rank, X[Tp].shape[1] - self.twl + 1),
+                     requires_grad=True,
+                 )
+                 for Tp in range(K)
+             ]
+             optimizerW = optim.Adam(Wk_batch, lr=self.params["predict"]["lr"])
+
+             n_epochs = self.params["predict"]["nepochs"]
+             for _ in range(n_epochs):
+
+                 def closure():
+                     optimizerW.zero_grad()
+                     # evaluate the loss based on the beginning of the reconstruction only
+                     loss = self.model(X, Wk_batch, self.Ph.data, padding=(0, self.twl))
+                     if self.pheno_succession:
+                         loss += self.beta * phenotypeSuccession_constraint(
+                             Wk_batch, self.twl
+                         )
+                     loss.backward()
+                     return loss
+
+                 optimizerW.step(closure)
+                 if self.non_negativity:
+                     nonnegative_projection(*Wk_batch)
+                 if self.normalization:
+                     normalization_constraint(*Wk_batch)
+
+         # make a reconstruction, and select only the next event
+         with torch.no_grad():
+             pred = [
+                 self.model.reconstruct(x, self.Ph.data)[:, self.twl] for x in Wk_batch
+             ]
+         return pred
+
+     def reorderPhenotypes(self, gen_pheno, Wk=None, tw=2):
+         """
+         This function outputs reordered internal phenotypes and pathways.
+
+         Parameters
+         ----------
+         gen_pheno: torch.Tensor
+             generated phenotypes of size :math:`R x N x Tw`, where :math:`R` is the number of
+             phenotypes, :math:`N` is the number of drugs and :math:`Tw` is the length of the
+             temporal window
+         Wk: torch.Tensor
+             pathways to reorder; if None, the internal pathways are used
+         tw: int
+             window size
+
+         Returns
+         -------
+         A pair :math:`(rPh,rW)` with reordered phenotypes (best aligned with gen_pheno) and the
+         corresponding reordering of the pathways
+         """
+         if Wk is None:
+             Wk = self.Wk
+
+         if tw == 1:
+             gen_pheno = torch.unsqueeze(gen_pheno, 2)  # add a time dimension of length 1
+
+         if gen_pheno[0].shape != self.Ph[0].shape:
+             raise ValueError(
+                 f"The generated phenotypes ({gen_pheno[0].shape}) and computed phenotypes \
+                 ({self.Ph[0].shape}) do not have the same shape."
+             )
+
+         dic = np.zeros(
+             (gen_pheno.shape[0], self.Ph.shape[0])
+         )  # construct a cost matrix
+
+         for i in range(gen_pheno.shape[0]):
+             for j in range(self.Ph.shape[0]):
+                 dic[i][j] = torch.norm((gen_pheno[i] - self.Ph[j]), p="fro").item()
+
+         m = Munkres()  # use the Hungarian algorithm to find phenotype correspondences
+         indexes = m.compute(dic)
+
+         # Reorder phenotypes
+         reordered_pheno = self.Ph.clone()
+         for row, column in indexes:
+             reordered_pheno[row] = self.Ph[column]
+
+         # Reorder pathways
+         reordered_pathways = [Wk[i].clone() for i in range(len(Wk))]
+         for i in range(len(Wk)):
+             for row, column in indexes:
+                 reordered_pathways[i][row] = Wk[i][column]
+
+         return reordered_pheno, reordered_pathways
+
+
+ class swottedTrainer(pl.Trainer):
+     def fit(
+         self,
+         model: swottedModule,
+         train_dataloaders,
+         val_dataloaders=None,
+         datamodule=None,
+         ckpt_path=None,
+     ):
+         model.Wk = [
+             Variable(
+                 torch.rand(model.rank, ds[0].shape[1] - model.twl + 1),
+                 requires_grad=True,
+             )
+             for ds in train_dataloaders.dataset
+         ]
+         return super().fit(
+             model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
+         )
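The pieces above fit together roughly as follows. This is a minimal, illustrative sketch, not taken from the package: the configuration keys follow those read in `__init__`, `configure_optimizers` and `forward`, while the toy dataset and the identity `collate_fn` are assumptions made so that each batch element is a `(tensor, index)` pair, as `training_step` and `swottedTrainer.fit` expect:

    import torch
    from omegaconf import OmegaConf
    from torch.utils.data import DataLoader
    from swotted.swotted import swottedModule, swottedTrainer

    config = OmegaConf.create({
        "model": {"metric": "Bernoulli", "sparsity": 0.1, "non_succession": 0.1,
                  "rank": 4, "N": 10, "twl": 3},
        "training": {"lr": 1e-2},
        "predict": {"lr": 1e-2, "nepochs": 20},
    })

    # toy dataset: K patients, each an N x Tp binary matrix paired with its index
    K, N, Tp = 20, 10, 15
    data = [(torch.randint(0, 2, (N, Tp)).float(), k) for k in range(K)]
    loader = DataLoader(data, batch_size=K, collate_fn=lambda batch: batch)

    model = swottedModule(config)
    trainer = swottedTrainer(max_epochs=100, enable_checkpointing=False)
    trainer.fit(model, train_dataloaders=loader)

    W = model([x for x, _ in data])                   # per-patient assignment tensors
    next_step = model.forecast([x for x, _ in data])  # one N-dim prediction per patient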
swotted/temporal_regularization.py ADDED
@@ -0,0 +1,56 @@
+ # -*- coding: utf-8 -*-
+ """Temporal regularization module
+ """
+ import torch
+ from torch import nn
+
+
+ class TemporalDependency(nn.Module):
+     """Torch module implementing the temporal regularization loss.
+     This module is based on an LSTM.
+     """
+
+     def __init__(self, rank, nlayers, nhidden, dropout):
+         super(TemporalDependency, self).__init__()
+
+         self.nlayers = nlayers
+         self.nhid = nhidden
+
+         self.rnn = nn.LSTM(
+             input_size=rank,
+             hidden_size=nhidden,
+             num_layers=nlayers,
+             dropout=dropout,
+             batch_first=True,
+         )
+         self.decoder = nn.Sequential(nn.Linear(nhidden, rank), nn.ReLU())
+         self.init_weights()
+
+     def init_weights(self):
+         init_range = 0.1
+         for m in self.modules():
+             if isinstance(m, nn.Linear):
+                 m.weight.data.uniform_(-init_range, init_range)
+                 m.bias.data.zero_()
+
+     def forward(self, Ws, device):
+         train_loss = 0.0
+         for Wp in Ws:
+             inputs, targets = Wp[:-1, :], Wp[1:, :]  # seq_len x n_dim
+             seq_len, n_dims = inputs.size()
+
+             hidden = self.init_hidden(1)
+             # seq_len x n_dims --> 1 x seq_len x n_dims
+             outputs, _ = self.rnn(inputs.unsqueeze(0), hidden)
+             logits = self.decoder(outputs.contiguous().view(-1, self.nhid))
+             loss = self.loss(logits, targets)
+             train_loss += loss
+         return train_loss
+
+     def init_hidden(self, batch_sz):
+         size = (self.nlayers, batch_sz, self.nhid)
+         weight = next(self.parameters())
+         return (weight.new_zeros(*size), weight.new_zeros(*size))
+
+     def loss(self, input, target):
+         return torch.mean((input - target) ** 2)
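A minimal sketch of how this regularizer might be applied to a set of pathways. The shapes follow the `forward` implementation (each pathway is a `T x R` matrix with time on the first axis); the hyper-parameter values are illustrative only:

    import torch
    from swotted.temporal_regularization import TemporalDependency

    R = 4  # decomposition rank
    reg = TemporalDependency(rank=R, nlayers=1, nhidden=16, dropout=0.0)

    # three pathways of different lengths, each of shape T x R
    Ws = [torch.rand((12, R)), torch.rand((9, R)), torch.rand((20, R))]

    # LSTM one-step-ahead prediction error, summed over pathways
    penalty = reg(Ws, device="cpu")
    print(penalty.item())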