dsipts-1.1.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dsipts might be problematic.
- dsipts/__init__.py +48 -0
- dsipts/data_management/__init__.py +0 -0
- dsipts/data_management/monash.py +338 -0
- dsipts/data_management/public_datasets.py +162 -0
- dsipts/data_structure/__init__.py +0 -0
- dsipts/data_structure/data_structure.py +1167 -0
- dsipts/data_structure/modifiers.py +213 -0
- dsipts/data_structure/utils.py +173 -0
- dsipts/models/Autoformer.py +199 -0
- dsipts/models/CrossFormer.py +152 -0
- dsipts/models/D3VAE.py +196 -0
- dsipts/models/Diffusion.py +818 -0
- dsipts/models/DilatedConv.py +342 -0
- dsipts/models/DilatedConvED.py +310 -0
- dsipts/models/Duet.py +197 -0
- dsipts/models/ITransformer.py +167 -0
- dsipts/models/Informer.py +180 -0
- dsipts/models/LinearTS.py +222 -0
- dsipts/models/PatchTST.py +181 -0
- dsipts/models/Persistent.py +44 -0
- dsipts/models/RNN.py +213 -0
- dsipts/models/Samformer.py +139 -0
- dsipts/models/TFT.py +269 -0
- dsipts/models/TIDE.py +296 -0
- dsipts/models/TTM.py +252 -0
- dsipts/models/TimeXER.py +184 -0
- dsipts/models/VQVAEA.py +299 -0
- dsipts/models/VVA.py +247 -0
- dsipts/models/__init__.py +0 -0
- dsipts/models/autoformer/__init__.py +0 -0
- dsipts/models/autoformer/layers.py +352 -0
- dsipts/models/base.py +439 -0
- dsipts/models/base_v2.py +444 -0
- dsipts/models/crossformer/__init__.py +0 -0
- dsipts/models/crossformer/attn.py +118 -0
- dsipts/models/crossformer/cross_decoder.py +77 -0
- dsipts/models/crossformer/cross_embed.py +18 -0
- dsipts/models/crossformer/cross_encoder.py +99 -0
- dsipts/models/d3vae/__init__.py +0 -0
- dsipts/models/d3vae/diffusion_process.py +169 -0
- dsipts/models/d3vae/embedding.py +108 -0
- dsipts/models/d3vae/encoder.py +326 -0
- dsipts/models/d3vae/model.py +211 -0
- dsipts/models/d3vae/neural_operations.py +314 -0
- dsipts/models/d3vae/resnet.py +153 -0
- dsipts/models/d3vae/utils.py +630 -0
- dsipts/models/duet/__init__.py +0 -0
- dsipts/models/duet/layers.py +438 -0
- dsipts/models/duet/masked.py +202 -0
- dsipts/models/informer/__init__.py +0 -0
- dsipts/models/informer/attn.py +185 -0
- dsipts/models/informer/decoder.py +50 -0
- dsipts/models/informer/embed.py +125 -0
- dsipts/models/informer/encoder.py +100 -0
- dsipts/models/itransformer/Embed.py +142 -0
- dsipts/models/itransformer/SelfAttention_Family.py +355 -0
- dsipts/models/itransformer/Transformer_EncDec.py +134 -0
- dsipts/models/itransformer/__init__.py +0 -0
- dsipts/models/patchtst/__init__.py +0 -0
- dsipts/models/patchtst/layers.py +569 -0
- dsipts/models/samformer/__init__.py +0 -0
- dsipts/models/samformer/utils.py +154 -0
- dsipts/models/tft/__init__.py +0 -0
- dsipts/models/tft/sub_nn.py +234 -0
- dsipts/models/timexer/Layers.py +127 -0
- dsipts/models/timexer/__init__.py +0 -0
- dsipts/models/ttm/__init__.py +0 -0
- dsipts/models/ttm/configuration_tinytimemixer.py +307 -0
- dsipts/models/ttm/consts.py +16 -0
- dsipts/models/ttm/modeling_tinytimemixer.py +2099 -0
- dsipts/models/ttm/utils.py +438 -0
- dsipts/models/utils.py +624 -0
- dsipts/models/vva/__init__.py +0 -0
- dsipts/models/vva/minigpt.py +83 -0
- dsipts/models/vva/vqvae.py +459 -0
- dsipts/models/xlstm/__init__.py +0 -0
- dsipts/models/xlstm/xLSTM.py +255 -0
- dsipts-1.1.5.dist-info/METADATA +31 -0
- dsipts-1.1.5.dist-info/RECORD +81 -0
- dsipts-1.1.5.dist-info/WHEEL +5 -0
- dsipts-1.1.5.dist-info/top_level.txt +1 -0
dsipts/models/d3vae/encoder.py

@@ -0,0 +1,326 @@

# -*-Encoding: utf-8 -*-
"""
Description:
    The model architecture of the bidirectional VAE.
    Note: part of the code is borrowed from https://github.com/NVlabs/NVAE
Authors:
    Li, Yan (liyan22021121@gmail.com)
"""
import math
import numpy as np
import torch
import torch.nn as nn
from .neural_operations import OPS, EncCombinerCell, DecCombinerCell, Conv2D, get_skip_connection
from .utils import get_stride_for_cell_type, get_arch_cells


class Cell(nn.Module):
    def __init__(self, Cin, Cout, cell_type, arch, use_se):
        super(Cell, self).__init__()
        self.cell_type = cell_type
        stride = get_stride_for_cell_type(self.cell_type)
        self.skip = get_skip_connection(Cin, stride, channel_mult=2)
        self.use_se = use_se
        self._num_nodes = len(arch)
        self._ops = nn.ModuleList()
        for i in range(self._num_nodes):
            stride = get_stride_for_cell_type(self.cell_type) if i == 0 else 1
            if i == 0:
                primitive = arch[i]
                op = OPS[primitive](Cin, Cout, stride)
            else:
                primitive = arch[i]
                op = OPS[primitive](Cout, Cout, stride)
            self._ops.append(op)

    def forward(self, s):
        # skip branch
        skip = self.skip(s)
        for i in range(self._num_nodes):
            s = self._ops[i](s)
        return skip + 0.1 * s


def soft_clamp5(x: torch.Tensor):
    return x.div(5.).tanh_().mul(5.)


def sample_normal_jit(mu, sigma):
    eps = mu.mul(0).normal_()
    z = eps.mul_(sigma).add_(mu)
    return z, eps


class Normal:
    def __init__(self, mu, log_sigma, temp=1.):
        self.mu = soft_clamp5(mu)
        log_sigma = soft_clamp5(log_sigma)
        self.sigma = torch.exp(log_sigma)
        if temp != 1.:
            self.sigma *= temp

    def sample(self):
        return sample_normal_jit(self.mu, self.sigma)

    def sample_given_eps(self, eps):
        return eps * self.sigma + self.mu

    def log_p(self, samples):
        normalized_samples = (samples - self.mu) / self.sigma
        log_p = - 0.5 * normalized_samples * normalized_samples - 0.5 * np.log(2 * np.pi) - torch.log(self.sigma)
        return log_p

    def kl(self, normal_dist):
        term1 = (self.mu - normal_dist.mu) / normal_dist.sigma
        term2 = self.sigma / normal_dist.sigma
        return 0.5 * (term1 * term1 + term2 * term2) - 0.5 - torch.log(term2)


class NormalDecoder:
    def __init__(self, param):
        B, C, H, W = param.size()
        self.num_c = C // 2
        self.mu = param[:, :self.num_c, :, :]         # B, C/2, H, W
        self.log_sigma = param[:, self.num_c:, :, :]  # B, C/2, H, W
        self.sigma = torch.exp(self.log_sigma) + 1e-2
        self.dist = Normal(self.mu, self.log_sigma)

    def log_prob(self, samples):
        return self.dist.log_p(samples)

    def sample(self):
        x, _ = self.dist.sample()
        return x


def log_density_gaussian(sample, mu, logvar):
    """Calculates the log density of a Gaussian.
    Parameters
    ----------
    sample: torch.Tensor or np.ndarray or float
        Value at which to compute the density.
    mu: torch.Tensor or np.ndarray or float
        Mean.
    logvar: torch.Tensor or np.ndarray or float
        Log variance.
    """
    normalization = - 0.5 * (math.log(2 * math.pi) + logvar)
    inv_var = torch.exp(-logvar)
    log_density = normalization - 0.5 * ((sample - mu)**2 * inv_var)
    log_qz = torch.logsumexp(torch.sum(log_density, [2, 3]), dim=1, keepdim=False)
    log_prod_qzi = torch.logsumexp(log_density, dim=1, keepdim=False).sum((1, 2))
    loss_p_z = (log_qz - log_prod_qzi)
    loss_p_z = ((loss_p_z - torch.min(loss_p_z)) / (torch.max(loss_p_z) - torch.min(loss_p_z))).mean()
    return loss_p_z

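For orientation, a minimal sketch (not part of the package) of how the Normal helper above behaves, assuming the class defined in this file is in scope; shapes and values are illustrative only:

import torch

mu = torch.zeros(2, 4, 3, 3)                  # stand-in for a (B, C, H, W) feature map
log_sigma = torch.zeros(2, 4, 3, 3)

q = Normal(mu, log_sigma)                     # mu and log_sigma are soft-clamped into (-5, 5)
p = Normal(torch.zeros_like(mu), torch.zeros_like(log_sigma))

z, eps = q.sample()                           # reparameterized draw: z = mu + sigma * eps
log_q = q.log_p(z)                            # element-wise log density of z under q
kl_qp = q.kl(p)                               # element-wise KL(q || p); zero here because q == p
print(z.shape, log_q.shape, kl_qp.mean().item())
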
class Encoder(nn.Module):
    def __init__(self, channel_mult, mult, prediction_length, num_preprocess_blocks, num_preprocess_cells,
                 num_channels_enc, arch_instance, num_latent_per_group, num_channels_dec, groups_per_scale,
                 num_postprocess_blocks, num_postprocess_cells, embedding_dimension, hidden_size, target_dim,
                 sequence_length, num_layers, dropout_rate):
        super(Encoder, self).__init__()

        self.channel_mult = channel_mult
        self.mult = mult
        self.prediction_length = prediction_length
        self.num_preprocess_blocks = num_preprocess_blocks
        self.num_preprocess_cells = num_preprocess_cells
        self.num_channels_enc = num_channels_enc
        self.arch_instance = get_arch_cells(arch_instance)
        self.stem = Conv2D(1, num_channels_enc, 3, padding=1, bias=True)
        self.num_latent_per_group = num_latent_per_group

        self.num_channels_dec = num_channels_dec
        self.groups_per_scale = groups_per_scale
        self.num_postprocess_blocks = num_postprocess_blocks
        self.num_postprocess_cells = num_postprocess_cells
        self.use_se = False
        self.input_size = embedding_dimension
        self.hidden_size = hidden_size
        self.projection = nn.Linear(embedding_dimension + hidden_size, target_dim)

        c_scaling = self.channel_mult ** (self.num_preprocess_blocks)  # 4
        spatial_scaling = 2 ** (self.num_preprocess_blocks)  # 4

        prior_ftr0_size = (int(c_scaling * self.num_channels_dec),
                           sequence_length // spatial_scaling,  # prediction_length
                           (embedding_dimension + hidden_size + 1) // spatial_scaling)
        self.prior_ftr0 = nn.Parameter(torch.rand(size=prior_ftr0_size), requires_grad=True)
        self.z0_size = [self.num_latent_per_group, sequence_length // spatial_scaling,  # prediction_length
                        (embedding_dimension + hidden_size + 1) // spatial_scaling]

        self.pre_process = self.init_pre_process(self.mult)
        self.enc_tower = self.init_encoder_tower(self.mult)

        self.enc0 = nn.Sequential(nn.ELU(), Conv2D(self.num_channels_enc * self.mult,
                                                   self.num_channels_enc * self.mult, kernel_size=1, bias=True), nn.ELU())

        self.enc_sampler, self.dec_sampler = self.init_sampler(self.mult)

        self.dec_tower = self.init_decoder_tower(self.mult)

        self.post_process = self.init_post_process(self.mult)
        self.image_conditional = nn.Sequential(nn.ELU(),
                                               Conv2D(int(self.num_channels_dec * self.mult), 2, 3, padding=1, bias=True))
        self.rnn = nn.GRU(
            input_size=sequence_length,
            hidden_size=prediction_length,
            num_layers=num_layers,
            dropout=dropout_rate,
            batch_first=True,
        )

    def init_pre_process(self, mult):
        pre_process = nn.ModuleList()
        for b in range(self.num_preprocess_blocks):
            for c in range(self.num_preprocess_cells):
                if c == self.num_preprocess_cells - 1:
                    arch = self.arch_instance['down_pre']
                    num_ci = int(self.num_channels_enc * mult)
                    num_co = int(self.channel_mult * num_ci)
                    cell = Cell(num_ci, num_co, cell_type='down_pre', arch=arch, use_se=self.use_se)
                    mult = self.channel_mult * mult
                else:
                    arch = self.arch_instance['normal_pre']
                    num_c = self.num_channels_enc * mult
                    cell = Cell(num_c, num_c, cell_type='normal_pre', arch=arch, use_se=self.use_se)
                pre_process.append(cell)
        self.mult = mult
        return pre_process

    def init_encoder_tower(self, mult):
        enc_tower = nn.ModuleList()
        for g in range(self.groups_per_scale):
            arch = self.arch_instance['normal_enc']
            num_c = int(self.num_channels_enc * mult)
            cell = Cell(num_c, num_c, cell_type='normal_enc', arch=arch, use_se=self.use_se)
            enc_tower.append(cell)

            if not (g == self.groups_per_scale - 1):
                num_ce = int(self.num_channels_enc * mult)
                num_cd = int(self.num_channels_dec * mult)
                cell = EncCombinerCell(num_ce, num_cd, num_ce, cell_type='combiner_enc')
                enc_tower.append(cell)

        self.mult = mult
        return enc_tower

    def init_decoder_tower(self, mult):
        dec_tower = nn.ModuleList()
        for g in range(self.groups_per_scale):
            num_c = int(self.num_channels_dec * mult)
            if not (g == 0):
                arch = self.arch_instance['normal_dec']
                cell = Cell(num_c, num_c, cell_type='normal_dec', arch=arch, use_se=self.use_se)
                dec_tower.append(cell)
            cell = DecCombinerCell(num_c, self.num_latent_per_group, num_c, cell_type='combiner_dec')
            dec_tower.append(cell)
        self.mult = mult
        return dec_tower

    def init_sampler(self, mult):
        enc_sampler = nn.ModuleList()
        dec_sampler = nn.ModuleList()
        for g in range(self.groups_per_scale):
            num_c = int(self.num_channels_enc * mult)
            cell = Conv2D(num_c, 2 * self.num_latent_per_group, kernel_size=3, padding=1, bias=True)
            enc_sampler.append(cell)
            if g != 0:
                num_c = int(self.num_channels_dec * mult)
                cell = nn.Sequential(
                    nn.ELU(),
                    Conv2D(num_c, 2 * self.num_latent_per_group, kernel_size=1, padding=0, bias=True))
                dec_sampler.append(cell)
        mult = mult / self.channel_mult
        return enc_sampler, dec_sampler

    def init_post_process(self, mult):
        post_process = nn.ModuleList()
        for b in range(self.num_postprocess_blocks):
            for c in range(self.num_postprocess_cells):
                if c == 0:
                    arch = self.arch_instance['up_post']
                    num_ci = int(self.num_channels_dec * mult)
                    num_co = int(num_ci / self.channel_mult)
                    cell = Cell(num_ci, num_co, cell_type='up_post', arch=arch, use_se=self.use_se)
                    mult = mult / self.channel_mult
                else:
                    arch = self.arch_instance['normal_post']
                    num_c = int(self.num_channels_dec * mult)
                    cell = Cell(num_c, num_c, cell_type='normal_post', arch=arch, use_se=self.use_se)
                post_process.append(cell)
        self.mult = mult
        return post_process

    def forward(self, x):
        s = self.stem(2 * x - 1.0)
        for cell in self.pre_process:
            s = cell(s)
        combiner_cells_enc = []
        combiner_cells_s = []
        all_z = []
        for cell in self.enc_tower:
            if cell.cell_type == 'combiner_enc':
                combiner_cells_enc.append(cell)
                combiner_cells_s.append(s)
            else:
                s = cell(s)

        combiner_cells_enc.reverse()
        combiner_cells_s.reverse()
        idx_dec = 0
        ftr = self.enc0(s)  # conv
        param0 = self.enc_sampler[idx_dec](ftr)  # another conv2d
        mu_q, log_sig_q = torch.chunk(param0, 2, dim=1)
        dist = Normal(mu_q, log_sig_q)
        z, _ = dist.sample()  # z_0
        all_z.append(z)
        loss_qz = log_density_gaussian(z, mu_q, log_sig_q)
        # total_c = [loss_qz]
        s = self.prior_ftr0.unsqueeze(0)  # random value
        batch_size = z.size(0)
        s = s.expand(batch_size, -1, -1, -1)
        total_c = 0
        idx_dec = 0

        for cell in self.dec_tower:
            if cell.cell_type == 'combiner_dec':
                if idx_dec > 0:
                    ftr = combiner_cells_enc[idx_dec - 1](combiner_cells_s[idx_dec - 1], s)
                    param = self.enc_sampler[idx_dec](ftr)
                    mu_q, log_sig_q = torch.chunk(param, 2, dim=1)
                    dist = Normal(mu_q, log_sig_q)
                    z, _ = dist.sample()  # z_n
                    all_z.append(z)
                    loss_qz = log_density_gaussian(z, mu_q, log_sig_q)
                    total_c += loss_qz
                    # total_c.append(loss_qz)
                s = cell(s, z)
                idx_dec += 1
            else:
                s = cell(s)

        for cell in self.post_process:
            s = cell(s)

        logits = self.image_conditional(s)
        tmp_tot = []
        for i in range(idx_dec):
            tmp, _ = self.rnn(logits[:, i, :, :].squeeze().permute(0, 2, 1))
            tmp_tot.append(tmp.permute(0, 2, 1))
        logits = torch.stack(tmp_tot, 1)
        logits = self.projection(logits[..., -(self.input_size + self.hidden_size):])
        # total_c = torch.mean(torch.tensor(total_c))
        total_c = total_c / idx_dec
        return logits, total_c, all_z  # , log_q, log_p, kl_all, kl_diag

    def decoder_output(self, logits):
        return NormalDecoder(logits)

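As a rough usage sketch (illustrative, not part of the package), decoder_output above wraps the 2-channel map produced by image_conditional in a NormalDecoder, which splits the channels into a mean and a log-scale and can then sample or score values; the shapes below are assumptions:

import torch

param = torch.randn(8, 2, 24, 7)     # hypothetical (B, 2*C, H, W) parameter map with C == 1
dec = NormalDecoder(param)           # channel 0 -> mu, channel 1 -> log_sigma
x_hat = dec.sample()                 # one draw from N(mu, sigma)
ll = dec.log_prob(x_hat)             # element-wise log-likelihood of that draw
print(x_hat.shape, ll.shape)         # both (8, 1, 24, 7)
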
dsipts/models/d3vae/model.py

@@ -0,0 +1,211 @@

# -*-Encoding: utf-8 -*-
"""
Authors:
    Li, Yan (liyan22021121@gmail.com)
"""
import torch
import torch.nn as nn
import numpy as np
from .resnet import Res12_Quadratic
from .diffusion_process import GaussianDiffusion, get_beta_schedule
from .encoder import Encoder
from .embedding import DataEmbedding
from ...data_structure.utils import beauty_string


class diffusion_generate(nn.Module):
    def __init__(self, target_dim, embedding_dimension, prediction_length, sequence_length, scale, hidden_size,
                 num_layers, dropout_rate, diff_steps, loss_type, beta_end, beta_schedule, channel_mult, mult,
                 num_preprocess_blocks, num_preprocess_cells, num_channels_enc, arch_instance, num_latent_per_group,
                 num_channels_dec, groups_per_scale, num_postprocess_blocks, num_postprocess_cells):
        super().__init__()
        self.target_dim = target_dim
        self.input_size = embedding_dimension
        self.prediction_length = prediction_length
        self.seq_length = sequence_length
        self.scale = scale
        self.rnn = nn.GRU(
            input_size=self.input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout_rate,
            batch_first=True,
        )

        self.generative = Encoder(channel_mult, mult, prediction_length,
                                  num_preprocess_blocks, num_preprocess_cells, num_channels_enc, arch_instance,
                                  num_latent_per_group, num_channels_dec, groups_per_scale, num_postprocess_blocks,
                                  num_postprocess_cells, embedding_dimension, hidden_size, target_dim,
                                  sequence_length, num_layers, dropout_rate)
        self.diffusion = GaussianDiffusion(
            self.generative,
            input_size=target_dim,
            diff_steps=diff_steps,
            loss_type=loss_type,
            beta_end=beta_end,
            beta_schedule=beta_schedule,
            scale=scale,
        )
        self.projection = nn.Linear(embedding_dimension + hidden_size, embedding_dimension)

    def forward(self, past_time_feat, future_time_feat, t):
        """
        Output the generative results and related variables.
        """
        time_feat, _ = self.rnn(past_time_feat)
        input = torch.cat([time_feat, past_time_feat], dim=-1)
        output, y_noisy, total_c, all_z = self.diffusion.log_prob(input, future_time_feat, t)
        return output, y_noisy, total_c, all_z


class denoise_net(nn.Module):
    def __init__(self, target_dim, embedding_dimension, prediction_length, sequence_length, scale, hidden_size,
                 num_layers, dropout_rate, diff_steps, loss_type, beta_end, beta_schedule, channel_mult, mult,
                 num_preprocess_blocks, num_preprocess_cells, num_channels_enc, arch_instance, num_latent_per_group,
                 num_channels_dec, groups_per_scale, num_postprocess_blocks, num_postprocess_cells, beta_start,
                 input_dim, freq, embs):
        super().__init__()
        """
        The whole model architecture consists of three main parts: the coupled diffusion process and the generative
        model are included in the diffusion_generate module, and a resnet is used to calculate the score.
        """
        # ResNet used to calculate the scores.
        self.score_net = Res12_Quadratic(1, 64, 32, normalize=False, AF=nn.ELU())

        # Generate the diffusion schedule.
        sigmas = get_beta_schedule(beta_schedule, beta_start, beta_end, diff_steps)
        alphas = 1.0 - sigmas * 0.5
        self.alphas_cumprod = torch.tensor(np.cumprod(alphas, axis=0))
        self.sqrt_alphas_cumprod = torch.tensor(np.sqrt(np.cumprod(alphas, axis=0)))
        self.sqrt_one_minus_alphas_cumprod = torch.tensor(np.sqrt(1 - np.cumprod(alphas, axis=0)))
        self.sigmas = torch.tensor(1. - self.alphas_cumprod)

        # The generative bvae model.
        self.diffusion_gen = diffusion_generate(target_dim, embedding_dimension, prediction_length, sequence_length,
                                                scale, hidden_size, num_layers, dropout_rate, diff_steps, loss_type,
                                                beta_end, beta_schedule, channel_mult, mult, num_preprocess_blocks,
                                                num_preprocess_cells, num_channels_enc, arch_instance,
                                                num_latent_per_group, num_channels_dec, groups_per_scale,
                                                num_postprocess_blocks, num_postprocess_cells)

        # Data embedding module.
        self.embedding = DataEmbedding(input_dim, embedding_dimension, embs, dropout_rate)

    def extract(self, a, t, x_shape):
        """Extract the t-th element from a."""
        b, *_ = t.shape
        out = a.gather(-1, t)
        return out.reshape(b, *((1,) * (len(x_shape) - 1)))

    def forward(self, past_time_feat, mark, future_time_feat, t):
        """
        Params:
            past_time_feat: Tensor
                the input time series.
            mark: Tensor
                the time feature mark.
            future_time_feat: Tensor
                the target time series.
            t: Tensor
                the diffusion step.
        -------------
        return:
            output: Tensor
                The gaussian distribution of the generative results.
            y_noisy: Tensor
                The diffused target.
            total_c: Float
                Total correlation of all the latent variables in the BVAE, used for disentangling.
            all_z: List
                All the latent variables of the bvae.
            loss: Float
                The loss of score matching.
        """
        # Embed the original time series.
        input = self.embedding(past_time_feat, mark)
        # input, _ = self.diffusion_gen.rnn(input)
        # Output the distribution of the generative results, the sampled generative results and
        # the total correlations of the generative model.
        output, y_noisy, total_c, all_z = self.diffusion_gen(input, future_time_feat, t)

        # Score matching.
        sigmas_t = self.extract(self.sigmas.to(y_noisy.device), t, y_noisy.shape)
        y = future_time_feat.unsqueeze(1).float()
        y_noisy1 = output.sample().float().requires_grad_()
        E = self.score_net(y_noisy1).sum()

        # The loss of multiscale score matching.
        grad_x = torch.autograd.grad(E, y_noisy1, create_graph=True)[0]
        loss = torch.mean(torch.sum(((y - y_noisy1.detach()) + grad_x * 0.001)**2 * sigmas_t, [1, 2, 3])).float()
        return output, y_noisy, total_c, all_z, loss


class pred_net(denoise_net):
    def forward(self, x, mark):
        """
        Generate the prediction with the trained model.
        Return:
            y: The noisy generative results.
            out: Denoised results; the noise is removed from y through score matching.
            tc: Total correlations, an indicator of the extent of disentangling.
        """
        input = self.embedding(x, mark)
        x_t, _ = self.diffusion_gen.rnn(input)
        input = torch.cat([x_t, input], dim=-1)
        input = input.unsqueeze(1)
        logits, tc, all_z = self.diffusion_gen.generative(input)
        output = self.diffusion_gen.generative.decoder_output(logits)
        y = output.mu.float().requires_grad_()

        try:
            E = self.score_net(y).sum()
            grad_x = torch.autograd.grad(E, y, create_graph=True, allow_unused=True)[0]
        except Exception as e:
            beauty_string(e, '')
            grad_x = 0

        out = y - grad_x * 0.001
        return y, out, tc, all_z


class Discriminator(nn.Module):
    def __init__(self, neg_slope=0.2, latent_dim=10, hidden_units=1000, out_units=2):
        """Discriminator proposed in [1].
        Parameters
        ----------
        neg_slope: float
            Hyperparameter for the LeakyReLU.
        latent_dim: int
            Dimensionality of the latent variables.
        hidden_units: int
            Number of hidden units in the MLP.
        Model Architecture
        ------------------
        - 6-layer multi-layer perceptron, each layer with 1000 hidden units
        - LeakyReLU activations
        - Outputs 2 logits
        References
        ----------
        [1] Kim, Hyunjik, and Andriy Mnih. "Disentangling by factorising."
            arXiv preprint arXiv:1802.05983 (2018).
        """
        super(Discriminator, self).__init__()

        # Activation parameters
        self.neg_slope = neg_slope
        self.leaky_relu = nn.LeakyReLU(self.neg_slope, True)

        # Layer parameters
        self.z_dim = latent_dim
        self.hidden_units = hidden_units
        # theoretically 1 output with a sigmoid, but that gives bad results => use 2 and a softmax
        out_units = out_units

        # Fully connected layers
        self.lin1 = nn.Linear(self.z_dim, hidden_units)
        self.lin2 = nn.Linear(hidden_units, hidden_units)
        self.lin3 = nn.Linear(hidden_units, hidden_units)
        self.lin4 = nn.Linear(hidden_units, hidden_units)
        self.lin5 = nn.Linear(hidden_units, hidden_units)
        self.lin6 = nn.Linear(hidden_units, out_units)
        self.softmax = nn.Softmax()

    def forward(self, z):
        # Fully connected layers with LeakyReLU activations
        z = self.leaky_relu(self.lin1(z))
        z = self.leaky_relu(self.lin2(z))
        z = self.leaky_relu(self.lin3(z))
        z = self.leaky_relu(self.lin4(z))
        z = self.leaky_relu(self.lin5(z))
        z = self.lin6(z)
        return z
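Finally, a small illustrative sketch (not part of the package) of the FactorVAE-style Discriminator defined above: it maps flattened latent codes to two logits, which a total-correlation objective would typically convert into "real" vs "permuted" probabilities; the batch size and latent dimensionality below are arbitrary:

import torch

disc = Discriminator(neg_slope=0.2, latent_dim=10, hidden_units=1000, out_units=2)
z = torch.randn(32, 10)                   # a batch of flattened latent codes (batch, latent_dim)
logits = disc(z)                          # (32, 2) unnormalized class scores
probs = torch.softmax(logits, dim=-1)     # note: the module's own nn.Softmax() is not applied in forward
print(logits.shape, probs[:2])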