dsipts-1.1.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dsipts might be problematic.
- dsipts/__init__.py +48 -0
- dsipts/data_management/__init__.py +0 -0
- dsipts/data_management/monash.py +338 -0
- dsipts/data_management/public_datasets.py +162 -0
- dsipts/data_structure/__init__.py +0 -0
- dsipts/data_structure/data_structure.py +1167 -0
- dsipts/data_structure/modifiers.py +213 -0
- dsipts/data_structure/utils.py +173 -0
- dsipts/models/Autoformer.py +199 -0
- dsipts/models/CrossFormer.py +152 -0
- dsipts/models/D3VAE.py +196 -0
- dsipts/models/Diffusion.py +818 -0
- dsipts/models/DilatedConv.py +342 -0
- dsipts/models/DilatedConvED.py +310 -0
- dsipts/models/Duet.py +197 -0
- dsipts/models/ITransformer.py +167 -0
- dsipts/models/Informer.py +180 -0
- dsipts/models/LinearTS.py +222 -0
- dsipts/models/PatchTST.py +181 -0
- dsipts/models/Persistent.py +44 -0
- dsipts/models/RNN.py +213 -0
- dsipts/models/Samformer.py +139 -0
- dsipts/models/TFT.py +269 -0
- dsipts/models/TIDE.py +296 -0
- dsipts/models/TTM.py +252 -0
- dsipts/models/TimeXER.py +184 -0
- dsipts/models/VQVAEA.py +299 -0
- dsipts/models/VVA.py +247 -0
- dsipts/models/__init__.py +0 -0
- dsipts/models/autoformer/__init__.py +0 -0
- dsipts/models/autoformer/layers.py +352 -0
- dsipts/models/base.py +439 -0
- dsipts/models/base_v2.py +444 -0
- dsipts/models/crossformer/__init__.py +0 -0
- dsipts/models/crossformer/attn.py +118 -0
- dsipts/models/crossformer/cross_decoder.py +77 -0
- dsipts/models/crossformer/cross_embed.py +18 -0
- dsipts/models/crossformer/cross_encoder.py +99 -0
- dsipts/models/d3vae/__init__.py +0 -0
- dsipts/models/d3vae/diffusion_process.py +169 -0
- dsipts/models/d3vae/embedding.py +108 -0
- dsipts/models/d3vae/encoder.py +326 -0
- dsipts/models/d3vae/model.py +211 -0
- dsipts/models/d3vae/neural_operations.py +314 -0
- dsipts/models/d3vae/resnet.py +153 -0
- dsipts/models/d3vae/utils.py +630 -0
- dsipts/models/duet/__init__.py +0 -0
- dsipts/models/duet/layers.py +438 -0
- dsipts/models/duet/masked.py +202 -0
- dsipts/models/informer/__init__.py +0 -0
- dsipts/models/informer/attn.py +185 -0
- dsipts/models/informer/decoder.py +50 -0
- dsipts/models/informer/embed.py +125 -0
- dsipts/models/informer/encoder.py +100 -0
- dsipts/models/itransformer/Embed.py +142 -0
- dsipts/models/itransformer/SelfAttention_Family.py +355 -0
- dsipts/models/itransformer/Transformer_EncDec.py +134 -0
- dsipts/models/itransformer/__init__.py +0 -0
- dsipts/models/patchtst/__init__.py +0 -0
- dsipts/models/patchtst/layers.py +569 -0
- dsipts/models/samformer/__init__.py +0 -0
- dsipts/models/samformer/utils.py +154 -0
- dsipts/models/tft/__init__.py +0 -0
- dsipts/models/tft/sub_nn.py +234 -0
- dsipts/models/timexer/Layers.py +127 -0
- dsipts/models/timexer/__init__.py +0 -0
- dsipts/models/ttm/__init__.py +0 -0
- dsipts/models/ttm/configuration_tinytimemixer.py +307 -0
- dsipts/models/ttm/consts.py +16 -0
- dsipts/models/ttm/modeling_tinytimemixer.py +2099 -0
- dsipts/models/ttm/utils.py +438 -0
- dsipts/models/utils.py +624 -0
- dsipts/models/vva/__init__.py +0 -0
- dsipts/models/vva/minigpt.py +83 -0
- dsipts/models/vva/vqvae.py +459 -0
- dsipts/models/xlstm/__init__.py +0 -0
- dsipts/models/xlstm/xLSTM.py +255 -0
- dsipts-1.1.5.dist-info/METADATA +31 -0
- dsipts-1.1.5.dist-info/RECORD +81 -0
- dsipts-1.1.5.dist-info/WHEEL +5 -0
- dsipts-1.1.5.dist-info/top_level.txt +1 -0
dsipts/models/Duet.py
ADDED
@@ -0,0 +1,197 @@

## Copyright 2025 DUET (https://github.com/decisionintelligence/DUET)
## Code modified to align the notation and the batch generation
## extended to everything in the duet and autoformer folders

import torch
import torch.nn as nn
import numpy as np

from .duet.layers import Linear_extractor_cluster
from .duet.masked import Mahalanobis_mask, Encoder, EncoderLayer, FullAttention, AttentionLayer
from einops import rearrange

try:
    import lightning.pytorch as pl
    from .base_v2 import Base
    OLD_PL = False
except ImportError:
    import pytorch_lightning as pl
    OLD_PL = True
    from .base import Base
from .utils import QuantileLossMO, Permute, get_activation

from typing import List, Union
from ..data_structure.utils import beauty_string
from .utils import get_scope
from .utils import Embedding_cat_variables


class Duet(Base):
    handle_multivariate = True
    handle_future_covariates = True
    handle_categorical_variables = True
    handle_quantile_loss = True
    description = get_scope(handle_multivariate, handle_future_covariates, handle_categorical_variables, handle_quantile_loss)

    def __init__(self,
                 factor: int,
                 d_model: int,
                 n_head: int,
                 n_layer: int,
                 CI: bool,
                 d_ff: int,
                 noisy_gating: bool,
                 num_experts: int,
                 kernel_size: int,
                 hidden_size: int,
                 k: int,
                 dropout_rate: float = 0.1,
                 activation: str = '',
                 **kwargs) -> None:
        """Initializes the model with the specified parameters. https://github.com/decisionintelligence/DUET

        Args:
            factor (int): The factor for attention scaling. NOT USED, but kept from the original implementation.
            d_model (int): The dimensionality of the model.
            n_head (int): The number of attention heads.
            n_layer (int): The number of layers in the encoder.
            CI (bool): Perform channel-independent operations.
            d_ff (int): The dimensionality of the feedforward layer.
            noisy_gating (bool): Flag to indicate if noisy gating is used.
            num_experts (int): The number of experts in the mixture of experts.
            kernel_size (int): The size of the convolutional kernel.
            hidden_size (int): The size of the hidden layer.
            k (int): The number of clusters for the linear extractor.
            dropout_rate (float, optional): The dropout rate. Defaults to 0.1.
            activation (str, optional): The activation function to use. Defaults to ''.
            **kwargs: Additional keyword arguments.

        Raises:
            ValueError: If the activation function is not recognized.
        """

        super().__init__(**kwargs)
        if activation == 'torch.nn.SELU':
            beauty_string('SELU does not require BN', 'info', self.verbose)
            use_bn = False
        if isinstance(activation, str):
            activation = get_activation(activation)
        self.save_hyperparameters(logger=False)

        self.emb_past = Embedding_cat_variables(self.past_steps, self.emb_dim, self.embs_past, reduction_mode=self.reduction_mode, use_classical_positional_encoder=self.use_classical_positional_encoder, device=self.device)
        self.emb_fut = Embedding_cat_variables(self.future_steps, self.emb_dim, self.embs_fut, reduction_mode=self.reduction_mode, use_classical_positional_encoder=self.use_classical_positional_encoder, device=self.device)
        emb_past_out_channel = self.emb_past.output_channels
        emb_fut_out_channel = self.emb_fut.output_channels

        self.cluster = Linear_extractor_cluster(noisy_gating,
                                                num_experts,
                                                self.past_steps,
                                                k,
                                                d_model,
                                                self.past_channels + emb_past_out_channel,
                                                CI, kernel_size,
                                                hidden_size)
        self.CI = CI
        self.n_vars = self.out_channels
        self.mask_generator = Mahalanobis_mask(self.future_steps)
        self.Channel_transformer = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(
                            True,
                            factor,
                            attention_dropout=dropout_rate,
                            output_attention=0,
                        ),
                        d_model,
                        n_head,
                    ),
                    d_model,
                    d_ff,
                    dropout=dropout_rate,
                    activation=activation,
                )
                for _ in range(n_layer)
            ],
            norm_layer=torch.nn.LayerNorm(d_model)
        )

        self.linear_head = nn.Sequential(nn.Linear(d_model, self.future_steps), nn.Dropout(dropout_rate))

        dim = self.past_channels + emb_past_out_channel + emb_fut_out_channel + self.future_channels
        self.final_layer = nn.Sequential(activation(),
                                         nn.Linear(dim, dim * 2),
                                         activation(),
                                         nn.Linear(dim * 2, self.out_channels * self.mul))

    def forward(self, batch: dict) -> torch.Tensor:
        # x: [Batch, Input length, Channel]
        x_enc = batch['x_num_past'].to(self.device)
        idx_target = batch['idx_target'][0]
        BS = x_enc.shape[0]

        if 'x_cat_past' in batch.keys():
            emb_past = self.emb_past(BS, batch['x_cat_past'].to(self.device))
        else:
            emb_past = self.emb_past(BS, None)

        if 'x_cat_future' in batch.keys():
            emb_fut = self.emb_fut(BS, batch['x_cat_future'].to(self.device))
        else:
            emb_fut = self.emb_fut(BS, None)

        tmp_future = [emb_fut]
        if 'x_num_future' in batch.keys():
            x_future = batch['x_num_future'].to(self.device)
            tmp_future.append(x_future)

        x_enc = torch.concat([x_enc, emb_past], axis=-1)

        if self.CI:
            channel_independent_input = rearrange(x_enc, 'b l n -> (b n) l 1')
            reshaped_output, _ = self.cluster(channel_independent_input)
            temporal_feature = rearrange(reshaped_output, '(b n) l 1 -> b l n', b=x_enc.shape[0])
        else:
            temporal_feature, _ = self.cluster(x_enc)

        # B x d_model x n_vars -> B x n_vars x d_model
        temporal_feature = rearrange(temporal_feature, 'b d n -> b n d')
        if self.n_vars > 1:
            changed_input = rearrange(x_enc, 'b l n -> b n l')
            channel_mask = self.mask_generator(changed_input)

            channel_group_feature, _ = self.Channel_transformer(x=temporal_feature, attn_mask=channel_mask)

            output = self.linear_head(channel_group_feature)
        else:
            output = temporal_feature
            output = self.linear_head(output)

        output = rearrange(output, 'b n d -> b d n')
        output = self.cluster.revin(output, "denorm")
        tmp_future.append(output)
        tmp_future = torch.cat(tmp_future, 2)
        output = self.final_layer(tmp_future)

        return output.reshape(BS, self.future_steps, self.n_vars, self.mul)
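For orientation, here is a minimal sketch of the batch contract that Duet.forward consumes. The key names ('x_num_past', 'x_cat_past', 'x_cat_future', 'x_num_future', 'idx_target') come from the code above; the concrete sizes below are illustrative assumptions, not part of dsipts.

    import torch

    # Hypothetical sizes, purely for illustration.
    B, L, F = 32, 64, 16      # batch, past_steps, future_steps
    C_past, C_fut = 7, 3      # numeric past/future channels

    batch = {
        'x_num_past':   torch.randn(B, L, C_past),        # numeric past covariates + targets
        'x_cat_past':   torch.randint(0, 5, (B, L, 2)),   # optional categorical past features
        'x_cat_future': torch.randint(0, 5, (B, F, 2)),   # optional categorical future features
        'x_num_future': torch.randn(B, F, C_fut),         # optional known future numerics
        'idx_target':   [torch.tensor([0])],              # target column index(es) in x_num_past
    }
    # model(batch) returns a tensor of shape (B, F, out_channels, mul), per the
    # final reshape above; mul presumably counts quantiles when quantile loss is used.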
dsipts/models/ITransformer.py
ADDED
@@ -0,0 +1,167 @@
## Copyright https://github.com/thuml/iTransformer?tab=MIT-1-ov-file#readme
## Modified for notation alignment and batch structure
## extended to what is inside the itransformer folder

import torch
import torch.nn as nn
import numpy as np
from .itransformer.Transformer_EncDec import Encoder, EncoderLayer
from .itransformer.SelfAttention_Family import FullAttention, AttentionLayer
from .itransformer.Embed import DataEmbedding_inverted

try:
    import lightning.pytorch as pl
    from .base_v2 import Base
    OLD_PL = False
except ImportError:
    import pytorch_lightning as pl
    OLD_PL = True
    from .base import Base
from .utils import QuantileLossMO, Permute, get_activation

from typing import List, Union
from ..data_structure.utils import beauty_string
from .utils import get_scope
from .utils import Embedding_cat_variables


class ITransformer(Base):
    handle_multivariate = True
    handle_future_covariates = True
    handle_categorical_variables = True
    handle_quantile_loss = True
    description = get_scope(handle_multivariate, handle_future_covariates, handle_categorical_variables, handle_quantile_loss)

    def __init__(self,
                 # specific params
                 hidden_size: int,
                 d_model: int,
                 n_head: int,
                 n_layer_decoder: int,
                 use_norm: bool,
                 class_strategy: str = 'projection',  # projection/average/cls_token
                 dropout_rate: float = 0.1,
                 activation: str = '',
                 **kwargs) -> None:
        """Initialize the ITransformer model for time series forecasting.

        This class implements the Inverted Transformer architecture as described in the paper
        "iTransformer: Inverted Transformers Are Effective for Time Series Forecasting"
        (https://arxiv.org/pdf/2310.06625).

        Args:
            hidden_size (int): The first embedding size of the model ('r' in the paper).
            d_model (int): The second embedding size (r-tilde in the paper). Should be smaller than hidden_size.
            n_head (int): The number of attention heads.
            n_layer_decoder (int): The number of layers in the decoder.
            use_norm (bool): Flag to indicate whether to use normalization.
            class_strategy (str, optional): The strategy for classification, can be 'projection', 'average', or 'cls_token'. Defaults to 'projection'.
            dropout_rate (float, optional): The dropout rate for regularization. Defaults to 0.1.
            activation (str, optional): The activation function to be used. Defaults to ''.
            **kwargs: Additional keyword arguments.

        Raises:
            ValueError: If the activation function is not recognized.
        """

        super().__init__(**kwargs)
        if activation == 'torch.nn.SELU':
            beauty_string('SELU does not require BN', 'info', self.verbose)
            use_bn = False
        if isinstance(activation, str):
            activation = get_activation(activation)
        self.save_hyperparameters(logger=False)

        self.emb_past = Embedding_cat_variables(self.past_steps, self.emb_dim, self.embs_past, reduction_mode=self.reduction_mode, use_classical_positional_encoder=self.use_classical_positional_encoder, device=self.device)
        self.emb_fut = Embedding_cat_variables(self.future_steps, self.emb_dim, self.embs_fut, reduction_mode=self.reduction_mode, use_classical_positional_encoder=self.use_classical_positional_encoder, device=self.device)
        emb_past_out_channel = self.emb_past.output_channels
        emb_fut_out_channel = self.emb_fut.output_channels

        self.output_attention = False  ## no need to output attention
        self.use_norm = use_norm
        # Embedding
        self.enc_embedding = DataEmbedding_inverted(self.past_steps, d_model, embed_type='what?', freq='what?', dropout=dropout_rate)  ## embed_type, freq not used inside
        self.class_strategy = class_strategy
        # Encoder-only architecture
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(False, factor=0.1, attention_dropout=dropout_rate,  ## factor is not used in the FullAttention
                                      output_attention=self.output_attention), d_model, n_head),  ## no need to output attention
                    d_model,
                    hidden_size,
                    dropout=dropout_rate,
                    activation=activation()
                ) for _ in range(n_layer_decoder)
            ],
            norm_layer=torch.nn.LayerNorm(d_model)
        )
        self.projector = nn.Linear(d_model, self.future_steps * self.mul, bias=True)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        if self.use_norm:
            # Normalization from Non-stationary Transformer
            means = x_enc.mean(1, keepdim=True).detach()
            x_enc = x_enc - means
            stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
            x_enc /= stdev

        _, _, N = x_enc.shape  # B L N
        # B: batch_size; E: d_model;
        # L: seq_len; S: pred_len;
        # N: number of variates (tokens), can also include covariates

        # Embedding
        # B L N -> B N E (B L N -> B L E in the vanilla Transformer)
        enc_out = self.enc_embedding(x_enc, x_mark_enc)  # covariates (e.g. timestamp) can also be embedded as tokens

        # B N E -> B N E (B L E -> B L E in the vanilla Transformer)
        # the dimensions of the embedded time series have been inverted, and are then processed by native attn, layernorm and ffn modules
        enc_out, attns = self.encoder(enc_out, attn_mask=None)

        # B N E -> B N S -> B S N
        dec_out = self.projector(enc_out).permute(0, 2, 1)[:, :, :N]  # filter the covariates

        if self.use_norm:
            # De-Normalization from Non-stationary Transformer
            dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.future_steps * self.mul, 1))
            dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.future_steps * self.mul, 1))

        return dec_out

    def forward(self, batch: dict) -> torch.Tensor:

        x_enc = batch['x_num_past'].to(self.device)
        BS = x_enc.shape[0]
        if 'x_cat_future' in batch.keys():
            emb_fut = self.emb_fut(BS, batch['x_cat_future'].to(self.device))
        else:
            emb_fut = self.emb_fut(BS, None)
        if 'x_cat_past' in batch.keys():
            emb_past = self.emb_past(BS, batch['x_cat_past'].to(self.device))
        else:
            emb_past = self.emb_past(BS, None)

        ## row 124 of Transformer/experiments/exp_long_term_forecasting.py, but actually NOT USED!
        x_dec = torch.zeros(x_enc.shape[0], self.past_steps, self.out_channels).float().to(self.device)
        x_dec = torch.cat([batch['y'].to(self.device), x_dec], dim=1).float()

        dec_out = self.forecast(x_enc, emb_past, x_dec, emb_fut)
        idx_target = batch['idx_target'][0]
        return dec_out[:, :, idx_target].reshape(BS, self.future_steps, self.out_channels, self.mul)

        # return dec_out[:, -self.pred_len:, :]  # [B, L, D]
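The forecast method wraps the encoder in the per-instance normalization trick from the Non-stationary Transformer: each series is standardized over its own lookback window before encoding, and the forecast is rescaled afterwards. A self-contained sketch of just that step, assuming a (batch, time, series) layout and using broadcasting in place of the explicit repeat above; names are illustrative, not dsipts API.

    import torch

    def instance_norm(x_enc: torch.Tensor):
        """Standardize each series over its lookback window (dim=1 is time)."""
        means = x_enc.mean(1, keepdim=True).detach()
        x = x_enc - means
        stdev = torch.sqrt(torch.var(x, dim=1, keepdim=True, unbiased=False) + 1e-5)
        return x / stdev, means, stdev

    def instance_denorm(dec_out: torch.Tensor, means, stdev):
        """Undo the standardization on the model output (broadcast over time)."""
        return dec_out * stdev + means

    x = torch.randn(8, 64, 7)                 # B=8, L=64, N=7 series
    x_norm, mu, sigma = instance_norm(x)
    # ... model maps x_norm to a forecast of shape (8, pred_len, 7) ...
    y_hat = torch.randn(8, 16, 7)
    y_hat = instance_denorm(y_hat, mu, sigma)  # back on the original scale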
dsipts/models/Informer.py
ADDED
@@ -0,0 +1,180 @@

## Copyright 2020 Informer (https://github.com/zhouhaoyi/Informer2020/tree/main/models)
## Code modified to align the notation and the batch generation
## extended to everything in the informer and autoformer folders
from torch import nn
import torch

try:
    import lightning.pytorch as pl
    from .base_v2 import Base
    OLD_PL = False
except ImportError:
    import pytorch_lightning as pl
    OLD_PL = True
    from .base import Base
from typing import List, Union

from .informer.encoder import Encoder, EncoderLayer, ConvLayer
from .informer.decoder import Decoder, DecoderLayer
from .informer.attn import FullAttention, ProbAttention, AttentionLayer
from .informer.embed import DataEmbedding
from ..data_structure.utils import beauty_string
# from .utils import Embedding_cat_variables  # not used here, custom cat embedding
from .utils import get_scope


class Informer(Base):
    handle_multivariate = True
    handle_future_covariates = True
    handle_categorical_variables = True
    handle_quantile_loss = True
    description = get_scope(handle_multivariate, handle_future_covariates, handle_categorical_variables, handle_quantile_loss)

    def __init__(self,
                 d_model: int,
                 hidden_size: int,
                 n_layer_encoder: int,
                 n_layer_decoder: int,
                 mix: bool = True,
                 activation: str = 'torch.nn.ReLU',
                 remove_last=False,
                 attn: str = 'prob',
                 distil: bool = True,
                 factor: int = 5,
                 n_head: int = 1,
                 dropout_rate: float = 0.1,
                 **kwargs) -> None:
        """Initialize the model with specified parameters. https://github.com/zhouhaoyi/Informer2020/tree/main/models

        Args:
            d_model (int): The dimensionality of the model.
            hidden_size (int): The size of the hidden layers.
            n_layer_encoder (int): The number of layers in the encoder.
            n_layer_decoder (int): The number of layers in the decoder.
            mix (bool, optional): Whether to use mixed attention. Defaults to True.
            activation (str, optional): The activation function to use. Defaults to 'torch.nn.ReLU'.
            remove_last (bool, optional): Whether to subtract the last observed target value before encoding (added back to the output). Defaults to False.
            attn (str, optional): The type of attention mechanism to use. Defaults to 'prob'.
            distil (bool, optional): Whether to use distillation. Defaults to True.
            factor (int, optional): The factor for attention. Defaults to 5.
            n_head (int, optional): The number of attention heads. Defaults to 1.
            dropout_rate (float, optional): The dropout rate. Defaults to 0.1.
            **kwargs: Additional keyword arguments.

        Raises:
            ValueError: If any of the parameters are invalid.

        Notes:
            Be sure to set split_params: shift: ${model_configs.future_steps}, as it is required!
        """

        super().__init__(**kwargs)
        self.save_hyperparameters(logger=False)
        beauty_string("BE SURE TO SET UP split_params: shift: ${model_configs.future_steps} BECAUSE IT IS REQUIRED", 'info', True)

        self.remove_last = remove_last

        self.enc_embedding = DataEmbedding(self.past_channels, d_model, self.embs_past, dropout_rate)
        self.dec_embedding = DataEmbedding(self.future_channels, d_model, self.embs_fut, dropout_rate)
        # Attention
        Attn = ProbAttention if attn == 'prob' else FullAttention
        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(Attn(False, factor, attention_dropout=dropout_rate, output_attention=False),
                                   d_model, n_head, mix=False),
                    d_model,
                    hidden_size,
                    dropout=dropout_rate,
                    activation=activation
                ) for _ in range(n_layer_encoder)
            ],
            [
                ConvLayer(
                    d_model
                ) for _ in range(n_layer_encoder - 1)
            ] if distil else None,
            norm_layer=torch.nn.LayerNorm(d_model)
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(Attn(True, factor, attention_dropout=dropout_rate, output_attention=False),
                                   d_model, n_head, mix=mix),
                    AttentionLayer(FullAttention(False, factor, attention_dropout=dropout_rate, output_attention=False),
                                   d_model, n_head, mix=False),
                    d_model,
                    hidden_size,
                    dropout=dropout_rate,
                    activation=activation,
                )
                for _ in range(n_layer_decoder)
            ],
            norm_layer=torch.nn.LayerNorm(d_model)
        )

        self.projection = nn.Linear(d_model, self.out_channels * self.mul, bias=True)

    def forward(self, batch: dict) -> torch.Tensor:
        # x_enc, x_mark_enc, x_dec, x_mark_dec, enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None

        x_enc = batch['x_num_past'].to(self.device)
        idx_target_future = batch['idx_target_future'][0]

        if 'x_cat_past' in batch.keys():
            x_mark_enc = batch['x_cat_past'].to(self.device)
        else:
            x_mark_enc = None

        enc_self_mask = None

        x_dec = batch['x_num_future'].to(self.device)
        x_dec[:, -self.future_steps:, idx_target_future] = 0

        if 'x_cat_future' in batch.keys():
            x_mark_dec = batch['x_cat_future'].to(self.device)
        else:
            x_mark_dec = None
        dec_self_mask = None
        dec_enc_mask = None

        if self.remove_last:
            idx_target = batch['idx_target'][0]
            x_start = x_enc[:, -1, idx_target].unsqueeze(1)
            x_enc[:, :, idx_target] -= x_start

        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
        dec_out = self.projection(dec_out)

        # dec_out = self.end_conv1(dec_out)
        # dec_out = self.end_conv2(dec_out.transpose(2,1)).transpose(1,2)

        res = dec_out[:, -self.future_steps:, :].unsqueeze(3)
        if self.remove_last:
            res += x_start.unsqueeze(1)
        BS = res.shape[0]
        return res.reshape(BS, self.future_steps, -1, self.mul)
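The remove_last flag implements last-value anchoring: the last observed target value is subtracted from the encoder input so the network predicts deviations from it, then added back to the forecast. A standalone sketch of the trick under assumed shapes (tensor names are illustrative, not dsipts API); note the clone, which avoids the in-place mutation of the caller's batch that the code above performs.

    import torch

    B, L, F = 4, 48, 12                     # batch, lookback, horizon
    x_enc = torch.randn(B, L, 3)            # 3 channels; channel 0 is the target
    idx_target = torch.tensor([0])

    # anchor: last observed target value, shape (B, 1, 1)
    x_start = x_enc[:, -1, idx_target].unsqueeze(1)
    x_enc = x_enc.clone()                   # keep the original batch untouched
    x_enc[:, :, idx_target] -= x_start      # network now sees deviations from the anchor

    # ... model produces res with shape (B, F, n_out, mul) ...
    res = torch.randn(B, F, 1, 1)
    res += x_start.unsqueeze(1)             # shift forecasts back to the original level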