dsipts 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dsipts might be problematic; see the release advisory for more details.
- dsipts/__init__.py +48 -0
- dsipts/data_management/__init__.py +0 -0
- dsipts/data_management/monash.py +338 -0
- dsipts/data_management/public_datasets.py +162 -0
- dsipts/data_structure/__init__.py +0 -0
- dsipts/data_structure/data_structure.py +1167 -0
- dsipts/data_structure/modifiers.py +213 -0
- dsipts/data_structure/utils.py +173 -0
- dsipts/models/Autoformer.py +199 -0
- dsipts/models/CrossFormer.py +152 -0
- dsipts/models/D3VAE.py +196 -0
- dsipts/models/Diffusion.py +818 -0
- dsipts/models/DilatedConv.py +342 -0
- dsipts/models/DilatedConvED.py +310 -0
- dsipts/models/Duet.py +197 -0
- dsipts/models/ITransformer.py +167 -0
- dsipts/models/Informer.py +180 -0
- dsipts/models/LinearTS.py +222 -0
- dsipts/models/PatchTST.py +181 -0
- dsipts/models/Persistent.py +44 -0
- dsipts/models/RNN.py +213 -0
- dsipts/models/Samformer.py +139 -0
- dsipts/models/TFT.py +269 -0
- dsipts/models/TIDE.py +296 -0
- dsipts/models/TTM.py +252 -0
- dsipts/models/TimeXER.py +184 -0
- dsipts/models/VQVAEA.py +299 -0
- dsipts/models/VVA.py +247 -0
- dsipts/models/__init__.py +0 -0
- dsipts/models/autoformer/__init__.py +0 -0
- dsipts/models/autoformer/layers.py +352 -0
- dsipts/models/base.py +439 -0
- dsipts/models/base_v2.py +444 -0
- dsipts/models/crossformer/__init__.py +0 -0
- dsipts/models/crossformer/attn.py +118 -0
- dsipts/models/crossformer/cross_decoder.py +77 -0
- dsipts/models/crossformer/cross_embed.py +18 -0
- dsipts/models/crossformer/cross_encoder.py +99 -0
- dsipts/models/d3vae/__init__.py +0 -0
- dsipts/models/d3vae/diffusion_process.py +169 -0
- dsipts/models/d3vae/embedding.py +108 -0
- dsipts/models/d3vae/encoder.py +326 -0
- dsipts/models/d3vae/model.py +211 -0
- dsipts/models/d3vae/neural_operations.py +314 -0
- dsipts/models/d3vae/resnet.py +153 -0
- dsipts/models/d3vae/utils.py +630 -0
- dsipts/models/duet/__init__.py +0 -0
- dsipts/models/duet/layers.py +438 -0
- dsipts/models/duet/masked.py +202 -0
- dsipts/models/informer/__init__.py +0 -0
- dsipts/models/informer/attn.py +185 -0
- dsipts/models/informer/decoder.py +50 -0
- dsipts/models/informer/embed.py +125 -0
- dsipts/models/informer/encoder.py +100 -0
- dsipts/models/itransformer/Embed.py +142 -0
- dsipts/models/itransformer/SelfAttention_Family.py +355 -0
- dsipts/models/itransformer/Transformer_EncDec.py +134 -0
- dsipts/models/itransformer/__init__.py +0 -0
- dsipts/models/patchtst/__init__.py +0 -0
- dsipts/models/patchtst/layers.py +569 -0
- dsipts/models/samformer/__init__.py +0 -0
- dsipts/models/samformer/utils.py +154 -0
- dsipts/models/tft/__init__.py +0 -0
- dsipts/models/tft/sub_nn.py +234 -0
- dsipts/models/timexer/Layers.py +127 -0
- dsipts/models/timexer/__init__.py +0 -0
- dsipts/models/ttm/__init__.py +0 -0
- dsipts/models/ttm/configuration_tinytimemixer.py +307 -0
- dsipts/models/ttm/consts.py +16 -0
- dsipts/models/ttm/modeling_tinytimemixer.py +2099 -0
- dsipts/models/ttm/utils.py +438 -0
- dsipts/models/utils.py +624 -0
- dsipts/models/vva/__init__.py +0 -0
- dsipts/models/vva/minigpt.py +83 -0
- dsipts/models/vva/vqvae.py +459 -0
- dsipts/models/xlstm/__init__.py +0 -0
- dsipts/models/xlstm/xLSTM.py +255 -0
- dsipts-1.1.5.dist-info/METADATA +31 -0
- dsipts-1.1.5.dist-info/RECORD +81 -0
- dsipts-1.1.5.dist-info/WHEEL +5 -0
- dsipts-1.1.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
|
|
2
|
+
from abc import abstractmethod,ABC
|
|
3
|
+
from sklearn.cluster import BisectingKMeans
|
|
4
|
+
from scipy.stats import bootstrap
|
|
5
|
+
from torch.utils.data import Dataset
|
|
6
|
+
import torch
|
|
7
|
+
import numpy as np
|
|
8
|
+
import logging
|
|
9
|
+
from .utils import MyDataset
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class VVADataset(Dataset):
    """Dataset of tokenized time-series segments for the VVA (GPT-like) model.

    Each item concatenates the input tokens and the solution tokens into one
    sequence and returns it shifted by one position, minGPT-style: ``x_emb``
    is the sequence without its last token, ``y_emb`` is the sequence without
    its first token, with positions that must not contribute to the loss
    masked to -1.
    """

    def __init__(self,x,y,y_orig,t,length_in,length_out, num_digits):
        """
        Args:
            x: tokenized past sequences (samples x length_in).
            y: tokenized target sequences (samples x length_out).
            y_orig: original (un-tokenized) target values.
            t: time array associated with the samples.
            length_in (int): number of input tokens per sample.
            length_out (int): number of output tokens per sample.
            num_digits (int): vocabulary size used by the tokenizer.
        """
        self.length_in = length_in
        self.length_out = length_out
        self.num_digits = num_digits
        self.x_emb = torch.tensor(x).long()
        self.y_emb = torch.tensor(y).long()
        self.y = torch.tensor(y_orig)
        self.t = t

    def __len__(self):
        """

        :meta private:
        """
        return len(self.x_emb)

    def get_vocab_size(self):
        """Vocabulary size exposed to the GPT model.

        :meta private:
        """
        return self.num_digits

    def get_block_size(self):
        """Length of the shifted sequences fed to the transformer.

        :meta private:
        """
        # BUGFIX: the original returned `self.length * 2 - 1`, but no
        # `self.length` attribute is ever set, so calling this always raised
        # AttributeError. The concatenated sequence holds
        # length_in + length_out tokens and is shifted by one position.
        return self.length_in + self.length_out - 1

    def __getitem__(self, idx):
        """
        :meta private:
        """

        inp = self.x_emb[idx]
        sol = self.y_emb[idx]
        cat = torch.cat((inp, sol), dim=0)

        # the inputs to the transformer will be the offset sequence
        x = cat[:-1].clone()
        y = cat[1:].clone()
        # we only want to predict at output locations, mask out the loss at the input locations
        # NOTE(review): this masks the first `length_out-1` targets; masking the
        # whole input region would use `length_in-1`. The two coincide when
        # length_in == length_out — confirm intent before changing.
        y[:self.length_out-1] = -1
        return {'x_emb':x, 'y_emb':y, 'y':self.y[idx]}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Modifier(ABC):
    def __init__(self,**kwargs):
        """Base class for dataset modifiers.

        Any keyword argument passed here is stored as an attribute of the
        instance, so it is persisted together with the timeseries when it is
        saved.
        """
        super().__init__()
        self.__dict__.update(kwargs)

    @abstractmethod
    def fit_transform(self,train:MyDataset,val:MyDataset)->[Dataset,Dataset]:
        """Fit the modifier and transform the training/validation datasets.

        Called once before training: it must turn the standard datasets into
        whatever representation the downstream model expects.

        Args:
            train (MyDataset): initial train `Dataset`
            val (MyDataset): initial validation `Dataset`

        Returns:
            Dataset, Dataset: transformed train and validation `Datasets`
        """
        return train,val

    @abstractmethod
    def transform(self,test:MyDataset)->Dataset:
        """Transform-only counterpart of `fit_transform`.

        Used at inference time, right before calling the inference method.

        Args:
            test (MyDataset): initial test `Dataset`

        Returns:
            Dataset: transformed test `Dataset`
        """
        return test

    @abstractmethod
    def inverse_transform(self,res:np.array,real:np.array)->[np.array,np.array]:
        """Map raw predictions (and real values) back to the original space.

        Args:
            res (np.array): raw prediction
            real (np.array): raw real data

        Returns:
            [np.array, np.array]: inverse transformation of the predictions
            and of the real data
        """
        return res
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class ModifierVVA(Modifier):
    """This modifier is used for the custom model VVA. The initial data are divided in smaller segments and then tokenized using a clustering procedure (fit_transform).
    The centroids of the clusters are stored. A GPT model is then trained on the tokens and the predictions are reverted using the centroid information.

    Relies on attributes set via the base-class constructor kwargs:
    ``token_split`` (segment length) and ``max_voc_size`` (number of clusters,
    i.e. vocabulary size).
    """


    def fit_transform(self,train:MyDataset,val:MyDataset)->[Dataset,Dataset]:
        """BisectingKMeans is used on segments of length `token_split`

        Args:
            train (MyDataset): initial train `Dataset`
            val (MyDataset): initial validation `Dataset`

        Returns:
            Dataset, Dataset: transformed train and validation `Datasets`
        """
        idx_target = train.idx_target
        # NOTE(review): `print(...)` returns None, so the assert message is always None
        assert len(idx_target)==1, print('This works only with single channel prediction')

        samples,length,_ = train.data['y'].shape
        # cut the past target channel into consecutive segments of `token_split` points
        tmp = train.data['x_num_past'][:,:,idx_target[0]].reshape(samples,-1,self.token_split)
        _,length_in, _ = tmp.shape
        length_out = length//self.token_split
        tmp = tmp.reshape(-1,self.token_split)
        # tokenize the segments: each cluster id becomes a vocabulary token
        cl = BisectingKMeans(n_clusters=self.max_voc_size)
        clusters = cl.fit_predict(tmp)
        self.cl = cl
        self.centroids = []
        cls, counts = np.unique(clusters,return_counts=True)
        logging.info(counts)

        # for each cluster store a (low, median, high) profile per segment
        # position, obtained by bootstrapping the median (90% CI, 50 resamples)
        for i in cls:
            res = []
            data = tmp[np.where(clusters==i)[0]]
            if len(data)>1:
                for j in range(data.shape[1]):
                    bootstrap_ci = bootstrap((data[:,j],), np.median,n_resamples=50, confidence_level=0.9,random_state=1, method='percentile')
                    res.append([bootstrap_ci.confidence_interval.low,np.median(data[:,j]),bootstrap_ci.confidence_interval.high])
                self.centroids.append(np.array(res))
            else:
                # singleton cluster: replicate the single segment as low/median/high
                self.centroids.append(np.repeat(data.T,3,axis=1))

        self.centroids = np.array(self.centroids) ##clusters x length x 3

        # map past and future windows of both splits into token sequences
        x_train = clusters.reshape(-1,length_in)
        samples = train.data['y'].shape[0]
        y_train = cl.predict(train.data['y'].squeeze().reshape(samples,-1,self.token_split).reshape(-1,self.token_split)).reshape(-1,length_out)
        samples = val.data['y'].shape[0]
        y_validation = cl.predict(val.data['y'].squeeze().reshape(samples,-1,self.token_split).reshape(-1,self.token_split)).reshape(-1,length_out)
        x_validation = cl.predict(val.data['x_num_past'][:,:,idx_target[0]].reshape(samples,-1,self.token_split).reshape(-1,self.token_split)).reshape(-1,length_in)
        train_dataset = VVADataset(x_train,y_train,train.data['y'].squeeze(),train.t,length_in,length_out,self.max_voc_size)
        validation_dataset = VVADataset(x_validation,y_validation,val.data['y'].squeeze(),val.t,length_in,length_out,self.max_voc_size)
        return train_dataset,validation_dataset



    def transform(self,test:MyDataset)->Dataset:
        """Similar to `fit_transform` but only transformation task will be performed
        Args:
            test (MyDataset): test val `Dataset`

        Returns:
            Dataset: transformed test `Dataset`
        """

        idx_target = test.idx_target

        samples,length,_ = test.data['y'].shape
        tmp = test.data['x_num_past'][:,:,idx_target[0]].reshape(samples,-1,self.token_split)
        _,length_in, _ = tmp.shape
        length_out = length//self.token_split

        tmp = tmp.reshape(-1,self.token_split)
        # tokenize with the clustering model fitted in fit_transform
        clusters = self.cl.predict(tmp)
        x = clusters.reshape(-1,length_in)
        y = self.cl.predict(test.data['y'].squeeze().reshape(samples,-1,self.token_split).reshape(-1,self.token_split)).reshape(-1,length_out)

        return VVADataset(x,y,test.data['y'].squeeze(),test.t,length_in,length_out,self.max_voc_size)

    def inverse_transform(self,res:np.array,real:np.array)->[np.array,np.array]:
        """The results must be reverted respect to the prediction task

        Args:
            res (np.array): raw prediction
            real (np.array): raw real data

        Returns:
            np.array: inverse transformation of the predictions
        """
        tot = []
        # NOTE(review): the nested loops suggest `res` holds, per sample, a
        # collection of candidate token sequences that are averaged — confirm
        # the exact shape against the VVA inference code.
        for sample in res:
            tmp_sample = []
            for index in sample:
                tmp = []
                for i in index:
                    # replace each token with its stored (low, median, high) centroid profile
                    tmp.append(self.centroids[i])
                tmp = np.array(tmp)
                if tmp.shape[0]==1:
                    tmp2 = tmp[0,:,:]
                else:
                    # average candidate profiles and widen the band by ~1.96 std
                    tmp2 = tmp.mean(axis=0)
                    tmp2[:,0] -= 1.96*tmp.std(axis=0)[:,0] #using confidence interval
                    tmp2[:,2] += 1.96*tmp.std(axis=0)[:,2]
                tmp_sample.append(tmp2)
            tot.append(np.vstack(tmp_sample))

        return np.expand_dims(np.stack(tot),2),np.expand_dims(real,2)
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Union
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from torch.utils.data import Dataset
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
from lightning.pytorch.callbacks import Callback
|
|
9
|
+
except:
|
|
10
|
+
from pytorch_lightning import Callback
|
|
11
|
+
import torch
|
|
12
|
+
import os
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Union
|
|
15
|
+
def beauty_string(message:str,type:str,verbose:bool):
    """Log *message* through :mod:`logging`, optionally with decorative framing.

    Args:
        message (str): text to log.
        type (str): layout style: ``'block'`` draws a full frame around the
            message, ``'section'`` a ruler above and below, ``'info'`` centers
            it, anything else logs it unchanged.
        verbose (bool): when not ``True`` nothing is logged at all.
    """
    size = 150
    if verbose is not True:
        return
    if type == 'block':
        # NOTE: the border width is computed from a 100-column budget while the
        # printed line is `size` columns wide (kept as in the original).
        border = max((100 - len(message)) // 2 - 5, 0)
        gap = ' ' * (size - border * 2)
        logging.info('\n')
        logging.info(f"{'#'*size}")
        logging.info(f"{'#'*border}{gap}{'#'*border}")
        logging.info(f"{ message:^{size}}")
        logging.info(f"{'#'*border}{gap}{'#'*border}")
        logging.info(f"{'#'*size}")
    elif type == 'section':
        logging.info('\n')
        logging.info(f"{'#'*size}")
        logging.info(f"{ message:^{size}}")
        logging.info(f"{'#'*size}")
    elif type == 'info':
        logging.info(f"{ message:^{size}}")
    else:
        logging.info(message)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def extend_time_df(x:pd.DataFrame,freq:Union[str,int],group:Union[str,None]=None,global_minmax:bool=False)-> pd.DataFrame:
    """Build a dense time axis spanning the observed range of ``x``.

    Args:
        x (pd.DataFrame): dataframe containing the column ``time``
        freq (Union[str,int]): step of the generated axis; a pandas frequency
            string for datetimes, or an integer step for integer times
        group (Union[str,None]): if not None, one axis is generated per value
            of this column, default None
        global_minmax (bool): if True the min/max are computed globally and
            shared by every group (usually used for stacked models)

    Returns:
        pd.DataFrame: a dataframe whose ``time`` column ranges from the
        minimum of ``x`` to its maximum with frequency `freq`
    """
    # NOTE: integer steps follow range/np.arange semantics (upper bound
    # excluded) while pd.date_range includes the upper bound.
    if group is None:
        if isinstance(freq, int):
            times = list(range(x.time.min(), x.time.max(), freq))
        else:
            times = pd.date_range(x.time.min(), x.time.max(), freq=freq)
        return pd.DataFrame({'time': times})

    uniques = x[group].unique()
    if global_minmax:
        # every group shares the global extremes
        _min = pd.DataFrame({group: uniques, 'time': x.time.min()})
        _max = pd.DataFrame({group: uniques, 'time': x.time.max()})
    else:
        _min = x.groupby(group).time.min().reset_index()
        _max = x.groupby(group).time.max().reset_index()

    pieces = []
    for value in uniques:
        lo = _min.time[_min[group] == value].values[0]
        hi = _max.time[_max[group] == value].values[0]
        if isinstance(freq, int):
            axis = np.arange(lo, hi, freq)
        else:
            axis = pd.date_range(lo, hi, freq=freq)
        pieces.append(pd.DataFrame({group: value, 'time': axis}))

    return pd.concat(pieces, ignore_index=True)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class MetricsCallback(Callback):
    """PyTorch Lightning metric callback.

    Accumulates the losses reported by the trainer and dumps them to CSV files
    inside ``dirpath`` during validation and at the end of training.

    :meta private:
    """

    def __init__(self,dirpath):
        # dirpath: directory where the loss CSV files are written
        super().__init__()
        self.dirpath = dirpath
        # accumulated history; keys must match the names logged by the model
        self.metrics = {'val_loss':[],'train_loss':[]}



    def on_validation_end(self, trainer, pl_module):
        # Snapshot every callback metric, then persist an aligned copy so the
        # losses can be inspected while training is still running.
        for c in trainer.callback_metrics:
            self.metrics[c].append(trainer.callback_metrics[c].item())
        ##Write csv in a convenient way
        tmp = self.metrics.copy()
        # align lengths: validation runs more often than training epochs
        if len(tmp['train_loss']) >0:
            tmp['val_loss'] = tmp['val_loss'][-len(tmp['train_loss']):]
        else:
            tmp['val_loss'] = tmp['val_loss'][2:]

        losses = pd.DataFrame(tmp)
        losses.to_csv(os.path.join(self.dirpath,'loss.csv'),index=False)


    def on_train_end(self, trainer, pl_module):
        # Final dump of the full loss history collected during training.
        losses = self.metrics
        ## I don't know why, but the first two validation calls happen before training starts
        if len(losses['train_loss']) >0:
            losses['val_loss'] =losses['val_loss'][-len(losses['train_loss']):]
        else:
            losses['val_loss'] = losses['val_loss'][2:]

        #losses['val_loss'] = losses['val_loss'][2:]
        losses = pd.DataFrame(losses)
        ## workaround for runs with more than one GPU: each rank writes its own
        ## randomly-named file instead of sharing state
        losses.to_csv(os.path.join(self.dirpath,f'{np.random.randint(10000)}__losses__.csv'),index=False)
        print("Saving losses on file because multigpu not working")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class MyDataset(Dataset):

    def __init__(self, data:dict,t:np.array,groups:np.array,idx_target:Union[np.array,None],idx_target_future:Union[np.array,None])->torch.utils.data.Dataset:
        """Torch ``Dataset`` wrapping pre-built arrays of past/future variables.

        While training, each returned item is a batch containing the standard
        keys of ``data`` (plus the target indices when provided).

        Args:
            data (dict): mapping from key to np.array holding the data.
                Expected keys: ``y`` (target variable(s)), ``x_num_past`` /
                ``x_num_future`` (numerical past/future variables) and
                ``x_cat_past`` / ``x_cat_future`` (categorical past/future
                variables).
            t (np.array): time array aligned with the target variables.
            groups (np.array): group labels of the samples.
            idx_target (Union[np.array,None]): indices of the target features
                inside the past arrays (for differential analysis or
                detrending strategies).
            idx_target_future (Union[np.array,None]): indices of the target
                features inside the future arrays.

        Returns:
            torch.utils.data.Dataset: a torch Dataset ready for a DataLoader.
        """
        self.data = data
        self.t = t
        self.groups = groups
        self.idx_target = None if idx_target is None else np.array(idx_target)
        self.idx_target_future = None if idx_target_future is None else np.array(idx_target_future)



    def __len__(self):
        # number of samples == first axis of the past numerical block
        return len(self.data['x_num_past'])

    def __getitem__(self, idxs):
        sample = {key: values[idxs] for key, values in self.data.items()}
        if self.idx_target is not None:
            sample['idx_target'] = self.idx_target
        if self.idx_target_future is not None:
            sample['idx_target_future'] = self.idx_target_future
        return sample
|
|
166
|
+
|
|
167
|
+
class ActionEnum(Enum):
    """Action of a categorical variable: whether its effect is combined with
    the signal multiplicatively or additively.

    :meta private:
    """
    multiplicative: str = 'multiplicative'
    additive: str = 'additive'
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
## Copyright 2022 DLinear Authors (https://github.com/cure-lab/LTSF-Linear/tree/main?tab=Apache-2.0-1-ov-file#readme)
|
|
2
|
+
## Code modified for align the notation and the batch generation
|
|
3
|
+
## extended to all present in informer, autoformer folder
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
from torch import nn
|
|
7
|
+
import torch
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
import lightning.pytorch as pl
|
|
11
|
+
from .base_v2 import Base
|
|
12
|
+
OLD_PL = False
|
|
13
|
+
except:
|
|
14
|
+
import pytorch_lightning as pl
|
|
15
|
+
OLD_PL = True
|
|
16
|
+
from .base import Base
|
|
17
|
+
from typing import List,Union
|
|
18
|
+
from ..data_structure.utils import beauty_string
|
|
19
|
+
from .utils import get_activation,get_scope,QuantileLossMO
|
|
20
|
+
from .autoformer.layers import AutoCorrelation, AutoCorrelationLayer, Encoder, Decoder,\
|
|
21
|
+
EncoderLayer, DecoderLayer, my_Layernorm, series_decomp,PositionalEmbedding
|
|
22
|
+
from .utils import Embedding_cat_variables
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Autoformer(Base):
    # capability flags used by the library to describe what the model supports
    handle_multivariate = True
    handle_future_covariates = True
    handle_categorical_variables = True
    handle_quantile_loss= True
    description = get_scope(handle_multivariate,handle_future_covariates,handle_categorical_variables,handle_quantile_loss)

    def __init__(self,
                 label_len: int,
                 d_model:int,
                 dropout_rate:float,
                 kernel_size:int,
                 activation:str='torch.nn.ReLU',
                 factor: float=0.5,
                 n_head:int=1,
                 n_layer_encoder:int=2,
                 n_layer_decoder:int=2,
                 hidden_size:int=1048,
                 **kwargs
                 )->None:
        """Autoformer from https://github.com/cure-lab/LTSF-Linear

        Args:
            label_len (int): see the original implementation, seems like a warmup dimension (the decoder part will produce also some past predictions that are filtered out at the end)
            d_model (int): embedding dimension of the attention layer
            dropout_rate (float): dropout rate
            kernel_size (int): kernel size of the moving-average decomposition
            activation (str, optional): import path of the activation class. Defaults to 'torch.nn.ReLU'.
            factor (float, optional): parameter of `.autoformer.layers.AutoCorrelation` for finding the top k. Defaults to 0.5.
            n_head (int, optional): number of heads. Defaults to 1.
            n_layer_encoder (int, optional): number of encoder layers. Defaults to 2.
            n_layer_decoder (int, optional): number of decoder layers. Defaults to 2.
            hidden_size (int, optional): output dimension of the transformer layer. Defaults to 1048.
        """
        super().__init__(**kwargs)
        beauty_string(self.description,'info',True)

        if activation == 'torch.nn.SELU':
            beauty_string('SELU do not require BN','info',self.verbose)
        if isinstance(activation,str):
            # resolve the activation class from its import path
            activation = get_activation(activation)
        else:
            beauty_string('There is a bug in pytorch lightening, the constructior is called twice ','info',self.verbose)


        self.save_hyperparameters(logger=False)


        # aliases matching the naming of the original Autoformer code
        self.seq_len = self.past_steps
        self.label_len = label_len
        self.pred_len = self.future_steps

        # embeddings for the categorical covariates (past / future+warmup)
        self.emb_past = Embedding_cat_variables(self.past_steps,self.emb_dim,self.embs_past, reduction_mode=self.reduction_mode,use_classical_positional_encoder=self.use_classical_positional_encoder,device = self.device)
        self.emb_fut = Embedding_cat_variables(self.future_steps+label_len,self.emb_dim,self.embs_fut, reduction_mode=self.reduction_mode,use_classical_positional_encoder=self.use_classical_positional_encoder,device = self.device)
        emb_past_out_channel = self.emb_past.output_channels
        emb_fut_out_channel = self.emb_fut.output_channels


        # Decomp
        self.decomp = series_decomp(kernel_size)


        # project numerical + embedded categorical channels to d_model
        self.linear_encoder = nn.Sequential(nn.Linear(self.past_channels+emb_past_out_channel,self.past_channels*2),
                                            activation(),
                                            nn.Dropout(dropout_rate),
                                            nn.Linear(self.past_channels*2,d_model*2),
                                            activation(),
                                            nn.Dropout(dropout_rate),
                                            nn.Linear(d_model*2,d_model))

        self.linear_decoder = nn.Sequential(nn.Linear(self.future_channels+emb_fut_out_channel,self.future_channels*2),
                                            activation(),
                                            nn.Dropout(dropout_rate),
                                            nn.Linear(self.future_channels*2,d_model*2),
                                            activation() ,nn.Dropout(dropout_rate),
                                            nn.Linear(d_model*2,d_model))

        #self.final_layer = nn.Linear(self.past_channels,self.out_channels)

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(False, factor, attention_dropout=dropout_rate,
                                        output_attention=False),
                        d_model, n_head),
                    d_model,
                    hidden_size,
                    moving_avg=kernel_size,
                    dropout=dropout_rate,
                    activation=activation
                ) for _ in range(n_layer_encoder)
            ],
            norm_layer=my_Layernorm(d_model)
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(True, factor, attention_dropout=dropout_rate,
                                        output_attention=False),
                        d_model, n_head),
                    AutoCorrelationLayer(
                        AutoCorrelation(False, factor, attention_dropout=dropout_rate,
                                        output_attention=False),
                        d_model, n_head),
                    d_model,
                    self.out_channels,
                    hidden_size,
                    moving_avg=kernel_size,
                    dropout=dropout_rate,
                    activation=activation,
                )
                for _ in range(n_layer_decoder)
            ],
            norm_layer=my_Layernorm(d_model),
            projection=nn.Linear(d_model, self.out_channels*self.mul, bias=True)
        )
        # maps the trend component (past_channels wide) to the output channels
        self.projection = nn.Linear(self.past_channels,self.out_channels*self.mul )

    def forward(self, batch):
        # Forward pass; returns [B, pred_len, out_channels, mul]
        # (mul is the number of quantiles/outputs per channel set by Base).

        idx_target_future = batch['idx_target_future'][0]
        x = batch['x_num_past'].to(self.device)
        BS = x.shape[0]
        if 'x_cat_future' in batch.keys():
            emb_fut = self.emb_fut(BS,batch['x_cat_future'].to(self.device))
        else:
            emb_fut = self.emb_fut(BS,None)
        if 'x_cat_past' in batch.keys():
            emb_past = self.emb_past(BS,batch['x_cat_past'].to(self.device))
        else:
            emb_past = self.emb_past(BS,None)


        if 'x_num_future' in batch.keys():
            x_future = batch['x_num_future'].to(self.device)
            # zero the target channels in the prediction window so the decoder
            # cannot peek at ground truth. NOTE(review): this writes into the
            # batch tensor in place, and if 'x_num_future' is absent `x_future`
            # is unbound and the code below raises NameError — confirm the
            # model is only used with future numerical covariates.
            x_future[:,-self.pred_len:,idx_target_future] = 0




        # decompose the past into seasonal + trend and build the decoder warmup
        mean = torch.mean(x, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)

        zeros = torch.zeros([x_future.shape[0], self.pred_len, x.shape[2]], device=x.device)
        seasonal_init, trend_init = self.decomp(x)
        # decoder input
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
        seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1)
        # enc
        enc_out = self.linear_encoder(torch.cat([x,emb_past],2))
        enc_out, attns = self.encoder(enc_out, attn_mask=None)  # attns unused (output_attention=False)
        # dec
        dec_out = self.linear_decoder(torch.cat([x_future,emb_fut],2))
        seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None, trend=trend_init)
        # final

        trend_part = self.projection(trend_part)
        dec_out = trend_part + seasonal_part


        BS = dec_out.shape[0]

        # keep only the prediction window and split channels from quantiles
        return dec_out[:, -self.pred_len:, :].reshape(BS,self.pred_len,-1,self.mul) # [B, L, D,MUL]
|
|
198
|
+
|
|
199
|
+
|