dsipts-1.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dsipts might be problematic.

Files changed (81)
  1. dsipts/__init__.py +48 -0
  2. dsipts/data_management/__init__.py +0 -0
  3. dsipts/data_management/monash.py +338 -0
  4. dsipts/data_management/public_datasets.py +162 -0
  5. dsipts/data_structure/__init__.py +0 -0
  6. dsipts/data_structure/data_structure.py +1167 -0
  7. dsipts/data_structure/modifiers.py +213 -0
  8. dsipts/data_structure/utils.py +173 -0
  9. dsipts/models/Autoformer.py +199 -0
  10. dsipts/models/CrossFormer.py +152 -0
  11. dsipts/models/D3VAE.py +196 -0
  12. dsipts/models/Diffusion.py +818 -0
  13. dsipts/models/DilatedConv.py +342 -0
  14. dsipts/models/DilatedConvED.py +310 -0
  15. dsipts/models/Duet.py +197 -0
  16. dsipts/models/ITransformer.py +167 -0
  17. dsipts/models/Informer.py +180 -0
  18. dsipts/models/LinearTS.py +222 -0
  19. dsipts/models/PatchTST.py +181 -0
  20. dsipts/models/Persistent.py +44 -0
  21. dsipts/models/RNN.py +213 -0
  22. dsipts/models/Samformer.py +139 -0
  23. dsipts/models/TFT.py +269 -0
  24. dsipts/models/TIDE.py +296 -0
  25. dsipts/models/TTM.py +252 -0
  26. dsipts/models/TimeXER.py +184 -0
  27. dsipts/models/VQVAEA.py +299 -0
  28. dsipts/models/VVA.py +247 -0
  29. dsipts/models/__init__.py +0 -0
  30. dsipts/models/autoformer/__init__.py +0 -0
  31. dsipts/models/autoformer/layers.py +352 -0
  32. dsipts/models/base.py +439 -0
  33. dsipts/models/base_v2.py +444 -0
  34. dsipts/models/crossformer/__init__.py +0 -0
  35. dsipts/models/crossformer/attn.py +118 -0
  36. dsipts/models/crossformer/cross_decoder.py +77 -0
  37. dsipts/models/crossformer/cross_embed.py +18 -0
  38. dsipts/models/crossformer/cross_encoder.py +99 -0
  39. dsipts/models/d3vae/__init__.py +0 -0
  40. dsipts/models/d3vae/diffusion_process.py +169 -0
  41. dsipts/models/d3vae/embedding.py +108 -0
  42. dsipts/models/d3vae/encoder.py +326 -0
  43. dsipts/models/d3vae/model.py +211 -0
  44. dsipts/models/d3vae/neural_operations.py +314 -0
  45. dsipts/models/d3vae/resnet.py +153 -0
  46. dsipts/models/d3vae/utils.py +630 -0
  47. dsipts/models/duet/__init__.py +0 -0
  48. dsipts/models/duet/layers.py +438 -0
  49. dsipts/models/duet/masked.py +202 -0
  50. dsipts/models/informer/__init__.py +0 -0
  51. dsipts/models/informer/attn.py +185 -0
  52. dsipts/models/informer/decoder.py +50 -0
  53. dsipts/models/informer/embed.py +125 -0
  54. dsipts/models/informer/encoder.py +100 -0
  55. dsipts/models/itransformer/Embed.py +142 -0
  56. dsipts/models/itransformer/SelfAttention_Family.py +355 -0
  57. dsipts/models/itransformer/Transformer_EncDec.py +134 -0
  58. dsipts/models/itransformer/__init__.py +0 -0
  59. dsipts/models/patchtst/__init__.py +0 -0
  60. dsipts/models/patchtst/layers.py +569 -0
  61. dsipts/models/samformer/__init__.py +0 -0
  62. dsipts/models/samformer/utils.py +154 -0
  63. dsipts/models/tft/__init__.py +0 -0
  64. dsipts/models/tft/sub_nn.py +234 -0
  65. dsipts/models/timexer/Layers.py +127 -0
  66. dsipts/models/timexer/__init__.py +0 -0
  67. dsipts/models/ttm/__init__.py +0 -0
  68. dsipts/models/ttm/configuration_tinytimemixer.py +307 -0
  69. dsipts/models/ttm/consts.py +16 -0
  70. dsipts/models/ttm/modeling_tinytimemixer.py +2099 -0
  71. dsipts/models/ttm/utils.py +438 -0
  72. dsipts/models/utils.py +624 -0
  73. dsipts/models/vva/__init__.py +0 -0
  74. dsipts/models/vva/minigpt.py +83 -0
  75. dsipts/models/vva/vqvae.py +459 -0
  76. dsipts/models/xlstm/__init__.py +0 -0
  77. dsipts/models/xlstm/xLSTM.py +255 -0
  78. dsipts-1.1.5.dist-info/METADATA +31 -0
  79. dsipts-1.1.5.dist-info/RECORD +81 -0
  80. dsipts-1.1.5.dist-info/WHEEL +5 -0
  81. dsipts-1.1.5.dist-info/top_level.txt +1 -0
dsipts/models/base_v2.py
@@ -0,0 +1,444 @@
+
+ from torch import optim
+ import torch
+ import lightning.pytorch as pl
+ from torch.optim.lr_scheduler import StepLR
+ from abc import abstractmethod
+ from .utils import SinkhornDistance, SoftDTWBatch,PathDTWBatch,pairwise_distances
+ from ..data_structure.utils import beauty_string
+ from .samformer.utils import SAM
+ from .utils import get_scope
+ import numpy as np
+ from aim import Image
+ import matplotlib.pyplot as plt
+ from typing import List, Union
+ from .utils import QuantileLossMO
+ import torch.nn as nn
+
+
+ def standardize_momentum(x,order):
+     mean = torch.mean(x,1).unsqueeze(1).repeat(1,x.shape[1],1)
+     num = torch.pow(x-mean,order).mean(axis=1)
+     #den = torch.sqrt(torch.pow(x-mean,2).mean(axis=1)+1e-8)
+     #den = torch.pow(den,order)
+
+     return num#/den
+
+
+ def dilate_loss(outputs, targets, alpha, gamma, device):
+     # outputs, targets: shape (batch_size, N_output, 1)
+     batch_size, N_output = outputs.shape[0:2]
+     loss_shape = 0
+     softdtw_batch = SoftDTWBatch.apply
+     D = torch.zeros((batch_size, N_output,N_output )).to(device)
+     for k in range(batch_size):
+         Dk = pairwise_distances(targets[k,:,:].view(-1,1),outputs[k,:,:].view(-1,1))
+         D[k:k+1,:,:] = Dk
+     loss_shape = softdtw_batch(D,gamma)
+
+     path_dtw = PathDTWBatch.apply
+     path = path_dtw(D,gamma)
+     Omega = pairwise_distances(torch.range(1,N_output).view(N_output,1)).to(device)
+     loss_temporal = torch.sum( path*Omega ) / (N_output*N_output)
+     loss = alpha*loss_shape+ (1-alpha)*loss_temporal
+     return loss#, loss_shape, loss_temporal
+
+
+ class Base(pl.LightningModule):
+
+     ############### SET THE PROPERTIES OF THE ARCHITECTURE##############
+
+     handle_multivariate = False
+     handle_future_covariates = False
+     handle_categorical_variables = False
+     handle_quantile_loss = False
+     description = get_scope(handle_multivariate,handle_future_covariates,handle_categorical_variables,handle_quantile_loss)
+     #####################################################################
+     @abstractmethod
+     def __init__(self,verbose:bool,
+                  past_steps:int,
+                  future_steps:int,
+                  past_channels:int,
+                  future_channels:int,
+                  out_channels:int,
+                  embs_past:List[int],
+                  embs_fut:List[int],
+                  n_classes:int=0,
+                  persistence_weight:float=0.0,
+                  loss_type: str='l1',
+                  quantiles:List[int]=[],
+                  reduction_mode:str = 'mean',
+                  use_classical_positional_encoder:bool=False,
+                  emb_dim: int=16,
+
+                  optim:Union[str,None]=None,
+                  optim_config:dict=None,
+                  scheduler_config:dict=None,):
+         """
+         This is the basic model; each model implemented must overwrite the init method and the forward method.
+         The inference step is optional: by default it uses the forward method, but for recurrent
+         networks you should implement your own method.
+
+         Args:
+             verbose (bool): Flag to enable verbose logging.
+             past_steps (int): Number of past time steps to consider.
+             future_steps (int): Number of future time steps to predict.
+             past_channels (int): Number of channels in the past input data.
+             future_channels (int): Number of channels in the future input data.
+             out_channels (int): Number of output channels.
+             embs_past (List[int]): List of embedding dimensions for past data.
+             embs_fut (List[int]): List of embedding dimensions for future data.
+             n_classes (int, optional): Number of classes for classification. Defaults to 0.
+             persistence_weight (float, optional): Weight for persistence in loss calculation. Defaults to 0.0.
+             loss_type (str, optional): Type of loss function to use ('l1' or 'mse'). Defaults to 'l1'.
+             quantiles (List[int], optional): List of quantiles for quantile loss. Defaults to an empty list.
+             reduction_mode (str, optional): Reduction mode for the categorical embedding layer ('mean', 'sum', 'none'). Defaults to 'mean'.
+             use_classical_positional_encoder (bool, optional): If True, use the classical positional encoder; otherwise use an embedding layer for the positions as well. Defaults to False.
+             emb_dim (int, optional): Dimension of categorical embeddings. Defaults to 16.
+             optim (Union[str, None], optional): Optimizer type. Defaults to None.
+             optim_config (dict, optional): Configuration for the optimizer. Defaults to None.
+             scheduler_config (dict, optional): Configuration for the learning rate scheduler. Defaults to None.
+
+         Raises:
+             AssertionError: If the number of quantiles is not equal to 3 when quantiles are provided.
+             AssertionError: If the number of output channels is not 1 for classification tasks.
+         """
+
+
+         beauty_string('V2','block',True)
+         super(Base, self).__init__()
+         self.save_hyperparameters(logger=False)
+         self.count_epoch = 0
+         self.initialize = False
+         self.train_loss_epoch = -100.0
+         self.verbose = verbose
+         self.name = self.__class__.__name__
+         self.train_epoch_metrics = []
+         self.validation_epoch_metrics = []
+
+         self.use_quantiles = True if len(quantiles)>0 else False
+         self.quantiles = quantiles
+         self.optim = optim
+         self.optim_config = optim_config
+         self.scheduler_config = scheduler_config
+         self.loss_type = loss_type
+         self.persistence_weight = persistence_weight
+         self.use_classical_positional_encoder = use_classical_positional_encoder
+         self.reduction_mode = reduction_mode
+         self.past_steps = past_steps
+         self.future_steps = future_steps
+         self.embs_past = embs_past
+         self.embs_fut = embs_fut
+         self.past_channels = past_channels
+         self.future_channels = future_channels
+         self.emb_dim = emb_dim
+         self.out_channels = out_channels
+         self.n_classes = n_classes
+         if n_classes==0:
+             self.is_classification = False
+             if len(self.quantiles)>0:
+                 assert len(self.quantiles)==3, beauty_string('ONLY 3 quantiles permitted','info',True)
+                 self.use_quantiles = True
+                 self.mul = len(self.quantiles)
+                 self.loss = QuantileLossMO(quantiles)
+             else:
+                 self.use_quantiles = False
+                 self.mul = 1
+                 if self.loss_type == 'mse':
+                     self.loss = nn.MSELoss()
+                 else:
+                     self.loss = nn.L1Loss()
+         else:
+             self.is_classification = True
+             self.use_quantiles = False
+             self.mul = n_classes
+             self.loss = torch.nn.CrossEntropyLoss()
+             assert self.out_channels==1, "Classification requires only one channel"
+
+
+         self.future_steps = future_steps
+
+         beauty_string(self.description,'info',True)
+     @abstractmethod
+     def forward(self, batch:dict)-> torch.tensor:
+         """Forward method used during the training loop
+
+         Args:
+             batch (dict): the batch structure. The keys are:
+                 y : the target variable(s). This is always present
+                 x_num_past: the numerical past variables. This is always present
+                 x_num_future: the numerical future variables
+                 x_cat_past: the categorical past variables
+                 x_cat_future: the categorical future variables
+                 idx_target: index of target features in the past array
+
+
+         Returns:
+             torch.tensor: output of the model
+         """
+         return None
+
+
+
+     def inference(self, batch:dict)->torch.tensor:
+         """Usually it is ok to return the output of the forward method, but sometimes it is not (e.g. RNN)
+
+         Args:
+             batch (dict): batch
+
+         Returns:
+             torch.tensor: result
+         """
+         return self(batch)
+
+     def configure_optimizers(self):
+         """
+         Each model has optim_config and scheduler_config
+
+         :meta private:
+         """
+
+         self.has_sam_optim = False
+         if self.optim_config is None:
+             self.optim_config = {'lr': 5e-05}
+
+
+         if self.optim is None:
+             optimizer = optim.Adam(self.parameters(), **self.optim_config)
+             self.initialize = True
+
+         else:
+             if self.initialize is False:
+                 if self.optim=='SAM':
+                     self.has_sam_optim = True
+                     self.automatic_optimization = False
+                     self.my_step = 0
+
+                 else:
+                     self.optim = eval(self.optim)
+                     self.has_sam_optim = False
+                     self.automatic_optimization = True
+
+             beauty_string(self.optim,'',self.verbose)
+             if self.has_sam_optim:
+                 optimizer = SAM(self.parameters(), base_optimizer=torch.optim.Adam, **self.optim_config)
+             else:
+                 optimizer = self.optim(self.parameters(), **self.optim_config)
+             beauty_string(optimizer,'',self.verbose)
+             self.initialize = True
+         self.lr = self.optim_config['lr'] ##CHECK THIS
+         if self.scheduler_config is not None:
+             scheduler = StepLR(optimizer,**self.scheduler_config)
+             return [optimizer], [scheduler]
+         else:
+             return optimizer
+
+
+     def training_step(self, batch, batch_idx):
+         """
+         pytorch lightning stuff
+
+         :meta private:
+         """
+
+         #loss = self.compute_loss(batch,y_hat)
+         #import pdb
+         #pdb.set_trace()
+
+         if self.has_sam_optim:
+
+             opt = self.optimizers()
+             def closure():
+                 opt.zero_grad()
+                 y_hat = self(batch)
+                 loss = self.compute_loss(batch,y_hat)
+                 self.manual_backward(loss)
+                 return loss
+
+             opt.step(closure)
+             y_hat = self(batch)
+             loss = self.compute_loss(batch,y_hat)
+
+             #opt.first_step(zero_grad=True)
+
+             #y_hat = self(batch)
+             #loss = self.compute_loss(batch, y_hat)
+             #self.my_step+=1
+             #self.manual_backward(loss,retain_graph=True)
+             #opt.second_step(zero_grad=True)
+             #self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
+             #self.log("global_step", self.my_step, on_step=True) # Correct way to log
+
+
+             #self.trainer.fit_loop.epoch_loop.manual_optimization.optim_step_progress.increment("optimizer")
+         else:
+             y_hat = self(batch)
+             loss = self.compute_loss(batch,y_hat)
+
+         self.train_epoch_metrics.append(loss.item())
+         return loss
+
+
+     def validation_step(self, batch, batch_idx):
+         """
+         pytorch lightning stuff
+
+         :meta private:
+         """
+         y_hat = self(batch)
+         if batch_idx==0:
+             if self.use_quantiles:
+                 idx = 1
+             else:
+                 idx = 0
+             #track the predictions! We can do better than this but maybe it is better to first update pytorch-lightning
+
+             if self.count_epoch%int(max(self.trainer.max_epochs/100,1))==1:
+
+                 for i in range(batch['y'].shape[2]):
+                     real = batch['y'][0,:,i].cpu().detach().numpy()
+                     pred = y_hat[0,:,i,idx].cpu().detach().numpy()
+                     fig, ax = plt.subplots(figsize=(7,5))
+                     ax.plot(real,'o-',label='real')
+                     ax.plot(pred,'o-',label='pred')
+                     ax.legend()
+                     ax.set_title(f'Channel {i} first element first batch validation {int(100*self.count_epoch/self.trainer.max_epochs)}%')
+                     self.logger.experiment.track(Image(fig), name='cm_training_end')
+                     #self.log(f"example_{i}", np.stack([real, pred]).T,sync_dist=True)
+         self.validation_epoch_metrics.append(self.compute_loss(batch,y_hat))
+         return
+
+
+     def on_validation_epoch_end(self):
+         """
+         pytorch lightning stuff
+
+         :meta private:
+         """
+         avg = torch.stack(self.validation_epoch_metrics).mean()
+         self.validation_epoch_metrics = []
+         self.log("val_loss", avg,sync_dist=True)
+         beauty_string(f'Epoch: {self.count_epoch} train error: {self.train_loss_epoch:.4f} validation loss: {avg:.4f}','info',self.verbose)
+
+     def on_train_epoch_end(self):
+
+         """
+         pytorch lightning stuff
+
+         :meta private:
+         """
+         avg = np.stack(self.train_epoch_metrics).mean()
+         self.log("train_loss", avg,sync_dist=True)
+         self.count_epoch+=1
+         self.train_epoch_metrics = []
+         self.train_loss_epoch = avg
+
+     def compute_loss(self,batch,y_hat):
+         """
+         custom loss calculation
+
+         :meta private:
+         """
+
+         if self.use_quantiles is False:
+             initial_loss = self.loss(y_hat[:,:,:,0], batch['y'])
+         else:
+             initial_loss = self.loss(y_hat, batch['y'])
+         x = batch['x_num_past'].to(self.device)
+         idx_target = batch['idx_target'][0]
+         x_start = x[:,-1,idx_target].unsqueeze(1)
+         y_persistence = x_start.repeat(1,self.future_steps,1)
+
+         ##generally you want to work without quantile loss
+         if self.use_quantiles is False:
+             x = y_hat[:,:,:,0]
+         else:
+             x = y_hat[:,:,:,1]
+
+
+         if self.loss_type == 'linear_penalization':
+             persistence_error = (2.0-10.0*torch.clamp( torch.abs((y_persistence-x)/(0.001+torch.abs(y_persistence))),min=0.0,max=max(0.05,0.1*(1+np.log10(self.persistence_weight) ))))
+             loss = torch.mean(torch.abs(x- batch['y'])*persistence_error)
+
+         elif self.loss_type == 'mda':
+             #import pdb
+             #pdb.set_trace()
+             mda = (1-torch.mean( torch.sign(torch.diff(x,axis=1))*torch.sign(torch.diff(batch['y'],axis=1))))
+             loss = torch.mean( torch.abs(x-batch['y']).mean(axis=1).flatten()) + self.persistence_weight*mda/10
+
+
+
+         elif self.loss_type == 'exponential_penalization':
+             weights = (1+self.persistence_weight*torch.exp(-torch.abs(y_persistence-x)))
+             loss = torch.mean(torch.abs(x- batch['y'])*weights)
+
+         elif self.loss_type=='sinkhorn':
+             sinkhorn = SinkhornDistance(eps=0.1, max_iter=100, reduction='mean')
+             loss = sinkhorn.compute(x,batch['y'])
+
+         elif self.loss_type == 'additive_iv':
+             std = torch.sqrt(torch.var(batch['y'], dim=(1))+ 1e-8) ##--> BSxChannel
+             x_std = torch.sqrt(torch.var(x, dim=(1))+ 1e-8)
+             loss = torch.mean( torch.abs(x-batch['y']).mean(axis=1).flatten() + self.persistence_weight*torch.abs(x_std-std).flatten())
+
+         elif self.loss_type == 'multiplicative_iv':
+             std = torch.sqrt(torch.var(batch['y'], dim=(1))+ 1e-8) ##--> BSxChannel
+             x_std = torch.sqrt(torch.var(x, dim=(1))+ 1e-8)
+             if self.persistence_weight>0:
+                 loss = torch.mean( torch.abs(x-batch['y']).mean(axis=1)*torch.abs(x_std-std))
+             else:
+                 loss = torch.mean( torch.abs(x-batch['y']).mean(axis=1))
+         elif self.loss_type=='global_iv':
+             std_real = torch.sqrt(torch.var(batch['y'], dim=(0,1)))
+             std_predict = torch.sqrt(torch.var(x, dim=(0,1)))
+             loss = initial_loss + self.persistence_weight*torch.abs(std_real-std_predict).mean()
+
+         elif self.loss_type=='smape':
+             loss = torch.mean(2*torch.abs(x-batch['y']) / (torch.abs(x)+torch.abs(batch['y'])))
+
+         elif self.loss_type=='triplet':
+             loss_fn = torch.nn.TripletMarginLoss(margin=0.01, p=1.0,swap=False)
+             loss = initial_loss + self.persistence_weight*loss_fn(x, batch['y'], y_persistence)
+
+         elif self.loss_type=='high_order':
+             loss = initial_loss
+             for i in range(2,5):
+                 mom_real = standardize_momentum( batch['y'],i)
+                 mom_pred = standardize_momentum(x,i)
+
+                 mom_loss = torch.abs(mom_real-mom_pred).mean()
+                 loss+=self.persistence_weight*mom_loss
+
+         elif self.loss_type=='dilated':
+             #BxLxCxMUL
+             if self.persistence_weight==0.1:
+                 alpha = 0.25
+             if self.persistence_weight==1:
+                 alpha = 0.5
+             else:
+                 alpha =0.75
+             alpha = self.persistence_weight
+             gamma = 0.01
+             loss = 0
+             ##no multichannel here
+             for i in range(y_hat.shape[2]):
+                 ##error here
+
+                 loss+= dilate_loss( batch['y'][:,:,i:i+1],x[:,:,i:i+1], alpha, gamma, y_hat.device)
+
+         elif self.loss_type=='huber':
+             loss = torch.nn.HuberLoss(reduction='mean', delta=self.persistence_weight/10)
+             #loss = torch.nn.HuberLoss(reduction='mean', delta=self.persistence_weight)
+             if self.use_quantiles is False:
+                 x = y_hat[:,:,:,0]
+             else:
+                 x = y_hat[:,:,:,1]
+             BS = x.shape[0]
+             loss = loss(y_hat.reshape(BS,-1), batch['y'].reshape(BS,-1))
+
+
+         else:
+             loss = initial_loss
+
+
+         return loss
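
Not part of the package diff: a minimal sketch of how a concrete architecture might build on this Base class, assuming the constructor arguments and batch keys documented above. The ToyLinear class, its import path, and the keyword-argument call are illustrative only.

import torch
import torch.nn as nn
from dsipts.models.base_v2 import Base  # assumed import path


class ToyLinear(Base):
    handle_multivariate = True
    handle_future_covariates = False
    handle_categorical_variables = False
    handle_quantile_loss = True

    def __init__(self, **kwargs):
        # Base.__init__ sets self.mul (1, 3 for quantile loss, or n_classes),
        # self.loss, self.past_steps, self.future_steps, self.out_channels, ...
        super().__init__(**kwargs)
        self.linear = nn.Linear(self.past_steps * self.past_channels,
                                self.future_steps * self.out_channels * self.mul)

    def forward(self, batch: dict) -> torch.Tensor:
        # compute_loss expects y_hat with shape (B, future_steps, out_channels, mul)
        x = batch['x_num_past']              # (B, past_steps, past_channels)
        B = x.shape[0]
        out = self.linear(x.reshape(B, -1))
        return out.reshape(B, self.future_steps, self.out_channels, self.mul)


model = ToyLinear(verbose=False, past_steps=16, future_steps=8,
                  past_channels=3, future_channels=0, out_channels=1,
                  embs_past=[], embs_fut=[], quantiles=[0.1, 0.5, 0.9])
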
dsipts/models/crossformer/__init__.py: file without changes
dsipts/models/crossformer/attn.py
@@ -0,0 +1,118 @@
+ import torch
+ import torch.nn as nn
+ from einops import rearrange, repeat
+
+
+ from math import sqrt
+
+ class FullAttention(nn.Module):
+     '''
+     The Attention operation
+     '''
+     def __init__(self, scale=None, attention_dropout=0.1):
+         super(FullAttention, self).__init__()
+         self.scale = scale
+         self.dropout = nn.Dropout(attention_dropout)
+
+     def forward(self, queries, keys, values):
+         B, L, H, E = queries.shape
+         _, S, _, D = values.shape
+         scale = self.scale or 1./sqrt(E)
+
+         scores = torch.einsum("blhe,bshe->bhls", queries, keys)
+         A = self.dropout(torch.softmax(scale * scores, dim=-1))
+         V = torch.einsum("bhls,bshd->blhd", A, values)
+
+         return V.contiguous()
+
+
+ class AttentionLayer(nn.Module):
+     '''
+     The Multi-head Self-Attention (MSA) Layer
+     '''
+     def __init__(self, d_model, n_heads, d_keys=None, d_values=None, mix=True, dropout = 0.1):
+         super(AttentionLayer, self).__init__()
+
+         d_keys = d_keys or (d_model//n_heads)
+         d_values = d_values or (d_model//n_heads)
+
+         self.inner_attention = FullAttention(scale=None, attention_dropout = dropout)
+         self.query_projection = nn.Linear(d_model, d_keys * n_heads)
+         self.key_projection = nn.Linear(d_model, d_keys * n_heads)
+         self.value_projection = nn.Linear(d_model, d_values * n_heads)
+         self.out_projection = nn.Linear(d_values * n_heads, d_model)
+         self.n_heads = n_heads
+         self.mix = mix
+
+     def forward(self, queries, keys, values):
+         B, L, _ = queries.shape
+         _, S, _ = keys.shape
+         H = self.n_heads
+
+         queries = self.query_projection(queries).view(B, L, H, -1)
+         keys = self.key_projection(keys).view(B, S, H, -1)
+         values = self.value_projection(values).view(B, S, H, -1)
+
+         out = self.inner_attention(
+             queries,
+             keys,
+             values,
+         )
+         if self.mix:
+             out = out.transpose(2,1).contiguous()
+         out = out.view(B, L, -1)
+
+         return self.out_projection(out)
+
+ class TwoStageAttentionLayer(nn.Module):
+     '''
+     The Two Stage Attention (TSA) Layer
+     input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model]
+     '''
+     def __init__(self, seg_num, factor, d_model, n_heads, d_ff = None, dropout=0.1):
+         super(TwoStageAttentionLayer, self).__init__()
+         d_ff = d_ff or 4*d_model
+         self.time_attention = AttentionLayer(d_model, n_heads, dropout = dropout)
+         self.dim_sender = AttentionLayer(d_model, n_heads, dropout = dropout)
+         self.dim_receiver = AttentionLayer(d_model, n_heads, dropout = dropout)
+         self.router = nn.Parameter(torch.randn(seg_num, factor, d_model))
+
+         self.dropout = nn.Dropout(dropout)
+
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.norm3 = nn.LayerNorm(d_model)
+         self.norm4 = nn.LayerNorm(d_model)
+
+         self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff),
+                                   nn.GELU(),
+                                   nn.Linear(d_ff, d_model))
+         self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff),
+                                   nn.GELU(),
+                                   nn.Linear(d_ff, d_model))
+
+     def forward(self, x):
+         #Cross Time Stage: Directly apply MSA to each dimension
+         batch = x.shape[0]
+         time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model')
+         time_enc = self.time_attention(
+             time_in, time_in, time_in
+         )
+         dim_in = time_in + self.dropout(time_enc)
+         dim_in = self.norm1(dim_in)
+         dim_in = dim_in + self.dropout(self.MLP1(dim_in))
+         dim_in = self.norm2(dim_in)
+
+         #Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection
+         dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b = batch)
+         batch_router = repeat(self.router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat = batch)
+         dim_buffer = self.dim_sender(batch_router, dim_send, dim_send)
+         dim_receive = self.dim_receiver(dim_send, dim_buffer, dim_buffer)
+         dim_enc = dim_send + self.dropout(dim_receive)
+         dim_enc = self.norm3(dim_enc)
+         dim_enc = dim_enc + self.dropout(self.MLP2(dim_enc))
+         dim_enc = self.norm4(dim_enc)
+
+         final_out = rearrange(dim_enc, '(b seg_num) ts_d d_model -> b ts_d seg_num d_model', b = batch)
+
+         return final_out
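
Not part of the diff: a quick shape check for TwoStageAttentionLayer, which keeps the documented [batch_size, D, seg_num, d_model] layout; the import path below is assumed from the file list above.

import torch
from dsipts.models.crossformer.attn import TwoStageAttentionLayer  # assumed import path

batch_size, ts_d, seg_num, d_model = 4, 7, 12, 64
layer = TwoStageAttentionLayer(seg_num=seg_num, factor=10, d_model=d_model, n_heads=8)

x = torch.randn(batch_size, ts_d, seg_num, d_model)
out = layer(x)
print(out.shape)  # torch.Size([4, 7, 12, 64]) -- same layout as the input
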
dsipts/models/crossformer/cross_decoder.py
@@ -0,0 +1,77 @@
+ import torch.nn as nn
+ from einops import rearrange
+ from .attn import AttentionLayer, TwoStageAttentionLayer
+
+ class DecoderLayer(nn.Module):
+     '''
+     The decoder layer of Crossformer, each layer will make a prediction at its scale
+     '''
+     def __init__(self, seg_len, d_model, n_heads, d_ff=None, dropout=0.1, out_seg_num = 10, factor = 10):
+         super(DecoderLayer, self).__init__()
+         self.self_attention = TwoStageAttentionLayer(out_seg_num, factor, d_model, n_heads,d_ff, dropout)
+         self.cross_attention = AttentionLayer(d_model, n_heads, dropout = dropout)
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.dropout = nn.Dropout(dropout)
+         self.MLP1 = nn.Sequential(nn.Linear(d_model, d_model),
+                                   nn.GELU(),
+                                   nn.Linear(d_model, d_model))
+         self.linear_pred = nn.Linear(d_model, seg_len)
+
+     def forward(self, x, cross):
+         '''
+         x: the output of last decoder layer
+         cross: the output of the corresponding encoder layer
+         '''
+
+         batch = x.shape[0]
+         x = self.self_attention(x)
+         x = rearrange(x, 'b ts_d out_seg_num d_model -> (b ts_d) out_seg_num d_model')
+
+         cross = rearrange(cross, 'b ts_d in_seg_num d_model -> (b ts_d) in_seg_num d_model')
+         tmp = self.cross_attention(
+             x, cross, cross,
+         )
+         x = x + self.dropout(tmp)
+         y = x = self.norm1(x)
+         y = self.MLP1(y)
+         dec_output = self.norm2(x+y)
+
+         dec_output = rearrange(dec_output, '(b ts_d) seg_dec_num d_model -> b ts_d seg_dec_num d_model', b = batch)
+         layer_predict = self.linear_pred(dec_output)
+         layer_predict = rearrange(layer_predict, 'b out_d seg_num seg_len -> b (out_d seg_num) seg_len')
+
+         return dec_output, layer_predict
+
+ class Decoder(nn.Module):
+     '''
+     The decoder of Crossformer, making the final prediction by adding up predictions at each scale
+     '''
+     def __init__(self, seg_len, d_layers, d_model, n_heads, d_ff, dropout,\
+                 router=False, out_seg_num = 10, factor=10):
+         super(Decoder, self).__init__()
+
+         self.router = router
+         self.decode_layers = nn.ModuleList()
+         for i in range(d_layers):
+             self.decode_layers.append(DecoderLayer(seg_len, d_model, n_heads, d_ff, dropout, \
+                                                    out_seg_num, factor))
+
+     def forward(self, x, cross):
+         final_predict = None
+         i = 0
+
+         ts_d = x.shape[1]
+         for layer in self.decode_layers:
+             cross_enc = cross[i]
+             x, layer_predict = layer(x, cross_enc)
+             if final_predict is None:
+                 final_predict = layer_predict
+             else:
+                 final_predict = final_predict + layer_predict
+             i += 1
+
+         final_predict = rearrange(final_predict, 'b (out_d seg_num) seg_len -> b (seg_num seg_len) out_d', out_d = ts_d)
+
+         return final_predict
+
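
Not part of the diff: a sketch of how this Decoder could be driven, assuming the import path below; each entry of cross stands in for one encoder scale, and the layers' per-scale predictions are summed into the final output.

import torch
from dsipts.models.crossformer.cross_decoder import Decoder  # assumed import path

b, ts_d, d_model, n_heads, d_ff = 2, 3, 64, 8, 128
seg_len, d_layers, out_seg_num, in_seg_num = 6, 2, 4, 8

dec = Decoder(seg_len, d_layers, d_model, n_heads, d_ff, dropout=0.1,
              out_seg_num=out_seg_num, factor=10)

x = torch.randn(b, ts_d, out_seg_num, d_model)                      # decoder input
cross = [torch.randn(b, ts_d, in_seg_num, d_model) for _ in range(d_layers)]

y = dec(x, cross)
print(y.shape)  # torch.Size([2, 24, 3]) -> (b, out_seg_num * seg_len, ts_d)
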
dsipts/models/crossformer/cross_embed.py
@@ -0,0 +1,18 @@
+ import torch.nn as nn
+ from einops import rearrange
+
+ class DSW_embedding(nn.Module):
+     def __init__(self, seg_len, d_model):
+         super(DSW_embedding, self).__init__()
+         self.seg_len = seg_len
+
+         self.linear = nn.Linear(seg_len, d_model)
+
+     def forward(self, x):
+         batch, ts_len, ts_dim = x.shape
+
+         x_segment = rearrange(x, 'b (seg_num seg_len) d -> (b d seg_num) seg_len', seg_len = self.seg_len)
+         x_embed = self.linear(x_segment)
+         x_embed = rearrange(x_embed, '(b d seg_num) d_model -> b d seg_num d_model', b = batch, d = ts_dim)
+
+         return x_embed
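
Not part of the diff: DSW_embedding splits every channel into seg_len-long segments and embeds each segment into d_model dimensions, so ts_len must be divisible by seg_len; the import path below is assumed.

import torch
from dsipts.models.crossformer.cross_embed import DSW_embedding  # assumed import path

batch, ts_len, ts_dim, seg_len, d_model = 4, 96, 7, 12, 64        # 96 is divisible by 12
emb = DSW_embedding(seg_len, d_model)

x = torch.randn(batch, ts_len, ts_dim)
x_embed = emb(x)
print(x_embed.shape)  # torch.Size([4, 7, 8, 64]) -> (batch, ts_dim, ts_len // seg_len, d_model)
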