dsipts: 1.1.12-py3-none-any.whl → 1.1.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


dsipts/data_structure/data_structure.py CHANGED
@@ -6,7 +6,7 @@ from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
 from sklearn.preprocessing import *
 from torch.utils.data import DataLoader
 from .utils import extend_time_df,MetricsCallback, MyDataset, ActionEnum,beauty_string
-
+from torch.utils.data.sampler import WeightedRandomSampler
 try:
 
 #new version of lightning
@@ -249,7 +249,8 @@ class TimeSeries():
                     check_past:bool=True,
                     group:Union[None,str]=None,
                     check_holes_and_duplicates:bool=True,
-                    silly_model:bool=False)->None:
+                    silly_model:bool=False,
+                    sampler_weights:Union[None,str]=None)->None:
         """ This is a crucial point in the data structure. We expect here to have a dataset with time as timestamp.
         There are some checks:
         1- the duplicates will tbe removed taking the first instance
@@ -270,6 +271,7 @@ class TimeSeries():
             group (str or None, optional): if not None the time serie dataset is considered composed by omogeneus timeseries coming from different realization (for example point of sales, cities, locations) and the relative series are not splitted during the sample generation. Defaults to None
             check_holes_and_duplicates (bool, optional): if False duplicates or holes will not checked, the dataloader can not correctly work, disable at your own risk. Defaults True
             silly_model (bool, optional): if True, target variables will be added to the pool of the future variables. This can be useful to see if information passes throught the decoder part of your model (if any)
+            sampler_weights group (str or None, optional): if it is a column name it will be used as weight for the sampler. Careful that the weight of the sample is the weight value of the fist target value (index)
         """
 
 
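Per the new docstring entry, sampler_weights names a column of the input DataFrame, and the weight attached to a training window is the weight value at the window's first target index. A minimal sketch of what such a column might look like (toy data; the column name 'weight' is illustrative, not a dsipts default):

import numpy as np
import pandas as pd

n = 200
df = pd.DataFrame({
    'time': pd.date_range('2024-01-01', periods=n, freq='h'),
    'signal': np.sin(np.arange(n) / 10.0),
    # hypothetical weight column: rows at midnight get 10x the sampling weight
    'weight': np.where(np.arange(n) % 24 == 0, 10.0, 1.0),
})
print(df.head(3))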
@@ -322,7 +324,7 @@ class TimeSeries():
         if group is not None:
             if group not in cat_past_var:
                 beauty_string(f'I will add {group} to the categorical past/future variables','info',self.verbose)
-                self.cat_var.append(group)
+                self.cat_past_var.append(group)
             if group not in cat_fut_var:
                 beauty_string(f'I will add {group} to the categorical past/future variables','info',self.verbose)
                 self.cat_fut_var.append(group)
@@ -350,7 +352,7 @@ class TimeSeries():
         if silly_model:
             beauty_string('YOU ARE TRAINING A SILLY MODEL WITH THE TARGETS IN THE INPUTS','section',self.verbose)
             self.future_variables+=self.target_variables
-
+        self.sampler_weights = sampler_weights
     def plot(self):
         """
         Easy way to control the loaded data
@@ -409,6 +411,7 @@ class TimeSeries():
         y_samples = []
         t_samples = []
         g_samples = []
+        sampler_weights_samples = []
 
         if starting_point is not None:
             kk = list(starting_point.keys())[0]
@@ -475,7 +478,8 @@ class TimeSeries():
             if len(self.cat_fut_var)>0:
                 x_fut_cat = tmp[self.cat_fut_var].values
             y_target = tmp[self.target_variables].values
-
+            if self.sampler_weights is not None:
+                sampler_weights = tmp[self.sampler_weights].values.flatten()
 
             if starting_point is not None:
                 check = tmp[list(starting_point.keys())[0]].values == starting_point[list(starting_point.keys())[0]]
@@ -512,6 +516,8 @@ class TimeSeries():
                 x_cat_future_samples.append(x_fut_cat[i-shift+skip_stacked:i+future_steps-shift+skip_stacked])
 
             y_samples.append(y_target[i+skip_stacked:i+future_steps+skip_stacked])
+            if self.sampler_weights is not None:
+                sampler_weights_samples.append(sampler_weights[i+skip_stacked])
             t_samples.append(t[i+skip_stacked:i+future_steps+skip_stacked])
             g_samples.append(groups[i])
 
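The indexing here deserves a note: a window's targets span y_target[i+skip_stacked : i+future_steps+skip_stacked], yet its sampler weight is the single value at i+skip_stacked, i.e. the weight of the first target step, exactly as the docstring warns. A toy check of that correspondence (illustrative values, with skip_stacked taken as 0):

import numpy as np

future_steps, skip_stacked, i = 3, 0, 3
weights = np.array([1.0, 1.0, 1.0, 10.0, 1.0, 1.0, 1.0, 1.0])

target_idx = np.arange(i + skip_stacked, i + future_steps + skip_stacked)
window_weight = weights[i + skip_stacked]
print(target_idx, window_weight)  # [3 4 5] 10.0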
@@ -524,6 +530,8 @@ class TimeSeries():
             beauty_string('WARNING x_num_future_samples is empty and it should not','info',True)
 
         y_samples = np.stack(y_samples)
+        if self.sampler_weights is not None:
+            sampler_weights_samples = np.stack(sampler_weights_samples)
         t_samples = np.stack(t_samples)
         g_samples = np.stack(g_samples)
 
@@ -537,7 +545,6 @@ class TimeSeries():
         else:
             mod = 1.0
         dd = {'y':y_samples.astype(np.float32),
-
               'x_num_past':(x_num_past_samples*mod).astype(np.float32)}
         if len(self.cat_past_var)>0:
             dd['x_cat_past'] = x_cat_past_samples
@@ -545,7 +552,10 @@ class TimeSeries():
             dd['x_cat_future'] = x_cat_future_samples
         if len(self.future_variables)>0:
             dd['x_num_future'] = x_num_future_samples.astype(np.float32)
-
+        if self.sampler_weights is not None:
+            dd['sampler_weights'] = sampler_weights_samples.astype(np.float32)
+        else:
+            dd['sampler_weights'] = np.ones(len(y_samples)).astype(np.float32)
         return MyDataset(dd,t_samples,g_samples,idx_target,idx_target_future)
 
 
@@ -753,8 +763,14 @@ class TimeSeries():
         else:
             self.modifier = None
 
+        if self.sampler_weights is not None:
+            beauty_string(f'USING SAMPLER IN TRAIN {min(train.sampler_weights)}-{max(train.sampler_weights)}','section',self.verbose)
+
+            sampler = WeightedRandomSampler(train.sampler_weights, num_samples= len(train))
+            train_dl = DataLoader(train, batch_size = batch_size , shuffle=False,sampler=sampler,drop_last=True,num_workers=num_workers,persistent_workers=persistent_workers)
 
-        train_dl = DataLoader(train, batch_size = batch_size , shuffle=True,drop_last=True,num_workers=num_workers,persistent_workers=persistent_workers)
+        else:
+            train_dl = DataLoader(train, batch_size = batch_size , shuffle=True,drop_last=True,num_workers=num_workers,persistent_workers=persistent_workers)
         valid_dl = DataLoader(validation, batch_size = batch_size , shuffle=False,drop_last=True,num_workers=num_workers,persistent_workers=persistent_workers)
 
         checkpoint_callback = ModelCheckpoint(dirpath=dirpath,
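This is the heart of the feature: when weights are present, shuffle=True is replaced by a WeightedRandomSampler (PyTorch forbids combining a sampler with shuffle=True, hence the explicit shuffle=False branch). The all-ones fallback stored in the dataset earlier only guarantees the sampler_weights attribute always exists; the unweighted branch still uses plain shuffling. A self-contained sketch of the same pattern, with illustrative names rather than the dsipts API:

import torch
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

train = TensorDataset(torch.randn(8, 3), torch.arange(8))

# one weight per sample: sample 0 is drawn roughly ten times more often
weights = torch.tensor([10.0, 1, 1, 1, 1, 1, 1, 1])
sampler = WeightedRandomSampler(weights, num_samples=len(train))  # replacement=True by default

train_dl = DataLoader(train, batch_size=4, shuffle=False, sampler=sampler, drop_last=True)
for _, yb in train_dl:
    print(yb)  # index 0 should show up disproportionately often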
@@ -1026,7 +1042,7 @@ class TimeSeries():
 
         if self.group is not None:
             time[self.group] = groups
-            time = time.melt(id_vars=['region'])
+            time = time.melt(id_vars=[self.group])
         else:
             time = time.melt()
         time.rename(columns={'value':'time','variable':'lag'},inplace=True)
@@ -1048,7 +1064,8 @@ class TimeSeries():
 
         if self.group is not None:
             time[self.group] = groups
-            time = time.melt(id_vars=['region'])
+
+            time = time.melt(id_vars=[self.group])
         else:
             time = time.melt()
         time.rename(columns={'value':'time','variable':'lag'},inplace=True)
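Both hunks above fix the same bug: melt(id_vars=['region']) raised a KeyError whenever the group column was named anything but 'region'. A small pandas sketch of the corrected pattern (toy frame, illustrative names):

import pandas as pd

group = 'store'  # whatever column name was passed as group
time = pd.DataFrame({'lag_1': [1, 2], 'lag_2': [3, 4]})
time[group] = ['A', 'B']

time = time.melt(id_vars=[group])  # works for any group name, not just 'region'
time.rename(columns={'value': 'time', 'variable': 'lag'}, inplace=True)
print(time)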
dsipts/data_structure/utils.py CHANGED
@@ -142,12 +142,13 @@ class MyDataset(Dataset):
         Returns:
             torch.utils.data.Dataset: a torch Dataset to be used in a Dataloader
         """
+
         self.data = data
         self.t = t
         self.groups = groups
         self.idx_target = np.array(idx_target) if idx_target is not None else None
         self.idx_target_future = np.array(idx_target_future) if idx_target_future is not None else None
-
+        self.sampler_weights = data['sampler_weights']
 
 
     def __len__(self):
@@ -157,7 +158,8 @@ class MyDataset(Dataset):
     def __getitem__(self, idxs):
         sample = {}
         for k in self.data:
-            sample[k] = self.data[k][idxs]
+            if k!='sampler_weights':
+                sample[k] = self.data[k][idxs]
         if self.idx_target is not None:
             sample['idx_target'] = self.idx_target
         if self.idx_target_future is not None:
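With __getitem__ now skipping the sampler_weights key, the weights steer the WeightedRandomSampler but never leak into the batches the model sees. A minimal sketch of the pattern (not the dsipts class itself):

import numpy as np
from torch.utils.data import Dataset

class WeightedDictDataset(Dataset):
    def __init__(self, data):
        self.data = data
        # exposed so a WeightedRandomSampler can read it
        self.sampler_weights = data['sampler_weights']

    def __len__(self):
        return len(self.data['y'])

    def __getitem__(self, idx):
        # everything except the weights goes to the model
        return {k: v[idx] for k, v in self.data.items() if k != 'sampler_weights'}

ds = WeightedDictDataset({'y': np.zeros((4, 2)), 'sampler_weights': np.ones(4)})
assert 'sampler_weights' not in ds[0]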
dsipts/models/TTM.py CHANGED
@@ -38,6 +38,7 @@ class TTM(Base):
                  fcm_mix_layers,
                  fcm_prepend_past,
                  enable_forecast_channel_mixing,
+                 force_return,
                  **kwargs)->None:
 
         super().__init__(**kwargs)
@@ -48,7 +49,9 @@ class TTM(Base):
         self.index_fut = list(exogenous_channel_indices_cont)
 
         if len(exogenous_channel_indices_cat)>0:
-            self.index_fut_cat = (self.past_channels+len(self.embs_past))+list(exogenous_channel_indices_cat)
+
+            self.index_fut_cat = [self.past_channels+c for c in list(exogenous_channel_indices_cat)]
+
         else:
             self.index_fut_cat = []
         self.freq = freq
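The replaced line was a genuine bug: adding an int to a list raises a TypeError in Python, so any configuration with categorical exogenous channels would have crashed here. The fix offsets each categorical index past the continuous channels with a comprehension. A quick illustration with toy values:

past_channels = 5
exogenous_channel_indices_cat = [0, 1]

# old: (past_channels + len(embs_past)) + list(exogenous_channel_indices_cat)
#      -> TypeError: unsupported operand type(s) for +: 'int' and 'list'
index_fut_cat = [past_channels + c for c in exogenous_channel_indices_cat]
print(index_fut_cat)  # [5, 6]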
@@ -75,6 +78,7 @@ class TTM(Base):
             fcm_use_mixer=fcm_use_mixer,
             fcm_mix_layers=fcm_mix_layers,
             freq=freq,
+            force_return=force_return,
             freq_prefix_tuning=freq_prefix_tuning,
             fcm_prepend_past=fcm_prepend_past,
             enable_forecast_channel_mixing=enable_forecast_channel_mixing,
@@ -83,7 +87,7 @@ class TTM(Base):
         hidden_size = self.model.config.hidden_size
         self.model.prediction_head = torch.nn.Linear(hidden_size, self.out_channels*self.mul)
         self._freeze_backbone()
-
+        self.zero_pad = (force_return=='zeropad')
     def _freeze_backbone(self):
         """
         Freeze the backbone of the model.
@@ -108,29 +112,44 @@ class TTM(Base):
         return input
 
     def can_be_compiled(self):
-        return True
+
+        return True#not self.zero_pad
 
     def forward(self, batch):
         x_enc = batch['x_num_past'].to(self.device)
+
+
+        if self.zero_pad:
+            B,L,C = batch['x_num_past'].shape
+            x_enc = torch.zeros((B,512,C)).to(self.device)
+            x_enc[:,-L:,:] = batch['x_num_past'].to(self.device)
+        else:
+            x_enc = batch['x_num_past'].to(self.device)
         original_indexes = batch['idx_target'][0].tolist()
 
 
         if 'x_cat_past' in batch.keys():
-            x_mark_enc = batch['x_cat_past'].to(torch.float32).to(self.device)
-            x_mark_enc = self._scaler_past(x_mark_enc)
+            if self.zero_pad:
+                B,L,C = batch['x_cat_past'].shape
+                x_mark_enc = torch.zeros((B,512,C)).to(self.device)
+                x_mark_enc[:,-L:,:] = batch['x_cat_past'].to(torch.float32).to(self.device)
+            else:
+                x_mark_enc = batch['x_cat_past'].to(torch.float32).to(self.device)
+            x_mark_enc = self._scaler_past(x_mark_enc)
             past_values = torch.cat((x_enc,x_mark_enc), axis=-1).type(torch.float32)
         else:
             past_values = x_enc
+        B,L,C = past_values.shape
+        future_values = torch.zeros((B,self.future_steps,C)).to(self.device)
 
-        future_values = torch.zeros_like(past_values).to(self.device)
-        future_values = future_values[:,:self.future_steps,:]
 
+
         if 'x_num_future' in batch.keys():
             future_values[:,:,self.index_fut] = batch['x_num_future'].to(self.device)
         if 'x_cat_future' in batch.keys():
             x_mark_dec = batch['x_cat_future'].to(torch.float32).to(self.device)
             x_mark_dec = self._scaler_fut(x_mark_dec)
-            future_values[:,:,self.index_cat_fut] = x_mark_dec
+            future_values[:,:,self.index_fut_cat] = x_mark_dec
 
 
         #investigating!! problem with dynamo!
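Three things change in forward: with force_return == 'zeropad' the past window is left-padded with zeros up to a fixed 512-step context (512 is hard-coded in the diff; presumably the TTM backbone's context length); future_values is now allocated directly as (B, future_steps, C) instead of slicing a zeros_like of the past, whose old slicing silently produced too few steps whenever future_steps exceeded the past length; and the categorical write now targets self.index_fut_cat, the attribute actually defined in __init__, where the old name index_cat_fut would have raised an AttributeError. A sketch of just the padding step, with illustrative shapes:

import torch

CONTEXT_LEN = 512                 # taken from the diff above
x = torch.randn(4, 96, 3)         # B=4 windows, L=96 past steps, C=3 channels

B, L, C = x.shape
padded = torch.zeros((B, CONTEXT_LEN, C))
padded[:, -L:, :] = x             # zeros on the left, real history on the right

assert padded.shape == (4, 512, 3)
assert torch.equal(padded[:, -96:, :], x)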
dsipts-1.1.13.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dsipts
-Version: 1.1.12
+Version: 1.1.13
 Summary: Unified library for timeseries modelling
 Author-email: Andrea Gobbi <agobbi@fbk.eu>
 Project-URL: Homepage, https://github.com/DSIP-FBK/DSIPTS
dsipts-1.1.13.dist-info/RECORD CHANGED
@@ -3,9 +3,9 @@ dsipts/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 dsipts/data_management/monash.py,sha256=aZxq9FbIH6IsU8Lwou1hAokXjgOAK-wdl2VAeFg2k4M,13075
 dsipts/data_management/public_datasets.py,sha256=yXFzOZZ-X0ZG1DoqVU-zFmEGVMc2033YDQhRgYxY8ws,6793
 dsipts/data_structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dsipts/data_structure/data_structure.py,sha256=KVkjTVjc7NznJIou4LYGzMbzE7ye-K3ll65GEgn2qKg,60814
+dsipts/data_structure/data_structure.py,sha256=vOiVuTbEprXwpf8l5hk-7HP3v_r5d3YiXuQdGwo4nV0,62295
 dsipts/data_structure/modifiers.py,sha256=qlry9dfw8pEE0GrvgwROZJkJ6oPpUnjEHPIG5qIetss,7948
-dsipts/data_structure/utils.py,sha256=QwfKPZgSy6DIw5n6ztOdPJIAnzo4EnlMTgRbpiWnyko,6593
+dsipts/data_structure/utils.py,sha256=ZL-z_InmFUkge5kQoHSrev1t6nyve9sTYTVeA75Or-I,6689
 dsipts/models/Autoformer.py,sha256=nUQvPC_qtajLT1AHdNJmF_P3ZL01j3spkZ4ubxdGF3g,8497
 dsipts/models/CrossFormer.py,sha256=ClW6H_hrtLJH0iqTC7q_ya_Bwc_Xu-0lpAN5w2DSUYk,6526
 dsipts/models/D3VAE.py,sha256=d1aY6kGjBSxZncN-KPWpdUGunu182ng2QFInGFrKYQM,6903
@@ -23,7 +23,7 @@ dsipts/models/Samformer.py,sha256=Kt7B9ID3INtFDAVKIM1LTly5-UfKCaVZ9uxAJmYv6B4,56
 dsipts/models/Simple.py,sha256=8wRSO-gh_Z6Sl8fYMV-RIXIL0RrO5u5dDtsaq-OsKg0,3960
 dsipts/models/TFT.py,sha256=JiI90ikfP8aaR_rtczu8CyGMNLTgml13aYQifgIC_yo,13888
 dsipts/models/TIDE.py,sha256=S1KlKqFOR3jJ9DDiTqeaKvya9hYBsNHBVqwJsYX3FLU,13094
-dsipts/models/TTM.py,sha256=lOOo5dR5nOmf37cND6C8ft8TVl0kzNeraIuABw7eI5g,5897
+dsipts/models/TTM.py,sha256=PoRDT-KYoMqv6yIOU-73E7Y2pRyd4lga0u6KrJRd5DU,6561
 dsipts/models/TimeXER.py,sha256=EkmlHfT2RegY6Ce6q8EUEV1a_WZ6SkYibnOZXqsyd_8,7111
 dsipts/models/VQVAEA.py,sha256=sNJi8UZh-10qEIKcZK3SzhlOFUUjvqjoglzeZBFaeZM,13789
 dsipts/models/VVA.py,sha256=BnPkJ0Nzue0oShSHZVRNlf5RvT0Iwtf9bx19vLB9Nn0,11939
@@ -76,7 +76,7 @@ dsipts/models/vva/minigpt.py,sha256=bg0JddqSD322uxSGexen3nPXL_hGTsk3vNLR62d7-w8,
 dsipts/models/vva/vqvae.py,sha256=RzCQ_M9xBprp7_x20dSV3EQqlO0FjPUGWV-qdyKrQsM,19680
 dsipts/models/xlstm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dsipts/models/xlstm/xLSTM.py,sha256=ZKZZmffmIq1Vb71CR4GSyM8viqVx-u0FChxhcNgHub8,10081
-dsipts-1.1.12.dist-info/METADATA,sha256=nxE2kAg9RvG5Py27sMNbQ-mUIu9mtZrDo2WocLpJdQ4,24795
-dsipts-1.1.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dsipts-1.1.12.dist-info/top_level.txt,sha256=i6o0rf5ScFwZK21E89dSKjVNjUBkrEQpn0-Vij43748,7
-dsipts-1.1.12.dist-info/RECORD,,
+dsipts-1.1.13.dist-info/METADATA,sha256=6UZ0nHk0RoGXxxkPYCyB0w41m8LlOE5BfoiswplloXQ,24795
+dsipts-1.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dsipts-1.1.13.dist-info/top_level.txt,sha256=i6o0rf5ScFwZK21E89dSKjVNjUBkrEQpn0-Vij43748,7
+dsipts-1.1.13.dist-info/RECORD,,