PyPI - flexynesis - Versions diffs - 0.2.2__tar.gz → 0.2.3__tar.gz - Mend

flexynesis 0.2.2.tar.gz → 0.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{flexynesis-0.2.2 → flexynesis-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: flexynesis
-Version: 0.2.2
+Version: 0.2.3
 Summary: A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical endpoint prediction.
 Author-email: Bora Uyar <bora.uyar@mdc-berlin.de>, Taras Savchyn <Taras.Savchyn@mdc-berlin.de>, Ricardo Wurmus <Ricardo.Wurmus@mdc-berlin.de>, Ahmet Sarigun <Ahmet.Sariguen@mdc-berlin.de>
 Project-URL: homepage, https://github.com/BIMSBbioinfo/flexynesis

{flexynesis-0.2.2 → flexynesis-0.2.3}/flexynesis/__main__.py RENAMED Viewed

@@ -46,6 +46,7 @@ def main():
         --use_loss_weighting (str): Whether to apply loss-balancing using uncertainty weights method. Choices are ['True', 'False']. Default is 'True'.
         --evaluate_baseline_performance (str): Whether to run Random Forest + SVMs to see the performance of off-the-shelf tools on the same dataset. Choices are ['True', 'False']. Default is 'True'.
         --threads (int): How many threads to use when using CPU. Default is 4.
+        --num_workers (int): How many workers to use for model training. Default is 2.
         --use_gpu (bool): If set, the system will attempt to use CUDA/GPU if available.
         --disable_marker_finding (bool): If set, marker discovery after model training is disabled.
         --string_organism (int): STRING DB organism id. Default is 9606.
@@ -99,6 +100,7 @@ def main():
     parser.add_argument("--use_loss_weighting", help="whether to apply loss-balancing using uncertainty weights method", type=str, choices=['True', 'False'], default = 'True')
     parser.add_argument("--evaluate_baseline_performance", help="whether to run Random Forest + SVMs to see the performance of off-the-shelf tools on the same dataset", type=str, choices=['True', 'False'], default = 'True')
     parser.add_argument("--threads", help="(Optional) How many threads to use when using CPU (default is 4)", type=int, default = 4)
+    parser.add_argument("--num_workers", help="(Optional) How many workers to use for model training (default is 2)", type=int, default = 2)
     parser.add_argument("--use_gpu", action="store_true",
                         help="(Optional) If set, the system will attempt to use CUDA/GPU if available.")
     parser.add_argument("--disable_marker_finding", action="store_true",
@@ -253,7 +255,8 @@ def main():
                                             device_type = device_type,
                                             gnn_conv_type = gnn_conv_type,
                                             input_layers = input_layers,
-                                            output_layers = output_layers)
+                                            output_layers = output_layers,
+                                            num_workers = args.num_workers)
     # do a hyperparameter search training multiple models and get the best_configuration
     model, best_params = tuner.perform_tuning(hpo_patience = args.hpo_patience)

{flexynesis-0.2.2 → flexynesis-0.2.3}/flexynesis/config.py RENAMED Viewed

@@ -6,28 +6,28 @@ epochs = [500]
 search_spaces = {
     'DirectPred': [
         Integer(16, 128, name='latent_dim'),
-        Real(0.2, 1, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
+        Real(0.2, 0.5, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
         Real(0.0001, 0.01, prior='log-uniform', name='lr'),
         Integer(8, 32, name='supervisor_hidden_dim'),
         Categorical(epochs, name='epochs')
     ],
     'supervised_vae': [
         Integer(16, 128, name='latent_dim'),
-        Real(0.2, 1, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
+        Real(0.2, 0.5, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
         Integer(8, 32, name='supervisor_hidden_dim'),
         Real(0.0001, 0.01, prior='log-uniform', name='lr'),
         Categorical(epochs, name='epochs')
     ],
     'CrossModalPred': [
         Integer(16, 128, name='latent_dim'),
-        Real(0.2, 1, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
+        Real(0.2, 0.5, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
         Integer(8, 32, name='supervisor_hidden_dim'),
         Real(0.0001, 0.01, prior='log-uniform', name='lr'),
         Categorical(epochs, name='epochs')
     ],
     'MultiTripletNetwork': [
         Integer(16, 128, name='latent_dim'),
-        Real(0.2, 1, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
+        Real(0.2, 0.5, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
         Integer(8, 32, name='supervisor_hidden_dim'),
         Real(0.0001, 0.01, prior='log-uniform', name='lr'),
         Categorical(epochs, name='epochs')

{flexynesis-0.2.2 → flexynesis-0.2.3}/flexynesis/main.py RENAMED Viewed

@@ -56,7 +56,7 @@ class HyperparameterTuning:
                  cv_splits=5, use_loss_weighting=True, early_stop_patience=-1, device_type=None, gnn_conv_type=None,
                  input_layers=None, output_layers=None): Initializes the hyperparameter tuner with specific settings.
-        get_batch_space(min_size=16, max_size=256): Determines the batch size search space based on the dataset size.
+        get_batch_space(min_size=32, max_size=128): Determines the batch size search space based on the dataset size.
         setup_trainer(params, current_step, total_steps, full_train=False): Sets up the trainer with appropriate callbacks
             and configurations for either full training or validation based training.
@@ -80,7 +80,7 @@ class HyperparameterTuning:
                  val_size = 0.2,  use_cv = False, cv_splits = 5,
                  use_loss_weighting = True, early_stop_patience = -1,
                  device_type = None, gnn_conv_type = None,
-                 input_layers = None, output_layers = None):
+                 input_layers = None, output_layers = None, num_workers = 2):
         self.dataset = dataset # dataset for model initiation
         self.loader_dataset = dataset # dataset for defining data loaders (this can be model specific)
         self.model_class = model_class
@@ -107,6 +107,7 @@ class HyperparameterTuning:
         self.gnn_conv_type = gnn_conv_type
         self.input_layers = input_layers
         self.output_layers = output_layers
+        self.num_workers = num_workers
         self.DataLoader = torch.utils.data.DataLoader # use torch data loader by default
@@ -128,7 +129,7 @@ class HyperparameterTuning:
             else:
                 raise ValueError(f"'{self.config_name}' not found in the default config.")
-    def get_batch_space(self, min_size = 32, max_size = 256):
+    def get_batch_space(self, min_size = 32, max_size = 128):
         m = int(np.log2(len(self.dataset) * 0.8))
         st = int(np.log2(min_size))
         end = int(np.log2(max_size))
@@ -214,9 +215,11 @@ class HyperparameterTuning:
                 train_subset = torch.utils.data.Subset(self.loader_dataset, train_index)
                 val_subset = torch.utils.data.Subset(self.loader_dataset, val_index)
                 train_loader = self.DataLoader(train_subset, batch_size=int(params['batch_size']),
-                                               pin_memory=True, shuffle=True, drop_last=True, num_workers = 4, prefetch_factor = None, persistent_workers = True)
+                                               pin_memory=True, shuffle=True, drop_last=True, num_workers = self.num_workers, prefetch_factor = None,
+                                               persistent_workers = self.num_workers > 0)
                 val_loader = self.DataLoader(val_subset, batch_size=int(params['batch_size']),
-                                             pin_memory=True, shuffle=False, num_workers = 4, prefetch_factor = None, persistent_workers = True)
+                                             pin_memory=True, shuffle=False, num_workers = self.num_workers, prefetch_factor = None,
+                                             persistent_workers = self.num_workers > 0)
                 model = self.model_class(**model_args)
                 trainer, early_stop_callback = self.setup_trainer(params, current_step, total_steps)

{flexynesis-0.2.2 → flexynesis-0.2.3}/flexynesis.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: flexynesis
-Version: 0.2.2
+Version: 0.2.3
 Summary: A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical endpoint prediction.
 Author-email: Bora Uyar <bora.uyar@mdc-berlin.de>, Taras Savchyn <Taras.Savchyn@mdc-berlin.de>, Ricardo Wurmus <Ricardo.Wurmus@mdc-berlin.de>, Ahmet Sarigun <Ahmet.Sariguen@mdc-berlin.de>
 Project-URL: homepage, https://github.com/BIMSBbioinfo/flexynesis

{flexynesis-0.2.2 → flexynesis-0.2.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "flexynesis"
-version = "0.2.2"
+version = "0.2.3"
 authors = [
     {name = "Bora Uyar", email = "bora.uyar@mdc-berlin.de"},
     {name = "Taras Savchyn", email = "Taras.Savchyn@mdc-berlin.de"},