SURE-tools 3.6.13__tar.gz → 3.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {sure_tools-3.6.13 → sure_tools-3.7.0}/PKG-INFO +1 -1
  2. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/SURE.py +11 -11
  3. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/SURE_nsf.py +42 -28
  4. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/SURE_vae.py +40 -25
  5. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE_tools.egg-info/PKG-INFO +1 -1
  6. {sure_tools-3.6.13 → sure_tools-3.7.0}/setup.py +1 -1
  7. {sure_tools-3.6.13 → sure_tools-3.7.0}/LICENSE +0 -0
  8. {sure_tools-3.6.13 → sure_tools-3.7.0}/README.md +0 -0
  9. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/SUREMO.py +0 -0
  10. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/SURE_vanilla.py +0 -0
  11. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/__init__.py +0 -0
  12. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/atac/__init__.py +0 -0
  13. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/atac/utils.py +0 -0
  14. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/dist/__init__.py +0 -0
  15. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/dist/negbinomial.py +0 -0
  16. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/graph/__init__.py +0 -0
  17. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/graph/graph_utils.py +0 -0
  18. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/utils/__init__.py +0 -0
  19. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/utils/custom_mlp.py +0 -0
  20. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/utils/label.py +0 -0
  21. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/utils/queue.py +0 -0
  22. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE/utils/utils.py +0 -0
  23. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE_tools.egg-info/SOURCES.txt +0 -0
  24. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE_tools.egg-info/dependency_links.txt +0 -0
  25. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE_tools.egg-info/requires.txt +0 -0
  26. {sure_tools-3.6.13 → sure_tools-3.7.0}/SURE_tools.egg-info/top_level.txt +0 -0
  27. {sure_tools-3.6.13 → sure_tools-3.7.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: SURE-tools
3
- Version: 3.6.13
3
+ Version: 3.7.0
4
4
  Summary: Succinct Representation of Single Cells
5
5
  Home-page: https://github.com/ZengFLab/SURE
6
6
  Author: Feng Zeng
@@ -109,7 +109,7 @@ class SURE(nn.Module):
109
109
  def __init__(self,
110
110
  input_dim: int,
111
111
  codebook_size: int,
112
- condition_size: int = 0,
112
+ condition_sizes: int = 0,
113
113
  covariate_size: int = 0,
114
114
  method: Literal['flow','vae'] = 'vae',
115
115
  transforms: int = 1,
@@ -134,7 +134,7 @@ class SURE(nn.Module):
134
134
  if method == 'flow':
135
135
  self.engine = SURENF(input_dim=input_dim,
136
136
  codebook_size=codebook_size,
137
- condition_size=condition_size,
137
+ condition_sizes=condition_sizes,
138
138
  covariate_size=covariate_size,
139
139
  transforms=transforms,
140
140
  z_dim=z_dim,
@@ -155,7 +155,7 @@ class SURE(nn.Module):
155
155
  elif method == 'vae':
156
156
  self.engine = SUREVAE(input_dim=input_dim,
157
157
  codebook_size=codebook_size,
158
- condition_size=condition_size,
158
+ condition_sizes=condition_sizes,
159
159
  covariate_size=covariate_size,
160
160
  z_dim=z_dim,
161
161
  z_dist=z_dist,
@@ -214,13 +214,13 @@ class SURE(nn.Module):
214
214
  """
215
215
  return self.engine.hard_assignments(xs=xs, batch_size=batch_size, show_progress=show_progress)
216
216
 
217
- def get_condition_effects(self, xs, cs, batch_size=1024, show_progress=True):
218
- return self.engine.get_condition_effects(xs, cs, batch_size=batch_size, show_progress=show_progress)
217
+ def get_condition_effect(self, xs, cs, i, batch_size=1024, show_progress=True):
218
+ return self.engine.get_condition_effect(xs, cs, i, batch_size=batch_size, show_progress=show_progress)
219
219
 
220
220
  def predict_cluster(self, xs, batch_size=1024, show_progress=True):
221
221
  return self.engine.predict_cluster(xs, batch_size=batch_size, show_progress=show_progress)
222
222
 
223
- def predict(self, xs, cs, batch_size=1024, show_progress=True):
223
+ def predict(self, xs, cs_list, batch_size=1024, show_progress=True):
224
224
  """
225
225
  Generate gene expression prediction from given cell data and covariates.
226
226
  This function can be used for simulating cells' transcription profiles at new conditions.
@@ -231,14 +231,14 @@ class SURE(nn.Module):
231
231
  :param batch_size: Data size per batch
232
232
  :param show_progress: Toggle on or off message output
233
233
  """
234
- return self.engine.predict(xs, cs, batch_size, show_progress)
234
+ return self.engine.predict(xs, cs_list, batch_size, show_progress)
235
235
 
236
236
  def preprocess(self, xs, threshold=0):
237
237
  return self.engine.preprocess(xs=xs, threshold=threshold)
238
238
 
239
- def fit(self, xs,
240
- cs = None,
241
- fs = None,
239
+ def fit(self, xs:np.array,
240
+ css:list = None,
241
+ fs:np.array = None,
242
242
  num_epochs: int = 100,
243
243
  learning_rate: float = 0.0001,
244
244
  use_mask: bool = False,
@@ -284,7 +284,7 @@ class SURE(nn.Module):
284
284
  If toggled on, Jax will be used for speeding up. CAUTION: This will raise errors because of unknown reasons when it is called in
285
285
  the Python script or Jupyter notebook. It is OK if it is used when running SURE in the shell command.
286
286
  """
287
- self.engine.fit(xs=xs, cs=cs, fs=fs, num_epochs=num_epochs, learning_rate=learning_rate, use_mask=use_mask, mask_ratio=mask_ratio, batch_size=batch_size, algo=algo,
287
+ self.engine.fit(xs=xs, css=css, fs=fs, num_epochs=num_epochs, learning_rate=learning_rate, use_mask=use_mask, mask_ratio=mask_ratio, batch_size=batch_size, algo=algo,
288
288
  beta_1=beta_1, weight_decay=weight_decay, decay_rate=decay_rate, config_enum=config_enum, threshold=threshold,
289
289
  use_jax=use_jax, show_progress=show_progress, patience=patience, min_delta=min_delta, restore_best_weights=restore_best_weights,
290
290
  monitor=monitor)
@@ -119,12 +119,12 @@ class SURENF(nn.Module):
119
119
  def __init__(self,
120
120
  input_dim: int,
121
121
  codebook_size: int,
122
- condition_size: int = 0,
122
+ condition_sizes: list = [0],
123
123
  covariate_size: int = 0,
124
124
  transforms: int = 1,
125
125
  z_dim: int = 50,
126
126
  z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'studentt',
127
- loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'negbinomial',
127
+ loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'multinomial',
128
128
  dispersion: float = 10.0,
129
129
  use_zeroinflate: bool = True,
130
130
  hidden_layers: list = [500],
@@ -141,7 +141,7 @@ class SURENF(nn.Module):
141
141
  super().__init__()
142
142
 
143
143
  self.input_dim = input_dim
144
- self.condition_size = condition_size
144
+ self.condition_sizes = condition_sizes
145
145
  self.covariate_size = covariate_size
146
146
  self.dispersion = dispersion
147
147
  self.latent_dim = z_dim
@@ -247,16 +247,19 @@ class SURENF(nn.Module):
247
247
  self.encoder_zn = zuko.flows.NSF(features=self.latent_dim, context=self.input_dim,
248
248
  transforms=self.transforms, hidden_features=self.flow_hidden_layers)
249
249
 
250
- if self.condition_size>0:
251
- self.condition_effect = ZeroBiasMLP3(
252
- [self.condition_size + self.latent_dim] + self.decoder_hidden_layers + [self.latent_dim],
253
- activation=activate_fct,
254
- output_activation=None,
255
- post_layer_fct=post_layer_fct,
256
- post_act_fct=post_act_fct,
257
- allow_broadcast=self.allow_broadcast,
258
- use_cuda=self.use_cuda,
259
- )
250
+ if np.sum(self.condition_sizes)>0:
251
+ self.condition_effects = nn.ModuleList()
252
+ for condition_size in self.condition_sizes:
253
+ self.condition_effects.append(ZeroBiasMLP3(
254
+ [condition_size + self.latent_dim] + self.decoder_hidden_layers + [self.latent_dim],
255
+ activation=activate_fct,
256
+ output_activation=None,
257
+ post_layer_fct=post_layer_fct,
258
+ post_act_fct=post_act_fct,
259
+ allow_broadcast=self.allow_broadcast,
260
+ use_cuda=self.use_cuda,
261
+ )
262
+ )
260
263
  if self.covariate_size>0:
261
264
  self.covariate_effect = ZeroBiasMLP2(
262
265
  [self.covariate_size] + self.decoder_hidden_layers + [self.latent_dim],
@@ -393,8 +396,13 @@ class SURENF(nn.Module):
393
396
  zns = pyro.sample('zn', dist.Gumbel(zn_loc, zn_scale).to_event(1))
394
397
 
395
398
  zs = zns
396
- if (self.covariate_size>0) and (cs is not None):
397
- zcs = self.condition_effect([cs,zns])
399
+ if (np.sum(self.condition_sizes)>0) and (cs is not None):
400
+ zcs = torch.zeros_like(zs)
401
+ shift = 0
402
+ for i,condition_size in enumerate(self.condition_sizes):
403
+ cs_i = cs[:,shift:(shift+condition_size)]
404
+ zcs += self.condition_effects[i]([cs_i,zns])
405
+ shift += condition_size
398
406
  else:
399
407
  zcs = torch.zeros_like(zs)
400
408
  if (self.covariate_size>0) and (fs is not None):
@@ -565,7 +573,7 @@ class SURENF(nn.Module):
565
573
  A = np.concatenate(A)
566
574
  return A
567
575
 
568
- def get_condition_effects(self, xs, cs, batch_size=1024, show_progress=True):
576
+ def get_condition_effect(self, xs, cs, i, batch_size=1024, show_progress=True):
569
577
  xs = self.preprocess(xs)
570
578
  xs = convert_to_tensor(xs, dtype=self.dtype, device='cpu')
571
579
  cs = convert_to_tensor(cs, dtype=self.dtype, device='cpu')
@@ -580,11 +588,8 @@ class SURENF(nn.Module):
580
588
  X_batch = X_batch.to(self.get_device())
581
589
  C_batch = cs[idx].to(self.get_device())
582
590
 
583
- #_,ind = self._hard_assignments(X_batch)
584
- #z_basal = cb_loc[ind.squeeze()]
585
- ns = self._soft_assignments(X_batch)
586
- z_basal = torch.matmul(ns, cb_loc)
587
- dzs = self.condition_effect([C_batch,z_basal])
591
+ z_basal = self._get_cell_embedding(X_batch)
592
+ dzs = self.condition_effects[i]([C_batch,z_basal])
588
593
 
589
594
  A.append(tensor_to_numpy(dzs))
590
595
  pbar.update(1)
@@ -596,7 +601,7 @@ class SURENF(nn.Module):
596
601
  zs = self.get_cell_embedding(xs, batch_size=batch_size, show_progress=show_progress)
597
602
  return self.kmeans.predict(zs)
598
603
 
599
- def predict(self, xs, cs, batch_size=1024, show_progress=True):
604
+ def predict(self, xs, cs_list, batch_size=1024, show_progress=True):
600
605
  """
601
606
  Generate gene expression prediction from given cell data and covariates.
602
607
  This function can be used for simulating cells' transcription profiles at new conditions.
@@ -609,6 +614,7 @@ class SURENF(nn.Module):
609
614
  """
610
615
  xs = self.preprocess(xs)
611
616
  xs = convert_to_tensor(xs, dtype=self.dtype, device='cpu')
617
+ cs = np.hstack(cs_list)
612
618
  cs = convert_to_tensor(cs, dtype=self.dtype, device='cpu')
613
619
 
614
620
  dataset = CustomDataset(xs)
@@ -622,7 +628,14 @@ class SURENF(nn.Module):
622
628
  library_size = torch.sum(X_batch, 1)
623
629
 
624
630
  z_basal = self._get_cell_embedding(X_batch)
625
- zcs = self.condition_effect([C_batch, z_basal])
631
+
632
+ zcs = torch.zeros_like(z_basal)
633
+ shift = 0
634
+ for i, condition_size in enumerate(self.condition_sizes):
635
+ C_batch_i = C_batch[:, shift:(shift+condition_size)]
636
+ zcs += self.condition_effects[i]([C_batch_i,z_basal])
637
+ shift += condition_size
638
+
626
639
  zfs = torch.zeros_like(z_basal)
627
640
 
628
641
  log_mu = self.decoder_log_mu([z_basal, zcs, zfs])
@@ -829,9 +842,9 @@ class SURENF(nn.Module):
829
842
  if name in param_store:
830
843
  param_store[name] = param'''
831
844
 
832
- def fit(self, xs,
833
- cs = None,
834
- fs = None,
845
+ def fit(self, xs:np.array,
846
+ css:list = None,
847
+ fs:np.array = None,
835
848
  num_epochs: int = 100,
836
849
  learning_rate: float = 0.0001,
837
850
  use_mask: bool = False,
@@ -892,7 +905,8 @@ class SURENF(nn.Module):
892
905
 
893
906
  xs = self.preprocess(xs, threshold=threshold)
894
907
  xs = convert_to_tensor(xs, dtype=self.dtype, device='cpu')
895
- if cs is not None:
908
+ if css is not None:
909
+ cs = np.hstack(css)
896
910
  cs = convert_to_tensor(cs, dtype=self.dtype, device='cpu')
897
911
  if fs is not None:
898
912
  fs = convert_to_tensor(fs, dtype=self.dtype, device='cpu')
@@ -948,7 +962,7 @@ class SURENF(nn.Module):
948
962
  for batch_x, idx in dataloader:
949
963
  batch_x = batch_x.to(self.get_device())
950
964
  for loss_id in range(num_losses):
951
- if cs is None:
965
+ if css is None:
952
966
  batch_c = None
953
967
  else:
954
968
  batch_c = cs[idx].to(self.get_device())
@@ -119,7 +119,7 @@ class SUREVAE(nn.Module):
119
119
  def __init__(self,
120
120
  input_dim: int,
121
121
  codebook_size: int,
122
- condition_size: int = 0,
122
+ condition_sizes: list = [0],
123
123
  covariate_size: int = 0,
124
124
  transforms: int = 1,
125
125
  z_dim: int = 50,
@@ -141,7 +141,7 @@ class SUREVAE(nn.Module):
141
141
  super().__init__()
142
142
 
143
143
  self.input_dim = input_dim
144
- self.condition_size = condition_size
144
+ self.condition_sizes = condition_sizes
145
145
  self.covariate_size = covariate_size
146
146
  self.dispersion = dispersion
147
147
  self.latent_dim = z_dim
@@ -255,16 +255,19 @@ class SUREVAE(nn.Module):
255
255
  use_cuda=self.use_cuda,
256
256
  )
257
257
 
258
- if self.condition_size>0:
259
- self.condition_effect = ZeroBiasMLP3(
260
- [self.condition_size + self.latent_dim] + self.decoder_hidden_layers + [self.latent_dim],
261
- activation=activate_fct,
262
- output_activation=None,
263
- post_layer_fct=post_layer_fct,
264
- post_act_fct=post_act_fct,
265
- allow_broadcast=self.allow_broadcast,
266
- use_cuda=self.use_cuda,
267
- )
258
+ if np.sum(self.condition_sizes)>0:
259
+ self.condition_effects = nn.ModuleList()
260
+ for condition_size in self.condition_sizes:
261
+ self.condition_effects.append(ZeroBiasMLP3(
262
+ [condition_size + self.latent_dim] + self.decoder_hidden_layers + [self.latent_dim],
263
+ activation=activate_fct,
264
+ output_activation=None,
265
+ post_layer_fct=post_layer_fct,
266
+ post_act_fct=post_act_fct,
267
+ allow_broadcast=self.allow_broadcast,
268
+ use_cuda=self.use_cuda,
269
+ )
270
+ )
268
271
  if self.covariate_size>0:
269
272
  self.covariate_effect = ZeroBiasMLP2(
270
273
  [self.covariate_size] + self.decoder_hidden_layers + [self.latent_dim],
@@ -401,8 +404,13 @@ class SUREVAE(nn.Module):
401
404
  zns = pyro.sample('zn', dist.Gumbel(zn_loc, zn_scale).to_event(1))
402
405
 
403
406
  zs = zns
404
- if (self.condition_size>0) and (cs is not None):
405
- zcs = self.condition_effect([cs,zns])
407
+ if (np.sum(self.condition_sizes)>0) and (cs is not None):
408
+ zcs = torch.zeros_like(zs)
409
+ shift = 0
410
+ for i, condition_size in enumerate(self.condition_sizes):
411
+ cs_i = cs[:,shift:(shift+condition_size)]
412
+ zcs += self.condition_effects[i]([cs_i,zns])
413
+ shift += condition_size
406
414
  else:
407
415
  zcs = torch.zeros_like(zs)
408
416
  if (self.covariate_size>0) and (fs is not None):
@@ -578,7 +586,7 @@ class SUREVAE(nn.Module):
578
586
  A = np.concatenate(A)
579
587
  return A
580
588
 
581
- def get_condition_effects(self, xs, cs, batch_size=1024, show_progress=True):
589
+ def get_condition_effect(self, xs, cs, i, batch_size=1024, show_progress=True):
582
590
  xs = self.preprocess(xs)
583
591
  xs = convert_to_tensor(xs, dtype=self.dtype, device='cpu')
584
592
  cs = convert_to_tensor(cs, dtype=self.dtype, device='cpu')
@@ -593,10 +601,8 @@ class SUREVAE(nn.Module):
593
601
  X_batch = X_batch.to(self.get_device())
594
602
  C_batch = cs[idx].to(self.get_device())
595
603
 
596
- #ns = self._soft_assignments(X_batch)
597
- #z_basal = torch.matmul(ns, cb_loc)
598
604
  z_basal = self._get_cell_embedding(X_batch)
599
- dzs = self.condition_effect([C_batch,z_basal])
605
+ dzs = self.condition_effects[i]([C_batch,z_basal])
600
606
 
601
607
  A.append(tensor_to_numpy(dzs))
602
608
  pbar.update(1)
@@ -608,7 +614,7 @@ class SUREVAE(nn.Module):
608
614
  zs = self.get_cell_embedding(xs, batch_size=batch_size, show_progress=show_progress)
609
615
  return self.kmeans.predict(zs)
610
616
 
611
- def predict(self, xs, cs, batch_size=1024, show_progress=True):
617
+ def predict(self, xs, cs_list, batch_size=1024, show_progress=True):
612
618
  """
613
619
  Generate gene expression prediction from given cell data and covariates.
614
620
  This function can be used for simulating cells' transcription profiles at new conditions.
@@ -621,6 +627,7 @@ class SUREVAE(nn.Module):
621
627
  """
622
628
  xs = self.preprocess(xs)
623
629
  xs = convert_to_tensor(xs, dtype=self.dtype, device='cpu')
630
+ cs = np.hstack(cs_list)
624
631
  cs = convert_to_tensor(cs, dtype=self.dtype, device='cpu')
625
632
 
626
633
  dataset = CustomDataset(xs)
@@ -634,7 +641,14 @@ class SUREVAE(nn.Module):
634
641
  library_size = torch.sum(X_batch, 1)
635
642
 
636
643
  z_basal = self._get_cell_embedding(X_batch)
637
- zcs = self.condition_effect([C_batch,z_basal])
644
+
645
+ zcs = torch.zeros_like(z_basal)
646
+ shift = 0
647
+ for i, condition_size in enumerate(self.condition_sizes):
648
+ C_batch_i = C_batch[:, shift:(shift+condition_size)]
649
+ zcs += self.condition_effects[i]([C_batch_i,z_basal])
650
+ shift += condition_size
651
+
638
652
  zfs = torch.zeros_like(z_basal)
639
653
 
640
654
  log_mu = self.decoder_log_mu([z_basal, zcs, zfs])
@@ -760,9 +774,9 @@ class SUREVAE(nn.Module):
760
774
  pbar.set_postfix({'loss': str_loss})
761
775
  pbar.update(1)'''
762
776
 
763
- def fit(self, xs,
764
- cs = None,
765
- fs = None,
777
+ def fit(self, xs: np.array,
778
+ css: list = None,
779
+ fs: np.array = None,
766
780
  num_epochs: int = 100,
767
781
  learning_rate: float = 0.0001,
768
782
  use_mask: bool = False,
@@ -823,7 +837,8 @@ class SUREVAE(nn.Module):
823
837
 
824
838
  xs = self.preprocess(xs, threshold=threshold)
825
839
  xs = convert_to_tensor(xs, dtype=self.dtype, device='cpu')
826
- if cs is not None:
840
+ if css is not None:
841
+ cs = np.hstack(css)
827
842
  cs = convert_to_tensor(cs, dtype=self.dtype, device='cpu')
828
843
  if fs is not None:
829
844
  fs = convert_to_tensor(fs, dtype=self.dtype, device='cpu')
@@ -879,7 +894,7 @@ class SUREVAE(nn.Module):
879
894
  for batch_x, idx in dataloader:
880
895
  batch_x = batch_x.to(self.get_device())
881
896
  for loss_id in range(num_losses):
882
- if cs is None:
897
+ if css is None:
883
898
  batch_c = None
884
899
  else:
885
900
  batch_c = cs[idx].to(self.get_device())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: SURE-tools
3
- Version: 3.6.13
3
+ Version: 3.7.0
4
4
  Summary: Succinct Representation of Single Cells
5
5
  Home-page: https://github.com/ZengFLab/SURE
6
6
  Author: Feng Zeng
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
5
5
 
6
6
  setup(
7
7
  name='SURE-tools',
8
- version='3.6.13',
8
+ version='3.7.0',
9
9
  description='Succinct Representation of Single Cells',
10
10
  long_description=long_description,
11
11
  long_description_content_type="text/markdown",
File without changes
File without changes
File without changes
File without changes
File without changes