PyPI - SURE-tools - Versions diffs - 2.1.87__py3-none-any.whl → 2.4.3__py3-none-any.whl - Mend

SURE-tools 2.1.87__py3-none-any.whl → 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of SURE-tools might be problematic. Click here for more details.

Files changed (15)

SURE/DensityFlow.py +1388 -0
SURE/{PerturbFlow.py → PerturbE.py} +51 -122
SURE/SURE.py +6 -6
SURE/TranscriptomeDecoder.py +527 -0
SURE/__init__.py +7 -3
SURE/flow/flow_stats.py +12 -0
SURE/perturb/perturb.py +27 -1
SURE/utils/custom_mlp.py +39 -2
{sure_tools-2.1.87.dist-info → sure_tools-2.4.3.dist-info}/METADATA +1 -1
sure_tools-2.4.3.dist-info/RECORD +27 -0
sure_tools-2.1.87.dist-info/RECORD +0 -25
{sure_tools-2.1.87.dist-info → sure_tools-2.4.3.dist-info}/WHEEL +0 -0
{sure_tools-2.1.87.dist-info → sure_tools-2.4.3.dist-info}/entry_points.txt +0 -0
{sure_tools-2.1.87.dist-info → sure_tools-2.4.3.dist-info}/licenses/LICENSE +0 -0
{sure_tools-2.1.87.dist-info → sure_tools-2.4.3.dist-info}/top_level.txt +0 -0

SURE/{PerturbFlow.py → PerturbE.py} RENAMED Viewed

@@ -10,7 +10,7 @@ from torch.distributions.utils import logits_to_probs, probs_to_logits, clamp_pr
 from torch.distributions import constraints
 from torch.distributions.transforms import SoftmaxTransform
-from .utils.custom_mlp import MLP, Exp, ZeroBiasMLP
+from .utils.custom_mlp import MLP, Exp, ZeroBiasMLP2
 from .utils.utils import CustomDataset, CustomDataset2, CustomDataset4, tensor_to_numpy, convert_to_tensor
@@ -54,7 +54,7 @@ def set_random_seed(seed):
     # Set seed for Pyro
     pyro.set_rng_seed(seed)
-class PerturbFlow(nn.Module):
+class PerturbE(nn.Module):
     def __init__(self,
                  input_size: int,
                  codebook_size: int = 200,
@@ -62,10 +62,10 @@ class PerturbFlow(nn.Module):
                  supervised_mode: bool = False,
                  z_dim: int = 10,
                  z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'gumbel',
-                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'poisson',
+                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'multinomial',
                  inverse_dispersion: float = 10.0,
                  use_zeroinflate: bool = False,
-                 hidden_layers: list = [300],
+                 hidden_layers: list = [500],
                  hidden_layer_activation: Literal['relu','softplus','leakyrelu','linear'] = 'relu',
                  nn_dropout: float = 0.1,
                  post_layer_fct: list = ['layernorm'],
@@ -73,8 +73,6 @@ class PerturbFlow(nn.Module):
                  config_enum: str = 'parallel',
                  use_cuda: bool = True,
                  seed: int = 42,
-                 zero_bias: bool|list = True,
-                 enumrate: bool = False,
                  dtype = torch.float32, # type: ignore
                  ):
         super().__init__()
@@ -98,12 +96,6 @@ class PerturbFlow(nn.Module):
         self.post_layer_fct = post_layer_fct
         self.post_act_fct = post_act_fct
         self.hidden_layer_activation = hidden_layer_activation
-        if type(zero_bias) == list:
-            self.use_bias = [not x for x in zero_bias]
-        else:
-            self.use_bias = [not zero_bias] * self.cell_factor_size
-        #self.use_bias = not zero_bias
-        self.enumrate = enumrate
         self.codebook_weights = None
@@ -202,29 +194,14 @@ class PerturbFlow(nn.Module):
         )
         if self.cell_factor_size>0:
-            self.cell_factor_effect = nn.ModuleList()
-            for i in np.arange(self.cell_factor_size):
-                if self.use_bias[i]:
-                    self.cell_factor_effect.append(MLP(
-                        [self.latent_dim+1] + self.decoder_hidden_layers + [self.latent_dim],
+            self.cell_factor_effect = ZeroBiasMLP2(
+                        [self.cell_factor_size] + self.decoder_hidden_layers + [self.latent_dim],
                         activation=activate_fct,
                         output_activation=None,
                         post_layer_fct=post_layer_fct,
                         post_act_fct=post_act_fct,
                         allow_broadcast=self.allow_broadcast,
                         use_cuda=self.use_cuda,
-                        )
-                    )
-                else:
-                    self.cell_factor_effect.append(ZeroBiasMLP(
-                        [self.latent_dim+1] + self.decoder_hidden_layers + [self.latent_dim],
-                        activation=activate_fct,
-                        output_activation=None,
-                        post_layer_fct=post_layer_fct,
-                        post_act_fct=post_act_fct,
-                        allow_broadcast=self.allow_broadcast,
-                        use_cuda=self.use_cuda,
-                        )
                     )
         self.decoder_concentrate = MLP(
@@ -310,7 +287,7 @@ class PerturbFlow(nn.Module):
         return xs
     def model1(self, xs):
-        pyro.module('PerturbFlow', self)
+        pyro.module('PerturbE', self)
         eps = torch.finfo(xs.dtype).eps
         batch_size = xs.size(0)
@@ -372,7 +349,8 @@ class PerturbFlow(nn.Module):
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -389,7 +367,7 @@ class PerturbFlow(nn.Module):
             ns = pyro.sample('n', dist.OneHotCategorical(logits=alpha))
     def model2(self, xs, us=None):
-        pyro.module('PerturbFlow', self)
+        pyro.module('PerturbE', self)
         eps = torch.finfo(xs.dtype).eps
         batch_size = xs.size(0)
@@ -431,12 +409,7 @@ class PerturbFlow(nn.Module):
                 zns = pyro.sample('zn', dist.Gumbel(zn_loc, zn_scale).to_event(1))
             if self.cell_factor_size>0:
-                if self.enumrate:
-                    idx = torch.argmax(ns, dim=1)
-                    zn_loc = acs_loc[idx]
-                    zus = self._total_effects(zn_loc, us)
-                else:
-                    zus = self._total_effects(zns, us)
+                zus = self._perturb_effects(us)
                 zs = zns+zus
             else:
                 zs = zns
@@ -461,7 +434,8 @@ class PerturbFlow(nn.Module):
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -478,7 +452,7 @@ class PerturbFlow(nn.Module):
             ns = pyro.sample('n', dist.OneHotCategorical(logits=alpha))
     def model3(self, xs, ys, embeds=None):
-        pyro.module('PerturbFlow', self)
+        pyro.module('PerturbE', self)
         eps = torch.finfo(xs.dtype).eps
         batch_size = xs.size(0)
@@ -557,7 +531,8 @@ class PerturbFlow(nn.Module):
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -574,7 +549,7 @@ class PerturbFlow(nn.Module):
                 zns = embeds
     def model4(self, xs, us, ys, embeds=None):
-        pyro.module('PerturbFlow', self)
+        pyro.module('PerturbE', self)
         eps = torch.finfo(xs.dtype).eps
         batch_size = xs.size(0)
@@ -638,12 +613,7 @@ class PerturbFlow(nn.Module):
                 #        zus = self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
                 #    else:
                 #        zus = zus + self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
-                if self.enumrate:
-                    idx = torch.argmax(ns, dim=1)
-                    zn_loc = acs_loc[idx]
-                    zus = self._total_effects(zn_loc, us)
-                else:
-                    zus = self._total_effects(zns, us)
+                zus = self._perturb_effects(us)
                 zs = zns+zus
             else:
                 zs = zns
@@ -668,7 +638,8 @@ class PerturbFlow(nn.Module):
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -684,13 +655,8 @@ class PerturbFlow(nn.Module):
             else:
                 zns = embeds
-    def _total_effects(self, zns, us):
-        zus = None
-        for i in np.arange(self.cell_factor_size):
-            if i==0:
-                zus = self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
-            else:
-                zus = zus + self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
+    def _perturb_effects(self, us):
+        zus = self._cell_response(us)
         return zus
     def _get_codebook_identity(self):
@@ -708,7 +674,7 @@ class PerturbFlow(nn.Module):
         """
         Return the mean part of metacell codebook
         """
-        cb = self._get_metacell_coordinates()
+        cb = self._get_codebook()
         cb = tensor_to_numpy(cb)
         return cb
@@ -822,23 +788,13 @@ class PerturbFlow(nn.Module):
         A = np.concatenate(A)
         return A
-    def predict(self, xs, us, perturbs_predict:list, perturbs_reference:list, library_sizes=None):
+    def predict(self, xs, perturbs_us, library_sizes=None):
         perturbs_reference = np.array(perturbs_reference)
         # basal embedding
         zs = self.get_basal_embedding(xs)
-        for pert in perturbs_predict:
-            pert_idx = int(np.where(perturbs_reference==pert)[0])
-            us_i = us[:,pert_idx].reshape(-1,1)
-            # factor effect of xs
-            dzs0 = self.get_cell_response(xs, factor_idx=pert_idx, perturb=us_i)
-            # perturbation effect
-            ps = np.ones_like(us_i)
-            dzs = self.get_cell_response(xs, factor_idx=pert_idx, perturb=ps)
-            zs = zs + dzs0 + dzs
+        dzs = self.get_cell_response(perturbs_us)
+        zs = zs + dzs
         if library_sizes is None:
             library_sizes = np.sum(xs, axis=1, keepdims=True)
@@ -852,47 +808,32 @@ class PerturbFlow(nn.Module):
         return counts, zs
-    def _cell_response(self, xs, factor_idx, perturb):
-        #zns,_ = self.encoder_zn(xs)
-        zns,_ = self._get_basal_embedding(xs)
-        if perturb.ndim==2:
-            ms = self.cell_factor_effect[factor_idx]([zns, perturb])
-        else:
-            ms = self.cell_factor_effect[factor_idx]([zns, perturb.reshape(-1,1)])
+    def _cell_response(self, perturb):
+        ms = self.cell_factor_effect(perturb)
         return ms
     def get_cell_response(self,
-                             xs,
-                             factor_idx,
-                             perturb,
+                             perturb_us,
                              batch_size: int = 1024):
         """
         Return cells' changes in the latent space induced by specific perturbation of a factor
         """
-        xs = self.preprocess(xs)
-        xs = convert_to_tensor(xs, device=self.get_device())
-        ps = convert_to_tensor(perturb, device=self.get_device())
-        dataset = CustomDataset2(xs,ps)
+        #xs = self.preprocess(xs)
+        ps = convert_to_tensor(perturb_us, device=self.get_device())
+        dataset = CustomDataset(ps)
         dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
         Z = []
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
-            for X_batch, P_batch, _ in dataloader:
-                zns = self._cell_response(X_batch, factor_idx, P_batch)
+            for P_batch, _ in dataloader:
+                zns = self._cell_response(P_batch)
                 Z.append(tensor_to_numpy(zns))
                 pbar.update(1)
         Z = np.concatenate(Z)
         return Z
-    def get_metacell_response(self, factor_idx, perturb):
-        zs = self._get_codebook()
-        ps = convert_to_tensor(perturb, device=self.get_device())
-        ms = self.cell_factor_effect[factor_idx]([zs,ps])
-        return tensor_to_numpy(ms)
     def _get_expression_response(self, delta_zs):
         return self.decoder_concentrate(delta_zs)
@@ -917,36 +858,28 @@ class PerturbFlow(nn.Module):
         R = np.concatenate(R)
         return R
-    def _count(self,concentrate, library_size=None):
+    def _count(self, concentrate, library_size=None):
         if self.loss_func == 'bernoulli':
             #counts = self.sigmoid(concentrate)
             counts = dist.Bernoulli(logits=concentrate).to_event(1).mean
+        elif self.loss_func == 'multinomial':
+            theta = dist.Multinomial(total_count=int(1e8), logits=concentrate).mean
+            counts = theta * library_size
         else:
             rate = concentrate.exp()
             theta = dist.DirichletMultinomial(total_count=1, concentration=rate).mean
             counts = theta * library_size
-            #counts = dist.Poisson(rate=rate).to_event(1).mean
-        return counts
-    def _count_sample(self,concentrate):
-        if self.loss_func == 'bernoulli':
-            logits = concentrate
-            counts = dist.Bernoulli(logits=logits).to_event(1).sample()
-        else:
-            counts = self._count(concentrate=concentrate)
-            counts = dist.Poisson(rate=counts).to_event(1).sample()
         return counts
     def get_counts(self, zs, library_sizes,
-                        batch_size: int = 1024,
-                        use_sampler: bool = False):
+                        batch_size: int = 1024):
         zs = convert_to_tensor(zs, device=self.get_device())
         if type(library_sizes) == list:
-            library_sizes = np.array(library_sizes).view(-1,1)
+            library_sizes = np.array(library_sizes).reshape(-1,1)
         elif len(library_sizes.shape)==1:
-            library_sizes = library_sizes.view(-1,1)
+            library_sizes = library_sizes.reshape(-1,1)
         ls = convert_to_tensor(library_sizes, device=self.get_device())
         dataset = CustomDataset2(zs,ls)
@@ -956,10 +889,7 @@ class PerturbFlow(nn.Module):
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
             for Z_batch, L_batch, _ in dataloader:
                 concentrate = self._get_expression_response(Z_batch)
-                if use_sampler:
-                    counts = self._count_sample(concentrate)
-                else:
-                    counts = self._count(concentrate, L_batch)
+                counts = self._count(concentrate, L_batch)
                 E.append(tensor_to_numpy(counts))
                 pbar.update(1)
@@ -982,7 +912,7 @@ class PerturbFlow(nn.Module):
             us = None,
             ys = None,
             zs = None,
-            num_epochs: int = 200,
+            num_epochs: int = 500,
             learning_rate: float = 0.0001,
             batch_size: int = 256,
             algo: Literal['adam','rmsprop','adamw'] = 'adam',
@@ -993,7 +923,7 @@ class PerturbFlow(nn.Module):
             threshold: int = 0,
             use_jax: bool = True):
         """
-        Train the PerturbFlow model.
+        Train the PerturbE model.
         Parameters
         ----------
@@ -1019,7 +949,7 @@ class PerturbFlow(nn.Module):
             Parameter for optimization.
         use_jax
             If toggled on, Jax will be used for speeding up. CAUTION: This will raise errors because of unknown reasons when it is called in
-            the Python script or Jupyter notebook. It is OK if it is used when runing PerturbFlow in the shell command.
+            the Python script or Jupyter notebook. It is OK if it is used when runing PerturbE in the shell command.
         """
         xs = self.preprocess(xs, threshold=threshold)
         xs = convert_to_tensor(xs, dtype=self.dtype, device=self.get_device())
@@ -1137,12 +1067,12 @@ class PerturbFlow(nn.Module):
 EXAMPLE_RUN = (
-    "example run: PerturbFlow --help"
+    "example run: PerturbE --help"
 )
 def parse_args():
     parser = argparse.ArgumentParser(
-        description="PerturbFlow\n{}".format(EXAMPLE_RUN))
+        description="PerturbE\n{}".format(EXAMPLE_RUN))
     parser.add_argument(
         "--cuda", action="store_true", help="use GPU(s) to speed up training"
@@ -1329,7 +1259,7 @@ def main():
     cell_factor_size = 0 if us is None else us.shape[1]
     ###########################################
-    perturbflow = PerturbFlow(
+    perturbe = PerturbE(
         input_size=input_size,
         cell_factor_size=cell_factor_size,
         inverse_dispersion=args.inverse_dispersion,
@@ -1348,7 +1278,7 @@ def main():
         dtype=dtype,
     )
-    perturbflow.fit(xs, us=us,
+    perturbe.fit(xs, us=us,
              num_epochs=args.num_epochs,
              learning_rate=args.learning_rate,
              batch_size=args.batch_size,
@@ -1360,12 +1290,11 @@ def main():
     if args.save_model is not None:
         if args.save_model.endswith('gz'):
-            PerturbFlow.save_model(perturbflow, args.save_model, compression=True)
+            PerturbE.save_model(perturbe, args.save_model, compression=True)
         else:
-            PerturbFlow.save_model(perturbflow, args.save_model)
+            PerturbE.save_model(perturbe, args.save_model)
 if __name__ == "__main__":
     main()

SURE/SURE.py CHANGED Viewed

@@ -99,17 +99,17 @@ class SURE(nn.Module):
                  cell_factor_size: int = 0,
                  supervised_mode: bool = False,
                  z_dim: int = 10,
-                 z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'normal',
-                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'negbinomial',
+                 z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'gumbel',
+                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'poisson',
                  inverse_dispersion: float = 10.0,
                  use_zeroinflate: bool = True,
-                 hidden_layers: list = [300],
+                 hidden_layers: list = [500],
                  hidden_layer_activation: Literal['relu','softplus','leakyrelu','linear'] = 'relu',
                  nn_dropout: float = 0.1,
                  post_layer_fct: list = ['layernorm'],
                  post_act_fct: list = None,
                  config_enum: str = 'parallel',
-                 use_cuda: bool = False,
+                 use_cuda: bool = True,
                  seed: int = 42,
                  dtype = torch.float32, # type: ignore
                  ):
@@ -817,7 +817,7 @@ class SURE(nn.Module):
             us = None,
             ys = None,
             zs = None,
-            num_epochs: int = 200,
+            num_epochs: int = 500,
             learning_rate: float = 0.0001,
             batch_size: int = 256,
             algo: Literal['adam','rmsprop','adamw'] = 'adam',
@@ -826,7 +826,7 @@ class SURE(nn.Module):
             decay_rate: float = 0.9,
             config_enum: str = 'parallel',
             threshold: int = 0,
-            use_jax: bool = False):
+            use_jax: bool = True):
         """
         Train the SURE model.

SURE-tools 2.1.87__py3-none-any.whl → 2.4.3__py3-none-any.whl

Potentially problematic release.

SURE-tools 2.1.87__py3-none-any.whl → 2.4.3__py3-none-any.whl