PyPI - SURE-tools - Versions diffs - 2.2.2__py3-none-any.whl → 2.4.3__py3-none-any.whl - Mend

SURE-tools 2.2.2__py3-none-any.whl → 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of SURE-tools might be problematic. Click here for more details.

Files changed (12)

SURE/DensityFlow.py +103 -74
SURE/{PerturbFlow.py → PerturbE.py} +51 -110
SURE/TranscriptomeDecoder.py +527 -0
SURE/__init__.py +5 -1
SURE/perturb/perturb.py +27 -1
SURE/utils/custom_mlp.py +39 -2
{sure_tools-2.2.2.dist-info → sure_tools-2.4.3.dist-info}/METADATA +1 -1
{sure_tools-2.2.2.dist-info → sure_tools-2.4.3.dist-info}/RECORD +12 -11
{sure_tools-2.2.2.dist-info → sure_tools-2.4.3.dist-info}/WHEEL +0 -0
{sure_tools-2.2.2.dist-info → sure_tools-2.4.3.dist-info}/entry_points.txt +0 -0
{sure_tools-2.2.2.dist-info → sure_tools-2.4.3.dist-info}/licenses/LICENSE +0 -0
{sure_tools-2.2.2.dist-info → sure_tools-2.4.3.dist-info}/top_level.txt +0 -0

SURE/DensityFlow.py CHANGED Viewed

@@ -59,12 +59,13 @@ class DensityFlow(nn.Module):
                  input_size: int,
                  codebook_size: int = 200,
                  cell_factor_size: int = 0,
+                 turn_off_cell_specific: bool = False,
                  supervised_mode: bool = False,
                  z_dim: int = 10,
                  z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'gumbel',
-                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'poisson',
+                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'multinomial',
                  inverse_dispersion: float = 10.0,
-                 use_zeroinflate: bool = True,
+                 use_zeroinflate: bool = False,
                  hidden_layers: list = [500],
                  hidden_layer_activation: Literal['relu','softplus','leakyrelu','linear'] = 'relu',
                  nn_dropout: float = 0.1,
@@ -102,6 +103,7 @@ class DensityFlow(nn.Module):
         else:
             self.use_bias = [not zero_bias] * self.cell_factor_size
         #self.use_bias = not zero_bias
+        self.turn_off_cell_specific = turn_off_cell_specific
         self.codebook_weights = None
@@ -203,27 +205,51 @@ class DensityFlow(nn.Module):
             self.cell_factor_effect = nn.ModuleList()
             for i in np.arange(self.cell_factor_size):
                 if self.use_bias[i]:
-                    self.cell_factor_effect.append(MLP(
-                        [self.latent_dim+1] + self.decoder_hidden_layers + [self.latent_dim],
-                        activation=activate_fct,
-                        output_activation=None,
-                        post_layer_fct=post_layer_fct,
-                        post_act_fct=post_act_fct,
-                        allow_broadcast=self.allow_broadcast,
-                        use_cuda=self.use_cuda,
+                    if self.turn_off_cell_specific:
+                        self.cell_factor_effect.append(MLP(
+                            [1] + self.decoder_hidden_layers + [self.latent_dim],
+                            activation=activate_fct,
+                            output_activation=None,
+                            post_layer_fct=post_layer_fct,
+                            post_act_fct=post_act_fct,
+                            allow_broadcast=self.allow_broadcast,
+                            use_cuda=self.use_cuda,
+                            )
+                        )
+                    else:
+                        self.cell_factor_effect.append(MLP(
+                            [self.latent_dim+1] + self.decoder_hidden_layers + [self.latent_dim],
+                            activation=activate_fct,
+                            output_activation=None,
+                            post_layer_fct=post_layer_fct,
+                            post_act_fct=post_act_fct,
+                            allow_broadcast=self.allow_broadcast,
+                            use_cuda=self.use_cuda,
+                            )
                         )
-                    )
                 else:
-                    self.cell_factor_effect.append(ZeroBiasMLP(
-                        [self.latent_dim+1] + self.decoder_hidden_layers + [self.latent_dim],
-                        activation=activate_fct,
-                        output_activation=None,
-                        post_layer_fct=post_layer_fct,
-                        post_act_fct=post_act_fct,
-                        allow_broadcast=self.allow_broadcast,
-                        use_cuda=self.use_cuda,
+                    if self.turn_off_cell_specific:
+                        self.cell_factor_effect.append(ZeroBiasMLP(
+                            [1] + self.decoder_hidden_layers + [self.latent_dim],
+                            activation=activate_fct,
+                            output_activation=None,
+                            post_layer_fct=post_layer_fct,
+                            post_act_fct=post_act_fct,
+                            allow_broadcast=self.allow_broadcast,
+                            use_cuda=self.use_cuda,
+                            )
+                        )
+                    else:
+                        self.cell_factor_effect.append(ZeroBiasMLP(
+                            [self.latent_dim+1] + self.decoder_hidden_layers + [self.latent_dim],
+                            activation=activate_fct,
+                            output_activation=None,
+                            post_layer_fct=post_layer_fct,
+                            post_act_fct=post_act_fct,
+                            allow_broadcast=self.allow_broadcast,
+                            use_cuda=self.use_cuda,
+                            )
                         )
-                    )
         self.decoder_concentrate = MLP(
                     [self.latent_dim] + self.decoder_hidden_layers + [self.input_size],
@@ -370,7 +396,8 @@ class DensityFlow(nn.Module):
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -454,7 +481,8 @@ class DensityFlow(nn.Module):
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -550,7 +578,8 @@ class DensityFlow(nn.Module):
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -656,7 +685,8 @@ class DensityFlow(nn.Module):
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -676,9 +706,17 @@ class DensityFlow(nn.Module):
         zus = None
         for i in np.arange(self.cell_factor_size):
             if i==0:
-                zus = self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
+                #if self.turn_off_cell_specific:
+                #    zus = self.cell_factor_effect[i](us[:,i].reshape(-1,1))
+                #else:
+                #    zus = self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
+                zus = self._cell_response(zns, i, us[:,i].reshape(-1,1))
             else:
-                zus = zus + self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
+                #if self.turn_off_cell_specific:
+                #    zus = zus + self.cell_factor_effect[i](us[:,i].reshape(-1,1))
+                #else:
+                #    zus = zus + self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
+                zus = zus + self._cell_response(zns, i, us[:,i].reshape(-1,1))
         return zus
     def _get_codebook_identity(self):
@@ -696,7 +734,7 @@ class DensityFlow(nn.Module):
         """
         Return the mean part of metacell codebook
         """
-        cb = self._get_metacell_coordinates()
+        cb = self._get_codebook()
         cb = tensor_to_numpy(cb)
         return cb
@@ -820,13 +858,15 @@ class DensityFlow(nn.Module):
             us_i = us[:,pert_idx].reshape(-1,1)
             # factor effect of xs
-            dzs0 = self.get_cell_response(xs, factor_idx=pert_idx, perturb=us_i)
+            dzs0 = self.get_cell_response(zs, perturb_idx=pert_idx, perturb_us=us_i)
             # perturbation effect
             ps = np.ones_like(us_i)
-            dzs = self.get_cell_response(xs, factor_idx=pert_idx, perturb=ps)
-            zs = zs + dzs0 + dzs
+            if np.sum(np.abs(ps-us_i))>=1:
+                dzs = self.get_cell_response(zs, perturb_idx=pert_idx, perturb_us=ps)
+                zs = zs + dzs0 + dzs
+            else:
+                zs = zs + dzs0
         if library_sizes is None:
             library_sizes = np.sum(xs, axis=1, keepdims=True)
@@ -840,47 +880,48 @@ class DensityFlow(nn.Module):
         return counts, zs
-    def _cell_response(self, xs, factor_idx, perturb):
+    def _cell_response(self, zs, perturb_idx, perturb):
         #zns,_ = self.encoder_zn(xs)
-        zns,_ = self._get_basal_embedding(xs)
+        #zns,_ = self._get_basal_embedding(xs)
+        zns = zs
         if perturb.ndim==2:
-            ms = self.cell_factor_effect[factor_idx]([zns, perturb])
+            if self.turn_off_cell_specific:
+                ms = self.cell_factor_effect[perturb_idx](perturb)
+            else:
+                ms = self.cell_factor_effect[perturb_idx]([zns, perturb])
         else:
-            ms = self.cell_factor_effect[factor_idx]([zns, perturb.reshape(-1,1)])
+            if self.turn_off_cell_specific:
+                ms = self.cell_factor_effect[perturb_idx](perturb.reshape(-1,1))
+            else:
+                ms = self.cell_factor_effect[perturb_idx]([zns, perturb.reshape(-1,1)])
         return ms
     def get_cell_response(self,
-                             xs,
-                             factor_idx,
-                             perturb,
+                             zs,
+                             perturb_idx,
+                             perturb_us,
                              batch_size: int = 1024):
         """
         Return cells' changes in the latent space induced by specific perturbation of a factor
         """
-        xs = self.preprocess(xs)
-        xs = convert_to_tensor(xs, device=self.get_device())
-        ps = convert_to_tensor(perturb, device=self.get_device())
-        dataset = CustomDataset2(xs,ps)
+        #xs = self.preprocess(xs)
+        zs = convert_to_tensor(zs, device=self.get_device())
+        ps = convert_to_tensor(perturb_us, device=self.get_device())
+        dataset = CustomDataset2(zs,ps)
         dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
         Z = []
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
-            for X_batch, P_batch, _ in dataloader:
-                zns = self._cell_response(X_batch, factor_idx, P_batch)
+            for Z_batch, P_batch, _ in dataloader:
+                zns = self._cell_response(Z_batch, perturb_idx, P_batch)
                 Z.append(tensor_to_numpy(zns))
                 pbar.update(1)
         Z = np.concatenate(Z)
         return Z
-    def get_metacell_response(self, factor_idx, perturb):
-        zs = self._get_codebook()
-        ps = convert_to_tensor(perturb, device=self.get_device())
-        ms = self.cell_factor_effect[factor_idx]([zs,ps])
-        return tensor_to_numpy(ms)
     def _get_expression_response(self, delta_zs):
         return self.decoder_concentrate(delta_zs)
@@ -905,36 +946,28 @@ class DensityFlow(nn.Module):
         R = np.concatenate(R)
         return R
-    def _count(self,concentrate, library_size=None):
+    def _count(self, concentrate, library_size=None):
         if self.loss_func == 'bernoulli':
             #counts = self.sigmoid(concentrate)
             counts = dist.Bernoulli(logits=concentrate).to_event(1).mean
+        elif self.loss_func == 'multinomial':
+            theta = dist.Multinomial(total_count=int(1e8), logits=concentrate).mean
+            counts = theta * library_size
         else:
             rate = concentrate.exp()
             theta = dist.DirichletMultinomial(total_count=1, concentration=rate).mean
             counts = theta * library_size
-            #counts = dist.Poisson(rate=rate).to_event(1).mean
-        return counts
-    def _count_sample(self,concentrate):
-        if self.loss_func == 'bernoulli':
-            logits = concentrate
-            counts = dist.Bernoulli(logits=logits).to_event(1).sample()
-        else:
-            counts = self._count(concentrate=concentrate)
-            counts = dist.Poisson(rate=counts).to_event(1).sample()
         return counts
     def get_counts(self, zs, library_sizes,
-                        batch_size: int = 1024,
-                        use_sampler: bool = False):
+                        batch_size: int = 1024):
         zs = convert_to_tensor(zs, device=self.get_device())
         if type(library_sizes) == list:
-            library_sizes = np.array(library_sizes).view(-1,1)
+            library_sizes = np.array(library_sizes).reshape(-1,1)
         elif len(library_sizes.shape)==1:
-            library_sizes = library_sizes.view(-1,1)
+            library_sizes = library_sizes.reshape(-1,1)
         ls = convert_to_tensor(library_sizes, device=self.get_device())
         dataset = CustomDataset2(zs,ls)
@@ -944,10 +977,7 @@ class DensityFlow(nn.Module):
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
             for Z_batch, L_batch, _ in dataloader:
                 concentrate = self._get_expression_response(Z_batch)
-                if use_sampler:
-                    counts = self._count_sample(concentrate)
-                else:
-                    counts = self._count(concentrate, L_batch)
+                counts = self._count(concentrate, L_batch)
                 E.append(tensor_to_numpy(counts))
                 pbar.update(1)
@@ -1317,7 +1347,7 @@ def main():
     cell_factor_size = 0 if us is None else us.shape[1]
     ###########################################
-    DensityFlow = DensityFlow(
+    df = DensityFlow(
         input_size=input_size,
         cell_factor_size=cell_factor_size,
         inverse_dispersion=args.inverse_dispersion,
@@ -1336,7 +1366,7 @@ def main():
         dtype=dtype,
     )
-    DensityFlow.fit(xs, us=us,
+    df.fit(xs, us=us,
              num_epochs=args.num_epochs,
              learning_rate=args.learning_rate,
              batch_size=args.batch_size,
@@ -1348,12 +1378,11 @@ def main():
     if args.save_model is not None:
         if args.save_model.endswith('gz'):
-            DensityFlow.save_model(DensityFlow, args.save_model, compression=True)
+            DensityFlow.save_model(df, args.save_model, compression=True)
         else:
-            DensityFlow.save_model(DensityFlow, args.save_model)
+            DensityFlow.save_model(df, args.save_model)
 if __name__ == "__main__":
     main()

SURE-tools 2.2.2__py3-none-any.whl → 2.4.3__py3-none-any.whl

Potentially problematic release.

SURE-tools 2.2.2__py3-none-any.whl → 2.4.3__py3-none-any.whl