PyPI - SURE-tools - Versions diffs - 2.4.17__py3-none-any.whl → 2.4.32__py3-none-any.whl - Mend

SURE-tools 2.4.17__py3-none-any.whl → 2.4.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of SURE-tools might be problematic. Click here for more details.

Files changed (12) hide show

SURE/DensityFlow.py +85 -68
SURE/DensityFlow2.py +1422 -0
SURE/DensityFlowLinear.py +1413 -0
SURE/PerturbationAwareDecoder.py +737 -0
SURE/VirtualCellDecoder.py +0 -1
SURE/__init__.py +7 -2
{sure_tools-2.4.17.dist-info → sure_tools-2.4.32.dist-info}/METADATA +1 -1
{sure_tools-2.4.17.dist-info → sure_tools-2.4.32.dist-info}/RECORD +12 -9
{sure_tools-2.4.17.dist-info → sure_tools-2.4.32.dist-info}/WHEEL +0 -0
{sure_tools-2.4.17.dist-info → sure_tools-2.4.32.dist-info}/entry_points.txt +0 -0
{sure_tools-2.4.17.dist-info → sure_tools-2.4.32.dist-info}/licenses/LICENSE +0 -0
{sure_tools-2.4.17.dist-info → sure_tools-2.4.32.dist-info}/top_level.txt +0 -0

SURE/DensityFlow.py CHANGED Viewed

@@ -57,16 +57,16 @@ def set_random_seed(seed):
 class DensityFlow(nn.Module):
     def __init__(self,
                  input_size: int,
-                 codebook_size: int = 200,
+                 codebook_size: int = 100,
                  cell_factor_size: int = 0,
                  turn_off_cell_specific: bool = False,
                  supervised_mode: bool = False,
-                 z_dim: int = 10,
+                 z_dim: int = 50,
                  z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'gumbel',
-                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'multinomial',
-                 inverse_dispersion: float = 10.0,
+                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'negbinomial',
+                 dispersion: float = 8.0,
                  use_zeroinflate: bool = False,
-                 hidden_layers: list = [500],
+                 hidden_layers: list = [1024],
                  hidden_layer_activation: Literal['relu','softplus','leakyrelu','linear'] = 'relu',
                  nn_dropout: float = 0.1,
                  post_layer_fct: list = ['layernorm'],
@@ -81,7 +81,7 @@ class DensityFlow(nn.Module):
         self.input_size = input_size
         self.cell_factor_size = cell_factor_size
-        self.inverse_dispersion = inverse_dispersion
+        self.dispersion = dispersion
         self.latent_dim = z_dim
         self.hidden_layers = hidden_layers
         self.decoder_hidden_layers = hidden_layers[::-1]
@@ -109,6 +109,13 @@ class DensityFlow(nn.Module):
         set_random_seed(seed)
         self.setup_networks()
+        print(f"🧬 DensityFlow Initialized:")
+        print(f"   - Latent Dimension: {self.latent_dim}")
+        print(f"   - Gene Dimension: {self.input_size}")
+        print(f"   - Hidden Dimensions: {self.hidden_layers}")
+        print(f"   - Device: {self.get_device()}")
+        print(f"   - Parameters: {sum(p.numel() for p in self.parameters()):,}")
     def setup_networks(self):
         latent_dim = self.latent_dim
@@ -251,7 +258,7 @@ class DensityFlow(nn.Module):
                             )
                         )
-        self.decoder_concentrate = MLP(
+        self.decoder_log_mu = MLP(
                     [self.latent_dim] + self.decoder_hidden_layers + [self.input_size],
                     activation=activate_fct,
                     output_activation=None,
@@ -341,8 +348,8 @@ class DensityFlow(nn.Module):
         self.options = dict(dtype=xs.dtype, device=xs.device)
         if self.loss_func=='negbinomial':
-            total_count = pyro.param("inverse_dispersion", self.inverse_dispersion *
-                                     xs.new_ones(self.input_size), constraint=constraints.positive)
+            dispersion = pyro.param("dispersion", self.dispersion *
+                                            xs.new_ones(self.input_size), constraint=constraints.positive)
         if self.use_zeroinflate:
             gate_logits = pyro.param("dropout_rate", xs.new_zeros(self.input_size))
@@ -376,28 +383,32 @@ class DensityFlow(nn.Module):
                 zns = pyro.sample('zn', dist.Gumbel(zn_loc, zn_scale).to_event(1))
             zs = zns
-            concentrate = self.decoder_concentrate(zs)
+            log_mu = self.decoder_log_mu(zs)
             if self.loss_func in ['bernoulli']:
-                log_theta = concentrate
+                log_theta = log_mu
+            elif self.loss_func == 'negbinomial':
+                mu = log_mu.exp()
             else:
-                rate = concentrate.exp()
+                rate = log_mu.exp()
                 theta = dist.DirichletMultinomial(total_count=1, concentration=rate).mean
                 if self.loss_func == 'poisson':
                     rate = theta * torch.sum(xs, dim=1, keepdim=True)
             if self.loss_func == 'negbinomial':
+                logits = (mu.log()-dispersion.log()).clamp(min=-15, max=15)
                 if self.use_zeroinflate:
-                    pyro.sample('x', dist.ZeroInflatedDistribution(dist.NegativeBinomial(total_count=total_count, probs=theta),gate_logits=gate_logits).to_event(1), obs=xs)
+                    pyro.sample('x', dist.ZeroInflatedDistribution(dist.NegativeBinomial(total_count=dispersion,
+                                                                                         logits=logits),gate_logits=gate_logits).to_event(1), obs=xs)
                 else:
-                    pyro.sample('x', dist.NegativeBinomial(total_count=total_count, probs=theta).to_event(1), obs=xs)
+                    pyro.sample('x', dist.NegativeBinomial(total_count=dispersion,
+                                                           logits=logits).to_event(1), obs=xs)
             elif self.loss_func == 'poisson':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Poisson(rate=rate),gate_logits=gate_logits).to_event(1), obs=xs.round())
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -421,8 +432,8 @@ class DensityFlow(nn.Module):
         self.options = dict(dtype=xs.dtype, device=xs.device)
         if self.loss_func=='negbinomial':
-            total_count = pyro.param("inverse_dispersion", self.inverse_dispersion *
-                                     xs.new_ones(self.input_size), constraint=constraints.positive)
+            dispersion = pyro.param("dispersion", self.dispersion *
+                                            xs.new_ones(self.input_size), constraint=constraints.positive)
         if self.use_zeroinflate:
             gate_logits = pyro.param("dropout_rate", xs.new_zeros(self.input_size))
@@ -461,28 +472,30 @@ class DensityFlow(nn.Module):
             else:
                 zs = zns
-            concentrate = self.decoder_concentrate(zs)
+            log_mu = self.decoder_log_mu(zs)
             if self.loss_func in ['bernoulli']:
-                log_theta = concentrate
+                log_theta = log_mu
+            elif self.loss_func == 'negbinomial':
+                mu = log_mu.exp()
             else:
-                rate = concentrate.exp()
+                rate = log_mu.exp()
                 theta = dist.DirichletMultinomial(total_count=1, concentration=rate).mean
                 if self.loss_func == 'poisson':
                     rate = theta * torch.sum(xs, dim=1, keepdim=True)
             if self.loss_func == 'negbinomial':
+                logits = (mu.log()-dispersion.log()).clamp(min=-15, max=15)
                 if self.use_zeroinflate:
-                    pyro.sample('x', dist.ZeroInflatedDistribution(dist.NegativeBinomial(total_count=total_count, probs=theta),gate_logits=gate_logits).to_event(1), obs=xs)
+                    pyro.sample('x', dist.ZeroInflatedDistribution(dist.NegativeBinomial(total_count=dispersion, logits=logits),gate_logits=gate_logits).to_event(1), obs=xs)
                 else:
-                    pyro.sample('x', dist.NegativeBinomial(total_count=total_count, probs=theta).to_event(1), obs=xs)
+                    pyro.sample('x', dist.NegativeBinomial(total_count=dispersion, logits=logits).to_event(1), obs=xs)
             elif self.loss_func == 'poisson':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Poisson(rate=rate),gate_logits=gate_logits).to_event(1), obs=xs.round())
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -506,8 +519,8 @@ class DensityFlow(nn.Module):
         self.options = dict(dtype=xs.dtype, device=xs.device)
         if self.loss_func=='negbinomial':
-            total_count = pyro.param("inverse_dispersion", self.inverse_dispersion *
-                                     xs.new_ones(self.input_size), constraint=constraints.positive)
+            dispersion = pyro.param("dispersion", self.dispersion *
+                                            xs.new_ones(self.input_size), constraint=constraints.positive)
         if self.use_zeroinflate:
             gate_logits = pyro.param("dropout_rate", xs.new_zeros(self.input_size))
@@ -558,28 +571,31 @@ class DensityFlow(nn.Module):
             zs = zns
-            concentrate = self.decoder_concentrate(zs)
+            log_mu = self.decoder_log_mu(zs)
             if self.loss_func in ['bernoulli']:
-                log_theta = concentrate
+                log_theta = log_mu
+            elif self.loss_func in ['negbinomial']:
+                mu = log_mu.exp()
             else:
-                rate = concentrate.exp()
+                rate = log_mu.exp()
                 theta = dist.DirichletMultinomial(total_count=1, concentration=rate).mean
                 if self.loss_func == 'poisson':
                     rate = theta * torch.sum(xs, dim=1, keepdim=True)
             if self.loss_func == 'negbinomial':
+                logits = (mu.log()-dispersion.log()).clamp(min=-15, max=15)
                 if self.use_zeroinflate:
-                    pyro.sample('x', dist.ZeroInflatedDistribution(dist.NegativeBinomial(total_count=total_count, probs=theta),gate_logits=gate_logits).to_event(1), obs=xs)
+                    pyro.sample('x', dist.ZeroInflatedDistribution(dist.NegativeBinomial(total_count=dispersion,
+                                                                                         logits=logits),gate_logits=gate_logits).to_event(1), obs=xs)
                 else:
-                    pyro.sample('x', dist.NegativeBinomial(total_count=total_count, probs=theta).to_event(1), obs=xs)
+                    pyro.sample('x', dist.NegativeBinomial(total_count=dispersion, logits=logits).to_event(1), obs=xs)
             elif self.loss_func == 'poisson':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Poisson(rate=rate),gate_logits=gate_logits).to_event(1), obs=xs.round())
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -603,8 +619,8 @@ class DensityFlow(nn.Module):
         self.options = dict(dtype=xs.dtype, device=xs.device)
         if self.loss_func=='negbinomial':
-            total_count = pyro.param("inverse_dispersion", self.inverse_dispersion *
-                                     xs.new_ones(self.input_size), constraint=constraints.positive)
+            dispersion = pyro.param("dispersion", self.dispersion *
+                                            xs.new_ones(self.input_size), constraint=constraints.positive)
         if self.use_zeroinflate:
             gate_logits = pyro.param("dropout_rate", xs.new_zeros(self.input_size))
@@ -665,28 +681,31 @@ class DensityFlow(nn.Module):
             else:
                 zs = zns
-            concentrate = self.decoder_concentrate(zs)
+            log_mu = self.decoder_log_mu(zs)
             if self.loss_func in ['bernoulli']:
-                log_theta = concentrate
+                log_theta = log_mu
+            elif self.loss_func in ['negbinomial']:
+                mu = log_mu.exp()
             else:
-                rate = concentrate.exp()
+                rate = log_mu.exp()
                 theta = dist.DirichletMultinomial(total_count=1, concentration=rate).mean
                 if self.loss_func == 'poisson':
                     rate = theta * torch.sum(xs, dim=1, keepdim=True)
             if self.loss_func == 'negbinomial':
+                logits = (mu.log()-dispersion.log()).clamp(min=-15, max=15)
                 if self.use_zeroinflate:
-                    pyro.sample('x', dist.ZeroInflatedDistribution(dist.NegativeBinomial(total_count=total_count, probs=theta),gate_logits=gate_logits).to_event(1), obs=xs)
+                    pyro.sample('x', dist.ZeroInflatedDistribution(dist.NegativeBinomial(total_count=dispersion,
+                                                                                         logits=logits),gate_logits=gate_logits).to_event(1), obs=xs)
                 else:
-                    pyro.sample('x', dist.NegativeBinomial(total_count=total_count, probs=theta).to_event(1), obs=xs)
+                    pyro.sample('x', dist.NegativeBinomial(total_count=dispersion, logits=logits).to_event(1), obs=xs)
             elif self.loss_func == 'poisson':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Poisson(rate=rate),gate_logits=gate_logits).to_event(1), obs=xs.round())
                 else:
                     pyro.sample('x', dist.Poisson(rate=rate).to_event(1), obs=xs.round())
             elif self.loss_func == 'multinomial':
-                #pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
-                pyro.sample('x', dist.Multinomial(total_count=int(1e8), logits=concentrate), obs=xs)
+                pyro.sample('x', dist.Multinomial(total_count=int(1e8), probs=theta), obs=xs)
             elif self.loss_func == 'bernoulli':
                 if self.use_zeroinflate:
                     pyro.sample('x', dist.ZeroInflatedDistribution(dist.Bernoulli(logits=log_theta),gate_logits=gate_logits).to_event(1), obs=xs)
@@ -710,13 +729,13 @@ class DensityFlow(nn.Module):
                 #    zus = self.cell_factor_effect[i](us[:,i].reshape(-1,1))
                 #else:
                 #    zus = self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
-                zus = self._cell_response(zns, i, us[:,i].reshape(-1,1))
+                zus = self._cell_shift(zns, i, us[:,i].reshape(-1,1))
             else:
                 #if self.turn_off_cell_specific:
                 #    zus = zus + self.cell_factor_effect[i](us[:,i].reshape(-1,1))
                 #else:
                 #    zus = zus + self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
-                zus = zus + self._cell_response(zns, i, us[:,i].reshape(-1,1))
+                zus = zus + self._cell_shift(zns, i, us[:,i].reshape(-1,1))
         return zus
     def _get_codebook_identity(self):
@@ -858,12 +877,12 @@ class DensityFlow(nn.Module):
             us_i = us[:,pert_idx].reshape(-1,1)
             # factor effect of xs
-            dzs0 = self.get_cell_response(zs, perturb_idx=pert_idx, perturb_us=us_i)
+            dzs0 = self.get_cell_shift(zs, perturb_idx=pert_idx, perturb_us=us_i)
             # perturbation effect
             ps = np.ones_like(us_i)
             if np.sum(np.abs(ps-us_i))>=1:
-                dzs = self.get_cell_response(zs, perturb_idx=pert_idx, perturb_us=ps)
+                dzs = self.get_cell_shift(zs, perturb_idx=pert_idx, perturb_us=ps)
                 zs = zs + dzs0 + dzs
             else:
                 zs = zs + dzs0
@@ -877,10 +896,11 @@ class DensityFlow(nn.Module):
             library_sizes = library_sizes.reshape(-1,1)
         counts = self.get_counts(zs, library_sizes=library_sizes)
+        log_mu = self.get_log_mu(zs)
-        return counts, zs
+        return counts, log_mu
-    def _cell_response(self, zs, perturb_idx, perturb):
+    def _cell_shift(self, zs, perturb_idx, perturb):
         #zns,_ = self.encoder_zn(xs)
         #zns,_ = self._get_basal_embedding(xs)
         zns = zs
@@ -897,7 +917,7 @@ class DensityFlow(nn.Module):
         return ms
-    def get_cell_response(self,
+    def get_cell_shift(self,
                              zs,
                              perturb_idx,
                              perturb_us,
@@ -915,46 +935,43 @@ class DensityFlow(nn.Module):
         Z = []
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
             for Z_batch, P_batch, _ in dataloader:
-                zns = self._cell_response(Z_batch, perturb_idx, P_batch)
+                zns = self._cell_shift(Z_batch, perturb_idx, P_batch)
                 Z.append(tensor_to_numpy(zns))
                 pbar.update(1)
         Z = np.concatenate(Z)
         return Z
-    def _get_expression_response(self, delta_zs):
-        return self.decoder_concentrate(delta_zs)
+    def _log_mu(self, zs):
+        return self.decoder_log_mu(zs)
-    def get_expression_response(self,
-                             delta_zs,
-                             batch_size: int = 1024):
+    def get_log_mu(self, zs, batch_size: int = 1024):
         """
         Return cells' changes in the feature space induced by specific perturbation of a factor
         """
-        delta_zs = convert_to_tensor(delta_zs, device=self.get_device())
-        dataset = CustomDataset(delta_zs)
+        zs = convert_to_tensor(zs, device=self.get_device())
+        dataset = CustomDataset(zs)
         dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
         R = []
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
-            for delta_Z_batch, _ in dataloader:
-                r = self._get_expression_response(delta_Z_batch)
+            for Z_batch, _ in dataloader:
+                r = self._log_mu(Z_batch)
                 R.append(tensor_to_numpy(r))
                 pbar.update(1)
         R = np.concatenate(R)
         return R
-    def _count(self, concentrate, library_size=None):
+    def _count(self, log_mu, library_size=None):
         if self.loss_func == 'bernoulli':
-            #counts = self.sigmoid(concentrate)
-            counts = dist.Bernoulli(logits=concentrate).to_event(1).mean
+            counts = dist.Bernoulli(logits=log_mu).to_event(1).mean
         elif self.loss_func == 'multinomial':
-            theta = dist.Multinomial(total_count=int(1e8), logits=concentrate).mean
+            theta = dist.Multinomial(total_count=int(1e8), logits=log_mu).mean
             counts = theta * library_size
         else:
-            rate = concentrate.exp()
+            rate = log_mu.exp()
             theta = dist.DirichletMultinomial(total_count=1, concentration=rate).mean
             counts = theta * library_size
         return counts
@@ -976,8 +993,8 @@ class DensityFlow(nn.Module):
         E = []
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
             for Z_batch, L_batch, _ in dataloader:
-                concentrate = self._get_expression_response(Z_batch)
-                counts = self._count(concentrate, L_batch)
+                log_mu = self._log_mu(Z_batch)
+                counts = self._count(log_mu, L_batch)
                 E.append(tensor_to_numpy(counts))
                 pbar.update(1)
@@ -1350,7 +1367,7 @@ def main():
     df = DensityFlow(
         input_size=input_size,
         cell_factor_size=cell_factor_size,
-        inverse_dispersion=args.inverse_dispersion,
+        dispersion=args.dispersion,
         z_dim=args.z_dim,
         hidden_layers=args.hidden_layers,
         hidden_layer_activation=args.hidden_layer_activation,

SURE-tools 2.4.17__py3-none-any.whl → 2.4.32__py3-none-any.whl

Potentially problematic release.

SURE-tools 2.4.17__py3-none-any.whl → 2.4.32__py3-none-any.whl