PyPI - SURE-tools - Versions diffs - 2.1.91__tar.gz → 2.2.14__tar.gz - Mend

SURE-tools 2.1.91 (tar.gz) → 2.2.14 (tar.gz)

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of SURE-tools might be problematic. Click here for more details.

Files changed (30)

{sure_tools-2.1.91 → sure_tools-2.2.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: SURE-tools
-Version: 2.1.91
+Version: 2.2.14
 Summary: Succinct Representation of Single Cells
 Home-page: https://github.com/ZengFLab/SURE
 Author: Feng Zeng

sure_tools-2.1.91/SURE/PerturbFlow.py → sure_tools-2.2.14/SURE/DensityFlow.py RENAMED Viewed

@@ -54,19 +54,18 @@ def set_random_seed(seed):
     # Set seed for Pyro
     pyro.set_rng_seed(seed)
-class PerturbFlow(nn.Module):
+class DensityFlow(nn.Module):
     def __init__(self,
                  input_size: int,
                  codebook_size: int = 200,
                  cell_factor_size: int = 0,
-                 cell_factor_effect_discrete: bool = False,
                  supervised_mode: bool = False,
                  z_dim: int = 10,
                  z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'gumbel',
-                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'poisson',
+                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'negbinomial',
                  inverse_dispersion: float = 10.0,
-                 use_zeroinflate: bool = False,
-                 hidden_layers: list = [300],
+                 use_zeroinflate: bool = True,
+                 hidden_layers: list = [500],
                  hidden_layer_activation: Literal['relu','softplus','leakyrelu','linear'] = 'relu',
                  nn_dropout: float = 0.1,
                  post_layer_fct: list = ['layernorm'],
@@ -103,7 +102,6 @@ class PerturbFlow(nn.Module):
         else:
             self.use_bias = [not zero_bias] * self.cell_factor_size
         #self.use_bias = not zero_bias
-        self.enumrate = cell_factor_effect_discrete
         self.codebook_weights = None
@@ -310,7 +308,7 @@ class PerturbFlow(nn.Module):
         return xs
     def model1(self, xs):
-        pyro.module('PerturbFlow', self)
+        pyro.module('DensityFlow', self)
         eps = torch.finfo(xs.dtype).eps
         batch_size = xs.size(0)
@@ -389,7 +387,7 @@ class PerturbFlow(nn.Module):
             ns = pyro.sample('n', dist.OneHotCategorical(logits=alpha))
     def model2(self, xs, us=None):
-        pyro.module('PerturbFlow', self)
+        pyro.module('DensityFlow', self)
         eps = torch.finfo(xs.dtype).eps
         batch_size = xs.size(0)
@@ -431,10 +429,7 @@ class PerturbFlow(nn.Module):
                 zns = pyro.sample('zn', dist.Gumbel(zn_loc, zn_scale).to_event(1))
             if self.cell_factor_size>0:
-                if self.enumrate:
-                    zus = self._total_effects(zn_loc, us)
-                else:
-                    zus = self._total_effects(zns, us)
+                zus = self._total_effects(zns, us)
                 zs = zns+zus
             else:
                 zs = zns
@@ -476,7 +471,7 @@ class PerturbFlow(nn.Module):
             ns = pyro.sample('n', dist.OneHotCategorical(logits=alpha))
     def model3(self, xs, ys, embeds=None):
-        pyro.module('PerturbFlow', self)
+        pyro.module('DensityFlow', self)
         eps = torch.finfo(xs.dtype).eps
         batch_size = xs.size(0)
@@ -572,7 +567,7 @@ class PerturbFlow(nn.Module):
                 zns = embeds
     def model4(self, xs, us, ys, embeds=None):
-        pyro.module('PerturbFlow', self)
+        pyro.module('DensityFlow', self)
         eps = torch.finfo(xs.dtype).eps
         batch_size = xs.size(0)
@@ -636,10 +631,7 @@ class PerturbFlow(nn.Module):
                 #        zus = self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
                 #    else:
                 #        zus = zus + self.cell_factor_effect[i]([zns,us[:,i].reshape(-1,1)])
-                if self.enumrate:
-                    zus = self._total_effects(zn_loc, us)
-                else:
-                    zus = self._total_effects(zns, us)
+                zus = self._total_effects(zns, us)
                 zs = zns+zus
             else:
                 zs = zns
@@ -832,9 +824,11 @@ class PerturbFlow(nn.Module):
             # perturbation effect
             ps = np.ones_like(us_i)
-            dzs = self.get_cell_response(xs, factor_idx=pert_idx, perturb=ps)
-            zs = zs + dzs0 + dzs
+            if np.sum(np.abs(ps-us_i))>=1:
+                dzs = self.get_cell_response(xs, factor_idx=pert_idx, perturb=ps)
+                zs = zs + dzs0 + dzs
+            else:
+                zs = zs + dzs0
         if library_sizes is None:
             library_sizes = np.sum(xs, axis=1, keepdims=True)
@@ -848,35 +842,36 @@ class PerturbFlow(nn.Module):
         return counts, zs
-    def _cell_response(self, xs, factor_idx, perturb):
+    def _cell_response(self, zs, perturb_idx, perturb):
         #zns,_ = self.encoder_zn(xs)
-        zns,_ = self._get_basal_embedding(xs)
+        #zns,_ = self._get_basal_embedding(xs)
+        zns = zs
         if perturb.ndim==2:
-            ms = self.cell_factor_effect[factor_idx]([zns, perturb])
+            ms = self.cell_factor_effect[perturb_idx]([zns, perturb])
         else:
-            ms = self.cell_factor_effect[factor_idx]([zns, perturb.reshape(-1,1)])
+            ms = self.cell_factor_effect[perturb_idx]([zns, perturb.reshape(-1,1)])
         return ms
     def get_cell_response(self,
-                             xs,
-                             factor_idx,
-                             perturb,
+                             zs,
+                             perturb_idx,
+                             perturb_us,
                              batch_size: int = 1024):
         """
         Return cells' changes in the latent space induced by specific perturbation of a factor
         """
-        xs = self.preprocess(xs)
-        xs = convert_to_tensor(xs, device=self.get_device())
-        ps = convert_to_tensor(perturb, device=self.get_device())
-        dataset = CustomDataset2(xs,ps)
+        #xs = self.preprocess(xs)
+        zs = convert_to_tensor(zs, device=self.get_device())
+        ps = convert_to_tensor(perturb_us, device=self.get_device())
+        dataset = CustomDataset2(zs,ps)
         dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
         Z = []
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
-            for X_batch, P_batch, _ in dataloader:
-                zns = self._cell_response(X_batch, factor_idx, P_batch)
+            for Z_batch, P_batch, _ in dataloader:
+                zns = self._cell_response(Z_batch, perturb_idx, P_batch)
                 Z.append(tensor_to_numpy(zns))
                 pbar.update(1)
@@ -913,7 +908,7 @@ class PerturbFlow(nn.Module):
         R = np.concatenate(R)
         return R
-    def _count(self,concentrate, library_size=None):
+    def _count(self, concentrate, library_size=None):
         if self.loss_func == 'bernoulli':
             #counts = self.sigmoid(concentrate)
             counts = dist.Bernoulli(logits=concentrate).to_event(1).mean
@@ -921,28 +916,17 @@ class PerturbFlow(nn.Module):
             rate = concentrate.exp()
             theta = dist.DirichletMultinomial(total_count=1, concentration=rate).mean
             counts = theta * library_size
-            #counts = dist.Poisson(rate=rate).to_event(1).mean
-        return counts
-    def _count_sample(self,concentrate):
-        if self.loss_func == 'bernoulli':
-            logits = concentrate
-            counts = dist.Bernoulli(logits=logits).to_event(1).sample()
-        else:
-            counts = self._count(concentrate=concentrate)
-            counts = dist.Poisson(rate=counts).to_event(1).sample()
         return counts
     def get_counts(self, zs, library_sizes,
-                        batch_size: int = 1024,
-                        use_sampler: bool = False):
+                        batch_size: int = 1024):
         zs = convert_to_tensor(zs, device=self.get_device())
         if type(library_sizes) == list:
-            library_sizes = np.array(library_sizes).view(-1,1)
+            library_sizes = np.array(library_sizes).reshape(-1,1)
         elif len(library_sizes.shape)==1:
-            library_sizes = library_sizes.view(-1,1)
+            library_sizes = library_sizes.reshape(-1,1)
         ls = convert_to_tensor(library_sizes, device=self.get_device())
         dataset = CustomDataset2(zs,ls)
@@ -952,10 +936,7 @@ class PerturbFlow(nn.Module):
         with tqdm(total=len(dataloader), desc='', unit='batch') as pbar:
             for Z_batch, L_batch, _ in dataloader:
                 concentrate = self._get_expression_response(Z_batch)
-                if use_sampler:
-                    counts = self._count_sample(concentrate)
-                else:
-                    counts = self._count(concentrate, L_batch)
+                counts = self._count(concentrate, L_batch)
                 E.append(tensor_to_numpy(counts))
                 pbar.update(1)
@@ -978,7 +959,7 @@ class PerturbFlow(nn.Module):
             us = None,
             ys = None,
             zs = None,
-            num_epochs: int = 200,
+            num_epochs: int = 500,
             learning_rate: float = 0.0001,
             batch_size: int = 256,
             algo: Literal['adam','rmsprop','adamw'] = 'adam',
@@ -989,7 +970,7 @@ class PerturbFlow(nn.Module):
             threshold: int = 0,
             use_jax: bool = True):
         """
-        Train the PerturbFlow model.
+        Train the DensityFlow model.
         Parameters
         ----------
@@ -1015,7 +996,7 @@ class PerturbFlow(nn.Module):
             Parameter for optimization.
         use_jax
             If toggled on, Jax will be used for speeding up. CAUTION: This will raise errors because of unknown reasons when it is called in
-            the Python script or Jupyter notebook. It is OK if it is used when runing PerturbFlow in the shell command.
+            the Python script or Jupyter notebook. It is OK if it is used when runing DensityFlow in the shell command.
         """
         xs = self.preprocess(xs, threshold=threshold)
         xs = convert_to_tensor(xs, dtype=self.dtype, device=self.get_device())
@@ -1133,12 +1114,12 @@ class PerturbFlow(nn.Module):
 EXAMPLE_RUN = (
-    "example run: PerturbFlow --help"
+    "example run: DensityFlow --help"
 )
 def parse_args():
     parser = argparse.ArgumentParser(
-        description="PerturbFlow\n{}".format(EXAMPLE_RUN))
+        description="DensityFlow\n{}".format(EXAMPLE_RUN))
     parser.add_argument(
         "--cuda", action="store_true", help="use GPU(s) to speed up training"
@@ -1325,7 +1306,7 @@ def main():
     cell_factor_size = 0 if us is None else us.shape[1]
     ###########################################
-    perturbflow = PerturbFlow(
+    DensityFlow = DensityFlow(
         input_size=input_size,
         cell_factor_size=cell_factor_size,
         inverse_dispersion=args.inverse_dispersion,
@@ -1344,7 +1325,7 @@ def main():
         dtype=dtype,
     )
-    perturbflow.fit(xs, us=us,
+    DensityFlow.fit(xs, us=us,
              num_epochs=args.num_epochs,
              learning_rate=args.learning_rate,
              batch_size=args.batch_size,
@@ -1356,9 +1337,9 @@ def main():
     if args.save_model is not None:
         if args.save_model.endswith('gz'):
-            PerturbFlow.save_model(perturbflow, args.save_model, compression=True)
+            DensityFlow.save_model(DensityFlow, args.save_model, compression=True)
         else:
-            PerturbFlow.save_model(perturbflow, args.save_model)
+            DensityFlow.save_model(DensityFlow, args.save_model)

{sure_tools-2.1.91 → sure_tools-2.2.14}/SURE/SURE.py RENAMED Viewed

@@ -99,17 +99,17 @@ class SURE(nn.Module):
                  cell_factor_size: int = 0,
                  supervised_mode: bool = False,
                  z_dim: int = 10,
-                 z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'normal',
-                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'negbinomial',
+                 z_dist: Literal['normal','studentt','laplacian','cauchy','gumbel'] = 'gumbel',
+                 loss_func: Literal['negbinomial','poisson','multinomial','bernoulli'] = 'poisson',
                  inverse_dispersion: float = 10.0,
                  use_zeroinflate: bool = True,
-                 hidden_layers: list = [300],
+                 hidden_layers: list = [500],
                  hidden_layer_activation: Literal['relu','softplus','leakyrelu','linear'] = 'relu',
                  nn_dropout: float = 0.1,
                  post_layer_fct: list = ['layernorm'],
                  post_act_fct: list = None,
                  config_enum: str = 'parallel',
-                 use_cuda: bool = False,
+                 use_cuda: bool = True,
                  seed: int = 42,
                  dtype = torch.float32, # type: ignore
                  ):
@@ -817,7 +817,7 @@ class SURE(nn.Module):
             us = None,
             ys = None,
             zs = None,
-            num_epochs: int = 200,
+            num_epochs: int = 500,
             learning_rate: float = 0.0001,
             batch_size: int = 256,
             algo: Literal['adam','rmsprop','adamw'] = 'adam',
@@ -826,7 +826,7 @@ class SURE(nn.Module):
             decay_rate: float = 0.9,
             config_enum: str = 'parallel',
             threshold: int = 0,
-            use_jax: bool = False):
+            use_jax: bool = True):
         """
         Train the SURE model.

{sure_tools-2.1.91 → sure_tools-2.2.14}/SURE/__init__.py RENAMED Viewed

@@ -1,12 +1,12 @@
 from .SURE import SURE
-from .PerturbFlow import PerturbFlow
+from .DensityFlow import DensityFlow
 from . import utils
 from . import codebook
 from . import SURE
-from . import PerturbFlow
+from . import DensityFlow
 from . import atac
 from . import flow
 from . import perturb
-__all__ = ['SURE', 'PerturbFlow', 'flow', 'perturb', 'atac', 'utils', 'codebook']
+__all__ = ['SURE', 'DensityFlow', 'flow', 'perturb', 'atac', 'utils', 'codebook']

{sure_tools-2.1.91 → sure_tools-2.2.14}/SURE/flow/flow_stats.py RENAMED Viewed

@@ -41,6 +41,18 @@ class VectorFieldEval:
         divergence[np.isnan(divergence)] = 0
         return divergence
+    def movement_stats(self,vectors):
+        return calculate_movement_stats(vectors)
+    def direction_stats(self, vectors):
+        return calculate_direction_stats(vectors)
+    def movement_energy(self, vectors, masses=None):
+        return calculate_movement_energy(vectors, masses)
+    def movement_divergence(self, positions, vectors):
+        return calculate_movement_divergence(positions, vectors)
 def calculate_movement_stats(vectors):

{sure_tools-2.1.91 → sure_tools-2.2.14}/SURE/perturb/perturb.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import re
 import numpy as np
+import pandas as pd
 from numba import njit
 from itertools import chain
 from joblib import Parallel, delayed
@@ -8,6 +9,8 @@ from typing import Literal
 class LabelMatrix:
     def __init__(self):
         self.labels_ = None
+        self.control_label = None
+        self.sep_pattern = None
     def fit_transform(self, labels, control_label=None, sep_pattern=r'[,;_\s]', speedup: Literal['none','vectorize','parallel']='none'):
         if speedup=='none':
@@ -24,8 +27,31 @@ class LabelMatrix:
             mat = np.delete(mat, idx, axis=1)
             self.labels_ = np.delete(self.labels_, idx)
+        self.control_label = control_label
+        self.sep_pattern=sep_pattern
         return mat
+    def transform(self, labels, speedup: Literal['none','vectorize','parallel']='none'):
+        sep_pattern = self.sep_pattern
+        if speedup=='none':
+            mat, labels_ = label_to_matrix(labels=labels, sep_pattern=sep_pattern)
+        elif speedup=='vectorize':
+            mat, labels_ = vectorized_label_to_matrix(labels=labels, sep_pattern=sep_pattern)
+        elif speedup=='parallel':
+            mat, labels_ = parallel_label_to_matrix(labels=labels, sep_pattern=sep_pattern)
+        mat_df = pd.DataFrame(mat, columns=labels_)
+        labels_valid = [x for x in labels_ if x in self.labels_]
+        mat_df = mat_df[labels_valid]
+        mat_valid = np.zeros([mat.shape[0], len(self.labels_)])
+        mat_valid_df = pd.DataFrame(mat_valid, columns=self.labels_)
+        mat_valid_df[labels_valid] = mat_df
+        return mat_valid_df.values
     def inverse_transform(self, matrix):
         return matrix_to_labels(matrix=matrix, unique_labels=self.labels_)

{sure_tools-2.1.91 → sure_tools-2.2.14}/SURE_tools.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: SURE-tools
-Version: 2.1.91
+Version: 2.2.14
 Summary: Succinct Representation of Single Cells
 Home-page: https://github.com/ZengFLab/SURE
 Author: Feng Zeng

{sure_tools-2.1.91 → sure_tools-2.2.14}/SURE_tools.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,7 +1,7 @@
 LICENSE
 README.md
 setup.py
-SURE/PerturbFlow.py
+SURE/DensityFlow.py
 SURE/SURE.py
 SURE/__init__.py
 SURE/assembly/__init__.py

{sure_tools-2.1.91 → sure_tools-2.2.14}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
 setup(
     name='SURE-tools',
-    version='2.1.91',
+    version='2.2.14',
     description='Succinct Representation of Single Cells',
     long_description=long_description,
     long_description_content_type="text/markdown",