pertpy-0.6.0-py3-none-any.whl → pertpy-0.8.0-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66)
  1. pertpy/__init__.py +4 -2
  2. pertpy/data/__init__.py +66 -1
  3. pertpy/data/_dataloader.py +28 -26
  4. pertpy/data/_datasets.py +261 -92
  5. pertpy/metadata/__init__.py +6 -0
  6. pertpy/metadata/_cell_line.py +795 -0
  7. pertpy/metadata/_compound.py +128 -0
  8. pertpy/metadata/_drug.py +238 -0
  9. pertpy/metadata/_look_up.py +569 -0
  10. pertpy/metadata/_metadata.py +70 -0
  11. pertpy/metadata/_moa.py +125 -0
  12. pertpy/plot/__init__.py +0 -13
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +89 -6
  15. pertpy/tools/__init__.py +48 -15
  16. pertpy/tools/_augur.py +329 -32
  17. pertpy/tools/_cinemaot.py +145 -6
  18. pertpy/tools/_coda/_base_coda.py +1237 -116
  19. pertpy/tools/_coda/_sccoda.py +66 -36
  20. pertpy/tools/_coda/_tasccoda.py +46 -39
  21. pertpy/tools/_dialogue.py +180 -77
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +29 -24
  32. pertpy/tools/_distances/_distances.py +584 -98
  33. pertpy/tools/_enrichment.py +460 -0
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +406 -49
  36. pertpy/tools/_mixscape.py +677 -55
  37. pertpy/tools/_perturbation_space/_clustering.py +10 -3
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
  41. pertpy/tools/_perturbation_space/_simple.py +52 -11
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +706 -0
  45. pertpy/tools/_scgen/_utils.py +3 -5
  46. pertpy/tools/decoupler_LICENSE +674 -0
  47. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
  48. pertpy-0.8.0.dist-info/RECORD +57 -0
  49. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  50. pertpy/plot/_augur.py +0 -234
  51. pertpy/plot/_cinemaot.py +0 -81
  52. pertpy/plot/_coda.py +0 -1001
  53. pertpy/plot/_dialogue.py +0 -91
  54. pertpy/plot/_guide_rna.py +0 -82
  55. pertpy/plot/_milopy.py +0 -284
  56. pertpy/plot/_mixscape.py +0 -594
  57. pertpy/plot/_scgen.py +0 -337
  58. pertpy/tools/_differential_gene_expression.py +0 -99
  59. pertpy/tools/_metadata/__init__.py +0 -0
  60. pertpy/tools/_metadata/_cell_line.py +0 -613
  61. pertpy/tools/_metadata/_look_up.py +0 -342
  62. pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
  63. pertpy/tools/_scgen/_jax_scgen.py +0 -370
  64. pertpy-0.6.0.dist-info/RECORD +0 -50
  65. pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
  66. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_cinemaot.py CHANGED
@@ -2,14 +2,17 @@ from __future__ import annotations
2
2
 
3
3
  from typing import TYPE_CHECKING
4
4
 
5
+ import matplotlib.pyplot as plt
5
6
  import numpy as np
6
7
  import pandas as pd
7
8
  import scanpy as sc
8
9
  import scipy.stats as ss
10
+ import seaborn as sns
9
11
  import sklearn.metrics
10
12
  from ott.geometry import pointcloud
11
13
  from ott.problems.linear import linear_problem
12
14
  from ott.solvers.linear import sinkhorn, sinkhorn_lr
15
+ from scanpy.plotting import _utils
13
16
  from scipy.sparse import issparse
14
17
  from sklearn.decomposition import FastICA
15
18
  from sklearn.linear_model import LinearRegression
@@ -17,6 +20,7 @@ from sklearn.neighbors import NearestNeighbors
17
20
 
18
21
  if TYPE_CHECKING:
19
22
  from anndata import AnnData
23
+ from matplotlib.axes import Axes
20
24
  from statsmodels.tools.typing import ArrayLike
21
25
 
22
26
 
@@ -67,6 +71,14 @@ class Cinemaot:
67
71
  Returns an AnnData object that contains the single-cell level treatment effect as de.X and the
68
72
  corresponding low dimensional embedding in de.obsm['X_embedding'], and optional matching matrix
69
73
  stored in the de.obsm['ot']. Also puts the confounding variation in adata.obsm[cf_rep].
74
+
75
+ Examples:
76
+ >>> import pertpy as pt
77
+ >>> adata = pt.dt.cinemaot_example()
78
+ >>> model = pt.tl.Cinemaot()
79
+ >>> out_adata = model.causaleffect(
80
+ >>> adata, pert_key="perturbation", control="No stimulation", return_matching=True,
81
+ >>> thres=0.5, smoothness=1e-5, eps=1e-3, solver="Sinkhorn", preweight_label="cell_type0528")
70
82
  """
71
83
  available_solvers = ["Sinkhorn", "LRSinkhorn"]
72
84
  if solver not in available_solvers:
@@ -225,6 +237,14 @@ class Cinemaot:
225
237
  Returns an anndata object that contains the single-cell level treatment effect as de.X and the
226
238
  corresponding low dimensional embedding in de.obsm['X_embedding'], and optional matching matrix
227
239
  stored in the de.obsm['ot']. Also puts the confounding variation in adata.obsm[cf_rep].
240
+
241
+ Examples:
242
+ >>> import pertpy as pt
243
+ >>> adata = pt.dt.cinemaot_example()
244
+ >>> model = pt.tl.Cinemaot()
245
+ >>> ad, de = model.causaleffect_weighted(
246
+ >>> adata, pert_key="perturbation", control="No stimulation", return_matching=True,
247
+ >>> thres=0.5, smoothness=1e-5, eps=1e-3, solver="Sinkhorn")
228
248
  """
229
249
  available_solvers = ["Sinkhorn", "LRSinkhorn"]
230
250
  assert solver in available_solvers, (
@@ -288,6 +308,16 @@ class Cinemaot:
288
308
 
289
309
  Returns:
290
310
  Returns an anndata object that contains aggregated pseudobulk profiles and associated metadata.
311
+
312
+ Examples:
313
+ >>> import pertpy as pt
314
+ >>> adata = pt.dt.cinemaot_example()
315
+ >>> model = pt.tl.Cinemaot()
316
+ >>> de = model.causaleffect(
317
+ >>> adata, pert_key="perturbation", control="No stimulation", return_matching=True, thres=0.5,
318
+ >>> smoothness=1e-5, eps=1e-3, solver="Sinkhorn", preweight_label="cell_type0528")
319
+ >>> adata_pb = model.generate_pseudobulk(
320
+ >>> adata, de, pert_key="perturbation", control="No stimulation", label_list=None)
291
321
  """
292
322
  sc.pp.neighbors(de, use_rep=de_rep)
293
323
  sc.tl.leiden(de, resolution=de_resolution)
@@ -308,7 +338,7 @@ class Cinemaot:
308
338
  sc.tl.leiden(adata, resolution=cf_resolution)
309
339
  df["ct"] = adata.obs["leiden"].astype(str)
310
340
  df["ptb"] = "control"
311
- df["ptb"][adata.obs[pert_key] != control] = de.obs["leiden"].astype(str)
341
+ df.loc[adata.obs[pert_key] != control, "ptb"] = de.obs["leiden"].astype(str)
312
342
  label_list.append("ptb")
313
343
  df = df.groupby(label_list).sum()
314
344
  new_index = df.index.map(lambda x: "_".join(map(str, x)))
@@ -336,6 +366,12 @@ class Cinemaot:
336
366
 
337
367
  Returns:
338
368
  Returns the estimated dimension number.
369
+
370
+ Examples:
371
+ >>> import pertpy as pt
372
+ >>> adata = pt.dt.cinemaot_example()
373
+ >>> model = pt.tl.Cinemaot()
374
+ >>> dim = model.get_dim(adata)
339
375
  """
340
376
  sk = SinkhornKnopp()
341
377
  if issparse(adata.raw.X):
@@ -369,6 +405,12 @@ class Cinemaot:
369
405
 
370
406
  Returns:
371
407
  Returns the indices.
408
+
409
+ Examples:
410
+ >>> import pertpy as pt
411
+ >>> adata = pt.dt.cinemaot_example()
412
+ >>> model = pt.tl.Cinemaot()
413
+ >>> idx = model.get_weightidx(adata, pert_key="perturbation", control="No stimulation")
372
414
  """
373
415
  adata_ = adata.copy()
374
416
  X_pca1 = adata_.obsm[use_rep][adata_.obs[pert_key] == control, :]
@@ -390,7 +432,7 @@ class Cinemaot:
390
432
  expr_label = "control"
391
433
 
392
434
  adata_.obs["ct"] = ref_label
393
- adata_.obs["ct"][adata_.obs[pert_key] == control] = expr_label
435
+ adata_.obs.loc[adata_.obs[pert_key] == control, "ct"] = expr_label
394
436
  pert_key = "ct"
395
437
  z = np.zeros(adata_.shape[0]) + 1
396
438
 
@@ -485,6 +527,15 @@ class Cinemaot:
485
527
 
486
528
  Returns:
487
529
  Returns an AnnData object that contains the single-cell level synergy matrix de.X and the embedding.
530
+
531
+ Examples:
532
+ >>> import pertpy as pt
533
+ >>> adata = pt.dt.dong_2023()
534
+ >>> sc.pp.pca(adata)
535
+ >>> model = pt.tl.Cinemaot()
536
+ >>> combo = model.synergy(adata, pert_key='perturbation', base='No stimulation', A='IFNb', B='IFNg',
537
+ >>> AB='IFNb+ IFNg', thres=0.5, smoothness=1e-5, eps=1e-3, solver='Sinkhorn')
538
+
488
539
  """
489
540
  adata1 = adata[adata.obs[pert_key].isin([base, A]), :].copy()
490
541
  adata2 = adata[adata.obs[pert_key].isin([B, AB]), :].copy()
@@ -552,6 +603,12 @@ class Cinemaot:
552
603
 
553
604
  Returns:
554
605
  Returns the confounder effect (c_effect) and the residual effect (s_effect).
606
+
607
+ Examples:
608
+ >>> import pertpy as pt
609
+ >>> adata = pt.dt.cinemaot_example()
610
+ >>> model = pt.tl.Cinemaot()
611
+ >>> c_effect, s_effect = model.attribution_scatter(adata, pert_key="perturbation", control="No stimulation")
555
612
  """
556
613
  cf = adata.obsm[cf_rep]
557
614
  if use_raw:
@@ -582,6 +639,84 @@ class Cinemaot:
582
639
  s_effect = (np.linalg.norm(e1, axis=0) + 1e-6) / (np.linalg.norm(e0, axis=0) + 1e-6)
583
640
  return c_effect, s_effect
584
641
 
642
+ def plot_vis_matching(
643
+ self,
644
+ adata: AnnData,
645
+ de: AnnData,
646
+ pert_key: str,
647
+ control: str,
648
+ de_label: str,
649
+ source_label: str,
650
+ matching_rep: str = "ot",
651
+ resolution: float = 0.5,
652
+ normalize: str = "col",
653
+ title: str = "CINEMA-OT matching matrix",
654
+ min_val: float = 0.01,
655
+ show: bool = True,
656
+ save: str | None = None,
657
+ ax: Axes | None = None,
658
+ **kwargs,
659
+ ) -> None:
660
+ """Visualize the CINEMA-OT matching matrix.
661
+
662
+ Args:
663
+ adata: the original anndata after running cinemaot.causaleffect or cinemaot.causaleffect_weighted.
664
+ de: The anndata output from Cinemaot.causaleffect() or Cinemaot.causaleffect_weighted().
665
+ pert_key: The column of `.obs` with perturbation categories, should also contain `control`.
666
+ control: Control category from the `pert_key` column.
667
+ de_label: The label for differential response. If None, Leiden cluster labels computed on de.obsm['X_embedding'] at `resolution` are used.
668
+ source_label: the confounder / cell type label.
669
+ matching_rep: the place that stores the matching matrix. default de.obsm['ot'].
670
+ normalize: How to normalize the coarse-grained matching matrix: `"col"` normalizes each column to sum to one, any other value normalizes each row.
671
+ title: the title for the figure.
672
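+ min_val: The minimum value used to truncate the matching matrix: entries are clipped below at `min_val`, `min_val` is subtracted, and the matrix is re-normalized.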
673
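+ show: Whether to show the plot. If False, the plot is not shown and the axes are returned instead (`ax` if given, otherwise the axes created by seaborn).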
674
+ save: If `True` or a `str`, save the figure. A string is appended to the default filename.
675
+ Infer the filetype if ending on {`'.pdf'`, `'.png'`, `'.svg'`}.
676
+ **kwargs: Other parameters to input for seaborn.heatmap.
677
+
678
+ Examples:
679
+ >>> import pertpy as pt
680
+ >>> adata = pt.dt.cinemaot_example()
681
+ >>> cot = pt.tl.Cinemaot()
682
+ >>> de = cot.causaleffect(
683
+ >>> adata, pert_key="perturbation", control="No stimulation", return_matching=True,
684
+ >>> thres=0.5, smoothness=1e-5, eps=1e-3, solver="Sinkhorn", preweight_label="cell_type0528")
685
+ >>> cot.plot_vis_matching(
686
+ >>> adata, de, pert_key="perturbation", control="No stimulation", de_label=None, source_label="cell_type0528")
687
+ """
688
+ adata_ = adata[adata.obs[pert_key] == control]
689
+
690
+ df = pd.DataFrame(de.obsm[matching_rep])
691
+ if de_label is None:
692
+ de_label = "leiden"
693
+ sc.pp.neighbors(de, use_rep="X_embedding")
694
+ sc.tl.leiden(de, resolution=resolution)
695
+ df["de_label"] = de.obs[de_label].astype(str).values
696
+ df["de_label"] = "Response " + df["de_label"]
697
+ df = df.groupby("de_label").sum().T
698
+ df["source_label"] = adata_.obs[source_label].astype(str).values
699
+ df = df.groupby("source_label").sum()
700
+
701
+ if normalize == "col":
702
+ df = df / df.sum(axis=0)
703
+ else:
704
+ df = (df.T / df.sum(axis=1)).T
705
+ df = df.clip(lower=min_val) - min_val
706
+ if normalize == "col":
707
+ df = df / df.sum(axis=0)
708
+ else:
709
+ df = (df.T / df.sum(axis=1)).T
710
+
711
+ g = sns.heatmap(df, annot=True, ax=ax, **kwargs)
712
+ plt.title(title)
713
+ _utils.savefig_or_show("matching_heatmap", show=show, save=save)
714
+ if not show:
715
+ if ax is not None:
716
+ return ax
717
+ else:
718
+ return g
719
+
585
720
 
586
721
  class Xi:
587
722
  """
@@ -610,7 +745,7 @@ class Xi:
610
745
  # same as pandas rank method 'first'
611
746
  rankdata = ss.rankdata(randomized, method="ordinal")
612
747
  # Reindexing based on pairs of indices before and after
613
- unrandomized = [rankdata[j] for i, j in sorted(zip(randomized_indices, range(len_x)))]
748
+ unrandomized = [rankdata[j] for i, j in sorted(zip(randomized_indices, range(len_x), strict=False))]
614
749
  return unrandomized
615
750
 
616
751
  @property
@@ -648,6 +783,7 @@ class Xi:
648
783
  for x, y in zip(
649
784
  x1,
650
785
  x2,
786
+ strict=False,
651
787
  )
652
788
  ]
653
789
  )
@@ -694,13 +830,16 @@ class Xi:
694
830
  ind = [i + 1 for i in range(self.sample_size)]
695
831
  ind2 = [2 * self.sample_size - 2 * ind[i - 1] + 1 for i in ind]
696
832
 
697
- a = np.mean([i * j * j for i, j in zip(ind2, sorted_ordered_x_rank)]) / self.sample_size
833
+ a = np.mean([i * j * j for i, j in zip(ind2, sorted_ordered_x_rank, strict=False)]) / self.sample_size
698
834
 
699
- c = np.mean([i * j for i, j in zip(ind2, sorted_ordered_x_rank)]) / self.sample_size
835
+ c = np.mean([i * j for i, j in zip(ind2, sorted_ordered_x_rank, strict=False)]) / self.sample_size
700
836
 
701
837
  cq = np.cumsum(sorted_ordered_x_rank)
702
838
 
703
- m = [(i + (self.sample_size - j) * k) / self.sample_size for i, j, k in zip(cq, ind, sorted_ordered_x_rank)]
839
+ m = [
840
+ (i + (self.sample_size - j) * k) / self.sample_size
841
+ for i, j, k in zip(cq, ind, sorted_ordered_x_rank, strict=False)
842
+ ]
704
843
 
705
844
  b = np.mean([np.square(i) for i in m])
706
845
  v = (a - 2 * b + np.square(c)) / np.square(self.inverse_g_mean)