pertpy 0.11.5__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pertpy/__init__.py +4 -1
- pertpy/data/_dataloader.py +61 -58
- pertpy/tools/__init__.py +18 -27
- pertpy/tools/_coda/_base_coda.py +10 -4
- pertpy/tools/_coda/_sccoda.py +42 -0
- pertpy/tools/_dialogue.py +3 -3
- pertpy/tools/_differential_gene_expression/__init__.py +45 -4
- pertpy/tools/_differential_gene_expression/_base.py +2 -1
- pertpy/tools/_differential_gene_expression/_edger.py +9 -12
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +0 -2
- pertpy/tools/_distances/_distance_tests.py +2 -2
- pertpy/tools/_distances/_distances.py +33 -8
- pertpy/tools/_milo.py +80 -25
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +16 -25
- pertpy/tools/_perturbation_space/_perturbation_space.py +1 -1
- pertpy/tools/_perturbation_space/_simple.py +8 -0
- {pertpy-0.11.5.dist-info → pertpy-1.0.1.dist-info}/METADATA +10 -4
- {pertpy-0.11.5.dist-info → pertpy-1.0.1.dist-info}/RECORD +20 -20
- {pertpy-0.11.5.dist-info → pertpy-1.0.1.dist-info}/WHEEL +0 -0
- {pertpy-0.11.5.dist-info → pertpy-1.0.1.dist-info}/licenses/LICENSE +0 -0
pertpy/__init__.py
CHANGED
@@ -2,10 +2,11 @@
|
|
2
2
|
|
3
3
|
__author__ = "Lukas Heumos"
|
4
4
|
__email__ = "lukas.heumos@posteo.net"
|
5
|
-
__version__ = "0.11.5"
|
5
|
+
__version__ = "1.0.1"
|
6
6
|
|
7
7
|
import warnings
|
8
8
|
|
9
|
+
from anndata._core.aligned_df import ImplicitModificationWarning
|
9
10
|
from matplotlib import MatplotlibDeprecationWarning
|
10
11
|
from numba import NumbaDeprecationWarning
|
11
12
|
|
@@ -13,6 +14,8 @@ warnings.filterwarnings("ignore", category=NumbaDeprecationWarning)
|
|
13
14
|
warnings.filterwarnings("ignore", category=MatplotlibDeprecationWarning)
|
14
15
|
warnings.filterwarnings("ignore", category=SyntaxWarning)
|
15
16
|
warnings.filterwarnings("ignore", category=UserWarning, module="scvi._settings")
|
17
|
+
warnings.filterwarnings("ignore", message="Environment variable.*redefined by R")
|
18
|
+
warnings.filterwarnings("ignore", message="Transforming to str index.", category=ImplicitModificationWarning)
|
16
19
|
|
17
20
|
import mudata
|
18
21
|
|
pertpy/data/_dataloader.py
CHANGED
@@ -49,66 +49,69 @@ def _download( # pragma: no cover
|
|
49
49
|
Path(output_path).mkdir(parents=True, exist_ok=True)
|
50
50
|
lock_path = Path(output_path) / f"{output_file_name}.lock"
|
51
51
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
temp_file_name = Path(f"{download_to_path}.part")
|
58
|
-
|
59
|
-
retry_count = 0
|
60
|
-
while retry_count <= max_retries:
|
61
|
-
try:
|
62
|
-
head_response = requests.head(url, timeout=timeout)
|
63
|
-
head_response.raise_for_status()
|
64
|
-
content_length = int(head_response.headers.get("content-length", 0))
|
65
|
-
|
66
|
-
free_space = shutil.disk_usage(output_path).free
|
67
|
-
if content_length > free_space:
|
68
|
-
raise OSError(
|
69
|
-
f"Insufficient disk space. Need {content_length} bytes, but only {free_space} available."
|
70
|
-
)
|
71
|
-
|
72
|
-
response = requests.get(url, stream=True)
|
73
|
-
response.raise_for_status()
|
74
|
-
total = int(response.headers.get("content-length", 0))
|
75
|
-
|
76
|
-
with Progress(refresh_per_second=5) as progress:
|
77
|
-
task = progress.add_task("[red]Downloading...", total=total)
|
78
|
-
with Path(temp_file_name).open("wb") as file:
|
79
|
-
for data in response.iter_content(block_size):
|
80
|
-
file.write(data)
|
81
|
-
progress.update(task, advance=len(data))
|
82
|
-
progress.update(task, completed=total, refresh=True)
|
83
|
-
|
84
|
-
Path(temp_file_name).replace(download_to_path)
|
85
|
-
|
86
|
-
if is_zip:
|
87
|
-
with ZipFile(download_to_path, "r") as zip_obj:
|
88
|
-
zip_obj.extractall(path=output_path)
|
89
|
-
return Path(output_path)
|
90
|
-
|
52
|
+
try:
|
53
|
+
with FileLock(lock_path, timeout=300):
|
54
|
+
if Path(download_to_path).exists() and not overwrite:
|
55
|
+
logger.warning(f"File {download_to_path} already exists!")
|
91
56
|
return download_to_path
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
57
|
+
|
58
|
+
temp_file_name = Path(f"{download_to_path}.part")
|
59
|
+
|
60
|
+
retry_count = 0
|
61
|
+
while retry_count <= max_retries:
|
62
|
+
try:
|
63
|
+
head_response = requests.head(url, timeout=timeout)
|
64
|
+
head_response.raise_for_status()
|
65
|
+
content_length = int(head_response.headers.get("content-length", 0))
|
66
|
+
|
67
|
+
free_space = shutil.disk_usage(output_path).free
|
68
|
+
if content_length > free_space:
|
69
|
+
raise OSError(
|
70
|
+
f"Insufficient disk space. Need {content_length} bytes, but only {free_space} available."
|
71
|
+
)
|
72
|
+
|
73
|
+
response = requests.get(url, stream=True)
|
74
|
+
response.raise_for_status()
|
75
|
+
total = int(response.headers.get("content-length", 0))
|
76
|
+
|
77
|
+
with Progress(refresh_per_second=5) as progress:
|
78
|
+
task = progress.add_task("[red]Downloading...", total=total)
|
79
|
+
with Path(temp_file_name).open("wb") as file:
|
80
|
+
for data in response.iter_content(block_size):
|
81
|
+
file.write(data)
|
82
|
+
progress.update(task, advance=len(data))
|
83
|
+
progress.update(task, completed=total, refresh=True)
|
84
|
+
|
85
|
+
Path(temp_file_name).replace(download_to_path)
|
86
|
+
|
87
|
+
if is_zip:
|
88
|
+
with ZipFile(download_to_path, "r") as zip_obj:
|
89
|
+
zip_obj.extractall(path=output_path)
|
90
|
+
return Path(output_path)
|
91
|
+
|
92
|
+
return download_to_path
|
93
|
+
except (OSError, RequestException) as e:
|
94
|
+
retry_count += 1
|
95
|
+
if retry_count <= max_retries:
|
96
|
+
logger.warning(
|
97
|
+
f"Download attempt {retry_count}/{max_retries} failed: {str(e)}. Retrying in {retry_delay} seconds..."
|
98
|
+
)
|
99
|
+
time.sleep(retry_delay)
|
100
|
+
else:
|
101
|
+
logger.error(f"Download failed after {max_retries} attempts: {str(e)}")
|
102
|
+
if Path(temp_file_name).exists():
|
103
|
+
Path(temp_file_name).unlink(missing_ok=True)
|
104
|
+
raise
|
105
|
+
|
106
|
+
except Exception as e:
|
107
|
+
logger.error(f"Download failed: {str(e)}")
|
101
108
|
if Path(temp_file_name).exists():
|
102
109
|
Path(temp_file_name).unlink(missing_ok=True)
|
103
110
|
raise
|
111
|
+
finally:
|
112
|
+
if Path(temp_file_name).exists():
|
113
|
+
Path(temp_file_name).unlink(missing_ok=True)
|
114
|
+
finally:
|
115
|
+
lock_path.unlink(missing_ok=True)
|
104
116
|
|
105
|
-
|
106
|
-
logger.error(f"Download failed: {str(e)}")
|
107
|
-
if Path(temp_file_name).exists():
|
108
|
-
Path(temp_file_name).unlink(missing_ok=True)
|
109
|
-
raise
|
110
|
-
finally:
|
111
|
-
if Path(temp_file_name).exists():
|
112
|
-
Path(temp_file_name).unlink(missing_ok=True)
|
113
|
-
|
114
|
-
return Path(download_to_path)
|
117
|
+
return Path(download_to_path)
|
pertpy/tools/__init__.py
CHANGED
@@ -1,24 +1,5 @@
|
|
1
1
|
from importlib import import_module
|
2
2
|
|
3
|
-
|
4
|
-
def lazy_import(module_path: str, class_name: str, extras: list[str]):
|
5
|
-
try:
|
6
|
-
for extra in extras:
|
7
|
-
import_module(extra)
|
8
|
-
module = import_module(module_path)
|
9
|
-
return getattr(module, class_name)
|
10
|
-
except ImportError:
|
11
|
-
|
12
|
-
class Placeholder:
|
13
|
-
def __init__(self, *args, **kwargs):
|
14
|
-
raise ImportError(
|
15
|
-
f"Extra dependencies required: {', '.join(extras)}. "
|
16
|
-
f"Please install with: pip install {' '.join(extras)}"
|
17
|
-
)
|
18
|
-
|
19
|
-
return Placeholder
|
20
|
-
|
21
|
-
|
22
3
|
from pertpy.tools._augur import Augur
|
23
4
|
from pertpy.tools._cinemaot import Cinemaot
|
24
5
|
from pertpy.tools._coda._sccoda import Sccoda
|
@@ -42,15 +23,25 @@ from pertpy.tools._perturbation_space._simple import (
|
|
42
23
|
)
|
43
24
|
from pertpy.tools._scgen import Scgen
|
44
25
|
|
45
|
-
CODA_EXTRAS = ["toytree", "ete4"] # also "pyqt6" but it cannot be imported
|
46
|
-
Tasccoda = lazy_import("pertpy.tools._coda._tasccoda", "Tasccoda", CODA_EXTRAS)
|
47
26
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
27
|
+
def __getattr__(name: str):
|
28
|
+
if name == "Tasccoda":
|
29
|
+
try:
|
30
|
+
for extra in ["toytree", "ete4"]:
|
31
|
+
import_module(extra)
|
32
|
+
module = import_module("pertpy.tools._coda._tasccoda")
|
33
|
+
return module.Tasccoda
|
34
|
+
except ImportError:
|
35
|
+
raise ImportError(
|
36
|
+
"Extra dependencies required: toytree, ete4. Please install with: pip install toytree ete4"
|
37
|
+
) from None
|
38
|
+
|
39
|
+
elif name in ["EdgeR", "PyDESeq2", "Statsmodels", "TTest", "WilcoxonTest"]:
|
40
|
+
module = import_module("pertpy.tools._differential_gene_expression")
|
41
|
+
return getattr(module, name)
|
42
|
+
|
43
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
44
|
+
|
54
45
|
|
55
46
|
__all__ = [
|
56
47
|
"Augur",
|
pertpy/tools/_coda/_base_coda.py
CHANGED
@@ -1181,7 +1181,7 @@ class CompositionalModel2(ABC):
|
|
1181
1181
|
r,
|
1182
1182
|
bars,
|
1183
1183
|
bottom=cum_bars,
|
1184
|
-
color=palette(n % palette.N),
|
1184
|
+
color=palette(n % palette.N), # type: ignore
|
1185
1185
|
width=barwidth,
|
1186
1186
|
label=type_names[n],
|
1187
1187
|
linewidth=0,
|
@@ -1377,6 +1377,7 @@ class CompositionalModel2(ABC):
|
|
1377
1377
|
plot_df.columns = covariate_names
|
1378
1378
|
plot_df = pd.melt(plot_df, ignore_index=False, var_name="Covariate")
|
1379
1379
|
|
1380
|
+
plot_df.index.name = "Cell Type"
|
1380
1381
|
plot_df = plot_df.reset_index()
|
1381
1382
|
|
1382
1383
|
if len(covariate_names_zero) != 0 and plot_facets and plot_zero_covariate and not plot_zero_cell_type:
|
@@ -1472,6 +1473,7 @@ class CompositionalModel2(ABC):
|
|
1472
1473
|
if return_fig and not plot_facets:
|
1473
1474
|
return plt.gcf()
|
1474
1475
|
plt.show()
|
1476
|
+
|
1475
1477
|
return None
|
1476
1478
|
|
1477
1479
|
@_doc_params(common_plot_args=doc_common_plot_args)
|
@@ -1823,6 +1825,7 @@ class CompositionalModel2(ABC):
|
|
1823
1825
|
if return_fig:
|
1824
1826
|
return plt.gcf()
|
1825
1827
|
plt.show()
|
1828
|
+
|
1826
1829
|
return None
|
1827
1830
|
|
1828
1831
|
@_doc_params(common_plot_args=doc_common_plot_args)
|
@@ -1881,7 +1884,7 @@ class CompositionalModel2(ABC):
|
|
1881
1884
|
from ete4.treeview import CircleFace, NodeStyle, TextFace, TreeStyle, faces
|
1882
1885
|
except ImportError:
|
1883
1886
|
raise ImportError(
|
1884
|
-
"To use tasccoda please install additional dependencies
|
1887
|
+
"To use tasccoda please install additional dependencies: `pip install pertpy[coda]`"
|
1885
1888
|
) from None
|
1886
1889
|
|
1887
1890
|
if isinstance(data, MuData):
|
@@ -1902,8 +1905,8 @@ class CompositionalModel2(ABC):
|
|
1902
1905
|
tree.render(save, tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi) # type: ignore
|
1903
1906
|
if return_fig:
|
1904
1907
|
return tree, tree_style
|
1908
|
+
|
1905
1909
|
return tree.render("%%inline", tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi) # type: ignore
|
1906
|
-
return None
|
1907
1910
|
|
1908
1911
|
@_doc_params(common_plot_args=doc_common_plot_args)
|
1909
1912
|
def plot_draw_effects( # pragma: no cover # noqa: D417
|
@@ -1969,7 +1972,7 @@ class CompositionalModel2(ABC):
|
|
1969
1972
|
from ete4.treeview import CircleFace, NodeStyle, TextFace, TreeStyle, faces
|
1970
1973
|
except ImportError:
|
1971
1974
|
raise ImportError(
|
1972
|
-
"To use tasccoda please install additional dependencies
|
1975
|
+
"To use tasccoda please install additional dependencies: `pip install pertpy[coda]`"
|
1973
1976
|
) from None
|
1974
1977
|
|
1975
1978
|
if isinstance(data, MuData):
|
@@ -2207,6 +2210,7 @@ class CompositionalModel2(ABC):
|
|
2207
2210
|
if return_fig:
|
2208
2211
|
return fig
|
2209
2212
|
plt.show()
|
2213
|
+
|
2210
2214
|
return None
|
2211
2215
|
|
2212
2216
|
|
@@ -2325,6 +2329,7 @@ def df2newick(df: pd.DataFrame, levels: list[str], inner_label: bool = True) ->
|
|
2325
2329
|
strs = [traverse(df_tax, a, 0, inner_label) for a in alevel]
|
2326
2330
|
|
2327
2331
|
newick = f"({','.join(strs)});"
|
2332
|
+
|
2328
2333
|
return newick
|
2329
2334
|
|
2330
2335
|
|
@@ -2562,6 +2567,7 @@ def from_scanpy(
|
|
2562
2567
|
covariate_obs = list(set(covariate_obs or []) | set(sample_identifier))
|
2563
2568
|
|
2564
2569
|
if isinstance(sample_identifier, list):
|
2570
|
+
adata.obs = adata.obs.copy()
|
2565
2571
|
adata.obs["scCODA_sample_id"] = adata.obs[sample_identifier].agg("-".join, axis=1)
|
2566
2572
|
sample_identifier = "scCODA_sample_id"
|
2567
2573
|
|
pertpy/tools/_coda/_sccoda.py
CHANGED
@@ -409,6 +409,48 @@ class Sccoda(CompositionalModel2):
|
|
409
409
|
import arviz as az
|
410
410
|
|
411
411
|
# Create arviz object
|
412
|
+
if use_posterior_predictive:
|
413
|
+
posterior_predictive = Predictive(self.model, self.mcmc.get_samples())(
|
414
|
+
rng_key,
|
415
|
+
counts=None,
|
416
|
+
covariates=numpyro_covariates,
|
417
|
+
n_total=numpyro_n_total,
|
418
|
+
ref_index=ref_index,
|
419
|
+
sample_adata=sample_adata,
|
420
|
+
)
|
421
|
+
# Remove problematic posterior predictive arrays with wrong dimensions
|
422
|
+
if posterior_predictive and "counts" in posterior_predictive:
|
423
|
+
counts_shape = posterior_predictive["counts"].shape
|
424
|
+
expected_dims = 2 # ['sample', 'cell_type']
|
425
|
+
if len(counts_shape) != expected_dims:
|
426
|
+
posterior_predictive = {k: v for k, v in posterior_predictive.items() if k != "counts"}
|
427
|
+
logger.warning(
|
428
|
+
f"Removed 'counts' from posterior_predictive due to dimension mismatch: got {len(counts_shape)}D, expected {expected_dims}D"
|
429
|
+
)
|
430
|
+
else:
|
431
|
+
posterior_predictive = None
|
432
|
+
|
433
|
+
if num_prior_samples > 0:
|
434
|
+
prior = Predictive(self.model, num_samples=num_prior_samples)(
|
435
|
+
rng_key,
|
436
|
+
counts=None,
|
437
|
+
covariates=numpyro_covariates,
|
438
|
+
n_total=numpyro_n_total,
|
439
|
+
ref_index=ref_index,
|
440
|
+
sample_adata=sample_adata,
|
441
|
+
)
|
442
|
+
# Remove problematic prior arrays with wrong dimensions
|
443
|
+
if prior and "counts" in prior:
|
444
|
+
counts_shape = prior["counts"].shape
|
445
|
+
expected_dims = 2 # ['sample', 'cell_type']
|
446
|
+
if len(counts_shape) != expected_dims:
|
447
|
+
prior = {k: v for k, v in prior.items() if k != "counts"}
|
448
|
+
logger.warning(
|
449
|
+
f"Removed 'counts' from prior due to dimension mismatch: got {len(counts_shape)}D, expected {expected_dims}D"
|
450
|
+
)
|
451
|
+
else:
|
452
|
+
prior = None
|
453
|
+
|
412
454
|
arviz_data = az.from_numpyro(
|
413
455
|
self.mcmc, prior=prior, posterior_predictive=posterior_predictive, dims=dims, coords=coords
|
414
456
|
)
|
pertpy/tools/_dialogue.py
CHANGED
@@ -882,9 +882,9 @@ class Dialogue:
|
|
882
882
|
if len(conditions_compare) != 2:
|
883
883
|
raise ValueError("Please specify conditions to compare or supply an object with only 2 conditions")
|
884
884
|
|
885
|
-
pvals = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
|
886
|
-
tstats = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
|
887
|
-
pvals_adj = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
|
885
|
+
pvals = pd.DataFrame(1.0, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
|
886
|
+
tstats = pd.DataFrame(1.0, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
|
887
|
+
pvals_adj = pd.DataFrame(1.0, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
|
888
888
|
|
889
889
|
response = adata.obs.groupby(sample_label)[condition_label].agg(pd.Series.mode)
|
890
890
|
for celltype in adata.obs[celltype_label].unique():
|
@@ -1,9 +1,52 @@
|
|
1
|
+
import contextlib
|
2
|
+
from importlib import import_module
|
3
|
+
from importlib.util import find_spec
|
4
|
+
|
1
5
|
from ._base import LinearModelBase, MethodBase
|
2
6
|
from ._dge_comparison import DGEEVAL
|
3
7
|
from ._edger import EdgeR
|
4
|
-
from ._pydeseq2 import PyDESeq2
|
5
8
|
from ._simple_tests import SimpleComparisonBase, TTest, WilcoxonTest
|
6
|
-
|
9
|
+
|
10
|
+
|
11
|
+
def __getattr__(name: str):
|
12
|
+
deps = {
|
13
|
+
"PyDESeq2": ["pydeseq2", "formulaic_contrasts", "formulaic"],
|
14
|
+
"EdgeR": ["rpy2", "formulaic_contrasts", "formulaic"],
|
15
|
+
"Statsmodels": ["formulaic_contrasts", "formulaic"],
|
16
|
+
}
|
17
|
+
|
18
|
+
if name in deps:
|
19
|
+
for dep in deps[name]:
|
20
|
+
if find_spec(dep) is None:
|
21
|
+
raise ImportError(f"{dep} is required but not installed")
|
22
|
+
|
23
|
+
module_map = {
|
24
|
+
"PyDESeq2": "pertpy.tools._differential_gene_expression._pydeseq2",
|
25
|
+
"EdgeR": "pertpy.tools._differential_gene_expression._edger",
|
26
|
+
"Statsmodels": "pertpy.tools._differential_gene_expression._statsmodels",
|
27
|
+
}
|
28
|
+
|
29
|
+
module = import_module(module_map[name])
|
30
|
+
return getattr(module, name)
|
31
|
+
|
32
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
33
|
+
|
34
|
+
|
35
|
+
def _get_available_methods():
|
36
|
+
methods = [WilcoxonTest, TTest]
|
37
|
+
from importlib.util import find_spec
|
38
|
+
|
39
|
+
for name in ["Statsmodels", "PyDESeq2", "EdgeR"]:
|
40
|
+
with contextlib.suppress(ImportError):
|
41
|
+
methods.append(__getattr__(name))
|
42
|
+
|
43
|
+
return methods
|
44
|
+
|
45
|
+
|
46
|
+
AVAILABLE_METHODS = _get_available_methods()
|
47
|
+
|
48
|
+
|
49
|
+
AVAILABLE_METHODS = _get_available_methods()
|
7
50
|
|
8
51
|
__all__ = [
|
9
52
|
"MethodBase",
|
@@ -15,5 +58,3 @@ __all__ = [
|
|
15
58
|
"WilcoxonTest",
|
16
59
|
"TTest",
|
17
60
|
]
|
18
|
-
|
19
|
-
AVAILABLE_METHODS = [Statsmodels, EdgeR, PyDESeq2, WilcoxonTest, TTest]
|
@@ -12,7 +12,6 @@ import matplotlib.pyplot as plt
|
|
12
12
|
import numpy as np
|
13
13
|
import pandas as pd
|
14
14
|
import seaborn as sns
|
15
|
-
from formulaic_contrasts import FormulaicContrasts
|
16
15
|
from lamin_utils import logger
|
17
16
|
from matplotlib.pyplot import Figure
|
18
17
|
from matplotlib.ticker import MaxNLocator
|
@@ -881,6 +880,8 @@ class LinearModelBase(MethodBase):
|
|
881
880
|
super().__init__(adata, mask=mask, layer=layer)
|
882
881
|
self._check_counts()
|
883
882
|
|
883
|
+
from formulaic_contrasts import FormulaicContrasts
|
884
|
+
|
884
885
|
self.formulaic_contrasts = None
|
885
886
|
if isinstance(design, str):
|
886
887
|
self.formulaic_contrasts = FormulaicContrasts(adata.obs, design)
|
@@ -23,9 +23,6 @@ class EdgeR(LinearModelBase):
|
|
23
23
|
Args:
|
24
24
|
**kwargs: Keyword arguments specific to glmQLFit()
|
25
25
|
"""
|
26
|
-
# For running in notebook
|
27
|
-
# pandas2ri.activate()
|
28
|
-
# rpy2.robjects.numpy2ri.activate()
|
29
26
|
try:
|
30
27
|
from rpy2 import robjects as ro
|
31
28
|
from rpy2.robjects import numpy2ri, pandas2ri
|
@@ -47,17 +44,17 @@ class EdgeR(LinearModelBase):
|
|
47
44
|
expr = self.adata.X if self.layer is None else self.adata.layers[self.layer]
|
48
45
|
expr = expr.T.toarray() if issparse(expr) else expr.T
|
49
46
|
|
50
|
-
with localconverter(get_conversion() + pandas2ri.converter):
|
51
|
-
expr_r =
|
52
|
-
samples_r =
|
47
|
+
with localconverter(get_conversion() + pandas2ri.converter) as cv:
|
48
|
+
expr_r = cv.py2rpy(pd.DataFrame(expr, index=self.adata.var_names, columns=self.adata.obs_names))
|
49
|
+
samples_r = cv.py2rpy(self.adata.obs)
|
53
50
|
|
54
51
|
dge = edger.DGEList(counts=expr_r, samples=samples_r)
|
55
52
|
|
56
53
|
logger.info("Calculating NormFactors")
|
57
54
|
dge = edger.calcNormFactors(dge)
|
58
55
|
|
59
|
-
with localconverter(get_conversion() + numpy2ri.converter):
|
60
|
-
design_r =
|
56
|
+
with localconverter(get_conversion() + numpy2ri.converter) as cv:
|
57
|
+
design_r = cv.py2rpy(self.design.values)
|
61
58
|
|
62
59
|
logger.info("Estimating Dispersions")
|
63
60
|
dge = edger.estimateDisp(dge, design=design_r)
|
@@ -100,8 +97,8 @@ class EdgeR(LinearModelBase):
|
|
100
97
|
) from None
|
101
98
|
|
102
99
|
# Convert vector to R, which drops a category like `self.design_matrix` to use the intercept for the left out.
|
103
|
-
with localconverter(get_conversion() + numpy2ri.converter):
|
104
|
-
contrast_vec_r =
|
100
|
+
with localconverter(get_conversion() + numpy2ri.converter) as cv:
|
101
|
+
contrast_vec_r = cv.py2rpy(np.asarray(contrast))
|
105
102
|
ro.globalenv["contrast_vec"] = contrast_vec_r
|
106
103
|
|
107
104
|
# Test contrast with R
|
@@ -121,8 +118,8 @@ class EdgeR(LinearModelBase):
|
|
121
118
|
return de_res.reset_index().rename(columns={"PValue": "p_value", "logFC": "log_fc", "FDR": "adj_p_value"})
|
122
119
|
|
123
120
|
# Convert to Pandas DataFrame if still an R object
|
124
|
-
with localconverter(get_conversion() + pandas2ri.converter):
|
125
|
-
de_res =
|
121
|
+
with localconverter(get_conversion() + pandas2ri.converter) as cv:
|
122
|
+
de_res = cv.rpy2py(de_res)
|
126
123
|
|
127
124
|
de_res.index.name = "variable"
|
128
125
|
de_res = de_res.reset_index()
|
@@ -8,7 +8,7 @@ from rich.progress import track
|
|
8
8
|
from sklearn.metrics import pairwise_distances
|
9
9
|
from statsmodels.stats.multitest import multipletests
|
10
10
|
|
11
|
-
from ._distances import Distance
|
11
|
+
from ._distances import Distance, Metric
|
12
12
|
|
13
13
|
if TYPE_CHECKING:
|
14
14
|
from anndata import AnnData
|
@@ -43,7 +43,7 @@ class DistanceTest:
|
|
43
43
|
|
44
44
|
def __init__(
|
45
45
|
self,
|
46
|
-
metric:
|
46
|
+
metric: Metric,
|
47
47
|
n_perms: int = 1000,
|
48
48
|
layer_key: str = None,
|
49
49
|
obsm_key: str = None,
|
@@ -34,6 +34,31 @@ class MeanVar(NamedTuple):
|
|
34
34
|
variance: float
|
35
35
|
|
36
36
|
|
37
|
+
Metric = Literal[
|
38
|
+
"edistance",
|
39
|
+
"euclidean",
|
40
|
+
"root_mean_squared_error",
|
41
|
+
"mse",
|
42
|
+
"mean_absolute_error",
|
43
|
+
"pearson_distance",
|
44
|
+
"spearman_distance",
|
45
|
+
"kendalltau_distance",
|
46
|
+
"cosine_distance",
|
47
|
+
"r2_distance",
|
48
|
+
"mean_pairwise",
|
49
|
+
"mmd",
|
50
|
+
"wasserstein",
|
51
|
+
"sym_kldiv",
|
52
|
+
"t_test",
|
53
|
+
"ks_test",
|
54
|
+
"nb_ll",
|
55
|
+
"classifier_proba",
|
56
|
+
"classifier_cp",
|
57
|
+
"mean_var_distribution",
|
58
|
+
"mahalanobis",
|
59
|
+
]
|
60
|
+
|
61
|
+
|
37
62
|
class Distance:
|
38
63
|
"""Distance class, used to compute distances between groups of cells.
|
39
64
|
|
@@ -112,7 +137,7 @@ class Distance:
|
|
112
137
|
|
113
138
|
def __init__(
|
114
139
|
self,
|
115
|
-
metric:
|
140
|
+
metric: Metric = "edistance",
|
116
141
|
agg_fct: Callable = np.mean,
|
117
142
|
layer_key: str = None,
|
118
143
|
obsm_key: str = None,
|
@@ -660,19 +685,19 @@ class MMD(AbstractDistance):
|
|
660
685
|
super().__init__()
|
661
686
|
self.accepts_precomputed = False
|
662
687
|
|
663
|
-
def __call__(self, X: np.ndarray, Y: np.ndarray, kernel="linear", **kwargs) -> float:
|
688
|
+
def __call__(self, X: np.ndarray, Y: np.ndarray, *, kernel="linear", gamma=1.0, degree=2, **kwargs) -> float:
|
664
689
|
if kernel == "linear":
|
665
690
|
XX = np.dot(X, X.T)
|
666
691
|
YY = np.dot(Y, Y.T)
|
667
692
|
XY = np.dot(X, Y.T)
|
668
693
|
elif kernel == "rbf":
|
669
|
-
XX = rbf_kernel(X, X, gamma=
|
670
|
-
YY = rbf_kernel(Y, Y, gamma=
|
671
|
-
XY = rbf_kernel(X, Y, gamma=
|
694
|
+
XX = rbf_kernel(X, X, gamma=gamma)
|
695
|
+
YY = rbf_kernel(Y, Y, gamma=gamma)
|
696
|
+
XY = rbf_kernel(X, Y, gamma=gamma)
|
672
697
|
elif kernel == "poly":
|
673
|
-
XX = polynomial_kernel(X, X, degree=
|
674
|
-
YY = polynomial_kernel(Y, Y, degree=
|
675
|
-
XY = polynomial_kernel(X, Y, degree=
|
698
|
+
XX = polynomial_kernel(X, X, degree=degree, gamma=gamma, coef0=0)
|
699
|
+
YY = polynomial_kernel(Y, Y, degree=degree, gamma=gamma, coef0=0)
|
700
|
+
XY = polynomial_kernel(X, Y, degree=degree, gamma=gamma, coef0=0)
|
676
701
|
else:
|
677
702
|
raise ValueError(f"Kernel {kernel} not recognized.")
|
678
703
|
|
pertpy/tools/_milo.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import random
|
4
4
|
import re
|
5
|
+
from importlib.util import find_spec
|
5
6
|
from typing import TYPE_CHECKING, Literal
|
6
7
|
|
7
8
|
import matplotlib.pyplot as plt
|
@@ -29,18 +30,6 @@ from sklearn.metrics.pairwise import euclidean_distances
|
|
29
30
|
class Milo:
|
30
31
|
"""Python implementation of Milo."""
|
31
32
|
|
32
|
-
def __init__(self):
|
33
|
-
try:
|
34
|
-
from rpy2.robjects import conversion, numpy2ri, pandas2ri
|
35
|
-
from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
|
36
|
-
except ModuleNotFoundError:
|
37
|
-
raise ImportError("milo requires rpy2 to be installed.") from None
|
38
|
-
|
39
|
-
try:
|
40
|
-
importr("edgeR")
|
41
|
-
except ImportError as e:
|
42
|
-
raise ImportError("milo requires a valid R installation with edger installed:\n") from e
|
43
|
-
|
44
33
|
def load(
|
45
34
|
self,
|
46
35
|
input: AnnData,
|
@@ -266,7 +255,7 @@ class Milo:
|
|
266
255
|
subset_samples: list[str] | None = None,
|
267
256
|
add_intercept: bool = True,
|
268
257
|
feature_key: str | None = "rna",
|
269
|
-
solver: Literal["edger", "
|
258
|
+
solver: Literal["edger", "pydeseq2"] = "edger",
|
270
259
|
):
|
271
260
|
"""Performs differential abundance testing on neighbourhoods using QLF test implementation as implemented in edgeR.
|
272
261
|
|
@@ -279,7 +268,9 @@ class Milo:
|
|
279
268
|
subset_samples: subset of samples (obs in `milo_mdata['milo']`) to use for the test.
|
280
269
|
add_intercept: whether to include an intercept in the model. If False, this is equivalent to adding + 0 in the design formula. When model_contrasts is specified, this is set to False by default.
|
281
270
|
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
282
|
-
solver: The solver to fit the model to.
|
271
|
+
solver: The solver to fit the model to.
|
272
|
+
The "edger" solver requires R, rpy2 and edgeR to be installed and is the closest to the R implementation.
|
273
|
+
The "pydeseq2" requires pydeseq2 to be installed. It is still very comparable to the "edger" solver but might be a bit slower.
|
283
274
|
|
284
275
|
Returns:
|
285
276
|
None, modifies `milo_mdata['milo']` in place, adding the results of the DA test to `.var`:
|
@@ -298,7 +289,6 @@ class Milo:
|
|
298
289
|
>>> milo.make_nhoods(mdata["rna"])
|
299
290
|
>>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
|
300
291
|
>>> milo.da_nhoods(mdata, design="~label")
|
301
|
-
|
302
292
|
"""
|
303
293
|
try:
|
304
294
|
sample_adata = mdata["milo"]
|
@@ -421,17 +411,71 @@ class Milo:
|
|
421
411
|
res = base.as_data_frame(
|
422
412
|
edgeR.topTags(edgeR.glmQLFTest(fit, coef=n_coef), sort_by="none", n=np.inf)
|
423
413
|
)
|
414
|
+
if res is None:
|
415
|
+
raise ValueError("Unable to generate results with edgeR. Is your installation correct?")
|
424
416
|
if not isinstance(res, pd.DataFrame):
|
425
417
|
res = pd.DataFrame(res)
|
426
418
|
# The columns of res looks like e.g. table.A, table.B, so remove the prefix
|
427
419
|
res.columns = [col.replace("table.", "") for col in res.columns]
|
428
|
-
|
420
|
+
elif solver == "pydeseq2":
|
421
|
+
if find_spec("pydeseq2") is None:
|
422
|
+
raise ImportError("pydeseq2 is required but not installed. Install with: pip install pydeseq2")
|
423
|
+
|
424
|
+
from pydeseq2.dds import DeseqDataSet
|
425
|
+
from pydeseq2.ds import DeseqStats
|
426
|
+
|
427
|
+
counts_filtered = count_mat[np.ix_(keep_nhoods, keep_smp)]
|
428
|
+
design_df_filtered = design_df.iloc[keep_smp].copy()
|
429
|
+
|
430
|
+
design_df_filtered = design_df_filtered.astype(
|
431
|
+
dict.fromkeys(design_df_filtered.select_dtypes(exclude=["number"]).columns, "category")
|
432
|
+
)
|
433
|
+
|
434
|
+
design_clean = design if design.startswith("~") else f"~{design}"
|
435
|
+
|
436
|
+
dds = DeseqDataSet(
|
437
|
+
counts=pd.DataFrame(counts_filtered.T, index=design_df_filtered.index),
|
438
|
+
metadata=design_df_filtered,
|
439
|
+
design=design_clean,
|
440
|
+
refit_cooks=True,
|
441
|
+
)
|
442
|
+
|
443
|
+
dds.deseq2()
|
444
|
+
|
445
|
+
if model_contrasts is not None and "-" in model_contrasts:
|
446
|
+
if "(" in model_contrasts or "+" in model_contrasts.split("-")[1]:
|
447
|
+
raise ValueError(
|
448
|
+
f"Complex contrasts like '{model_contrasts}' are not supported by pydeseq2. "
|
449
|
+
"Use simple pairwise contrasts (e.g., 'GroupA-GroupB') or switch to solver='edger'."
|
450
|
+
)
|
451
|
+
|
452
|
+
parts = model_contrasts.split("-")
|
453
|
+
factor_name = design_clean.replace("~", "").split("+")[-1].strip()
|
454
|
+
group1 = parts[0].replace(factor_name, "").strip()
|
455
|
+
group2 = parts[1].replace(factor_name, "").strip()
|
456
|
+
stat_res = DeseqStats(dds, contrast=[factor_name, group1, group2])
|
457
|
+
else:
|
458
|
+
factor_name = design_clean.replace("~", "").split("+")[-1].strip()
|
459
|
+
if not isinstance(design_df_filtered[factor_name], pd.CategoricalDtype):
|
460
|
+
design_df_filtered[factor_name] = design_df_filtered[factor_name].astype("category")
|
461
|
+
categories = design_df_filtered[factor_name].cat.categories
|
462
|
+
stat_res = DeseqStats(dds, contrast=[factor_name, categories[-1], categories[0]])
|
463
|
+
|
464
|
+
stat_res.summary()
|
465
|
+
res = stat_res.results_df
|
466
|
+
|
467
|
+
res = res.rename(
|
468
|
+
columns={"baseMean": "logCPM", "log2FoldChange": "logFC", "pvalue": "PValue", "padj": "FDR"}
|
469
|
+
)
|
470
|
+
|
471
|
+
res = res[["logCPM", "logFC", "PValue", "FDR"]]
|
472
|
+
|
429
473
|
res.index = sample_adata.var_names[keep_nhoods] # type: ignore
|
430
474
|
if any(col in sample_adata.var.columns for col in res.columns):
|
431
475
|
sample_adata.var = sample_adata.var.drop(res.columns, axis=1)
|
432
476
|
sample_adata.var = pd.concat([sample_adata.var, res], axis=1)
|
433
|
-
|
434
|
-
self._graph_spatial_fdr(sample_adata
|
477
|
+
|
478
|
+
self._graph_spatial_fdr(sample_adata)
|
435
479
|
|
436
480
|
def annotate_nhoods(
|
437
481
|
self,
|
@@ -488,7 +532,7 @@ class Milo:
|
|
488
532
|
|
489
533
|
anno_frac_dataframe = pd.DataFrame(anno_frac, columns=anno_dummies.columns, index=sample_adata.var_names)
|
490
534
|
sample_adata.varm["frac_annotation"] = anno_frac_dataframe.values
|
491
|
-
sample_adata.uns["annotation_labels"] = anno_frac_dataframe.columns
|
535
|
+
sample_adata.uns["annotation_labels"] = anno_frac_dataframe.columns.to_list()
|
492
536
|
sample_adata.uns["annotation_obs"] = anno_col
|
493
537
|
sample_adata.var["nhood_annotation"] = anno_frac_dataframe.idxmax(1)
|
494
538
|
sample_adata.var["nhood_annotation_frac"] = anno_frac_dataframe.max(1)
|
@@ -674,6 +718,17 @@ class Milo:
|
|
674
718
|
self,
|
675
719
|
):
|
676
720
|
"""Set up rpy2 to run edgeR."""
|
721
|
+
try:
|
722
|
+
from rpy2.robjects import conversion, numpy2ri, pandas2ri
|
723
|
+
from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
|
724
|
+
except ModuleNotFoundError:
|
725
|
+
raise ImportError("milo requires rpy2 to be installed.") from None
|
726
|
+
|
727
|
+
try:
|
728
|
+
importr("edgeR")
|
729
|
+
except ImportError as e:
|
730
|
+
raise ImportError("milo requires a valid R installation with edger installed.") from e
|
731
|
+
|
677
732
|
from rpy2.robjects.packages import importr
|
678
733
|
|
679
734
|
edgeR = self._try_import_bioc_library("edgeR")
|
@@ -685,26 +740,27 @@ class Milo:
|
|
685
740
|
|
686
741
|
def _try_import_bioc_library(
|
687
742
|
self,
|
688
|
-
|
743
|
+
r_package: str,
|
689
744
|
):
|
690
745
|
"""Import R packages.
|
691
746
|
|
692
747
|
Args:
|
693
|
-
|
748
|
+
r_package: R packages name
|
694
749
|
"""
|
695
750
|
from rpy2.robjects.packages import PackageNotInstalledError, importr
|
696
751
|
|
697
752
|
try:
|
698
|
-
_r_lib = importr(
|
753
|
+
_r_lib = importr(r_package)
|
699
754
|
return _r_lib
|
700
755
|
except PackageNotInstalledError:
|
701
|
-
logger.error(
|
756
|
+
logger.error(
|
757
|
+
f"Install Bioconductor library `{r_package!r}` first as `BiocManager::install({r_package!r}).`"
|
758
|
+
)
|
702
759
|
raise
|
703
760
|
|
704
761
|
def _graph_spatial_fdr(
|
705
762
|
self,
|
706
763
|
sample_adata: AnnData,
|
707
|
-
neighbors_key: str | None = None,
|
708
764
|
):
|
709
765
|
"""FDR correction weighted on inverse of connectivity of neighbourhoods.
|
710
766
|
|
@@ -712,7 +768,6 @@ class Milo:
|
|
712
768
|
|
713
769
|
Args:
|
714
770
|
sample_adata: Sample-level AnnData.
|
715
|
-
neighbors_key: The key in `adata.obsp` to use as KNN graph.
|
716
771
|
"""
|
717
772
|
# use 1/connectivity as the weighting for the weighted BH adjustment from Cydar
|
718
773
|
w = 1 / sample_adata.var["kth_distance"]
|
@@ -1,13 +1,11 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import warnings
|
4
|
-
|
5
3
|
import anndata
|
6
4
|
import numpy as np
|
7
|
-
import pandas as pd
|
8
5
|
import scipy
|
9
6
|
import torch
|
10
7
|
from anndata import AnnData
|
8
|
+
from fast_array_utils.conv import to_dense
|
11
9
|
from pytorch_lightning import LightningModule, Trainer
|
12
10
|
from pytorch_lightning.callbacks import EarlyStopping
|
13
11
|
from sklearn.linear_model import LogisticRegression
|
@@ -112,18 +110,6 @@ class LRClassifierSpace(PerturbationSpace):
|
|
112
110
|
return pert_adata
|
113
111
|
|
114
112
|
|
115
|
-
# Ensure backward compatibility with DiscriminatorClassifierSpace
|
116
|
-
def DiscriminatorClassifierSpace():
|
117
|
-
warnings.warn(
|
118
|
-
"The DiscriminatorClassifierSpace class is deprecated and will be removed in the future."
|
119
|
-
"Please use the MLPClassifierSpace or the LRClassifierSpace class instead.",
|
120
|
-
DeprecationWarning,
|
121
|
-
stacklevel=2,
|
122
|
-
)
|
123
|
-
|
124
|
-
return MLPClassifierSpace()
|
125
|
-
|
126
|
-
|
127
113
|
class MLPClassifierSpace(PerturbationSpace):
|
128
114
|
"""Fits an ANN classifier to the data and takes the feature space (weights in the last layer) as embedding.
|
129
115
|
|
@@ -202,7 +188,7 @@ class MLPClassifierSpace(PerturbationSpace):
|
|
202
188
|
labels = adata.obs[target_col].values.reshape(-1, 1)
|
203
189
|
encoder = OneHotEncoder()
|
204
190
|
encoded_labels = encoder.fit_transform(labels).toarray()
|
205
|
-
adata.
|
191
|
+
adata.obsm["encoded_perturbations"] = encoded_labels.astype(np.float32)
|
206
192
|
|
207
193
|
# Split the data in train, test and validation
|
208
194
|
X = list(range(adata.n_obs))
|
@@ -226,7 +212,7 @@ class MLPClassifierSpace(PerturbationSpace):
|
|
226
212
|
# Fix class unbalance (likely to happen in perturbation datasets)
|
227
213
|
# Usually control cells are overrepresented such that predicting control all time would give good results
|
228
214
|
# Cells with rare perturbations are sampled more
|
229
|
-
train_weights = 1 / (1 + torch.sum(torch.tensor(train_dataset.labels
|
215
|
+
train_weights = 1 / (1 + torch.sum(torch.tensor(train_dataset.labels), dim=1))
|
230
216
|
train_sampler = WeightedRandomSampler(train_weights, len(train_weights))
|
231
217
|
|
232
218
|
self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=4)
|
@@ -278,11 +264,10 @@ class MLPClassifierSpace(PerturbationSpace):
|
|
278
264
|
pert_adata.obs = pert_adata.obs.reset_index(drop=True)
|
279
265
|
if "perturbations" in self.adata_obs.columns:
|
280
266
|
self.adata_obs = self.adata_obs.drop("perturbations", axis=1)
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
pert_adata.obs = pert_adata.obs.drop("encoded_perturbations", axis=1)
|
267
|
+
obs_subset = self.adata_obs.iloc[: len(pert_adata.obs)].copy()
|
268
|
+
for col in obs_subset.columns:
|
269
|
+
if col not in ["perturbations", "encoded_perturbations"]:
|
270
|
+
pert_adata.obs[col] = obs_subset[col].values
|
286
271
|
|
287
272
|
return pert_adata
|
288
273
|
|
@@ -397,7 +382,13 @@ class PLDataset(Dataset):
|
|
397
382
|
else:
|
398
383
|
self.data = adata.X
|
399
384
|
|
400
|
-
|
385
|
+
if target_col in adata.obs.columns:
|
386
|
+
self.labels = adata.obs[target_col]
|
387
|
+
elif target_col in adata.obsm:
|
388
|
+
self.labels = adata.obsm[target_col]
|
389
|
+
else:
|
390
|
+
raise ValueError(f"Target column {target_col} not found in obs or obsm")
|
391
|
+
|
401
392
|
self.pert_labels = adata.obs[label_col]
|
402
393
|
|
403
394
|
def __len__(self):
|
@@ -405,8 +396,8 @@ class PLDataset(Dataset):
|
|
405
396
|
|
406
397
|
def __getitem__(self, idx):
|
407
398
|
"""Returns a sample and corresponding perturbations applied (labels)."""
|
408
|
-
sample = self.data[idx]
|
409
|
-
num_label = self.labels.iloc[idx]
|
399
|
+
sample = to_dense(self.data[idx]).squeeze() if scipy.sparse.issparse(self.data) else self.data[idx]
|
400
|
+
num_label = self.labels.iloc[idx] if hasattr(self.labels, "iloc") else self.labels[idx]
|
410
401
|
str_label = self.pert_labels.iloc[idx]
|
411
402
|
|
412
403
|
return sample, num_label, str_label
|
@@ -161,12 +161,20 @@ class PseudobulkSpace(PerturbationSpace):
|
|
161
161
|
adata = adata_emb
|
162
162
|
|
163
163
|
adata.obs[target_col] = adata.obs[target_col].astype("category")
|
164
|
+
grouping_cols = [target_col] if groups_col is None else [target_col, groups_col]
|
165
|
+
original_obs = adata.obs.copy()
|
164
166
|
ps_adata = sc.get.aggregate(
|
165
167
|
adata, by=[target_col] if groups_col is None else [target_col, groups_col], func=mode, layer=layer_key
|
166
168
|
)
|
169
|
+
|
167
170
|
if mode in ps_adata.layers:
|
168
171
|
ps_adata.X = ps_adata.layers[mode]
|
169
172
|
|
173
|
+
for col in original_obs.columns:
|
174
|
+
if col not in ps_adata.obs.columns:
|
175
|
+
grouped_values = original_obs.groupby(grouping_cols)[col].first()
|
176
|
+
ps_adata.obs[col] = grouped_values.reindex(ps_adata.obs.index).values
|
177
|
+
|
170
178
|
ps_adata.obs[target_col] = ps_adata.obs[target_col].astype("category")
|
171
179
|
|
172
180
|
return ps_adata
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pertpy
|
3
|
-
Version: 0.
|
3
|
+
Version: 1.0.1
|
4
4
|
Summary: Perturbation Analysis in the scverse ecosystem.
|
5
5
|
Project-URL: Documentation, https://pertpy.readthedocs.io
|
6
6
|
Project-URL: Source, https://github.com/scverse/pertpy
|
@@ -49,7 +49,7 @@ Requires-Python: <3.14,>=3.11
|
|
49
49
|
Requires-Dist: adjusttext
|
50
50
|
Requires-Dist: arviz
|
51
51
|
Requires-Dist: blitzgsea
|
52
|
-
Requires-Dist: fast-array-utils
|
52
|
+
Requires-Dist: fast-array-utils[accel,sparse]
|
53
53
|
Requires-Dist: lamin-utils
|
54
54
|
Requires-Dist: mudata
|
55
55
|
Requires-Dist: openpyxl
|
@@ -93,7 +93,7 @@ Requires-Dist: sphinxext-opengraph; extra == 'doc'
|
|
93
93
|
Provides-Extra: tcoda
|
94
94
|
Requires-Dist: ete4; extra == 'tcoda'
|
95
95
|
Requires-Dist: pyqt6; extra == 'tcoda'
|
96
|
-
Requires-Dist: toytree; extra == 'tcoda'
|
96
|
+
Requires-Dist: toytree>=3.0; extra == 'tcoda'
|
97
97
|
Provides-Extra: test
|
98
98
|
Requires-Dist: coverage; extra == 'test'
|
99
99
|
Requires-Dist: leidenalg; extra == 'test'
|
@@ -155,7 +155,13 @@ pip install 'pertpy[tcoda]'
|
|
155
155
|
|
156
156
|
### milo
|
157
157
|
|
158
|
-
milo
|
158
|
+
milo requires either the "de" extra for the "pydeseq2" solver:
|
159
|
+
|
160
|
+
```console
|
161
|
+
pip install 'pertpy[de]'
|
162
|
+
```
|
163
|
+
|
164
|
+
or, edger, statmod, and rpy2 for the "edger" solver:
|
159
165
|
|
160
166
|
```R
|
161
167
|
BiocManager::install("edgeR")
|
@@ -1,9 +1,9 @@
|
|
1
|
-
pertpy/__init__.py,sha256=
|
1
|
+
pertpy/__init__.py,sha256=R-RrfgZrhMfn7G60kmTDN88ECqS90AyY9Ed5615hQEg,972
|
2
2
|
pertpy/_doc.py,sha256=j5TMNC-DA9yIMqIIUNpjpcVgWfRqyBBfvbRjnCM_OLs,427
|
3
3
|
pertpy/_types.py,sha256=IcHCojCUqx8CapibNkcYf2TUqjBFP2ujeELvn_IBSBQ,154
|
4
4
|
pertpy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
pertpy/data/__init__.py,sha256=ah3yvoxkgbdMUNAWxS3SyqcUuVamBOSeuWkF2QRAEwM,2703
|
6
|
-
pertpy/data/_dataloader.py,sha256=
|
6
|
+
pertpy/data/_dataloader.py,sha256=Cb08iJj7vH-VishU4NJ5nzPPgviFLY1f5p3K4eVvTeg,4679
|
7
7
|
pertpy/data/_datasets.py,sha256=4IceyYURpstZSFRrD6gBjoYg8uRbEPo1QLXTt-SwB5k,65507
|
8
8
|
pertpy/metadata/__init__.py,sha256=wROPCXmJX2v5schJaBTPQtGW-FGCNWPfO_6bpnXwk-c,276
|
9
9
|
pertpy/metadata/_cell_line.py,sha256=hKmaZvjIsQ3wHo__0aKo3JlWvsf8-4OD-gIBNQnW8_E,42716
|
@@ -16,43 +16,43 @@ pertpy/plot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
pertpy/preprocessing/__init__.py,sha256=VAPFaeq2_qCvdFkQTCj_Hm460HC4Tersu8Rig_tnp_Y,71
|
17
17
|
pertpy/preprocessing/_guide_rna.py,sha256=ijL-bjQ-9qn4r3DlhBxzsqk-bD4RqPsFlok-Otj4hg8,15872
|
18
18
|
pertpy/preprocessing/_guide_rna_mixture.py,sha256=pT_YkjmN4iEJ-THBROu_dpbr8E6u8GJw36YoGseikD0,6422
|
19
|
-
pertpy/tools/__init__.py,sha256=
|
19
|
+
pertpy/tools/__init__.py,sha256=oOhB99SZBx1XUohaqXV7T_hZoN6xf-WMOpqgvw5gn1I,2073
|
20
20
|
pertpy/tools/_augur.py,sha256=tc1YKyc0BwzrEGgctsfyy7DsTNKxyvy7ZvWraTWCc1A,55262
|
21
21
|
pertpy/tools/_cinemaot.py,sha256=54-rS0AEj31dMe7iU4kEmLoAunq3jNuhsBE3IEp9hrI,38071
|
22
|
-
pertpy/tools/_dialogue.py,sha256=
|
22
|
+
pertpy/tools/_dialogue.py,sha256=xWW5XiQUEHTvB7WURzndmm-EF4EhLeEnOV_-7NP6heU,52360
|
23
23
|
pertpy/tools/_enrichment.py,sha256=55mwotLH9DXQOhl85MCkxXu-MX0RysLyrPheJysAnF0,21369
|
24
|
-
pertpy/tools/_milo.py,sha256=
|
24
|
+
pertpy/tools/_milo.py,sha256=PfLgEsYa5i35j25BNgGEkXk9UPM-ZhFxQbEWDA1e_fc,48192
|
25
25
|
pertpy/tools/_mixscape.py,sha256=HfrpBeRlxHXaOpZkF2FmX7dg35kUB1rL0_-n2aSi2_0,57905
|
26
26
|
pertpy/tools/decoupler_LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
27
27
|
pertpy/tools/transferlearning_MMD_LICENSE,sha256=MUvDA-o_j9htRpI8fStVdCRuyLdPkQUuIH0a_EIc57w,1069
|
28
28
|
pertpy/tools/_coda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
29
|
-
pertpy/tools/_coda/_base_coda.py,sha256=
|
30
|
-
pertpy/tools/_coda/_sccoda.py,sha256=
|
29
|
+
pertpy/tools/_coda/_base_coda.py,sha256=2YIB7rVf9CxwYyQCpC8e3VYCxfurvyAqUs-4qJ5pubg,111817
|
30
|
+
pertpy/tools/_coda/_sccoda.py,sha256=FaXn20K4ROYtbrZop_dWRokfwX0vlCizKV02V3Cf7zo,24611
|
31
31
|
pertpy/tools/_coda/_tasccoda.py,sha256=BTaOAmL458zQ_og3x4ENlDnJHD6_F4YkdCoXWsF4i1U,30465
|
32
|
-
pertpy/tools/_differential_gene_expression/__init__.py,sha256=
|
33
|
-
pertpy/tools/_differential_gene_expression/_base.py,sha256=
|
32
|
+
pertpy/tools/_differential_gene_expression/__init__.py,sha256=8_u7nsHY5GfFITT0Rs3v1p1vXpsGIWcBzwipuhXM3Ww,1653
|
33
|
+
pertpy/tools/_differential_gene_expression/_base.py,sha256=GbBi8o7rTA3wH5DZJ9C1QzfqB5yak4r3xMvKQrizsTY,38274
|
34
34
|
pertpy/tools/_differential_gene_expression/_checks.py,sha256=hH_GP0lWGO-5zrCFX4YiIVCZBCuK0ZJ0jFmdlx2Qm4k,1639
|
35
35
|
pertpy/tools/_differential_gene_expression/_dge_comparison.py,sha256=LXhp5djKKCAk9VI7OqxOuja849G5lnd8Ehcs9Epk8rg,4159
|
36
|
-
pertpy/tools/_differential_gene_expression/_edger.py,sha256=
|
37
|
-
pertpy/tools/_differential_gene_expression/_pydeseq2.py,sha256=
|
36
|
+
pertpy/tools/_differential_gene_expression/_edger.py,sha256=P7smGNOIsEF5-zHDP1IPU0vhR5yF9Y54akrZYjjg9H0,4658
|
37
|
+
pertpy/tools/_differential_gene_expression/_pydeseq2.py,sha256=dtRgVkfqLlLw-N0-Y8cEJ06KVjr6LIUWJAeDQx4ZA1I,2917
|
38
38
|
pertpy/tools/_differential_gene_expression/_simple_tests.py,sha256=SfU8s_P2JzEA1RkmS0_xsARs4-BdnIYAwMmQv9-fxRg,6610
|
39
39
|
pertpy/tools/_differential_gene_expression/_statsmodels.py,sha256=90h9EPuoCtNxAbJ1Xq4j_E4yYJJpk64zTP7GyTdmrxY,2220
|
40
40
|
pertpy/tools/_distances/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
41
|
-
pertpy/tools/_distances/_distance_tests.py,sha256=
|
42
|
-
pertpy/tools/_distances/_distances.py,sha256=
|
41
|
+
pertpy/tools/_distances/_distance_tests.py,sha256=wDDRkM-WmcfT5m5Zat-3ianGU1aC6HfRy51tuWA_sJg,13524
|
42
|
+
pertpy/tools/_distances/_distances.py,sha256=iJNqMc9CxgZfEMHcxIocKGc4X4S2dJsRy3YerxVnFKo,51003
|
43
43
|
pertpy/tools/_perturbation_space/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
pertpy/tools/_perturbation_space/_clustering.py,sha256=pNx_SpPkZfCbgF7vzHWqAaiiHdbxPaA-L-hTWTbzFhI,3528
|
45
45
|
pertpy/tools/_perturbation_space/_comparison.py,sha256=-NzCPRT-IlhJ9hOz7NQLSk0riIzr2C0yZvX6zm3kon4,4291
|
46
|
-
pertpy/tools/_perturbation_space/_discriminator_classifiers.py,sha256=
|
46
|
+
pertpy/tools/_perturbation_space/_discriminator_classifiers.py,sha256=ARdNatcmsz370CF-PexDHPwUrih7KW4A5URzHVDNrcw,23126
|
47
47
|
pertpy/tools/_perturbation_space/_metrics.py,sha256=y8-baP8WRdB1iDgvP3uuQxSCDxA2lcxvEHHM2C_vWHY,3248
|
48
|
-
pertpy/tools/_perturbation_space/_perturbation_space.py,sha256=
|
49
|
-
pertpy/tools/_perturbation_space/_simple.py,sha256=
|
48
|
+
pertpy/tools/_perturbation_space/_perturbation_space.py,sha256=Vyh15wWw9dcu2YUWhziQd2mA9-4IY8EC5dzkBT9HaIo,19457
|
49
|
+
pertpy/tools/_perturbation_space/_simple.py,sha256=xS7Lrq3RzJC8IgVKKs2Utqbs7iJ5L1DANquCRcS3Fhg,13109
|
50
50
|
pertpy/tools/_scgen/__init__.py,sha256=uERFlFyF88TH0uLiwmsUGEfHfLVCiZMFuk8gO5f7164,45
|
51
51
|
pertpy/tools/_scgen/_base_components.py,sha256=Qq8myRUm43q9XBrZ9gBggfa2cSV2wbz_KYoLgH7iF1A,3009
|
52
52
|
pertpy/tools/_scgen/_scgen.py,sha256=AQNGsDe-9HEqli3oq7UBDg68ofLCoXm-R_jnLFQ-rlc,30856
|
53
53
|
pertpy/tools/_scgen/_scgenvae.py,sha256=bPk4v7EdJc7ROdLuDitHiX_Pvwa7Flw2qHRUwBvjLJY,3889
|
54
54
|
pertpy/tools/_scgen/_utils.py,sha256=qz5QUn_Bvk2NGyYVzp3jgjWTFOMt1YyHwUo6HWtoThY,2871
|
55
|
-
pertpy-0.
|
56
|
-
pertpy-0.
|
57
|
-
pertpy-0.
|
58
|
-
pertpy-0.
|
55
|
+
pertpy-1.0.1.dist-info/METADATA,sha256=U9XINrPTrqx9UfLIsUt1X9p6LQLvNSV89GGAZnEHnks,8939
|
56
|
+
pertpy-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
57
|
+
pertpy-1.0.1.dist-info/licenses/LICENSE,sha256=XuiT2hxeRInhquEIBKMZ5M21n5syhDQ4XbABoposIAg,1100
|
58
|
+
pertpy-1.0.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|