DeConveil 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deconveil/__version__.py +1 -1
- deconveil/dds.py +1 -1
- deconveil/utils_processing.py +82 -0
- {deconveil-0.1.2.dist-info → deconveil-0.1.4.dist-info}/METADATA +1 -1
- {deconveil-0.1.2.dist-info → deconveil-0.1.4.dist-info}/RECORD +8 -8
- {deconveil-0.1.2.dist-info → deconveil-0.1.4.dist-info}/WHEEL +0 -0
- {deconveil-0.1.2.dist-info → deconveil-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {deconveil-0.1.2.dist-info → deconveil-0.1.4.dist-info}/top_level.txt +0 -0
deconveil/__version__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.2"
|
|
1
|
+
__version__ = "0.1.4"
|
deconveil/dds.py
CHANGED
|
@@ -18,6 +18,7 @@ from deconveil.utils_fit import fit_moments_dispersions2
|
|
|
18
18
|
from deconveil.utils_fit import grid_fit_beta
|
|
19
19
|
from deconveil.utils_fit import irls_glm
|
|
20
20
|
from deconveil.utils_fit import build_design_matrix
|
|
21
|
+
from deconveil.utils_processing import replace_underscores
|
|
21
22
|
|
|
22
23
|
from pydeseq2.preprocessing import deseq2_norm_fit
|
|
23
24
|
from pydeseq2.preprocessing import deseq2_norm_transform
|
|
@@ -25,7 +26,6 @@ from pydeseq2.utils import dispersion_trend
|
|
|
25
26
|
from pydeseq2.utils import mean_absolute_deviation
|
|
26
27
|
from pydeseq2.utils import n_or_more_replicates
|
|
27
28
|
from pydeseq2.utils import nb_nll
|
|
28
|
-
from pydeseq2.utils import replace_underscores
|
|
29
29
|
from pydeseq2.utils import robust_method_of_moments_disp
|
|
30
30
|
from pydeseq2.utils import test_valid_counts
|
|
31
31
|
from pydeseq2.utils import trimmed_mean
|
deconveil/utils_processing.py
CHANGED
|
@@ -5,11 +5,93 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
|
+
import deconveil
|
|
8
9
|
|
|
9
10
|
from typing import List, Literal, Optional, Dict, Any, cast
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
class _InvalidChoiceError(ValueError, AssertionError):
    """Raised when ``load_test_data`` receives an unsupported option.

    Also derives from ``AssertionError`` so that callers written against the
    earlier ``assert``-based validation keep working.
    """


def load_test_data(
    modality: Literal["rna", "cnv", "metadata", "cnv_tumor"] = "rna",
    dataset: Literal["tcga_brca"] = "tcga_brca",
    debug: bool = False,
    debug_seed: int = 42,
) -> pd.DataFrame:
    """Load TCGA-BRCA example data from the DeConveil package.

    The CSV is read from ``<root>/datasets/<dataset>/<modality>.csv``, where
    ``<root>`` is the directory two levels above this module file. The first
    CSV column is used as the DataFrame index.

    Parameters
    ----------
    modality : {"rna", "cnv", "metadata", "cnv_tumor"}
        Type of data to load.

    dataset : {"tcga_brca"}
        Dataset name. Only "tcga_brca" is currently supported.

    debug : bool, optional
        If True, randomly subsample at most 10 rows and, for the "rna" and
        "cnv" modalities only, at most 100 columns. Default is False.

    debug_seed : int, optional
        Random seed for reproducibility of debug subsampling. The same seed
        is used for the row and the column draw. Default is 42.

    Returns
    -------
    pandas.DataFrame
        The requested data modality as a DataFrame.

    Raises
    ------
    ValueError
        If ``modality`` or ``dataset`` is not a supported value. The raised
        exception is also an ``AssertionError`` for backward compatibility.
    FileNotFoundError
        If the expected CSV file does not exist on disk.
    """
    # Explicit raises instead of ``assert``: assertions are removed under
    # ``python -O``, which would let an invalid value fall through to a
    # confusing lookup or file error further down.
    if modality not in ("rna", "cnv", "metadata", "cnv_tumor"):
        raise _InvalidChoiceError(
            "modality must be one of: 'rna', 'cnv', 'metadata', 'cnv_tumor'"
        )
    if dataset not in ("tcga_brca",):
        raise _InvalidChoiceError("dataset must be one of: 'tcga_brca'")

    # Locate data relative to this module: two directories up, then
    # ``datasets/<dataset>``.
    # NOTE(review): this assumes a ``datasets`` directory sits next to the
    # package directory -- confirm that the data files are actually shipped
    # with the installed distribution.
    datasets_path = Path(__file__).resolve().parent.parent / "datasets" / dataset

    # Every modality is stored in a CSV named after the modality itself.
    data_path = datasets_path / f"{modality}.csv"
    if not data_path.exists():
        raise FileNotFoundError(f"Data file not found: {data_path}")

    # Load the CSV, using its first column as the index.
    df = pd.read_csv(data_path, index_col=0)

    # Apply debug mode subsampling (capped at the available size so small
    # tables never make ``sample`` fail).
    if debug:
        df = df.sample(n=min(10, df.shape[0]), random_state=debug_seed)
        if modality in ("rna", "cnv"):
            df = df.sample(n=min(100, df.shape[1]), axis=1, random_state=debug_seed)

    return df
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def replace_underscores(factors: List[str]):
    """Return the given strings with every underscore turned into a hyphen.

    Intended for design factor names: parts of the code rely on
    ``str.split("_")``, so an underscore inside a factor name would cause
    bugs there.

    Parameters
    ----------
    factors : list
        Strings which may contain underscores. The input is not modified.

    Returns
    -------
    list
        A new list, in the same order, in which underscores were replaced
        by hyphens.
    """
    hyphenated = []
    for name in factors:
        hyphenated.append(name.replace("_", "-"))
    return hyphenated
|
|
93
|
+
|
|
94
|
+
|
|
13
95
|
def filter_low_count_genes(
|
|
14
96
|
df: pd.DataFrame,
|
|
15
97
|
other_dfs: Optional[List[pd.DataFrame]] = None,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
deconveil/__init__.py,sha256=_6FL_AYiycv9nP3mKJiQ4zl4aU83YSWnV2YoIZr9Mv0,188
|
|
2
|
-
deconveil/__version__.py,sha256=
|
|
3
|
-
deconveil/dds.py,sha256=
|
|
2
|
+
deconveil/__version__.py,sha256=Wzf5T3NBDfhQoTnhnRNHSlAsE0XMqbclXG-M81Vas70,22
|
|
3
|
+
deconveil/dds.py,sha256=FlZ9Cm92oKmsuOmhP9XWrQaz9cWT37s4ZVxTElI0J8w,49051
|
|
4
4
|
deconveil/default_inference.py,sha256=J40O0-qZChLnLrLGmhwxjaTVsV7REWAUQOTf8qSwWk0,9466
|
|
5
5
|
deconveil/ds.py,sha256=Vb9p152U1KXltrXFpMoBxY6YRW25dP4CO26_osbz6Aw,29476
|
|
6
6
|
deconveil/grid_search.py,sha256=iOHR8ur10MyrrfEZHr409lGulGxODufsjG6j7lQ7tWs,5181
|
|
@@ -8,9 +8,9 @@ deconveil/inference.py,sha256=B3zf3q_mbCTX3gHJwuXnTuy9uyXOxEjuWyaSR6VtVEo,10429
|
|
|
8
8
|
deconveil/utils_clustering.py,sha256=twspPvXQ6pvw_NaY1ebyvswuH3ZvVBGn7DeOpZ1XatI,5939
|
|
9
9
|
deconveil/utils_fit.py,sha256=SdGcBQjN3cyzbSFessufYOOOJAQCOjNcy3etbwmodsM,21583
|
|
10
10
|
deconveil/utils_plot.py,sha256=1JQthYXaEUKUWa0fy8owkyJ1CTkQxlrSRAqPkXMk7Us,9857
|
|
11
|
-
deconveil/utils_processing.py,sha256=
|
|
12
|
-
deconveil-0.1.
|
|
13
|
-
deconveil-0.1.
|
|
14
|
-
deconveil-0.1.
|
|
15
|
-
deconveil-0.1.
|
|
16
|
-
deconveil-0.1.
|
|
11
|
+
deconveil/utils_processing.py,sha256=9j35FAfQ7oNjdH1FWHP90DBTyL5RwlgdVbbW9de10VI,6560
|
|
12
|
+
deconveil-0.1.4.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
|
|
13
|
+
deconveil-0.1.4.dist-info/METADATA,sha256=uxr_y-JarwDKuEbzC3kdN_W-Wf-X-G2fELNvH0HwwM4,1097
|
|
14
|
+
deconveil-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
deconveil-0.1.4.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
|
|
16
|
+
deconveil-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|