DeConveil 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deconveil/__version__.py +1 -1
- deconveil/utils_processing.py +82 -0
- {deconveil-0.1.2.dist-info → deconveil-0.1.3.dist-info}/METADATA +1 -1
- {deconveil-0.1.2.dist-info → deconveil-0.1.3.dist-info}/RECORD +7 -7
- {deconveil-0.1.2.dist-info → deconveil-0.1.3.dist-info}/WHEEL +0 -0
- {deconveil-0.1.2.dist-info → deconveil-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {deconveil-0.1.2.dist-info → deconveil-0.1.3.dist-info}/top_level.txt +0 -0
deconveil/__version__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.2"
|
|
1
|
+
__version__ = "0.1.3"
|
deconveil/utils_processing.py
CHANGED
|
@@ -5,11 +5,93 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
|
+
import deconveil
|
|
8
9
|
|
|
9
10
|
from typing import List, Literal, Optional, Dict, Any, cast
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
def load_test_data(
    modality: Literal["rna", "cnv", "metadata", "cnv_tumor"] = "rna",
    dataset: Literal["tcga_brca"] = "tcga_brca",
    debug: bool = False,
    debug_seed: int = 42,
) -> pd.DataFrame:
    """Load TCGA-BRCA example data from the DeConveil package.

    The CSV file is looked up at ``<root>/datasets/<dataset>/<modality>.csv``,
    where ``<root>`` is the parent of the directory containing this module.
    The first CSV column is used as the DataFrame index.

    Parameters
    ----------
    modality : {"rna", "cnv", "metadata", "cnv_tumor"}
        Type of data to load.

    dataset : {"tcga_brca"}
        Dataset name. Only "tcga_brca" is currently supported.

    debug : bool, optional
        If True, randomly subsample up to 10 rows and, for the "rna" and
        "cnv" modalities only, up to 100 columns. Default is False.

    debug_seed : int, optional
        Random seed for reproducibility of debug subsampling. The same seed
        is used for both the row and the column draw. Default is 42.

    Returns
    -------
    pandas.DataFrame
        The requested data modality as a DataFrame.

    Raises
    ------
    AssertionError
        If ``modality`` or ``dataset`` is not one of the supported values.
    FileNotFoundError
        If the expected CSV file does not exist.
    """
    # Raise explicitly instead of using ``assert`` statements: asserts are
    # removed when Python runs with -O, which would let invalid arguments
    # through. AssertionError is kept so existing callers see the same
    # exception type and message as before.
    if modality not in ("rna", "cnv", "metadata", "cnv_tumor"):
        raise AssertionError(
            "modality must be one of: 'rna', 'cnv', 'metadata', 'cnv_tumor'"
        )
    if dataset not in ("tcga_brca",):
        raise AssertionError("dataset must be one of: 'tcga_brca'")

    # Locate the data relative to this module's location on disk.
    datasets_path = Path(__file__).resolve().parent.parent / "datasets" / dataset

    # Each modality is stored in a CSV file named after it.
    data_path = datasets_path / f"{modality}.csv"
    if not data_path.exists():
        raise FileNotFoundError(f"Data file not found: {data_path}")

    # Load the CSV
    df = pd.read_csv(data_path, index_col=0)

    # Apply debug mode subsampling
    if debug:
        # ``min`` guards against tables smaller than the requested sample.
        df = df.sample(n=min(10, df.shape[0]), random_state=debug_seed)
        if modality in ("rna", "cnv"):
            df = df.sample(n=min(100, df.shape[1]), axis=1, random_state=debug_seed)

    return df
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def replace_underscores(factors: List[str]):
    """Return a copy of ``factors`` with every underscore turned into a hyphen.

    Meant for design factor names: parts of the code rely on
    ``str.split("_")``, so underscores inside a factor name would cause bugs.

    Parameters
    ----------
    factors : list
        Strings that may contain underscores.

    Returns
    -------
    list
        New list, in the same order, with underscores replaced by hyphens.
    """
    hyphenated = []
    for name in factors:
        hyphenated.append(name.replace("_", "-"))
    return hyphenated
|
|
93
|
+
|
|
94
|
+
|
|
13
95
|
def filter_low_count_genes(
|
|
14
96
|
df: pd.DataFrame,
|
|
15
97
|
other_dfs: Optional[List[pd.DataFrame]] = None,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
deconveil/__init__.py,sha256=_6FL_AYiycv9nP3mKJiQ4zl4aU83YSWnV2YoIZr9Mv0,188
|
|
2
|
-
deconveil/__version__.py,sha256=
|
|
2
|
+
deconveil/__version__.py,sha256=XEqb2aiIn8fzGE68Mph4ck1FtQqsR_am0wRWvrYPffQ,22
|
|
3
3
|
deconveil/dds.py,sha256=0MNwtDzCjqjoJR-rrCmVu3JOaDd3gXuToOzTBXJMxak,49039
|
|
4
4
|
deconveil/default_inference.py,sha256=J40O0-qZChLnLrLGmhwxjaTVsV7REWAUQOTf8qSwWk0,9466
|
|
5
5
|
deconveil/ds.py,sha256=Vb9p152U1KXltrXFpMoBxY6YRW25dP4CO26_osbz6Aw,29476
|
|
@@ -8,9 +8,9 @@ deconveil/inference.py,sha256=B3zf3q_mbCTX3gHJwuXnTuy9uyXOxEjuWyaSR6VtVEo,10429
|
|
|
8
8
|
deconveil/utils_clustering.py,sha256=twspPvXQ6pvw_NaY1ebyvswuH3ZvVBGn7DeOpZ1XatI,5939
|
|
9
9
|
deconveil/utils_fit.py,sha256=SdGcBQjN3cyzbSFessufYOOOJAQCOjNcy3etbwmodsM,21583
|
|
10
10
|
deconveil/utils_plot.py,sha256=1JQthYXaEUKUWa0fy8owkyJ1CTkQxlrSRAqPkXMk7Us,9857
|
|
11
|
-
deconveil/utils_processing.py,sha256=
|
|
12
|
-
deconveil-0.1.
|
|
13
|
-
deconveil-0.1.
|
|
14
|
-
deconveil-0.1.
|
|
15
|
-
deconveil-0.1.
|
|
16
|
-
deconveil-0.1.
|
|
11
|
+
deconveil/utils_processing.py,sha256=9j35FAfQ7oNjdH1FWHP90DBTyL5RwlgdVbbW9de10VI,6560
|
|
12
|
+
deconveil-0.1.3.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
|
|
13
|
+
deconveil-0.1.3.dist-info/METADATA,sha256=yL6AwQ5ziGhrI5lE4FDCHOadT81W6yEIDWGsHni6Q5w,1097
|
|
14
|
+
deconveil-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
deconveil-0.1.3.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
|
|
16
|
+
deconveil-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|