DeConveil 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deconveil/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.2"
1
+ __version__ = "0.1.4"
deconveil/dds.py CHANGED
@@ -18,6 +18,7 @@ from deconveil.utils_fit import fit_moments_dispersions2
18
18
  from deconveil.utils_fit import grid_fit_beta
19
19
  from deconveil.utils_fit import irls_glm
20
20
  from deconveil.utils_fit import build_design_matrix
21
+ from deconveil.utils_processing import replace_underscores
21
22
 
22
23
  from pydeseq2.preprocessing import deseq2_norm_fit
23
24
  from pydeseq2.preprocessing import deseq2_norm_transform
@@ -25,7 +26,6 @@ from pydeseq2.utils import dispersion_trend
25
26
  from pydeseq2.utils import mean_absolute_deviation
26
27
  from pydeseq2.utils import n_or_more_replicates
27
28
  from pydeseq2.utils import nb_nll
28
- from pydeseq2.utils import replace_underscores
29
29
  from pydeseq2.utils import robust_method_of_moments_disp
30
30
  from pydeseq2.utils import test_valid_counts
31
31
  from pydeseq2.utils import trimmed_mean
@@ -5,11 +5,93 @@ from pathlib import Path
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
+ import deconveil
8
9
 
9
10
  from typing import List, Literal, Optional, Dict, Any, cast
10
11
 
11
12
 
12
13
 
14
class _InvalidOptionError(ValueError, AssertionError):
    """Raised when ``load_test_data`` receives an unsupported option.

    Derives from ``AssertionError`` as well as ``ValueError`` so that callers
    written against the earlier ``assert``-based validation still catch it.
    """


def load_test_data(
    modality: Literal["rna", "cnv", "metadata", "cnv_tumor"] = "rna",
    dataset: Literal["tcga_brca"] = "tcga_brca",
    debug: bool = False,
    debug_seed: int = 42,
) -> pd.DataFrame:
    """Load one table of the TCGA-BRCA example data as a DataFrame.

    Parameters
    ----------
    modality : {"rna", "cnv", "metadata", "cnv_tumor"}
        Which table to load. The file read is ``<modality>.csv``.

    dataset : {"tcga_brca"}
        Dataset name. Only "tcga_brca" is currently supported.

    debug : bool, optional
        If True, randomly keep at most 10 rows and, for the "rna" and "cnv"
        modalities only, at most 100 columns. Default is False.

    debug_seed : int, optional
        Random seed used for both debug subsampling steps. Default is 42.

    Returns
    -------
    pandas.DataFrame
        The requested table, indexed by the first column of the CSV file.

    Raises
    ------
    ValueError
        If ``modality`` or ``dataset`` is not one of the supported values.
        The exception is also an ``AssertionError`` for backward
        compatibility.
    FileNotFoundError
        If the expected CSV file does not exist.
    """
    supported_modalities = ("rna", "cnv", "metadata", "cnv_tumor")
    supported_datasets = ("tcga_brca",)

    # Explicit raises instead of ``assert``: assertions are removed when
    # Python runs with -O, which would let invalid arguments through.
    if modality not in supported_modalities:
        raise _InvalidOptionError(
            "modality must be one of: 'rna', 'cnv', 'metadata', 'cnv_tumor'"
        )
    if dataset not in supported_datasets:
        raise _InvalidOptionError("dataset must be one of: 'tcga_brca'")

    # The data directory is resolved two levels above this file, i.e. next to
    # the directory containing this module rather than inside it.
    # NOTE(review): confirm the CSV files are actually shipped at this
    # location in installed distributions.
    datasets_path = Path(__file__).resolve().parent.parent / "datasets" / dataset

    # Every modality is stored under its own name.
    data_path = datasets_path / f"{modality}.csv"
    if not data_path.exists():
        raise FileNotFoundError(f"Data file not found: {data_path}")

    df = pd.read_csv(data_path, index_col=0)

    if debug:
        # Row subsampling applies to every modality.
        df = df.sample(n=min(10, df.shape[0]), random_state=debug_seed)
        # Column subsampling is restricted to the "rna" and "cnv" tables.
        if modality in ("rna", "cnv"):
            df = df.sample(n=min(100, df.shape[1]), axis=1, random_state=debug_seed)

    return df
74
+
75
+
76
def replace_underscores(factors: List[str]):
    """Return the given strings with every underscore turned into a hyphen.

    Intended for design factor names: parts of the code rely on
    ``str.split("_")``, so underscores inside a factor name would cause bugs.

    Parameters
    ----------
    factors : list
        Strings that may contain underscores.

    Returns
    -------
    list
        A new list, in the same order, with underscores replaced by hyphens.
    """
    hyphenated = []
    for name in factors:
        hyphenated.append(name.replace("_", "-"))
    return hyphenated
93
+
94
+
13
95
  def filter_low_count_genes(
14
96
  df: pd.DataFrame,
15
97
  other_dfs: Optional[List[pd.DataFrame]] = None,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: DeConveil
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: An extension of PyDESeq2/DESeq2 designed to account for genome aneuploidy
5
5
  Home-page: https://github.com/caravagnalab/DeConveil
6
6
  Author: Katsiaryna Davydzenka
@@ -1,6 +1,6 @@
1
1
  deconveil/__init__.py,sha256=_6FL_AYiycv9nP3mKJiQ4zl4aU83YSWnV2YoIZr9Mv0,188
2
- deconveil/__version__.py,sha256=K5SiDdEGYMpdqXThrqwTqECJJBOQNTQDrnpc2K5mzKs,21
3
- deconveil/dds.py,sha256=0MNwtDzCjqjoJR-rrCmVu3JOaDd3gXuToOzTBXJMxak,49039
2
+ deconveil/__version__.py,sha256=Wzf5T3NBDfhQoTnhnRNHSlAsE0XMqbclXG-M81Vas70,22
3
+ deconveil/dds.py,sha256=FlZ9Cm92oKmsuOmhP9XWrQaz9cWT37s4ZVxTElI0J8w,49051
4
4
  deconveil/default_inference.py,sha256=J40O0-qZChLnLrLGmhwxjaTVsV7REWAUQOTf8qSwWk0,9466
5
5
  deconveil/ds.py,sha256=Vb9p152U1KXltrXFpMoBxY6YRW25dP4CO26_osbz6Aw,29476
6
6
  deconveil/grid_search.py,sha256=iOHR8ur10MyrrfEZHr409lGulGxODufsjG6j7lQ7tWs,5181
@@ -8,9 +8,9 @@ deconveil/inference.py,sha256=B3zf3q_mbCTX3gHJwuXnTuy9uyXOxEjuWyaSR6VtVEo,10429
8
8
  deconveil/utils_clustering.py,sha256=twspPvXQ6pvw_NaY1ebyvswuH3ZvVBGn7DeOpZ1XatI,5939
9
9
  deconveil/utils_fit.py,sha256=SdGcBQjN3cyzbSFessufYOOOJAQCOjNcy3etbwmodsM,21583
10
10
  deconveil/utils_plot.py,sha256=1JQthYXaEUKUWa0fy8owkyJ1CTkQxlrSRAqPkXMk7Us,9857
11
- deconveil/utils_processing.py,sha256=CB99CwQst7eUiIgE58yl7_3E6uD9CgQoU_Qmprjyt-s,4141
12
- deconveil-0.1.2.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
13
- deconveil-0.1.2.dist-info/METADATA,sha256=JqHZYXo0lLvPjoj_cDT-IwHADSKdESJQxorDbpsk3-k,1097
14
- deconveil-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- deconveil-0.1.2.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
16
- deconveil-0.1.2.dist-info/RECORD,,
11
+ deconveil/utils_processing.py,sha256=9j35FAfQ7oNjdH1FWHP90DBTyL5RwlgdVbbW9de10VI,6560
12
+ deconveil-0.1.4.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
13
+ deconveil-0.1.4.dist-info/METADATA,sha256=uxr_y-JarwDKuEbzC3kdN_W-Wf-X-G2fELNvH0HwwM4,1097
14
+ deconveil-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ deconveil-0.1.4.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
16
+ deconveil-0.1.4.dist-info/RECORD,,