DeConveil 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deconveil/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.2"
1
+ __version__ = "0.1.3"
@@ -5,11 +5,93 @@ from pathlib import Path
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
+ import deconveil
8
9
 
9
10
  from typing import List, Literal, Optional, Dict, Any, cast
10
11
 
11
12
 
12
13
 
14
def load_test_data(
    modality: Literal["rna", "cnv", "metadata", "cnv_tumor"] = "rna",
    dataset: Literal["tcga_brca"] = "tcga_brca",
    debug: bool = False,
    debug_seed: int = 42,
) -> pd.DataFrame:
    """Load TCGA-BRCA example data from the DeConveil package.

    The data is read from ``<package parent>/datasets/<dataset>/<modality>.csv``
    with the first CSV column used as the index.

    Parameters
    ----------
    modality : {"rna", "cnv", "metadata", "cnv_tumor"}
        Type of data to load.

    dataset : {"tcga_brca"}
        Dataset name. Only "tcga_brca" is currently supported.

    debug : bool, optional
        If True, randomly subsample at most 10 rows and, for the "rna" and
        "cnv" modalities only, at most 100 columns. Default is False.

    debug_seed : int, optional
        Random seed for reproducibility of debug subsampling. Default is 42.

    Returns
    -------
    pandas.DataFrame
        The requested data modality as a DataFrame.

    Raises
    ------
    ValueError
        If ``modality`` or ``dataset`` is not one of the supported values.
    FileNotFoundError
        If the CSV file for the requested modality does not exist.
    """
    supported_modalities = ("rna", "cnv", "metadata", "cnv_tumor")
    supported_datasets = ("tcga_brca",)

    # Validate with explicit raises rather than ``assert``: assertions are
    # removed under ``python -O``, which would let an unsupported value reach
    # the path construction below.
    if modality not in supported_modalities:
        raise ValueError(
            "modality must be one of: 'rna', 'cnv', 'metadata', 'cnv_tumor'"
        )
    if dataset not in supported_datasets:
        raise ValueError("dataset must be one of: 'tcga_brca'")

    # Locate data relative to this module: two directory levels up from the
    # resolved file, then ``datasets/<dataset>``.
    datasets_path = Path(__file__).resolve().parent.parent / "datasets" / dataset

    # Every supported modality is stored as ``<modality>.csv``.
    data_path = datasets_path / f"{modality}.csv"
    if not data_path.exists():
        raise FileNotFoundError(f"Data file not found: {data_path}")

    # Load the CSV, using the first column as the index.
    df = pd.read_csv(data_path, index_col=0)

    # Apply debug mode subsampling.
    if debug:
        df = df.sample(n=min(10, df.shape[0]), random_state=debug_seed)
        # NOTE(review): "cnv_tumor" is not column-subsampled here, unlike
        # "cnv" -- confirm this is intentional.
        if modality in ("rna", "cnv"):
            df = df.sample(n=min(100, df.shape[1]), axis=1, random_state=debug_seed)

    return df
74
+
75
+
76
def replace_underscores(factors: List[str]):
    """Return a copy of ``factors`` with every underscore turned into a hyphen.

    Intended for design factors, to avoid bugs caused by the reliance on
    ``str.split("_")`` in parts of the code.

    Parameters
    ----------
    factors : list
        Strings that may contain underscores.

    Returns
    -------
    list
        A new list, in the same order, in which each ``"_"`` was replaced
        by ``"-"``.
    """

    def _hyphenate(name):
        # Swap every underscore in a single factor name for a hyphen.
        return name.replace("_", "-")

    return list(map(_hyphenate, factors))
93
+
94
+
13
95
  def filter_low_count_genes(
14
96
  df: pd.DataFrame,
15
97
  other_dfs: Optional[List[pd.DataFrame]] = None,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: DeConveil
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: An extension of PyDESeq2/DESeq2 designed to account for genome aneuploidy
5
5
  Home-page: https://github.com/caravagnalab/DeConveil
6
6
  Author: Katsiaryna Davydzenka
@@ -1,5 +1,5 @@
1
1
  deconveil/__init__.py,sha256=_6FL_AYiycv9nP3mKJiQ4zl4aU83YSWnV2YoIZr9Mv0,188
2
- deconveil/__version__.py,sha256=K5SiDdEGYMpdqXThrqwTqECJJBOQNTQDrnpc2K5mzKs,21
2
+ deconveil/__version__.py,sha256=XEqb2aiIn8fzGE68Mph4ck1FtQqsR_am0wRWvrYPffQ,22
3
3
  deconveil/dds.py,sha256=0MNwtDzCjqjoJR-rrCmVu3JOaDd3gXuToOzTBXJMxak,49039
4
4
  deconveil/default_inference.py,sha256=J40O0-qZChLnLrLGmhwxjaTVsV7REWAUQOTf8qSwWk0,9466
5
5
  deconveil/ds.py,sha256=Vb9p152U1KXltrXFpMoBxY6YRW25dP4CO26_osbz6Aw,29476
@@ -8,9 +8,9 @@ deconveil/inference.py,sha256=B3zf3q_mbCTX3gHJwuXnTuy9uyXOxEjuWyaSR6VtVEo,10429
8
8
  deconveil/utils_clustering.py,sha256=twspPvXQ6pvw_NaY1ebyvswuH3ZvVBGn7DeOpZ1XatI,5939
9
9
  deconveil/utils_fit.py,sha256=SdGcBQjN3cyzbSFessufYOOOJAQCOjNcy3etbwmodsM,21583
10
10
  deconveil/utils_plot.py,sha256=1JQthYXaEUKUWa0fy8owkyJ1CTkQxlrSRAqPkXMk7Us,9857
11
- deconveil/utils_processing.py,sha256=CB99CwQst7eUiIgE58yl7_3E6uD9CgQoU_Qmprjyt-s,4141
12
- deconveil-0.1.2.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
13
- deconveil-0.1.2.dist-info/METADATA,sha256=JqHZYXo0lLvPjoj_cDT-IwHADSKdESJQxorDbpsk3-k,1097
14
- deconveil-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- deconveil-0.1.2.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
16
- deconveil-0.1.2.dist-info/RECORD,,
11
+ deconveil/utils_processing.py,sha256=9j35FAfQ7oNjdH1FWHP90DBTyL5RwlgdVbbW9de10VI,6560
12
+ deconveil-0.1.3.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
13
+ deconveil-0.1.3.dist-info/METADATA,sha256=yL6AwQ5ziGhrI5lE4FDCHOadT81W6yEIDWGsHni6Q5w,1097
14
+ deconveil-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ deconveil-0.1.3.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
16
+ deconveil-0.1.3.dist-info/RECORD,,