msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msreport/__init__.py +4 -6
- msreport/aggregate/condense.py +1 -1
- msreport/aggregate/pivot.py +1 -0
- msreport/aggregate/summarize.py +2 -2
- msreport/analyze.py +171 -38
- msreport/errors.py +1 -2
- msreport/export.py +16 -13
- msreport/fasta.py +2 -1
- msreport/helper/__init__.py +7 -7
- msreport/helper/calc.py +29 -24
- msreport/helper/maxlfq.py +2 -2
- msreport/helper/table.py +5 -6
- msreport/impute.py +7 -8
- msreport/isobar.py +10 -9
- msreport/normalize.py +54 -36
- msreport/peptidoform.py +6 -4
- msreport/plot/__init__.py +41 -0
- msreport/plot/_partial_plots.py +159 -0
- msreport/plot/comparison.py +490 -0
- msreport/plot/distribution.py +253 -0
- msreport/plot/multivariate.py +355 -0
- msreport/plot/quality.py +431 -0
- msreport/plot/style.py +286 -0
- msreport/plot/style_sheets/msreport-notebook.mplstyle +57 -0
- msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +45 -0
- msreport/qtable.py +109 -17
- msreport/reader.py +73 -79
- msreport/rinterface/__init__.py +2 -1
- msreport/rinterface/limma.py +2 -1
- msreport/rinterface/rinstaller.py +3 -3
- {msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/METADATA +7 -3
- msreport-0.0.28.dist-info/RECORD +38 -0
- msreport/plot.py +0 -1132
- msreport-0.0.26.dist-info/RECORD +0 -30
- {msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/WHEEL +0 -0
- {msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/licenses/LICENSE.txt +0 -0
- {msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/top_level.txt +0 -0
msreport/plot/style.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""Manage and apply custom plotting styles for the `msreport.plot` module.
|
|
2
|
+
|
|
3
|
+
Active styles in msreport are predefined or customizable Matplotlib style sheets that
|
|
4
|
+
are automatically applied to all plots generated within the library. By changing the
|
|
5
|
+
active style, users can define the rcParams used for styling the plots, such as color
|
|
6
|
+
and font settings.
|
|
7
|
+
|
|
8
|
+
The `set_active_style` function allows users to select style sheets from the msreport
|
|
9
|
+
library or any style sheets available in Matplotlib. Additionally, it supports passing
|
|
10
|
+
a dictionary of rcParams to further customize the active style. The additional
|
|
11
|
+
parameters are applied after the style sheet, potentially overriding settings from the
|
|
12
|
+
style sheet.
|
|
13
|
+
|
|
14
|
+
Available msreport style sheets:
|
|
15
|
+
- "msreport-notebook"
|
|
16
|
+
- "seaborn-whitegrid"
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import colorsys
|
|
20
|
+
import functools
|
|
21
|
+
import pathlib
|
|
22
|
+
import re
|
|
23
|
+
from collections import UserDict
|
|
24
|
+
from contextlib import contextmanager
|
|
25
|
+
from typing import Any, Optional
|
|
26
|
+
|
|
27
|
+
import matplotlib.colors as mcolors
|
|
28
|
+
import matplotlib.pyplot as plt
|
|
29
|
+
import matplotlib.style
|
|
30
|
+
|
|
31
|
+
__all__ = ["ColorWheelDict", "set_active_style", "set_dpi"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ColorWheelDict(UserDict):
    """Lookup dictionary that maps keys to hex colors by using a color wheel.

    Looking up a key that is not present assigns it the first color of the color
    wheel and moves that color from the beginning to the end of the wheel, so colors
    are handed out in order and reused cyclically. If no list of colors is specified,
    a default list of ten colors is added to the color wheel. It is also possible to
    manually set key and color pairs by using the same syntax as for a regular
    dictionary; manually set values must be hex color strings such as "#80b1d3" or
    "#abc".

    Attributes:
        colors: The list of hex colors the color wheel was initialized with.
    """

    def __init__(self, colors: Optional[list[str]] = None):
        """Initializes a ColorWheelDict.

        Args:
            colors: Optional, a list of hex colors used for the color wheel. By default
                a list with ten colors is used.
        """
        super().__init__()

        if colors is not None:
            self.colors = colors
        else:
            self.colors = [
                "#80b1d3",
                "#fdb462",
                "#8dd3c7",
                "#bebada",
                "#fb8072",
                "#b3de69",
                "#fccde5",
                "#d9d9d9",
                "#bc80bd",
                "#ccebc5",
            ]
        # The wheel is rotated in place on every new key, so it works on a copy and
        # leaves `colors` in its original order.
        self._color_wheel = self.colors.copy()

    def modified_color(self, key: str, factor: float) -> str:
        """Returns a color for the specified key with modified lightness.

        If the key is not present yet, it is assigned the next color of the color
        wheel first.

        Args:
            key: The key for which to get the color.
            factor: The factor by which to modify the lightness. Values > 1 lighten,
                < 1 darken.

        Returns:
            A hex color string with modified lightness.
        """
        return _modify_lightness_hex(self[key], factor)

    def _next_color(self) -> str:
        """Returns the first color of the wheel and moves it to the end of the wheel.

        Raises:
            IndexError: If the color wheel is empty.
        """
        if not self._color_wheel:
            raise IndexError("cannot pick a color from an empty color wheel")
        color = self._color_wheel.pop(0)
        self._color_wheel.append(color)
        return color

    def __setitem__(self, key, value):
        """Sets the color of a key.

        Raises:
            ValueError: If value is not a hex color string of the form "#RGB" or
                "#RRGGBB".
        """
        # `fullmatch` rejects a trailing newline, which `search` with "$" would accept;
        # the isinstance check turns non-string values into the same ValueError.
        is_hexcolor = isinstance(value, str) and re.fullmatch(
            r"^#(?:[0-9a-fA-F]{3}){1,2}$", value
        )
        if not is_hexcolor:
            raise ValueError(f"the specified value {value} is not a hexcolor.")
        self.data[key] = value

    def __getitem__(self, key):
        """Returns the color of a key, assigning the next wheel color to new keys."""
        if key not in self.data:
            self.data[key] = self._next_color()
        return self.data[key]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def set_dpi(dpi: int) -> None:
    """Sets the default dots per inch used by matplotlib for new figures.

    A different dpi makes figures appear smaller or larger without changing the
    relative sizes of the elements within the figures.

    Args:
        dpi: New default dots per inch, written to the "figure.dpi" rcParam.
    """
    rc_params = plt.rcParams
    rc_params["figure.dpi"] = dpi
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@contextmanager
def use_active_style():
    """Context manager that applies the active style while its body is executed.

    On exit, the rc parameters are restored to the values they had on entry. The rc
    parameters 'backend' and 'interactive' are excluded from the restore. This is
    required for compatibility with jupyter notebooks, which automatically set up the
    backend and interactive mode for inline plotting.
    """
    style_args = _get_active_style_context_arg()

    # Snapshot of the current rcParams, minus the keys that must never be reset.
    saved_rc = dict(matplotlib.rcParams.copy())
    for excluded_key in ("backend", "interactive"):
        del saved_rc[excluded_key]

    try:
        matplotlib.style.use(style_args)
        yield
    finally:
        # Use `.update` instead of `._update_raw` for matplotlib backward compatibility
        matplotlib.rcParams.update(saved_rc)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def with_active_style(func):
    """Decorator that runs the decorated function inside `use_active_style`.

    Args:
        func: The function to wrap.

    Returns:
        A wrapper with the metadata of `func` that forwards all arguments and returns
        the result of `func`.
    """

    @functools.wraps(func)
    def styled_call(*args, **kwargs):
        with use_active_style():
            result = func(*args, **kwargs)
        return result

    return styled_call
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def set_active_style(style: str | None, rc: dict[str, Any] | None = None) -> None:
    """Set the active plotting style for the msreport.plot submodule.

    The chosen style, potentially modified by the rc dictionary, will be
    applied temporarily using a context manager within the library's
    plotting functions. This does not modify the global matplotlib rcParams
    permanently.

    Args:
        style: The name of the base style to activate. This can be one of the
            msreport style sheets ("msreport-notebook", "seaborn-whitegrid"), a
            standard matplotlib style, or a style registered with matplotlib by
            another library. If None, no base style is applied and only the rc
            overrides, if any, are used.
        rc: An optional dictionary mapping matplotlib rcParams names (strings)
            to their desired values. These settings will be applied *after*
            the base style, overriding any conflicting parameters from the
            base style for the duration of the plot context. A shallow copy of
            the dictionary is stored.

    Raises:
        ValueError: If the specified base style name is not found among the
            library's styles or the available matplotlib styles.
        TypeError: If rc is not a dictionary or None.
    """
    global _active_style_name, _active_style_rc_override

    if style is not None and style not in _AVAILABLE_STYLES:
        # The cached list is created at import time; styles registered later are
        # only found by querying the available styles again.
        current_available = _get_available_styles()
        if style not in current_available:
            # Sort the names so the message is deterministic and easy to scan.
            raise ValueError(
                f"Style '{style}' not found. Available styles are: "
                f"{', '.join(sorted(current_available))}"
            )

    if rc is not None and not isinstance(rc, dict):
        raise TypeError(f"rc argument must be a dictionary or None, got {type(rc)}")

    _active_style_name = style
    _active_style_rc_override = rc.copy() if rc is not None else None
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def get_active_style() -> str | None:
    """Return the name of the active 'msreport.plot' style.

    Returns:
        The style name stored by `set_active_style`, or None if no base style is
        active.
    """
    active_name = _active_style_name
    return active_name
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def get_active_override() -> dict[str, Any] | None:
    """Return the active rcParam overrides of the 'msreport.plot' style.

    Returns:
        The dictionary of rcParam overrides, or None if no overrides are active. The
        stored dictionary itself is returned, not a copy.
    """
    active_override = _active_style_rc_override
    return active_override
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _get_active_style_context_arg() -> list[str | dict[str, Any]]:
    """Build the matplotlib style argument that represents the active style.

    The result combines the base style with the active rcParam overrides. A base
    style that belongs to the msreport library is given as the path to its style
    sheet, any other style is given by name. The overrides come last, so they take
    precedence over the base style.

    Returns:
        A list containing, in this order, the style name or path (if a base style is
        active) and the rcParam overrides (if any are active). The list may be empty.
    """
    style_name = get_active_style()
    rc_override = get_active_override()

    context_args: list[str | dict[str, Any]] = []
    if style_name is not None:
        # Library styles are resolved to their file path, others are kept by name.
        context_args.append(_LIBRARY_STYLE_PATHS.get(style_name, style_name))
    if rc_override is not None:
        context_args.append(rc_override)
    return context_args
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _modify_lightness_rgb(
    rgb_color: tuple[float, float, float], lightness_scale_factor: float
) -> tuple[float, float, float]:
    """Modifies the lightness of a color while preserving hue and saturation.

    The color is converted to HLS, its lightness is multiplied by the scale factor,
    and the result is converted back to RGB. The scaled lightness is clamped to the
    range [0, 1], so large factors yield white and negative factors yield black
    instead of RGB values outside the valid range.

    Args:
        rgb_color: A tuple of RGB values in the range [0, 1].
        lightness_scale_factor: Factor to scale the lightness by (values > 1 lighten,
            < 1 darken).

    Returns:
        A tuple of RGB values with adjusted lightness.
    """
    hue, lightness, saturation = colorsys.rgb_to_hls(*rgb_color)
    new_lightness = max(0.0, min(1.0, lightness * lightness_scale_factor))
    return colorsys.hls_to_rgb(hue, new_lightness, saturation)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _modify_lightness_hex(hex_color: str, lightness_scale_factor: float) -> str:
    """Modifies the lightness of a hex color while preserving hue and saturation.

    Args:
        hex_color: A hex color string (e.g., "#80b1d3").
        lightness_scale_factor: Factor to scale the lightness by (values > 1 lighten,
            < 1 darken).

    Returns:
        A hex color string with adjusted lightness.
    """
    # Convert to RGB, scale the lightness there, and convert back to hex.
    adjusted_rgb = _modify_lightness_rgb(
        mcolors.to_rgb(hex_color), lightness_scale_factor
    )
    return mcolors.to_hex(adjusted_rgb)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _get_library_styles() -> dict[str, str]:
    """Scan the style directory and return the style sheets of the library.

    Only files with the extension ".mplstyle" are considered.

    Returns:
        A dictionary mapping style names (file names without extension) to the
        resolved posix paths of the style sheets. The dictionary is empty if the
        style directory contains no style sheets.

    Raises:
        OSError: If the style directory cannot be read, for example because it does
            not exist.
    """
    try:
        return {
            entry.stem: entry.resolve().as_posix()
            for entry in pathlib.Path(_STYLE_DIR).iterdir()
            if entry.suffix == ".mplstyle"
        }
    except OSError as err:
        raise OSError(
            f"Could not read 'msreport.plot' style directory {_STYLE_DIR}: {err}. "
            "Please check if the directory exists and is accessible."
        ) from err
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _get_available_styles() -> list[str]:
    """Get a list of all available style names from library and matplotlib.

    Returns:
        The names of the msreport style sheets and of the styles currently available
        in matplotlib, without duplicates and sorted alphabetically so that the
        result is deterministic.
    """
    lib_styles = _get_library_styles().keys()
    mpl_styles = matplotlib.style.available
    return sorted(set(lib_styles) | set(mpl_styles))
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# Directory that contains the style sheets shipped with msreport.
_STYLE_DIR: str = (
    pathlib.Path(__file__).parent.joinpath("style_sheets").resolve().as_posix()
)
# Style sheets of the library, mapping style names to file paths.
_LIBRARY_STYLE_PATHS: dict[str, str] = _get_library_styles()
# Names of all styles known at import time, from the library and from matplotlib.
_AVAILABLE_STYLES: list[str] = _get_available_styles()

# State of the active style; modified by `set_active_style`.
_DEFAULT_STYLE: str = "msreport-notebook"
_active_style_name: str | None = _DEFAULT_STYLE
_active_style_rc_override: dict[str, Any] | None = None
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
## LINES
|
|
2
|
+
lines.solid_capstyle: round
|
|
3
|
+
|
|
4
|
+
## PATCHES
|
|
5
|
+
patch.edgecolor: white
|
|
6
|
+
patch.force_edgecolor: True
|
|
7
|
+
|
|
8
|
+
## FONT
|
|
9
|
+
font.family: sans-serif
|
|
10
|
+
font.sans-serif: Arial, DejaVu Sans, Liberation Sans, Bitstream Vera Sans, sans-serif
|
|
11
|
+
font.size: 10
|
|
12
|
+
|
|
13
|
+
## TEXT
|
|
14
|
+
text.color: black
|
|
15
|
+
|
|
16
|
+
## FIGURE
|
|
17
|
+
figure.facecolor: white
|
|
18
|
+
figure.titlesize: 12
|
|
19
|
+
|
|
20
|
+
## AXES
|
|
21
|
+
axes.facecolor: white
|
|
22
|
+
axes.edgecolor: black # Color of the axes border / spines
|
|
23
|
+
axes.linewidth: 1 # Width of the axes border / spines
|
|
24
|
+
axes.labelcolor: black
|
|
25
|
+
axes.labelsize: 10
|
|
26
|
+
axes.axisbelow: True
|
|
27
|
+
axes.grid: True
|
|
28
|
+
axes.titlesize: 10
|
|
29
|
+
axes.spines.left: True
|
|
30
|
+
axes.spines.bottom: True
|
|
31
|
+
axes.spines.right: True
|
|
32
|
+
axes.spines.top: True
|
|
33
|
+
|
|
34
|
+
## TICKS
|
|
35
|
+
xtick.top: False
|
|
36
|
+
xtick.bottom: False
|
|
37
|
+
xtick.color: black
|
|
38
|
+
xtick.direction: out
|
|
39
|
+
xtick.major.width: 1
|
|
40
|
+
xtick.labelsize: 8
|
|
41
|
+
|
|
42
|
+
ytick.left: False
|
|
43
|
+
ytick.right: False
|
|
44
|
+
ytick.color: black
|
|
45
|
+
ytick.direction: out
|
|
46
|
+
ytick.major.width: 1
|
|
47
|
+
ytick.labelsize: 8
|
|
48
|
+
|
|
49
|
+
## GRIDS
|
|
50
|
+
grid.alpha: 1.0
|
|
51
|
+
grid.color: 0.8
|
|
52
|
+
grid.linestyle: dashed
|
|
53
|
+
grid.linewidth: 1
|
|
54
|
+
|
|
55
|
+
## LEGEND
|
|
56
|
+
legend.fontsize: 10
|
|
57
|
+
legend.title_fontsize: None # Set to None to use the same as axes.titlesize
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
## LINES
|
|
2
|
+
lines.solid_capstyle: round
|
|
3
|
+
|
|
4
|
+
## PATCHES
|
|
5
|
+
patch.edgecolor: white
|
|
6
|
+
patch.force_edgecolor: True
|
|
7
|
+
|
|
8
|
+
## FONT
|
|
9
|
+
font.family: sans-serif
|
|
10
|
+
font.sans-serif: Arial, DejaVu Sans, Liberation Sans, Bitstream Vera Sans, sans-serif
|
|
11
|
+
|
|
12
|
+
## TEXT
|
|
13
|
+
text.color: 0.15
|
|
14
|
+
|
|
15
|
+
## FIGURE
|
|
16
|
+
figure.facecolor: white
|
|
17
|
+
|
|
18
|
+
## AXES
|
|
19
|
+
axes.facecolor: white
|
|
20
|
+
axes.edgecolor: 0.15
|
|
21
|
+
axes.labelcolor: 0.15
|
|
22
|
+
axes.axisbelow: True
|
|
23
|
+
axes.grid: True
|
|
24
|
+
axes.spines.left: True
|
|
25
|
+
axes.spines.bottom: True
|
|
26
|
+
axes.spines.right: True
|
|
27
|
+
axes.spines.top: True
|
|
28
|
+
|
|
29
|
+
## TICKS
|
|
30
|
+
xtick.top: False
|
|
31
|
+
xtick.bottom: False
|
|
32
|
+
xtick.color: 0.15
|
|
33
|
+
xtick.direction: out
|
|
34
|
+
|
|
35
|
+
ytick.left: False
|
|
36
|
+
ytick.right: False
|
|
37
|
+
ytick.color: 0.15
|
|
38
|
+
ytick.direction: out
|
|
39
|
+
|
|
40
|
+
## GRID
|
|
41
|
+
grid.color: 0.8
|
|
42
|
+
grid.linestyle: -
|
|
43
|
+
|
|
44
|
+
## IMAGE
|
|
45
|
+
image.cmap: rocket
|
msreport/qtable.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
import copy
|
|
3
4
|
import os
|
|
4
5
|
import warnings
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from typing import Any, Iterable, Optional
|
|
5
8
|
|
|
6
9
|
import numpy as np
|
|
7
10
|
import pandas as pd
|
|
@@ -24,7 +27,14 @@ class Qtable:
|
|
|
24
27
|
design: A pandas.DataFrame describing the experimental design.
|
|
25
28
|
"""
|
|
26
29
|
|
|
27
|
-
|
|
30
|
+
_default_id_column = "Representative protein"
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
data: pd.DataFrame,
|
|
35
|
+
design: Optional[pd.DataFrame] = None,
|
|
36
|
+
id_column: str = "Representative protein",
|
|
37
|
+
):
|
|
28
38
|
"""Initializes the Qtable.
|
|
29
39
|
|
|
30
40
|
If data does not contain a "Valid" column, this column is added and all its row
|
|
@@ -36,11 +46,34 @@ class Qtable:
|
|
|
36
46
|
contain the columns "Sample" and "Experiment". The "Sample" entries
|
|
37
47
|
should correspond to the Sample names present in the quantitative
|
|
38
48
|
columns of the data.
|
|
49
|
+
id_column: The name of the column that contains the unique identifiers for
|
|
50
|
+
the entries in the data table. Default is "Representative protein".
|
|
51
|
+
|
|
52
|
+
Raises:
|
|
53
|
+
KeyError: If the specified id_column is not found in data.
|
|
54
|
+
ValueError: If the specified id_column does not contain unique identifiers.
|
|
39
55
|
"""
|
|
40
56
|
self.design: pd.DataFrame
|
|
41
57
|
self.data: pd.DataFrame
|
|
58
|
+
self._id_column: str
|
|
59
|
+
|
|
60
|
+
if not data.index.is_unique:
|
|
61
|
+
raise ValueError(
|
|
62
|
+
"The index of the 'data' table must contain unique values."
|
|
63
|
+
)
|
|
64
|
+
if id_column not in data.columns:
|
|
65
|
+
raise KeyError(
|
|
66
|
+
f"Column '{id_column}' not found in 'data'. Please specify a valid "
|
|
67
|
+
"column that contains unique identifiers for the entries in 'data'."
|
|
68
|
+
)
|
|
69
|
+
if not data[id_column].is_unique:
|
|
70
|
+
raise ValueError(
|
|
71
|
+
f"Column '{id_column}' in 'data' table must contain unique identifiers"
|
|
72
|
+
", i.e. no duplicated values. Please provide a valid 'id_column'."
|
|
73
|
+
)
|
|
42
74
|
|
|
43
75
|
self.data = data.copy()
|
|
76
|
+
self._id_column = id_column
|
|
44
77
|
if "Valid" not in self.data.columns:
|
|
45
78
|
self.data["Valid"] = True
|
|
46
79
|
if design is not None:
|
|
@@ -73,12 +106,12 @@ class Qtable:
|
|
|
73
106
|
"""
|
|
74
107
|
columns = design.columns.tolist()
|
|
75
108
|
required_columns = ["Experiment", "Sample", "Replicate"]
|
|
76
|
-
if not all(
|
|
109
|
+
if not all(c in columns for c in required_columns):
|
|
77
110
|
exception_message = "".join(
|
|
78
111
|
[
|
|
79
112
|
"The design table must at least contain the columns: ",
|
|
80
113
|
", ".join(f'"{c}"' for c in required_columns),
|
|
81
|
-
".
|
|
114
|
+
". It only contains the columns: ",
|
|
82
115
|
", ".join(f'"{c}"' for c in columns),
|
|
83
116
|
".",
|
|
84
117
|
]
|
|
@@ -105,6 +138,11 @@ class Qtable:
|
|
|
105
138
|
"""Returns a copy of the design table."""
|
|
106
139
|
return self.design.copy()
|
|
107
140
|
|
|
141
|
+
@property
def id_column(self) -> str:
    """Returns the name of the column that is used as the id column."""
    column_name = self._id_column
    return column_name
|
|
145
|
+
|
|
108
146
|
def get_samples(self, experiment: Optional[str] = None) -> list[str]:
|
|
109
147
|
"""Returns a list of samples present in the design table.
|
|
110
148
|
|
|
@@ -315,6 +353,50 @@ class Qtable:
|
|
|
315
353
|
expression_features.columns.difference(self._expression_features)
|
|
316
354
|
)
|
|
317
355
|
|
|
356
|
+
@contextmanager
def temp_design(
    self,
    design: Optional[pd.DataFrame] = None,
    exclude_experiments: Optional[Iterable[str]] = None,
    keep_experiments: Optional[Iterable[str]] = None,
    exclude_samples: Optional[Iterable[str]] = None,
    keep_samples: Optional[Iterable[str]] = None,
):
    """Context manager to temporarily modify the design table.

    The temporary design is the specified `design`, or a copy of the current design
    if none is specified, reduced by the exclude and keep filters. The filters are
    applied one after another in the order of the arguments.

    Args:
        design: A DataFrame to temporarily replace the current design table.
        exclude_experiments: A list of experiments to exclude from the design.
        keep_experiments: A list of experiments to keep in the design (all others
            are removed).
        exclude_samples: A list of samples to exclude from the design.
        keep_samples: A list of samples to keep in the design (all others are
            removed).

    Yields:
        None. Restores the original design table after the context ends.
    """
    original_design = self.design
    temporary: pd.DataFrame = self.get_design() if design is None else design

    # Each entry is (design column, values to match, keep matching rows or not).
    row_filters = (
        ("Experiment", exclude_experiments, False),
        ("Experiment", keep_experiments, True),
        ("Sample", exclude_samples, False),
        ("Sample", keep_samples, True),
    )
    for column, values, keep_matches in row_filters:
        if values is None:
            continue
        matches = temporary[column].isin(values)
        temporary = temporary[matches if keep_matches else ~matches]

    try:
        self.add_design(temporary)
        yield
    finally:
        # Always restore the original design, also when the context body raised.
        self.add_design(original_design)
|
|
399
|
+
|
|
318
400
|
def save(self, directory: str, basename: str):
|
|
319
401
|
"""Save qtable to disk, creating a data, design, and config file.
|
|
320
402
|
|
|
@@ -333,6 +415,8 @@ class Qtable:
|
|
|
333
415
|
"Expression features": self._expression_features,
|
|
334
416
|
"Expression sample mapping": self._expression_sample_mapping,
|
|
335
417
|
"Data dtypes": self.data.dtypes.astype(str).to_dict(),
|
|
418
|
+
"Design dtypes": self.design.dtypes.astype(str).to_dict(),
|
|
419
|
+
"Unique ID column": self._id_column,
|
|
336
420
|
}
|
|
337
421
|
with open(filepaths["config"], "w") as openfile:
|
|
338
422
|
yaml.safe_dump(config_data, openfile)
|
|
@@ -363,14 +447,24 @@ class Qtable:
|
|
|
363
447
|
data = _read_csv_str_safe(
|
|
364
448
|
filepaths["data"], dtypes, **{"sep": "\t", "index_col": 0}
|
|
365
449
|
)
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
450
|
+
# This check is required for backwards compatibility with msreport <= 0.0.27
|
|
451
|
+
if "Design dtypes" in config_data:
|
|
452
|
+
design_dtypes = config_data["Design dtypes"]
|
|
453
|
+
design = _read_csv_str_safe(
|
|
454
|
+
filepaths["design"], design_dtypes, **{"sep": "\t", "index_col": 0}
|
|
455
|
+
)
|
|
456
|
+
else:
|
|
457
|
+
design = pd.read_csv(
|
|
458
|
+
filepaths["design"], sep="\t", index_col=0, keep_default_na=True
|
|
459
|
+
)
|
|
369
460
|
|
|
370
461
|
qtable = Qtable(data, design)
|
|
371
462
|
qtable._expression_columns = config_data["Expression columns"]
|
|
372
463
|
qtable._expression_features = config_data["Expression features"]
|
|
373
464
|
qtable._expression_sample_mapping = config_data["Expression sample mapping"]
|
|
465
|
+
# This check is required for backwards compatibility with msreport <= 0.0.27
|
|
466
|
+
if "Unique ID column" in config_data:
|
|
467
|
+
qtable._id_column = config_data["Unique ID column"]
|
|
374
468
|
return qtable
|
|
375
469
|
|
|
376
470
|
def to_tsv(self, path: str, index: bool = False):
|
|
@@ -388,7 +482,6 @@ class Qtable:
|
|
|
388
482
|
|
|
389
483
|
def copy(self) -> Qtable:
|
|
390
484
|
"""Returns a copy of this Qtable instance."""
|
|
391
|
-
# not tested #
|
|
392
485
|
return self.__copy__()
|
|
393
486
|
|
|
394
487
|
def _set_expression(
|
|
@@ -417,22 +510,22 @@ class Qtable:
|
|
|
417
510
|
samples = list(columns_to_samples.values())
|
|
418
511
|
|
|
419
512
|
if not expression_columns:
|
|
420
|
-
raise KeyError(
|
|
421
|
-
if not all(
|
|
513
|
+
raise KeyError("No expression columns matched in qtable")
|
|
514
|
+
if not all(e in data_columns for e in expression_columns):
|
|
422
515
|
exception_message = (
|
|
423
516
|
f"Not all specified columns {expression_columns} are present in the"
|
|
424
517
|
" qtable"
|
|
425
518
|
)
|
|
426
519
|
raise KeyError(exception_message)
|
|
427
|
-
if not all(
|
|
520
|
+
if not all(s in self.get_samples() for s in samples):
|
|
428
521
|
exception_message = (
|
|
429
522
|
f"Not all specified samples {samples} are present in the qtable.design"
|
|
430
523
|
)
|
|
431
524
|
raise ValueError(exception_message)
|
|
432
|
-
if not all(
|
|
525
|
+
if not all(s in samples for s in self.get_samples()):
|
|
433
526
|
exception_message = (
|
|
434
|
-
|
|
435
|
-
|
|
527
|
+
"Not all samples from qtable.design are also present in the specified"
|
|
528
|
+
"samples."
|
|
436
529
|
)
|
|
437
530
|
raise ValueError(exception_message)
|
|
438
531
|
|
|
@@ -477,7 +570,6 @@ class Qtable:
|
|
|
477
570
|
self._expression_sample_mapping = {}
|
|
478
571
|
|
|
479
572
|
def __copy__(self) -> Qtable:
|
|
480
|
-
# not tested #
|
|
481
573
|
new_instance = Qtable(self.data, self.design)
|
|
482
574
|
# Copy all private attributes
|
|
483
575
|
for attr in dir(self):
|
|
@@ -486,7 +578,7 @@ class Qtable:
|
|
|
486
578
|
and attr.startswith("_")
|
|
487
579
|
and not attr.startswith("__")
|
|
488
580
|
):
|
|
489
|
-
attr_values = self.__getattribute__(attr)
|
|
581
|
+
attr_values = copy.deepcopy(self.__getattribute__(attr))
|
|
490
582
|
new_instance.__setattr__(attr, attr_values)
|
|
491
583
|
return new_instance
|
|
492
584
|
|
|
@@ -521,7 +613,7 @@ def _match_samples_to_tag_columns(
|
|
|
521
613
|
"""
|
|
522
614
|
WHITESPACE_CHARS = " ."
|
|
523
615
|
|
|
524
|
-
mapping =
|
|
616
|
+
mapping = {}
|
|
525
617
|
for sample in samples:
|
|
526
618
|
for col in columns:
|
|
527
619
|
if col.replace(tag, "").replace(sample, "").strip(WHITESPACE_CHARS) == "":
|