msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msreport/plot/style.py ADDED
@@ -0,0 +1,286 @@
1
+ """Manage and apply custom plotting styles for the `msreport.plot` module.
2
+
3
+ Active styles in msreport are predefined or customizable Matplotlib style sheets that
4
+ are automatically applied to all plots generated within the library. By changing the
5
+ active style, users can define the rcParams used for styling the plots, such as color
6
+ and font settings.
7
+
8
+ The `set_active_style` function allows users to select style sheets from the msreport
9
+ library or any style sheets available in Matplotlib. Additionally, it supports passing
10
+ a dictionary of rcParams to further customize the active style. The additional
11
+ parameters are applied after the style sheet, potentially overriding settings from the
12
+ style sheet.
13
+
14
+ Available msreport style sheets:
15
+ - "msreport-notebook"
16
+ - "seaborn-whitegrid"
17
+ """
18
+
19
+ import colorsys
20
+ import functools
21
+ import pathlib
22
+ import re
23
+ from collections import UserDict
24
+ from contextlib import contextmanager
25
+ from typing import Any, Optional
26
+
27
+ import matplotlib.colors as mcolors
28
+ import matplotlib.pyplot as plt
29
+ import matplotlib.style
30
+
31
+ __all__ = ["ColorWheelDict", "set_active_style", "set_dpi"]
32
+
33
+
34
class ColorWheelDict(UserDict):
    """Lookup dictionary that maps keys to hex colors by using a color wheel.

    When a key is not present the first color of the color wheel is added as the value,
    and the color is moved from the beginning to the end of the color wheel. If no list
    of colors is specified, a default list of ten colors is added to the color wheel.
    It is also possible to manually set key and color pairs by using the same syntax as
    for a regular dictionary.
    """

    # Matches "#rgb" or "#rrggbb"; used with `fullmatch` so that nothing (not even a
    # trailing newline) may follow the hex digits.
    _HEX_COLOR_PATTERN = re.compile(r"#(?:[0-9a-fA-F]{3}){1,2}")

    def __init__(self, colors: Optional[list[str]] = None):
        """Initializes a ColorWheelDict.

        Args:
            colors: Optional, a list of hex colors used for the color wheel. By default
                a list with ten colors is used.
        """
        super().__init__()

        if colors is None:
            colors = [
                "#80b1d3",
                "#fdb462",
                "#8dd3c7",
                "#bebada",
                "#fb8072",
                "#b3de69",
                "#fccde5",
                "#d9d9d9",
                "#bc80bd",
                "#ccebc5",
            ]
        # Keep an own copy so that later changes to the caller's sequence do not
        # silently alter this instance.
        self.colors = list(colors)
        # The wheel is rotated on every new key, `self.colors` keeps the initial order.
        self._color_wheel = self.colors.copy()

    def modified_color(self, key: str, factor: float) -> str:
        """Returns a color for the specified key with modified lightness.

        Args:
            key: The key for which to get the color.
            factor: The factor by which to modify the lightness. Values > 1 lighten,
                < 1 darken.

        Returns:
            A hex color string with modified lightness.
        """
        return _modify_lightness_hex(self[key], factor)

    def _next_color(self) -> str:
        """Returns the first color of the wheel and moves it to the end."""
        color = self._color_wheel.pop(0)
        self._color_wheel.append(color)
        return color

    def __setitem__(self, key, value):
        """Assigns a hex color to a key.

        Raises:
            ValueError: If value is not a string of the form "#rgb" or "#rrggbb".
        """
        is_hexcolor = isinstance(value, str) and self._HEX_COLOR_PATTERN.fullmatch(
            value
        )
        if not is_hexcolor:
            raise ValueError(f"the specified value {value} is not a hexcolor.")
        self.data[key] = value

    def __getitem__(self, key):
        """Returns the color of a key, assigning the next wheel color to new keys."""
        if key not in self.data:
            self.data[key] = self._next_color()
        return self.data[key]
99
+
100
+
101
def set_dpi(dpi: int) -> None:
    """Set the default resolution (dots per inch) of matplotlib figures.

    A different dpi makes figures appear smaller or larger, while the relative
    sizes of the elements within a figure stay the same.

    Args:
        dpi: New default dots per inch.
    """
    rc_params = plt.rcParams
    rc_params["figure.dpi"] = dpi
111
+
112
+
113
@contextmanager
def use_active_style():
    """Apply the active msreport style for the duration of a ``with`` block.

    On exit the rc parameters are restored to the values they had on entry, with
    the exception of 'backend' and 'interactive'. These two are left untouched for
    compatibility with jupyter notebooks, which set up the backend and interactive
    mode automatically for inline plotting.
    """
    style_args = _get_active_style_context_arg()

    # Snapshot of the current rc parameters, used to undo the style afterwards.
    saved_params = dict(matplotlib.rcParams.copy())
    for excluded_key in ("backend", "interactive"):
        del saved_params[excluded_key]

    try:
        matplotlib.style.use(style_args)
        yield
    finally:
        # `.update` is used rather than `._update_raw` to stay compatible with
        # older matplotlib versions.
        matplotlib.rcParams.update(saved_params)
132
+
133
+
134
def with_active_style(func):
    """Decorate a function so that it runs inside the active style context."""

    @functools.wraps(func)
    def styled_call(*args, **kwargs):
        with use_active_style():
            result = func(*args, **kwargs)
        return result

    return styled_call
143
+
144
+
145
+ def set_active_style(style: str | None, rc: dict[str, Any] | None = None):
146
+ """Set the active plotting style for the msreport.plot submodule.
147
+
148
+ The chosen style, potentially modified by the rc dictionary, will be
149
+ applied temporarily using a context manager within the library's
150
+ plotting functions. This does not modify the global matplotlib rcParams
151
+ permanently.
152
+
153
+ Args:
154
+ style: The name of the base style to activate. This can be one of the
155
+ built-in msreport styles (e.g., 'notebook', 'powerpoint'),
156
+ a standard matplotlib style, or a style registered by another
157
+ library like Seaborn (if available).
158
+ rc: An optional dictionary mapping matplotlib rcParams names (strings)
159
+ to their desired values. These settings will be applied *after*
160
+ the base style, overriding any conflicting parameters from the
161
+ base style for the duration of the plot context.
162
+
163
+ Raises:
164
+ ValueError: If the specified base style name is not found among the
165
+ library's styles or the available matplotlib styles.
166
+ TypeError: If rc is not a dictionary or None.
167
+ """
168
+ global _active_style_name, _active_style_rc_override
169
+
170
+ if style is not None and style not in _AVAILABLE_STYLES:
171
+ current_available = _get_available_styles()
172
+ if style not in current_available:
173
+ raise ValueError(
174
+ f"Style '{style}' not found. Available styles are: "
175
+ f"{', '.join(current_available)}"
176
+ )
177
+
178
+ if rc is not None and not isinstance(rc, dict):
179
+ raise TypeError(f"rc argument must be a dictionary or None, got {type(rc)}")
180
+
181
+ _active_style_name = style
182
+ _active_style_rc_override = rc.copy() if rc is not None else None
183
+
184
+
185
+ def get_active_style() -> str | None:
186
+ """Return the name of the currently active 'msreport.plot' plotting style."""
187
+ return _active_style_name
188
+
189
+
190
+ def get_active_override() -> dict[str, Any] | None:
191
+ """Return the currently active rcParam overrides for the 'msreport.plot' style."""
192
+ return _active_style_rc_override
193
+
194
+
195
def _get_active_style_context_arg() -> list[str | dict[str, Any]]:
    """Build the style argument that represents the active style.

    The active base style and the active rcParam overrides are combined into a
    single list, with the overrides placed after the base style. Styles shipped
    with msreport are referenced by the path of their style sheet, all other
    styles by their name.

    Returns:
        A list containing the style name or path (if a style is active), followed
        by the rcParam override dictionary (if overrides are active).
    """
    style_name = get_active_style()
    rc_override = get_active_override()

    context_args: list[str | dict[str, Any]] = []
    if style_name is not None:
        # Library styles map to their file path, other names are passed through.
        context_args.append(_LIBRARY_STYLE_PATHS.get(style_name, style_name))
    if rc_override is not None:
        context_args.append(rc_override)
    return context_args
219
+
220
+
221
+ def _modify_lightness_rgb(
222
+ rgb_color: tuple[float, float, float], lightness_scale_factor: float
223
+ ) -> tuple[float, float, float]:
224
+ """Modifies the lightness of a color while preserving hue and saturation.
225
+
226
+ Parameters:
227
+ rgb_color: A tuple of RGB values in the range [0, 1]
228
+ lightness_scale_factor: Factor to scale the lightness by (values > 1 lighten, < 1 darken)
229
+
230
+ Returns:
231
+ A tuple of RGB values with adjusted lightness
232
+ """
233
+ hue, lightness, saturation = colorsys.rgb_to_hls(*rgb_color)
234
+ new_lightness = min(1.0, lightness * lightness_scale_factor)
235
+ return colorsys.hls_to_rgb(hue, new_lightness, saturation)
236
+
237
+
238
def _modify_lightness_hex(hex_color: str, lightness_scale_factor: float) -> str:
    """Scales the lightness of a hex color, keeping its hue and saturation.

    Args:
        hex_color: A hex color string (e.g., "#80b1d3").
        lightness_scale_factor: Factor to scale the lightness by (values > 1 lighten,
            < 1 darken).

    Returns:
        A hex color string with adjusted lightness.
    """
    adjusted_rgb = _modify_lightness_rgb(
        mcolors.to_rgb(hex_color), lightness_scale_factor
    )
    return mcolors.to_hex(adjusted_rgb)
251
+
252
+
253
+ def _get_library_styles() -> dict[str, str]:
254
+ """Scan the style directory and returns a dict of available library styles.
255
+
256
+ Returns:
257
+ A dictionary mapping style names (without extension) to their full paths.
258
+ Returns an empty dictionary if the style directory doesn't exist or is empty.
259
+ """
260
+ styles = {}
261
+ try:
262
+ for filepath in pathlib.Path(_STYLE_DIR).iterdir():
263
+ if filepath.suffix == ".mplstyle":
264
+ styles[filepath.stem] = filepath.resolve().as_posix()
265
+ except OSError as err:
266
+ raise OSError(
267
+ f"Could not read 'msreport.plot' style directory {_STYLE_DIR}: {err}. "
268
+ "Please check if the directory exists and is accessible."
269
+ ) from err
270
+
271
+ return styles
272
+
273
+
274
def _get_available_styles() -> list[str]:
    """Get a list of all available style names from library and matplotlib.

    Returns:
        An alphabetically sorted list of unique style names, combining the msreport
        library styles with the styles reported by `matplotlib.style.available`.
    """
    lib_styles = _get_library_styles()
    mpl_styles = matplotlib.style.available
    # Sort the merged names so the result (and error messages built from it) is
    # deterministic instead of following arbitrary set ordering.
    return sorted(set(lib_styles) | set(mpl_styles))
279
+
280
+
281
# Absolute POSIX-style path of the "style_sheets" directory located next to this file.
_STYLE_DIR: str = (
    pathlib.Path(__file__).parent.joinpath("style_sheets").resolve().as_posix()
)
# Style names known when this module is imported (library and matplotlib styles).
_AVAILABLE_STYLES: list[str] = _get_available_styles()
# Maps the names of the library style sheets to their file paths.
_LIBRARY_STYLE_PATHS: dict[str, str] = _get_library_styles()
# Style that is active until `set_active_style` is called.
_DEFAULT_STYLE: str = "msreport-notebook"
# Mutable module state, changed by `set_active_style`.
_active_style_name: str | None = _DEFAULT_STYLE
_active_style_rc_override: dict[str, Any] | None = None
@@ -0,0 +1,57 @@
1
+ ## LINES
2
+ lines.solid_capstyle: round
3
+
4
+ ## PATCHES
5
+ patch.edgecolor: white
6
+ patch.force_edgecolor: True
7
+
8
+ ## FONT
9
+ font.family: sans-serif
10
+ font.sans-serif: Arial, DejaVu Sans, Liberation Sans, Bitstream Vera Sans, sans-serif
11
+ font.size: 10
12
+
13
+ ## TEXT
14
+ text.color: black
15
+
16
+ ## FIGURE
17
+ figure.facecolor: white
18
+ figure.titlesize: 12
19
+
20
+ ## AXES
21
+ axes.facecolor: white
22
+ axes.edgecolor: black # Color of the axes border / spines
23
+ axes.linewidth: 1 # Width of the axes border / spines
24
+ axes.labelcolor: black
25
+ axes.labelsize: 10
26
+ axes.axisbelow: True
27
+ axes.grid: True
28
+ axes.titlesize: 10
29
+ axes.spines.left: True
30
+ axes.spines.bottom: True
31
+ axes.spines.right: True
32
+ axes.spines.top: True
33
+
34
+ ## TICKS
35
+ xtick.top: False
36
+ xtick.bottom: False
37
+ xtick.color: black
38
+ xtick.direction: out
39
+ xtick.major.width: 1
40
+ xtick.labelsize: 8
41
+
42
+ ytick.left: False
43
+ ytick.right: False
44
+ ytick.color: black
45
+ ytick.direction: out
46
+ ytick.major.width: 1
47
+ ytick.labelsize: 8
48
+
49
+ ## GRIDS
50
+ grid.alpha: 1.0
51
+ grid.color: 0.8
52
+ grid.linestyle: dashed
53
+ grid.linewidth: 1
54
+
55
+ ## LEGEND
56
+ legend.fontsize: 10
57
+ legend.title_fontsize: None # Set to None to use the same as axes.titlesize
@@ -0,0 +1,45 @@
1
+ ## LINES
2
+ lines.solid_capstyle: round
3
+
4
+ ## PATCHES
5
+ patch.edgecolor: white
6
+ patch.force_edgecolor: True
7
+
8
+ ## FONT
9
+ font.family: sans-serif
10
+ font.sans-serif: Arial, DejaVu Sans, Liberation Sans, Bitstream Vera Sans, sans-serif
11
+
12
+ ## TEXT
13
+ text.color: 0.15
14
+
15
+ ## FIGURE
16
+ figure.facecolor: white
17
+
18
+ ## AXES
19
+ axes.facecolor: white
20
+ axes.edgecolor: 0.15
21
+ axes.labelcolor: 0.15
22
+ axes.axisbelow: True
23
+ axes.grid: True
24
+ axes.spines.left: True
25
+ axes.spines.bottom: True
26
+ axes.spines.right: True
27
+ axes.spines.top: True
28
+
29
+ ## TICKS
30
+ xtick.top: False
31
+ xtick.bottom: False
32
+ xtick.color: 0.15
33
+ xtick.direction: out
34
+
35
+ ytick.left: False
36
+ ytick.right: False
37
+ ytick.color: 0.15
38
+ ytick.direction: out
39
+
40
+ ## GRID
41
+ grid.color: 0.8
42
+ grid.linestyle: -
43
+
44
+ ## IMAGE
45
+ image.cmap: rocket
msreport/qtable.py CHANGED
@@ -1,7 +1,10 @@
1
1
  from __future__ import annotations
2
- from typing import Any, Iterable, Optional
2
+
3
+ import copy
3
4
  import os
4
5
  import warnings
6
+ from contextlib import contextmanager
7
+ from typing import Any, Iterable, Optional
5
8
 
6
9
  import numpy as np
7
10
  import pandas as pd
@@ -24,7 +27,14 @@ class Qtable:
24
27
  design: A pandas.DataFrame describing the experimental design.
25
28
  """
26
29
 
27
- def __init__(self, data: pd.DataFrame, design: Optional[pd.DataFrame] = None):
30
+ _default_id_column = "Representative protein"
31
+
32
+ def __init__(
33
+ self,
34
+ data: pd.DataFrame,
35
+ design: Optional[pd.DataFrame] = None,
36
+ id_column: str = "Representative protein",
37
+ ):
28
38
  """Initializes the Qtable.
29
39
 
30
40
  If data does not contain a "Valid" column, this column is added and all its row
@@ -36,11 +46,34 @@ class Qtable:
36
46
  contain the columns "Sample" and "Experiment". The "Sample" entries
37
47
  should correspond to the Sample names present in the quantitative
38
48
  columns of the data.
49
+ id_column: The name of the column that contains the unique identifiers for
50
+ the entries in the data table. Default is "Representative protein".
51
+
52
+ Raises:
53
+ KeyError: If the specified id_column is not found in data.
54
+ ValueError: If the specified id_column does not contain unique identifiers.
39
55
  """
40
56
  self.design: pd.DataFrame
41
57
  self.data: pd.DataFrame
58
+ self._id_column: str
59
+
60
+ if not data.index.is_unique:
61
+ raise ValueError(
62
+ "The index of the 'data' table must contain unique values."
63
+ )
64
+ if id_column not in data.columns:
65
+ raise KeyError(
66
+ f"Column '{id_column}' not found in 'data'. Please specify a valid "
67
+ "column that contains unique identifiers for the entries in 'data'."
68
+ )
69
+ if not data[id_column].is_unique:
70
+ raise ValueError(
71
+ f"Column '{id_column}' in 'data' table must contain unique identifiers"
72
+ ", i.e. no duplicated values. Please provide a valid 'id_column'."
73
+ )
42
74
 
43
75
  self.data = data.copy()
76
+ self._id_column = id_column
44
77
  if "Valid" not in self.data.columns:
45
78
  self.data["Valid"] = True
46
79
  if design is not None:
@@ -73,12 +106,12 @@ class Qtable:
73
106
  """
74
107
  columns = design.columns.tolist()
75
108
  required_columns = ["Experiment", "Sample", "Replicate"]
76
- if not all([c in columns for c in required_columns]):
109
+ if not all(c in columns for c in required_columns):
77
110
  exception_message = "".join(
78
111
  [
79
112
  "The design table must at least contain the columns: ",
80
113
  ", ".join(f'"{c}"' for c in required_columns),
81
- ". " "It only contains the columns: ",
114
+ ". It only contains the columns: ",
82
115
  ", ".join(f'"{c}"' for c in columns),
83
116
  ".",
84
117
  ]
@@ -105,6 +138,11 @@ class Qtable:
105
138
  """Returns a copy of the design table."""
106
139
  return self.design.copy()
107
140
 
141
@property
def id_column(self) -> str:
    """Name of the column that holds the unique identifiers of the data entries."""
    column_name = self._id_column
    return column_name
145
+
108
146
  def get_samples(self, experiment: Optional[str] = None) -> list[str]:
109
147
  """Returns a list of samples present in the design table.
110
148
 
@@ -315,6 +353,50 @@ class Qtable:
315
353
  expression_features.columns.difference(self._expression_features)
316
354
  )
317
355
 
356
+ @contextmanager
357
+ def temp_design(
358
+ self,
359
+ design: Optional[pd.DataFrame] = None,
360
+ exclude_experiments: Optional[Iterable[str]] = None,
361
+ keep_experiments: Optional[Iterable[str]] = None,
362
+ exclude_samples: Optional[Iterable[str]] = None,
363
+ keep_samples: Optional[Iterable[str]] = None,
364
+ ):
365
+ """Context manager to temporarily modify the design table.
366
+
367
+ Args:
368
+ design: A DataFrame to temporarily replace the current design table.
369
+ exclude_experiments: A list of experiments to exclude from the design.
370
+ keep_experiments: A list of experiments to keep in the design (all others are removed).
371
+ exclude_samples: A list of samples to exclude from the design.
372
+ keep_samples: A list of samples to keep in the design (all others are removed).
373
+
374
+ Yields:
375
+ None. Restores the original design table after the context ends.
376
+ """
377
+ original_design = self.design
378
+
379
+ _design: pd.DataFrame
380
+ if design is None:
381
+ _design = self.get_design()
382
+ else:
383
+ _design = design
384
+
385
+ if exclude_experiments is not None:
386
+ _design = _design[~_design["Experiment"].isin(exclude_experiments)]
387
+ if keep_experiments is not None:
388
+ _design = _design[_design["Experiment"].isin(keep_experiments)]
389
+ if exclude_samples is not None:
390
+ _design = _design[~_design["Sample"].isin(exclude_samples)]
391
+ if keep_samples is not None:
392
+ _design = _design[_design["Sample"].isin(keep_samples)]
393
+
394
+ try:
395
+ self.add_design(_design)
396
+ yield
397
+ finally:
398
+ self.add_design(original_design)
399
+
318
400
  def save(self, directory: str, basename: str):
319
401
  """Save qtable to disk, creating a data, design, and config file.
320
402
 
@@ -333,6 +415,8 @@ class Qtable:
333
415
  "Expression features": self._expression_features,
334
416
  "Expression sample mapping": self._expression_sample_mapping,
335
417
  "Data dtypes": self.data.dtypes.astype(str).to_dict(),
418
+ "Design dtypes": self.design.dtypes.astype(str).to_dict(),
419
+ "Unique ID column": self._id_column,
336
420
  }
337
421
  with open(filepaths["config"], "w") as openfile:
338
422
  yaml.safe_dump(config_data, openfile)
@@ -363,14 +447,24 @@ class Qtable:
363
447
  data = _read_csv_str_safe(
364
448
  filepaths["data"], dtypes, **{"sep": "\t", "index_col": 0}
365
449
  )
366
- design = pd.read_csv(
367
- filepaths["design"], sep="\t", index_col=0, keep_default_na=True
368
- )
450
+ # This check is required for backwards compatibility with msreport <= 0.0.27
451
+ if "Design dtypes" in config_data:
452
+ design_dtypes = config_data["Design dtypes"]
453
+ design = _read_csv_str_safe(
454
+ filepaths["design"], design_dtypes, **{"sep": "\t", "index_col": 0}
455
+ )
456
+ else:
457
+ design = pd.read_csv(
458
+ filepaths["design"], sep="\t", index_col=0, keep_default_na=True
459
+ )
369
460
 
370
461
  qtable = Qtable(data, design)
371
462
  qtable._expression_columns = config_data["Expression columns"]
372
463
  qtable._expression_features = config_data["Expression features"]
373
464
  qtable._expression_sample_mapping = config_data["Expression sample mapping"]
465
+ # This check is required for backwards compatibility with msreport <= 0.0.27
466
+ if "Unique ID column" in config_data:
467
+ qtable._id_column = config_data["Unique ID column"]
374
468
  return qtable
375
469
 
376
470
  def to_tsv(self, path: str, index: bool = False):
@@ -388,7 +482,6 @@ class Qtable:
388
482
 
389
483
def copy(self) -> Qtable:
    """Create a copy of this Qtable instance by calling its ``__copy__`` method."""
    duplicate = self.__copy__()
    return duplicate
393
486
 
394
487
  def _set_expression(
@@ -417,22 +510,22 @@ class Qtable:
417
510
  samples = list(columns_to_samples.values())
418
511
 
419
512
  if not expression_columns:
420
- raise KeyError(f"No expression columns matched in qtable")
421
- if not all([e in data_columns for e in expression_columns]):
513
+ raise KeyError("No expression columns matched in qtable")
514
+ if not all(e in data_columns for e in expression_columns):
422
515
  exception_message = (
423
516
  f"Not all specified columns {expression_columns} are present in the"
424
517
  " qtable"
425
518
  )
426
519
  raise KeyError(exception_message)
427
- if not all([s in self.get_samples() for s in samples]):
520
+ if not all(s in self.get_samples() for s in samples):
428
521
  exception_message = (
429
522
  f"Not all specified samples {samples} are present in the qtable.design"
430
523
  )
431
524
  raise ValueError(exception_message)
432
- if not all([s in samples for s in self.get_samples()]):
525
+ if not all(s in samples for s in self.get_samples()):
433
526
  exception_message = (
434
- f"Not all samples from qtable.design are also present in the specified"
435
- f"samples."
527
+ "Not all samples from qtable.design are also present in the specified"
528
+ "samples."
436
529
  )
437
530
  raise ValueError(exception_message)
438
531
 
@@ -477,7 +570,6 @@ class Qtable:
477
570
  self._expression_sample_mapping = {}
478
571
 
479
572
  def __copy__(self) -> Qtable:
480
- # not tested #
481
573
  new_instance = Qtable(self.data, self.design)
482
574
  # Copy all private attributes
483
575
  for attr in dir(self):
@@ -486,7 +578,7 @@ class Qtable:
486
578
  and attr.startswith("_")
487
579
  and not attr.startswith("__")
488
580
  ):
489
- attr_values = self.__getattribute__(attr).copy()
581
+ attr_values = copy.deepcopy(self.__getattribute__(attr))
490
582
  new_instance.__setattr__(attr, attr_values)
491
583
  return new_instance
492
584
 
@@ -521,7 +613,7 @@ def _match_samples_to_tag_columns(
521
613
  """
522
614
  WHITESPACE_CHARS = " ."
523
615
 
524
- mapping = dict()
616
+ mapping = {}
525
617
  for sample in samples:
526
618
  for col in columns:
527
619
  if col.replace(tag, "").replace(sample, "").strip(WHITESPACE_CHARS) == "":