msreport 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msreport/__init__.py CHANGED
@@ -8,4 +8,4 @@ from msreport.fasta import import_protein_database
8
8
  from msreport.qtable import Qtable
9
9
  from msreport.reader import FragPipeReader, MaxQuantReader, SpectronautReader
10
10
 
11
- __version__ = "0.0.30"
11
+ __version__ = "0.0.32"
@@ -0,0 +1,10 @@
1
+ """A comprehensive set of tools for aggregating and reshaping tabular proteomics data.
2
+
3
+ The `aggregation` module contains submodules that offer functionalities to transform
4
+ data from lower levels of abstraction (e.g. ions, peptides) to higher levels (e.g.
5
+ peptides, proteins, PTMs) through various summarization and condensation techniques.
6
+ It also includes methods for reshaping tables from "long" to "wide" format, a common
7
+ prerequisite for aggregation. The MaxLFQ algorithm is integrated for specific
8
+ quantitative summarizations, enabling users to build customized, higher-level data
9
+ tables.
10
+ """
@@ -1,3 +1,12 @@
1
+ """Low-level functions for aggregating numerical and string data.
2
+
3
+ This module defines fundamental "condenser" functions that operate directly on NumPy
4
+ arrays. These functions are designed to be applied to groups of data, performing
5
+ operations such as summing values, finding maximum/minimum, counting or joining unique
6
+ elements, and calculating abundance profiles. It includes the core implementations for
7
+ MaxLFQ summation.
8
+ """
9
+
1
10
  import numpy as np
2
11
 
3
12
  import msreport.helper.maxlfq as MAXLFQ
@@ -1,4 +1,12 @@
1
- from typing import Iterable, Union
1
+ """Functionalities for reshaping tabular quantitative proteomics data.
2
+
3
+ This module offers methods to transform data from a "long" format into a "wide" format,
4
+ which is a common and often necessary step before aggregation or analysis. It supports
5
+ pivoting data based on specified index and grouping columns, and can handle both
6
+ quantitative values and annotation columns.
7
+ """
8
+
9
+ from typing import Iterable
2
10
 
3
11
  import pandas as pd
4
12
 
@@ -12,11 +20,12 @@ def pivot_table(
12
20
  group_by: str,
13
21
  annotation_columns: Iterable[str],
14
22
  pivoting_columns: Iterable[str],
15
- ):
23
+ ) -> pd.DataFrame:
16
24
  """Generates a pivoted table in wide format.
17
25
 
18
26
  Args:
19
- table: Dataframe in long format that is used to generate a table in wide format.
27
+ long_table: Dataframe in long format that is used to generate a table in wide
28
+ format.
20
29
  index: One or multiple column names that are used to group the table for
21
30
  pivoting.
22
31
  group_by: Column that is used to split the table on its unique entries.
@@ -58,7 +67,7 @@ def pivot_table(
58
67
 
59
68
 
60
69
  def pivot_column(
61
- table: pd.DataFrame, index: Union[str, Iterable], group_by: str, values: str
70
+ table: pd.DataFrame, index: str | Iterable[str], group_by: str, values: str
62
71
  ) -> pd.DataFrame:
63
72
  """Returns a reshaped dataframe, generated by pivoting the table on one column.
64
73
 
@@ -98,7 +107,7 @@ def pivot_column(
98
107
 
99
108
 
100
109
  def join_unique(
101
- table: pd.DataFrame, index: Union[str, Iterable], values: str
110
+ table: pd.DataFrame, index: str | Iterable[str], values: str
102
111
  ) -> pd.DataFrame:
103
112
  """Returns a new dataframe with unique values from a column and grouped by 'index'.
104
113
 
@@ -1,4 +1,14 @@
1
- from typing import Callable, Iterable, Optional, Union
1
+ """High-level functions for aggregating quantitative proteomics data.
2
+
3
+ This module offers functions to summarize data from a lower level of abstraction (e.g.
4
+ ions, peptides) to a higher level (e.g., peptides, proteins, PTMs). It operates directly
5
+ on pandas DataFrames, allowing users to specify a grouping column and the columns to be
6
+ summarized. These functions often leverage low-level condenser operations defined in
7
+ `msreport.aggregate.condense`. It includes specific functions for MaxLFQ summation, as
8
+ well as general counting, joining, and summing of columns.
9
+ """
10
+
11
+ from typing import Callable, Iterable, Optional
2
12
 
3
13
  import numpy as np
4
14
  import pandas as pd
@@ -10,7 +20,7 @@ from msreport.helper import find_sample_columns
10
20
  def count_unique(
11
21
  table: pd.DataFrame,
12
22
  group_by: str,
13
- input_column: Union[str, Iterable],
23
+ input_column: str | Iterable[str],
14
24
  output_column: str = "Unique counts",
15
25
  is_sorted: bool = False,
16
26
  ) -> pd.DataFrame:
@@ -55,7 +65,7 @@ def count_unique(
55
65
  def join_unique(
56
66
  table: pd.DataFrame,
57
67
  group_by: str,
58
- input_column: Union[str, Iterable],
68
+ input_column: str | Iterable[str],
59
69
  output_column: str = "Unique values",
60
70
  sep: str = ";",
61
71
  is_sorted: bool = False,
@@ -215,7 +225,7 @@ def sum_columns_maxlfq(
215
225
  def aggregate_unique_groups(
216
226
  table: pd.DataFrame,
217
227
  group_by: str,
218
- columns_to_aggregate: Union[str, Iterable],
228
+ columns_to_aggregate: str | Iterable[str],
219
229
  condenser: Callable,
220
230
  is_sorted: bool,
221
231
  ) -> tuple[np.ndarray, np.ndarray]:
msreport/analyze.py CHANGED
@@ -1,12 +1,16 @@
1
- """The analyze module contains methods for analysing quantification results."""
1
+ """Tools for post-processing and statistical analysis of `Qtable` data.
2
2
 
3
- from __future__ import annotations
3
+ All functions in this module take a `Qtable` object and modify its data in place. The
4
+ module provides functionality for data evaluation, normalization, imputation of missing
5
+ values, and statistical testing, including integration with R's LIMMA package.
6
+ """
4
7
 
5
8
  import warnings
6
9
  from typing import Iterable, Optional, Protocol, Sequence
7
10
 
8
11
  import numpy as np
9
12
  import pandas as pd
13
+ from typing_extensions import Self
10
14
 
11
15
  import msreport.normalize
12
16
  from msreport.errors import OptionalDependencyError
@@ -24,7 +28,7 @@ except OptionalDependencyError as err:
24
28
 
25
29
 
26
30
  class Transformer(Protocol):
27
- def fit(self, table: pd.DataFrame) -> Transformer:
31
+ def fit(self, table: pd.DataFrame) -> Self:
28
32
  """Fits the Transformer and returns a fitted Transformer instance."""
29
33
 
30
34
  def is_fitted(self) -> bool:
@@ -35,7 +39,7 @@ class Transformer(Protocol):
35
39
 
36
40
 
37
41
  class CategoryTransformer(Protocol):
38
- def fit(self, table: pd.DataFrame) -> Transformer:
42
+ def fit(self, table: pd.DataFrame) -> Self:
39
43
  """Fits the Transformer and returns a fitted Transformer instance."""
40
44
 
41
45
  def is_fitted(self) -> bool:
@@ -162,7 +166,7 @@ def validate_proteins(
162
166
 
163
167
 
164
168
  def apply_transformer(
165
- qtable: msreport.Qtable,
169
+ qtable: Qtable,
166
170
  transformer: Transformer,
167
171
  tag: str,
168
172
  exclude_invalid: bool,
@@ -205,6 +209,64 @@ def apply_transformer(
205
209
  qtable.data[data_table.columns] = data_table
206
210
 
207
211
 
212
+ def apply_category_transformer(
213
+ qtable: Qtable,
214
+ transformer: CategoryTransformer,
215
+ tag: str,
216
+ exclude_invalid: bool,
217
+ remove_invalid: bool,
218
+ new_tag: Optional[str] = None,
219
+ ) -> None:
220
+ """Apply a category transformer to Qtable columns selected by tag.
221
+
222
+ Args:
223
+ qtable: A Qtable instance, to which the transformer is applied.
224
+ transformer: The CategoryTransformer to apply.
225
+ tag: The tag used to identify the columns for applying the transformer.
226
+ exclude_invalid: Exclude invalid values from the transformation.
227
+ remove_invalid: Remove invalid values from the table after the transformation.
228
+ new_tag: Optional, if specified then the tag is replaced with this value in the
229
+ column names and the transformed data is stored to these new columns.
230
+
231
+ Raises:
232
+ KeyError: If the category column of the `transformer` is not found in the
233
+ `qtable.data`.
234
+ ValueError: If no sample columns are found for the specified tag.
235
+ """
236
+ category_column = transformer.get_category_column()
237
+ if category_column not in qtable.data.columns:
238
+ raise KeyError(
239
+ f'The category column "{category_column}" in the transformer '
240
+ f"is not found in `qtable.data`."
241
+ )
242
+
243
+ valid = qtable.data["Valid"]
244
+ samples = qtable.get_samples()
245
+ sample_columns = find_sample_columns(qtable.data, tag, samples)
246
+
247
+ if not sample_columns:
248
+ raise ValueError(f"No sample columns found for tag '{tag}'.")
249
+
250
+ if new_tag is not None:
251
+ sample_columns = [c.replace(tag, new_tag) for c in sample_columns]
252
+ column_mapping = dict(zip(samples, sample_columns))
253
+
254
+ data_table = qtable.make_sample_table(tag, samples_as_columns=True)
255
+ data_table[category_column] = qtable.data[category_column]
256
+
257
+ if exclude_invalid:
258
+ data_table.loc[valid, :] = transformer.transform(data_table.loc[valid, :])
259
+ else:
260
+ data_table = transformer.transform(data_table)
261
+ data_table = data_table.drop(columns=[category_column])
262
+
263
+ if remove_invalid:
264
+ data_table[~valid] = np.nan
265
+
266
+ data_table.columns = [column_mapping[s] for s in data_table.columns]
267
+ qtable.data[data_table.columns] = data_table
268
+
269
+
208
270
  def normalize_expression(
209
271
  qtable: Qtable,
210
272
  normalizer: Transformer,
msreport/export.py CHANGED
@@ -1,19 +1,13 @@
1
- """
2
- Columns that are not yet present in the amica output at the moment:
3
- Index([
4
- 'Protein Probability',
5
- 'Top Peptide Probability',
6
- 'Total peptides',
7
- 'Leading proteins',
8
- 'Protein entry name',
9
- 'Fasta header',
10
- 'Protein length',
11
- 'iBAQ peptides',
12
- 'Sequence coverage',
13
- ], dtype='object')
1
+ """Exporting of proteomics data from `Qtable` into external formats.
2
+
3
+ This module offers functionalities to convert and save `Qtable` data into files
4
+ compatible with external tools (Amica and Perseus), and creating sequence coverage maps
5
+ in HTML format. While most functions operate on `Qtable` instances, some may accept
6
+ other data structures.
14
7
  """
15
8
 
16
9
  import os
10
+ import pathlib
17
11
  import warnings
18
12
  from collections import defaultdict as ddict
19
13
  from typing import Iterable, Optional, Protocol, Sequence
@@ -99,7 +93,7 @@ def contaminants_to_clipboard(qtable: Qtable) -> None:
99
93
 
100
94
  def to_perseus_matrix(
101
95
  qtable: Qtable,
102
- directory,
96
+ directory: str | pathlib.Path,
103
97
  table_name: str = "perseus_matrix.tsv",
104
98
  ) -> None:
105
99
  """Exports a qtable to a perseus matrix file in tsv format.
@@ -151,7 +145,7 @@ def to_perseus_matrix(
151
145
 
152
146
  def to_amica(
153
147
  qtable: Qtable,
154
- directory,
148
+ directory: str | pathlib.Path,
155
149
  table_name: str = "amica_table.tsv",
156
150
  design_name: str = "amica_design.tsv",
157
151
  ) -> None:
@@ -508,7 +502,7 @@ def _find_covered_region_boundaries(
508
502
  Examples:
509
503
  >>> coverage_mask = [True, True, False, False, True]
510
504
  >>> _find_covered_region_boundaries(coverage_mask)
511
- ... [(0, 1), (4, 4)]
505
+ [(0, 1), (4, 4)]
512
506
  """
513
507
  start = []
514
508
  stop = []
msreport/fasta.py CHANGED
@@ -1,11 +1,18 @@
1
+ """Functionalities for import and access to protein sequence databases from FASTA files.
2
+
3
+ This module serves as an interface to the `profasta` library, offering a convenient way
4
+ to generate a `profasta.db.ProteinDatabase` from one or multiple FASTA files. It
5
+ supports custom FASTA header parsing through a configurable header parser.
6
+ """
7
+
1
8
  import pathlib
2
- from typing import Iterable, Union
9
+ from typing import Iterable
3
10
 
4
11
  from profasta.db import ProteinDatabase
5
12
 
6
13
 
7
14
  def import_protein_database(
8
- fasta_path: Union[str, pathlib.Path, Iterable[Union[str, pathlib.Path]]],
15
+ fasta_path: str | pathlib.Path | Iterable[str | pathlib.Path],
9
16
  header_parser: str = "uniprot",
10
17
  ) -> ProteinDatabase:
11
18
  """Generates a protein database from one or a list of fasta files.
@@ -1,3 +1,9 @@
1
+ """A collection of widely used helper and utility functions.
2
+
3
+ This module re-exports commonly used functions from various `msreport.helper`
4
+ submodules for convenience.
5
+ """
6
+
1
7
  from .calc import (
2
8
  calculate_monoisotopic_mass,
3
9
  calculate_sequence_coverage,
@@ -21,3 +27,15 @@ from .temp import (
21
27
  extract_modifications,
22
28
  modify_peptide,
23
29
  )
30
+
31
+ __all__ = [
32
+ "apply_intensity_cutoff",
33
+ "find_columns",
34
+ "find_sample_columns",
35
+ "guess_design",
36
+ "intensities_in_logspace",
37
+ "keep_rows_by_partial_match",
38
+ "remove_rows_by_partial_match",
39
+ "rename_mq_reporter_channels",
40
+ "rename_sample_columns",
41
+ ]
msreport/helper/maxlfq.py CHANGED
@@ -113,9 +113,9 @@ def calculate_pairwise_mode_log_ratio_matrix(
113
113
  ... ]
114
114
  ... )
115
115
  >>> calculate_pairwise_mode_log_ratio_matrix(array)
116
- array([[ 0. , -0.0849625, -1. ],
117
- [ 0.0849625, 0. , -1. ],
118
- [ 1. , 1. , 0. ]])
116
+ array([[ 0. , -0.08496251, -1. ],
117
+ [ 0.08496251, 0. , -1. ],
118
+ [ 1. , 1. , 0. ]])
119
119
  """
120
120
  ratio_marix = _calculate_pairwise_centered_log_ratio_matrix(
121
121
  array, msreport.helper.mode, log_transformed=log_transformed
msreport/impute.py CHANGED
@@ -1,9 +1,17 @@
1
- from __future__ import annotations
1
+ """Transformer classes for imputing missing values in quantitative proteomics data.
2
+
3
+ This module defines transformer classes that can be fitted to a table containing
4
+ quantitative values to learn imputation parameters. Once fitted, these transformers can
5
+ then be applied to another table to transform it by filling in missing values. The
6
+ transformation returns a new copy of the table with the imputed values, leaving the
7
+ original table unchanged.
8
+ """
2
9
 
3
10
  from typing import Any, Optional
4
11
 
5
12
  import numpy as np
6
13
  import pandas as pd
14
+ from typing_extensions import Self
7
15
 
8
16
  from msreport.errors import NotFittedError
9
17
 
@@ -42,7 +50,7 @@ class FixedValueImputer:
42
50
  self.column_wise = column_wise
43
51
  self._sample_fill_values: dict[str, float] = {}
44
52
 
45
- def fit(self, table: pd.DataFrame) -> FixedValueImputer:
53
+ def fit(self, table: pd.DataFrame) -> Self:
46
54
  """Fits the FixedValueImputer.
47
55
 
48
56
  Args:
@@ -79,7 +87,7 @@ class FixedValueImputer:
79
87
  Returns:
80
88
  'table' with imputed missing values.
81
89
  """
82
- confirm_is_fitted(self)
90
+ _confirm_is_fitted(self)
83
91
 
84
92
  _table = table.copy()
85
93
  for column in _table.columns:
@@ -108,7 +116,7 @@ class GaussianImputer:
108
116
  self.sigma = sigma
109
117
  self.seed = seed
110
118
 
111
- def fit(self, table: pd.DataFrame) -> GaussianImputer:
119
+ def fit(self, table: pd.DataFrame) -> Self:
112
120
  """Fits the GaussianImputer, altough this is not necessary.
113
121
 
114
122
  Args:
@@ -134,7 +142,7 @@ class GaussianImputer:
134
142
  Returns:
135
143
  'table' with imputed missing values.
136
144
  """
137
- confirm_is_fitted(self)
145
+ _confirm_is_fitted(self)
138
146
  np.random.seed(self.seed)
139
147
 
140
148
  _table = table.copy()
@@ -182,9 +190,9 @@ class PerseusImputer:
182
190
  self.std_width = std_width
183
191
  self.column_wise = column_wise
184
192
  self.seed = seed
185
- self._column_params: dict[str, dict] = {}
193
+ self._column_params: dict[str, dict[str, float]] = {}
186
194
 
187
- def fit(self, table: pd.DataFrame) -> PerseusImputer:
195
+ def fit(self, table: pd.DataFrame) -> Self:
188
196
  """Fits the PerseusImputer.
189
197
 
190
198
  Args:
@@ -223,7 +231,7 @@ class PerseusImputer:
223
231
  Returns:
224
232
  'table' with imputed missing values.
225
233
  """
226
- confirm_is_fitted(self)
234
+ _confirm_is_fitted(self)
227
235
  np.random.seed(self.seed)
228
236
 
229
237
  _table = table.copy()
@@ -239,7 +247,7 @@ class PerseusImputer:
239
247
  return _table
240
248
 
241
249
 
242
- def confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
250
+ def _confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
243
251
  """Perform is_fitted validation for imputer instances.
244
252
 
245
253
  Checks if the imputer is fitted by verifying the presence of fitted attributes
@@ -266,7 +274,7 @@ def confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
266
274
  raise NotFittedError(msg % {"name": type(imputer).__name__})
267
275
 
268
276
 
269
- def _calculate_integer_below_min(table) -> int:
277
+ def _calculate_integer_below_min(table: pd.DataFrame) -> int:
270
278
  minimal_value = np.nanmin(table.to_numpy().flatten())
271
279
  below_minimal = np.floor(minimal_value)
272
280
  if minimal_value <= below_minimal:
msreport/isobar.py CHANGED
@@ -1,34 +1,31 @@
1
- from __future__ import annotations
1
+ """Provides a transformer class for processing isobarically labeled proteomics data.
2
+
3
+ This module defines the `IsotopeImpurityCorrecter` class for processing of isobaric
4
+ (e.g., TMT, iTRAQ) reporter intensities. This transformer must be fitted with an isotope
5
+ impurity matrix to correct interference in reporter intensities. Once fitted, the
6
+ transformer can then be applied to a table containing reporter ion intensities to adjust
7
+ its intensity values. The transformation returns a new copy of the table with the
8
+ processed values, leaving the original table unchanged.
9
+ """
2
10
 
3
11
  import functools
4
- from typing import Protocol
5
12
 
6
13
  import numpy as np
7
14
  import pandas as pd
8
15
  import scipy
16
+ from typing_extensions import Self
9
17
 
10
18
  import msreport.helper
11
19
  from msreport.errors import NotFittedError
12
20
 
13
21
 
14
- class Transformer(Protocol):
15
- def fit(self, table: pd.DataFrame) -> Transformer:
16
- """Fits the Transformer and returns a fitted Transformer instance."""
17
-
18
- def is_fitted(self) -> bool:
19
- """Returns True if the Transformer has been fitted."""
20
-
21
- def transform(self, table: pd.DataFrame) -> pd.DataFrame:
22
- """Transform values in 'table'."""
23
-
24
-
25
22
  class IsotopeImpurityCorrecter:
26
23
  """Corrects isotope impurity interference in isobaric reporter expression values."""
27
24
 
28
25
  def __init__(self):
29
26
  self._impurity_matrix = None
30
27
 
31
- def fit(self, impurity_matrix: np.ndarray) -> IsotopeImpurityCorrecter:
28
+ def fit(self, impurity_matrix: np.ndarray) -> Self:
32
29
  """Fits the isotope impurity correcter to a given impurity matrix.
33
30
 
34
31
  Args:
msreport/normalize.py CHANGED
@@ -1,4 +1,16 @@
1
- from __future__ import annotations
1
+ """Transformer classes for normalizing and transforming quantitative proteomics data.
2
+
3
+ This module defines various transformer classes for normalizing and scaling quantitative
4
+ values in tabular data. Examples include normalizers like median, mode, and LOWESS, as
5
+ well as scalers such as PercentageScaler and ZScoreScaler. A specialized
6
+ `CategoricalNormalizer` is also provided, which, when appropriately fitted and applied,
7
+ can be used for complex transformations such as iBAQ or site-to-protein normalization.
8
+
9
+ These transformers can be fitted to a table containing quantitative values to learn
10
+ parameters. Once fitted, they can then be applied to another table to adjust its values.
11
+ The transformation returns a new copy of the table with the normalized/scaled values,
12
+ leaving the original table unchanged.
13
+ """
2
14
 
3
15
  from typing import Callable, Iterable, Optional, Protocol
4
16
 
@@ -79,7 +91,7 @@ class FixedValueNormalizer:
79
91
  Raises:
80
92
  NotFittedError: If the FixedValueNormalizer has not been fitted yet.
81
93
  """
82
- confirm_is_fitted(self)
94
+ _confirm_is_fitted(self)
83
95
  return self._sample_fits.copy()
84
96
 
85
97
  def transform(self, table: pd.DataFrame) -> pd.DataFrame:
@@ -95,7 +107,7 @@ class FixedValueNormalizer:
95
107
  Raises:
96
108
  NotFittedError: If the FixedValueNormalizer has not been fitted yet.
97
109
  """
98
- confirm_is_fitted(self)
110
+ _confirm_is_fitted(self)
99
111
 
100
112
  _table = table.copy()
101
113
  for column in _table.columns:
@@ -195,7 +207,7 @@ class ValueDependentNormalizer:
195
207
  Raises:
196
208
  NotFittedError: If the ValueDependentNormalizer has not been fitted yet.
197
209
  """
198
- confirm_is_fitted(self)
210
+ _confirm_is_fitted(self)
199
211
  return self._sample_fits.copy()
200
212
 
201
213
  def transform(self, table: pd.DataFrame) -> pd.DataFrame:
@@ -211,7 +223,7 @@ class ValueDependentNormalizer:
211
223
  Raises:
212
224
  NotFittedError: If the ValueDependentNormalizer has not been fitted yet.
213
225
  """
214
- confirm_is_fitted(self)
226
+ _confirm_is_fitted(self)
215
227
 
216
228
  _table = table.copy()
217
229
  for column in _table.columns:
@@ -250,6 +262,59 @@ class ValueDependentNormalizer:
250
262
  self._sample_fits[sample] = sample_fit
251
263
 
252
264
 
265
+ class SumNormalizer:
266
+ """Normalizer that uses the sum of all values in each sample for normalization.
267
+
268
+ Expects log2-transformed intensity values. To obtain normalization factors, the sum
269
+ of non-log2-transformed values is calculated for each sample, then divided by the
270
+ average of all sample sums and log2-transformed.
271
+ """
272
+
273
+ def __init__(self):
274
+ """Initializes the SumNormalizer."""
275
+ self._sample_fits: dict[str, float] = {}
276
+
277
+ def fit(self, table: pd.DataFrame) -> Self:
278
+ """Fits the SumNormalizer and returns a fitted instance.
279
+
280
+ Args:
281
+ table: Dataframe used to calculate normalization values for each column.
282
+
283
+ Returns:
284
+ Returns the instance itself.
285
+ """
286
+ _sums = np.power(2, table).sum()
287
+ _log2_fits = np.log2(_sums.divide(_sums.mean()))
288
+ self._sample_fits = _log2_fits.to_dict()
289
+ return self
290
+
291
+ def is_fitted(self) -> bool:
292
+ """Returns True if the Transformer has been fitted."""
293
+ return True if self._sample_fits else False
294
+
295
+ def get_fits(self) -> dict[str, float]:
296
+ """Returns a dictionary containing the fitted center values per sample.
297
+
298
+ Raises:
299
+ NotFittedError: If the FixedValueNormalizer has not been fitted yet.
300
+ """
301
+ _confirm_is_fitted(self)
302
+ return self._sample_fits.copy()
303
+
304
+ def transform(self, table: pd.DataFrame) -> pd.DataFrame:
305
+ """Transform values in table."""
306
+ _confirm_is_fitted(self)
307
+
308
+ _table = table.copy()
309
+ for column in _table.columns:
310
+ column_data = np.array(_table[column], dtype=float)
311
+ mask = np.isfinite(column_data)
312
+ column_data[mask] = column_data[mask] - self._sample_fits[column]
313
+
314
+ _table[column] = column_data
315
+ return _table
316
+
317
+
253
318
  class MedianNormalizer(FixedValueNormalizer):
254
319
  """A FixedValueNormalizer that uses the median as the fitting function.
255
320
 
@@ -346,7 +411,7 @@ class CategoricalNormalizer:
346
411
  Raises:
347
412
  NotFittedError: If the CategoricalNormalizer has not been fitted yet.
348
413
  """
349
- confirm_is_fitted(self)
414
+ _confirm_is_fitted(self)
350
415
  return self._fitted_table.copy()
351
416
 
352
417
  def get_category_column(self) -> str:
@@ -367,7 +432,7 @@ class CategoricalNormalizer:
367
432
  table.
368
433
  NotFittedError: If the CategoricalNormalizer has not been fitted yet.
369
434
  """
370
- confirm_is_fitted(self)
435
+ _confirm_is_fitted(self)
371
436
 
372
437
  original_index = table.index
373
438
  table = table.set_index(self.get_category_column(), drop=True, inplace=False)
@@ -396,11 +461,11 @@ class PercentageScaler:
396
461
  return self
397
462
 
398
463
  def is_fitted(self) -> bool:
399
- """Always returns True because the ZscoreScaler does not need to be fitted."""
464
+ """Always returns True because the Scaler does not need to be fitted."""
400
465
  return True
401
466
 
402
467
  def get_fits(self) -> dict:
403
- """Returns a dictionary containing the parameters 'with_mean' and 'with_std'."""
468
+ """Returns an empty dictionary."""
404
469
  return {}
405
470
 
406
471
  def transform(self, table: pd.DataFrame) -> pd.DataFrame:
@@ -457,7 +522,27 @@ class ZscoreScaler:
457
522
  return scaled_table
458
523
 
459
524
 
460
- def confirm_is_fitted(
525
+ class Log2Transformer:
526
+ """Apply log2 transformation to column values."""
527
+
528
+ def fit(self, table: pd.DataFrame) -> Self:
529
+ """Returns the instance itself."""
530
+ return self
531
+
532
+ def is_fitted(self) -> bool:
533
+ """Returns True if the transformer is fitted."""
534
+ return True
535
+
536
+ def transform(self, table: pd.DataFrame) -> pd.DataFrame:
537
+ """Applies a log2 transformation to each column of the table.
538
+
539
+ Zero values are replaced with NaN before the transformation to avoid an error
540
+ during the log2 calculation.
541
+ """
542
+ return pd.DataFrame(np.log2(table.replace({0: np.nan})))
543
+
544
+
545
+ def _confirm_is_fitted(
461
546
  normalizer: AbstractTransformer, msg: Optional[str] = None
462
547
  ) -> None:
463
548
  """Perform is_fitted validation for normalizer instances.