msreport 0.0.29__tar.gz → 0.0.31__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {msreport-0.0.29 → msreport-0.0.31}/PKG-INFO +20 -2
  2. {msreport-0.0.29 → msreport-0.0.31}/README.md +10 -0
  3. {msreport-0.0.29 → msreport-0.0.31}/msreport/__init__.py +1 -1
  4. msreport-0.0.31/msreport/aggregate/__init__.py +10 -0
  5. {msreport-0.0.29 → msreport-0.0.31}/msreport/aggregate/condense.py +9 -0
  6. {msreport-0.0.29 → msreport-0.0.31}/msreport/aggregate/pivot.py +14 -5
  7. {msreport-0.0.29 → msreport-0.0.31}/msreport/aggregate/summarize.py +14 -4
  8. {msreport-0.0.29 → msreport-0.0.31}/msreport/analyze.py +67 -5
  9. {msreport-0.0.29 → msreport-0.0.31}/msreport/export.py +9 -15
  10. {msreport-0.0.29 → msreport-0.0.31}/msreport/fasta.py +9 -2
  11. {msreport-0.0.29 → msreport-0.0.31}/msreport/helper/__init__.py +18 -0
  12. {msreport-0.0.29 → msreport-0.0.31}/msreport/impute.py +18 -10
  13. {msreport-0.0.29 → msreport-0.0.31}/msreport/isobar.py +11 -14
  14. {msreport-0.0.29 → msreport-0.0.31}/msreport/normalize.py +95 -10
  15. {msreport-0.0.29 → msreport-0.0.31}/msreport/peptidoform.py +21 -11
  16. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/__init__.py +3 -3
  17. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/distribution.py +2 -1
  18. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/quality.py +1 -1
  19. {msreport-0.0.29 → msreport-0.0.31}/msreport/qtable.py +44 -20
  20. {msreport-0.0.29 → msreport-0.0.31}/msreport/reader.py +321 -40
  21. {msreport-0.0.29 → msreport-0.0.31}/msreport/rinterface/limma.py +1 -1
  22. {msreport-0.0.29 → msreport-0.0.31}/msreport.egg-info/PKG-INFO +20 -2
  23. {msreport-0.0.29 → msreport-0.0.31}/msreport.egg-info/requires.txt +9 -1
  24. {msreport-0.0.29 → msreport-0.0.31}/pyproject.toml +10 -1
  25. {msreport-0.0.29 → msreport-0.0.31}/tests/test_analyze.py +71 -1
  26. {msreport-0.0.29 → msreport-0.0.31}/tests/test_plot.py +22 -12
  27. {msreport-0.0.29 → msreport-0.0.31}/tests/test_qtable.py +17 -31
  28. msreport-0.0.29/msreport/aggregate/__init__.py +0 -0
  29. {msreport-0.0.29 → msreport-0.0.31}/LICENSE.txt +0 -0
  30. {msreport-0.0.29 → msreport-0.0.31}/msreport/errors.py +0 -0
  31. {msreport-0.0.29 → msreport-0.0.31}/msreport/helper/calc.py +0 -0
  32. {msreport-0.0.29 → msreport-0.0.31}/msreport/helper/maxlfq.py +0 -0
  33. {msreport-0.0.29 → msreport-0.0.31}/msreport/helper/table.py +0 -0
  34. {msreport-0.0.29 → msreport-0.0.31}/msreport/helper/temp.py +0 -0
  35. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/_partial_plots.py +0 -0
  36. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/comparison.py +0 -0
  37. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/multivariate.py +0 -0
  38. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/style.py +0 -0
  39. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/style_sheets/msreport-notebook.mplstyle +0 -0
  40. {msreport-0.0.29 → msreport-0.0.31}/msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +0 -0
  41. {msreport-0.0.29 → msreport-0.0.31}/msreport/rinterface/__init__.py +0 -0
  42. {msreport-0.0.29 → msreport-0.0.31}/msreport/rinterface/rinstaller.py +0 -0
  43. {msreport-0.0.29 → msreport-0.0.31}/msreport/rinterface/rscripts/limma.R +0 -0
  44. {msreport-0.0.29 → msreport-0.0.31}/msreport.egg-info/SOURCES.txt +0 -0
  45. {msreport-0.0.29 → msreport-0.0.31}/msreport.egg-info/dependency_links.txt +0 -0
  46. {msreport-0.0.29 → msreport-0.0.31}/msreport.egg-info/top_level.txt +0 -0
  47. {msreport-0.0.29 → msreport-0.0.31}/setup.cfg +0 -0
  48. {msreport-0.0.29 → msreport-0.0.31}/setup.py +0 -0
  49. {msreport-0.0.29 → msreport-0.0.31}/tests/test_export.py +0 -0
  50. {msreport-0.0.29 → msreport-0.0.31}/tests/test_helper.py +0 -0
  51. {msreport-0.0.29 → msreport-0.0.31}/tests/test_impute.py +0 -0
  52. {msreport-0.0.29 → msreport-0.0.31}/tests/test_isobar.py +0 -0
  53. {msreport-0.0.29 → msreport-0.0.31}/tests/test_maxlfq.py +0 -0
  54. {msreport-0.0.29 → msreport-0.0.31}/tests/test_peptidoform.py +0 -0
{msreport-0.0.29 → msreport-0.0.31}/PKG-INFO
@@ -1,10 +1,11 @@
  Metadata-Version: 2.4
  Name: msreport
- Version: 0.0.29
+ Version: 0.0.31
  Summary: Post processing and analysis of quantitative proteomics data
  Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
  License-Expression: Apache-2.0
  Project-URL: homepage, https://github.com/hollenstein/msreport
+ Project-URL: documentation, https://hollenstein.github.io/msreport/
  Project-URL: changelog, https://github.com/hollenstein/msreport/blob/main/CHANGELOG.md
  Keywords: mass spectrometry,proteomics,post processing,data analysis
  Classifier: Development Status :: 4 - Beta
@@ -29,10 +30,17 @@ Requires-Dist: seaborn>=0.12.0
  Requires-Dist: statsmodels>=0.13.2
  Requires-Dist: typing_extensions>=4
  Provides-Extra: r
- Requires-Dist: rpy2!=3.5.13,>=3.5.3; extra == "r"
+ Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == "r"
  Provides-Extra: dev
  Requires-Dist: mypy>=1.15.0; extra == "dev"
  Requires-Dist: pytest>=8.3.5; extra == "dev"
+ Provides-Extra: docs
+ Requires-Dist: mkdocs-awesome-nav>=3.1.2; extra == "docs"
+ Requires-Dist: mkdocs-macros-plugin>=1.3.7; extra == "docs"
+ Requires-Dist: mkdocs-material>=9.6.15; extra == "docs"
+ Requires-Dist: mkdocs-roamlinks-plugin>=0.3.2; extra == "docs"
+ Requires-Dist: mkdocstrings-python>=1.16.12; extra == "docs"
+ Requires-Dist: ruff>=0.12.2; extra == "docs"
  Provides-Extra: test
  Requires-Dist: pytest>=8.3.5; extra == "test"
  Dynamic: license-file
@@ -40,6 +48,7 @@ Dynamic: license-file
  # MsReport
  
  [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15309090.svg)](https://doi.org/10.5281/zenodo.15309090)
  ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fhollenstein%2Fmsreport%2Fmain%2Fpyproject.toml)
  [![Run tests](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml/badge.svg)](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
  
@@ -55,6 +64,7 @@ bottom-up mass spectrometry experiments.
  - [Additional requirements](#additional-requirements)
  - [Optional Dependencies](#optional-dependencies)
  - [Development status](#development-status)
+ - [How to cite](#how-to-cite)
  
  ## What is MsReport?
  
@@ -62,6 +72,8 @@ MsReport is a Python library designed to simplify the post-processing and analys
  
  The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
  
+ The [documentation](https://hollenstein.github.io/msreport/) provides an overview of the library's public API.
+
  ### Key features of MsReport
  
  #### Data Import and Standardization
@@ -134,3 +146,9 @@ For example, the R home directory might look like this on Windows: `C:\Program F
  ## Development status
  
  MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
+
+ ## How to cite
+
+ If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
+
+ > Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
{msreport-0.0.29 → msreport-0.0.31}/README.md
@@ -1,6 +1,7 @@
  # MsReport
  
  [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15309090.svg)](https://doi.org/10.5281/zenodo.15309090)
  ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fhollenstein%2Fmsreport%2Fmain%2Fpyproject.toml)
  [![Run tests](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml/badge.svg)](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
  
@@ -16,6 +17,7 @@ bottom-up mass spectrometry experiments.
  - [Additional requirements](#additional-requirements)
  - [Optional Dependencies](#optional-dependencies)
  - [Development status](#development-status)
+ - [How to cite](#how-to-cite)
  
  ## What is MsReport?
  
@@ -23,6 +25,8 @@ MsReport is a Python library designed to simplify the post-processing and analys
  
  The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
  
+ The [documentation](https://hollenstein.github.io/msreport/) provides an overview of the library's public API.
+
  ### Key features of MsReport
  
  #### Data Import and Standardization
@@ -95,3 +99,9 @@ For example, the R home directory might look like this on Windows: `C:\Program F
  ## Development status
  
  MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
+
+ ## How to cite
+
+ If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
+
+ > Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
{msreport-0.0.29 → msreport-0.0.31}/msreport/__init__.py
@@ -8,4 +8,4 @@ from msreport.fasta import import_protein_database
  from msreport.qtable import Qtable
  from msreport.reader import FragPipeReader, MaxQuantReader, SpectronautReader
  
- __version__ = "0.0.29"
+ __version__ = "0.0.31"
msreport-0.0.31/msreport/aggregate/__init__.py (new file)
@@ -0,0 +1,10 @@
+ """A comprehensive set of tools for aggregating and reshaping tabular proteomics data.
+ 
+ The `aggregation` module contains submodules that offer functionalities to transform
+ data from lower levels of abstraction (e.g. ions, peptides) to higher levels (e.g.
+ peptides, proteins, PTMs) through various summarization and condensation techniques.
+ It also includes methods for reshaping tables from "long" to "wide" format, a common
+ prerequisite for aggregation. The MaxLFQ algorithm is integrated for specific
+ quantitative summarizations, enabling users to build customized, higher-level data
+ tables.
+ """
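The package docstring above describes aggregation from lower to higher levels of abstraction. As a rough illustration of that idea, and not code from msreport (the column names are hypothetical), condensing an ion-level table to peptide level with plain pandas might look like this:

```python
# Illustration only: lower-to-higher level aggregation in the spirit of the new
# msreport.aggregate docstring, sketched with plain pandas. Column names are
# hypothetical and not taken from msreport.
import pandas as pd

# Ion-level table: several ions map to the same peptide.
ions = pd.DataFrame({
    "Peptide": ["PEPTIDEA", "PEPTIDEA", "PEPTIDEB"],
    "Intensity S1": [1.0e6, 2.0e6, 3.0e6],
    "Intensity S2": [1.5e6, 2.5e6, 3.5e6],
})

# Condense to peptide level by summing the ion intensities per peptide.
peptides = ions.groupby("Peptide", as_index=False)[["Intensity S1", "Intensity S2"]].sum()
print(peptides)
```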
{msreport-0.0.29 → msreport-0.0.31}/msreport/aggregate/condense.py
@@ -1,3 +1,12 @@
+ """Low-level functions for aggregating numerical and string data.
+ 
+ This module defines fundamental "condenser" functions that operate directly on NumPy
+ arrays. These functions are designed to be applied to groups of data, performing
+ operations such as summing values, finding maximum/minimum, counting or joining unique
+ elements, and calculating abundance profiles. It includes the core implementations for
+ MaxLFQ summation.
+ """
+ 
  import numpy as np
  
  import msreport.helper.maxlfq as MAXLFQ
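The docstring characterizes condensers as functions applied to the values of one group. Two illustrative condensers in that spirit, written against NumPy only and not taken from msreport:

```python
# Illustration only: two hypothetical "condenser" functions in the spirit of the
# new condense.py docstring, i.e. functions applied to a NumPy array that holds
# the values of one group. They are not taken from msreport.
import numpy as np


def sum_finite(values: np.ndarray) -> float:
    """Sums a group's values while ignoring NaN entries."""
    return float(np.nansum(values))


def join_unique_strings(values: np.ndarray, sep: str = ";") -> str:
    """Joins the sorted unique string entries of a group."""
    return sep.join(sorted(set(values.tolist())))


print(sum_finite(np.array([1.0, np.nan, 2.5])))           # 3.5
print(join_unique_strings(np.array(["P1", "P2", "P1"])))  # P1;P2
```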
{msreport-0.0.29 → msreport-0.0.31}/msreport/aggregate/pivot.py
@@ -1,4 +1,12 @@
- from typing import Iterable, Union
+ """Functionalities for reshaping tabular quantitative proteomics data.
+ 
+ This module offers methods to transform data from a "long" format into a "wide" format,
+ which is a common and often necessary step before aggregation or analysis. It supports
+ pivoting data based on specified index and grouping columns, and can handle both
+ quantitative values and annotation columns.
+ """
+ 
+ from typing import Iterable
  
  import pandas as pd
  
@@ -12,11 +20,12 @@ def pivot_table(
      group_by: str,
      annotation_columns: Iterable[str],
      pivoting_columns: Iterable[str],
- ):
+ ) -> pd.DataFrame:
      """Generates a pivoted table in wide format.
  
      Args:
-         table: Dataframe in long format that is used to generate a table in wide format.
+         long_table: Dataframe in long format that is used to generate a table in wide
+             format.
          index: One or multiple column names that are used to group the table for
              pivoting.
          group_by: Column that is used to split the table on its unique entries.
@@ -58,7 +67,7 @@ def pivot_table(
  
  
  def pivot_column(
-     table: pd.DataFrame, index: Union[str, Iterable], group_by: str, values: str
+     table: pd.DataFrame, index: str | Iterable[str], group_by: str, values: str
  ) -> pd.DataFrame:
      """Returns a reshaped dataframe, generated by pivoting the table on one column.
  
@@ -98,7 +107,7 @@ def pivot_column(
  
  
  def join_unique(
-     table: pd.DataFrame, index: str | Iterable[str], values: str
+     table: pd.DataFrame, index: str | Iterable[str], values: str
  ) -> pd.DataFrame:
      """Returns a new dataframe with unique values from a column and grouped by 'index'.
  
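`pivot_column` now takes `str | Iterable[str]` for its `index` argument; its docstring describes pivoting one value column over the entries of a grouping column. A rough pandas equivalent of that reshape, with hypothetical column names (this is not the msreport implementation):

```python
# Rough pandas equivalent of the reshape described by pivot_column's docstring:
# one value column is spread over the unique entries of a grouping column. The
# column names are hypothetical; this is not the msreport implementation.
import pandas as pd

table = pd.DataFrame({
    "Protein": ["P1", "P1", "P2", "P2"],
    "Sample": ["S1", "S2", "S1", "S2"],
    "Intensity": [10.0, 12.0, 7.0, 8.0],
})

# Corresponds to index="Protein", group_by="Sample", values="Intensity".
wide = table.pivot(index="Protein", columns="Sample", values="Intensity").reset_index()
print(wide)
```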
{msreport-0.0.29 → msreport-0.0.31}/msreport/aggregate/summarize.py
@@ -1,4 +1,14 @@
- from typing import Callable, Iterable, Optional, Union
+ """High-level functions for aggregating quantitative proteomics data.
+ 
+ This module offers functions to summarize data from a lower level of abstraction (e.g.
+ ions, peptides) to a higher level (e.g., peptides, proteins, PTMs). It operates directly
+ on pandas DataFrames, allowing users to specify a grouping column and the columns to be
+ summarized. These functions often leverage low-level condenser operations defined in
+ `msreport.aggregate.condense`. It includes specific functions for MaxLFQ summation, as
+ well as general counting, joining, and summing of columns.
+ """
+ 
+ from typing import Callable, Iterable, Optional
  
  import numpy as np
  import pandas as pd
@@ -10,7 +20,7 @@ from msreport.helper import find_sample_columns
  def count_unique(
      table: pd.DataFrame,
      group_by: str,
-     input_column: Union[str, Iterable],
+     input_column: str | Iterable[str],
      output_column: str = "Unique counts",
      is_sorted: bool = False,
  ) -> pd.DataFrame:
@@ -55,7 +65,7 @@ def count_unique(
  def join_unique(
      table: pd.DataFrame,
      group_by: str,
-     input_column: Union[str, Iterable],
+     input_column: str | Iterable[str],
      output_column: str = "Unique values",
      sep: str = ";",
      is_sorted: bool = False,
@@ -215,7 +225,7 @@ def sum_columns_maxlfq(
  def aggregate_unique_groups(
      table: pd.DataFrame,
      group_by: str,
-     columns_to_aggregate: Union[str, Iterable],
+     columns_to_aggregate: str | Iterable[str],
      condenser: Callable,
      is_sorted: bool,
  ) -> tuple[np.ndarray, np.ndarray]:
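`count_unique` and `join_unique` now accept `str | Iterable[str]` for `input_column`. Based only on the parameters visible in this hunk, a call might look like the sketch below; the DataFrame columns are made up, and the exact layout of the returned table is not shown in the diff.

```python
# Hypothetical call based on the count_unique signature visible in this hunk; the
# DataFrame columns are made up and the layout of the returned table is not shown
# in the diff.
import pandas as pd

import msreport.aggregate.summarize as summarize

peptides = pd.DataFrame({
    "Protein": ["P1", "P1", "P2"],
    "Peptide": ["PEPTIDEA", "PEPTIDEB", "PEPTIDEC"],
})

# input_column accepts a single column name or an iterable of names (PEP 604 union).
counts = summarize.count_unique(
    peptides,
    group_by="Protein",
    input_column="Peptide",
    output_column="Unique peptides",
)
```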
{msreport-0.0.29 → msreport-0.0.31}/msreport/analyze.py
@@ -1,12 +1,16 @@
- """The analyze module contains methods for analysing quantification results."""
+ """Tools for post-processing and statistical analysis of `Qtable` data.
  
- from __future__ import annotations
+ All functions in this module take a `Qtable` object and modify its data in place. The
+ module provides functionality for data evaluation, normalization, imputation of missing
+ values, and statistical testing, including integration with R's LIMMA package.
+ """
  
  import warnings
  from typing import Iterable, Optional, Protocol, Sequence
  
  import numpy as np
  import pandas as pd
+ from typing_extensions import Self
  
  import msreport.normalize
  from msreport.errors import OptionalDependencyError
@@ -24,7 +28,7 @@ except OptionalDependencyError as err:
  
  
  class Transformer(Protocol):
-     def fit(self, table: pd.DataFrame) -> Transformer:
+     def fit(self, table: pd.DataFrame) -> Self:
          """Fits the Transformer and returns a fitted Transformer instance."""
  
      def is_fitted(self) -> bool:
@@ -35,7 +39,7 @@ class Transformer(Protocol):
  
  
  class CategoryTransformer(Protocol):
-     def fit(self, table: pd.DataFrame) -> Transformer:
+     def fit(self, table: pd.DataFrame) -> Self:
          """Fits the Transformer and returns a fitted Transformer instance."""
  
      def is_fitted(self) -> bool:
@@ -162,7 +166,7 @@ def validate_proteins(
  
  
  def apply_transformer(
-     qtable: msreport.Qtable,
+     qtable: Qtable,
      transformer: Transformer,
      tag: str,
      exclude_invalid: bool,
@@ -205,6 +209,64 @@ def apply_transformer(
      qtable.data[data_table.columns] = data_table
  
  
+ def apply_category_transformer(
+     qtable: Qtable,
+     transformer: CategoryTransformer,
+     tag: str,
+     exclude_invalid: bool,
+     remove_invalid: bool,
+     new_tag: Optional[str] = None,
+ ) -> None:
+     """Apply a category transformer to Qtable columns selected by tag.
+ 
+     Args:
+         qtable: A Qtable instance, to which the transformer is applied.
+         transformer: The CategoryTransformer to apply.
+         tag: The tag used to identify the columns for applying the transformer.
+         exclude_invalid: Exclude invalid values from the transformation.
+         remove_invalid: Remove invalid values from the table after the transformation.
+         new_tag: Optional, if specified than the tag is replaced with this value in the
+             column names and the transformed data is stored to these new columns.
+ 
+     Raises:
+         KeyError: If the category column of the `transformer` is not found in the
+             `qtable.data`.
+         ValueError: If no sample columns are found for the specified tag.
+     """
+     category_column = transformer.get_category_column()
+     if category_column not in qtable.data.columns:
+         raise KeyError(
+             f'The category column "{category_column}" in the transformer '
+             f"is not found in `qtable.data`."
+         )
+ 
+     valid = qtable.data["Valid"]
+     samples = qtable.get_samples()
+     sample_columns = find_sample_columns(qtable.data, tag, samples)
+ 
+     if not sample_columns:
+         raise ValueError(f"No sample columns found for tag '{tag}'.")
+ 
+     if new_tag is not None:
+         sample_columns = [c.replace(tag, new_tag) for c in sample_columns]
+     column_mapping = dict(zip(samples, sample_columns))
+ 
+     data_table = qtable.make_sample_table(tag, samples_as_columns=True)
+     data_table[category_column] = qtable.data[category_column]
+ 
+     if exclude_invalid:
+         data_table.loc[valid, :] = transformer.transform(data_table.loc[valid, :])
+     else:
+         data_table = transformer.transform(data_table)
+     data_table = data_table.drop(columns=[category_column])
+ 
+     if remove_invalid:
+         data_table[~valid] = np.nan
+ 
+     data_table.columns = [column_mapping[s] for s in data_table.columns]
+     qtable.data[data_table.columns] = data_table
+ 
+ 
  def normalize_expression(
      qtable: Qtable,
      normalizer: Transformer,
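The new `apply_category_transformer` above calls `transformer.get_category_column()` and `transformer.transform(...)` on a `CategoryTransformer`. A minimal sketch of an object satisfying those calls is shown below; the per-category median centering is purely illustrative and not code from msreport.

```python
# Minimal sketch of an object satisfying what apply_category_transformer visibly
# requires: get_category_column(), fit() returning Self, is_fitted(), and
# transform() returning a DataFrame that still contains the category column.
# This class is illustrative and not part of msreport.
from typing import Optional

import pandas as pd
from typing_extensions import Self


class CategoryMedianCenterer:
    def __init__(self, category_column: str = "Category"):
        self._category_column = category_column
        self._medians: Optional[pd.DataFrame] = None

    def get_category_column(self) -> str:
        return self._category_column

    def fit(self, table: pd.DataFrame) -> Self:
        # Learn one median per category and value column.
        values = table.drop(columns=[self._category_column])
        self._medians = values.groupby(table[self._category_column]).median()
        return self

    def is_fitted(self) -> bool:
        return self._medians is not None

    def transform(self, table: pd.DataFrame) -> pd.DataFrame:
        # Subtract the fitted per-category medians from the value columns.
        transformed = table.copy()
        value_columns = [c for c in table.columns if c != self._category_column]
        offsets = self._medians.loc[table[self._category_column], value_columns].to_numpy()
        transformed[value_columns] = transformed[value_columns].to_numpy() - offsets
        return transformed
```

Such an object would then be passed to `apply_category_transformer` together with a column tag and the `exclude_invalid`/`remove_invalid` flags shown in the diff.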
{msreport-0.0.29 → msreport-0.0.31}/msreport/export.py
@@ -1,19 +1,13 @@
- """
- Columns that are not yet present in the amica output at the moment:
- Index([
- 'Protein Probability',
- 'Top Peptide Probability',
- 'Total peptides',
- 'Leading proteins',
- 'Protein entry name',
- 'Fasta header',
- 'Protein length',
- 'iBAQ peptides',
- 'Sequence coverage',
- ], dtype='object')
+ """Exporting of proteomics data from `Qtable` into external formats.
+ 
+ This module offers functionalities to convert and save `Qtable` data into files
+ compatible with external tools (Amica and Perseus), and creating sequence coverage maps
+ in HTML format. While most functions operate on `Qtable` instances, some may accept
+ other data structures.
  """
  
  import os
+ import pathlib
  import warnings
  from collections import defaultdict as ddict
  from typing import Iterable, Optional, Protocol, Sequence
@@ -99,7 +93,7 @@ def contaminants_to_clipboard(qtable: Qtable) -> None:
  
  def to_perseus_matrix(
      qtable: Qtable,
-     directory,
+     directory: str | pathlib.Path,
      table_name: str = "perseus_matrix.tsv",
  ) -> None:
      """Exports a qtable to a perseus matrix file in tsv format.
@@ -151,7 +145,7 @@ def to_perseus_matrix(
  
  def to_amica(
      qtable: Qtable,
-     directory,
+     directory: str | pathlib.Path,
      table_name: str = "amica_table.tsv",
      design_name: str = "amica_design.tsv",
  ) -> None:
{msreport-0.0.29 → msreport-0.0.31}/msreport/fasta.py
@@ -1,11 +1,18 @@
+ """Functionalities for import and access to protein sequence databases from FASTA files.
+ 
+ This module serves as an interface to the `profasta` library, offering a convenient way
+ to generate a `profasta.db.ProteinDatabase` from one or multiple FASTA files. It
+ supports custom FASTA header parsing through a configurable header parser.
+ """
+ 
  import pathlib
- from typing import Iterable, Union
+ from typing import Iterable
  
  from profasta.db import ProteinDatabase
  
  
  def import_protein_database(
-     fasta_path: Union[str, pathlib.Path, Iterable[Union[str, pathlib.Path]]],
+     fasta_path: str | pathlib.Path | Iterable[str | pathlib.Path],
      header_parser: str = "uniprot",
  ) -> ProteinDatabase:
      """Generates a protein database from one or a list of fasta files.
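An example call based on the `import_protein_database` signature shown above; the FASTA file names are hypothetical.

```python
# Example call based on the signature shown above; the FASTA file names are
# hypothetical. A single path or an iterable of str/pathlib.Path entries is accepted.
from msreport.fasta import import_protein_database

protein_db = import_protein_database(
    ["proteome.fasta", "contaminants.fasta"],
    header_parser="uniprot",
)
```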
{msreport-0.0.29 → msreport-0.0.31}/msreport/helper/__init__.py
@@ -1,3 +1,9 @@
+ """A collection of widely used helper and utility functions.
+ 
+ This module re-exports commonly used functions from various `msreport.helper`
+ submodules for convenience.
+ """
+ 
  from .calc import (
      calculate_monoisotopic_mass,
      calculate_sequence_coverage,
@@ -21,3 +27,15 @@ from .temp import (
      extract_modifications,
      modify_peptide,
  )
+ 
+ __all__ = [
+     "apply_intensity_cutoff",
+     "find_columns",
+     "find_sample_columns",
+     "guess_design",
+     "intensities_in_logspace",
+     "keep_rows_by_partial_match",
+     "remove_rows_by_partial_match",
+     "rename_mq_reporter_channels",
+     "rename_sample_columns",
+ ]
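The new `__all__` pins down the star-import surface of `msreport.helper`; this is standard Python behaviour rather than anything msreport-specific.

```python
# Standard Python behaviour: with __all__ defined, a star import exposes exactly
# the listed names, for example find_sample_columns.
from msreport.helper import *

print(find_sample_columns)  # one of the names listed in the new __all__
```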
{msreport-0.0.29 → msreport-0.0.31}/msreport/impute.py
@@ -1,9 +1,17 @@
- from __future__ import annotations
+ """Transformer classes for imputing missing values in quantitative proteomics data.
+ 
+ This module defines transformer classes that can be fitted to a table containing
+ quantitative values to learn imputation parameters. Once fitted, these transformers can
+ then be applied to another table to transform it by filling in missing values. The
+ transformation returns a new copy of the table with the imputed values, leaving the
+ original table unchanged.
+ """
  
  from typing import Any, Optional
  
  import numpy as np
  import pandas as pd
+ from typing_extensions import Self
  
  from msreport.errors import NotFittedError
  
@@ -42,7 +50,7 @@ class FixedValueImputer:
          self.column_wise = column_wise
          self._sample_fill_values: dict[str, float] = {}
  
-     def fit(self, table: pd.DataFrame) -> FixedValueImputer:
+     def fit(self, table: pd.DataFrame) -> Self:
          """Fits the FixedValueImputer.
  
          Args:
@@ -79,7 +87,7 @@ class FixedValueImputer:
          Returns:
              'table' with imputed missing values.
          """
-         confirm_is_fitted(self)
+         _confirm_is_fitted(self)
  
          _table = table.copy()
          for column in _table.columns:
@@ -108,7 +116,7 @@ class GaussianImputer:
          self.sigma = sigma
          self.seed = seed
  
-     def fit(self, table: pd.DataFrame) -> GaussianImputer:
+     def fit(self, table: pd.DataFrame) -> Self:
          """Fits the GaussianImputer, altough this is not necessary.
  
          Args:
@@ -134,7 +142,7 @@ class GaussianImputer:
          Returns:
              'table' with imputed missing values.
          """
-         confirm_is_fitted(self)
+         _confirm_is_fitted(self)
          np.random.seed(self.seed)
  
          _table = table.copy()
@@ -182,9 +190,9 @@ class PerseusImputer:
          self.std_width = std_width
          self.column_wise = column_wise
          self.seed = seed
-         self._column_params: dict[str, dict] = {}
+         self._column_params: dict[str, dict[str, float]] = {}
  
-     def fit(self, table: pd.DataFrame) -> PerseusImputer:
+     def fit(self, table: pd.DataFrame) -> Self:
          """Fits the PerseusImputer.
  
          Args:
@@ -223,7 +231,7 @@ class PerseusImputer:
          Returns:
              'table' with imputed missing values.
          """
-         confirm_is_fitted(self)
+         _confirm_is_fitted(self)
          np.random.seed(self.seed)
  
          _table = table.copy()
@@ -239,7 +247,7 @@
          return _table
  
  
- def confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
+ def _confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
      """Perform is_fitted validation for imputer instances.
  
      Checks if the imputer is fitted by verifying the presence of fitted attributes
@@ -266,7 +274,7 @@ def confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
          raise NotFittedError(msg % {"name": type(imputer).__name__})
  
  
- def _calculate_integer_below_min(table) -> int:
+ def _calculate_integer_below_min(table: pd.DataFrame) -> int:
      minimal_value = np.nanmin(table.to_numpy().flatten())
      below_minimal = np.floor(minimal_value)
      if minimal_value <= below_minimal:
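The `fit()` return annotations switch from the concrete class names to `typing_extensions.Self`, which keeps the return type accurate for subclasses. A small illustration of that typing difference, not code from the package:

```python
# Why returning Self matters: with a hard-coded class name, a subclass's fit()
# would be typed as returning the parent class. Illustrative only, not msreport code.
import pandas as pd
from typing_extensions import Self


class BaseImputer:
    def fit(self, table: pd.DataFrame) -> Self:
        # ... learn imputation parameters from `table` ...
        return self


class LoggingImputer(BaseImputer):
    def describe(self) -> str:
        return "logs every imputation"


imputer = LoggingImputer().fit(pd.DataFrame())
print(imputer.describe())  # type checkers see a LoggingImputer here, not a BaseImputer
```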
{msreport-0.0.29 → msreport-0.0.31}/msreport/isobar.py
@@ -1,34 +1,31 @@
- from __future__ import annotations
+ """Provides a transformer class for processing isobarically labeled proteomics data.
+ 
+ This module defines the `IsotopeImpurityCorrecter` class for processing of isobaric
+ (e.g., TMT, iTRAQ) reporter intensities. This transformer must be fitted with an isotope
+ impurity matrix to correct interference in reporter intensities. Once fitted, the
+ transformer can then be applied to a table containing reporter ion intensities to adjust
+ its intensity values. The transformation returns a new copy of the table with the
+ processed values, leaving the original table unchanged.
+ """
  
  import functools
- from typing import Protocol
  
  import numpy as np
  import pandas as pd
  import scipy
+ from typing_extensions import Self
  
  import msreport.helper
  from msreport.errors import NotFittedError
  
  
- class Transformer(Protocol):
-     def fit(self, table: pd.DataFrame) -> Transformer:
-         """Fits the Transformer and returns a fitted Transformer instance."""
- 
-     def is_fitted(self) -> bool:
-         """Returns True if the Transformer has been fitted."""
- 
-     def transform(self, table: pd.DataFrame) -> pd.DataFrame:
-         """Transform values in 'table'."""
- 
- 
  class IsotopeImpurityCorrecter:
      """Corrects isotope impurity interference in isobaric reporter expression values."""
  
      def __init__(self):
          self._impurity_matrix = None
  
-     def fit(self, impurity_matrix: np.ndarray) -> IsotopeImpurityCorrecter:
+     def fit(self, impurity_matrix: np.ndarray) -> Self:
          """Fits the isotope impurity correcter to a given impurity matrix.
  
          Args:
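A sketch of the fit/transform flow described by the new module docstring. Only the `fit()` signature is taken from the diff; the `transform()` call, the impurity matrix values, and the column names are assumptions made for illustration.

```python
# Sketch of the fit/transform flow described by the new isobar.py docstring. Only
# the fit() signature is taken from the diff; the transform() call, the matrix
# values, and the column names are assumptions for illustration.
import numpy as np
import pandas as pd

from msreport.isobar import IsotopeImpurityCorrecter

# Hypothetical impurity matrix for a three-channel isobaric label.
impurity_matrix = np.array([
    [0.95, 0.05, 0.00],
    [0.03, 0.94, 0.03],
    [0.00, 0.05, 0.95],
])

reporter_intensities = pd.DataFrame({
    "Reporter intensity 1": [1000.0, 2000.0],
    "Reporter intensity 2": [1500.0, 2500.0],
    "Reporter intensity 3": [1200.0, 2200.0],
})

correcter = IsotopeImpurityCorrecter().fit(impurity_matrix)
corrected = correcter.transform(reporter_intensities)  # returns a corrected copy
```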