msreport 0.0.28__tar.gz → 0.0.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msreport-0.0.30/PKG-INFO +144 -0
- msreport-0.0.30/README.md +105 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/__init__.py +1 -1
- {msreport-0.0.28 → msreport-0.0.30}/msreport/analyze.py +16 -3
- {msreport-0.0.28 → msreport-0.0.30}/msreport/errors.py +4 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/distribution.py +2 -1
- {msreport-0.0.28 → msreport-0.0.30}/msreport/qtable.py +21 -11
- {msreport-0.0.28 → msreport-0.0.30}/msreport/reader.py +253 -10
- msreport-0.0.30/msreport/rinterface/__init__.py +16 -0
- msreport-0.0.30/msreport.egg-info/PKG-INFO +144 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport.egg-info/requires.txt +6 -1
- {msreport-0.0.28 → msreport-0.0.30}/pyproject.toml +17 -6
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_analyze.py +3 -1
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_plot.py +11 -1
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_qtable.py +17 -31
- msreport-0.0.28/PKG-INFO +0 -132
- msreport-0.0.28/README.md +0 -100
- msreport-0.0.28/msreport/rinterface/__init__.py +0 -4
- msreport-0.0.28/msreport.egg-info/PKG-INFO +0 -132
- {msreport-0.0.28 → msreport-0.0.30}/LICENSE.txt +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/aggregate/__init__.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/aggregate/condense.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/aggregate/pivot.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/aggregate/summarize.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/export.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/fasta.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/helper/__init__.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/helper/calc.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/helper/maxlfq.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/helper/table.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/helper/temp.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/impute.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/isobar.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/normalize.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/peptidoform.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/__init__.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/_partial_plots.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/comparison.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/multivariate.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/quality.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/style.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/style_sheets/msreport-notebook.mplstyle +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/rinterface/limma.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/rinterface/rinstaller.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport/rinterface/rscripts/limma.R +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport.egg-info/SOURCES.txt +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport.egg-info/dependency_links.txt +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/msreport.egg-info/top_level.txt +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/setup.cfg +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/setup.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_export.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_helper.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_impute.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_isobar.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_maxlfq.py +0 -0
- {msreport-0.0.28 → msreport-0.0.30}/tests/test_peptidoform.py +0 -0
msreport-0.0.30/PKG-INFO
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: msreport
|
|
3
|
+
Version: 0.0.30
|
|
4
|
+
Summary: Post processing and analysis of quantitative proteomics data
|
|
5
|
+
Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: homepage, https://github.com/hollenstein/msreport
|
|
8
|
+
Project-URL: changelog, https://github.com/hollenstein/msreport/blob/main/CHANGELOG.md
|
|
9
|
+
Keywords: mass spectrometry,proteomics,post processing,data analysis
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE.txt
|
|
19
|
+
Requires-Dist: adjustText<1.0.0,>=0.7.0
|
|
20
|
+
Requires-Dist: matplotlib>=3.5.2
|
|
21
|
+
Requires-Dist: numpy>=1.21.5
|
|
22
|
+
Requires-Dist: pandas>=1.4.4
|
|
23
|
+
Requires-Dist: profasta>=0.0.4
|
|
24
|
+
Requires-Dist: pyteomics>=4.6.0
|
|
25
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
26
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
27
|
+
Requires-Dist: scipy>=1.9.1
|
|
28
|
+
Requires-Dist: seaborn>=0.12.0
|
|
29
|
+
Requires-Dist: statsmodels>=0.13.2
|
|
30
|
+
Requires-Dist: typing_extensions>=4
|
|
31
|
+
Provides-Extra: r
|
|
32
|
+
Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == "r"
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: mypy>=1.15.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest>=8.3.5; extra == "dev"
|
|
36
|
+
Provides-Extra: test
|
|
37
|
+
Requires-Dist: pytest>=8.3.5; extra == "test"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# MsReport
|
|
41
|
+
|
|
42
|
+
[](https://www.repostatus.org/#wip)
|
|
43
|
+
[](https://doi.org/10.5281/zenodo.15309090)
|
|
44
|
+

|
|
45
|
+
[](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
|
|
46
|
+
|
|
47
|
+
**MsReport** is a Python library for post-processing quantitative proteomics data from
|
|
48
|
+
bottom-up mass spectrometry experiments.
|
|
49
|
+
|
|
50
|
+
## Table of Contents
|
|
51
|
+
|
|
52
|
+
- [What is MsReport?](#what-is-msreport)
|
|
53
|
+
- [Key features of MsReport](#key-features-of-msreport)
|
|
54
|
+
- [Installation](#installation)
|
|
55
|
+
- [Installation when using Anaconda](#installation-when-using-anaconda)
|
|
56
|
+
- [Additional requirements](#additional-requirements)
|
|
57
|
+
- [Optional Dependencies](#optional-dependencies)
|
|
58
|
+
- [Development status](#development-status)
|
|
59
|
+
- [How to cite](#how-to-cite)
|
|
60
|
+
|
|
61
|
+
## What is MsReport?
|
|
62
|
+
|
|
63
|
+
MsReport is a Python library designed to simplify the post-processing and analysis of quantitative proteomics data from bottom-up mass spectrometry experiments. It provides a high-level, abstraction-focused API for efficient and standardized workflows. The modular design of the library provides the flexibility to meet project-specific data processing needs and customize workflows as required.
|
|
64
|
+
|
|
65
|
+
The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
|
|
66
|
+
|
|
67
|
+
### Key features of MsReport
|
|
68
|
+
|
|
69
|
+
#### Data Import and Standardization
|
|
70
|
+
|
|
71
|
+
The `reader` module provides software-specific reader classes for importing data from MaxQuant, FragPipe, and Spectronaut that enable the import of protein, peptide and ion tables. During the import process, these classes transform table column names and values into a standardized format to ensure that the rest of the library can operate in a tool-agnostic manner.
|
|
72
|
+
|
|
73
|
+
#### Data management
|
|
74
|
+
|
|
75
|
+
The `qtable` module provides a structured approach to managing quantitative data through its central `Qtable` class. This class combines quantitative data with an experimental design table that defines the relationship between samples and experimental conditions. The quantitative data is stored in a wide format, where each sample's measurements are stored in separate columns. The `Qtable` class serves as the foundation for data analysis workflows in MsReport, providing the standardized data structure used by the `analyze`, `plot`, and `export` modules.
|
|
76
|
+
|
|
77
|
+
#### Data processing and analysis
|
|
78
|
+
|
|
79
|
+
The `analyze` module provides tools for post-processing of mass spectrometry data generated by software such as MaxQuant, FragPipe, or Spectronaut. It includes functions for filtering, normalization, imputation of missing values, and statistical testing. The library integrates with the R package LIMMA to enable differential expression analysis.
|
|
80
|
+
|
|
81
|
+
> [!NOTE]
|
|
82
|
+
> In order to use the R integration you need to install msreport with optional dependencies, see [Optional Dependencies](#optional-dependencies) for more information.
|
|
83
|
+
|
|
84
|
+
#### Data visualization
|
|
85
|
+
|
|
86
|
+
The `plot` module supports the generation of visualizations for quality control and data analysis. It includes functions for creating various plots, such as intensity and ratio distributions, heatmaps, volcano plots, and PCA plots.
|
|
87
|
+
|
|
88
|
+
#### Data export
|
|
89
|
+
|
|
90
|
+
Finally, the `export` module enables the conversion and export into formats compatible with external tools. This includes generating input files for [Amica](https://bioapps.maxperutzlabs.ac.at/app/amica) and exporting tables for easier integration with Perseus.
|
|
91
|
+
|
|
92
|
+
## Installation
|
|
93
|
+
|
|
94
|
+
If you do not already have a Python installation, we recommend installing the [Anaconda distribution](https://www.anaconda.com/download) or [Miniconda](https://docs.anaconda.com/free/miniconda/index.html) distribution from Continuum Analytics, which already contains a large number of popular Python packages for Data Science. Alternatively, you can also get Python from the [Python homepage](https://www.python.org/downloads/windows). Note that MsReport requires Python version 3.10 or higher.
|
|
95
|
+
|
|
96
|
+
The following command will install MsReport and its dependencies by using a wheel file.
|
|
97
|
+
|
|
98
|
+
```shell
|
|
99
|
+
pip install msreport
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
To uninstall the MsReport library use:
|
|
103
|
+
|
|
104
|
+
```shell
|
|
105
|
+
pip uninstall msreport
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Installation when using Anaconda
|
|
109
|
+
|
|
110
|
+
To install the MsReport library using Anaconda, you need to either activate a custom conda environment or install it into the default base environment. Open the Anaconda Navigator, activate the desired conda environment or use the base environment, and then open a command line by running the "CMD.exe" application. Finally, use the `pip install` command as before.
|
|
111
|
+
|
|
112
|
+
### Optional Dependencies
|
|
113
|
+
|
|
114
|
+
#### R Integration
|
|
115
|
+
|
|
116
|
+
MsReport provides an interface to the R package LIMMA for differential expression analysis. To use this functionality, you need:
|
|
117
|
+
|
|
118
|
+
- A local installation of **R (version 4.0 or higher)**.
|
|
119
|
+
- The system environment variable R_HOME set to the R home directory.
|
|
120
|
+
- An installation of msreport that includes the optional dependencies for R integration:
|
|
121
|
+
|
|
122
|
+
```shell
|
|
123
|
+
pip install msreport[R]
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
#### Setting the R_HOME environment variable
|
|
127
|
+
|
|
128
|
+
On Windows, you may need to restart your computer after modifying the system environment variables for the changes to take effect. To find the R home directory, you can run the following command in R:
|
|
129
|
+
|
|
130
|
+
```R
|
|
131
|
+
normalizePath(R.home("home"))
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
For example, the R home directory might look like this on Windows: `C:\Program Files\R\R-4.2.1`
|
|
135
|
+
|
|
136
|
+
## Development status
|
|
137
|
+
|
|
138
|
+
MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
|
|
139
|
+
|
|
140
|
+
## How to cite
|
|
141
|
+
|
|
142
|
+
If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
|
|
143
|
+
|
|
144
|
+
> Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# MsReport
|
|
2
|
+
|
|
3
|
+
[](https://www.repostatus.org/#wip)
|
|
4
|
+
[](https://doi.org/10.5281/zenodo.15309090)
|
|
5
|
+

|
|
6
|
+
[](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
|
|
7
|
+
|
|
8
|
+
**MsReport** is a Python library for post-processing quantitative proteomics data from
|
|
9
|
+
bottom-up mass spectrometry experiments.
|
|
10
|
+
|
|
11
|
+
## Table of Contents
|
|
12
|
+
|
|
13
|
+
- [What is MsReport?](#what-is-msreport)
|
|
14
|
+
- [Key features of MsReport](#key-features-of-msreport)
|
|
15
|
+
- [Installation](#installation)
|
|
16
|
+
- [Installation when using Anaconda](#installation-when-using-anaconda)
|
|
17
|
+
- [Additional requirements](#additional-requirements)
|
|
18
|
+
- [Optional Dependencies](#optional-dependencies)
|
|
19
|
+
- [Development status](#development-status)
|
|
20
|
+
- [How to cite](#how-to-cite)
|
|
21
|
+
|
|
22
|
+
## What is MsReport?
|
|
23
|
+
|
|
24
|
+
MsReport is a Python library designed to simplify the post-processing and analysis of quantitative proteomics data from bottom-up mass spectrometry experiments. It provides a high-level, abstraction-focused API for efficient and standardized workflows. The modular design of the library provides the flexibility to meet project-specific data processing needs and customize workflows as required.
|
|
25
|
+
|
|
26
|
+
The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
|
|
27
|
+
|
|
28
|
+
### Key features of MsReport
|
|
29
|
+
|
|
30
|
+
#### Data Import and Standardization
|
|
31
|
+
|
|
32
|
+
The `reader` module provides software-specific reader classes for importing data from MaxQuant, FragPipe, and Spectronaut that enable the import of protein, peptide and ion tables. During the import process, these classes transform table column names and values into a standardized format to ensure that the rest of the library can operate in a tool-agnostic manner.
|
|
33
|
+
|
|
34
|
+
#### Data management
|
|
35
|
+
|
|
36
|
+
The `qtable` module provides a structured approach to managing quantitative data through its central `Qtable` class. This class combines quantitative data with an experimental design table that defines the relationship between samples and experimental conditions. The quantitative data is stored in a wide format, where each sample's measurements are stored in separate columns. The `Qtable` class serves as the foundation for data analysis workflows in MsReport, providing the standardized data structure used by the `analyze`, `plot`, and `export` modules.
|
|
37
|
+
|
|
38
|
+
#### Data processing and analysis
|
|
39
|
+
|
|
40
|
+
The `analyze` module provides tools for post-processing of mass spectrometry data generated by software such as MaxQuant, FragPipe, or Spectronaut. It includes functions for filtering, normalization, imputation of missing values, and statistical testing. The library integrates with the R package LIMMA to enable differential expression analysis.
|
|
41
|
+
|
|
42
|
+
> [!NOTE]
|
|
43
|
+
> In order to use the R integration you need to install msreport with optional dependencies, see [Optional Dependencies](#optional-dependencies) for more information.
|
|
44
|
+
|
|
45
|
+
#### Data visualization
|
|
46
|
+
|
|
47
|
+
The `plot` module supports the generation of visualizations for quality control and data analysis. It includes functions for creating various plots, such as intensity and ratio distributions, heatmaps, volcano plots, and PCA plots.
|
|
48
|
+
|
|
49
|
+
#### Data export
|
|
50
|
+
|
|
51
|
+
Finally, the `export` module enables the conversion and export into formats compatible with external tools. This includes generating input files for [Amica](https://bioapps.maxperutzlabs.ac.at/app/amica) and exporting tables for easier integration with Perseus.
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
If you do not already have a Python installation, we recommend installing the [Anaconda distribution](https://www.anaconda.com/download) or [Miniconda](https://docs.anaconda.com/free/miniconda/index.html) distribution from Continuum Analytics, which already contains a large number of popular Python packages for Data Science. Alternatively, you can also get Python from the [Python homepage](https://www.python.org/downloads/windows). Note that MsReport requires Python version 3.10 or higher.
|
|
56
|
+
|
|
57
|
+
The following command will install MsReport and its dependencies by using a wheel file.
|
|
58
|
+
|
|
59
|
+
```shell
|
|
60
|
+
pip install msreport
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
To uninstall the MsReport library use:
|
|
64
|
+
|
|
65
|
+
```shell
|
|
66
|
+
pip uninstall msreport
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Installation when using Anaconda
|
|
70
|
+
|
|
71
|
+
To install the MsReport library using Anaconda, you need to either activate a custom conda environment or install it into the default base environment. Open the Anaconda Navigator, activate the desired conda environment or use the base environment, and then open a command line by running the "CMD.exe" application. Finally, use the `pip install` command as before.
|
|
72
|
+
|
|
73
|
+
### Optional Dependencies
|
|
74
|
+
|
|
75
|
+
#### R Integration
|
|
76
|
+
|
|
77
|
+
MsReport provides an interface to the R package LIMMA for differential expression analysis. To use this functionality, you need:
|
|
78
|
+
|
|
79
|
+
- A local installation of **R (version 4.0 or higher)**.
|
|
80
|
+
- The system environment variable R_HOME set to the R home directory.
|
|
81
|
+
- An installation of msreport that includes the optional dependencies for R integration:
|
|
82
|
+
|
|
83
|
+
```shell
|
|
84
|
+
pip install msreport[R]
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
#### Setting the R_HOME environment variable
|
|
88
|
+
|
|
89
|
+
On Windows, you may need to restart your computer after modifying the system environment variables for the changes to take effect. To find the R home directory, you can run the following command in R:
|
|
90
|
+
|
|
91
|
+
```R
|
|
92
|
+
normalizePath(R.home("home"))
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
For example, the R home directory might look like this on Windows: `C:\Program Files\R\R-4.2.1`
|
|
96
|
+
|
|
97
|
+
## Development status
|
|
98
|
+
|
|
99
|
+
MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
|
|
100
|
+
|
|
101
|
+
## How to cite
|
|
102
|
+
|
|
103
|
+
If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
|
|
104
|
+
|
|
105
|
+
> Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
|
|
@@ -9,10 +9,19 @@ import numpy as np
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
|
|
11
11
|
import msreport.normalize
|
|
12
|
-
|
|
12
|
+
from msreport.errors import OptionalDependencyError
|
|
13
13
|
from msreport.helper import find_sample_columns
|
|
14
14
|
from msreport.qtable import Qtable
|
|
15
15
|
|
|
16
|
+
try:
|
|
17
|
+
import msreport.rinterface
|
|
18
|
+
|
|
19
|
+
_rinterface_available = True
|
|
20
|
+
_rinterface_error = ""
|
|
21
|
+
except OptionalDependencyError as err:
|
|
22
|
+
_rinterface_available = False
|
|
23
|
+
_rinterface_error = str(err)
|
|
24
|
+
|
|
16
25
|
|
|
17
26
|
class Transformer(Protocol):
|
|
18
27
|
def fit(self, table: pd.DataFrame) -> Transformer:
|
|
@@ -528,8 +537,10 @@ def calculate_multi_group_limma(
|
|
|
528
537
|
ValueError: If all values from qtable.design["Batch"] are identical when 'batch'
|
|
529
538
|
is set to True.
|
|
530
539
|
"""
|
|
531
|
-
|
|
540
|
+
if not _rinterface_available:
|
|
541
|
+
raise OptionalDependencyError(_rinterface_error)
|
|
532
542
|
|
|
543
|
+
_validate_experiment_pairs(qtable, experiment_pairs)
|
|
533
544
|
# TODO: not tested #
|
|
534
545
|
if batch and "Batch" not in qtable.get_design():
|
|
535
546
|
raise KeyError(
|
|
@@ -618,8 +629,10 @@ def calculate_two_group_limma(
|
|
|
618
629
|
must have exactly two entries and the two entries must not be the same. Both
|
|
619
630
|
experiments must be present in qtable.design.
|
|
620
631
|
"""
|
|
621
|
-
|
|
632
|
+
if not _rinterface_available:
|
|
633
|
+
raise OptionalDependencyError(_rinterface_error)
|
|
622
634
|
|
|
635
|
+
_validate_experiment_pair(qtable, experiment_pair)
|
|
623
636
|
# TODO: LIMMA function not tested #
|
|
624
637
|
table = qtable.make_expression_table(samples_as_columns=True)
|
|
625
638
|
comparison_tag = " vs "
|
|
@@ -7,3 +7,7 @@ class NotFittedError(ValueError, AttributeError):
|
|
|
7
7
|
|
|
8
8
|
class ProteinsNotInFastaWarning(UserWarning):
|
|
9
9
|
"""Warning raised when queried proteins are absent from a FASTA file."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class OptionalDependencyError(ImportError):
|
|
13
|
+
"""Raised when an optional dependency is required but not installed."""
|
|
@@ -204,7 +204,8 @@ def experiment_ratios(
|
|
|
204
204
|
mask = np.all([(qtable.data[f"Events {exp}"] > 0) for exp in experiments], axis=0)
|
|
205
205
|
if exclude_invalid:
|
|
206
206
|
mask = mask & qtable["Valid"]
|
|
207
|
-
|
|
207
|
+
# Use `mask.to_numpy` to solve issue with different indices of mask and dataframe
|
|
208
|
+
experiment_data = experiment_data[mask.to_numpy()]
|
|
208
209
|
pseudo_reference = np.nanmean(experiment_data, axis=1)
|
|
209
210
|
ratio_data = experiment_data.subtract(pseudo_reference, axis=0)
|
|
210
211
|
|
|
@@ -27,13 +27,11 @@ class Qtable:
|
|
|
27
27
|
design: A pandas.DataFrame describing the experimental design.
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
|
-
_default_id_column = "Representative protein"
|
|
31
|
-
|
|
32
30
|
def __init__(
|
|
33
31
|
self,
|
|
34
32
|
data: pd.DataFrame,
|
|
35
|
-
design:
|
|
36
|
-
id_column: str
|
|
33
|
+
design: pd.DataFrame,
|
|
34
|
+
id_column: str,
|
|
37
35
|
):
|
|
38
36
|
"""Initializes the Qtable.
|
|
39
37
|
|
|
@@ -42,12 +40,13 @@ class Qtable:
|
|
|
42
40
|
|
|
43
41
|
Args:
|
|
44
42
|
data: A dataframe containing quantitative proteomics data in a wide format.
|
|
43
|
+
The index of the dataframe must contain unique values.
|
|
45
44
|
design: A dataframe describing the experimental design that must at least
|
|
46
45
|
contain the columns "Sample" and "Experiment". The "Sample" entries
|
|
47
46
|
should correspond to the Sample names present in the quantitative
|
|
48
47
|
columns of the data.
|
|
49
48
|
id_column: The name of the column that contains the unique identifiers for
|
|
50
|
-
the entries in the data table.
|
|
49
|
+
the entries in the data table.
|
|
51
50
|
|
|
52
51
|
Raises:
|
|
53
52
|
KeyError: If the specified id_column is not found in data.
|
|
@@ -76,8 +75,7 @@ class Qtable:
|
|
|
76
75
|
self._id_column = id_column
|
|
77
76
|
if "Valid" not in self.data.columns:
|
|
78
77
|
self.data["Valid"] = True
|
|
79
|
-
|
|
80
|
-
self.add_design(design)
|
|
78
|
+
self.add_design(design)
|
|
81
79
|
|
|
82
80
|
self._expression_columns: list[str] = []
|
|
83
81
|
self._expression_features: list[str] = []
|
|
@@ -438,6 +436,11 @@ class Qtable:
|
|
|
438
436
|
|
|
439
437
|
Returns:
|
|
440
438
|
An instance of Qtable loaded from the specified files.
|
|
439
|
+
|
|
440
|
+
Raises:
|
|
441
|
+
ValueError: If the loaded config file does not contain the
|
|
442
|
+
"Unique ID column" key. This is due to the qtable being saved with a
|
|
443
|
+
version of msreport <= 0.0.27.
|
|
441
444
|
"""
|
|
442
445
|
filepaths = _get_qtable_export_filepaths(directory, basename)
|
|
443
446
|
with open(filepaths["config"]) as openfile:
|
|
@@ -458,13 +461,20 @@ class Qtable:
|
|
|
458
461
|
filepaths["design"], sep="\t", index_col=0, keep_default_na=True
|
|
459
462
|
)
|
|
460
463
|
|
|
461
|
-
|
|
464
|
+
if "Unique ID column" not in config_data:
|
|
465
|
+
# Mention that the qtable was likely saved with a version of msreport <= 0.0.27
|
|
466
|
+
raise ValueError(
|
|
467
|
+
"The qtable config file does not contain the 'Unique ID column' key. "
|
|
468
|
+
"This is likely due to the qtable being saved with a version of "
|
|
469
|
+
"msreport <= 0.0.27."
|
|
470
|
+
)
|
|
471
|
+
id_column = config_data["Unique ID column"]
|
|
472
|
+
|
|
473
|
+
qtable = Qtable(data, design, id_column)
|
|
462
474
|
qtable._expression_columns = config_data["Expression columns"]
|
|
463
475
|
qtable._expression_features = config_data["Expression features"]
|
|
464
476
|
qtable._expression_sample_mapping = config_data["Expression sample mapping"]
|
|
465
477
|
# This check is required for backwards compatibility with msreport <= 0.0.27
|
|
466
|
-
if "Unique ID column" in config_data:
|
|
467
|
-
qtable._id_column = config_data["Unique ID column"]
|
|
468
478
|
return qtable
|
|
469
479
|
|
|
470
480
|
def to_tsv(self, path: str, index: bool = False):
|
|
@@ -570,7 +580,7 @@ class Qtable:
|
|
|
570
580
|
self._expression_sample_mapping = {}
|
|
571
581
|
|
|
572
582
|
def __copy__(self) -> Qtable:
|
|
573
|
-
new_instance = Qtable(self.data, self.design)
|
|
583
|
+
new_instance = Qtable(self.data, self.design, self.id_column)
|
|
574
584
|
# Copy all private attributes
|
|
575
585
|
for attr in dir(self):
|
|
576
586
|
if (
|