msreport 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,14 @@
1
1
  """Python interface to the 'limma.R' script."""
2
2
 
3
3
  import os
4
+ from typing import Sequence
4
5
 
5
6
  import pandas as pd
6
7
  import rpy2.robjects as robjects
8
+ from pyparsing import Optional
7
9
  from rpy2.robjects import numpy2ri, pandas2ri
8
10
  from rpy2.robjects.conversion import localconverter
11
+ from rpy2.robjects.vectors import StrVector
9
12
 
10
13
  from .rinstaller import install_limma_if_missing
11
14
 
@@ -76,7 +79,12 @@ def multi_group_limma(
76
79
 
77
80
 
78
81
  def two_group_limma(
79
- table: pd.DataFrame, groups: list[str], group1: str, group2: str, trend: bool
82
+ table: pd.DataFrame,
83
+ groups: Sequence[str],
84
+ group1: str,
85
+ group2: str,
86
+ trend: bool,
87
+ batch_groups: Sequence[str] | None = None,
80
88
  ) -> pd.DataFrame:
81
89
  """Use limma to calculate differential expression analysis of two groups.
82
90
 
@@ -84,11 +92,14 @@ def two_group_limma(
84
92
  table: Contains quantitative data for differential expression analysis.
85
93
  groups: A list that contains a group name for each column. List entries must
86
94
  be equal to 'group1' or 'group2'.
87
- group1: Experimental group 1
95
+ group1: Experimental group 1, used as the reference.
88
96
  group2: Experimental group 2, used as the coefficient
89
97
  trend: If true an intensity-dependent trend is fitted to the prior variance
90
98
  during calculation of the moderated t-statistics, refer to limma.eBayes for
91
99
  details.
100
+ batch_groups: Optional, a list that contains a batch name for each column. If
101
+ provided, batch effects are considered for the differential expression
102
+ analysis.
92
103
 
93
104
  Returns:
94
105
  A dataframe containing "Average expression", "Ratio [log2]", "P-value", and
@@ -100,7 +111,14 @@ def two_group_limma(
100
111
  R_two_group_limma = robjects.globalenv[".two_group_limma"]
101
112
 
102
113
  with localconverter(robjects.default_converter + pandas2ri.converter):
103
- limma_result = R_two_group_limma(table, groups, group1, group2, trend)
114
+ limma_result = R_two_group_limma(
115
+ table,
116
+ StrVector(groups),
117
+ group1,
118
+ group2,
119
+ trend,
120
+ StrVector(batch_groups) if batch_groups is not None else StrVector([]),
121
+ )
104
122
 
105
123
  column_mapping = {
106
124
  "AveExpr": "Average expression",
@@ -112,6 +130,53 @@ def two_group_limma(
112
130
  return limma_result[columns_to_keep].rename(columns=column_mapping)
113
131
 
114
132
 
133
+ def limma_anova(
134
+ table: pd.DataFrame,
135
+ design: pd.DataFrame,
136
+ batch: bool,
137
+ trend: bool,
138
+ ) -> pd.DataFrame:
139
+ """Use limma to calculate a one-way moderated ANOVA for multiple groups.
140
+
141
+ Args:
142
+ table: Contains quantitative data for differential expression analysis. Column
143
+ names must correspond to entries from `design["Sample"]`.
144
+ design: Dataframe describing the experimental design of the 'table', where each
145
+ row must correspond to a column in 'table'. The 'design' must contain the
146
+ columns "Sample" and "Experiment". If batch correction should be applied,
147
+ batches must be described in the "Batch" column. Names must be valid R
148
+ names, for reference see the R function make.names.
149
+ batch: If true batch effects are considered for the differential expression
150
+ analysis. Batches must be specified in the design in a "Batch" column.
151
+ trend: If true an intensity-dependent trend is fitted to the prior variance
152
+ during calculation of the moderated t-statistics, refer to limma.eBayes for
153
+ details.
154
+
155
+ Returns:
156
+ A dataframe containing the following columns: "ANOVA p-value" and
157
+ "ANOVA adjusted p-value".
158
+ """
159
+ install_limma_if_missing()
160
+ rscript_path = _find_rscript_paths()["limma.R"]
161
+ robjects.r["source"](rscript_path)
162
+ R_limma_anova = robjects.globalenv[".limma_anova"]
163
+
164
+ column_mapping = {
165
+ "P.Value": "ANOVA p-value",
166
+ "adj.P.Val": "ANOVA adjusted p-value",
167
+ }
168
+ columns_to_keep = column_mapping.keys()
169
+
170
+ # `R_limma_anova` expects that the sample order in table and design are equal
171
+ table = table[design["Sample"]]
172
+
173
+ with localconverter(robjects.default_converter + pandas2ri.converter):
174
+ limma_result = R_limma_anova(table, design, batch, trend)
175
+ limma_result = limma_result[columns_to_keep].rename(columns=column_mapping)
176
+ limma_result.index = table.index
177
+ return limma_result
178
+
179
+
115
180
  def _find_rscript_paths() -> dict[str, str]:
116
181
  """Returns a mapping for filepaths from the msreport.rinterface.rscripts folder.
117
182
 
@@ -76,29 +76,90 @@ library(limma)
76
76
  }
77
77
 
78
78
 
79
- #' Performs differential expression analysis between two groups
79
+ #' Performs differential expression of two groups using limma
80
80
  #'
81
81
  #' @param data_frame A matrix-like data object containing log-expression values
82
82
  #' for a series of samples.
83
- #' @param groups: A list that contains a group name for each column. List entries must
84
- #' be equal to 'group1' or 'group2'.
85
- #' @param group1: Experimental group 1
86
- #' @param group2: Experimental group 2, used as the coefficient
87
- #' @param trend Logical, if true an intensity-dependent trend is fitted to the
88
- #' prior variance during calculation of the moderated t-statistics, refer to
89
- #' limma.eBayes for details.
90
- #' @return Returns a dataframe which contains the results of the differential expression
91
- #' analysis, generated by the topTable() function. Dataframe columns are "logFC",
92
- #' "AveExpr", "t", "P.Value", "adj.P.Val", and "B".
93
- .two_group_limma <- function(data_frame, column_groups, group1, group2, trend) {
94
- design <- model.matrix(~factor(column_groups, levels=c(group1, group2)))
95
- colnames(design) <- c(group1, group2)
83
+ #' @param column_groups A list or vector that contains a group name for each
84
+ #' column. List entries must be equal to 'group1' or 'group2'.
85
+ #' @param group1 Experimental group 1 (reference).
86
+ #' @param group2 Experimental group 2, used as the coefficient.
87
+ #' @param trend Logical. If TRUE, an intensity-dependent trend is fitted to the
88
+ #' prior variance. See \code{limma::eBayes} for details.
89
+ #' @param batch_groups A vector or factor specifying the batch for each column.
90
+ #' If empty, no batch correction is performed.
91
+ #' @return Returns a dataframe which contains the results of the differential
92
+ #' expression analysis, generated by the topTable() function. Dataframe
93
+ #' columns are "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", and "B".
94
+ .two_group_limma <- function(
95
+ data_frame,
96
+ column_groups,
97
+ group1,
98
+ group2,
99
+ trend,
100
+ batch_groups = NULL) {
101
+ group_factor <- factor(column_groups, levels = c(group1, group2))
96
102
 
103
+ if (length(batch_groups) > 0) {
104
+ batch_factor <- factor(batch_groups)
105
+ design <- model.matrix(~ batch_factor + group_factor)
106
+ target_coef <- paste0("group_factor", group2)
107
+ } else {
108
+ design <- model.matrix(~ group_factor)
109
+ colnames(design) <- c(group1, group2)
110
+ target_coef <- group2
111
+ }
97
112
  fit_lm <- lmFit(data_frame, design)
98
113
  fit_ebayes <- eBayes(fit_lm, trend = trend)
99
- limma_results <- topTable(fit_ebayes, number = Inf, coef = group2,
100
- adjust = "BH", sort.by = "none")
101
- limma_results <- cbind(rownames(limma_results), limma_results)
102
- colnames(limma_results)[1] <- "id"
114
+ limma_results <- topTable(
115
+ fit_ebayes,
116
+ number = Inf,
117
+ coef = target_coef,
118
+ adjust = "BH",
119
+ sort.by = "none"
120
+ )
121
+ limma_results <- cbind(id = rownames(limma_results), limma_results)
103
122
  return(limma_results)
104
123
  }
124
+
125
+
126
+ #' Performs a one-way moderated ANOVA for multiple groups using limma
127
+ #'
128
+ #' @param data_frame A matrix-like data object containing log-expression values
129
+ #' for a series of samples.
130
+ #' @param experimental_design A matrix-like data object describing the
131
+ #' experimental design of the data_frame. Must contain a column "Experiment".
132
+ #' @param batch Logical, if true batch effects are considered for the
133
+ #' differential expression analysis. Batches must be specified in the
134
+ #' experimental_design in a "Batch" column.
135
+ #' @param trend Logical, if true an intensity-dependent trend is fitted to the
136
+ #' prior variance (refer to limma::eBayes).
137
+ #' @return A dataframe containing the results of the ANOVA-style F-test.
138
+ .limma_anova <- function(
139
+ data_frame,
140
+ experimental_design,
141
+ batch = FALSE,
142
+ trend = FALSE) {
143
+ group <- as.factor(experimental_design[["Experiment"]])
144
+
145
+ if (batch) {
146
+ batch_factor <- factor(experimental_design[["Batch"]])
147
+ design <- model.matrix(~ group + batch_factor)
148
+ } else {
149
+ design <- model.matrix(~ group)
150
+ }
151
+
152
+ fit <- lmFit(data_frame, design)
153
+ fit <- eBayes(fit, trend = trend)
154
+ group_indices <- grep("^group", colnames(design))
155
+
156
+ anova_results <- topTable(
157
+ fit,
158
+ coef = group_indices,
159
+ number = Inf,
160
+ adjust.method = "BH",
161
+ sort.by = "none"
162
+ )
163
+
164
+ return(anova_results)
165
+ }
@@ -1,154 +1,153 @@
1
- Metadata-Version: 2.4
2
- Name: msreport
3
- Version: 0.0.32
4
- Summary: Post processing and analysis of quantitative proteomics data
5
- Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
6
- License-Expression: Apache-2.0
7
- Project-URL: homepage, https://github.com/hollenstein/msreport
8
- Project-URL: documentation, https://hollenstein.github.io/msreport/
9
- Project-URL: changelog, https://github.com/hollenstein/msreport/blob/main/CHANGELOG.md
10
- Keywords: mass spectrometry,proteomics,post processing,data analysis
11
- Classifier: Development Status :: 4 - Beta
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
- Classifier: Programming Language :: Python :: 3.13
16
- Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
17
- Requires-Python: >=3.10
18
- Description-Content-Type: text/markdown
19
- License-File: LICENSE.txt
20
- Requires-Dist: adjustText<1.0.0,>=0.7.0
21
- Requires-Dist: matplotlib>=3.5.2
22
- Requires-Dist: numpy>=1.21.5
23
- Requires-Dist: pandas>=1.4.4
24
- Requires-Dist: profasta>=0.0.4
25
- Requires-Dist: pyteomics>=4.6.0
26
- Requires-Dist: pyyaml>=6.0.0
27
- Requires-Dist: scikit-learn>=1.0.0
28
- Requires-Dist: scipy>=1.9.1
29
- Requires-Dist: seaborn>=0.12.0
30
- Requires-Dist: statsmodels>=0.13.2
31
- Requires-Dist: typing_extensions>=4
32
- Provides-Extra: r
33
- Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == "r"
34
- Provides-Extra: dev
35
- Requires-Dist: mypy>=1.15.0; extra == "dev"
36
- Requires-Dist: pytest>=8.3.5; extra == "dev"
37
- Provides-Extra: docs
38
- Requires-Dist: mkdocs-awesome-nav>=3.1.2; extra == "docs"
39
- Requires-Dist: mkdocs-macros-plugin>=1.3.7; extra == "docs"
40
- Requires-Dist: mkdocs-material>=9.6.15; extra == "docs"
41
- Requires-Dist: mkdocs-roamlinks-plugin>=0.3.2; extra == "docs"
42
- Requires-Dist: mkdocstrings-python>=1.16.12; extra == "docs"
43
- Requires-Dist: ruff>=0.12.2; extra == "docs"
44
- Provides-Extra: test
45
- Requires-Dist: pytest>=8.3.5; extra == "test"
46
- Dynamic: license-file
47
-
48
- # MsReport
49
-
50
- [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
51
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15309090.svg)](https://doi.org/10.5281/zenodo.15309090)
52
- ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fhollenstein%2Fmsreport%2Fmain%2Fpyproject.toml)
53
- [![Run tests](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml/badge.svg)](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
54
-
55
- **MsReport** is a Python library for post-processing quantitative proteomics data from
56
- bottom-up mass spectrometry experiments.
57
-
58
- ## Table of Contents
59
-
60
- - [What is MsReport?](#what-is-msreport)
61
- - [Key features of MsReport](#key-features-of-msreport)
62
- - [Installation](#installation)
63
- - [Installation when using Anaconda](#installation-when-using-anaconda)
64
- - [Additional requirements](#additional-requirements)
65
- - [Optional Dependencies](#optional-dependencies)
66
- - [Development status](#development-status)
67
- - [How to cite](#how-to-cite)
68
-
69
- ## What is MsReport?
70
-
71
- MsReport is a Python library designed to simplify the post-processing and analysis of quantitative proteomics data from bottom-up mass spectrometry experiments. It provides a high-level, abstraction-focused API for efficient and standardized workflows. The modular design of the library provides the flexibility to meet project specific data processing needs and customize workflows as required.
72
-
73
- The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
74
-
75
- The [documentation](https://hollenstein.github.io/msreport/) provides an overview of the library's public API.
76
-
77
- ### Key features of MsReport
78
-
79
- #### Data Import and Standardization
80
-
81
- The `reader` module provides software-specific reader classes for importing data from MaxQuant, FragPipe, and Spectronaut that enable the import of protein, peptide and ion tables. During the import process, these classes transform tables column names and table values into a standardized format to ensure that the rest of the library can operate in a tool-agnostic manner.
82
-
83
- #### Data management
84
-
85
- The `qtable` module provides a structured approach to managing quantitative data through its central `Qtable` class. This class combines quantitative data with an experimental design table that defines the relationship between samples and experimental conditions. The quantitative data is stored in a wide format, where each sample's measurements are stored in separate columns. The `Qtable` class serves as the foundation for data analysis workflows in MsReport, providing the standardized data structure used by the `analyze`, `plot`, and `export` modules.
86
-
87
- #### Data processing and analysis
88
-
89
- The `analyze` module provides tools for post-processing of mass spectrometry data generated by software such as MaxQuant, FragPipe, or Spectronaut. It includes functions for filtering, normalization, imputation of missing values, and statistical testing. The library integrates with the R package LIMMA to enable differential expression analysis.
90
-
91
- > [!NOTE]
92
- > In order to use the R integration you need to install msreport with optional dependencies, see [Optional Dependencies](#optional-dependencies) for more information.
93
-
94
- #### Data visualization
95
-
96
- The `plot` module supports the generation of visualizations for quality control and data analysis. It includes functions for creating various plots, such as intensity and ratio distributions, heatmaps, volcano plots, and PCA plots.
97
-
98
- #### Data export
99
-
100
- Finally, the `export` module enables the conversion and export into formats compatible with external tools. This includes generating input files for [Amica](https://bioapps.maxperutzlabs.ac.at/app/amica) and exporting tables for easier integration with Perseus.
101
-
102
- ## Installation
103
-
104
- If you do not already have a Python installation, we recommend installing the [Anaconda distribution](https://www.anaconda.com/download) or [Miniconda](https://docs.anaconda.com/free/miniconda/index.html) distribution from Continuum Analytics, which already contains a large number of popular Python packages for Data Science. Alternatively, you can also get Python from the [Python homepage](https://www.python.org/downloads/windows). Note that MsReport requires Python version 3.10 or higher.
105
-
106
- The following command will install MsReport and its dependencies by using a wheel file.
107
-
108
- ```shell
109
- pip install msreport
110
- ```
111
-
112
- To uninstall the MsReport library use:
113
-
114
- ```shell
115
- pip uninstall msreport
116
- ```
117
-
118
- ### Installation when using Anaconda
119
-
120
- To install the MsReport library using Anaconda, you need to either activate a custom conda environment or install it into the default base environment. Open the Anaconda Navigator, activate the desired conda environment or use the base environment, and then open a command line by running the "CMD.exe" application. Finally, use the `pip install` command as before.
121
-
122
- ### Optional Dependencies
123
-
124
- #### R Integration
125
-
126
- MsReport provides an interface to the R package LIMMA for differential expression analysis. To use this functionality, you need:
127
-
128
- - A local installation of **R (version 4.0 or higher)**.
129
- - The system environment variable R_HOME set to the R home directory.
130
- - To install msreport with the optional dependencies for R integration.
131
-
132
- ```shell
133
- pip install msreport[R]
134
- ```
135
-
136
- #### Setting the R_HOME environment variable
137
-
138
- On Windows, you may need to restart your computer after modifying the system environment variables for the changes to take effect. To find the R home directory, you can run the following command in R:
139
-
140
- ```R
141
- normalizePath(R.home("home"))
142
- ```
143
-
144
- For example, the R home directory might look like this on Windows: `C:\Program Files\R\R-4.2.1`
145
-
146
- ## Development status
147
-
148
- MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
149
-
150
- ## How to cite
151
-
152
- If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
153
-
154
- > Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
1
+ Metadata-Version: 2.4
2
+ Name: msreport
3
+ Version: 0.0.33
4
+ Summary: Post processing and analysis of quantitative proteomics data
5
+ Project-URL: homepage, https://github.com/hollenstein/msreport
6
+ Project-URL: documentation, https://hollenstein.github.io/msreport/
7
+ Project-URL: changelog, https://github.com/hollenstein/msreport/blob/main/CHANGELOG.md
8
+ Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
9
+ License-Expression: Apache-2.0
10
+ License-File: LICENSE.txt
11
+ Keywords: data analysis,mass spectrometry,post processing,proteomics
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: adjusttext<1.0.0,>=0.7.0
20
+ Requires-Dist: matplotlib>=3.8.0
21
+ Requires-Dist: numpy>=1.26.0
22
+ Requires-Dist: pandas<3.0.0,>=2.1.0
23
+ Requires-Dist: profasta>=0.0.4
24
+ Requires-Dist: pyteomics>=4.7.0
25
+ Requires-Dist: pyyaml>=6.0.1
26
+ Requires-Dist: scikit-learn>=1.2.0
27
+ Requires-Dist: scipy>=1.16.0
28
+ Requires-Dist: seaborn>=0.13.0
29
+ Requires-Dist: statsmodels>=0.14.0
30
+ Requires-Dist: typing-extensions>=4.4.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: mypy>=1.15.0; extra == 'dev'
33
+ Requires-Dist: pytest>=8.3.5; extra == 'dev'
34
+ Provides-Extra: docs
35
+ Requires-Dist: mkdocs-awesome-nav>=3.1.2; extra == 'docs'
36
+ Requires-Dist: mkdocs-macros-plugin>=1.3.7; extra == 'docs'
37
+ Requires-Dist: mkdocs-material>=9.6.15; extra == 'docs'
38
+ Requires-Dist: mkdocs-roamlinks-plugin>=0.3.2; extra == 'docs'
39
+ Requires-Dist: mkdocstrings-python>=1.16.12; extra == 'docs'
40
+ Requires-Dist: ruff>=0.12.2; extra == 'docs'
41
+ Provides-Extra: r
42
+ Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == 'r'
43
+ Provides-Extra: test
44
+ Requires-Dist: pytest>=8.3.5; extra == 'test'
45
+ Description-Content-Type: text/markdown
46
+
47
+ # MsReport
48
+
49
+ [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
50
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15309090.svg)](https://doi.org/10.5281/zenodo.15309090)
51
+ ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fhollenstein%2Fmsreport%2Fmain%2Fpyproject.toml)
52
+ [![Run tests](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml/badge.svg)](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
53
+
54
+ **MsReport** is a Python library for post-processing quantitative proteomics data from
55
+ bottom-up mass spectrometry experiments.
56
+
57
+ ## Table of Contents
58
+
59
+ - [What is MsReport?](#what-is-msreport)
60
+ - [Key features of MsReport](#key-features-of-msreport)
61
+ - [Installation](#installation)
62
+ - [Installation when using Anaconda](#installation-when-using-anaconda)
63
+ - [Additional requirements](#additional-requirements)
64
+ - [Optional Dependencies](#optional-dependencies)
65
+ - [Development status](#development-status)
66
+ - [How to cite](#how-to-cite)
67
+
68
+ ## What is MsReport?
69
+
70
+ MsReport is a Python library designed to simplify the post-processing and analysis of quantitative proteomics data from bottom-up mass spectrometry experiments. It provides a high-level, abstraction-focused API for efficient and standardized workflows. The modular design of the library provides the flexibility to meet project specific data processing needs and customize workflows as required.
71
+
72
+ The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
73
+
74
+ The [documentation](https://hollenstein.github.io/msreport/) provides an overview of the library's public API.
75
+
76
+ ### Key features of MsReport
77
+
78
+ #### Data Import and Standardization
79
+
80
+ The `reader` module provides software-specific reader classes for importing data from MaxQuant, FragPipe, and Spectronaut that enable the import of protein, peptide and ion tables. During the import process, these classes transform the tables' column names and values into a standardized format to ensure that the rest of the library can operate in a tool-agnostic manner.
81
+
82
+ #### Data management
83
+
84
+ The `qtable` module provides a structured approach to managing quantitative data through its central `Qtable` class. This class combines quantitative data with an experimental design table that defines the relationship between samples and experimental conditions. The quantitative data is stored in a wide format, where each sample's measurements are stored in separate columns. The `Qtable` class serves as the foundation for data analysis workflows in MsReport, providing the standardized data structure used by the `analyze`, `plot`, and `export` modules.
85
+
86
+ #### Data processing and analysis
87
+
88
+ The `analyze` module provides tools for post-processing of mass spectrometry data generated by software such as MaxQuant, FragPipe, or Spectronaut. It includes functions for filtering, normalization, imputation of missing values, and statistical testing. The library integrates with the R package LIMMA to enable differential expression analysis.
89
+
90
+ > [!NOTE]
91
+ > In order to use the R integration you need to install msreport with optional dependencies, see [Optional Dependencies](#optional-dependencies) for more information.
92
+
93
+ #### Data visualization
94
+
95
+ The `plot` module supports the generation of visualizations for quality control and data analysis. It includes functions for creating various plots, such as intensity and ratio distributions, heatmaps, volcano plots, and PCA plots.
96
+
97
+ #### Data export
98
+
99
+ Finally, the `export` module enables the conversion and export into formats compatible with external tools. This includes generating input files for [Amica](https://bioapps.maxperutzlabs.ac.at/app/amica) and exporting tables for easier integration with Perseus.
100
+
101
+ ## Installation
102
+
103
+ If you do not already have a Python installation, we recommend installing the [Anaconda distribution](https://www.anaconda.com/download) or [Miniconda](https://docs.anaconda.com/free/miniconda/index.html) distribution from Continuum Analytics, which already contains a large number of popular Python packages for Data Science. Alternatively, you can also get Python from the [Python homepage](https://www.python.org/downloads/windows). Note that MsReport requires Python version 3.11 or higher.
104
+
105
+ The following command will install MsReport and its dependencies by using a wheel file.
106
+
107
+ ```shell
108
+ pip install msreport
109
+ ```
110
+
111
+ To uninstall the MsReport library use:
112
+
113
+ ```shell
114
+ pip uninstall msreport
115
+ ```
116
+
117
+ ### Installation when using Anaconda
118
+
119
+ To install the MsReport library using Anaconda, you need to either activate a custom conda environment or install it into the default base environment. Open the Anaconda Navigator, activate the desired conda environment or use the base environment, and then open a command line by running the "CMD.exe" application. Finally, use the `pip install` command as before.
120
+
121
+ ### Optional Dependencies
122
+
123
+ #### R Integration
124
+
125
+ MsReport provides an interface to the R package LIMMA for differential expression analysis. To use this functionality, you need:
126
+
127
+ - A local installation of **R (version 4.0 or higher)**.
128
+ - The system environment variable R_HOME set to the R home directory.
129
+ - An installation of msreport with the optional dependencies for R integration:
130
+
131
+ ```shell
132
+ pip install msreport[R]
133
+ ```
134
+
135
+ #### Setting the R_HOME environment variable
136
+
137
+ On Windows, you may need to restart your computer after modifying the system environment variables for the changes to take effect. To find the R home directory, you can run the following command in R:
138
+
139
+ ```R
140
+ normalizePath(R.home("home"))
141
+ ```
142
+
143
+ For example, the R home directory might look like this on Windows: `C:\Program Files\R\R-4.2.1`
144
+
145
+ ## Development status
146
+
147
+ MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
148
+
149
+ ## How to cite
150
+
151
+ If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
152
+
153
+ > Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
@@ -1,5 +1,5 @@
1
- msreport/__init__.py,sha256=hmq4---v9oHxQm9gidnxGryrWB8HqPfMPHaPryBS_Oc,339
2
- msreport/analyze.py,sha256=T6ORhBYP3Qnil0r7qF5CkwS2KHUsedpU5P-0paqUmaA,33838
1
+ msreport/__init__.py,sha256=NwOJ59aWwUhcX8FdYu4ta66Rg2eM1u21JYG4E6C6F0k,493
2
+ msreport/analyze.py,sha256=UX12dPiAUD5BkmEwGaJI417BpMGfyidmu7SYfW1hSns,50454
3
3
  msreport/errors.py,sha256=X9yFxMiIOCWQdxuqBGr8L7O3vRV2KElXdX1uHbFcZMk,421
4
4
  msreport/export.py,sha256=wXQfaVd5UHlGKyKdrt2UWbhzNf-VyJy2Up5qfrPzO2M,20229
5
5
  msreport/fasta.py,sha256=hPz4xlkjeTV-2YCrtWMsQQJSkJSmH1ZzNZBxHI89Nqk,1489
@@ -7,8 +7,8 @@ msreport/impute.py,sha256=q21cFKnpENE4GHUPz-R5FipkvagWjX4fa31qeb8uaxc,10782
7
7
  msreport/isobar.py,sha256=nh2Wem1wheqJ6wAJYm8be9FuK21c7T1k7nectJjPw7o,6729
8
8
  msreport/normalize.py,sha256=73n344jBQ9u-Ube_wOxF5Svi2ltKMnBKaw8M36hEaQM,23441
9
9
  msreport/peptidoform.py,sha256=mJhqoolFL6ZzwnmQkWhgJn8zIBoxv_GdYVSb-6gw37g,12615
10
- msreport/qtable.py,sha256=RhfGdij7cIVO5JiUC-xSQkd7zV-Q8KmC94daA9JotHc,28203
11
- msreport/reader.py,sha256=02cst1NRyBoeBaspfM67BM_KsTR9pt1NZQX49J_Wev0,131276
10
+ msreport/qtable.py,sha256=wwronradSz0bkO-T6u1HiRZ6ISglUf6Mmay_TaQ7B2U,28290
11
+ msreport/reader.py,sha256=FxIEeM_U2VrVIxIFHYkS5nvtd_68QFeu3AuFqlU6Lyc,131548
12
12
  msreport/aggregate/__init__.py,sha256=Y5HnN9C2PRjWfq4epJAoNqyp4Pv6WQfguAcSYKIhRuw,609
13
13
  msreport/aggregate/condense.py,sha256=fspY8osQfjzzehw3v4Up2QSihNiixhQpAiCiwXLIpCQ,6301
14
14
  msreport/aggregate/pivot.py,sha256=Myk9QhOmQWge7MvGlFYwdD4u7pdqYaAaFZ0uxZH4d28,5491
@@ -21,18 +21,18 @@ msreport/helper/temp.py,sha256=jNulgDATf9sKXEFWMXAhjflciOZPAqlxg_7QZS7IkW8,3736
21
21
  msreport/plot/__init__.py,sha256=p-oLxmZIvfC--xkjB0ka321xddW-lst19PmokJq9lTk,1457
22
22
  msreport/plot/_partial_plots.py,sha256=tqZTSXEPuruMgVakaGR2tUQl5OrHgo2cROJ0S4cqkR0,5598
23
23
  msreport/plot/comparison.py,sha256=Y2KOuakj-TxqdT2XNt7lnVZwimKSszvFQI-K9Pm80k8,18770
24
- msreport/plot/distribution.py,sha256=QNFL5vG9p-vqhwEk5WcCSXa2B8u5QgySZlAQIPys0-0,10248
25
- msreport/plot/multivariate.py,sha256=v79gcb-8s5bZVpaJn13MOmqsNA0ZvrV25JlXmHmp4WA,14046
24
+ msreport/plot/distribution.py,sha256=NokEY9vzvvTV6pycyp0WDU_N2ysnoSE3VoKuT4yhYmE,10223
25
+ msreport/plot/multivariate.py,sha256=lK_bsj5k6ijreGwnwbszUssDfzyVyvT_k0gy2q5F9xk,14020
26
26
  msreport/plot/quality.py,sha256=ZZKMkghmVESjA49Qg-iukVFBoDIgI2iWLlFa7vJWX7M,15869
27
27
  msreport/plot/style.py,sha256=67jWf4uA1ub9RJDu4xhuSoXAW0lbLj6SMP4QXQO76Pc,10591
28
+ msreport/plot/style_sheets/_all_relevant_styles.md,sha256=Ba-CZ3soMzuy9nV57n-HTv6IIzGk_NTsSwaasCaM1jA,32770
28
29
  msreport/plot/style_sheets/msreport-notebook.mplstyle,sha256=SPYO_7vYT8Ha7tQ0KCTLtykiRQ13-_igAm7kyvsZj1I,1266
29
30
  msreport/plot/style_sheets/seaborn-whitegrid.mplstyle,sha256=eC8Zboy8R7ybBwbHPKvKbMIHACystN6X6I0lqm7B80U,833
30
31
  msreport/rinterface/__init__.py,sha256=Zs6STvbDqaVZVPRM6iU0kKjq0TWz_2p2ChvNAveRdTA,616
31
- msreport/rinterface/limma.py,sha256=P-Fs8HARSXz60rO_vLc--of1hafk_IgGgPaNXnS_aKg,5424
32
+ msreport/rinterface/limma.py,sha256=K5c3-5E_FawyTW02OOn7dIwWfpBgxDfiJPel8kAtwU0,8040
32
33
  msreport/rinterface/rinstaller.py,sha256=AGs6NFMSwTLrzrIJz1E5BE5jFUz8eQBHlpM_MWVChzA,1370
33
- msreport/rinterface/rscripts/limma.R,sha256=gr_yjMm_YoG45irDhWOo6gkRQSTwj_7uU_p3NBRHPm8,4331
34
- msreport-0.0.32.dist-info/licenses/LICENSE.txt,sha256=Pd-b5cKP4n2tFDpdx27qJSIq0d1ok0oEcGTlbtL6QMU,11560
35
- msreport-0.0.32.dist-info/METADATA,sha256=_OI-LkqJoperzDBo6KjAir7Xq6jANqyjpqyxUfu9T-4,8998
36
- msreport-0.0.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
- msreport-0.0.32.dist-info/top_level.txt,sha256=Drl8mCckJHFIw-Ovh5AnyjKnqvLJltDOBUr1JAcHAlI,9
38
- msreport-0.0.32.dist-info/RECORD,,
34
+ msreport/rinterface/rscripts/limma.R,sha256=ElL7wyIUNxMjIiRJjPCKKK3bZh8H7YuSelk_WjHusqE,6128
35
+ msreport-0.0.33.dist-info/METADATA,sha256=wTPtgrlNorqX_805BaMqfib2HO-2Vjm42fllfvnXSWw,8833
36
+ msreport-0.0.33.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
37
+ msreport-0.0.33.dist-info/licenses/LICENSE.txt,sha256=Pd-b5cKP4n2tFDpdx27qJSIq0d1ok0oEcGTlbtL6QMU,11560
38
+ msreport-0.0.33.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1 +0,0 @@
1
- msreport