PyPI - msreport - Versions diffs - 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl - Mend

msreport 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

msreport/__init__.py +16 -7
msreport/analyze.py +414 -39
msreport/plot/distribution.py +3 -3
msreport/plot/multivariate.py +0 -1
msreport/plot/style_sheets/_all_relevant_styles.md +594 -0
msreport/qtable.py +3 -2
msreport/reader.py +9 -1
msreport/rinterface/limma.py +68 -3
msreport/rinterface/rscripts/limma.R +79 -18
{msreport-0.0.32.dist-info → msreport-0.0.33.dist-info}/METADATA +153 -154
{msreport-0.0.32.dist-info → msreport-0.0.33.dist-info}/RECORD +13 -13
{msreport-0.0.32.dist-info → msreport-0.0.33.dist-info}/WHEEL +1 -2
msreport-0.0.32.dist-info/top_level.txt +0 -1
{msreport-0.0.32.dist-info → msreport-0.0.33.dist-info}/licenses/LICENSE.txt +0 -0

msreport/__init__.py CHANGED Viewed

@@ -1,11 +1,20 @@
-import msreport.analyze
-import msreport.export
-import msreport.impute
-import msreport.normalize
-import msreport.plot
-import msreport.reader
+from msreport import analyze, export, impute, normalize, plot, reader
 from msreport.fasta import import_protein_database
 from msreport.qtable import Qtable
 from msreport.reader import FragPipeReader, MaxQuantReader, SpectronautReader
-__version__ = "0.0.32"
+__version__ = "0.0.33"
+__all__ = [
+    "analyze",
+    "export",
+    "impute",
+    "normalize",
+    "plot",
+    "reader",
+    "import_protein_database",
+    "Qtable",
+    "FragPipeReader",
+    "MaxQuantReader",
+    "SpectronautReader",
+]

msreport/analyze.py CHANGED Viewed

@@ -10,6 +10,8 @@ from typing import Iterable, Optional, Protocol, Sequence
 import numpy as np
 import pandas as pd
+import scipy.stats
+import statsmodels.stats.multitest
 from typing_extensions import Self
 import msreport.normalize
@@ -533,7 +535,7 @@ def calculate_multi_group_comparison(
         qtable.add_expression_features(comparison_table)
-def two_group_comparison(
+def calculate_two_group_comparison(
     qtable: Qtable, experiment_pair: Iterable[str], exclude_invalid: bool = True
 ) -> None:
     """Calculates comparison values for two experiments.
@@ -560,19 +562,23 @@ def calculate_multi_group_limma(
     exclude_invalid: bool = True,
     batch: bool = False,
     limma_trend: bool = True,
+    min_replicates: int = 2,
 ) -> None:
     """Uses limma to perform a differential expression analysis of multiple experiments.
     For each experiment pair specified in 'experiment_pairs' the following new columns
     are added to the qtable:
-    - "P-value Experiment_1 vs Experiment_2"
-    - "Adjusted p-value Experiment_1 vs Experiment_2"
-    - "Average expression Experiment_1 vs Experiment_2"
-    - "Ratio [log2] Experiment_1 vs Experiment_2"
-    Requires that expression columns are set, and expression values are log2 transformed
-    All rows with missing values are ignored, impute missing values to allow
-    differential expression analysis of all rows.
+    - "P-value 'Experiment_1' vs 'Experiment_2'"
+    - "Adjusted p-value 'Experiment_1' vs 'Experiment_2'"
+    - "Average expression 'Experiment_1' vs 'Experiment_2'"
+    - "Ratio [log2] 'Experiment_1' vs 'Experiment_2'"
+    Requires that the Qtable has defined expression columns, and expression values are
+    log2 transformed. For each experiment and row, if the number of non-missing values
+    is below 'min_replicates', all values for that experiment and row are set to NaN for
+    the LIMMA calculation. As a result, p-values are only calculated for rows where both
+    experiments have at least 'min_replicates' non-missing values. Adjusted p-values are
+    calculated using the Benjamini-Hochberg (BH) method.
     Args:
         qtable: Qtable instance that contains expression values for differential
@@ -588,8 +594,12 @@ def calculate_multi_group_limma(
         limma_trend: If true, an intensity-dependent trend is fitted to the prior
             variance during calculation of the moderated t-statistics, refer to
             limma.eBayes for details; default True.
+        min_replicates: Minimum number of non-missing values required in the row of any
+            experiment to be included in the analysis; default 2.
     Raises:
+        OptionalDependencyError: If the R interface is not available, which is required
+            for using LIMMA.
         ValueError: If 'experiment_pairs' contains invalid entries. Each experiment pair
             must have exactly two entries and the two entries must not be the same. All
             experiments must be present in qtable.design. No duplicate experiment pairs
@@ -620,12 +630,27 @@ def calculate_multi_group_limma(
     table.index = table.index.astype(str)  # It appears that a string is required for R
     comparison_tag = " vs "
+    # Apply min_replicates filter
+    for experiment in qtable.get_experiments():
+        samples = qtable.get_samples(experiment)
+        counts = table[samples].notna().sum(axis=1)
+        table.loc[counts < min_replicates, samples] = np.nan
     if exclude_invalid:
-        valid = qtable["Valid"]
+        mask = qtable["Valid"].copy().to_numpy() & (table.notna().sum(axis=1) > 0)
     else:
-        valid = np.full(table.shape[0], True)
-    not_nan = table.isna().sum(axis=1) == 0
-    mask = np.all([valid, not_nan], axis=0)
+        mask = table.notna().sum(axis=1) > 0
+    # At least one row with one condition with two values are required for LIMMA
+    valid_exp_rows = []
+    for exp in qtable.get_experiments():
+        samples = qtable.get_samples(exp)
+        valid_exp_rows.append(table.loc[mask, samples].notna().sum(axis=1) >= 2)
+    if np.array(valid_exp_rows).any(axis=0).sum() == 0:
+        raise ValueError(
+            "No rows with sufficient data for differential expression analysis remain"
+            " after applying 'min_replicates' and 'exclude_invalid' filters."
+        )
     # Exchange experiment names with names that are guaranteed to be valid in R
     experiment_to_r = {}
@@ -637,7 +662,7 @@ def calculate_multi_group_limma(
     for exp1, exp2 in experiment_pairs:
         r_experiment_pairs.append(f"{experiment_to_r[exp1]}-{experiment_to_r[exp2]}")
-    design.replace({"Experiment": experiment_to_r}, inplace=True)
+    design = design.replace({"Experiment": experiment_to_r})
     # Run limma and join results for all comparison groups
     limma_results = msreport.rinterface.multi_group_limma(
@@ -647,35 +672,43 @@ def calculate_multi_group_limma(
         experiment_pair = [r_to_experiment[s] for s in r_comparison_group.split("-")]
         comparison_group = comparison_tag.join(experiment_pair)
         mapping = {col: f"{col} {comparison_group}" for col in limma_result.columns}
-        limma_result.rename(columns=mapping, inplace=True)
+        limma_result = limma_result.rename(columns=mapping)
     limma_table = pd.DataFrame(index=table.index)
     limma_table = limma_table.join(list(limma_results.values()))
-    limma_table.fillna(np.nan, inplace=True)
+    limma_table = limma_table.astype("float64")
     qtable.add_expression_features(limma_table)
     # Average expression from limma is the whole row mean, overwrite with the average
     # expression of the experiment group
     for experiment_pair in experiment_pairs:
-        two_group_comparison(qtable, experiment_pair, exclude_invalid=exclude_invalid)
+        calculate_two_group_comparison(
+            qtable, experiment_pair, exclude_invalid=exclude_invalid
+        )
 def calculate_two_group_limma(
     qtable: Qtable,
     experiment_pair: Sequence[str],
     exclude_invalid: bool = True,
+    batch: bool = False,
     limma_trend: bool = True,
+    min_replicates: int = 2,
 ) -> None:
     """Uses limma to perform a differential expression analysis of two experiments.
-    Adds new columns "P-value Experiment_1 vs Experiment_2",
-    "Adjusted p-value Experiment_1 vs Experiment_2",
-    "Average expression Experiment_1 vs Experiment_2", and
-    "Ratio [log2] Experiment_1 vs Experiment_2" to the qtable.
+    New columns that are added to the qtable:
+    - "P-value 'Experiment_1' vs 'Experiment_2'"
+    - "Adjusted p-value 'Experiment_1' vs 'Experiment_2'"
+    - "Average expression 'Experiment_1' vs 'Experiment_2'"
+    - "Ratio [log2] 'Experiment_1' vs 'Experiment_2'"
-    Requires that expression columns are set, and expression values are log2
-    transformed. All rows with missing values are ignored, impute missing values to
-    allow differential expression analysis of all rows.
+    Requires that the Qtable has defined expression columns, and expression values are
+    log2 transformed. For each experiment and row, if the number of non-missing values
+    is below 'min_replicates', all values for that experiment and row are set to NaN for
+    the LIMMA calculation. As a result, p-values are only calculated for rows where both
+    experiments have at least 'min_replicates' non-missing values. Adjusted p-values are
+    calculated using the Benjamini-Hochberg (BH) method.
     Args:
         qtable: Qtable instance that contains expression values for differential
@@ -684,56 +717,398 @@ def calculate_two_group_limma(
             experiments must be present in qtable.design
         exclude_invalid: If true, the column "Valid" is used to determine which rows are
             used for the differential expression analysis; default True.
+        batch: If true batch effects are considered for the differential expression
+            analysis. Batches must be specified in the design in a "Batch" column.
         limma_trend: If true, an intensity-dependent trend is fitted to the prior
             variances; default True.
+        min_replicates: Minimum number of non-missing values required in both
+            experiments to include a row in the analysis; default 2.
     Raises:
+        OptionalDependencyError: If the R interface is not available, which is required
+            for using LIMMA.
+        KeyError: If the "Batch" column is not present in the qtable.design when
+            'batch' is set to True.
         ValueError: If 'experiment_pair' contains invalid entries. The experiment pair
             must have exactly two entries and the two entries must not be the same. Both
             experiments must be present in qtable.design.
+        ValueError: If all values from qtable.design["Batch"] are identical when 'batch'
+            is set to True.
+        ValueError: If no rows with sufficient data for differential expression analysis
+            remain after applying 'min_replicates' and 'exclude_invalid' filters.
     """
     if not _rinterface_available:
         raise OptionalDependencyError(_rinterface_error)
     _validate_experiment_pair(qtable, experiment_pair)
+    if batch and "Batch" not in qtable.get_design():
+        raise KeyError(
+            "When using calculate_two_group_limma(batch=True) a"
+            ' "Batch" column must be present in qtable.design'
+        )
+    if batch and qtable.get_design()["Batch"].nunique() == 1:
+        raise ValueError(
+            "When using calculate_two_group_limma(batch=True), not all values from"
+            ' qtable.design["Batch"] are allowed to be identical.'
+        )
     # TODO: LIMMA function not tested #
     table = qtable.make_expression_table(samples_as_columns=True)
     comparison_tag = " vs "
-    if exclude_invalid:
-        valid = qtable["Valid"]
-    else:
-        valid = np.full(table.shape[0], True)
     samples_to_experiment = {}
-    for experiment in experiment_pair:
-        mapping = dict.fromkeys(qtable.get_samples(experiment), experiment)
-        samples_to_experiment.update(mapping)
+    for exp in experiment_pair:
+        samples_to_experiment.update(dict.fromkeys(qtable.get_samples(exp), exp))
     # Keep only samples that are present in the 'experiment_pair'
     table = table[samples_to_experiment.keys()]
     table.index = table.index.astype(str)  # It appears that a string is required for R
-    not_nan = table.isna().sum(axis=1) == 0
+    groups = [samples_to_experiment[s] for s in table.columns]
-    mask = np.all([valid, not_nan], axis=0)
-    experiments = list(samples_to_experiment.values())
+    # Apply min_replicates filter
+    for experiment in experiment_pair:
+        samples = qtable.get_samples(experiment)
+        counts = table[samples].notna().sum(axis=1)
+        table.loc[counts < min_replicates, samples] = np.nan
+    if exclude_invalid:
+        mask = qtable["Valid"].copy().to_numpy() & (table.notna().sum(axis=1) > 0)
+    else:
+        mask = table.notna().sum(axis=1) > 0
+    # At least one row with one condition with two values are required for LIMMA
+    valid_exp_rows = []
+    for exp in experiment_pair:
+        samples = qtable.get_samples(exp)
+        valid_exp_rows.append(table.loc[mask, samples].notna().sum(axis=1) >= 2)
+    if np.array(valid_exp_rows).any(axis=0).sum() == 0:
+        raise ValueError(
+            "No rows with sufficient data for differential expression analysis remain"
+            " after applying 'min_replicates' and 'exclude_invalid' filters."
+        )
+    batch_groups = None
+    if batch:
+        design_df = qtable.get_design().set_index("Sample")
+        batch_groups = [str(design_df.loc[s, "Batch"]) for s in table.columns]
+    print(table[mask])
     # Note that the order of experiments for calling limma is reversed
     limma_result = msreport.rinterface.two_group_limma(
-        table[mask], experiments, experiment_pair[1], experiment_pair[0], limma_trend
+        table[mask],
+        groups,
+        experiment_pair[1],
+        experiment_pair[0],
+        limma_trend,
+        batch_groups,
     )
     # For adding expression features to the qtable it is necessary that the
     # the limma_results have the same number of rows.
-    limma_table = pd.DataFrame(index=table.index, columns=limma_result.columns)
+    limma_table = pd.DataFrame(
+        index=table.index, columns=limma_result.columns, dtype="float64"
+    )
     limma_table[mask] = limma_result
-    limma_table.fillna(np.nan, inplace=True)
     comparison_group = comparison_tag.join(experiment_pair)
     mapping = {col: f"{col} {comparison_group}" for col in limma_table.columns}
-    limma_table.rename(columns=mapping, inplace=True)
+    limma_table = limma_table.rename(columns=mapping)
     qtable.add_expression_features(limma_table)
+def calculate_anova_limma(
+    qtable: Qtable,
+    experiments: Iterable[str] | None = None,
+    exclude_invalid: bool = True,
+    batch: bool = False,
+    limma_trend: bool = True,
+    min_replicates: int = 2,
+) -> None:
+    """Calculates one-way moderated ANOVA using LIMMA across multiple experiment groups.
+    New columns that are added to the qtable:
+    - "ANOVA p-value"
+    - "ANOVA adjusted p-value"
+    Requires that the Qtable has defined expression columns, and expression values are
+    log2 transformed. ANOVA is calculated for rows where at least two experiments meet
+    the 'min_replicates' threshold of non-missing values. For a given row, an experiment
+    group failing this threshold is treated as missing data for the calculation. At
+    least two valid experiments must remain for a p-value to be generated. Adjusted
+    p-values are calculated using the Benjamini-Hochberg (BH) method.
+    Args:
+        qtable: Qtable instance that contains expression values for the analysis.
+        experiments: A list of experiment names from qtable.design["Experiment"] to
+            include in the ANOVA. If None, all experiments in the design are used;
+            default None.
+        exclude_invalid: If true, the column "Valid" is used to determine which rows are
+            used for the ANOVA; default True.
+        batch: If true batch effects are considered for the differential expression
+            analysis. Batches must be specified in the design in a "Batch" column.
+        limma_trend: If true, an intensity-dependent trend is fitted to the prior
+            variances; default True.
+        min_replicates: Minimum number of non-missing values required per experiment to
+            include that experiment's data for a given row; default 2.
+    Raises:
+        OptionalDependencyError: If the R interface is not available, which is required
+            for using LIMMA.
+        KeyError: If the "Batch" column is not present in the qtable.design when
+            'batch' is set to True.
+        ValueError: If 'experiments' contains entries not present in qtable.design.
+        ValueError: If less than two experiments with at least 'min_replicates' are
+            are present in the qtable.design when performing the ANOVA.
+    """
+    if not _rinterface_available:
+        raise OptionalDependencyError(_rinterface_error)
+    # TODO: not tested #
+    if batch and "Batch" not in qtable.get_design():
+        raise KeyError(
+            'When using calculate_anova_limma(batch=True) a "Batch" column must be '
+            "present in qtable.design"
+        )
+    if batch and qtable.get_design()["Batch"].nunique() == 1:
+        raise ValueError(
+            "When using calculate_anova_limma(batch=True), not all values from"
+            ' qtable.design["Batch"] are allowed to be identical.'
+        )
+    if experiments is not None and any(
+        e not in qtable.design["Experiment"].unique() for e in experiments
+    ):
+        raise ValueError("Some specified experiments are not present in qtable.design.")
+    if experiments is None:
+        experiments = qtable.get_experiments()
+    valid_experiments = []
+    for experiment in experiments:
+        if len(qtable.get_samples(experiment)) >= min_replicates:
+            valid_experiments.append(experiment)
+    if len(valid_experiments) < 2:
+        raise ValueError(
+            f"At least two experiments with {min_replicates} or more replicates are "
+            "required for calculating moderated ANOVA statistics with LIMMA."
+        )
+    design = qtable.get_design()
+    design = design[design["Experiment"].isin(valid_experiments)]
+    table = qtable.make_expression_table(samples_as_columns=True)
+    table = table[design["Sample"]]
+    for experiment in valid_experiments:
+        samples = qtable.get_samples(experiment)
+        counts = table[samples].notna().sum(axis=1)
+        table.loc[counts < min_replicates, samples] = np.nan
+    if exclude_invalid:
+        mask = qtable["Valid"].to_numpy() & (table.notna().sum(axis=1) > 0)
+    else:
+        mask = table.notna().sum(axis=1) > 0
+    # At least one row with one condition with two values are required for LIMMA
+    valid_exp_rows = []
+    for exp in valid_experiments:
+        samples = qtable.get_samples(exp)
+        valid_exp_rows.append(table.loc[mask, samples].notna().sum(axis=1) >= 2)
+    if np.array(valid_exp_rows).any(axis=0).sum() == 0:
+        raise ValueError(
+            "No rows with sufficient data for ANOVA analysis remain"
+            " after applying 'min_replicates' and 'exclude_invalid' filters."
+        )
+    # Exchange experiment names with names that are guaranteed to be valid in R
+    experiment_to_r = {}
+    for i, experiment in enumerate(design["Experiment"].unique()):
+        experiment_to_r[experiment] = f".EXPERIMENT__{i:04d}"
+    design = design.replace({"Experiment": experiment_to_r})
+    limma_result = msreport.rinterface.limma.limma_anova(
+        table[mask], design, batch, limma_trend
+    )
+    # For adding expression features to the qtable it is necessary that the
+    # the limma_results have the same number of rows.
+    limma_table = pd.DataFrame(
+        index=table.index, columns=limma_result.columns, dtype="float64"
+    )
+    limma_table[mask] = limma_result
+    qtable.add_expression_features(limma_table)
+def calculate_multi_group_ttest(
+    qtable: Qtable,
+    experiment_pairs: Sequence[Iterable[str]],
+    exclude_invalid: bool = True,
+    equal_var: bool = False,
+) -> None:
+    """Calculates t-tests for multiple experiment pairs.
+    For each experiment pair specified in 'experiment_pairs' the following new columns
+    are added to the qtable:
+    - "P-value Experiment_1 vs Experiment_2"
+    - "Adjusted p-value Experiment_1 vs Experiment_2"
+    Missing values are ommitted and the ttest is calculated only for rows where both
+    experiment groups have at least two quantified values. Adjusted p-values are
+    calculated using the Benjamini-Hochberg (BH) method. Requires that expression
+    columns are set in the qtable.
+    Args:
+        qtable: Qtable instance that contains expression values for t-tests.
+        experiment_pairs: A list containing one or multiple experiment pairs for which
+            t-tests should be calculated. The specified experiments must correspond to
+            entries from qtable.design["Experiment"].
+        exclude_invalid: If true, the column "Valid" is used to determine which rows are
+            used for the t-tests; default True.
+        equal_var: If true, the two groups are assumed to have identical variances and
+            a standard independent 2 sample t-test is performed. If false, Welch's
+            t-test is performed; default False.
+    Raises:
+        ValueError: If 'experiment_pairs' contains invalid entries. Each experiment pair
+            must have exactly two entries and the two entries must not be the same. All
+            experiments must be present in qtable.design. No duplicate experiment pairs
+            are allowed.
+    """
+    _validate_experiment_pairs(qtable, experiment_pairs)
+    min_required_values = 2
+    table = qtable.make_expression_table(samples_as_columns=True, features=["Valid"])
+    comparison_tag = " vs "
+    if exclude_invalid:
+        valid = table["Valid"].to_numpy()
+    else:
+        valid = np.full(table.shape[0], True)
+    for experiment_pair in experiment_pairs:
+        group_expressions = []
+        for experiment in experiment_pair:
+            samples = qtable.get_samples(experiment)
+            group_expressions.append(table.loc[valid, samples])
+        # implement the at least two values per experiment rule here, set rows to nan
+        # where this is not the case
+        for i in range(2):
+            num_values = np.isfinite(group_expressions[i]).sum(axis=1)
+            insufficient_values = num_values < min_required_values
+            group_expressions[i].loc[insufficient_values, :] = np.nan
+        _, pvalues = scipy.stats.ttest_ind(
+            group_expressions[0],
+            group_expressions[1],
+            axis=1,
+            equal_var=equal_var,
+            nan_policy="omit",
+        )
+        finite_pvalues = pvalues[np.isfinite(pvalues)]
+        _, finite_adjusted_pvalues, _, _ = statsmodels.stats.multitest.multipletests(
+            finite_pvalues, method="fdr_bh"
+        )
+        adjusted_pvalues = pvalues.copy()
+        adjusted_pvalues[np.isfinite(pvalues)] = finite_adjusted_pvalues
+        comparison_group = comparison_tag.join(experiment_pair)
+        comparison_table = pd.DataFrame(
+            {
+                f"P-value {comparison_group}": np.full(table.shape[0], np.nan),
+                f"Adjusted p-value {comparison_group}": np.full(table.shape[0], np.nan),
+            }
+        )
+        comparison_table.loc[valid, f"P-value {comparison_group}"] = pvalues
+        comparison_table.loc[valid, f"Adjusted p-value {comparison_group}"] = (
+            adjusted_pvalues
+        )
+        qtable.add_expression_features(comparison_table)
+def calculate_anova(
+    qtable: Qtable,
+    experiments: Iterable[str] | None = None,
+    exclude_invalid: bool = True,
+    equal_var: bool = False,
+) -> None:
+    """Calculates one-way ANOVA across multiple experiment groups.
+    New columns are added to the qtable:
+    - "ANOVA P-value Experiments_Used"
+    - "ANOVA Adjusted p-value Experiments_Used"
+    Missing values are omitted, and ANOVA is calculated only for rows where all
+    experiment groups have at least two quantified values. Adjusted p-values are
+    calculated using the Benjamini-Hochberg (BH) method. Requires that expression
+    columns are set in the qtable.
+    Args:
+        qtable: Qtable instance that contains expression values for the analysis.
+        experiments: A list of experiment names from qtable.design["Experiment"] to
+            include in the ANOVA. If None, all experiments in the design are used;
+            default None.
+        exclude_invalid: If true, the column "Valid" is used to determine which rows are
+            used for the ANOVA; default True.
+        equal_var: If true, the groups are assumed to have identical variances and a
+            standard one-way ANOVA is performed. If false, Welch's ANOVA is performed;
+            default False.
+    Raises:
+        ValueError: If 'experiments' contains entries not present in qtable.design.
+    """
+    if experiments is not None and any(
+        e not in qtable.design["Experiment"].unique() for e in experiments
+    ):
+        raise ValueError("Some specified experiments are not present in qtable.design.")
+    min_required_values = 2
+    if experiments is None:
+        experiments = qtable.get_experiments()
+    table = qtable.make_expression_table(samples_as_columns=True, features=["Valid"])
+    if exclude_invalid:
+        valid = table["Valid"].to_numpy()
+    else:
+        valid = np.full(table.shape[0], True)
+    experiment_data = []
+    for experiment in experiments:
+        samples = qtable.get_samples(experiment)
+        experiment_data.append(table[samples].to_numpy())
+    experiment_array = np.array(experiment_data)
+    for replicate_data in experiment_data:
+        valid_entries = np.isfinite(replicate_data).sum(axis=1) < min_required_values
+        valid[valid_entries] = False
+    anova_pvalues = []
+    for row_data in experiment_array[:, valid, :].swapaxes(0, 1):
+        anova_input = []
+        for values in row_data:
+            anova_input.append(values[~np.isnan(values)])
+        _, pvalue = scipy.stats.f_oneway(*anova_input, equal_var=equal_var)
+        anova_pvalues.append(pvalue)
+    _, anova_adjusted_pvalues, _, _ = statsmodels.stats.multitest.multipletests(
+        anova_pvalues, method="fdr_bh"
+    )
+    pvalues = np.empty((table.shape[0],))
+    pvalues[:] = np.nan
+    pvalues[valid] = anova_pvalues
+    adjusted_pvalues = np.empty((table.shape[0],))
+    adjusted_pvalues[:] = np.nan
+    adjusted_pvalues[valid] = anova_adjusted_pvalues
+    comparison_table = pd.DataFrame(
+        {
+            "ANOVA p-value": pvalues,
+            "ANOVA adjusted p-value": adjusted_pvalues,
+        }
+    )
+    qtable.add_expression_features(comparison_table)
 def _validate_experiment_pairs(
     qtable: Qtable, exp_pairs: Iterable[Iterable[str]]
 ) -> None:

msreport/plot/distribution.py CHANGED Viewed

@@ -201,11 +201,11 @@ def experiment_ratios(
     experiment_data = pd.DataFrame(experiment_means)
     # Only consider rows with quantitative values in all experiments
-    mask = np.all([(qtable.data[f"Events {exp}"] > 0) for exp in experiments], axis=0)
+    mask = experiment_data.isna().sum(axis=1) == 0
     if exclude_invalid:
-        mask = mask & qtable["Valid"]
+        mask = mask.to_numpy() & qtable["Valid"].to_numpy()
     # Use `mask.to_numpy` to solve issue with different indices of mask and dataframe
-    experiment_data = experiment_data[mask.to_numpy()]
+    experiment_data = experiment_data[mask]
     pseudo_reference = np.nanmean(experiment_data, axis=1)
     ratio_data = experiment_data.subtract(pseudo_reference, axis=0)

msreport/plot/multivariate.py CHANGED Viewed

@@ -258,7 +258,6 @@ def expression_clustermap(
         raise ValueError("At least two samples are required to generate a clustermap.")
     data = qtable.make_expression_table(samples_as_columns=True, exclude_invalid=False)
-    data = data[samples]
     data = data.fillna(0)
     if not mean_center:  # Hide missing values in the heatmap, making them appear white

msreport 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl

msreport 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl