PyPI - msreport - Versions diffs - 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl - Mend

msreport 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

msreport/__init__.py +4 -6
msreport/aggregate/condense.py +1 -1
msreport/aggregate/pivot.py +1 -0
msreport/aggregate/summarize.py +2 -2
msreport/analyze.py +117 -36
msreport/errors.py +5 -2
msreport/export.py +16 -13
msreport/fasta.py +2 -1
msreport/helper/__init__.py +7 -7
msreport/helper/calc.py +14 -15
msreport/helper/maxlfq.py +2 -2
msreport/helper/table.py +5 -6
msreport/impute.py +4 -3
msreport/isobar.py +10 -9
msreport/normalize.py +2 -1
msreport/peptidoform.py +6 -4
msreport/plot/__init__.py +41 -0
msreport/plot/_partial_plots.py +159 -0
msreport/plot/comparison.py +490 -0
msreport/plot/distribution.py +253 -0
msreport/plot/multivariate.py +355 -0
msreport/plot/quality.py +431 -0
msreport/plot/style.py +286 -0
msreport/plot/style_sheets/msreport-notebook.mplstyle +57 -0
msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +45 -0
msreport/qtable.py +109 -17
msreport/reader.py +235 -86
msreport/rinterface/__init__.py +16 -3
msreport/rinterface/limma.py +2 -1
msreport/rinterface/rinstaller.py +3 -3
msreport-0.0.29.dist-info/METADATA +136 -0
msreport-0.0.29.dist-info/RECORD +38 -0
{msreport-0.0.27.dist-info → msreport-0.0.29.dist-info}/WHEEL +1 -1
msreport/plot.py +0 -1134
msreport-0.0.27.dist-info/METADATA +0 -129
msreport-0.0.27.dist-info/RECORD +0 -30
{msreport-0.0.27.dist-info → msreport-0.0.29.dist-info}/licenses/LICENSE.txt +0 -0
{msreport-0.0.27.dist-info → msreport-0.0.29.dist-info}/top_level.txt +0 -0

msreport/__init__.py CHANGED Viewed

@@ -1,13 +1,11 @@
-from msreport.qtable import Qtable
-from msreport.reader import MaxQuantReader, FragPipeReader, SpectronautReader
-from msreport.fasta import import_protein_database
 import msreport.analyze
 import msreport.export
 import msreport.impute
 import msreport.normalize
 import msreport.plot
 import msreport.reader
+from msreport.fasta import import_protein_database
+from msreport.qtable import Qtable
+from msreport.reader import FragPipeReader, MaxQuantReader, SpectronautReader
-__version__ = "0.0.27"
+__version__ = "0.0.29"

msreport/aggregate/condense.py CHANGED Viewed

@@ -71,7 +71,7 @@ def maximum_per_column(array: np.ndarray) -> np.ndarray:
     return np.array([maximum(i) for i in array.transpose()])
-def minimum(array: np.ndarray) -> int:
+def minimum(array: np.ndarray) -> float:
     """Returns the lowest finite value from one or multiple columns."""
     array = array.flatten()
     if np.isfinite(array).any():

msreport/aggregate/pivot.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from typing import Iterable, Union
 import pandas as pd
 import msreport.aggregate.condense as CONDENSE
 import msreport.helper

msreport/aggregate/summarize.py CHANGED Viewed

@@ -218,7 +218,7 @@ def aggregate_unique_groups(
     columns_to_aggregate: Union[str, Iterable],
     condenser: Callable,
     is_sorted: bool,
-) -> (np.ndarray, np.ndarray):
+) -> tuple[np.ndarray, np.ndarray]:
     """Aggregates column(s) by applying a condenser function to unique groups.
     The function returns two arrays containing the aggregated values and the
@@ -256,7 +256,7 @@ def aggregate_unique_groups(
 def _prepare_grouping_indices(
     table: pd.DataFrame, group_by: str, is_sorted: bool
-) -> (np.ndarray, np.ndarray, pd.DataFrame):
+) -> tuple[np.ndarray, np.ndarray, pd.DataFrame]:
     """Prepares start indices and names of unique groups from a sorted dataframe.
     Args:

msreport/analyze.py CHANGED Viewed

@@ -1,15 +1,26 @@
-""" The analyze module contains methods for analysing quantification results. """
+"""The analyze module contains methods for analysing quantification results."""
 from __future__ import annotations
-from typing import Iterable, Optional, Protocol
 import warnings
+from typing import Iterable, Optional, Protocol, Sequence
 import numpy as np
 import pandas as pd
 import msreport.normalize
-import msreport.rinterface
+from msreport.errors import OptionalDependencyError
 from msreport.helper import find_sample_columns
+from msreport.qtable import Qtable
+try:
+    import msreport.rinterface
+    _rinterface_available = True
+    _rinterface_error = ""
+except OptionalDependencyError as err:
+    _rinterface_available = False
+    _rinterface_error = str(err)
 class Transformer(Protocol):
@@ -234,7 +245,7 @@ def normalize_expression(
 def create_site_to_protein_normalizer(
     qtable: Qtable, category_column: str = "Representative protein"
-) -> msreport.normalizer.CategoricalNormalizer:
+) -> msreport.normalize.CategoricalNormalizer:
     """Creates a fitted `CategoricalNormalizer` for site-to-protein normalization.
     The `CategoricalNormalizer` is fitted to protein expression profiles of the provided
@@ -254,8 +265,8 @@ def create_site_to_protein_normalizer(
         samples_as_columns=True,
         features=[category_column],
     )
-    completely_quantified = (
-        ~reference_expression[qtable.get_samples()].isna().any(axis=1)
+    completely_quantified = ~reference_expression[qtable.get_samples()].isna().any(
+        axis=1
     )
     reference_expression = reference_expression[completely_quantified]
@@ -275,7 +286,7 @@ def create_ibaq_transformer(
     qtable: Qtable,
     category_column: str = "Representative protein",
     ibaq_column: str = "iBAQ peptides",
-) -> msreport.normalizer.CategoricalNormalizer:
+) -> msreport.normalize.CategoricalNormalizer:
     """Creates a fitted `CategoricalNormalizer` for iBAQ transformation.
     The `CategoricalNormalizer` is fitted to iBAQ peptide counts of the provided
@@ -301,7 +312,7 @@ def create_ibaq_transformer(
     ibaq_factor_values[ibaq_factor_values < 1] = 1
     ibaq_factor_values = np.log2(ibaq_factor_values)
-    reference_table = pd.DataFrame({c: ibaq_factor_values for c in sample_columns})
+    reference_table = pd.DataFrame(dict.fromkeys(sample_columns, ibaq_factor_values))
     reference_table[category_column] = category_values
     normalizer = msreport.normalize.CategoricalNormalizer(category_column)
@@ -422,7 +433,15 @@ def calculate_multi_group_comparison(
             correspond to entries from qtable.design["Experiment"].
         exclude_invalid: If true, the column "Valid" is used to determine which rows are
             used for calculating the group comparisons; default True.
+    Raises:
+        ValueError: If 'experiment_pairs' contains invalid entries. Each experiment pair
+            must have exactly two entries and the two entries must not be the same. All
+            experiments must be present in qtable.design. No duplicate experiment pairs
+            are allowed.
     """
+    _validate_experiment_pairs(qtable, experiment_pairs)
     table = qtable.make_expression_table(samples_as_columns=True, features=["Valid"])
     comparison_tag = " vs "
@@ -475,7 +494,7 @@ def two_group_comparison(
 def calculate_multi_group_limma(
     qtable: Qtable,
-    experiment_pairs: Iterable[Iterable[str]],
+    experiment_pairs: Sequence[Iterable[str]],
     exclude_invalid: bool = True,
     batch: bool = False,
     limma_trend: bool = True,
@@ -491,8 +510,7 @@ def calculate_multi_group_limma(
     Requires that expression columns are set, and expression values are log2 transformed
     All rows with missing values are ignored, impute missing values to allow
-    differential expression analysis of all rows. The qtable.data column
-    "Representative protein" is used as the index.
+    differential expression analysis of all rows.
     Args:
         qtable: Qtable instance that contains expression values for differential
@@ -510,13 +528,19 @@ def calculate_multi_group_limma(
             limma.eBayes for details; default True.
     Raises:
+        ValueError: If 'experiment_pairs' contains invalid entries. Each experiment pair
+            must have exactly two entries and the two entries must not be the same. All
+            experiments must be present in qtable.design. No duplicate experiment pairs
+            are allowed.
         KeyError: If the "Batch" column is not present in the qtable.design when
             'batch' is set to True.
         ValueError: If all values from qtable.design["Batch"] are identical when 'batch'
             is set to True.
-        ValueError: If the same experiment pair has been specified multiple times in
-            'experiment_pairs'.
     """
+    if not _rinterface_available:
+        raise OptionalDependencyError(_rinterface_error)
+    _validate_experiment_pairs(qtable, experiment_pairs)
     # TODO: not tested #
     if batch and "Batch" not in qtable.get_design():
         raise KeyError(
@@ -528,17 +552,10 @@ def calculate_multi_group_limma(
             "When using calculate_multi_group_limma(batch=True), not all values from"
             ' qtable.design["Batch"] are allowed to be identical.'
         )
-    if len(list(experiment_pairs)) != len(set(experiment_pairs)):
-        raise ValueError(
-            "The same experiment pair has been specified multiple times."
-            " Each entry in the `experiment_pairs` argument must be unique."
-        )
     design = qtable.get_design()
-    table = qtable.make_expression_table(
-        samples_as_columns=True, features=["Representative protein"]
-    )
-    table = table.set_index("Representative protein")
+    table = qtable.make_expression_table(samples_as_columns=True)
+    table.index = table.index.astype(str)  # It appears that a string is required for R
     comparison_tag = " vs "
     if exclude_invalid:
@@ -554,7 +571,7 @@ def calculate_multi_group_limma(
         experiment_to_r[experiment] = f".EXPERIMENT__{i:04d}"
     r_to_experiment = {v: k for k, v in experiment_to_r.items()}
-    r_experiment_pairs = []
+    r_experiment_pairs: list[str] = []
     for exp1, exp2 in experiment_pairs:
         r_experiment_pairs.append(f"{experiment_to_r[exp1]}-{experiment_to_r[exp2]}")
@@ -583,7 +600,7 @@ def calculate_multi_group_limma(
 def calculate_two_group_limma(
     qtable: Qtable,
-    experiment_pair: list[str],
+    experiment_pair: Sequence[str],
     exclude_invalid: bool = True,
     limma_trend: bool = True,
 ) -> None:
@@ -596,8 +613,7 @@ def calculate_two_group_limma(
     Requires that expression columns are set, and expression values are log2
     transformed. All rows with missing values are ignored, impute missing values to
-    allow differential expression analysis of all rows. The qtable.data
-    column "Representative protein" is used as the index.
+    allow differential expression analysis of all rows.
     Args:
         qtable: Qtable instance that contains expression values for differential
@@ -608,27 +624,32 @@ def calculate_two_group_limma(
             used for the differential expression analysis; default True.
         limma_trend: If true, an intensity-dependent trend is fitted to the prior
             variances; default True.
+    Raises:
+        ValueError: If 'experiment_pair' contains invalid entries. The experiment pair
+            must have exactly two entries and the two entries must not be the same. Both
+            experiments must be present in qtable.design.
     """
-    # TODO: not tested #
-    expression_table = qtable.make_expression_table(
-        samples_as_columns=True, features=["Representative protein"]
-    )
+    if not _rinterface_available:
+        raise OptionalDependencyError(_rinterface_error)
+    _validate_experiment_pair(qtable, experiment_pair)
+    # TODO: LIMMA function not tested #
+    table = qtable.make_expression_table(samples_as_columns=True)
     comparison_tag = " vs "
     if exclude_invalid:
         valid = qtable["Valid"]
     else:
-        valid = np.full(expression_table.shape[0], True)
+        valid = np.full(table.shape[0], True)
     samples_to_experiment = {}
     for experiment in experiment_pair:
-        mapping = {s: experiment for s in qtable.get_samples(experiment)}
+        mapping = dict.fromkeys(qtable.get_samples(experiment), experiment)
         samples_to_experiment.update(mapping)
-    table_columns = ["Representative protein"]
-    table_columns.extend(samples_to_experiment.keys())
-    table = expression_table[table_columns]
-    table = table.set_index("Representative protein")
+    # Keep only samples that are present in the 'experiment_pair'
+    table = table[samples_to_experiment.keys()]
+    table.index = table.index.astype(str)  # It appears that a string is required for R
     not_nan = table.isna().sum(axis=1) == 0
     mask = np.all([valid, not_nan], axis=0)
@@ -649,3 +670,63 @@ def calculate_two_group_limma(
     mapping = {col: f"{col} {comparison_group}" for col in limma_table.columns}
     limma_table.rename(columns=mapping, inplace=True)
     qtable.add_expression_features(limma_table)
+def _validate_experiment_pairs(
+    qtable: Qtable, exp_pairs: Iterable[Iterable[str]]
+) -> None:
+    """Validates that experiment pairs are valid and raises an error if not.
+    - All 'exp_pairs' entries must have a length of exactly 2.
+    - All experiments must be present in the qtable.design.
+    - No duplicate experiments are allowed in a pair.
+    - No duplicate experiment pairs are allowed.
+    Args:
+        qtable: Qtable instance containing experiment data.
+        exp_pairs: Iterable of experiment pairs to validate.
+    Raises:
+        ValueError: If any of the validation checks fail.
+    """
+    all_experiments = {exp for pair in exp_pairs for exp in pair}
+    missing_experiments = all_experiments - set(qtable.get_experiments())
+    if missing_experiments:
+        raise ValueError(
+            f"Experiments '{missing_experiments}' not found in qtable.design."
+        )
+    for experiment_pair in exp_pairs:
+        _validate_experiment_pair(qtable, experiment_pair)
+    if len(list(exp_pairs)) != len({tuple(pair) for pair in exp_pairs}):
+        raise ValueError(
+            f"Some experiment pairs in {exp_pairs} have been specified multiple "
+            "times. Each pair must occur only once."
+        )
+def _validate_experiment_pair(qtable: Qtable, exp_pair: Iterable[str]) -> None:
+    """Validates the experiment pair is valid and raises an error if not.
+    - The experiment pair must contain exactly two entries
+    - The two entries of the experiment pair must be different.
+    - Both  experiments must be present in the qtable.design.
+    Args:
+        qtable: Qtable instance containing experiment data.
+        experiment_pairs: Iterable of experiment pairs to validate.
+    Raises:
+        ValueError: If any of the validation checks fail.
+    """
+    if len(list(exp_pair)) != 2:
+        raise ValueError(
+            f"Experiment pair '{exp_pair}' contains more than two entries."
+        )
+    if len(list(exp_pair)) != len(set(exp_pair)):
+        raise ValueError(f"Experiment pair '{exp_pair}' contains the same entry twice.")
+    if set(exp_pair) - set(qtable.get_experiments()):
+        raise ValueError(
+            f"Experiments '{set(exp_pair) - set(qtable.get_experiments())}' "
+            "not found in qtable.design."
+        )

msreport/errors.py CHANGED Viewed

@@ -1,5 +1,4 @@
-class MsreportError(Exception):
-    ...
+class MsreportError(Exception): ...
 class NotFittedError(ValueError, AttributeError):
@@ -8,3 +7,7 @@ class NotFittedError(ValueError, AttributeError):
 class ProteinsNotInFastaWarning(UserWarning):
     """Warning raised when queried proteins are absent from a FASTA file."""
+class OptionalDependencyError(ImportError):
+    """Raised when an optional dependency is required but not installed."""

msreport/export.py CHANGED Viewed

@@ -13,10 +13,10 @@ Index([
 ], dtype='object')
 """
-from collections import defaultdict as ddict
 import os
-from typing import Iterable, Optional, Protocol
 import warnings
+from collections import defaultdict as ddict
+from typing import Iterable, Optional, Protocol, Sequence
 import numpy as np
 import pandas as pd
@@ -88,7 +88,7 @@ def contaminants_to_clipboard(qtable: Qtable) -> None:
     for column_tag in column_tags:
         columns.extend(helper.find_sample_columns(data, column_tag, samples))
-    columns = np.array(columns)[[c in data.columns for c in columns]]
+    columns = [c for c in columns if c in data.columns]
     contaminants = qtable["Potential contaminant"]
     data = data.loc[contaminants, columns]
@@ -135,10 +135,10 @@ def to_perseus_matrix(
     numeric_columns = set(numeric_columns).difference(expression_columns)
     numeric_columns = set(numeric_columns).difference(categorical_columns)
-    column_categories = ddict(lambda: default_category)
-    column_categories.update({c: "N" for c in numeric_columns})
-    column_categories.update({c: "C" for c in categorical_columns})
-    column_categories.update({c: "E" for c in expression_columns})
+    column_categories: ddict[str, str] = ddict(lambda: default_category)
+    column_categories.update(dict.fromkeys(numeric_columns, "N"))
+    column_categories.update(dict.fromkeys(categorical_columns, "C"))
+    column_categories.update(dict.fromkeys(expression_columns, "E"))
     column_annotation = [column_categories[column] for column in table.columns]
     column_annotation[0] = f"{annotation_row_prefix}{column_annotation[0]}"
@@ -219,6 +219,7 @@ def write_html_coverage_map(
             "change in a future release."
         ),
         FutureWarning,
+        stacklevel=2,
     )
     # Get protein information from the protein database
     protein_entry = protein_db[protein_id]
@@ -314,8 +315,8 @@ def _amica_table_from(qtable: Qtable) -> pd.DataFrame:
         sample_columns = helper.find_sample_columns(
             amica_table, tag, qtable.get_samples()
         )
-        non_sample_columns = set(columns).difference(set(sample_columns))
-        amica_table.drop(non_sample_columns, inplace=True, axis=1)
+        non_sample_columns = list(set(columns).difference(set(sample_columns)))
+        amica_table.drop(columns=non_sample_columns, inplace=True, axis=1)
     # Log transform columns if necessary
     for tag in intensity_column_tags:
@@ -437,7 +438,7 @@ def _generate_html_sequence_map(
     highlights = highlights if highlights is not None else {}
     sequence_length = len(sequence)
-    def write_row_index(pos: int, strings: list) -> str:
+    def write_row_index(pos: int, strings: list):
         ndigits = len(str(sequence_length))
         row_index = str(pos + 1).rjust(ndigits)
         html_entry = '<FONT COLOR="#000000">' + row_index + "   " + "</FONT>"
@@ -457,7 +458,7 @@ def _generate_html_sequence_map(
     in_covered_region: bool = False
     strings = []
-    strings.append(f'<FONT COLOR="#606060">')  # Set default text color to grey
+    strings.append('<FONT COLOR="#606060">')  # Set default text color to grey
     write_row_index(0, strings)
     for pos, character in enumerate(sequence):
         if pos in coverage_start_idx:
@@ -483,13 +484,15 @@ def _generate_html_sequence_map(
         if pos in coverage_stop_idx:
             in_covered_region = False
             close_coverage_region(strings)
-    strings.append(f"</FONT>")
+    strings.append("</FONT>")
     html_sequence_block = "".join(strings)
     return html_sequence_block
-def _find_covered_region_boundaries(coverage_mask: Iterable[bool]) -> list[tuple[int]]:
+def _find_covered_region_boundaries(
+    coverage_mask: Sequence[bool],
+) -> list[tuple[int, int]]:
     """Returns a list of boundaries from continuously covered regions in a protein.
     Args:

msreport/fasta.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import pathlib
 from typing import Iterable, Union
 from profasta.db import ProteinDatabase
@@ -24,5 +23,7 @@ def import_protein_database(
     database = ProteinDatabase()
     paths = [fasta_path] if isinstance(fasta_path, (str, pathlib.Path)) else fasta_path
     for path in paths:
+        if isinstance(path, pathlib.Path):
+            path = path.as_posix()
         database.add_fasta(path, header_parser=header_parser, overwrite=True)
     return database

msreport/helper/__init__.py CHANGED Viewed

@@ -1,21 +1,21 @@
 from .calc import (
-    mode,
+    calculate_monoisotopic_mass,
+    calculate_sequence_coverage,
     calculate_tryptic_ibaq_peptides,
     make_coverage_mask,
-    calculate_sequence_coverage,
-    calculate_monoisotopic_mass,
+    mode,
 )
 from .table import (
     apply_intensity_cutoff,
-    guess_design,
-    intensities_in_logspace,
     find_columns,
     find_sample_columns,
+    guess_design,
+    intensities_in_logspace,
+    join_tables,
     keep_rows_by_partial_match,
     remove_rows_by_partial_match,
-    join_tables,
-    rename_sample_columns,
     rename_mq_reporter_channels,
+    rename_sample_columns,
 )
 from .temp import (
     extract_modifications,

msreport/helper/calc.py CHANGED Viewed

@@ -1,15 +1,13 @@
-import itertools
-from typing import Iterable
+from typing import Iterable, Sequence
 import numpy as np
-import scipy.stats
-import scipy.optimize
 import pyteomics.mass
 import pyteomics.parser
+import scipy.optimize
+import scipy.stats
-def mode(values: Iterable) -> float:
+def mode(values: Sequence) -> float:
     """Calculate the mode by using kernel-density estimation.
     Args:
@@ -19,25 +17,26 @@ def mode(values: Iterable) -> float:
     Returns:
         The estimated mode. If no finite values are present, returns nan.
     """
-    values = np.asarray(values)
-    finite_values = values[np.isfinite(values)]
+    finite_values = np.asarray(values)[np.isfinite(values)]
     if len(finite_values) == 0:
         return np.nan
     elif len(np.unique(finite_values)) == 1:
         return np.unique(finite_values)[0]
     kde = scipy.stats.gaussian_kde(finite_values)
-    minimum_function = lambda x: -kde(x)[0]
+    def _minimum_function(x):
+        return -kde(x)[0]
     min_slice, max_sclice = np.percentile(finite_values, (2, 98))
     slice_step = 0.2
     brute_optimize_result = scipy.optimize.brute(
-        minimum_function, [slice(min_slice, max_sclice + slice_step, slice_step)]
+        _minimum_function, [slice(min_slice, max_sclice + slice_step, slice_step)]
     )
     rough_minimum = brute_optimize_result[0]
     local_optimize_result = scipy.optimize.minimize(
-        minimum_function, x0=rough_minimum, method="BFGS"
+        _minimum_function, x0=rough_minimum, method="BFGS"
     )
     fine_minimum = local_optimize_result.x[0]
     return fine_minimum
@@ -91,8 +90,8 @@ def calculate_monoisotopic_mass(protein_sequence: str) -> float:
 def make_coverage_mask(
-    protein_length: int, peptide_positions: list[(int, int)]
-) -> np.array:
+    protein_length: int, peptide_positions: Iterable[Iterable[int]]
+) -> np.ndarray:
     """Returns a Boolean array with True for positions present in 'peptide_positions'.
     Args:
@@ -109,8 +108,8 @@ def make_coverage_mask(
 def calculate_sequence_coverage(
-    protein_length: int, peptide_positions: list[(int, int)], ndigits: int = 1
-) -> np.array:
+    protein_length: int, peptide_positions: Iterable[Iterable[int]], ndigits: int = 1
+) -> float:
     """Calculates the protein sequence coverage given a list of peptide positions.
     Args:

msreport/helper/maxlfq.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import itertools
-from typing import Callable
 import warnings
+from typing import Callable
 import numpy as np
@@ -125,7 +125,7 @@ def calculate_pairwise_mode_log_ratio_matrix(
 def prepare_coefficient_matrix(
     ratio_matrix: np.ndarray,
-) -> (np.ndarray, np.ndarray, np.ndarray):
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Prepares coefficients, ratios, and initial row indices from a log ratio matrix.
     Args:

msreport/helper/table.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import re
-from typing import Iterable, Union
+from typing import Iterable, Sequence, Union
 import numpy as np
 import pandas as pd
@@ -63,7 +63,7 @@ def intensities_in_logspace(data: Union[pd.DataFrame, np.ndarray, Iterable]) ->
     """
     data = np.array(data, dtype=float)
     mask = np.isfinite(data)
-    return np.all(data[mask].flatten() <= 64)
+    return bool(np.all(data[mask].flatten() <= 64))
 def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.DataFrame:
@@ -102,7 +102,7 @@ def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.Da
 def rename_mq_reporter_channels(
-    table: pd.DataFrame, channel_names: Iterable[str]
+    table: pd.DataFrame, channel_names: Sequence[str]
 ) -> None:
     """Renames reporter channel numbers with sample names.
@@ -157,8 +157,7 @@ def find_columns(
     Returns:
         A list of column names.
     """
-    matches = [substring in col for col in table.columns]
-    matched_columns = np.array(table.columns)[matches].tolist()
+    matched_columns = [col for col in table.columns if substring in col]
     if must_be_substring:
         matched_columns = [col for col in matched_columns if col != substring]
     return matched_columns
@@ -255,7 +254,7 @@ def remove_rows_by_partial_match(
 def join_tables(
-    tables: Iterable[pd.DataFrame], reset_index: bool = False
+    tables: Sequence[pd.DataFrame], reset_index: bool = False
 ) -> pd.DataFrame:
     """Returns a joined dataframe.

msreport/impute.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
-from typing import Optional, Any
+from typing import Any, Optional
 import numpy as np
 import pandas as pd
@@ -51,7 +52,7 @@ class FixedValueImputer:
             Returns the fitted FixedValueImputer instance.
         """
         if self.strategy == "constant":
-            fill_values = {column: self.fill_value for column in table.columns}
+            fill_values = dict.fromkeys(table.columns, self.fill_value)
         elif self.strategy == "below":
             if self.column_wise:
                 fill_values = {}
@@ -59,7 +60,7 @@ class FixedValueImputer:
                     fill_values[column] = _calculate_integer_below_min(table[column])
             else:
                 int_below_min = _calculate_integer_below_min(table)
-                fill_values = {column: int_below_min for column in table.columns}
+                fill_values = dict.fromkeys(table.columns, int_below_min)
         self._sample_fill_values = fill_values
         return self

msreport 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl

msreport 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl