msreport 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msreport/__init__.py CHANGED
@@ -1,13 +1,11 @@
1
- from msreport.qtable import Qtable
2
- from msreport.reader import MaxQuantReader, FragPipeReader, SpectronautReader
3
-
4
- from msreport.fasta import import_protein_database
5
-
6
1
  import msreport.analyze
7
2
  import msreport.export
8
3
  import msreport.impute
9
4
  import msreport.normalize
10
5
  import msreport.plot
11
6
  import msreport.reader
7
+ from msreport.fasta import import_protein_database
8
+ from msreport.qtable import Qtable
9
+ from msreport.reader import FragPipeReader, MaxQuantReader, SpectronautReader
12
10
 
13
- __version__ = "0.0.27"
11
+ __version__ = "0.0.29"
@@ -71,7 +71,7 @@ def maximum_per_column(array: np.ndarray) -> np.ndarray:
71
71
  return np.array([maximum(i) for i in array.transpose()])
72
72
 
73
73
 
74
- def minimum(array: np.ndarray) -> int:
74
+ def minimum(array: np.ndarray) -> float:
75
75
  """Returns the lowest finite value from one or multiple columns."""
76
76
  array = array.flatten()
77
77
  if np.isfinite(array).any():
@@ -1,6 +1,7 @@
1
1
  from typing import Iterable, Union
2
2
 
3
3
  import pandas as pd
4
+
4
5
  import msreport.aggregate.condense as CONDENSE
5
6
  import msreport.helper
6
7
 
@@ -218,7 +218,7 @@ def aggregate_unique_groups(
218
218
  columns_to_aggregate: Union[str, Iterable],
219
219
  condenser: Callable,
220
220
  is_sorted: bool,
221
- ) -> (np.ndarray, np.ndarray):
221
+ ) -> tuple[np.ndarray, np.ndarray]:
222
222
  """Aggregates column(s) by applying a condenser function to unique groups.
223
223
 
224
224
  The function returns two arrays containing the aggregated values and the
@@ -256,7 +256,7 @@ def aggregate_unique_groups(
256
256
 
257
257
  def _prepare_grouping_indices(
258
258
  table: pd.DataFrame, group_by: str, is_sorted: bool
259
- ) -> (np.ndarray, np.ndarray, pd.DataFrame):
259
+ ) -> tuple[np.ndarray, np.ndarray, pd.DataFrame]:
260
260
  """Prepares start indices and names of unique groups from a sorted dataframe.
261
261
 
262
262
  Args:
msreport/analyze.py CHANGED
@@ -1,15 +1,26 @@
1
- """ The analyze module contains methods for analysing quantification results. """
1
+ """The analyze module contains methods for analysing quantification results."""
2
2
 
3
3
  from __future__ import annotations
4
- from typing import Iterable, Optional, Protocol
4
+
5
5
  import warnings
6
+ from typing import Iterable, Optional, Protocol, Sequence
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
 
10
11
  import msreport.normalize
11
- import msreport.rinterface
12
+ from msreport.errors import OptionalDependencyError
12
13
  from msreport.helper import find_sample_columns
14
+ from msreport.qtable import Qtable
15
+
16
+ try:
17
+ import msreport.rinterface
18
+
19
+ _rinterface_available = True
20
+ _rinterface_error = ""
21
+ except OptionalDependencyError as err:
22
+ _rinterface_available = False
23
+ _rinterface_error = str(err)
13
24
 
14
25
 
15
26
  class Transformer(Protocol):
@@ -234,7 +245,7 @@ def normalize_expression(
234
245
 
235
246
  def create_site_to_protein_normalizer(
236
247
  qtable: Qtable, category_column: str = "Representative protein"
237
- ) -> msreport.normalizer.CategoricalNormalizer:
248
+ ) -> msreport.normalize.CategoricalNormalizer:
238
249
  """Creates a fitted `CategoricalNormalizer` for site-to-protein normalization.
239
250
 
240
251
  The `CategoricalNormalizer` is fitted to protein expression profiles of the provided
@@ -254,8 +265,8 @@ def create_site_to_protein_normalizer(
254
265
  samples_as_columns=True,
255
266
  features=[category_column],
256
267
  )
257
- completely_quantified = (
258
- ~reference_expression[qtable.get_samples()].isna().any(axis=1)
268
+ completely_quantified = ~reference_expression[qtable.get_samples()].isna().any(
269
+ axis=1
259
270
  )
260
271
  reference_expression = reference_expression[completely_quantified]
261
272
 
@@ -275,7 +286,7 @@ def create_ibaq_transformer(
275
286
  qtable: Qtable,
276
287
  category_column: str = "Representative protein",
277
288
  ibaq_column: str = "iBAQ peptides",
278
- ) -> msreport.normalizer.CategoricalNormalizer:
289
+ ) -> msreport.normalize.CategoricalNormalizer:
279
290
  """Creates a fitted `CategoricalNormalizer` for iBAQ transformation.
280
291
 
281
292
  The `CategoricalNormalizer` is fitted to iBAQ peptide counts of the provided
@@ -301,7 +312,7 @@ def create_ibaq_transformer(
301
312
  ibaq_factor_values[ibaq_factor_values < 1] = 1
302
313
  ibaq_factor_values = np.log2(ibaq_factor_values)
303
314
 
304
- reference_table = pd.DataFrame({c: ibaq_factor_values for c in sample_columns})
315
+ reference_table = pd.DataFrame(dict.fromkeys(sample_columns, ibaq_factor_values))
305
316
  reference_table[category_column] = category_values
306
317
 
307
318
  normalizer = msreport.normalize.CategoricalNormalizer(category_column)
@@ -422,7 +433,15 @@ def calculate_multi_group_comparison(
422
433
  correspond to entries from qtable.design["Experiment"].
423
434
  exclude_invalid: If true, the column "Valid" is used to determine which rows are
424
435
  used for calculating the group comparisons; default True.
436
+
437
+ Raises:
438
+ ValueError: If 'experiment_pairs' contains invalid entries. Each experiment pair
439
+ must have exactly two entries and the two entries must not be the same. All
440
+ experiments must be present in qtable.design. No duplicate experiment pairs
441
+ are allowed.
425
442
  """
443
+ _validate_experiment_pairs(qtable, experiment_pairs)
444
+
426
445
  table = qtable.make_expression_table(samples_as_columns=True, features=["Valid"])
427
446
  comparison_tag = " vs "
428
447
 
@@ -475,7 +494,7 @@ def two_group_comparison(
475
494
 
476
495
  def calculate_multi_group_limma(
477
496
  qtable: Qtable,
478
- experiment_pairs: Iterable[Iterable[str]],
497
+ experiment_pairs: Sequence[Iterable[str]],
479
498
  exclude_invalid: bool = True,
480
499
  batch: bool = False,
481
500
  limma_trend: bool = True,
@@ -491,8 +510,7 @@ def calculate_multi_group_limma(
491
510
 
492
511
  Requires that expression columns are set, and expression values are log2 transformed
493
512
  All rows with missing values are ignored, impute missing values to allow
494
- differential expression analysis of all rows. The qtable.data column
495
- "Representative protein" is used as the index.
513
+ differential expression analysis of all rows.
496
514
 
497
515
  Args:
498
516
  qtable: Qtable instance that contains expression values for differential
@@ -510,13 +528,19 @@ def calculate_multi_group_limma(
510
528
  limma.eBayes for details; default True.
511
529
 
512
530
  Raises:
531
+ ValueError: If 'experiment_pairs' contains invalid entries. Each experiment pair
532
+ must have exactly two entries and the two entries must not be the same. All
533
+ experiments must be present in qtable.design. No duplicate experiment pairs
534
+ are allowed.
513
535
  KeyError: If the "Batch" column is not present in the qtable.design when
514
536
  'batch' is set to True.
515
537
  ValueError: If all values from qtable.design["Batch"] are identical when 'batch'
516
538
  is set to True.
517
- ValueError: If the same experiment pair has been specified multiple times in
518
- 'experiment_pairs'.
519
539
  """
540
+ if not _rinterface_available:
541
+ raise OptionalDependencyError(_rinterface_error)
542
+
543
+ _validate_experiment_pairs(qtable, experiment_pairs)
520
544
  # TODO: not tested #
521
545
  if batch and "Batch" not in qtable.get_design():
522
546
  raise KeyError(
@@ -528,17 +552,10 @@ def calculate_multi_group_limma(
528
552
  "When using calculate_multi_group_limma(batch=True), not all values from"
529
553
  ' qtable.design["Batch"] are allowed to be identical.'
530
554
  )
531
- if len(list(experiment_pairs)) != len(set(experiment_pairs)):
532
- raise ValueError(
533
- "The same experiment pair has been specified multiple times."
534
- " Each entry in the `experiment_pairs` argument must be unique."
535
- )
536
555
 
537
556
  design = qtable.get_design()
538
- table = qtable.make_expression_table(
539
- samples_as_columns=True, features=["Representative protein"]
540
- )
541
- table = table.set_index("Representative protein")
557
+ table = qtable.make_expression_table(samples_as_columns=True)
558
+ table.index = table.index.astype(str) # It appears that a string is required for R
542
559
  comparison_tag = " vs "
543
560
 
544
561
  if exclude_invalid:
@@ -554,7 +571,7 @@ def calculate_multi_group_limma(
554
571
  experiment_to_r[experiment] = f".EXPERIMENT__{i:04d}"
555
572
  r_to_experiment = {v: k for k, v in experiment_to_r.items()}
556
573
 
557
- r_experiment_pairs = []
574
+ r_experiment_pairs: list[str] = []
558
575
  for exp1, exp2 in experiment_pairs:
559
576
  r_experiment_pairs.append(f"{experiment_to_r[exp1]}-{experiment_to_r[exp2]}")
560
577
 
@@ -583,7 +600,7 @@ def calculate_multi_group_limma(
583
600
 
584
601
  def calculate_two_group_limma(
585
602
  qtable: Qtable,
586
- experiment_pair: list[str],
603
+ experiment_pair: Sequence[str],
587
604
  exclude_invalid: bool = True,
588
605
  limma_trend: bool = True,
589
606
  ) -> None:
@@ -596,8 +613,7 @@ def calculate_two_group_limma(
596
613
 
597
614
  Requires that expression columns are set, and expression values are log2
598
615
  transformed. All rows with missing values are ignored, impute missing values to
599
- allow differential expression analysis of all rows. The qtable.data
600
- column "Representative protein" is used as the index.
616
+ allow differential expression analysis of all rows.
601
617
 
602
618
  Args:
603
619
  qtable: Qtable instance that contains expression values for differential
@@ -608,27 +624,32 @@ def calculate_two_group_limma(
608
624
  used for the differential expression analysis; default True.
609
625
  limma_trend: If true, an intensity-dependent trend is fitted to the prior
610
626
  variances; default True.
627
+ Raises:
628
+ ValueError: If 'experiment_pair' contains invalid entries. The experiment pair
629
+ must have exactly two entries and the two entries must not be the same. Both
630
+ experiments must be present in qtable.design.
611
631
  """
612
- # TODO: not tested #
613
- expression_table = qtable.make_expression_table(
614
- samples_as_columns=True, features=["Representative protein"]
615
- )
632
+ if not _rinterface_available:
633
+ raise OptionalDependencyError(_rinterface_error)
634
+
635
+ _validate_experiment_pair(qtable, experiment_pair)
636
+ # TODO: LIMMA function not tested #
637
+ table = qtable.make_expression_table(samples_as_columns=True)
616
638
  comparison_tag = " vs "
617
639
 
618
640
  if exclude_invalid:
619
641
  valid = qtable["Valid"]
620
642
  else:
621
- valid = np.full(expression_table.shape[0], True)
643
+ valid = np.full(table.shape[0], True)
622
644
 
623
645
  samples_to_experiment = {}
624
646
  for experiment in experiment_pair:
625
- mapping = {s: experiment for s in qtable.get_samples(experiment)}
647
+ mapping = dict.fromkeys(qtable.get_samples(experiment), experiment)
626
648
  samples_to_experiment.update(mapping)
627
649
 
628
- table_columns = ["Representative protein"]
629
- table_columns.extend(samples_to_experiment.keys())
630
- table = expression_table[table_columns]
631
- table = table.set_index("Representative protein")
650
+ # Keep only samples that are present in the 'experiment_pair'
651
+ table = table[samples_to_experiment.keys()]
652
+ table.index = table.index.astype(str) # It appears that a string is required for R
632
653
  not_nan = table.isna().sum(axis=1) == 0
633
654
 
634
655
  mask = np.all([valid, not_nan], axis=0)
@@ -649,3 +670,63 @@ def calculate_two_group_limma(
649
670
  mapping = {col: f"{col} {comparison_group}" for col in limma_table.columns}
650
671
  limma_table.rename(columns=mapping, inplace=True)
651
672
  qtable.add_expression_features(limma_table)
673
+
674
+
675
+ def _validate_experiment_pairs(
676
+ qtable: Qtable, exp_pairs: Iterable[Iterable[str]]
677
+ ) -> None:
678
+ """Validates that experiment pairs are valid and raises an error if not.
679
+
680
+ - All 'exp_pairs' entries must have a length of exactly 2.
681
+ - All experiments must be present in the qtable.design.
682
+ - No duplicate experiments are allowed in a pair.
683
+ - No duplicate experiment pairs are allowed.
684
+
685
+ Args:
686
+ qtable: Qtable instance containing experiment data.
687
+ exp_pairs: Iterable of experiment pairs to validate.
688
+
689
+ Raises:
690
+ ValueError: If any of the validation checks fail.
691
+ """
692
+ all_experiments = {exp for pair in exp_pairs for exp in pair}
693
+ missing_experiments = all_experiments - set(qtable.get_experiments())
694
+ if missing_experiments:
695
+ raise ValueError(
696
+ f"Experiments '{missing_experiments}' not found in qtable.design."
697
+ )
698
+ for experiment_pair in exp_pairs:
699
+ _validate_experiment_pair(qtable, experiment_pair)
700
+
701
+ if len(list(exp_pairs)) != len({tuple(pair) for pair in exp_pairs}):
702
+ raise ValueError(
703
+ f"Some experiment pairs in {exp_pairs} have been specified multiple "
704
+ "times. Each pair must occur only once."
705
+ )
706
+
707
+
708
+ def _validate_experiment_pair(qtable: Qtable, exp_pair: Iterable[str]) -> None:
709
+ """Validates the experiment pair is valid and raises an error if not.
710
+
711
+ - The experiment pair must contain exactly two entries
712
+ - The two entries of the experiment pair must be different.
713
+ - Both experiments must be present in the qtable.design.
714
+
715
+ Args:
716
+ qtable: Qtable instance containing experiment data.
717
+ experiment_pairs: Iterable of experiment pairs to validate.
718
+
719
+ Raises:
720
+ ValueError: If any of the validation checks fail.
721
+ """
722
+ if len(list(exp_pair)) != 2:
723
+ raise ValueError(
724
+ f"Experiment pair '{exp_pair}' contains more than two entries."
725
+ )
726
+ if len(list(exp_pair)) != len(set(exp_pair)):
727
+ raise ValueError(f"Experiment pair '{exp_pair}' contains the same entry twice.")
728
+ if set(exp_pair) - set(qtable.get_experiments()):
729
+ raise ValueError(
730
+ f"Experiments '{set(exp_pair) - set(qtable.get_experiments())}' "
731
+ "not found in qtable.design."
732
+ )
msreport/errors.py CHANGED
@@ -1,5 +1,4 @@
1
- class MsreportError(Exception):
2
- ...
1
+ class MsreportError(Exception): ...
3
2
 
4
3
 
5
4
  class NotFittedError(ValueError, AttributeError):
@@ -8,3 +7,7 @@ class NotFittedError(ValueError, AttributeError):
8
7
 
9
8
  class ProteinsNotInFastaWarning(UserWarning):
10
9
  """Warning raised when queried proteins are absent from a FASTA file."""
10
+
11
+
12
+ class OptionalDependencyError(ImportError):
13
+ """Raised when an optional dependency is required but not installed."""
msreport/export.py CHANGED
@@ -13,10 +13,10 @@ Index([
13
13
  ], dtype='object')
14
14
  """
15
15
 
16
- from collections import defaultdict as ddict
17
16
  import os
18
- from typing import Iterable, Optional, Protocol
19
17
  import warnings
18
+ from collections import defaultdict as ddict
19
+ from typing import Iterable, Optional, Protocol, Sequence
20
20
 
21
21
  import numpy as np
22
22
  import pandas as pd
@@ -88,7 +88,7 @@ def contaminants_to_clipboard(qtable: Qtable) -> None:
88
88
 
89
89
  for column_tag in column_tags:
90
90
  columns.extend(helper.find_sample_columns(data, column_tag, samples))
91
- columns = np.array(columns)[[c in data.columns for c in columns]]
91
+ columns = [c for c in columns if c in data.columns]
92
92
 
93
93
  contaminants = qtable["Potential contaminant"]
94
94
  data = data.loc[contaminants, columns]
@@ -135,10 +135,10 @@ def to_perseus_matrix(
135
135
  numeric_columns = set(numeric_columns).difference(expression_columns)
136
136
  numeric_columns = set(numeric_columns).difference(categorical_columns)
137
137
 
138
- column_categories = ddict(lambda: default_category)
139
- column_categories.update({c: "N" for c in numeric_columns})
140
- column_categories.update({c: "C" for c in categorical_columns})
141
- column_categories.update({c: "E" for c in expression_columns})
138
+ column_categories: ddict[str, str] = ddict(lambda: default_category)
139
+ column_categories.update(dict.fromkeys(numeric_columns, "N"))
140
+ column_categories.update(dict.fromkeys(categorical_columns, "C"))
141
+ column_categories.update(dict.fromkeys(expression_columns, "E"))
142
142
 
143
143
  column_annotation = [column_categories[column] for column in table.columns]
144
144
  column_annotation[0] = f"{annotation_row_prefix}{column_annotation[0]}"
@@ -219,6 +219,7 @@ def write_html_coverage_map(
219
219
  "change in a future release."
220
220
  ),
221
221
  FutureWarning,
222
+ stacklevel=2,
222
223
  )
223
224
  # Get protein information from the protein database
224
225
  protein_entry = protein_db[protein_id]
@@ -314,8 +315,8 @@ def _amica_table_from(qtable: Qtable) -> pd.DataFrame:
314
315
  sample_columns = helper.find_sample_columns(
315
316
  amica_table, tag, qtable.get_samples()
316
317
  )
317
- non_sample_columns = set(columns).difference(set(sample_columns))
318
- amica_table.drop(non_sample_columns, inplace=True, axis=1)
318
+ non_sample_columns = list(set(columns).difference(set(sample_columns)))
319
+ amica_table.drop(columns=non_sample_columns, inplace=True, axis=1)
319
320
 
320
321
  # Log transform columns if necessary
321
322
  for tag in intensity_column_tags:
@@ -437,7 +438,7 @@ def _generate_html_sequence_map(
437
438
  highlights = highlights if highlights is not None else {}
438
439
  sequence_length = len(sequence)
439
440
 
440
- def write_row_index(pos: int, strings: list) -> str:
441
+ def write_row_index(pos: int, strings: list):
441
442
  ndigits = len(str(sequence_length))
442
443
  row_index = str(pos + 1).rjust(ndigits)
443
444
  html_entry = '<FONT COLOR="#000000">' + row_index + " " + "</FONT>"
@@ -457,7 +458,7 @@ def _generate_html_sequence_map(
457
458
 
458
459
  in_covered_region: bool = False
459
460
  strings = []
460
- strings.append(f'<FONT COLOR="#606060">') # Set default text color to grey
461
+ strings.append('<FONT COLOR="#606060">') # Set default text color to grey
461
462
  write_row_index(0, strings)
462
463
  for pos, character in enumerate(sequence):
463
464
  if pos in coverage_start_idx:
@@ -483,13 +484,15 @@ def _generate_html_sequence_map(
483
484
  if pos in coverage_stop_idx:
484
485
  in_covered_region = False
485
486
  close_coverage_region(strings)
486
- strings.append(f"</FONT>")
487
+ strings.append("</FONT>")
487
488
 
488
489
  html_sequence_block = "".join(strings)
489
490
  return html_sequence_block
490
491
 
491
492
 
492
- def _find_covered_region_boundaries(coverage_mask: Iterable[bool]) -> list[tuple[int]]:
493
+ def _find_covered_region_boundaries(
494
+ coverage_mask: Sequence[bool],
495
+ ) -> list[tuple[int, int]]:
493
496
  """Returns a list of boundaries from continuously covered regions in a protein.
494
497
 
495
498
  Args:
msreport/fasta.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import pathlib
2
2
  from typing import Iterable, Union
3
3
 
4
-
5
4
  from profasta.db import ProteinDatabase
6
5
 
7
6
 
@@ -24,5 +23,7 @@ def import_protein_database(
24
23
  database = ProteinDatabase()
25
24
  paths = [fasta_path] if isinstance(fasta_path, (str, pathlib.Path)) else fasta_path
26
25
  for path in paths:
26
+ if isinstance(path, pathlib.Path):
27
+ path = path.as_posix()
27
28
  database.add_fasta(path, header_parser=header_parser, overwrite=True)
28
29
  return database
@@ -1,21 +1,21 @@
1
1
  from .calc import (
2
- mode,
2
+ calculate_monoisotopic_mass,
3
+ calculate_sequence_coverage,
3
4
  calculate_tryptic_ibaq_peptides,
4
5
  make_coverage_mask,
5
- calculate_sequence_coverage,
6
- calculate_monoisotopic_mass,
6
+ mode,
7
7
  )
8
8
  from .table import (
9
9
  apply_intensity_cutoff,
10
- guess_design,
11
- intensities_in_logspace,
12
10
  find_columns,
13
11
  find_sample_columns,
12
+ guess_design,
13
+ intensities_in_logspace,
14
+ join_tables,
14
15
  keep_rows_by_partial_match,
15
16
  remove_rows_by_partial_match,
16
- join_tables,
17
- rename_sample_columns,
18
17
  rename_mq_reporter_channels,
18
+ rename_sample_columns,
19
19
  )
20
20
  from .temp import (
21
21
  extract_modifications,
msreport/helper/calc.py CHANGED
@@ -1,15 +1,13 @@
1
- import itertools
2
- from typing import Iterable
1
+ from typing import Iterable, Sequence
3
2
 
4
3
  import numpy as np
5
- import scipy.stats
6
- import scipy.optimize
7
-
8
4
  import pyteomics.mass
9
5
  import pyteomics.parser
6
+ import scipy.optimize
7
+ import scipy.stats
10
8
 
11
9
 
12
- def mode(values: Iterable) -> float:
10
+ def mode(values: Sequence) -> float:
13
11
  """Calculate the mode by using kernel-density estimation.
14
12
 
15
13
  Args:
@@ -19,25 +17,26 @@ def mode(values: Iterable) -> float:
19
17
  Returns:
20
18
  The estimated mode. If no finite values are present, returns nan.
21
19
  """
22
- values = np.asarray(values)
23
- finite_values = values[np.isfinite(values)]
20
+ finite_values = np.asarray(values)[np.isfinite(values)]
24
21
  if len(finite_values) == 0:
25
22
  return np.nan
26
23
  elif len(np.unique(finite_values)) == 1:
27
24
  return np.unique(finite_values)[0]
28
25
 
29
26
  kde = scipy.stats.gaussian_kde(finite_values)
30
- minimum_function = lambda x: -kde(x)[0]
27
+
28
+ def _minimum_function(x):
29
+ return -kde(x)[0]
31
30
 
32
31
  min_slice, max_sclice = np.percentile(finite_values, (2, 98))
33
32
  slice_step = 0.2
34
33
  brute_optimize_result = scipy.optimize.brute(
35
- minimum_function, [slice(min_slice, max_sclice + slice_step, slice_step)]
34
+ _minimum_function, [slice(min_slice, max_sclice + slice_step, slice_step)]
36
35
  )
37
36
  rough_minimum = brute_optimize_result[0]
38
37
 
39
38
  local_optimize_result = scipy.optimize.minimize(
40
- minimum_function, x0=rough_minimum, method="BFGS"
39
+ _minimum_function, x0=rough_minimum, method="BFGS"
41
40
  )
42
41
  fine_minimum = local_optimize_result.x[0]
43
42
  return fine_minimum
@@ -91,8 +90,8 @@ def calculate_monoisotopic_mass(protein_sequence: str) -> float:
91
90
 
92
91
 
93
92
  def make_coverage_mask(
94
- protein_length: int, peptide_positions: list[(int, int)]
95
- ) -> np.array:
93
+ protein_length: int, peptide_positions: Iterable[Iterable[int]]
94
+ ) -> np.ndarray:
96
95
  """Returns a Boolean array with True for positions present in 'peptide_positions'.
97
96
 
98
97
  Args:
@@ -109,8 +108,8 @@ def make_coverage_mask(
109
108
 
110
109
 
111
110
  def calculate_sequence_coverage(
112
- protein_length: int, peptide_positions: list[(int, int)], ndigits: int = 1
113
- ) -> np.array:
111
+ protein_length: int, peptide_positions: Iterable[Iterable[int]], ndigits: int = 1
112
+ ) -> float:
114
113
  """Calculates the protein sequence coverage given a list of peptide positions.
115
114
 
116
115
  Args:
msreport/helper/maxlfq.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import itertools
2
- from typing import Callable
3
2
  import warnings
3
+ from typing import Callable
4
4
 
5
5
  import numpy as np
6
6
 
@@ -125,7 +125,7 @@ def calculate_pairwise_mode_log_ratio_matrix(
125
125
 
126
126
  def prepare_coefficient_matrix(
127
127
  ratio_matrix: np.ndarray,
128
- ) -> (np.ndarray, np.ndarray, np.ndarray):
128
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
129
129
  """Prepares coefficients, ratios, and initial row indices from a log ratio matrix.
130
130
 
131
131
  Args:
msreport/helper/table.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import re
2
- from typing import Iterable, Union
2
+ from typing import Iterable, Sequence, Union
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
@@ -63,7 +63,7 @@ def intensities_in_logspace(data: Union[pd.DataFrame, np.ndarray, Iterable]) ->
63
63
  """
64
64
  data = np.array(data, dtype=float)
65
65
  mask = np.isfinite(data)
66
- return np.all(data[mask].flatten() <= 64)
66
+ return bool(np.all(data[mask].flatten() <= 64))
67
67
 
68
68
 
69
69
  def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.DataFrame:
@@ -102,7 +102,7 @@ def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.Da
102
102
 
103
103
 
104
104
  def rename_mq_reporter_channels(
105
- table: pd.DataFrame, channel_names: Iterable[str]
105
+ table: pd.DataFrame, channel_names: Sequence[str]
106
106
  ) -> None:
107
107
  """Renames reporter channel numbers with sample names.
108
108
 
@@ -157,8 +157,7 @@ def find_columns(
157
157
  Returns:
158
158
  A list of column names.
159
159
  """
160
- matches = [substring in col for col in table.columns]
161
- matched_columns = np.array(table.columns)[matches].tolist()
160
+ matched_columns = [col for col in table.columns if substring in col]
162
161
  if must_be_substring:
163
162
  matched_columns = [col for col in matched_columns if col != substring]
164
163
  return matched_columns
@@ -255,7 +254,7 @@ def remove_rows_by_partial_match(
255
254
 
256
255
 
257
256
  def join_tables(
258
- tables: Iterable[pd.DataFrame], reset_index: bool = False
257
+ tables: Sequence[pd.DataFrame], reset_index: bool = False
259
258
  ) -> pd.DataFrame:
260
259
  """Returns a joined dataframe.
261
260
 
msreport/impute.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, Any
2
+
3
+ from typing import Any, Optional
3
4
 
4
5
  import numpy as np
5
6
  import pandas as pd
@@ -51,7 +52,7 @@ class FixedValueImputer:
51
52
  Returns the fitted FixedValueImputer instance.
52
53
  """
53
54
  if self.strategy == "constant":
54
- fill_values = {column: self.fill_value for column in table.columns}
55
+ fill_values = dict.fromkeys(table.columns, self.fill_value)
55
56
  elif self.strategy == "below":
56
57
  if self.column_wise:
57
58
  fill_values = {}
@@ -59,7 +60,7 @@ class FixedValueImputer:
59
60
  fill_values[column] = _calculate_integer_below_min(table[column])
60
61
  else:
61
62
  int_below_min = _calculate_integer_below_min(table)
62
- fill_values = {column: int_below_min for column in table.columns}
63
+ fill_values = dict.fromkeys(table.columns, int_below_min)
63
64
  self._sample_fill_values = fill_values
64
65
  return self
65
66