msreport 0.0.26__tar.gz → 0.0.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {msreport-0.0.26 → msreport-0.0.28}/PKG-INFO +7 -3
  2. {msreport-0.0.26 → msreport-0.0.28}/README.md +1 -1
  3. {msreport-0.0.26 → msreport-0.0.28}/msreport/__init__.py +4 -6
  4. {msreport-0.0.26 → msreport-0.0.28}/msreport/aggregate/condense.py +1 -1
  5. {msreport-0.0.26 → msreport-0.0.28}/msreport/aggregate/pivot.py +1 -0
  6. {msreport-0.0.26 → msreport-0.0.28}/msreport/aggregate/summarize.py +2 -2
  7. {msreport-0.0.26 → msreport-0.0.28}/msreport/analyze.py +171 -38
  8. {msreport-0.0.26 → msreport-0.0.28}/msreport/errors.py +1 -2
  9. {msreport-0.0.26 → msreport-0.0.28}/msreport/export.py +16 -13
  10. {msreport-0.0.26 → msreport-0.0.28}/msreport/fasta.py +2 -1
  11. {msreport-0.0.26 → msreport-0.0.28}/msreport/helper/__init__.py +7 -7
  12. {msreport-0.0.26 → msreport-0.0.28}/msreport/helper/calc.py +29 -24
  13. {msreport-0.0.26 → msreport-0.0.28}/msreport/helper/maxlfq.py +2 -2
  14. {msreport-0.0.26 → msreport-0.0.28}/msreport/helper/table.py +5 -6
  15. {msreport-0.0.26 → msreport-0.0.28}/msreport/impute.py +7 -8
  16. {msreport-0.0.26 → msreport-0.0.28}/msreport/isobar.py +10 -9
  17. {msreport-0.0.26 → msreport-0.0.28}/msreport/normalize.py +54 -36
  18. {msreport-0.0.26 → msreport-0.0.28}/msreport/peptidoform.py +6 -4
  19. msreport-0.0.28/msreport/plot/__init__.py +41 -0
  20. msreport-0.0.28/msreport/plot/_partial_plots.py +159 -0
  21. msreport-0.0.28/msreport/plot/comparison.py +490 -0
  22. msreport-0.0.28/msreport/plot/distribution.py +253 -0
  23. msreport-0.0.28/msreport/plot/multivariate.py +355 -0
  24. msreport-0.0.28/msreport/plot/quality.py +431 -0
  25. msreport-0.0.28/msreport/plot/style.py +286 -0
  26. msreport-0.0.28/msreport/plot/style_sheets/msreport-notebook.mplstyle +57 -0
  27. msreport-0.0.28/msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +45 -0
  28. {msreport-0.0.26 → msreport-0.0.28}/msreport/qtable.py +109 -17
  29. {msreport-0.0.26 → msreport-0.0.28}/msreport/reader.py +73 -79
  30. {msreport-0.0.26 → msreport-0.0.28}/msreport/rinterface/__init__.py +2 -1
  31. {msreport-0.0.26 → msreport-0.0.28}/msreport/rinterface/limma.py +2 -1
  32. {msreport-0.0.26 → msreport-0.0.28}/msreport/rinterface/rinstaller.py +3 -3
  33. {msreport-0.0.26 → msreport-0.0.28}/msreport.egg-info/PKG-INFO +7 -3
  34. {msreport-0.0.26 → msreport-0.0.28}/msreport.egg-info/SOURCES.txt +10 -1
  35. {msreport-0.0.26 → msreport-0.0.28}/msreport.egg-info/requires.txt +6 -1
  36. {msreport-0.0.26 → msreport-0.0.28}/pyproject.toml +40 -1
  37. {msreport-0.0.26 → msreport-0.0.28}/tests/test_analyze.py +115 -18
  38. {msreport-0.0.26 → msreport-0.0.28}/tests/test_peptidoform.py +2 -1
  39. msreport-0.0.28/tests/test_plot.py +144 -0
  40. {msreport-0.0.26 → msreport-0.0.28}/tests/test_qtable.py +90 -23
  41. msreport-0.0.26/msreport/plot.py +0 -1132
  42. {msreport-0.0.26 → msreport-0.0.28}/LICENSE.txt +0 -0
  43. {msreport-0.0.26 → msreport-0.0.28}/msreport/aggregate/__init__.py +0 -0
  44. {msreport-0.0.26 → msreport-0.0.28}/msreport/helper/temp.py +0 -0
  45. {msreport-0.0.26 → msreport-0.0.28}/msreport/rinterface/rscripts/limma.R +0 -0
  46. {msreport-0.0.26 → msreport-0.0.28}/msreport.egg-info/dependency_links.txt +0 -0
  47. {msreport-0.0.26 → msreport-0.0.28}/msreport.egg-info/top_level.txt +0 -0
  48. {msreport-0.0.26 → msreport-0.0.28}/setup.cfg +0 -0
  49. {msreport-0.0.26 → msreport-0.0.28}/setup.py +0 -0
  50. {msreport-0.0.26 → msreport-0.0.28}/tests/test_export.py +0 -0
  51. {msreport-0.0.26 → msreport-0.0.28}/tests/test_helper.py +0 -0
  52. {msreport-0.0.26 → msreport-0.0.28}/tests/test_impute.py +0 -0
  53. {msreport-0.0.26 → msreport-0.0.28}/tests/test_isobar.py +0 -0
  54. {msreport-0.0.26 → msreport-0.0.28}/tests/test_maxlfq.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: msreport
3
- Version: 0.0.26
3
+ Version: 0.0.28
4
4
  Summary: Post processing and analysis of quantitative proteomics data
5
5
  Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
6
6
  License: Apache-2.0
@@ -19,11 +19,15 @@ Requires-Dist: pandas>=1.4.4
19
19
  Requires-Dist: profasta>=0.0.4
20
20
  Requires-Dist: pyteomics>=4.6.0
21
21
  Requires-Dist: pyyaml>=6.0.0
22
- Requires-Dist: rpy2>=3.5.3
22
+ Requires-Dist: rpy2!=3.5.13,>=3.5.3
23
23
  Requires-Dist: scikit-learn>=1.0.0
24
24
  Requires-Dist: scipy>=1.9.1
25
25
  Requires-Dist: seaborn>=0.12.0
26
26
  Requires-Dist: statsmodels>=0.13.2
27
+ Requires-Dist: typing_extensions>=4
28
+ Provides-Extra: dev
29
+ Requires-Dist: mypy>=1.15.0; extra == "dev"
30
+ Requires-Dist: pytest>=8.3.5; extra == "dev"
27
31
  Dynamic: license-file
28
32
 
29
33
  [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
@@ -117,7 +121,7 @@ command as described above.
117
121
  ### Additional requirements
118
122
 
119
123
  MsReport provides an interface to the R package LIMMA for differential expression
120
- analysis, which requires a local installation of R (R version 3.4 or higher) and the
124
+ analysis, which requires a local installation of R (R version 4.0 or higher) and the
121
125
  system environment variable "R_HOME" to be set to the R home directory. Note that it
122
126
  might be necessary to restart the computer after adding the "R_HOME" variable. The R
123
127
  home directory can also be found from within R by using the command below, and might
@@ -89,7 +89,7 @@ command as described above.
89
89
  ### Additional requirements
90
90
 
91
91
  MsReport provides an interface to the R package LIMMA for differential expression
92
- analysis, which requires a local installation of R (R version 3.4 or higher) and the
92
+ analysis, which requires a local installation of R (R version 4.0 or higher) and the
93
93
  system environment variable "R_HOME" to be set to the R home directory. Note that it
94
94
  might be necessary to restart the computer after adding the "R_HOME" variable. The R
95
95
  home directory can also be found from within R by using the command below, and might
@@ -1,13 +1,11 @@
1
- from msreport.qtable import Qtable
2
- from msreport.reader import MaxQuantReader, FragPipeReader, SpectronautReader
3
-
4
- from msreport.fasta import import_protein_database
5
-
6
1
  import msreport.analyze
7
2
  import msreport.export
8
3
  import msreport.impute
9
4
  import msreport.normalize
10
5
  import msreport.plot
11
6
  import msreport.reader
7
+ from msreport.fasta import import_protein_database
8
+ from msreport.qtable import Qtable
9
+ from msreport.reader import FragPipeReader, MaxQuantReader, SpectronautReader
12
10
 
13
- __version__ = "0.0.26"
11
+ __version__ = "0.0.28"
@@ -71,7 +71,7 @@ def maximum_per_column(array: np.ndarray) -> np.ndarray:
71
71
  return np.array([maximum(i) for i in array.transpose()])
72
72
 
73
73
 
74
- def minimum(array: np.ndarray) -> int:
74
+ def minimum(array: np.ndarray) -> float:
75
75
  """Returns the lowest finite value from one or multiple columns."""
76
76
  array = array.flatten()
77
77
  if np.isfinite(array).any():
@@ -1,6 +1,7 @@
1
1
  from typing import Iterable, Union
2
2
 
3
3
  import pandas as pd
4
+
4
5
  import msreport.aggregate.condense as CONDENSE
5
6
  import msreport.helper
6
7
 
@@ -218,7 +218,7 @@ def aggregate_unique_groups(
218
218
  columns_to_aggregate: Union[str, Iterable],
219
219
  condenser: Callable,
220
220
  is_sorted: bool,
221
- ) -> (np.ndarray, np.ndarray):
221
+ ) -> tuple[np.ndarray, np.ndarray]:
222
222
  """Aggregates column(s) by applying a condenser function to unique groups.
223
223
 
224
224
  The function returns two arrays containing the aggregated values and the
@@ -256,7 +256,7 @@ def aggregate_unique_groups(
256
256
 
257
257
  def _prepare_grouping_indices(
258
258
  table: pd.DataFrame, group_by: str, is_sorted: bool
259
- ) -> (np.ndarray, np.ndarray, pd.DataFrame):
259
+ ) -> tuple[np.ndarray, np.ndarray, pd.DataFrame]:
260
260
  """Prepares start indices and names of unique groups from a sorted dataframe.
261
261
 
262
262
  Args:
@@ -1,14 +1,16 @@
1
- """ The analyze module contains methods for analysing quantification results. """
1
+ """The analyze module contains methods for analysing quantification results."""
2
2
 
3
3
  from __future__ import annotations
4
- from typing import Iterable, Optional, Protocol
4
+
5
5
  import warnings
6
+ from typing import Iterable, Optional, Protocol, Sequence
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
 
10
11
  import msreport.normalize
11
12
  import msreport.rinterface
13
+ from msreport.helper import find_sample_columns
12
14
  from msreport.qtable import Qtable
13
15
 
14
16
 
@@ -33,8 +35,8 @@ class CategoryTransformer(Protocol):
33
35
  def transform(self, table: pd.DataFrame) -> pd.DataFrame:
34
36
  """Transform values in 'table'."""
35
37
 
36
- def get_category_column(self, table: pd.DataFrame) -> pd.DataFrame:
37
- """Returns the specified category column."""
38
+ def get_category_column(self) -> str:
39
+ """Returns the name of the category column."""
38
40
 
39
41
 
40
42
  def analyze_missingness(qtable: Qtable) -> None:
@@ -75,6 +77,7 @@ def analyze_missingness(qtable: Qtable) -> None:
75
77
  def validate_proteins(
76
78
  qtable: Qtable,
77
79
  min_peptides: int = 0,
80
+ min_spectral_counts: int = 0,
78
81
  remove_contaminants: bool = True,
79
82
  min_events: Optional[int] = None,
80
83
  max_missing: Optional[int] = None,
@@ -84,12 +87,13 @@ def validate_proteins(
84
87
  Adds an additional column "Valid" to the qtable, containing Boolean values.
85
88
 
86
89
  Requires expression columns to be set. Depending on the arguments requires the
87
- columns "Total peptides", "Potential contaminant", and the experiment columns
88
- "Missing experiment_name" and "Events experiment_name".
90
+ columns "Total peptides", "Spectral count Combined", "Potential contaminant", and
91
+ the experiment columns "Missing experiment_name" and "Events experiment_name".
89
92
 
90
93
  Args:
91
94
  qtable: A Qtable instance.
92
95
  min_peptides: Minimum number of unique peptides, default 0.
96
+ min_spectral_counts: Minimum number of combined spectral counts, default 0.
93
97
  remove_contaminants: If true, the "Potential contaminant" column is used to
94
98
  remove invalid entries, default True. If no "Potential contaminant" column
95
99
  is present 'remove_contaminants' is ignored.
@@ -107,6 +111,16 @@ def validate_proteins(
107
111
  [valid_entries, qtable["Total peptides"] >= min_peptides], axis=0
108
112
  )
109
113
 
114
+ if min_spectral_counts > 0:
115
+ if "Spectral count Combined" not in qtable:
116
+ raise KeyError(
117
+ "'Spectral count Combined' column not present in qtable.data"
118
+ )
119
+ valid_entries = np.all(
120
+ [valid_entries, qtable["Spectral count Combined"] >= min_spectral_counts],
121
+ axis=0,
122
+ )
123
+
110
124
  # TODO: not tested from here #
111
125
  if remove_contaminants:
112
126
  if "Potential contaminant" not in qtable:
@@ -138,6 +152,50 @@ def validate_proteins(
138
152
  qtable["Valid"] = valid_entries
139
153
 
140
154
 
155
+ def apply_transformer(
156
+ qtable: msreport.Qtable,
157
+ transformer: Transformer,
158
+ tag: str,
159
+ exclude_invalid: bool,
160
+ remove_invalid: bool,
161
+ new_tag: Optional[str] = None,
162
+ ) -> None:
163
+ """Applies a transformer to the values of a Qtable selected with the tag parameter.
164
+
165
+ Args:
166
+ qtable: A Qtable instance, to which the transformer is applied.
167
+ transformer: The transformer to apply.
168
+ tag: The tag used to identify the columns for applying the transformer.
169
+ exclude_invalid: Exclude invalid values from the transformation.
170
+ remove_invalid: Remove invalid values from the table after the transformation.
171
+ new_tag: Optional, if specified then the tag is replaced with this value in the
172
+ column names and the transformed data is stored to these new columns.
173
+ """
174
+ valid = qtable.data["Valid"]
175
+ samples = qtable.get_samples()
176
+ sample_columns = find_sample_columns(qtable.data, tag, samples)
177
+
178
+ if not sample_columns:
179
+ raise ValueError(f"No sample columns found for tag '{tag}'.")
180
+
181
+ if new_tag is not None:
182
+ sample_columns = [c.replace(tag, new_tag) for c in sample_columns]
183
+ column_mapping = dict(zip(samples, sample_columns))
184
+
185
+ data_table = qtable.make_sample_table(tag, samples_as_columns=True)
186
+
187
+ if exclude_invalid:
188
+ data_table[valid] = transformer.transform(data_table[valid])
189
+ else:
190
+ data_table = transformer.transform(data_table)
191
+
192
+ if remove_invalid:
193
+ data_table[~valid] = np.nan
194
+
195
+ data_table.columns = [column_mapping[s] for s in data_table.columns]
196
+ qtable.data[data_table.columns] = data_table
197
+
198
+
141
199
  def normalize_expression(
142
200
  qtable: Qtable,
143
201
  normalizer: Transformer,
@@ -168,11 +226,9 @@ def normalize_expression(
168
226
  raw_data = table[sample_columns]
169
227
  if not normalizer.is_fitted():
170
228
  if exclude_invalid:
171
- valid_mask = table["Valid"]
229
+ normalizer.fit(raw_data[table["Valid"]])
172
230
  else:
173
- valid_mask = np.ones_like(table["Valid"], dtype=bool)
174
- fit_data = raw_data[valid_mask]
175
- normalizer = normalizer.fit(fit_data)
231
+ normalizer = normalizer.fit(raw_data)
176
232
 
177
233
  transformed_data = normalizer.transform(raw_data)
178
234
  qtable[expression_columns] = transformed_data[sample_columns]
@@ -180,7 +236,7 @@ def normalize_expression(
180
236
 
181
237
  def create_site_to_protein_normalizer(
182
238
  qtable: Qtable, category_column: str = "Representative protein"
183
- ) -> msreport.normalizer.CategoricalNormalizer:
239
+ ) -> msreport.normalize.CategoricalNormalizer:
184
240
  """Creates a fitted `CategoricalNormalizer` for site-to-protein normalization.
185
241
 
186
242
  The `CategoricalNormalizer` is fitted to protein expression profiles of the provided
@@ -200,8 +256,8 @@ def create_site_to_protein_normalizer(
200
256
  samples_as_columns=True,
201
257
  features=[category_column],
202
258
  )
203
- completely_quantified = (
204
- ~reference_expression[qtable.get_samples()].isna().any(axis=1)
259
+ completely_quantified = ~reference_expression[qtable.get_samples()].isna().any(
260
+ axis=1
205
261
  )
206
262
  reference_expression = reference_expression[completely_quantified]
207
263
 
@@ -221,7 +277,7 @@ def create_ibaq_transformer(
221
277
  qtable: Qtable,
222
278
  category_column: str = "Representative protein",
223
279
  ibaq_column: str = "iBAQ peptides",
224
- ) -> msreport.normalizer.CategoricalNormalizer:
280
+ ) -> msreport.normalize.CategoricalNormalizer:
225
281
  """Creates a fitted `CategoricalNormalizer` for iBAQ transformation.
226
282
 
227
283
  The `CategoricalNormalizer` is fitted to iBAQ peptide counts of the provided
@@ -247,7 +303,7 @@ def create_ibaq_transformer(
247
303
  ibaq_factor_values[ibaq_factor_values < 1] = 1
248
304
  ibaq_factor_values = np.log2(ibaq_factor_values)
249
305
 
250
- reference_table = pd.DataFrame({c: ibaq_factor_values for c in sample_columns})
306
+ reference_table = pd.DataFrame(dict.fromkeys(sample_columns, ibaq_factor_values))
251
307
  reference_table[category_column] = category_values
252
308
 
253
309
  normalizer = msreport.normalize.CategoricalNormalizer(category_column)
@@ -368,7 +424,15 @@ def calculate_multi_group_comparison(
368
424
  correspond to entries from qtable.design["Experiment"].
369
425
  exclude_invalid: If true, the column "Valid" is used to determine which rows are
370
426
  used for calculating the group comparisons; default True.
427
+
428
+ Raises:
429
+ ValueError: If 'experiment_pairs' contains invalid entries. Each experiment pair
430
+ must have exactly two entries and the two entries must not be the same. All
431
+ experiments must be present in qtable.design. No duplicate experiment pairs
432
+ are allowed.
371
433
  """
434
+ _validate_experiment_pairs(qtable, experiment_pairs)
435
+
372
436
  table = qtable.make_expression_table(samples_as_columns=True, features=["Valid"])
373
437
  comparison_tag = " vs "
374
438
 
@@ -421,7 +485,7 @@ def two_group_comparison(
421
485
 
422
486
  def calculate_multi_group_limma(
423
487
  qtable: Qtable,
424
- experiment_pairs: list[list[str, str]],
488
+ experiment_pairs: Sequence[Iterable[str]],
425
489
  exclude_invalid: bool = True,
426
490
  batch: bool = False,
427
491
  limma_trend: bool = True,
@@ -437,8 +501,7 @@ def calculate_multi_group_limma(
437
501
 
438
502
  Requires that expression columns are set, and expression values are log2 transformed.
439
503
  All rows with missing values are ignored, impute missing values to allow
440
- differential expression analysis of all rows. The qtable.data column
441
- "Representative protein" is used as the index.
504
+ differential expression analysis of all rows.
442
505
 
443
506
  Args:
444
507
  qtable: Qtable instance that contains expression values for differential
@@ -454,7 +517,19 @@ def calculate_multi_group_limma(
454
517
  limma_trend: If true, an intensity-dependent trend is fitted to the prior
455
518
  variance during calculation of the moderated t-statistics, refer to
456
519
  limma.eBayes for details; default True.
520
+
521
+ Raises:
522
+ ValueError: If 'experiment_pairs' contains invalid entries. Each experiment pair
523
+ must have exactly two entries and the two entries must not be the same. All
524
+ experiments must be present in qtable.design. No duplicate experiment pairs
525
+ are allowed.
526
+ KeyError: If the "Batch" column is not present in the qtable.design when
527
+ 'batch' is set to True.
528
+ ValueError: If all values from qtable.design["Batch"] are identical when 'batch'
529
+ is set to True.
457
530
  """
531
+ _validate_experiment_pairs(qtable, experiment_pairs)
532
+
458
533
  # TODO: not tested #
459
534
  if batch and "Batch" not in qtable.get_design():
460
535
  raise KeyError(
@@ -468,10 +543,8 @@ def calculate_multi_group_limma(
468
543
  )
469
544
 
470
545
  design = qtable.get_design()
471
- table = qtable.make_expression_table(
472
- samples_as_columns=True, features=["Representative protein"]
473
- )
474
- table = table.set_index("Representative protein")
546
+ table = qtable.make_expression_table(samples_as_columns=True)
547
+ table.index = table.index.astype(str) # It appears that a string is required for R
475
548
  comparison_tag = " vs "
476
549
 
477
550
  if exclude_invalid:
@@ -487,7 +560,7 @@ def calculate_multi_group_limma(
487
560
  experiment_to_r[experiment] = f".EXPERIMENT__{i:04d}"
488
561
  r_to_experiment = {v: k for k, v in experiment_to_r.items()}
489
562
 
490
- r_experiment_pairs = []
563
+ r_experiment_pairs: list[str] = []
491
564
  for exp1, exp2 in experiment_pairs:
492
565
  r_experiment_pairs.append(f"{experiment_to_r[exp1]}-{experiment_to_r[exp2]}")
493
566
 
@@ -504,7 +577,7 @@ def calculate_multi_group_limma(
504
577
  limma_result.rename(columns=mapping, inplace=True)
505
578
 
506
579
  limma_table = pd.DataFrame(index=table.index)
507
- limma_table = limma_table.join(limma_results.values())
580
+ limma_table = limma_table.join(list(limma_results.values()))
508
581
  limma_table.fillna(np.nan, inplace=True)
509
582
  qtable.add_expression_features(limma_table)
510
583
 
@@ -516,7 +589,7 @@ def calculate_multi_group_limma(
516
589
 
517
590
  def calculate_two_group_limma(
518
591
  qtable: Qtable,
519
- experiment_pair: list[str, str],
592
+ experiment_pair: Sequence[str],
520
593
  exclude_invalid: bool = True,
521
594
  limma_trend: bool = True,
522
595
  ) -> None:
@@ -529,8 +602,7 @@ def calculate_two_group_limma(
529
602
 
530
603
  Requires that expression columns are set, and expression values are log2
531
604
  transformed. All rows with missing values are ignored, impute missing values to
532
- allow differential expression analysis of all rows. The qtable.data
533
- column "Representative protein" is used as the index.
605
+ allow differential expression analysis of all rows.
534
606
 
535
607
  Args:
536
608
  qtable: Qtable instance that contains expression values for differential
@@ -541,27 +613,30 @@ def calculate_two_group_limma(
541
613
  used for the differential expression analysis; default True.
542
614
  limma_trend: If true, an intensity-dependent trend is fitted to the prior
543
615
  variances; default True.
616
+ Raises:
617
+ ValueError: If 'experiment_pair' contains invalid entries. The experiment pair
618
+ must have exactly two entries and the two entries must not be the same. Both
619
+ experiments must be present in qtable.design.
544
620
  """
545
- # TODO: not tested #
546
- expression_table = qtable.make_expression_table(
547
- samples_as_columns=True, features=["Representative protein"]
548
- )
621
+ _validate_experiment_pair(qtable, experiment_pair)
622
+
623
+ # TODO: LIMMA function not tested #
624
+ table = qtable.make_expression_table(samples_as_columns=True)
549
625
  comparison_tag = " vs "
550
626
 
551
627
  if exclude_invalid:
552
628
  valid = qtable["Valid"]
553
629
  else:
554
- valid = np.full(expression_table.shape[0], True)
630
+ valid = np.full(table.shape[0], True)
555
631
 
556
632
  samples_to_experiment = {}
557
633
  for experiment in experiment_pair:
558
- mapping = {s: experiment for s in qtable.get_samples(experiment)}
634
+ mapping = dict.fromkeys(qtable.get_samples(experiment), experiment)
559
635
  samples_to_experiment.update(mapping)
560
636
 
561
- table_columns = ["Representative protein"]
562
- table_columns.extend(samples_to_experiment.keys())
563
- table = expression_table[table_columns]
564
- table = table.set_index("Representative protein")
637
+ # Keep only samples that are present in the 'experiment_pair'
638
+ table = table[samples_to_experiment.keys()]
639
+ table.index = table.index.astype(str) # It appears that a string is required for R
565
640
  not_nan = table.isna().sum(axis=1) == 0
566
641
 
567
642
  mask = np.all([valid, not_nan], axis=0)
@@ -583,4 +658,62 @@ def calculate_two_group_limma(
583
658
  limma_table.rename(columns=mapping, inplace=True)
584
659
  qtable.add_expression_features(limma_table)
585
660
 
586
- return limma_result
661
+
662
+ def _validate_experiment_pairs(
663
+ qtable: Qtable, exp_pairs: Iterable[Iterable[str]]
664
+ ) -> None:
665
+ """Validates that experiment pairs are valid and raises an error if not.
666
+
667
+ - All 'exp_pairs' entries must have a length of exactly 2.
668
+ - All experiments must be present in the qtable.design.
669
+ - No duplicate experiments are allowed in a pair.
670
+ - No duplicate experiment pairs are allowed.
671
+
672
+ Args:
673
+ qtable: Qtable instance containing experiment data.
674
+ exp_pairs: Iterable of experiment pairs to validate.
675
+
676
+ Raises:
677
+ ValueError: If any of the validation checks fail.
678
+ """
679
+ all_experiments = {exp for pair in exp_pairs for exp in pair}
680
+ missing_experiments = all_experiments - set(qtable.get_experiments())
681
+ if missing_experiments:
682
+ raise ValueError(
683
+ f"Experiments '{missing_experiments}' not found in qtable.design."
684
+ )
685
+ for experiment_pair in exp_pairs:
686
+ _validate_experiment_pair(qtable, experiment_pair)
687
+
688
+ if len(list(exp_pairs)) != len({tuple(pair) for pair in exp_pairs}):
689
+ raise ValueError(
690
+ f"Some experiment pairs in {exp_pairs} have been specified multiple "
691
+ "times. Each pair must occur only once."
692
+ )
693
+
694
+
695
+ def _validate_experiment_pair(qtable: Qtable, exp_pair: Iterable[str]) -> None:
696
+ """Validates the experiment pair is valid and raises an error if not.
697
+
698
+ - The experiment pair must contain exactly two entries.
699
+ - The two entries of the experiment pair must be different.
700
+ - Both experiments must be present in the qtable.design.
701
+
702
+ Args:
703
+ qtable: Qtable instance containing experiment data.
704
+ exp_pair: The experiment pair to validate.
705
+
706
+ Raises:
707
+ ValueError: If any of the validation checks fail.
708
+ """
709
+ if len(list(exp_pair)) != 2:
710
+ raise ValueError(
711
+ f"Experiment pair '{exp_pair}' contains more than two entries."
712
+ )
713
+ if len(list(exp_pair)) != len(set(exp_pair)):
714
+ raise ValueError(f"Experiment pair '{exp_pair}' contains the same entry twice.")
715
+ if set(exp_pair) - set(qtable.get_experiments()):
716
+ raise ValueError(
717
+ f"Experiments '{set(exp_pair) - set(qtable.get_experiments())}' "
718
+ "not found in qtable.design."
719
+ )
@@ -1,5 +1,4 @@
1
- class MsreportError(Exception):
2
- ...
1
+ class MsreportError(Exception): ...
3
2
 
4
3
 
5
4
  class NotFittedError(ValueError, AttributeError):
@@ -13,10 +13,10 @@ Index([
13
13
  ], dtype='object')
14
14
  """
15
15
 
16
- from collections import defaultdict as ddict
17
16
  import os
18
- from typing import Iterable, Optional, Protocol
19
17
  import warnings
18
+ from collections import defaultdict as ddict
19
+ from typing import Iterable, Optional, Protocol, Sequence
20
20
 
21
21
  import numpy as np
22
22
  import pandas as pd
@@ -88,7 +88,7 @@ def contaminants_to_clipboard(qtable: Qtable) -> None:
88
88
 
89
89
  for column_tag in column_tags:
90
90
  columns.extend(helper.find_sample_columns(data, column_tag, samples))
91
- columns = np.array(columns)[[c in data.columns for c in columns]]
91
+ columns = [c for c in columns if c in data.columns]
92
92
 
93
93
  contaminants = qtable["Potential contaminant"]
94
94
  data = data.loc[contaminants, columns]
@@ -135,10 +135,10 @@ def to_perseus_matrix(
135
135
  numeric_columns = set(numeric_columns).difference(expression_columns)
136
136
  numeric_columns = set(numeric_columns).difference(categorical_columns)
137
137
 
138
- column_categories = ddict(lambda: default_category)
139
- column_categories.update({c: "N" for c in numeric_columns})
140
- column_categories.update({c: "C" for c in categorical_columns})
141
- column_categories.update({c: "E" for c in expression_columns})
138
+ column_categories: ddict[str, str] = ddict(lambda: default_category)
139
+ column_categories.update(dict.fromkeys(numeric_columns, "N"))
140
+ column_categories.update(dict.fromkeys(categorical_columns, "C"))
141
+ column_categories.update(dict.fromkeys(expression_columns, "E"))
142
142
 
143
143
  column_annotation = [column_categories[column] for column in table.columns]
144
144
  column_annotation[0] = f"{annotation_row_prefix}{column_annotation[0]}"
@@ -219,6 +219,7 @@ def write_html_coverage_map(
219
219
  "change in a future release."
220
220
  ),
221
221
  FutureWarning,
222
+ stacklevel=2,
222
223
  )
223
224
  # Get protein information from the protein database
224
225
  protein_entry = protein_db[protein_id]
@@ -314,8 +315,8 @@ def _amica_table_from(qtable: Qtable) -> pd.DataFrame:
314
315
  sample_columns = helper.find_sample_columns(
315
316
  amica_table, tag, qtable.get_samples()
316
317
  )
317
- non_sample_columns = set(columns).difference(set(sample_columns))
318
- amica_table.drop(non_sample_columns, inplace=True, axis=1)
318
+ non_sample_columns = list(set(columns).difference(set(sample_columns)))
319
+ amica_table.drop(columns=non_sample_columns, inplace=True, axis=1)
319
320
 
320
321
  # Log transform columns if necessary
321
322
  for tag in intensity_column_tags:
@@ -437,7 +438,7 @@ def _generate_html_sequence_map(
437
438
  highlights = highlights if highlights is not None else {}
438
439
  sequence_length = len(sequence)
439
440
 
440
- def write_row_index(pos: int, strings: list) -> str:
441
+ def write_row_index(pos: int, strings: list):
441
442
  ndigits = len(str(sequence_length))
442
443
  row_index = str(pos + 1).rjust(ndigits)
443
444
  html_entry = '<FONT COLOR="#000000">' + row_index + " " + "</FONT>"
@@ -457,7 +458,7 @@ def _generate_html_sequence_map(
457
458
 
458
459
  in_covered_region: bool = False
459
460
  strings = []
460
- strings.append(f'<FONT COLOR="#606060">') # Set default text color to grey
461
+ strings.append('<FONT COLOR="#606060">') # Set default text color to grey
461
462
  write_row_index(0, strings)
462
463
  for pos, character in enumerate(sequence):
463
464
  if pos in coverage_start_idx:
@@ -483,13 +484,15 @@ def _generate_html_sequence_map(
483
484
  if pos in coverage_stop_idx:
484
485
  in_covered_region = False
485
486
  close_coverage_region(strings)
486
- strings.append(f"</FONT>")
487
+ strings.append("</FONT>")
487
488
 
488
489
  html_sequence_block = "".join(strings)
489
490
  return html_sequence_block
490
491
 
491
492
 
492
- def _find_covered_region_boundaries(coverage_mask: Iterable[bool]) -> list[tuple[int]]:
493
+ def _find_covered_region_boundaries(
494
+ coverage_mask: Sequence[bool],
495
+ ) -> list[tuple[int, int]]:
493
496
  """Returns a list of boundaries from continuously covered regions in a protein.
494
497
 
495
498
  Args:
@@ -1,7 +1,6 @@
1
1
  import pathlib
2
2
  from typing import Iterable, Union
3
3
 
4
-
5
4
  from profasta.db import ProteinDatabase
6
5
 
7
6
 
@@ -24,5 +23,7 @@ def import_protein_database(
24
23
  database = ProteinDatabase()
25
24
  paths = [fasta_path] if isinstance(fasta_path, (str, pathlib.Path)) else fasta_path
26
25
  for path in paths:
26
+ if isinstance(path, pathlib.Path):
27
+ path = path.as_posix()
27
28
  database.add_fasta(path, header_parser=header_parser, overwrite=True)
28
29
  return database
@@ -1,21 +1,21 @@
1
1
  from .calc import (
2
- mode,
2
+ calculate_monoisotopic_mass,
3
+ calculate_sequence_coverage,
3
4
  calculate_tryptic_ibaq_peptides,
4
5
  make_coverage_mask,
5
- calculate_sequence_coverage,
6
- calculate_monoisotopic_mass,
6
+ mode,
7
7
  )
8
8
  from .table import (
9
9
  apply_intensity_cutoff,
10
- guess_design,
11
- intensities_in_logspace,
12
10
  find_columns,
13
11
  find_sample_columns,
12
+ guess_design,
13
+ intensities_in_logspace,
14
+ join_tables,
14
15
  keep_rows_by_partial_match,
15
16
  remove_rows_by_partial_match,
16
- join_tables,
17
- rename_sample_columns,
18
17
  rename_mq_reporter_channels,
18
+ rename_sample_columns,
19
19
  )
20
20
  from .temp import (
21
21
  extract_modifications,