ms2rescore 3.1.0.dev7__tar.gz → 3.1.0.dev8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/PKG-INFO +1 -1
  2. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/__init__.py +1 -1
  3. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/core.py +67 -39
  4. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/config_default.json +2 -0
  5. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/config_schema.json +12 -0
  6. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/parse_psms.py +60 -65
  7. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/charts.py +22 -14
  8. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/rescoring_engines/mokapot.py +4 -1
  9. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/rescoring_engines/percolator.py +2 -0
  10. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/LICENSE +0 -0
  11. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/README.md +0 -0
  12. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/__main__.py +0 -0
  13. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/config_parser.py +0 -0
  14. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/exceptions.py +0 -0
  15. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/__init__.py +0 -0
  16. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/base.py +0 -0
  17. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/basic.py +0 -0
  18. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/deeplc.py +0 -0
  19. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/im2deep.py +0 -0
  20. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/ionmob.py +0 -0
  21. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/maxquant.py +0 -0
  22. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/ms2pip.py +0 -0
  23. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/__init__.py +0 -0
  24. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/__main__.py +0 -0
  25. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/app.py +0 -0
  26. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/function2ctk.py +0 -0
  27. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/widgets.py +0 -0
  28. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/__init__.py +0 -0
  29. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/config_default_tims.json +0 -0
  30. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/__init__.py +0 -0
  31. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/config_icon.png +0 -0
  32. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/github-mark-white.png +0 -0
  33. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/github-mark.png +0 -0
  34. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/ms2rescore_logo.png +0 -0
  35. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/program_icon.ico +0 -0
  36. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/ms2rescore-gui-theme.json +0 -0
  37. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/parse_spectra.py +0 -0
  38. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/__init__.py +0 -0
  39. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/__main__.py +0 -0
  40. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/generate.py +0 -0
  41. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/__init__.py +0 -0
  42. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/about.html +0 -0
  43. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/base.html +0 -0
  44. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/config.html +0 -0
  45. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/features.html +0 -0
  46. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/log.html +0 -0
  47. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/metadata.html +0 -0
  48. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/overview.html +0 -0
  49. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/stats-card.html +0 -0
  50. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/style.html +0 -0
  51. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/target-decoy.html +0 -0
  52. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/texts.toml +0 -0
  53. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/report/utils.py +0 -0
  54. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/rescoring_engines/__init__.py +0 -0
  55. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/ms2rescore/utils.py +0 -0
  56. {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev8}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ms2rescore
3
- Version: 3.1.0.dev7
3
+ Version: 3.1.0.dev8
4
4
  Summary: MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and retention times.
5
5
  Keywords: MS2Rescore,MS2PIP,DeepLC,Percolator,proteomics,mass spectrometry,peptide identification,rescoring,machine learning
6
6
  Author: Ana Sílvia C. Silva, Robbin Bouwmeester, Louise Buur
@@ -1,6 +1,6 @@
1
1
  """MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""
2
2
 
3
- __version__ = "3.1.0-dev7"
3
+ __version__ = "3.1.0-dev8"
4
4
 
5
5
  from warnings import filterwarnings
6
6
 
@@ -47,7 +47,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
47
47
  psm_list = parse_psms(config, psm_list)
48
48
 
49
49
  # Log #PSMs identified before rescoring
50
- id_psms_before = _log_id_psms_before(psm_list)
50
+ id_psms_before = _log_id_psms_before(psm_list, max_rank=config["max_psm_rank_output"])
51
51
 
52
52
  # Define feature names; get existing feature names from PSM file
53
53
  feature_names = dict()
@@ -62,7 +62,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
62
62
  )
63
63
 
64
64
  # Add missing precursor info from spectrum file if needed
65
- _fill_missing_precursor_info(psm_list, config)
65
+ psm_list = _fill_missing_precursor_info(psm_list, config)
66
66
 
67
67
  # Add rescoring features
68
68
  for fgen_name, fgen_config in config["feature_generators"].items():
@@ -145,22 +145,26 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
145
145
  **config["rescoring_engine"]["mokapot"],
146
146
  )
147
147
  except exceptions.RescoringError as e:
148
- logger.exception(e)
149
- rescoring_succeeded = False
150
- else:
151
- rescoring_succeeded = True
152
- _log_id_psms_after(psm_list, id_psms_before)
148
+ # Write output
149
+ logger.info(f"Writing intermediary output to {output_file_root}.psms.tsv...")
150
+ psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
151
+
152
+ # Reraise exception
153
+ raise e
153
154
 
154
- # Workaround for broken PEP calculation if best PSM is decoy
155
+ # Post-rescoring processing
155
156
  if all(psm_list["pep"] == 1.0):
156
157
  psm_list = _fix_constant_pep(psm_list)
158
+ psm_list = _filter_by_rank(psm_list, config["max_psm_rank_output"], False)
159
+ psm_list = _calculate_confidence(psm_list)
160
+ _ = _log_id_psms_after(psm_list, id_psms_before, max_rank=config["max_psm_rank_output"])
157
161
 
158
162
  # Write output
159
163
  logger.info(f"Writing output to {output_file_root}.psms.tsv...")
160
164
  psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
161
165
 
162
166
  # Write report
163
- if config["write_report"] and rescoring_succeeded:
167
+ if config["write_report"]:
164
168
  try:
165
169
  generate.generate_report(
166
170
  output_file_root, psm_list=psm_list, feature_names=feature_names, use_txt_log=True
@@ -169,7 +173,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
169
173
  logger.exception(e)
170
174
 
171
175
 
172
- def _fill_missing_precursor_info(psm_list, config):
176
+ def _fill_missing_precursor_info(psm_list: PSMList, config: Dict) -> PSMList:
173
177
  """Fill missing precursor info from spectrum file if needed."""
174
178
  # Check if required
175
179
  # TODO: avoid hard coding feature generators in some way
@@ -211,6 +215,16 @@ def _fill_missing_precursor_info(psm_list, config):
211
215
  [v is not None and not np.isnan(v) for v in psm_list[value_name]]
212
216
  ]
213
217
 
218
+ return psm_list
219
+
220
+
221
+ def _filter_by_rank(psm_list: PSMList, max_rank: int, lower_score_better: bool) -> PSMList:
222
+ """Filter PSMs by rank."""
223
+ psm_list.set_ranks(lower_score_better=lower_score_better)
224
+ rank_filter = psm_list["rank"] <= max_rank
225
+ logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {max_rank}.")
226
+ return psm_list[rank_filter]
227
+
214
228
 
215
229
  def _write_feature_names(feature_names, output_file_root):
216
230
  """Write feature names to file."""
@@ -221,31 +235,39 @@ def _write_feature_names(feature_names, output_file_root):
221
235
  f.write(f"{fgen}\t{feature}\n")
222
236
 
223
237
 
224
- def _log_id_psms_before(psm_list):
238
+ def _log_id_psms_before(psm_list: PSMList, fdr: float = 0.01, max_rank: int = 1) -> int:
225
239
  """Log #PSMs identified before rescoring."""
226
240
  id_psms_before = (
227
- (psm_list["qvalue"] <= 0.01) & (psm_list["is_decoy"] == False) # noqa: E712
241
+ (psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
228
242
  ).sum()
229
- logger.info("Found %i identified PSMs at 1%% FDR before rescoring.", id_psms_before)
243
+ logger.info(
244
+ f"Found {id_psms_before} identified PSMs with rank <= {max_rank} at {fdr} FDR before "
245
+ "rescoring."
246
+ )
230
247
  return id_psms_before
231
248
 
232
249
 
233
- def _log_id_psms_after(psm_list, id_psms_before):
250
+ def _log_id_psms_after(
251
+ psm_list: PSMList, id_psms_before: int, fdr: float = 0.01, max_rank: int = 1
252
+ ) -> int:
234
253
  """Log #PSMs identified after rescoring."""
235
254
  id_psms_after = (
236
- (psm_list["qvalue"] <= 0.01) & (psm_list["is_decoy"] == False) # noqa: E712
255
+ (psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
237
256
  ).sum()
238
257
  diff = id_psms_after - id_psms_before
239
258
  diff_perc = diff / id_psms_before if id_psms_before > 0 else None
240
259
 
241
260
  diff_numbers = f"{diff} ({diff_perc:.2%})" if diff_perc is not None else str(diff)
242
261
  diff_word = "more" if diff > 0 else "less"
243
- logger.info(f"Identified {diff_numbers} {diff_word} PSMs at 1% FDR after rescoring.")
262
+ logger.info(
263
+ f"Identified {diff_numbers} {diff_word} PSMs with rank <= {max_rank} at {fdr} FDR after "
264
+ "rescoring."
265
+ )
244
266
 
245
267
  return id_psms_after
246
268
 
247
269
 
248
- def _fix_constant_pep(psm_list):
270
+ def _fix_constant_pep(psm_list: PSMList) -> PSMList:
249
271
  """Workaround for broken PEP calculation if best PSM is decoy."""
250
272
  logger.warning(
251
273
  "Attempting to fix constant PEP values by removing decoy PSMs that score higher than the "
@@ -257,30 +279,36 @@ def _fix_constant_pep(psm_list):
257
279
  if not higher_scoring_decoys.any():
258
280
  logger.warning("No decoys scoring higher than the best target found. Skipping fix.")
259
281
  else:
260
- logger.warning(f"Removing {higher_scoring_decoys.sum()} decoy PSMs.")
261
-
262
282
  psm_list = psm_list[~higher_scoring_decoys]
283
+ logger.warning(f"Removed {higher_scoring_decoys.sum()} decoy PSMs.")
263
284
 
264
- # Minimal conversion to LinearPsmDataset
265
- psm_df = psm_list.to_dataframe()
266
- psm_df = psm_df.reset_index(drop=True).reset_index()
267
- psm_df["peptide"] = (
268
- psm_df["peptidoform"].astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
269
- )
270
- psm_df["is_target"] = ~psm_df["is_decoy"]
271
- lin_psm_data = LinearPsmDataset(
272
- psms=psm_df[["index", "peptide", "score", "is_target"]],
273
- target_column="is_target",
274
- spectrum_columns="index", # Use artificial index to allow multi-rank rescoring
275
- peptide_column="peptide",
276
- feature_columns=["score"],
277
- )
285
+ return psm_list
286
+
287
+
288
+ def _calculate_confidence(psm_list: PSMList) -> PSMList:
289
+ """
290
+ Calculate scores, q-values, and PEPs for PSMs and peptides and add them to PSMList.
291
+ """
292
+ # Minimal conversion to LinearPsmDataset
293
+ psm_df = psm_list.to_dataframe()
294
+ psm_df = psm_df.reset_index(drop=True).reset_index()
295
+ psm_df["peptide"] = (
296
+ psm_df["peptidoform"].astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
297
+ )
298
+ psm_df["is_target"] = ~psm_df["is_decoy"]
299
+ lin_psm_data = LinearPsmDataset(
300
+ psms=psm_df[["index", "peptide", "score", "is_target"]],
301
+ target_column="is_target",
302
+ spectrum_columns="index", # Use artificial index to allow multi-rank rescoring
303
+ peptide_column="peptide",
304
+ feature_columns=["score"],
305
+ )
278
306
 
279
- # Recalculate confidence
280
- new_confidence = lin_psm_data.assign_confidence()
307
+ # Recalculate confidence
308
+ new_confidence = lin_psm_data.assign_confidence()
281
309
 
282
- # Add new confidence estimations to PSMList
283
- add_psm_confidence(psm_list, new_confidence)
284
- add_peptide_confidence(psm_list, new_confidence)
310
+ # Add new confidence estimations to PSMList
311
+ add_psm_confidence(psm_list, new_confidence)
312
+ add_peptide_confidence(psm_list, new_confidence)
285
313
 
286
- return psm_list
314
+ return psm_list
@@ -33,6 +33,8 @@
33
33
  "psm_id_rt_pattern": null,
34
34
  "psm_id_im_pattern": null,
35
35
  "lower_score_is_better": false,
36
+ "max_psm_rank_input": 10,
37
+ "max_psm_rank_output": 1,
36
38
  "modification_mapping": {},
37
39
  "fixed_modifications": {},
38
40
  "processes": -1,
@@ -131,6 +131,18 @@
131
131
  "type": "boolean",
132
132
  "default": false
133
133
  },
134
+ "max_psm_rank_input": {
135
+ "description": "Maximum rank of PSMs to use as input for rescoring",
136
+ "type": "number",
137
+ "default": 10,
138
+ "minimum": 1
139
+ },
140
+ "max_psm_rank_output": {
141
+ "description": "Maximum rank of PSMs to return after rescoring, before final FDR calculation",
142
+ "type": "number",
143
+ "default": 1,
144
+ "minimum": 1
145
+ },
134
146
  "modification_mapping": {
135
147
  "description": "Mapping of modification labels to each replacement label.",
136
148
  "type": "object",
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  import re
3
- from typing import Dict, Union
3
+ from typing import Dict, Optional, Union
4
4
 
5
5
  import numpy as np
6
6
  import psm_utils.io
@@ -24,14 +24,30 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
24
24
  PSMList object containing PSMs. If None, PSMs will be read from ``psm_file``.
25
25
 
26
26
  """
27
- # Read PSMs, find decoys, calculate q-values
28
- psm_list = _read_psms(config, psm_list)
27
+ # Read PSMs
28
+ try:
29
+ psm_list = _read_psms(config, psm_list)
30
+ except psm_utils.io.PSMUtilsIOException:
31
+ raise MS2RescoreConfigurationError(
32
+ "Error occurred while reading PSMs. Please check the 'psm_file' and "
33
+ "'psm_file_type' settings. See "
34
+ "https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
35
+ " for more information."
36
+ )
37
+
38
+ # Filter by PSM rank
39
+ psm_list.set_ranks(config["lower_score_is_better"])
40
+ rank_filter = psm_list["rank"] <= config["max_psm_rank_input"]
41
+ psm_list = psm_list[rank_filter]
42
+ logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {config['max_psm_rank_input']}.")
43
+
44
+ # Remove invalid AAs, find decoys, calculate q-values
29
45
  psm_list = _remove_invalid_aa(psm_list)
30
- _find_decoys(config, psm_list)
31
- _calculate_qvalues(config, psm_list)
46
+ _find_decoys(psm_list, config["id_decoy_pattern"])
47
+ _calculate_qvalues(psm_list, config["lower_score_is_better"])
32
48
  if config["psm_id_rt_pattern"] or config["psm_id_im_pattern"]:
33
49
  logger.debug("Parsing retention time and/or ion mobility from PSM identifier...")
34
- _parse_values_spectrum_id(config, psm_list)
50
+ _parse_values_from_spectrum_id(config, psm_list)
35
51
 
36
52
  # Store scoring values for comparison later
37
53
  for psm in psm_list:
@@ -79,39 +95,30 @@ def _read_psms(config, psm_list):
79
95
  if isinstance(psm_list, PSMList):
80
96
  return psm_list
81
97
  else:
82
- logger.info("Reading PSMs from file...")
83
98
  total_files = len(config["psm_file"])
84
99
  psm_list = []
85
100
  for current_file, psm_file in enumerate(config["psm_file"]):
86
101
  logger.info(
87
102
  f"Reading PSMs from PSM file ({current_file+1}/{total_files}): '{psm_file}'..."
88
103
  )
89
- try:
90
- psm_list.extend(
91
- psm_utils.io.read_file(
92
- psm_file,
93
- filetype=config["psm_file_type"],
94
- show_progressbar=True,
95
- **config["psm_reader_kwargs"],
96
- )
97
- )
98
- except psm_utils.io.PSMUtilsIOException:
99
- raise MS2RescoreConfigurationError(
100
- "Error occurred while reading PSMs. Please check the 'psm_file' and "
101
- "'psm_file_type' settings. See "
102
- "https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
103
- " for more information."
104
+ psm_list.extend(
105
+ psm_utils.io.read_file(
106
+ psm_file,
107
+ filetype=config["psm_file_type"],
108
+ show_progressbar=True,
109
+ **config["psm_reader_kwargs"],
104
110
  )
111
+ )
105
112
  logger.debug(f"Read {len(psm_list)} PSMs from '{psm_file}'.")
106
113
 
107
114
  return PSMList(psm_list=psm_list)
108
115
 
109
116
 
110
- def _find_decoys(config, psm_list):
117
+ def _find_decoys(psm_list: PSMList, id_decoy_pattern: Optional[str] = None):
111
118
  """Find decoys in PSMs, log amount, and raise error if none found."""
112
119
  logger.debug("Finding decoys...")
113
- if config["id_decoy_pattern"]:
114
- psm_list.find_decoys(config["id_decoy_pattern"])
120
+ if id_decoy_pattern:
121
+ psm_list.find_decoys(id_decoy_pattern)
115
122
 
116
123
  n_psms = len(psm_list)
117
124
  percent_decoys = sum(psm_list["is_decoy"]) / n_psms * 100
@@ -126,12 +133,12 @@ def _find_decoys(config, psm_list):
126
133
  )
127
134
 
128
135
 
129
- def _calculate_qvalues(config, psm_list):
136
+ def _calculate_qvalues(psm_list: PSMList, lower_score_is_better: bool):
130
137
  """Calculate q-values for PSMs if not present."""
131
138
  # Calculate q-values if not present
132
139
  if None in psm_list["qvalue"]:
133
140
  logger.debug("Recalculating q-values...")
134
- psm_list.calculate_qvalues(reverse=not config["lower_score_is_better"])
141
+ psm_list.calculate_qvalues(reverse=not lower_score_is_better)
135
142
 
136
143
 
137
144
  def _match_psm_ids(old_id, regex_pattern):
@@ -146,50 +153,38 @@ def _match_psm_ids(old_id, regex_pattern):
146
153
  )
147
154
 
148
155
 
149
- def _parse_values_spectrum_id(config, psm_list):
156
+ def _parse_values_from_spectrum_id(
157
+ psm_list: PSMList,
158
+ psm_id_rt_pattern: Optional[str] = None,
159
+ psm_id_im_pattern: Optional[str] = None,
160
+ ):
150
161
  """Parse retention time and or ion mobility values from the spectrum_id."""
151
-
152
- if config["psm_id_rt_pattern"]:
153
- logger.debug(
154
- "Parsing retention time from spectrum_id with regex pattern "
155
- f"{config['psm_id_rt_pattern']}"
156
- )
157
- try:
158
- rt_pattern = re.compile(config["psm_id_rt_pattern"])
159
- psm_list["retention_time"] = [
160
- float(rt_pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
161
- ]
162
- except AttributeError:
163
- raise MS2RescoreConfigurationError(
164
- f"Could not parse retention time from spectrum_id with the "
165
- f"{config['psm_id_rt_pattern']} regex pattern. "
166
- f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n."
167
- "Please make sure the retention time key is present in the spectrum_id "
168
- "and the value is in a capturing group or disable the relevant feature generator."
169
- )
170
-
171
- if config["psm_id_im_pattern"]:
172
- logger.debug(
173
- "Parsing ion mobility from spectrum_id with regex pattern "
174
- f"{config['psm_id_im_pattern']}"
175
- )
176
- try:
177
- im_pattern = re.compile(config["psm_id_im_pattern"])
178
- psm_list["ion_mobility"] = [
179
- float(im_pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
180
- ]
181
- except AttributeError:
182
- raise MS2RescoreConfigurationError(
183
- f"Could not parse ion mobility from spectrum_id with the "
184
- f"{config['psm_id_im_pattern']} regex pattern. "
185
- "Please make sure the ion mobility key is present in the spectrum_id "
186
- "and the value is in a capturing group or disable the relevant feature generator."
162
+ for pattern, label, key in zip(
163
+ [psm_id_rt_pattern, psm_id_im_pattern],
164
+ ["retention time", "ion mobility"],
165
+ ["retention_time", "ion_mobility"],
166
+ ):
167
+ if pattern:
168
+ logger.debug(
169
+ f"Parsing {label} from spectrum_id with regex pattern " f"{psm_id_rt_pattern}"
187
170
  )
171
+ try:
172
+ pattern = re.compile(pattern)
173
+ psm_list[key] = [
174
+ float(pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
175
+ ]
176
+ except AttributeError:
177
+ raise MS2RescoreConfigurationError(
178
+ f"Could not parse {label} from spectrum_id with the "
179
+ f"{pattern} regex pattern. "
180
+ f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n. "
181
+ f"Please make sure the {label} key is present in the spectrum_id "
182
+ "and the value is in a capturing group or disable the relevant feature generator."
183
+ )
188
184
 
189
185
 
190
186
  def _remove_invalid_aa(psm_list: PSMList) -> PSMList:
191
187
  """Remove PSMs with invalid amino acids."""
192
- logger.debug("Removing PSMs with invalid amino acids...")
193
188
  invalid_psms = np.array(
194
189
  [any(aa in "BJOUXZ" for aa in psm.peptidoform.sequence) for psm in psm_list]
195
190
  )
@@ -242,16 +242,22 @@ def score_scatter_plot(
242
242
  ce_psms = pd.concat([ce_psms_targets, ce_psms_decoys], axis=0)
243
243
 
244
244
  # Get score thresholds
245
- score_threshold_before = (
246
- ce_psms[ce_psms["mokapot q-value before"] <= fdr_threshold]
247
- .sort_values("mokapot q-value before", ascending=False)["mokapot score before"]
248
- .iloc[0]
249
- )
250
- score_threshold_after = (
251
- ce_psms[ce_psms["mokapot q-value after"] <= fdr_threshold]
252
- .sort_values("mokapot q-value after", ascending=False)["mokapot score after"]
253
- .iloc[0]
254
- )
245
+ try:
246
+ score_threshold_before = (
247
+ ce_psms[ce_psms["mokapot q-value before"] <= fdr_threshold]
248
+ .sort_values("mokapot q-value before", ascending=False)["mokapot score before"]
249
+ .iloc[0]
250
+ )
251
+ except IndexError: # No PSMs below threshold
252
+ score_threshold_before = None
253
+ try:
254
+ score_threshold_after = (
255
+ ce_psms[ce_psms["mokapot q-value after"] <= fdr_threshold]
256
+ .sort_values("mokapot q-value after", ascending=False)["mokapot score after"]
257
+ .iloc[0]
258
+ )
259
+ except IndexError: # No PSMs below threshold
260
+ score_threshold_after = None
255
261
 
256
262
  # Plot
257
263
  fig = px.scatter(
@@ -268,10 +274,12 @@ def score_scatter_plot(
268
274
  },
269
275
  )
270
276
  # draw FDR thresholds
271
- fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
272
- fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
273
- fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
274
- fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)
277
+ if score_threshold_before:
278
+ fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
279
+ fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
280
+ if score_threshold_after:
281
+ fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
282
+ fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)
275
283
 
276
284
  return fig
277
285
 
@@ -223,7 +223,7 @@ def save_model_weights(
223
223
  def add_psm_confidence(
224
224
  psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
225
225
  ) -> None:
226
- """Add Mokapot PSM-level confidence estimates to PSM list."""
226
+ """Add PSM-level confidence estimates to PSM list, updating score, qvalue, pep, and rank."""
227
227
  # Reshape confidence estimates to match PSMList
228
228
  keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
229
229
  mokapot_values_targets = (
@@ -241,6 +241,9 @@ def add_psm_confidence(
241
241
  psm_list["qvalue"] = q[:, 1]
242
242
  psm_list["pep"] = q[:, 2]
243
243
 
244
+ # Reset ranks to match new scores
245
+ psm_list.set_ranks(lower_score_better=False)
246
+
244
247
 
245
248
  def add_peptide_confidence(
246
249
  psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
@@ -175,6 +175,8 @@ def _update_psm_scores(
175
175
  original_psm["qvalue"] = new_psm["qvalue"]
176
176
  original_psm["pep"] = new_psm["pep"]
177
177
 
178
+ psm_list.set_ranks(lower_score_better=False)
179
+
178
180
 
179
181
  def _write_pin_file(psm_list: psm_utils.PSMList, filepath: str):
180
182
  """Write PIN file for rescoring."""
File without changes