ms2rescore 3.1.0.dev1__tar.gz → 3.1.0.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/PKG-INFO +1 -1
  2. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/__init__.py +1 -1
  3. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/__main__.py +26 -1
  4. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/core.py +2 -2
  5. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/config_schema.json +5 -0
  6. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/charts.py +2 -2
  7. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/generate.py +1 -1
  8. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/rescoring_engines/mokapot.py +28 -9
  9. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/rescoring_engines/percolator.py +17 -5
  10. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/LICENSE +0 -0
  11. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/README.md +0 -0
  12. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/config_parser.py +0 -0
  13. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/exceptions.py +0 -0
  14. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/__init__.py +0 -0
  15. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/base.py +0 -0
  16. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/basic.py +0 -0
  17. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/deeplc.py +0 -0
  18. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/im2deep.py +0 -0
  19. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/ionmob.py +0 -0
  20. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/maxquant.py +0 -0
  21. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/ms2pip.py +0 -0
  22. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/__init__.py +0 -0
  23. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/__main__.py +0 -0
  24. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/app.py +0 -0
  25. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/function2ctk.py +0 -0
  26. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/widgets.py +0 -0
  27. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/__init__.py +0 -0
  28. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/config_default.json +0 -0
  29. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/config_default_tims.json +0 -0
  30. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/__init__.py +0 -0
  31. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/config_icon.png +0 -0
  32. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/github-mark-white.png +0 -0
  33. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/github-mark.png +0 -0
  34. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/ms2rescore_logo.png +0 -0
  35. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/program_icon.ico +0 -0
  36. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/ms2rescore-gui-theme.json +0 -0
  37. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/parse_psms.py +0 -0
  38. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/parse_spectra.py +0 -0
  39. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/__init__.py +0 -0
  40. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/__main__.py +0 -0
  41. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/__init__.py +0 -0
  42. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/about.html +0 -0
  43. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/base.html +0 -0
  44. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/config.html +0 -0
  45. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/features.html +0 -0
  46. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/log.html +0 -0
  47. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/metadata.html +0 -0
  48. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/overview.html +0 -0
  49. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/stats-card.html +0 -0
  50. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/style.html +0 -0
  51. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/target-decoy.html +0 -0
  52. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/texts.toml +0 -0
  53. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/utils.py +0 -0
  54. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/rescoring_engines/__init__.py +0 -0
  55. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/utils.py +0 -0
  56. {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ms2rescore
3
- Version: 3.1.0.dev1
3
+ Version: 3.1.0.dev3
4
4
  Summary: MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and retention times.
5
5
  Keywords: MS2Rescore,MS2PIP,DeepLC,Percolator,proteomics,mass spectrometry,peptide identification,rescoring,machine learning
6
6
  Author: Ana Sílvia C. Silva, Robbin Bouwmeester, Louise Buur
@@ -1,6 +1,6 @@
1
1
  """MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""
2
2
 
3
- __version__ = "3.1.0-dev1"
3
+ __version__ = "3.1.0-dev3"
4
4
 
5
5
  from warnings import filterwarnings
6
6
 
@@ -1,6 +1,7 @@
1
1
  """MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""
2
2
 
3
3
  import argparse
4
+ import cProfile
4
5
  import importlib.resources
5
6
  import json
6
7
  import logging
@@ -139,6 +140,14 @@ def _argument_parser() -> argparse.ArgumentParser:
139
140
  dest="fasta_file",
140
141
  help="path to FASTA file",
141
142
  )
143
+ parser.add_argument(
144
+ "--profile",
145
+ # metavar="BOOL",
146
+ action="store_true",
147
+ # type=bool,
148
+ # dest="profile",
149
+ help="boolean to enable profiling with cProfile",
150
+ )
142
151
 
143
152
  return parser
144
153
 
@@ -161,6 +170,18 @@ def _setup_logging(passed_level: str, log_file: Union[str, Path]):
161
170
  )
162
171
 
163
172
 
173
+ def profile(fnc, filepath):
174
+ """A decorator that uses cProfile to profile a function"""
175
+
176
+ def inner(*args, **kwargs):
177
+ with cProfile.Profile() as profiler:
178
+ return_value = fnc(*args, **kwargs)
179
+ profiler.dump_stats(filepath + ".profile.prof")
180
+ return return_value
181
+
182
+ return inner
183
+
184
+
164
185
  def main_tims():
165
186
  """Run MS²Rescore command-line interface in TIMS²Rescore mode."""
166
187
  main(tims=True)
@@ -196,7 +217,11 @@ def main(tims=False):
196
217
 
197
218
  # Run MS²Rescore
198
219
  try:
199
- rescore(configuration=config)
220
+ if cli_args.profile:
221
+ profiled_rescore = profile(rescore, config["ms2rescore"]["output_path"])
222
+ profiled_rescore(configuration=config)
223
+ else:
224
+ rescore(configuration=config)
200
225
  except Exception as e:
201
226
  LOGGER.exception(e)
202
227
  sys.exit(1)
@@ -175,8 +175,8 @@ def _fill_missing_precursor_info(psm_list, config):
175
175
  get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required)
176
176
 
177
177
  # Check if values are now present
178
- for value_name in ["retention_time", "ion_mobility"]:
179
- if (
178
+ for value_name, required in [("retention_time", rt_required), ("ion_mobility", im_required)]:
179
+ if required and (
180
180
  0.0 in psm_list[value_name]
181
181
  or None in psm_list[value_name]
182
182
  or np.isnan(psm_list[value_name]).any()
@@ -157,6 +157,11 @@
157
157
  "description": "Write an HTML report with various QC metrics and charts",
158
158
  "type": "boolean",
159
159
  "default": false
160
+ },
161
+ "profile": {
162
+ "description": "Write an txt report using cProfile for profiling",
163
+ "type": "boolean",
164
+ "default": false
160
165
  }
161
166
  }
162
167
  }
@@ -373,7 +373,7 @@ def identification_overlap(
373
373
  return figure
374
374
 
375
375
  levels = before.levels # ["psms", "peptides", "proteins"] if all available
376
- indexers = ["index", "index", "mokapot protein group"]
376
+ indexers = ["index", "peptide", "mokapot protein group"]
377
377
 
378
378
  overlap_data = defaultdict(dict)
379
379
  for level, indexer in zip(levels, indexers):
@@ -386,7 +386,7 @@ def identification_overlap(
386
386
  set_after = set(df_after[df_after["mokapot q-value"] <= 0.01][indexer])
387
387
 
388
388
  overlap_data["removed"][level] = -len(set_before - set_after)
389
- overlap_data["retained"][level] = len(set_before | set_after)
389
+ overlap_data["retained"][level] = len(set_after.intersection(set_before))
390
390
  overlap_data["gained"][level] = len(set_after - set_before)
391
391
 
392
392
  colors = ["#953331", "#316395", "#319545"]
@@ -185,7 +185,7 @@ def _get_stats_context(confidence_before, confidence_after):
185
185
  "item": level_name,
186
186
  "card_color": card_color,
187
187
  "number": after,
188
- "diff": f"{after - before:+}",
188
+ "diff": f"({after - before:+})",
189
189
  "percentage": f"{increase:.1f}%",
190
190
  "is_increase": increase > 0,
191
191
  "bar_percentage": before / after * 100 if increase > 0 else after / before * 100,
@@ -20,7 +20,8 @@ If you use Mokapot through MS²Rescore, please cite:
20
20
  """
21
21
 
22
22
  import logging
23
- from typing import Any, List, Optional, Tuple, Dict
23
+ import re
24
+ from typing import Any, Dict, List, Optional, Tuple
24
25
 
25
26
  import mokapot
26
27
  import numpy as np
@@ -31,6 +32,7 @@ from mokapot.dataset import LinearPsmDataset
31
32
  from pyteomics.mass import nist_mass
32
33
 
33
34
  logger = logging.getLogger(__name__)
35
+ logging.getLogger("numba").setLevel(logging.WARNING)
34
36
 
35
37
 
36
38
  def rescore(
@@ -89,18 +91,15 @@ def rescore(
89
91
 
90
92
  # Rescore
91
93
  logger.debug(f"Mokapot brew options: `{kwargs}`")
92
- confidence_results, models = brew(lin_psm_data, **kwargs)
94
+ confidence_results, models = brew(lin_psm_data, rng=8, **kwargs)
93
95
 
94
96
  # Reshape confidence estimates to match PSMList
97
+ keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
95
98
  mokapot_values_targets = (
96
- confidence_results.confidence_estimates["psms"]
97
- .set_index("index")
98
- .sort_index()[["mokapot score", "mokapot q-value", "mokapot PEP"]]
99
+ confidence_results.confidence_estimates["psms"].set_index("index").sort_index()[keys]
99
100
  )
100
101
  mokapot_values_decoys = (
101
- confidence_results.decoy_confidence_estimates["psms"]
102
- .set_index("index")
103
- .sort_index()[["mokapot score", "mokapot q-value", "mokapot PEP"]]
102
+ confidence_results.decoy_confidence_estimates["psms"].set_index("index").sort_index()[keys]
104
103
  )
105
104
  q = np.full((len(psm_list), 3), np.nan)
106
105
  q[mokapot_values_targets.index] = mokapot_values_targets.values
@@ -111,6 +110,26 @@ def rescore(
111
110
  psm_list["qvalue"] = q[:, 1]
112
111
  psm_list["pep"] = q[:, 2]
113
112
 
113
+ # Repeat for peptide-level scores
114
+ peptide_info = pd.concat(
115
+ [
116
+ confidence_results.confidence_estimates["peptides"].set_index(["peptide"])[keys],
117
+ confidence_results.decoy_confidence_estimates["peptides"].set_index(["peptide"])[keys],
118
+ ],
119
+ axis=0,
120
+ ).to_dict(orient="index")
121
+
122
+ peptidoform_without_charge = re.compile(r"(/\d+$)")
123
+ for psm in psm_list:
124
+ peptide_scores = peptide_info[peptidoform_without_charge.sub("", str(psm.peptidoform), 1)]
125
+ psm.metadata.update(
126
+ {
127
+ "peptide_score": peptide_scores["mokapot score"],
128
+ "peptide_qvalue": peptide_scores["mokapot q-value"],
129
+ "peptide_pep": peptide_scores["mokapot PEP"],
130
+ }
131
+ )
132
+
114
133
  # Write results
115
134
  if write_weights:
116
135
  try:
@@ -173,7 +192,7 @@ def convert_psm_list(
173
192
 
174
193
  # Ensure filename for FlashLFQ txt output
175
194
  if not combined_df["run"].notnull().all():
176
- combined_df["run"] = "ms_run"
195
+ combined_df["run"] = "nan"
177
196
 
178
197
  feature_names = [f"feature:{f}" for f in feature_names] if feature_names else None
179
198
 
@@ -20,8 +20,8 @@ If you use Percolator through MS²Rescore, please cite:
20
20
  import logging
21
21
  import subprocess
22
22
  from typing import Any, Dict, Optional
23
+ from copy import deepcopy
23
24
 
24
- import numpy as np
25
25
  import psm_utils
26
26
 
27
27
  from ms2rescore.exceptions import MS2RescoreError
@@ -103,8 +103,15 @@ def rescore(
103
103
  # Need to be able to link back to original PSMs, so reindex spectrum IDs, but copy PSM list
104
104
  # to avoid modifying original...
105
105
  # TODO: Better approach for this?
106
- psm_list_reindexed = psm_list.copy()
107
- psm_list_reindexed["spectrum_id"] = np.arange(len(psm_list_reindexed))
106
+
107
+ psm_list_reindexed = deepcopy(psm_list)
108
+ psm_list_reindexed.set_ranks()
109
+ psm_list_reindexed["spectrum_id"] = [
110
+ f"{psm.get_usi(as_url=False)}_{psm.rank}" for psm in psm_list_reindexed
111
+ ]
112
+ spectrum_id_index = {
113
+ spectrum_id: index for index, spectrum_id in enumerate(psm_list_reindexed["spectrum_id"])
114
+ }
108
115
 
109
116
  _write_pin_file(psm_list_reindexed, pin_filepath)
110
117
 
@@ -134,10 +141,13 @@ def rescore(
134
141
  psm_list,
135
142
  percolator_kwargs["results-psms"],
136
143
  percolator_kwargs["decoy-results-psms"],
144
+ spectrum_id_index,
137
145
  )
138
146
 
139
147
 
140
- def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str):
148
+ def _update_psm_scores(
149
+ psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str, spectrum_id_index: list
150
+ ):
141
151
  """
142
152
  Update PSM scores with Percolator results.
143
153
 
@@ -150,7 +160,9 @@ def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout
150
160
  psm_list_percolator = psm_utils.PSMList(psm_list=target_psms.psm_list + decoy_psms.psm_list)
151
161
 
152
162
  # Sort by reindexed spectrum_id so order matches original PSM list
153
- psm_list_percolator[np.argsort(psm_list_percolator["spectrum_id"])]
163
+ psm_list_percolator = sorted(
164
+ psm_list_percolator, key=lambda psm: spectrum_id_index[psm["spectrum_id"]]
165
+ )
154
166
 
155
167
  if not len(psm_list) == len(psm_list_percolator):
156
168
  raise MS2RescoreError(
File without changes