ms2rescore 3.1.0.dev6__tar.gz → 3.1.0.dev8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/PKG-INFO +5 -6
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/__init__.py +1 -1
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/core.py +117 -36
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/exceptions.py +6 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/config_default.json +3 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/config_schema.json +25 -2
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/parse_psms.py +74 -81
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/charts.py +23 -14
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/rescoring_engines/mokapot.py +66 -39
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/rescoring_engines/percolator.py +2 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/pyproject.toml +6 -5
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/LICENSE +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/README.md +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/__main__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/config_parser.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/base.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/basic.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/deeplc.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/im2deep.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/ionmob.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/maxquant.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/feature_generators/ms2pip.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/__main__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/app.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/function2ctk.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/gui/widgets.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/config_default_tims.json +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/config_icon.png +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/github-mark-white.png +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/github-mark.png +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/ms2rescore_logo.png +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/program_icon.ico +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/ms2rescore-gui-theme.json +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/parse_spectra.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/__main__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/generate.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/about.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/base.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/config.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/features.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/log.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/metadata.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/overview.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/stats-card.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/style.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/target-decoy.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/texts.toml +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/utils.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/rescoring_engines/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ms2rescore
|
|
3
|
-
Version: 3.1.0.
|
|
3
|
+
Version: 3.1.0.dev8
|
|
4
4
|
Summary: MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and retention times.
|
|
5
5
|
Keywords: MS2Rescore,MS2PIP,DeepLC,Percolator,proteomics,mass spectrometry,peptide identification,rescoring,machine learning
|
|
6
6
|
Author: Ana Sílvia C. Silva, Robbin Bouwmeester, Louise Buur
|
|
@@ -24,13 +24,12 @@ Requires-Dist: lxml>=4.5
|
|
|
24
24
|
Requires-Dist: mokapot>=0.9
|
|
25
25
|
Requires-Dist: ms2pip>=4.0.0-dev10
|
|
26
26
|
Requires-Dist: ms2rescore_rs
|
|
27
|
-
Requires-Dist: numpy
|
|
28
|
-
Requires-Dist:
|
|
27
|
+
Requires-Dist: numpy>=1.16.0
|
|
28
|
+
Requires-Dist: scikit-learn==1.5.1; python_version == '3.11'
|
|
29
29
|
Requires-Dist: pandas>=1.0
|
|
30
30
|
Requires-Dist: plotly>=5
|
|
31
|
-
Requires-Dist: psm_utils>=0.
|
|
32
|
-
Requires-Dist:
|
|
33
|
-
Requires-Dist: pyteomics>=4.1.0, <4.7
|
|
31
|
+
Requires-Dist: psm_utils>=0.9
|
|
32
|
+
Requires-Dist: pyteomics>=4.7.2
|
|
34
33
|
Requires-Dist: rich>=12
|
|
35
34
|
Requires-Dist: tomli>=2; python_version < '3.11'
|
|
36
35
|
Requires-Dist: ruff ; extra == "dev"
|
|
@@ -5,6 +5,7 @@ from typing import Dict, Optional
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import psm_utils.io
|
|
8
|
+
from mokapot.dataset import LinearPsmDataset
|
|
8
9
|
from psm_utils import PSMList
|
|
9
10
|
|
|
10
11
|
from ms2rescore import exceptions
|
|
@@ -13,6 +14,7 @@ from ms2rescore.parse_psms import parse_psms
|
|
|
13
14
|
from ms2rescore.parse_spectra import get_missing_values
|
|
14
15
|
from ms2rescore.report import generate
|
|
15
16
|
from ms2rescore.rescoring_engines import mokapot, percolator
|
|
17
|
+
from ms2rescore.rescoring_engines.mokapot import add_peptide_confidence, add_psm_confidence
|
|
16
18
|
|
|
17
19
|
logger = logging.getLogger(__name__)
|
|
18
20
|
|
|
@@ -45,7 +47,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
45
47
|
psm_list = parse_psms(config, psm_list)
|
|
46
48
|
|
|
47
49
|
# Log #PSMs identified before rescoring
|
|
48
|
-
id_psms_before = _log_id_psms_before(psm_list)
|
|
50
|
+
id_psms_before = _log_id_psms_before(psm_list, max_rank=config["max_psm_rank_output"])
|
|
49
51
|
|
|
50
52
|
# Define feature names; get existing feature names from PSM file
|
|
51
53
|
feature_names = dict()
|
|
@@ -60,7 +62,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
60
62
|
)
|
|
61
63
|
|
|
62
64
|
# Add missing precursor info from spectrum file if needed
|
|
63
|
-
_fill_missing_precursor_info(psm_list, config)
|
|
65
|
+
psm_list = _fill_missing_precursor_info(psm_list, config)
|
|
64
66
|
|
|
65
67
|
# Add rescoring features
|
|
66
68
|
for fgen_name, fgen_config in config["feature_generators"].items():
|
|
@@ -104,8 +106,8 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
104
106
|
logging.debug(f"Creating USIs for {len(psm_list)} PSMs")
|
|
105
107
|
psm_list["spectrum_id"] = [psm.get_usi(as_url=False) for psm in psm_list]
|
|
106
108
|
|
|
107
|
-
# If no rescoring engine is specified, write PSMs and features to PIN file
|
|
108
|
-
if not config["rescoring_engine"]:
|
|
109
|
+
# If no rescoring engine is specified or DEBUG, write PSMs and features to PIN file
|
|
110
|
+
if not config["rescoring_engine"] or config["log_level"] == "debug":
|
|
109
111
|
logger.info(f"Writing added features to PIN file: {output_file_root}.psms.pin")
|
|
110
112
|
psm_utils.io.write_file(
|
|
111
113
|
psm_list,
|
|
@@ -113,35 +115,49 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
113
115
|
filetype="percolator",
|
|
114
116
|
feature_names=all_feature_names,
|
|
115
117
|
)
|
|
118
|
+
|
|
119
|
+
if not config["rescoring_engine"]:
|
|
120
|
+
logger.info("No rescoring engine specified. Skipping rescoring.")
|
|
116
121
|
return None
|
|
117
122
|
|
|
118
123
|
# Rescore PSMs
|
|
119
|
-
|
|
120
|
-
percolator
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
config["rescoring_engine"]["mokapot"]
|
|
130
|
-
|
|
131
|
-
protein_kwargs
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
124
|
+
try:
|
|
125
|
+
if "percolator" in config["rescoring_engine"]:
|
|
126
|
+
percolator.rescore(
|
|
127
|
+
psm_list,
|
|
128
|
+
output_file_root=output_file_root,
|
|
129
|
+
log_level=config["log_level"],
|
|
130
|
+
processes=config["processes"],
|
|
131
|
+
percolator_kwargs=config["rescoring_engine"]["percolator"],
|
|
132
|
+
)
|
|
133
|
+
elif "mokapot" in config["rescoring_engine"]:
|
|
134
|
+
if "fasta_file" not in config["rescoring_engine"]["mokapot"]:
|
|
135
|
+
config["rescoring_engine"]["mokapot"]["fasta_file"] = config["fasta_file"]
|
|
136
|
+
if "protein_kwargs" in config["rescoring_engine"]["mokapot"]:
|
|
137
|
+
protein_kwargs = config["rescoring_engine"]["mokapot"].pop("protein_kwargs")
|
|
138
|
+
else:
|
|
139
|
+
protein_kwargs = dict()
|
|
140
|
+
|
|
141
|
+
mokapot.rescore(
|
|
142
|
+
psm_list,
|
|
143
|
+
output_file_root=output_file_root,
|
|
144
|
+
protein_kwargs=protein_kwargs,
|
|
145
|
+
**config["rescoring_engine"]["mokapot"],
|
|
146
|
+
)
|
|
147
|
+
except exceptions.RescoringError as e:
|
|
148
|
+
# Write output
|
|
149
|
+
logger.info(f"Writing intermediary output to {output_file_root}.psms.tsv...")
|
|
150
|
+
psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
|
|
151
|
+
|
|
152
|
+
# Reraise exception
|
|
153
|
+
raise e
|
|
143
154
|
|
|
144
|
-
|
|
155
|
+
# Post-rescoring processing
|
|
156
|
+
if all(psm_list["pep"] == 1.0):
|
|
157
|
+
psm_list = _fix_constant_pep(psm_list)
|
|
158
|
+
psm_list = _filter_by_rank(psm_list, config["max_psm_rank_output"], False)
|
|
159
|
+
psm_list = _calculate_confidence(psm_list)
|
|
160
|
+
_ = _log_id_psms_after(psm_list, id_psms_before, max_rank=config["max_psm_rank_output"])
|
|
145
161
|
|
|
146
162
|
# Write output
|
|
147
163
|
logger.info(f"Writing output to {output_file_root}.psms.tsv...")
|
|
@@ -157,7 +173,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
157
173
|
logger.exception(e)
|
|
158
174
|
|
|
159
175
|
|
|
160
|
-
def _fill_missing_precursor_info(psm_list, config):
|
|
176
|
+
def _fill_missing_precursor_info(psm_list: PSMList, config: Dict) -> PSMList:
|
|
161
177
|
"""Fill missing precursor info from spectrum file if needed."""
|
|
162
178
|
# Check if required
|
|
163
179
|
# TODO: avoid hard coding feature generators in some way
|
|
@@ -199,6 +215,16 @@ def _fill_missing_precursor_info(psm_list, config):
|
|
|
199
215
|
[v is not None and not np.isnan(v) for v in psm_list[value_name]]
|
|
200
216
|
]
|
|
201
217
|
|
|
218
|
+
return psm_list
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _filter_by_rank(psm_list: PSMList, max_rank: int, lower_score_better: bool) -> PSMList:
|
|
222
|
+
"""Filter PSMs by rank."""
|
|
223
|
+
psm_list.set_ranks(lower_score_better=lower_score_better)
|
|
224
|
+
rank_filter = psm_list["rank"] <= max_rank
|
|
225
|
+
logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {max_rank}.")
|
|
226
|
+
return psm_list[rank_filter]
|
|
227
|
+
|
|
202
228
|
|
|
203
229
|
def _write_feature_names(feature_names, output_file_root):
|
|
204
230
|
"""Write feature names to file."""
|
|
@@ -209,25 +235,80 @@ def _write_feature_names(feature_names, output_file_root):
|
|
|
209
235
|
f.write(f"{fgen}\t{feature}\n")
|
|
210
236
|
|
|
211
237
|
|
|
212
|
-
def _log_id_psms_before(psm_list):
|
|
238
|
+
def _log_id_psms_before(psm_list: PSMList, fdr: float = 0.01, max_rank: int = 1) -> int:
|
|
213
239
|
"""Log #PSMs identified before rescoring."""
|
|
214
240
|
id_psms_before = (
|
|
215
|
-
(psm_list["qvalue"] <= 0.01) & (psm_list["
|
|
241
|
+
(psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
|
|
216
242
|
).sum()
|
|
217
|
-
logger.info(
|
|
243
|
+
logger.info(
|
|
244
|
+
f"Found {id_psms_before} identified PSMs with rank <= {max_rank} at {fdr} FDR before "
|
|
245
|
+
"rescoring."
|
|
246
|
+
)
|
|
218
247
|
return id_psms_before
|
|
219
248
|
|
|
220
249
|
|
|
221
|
-
def _log_id_psms_after(
|
|
250
|
+
def _log_id_psms_after(
|
|
251
|
+
psm_list: PSMList, id_psms_before: int, fdr: float = 0.01, max_rank: int = 1
|
|
252
|
+
) -> int:
|
|
222
253
|
"""Log #PSMs identified after rescoring."""
|
|
223
254
|
id_psms_after = (
|
|
224
|
-
(psm_list["qvalue"] <= 0.01) & (psm_list["
|
|
255
|
+
(psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
|
|
225
256
|
).sum()
|
|
226
257
|
diff = id_psms_after - id_psms_before
|
|
227
258
|
diff_perc = diff / id_psms_before if id_psms_before > 0 else None
|
|
228
259
|
|
|
229
260
|
diff_numbers = f"{diff} ({diff_perc:.2%})" if diff_perc is not None else str(diff)
|
|
230
261
|
diff_word = "more" if diff > 0 else "less"
|
|
231
|
-
logger.info(
|
|
262
|
+
logger.info(
|
|
263
|
+
f"Identified {diff_numbers} {diff_word} PSMs with rank <= {max_rank} at {fdr} FDR after "
|
|
264
|
+
"rescoring."
|
|
265
|
+
)
|
|
232
266
|
|
|
233
267
|
return id_psms_after
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _fix_constant_pep(psm_list: PSMList) -> PSMList:
|
|
271
|
+
"""Workaround for broken PEP calculation if best PSM is decoy."""
|
|
272
|
+
logger.warning(
|
|
273
|
+
"Attempting to fix constant PEP values by removing decoy PSMs that score higher than the "
|
|
274
|
+
"best target PSM."
|
|
275
|
+
)
|
|
276
|
+
max_target_score = psm_list["score"][~psm_list["is_decoy"]].max()
|
|
277
|
+
higher_scoring_decoys = psm_list["is_decoy"] & (psm_list["score"] > max_target_score)
|
|
278
|
+
|
|
279
|
+
if not higher_scoring_decoys.any():
|
|
280
|
+
logger.warning("No decoys scoring higher than the best target found. Skipping fix.")
|
|
281
|
+
else:
|
|
282
|
+
psm_list = psm_list[~higher_scoring_decoys]
|
|
283
|
+
logger.warning(f"Removed {higher_scoring_decoys.sum()} decoy PSMs.")
|
|
284
|
+
|
|
285
|
+
return psm_list
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _calculate_confidence(psm_list: PSMList) -> PSMList:
|
|
289
|
+
"""
|
|
290
|
+
Calculate scores, q-values, and PEPs for PSMs and peptides and add them to PSMList.
|
|
291
|
+
"""
|
|
292
|
+
# Minimal conversion to LinearPsmDataset
|
|
293
|
+
psm_df = psm_list.to_dataframe()
|
|
294
|
+
psm_df = psm_df.reset_index(drop=True).reset_index()
|
|
295
|
+
psm_df["peptide"] = (
|
|
296
|
+
psm_df["peptidoform"].astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
|
|
297
|
+
)
|
|
298
|
+
psm_df["is_target"] = ~psm_df["is_decoy"]
|
|
299
|
+
lin_psm_data = LinearPsmDataset(
|
|
300
|
+
psms=psm_df[["index", "peptide", "score", "is_target"]],
|
|
301
|
+
target_column="is_target",
|
|
302
|
+
spectrum_columns="index", # Use artificial index to allow multi-rank rescoring
|
|
303
|
+
peptide_column="peptide",
|
|
304
|
+
feature_columns=["score"],
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
# Recalculate confidence
|
|
308
|
+
new_confidence = lin_psm_data.assign_confidence()
|
|
309
|
+
|
|
310
|
+
# Add new confidence estimations to PSMList
|
|
311
|
+
add_psm_confidence(psm_list, new_confidence)
|
|
312
|
+
add_peptide_confidence(psm_list, new_confidence)
|
|
313
|
+
|
|
314
|
+
return psm_list
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
"rescoring_engine": {
|
|
16
16
|
"mokapot": {
|
|
17
|
+
"train_fdr": 0.01,
|
|
17
18
|
"write_weights": true,
|
|
18
19
|
"write_txt": true,
|
|
19
20
|
"write_flashlfq": true
|
|
@@ -32,6 +33,8 @@
|
|
|
32
33
|
"psm_id_rt_pattern": null,
|
|
33
34
|
"psm_id_im_pattern": null,
|
|
34
35
|
"lower_score_is_better": false,
|
|
36
|
+
"max_psm_rank_input": 10,
|
|
37
|
+
"max_psm_rank_output": 1,
|
|
35
38
|
"modification_mapping": {},
|
|
36
39
|
"fixed_modifications": {},
|
|
37
40
|
"processes": -1,
|
|
@@ -68,7 +68,11 @@
|
|
|
68
68
|
},
|
|
69
69
|
"psm_file": {
|
|
70
70
|
"description": "Path to file with peptide-spectrum matches.",
|
|
71
|
-
"oneOf": [
|
|
71
|
+
"oneOf": [
|
|
72
|
+
{ "type": "string" },
|
|
73
|
+
{ "type": "null" },
|
|
74
|
+
{ "type": "array", "items": { "type": "string" } }
|
|
75
|
+
]
|
|
72
76
|
},
|
|
73
77
|
"psm_file_type": {
|
|
74
78
|
"description": "PSM file type. By default inferred from file extension.",
|
|
@@ -127,6 +131,18 @@
|
|
|
127
131
|
"type": "boolean",
|
|
128
132
|
"default": false
|
|
129
133
|
},
|
|
134
|
+
"max_psm_rank_input": {
|
|
135
|
+
"description": "Maximum rank of PSMs to use as input for rescoring",
|
|
136
|
+
"type": "number",
|
|
137
|
+
"default": 10,
|
|
138
|
+
"minimum": 1
|
|
139
|
+
},
|
|
140
|
+
"max_psm_rank_output": {
|
|
141
|
+
"description": "Maximum rank of PSMs to return after rescoring, before final FDR calculation",
|
|
142
|
+
"type": "number",
|
|
143
|
+
"default": 1,
|
|
144
|
+
"minimum": 1
|
|
145
|
+
},
|
|
130
146
|
"modification_mapping": {
|
|
131
147
|
"description": "Mapping of modification labels to each replacement label.",
|
|
132
148
|
"type": "object",
|
|
@@ -159,7 +175,7 @@
|
|
|
159
175
|
"default": false
|
|
160
176
|
},
|
|
161
177
|
"profile": {
|
|
162
|
-
"description": "Write
|
|
178
|
+
"description": "Write a txt report using cProfile for profiling",
|
|
163
179
|
"type": "boolean",
|
|
164
180
|
"default": false
|
|
165
181
|
}
|
|
@@ -263,6 +279,13 @@
|
|
|
263
279
|
"type": "object",
|
|
264
280
|
"additionalProperties": true,
|
|
265
281
|
"properties": {
|
|
282
|
+
"train_fdr": {
|
|
283
|
+
"description": "FDR threshold for training Mokapot",
|
|
284
|
+
"type": "number",
|
|
285
|
+
"minimum": 0,
|
|
286
|
+
"maximum": 1,
|
|
287
|
+
"default": 0.01
|
|
288
|
+
},
|
|
266
289
|
"write_weights": {
|
|
267
290
|
"description": "Write Mokapot weights to a text file",
|
|
268
291
|
"type": "boolean",
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
-
from typing import Dict, Union
|
|
3
|
+
from typing import Dict, Optional, Union
|
|
4
4
|
|
|
5
|
+
import numpy as np
|
|
5
6
|
import psm_utils.io
|
|
6
7
|
from psm_utils import PSMList
|
|
7
8
|
|
|
@@ -23,13 +24,30 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
|
|
|
23
24
|
PSMList object containing PSMs. If None, PSMs will be read from ``psm_file``.
|
|
24
25
|
|
|
25
26
|
"""
|
|
26
|
-
# Read PSMs
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
# Read PSMs
|
|
28
|
+
try:
|
|
29
|
+
psm_list = _read_psms(config, psm_list)
|
|
30
|
+
except psm_utils.io.PSMUtilsIOException:
|
|
31
|
+
raise MS2RescoreConfigurationError(
|
|
32
|
+
"Error occurred while reading PSMs. Please check the 'psm_file' and "
|
|
33
|
+
"'psm_file_type' settings. See "
|
|
34
|
+
"https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
|
|
35
|
+
" for more information."
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# Filter by PSM rank
|
|
39
|
+
psm_list.set_ranks(config["lower_score_is_better"])
|
|
40
|
+
rank_filter = psm_list["rank"] <= config["max_psm_rank_input"]
|
|
41
|
+
psm_list = psm_list[rank_filter]
|
|
42
|
+
logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {config['max_psm_rank_input']}.")
|
|
43
|
+
|
|
44
|
+
# Remove invalid AAs, find decoys, calculate q-values
|
|
45
|
+
psm_list = _remove_invalid_aa(psm_list)
|
|
46
|
+
_find_decoys(psm_list, config["id_decoy_pattern"])
|
|
47
|
+
_calculate_qvalues(psm_list, config["lower_score_is_better"])
|
|
30
48
|
if config["psm_id_rt_pattern"] or config["psm_id_im_pattern"]:
|
|
31
49
|
logger.debug("Parsing retention time and/or ion mobility from PSM identifier...")
|
|
32
|
-
|
|
50
|
+
_parse_values_from_spectrum_id(config, psm_list)
|
|
33
51
|
|
|
34
52
|
# Store scoring values for comparison later
|
|
35
53
|
for psm in psm_list:
|
|
@@ -70,10 +88,6 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
|
|
|
70
88
|
new_ids = [_match_psm_ids(old_id, pattern) for old_id in psm_list["spectrum_id"]]
|
|
71
89
|
psm_list["spectrum_id"] = new_ids
|
|
72
90
|
|
|
73
|
-
# TODO: Temporary fix until implemented in psm_utils
|
|
74
|
-
# Ensure that spectrum IDs are strings (Pydantic 2.0 does not coerce int to str)
|
|
75
|
-
psm_list["spectrum_id"] = [str(spec_id) for spec_id in psm_list["spectrum_id"]]
|
|
76
|
-
|
|
77
91
|
return psm_list
|
|
78
92
|
|
|
79
93
|
|
|
@@ -81,49 +95,30 @@ def _read_psms(config, psm_list):
|
|
|
81
95
|
if isinstance(psm_list, PSMList):
|
|
82
96
|
return psm_list
|
|
83
97
|
else:
|
|
84
|
-
logger.info("Reading PSMs from file...")
|
|
85
|
-
current_file = 1
|
|
86
98
|
total_files = len(config["psm_file"])
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
valid_psms = 0
|
|
90
|
-
for psm_file in config["psm_file"]:
|
|
99
|
+
psm_list = []
|
|
100
|
+
for current_file, psm_file in enumerate(config["psm_file"]):
|
|
91
101
|
logger.info(
|
|
92
|
-
f"Reading PSMs from PSM file ({current_file}/{total_files}): '{psm_file}'..."
|
|
102
|
+
f"Reading PSMs from PSM file ({current_file+1}/{total_files}): '{psm_file}'..."
|
|
93
103
|
)
|
|
94
|
-
|
|
95
|
-
|
|
104
|
+
psm_list.extend(
|
|
105
|
+
psm_utils.io.read_file(
|
|
96
106
|
psm_file,
|
|
97
107
|
filetype=config["psm_file_type"],
|
|
98
108
|
show_progressbar=True,
|
|
99
109
|
**config["psm_reader_kwargs"],
|
|
100
110
|
)
|
|
101
|
-
except psm_utils.io.PSMUtilsIOException:
|
|
102
|
-
raise MS2RescoreConfigurationError(
|
|
103
|
-
"Error occurred while reading PSMs. Please check the 'psm_file' and "
|
|
104
|
-
"'psm_file_type' settings. See "
|
|
105
|
-
"https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
|
|
106
|
-
" for more information."
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
total_psms += len(id_file_psm_list.psm_list)
|
|
110
|
-
for psm in id_file_psm_list.psm_list:
|
|
111
|
-
if not _has_invalid_aminoacids(psm):
|
|
112
|
-
valid_psms_list.append(psm)
|
|
113
|
-
valid_psms += 1
|
|
114
|
-
current_file += 1
|
|
115
|
-
if total_psms - valid_psms > 0:
|
|
116
|
-
logger.warning(
|
|
117
|
-
f"{total_psms - valid_psms} PSMs with invalid amino acids were removed."
|
|
118
111
|
)
|
|
119
|
-
|
|
112
|
+
logger.debug(f"Read {len(psm_list)} PSMs from '{psm_file}'.")
|
|
113
|
+
|
|
114
|
+
return PSMList(psm_list=psm_list)
|
|
120
115
|
|
|
121
116
|
|
|
122
|
-
def _find_decoys(
|
|
117
|
+
def _find_decoys(psm_list: PSMList, id_decoy_pattern: Optional[str] = None):
|
|
123
118
|
"""Find decoys in PSMs, log amount, and raise error if none found."""
|
|
124
119
|
logger.debug("Finding decoys...")
|
|
125
|
-
if
|
|
126
|
-
psm_list.find_decoys(
|
|
120
|
+
if id_decoy_pattern:
|
|
121
|
+
psm_list.find_decoys(id_decoy_pattern)
|
|
127
122
|
|
|
128
123
|
n_psms = len(psm_list)
|
|
129
124
|
percent_decoys = sum(psm_list["is_decoy"]) / n_psms * 100
|
|
@@ -138,12 +133,12 @@ def _find_decoys(config, psm_list):
|
|
|
138
133
|
)
|
|
139
134
|
|
|
140
135
|
|
|
141
|
-
def _calculate_qvalues(
|
|
136
|
+
def _calculate_qvalues(psm_list: PSMList, lower_score_is_better: bool):
|
|
142
137
|
"""Calculate q-values for PSMs if not present."""
|
|
143
138
|
# Calculate q-values if not present
|
|
144
139
|
if None in psm_list["qvalue"]:
|
|
145
140
|
logger.debug("Recalculating q-values...")
|
|
146
|
-
psm_list.calculate_qvalues(reverse=not
|
|
141
|
+
psm_list.calculate_qvalues(reverse=not lower_score_is_better)
|
|
147
142
|
|
|
148
143
|
|
|
149
144
|
def _match_psm_ids(old_id, regex_pattern):
|
|
@@ -158,47 +153,45 @@ def _match_psm_ids(old_id, regex_pattern):
|
|
|
158
153
|
)
|
|
159
154
|
|
|
160
155
|
|
|
161
|
-
def
|
|
156
|
+
def _parse_values_from_spectrum_id(
|
|
157
|
+
psm_list: PSMList,
|
|
158
|
+
psm_id_rt_pattern: Optional[str] = None,
|
|
159
|
+
psm_id_im_pattern: Optional[str] = None,
|
|
160
|
+
):
|
|
162
161
|
"""Parse retention time and or ion mobility values from the spectrum_id."""
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
psm_list["retention_time"] = [
|
|
172
|
-
float(rt_pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
|
|
173
|
-
]
|
|
174
|
-
except AttributeError:
|
|
175
|
-
raise MS2RescoreConfigurationError(
|
|
176
|
-
f"Could not parse retention time from spectrum_id with the "
|
|
177
|
-
f"{config['psm_id_rt_pattern']} regex pattern. "
|
|
178
|
-
"Please make sure the retention time key is present in the spectrum_id "
|
|
179
|
-
"and the value is in a capturing group or disable the relevant feature generator."
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
if config["psm_id_im_pattern"]:
|
|
183
|
-
logger.debug(
|
|
184
|
-
"Parsing ion mobility from spectrum_id with regex pattern "
|
|
185
|
-
f"{config['psm_id_im_pattern']}"
|
|
186
|
-
)
|
|
187
|
-
try:
|
|
188
|
-
im_pattern = re.compile(config["psm_id_im_pattern"])
|
|
189
|
-
psm_list["ion_mobility"] = [
|
|
190
|
-
float(im_pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
|
|
191
|
-
]
|
|
192
|
-
except AttributeError:
|
|
193
|
-
raise MS2RescoreConfigurationError(
|
|
194
|
-
f"Could not parse ion mobility from spectrum_id with the "
|
|
195
|
-
f"{config['psm_id_im_pattern']} regex pattern. "
|
|
196
|
-
"Please make sure the ion mobility key is present in the spectrum_id "
|
|
197
|
-
"and the value is in a capturing group or disable the relevant feature generator."
|
|
162
|
+
for pattern, label, key in zip(
|
|
163
|
+
[psm_id_rt_pattern, psm_id_im_pattern],
|
|
164
|
+
["retention time", "ion mobility"],
|
|
165
|
+
["retention_time", "ion_mobility"],
|
|
166
|
+
):
|
|
167
|
+
if pattern:
|
|
168
|
+
logger.debug(
|
|
169
|
+
f"Parsing {label} from spectrum_id with regex pattern " f"{psm_id_rt_pattern}"
|
|
198
170
|
)
|
|
171
|
+
try:
|
|
172
|
+
pattern = re.compile(pattern)
|
|
173
|
+
psm_list[key] = [
|
|
174
|
+
float(pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
|
|
175
|
+
]
|
|
176
|
+
except AttributeError:
|
|
177
|
+
raise MS2RescoreConfigurationError(
|
|
178
|
+
f"Could not parse {label} from spectrum_id with the "
|
|
179
|
+
f"{pattern} regex pattern. "
|
|
180
|
+
f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n. "
|
|
181
|
+
f"Please make sure the {label} key is present in the spectrum_id "
|
|
182
|
+
"and the value is in a capturing group or disable the relevant feature generator."
|
|
183
|
+
)
|
|
199
184
|
|
|
200
185
|
|
|
201
|
-
def
|
|
202
|
-
"""
|
|
186
|
+
def _remove_invalid_aa(psm_list: PSMList) -> PSMList:
|
|
187
|
+
"""Remove PSMs with invalid amino acids."""
|
|
188
|
+
invalid_psms = np.array(
|
|
189
|
+
[any(aa in "BJOUXZ" for aa in psm.peptidoform.sequence) for psm in psm_list]
|
|
190
|
+
)
|
|
203
191
|
|
|
204
|
-
|
|
192
|
+
if any(invalid_psms):
|
|
193
|
+
logger.warning(f"Removed {sum(invalid_psms)} PSMs with invalid amino acids.")
|
|
194
|
+
return psm_list[~invalid_psms]
|
|
195
|
+
else:
|
|
196
|
+
logger.debug("No PSMs with invalid amino acids found.")
|
|
197
|
+
return psm_list
|
|
@@ -198,6 +198,7 @@ def score_scatter_plot(
|
|
|
198
198
|
after: mokapot.LinearConfidence,
|
|
199
199
|
level: str = "psms",
|
|
200
200
|
indexer: str = "index",
|
|
201
|
+
fdr_threshold: float = 0.01,
|
|
201
202
|
) -> go.Figure:
|
|
202
203
|
"""
|
|
203
204
|
Plot PSM scores before and after rescoring.
|
|
@@ -241,16 +242,22 @@ def score_scatter_plot(
|
|
|
241
242
|
ce_psms = pd.concat([ce_psms_targets, ce_psms_decoys], axis=0)
|
|
242
243
|
|
|
243
244
|
# Get score thresholds
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
245
|
+
try:
|
|
246
|
+
score_threshold_before = (
|
|
247
|
+
ce_psms[ce_psms["mokapot q-value before"] <= fdr_threshold]
|
|
248
|
+
.sort_values("mokapot q-value before", ascending=False)["mokapot score before"]
|
|
249
|
+
.iloc[0]
|
|
250
|
+
)
|
|
251
|
+
except IndexError: # No PSMs below threshold
|
|
252
|
+
score_threshold_before = None
|
|
253
|
+
try:
|
|
254
|
+
score_threshold_after = (
|
|
255
|
+
ce_psms[ce_psms["mokapot q-value after"] <= fdr_threshold]
|
|
256
|
+
.sort_values("mokapot q-value after", ascending=False)["mokapot score after"]
|
|
257
|
+
.iloc[0]
|
|
258
|
+
)
|
|
259
|
+
except IndexError: # No PSMs below threshold
|
|
260
|
+
score_threshold_after = None
|
|
254
261
|
|
|
255
262
|
# Plot
|
|
256
263
|
fig = px.scatter(
|
|
@@ -267,10 +274,12 @@ def score_scatter_plot(
|
|
|
267
274
|
},
|
|
268
275
|
)
|
|
269
276
|
# draw FDR thresholds
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
277
|
+
if score_threshold_before:
|
|
278
|
+
fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
|
|
279
|
+
fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
|
|
280
|
+
if score_threshold_after:
|
|
281
|
+
fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
|
|
282
|
+
fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)
|
|
274
283
|
|
|
275
284
|
return fig
|
|
276
285
|
|
|
@@ -29,8 +29,11 @@ import pandas as pd
|
|
|
29
29
|
import psm_utils
|
|
30
30
|
from mokapot.brew import brew
|
|
31
31
|
from mokapot.dataset import LinearPsmDataset
|
|
32
|
+
from mokapot.model import PercolatorModel
|
|
32
33
|
from pyteomics.mass import nist_mass
|
|
33
34
|
|
|
35
|
+
from ms2rescore.exceptions import RescoringError
|
|
36
|
+
|
|
34
37
|
logger = logging.getLogger(__name__)
|
|
35
38
|
logging.getLogger("numba").setLevel(logging.WARNING)
|
|
36
39
|
|
|
@@ -39,6 +42,7 @@ def rescore(
|
|
|
39
42
|
psm_list: psm_utils.PSMList,
|
|
40
43
|
output_file_root: str = "ms2rescore",
|
|
41
44
|
fasta_file: Optional[str] = None,
|
|
45
|
+
train_fdr: float = 0.01,
|
|
42
46
|
write_weights: bool = False,
|
|
43
47
|
write_txt: bool = False,
|
|
44
48
|
write_flashlfq: bool = False,
|
|
@@ -65,6 +69,8 @@ def rescore(
|
|
|
65
69
|
fasta_file
|
|
66
70
|
Path to FASTA file with protein sequences to use for protein inference. Defaults to
|
|
67
71
|
``None``.
|
|
72
|
+
train_fdr
|
|
73
|
+
FDR to use for training the Mokapot model. Defaults to ``0.01``.
|
|
68
74
|
write_weights
|
|
69
75
|
Write model weights to a text file. Defaults to ``False``.
|
|
70
76
|
write_txt
|
|
@@ -91,46 +97,15 @@ def rescore(
|
|
|
91
97
|
|
|
92
98
|
# Rescore
|
|
93
99
|
logger.debug(f"Mokapot brew options: `{kwargs}`")
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
|
|
98
|
-
mokapot_values_targets = (
|
|
99
|
-
confidence_results.confidence_estimates["psms"].set_index("index").sort_index()[keys]
|
|
100
|
-
)
|
|
101
|
-
mokapot_values_decoys = (
|
|
102
|
-
confidence_results.decoy_confidence_estimates["psms"].set_index("index").sort_index()[keys]
|
|
103
|
-
)
|
|
104
|
-
q = np.full((len(psm_list), 3), np.nan)
|
|
105
|
-
q[mokapot_values_targets.index] = mokapot_values_targets.values
|
|
106
|
-
q[mokapot_values_decoys.index] = mokapot_values_decoys.values
|
|
107
|
-
|
|
108
|
-
# Add Mokapot results to PSMList
|
|
109
|
-
psm_list["score"] = q[:, 0]
|
|
110
|
-
psm_list["qvalue"] = q[:, 1]
|
|
111
|
-
psm_list["pep"] = q[:, 2]
|
|
112
|
-
|
|
113
|
-
# Repeat for peptide-level scores
|
|
114
|
-
peptides_targets = confidence_results.confidence_estimates["peptides"].set_index(["peptide"])[
|
|
115
|
-
keys
|
|
116
|
-
]
|
|
117
|
-
peptides_decoys = confidence_results.decoy_confidence_estimates["peptides"].set_index(
|
|
118
|
-
["peptide"]
|
|
119
|
-
)[keys]
|
|
120
|
-
peptide_info = pd.concat([peptides_targets, peptides_decoys], axis=0).to_dict(orient="index")
|
|
121
|
-
|
|
122
|
-
# Add peptide-level scores to PSM metadata
|
|
123
|
-
# run_key = "na" if not all(psm.run for psm in psm_list) else None
|
|
124
|
-
no_charge_pattern = re.compile(r"(/\d+$)")
|
|
125
|
-
for psm in psm_list:
|
|
126
|
-
peptide_scores = peptide_info[(no_charge_pattern.sub("", str(psm.peptidoform), 1))]
|
|
127
|
-
psm.metadata.update(
|
|
128
|
-
{
|
|
129
|
-
"peptide_score": peptide_scores["mokapot score"],
|
|
130
|
-
"peptide_qvalue": peptide_scores["mokapot q-value"],
|
|
131
|
-
"peptide_pep": peptide_scores["mokapot PEP"],
|
|
132
|
-
}
|
|
100
|
+
try:
|
|
101
|
+
confidence_results, models = brew(
|
|
102
|
+
lin_psm_data, model=PercolatorModel(train_fdr=train_fdr), rng=8, **kwargs
|
|
133
103
|
)
|
|
104
|
+
except RuntimeError as e:
|
|
105
|
+
raise RescoringError("Mokapot could not be run. Please check the input data.") from e
|
|
106
|
+
|
|
107
|
+
add_psm_confidence(psm_list, confidence_results)
|
|
108
|
+
add_peptide_confidence(psm_list, confidence_results)
|
|
134
109
|
|
|
135
110
|
# Write results
|
|
136
111
|
if write_weights:
|
|
@@ -245,6 +220,58 @@ def save_model_weights(
|
|
|
245
220
|
)
|
|
246
221
|
|
|
247
222
|
|
|
223
|
+
def add_psm_confidence(
|
|
224
|
+
psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
|
|
225
|
+
) -> None:
|
|
226
|
+
"""Add PSM-level confidence estimates to PSM list, updating score, qvalue, pep, and rank."""
|
|
227
|
+
# Reshape confidence estimates to match PSMList
|
|
228
|
+
keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
|
|
229
|
+
mokapot_values_targets = (
|
|
230
|
+
confidence_results.confidence_estimates["psms"].set_index("index").sort_index()[keys]
|
|
231
|
+
)
|
|
232
|
+
mokapot_values_decoys = (
|
|
233
|
+
confidence_results.decoy_confidence_estimates["psms"].set_index("index").sort_index()[keys]
|
|
234
|
+
)
|
|
235
|
+
q = np.full((len(psm_list), 3), np.nan)
|
|
236
|
+
q[mokapot_values_targets.index] = mokapot_values_targets.values
|
|
237
|
+
q[mokapot_values_decoys.index] = mokapot_values_decoys.values
|
|
238
|
+
|
|
239
|
+
# Add Mokapot results to PSMList
|
|
240
|
+
psm_list["score"] = q[:, 0]
|
|
241
|
+
psm_list["qvalue"] = q[:, 1]
|
|
242
|
+
psm_list["pep"] = q[:, 2]
|
|
243
|
+
|
|
244
|
+
# Reset ranks to match new scores
|
|
245
|
+
psm_list.set_ranks(lower_score_better=False)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def add_peptide_confidence(
|
|
249
|
+
psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
|
|
250
|
+
) -> None:
|
|
251
|
+
"""Add Mokapot peptide-level confidence estimates to PSM list."""
|
|
252
|
+
keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
|
|
253
|
+
peptide_info = pd.concat(
|
|
254
|
+
[
|
|
255
|
+
confidence_results.confidence_estimates["peptides"].set_index("peptide")[keys],
|
|
256
|
+
confidence_results.decoy_confidence_estimates["peptides"].set_index("peptide")[keys],
|
|
257
|
+
],
|
|
258
|
+
axis=0,
|
|
259
|
+
).to_dict(orient="index")
|
|
260
|
+
|
|
261
|
+
# Add peptide-level scores to PSM metadata
|
|
262
|
+
# run_key = "na" if not all(psm.run for psm in psm_list) else None
|
|
263
|
+
no_charge_pattern = re.compile(r"(/\d+$)")
|
|
264
|
+
for psm in psm_list:
|
|
265
|
+
peptide_scores = peptide_info[(no_charge_pattern.sub("", str(psm.peptidoform), 1))]
|
|
266
|
+
psm.metadata.update(
|
|
267
|
+
{
|
|
268
|
+
"peptide_score": peptide_scores["mokapot score"],
|
|
269
|
+
"peptide_qvalue": peptide_scores["mokapot q-value"],
|
|
270
|
+
"peptide_pep": peptide_scores["mokapot PEP"],
|
|
271
|
+
}
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
|
|
248
275
|
def _mz_to_mass(mz: float, charge: int) -> float:
|
|
249
276
|
"""Convert m/z to mass."""
|
|
250
277
|
return mz * charge - charge * nist_mass["H"][1][0]
|
|
@@ -175,6 +175,8 @@ def _update_psm_scores(
|
|
|
175
175
|
original_psm["qvalue"] = new_psm["qvalue"]
|
|
176
176
|
original_psm["pep"] = new_psm["pep"]
|
|
177
177
|
|
|
178
|
+
psm_list.set_ranks(lower_score_better=False)
|
|
179
|
+
|
|
178
180
|
|
|
179
181
|
def _write_pin_file(psm_list: psm_utils.PSMList, filepath: str):
|
|
180
182
|
"""Write PIN file for rescoring."""
|
|
@@ -43,13 +43,14 @@ dependencies = [
|
|
|
43
43
|
"mokapot>=0.9",
|
|
44
44
|
"ms2pip>=4.0.0-dev10",
|
|
45
45
|
"ms2rescore_rs",
|
|
46
|
-
"numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
|
|
47
|
-
"numpy>=1.16.0; python_version != '3.11'",
|
|
46
|
+
# "numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
|
|
47
|
+
# "numpy>=1.16.0; python_version != '3.11'",
|
|
48
|
+
"numpy>=1.16.0",
|
|
49
|
+
"scikit-learn==1.5.1; python_version == '3.11'",
|
|
48
50
|
"pandas>=1.0",
|
|
49
51
|
"plotly>=5",
|
|
50
|
-
"psm_utils>=0.
|
|
51
|
-
"
|
|
52
|
-
"pyteomics>=4.1.0, <4.7",
|
|
52
|
+
"psm_utils>=0.9",
|
|
53
|
+
"pyteomics>=4.7.2",
|
|
53
54
|
"rich>=12",
|
|
54
55
|
"tomli>=2; python_version < '3.11'",
|
|
55
56
|
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/config_default_tims.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/github-mark-white.png
RENAMED
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/ms2rescore_logo.png
RENAMED
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/img/program_icon.ico
RENAMED
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/package_data/ms2rescore-gui-theme.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev8}/ms2rescore/report/templates/target-decoy.html
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|