ms2rescore 3.1.0.dev7__tar.gz → 3.1.0.dev9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/PKG-INFO +1 -2
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/__init__.py +1 -1
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/core.py +66 -39
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/config_default.json +2 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/config_schema.json +12 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/parse_psms.py +60 -65
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/charts.py +22 -14
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/utils.py +11 -21
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/rescoring_engines/mokapot.py +4 -1
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/rescoring_engines/percolator.py +2 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/pyproject.toml +0 -3
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/LICENSE +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/README.md +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/__main__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/config_parser.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/exceptions.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/feature_generators/__init__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/feature_generators/base.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/feature_generators/basic.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/feature_generators/deeplc.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/feature_generators/im2deep.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/feature_generators/ionmob.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/feature_generators/maxquant.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/feature_generators/ms2pip.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/gui/__init__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/gui/__main__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/gui/app.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/gui/function2ctk.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/gui/widgets.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/__init__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/config_default_tims.json +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/__init__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/config_icon.png +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/github-mark-white.png +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/github-mark.png +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/ms2rescore_logo.png +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/program_icon.ico +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/ms2rescore-gui-theme.json +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/parse_spectra.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/__init__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/__main__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/generate.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/__init__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/about.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/base.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/config.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/features.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/log.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/metadata.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/overview.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/stats-card.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/style.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/target-decoy.html +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/texts.toml +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/rescoring_engines/__init__.py +0 -0
- {ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ms2rescore
|
|
3
|
-
Version: 3.1.0.dev7
|
|
3
|
+
Version: 3.1.0.dev9
|
|
4
4
|
Summary: MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and retention times.
|
|
5
5
|
Keywords: MS2Rescore,MS2PIP,DeepLC,Percolator,proteomics,mass spectrometry,peptide identification,rescoring,machine learning
|
|
6
6
|
Author: Ana Sílvia C. Silva, Robbin Bouwmeester, Louise Buur
|
|
@@ -25,7 +25,6 @@ Requires-Dist: mokapot>=0.9
|
|
|
25
25
|
Requires-Dist: ms2pip>=4.0.0-dev10
|
|
26
26
|
Requires-Dist: ms2rescore_rs
|
|
27
27
|
Requires-Dist: numpy>=1.16.0
|
|
28
|
-
Requires-Dist: scikit-learn==1.5.1; python_version == '3.11'
|
|
29
28
|
Requires-Dist: pandas>=1.0
|
|
30
29
|
Requires-Dist: plotly>=5
|
|
31
30
|
Requires-Dist: psm_utils>=0.9
|
|
@@ -47,7 +47,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
47
47
|
psm_list = parse_psms(config, psm_list)
|
|
48
48
|
|
|
49
49
|
# Log #PSMs identified before rescoring
|
|
50
|
-
id_psms_before = _log_id_psms_before(psm_list)
|
|
50
|
+
id_psms_before = _log_id_psms_before(psm_list, max_rank=config["max_psm_rank_output"])
|
|
51
51
|
|
|
52
52
|
# Define feature names; get existing feature names from PSM file
|
|
53
53
|
feature_names = dict()
|
|
@@ -62,7 +62,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
# Add missing precursor info from spectrum file if needed
|
|
65
|
-
_fill_missing_precursor_info(psm_list, config)
|
|
65
|
+
psm_list = _fill_missing_precursor_info(psm_list, config)
|
|
66
66
|
|
|
67
67
|
# Add rescoring features
|
|
68
68
|
for fgen_name, fgen_config in config["feature_generators"].items():
|
|
@@ -145,22 +145,26 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
145
145
|
**config["rescoring_engine"]["mokapot"],
|
|
146
146
|
)
|
|
147
147
|
except exceptions.RescoringError as e:
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
148
|
+
# Write output
|
|
149
|
+
logger.info(f"Writing intermediary output to {output_file_root}.psms.tsv...")
|
|
150
|
+
psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
|
|
151
|
+
|
|
152
|
+
# Reraise exception
|
|
153
|
+
raise e
|
|
153
154
|
|
|
154
|
-
#
|
|
155
|
+
# Post-rescoring processing
|
|
155
156
|
if all(psm_list["pep"] == 1.0):
|
|
156
157
|
psm_list = _fix_constant_pep(psm_list)
|
|
158
|
+
psm_list = _filter_by_rank(psm_list, config["max_psm_rank_output"], False)
|
|
159
|
+
psm_list = _calculate_confidence(psm_list)
|
|
160
|
+
_ = _log_id_psms_after(psm_list, id_psms_before, max_rank=config["max_psm_rank_output"])
|
|
157
161
|
|
|
158
162
|
# Write output
|
|
159
163
|
logger.info(f"Writing output to {output_file_root}.psms.tsv...")
|
|
160
164
|
psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
|
|
161
165
|
|
|
162
166
|
# Write report
|
|
163
|
-
if config["write_report"]
|
|
167
|
+
if config["write_report"]:
|
|
164
168
|
try:
|
|
165
169
|
generate.generate_report(
|
|
166
170
|
output_file_root, psm_list=psm_list, feature_names=feature_names, use_txt_log=True
|
|
@@ -169,7 +173,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
169
173
|
logger.exception(e)
|
|
170
174
|
|
|
171
175
|
|
|
172
|
-
def _fill_missing_precursor_info(psm_list, config):
|
|
176
|
+
def _fill_missing_precursor_info(psm_list: PSMList, config: Dict) -> PSMList:
|
|
173
177
|
"""Fill missing precursor info from spectrum file if needed."""
|
|
174
178
|
# Check if required
|
|
175
179
|
# TODO: avoid hard coding feature generators in some way
|
|
@@ -211,6 +215,16 @@ def _fill_missing_precursor_info(psm_list, config):
|
|
|
211
215
|
[v is not None and not np.isnan(v) for v in psm_list[value_name]]
|
|
212
216
|
]
|
|
213
217
|
|
|
218
|
+
return psm_list
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _filter_by_rank(psm_list: PSMList, max_rank: int, lower_score_better: bool) -> PSMList:
|
|
222
|
+
"""Filter PSMs by rank."""
|
|
223
|
+
psm_list.set_ranks(lower_score_better=lower_score_better)
|
|
224
|
+
rank_filter = psm_list["rank"] <= max_rank
|
|
225
|
+
logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {max_rank}.")
|
|
226
|
+
return psm_list[rank_filter]
|
|
227
|
+
|
|
214
228
|
|
|
215
229
|
def _write_feature_names(feature_names, output_file_root):
|
|
216
230
|
"""Write feature names to file."""
|
|
@@ -221,31 +235,39 @@ def _write_feature_names(feature_names, output_file_root):
|
|
|
221
235
|
f.write(f"{fgen}\t{feature}\n")
|
|
222
236
|
|
|
223
237
|
|
|
224
|
-
def _log_id_psms_before(psm_list):
|
|
238
|
+
def _log_id_psms_before(psm_list: PSMList, fdr: float = 0.01, max_rank: int = 1) -> int:
|
|
225
239
|
"""Log #PSMs identified before rescoring."""
|
|
226
240
|
id_psms_before = (
|
|
227
|
-
(psm_list["qvalue"] <= 0.01) & (psm_list["
|
|
241
|
+
(psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
|
|
228
242
|
).sum()
|
|
229
|
-
logger.info(
|
|
243
|
+
logger.info(
|
|
244
|
+
f"Found {id_psms_before} identified PSMs with rank <= {max_rank} at {fdr} FDR before "
|
|
245
|
+
"rescoring."
|
|
246
|
+
)
|
|
230
247
|
return id_psms_before
|
|
231
248
|
|
|
232
249
|
|
|
233
|
-
def _log_id_psms_after(
|
|
250
|
+
def _log_id_psms_after(
|
|
251
|
+
psm_list: PSMList, id_psms_before: int, fdr: float = 0.01, max_rank: int = 1
|
|
252
|
+
) -> int:
|
|
234
253
|
"""Log #PSMs identified after rescoring."""
|
|
235
254
|
id_psms_after = (
|
|
236
|
-
(psm_list["qvalue"] <= 0.01) & (psm_list["
|
|
255
|
+
(psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
|
|
237
256
|
).sum()
|
|
238
257
|
diff = id_psms_after - id_psms_before
|
|
239
258
|
diff_perc = diff / id_psms_before if id_psms_before > 0 else None
|
|
240
259
|
|
|
241
260
|
diff_numbers = f"{diff} ({diff_perc:.2%})" if diff_perc is not None else str(diff)
|
|
242
261
|
diff_word = "more" if diff > 0 else "less"
|
|
243
|
-
logger.info(
|
|
262
|
+
logger.info(
|
|
263
|
+
f"Identified {diff_numbers} {diff_word} PSMs with rank <= {max_rank} at {fdr} FDR after "
|
|
264
|
+
"rescoring."
|
|
265
|
+
)
|
|
244
266
|
|
|
245
267
|
return id_psms_after
|
|
246
268
|
|
|
247
269
|
|
|
248
|
-
def _fix_constant_pep(psm_list):
|
|
270
|
+
def _fix_constant_pep(psm_list: PSMList) -> PSMList:
|
|
249
271
|
"""Workaround for broken PEP calculation if best PSM is decoy."""
|
|
250
272
|
logger.warning(
|
|
251
273
|
"Attempting to fix constant PEP values by removing decoy PSMs that score higher than the "
|
|
@@ -257,30 +279,35 @@ def _fix_constant_pep(psm_list):
|
|
|
257
279
|
if not higher_scoring_decoys.any():
|
|
258
280
|
logger.warning("No decoys scoring higher than the best target found. Skipping fix.")
|
|
259
281
|
else:
|
|
260
|
-
logger.warning(f"Removing {higher_scoring_decoys.sum()} decoy PSMs.")
|
|
261
|
-
|
|
262
282
|
psm_list = psm_list[~higher_scoring_decoys]
|
|
283
|
+
logger.warning(f"Removed {higher_scoring_decoys.sum()} decoy PSMs.")
|
|
263
284
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
285
|
+
return psm_list
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _calculate_confidence(psm_list: PSMList) -> PSMList:
|
|
289
|
+
"""
|
|
290
|
+
Calculate scores, q-values, and PEPs for PSMs and peptides and add them to PSMList.
|
|
291
|
+
"""
|
|
292
|
+
# Minimal conversion to LinearPsmDataset
|
|
293
|
+
psm_df = psm_list.to_dataframe()
|
|
294
|
+
psm_df = psm_df.reset_index(drop=True).reset_index()
|
|
295
|
+
psm_df["peptide"] = (
|
|
296
|
+
psm_df["peptidoform"].astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
|
|
297
|
+
)
|
|
298
|
+
psm_df["is_target"] = ~psm_df["is_decoy"]
|
|
299
|
+
lin_psm_data = LinearPsmDataset(
|
|
300
|
+
psms=psm_df[["index", "peptide", "is_target"]],
|
|
301
|
+
target_column="is_target",
|
|
302
|
+
spectrum_columns="index", # Use artificial index to allow multi-rank rescoring
|
|
303
|
+
peptide_column="peptide",
|
|
304
|
+
)
|
|
278
305
|
|
|
279
|
-
|
|
280
|
-
|
|
306
|
+
# Recalculate confidence
|
|
307
|
+
new_confidence = lin_psm_data.assign_confidence(scores=psm_list["score"])
|
|
281
308
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
309
|
+
# Add new confidence estimations to PSMList
|
|
310
|
+
add_psm_confidence(psm_list, new_confidence)
|
|
311
|
+
add_peptide_confidence(psm_list, new_confidence)
|
|
285
312
|
|
|
286
|
-
|
|
313
|
+
return psm_list
|
|
@@ -131,6 +131,18 @@
|
|
|
131
131
|
"type": "boolean",
|
|
132
132
|
"default": false
|
|
133
133
|
},
|
|
134
|
+
"max_psm_rank_input": {
|
|
135
|
+
"description": "Maximum rank of PSMs to use as input for rescoring",
|
|
136
|
+
"type": "number",
|
|
137
|
+
"default": 10,
|
|
138
|
+
"minimum": 1
|
|
139
|
+
},
|
|
140
|
+
"max_psm_rank_output": {
|
|
141
|
+
"description": "Maximum rank of PSMs to return after rescoring, before final FDR calculation",
|
|
142
|
+
"type": "number",
|
|
143
|
+
"default": 1,
|
|
144
|
+
"minimum": 1
|
|
145
|
+
},
|
|
134
146
|
"modification_mapping": {
|
|
135
147
|
"description": "Mapping of modification labels to each replacement label.",
|
|
136
148
|
"type": "object",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
-
from typing import Dict, Union
|
|
3
|
+
from typing import Dict, Optional, Union
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import psm_utils.io
|
|
@@ -24,14 +24,30 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
|
|
|
24
24
|
PSMList object containing PSMs. If None, PSMs will be read from ``psm_file``.
|
|
25
25
|
|
|
26
26
|
"""
|
|
27
|
-
# Read PSMs
|
|
28
|
-
|
|
27
|
+
# Read PSMs
|
|
28
|
+
try:
|
|
29
|
+
psm_list = _read_psms(config, psm_list)
|
|
30
|
+
except psm_utils.io.PSMUtilsIOException:
|
|
31
|
+
raise MS2RescoreConfigurationError(
|
|
32
|
+
"Error occurred while reading PSMs. Please check the 'psm_file' and "
|
|
33
|
+
"'psm_file_type' settings. See "
|
|
34
|
+
"https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
|
|
35
|
+
" for more information."
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# Filter by PSM rank
|
|
39
|
+
psm_list.set_ranks(config["lower_score_is_better"])
|
|
40
|
+
rank_filter = psm_list["rank"] <= config["max_psm_rank_input"]
|
|
41
|
+
psm_list = psm_list[rank_filter]
|
|
42
|
+
logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {config['max_psm_rank_input']}.")
|
|
43
|
+
|
|
44
|
+
# Remove invalid AAs, find decoys, calculate q-values
|
|
29
45
|
psm_list = _remove_invalid_aa(psm_list)
|
|
30
|
-
_find_decoys(config, psm_list)
|
|
31
|
-
_calculate_qvalues(config, psm_list)
|
|
46
|
+
_find_decoys(psm_list, config["id_decoy_pattern"])
|
|
47
|
+
_calculate_qvalues(psm_list, config["lower_score_is_better"])
|
|
32
48
|
if config["psm_id_rt_pattern"] or config["psm_id_im_pattern"]:
|
|
33
49
|
logger.debug("Parsing retention time and/or ion mobility from PSM identifier...")
|
|
34
|
-
|
|
50
|
+
_parse_values_from_spectrum_id(config, psm_list)
|
|
35
51
|
|
|
36
52
|
# Store scoring values for comparison later
|
|
37
53
|
for psm in psm_list:
|
|
@@ -79,39 +95,30 @@ def _read_psms(config, psm_list):
|
|
|
79
95
|
if isinstance(psm_list, PSMList):
|
|
80
96
|
return psm_list
|
|
81
97
|
else:
|
|
82
|
-
logger.info("Reading PSMs from file...")
|
|
83
98
|
total_files = len(config["psm_file"])
|
|
84
99
|
psm_list = []
|
|
85
100
|
for current_file, psm_file in enumerate(config["psm_file"]):
|
|
86
101
|
logger.info(
|
|
87
102
|
f"Reading PSMs from PSM file ({current_file+1}/{total_files}): '{psm_file}'..."
|
|
88
103
|
)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
**config["psm_reader_kwargs"],
|
|
96
|
-
)
|
|
97
|
-
)
|
|
98
|
-
except psm_utils.io.PSMUtilsIOException:
|
|
99
|
-
raise MS2RescoreConfigurationError(
|
|
100
|
-
"Error occurred while reading PSMs. Please check the 'psm_file' and "
|
|
101
|
-
"'psm_file_type' settings. See "
|
|
102
|
-
"https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
|
|
103
|
-
" for more information."
|
|
104
|
+
psm_list.extend(
|
|
105
|
+
psm_utils.io.read_file(
|
|
106
|
+
psm_file,
|
|
107
|
+
filetype=config["psm_file_type"],
|
|
108
|
+
show_progressbar=True,
|
|
109
|
+
**config["psm_reader_kwargs"],
|
|
104
110
|
)
|
|
111
|
+
)
|
|
105
112
|
logger.debug(f"Read {len(psm_list)} PSMs from '{psm_file}'.")
|
|
106
113
|
|
|
107
114
|
return PSMList(psm_list=psm_list)
|
|
108
115
|
|
|
109
116
|
|
|
110
|
-
def _find_decoys(config, psm_list):
|
|
117
|
+
def _find_decoys(psm_list: PSMList, id_decoy_pattern: Optional[str] = None):
|
|
111
118
|
"""Find decoys in PSMs, log amount, and raise error if none found."""
|
|
112
119
|
logger.debug("Finding decoys...")
|
|
113
|
-
if config["id_decoy_pattern"]:
|
|
114
|
-
psm_list.find_decoys(config["id_decoy_pattern"])
|
|
120
|
+
if id_decoy_pattern:
|
|
121
|
+
psm_list.find_decoys(id_decoy_pattern)
|
|
115
122
|
|
|
116
123
|
n_psms = len(psm_list)
|
|
117
124
|
percent_decoys = sum(psm_list["is_decoy"]) / n_psms * 100
|
|
@@ -126,12 +133,12 @@ def _find_decoys(config, psm_list):
|
|
|
126
133
|
)
|
|
127
134
|
|
|
128
135
|
|
|
129
|
-
def _calculate_qvalues(config, psm_list):
|
|
136
|
+
def _calculate_qvalues(psm_list: PSMList, lower_score_is_better: bool):
|
|
130
137
|
"""Calculate q-values for PSMs if not present."""
|
|
131
138
|
# Calculate q-values if not present
|
|
132
139
|
if None in psm_list["qvalue"]:
|
|
133
140
|
logger.debug("Recalculating q-values...")
|
|
134
|
-
psm_list.calculate_qvalues(reverse=not config["lower_score_is_better"])
|
|
141
|
+
psm_list.calculate_qvalues(reverse=not lower_score_is_better)
|
|
135
142
|
|
|
136
143
|
|
|
137
144
|
def _match_psm_ids(old_id, regex_pattern):
|
|
@@ -146,50 +153,38 @@ def _match_psm_ids(old_id, regex_pattern):
|
|
|
146
153
|
)
|
|
147
154
|
|
|
148
155
|
|
|
149
|
-
def
|
|
156
|
+
def _parse_values_from_spectrum_id(
|
|
157
|
+
psm_list: PSMList,
|
|
158
|
+
psm_id_rt_pattern: Optional[str] = None,
|
|
159
|
+
psm_id_im_pattern: Optional[str] = None,
|
|
160
|
+
):
|
|
150
161
|
"""Parse retention time and or ion mobility values from the spectrum_id."""
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
psm_list["retention_time"] = [
|
|
160
|
-
float(rt_pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
|
|
161
|
-
]
|
|
162
|
-
except AttributeError:
|
|
163
|
-
raise MS2RescoreConfigurationError(
|
|
164
|
-
f"Could not parse retention time from spectrum_id with the "
|
|
165
|
-
f"{config['psm_id_rt_pattern']} regex pattern. "
|
|
166
|
-
f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n."
|
|
167
|
-
"Please make sure the retention time key is present in the spectrum_id "
|
|
168
|
-
"and the value is in a capturing group or disable the relevant feature generator."
|
|
169
|
-
)
|
|
170
|
-
|
|
171
|
-
if config["psm_id_im_pattern"]:
|
|
172
|
-
logger.debug(
|
|
173
|
-
"Parsing ion mobility from spectrum_id with regex pattern "
|
|
174
|
-
f"{config['psm_id_im_pattern']}"
|
|
175
|
-
)
|
|
176
|
-
try:
|
|
177
|
-
im_pattern = re.compile(config["psm_id_im_pattern"])
|
|
178
|
-
psm_list["ion_mobility"] = [
|
|
179
|
-
float(im_pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
|
|
180
|
-
]
|
|
181
|
-
except AttributeError:
|
|
182
|
-
raise MS2RescoreConfigurationError(
|
|
183
|
-
f"Could not parse ion mobility from spectrum_id with the "
|
|
184
|
-
f"{config['psm_id_im_pattern']} regex pattern. "
|
|
185
|
-
"Please make sure the ion mobility key is present in the spectrum_id "
|
|
186
|
-
"and the value is in a capturing group or disable the relevant feature generator."
|
|
162
|
+
for pattern, label, key in zip(
|
|
163
|
+
[psm_id_rt_pattern, psm_id_im_pattern],
|
|
164
|
+
["retention time", "ion mobility"],
|
|
165
|
+
["retention_time", "ion_mobility"],
|
|
166
|
+
):
|
|
167
|
+
if pattern:
|
|
168
|
+
logger.debug(
|
|
169
|
+
f"Parsing {label} from spectrum_id with regex pattern " f"{psm_id_rt_pattern}"
|
|
187
170
|
)
|
|
171
|
+
try:
|
|
172
|
+
pattern = re.compile(pattern)
|
|
173
|
+
psm_list[key] = [
|
|
174
|
+
float(pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
|
|
175
|
+
]
|
|
176
|
+
except AttributeError:
|
|
177
|
+
raise MS2RescoreConfigurationError(
|
|
178
|
+
f"Could not parse {label} from spectrum_id with the "
|
|
179
|
+
f"{pattern} regex pattern. "
|
|
180
|
+
f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n. "
|
|
181
|
+
f"Please make sure the {label} key is present in the spectrum_id "
|
|
182
|
+
"and the value is in a capturing group or disable the relevant feature generator."
|
|
183
|
+
)
|
|
188
184
|
|
|
189
185
|
|
|
190
186
|
def _remove_invalid_aa(psm_list: PSMList) -> PSMList:
|
|
191
187
|
"""Remove PSMs with invalid amino acids."""
|
|
192
|
-
logger.debug("Removing PSMs with invalid amino acids...")
|
|
193
188
|
invalid_psms = np.array(
|
|
194
189
|
[any(aa in "BJOUXZ" for aa in psm.peptidoform.sequence) for psm in psm_list]
|
|
195
190
|
)
|
|
@@ -242,16 +242,22 @@ def score_scatter_plot(
|
|
|
242
242
|
ce_psms = pd.concat([ce_psms_targets, ce_psms_decoys], axis=0)
|
|
243
243
|
|
|
244
244
|
# Get score thresholds
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
245
|
+
try:
|
|
246
|
+
score_threshold_before = (
|
|
247
|
+
ce_psms[ce_psms["mokapot q-value before"] <= fdr_threshold]
|
|
248
|
+
.sort_values("mokapot q-value before", ascending=False)["mokapot score before"]
|
|
249
|
+
.iloc[0]
|
|
250
|
+
)
|
|
251
|
+
except IndexError: # No PSMs below threshold
|
|
252
|
+
score_threshold_before = None
|
|
253
|
+
try:
|
|
254
|
+
score_threshold_after = (
|
|
255
|
+
ce_psms[ce_psms["mokapot q-value after"] <= fdr_threshold]
|
|
256
|
+
.sort_values("mokapot q-value after", ascending=False)["mokapot score after"]
|
|
257
|
+
.iloc[0]
|
|
258
|
+
)
|
|
259
|
+
except IndexError: # No PSMs below threshold
|
|
260
|
+
score_threshold_after = None
|
|
255
261
|
|
|
256
262
|
# Plot
|
|
257
263
|
fig = px.scatter(
|
|
@@ -268,10 +274,12 @@ def score_scatter_plot(
|
|
|
268
274
|
},
|
|
269
275
|
)
|
|
270
276
|
# draw FDR thresholds
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
277
|
+
if score_threshold_before:
|
|
278
|
+
fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
|
|
279
|
+
fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
|
|
280
|
+
if score_threshold_after:
|
|
281
|
+
fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
|
|
282
|
+
fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)
|
|
275
283
|
|
|
276
284
|
return fig
|
|
277
285
|
|
|
@@ -51,35 +51,25 @@ def get_confidence_estimates(
|
|
|
51
51
|
"was generated by MS²Rescore. Could not generate report."
|
|
52
52
|
) from e
|
|
53
53
|
|
|
54
|
+
score_after = psm_list["score"]
|
|
54
55
|
peptide = (
|
|
55
56
|
pd.Series(psm_list["peptidoform"]).astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
|
|
56
57
|
)
|
|
57
|
-
psms = pd.DataFrame(
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
).reset_index()
|
|
65
|
-
|
|
58
|
+
psms = pd.DataFrame({"peptide": peptide, "is_target": ~psm_list["is_decoy"]}).reset_index()
|
|
59
|
+
lin_psm_dataset = LinearPsmDataset(
|
|
60
|
+
psms=psms,
|
|
61
|
+
target_column="is_target",
|
|
62
|
+
spectrum_columns="index",
|
|
63
|
+
peptide_column="peptide",
|
|
64
|
+
)
|
|
66
65
|
if fasta_file:
|
|
67
66
|
fasta = read_fasta(fasta_file)
|
|
67
|
+
lin_psm_dataset.add_proteins(fasta)
|
|
68
68
|
|
|
69
69
|
confidence = dict()
|
|
70
|
-
for when in ["before", "after"]:
|
|
71
|
-
lin_psm_dataset = LinearPsmDataset(
|
|
72
|
-
psms=psms,
|
|
73
|
-
target_column="is_target",
|
|
74
|
-
spectrum_columns="index",
|
|
75
|
-
feature_columns=[when],
|
|
76
|
-
peptide_column="peptide",
|
|
77
|
-
)
|
|
78
|
-
if fasta_file:
|
|
79
|
-
lin_psm_dataset.add_proteins(fasta)
|
|
80
|
-
|
|
70
|
+
for when, scores in [("before", score_before), ("after", score_after)]:
|
|
81
71
|
try:
|
|
82
|
-
confidence[when] = lin_psm_dataset.assign_confidence()
|
|
72
|
+
confidence[when] = lin_psm_dataset.assign_confidence(scores=scores)
|
|
83
73
|
except RuntimeError:
|
|
84
74
|
confidence[when] = None
|
|
85
75
|
|
|
@@ -223,7 +223,7 @@ def save_model_weights(
|
|
|
223
223
|
def add_psm_confidence(
|
|
224
224
|
psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
|
|
225
225
|
) -> None:
|
|
226
|
-
"""Add
|
|
226
|
+
"""Add PSM-level confidence estimates to PSM list, updating score, qvalue, pep, and rank."""
|
|
227
227
|
# Reshape confidence estimates to match PSMList
|
|
228
228
|
keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
|
|
229
229
|
mokapot_values_targets = (
|
|
@@ -241,6 +241,9 @@ def add_psm_confidence(
|
|
|
241
241
|
psm_list["qvalue"] = q[:, 1]
|
|
242
242
|
psm_list["pep"] = q[:, 2]
|
|
243
243
|
|
|
244
|
+
# Reset ranks to match new scores
|
|
245
|
+
psm_list.set_ranks(lower_score_better=False)
|
|
246
|
+
|
|
244
247
|
|
|
245
248
|
def add_peptide_confidence(
|
|
246
249
|
psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
|
|
@@ -175,6 +175,8 @@ def _update_psm_scores(
|
|
|
175
175
|
original_psm["qvalue"] = new_psm["qvalue"]
|
|
176
176
|
original_psm["pep"] = new_psm["pep"]
|
|
177
177
|
|
|
178
|
+
psm_list.set_ranks(lower_score_better=False)
|
|
179
|
+
|
|
178
180
|
|
|
179
181
|
def _write_pin_file(psm_list: psm_utils.PSMList, filepath: str):
|
|
180
182
|
"""Write PIN file for rescoring."""
|
|
@@ -43,10 +43,7 @@ dependencies = [
|
|
|
43
43
|
"mokapot>=0.9",
|
|
44
44
|
"ms2pip>=4.0.0-dev10",
|
|
45
45
|
"ms2rescore_rs",
|
|
46
|
-
# "numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
|
|
47
|
-
# "numpy>=1.16.0; python_version != '3.11'",
|
|
48
46
|
"numpy>=1.16.0",
|
|
49
|
-
"scikit-learn==1.5.1; python_version == '3.11'",
|
|
50
47
|
"pandas>=1.0",
|
|
51
48
|
"plotly>=5",
|
|
52
49
|
"psm_utils>=0.9",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/config_default_tims.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/github-mark-white.png
RENAMED
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/ms2rescore_logo.png
RENAMED
|
File without changes
|
{ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/img/program_icon.ico
RENAMED
|
File without changes
|
{ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/package_data/ms2rescore-gui-theme.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev7 → ms2rescore-3.1.0.dev9}/ms2rescore/report/templates/target-decoy.html
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|