ms2rescore 3.1.0.dev6__tar.gz → 3.1.0.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/PKG-INFO +5 -6
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/__init__.py +1 -1
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/core.py +80 -27
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/exceptions.py +6 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/config_default.json +1 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/config_schema.json +13 -2
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/parse_psms.py +27 -29
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/charts.py +3 -2
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/rescoring_engines/mokapot.py +63 -39
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/pyproject.toml +6 -5
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/LICENSE +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/README.md +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/__main__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/config_parser.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/feature_generators/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/feature_generators/base.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/feature_generators/basic.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/feature_generators/deeplc.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/feature_generators/im2deep.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/feature_generators/ionmob.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/feature_generators/maxquant.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/feature_generators/ms2pip.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/gui/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/gui/__main__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/gui/app.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/gui/function2ctk.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/gui/widgets.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/config_default_tims.json +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/config_icon.png +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/github-mark-white.png +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/github-mark.png +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/ms2rescore_logo.png +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/program_icon.ico +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/ms2rescore-gui-theme.json +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/parse_spectra.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/__main__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/generate.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/about.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/base.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/config.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/features.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/log.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/metadata.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/overview.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/stats-card.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/style.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/target-decoy.html +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/texts.toml +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/utils.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/rescoring_engines/__init__.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/rescoring_engines/percolator.py +0 -0
- {ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ms2rescore
|
|
3
|
-
Version: 3.1.0.dev6
|
|
3
|
+
Version: 3.1.0.dev7
|
|
4
4
|
Summary: MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and retention times.
|
|
5
5
|
Keywords: MS2Rescore,MS2PIP,DeepLC,Percolator,proteomics,mass spectrometry,peptide identification,rescoring,machine learning
|
|
6
6
|
Author: Ana Sílvia C. Silva, Robbin Bouwmeester, Louise Buur
|
|
@@ -24,13 +24,12 @@ Requires-Dist: lxml>=4.5
|
|
|
24
24
|
Requires-Dist: mokapot>=0.9
|
|
25
25
|
Requires-Dist: ms2pip>=4.0.0-dev10
|
|
26
26
|
Requires-Dist: ms2rescore_rs
|
|
27
|
-
Requires-Dist: numpy==1.24.3; python_version == '3.11'
|
|
28
|
-
Requires-Dist: numpy>=1.16.0; python_version != '3.11'
|
|
27
|
+
Requires-Dist: numpy>=1.16.0
|
|
28
|
+
Requires-Dist: scikit-learn==1.5.1; python_version == '3.11'
|
|
29
29
|
Requires-Dist: pandas>=1.0
|
|
30
30
|
Requires-Dist: plotly>=5
|
|
31
|
-
Requires-Dist: psm_utils>=0.
|
|
32
|
-
Requires-Dist:
|
|
33
|
-
Requires-Dist: pyteomics>=4.1.0, <4.7
|
|
31
|
+
Requires-Dist: psm_utils>=0.9
|
|
32
|
+
Requires-Dist: pyteomics>=4.7.2
|
|
34
33
|
Requires-Dist: rich>=12
|
|
35
34
|
Requires-Dist: tomli>=2; python_version < '3.11'
|
|
36
35
|
Requires-Dist: ruff ; extra == "dev"
|
|
@@ -5,6 +5,7 @@ from typing import Dict, Optional
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import psm_utils.io
|
|
8
|
+
from mokapot.dataset import LinearPsmDataset
|
|
8
9
|
from psm_utils import PSMList
|
|
9
10
|
|
|
10
11
|
from ms2rescore import exceptions
|
|
@@ -13,6 +14,7 @@ from ms2rescore.parse_psms import parse_psms
|
|
|
13
14
|
from ms2rescore.parse_spectra import get_missing_values
|
|
14
15
|
from ms2rescore.report import generate
|
|
15
16
|
from ms2rescore.rescoring_engines import mokapot, percolator
|
|
17
|
+
from ms2rescore.rescoring_engines.mokapot import add_peptide_confidence, add_psm_confidence
|
|
16
18
|
|
|
17
19
|
logger = logging.getLogger(__name__)
|
|
18
20
|
|
|
@@ -104,8 +106,8 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
104
106
|
logging.debug(f"Creating USIs for {len(psm_list)} PSMs")
|
|
105
107
|
psm_list["spectrum_id"] = [psm.get_usi(as_url=False) for psm in psm_list]
|
|
106
108
|
|
|
107
|
-
# If no rescoring engine is specified, write PSMs and features to PIN file
|
|
108
|
-
if not config["rescoring_engine"]:
|
|
109
|
+
# If no rescoring engine is specified or DEBUG, write PSMs and features to PIN file
|
|
110
|
+
if not config["rescoring_engine"] or config["log_level"] == "debug":
|
|
109
111
|
logger.info(f"Writing added features to PIN file: {output_file_root}.psms.pin")
|
|
110
112
|
psm_utils.io.write_file(
|
|
111
113
|
psm_list,
|
|
@@ -113,42 +115,52 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
|
|
|
113
115
|
filetype="percolator",
|
|
114
116
|
feature_names=all_feature_names,
|
|
115
117
|
)
|
|
118
|
+
|
|
119
|
+
if not config["rescoring_engine"]:
|
|
120
|
+
logger.info("No rescoring engine specified. Skipping rescoring.")
|
|
116
121
|
return None
|
|
117
122
|
|
|
118
123
|
# Rescore PSMs
|
|
119
|
-
|
|
120
|
-
percolator
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
config["rescoring_engine"]["mokapot"]
|
|
130
|
-
|
|
131
|
-
protein_kwargs
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
124
|
+
try:
|
|
125
|
+
if "percolator" in config["rescoring_engine"]:
|
|
126
|
+
percolator.rescore(
|
|
127
|
+
psm_list,
|
|
128
|
+
output_file_root=output_file_root,
|
|
129
|
+
log_level=config["log_level"],
|
|
130
|
+
processes=config["processes"],
|
|
131
|
+
percolator_kwargs=config["rescoring_engine"]["percolator"],
|
|
132
|
+
)
|
|
133
|
+
elif "mokapot" in config["rescoring_engine"]:
|
|
134
|
+
if "fasta_file" not in config["rescoring_engine"]["mokapot"]:
|
|
135
|
+
config["rescoring_engine"]["mokapot"]["fasta_file"] = config["fasta_file"]
|
|
136
|
+
if "protein_kwargs" in config["rescoring_engine"]["mokapot"]:
|
|
137
|
+
protein_kwargs = config["rescoring_engine"]["mokapot"].pop("protein_kwargs")
|
|
138
|
+
else:
|
|
139
|
+
protein_kwargs = dict()
|
|
140
|
+
|
|
141
|
+
mokapot.rescore(
|
|
142
|
+
psm_list,
|
|
143
|
+
output_file_root=output_file_root,
|
|
144
|
+
protein_kwargs=protein_kwargs,
|
|
145
|
+
**config["rescoring_engine"]["mokapot"],
|
|
146
|
+
)
|
|
147
|
+
except exceptions.RescoringError as e:
|
|
148
|
+
logger.exception(e)
|
|
149
|
+
rescoring_succeeded = False
|
|
141
150
|
else:
|
|
142
|
-
|
|
151
|
+
rescoring_succeeded = True
|
|
152
|
+
_log_id_psms_after(psm_list, id_psms_before)
|
|
143
153
|
|
|
144
|
-
|
|
154
|
+
# Workaround for broken PEP calculation if best PSM is decoy
|
|
155
|
+
if all(psm_list["pep"] == 1.0):
|
|
156
|
+
psm_list = _fix_constant_pep(psm_list)
|
|
145
157
|
|
|
146
158
|
# Write output
|
|
147
159
|
logger.info(f"Writing output to {output_file_root}.psms.tsv...")
|
|
148
160
|
psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
|
|
149
161
|
|
|
150
162
|
# Write report
|
|
151
|
-
if config["write_report"]:
|
|
163
|
+
if config["write_report"] and rescoring_succeeded:
|
|
152
164
|
try:
|
|
153
165
|
generate.generate_report(
|
|
154
166
|
output_file_root, psm_list=psm_list, feature_names=feature_names, use_txt_log=True
|
|
@@ -231,3 +243,44 @@ def _log_id_psms_after(psm_list, id_psms_before):
|
|
|
231
243
|
logger.info(f"Identified {diff_numbers} {diff_word} PSMs at 1% FDR after rescoring.")
|
|
232
244
|
|
|
233
245
|
return id_psms_after
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _fix_constant_pep(psm_list):
    """
    Workaround for broken PEP calculation if best PSM is decoy.

    Decoy PSMs that score higher than the best-scoring target PSM are removed, after which
    PSM- and peptide-level confidence estimates are recalculated with Mokapot from the existing
    scores and written back to the remaining PSMs.

    Parameters
    ----------
    psm_list
        PSMs with ``score`` and ``is_decoy`` values.

    Returns
    -------
    psm_list
        PSM list without the higher-scoring decoys and with recalculated confidence estimates,
        or the unmodified input if there is nothing to fix.

    """
    logger.warning(
        "Attempting to fix constant PEP values by removing decoy PSMs that score higher than the "
        "best target PSM."
    )
    scores = psm_list["score"]
    is_decoy = psm_list["is_decoy"]

    # Without any target PSM there is no reference score; ``max`` on an empty array would raise
    target_scores = scores[~is_decoy]
    if target_scores.size == 0:
        logger.warning("No target PSMs found. Skipping fix.")
        return psm_list

    higher_scoring_decoys = is_decoy & (scores > target_scores.max())

    if not higher_scoring_decoys.any():
        # Nothing is removed, so the data is unchanged and there is nothing to recalculate
        logger.warning("No decoys scoring higher than the best target found. Skipping fix.")
        return psm_list

    logger.warning(f"Removing {higher_scoring_decoys.sum()} decoy PSMs.")
    psm_list = psm_list[~higher_scoring_decoys]

    # Minimal conversion to LinearPsmDataset
    psm_df = psm_list.to_dataframe()
    psm_df = psm_df.reset_index(drop=True).reset_index()
    # Peptide identifier: peptidoform string without a trailing "/<digits>" suffix
    psm_df["peptide"] = (
        psm_df["peptidoform"].astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
    )
    psm_df["is_target"] = ~psm_df["is_decoy"]
    lin_psm_data = LinearPsmDataset(
        psms=psm_df[["index", "peptide", "score", "is_target"]],
        target_column="is_target",
        spectrum_columns="index",  # Use artificial index to allow multi-rank rescoring
        peptide_column="peptide",
        feature_columns=["score"],
    )

    # Recalculate confidence
    new_confidence = lin_psm_data.assign_confidence()

    # Add new confidence estimations to PSMList
    add_psm_confidence(psm_list, new_confidence)
    add_peptide_confidence(psm_list, new_confidence)

    return psm_list
|
|
@@ -68,7 +68,11 @@
|
|
|
68
68
|
},
|
|
69
69
|
"psm_file": {
|
|
70
70
|
"description": "Path to file with peptide-spectrum matches.",
|
|
71
|
-
"oneOf": [
|
|
71
|
+
"oneOf": [
|
|
72
|
+
{ "type": "string" },
|
|
73
|
+
{ "type": "null" },
|
|
74
|
+
{ "type": "array", "items": { "type": "string" } }
|
|
75
|
+
]
|
|
72
76
|
},
|
|
73
77
|
"psm_file_type": {
|
|
74
78
|
"description": "PSM file type. By default inferred from file extension.",
|
|
@@ -159,7 +163,7 @@
|
|
|
159
163
|
"default": false
|
|
160
164
|
},
|
|
161
165
|
"profile": {
|
|
162
|
-
"description": "Write
|
|
166
|
+
"description": "Write a txt report using cProfile for profiling",
|
|
163
167
|
"type": "boolean",
|
|
164
168
|
"default": false
|
|
165
169
|
}
|
|
@@ -263,6 +267,13 @@
|
|
|
263
267
|
"type": "object",
|
|
264
268
|
"additionalProperties": true,
|
|
265
269
|
"properties": {
|
|
270
|
+
"train_fdr": {
|
|
271
|
+
"description": "FDR threshold for training Mokapot",
|
|
272
|
+
"type": "number",
|
|
273
|
+
"minimum": 0,
|
|
274
|
+
"maximum": 1,
|
|
275
|
+
"default": 0.01
|
|
276
|
+
},
|
|
266
277
|
"write_weights": {
|
|
267
278
|
"description": "Write Mokapot weights to a text file",
|
|
268
279
|
"type": "boolean",
|
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
import re
|
|
3
3
|
from typing import Dict, Union
|
|
4
4
|
|
|
5
|
+
import numpy as np
|
|
5
6
|
import psm_utils.io
|
|
6
7
|
from psm_utils import PSMList
|
|
7
8
|
|
|
@@ -25,6 +26,7 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
|
|
|
25
26
|
"""
|
|
26
27
|
# Read PSMs, find decoys, calculate q-values
|
|
27
28
|
psm_list = _read_psms(config, psm_list)
|
|
29
|
+
psm_list = _remove_invalid_aa(psm_list)
|
|
28
30
|
_find_decoys(config, psm_list)
|
|
29
31
|
_calculate_qvalues(config, psm_list)
|
|
30
32
|
if config["psm_id_rt_pattern"] or config["psm_id_im_pattern"]:
|
|
@@ -70,10 +72,6 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
|
|
|
70
72
|
new_ids = [_match_psm_ids(old_id, pattern) for old_id in psm_list["spectrum_id"]]
|
|
71
73
|
psm_list["spectrum_id"] = new_ids
|
|
72
74
|
|
|
73
|
-
# TODO: Temporary fix until implemented in psm_utils
|
|
74
|
-
# Ensure that spectrum IDs are strings (Pydantic 2.0 does not coerce int to str)
|
|
75
|
-
psm_list["spectrum_id"] = [str(spec_id) for spec_id in psm_list["spectrum_id"]]
|
|
76
|
-
|
|
77
75
|
return psm_list
|
|
78
76
|
|
|
79
77
|
|
|
@@ -82,21 +80,20 @@ def _read_psms(config, psm_list):
|
|
|
82
80
|
return psm_list
|
|
83
81
|
else:
|
|
84
82
|
logger.info("Reading PSMs from file...")
|
|
85
|
-
current_file = 1
|
|
86
83
|
total_files = len(config["psm_file"])
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
valid_psms = 0
|
|
90
|
-
for psm_file in config["psm_file"]:
|
|
84
|
+
psm_list = []
|
|
85
|
+
for current_file, psm_file in enumerate(config["psm_file"]):
|
|
91
86
|
logger.info(
|
|
92
|
-
f"Reading PSMs from PSM file ({current_file}/{total_files}): '{psm_file}'..."
|
|
87
|
+
f"Reading PSMs from PSM file ({current_file+1}/{total_files}): '{psm_file}'..."
|
|
93
88
|
)
|
|
94
89
|
try:
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
90
|
+
psm_list.extend(
|
|
91
|
+
psm_utils.io.read_file(
|
|
92
|
+
psm_file,
|
|
93
|
+
filetype=config["psm_file_type"],
|
|
94
|
+
show_progressbar=True,
|
|
95
|
+
**config["psm_reader_kwargs"],
|
|
96
|
+
)
|
|
100
97
|
)
|
|
101
98
|
except psm_utils.io.PSMUtilsIOException:
|
|
102
99
|
raise MS2RescoreConfigurationError(
|
|
@@ -105,18 +102,9 @@ def _read_psms(config, psm_list):
|
|
|
105
102
|
"https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
|
|
106
103
|
" for more information."
|
|
107
104
|
)
|
|
105
|
+
logger.debug(f"Read {len(psm_list)} PSMs from '{psm_file}'.")
|
|
108
106
|
|
|
109
|
-
|
|
110
|
-
for psm in id_file_psm_list.psm_list:
|
|
111
|
-
if not _has_invalid_aminoacids(psm):
|
|
112
|
-
valid_psms_list.append(psm)
|
|
113
|
-
valid_psms += 1
|
|
114
|
-
current_file += 1
|
|
115
|
-
if total_psms - valid_psms > 0:
|
|
116
|
-
logger.warning(
|
|
117
|
-
f"{total_psms - valid_psms} PSMs with invalid amino acids were removed."
|
|
118
|
-
)
|
|
119
|
-
return PSMList(psm_list=valid_psms_list)
|
|
107
|
+
return PSMList(psm_list=psm_list)
|
|
120
108
|
|
|
121
109
|
|
|
122
110
|
def _find_decoys(config, psm_list):
|
|
@@ -175,6 +163,7 @@ def _parse_values_spectrum_id(config, psm_list):
|
|
|
175
163
|
raise MS2RescoreConfigurationError(
|
|
176
164
|
f"Could not parse retention time from spectrum_id with the "
|
|
177
165
|
f"{config['psm_id_rt_pattern']} regex pattern. "
|
|
166
|
+
f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n."
|
|
178
167
|
"Please make sure the retention time key is present in the spectrum_id "
|
|
179
168
|
"and the value is in a capturing group or disable the relevant feature generator."
|
|
180
169
|
)
|
|
@@ -198,7 +187,16 @@ def _parse_values_spectrum_id(config, psm_list):
|
|
|
198
187
|
)
|
|
199
188
|
|
|
200
189
|
|
|
201
|
-
def
|
|
202
|
-
"""
|
|
190
|
+
def _remove_invalid_aa(psm_list: PSMList) -> PSMList:
    """
    Drop PSMs whose peptide sequence contains an invalid amino acid code.

    A sequence counts as invalid if any of its residues is one of ``B``, ``J``, ``O``, ``U``,
    ``X``, or ``Z``.

    Parameters
    ----------
    psm_list
        PSMs to filter.

    Returns
    -------
    psm_list
        The filtered PSM list, or the input itself if no PSM had to be removed.

    """
    logger.debug("Removing PSMs with invalid amino acids...")
    flags = [
        any(residue in "BJOUXZ" for residue in psm.peptidoform.sequence) for psm in psm_list
    ]
    has_invalid = np.array(flags)

    # Nothing to filter: hand back the input untouched
    if not has_invalid.any():
        logger.debug("No PSMs with invalid amino acids found.")
        return psm_list

    logger.warning(f"Removed {has_invalid.sum()} PSMs with invalid amino acids.")
    return psm_list[~has_invalid]
|
|
@@ -198,6 +198,7 @@ def score_scatter_plot(
|
|
|
198
198
|
after: mokapot.LinearConfidence,
|
|
199
199
|
level: str = "psms",
|
|
200
200
|
indexer: str = "index",
|
|
201
|
+
fdr_threshold: float = 0.01,
|
|
201
202
|
) -> go.Figure:
|
|
202
203
|
"""
|
|
203
204
|
Plot PSM scores before and after rescoring.
|
|
@@ -242,12 +243,12 @@ def score_scatter_plot(
|
|
|
242
243
|
|
|
243
244
|
# Get score thresholds
|
|
244
245
|
score_threshold_before = (
|
|
245
|
-
ce_psms[ce_psms["mokapot q-value before"] <=
|
|
246
|
+
ce_psms[ce_psms["mokapot q-value before"] <= fdr_threshold]
|
|
246
247
|
.sort_values("mokapot q-value before", ascending=False)["mokapot score before"]
|
|
247
248
|
.iloc[0]
|
|
248
249
|
)
|
|
249
250
|
score_threshold_after = (
|
|
250
|
-
ce_psms[ce_psms["mokapot q-value after"] <=
|
|
251
|
+
ce_psms[ce_psms["mokapot q-value after"] <= fdr_threshold]
|
|
251
252
|
.sort_values("mokapot q-value after", ascending=False)["mokapot score after"]
|
|
252
253
|
.iloc[0]
|
|
253
254
|
)
|
|
@@ -29,8 +29,11 @@ import pandas as pd
|
|
|
29
29
|
import psm_utils
|
|
30
30
|
from mokapot.brew import brew
|
|
31
31
|
from mokapot.dataset import LinearPsmDataset
|
|
32
|
+
from mokapot.model import PercolatorModel
|
|
32
33
|
from pyteomics.mass import nist_mass
|
|
33
34
|
|
|
35
|
+
from ms2rescore.exceptions import RescoringError
|
|
36
|
+
|
|
34
37
|
logger = logging.getLogger(__name__)
|
|
35
38
|
logging.getLogger("numba").setLevel(logging.WARNING)
|
|
36
39
|
|
|
@@ -39,6 +42,7 @@ def rescore(
|
|
|
39
42
|
psm_list: psm_utils.PSMList,
|
|
40
43
|
output_file_root: str = "ms2rescore",
|
|
41
44
|
fasta_file: Optional[str] = None,
|
|
45
|
+
train_fdr: float = 0.01,
|
|
42
46
|
write_weights: bool = False,
|
|
43
47
|
write_txt: bool = False,
|
|
44
48
|
write_flashlfq: bool = False,
|
|
@@ -65,6 +69,8 @@ def rescore(
|
|
|
65
69
|
fasta_file
|
|
66
70
|
Path to FASTA file with protein sequences to use for protein inference. Defaults to
|
|
67
71
|
``None``.
|
|
72
|
+
train_fdr
|
|
73
|
+
FDR to use for training the Mokapot model. Defaults to ``0.01``.
|
|
68
74
|
write_weights
|
|
69
75
|
Write model weights to a text file. Defaults to ``False``.
|
|
70
76
|
write_txt
|
|
@@ -91,46 +97,15 @@ def rescore(
|
|
|
91
97
|
|
|
92
98
|
# Rescore
|
|
93
99
|
logger.debug(f"Mokapot brew options: `{kwargs}`")
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
|
|
98
|
-
mokapot_values_targets = (
|
|
99
|
-
confidence_results.confidence_estimates["psms"].set_index("index").sort_index()[keys]
|
|
100
|
-
)
|
|
101
|
-
mokapot_values_decoys = (
|
|
102
|
-
confidence_results.decoy_confidence_estimates["psms"].set_index("index").sort_index()[keys]
|
|
103
|
-
)
|
|
104
|
-
q = np.full((len(psm_list), 3), np.nan)
|
|
105
|
-
q[mokapot_values_targets.index] = mokapot_values_targets.values
|
|
106
|
-
q[mokapot_values_decoys.index] = mokapot_values_decoys.values
|
|
107
|
-
|
|
108
|
-
# Add Mokapot results to PSMList
|
|
109
|
-
psm_list["score"] = q[:, 0]
|
|
110
|
-
psm_list["qvalue"] = q[:, 1]
|
|
111
|
-
psm_list["pep"] = q[:, 2]
|
|
112
|
-
|
|
113
|
-
# Repeat for peptide-level scores
|
|
114
|
-
peptides_targets = confidence_results.confidence_estimates["peptides"].set_index(["peptide"])[
|
|
115
|
-
keys
|
|
116
|
-
]
|
|
117
|
-
peptides_decoys = confidence_results.decoy_confidence_estimates["peptides"].set_index(
|
|
118
|
-
["peptide"]
|
|
119
|
-
)[keys]
|
|
120
|
-
peptide_info = pd.concat([peptides_targets, peptides_decoys], axis=0).to_dict(orient="index")
|
|
121
|
-
|
|
122
|
-
# Add peptide-level scores to PSM metadata
|
|
123
|
-
# run_key = "na" if not all(psm.run for psm in psm_list) else None
|
|
124
|
-
no_charge_pattern = re.compile(r"(/\d+$)")
|
|
125
|
-
for psm in psm_list:
|
|
126
|
-
peptide_scores = peptide_info[(no_charge_pattern.sub("", str(psm.peptidoform), 1))]
|
|
127
|
-
psm.metadata.update(
|
|
128
|
-
{
|
|
129
|
-
"peptide_score": peptide_scores["mokapot score"],
|
|
130
|
-
"peptide_qvalue": peptide_scores["mokapot q-value"],
|
|
131
|
-
"peptide_pep": peptide_scores["mokapot PEP"],
|
|
132
|
-
}
|
|
100
|
+
try:
|
|
101
|
+
confidence_results, models = brew(
|
|
102
|
+
lin_psm_data, model=PercolatorModel(train_fdr=train_fdr), rng=8, **kwargs
|
|
133
103
|
)
|
|
104
|
+
except RuntimeError as e:
|
|
105
|
+
raise RescoringError("Mokapot could not be run. Please check the input data.") from e
|
|
106
|
+
|
|
107
|
+
add_psm_confidence(psm_list, confidence_results)
|
|
108
|
+
add_peptide_confidence(psm_list, confidence_results)
|
|
134
109
|
|
|
135
110
|
# Write results
|
|
136
111
|
if write_weights:
|
|
@@ -245,6 +220,55 @@ def save_model_weights(
|
|
|
245
220
|
)
|
|
246
221
|
|
|
247
222
|
|
|
223
|
+
def add_psm_confidence(
    psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
) -> None:
    """
    Write Mokapot PSM-level score, q-value, and PEP into the PSM list.

    Target and decoy estimates are placed at the row positions given by their ``index`` column;
    PSMs that occur in neither table end up with ``NaN`` for all three values.

    Parameters
    ----------
    psm_list
        PSMs to update in place (``score``, ``qvalue``, and ``pep``).
    confidence_results
        Mokapot confidence estimates with target and decoy tables at the ``psms`` level.

    """
    columns = ["mokapot score", "mokapot q-value", "mokapot PEP"]
    tables = (
        confidence_results.confidence_estimates["psms"],
        confidence_results.decoy_confidence_estimates["psms"],
    )

    # One row per PSM, pre-filled with NaN; targets are written first, then decoys
    estimates = np.full((len(psm_list), 3), np.nan)
    for table in tables:
        per_psm = table.set_index("index").sort_index()[columns]
        estimates[per_psm.index] = per_psm.values

    # Copy each column to the matching PSMList field
    for position, field in enumerate(("score", "qvalue", "pep")):
        psm_list[field] = estimates[:, position]
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def add_peptide_confidence(
    psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
) -> None:
    """
    Add Mokapot peptide-level confidence estimates to PSM list.

    For each PSM, the peptide-level score, q-value, and PEP of its peptide are stored in the PSM
    ``metadata`` under the keys ``peptide_score``, ``peptide_qvalue``, and ``peptide_pep``.

    Parameters
    ----------
    psm_list
        PSMs to annotate in place.
    confidence_results
        Mokapot confidence estimates with target and decoy tables at the ``peptides`` level.

    Raises
    ------
    KeyError
        If the peptide of a PSM is not present in the peptide-level confidence estimates.

    """
    keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
    peptide_info = pd.concat(
        [
            confidence_results.confidence_estimates["peptides"].set_index("peptide")[keys],
            confidence_results.decoy_confidence_estimates["peptides"].set_index("peptide")[keys],
        ],
        axis=0,
    ).to_dict(orient="index")

    # Add peptide-level scores to PSM metadata
    # Peptides are looked up by the peptidoform string without its trailing "/<digits>" suffix
    no_charge_pattern = re.compile(r"(/\d+$)")
    for psm in psm_list:
        # ``count`` is passed by keyword: passing it positionally is deprecated since Python 3.13
        peptide_scores = peptide_info[no_charge_pattern.sub("", str(psm.peptidoform), count=1)]
        psm.metadata.update(
            {
                "peptide_score": peptide_scores["mokapot score"],
                "peptide_qvalue": peptide_scores["mokapot q-value"],
                "peptide_pep": peptide_scores["mokapot PEP"],
            }
        )
|
|
270
|
+
|
|
271
|
+
|
|
248
272
|
def _mz_to_mass(mz: float, charge: int) -> float:
|
|
249
273
|
"""Convert m/z to mass."""
|
|
250
274
|
return mz * charge - charge * nist_mass["H"][1][0]
|
|
@@ -43,13 +43,14 @@ dependencies = [
|
|
|
43
43
|
"mokapot>=0.9",
|
|
44
44
|
"ms2pip>=4.0.0-dev10",
|
|
45
45
|
"ms2rescore_rs",
|
|
46
|
-
"numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
|
|
47
|
-
"numpy>=1.16.0; python_version != '3.11'",
|
|
46
|
+
# "numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
|
|
47
|
+
# "numpy>=1.16.0; python_version != '3.11'",
|
|
48
|
+
"numpy>=1.16.0",
|
|
49
|
+
"scikit-learn==1.5.1; python_version == '3.11'",
|
|
48
50
|
"pandas>=1.0",
|
|
49
51
|
"plotly>=5",
|
|
50
|
-
"psm_utils>=0.
|
|
51
|
-
"
|
|
52
|
-
"pyteomics>=4.1.0, <4.7",
|
|
52
|
+
"psm_utils>=0.9",
|
|
53
|
+
"pyteomics>=4.7.2",
|
|
53
54
|
"rich>=12",
|
|
54
55
|
"tomli>=2; python_version < '3.11'",
|
|
55
56
|
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/config_default_tims.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/github-mark-white.png
RENAMED
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/ms2rescore_logo.png
RENAMED
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/img/program_icon.ico
RENAMED
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/package_data/ms2rescore-gui-theme.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev6 → ms2rescore-3.1.0.dev7}/ms2rescore/report/templates/target-decoy.html
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|