ms2rescore 3.1.0.dev1__tar.gz → 3.1.0.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/PKG-INFO +1 -1
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/__init__.py +1 -1
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/__main__.py +26 -1
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/core.py +2 -2
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/config_schema.json +5 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/charts.py +2 -2
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/generate.py +1 -1
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/rescoring_engines/mokapot.py +28 -9
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/rescoring_engines/percolator.py +17 -5
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/LICENSE +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/README.md +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/config_parser.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/exceptions.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/__init__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/base.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/basic.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/deeplc.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/im2deep.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/ionmob.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/maxquant.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/feature_generators/ms2pip.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/__init__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/__main__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/app.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/function2ctk.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/gui/widgets.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/__init__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/config_default.json +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/config_default_tims.json +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/__init__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/config_icon.png +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/github-mark-white.png +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/github-mark.png +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/ms2rescore_logo.png +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/program_icon.ico +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/ms2rescore-gui-theme.json +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/parse_psms.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/parse_spectra.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/__init__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/__main__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/__init__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/about.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/base.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/config.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/features.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/log.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/metadata.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/overview.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/stats-card.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/style.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/target-decoy.html +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/texts.toml +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/utils.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/rescoring_engines/__init__.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/utils.py +0 -0
- {ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ms2rescore
|
|
3
|
-
Version: 3.1.0.dev1
|
|
3
|
+
Version: 3.1.0.dev3
|
|
4
4
|
Summary: MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and retention times.
|
|
5
5
|
Keywords: MS2Rescore,MS2PIP,DeepLC,Percolator,proteomics,mass spectrometry,peptide identification,rescoring,machine learning
|
|
6
6
|
Author: Ana Sílvia C. Silva, Robbin Bouwmeester, Louise Buur
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""
|
|
2
2
|
|
|
3
3
|
import argparse
|
|
4
|
+
import cProfile
|
|
4
5
|
import importlib.resources
|
|
5
6
|
import json
|
|
6
7
|
import logging
|
|
@@ -139,6 +140,14 @@ def _argument_parser() -> argparse.ArgumentParser:
|
|
|
139
140
|
dest="fasta_file",
|
|
140
141
|
help="path to FASTA file",
|
|
141
142
|
)
|
|
143
|
+
parser.add_argument(
|
|
144
|
+
"--profile",
|
|
145
|
+
# metavar="BOOL",
|
|
146
|
+
action="store_true",
|
|
147
|
+
# type=bool,
|
|
148
|
+
# dest="profile",
|
|
149
|
+
help="boolean to enable profiling with cProfile",
|
|
150
|
+
)
|
|
142
151
|
|
|
143
152
|
return parser
|
|
144
153
|
|
|
@@ -161,6 +170,18 @@ def _setup_logging(passed_level: str, log_file: Union[str, Path]):
|
|
|
161
170
|
)
|
|
162
171
|
|
|
163
172
|
|
|
173
|
+
def profile(fnc, filepath):
|
|
174
|
+
"""A decorator that uses cProfile to profile a function"""
|
|
175
|
+
|
|
176
|
+
def inner(*args, **kwargs):
|
|
177
|
+
with cProfile.Profile() as profiler:
|
|
178
|
+
return_value = fnc(*args, **kwargs)
|
|
179
|
+
profiler.dump_stats(filepath + ".profile.prof")
|
|
180
|
+
return return_value
|
|
181
|
+
|
|
182
|
+
return inner
|
|
183
|
+
|
|
184
|
+
|
|
164
185
|
def main_tims():
|
|
165
186
|
"""Run MS²Rescore command-line interface in TIMS²Rescore mode."""
|
|
166
187
|
main(tims=True)
|
|
@@ -196,7 +217,11 @@ def main(tims=False):
|
|
|
196
217
|
|
|
197
218
|
# Run MS²Rescore
|
|
198
219
|
try:
|
|
199
|
-
|
|
220
|
+
if cli_args.profile:
|
|
221
|
+
profiled_rescore = profile(rescore, config["ms2rescore"]["output_path"])
|
|
222
|
+
profiled_rescore(configuration=config)
|
|
223
|
+
else:
|
|
224
|
+
rescore(configuration=config)
|
|
200
225
|
except Exception as e:
|
|
201
226
|
LOGGER.exception(e)
|
|
202
227
|
sys.exit(1)
|
|
@@ -175,8 +175,8 @@ def _fill_missing_precursor_info(psm_list, config):
|
|
|
175
175
|
get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required)
|
|
176
176
|
|
|
177
177
|
# Check if values are now present
|
|
178
|
-
for value_name in ["retention_time", "ion_mobility"]:
|
|
179
|
-
if (
|
|
178
|
+
for value_name, required in [("retention_time", rt_required), ("ion_mobility", im_required)]:
|
|
179
|
+
if required and (
|
|
180
180
|
0.0 in psm_list[value_name]
|
|
181
181
|
or None in psm_list[value_name]
|
|
182
182
|
or np.isnan(psm_list[value_name]).any()
|
|
@@ -157,6 +157,11 @@
|
|
|
157
157
|
"description": "Write an HTML report with various QC metrics and charts",
|
|
158
158
|
"type": "boolean",
|
|
159
159
|
"default": false
|
|
160
|
+
},
|
|
161
|
+
"profile": {
|
|
162
|
+
"description": "Write an txt report using cProfile for profiling",
|
|
163
|
+
"type": "boolean",
|
|
164
|
+
"default": false
|
|
160
165
|
}
|
|
161
166
|
}
|
|
162
167
|
}
|
|
@@ -373,7 +373,7 @@ def identification_overlap(
|
|
|
373
373
|
return figure
|
|
374
374
|
|
|
375
375
|
levels = before.levels # ["psms", "peptides", "proteins"] if all available
|
|
376
|
-
indexers = ["index", "
|
|
376
|
+
indexers = ["index", "peptide", "mokapot protein group"]
|
|
377
377
|
|
|
378
378
|
overlap_data = defaultdict(dict)
|
|
379
379
|
for level, indexer in zip(levels, indexers):
|
|
@@ -386,7 +386,7 @@ def identification_overlap(
|
|
|
386
386
|
set_after = set(df_after[df_after["mokapot q-value"] <= 0.01][indexer])
|
|
387
387
|
|
|
388
388
|
overlap_data["removed"][level] = -len(set_before - set_after)
|
|
389
|
-
overlap_data["retained"][level] = len(set_before
|
|
389
|
+
overlap_data["retained"][level] = len(set_after.intersection(set_before))
|
|
390
390
|
overlap_data["gained"][level] = len(set_after - set_before)
|
|
391
391
|
|
|
392
392
|
colors = ["#953331", "#316395", "#319545"]
|
|
@@ -185,7 +185,7 @@ def _get_stats_context(confidence_before, confidence_after):
|
|
|
185
185
|
"item": level_name,
|
|
186
186
|
"card_color": card_color,
|
|
187
187
|
"number": after,
|
|
188
|
-
"diff": f"{after - before:+}",
|
|
188
|
+
"diff": f"({after - before:+})",
|
|
189
189
|
"percentage": f"{increase:.1f}%",
|
|
190
190
|
"is_increase": increase > 0,
|
|
191
191
|
"bar_percentage": before / after * 100 if increase > 0 else after / before * 100,
|
|
@@ -20,7 +20,8 @@ If you use Mokapot through MS²Rescore, please cite:
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
import logging
|
|
23
|
-
|
|
23
|
+
import re
|
|
24
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
24
25
|
|
|
25
26
|
import mokapot
|
|
26
27
|
import numpy as np
|
|
@@ -31,6 +32,7 @@ from mokapot.dataset import LinearPsmDataset
|
|
|
31
32
|
from pyteomics.mass import nist_mass
|
|
32
33
|
|
|
33
34
|
logger = logging.getLogger(__name__)
|
|
35
|
+
logging.getLogger("numba").setLevel(logging.WARNING)
|
|
34
36
|
|
|
35
37
|
|
|
36
38
|
def rescore(
|
|
@@ -89,18 +91,15 @@ def rescore(
|
|
|
89
91
|
|
|
90
92
|
# Rescore
|
|
91
93
|
logger.debug(f"Mokapot brew options: `{kwargs}`")
|
|
92
|
-
confidence_results, models = brew(lin_psm_data, **kwargs)
|
|
94
|
+
confidence_results, models = brew(lin_psm_data, rng=8, **kwargs)
|
|
93
95
|
|
|
94
96
|
# Reshape confidence estimates to match PSMList
|
|
97
|
+
keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
|
|
95
98
|
mokapot_values_targets = (
|
|
96
|
-
confidence_results.confidence_estimates["psms"]
|
|
97
|
-
.set_index("index")
|
|
98
|
-
.sort_index()[["mokapot score", "mokapot q-value", "mokapot PEP"]]
|
|
99
|
+
confidence_results.confidence_estimates["psms"].set_index("index").sort_index()[keys]
|
|
99
100
|
)
|
|
100
101
|
mokapot_values_decoys = (
|
|
101
|
-
confidence_results.decoy_confidence_estimates["psms"]
|
|
102
|
-
.set_index("index")
|
|
103
|
-
.sort_index()[["mokapot score", "mokapot q-value", "mokapot PEP"]]
|
|
102
|
+
confidence_results.decoy_confidence_estimates["psms"].set_index("index").sort_index()[keys]
|
|
104
103
|
)
|
|
105
104
|
q = np.full((len(psm_list), 3), np.nan)
|
|
106
105
|
q[mokapot_values_targets.index] = mokapot_values_targets.values
|
|
@@ -111,6 +110,26 @@ def rescore(
|
|
|
111
110
|
psm_list["qvalue"] = q[:, 1]
|
|
112
111
|
psm_list["pep"] = q[:, 2]
|
|
113
112
|
|
|
113
|
+
# Repeat for peptide-level scores
|
|
114
|
+
peptide_info = pd.concat(
|
|
115
|
+
[
|
|
116
|
+
confidence_results.confidence_estimates["peptides"].set_index(["peptide"])[keys],
|
|
117
|
+
confidence_results.decoy_confidence_estimates["peptides"].set_index(["peptide"])[keys],
|
|
118
|
+
],
|
|
119
|
+
axis=0,
|
|
120
|
+
).to_dict(orient="index")
|
|
121
|
+
|
|
122
|
+
peptidoform_without_charge = re.compile(r"(/\d+$)")
|
|
123
|
+
for psm in psm_list:
|
|
124
|
+
peptide_scores = peptide_info[peptidoform_without_charge.sub("", str(psm.peptidoform), 1)]
|
|
125
|
+
psm.metadata.update(
|
|
126
|
+
{
|
|
127
|
+
"peptide_score": peptide_scores["mokapot score"],
|
|
128
|
+
"peptide_qvalue": peptide_scores["mokapot q-value"],
|
|
129
|
+
"peptide_pep": peptide_scores["mokapot PEP"],
|
|
130
|
+
}
|
|
131
|
+
)
|
|
132
|
+
|
|
114
133
|
# Write results
|
|
115
134
|
if write_weights:
|
|
116
135
|
try:
|
|
@@ -173,7 +192,7 @@ def convert_psm_list(
|
|
|
173
192
|
|
|
174
193
|
# Ensure filename for FlashLFQ txt output
|
|
175
194
|
if not combined_df["run"].notnull().all():
|
|
176
|
-
combined_df["run"] = "
|
|
195
|
+
combined_df["run"] = "nan"
|
|
177
196
|
|
|
178
197
|
feature_names = [f"feature:{f}" for f in feature_names] if feature_names else None
|
|
179
198
|
|
|
@@ -20,8 +20,8 @@ If you use Percolator through MS²Rescore, please cite:
|
|
|
20
20
|
import logging
|
|
21
21
|
import subprocess
|
|
22
22
|
from typing import Any, Dict, Optional
|
|
23
|
+
from copy import deepcopy
|
|
23
24
|
|
|
24
|
-
import numpy as np
|
|
25
25
|
import psm_utils
|
|
26
26
|
|
|
27
27
|
from ms2rescore.exceptions import MS2RescoreError
|
|
@@ -103,8 +103,15 @@ def rescore(
|
|
|
103
103
|
# Need to be able to link back to original PSMs, so reindex spectrum IDs, but copy PSM list
|
|
104
104
|
# to avoid modifying original...
|
|
105
105
|
# TODO: Better approach for this?
|
|
106
|
-
|
|
107
|
-
psm_list_reindexed
|
|
106
|
+
|
|
107
|
+
psm_list_reindexed = deepcopy(psm_list)
|
|
108
|
+
psm_list_reindexed.set_ranks()
|
|
109
|
+
psm_list_reindexed["spectrum_id"] = [
|
|
110
|
+
f"{psm.get_usi(as_url=False)}_{psm.rank}" for psm in psm_list_reindexed
|
|
111
|
+
]
|
|
112
|
+
spectrum_id_index = {
|
|
113
|
+
spectrum_id: index for index, spectrum_id in enumerate(psm_list_reindexed["spectrum_id"])
|
|
114
|
+
}
|
|
108
115
|
|
|
109
116
|
_write_pin_file(psm_list_reindexed, pin_filepath)
|
|
110
117
|
|
|
@@ -134,10 +141,13 @@ def rescore(
|
|
|
134
141
|
psm_list,
|
|
135
142
|
percolator_kwargs["results-psms"],
|
|
136
143
|
percolator_kwargs["decoy-results-psms"],
|
|
144
|
+
spectrum_id_index,
|
|
137
145
|
)
|
|
138
146
|
|
|
139
147
|
|
|
140
|
-
def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str):
|
|
148
|
+
def _update_psm_scores(
|
|
149
|
+
psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str, spectrum_id_index: list
|
|
150
|
+
):
|
|
141
151
|
"""
|
|
142
152
|
Update PSM scores with Percolator results.
|
|
143
153
|
|
|
@@ -150,7 +160,9 @@ def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout
|
|
|
150
160
|
psm_list_percolator = psm_utils.PSMList(psm_list=target_psms.psm_list + decoy_psms.psm_list)
|
|
151
161
|
|
|
152
162
|
# Sort by reindexed spectrum_id so order matches original PSM list
|
|
153
|
-
psm_list_percolator
|
|
163
|
+
psm_list_percolator = sorted(
|
|
164
|
+
psm_list_percolator, key=lambda psm: spectrum_id_index[psm["spectrum_id"]]
|
|
165
|
+
)
|
|
154
166
|
|
|
155
167
|
if not len(psm_list) == len(psm_list_percolator):
|
|
156
168
|
raise MS2RescoreError(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/config_default_tims.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/github-mark-white.png
RENAMED
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/ms2rescore_logo.png
RENAMED
|
File without changes
|
{ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/img/program_icon.ico
RENAMED
|
File without changes
|
{ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/package_data/ms2rescore-gui-theme.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ms2rescore-3.1.0.dev1 → ms2rescore-3.1.0.dev3}/ms2rescore/report/templates/target-decoy.html
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|