ms2rescore 3.0.3__tar.gz → 3.1.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/PKG-INFO +16 -15
  2. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/__init__.py +1 -1
  3. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/__main__.py +32 -11
  4. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/core.py +47 -7
  5. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/exceptions.py +6 -0
  6. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/feature_generators/__init__.py +2 -0
  7. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/feature_generators/deeplc.py +21 -34
  8. ms2rescore-3.1.0.dev1/ms2rescore/feature_generators/im2deep.py +169 -0
  9. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/feature_generators/ionmob.py +3 -3
  10. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/feature_generators/ms2pip.py +1 -1
  11. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/gui/app.py +27 -1
  12. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/config_default.json +2 -0
  13. ms2rescore-3.1.0.dev1/ms2rescore/package_data/config_default_tims.json +25 -0
  14. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/config_schema.json +28 -0
  15. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/parse_psms.py +45 -1
  16. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/parse_spectra.py +0 -1
  17. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/generate.py +27 -9
  18. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/texts.toml +6 -0
  19. ms2rescore-3.1.0.dev1/ms2rescore/utils.py +95 -0
  20. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/pyproject.toml +19 -14
  21. ms2rescore-3.0.3/ms2rescore/utils.py +0 -78
  22. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/LICENSE +0 -0
  23. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/README.md +0 -0
  24. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/config_parser.py +0 -0
  25. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/feature_generators/base.py +0 -0
  26. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/feature_generators/basic.py +0 -0
  27. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/feature_generators/maxquant.py +0 -0
  28. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/gui/__init__.py +0 -0
  29. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/gui/__main__.py +0 -0
  30. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/gui/function2ctk.py +0 -0
  31. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/gui/widgets.py +0 -0
  32. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/__init__.py +0 -0
  33. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/img/__init__.py +0 -0
  34. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/img/config_icon.png +0 -0
  35. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/img/github-mark-white.png +0 -0
  36. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/img/github-mark.png +0 -0
  37. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/img/ms2rescore_logo.png +0 -0
  38. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/img/program_icon.ico +0 -0
  39. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/package_data/ms2rescore-gui-theme.json +0 -0
  40. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/__init__.py +0 -0
  41. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/__main__.py +0 -0
  42. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/charts.py +0 -0
  43. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/__init__.py +0 -0
  44. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/about.html +0 -0
  45. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/base.html +0 -0
  46. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/config.html +0 -0
  47. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/features.html +0 -0
  48. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/log.html +0 -0
  49. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/metadata.html +0 -0
  50. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/overview.html +0 -0
  51. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/stats-card.html +0 -0
  52. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/style.html +0 -0
  53. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/templates/target-decoy.html +0 -0
  54. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/report/utils.py +0 -0
  55. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/rescoring_engines/__init__.py +0 -0
  56. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/rescoring_engines/mokapot.py +0 -0
  57. {ms2rescore-3.0.3 → ms2rescore-3.1.0.dev1}/ms2rescore/rescoring_engines/percolator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ms2rescore
3
- Version: 3.0.3
3
+ Version: 3.1.0.dev1
4
4
  Summary: MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and retention times.
5
5
  Keywords: MS2Rescore,MS2PIP,DeepLC,Percolator,proteomics,mass spectrometry,peptide identification,rescoring,machine learning
6
6
  Author: Ana Sílvia C. Silva, Robbin Bouwmeester, Louise Buur
@@ -13,25 +13,26 @@ Classifier: Operating System :: OS Independent
13
13
  Classifier: Programming Language :: Python :: 3 :: Only
14
14
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
15
15
  Classifier: Development Status :: 5 - Production/Stable
16
- Requires-Dist: ms2rescore_rs
17
- Requires-Dist: numpy>=1.16.0; python_version != '3.11'
18
- Requires-Dist: numpy==1.24.3; python_version == '3.11'
19
- Requires-Dist: pandas>=1.0
20
- Requires-Dist: rich>=12
21
- Requires-Dist: pyteomics>=4.1.0
22
- Requires-Dist: lxml>=4.5
23
- Requires-Dist: ms2pip>=4.0.0-dev4
24
- Requires-Dist: click>=7
25
16
  Requires-Dist: cascade-config>=0.4.0
17
+ Requires-Dist: click>=7
18
+ Requires-Dist: customtkinter>=5,<6
26
19
  Requires-Dist: deeplc>=2.2
27
20
  Requires-Dist: deeplcretrainer>=0.2
28
- Requires-Dist: tomli>=2; python_version < '3.11'
29
- Requires-Dist: psm_utils>=0.4
30
- Requires-Dist: customtkinter>=5,<6
31
- Requires-Dist: mokapot>=0.9
32
- Requires-Dist: pydantic>=1.8.2,<2
21
+ Requires-Dist: im2deep>=0.1.3
33
22
  Requires-Dist: jinja2>=3
23
+ Requires-Dist: lxml>=4.5
24
+ Requires-Dist: mokapot>=0.9
25
+ Requires-Dist: ms2pip>=4.0.0-dev10
26
+ Requires-Dist: ms2rescore_rs
27
+ Requires-Dist: numpy==1.24.3; python_version == '3.11'
28
+ Requires-Dist: numpy>=1.16.0; python_version != '3.11'
29
+ Requires-Dist: pandas>=1.0
34
30
  Requires-Dist: plotly>=5
31
+ Requires-Dist: psm_utils>=0.8
32
+ Requires-Dist: pydantic>=1.8.2,<2
33
+ Requires-Dist: pyteomics>=4.1.0, <4.7
34
+ Requires-Dist: rich>=12
35
+ Requires-Dist: tomli>=2; python_version < '3.11'
35
36
  Requires-Dist: ruff ; extra == "dev"
36
37
  Requires-Dist: black ; extra == "dev"
37
38
  Requires-Dist: pytest ; extra == "dev"
@@ -1,6 +1,6 @@
1
1
  """MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""
2
2
 
3
- __version__ = "3.0.3"
3
+ __version__ = "3.1.0-dev1"
4
4
 
5
5
  from warnings import filterwarnings
6
6
 
@@ -1,6 +1,8 @@
1
1
  """MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""
2
2
 
3
3
  import argparse
4
+ import importlib.resources
5
+ import json
4
6
  import logging
5
7
  import sys
6
8
  from pathlib import Path
@@ -10,7 +12,7 @@ from rich.console import Console
10
12
  from rich.logging import RichHandler
11
13
  from rich.text import Text
12
14
 
13
- from ms2rescore import __version__
15
+ from ms2rescore import __version__, package_data
14
16
  from ms2rescore.config_parser import parse_configurations
15
17
  from ms2rescore.core import rescore
16
18
  from ms2rescore.exceptions import MS2RescoreConfigurationError
@@ -33,19 +35,26 @@ LOGGER = logging.getLogger(__name__)
33
35
  CONSOLE = Console(record=True)
34
36
 
35
37
 
36
def _print_credits(tims=False):
    """
    Print software credits to terminal.

    Parameters
    ----------
    tims
        If True, print the TIMS²Rescore title, add the timsTOF tagline, and apply the
        ``#006cb5`` color to the full text. Defaults to False.

    """
    program_name = "TIMS²Rescore" if tims else "MS²Rescore"

    banner = Text()
    banner.append("\n")
    banner.append(program_name, style="bold link https://github.com/compomics/ms2rescore")
    banner.append(f" (v{__version__})\n", style="bold")
    if tims:
        banner.append("MS²Rescore tuned for Bruker timsTOF instruments.\n", style="italic")
    banner.append("Developed at CompOmics, VIB / Ghent University, Belgium.\n")
    banner.append("Please cite: ")
    banner.append(
        "Buur & Declercq et al. JPR (2024)",
        style="link https://doi.org/10.1021/acs.jproteome.3c00785",
    )
    banner.append("\n")

    # Only the TIMS² variant is colored; the regular banner keeps the console default.
    if tims:
        banner.stylize("#006cb5")

    CONSOLE.print(banner)
50
59
 
51
60
 
@@ -152,18 +161,30 @@ def _setup_logging(passed_level: str, log_file: Union[str, Path]):
152
161
  )
153
162
 
154
163
 
155
- def main():
164
def main_tims():
    """
    Run MS²Rescore command-line interface in TIMS²Rescore mode.

    Thin entry point that calls :func:`main` with ``tims=True``.
    """
    main(tims=True)
167
+
168
+
169
+ def main(tims=False):
156
170
  """Run MS²Rescore command-line interface."""
157
- _print_credits()
171
+ _print_credits(tims)
158
172
 
159
173
  # Parse CLI arguments and configuration file
160
174
  parser = _argument_parser()
161
175
  cli_args = parser.parse_args()
176
+
177
+ configurations = []
178
+ if cli_args.config_file:
179
+ configurations.append(cli_args.config_file)
180
+ if tims:
181
+ configurations.append(
182
+ json.load(importlib.resources.open_text(package_data, "config_default_tims.json"))
183
+ )
184
+ configurations.append(cli_args)
185
+
162
186
  try:
163
- if cli_args.config_file:
164
- config = parse_configurations([cli_args.config_file, cli_args])
165
- else:
166
- config = parse_configurations(cli_args)
187
+ config = parse_configurations(configurations)
167
188
  except MS2RescoreConfigurationError as e:
168
189
  LOGGER.critical(e)
169
190
  sys.exit(1)
@@ -3,15 +3,16 @@ import logging
3
3
  from multiprocessing import cpu_count
4
4
  from typing import Dict, Optional
5
5
 
6
+ import numpy as np
6
7
  import psm_utils.io
7
8
  from psm_utils import PSMList
8
9
 
10
+ from ms2rescore import exceptions
9
11
  from ms2rescore.feature_generators import FEATURE_GENERATORS
10
12
  from ms2rescore.parse_psms import parse_psms
11
13
  from ms2rescore.parse_spectra import get_missing_values
12
14
  from ms2rescore.report import generate
13
15
  from ms2rescore.rescoring_engines import mokapot, percolator
14
- from ms2rescore import exceptions
15
16
 
16
17
  logger = logging.getLogger(__name__)
17
18
 
@@ -58,12 +59,8 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
58
59
  f"PSMs already contain the following rescoring features: {psm_list_feature_names}"
59
60
  )
60
61
 
61
- # TODO: avoid hard coding feature generators in some way
62
- rt_required = "deeplc" in config["feature_generators"] and None in psm_list["retention_time"]
63
- im_required = "ionmob" in config["feature_generators"] and None in psm_list["ion_mobility"]
64
- if rt_required or im_required:
65
- logger.info("Parsing missing retention time and/or ion mobility values from spectra...")
66
- get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required)
62
+ # Add missing precursor info from spectrum file if needed
63
+ _fill_missing_precursor_info(psm_list, config)
67
64
 
68
65
  # Add rescoring features
69
66
  for fgen_name, fgen_config in config["feature_generators"].items():
@@ -160,6 +157,49 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
160
157
  logger.exception(e)
161
158
 
162
159
 
160
def _fill_missing_precursor_info(psm_list, config):
    """
    Fill missing precursor info from spectrum file if needed.

    Retention time is required when the ``deeplc`` feature generator is configured; ion
    mobility is required when ``ionmob`` or ``im2deep`` is configured. A value counts as
    missing when it is ``None``, ``0``, or NaN. Values that are required and missing are
    parsed from the spectrum files. Values that are not required by any configured feature
    generator are never checked, so data without ion mobility does not raise.

    Parameters
    ----------
    psm_list
        PSMs to complete. Modified in place: PSMs for which a required value is still
        ``None`` or NaN after parsing the spectra are removed.
    config
        MS²Rescore configuration; ``config["feature_generators"]`` is inspected.

    Raises
    ------
    exceptions.MissingValuesError
        If a required value is missing for every PSM after parsing the spectrum files.

    """

    def _unusable(value):
        """Return True for None, zero, or NaN (all treated as 'not provided')."""
        return value is None or value == 0 or np.isnan(value)

    def _absent(value):
        """Return True for None or NaN (no value at all)."""
        return value is None or np.isnan(value)

    # Check if required
    # TODO: avoid hard coding feature generators in some way
    generators = config["feature_generators"]
    rt_required = ("deeplc" in generators) and any(
        _unusable(v) for v in psm_list["retention_time"]
    )
    im_required = ("ionmob" in generators or "im2deep" in generators) and any(
        _unusable(v) for v in psm_list["ion_mobility"]
    )
    logger.debug(f"RT required: {rt_required}, IM required: {im_required}")

    if not (rt_required or im_required):
        return

    # Add missing values
    logger.info("Parsing missing retention time and/or ion mobility values from spectra...")
    get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required)

    # Check if values are now present, but only for values that a feature generator needs
    for value_name, required in (("retention_time", rt_required), ("ion_mobility", im_required)):
        if not required:
            continue

        values = list(psm_list[value_name])
        if not any(_unusable(v) for v in values):
            continue

        if all(_unusable(v) for v in values):
            raise exceptions.MissingValuesError(
                f"Could not find any '{value_name}' values in PSM or spectrum files. Disable "
                f"feature generators that require '{value_name}' or ensure that the values are "
                "present in the input files."
            )

        # Zero values are kept; only PSMs without any value (None/NaN) are dropped.
        keep = [not _absent(v) for v in values]
        n_missing = keep.count(False)
        if n_missing:
            logger.warning(
                f"Found {n_missing} PSMs with missing '{value_name}' values. "
                "These PSMs will be removed."
            )
            # Filter in place so the caller's PSM list actually loses these PSMs; rebinding
            # the local name would leave the caller's object untouched.
            # NOTE(review): assumes PSMList stores its PSMs in a `psm_list` list attribute
            # (matching the `PSMList(psm_list=...)` constructor) -- confirm.
            psm_list.psm_list[:] = [psm for psm, keep_psm in zip(psm_list, keep) if keep_psm]
201
+
202
+
163
203
  def _write_feature_names(feature_names, output_file_root):
164
204
  """Write feature names to file."""
165
205
  with open(output_file_root + ".feature_names.tsv", "w") as f:
@@ -25,6 +25,12 @@ class ModificationParsingError(IDFileParsingError):
25
25
  pass
26
26
 
27
27
 
28
class MissingValuesError(MS2RescoreError):
    """Required values are missing in PSMs and/or spectra."""

    pass
32
+
33
+
28
34
  class ReportGenerationError(MS2RescoreError):
29
35
  """Error while generating report."""
30
36
 
@@ -7,6 +7,7 @@ from ms2rescore.feature_generators.deeplc import DeepLCFeatureGenerator
7
7
  from ms2rescore.feature_generators.ionmob import IonMobFeatureGenerator
8
8
  from ms2rescore.feature_generators.maxquant import MaxQuantFeatureGenerator
9
9
  from ms2rescore.feature_generators.ms2pip import MS2PIPFeatureGenerator
10
+ from ms2rescore.feature_generators.im2deep import IM2DeepFeatureGenerator
10
11
 
11
12
  FEATURE_GENERATORS = {
12
13
  "basic": BasicFeatureGenerator,
@@ -14,4 +15,5 @@ FEATURE_GENERATORS = {
14
15
  "deeplc": DeepLCFeatureGenerator,
15
16
  "maxquant": MaxQuantFeatureGenerator,
16
17
  "ionmob": IonMobFeatureGenerator,
18
+ "im2deep": IM2DeepFeatureGenerator,
17
19
  }
@@ -21,12 +21,10 @@ import os
21
21
  from collections import defaultdict
22
22
  from inspect import getfullargspec
23
23
  from itertools import chain
24
- from typing import List, Optional, Union
24
+ from typing import List, Union
25
25
 
26
26
  import numpy as np
27
- import pandas as pd
28
27
  from psm_utils import PSMList
29
- from psm_utils.io import peptide_record
30
28
 
31
29
  from ms2rescore.feature_generators.base import FeatureGeneratorBase
32
30
 
@@ -41,8 +39,7 @@ class DeepLCFeatureGenerator(FeatureGeneratorBase):
41
39
  self,
42
40
  *args,
43
41
  lower_score_is_better: bool = False,
44
- calibration_set_size: Union[int, float] = 0.15,
45
- spectrum_path: Optional[str] = None,
42
+ calibration_set_size: Union[int, float, None] = None,
46
43
  processes: int = 1,
47
44
  **kwargs,
48
45
  ) -> None:
@@ -59,9 +56,6 @@ class DeepLCFeatureGenerator(FeatureGeneratorBase):
59
56
  calibration_set_size: int or float
60
57
  Amount of best PSMs to use for DeepLC calibration. If this value is lower
61
58
  than the number of available PSMs, all PSMs will be used. If None, all target PSMs with q-value <= 0.01 are used. (default: None)
62
- spectrum_path
63
- Path to spectrum file or directory with spectrum files. If None, inferred from `run`
64
- field in PSMs. Defaults to None.
65
59
  processes: {int, None}
66
60
  Number of processes to use in DeepLC. Defaults to 1.
67
61
  kwargs: dict
@@ -77,7 +71,6 @@ class DeepLCFeatureGenerator(FeatureGeneratorBase):
77
71
 
78
72
  self.lower_psm_score_better = lower_score_is_better
79
73
  self.calibration_set_size = calibration_set_size
80
- self.spectrum_path = spectrum_path
81
74
  self.processes = processes
82
75
  self.deeplc_kwargs = kwargs or {}
83
76
 
@@ -151,17 +144,15 @@ class DeepLCFeatureGenerator(FeatureGeneratorBase):
151
144
  # Make new PSM list for this run (chain PSMs per spectrum to flat list)
152
145
  psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))
153
146
 
154
- logger.debug("Calibrating DeepLC...")
155
147
  psm_list_calibration = self._get_calibration_psms(psm_list_run)
148
+ logger.debug(f"Calibrating DeepLC with {len(psm_list_calibration)} PSMs...")
156
149
  self.deeplc_predictor = self.DeepLC(
157
150
  n_jobs=self.processes,
158
151
  verbose=self._verbose,
159
152
  path_model=self.selected_model or self.user_model,
160
153
  **self.deeplc_kwargs,
161
154
  )
162
- self.deeplc_predictor.calibrate_preds(
163
- seq_df=self._psm_list_to_deeplc_peprec(psm_list_calibration)
164
- )
155
+ self.deeplc_predictor.calibrate_preds(psm_list_calibration)
165
156
  # Still calibrate for each run, but do not try out all model options.
166
157
  # Just use model that was selected based on first run
167
158
  if not self.selected_model:
@@ -174,11 +165,7 @@ class DeepLCFeatureGenerator(FeatureGeneratorBase):
174
165
  )
175
166
 
176
167
  logger.debug("Predicting retention times...")
177
- predictions = np.array(
178
- self.deeplc_predictor.make_preds(
179
- seq_df=self._psm_list_to_deeplc_peprec(psm_list_run)
180
- )
181
- )
168
+ predictions = np.array(self.deeplc_predictor.make_preds(psm_list_run))
182
169
  observations = psm_list_run["retention_time"]
183
170
  rt_diffs_run = np.abs(predictions - observations)
184
171
 
@@ -204,25 +191,25 @@ class DeepLCFeatureGenerator(FeatureGeneratorBase):
204
191
  )
205
192
  current_run += 1
206
193
 
207
- # TODO: Remove when DeepLC supports PSMList directly
208
- @staticmethod
209
- def _psm_list_to_deeplc_peprec(psm_list: PSMList) -> pd.DataFrame:
210
- peprec = peptide_record.to_dataframe(psm_list)
211
- peprec = peprec.rename(
212
- columns={
213
- "observed_retention_time": "tr",
214
- "peptide": "seq",
215
- }
216
- )[["tr", "seq", "modifications"]]
217
- return peprec
218
-
219
194
  def _get_calibration_psms(self, psm_list: PSMList):
220
195
  """Get N best scoring target PSMs for calibration."""
221
196
  psm_list_targets = psm_list[~psm_list["is_decoy"]]
222
- n_psms = self._get_number_of_calibration_psms(psm_list_targets)
223
- indices = np.argsort(psm_list_targets["score"])
224
- indices = indices[:n_psms] if self.lower_psm_score_better else indices[-n_psms:]
225
- return psm_list_targets[indices]
197
+ if self.calibration_set_size:
198
+ n_psms = self._get_number_of_calibration_psms(psm_list_targets)
199
+ indices = np.argsort(psm_list_targets["score"])
200
+ indices = indices[:n_psms] if self.lower_psm_score_better else indices[-n_psms:]
201
+ return psm_list_targets[indices]
202
+ else:
203
+ identified_psms = psm_list_targets[psm_list_targets["qvalue"] <= 0.01]
204
+ if len(identified_psms) == 0:
205
+ raise ValueError(
206
+ "No target PSMs with q-value <= 0.01 found. Please set calibration set size for calibrating deeplc."
207
+ )
208
+ elif (len(identified_psms) < 500) & (self.deeplc_kwargs["deeplc_retrain"]):
209
+ logger.warning(
210
+ " Less than 500 target PSMs with q-value <= 0.01 found for retraining. Consider turning of deeplc_retrain, as this is likely not enough data for retraining."
211
+ )
212
+ return identified_psms
226
213
 
227
214
  def _get_number_of_calibration_psms(self, psm_list):
228
215
  """Get number of calibration PSMs given `calibration_set_size` and total number of PSMs."""
@@ -0,0 +1,169 @@
1
+ """
2
+ IM2Deep ion mobility-based feature generator.
3
+
4
+ IM2Deep is a fully modification-aware peptide ion mobility predictor. It uses a deep convolutional
5
+ neural network to predict collisional cross section (CCS) values based on the atomic composition of the (modified) amino
6
+ acid residues in the peptide. See
7
+ `github.com/compomics/IM2Deep <https://github.com/compomics/IM2Deep>`_ for more information.
8
+
9
+ """
10
+
11
+ import contextlib
12
+ import logging
13
+ import os
14
+ from inspect import getfullargspec
15
+ from itertools import chain
16
+ from typing import List
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+ from im2deep.calibrate import im2ccs
21
+ from im2deep.im2deep import predict_ccs
22
+ from psm_utils import PSMList
23
+
24
+ from ms2rescore.feature_generators.base import FeatureGeneratorBase
25
+
26
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class IM2DeepFeatureGenerator(FeatureGeneratorBase):
    """IM2Deep collision cross section feature generator."""

    def __init__(
        self,
        *args,
        processes: int = 1,
        **kwargs,
    ):
        """
        Initialize the IM2DeepFeatureGenerator.

        Parameters
        ----------
        processes : int, optional
            Number of parallel processes to use for IM2Deep predictions. Default is 1.
        **kwargs : dict, optional
            Additional keyword arguments to `im2deep.predict_ccs`. Keyword arguments that
            `predict_ccs` does not accept are ignored.

        """
        super().__init__(*args, **kwargs)

        self._verbose = logger.getEffectiveLevel() <= logging.DEBUG

        # Remove any kwargs that are not IM2Deep arguments (inspect the signature only once)
        accepted_args = set(getfullargspec(predict_ccs).args)
        self.im2deep_kwargs = {k: v for k, v in kwargs.items() if k in accepted_args}
        self.im2deep_kwargs["n_jobs"] = processes

    @property
    def feature_names(self) -> List[str]:
        """Names of the rescoring features added by this feature generator."""
        return [
            "ccs_observed_im2deep",
            "ccs_predicted_im2deep",
            "ccs_error_im2deep",
            "abs_ccs_error_im2deep",
            "perc_ccs_error_im2deep",
        ]

    def add_features(self, psm_list: PSMList) -> None:
        """
        Add IM2Deep-derived features to PSMs.

        For each run, observed ion mobility is converted to CCS, a calibration set of
        confident target PSMs is selected, CCS is predicted for all PSMs of the run, and
        the observed/predicted values and their errors are stored in each PSM's
        ``rescoring_features``.

        Parameters
        ----------
        psm_list
            PSMs to add features to; updated in place.

        """
        logger.info("Adding IM2Deep-derived features to PSMs")

        # Get easy-access nested version of PSMlist
        psm_dict = psm_list.get_psm_dict()

        # Run IM2Deep for each spectrum file
        current_run = 1
        total_runs = sum(len(runs) for runs in psm_dict.values())

        for runs in psm_dict.values():
            for run, psms in runs.items():
                logger.info(
                    f"Running IM2Deep for PSMs from run ({current_run}/{total_runs}): `{run}`..."
                )

                # Disable wild logging to stdout by TensorFlow, unless in debug mode.
                # The ExitStack closes the devnull handle again after every run.
                with contextlib.ExitStack() as stack:
                    if not self._verbose:
                        devnull = stack.enter_context(open(os.devnull, "w"))
                        stack.enter_context(contextlib.redirect_stdout(devnull))

                    # Make new PSM list for this run (chain PSMs per spectrum to flat list)
                    psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))

                    logger.debug("Calibrating IM2Deep...")

                    # Convert ion mobility to CCS and calibrate CCS values
                    psm_list_run_df = psm_list_run.to_dataframe()
                    psm_list_run_df["charge"] = [
                        pep.precursor_charge for pep in psm_list_run_df["peptidoform"]
                    ]
                    psm_list_run_df["ccs_observed"] = im2ccs(
                        psm_list_run_df["ion_mobility"],
                        psm_list_run_df["precursor_mz"],
                        psm_list_run_df["charge"],
                    )

                    # Create dataframe with high confidence hits for calibration
                    cal_psm_df = self.make_calibration_df(psm_list_run_df)

                    # Make predictions with IM2Deep
                    logger.debug("Predicting CCS values...")
                    predictions = predict_ccs(
                        psm_list_run, cal_psm_df, write_output=False, **self.im2deep_kwargs
                    )

                    # Add features to PSMs
                    logger.debug("Adding features to PSMs...")
                    observations = psm_list_run_df["ccs_observed"]
                    # Keep the signed error (predicted - observed) separate from its
                    # absolute value, so both error features carry distinct information.
                    ccs_errors_run = predictions - observations
                    abs_ccs_errors_run = np.abs(ccs_errors_run)
                    for i, psm in enumerate(psm_list_run):
                        psm["rescoring_features"].update(
                            {
                                "ccs_observed_im2deep": observations[i],
                                "ccs_predicted_im2deep": predictions[i],
                                "ccs_error_im2deep": ccs_errors_run[i],
                                "abs_ccs_error_im2deep": abs_ccs_errors_run[i],
                                "perc_ccs_error_im2deep": abs_ccs_errors_run[i]
                                / observations[i]
                                * 100,
                            }
                        )

                current_run += 1

    @staticmethod
    def make_calibration_df(psm_list_df: pd.DataFrame, threshold: float = 0.25) -> pd.DataFrame:
        """
        Make dataframe for calibration of IM2Deep predictions.

        Parameters
        ----------
        psm_list_df
            DataFrame with PSMs. Must contain the columns ``qvalue``, ``is_decoy``, and
            ``charge``.
        threshold
            Sets the q-value cutoff within the identified target PSMs: only PSMs with a
            q-value strictly below the ``1 - threshold`` quantile of these q-values are
            kept. Default 0.25.

        Returns
        -------
        pd.DataFrame
            DataFrame with high confidence hits for calibration.

        """
        identified_psms = psm_list_df[
            (psm_list_df["qvalue"] < 0.01)
            & (~psm_list_df["is_decoy"])
            & (psm_list_df["charge"] < 5)  # predictions do not go higher for IM2Deep
        ]
        calibration_psms = identified_psms[
            identified_psms["qvalue"] < identified_psms["qvalue"].quantile(1 - threshold)
        ]
        logger.debug(
            f"Number of high confidence hits for calculating shift: {len(calibration_psms)}"
        )
        return calibration_psms
@@ -165,6 +165,7 @@ class IonMobFeatureGenerator(FeatureGeneratorBase):
165
165
  )
166
166
  ]
167
167
 
168
+ # TODO: Use observed m/z?
168
169
  psm_list_run_df["mz"] = psm_list_run_df.apply(
169
170
  lambda x: calculate_mz(x["sequence-tokenized"], x["charge"]), axis=1
170
171
  ) # use precursor m/z from PSMs?
@@ -175,9 +176,8 @@ class IonMobFeatureGenerator(FeatureGeneratorBase):
175
176
  )
176
177
  # calibrate CCS values
177
178
  shift_factor = self.calculate_ccs_shift(psm_list_run_df)
178
- psm_list_run_df["ccs_observed"] = psm_list_run_df.apply(
179
- lambda x: x["ccs_observed"] + shift_factor, axis=1
180
- )
179
+ psm_list_run_df["ccs_observed"] + shift_factor
180
+
181
181
  # predict CCS values
182
182
  tf_ds = to_tf_dataset_inference(
183
183
  psm_list_run_df["mz"],
@@ -193,7 +193,7 @@ class MS2PIPFeatureGenerator(FeatureGeneratorBase):
193
193
  try:
194
194
  ms2pip_results = correlate(
195
195
  psms=psm_list_run,
196
- spectrum_file=spectrum_filename,
196
+ spectrum_file=str(spectrum_filename),
197
197
  spectrum_id_pattern=self.spectrum_id_pattern,
198
198
  model=self.model,
199
199
  ms2_tolerance=self.ms2_tolerance,
@@ -360,15 +360,20 @@ class FeatureGeneratorConfig(ctk.CTkFrame):
360
360
  self.deeplc_config = DeepLCConfiguration(self)
361
361
  self.deeplc_config.grid(row=2, column=0, pady=(0, 20), sticky="nsew")
362
362
 
363
+ self.im2deep_config = Im2DeepConfiguration(self)
364
+ self.im2deep_config.grid(row=3, column=0, pady=(0, 20), sticky="nsew")
365
+
363
366
  self.ionmob_config = IonmobConfiguration(self)
364
- self.ionmob_config.grid(row=3, column=0, pady=(0, 20), sticky="nsew")
367
+ self.ionmob_config.grid(row=4, column=0, pady=(0, 20), sticky="nsew")
365
368
 
366
369
  def get(self) -> Dict:
367
370
  """Return the configuration as a dictionary."""
368
371
  basic_enabled, basic_config = self.basic_config.get()
369
372
  ms2pip_enabled, ms2pip_config = self.ms2pip_config.get()
370
373
  deeplc_enabled, deeplc_config = self.deeplc_config.get()
374
+ im2deep_enabled, im2deep_config = self.im2deep_config.get()
371
375
  ionmob_enabled, ionmob_config = self.ionmob_config.get()
376
+
372
377
  config = {}
373
378
  if basic_enabled:
374
379
  config["basic"] = basic_config
@@ -523,6 +528,27 @@ class IonmobConfiguration(ctk.CTkFrame):
523
528
  return enabled, config
524
529
 
525
530
 
531
class Im2DeepConfiguration(ctk.CTkFrame):
    def __init__(self, *args, **kwargs):
        """
        IM2Deep configuration frame.

        Holds a heading and a single switch to enable or disable IM2Deep; all positional
        and keyword arguments are passed on to the parent frame.
        """
        super().__init__(*args, **kwargs)

        self.configure(fg_color="transparent")
        self.grid_columnconfigure(0, weight=1)

        # Section heading
        self.title = widgets.Heading(self, text="im2deep")
        self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew")

        # Enable/disable switch; IM2Deep is disabled by default
        self.enabled = widgets.LabeledSwitch(self, label="Enable im2deep", default=False)
        self.enabled.grid(row=1, column=0, pady=(0, 10), sticky="nsew")

    def get(self) -> Dict:
        """
        Return the enabled state and the configuration dictionary.

        The configuration dictionary is always empty, as this frame exposes no options
        besides the enable switch.
        """
        # NOTE(review): a tuple ``(enabled, config)`` is returned although the return
        # annotation is ``Dict``.
        enabled = self.enabled.get()
        config = {}
        return enabled, config
550
+
551
+
526
552
  class RescoringEngineConfig(ctk.CTkFrame):
527
553
  def __init__(self, *args, **kwargs):
528
554
  """Rescoring engine configuration frame."""
@@ -29,6 +29,8 @@
29
29
  "id_decoy_pattern": null,
30
30
  "psm_id_pattern": null,
31
31
  "spectrum_id_pattern": null,
32
+ "psm_id_rt_pattern": null,
33
+ "psm_id_im_pattern": null,
32
34
  "lower_score_is_better": false,
33
35
  "modification_mapping": {},
34
36
  "fixed_modifications": {},
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "./config_schema.json",
3
+ "ms2rescore": {
4
+ "feature_generators": {
5
+ "basic": {},
6
+ "ms2pip": {
7
+ "model": "timsTOF",
8
+ "ms2_tolerance": 0.02
9
+ },
10
+ "deeplc": {
11
+ "deeplc_retrain": false
12
+ },
13
+ "im2deep": {},
14
+ "maxquant": {}
15
+ },
16
+ "rescoring_engine": {
17
+ "mokapot": {
18
+ "write_weights": true,
19
+ "write_txt": true,
20
+ "write_flashlfq": true
21
+ }
22
+ },
23
+ "psm_file": null
24
+ }
25
+ }
@@ -29,6 +29,9 @@
29
29
  },
30
30
  "ionmob": {
31
31
  "$ref": "#/definitions/ionmob"
32
+ },
33
+ "im2deep": {
34
+ "$ref": "#/definitions/im2deep"
32
35
  }
33
36
  },
34
37
  "default": {
@@ -107,6 +110,18 @@
107
110
  "default": "(.*)",
108
111
  "format": "regex"
109
112
  },
113
+ "psm_id_rt_pattern": {
114
+ "description": "Regex pattern to extract retention time from PSM identifier. Requires at least one capturing group.",
115
+ "oneOf": [{ "type": "string" }, { "type": "null" }],
116
+ "default": null,
117
+ "format": "regex"
118
+ },
119
+ "psm_id_im_pattern": {
120
+ "description": "Regex pattern to extract ion mobility from PSM identifier. Requires at least one capturing group.",
121
+ "oneOf": [{ "type": "string" }, { "type": "null" }],
122
+ "default": null,
123
+ "format": "regex"
124
+ },
110
125
  "lower_score_is_better": {
111
126
  "description": "Bool indicating if lower score is better",
112
127
  "type": "boolean",
@@ -224,6 +239,19 @@
224
239
  }
225
240
  }
226
241
  },
242
+ "im2deep": {
243
+ "$ref": "#/definitions/feature_generator",
244
+ "description": "Ion mobility feature generator configuration using IM2Deep",
245
+ "type": "object",
246
+ "additionalProperties": true,
247
+ "properties": {
248
+ "reference_dataset": {
249
+ "description": "Path to IM2Deep reference dataset file",
250
+ "type": "string",
251
+ "default": "Meier_unimod.parquet"
252
+ }
253
+ }
254
+ },
227
255
  "mokapot": {
228
256
  "$ref": "#/definitions/rescoring_engine",
229
257
  "description": "Mokapot rescoring engine configuration. Additional properties are passed to the Mokapot brew function.",
@@ -27,6 +27,9 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
27
27
  psm_list = _read_psms(config, psm_list)
28
28
  _find_decoys(config, psm_list)
29
29
  _calculate_qvalues(config, psm_list)
30
+ if config["psm_id_rt_pattern"] or config["psm_id_im_pattern"]:
31
+ logger.debug("Parsing retention time and/or ion mobility from PSM identifier...")
32
+ _parse_values_spectrum_id(config, psm_list)
30
33
 
31
34
  # Store scoring values for comparison later
32
35
  for psm in psm_list:
@@ -51,7 +54,8 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
51
54
  non_mapped_modifications = modifications_found - set(config["modification_mapping"].keys())
52
55
  if non_mapped_modifications:
53
56
  logger.warning(
54
- f"Non-mapped modifications found: {non_mapped_modifications}\nThis can be ignored if Unimod modification label"
57
+ f"Non-mapped modifications found: {non_mapped_modifications}\n"
58
+ "This can be ignored if they are Unimod modification labels."
55
59
  )
56
60
  psm_list.rename_modifications(config["modification_mapping"])
57
61
  psm_list.add_fixed_modifications(config["fixed_modifications"])
@@ -154,6 +158,46 @@ def _match_psm_ids(old_id, regex_pattern):
154
158
  )
155
159
 
156
160
 
161
def _parse_values_spectrum_id(config, psm_list):
    """
    Parse retention time and/or ion mobility values from the spectrum_id.

    For each configured pattern (``psm_id_rt_pattern`` and ``psm_id_im_pattern``), the
    first capturing group of the regex is searched for in every ``psm.spectrum_id``,
    converted to ``float``, and assigned to ``psm_list["retention_time"]`` or
    ``psm_list["ion_mobility"]``, respectively. Patterns that are falsy (e.g., ``None``)
    are skipped.

    Parameters
    ----------
    config
        Mapping with the keys ``psm_id_rt_pattern`` and ``psm_id_im_pattern``.
    psm_list
        Iterable of PSMs exposing ``spectrum_id`` that supports column assignment
        through ``psm_list[name] = values``.

    Raises
    ------
    MS2RescoreConfigurationError
        If the pattern is not a valid regex, does not match a spectrum_id, has no
        capturing group, or captures a value that cannot be converted to ``float``.

    """
    targets = (
        ("psm_id_rt_pattern", "retention_time", "retention time"),
        ("psm_id_im_pattern", "ion_mobility", "ion mobility"),
    )

    for pattern_key, psm_attribute, label in targets:
        pattern = config[pattern_key]
        if not pattern:
            continue

        logger.debug("Parsing %s from spectrum_id with regex pattern %s", label, pattern)
        try:
            compiled = re.compile(pattern)
            values = [float(compiled.search(psm.spectrum_id).group(1)) for psm in psm_list]
        except (AttributeError, IndexError, TypeError, ValueError, re.error) as err:
            # AttributeError: the pattern did not match (``search`` returned ``None``).
            # IndexError: the pattern has no capturing group.
            # TypeError: the capturing group did not take part in the match (``None``).
            # ValueError: the captured text is not a number.
            # re.error: the pattern is not a valid regular expression.
            raise MS2RescoreConfigurationError(
                f"Could not parse {label} from spectrum_id with the "
                f"{pattern} regex pattern. "
                f"Please make sure the {label} key is present in the spectrum_id "
                "and the value is in a capturing group or disable the relevant feature generator."
            ) from err

        # Assign outside of the ``try`` so that errors raised by the PSM list itself are
        # not misreported as a configuration problem.
        psm_list[psm_attribute] = values
199
+
200
+
157
201
  def _has_invalid_aminoacids(psm):
158
202
  """Check if a PSM contains invalid amino acids."""
159
203
 
@@ -6,7 +6,6 @@ from itertools import chain
6
6
 
7
7
  from ms2rescore_rs import get_precursor_info
8
8
  from psm_utils import PSMList
9
- from rich.progress import track
10
9
 
11
10
  from ms2rescore.exceptions import MS2RescoreError
12
11
  from ms2rescore.utils import infer_spectrum_path
@@ -145,9 +145,11 @@ def _collect_files(output_path_prefix, use_txt_log=False):
145
145
  "configuration": Path(output_path_prefix + ".full-config.json").resolve(),
146
146
  "feature names": Path(output_path_prefix + ".feature_names.tsv").resolve(),
147
147
  "feature weights": Path(output_path_prefix + ".mokapot.weights.tsv").resolve(),
148
- "log": Path(output_path_prefix + ".log.txt").resolve()
149
- if use_txt_log
150
- else Path(output_path_prefix + ".log.html").resolve(),
148
+ "log": (
149
+ Path(output_path_prefix + ".log.txt").resolve()
150
+ if use_txt_log
151
+ else Path(output_path_prefix + ".log.html").resolve()
152
+ ),
151
153
  }
152
154
  for file, path in files.items():
153
155
  if Path(path).is_file():
@@ -321,16 +323,12 @@ def _get_features_context(
321
323
  import deeplc.plot
322
324
 
323
325
  scatter_chart = deeplc.plot.scatter(
324
- df=features[
325
- (psm_list["is_decoy"] == False) & (psm_list["qvalue"] <= 0.01)
326
- ], # noqa: E712
326
+ df=features[(~psm_list["is_decoy"]) & (psm_list["qvalue"] <= 0.01)],
327
327
  predicted_column="predicted_retention_time_best",
328
328
  observed_column="observed_retention_time_best",
329
329
  )
330
330
  baseline_chart = deeplc.plot.distribution_baseline(
331
- df=features[
332
- (psm_list["is_decoy"] == False) & (psm_list["qvalue"] <= 0.01)
333
- ], # noqa: E712
331
+ df=features[(~psm_list["is_decoy"]) & (psm_list["qvalue"] <= 0.01)],
334
332
  predicted_column="predicted_retention_time_best",
335
333
  observed_column="observed_retention_time_best",
336
334
  )
@@ -343,6 +341,26 @@ def _get_features_context(
343
341
  }
344
342
  )
345
343
 
344
+ # IM2Deep specific charts
345
+ if "im2deep" in feature_names:
346
+ import deeplc.plot
347
+
348
+ scatter_chart = deeplc.plot.scatter(
349
+ df=features[(~psm_list["is_decoy"]) & (psm_list["qvalue"] <= 0.01)],
350
+ predicted_column="ccs_predicted_im2deep",
351
+ observed_column="ccs_observed_im2deep",
352
+ xaxis_label="Observed CCS",
353
+ yaxis_label="Predicted CCS",
354
+ plot_title="Predicted vs. observed CCS",
355
+ )
356
+
357
+ context["charts"].append(
358
+ {
359
+ "title": TEXTS["charts"]["im2deep_performance"]["title"],
360
+ "description": TEXTS["charts"]["im2deep_performance"]["description"],
361
+ "chart": scatter_chart.to_html(**PLOTLY_HTML_KWARGS),
362
+ }
363
+ )
346
364
  return context
347
365
 
348
366
 
@@ -105,3 +105,9 @@ bottom chart shows the distribution of RMAE values of DeepLC predictions on 460
105
105
  datasets. The red line indicates the RMAE value for all target PSMs that passed the 1% FDR threshold
106
106
  of the current dataset. A lower RMAE value indicates better performance.
107
107
  """
108
+
109
+ [charts.im2deep_performance]
110
+ title = "IM2Deep model performance"
111
+ description = """
112
+ IM2Deep model performance can be visualized by plotting the predicted CCS against the observed CCS.
113
+ """
@@ -0,0 +1,95 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ from glob import glob
5
+ from pathlib import Path
6
+ from typing import Optional, Union
7
+
8
+ from ms2rescore.exceptions import MS2RescoreConfigurationError
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def infer_spectrum_path(
    configured_path: Union[str, Path, None],
    run_name: Optional[str] = None,
) -> Union[str, Path]:
    """
    Infer spectrum path from passed path and expected filename (e.g. from PSM file).

    Parameters
    ----------
    configured_path: str, Path, None
        User-defined path to spectrum file or directory containing spectrum file
    run_name : str, optional
        MS run name (stem of spectrum filename), e.g., as expected from PSM file.

    Returns
    -------
    pathlib.Path
        Resolved path to the spectrum file or Bruker raw data directory.

    Raises
    ------
    MS2RescoreConfigurationError
        If no path is configured (or the configured path is a plain directory) and no
        run name is available, if the configured path does not exist, or if the
        resolved path has no supported extension and no matching file is found.

    """
    # If no spectrum path configured, use expected run_name in default dir
    if not configured_path:
        if not run_name:
            raise MS2RescoreConfigurationError(
                "Could not resolve spectrum file name: No spectrum path configured "
                "and no run name in PSM file found."
            )
        resolved_path = os.path.join(".", run_name)

    else:
        # Normalize to ``str``: the string operations below (``endswith``, regex search,
        # glob concatenation) would otherwise fail for ``pathlib.Path`` input.
        configured_path = os.fspath(configured_path)
        is_bruker_dir = configured_path.endswith(".d") or _is_minitdf(configured_path)
        is_dir = os.path.isdir(configured_path)

        # If passed path is directory (that is not Bruker raw), join with run name
        if is_dir and not is_bruker_dir:
            if not run_name:
                raise MS2RescoreConfigurationError(
                    "Could not resolve spectrum file name: Spectrum path is directory "
                    "but no run name in PSM file found."
                )
            resolved_path = os.path.join(configured_path, run_name)

        # If passed path is file, use that, but warn if basename doesn't match expected
        elif os.path.isfile(configured_path) or (is_dir and is_bruker_dir):
            if run_name and Path(configured_path).stem != Path(run_name).stem:
                logger.warning(
                    "Passed spectrum path (`%s`) does not match run name found in PSM "
                    "file (`%s`). Continuing with passed spectrum path.",
                    configured_path,
                    run_name,
                )
            resolved_path = configured_path
        else:
            raise MS2RescoreConfigurationError(
                "Configured `spectrum_path` must be `None` or a path to an existing file "
                "or directory. If `None` or path to directory, spectrum run information "
                "should be present in the PSM file."
            )

    # Match with file extension if not in resolved_path yet. The pattern is anchored at
    # the end and used with ``search``: ``re.match`` only matches at the start of the
    # string and would therefore never recognize an extension.
    supported_extension = re.compile(r"\.(mgf|mzml|d)$", flags=re.IGNORECASE)
    if not _is_minitdf(resolved_path) and not supported_extension.search(resolved_path):
        # Sorted for a reproducible choice when multiple candidates exist
        for filename in sorted(glob(resolved_path + "*")):
            if supported_extension.search(filename):
                resolved_path = filename
                break
        else:
            raise MS2RescoreConfigurationError(
                f"Resolved spectrum filename ('{resolved_path}') does not contain a supported "
                "file extension (mzML, MGF, or .d) and could not find any matching existing "
                "files."
            )

    return Path(resolved_path)
84
+
85
+
86
def _is_minitdf(spectrum_file: str) -> bool:
    """
    Check if the spectrum file is a Bruker miniTDF folder.

    A Bruker miniTDF folder has no fixed name, but contains files matching the patterns
    ``*ms2spectrum.bin`` and ``*ms2spectrum.parquet``.

    Parameters
    ----------
    spectrum_file : str
        Path to check. Paths that are not existing directories yield ``False``.

    Returns
    -------
    bool
        ``True`` if at least one file matches each of the two patterns.

    """
    folder = Path(spectrum_file)
    # Require a match for *each* pattern: counting the combined matches would also
    # accept, e.g., two ``.bin`` files without any ``.parquet`` file.
    return all(
        any(folder.glob(pattern)) for pattern in ("*ms2spectrum.bin", "*ms2spectrum.parquet")
    )
@@ -32,25 +32,26 @@ classifiers = [
32
32
  dynamic = ["version"]
33
33
  requires-python = ">=3.8"
34
34
  dependencies = [
35
- "ms2rescore_rs",
36
- "numpy>=1.16.0; python_version != '3.11'",
37
- "numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
38
- "pandas>=1.0",
39
- "rich>=12",
40
- "pyteomics>=4.1.0",
41
- "lxml>=4.5",
42
- "ms2pip>=4.0.0-dev4",
43
- "click>=7",
44
35
  "cascade-config>=0.4.0",
36
+ "click>=7",
37
+ "customtkinter>=5,<6",
45
38
  "deeplc>=2.2",
46
39
  "deeplcretrainer>=0.2",
47
- "tomli>=2; python_version < '3.11'",
48
- "psm_utils>=0.4",
49
- "customtkinter>=5,<6",
50
- "mokapot>=0.9",
51
- "pydantic>=1.8.2,<2", # Fix compatibility with v2 in psm_utils
40
+ "im2deep>=0.1.3",
52
41
  "jinja2>=3",
42
+ "lxml>=4.5",
43
+ "mokapot>=0.9",
44
+ "ms2pip>=4.0.0-dev10",
45
+ "ms2rescore_rs",
46
+ "numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
47
+ "numpy>=1.16.0; python_version != '3.11'",
48
+ "pandas>=1.0",
53
49
  "plotly>=5",
50
+ "psm_utils>=0.8",
51
+ "pydantic>=1.8.2,<2", # Fix compatibility with v2 in psm_utils
52
+ "pyteomics>=4.1.0, <4.7",
53
+ "rich>=12",
54
+ "tomli>=2; python_version < '3.11'",
54
55
  ]
55
56
 
56
57
  [project.optional-dependencies]
@@ -79,6 +80,7 @@ CompOmics = "https://www.compomics.com"
79
80
  ms2rescore = "ms2rescore.__main__:main"
80
81
  ms2rescore-gui = "ms2rescore.gui.__main__:main"
81
82
  ms2rescore-report = "ms2rescore.report.__main__:main"
83
+ tims2rescore = "ms2rescore.__main__:main_tims"
82
84
 
83
85
  [build-system]
84
86
  requires = ["flit_core >=3.2,<4"]
@@ -94,3 +96,6 @@ target-version = ['py38']
94
96
  [tool.ruff]
95
97
  line-length = 99
96
98
  target-version = 'py38'
99
+
100
+ [tool.ruff.lint]
101
+ extend-select = ["T201", "T203"]
@@ -1,78 +0,0 @@
1
- import logging
2
- import os
3
- import re
4
- from glob import glob
5
- from pathlib import Path
6
- from typing import Optional, Union
7
-
8
- from ms2rescore.exceptions import MS2RescoreConfigurationError
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- def infer_spectrum_path(
14
- configured_path: Union[str, Path, None],
15
- run_name: Optional[str] = None,
16
- ) -> Union[str, Path]:
17
- """
18
- Infer spectrum path from passed path and expected filename (e.g. from PSM file).
19
-
20
- Parameters
21
- ----------
22
- configured_path: str, Path, None
23
- User-defined path to spectrum file or directory containing spectrum file
24
- run_name : str, optional
25
- MS run name (stem of spectrum filename), e.g., as expected from PSM file.
26
-
27
- """
28
- # If no spectrum path configured, use expected run_name in default dir
29
- if not configured_path:
30
- if run_name:
31
- resolved_path = os.path.join(".", run_name)
32
- else:
33
- raise MS2RescoreConfigurationError(
34
- "Could not resolve spectrum file name: No spectrum path configured "
35
- "and no run name in PSM file found."
36
- )
37
-
38
- # If passed path is directory, join with run name
39
- elif os.path.isdir(configured_path):
40
- if run_name:
41
- resolved_path = os.path.join(configured_path, run_name)
42
- else:
43
- raise MS2RescoreConfigurationError(
44
- "Could not resolve spectrum file name: Spectrum path is directory "
45
- "but no run name in PSM file found."
46
- )
47
-
48
- # If passed path is file, use that, but warn if basename doesn't match expected
49
- elif os.path.isfile(configured_path):
50
- if run_name and Path(configured_path).stem != Path(run_name).stem:
51
- logger.warning(
52
- "Passed spectrum path (`%s`) does not match run name found in PSM "
53
- "file (`%s`). Continuing with passed spectrum path.",
54
- configured_path,
55
- run_name,
56
- )
57
- resolved_path = configured_path
58
- else:
59
- raise MS2RescoreConfigurationError(
60
- "Configured `spectrum_path` must be `None` or a path to an existing file "
61
- "or directory. If `None` or path to directory, spectrum run information "
62
- "should be present in the PSM file."
63
- )
64
-
65
- # Match with file extension if not in resolved_path yet
66
- if not re.match(".mgf$|.mzml$", resolved_path, flags=re.IGNORECASE):
67
- for filename in glob(resolved_path + "*"):
68
- if re.match(r".*(\.mgf$|\.mzml$)", filename, flags=re.IGNORECASE):
69
- resolved_path = filename
70
- break
71
- else:
72
- raise MS2RescoreConfigurationError(
73
- "Resolved spectrum filename does not contain a supported file "
74
- "extension (mgf or mzml) and could not find any matching existing "
75
- "files."
76
- )
77
-
78
- return Path(resolved_path)
File without changes
File without changes