imspy-search 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
imspy_search/mgf.py ADDED
@@ -0,0 +1,139 @@
1
+ """MGF file parsing for sagepy queries."""
2
+
3
+ import numpy as np
4
+ from sagepy.core import (
5
+ Precursor, ProcessedSpectrum, RawSpectrum, SpectrumProcessor
6
+ )
7
+ from typing import List, Dict, Tuple
8
+
9
+
10
def iter_spectra(
    codelines,
    _start_tag: str = "BEGIN IONS",
    _stop_tag: str = "END IONS",
):
    """
    Iterate over the spectra in an MGF file.

    Args:
        codelines: Iterable of lines from an MGF file
        _start_tag: the tag that indicates the start of a spectrum
        _stop_tag: the tag that indicates the end of a spectrum

    Yields:
        A list of stripped lines representing each spectrum (the content
        strictly between the start and stop tags)

    Raises:
        ValueError: if the start/stop tags are not properly paired
    """
    recording = False
    buffer: list[str] = []
    for line in codelines:
        line = line.strip()
        if line.startswith(_start_tag):
            recording = True
            buffer = []
        elif line.startswith(_stop_tag):
            # An `assert` here would be stripped under `python -O`; raise instead.
            if not recording:
                raise ValueError(
                    f"Encountered '{_stop_tag}' without a preceding '{_start_tag}'."
                )
            recording = False
            yield buffer
        elif recording:
            buffer.append(line)
        # Lines outside BEGIN/END pairs (headers, blank lines) are ignored.
    if recording:
        raise ValueError(f"Input ended inside a spectrum: missing '{_stop_tag}'.")
41
+
42
+
43
def parse_spectrum(line_spectrum) -> Tuple[Dict, np.ndarray, np.ndarray]:
    """
    Parse one spectrum from its lines in the MGF file.

    Args:
        line_spectrum: List of lines for a single spectrum (between BEGIN/END IONS)

    Returns:
        precursor_info: Dictionary containing precursor information
        fragment_mzs: Numpy array of fragment m/z values
        fragment_intensities: Numpy array of fragment intensities

    Raises:
        ValueError: if required precursor fields cannot be parsed
    """
    precursor_info = {}
    fragment_mzs = []
    fragment_intensities = []
    precursor_intensity = 0

    for entry in line_spectrum:
        if not entry[0].isdigit():
            # Header line of the form NAME=VALUE.
            name, val = entry.split("=", 1)
            if name == "CHARGE":
                val = int(val.replace("+", ""))
            elif name == "PEPMASS":
                # PEPMASS carries "<mz> <intensity>"; stored under key "MZ".
                name = "MZ"
                val, precursor_intensity = val.split(" ")
                precursor_intensity = int(precursor_intensity)
                val = float(val)
            elif name == "ION_MOBILITY":
                val = float(val.split(" ")[-1])
            elif name == "RTINSECONDS":
                val = float(val)
            precursor_info[name] = val
        else:
            # Peak line of the form "<mz>\t<intensity>".
            frag = entry.split("\t")
            fragment_mzs.append(float(frag[0]))
            fragment_intensities.append(int(frag[1]))

    # An `assert` here would be stripped under `python -O`; raise instead.
    if precursor_intensity == 0:
        raise ValueError("We did not manage to parse out precursor intensity!!!")
    precursor_info["intensity"] = precursor_intensity

    # TITLE is expected to look like "Cmpd <id>, <...>, <ce> eV"
    # — TODO confirm this format against the MGF producer.
    try:
        precursor_info["ID"] = precursor_info["TITLE"].split(',')[0].split(' ')[1]
    except Exception as e:
        raise ValueError(f"{e}\nERROR IN PARSING PRECURSOR ID") from e

    try:
        precursor_info["COLLISION_ENERGY"] = float(
            precursor_info["TITLE"].split(',')[2].replace(" ", "").replace("eV", "")
        )
    except Exception as e:
        raise ValueError(f"{e}\nERROR IN PARSING COLLISION ENERGY") from e

    return precursor_info, np.array(fragment_mzs), np.array(fragment_intensities)
98
+
99
+
100
def mgf_to_sagepy_query(mgf_path: str, top_n: int = 150, file_id: int = 1) -> List[ProcessedSpectrum]:
    """
    Read an MGF file and return a list of ProcessedSpectrum.

    Args:
        mgf_path: Path to the MGF file
        top_n: Number of top peaks to keep in the spectrum
        file_id: File identifier written into each RawSpectrum
            (previously hard-coded to 1; default preserves old behavior)

    Returns:
        List of ProcessedSpectrum objects
    """
    spec_processor = SpectrumProcessor(take_top_n=top_n)
    queries = []

    with open(mgf_path, "r") as mgf:
        for spec_no, line_spectrum in enumerate(iter_spectra(mgf)):
            precursor_info, fragment_mzs, fragment_intensities = parse_spectrum(line_spectrum)

            precursor = Precursor(
                mz=precursor_info["MZ"],
                charge=precursor_info.get("CHARGE", None),
                intensity=precursor_info["intensity"],
                inverse_ion_mobility=precursor_info.get("ION_MOBILITY", None),
                collision_energy=precursor_info.get("COLLISION_ENERGY", None),
            )

            # RTINSECONDS is in seconds; downstream expects minutes.
            prec_rt = precursor_info["RTINSECONDS"] / 60.0
            raw_spectrum = RawSpectrum(
                file_id=file_id,
                spec_id=f"{spec_no}-{precursor_info['ID']}",
                total_ion_current=fragment_intensities.sum(),
                precursors=[precursor],
                mz=fragment_mzs.astype(np.float32),
                intensity=fragment_intensities.astype(np.float32),
                scan_start_time=prec_rt,
                # NOTE(review): injection time set equal to the retention
                # time looks like a placeholder — confirm intent.
                ion_injection_time=prec_rt,
            )
            queries.append(spec_processor.process(raw_spectrum))

    return queries
@@ -0,0 +1,166 @@
1
+ """PSM rescoring with deep learning features."""
2
+
3
+ import numpy as np
4
+
5
+ from sklearn.svm import SVC
6
+ from sklearn.linear_model import LogisticRegression
7
+ from sagepy.rescore.rescore import rescore_psms as sagepy_rescore_psms
8
+ from sagepy.qfdr.tdc import assign_sage_spectrum_q, assign_sage_peptide_q, assign_sage_protein_q
9
+
10
+ from sagepy.core.scoring import Psm
11
+ from sagepy.utility import psm_collection_to_pandas
12
+ from typing import List
13
+
14
+ from imspy_predictors import (
15
+ load_tokenizer_from_resources,
16
+ load_deep_ccs_predictor,
17
+ DeepPeptideIonMobilityApex,
18
+ load_deep_retention_time_predictor,
19
+ DeepChromatographyApex,
20
+ Prosit2023TimsTofWrapper,
21
+ get_collision_energy_calibration_factor,
22
+ )
23
+ from sagepy.core.scoring import associate_fragment_ions_with_prosit_predicted_intensities
24
+ from imspy_search.utility import (
25
+ linear_map,
26
+ generate_balanced_rt_dataset,
27
+ generate_balanced_im_dataset,
28
+ )
29
+
30
+
31
def re_score_psms(psms: List[Psm], use_logreg: bool = True) -> List[Psm]:
    """Re-score the PSMs using machine learning.

    Args:
        psms: The PSMs to rescore
        use_logreg: Whether to use logistic regression (True) or SVM (False)

    Returns:
        The re-scored PSMs (rank-1 hits only, with q-values assigned)
    """
    # SVC needs probability=True so it can produce calibrated scores.
    model = LogisticRegression() if use_logreg else SVC(probability=True)

    rescored = sagepy_rescore_psms(
        psm_collection=psms,
        model=model,
        num_splits=3,
        verbose=True
    )

    # Keep only the best-ranked match per spectrum.
    rescored = [psm for psm in rescored if psm.rank == 1]

    # Assign q-values at spectrum, peptide and protein level.
    for assign_q in (assign_sage_spectrum_q, assign_sage_peptide_q, assign_sage_protein_q):
        assign_q(rescored, use_hyper_score=False)

    return rescored
60
+
61
+
62
def create_feature_space(
    psms: List[Psm],
    fine_tune_im: bool = True,
    fine_tune_rt: bool = True,
    verbose: bool = False
) -> List[Psm]:
    """Create a feature space for the PSMs with predicted properties.

    Adds calibrated collision energy, Prosit-predicted fragment intensities,
    predicted ion mobility, and predicted retention time to each PSM.

    Args:
        psms: The PSMs to add features to
        fine_tune_im: Whether to fine-tune the ion mobility predictor
        fine_tune_rt: Whether to fine-tune the retention time predictor
        verbose: Whether to print information

    Returns:
        The PSMs with added feature space
    """

    def _modified_sequences(collection):
        # For decoys the decoy sequence carries the modifications; use it.
        return [
            p.sequence_decoy_modified if p.decoy else p.sequence_modified
            for p in collection
        ]

    # Take the top 256 scoring PSMs to calibrate collision energy
    sample = sorted(psms, key=lambda s: s.hyperscore)[-2 ** 8:]

    # Load prosit model
    prosit_model = Prosit2023TimsTofWrapper(verbose=verbose)

    # Load ion mobility predictor
    im_predictor = DeepPeptideIonMobilityApex(
        load_deep_ccs_predictor(),
        load_tokenizer_from_resources("tokenizer-ptm")
    )

    # Load retention time predictor
    rt_predictor = DeepChromatographyApex(
        load_deep_retention_time_predictor(),
        load_tokenizer_from_resources("tokenizer-ptm"),
        verbose=verbose
    )

    # Calculate the collision energy calibration factor from the sample
    collision_energy_calibration_factor, angles = get_collision_energy_calibration_factor(
        sample,
        prosit_model,
        verbose=verbose
    )

    # Add the calibration factor to the PSMs
    for p in psms:
        p.collision_energy_calibrated = p.collision_energy + collision_energy_calibration_factor

    # Predict the fragment intensity values
    I = prosit_model.predict_intensities(
        _modified_sequences(psms),
        np.array([p.charge for p in psms]),
        [p.collision_energy_calibrated for p in psms],
        batch_size=2048,
        flatten=True,
    )

    # Add intensity values to PSMs
    psms = associate_fragment_ions_with_prosit_predicted_intensities(psms, I, num_threads=16)

    if fine_tune_im:
        # Fit ion mobility predictor on a balanced subset
        im_predictor.fine_tune_model(
            data=psm_collection_to_pandas(generate_balanced_im_dataset(psms=psms)),
            batch_size=1024,
            re_compile=True,
            verbose=verbose,
        )

    # Predict ion mobilities
    inv_mob = im_predictor.simulate_ion_mobilities(
        sequences=_modified_sequences(psms),
        charges=[x.charge for x in psms],
        mz=[x.mono_mz_calculated for x in psms]
    )

    # Set ion mobilities
    for mob, p in zip(inv_mob, psms):
        p.inverse_ion_mobility_predicted = mob

    rt_min = min(p.retention_time for p in psms)
    rt_max = max(p.retention_time for p in psms)

    # Map the observed retention time into the domain [0, 60]
    for p in psms:
        p.retention_time_projected = linear_map(p.retention_time, rt_min, rt_max, 0.0, 60.0)

    if fine_tune_rt:
        # Fit retention time predictor on a balanced subset
        rt_predictor.fine_tune_model(
            data=psm_collection_to_pandas(generate_balanced_rt_dataset(psms=psms)),
            batch_size=1024,
            re_compile=True,
            verbose=verbose,
        )

    # Predict retention times
    rt_pred = rt_predictor.simulate_separation_times(
        sequences=_modified_sequences(psms),
    )

    # Set retention times
    for rt, p in zip(rt_pred, psms):
        p.retention_time_predicted = rt

    return psms
@@ -0,0 +1,318 @@
1
+ """Utilities for processing SAGE search output and rescoring PSMs."""
2
+
3
+ import re
4
+ import numpy as np
5
+ import pandas as pd
6
+ import random
7
+
8
+ from sagepy.core import Fragments, IonType
9
+ from scipy.spatial import distance
10
+
11
+ from sagepy.utility import get_features
12
+ from sagepy.qfdr.tdc import target_decoy_competition_pandas
13
+ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
14
+ from sklearn.preprocessing import StandardScaler
15
+
16
+ from matplotlib import pyplot as plt
17
+
18
+ from numpy.typing import NDArray
19
+ from typing import Tuple
20
+
21
+ from tqdm import tqdm
22
+
23
+
24
def break_into_equal_size_sets(sequence_set, k: int = 10):
    """
    Breaks a set of objects into k sets of (near) equal size at random.

    Args:
        sequence_set: Set of sequences to be divided
        k: Number of sets to divide the objects into

    Returns:
        A list containing k sets of randomly chosen sequences; the first
        `len(sequence_set) % k` sets hold one extra element each
    """
    shuffled = list(sequence_set)
    random.shuffle(shuffled)

    base_size, extra = divmod(len(shuffled), k)

    partitions = []
    cursor = 0
    for idx in range(k):
        # The first `extra` partitions absorb one leftover element each.
        step = base_size + (1 if idx < extra else 0)
        partitions.append(set(shuffled[cursor:cursor + step]))
        cursor += step

    return partitions
49
+
50
+
51
def split_dataframe_randomly(df: pd.DataFrame, n: int) -> list:
    """Split a DataFrame randomly into n parts based on unique sequences.

    All rows sharing a `sequence` value land in the same part, so no
    peptide is split across folds.

    Args:
        df: DataFrame with a `sequence` column
        n: Number of parts to split into

    Returns:
        List of n DataFrames partitioning the rows of `df`
    """
    split_sets = break_into_equal_size_sets(set(df.sequence.values), n)

    # `Series.isin` is vectorized — replaces the per-row Python-level
    # `.apply(lambda s: s in seq_set)` membership test.
    return [df[df["sequence"].isin(seq_set)] for seq_set in split_sets]
61
+
62
+
63
def generate_training_data(
    psms: pd.DataFrame,
    method: str = "psm",
    q_max: float = 0.01,
    balance: bool = True
) -> Tuple[NDArray, NDArray]:
    """Generate training data for LDA rescoring.

    Args:
        psms: DataFrame of PSMs
        method: Method to use for target-decoy competition
        q_max: Maximum q-value allowed for positive (target) examples
        balance: Whether to down-sample targets to the number of decoys

    Returns:
        Tuple of X_train (feature matrix) and Y_train (labels)
    """
    PSM_pandas = psms
    PSM_q = target_decoy_competition_pandas(PSM_pandas, method=method)
    PSM_pandas = PSM_pandas.drop(columns=["q_value", "score"])

    # Re-attach the feature columns to the competition results.
    TDC = pd.merge(PSM_q, PSM_pandas, left_on=["spec_idx", "match_idx", "decoy"],
                   right_on=["spec_idx", "match_idx", "decoy"])

    # Positive examples: confident targets below the q-value threshold.
    TARGET = TDC[(~TDC.decoy) & (TDC.q_value <= q_max)]
    X_target, Y_target = get_features(TARGET)

    DECOY = TDC[TDC.decoy]
    X_decoy, Y_decoy = get_features(DECOY)

    if balance:
        num_target = np.min((len(DECOY), len(TARGET)))
        # replace=False: sample WITHOUT replacement. The default
        # (replace=True) could duplicate target rows, biasing training.
        # num_target <= len(X_target) by construction, so this is valid.
        target_indices = np.random.choice(np.arange(len(X_target)), size=num_target, replace=False)
        X_target = X_target[target_indices, :]
        Y_target = Y_target[target_indices]

    X_train = np.vstack((X_target, X_decoy))
    Y_train = np.hstack((Y_target, Y_decoy))

    return X_train, Y_train
103
+
104
+
105
def re_score_psms(
    psms: pd.DataFrame,
    num_splits: int = 10,
    verbose: bool = True,
    balance: bool = True,
    score: str = "hyperscore",
    positive_example_q_max: float = 0.01,
) -> pd.DataFrame:
    """Re-score PSMs using LDA with cross-validated splits.

    Args:
        psms: DataFrame of PSMs
        num_splits: Number of splits for cross-validation
        verbose: Whether to print progress
        balance: Whether to balance the dataset
        score: Score to use for re-scoring
        positive_example_q_max: Maximum q-value allowed for positive examples

    Returns:
        DataFrame with columns spec_idx, rank and re_score
    """
    # Fit the scaler on ALL features so every fold uses the same scaling.
    scaler = StandardScaler()
    X_all, _ = get_features(psms, score=score)
    X_all = np.nan_to_num(X_all, nan=0.0)
    scaler.fit(X_all)

    splits = split_dataframe_randomly(df=psms, n=num_splits)
    predictions, ids, ranks = [], [], []

    for i in tqdm(range(num_splits), disable=not verbose, desc='Re-scoring PSMs', ncols=100):
        target = splits[i]
        ids.extend(target["spec_idx"].values)
        ranks.extend(target["rank"].values)

        # Train on every split except the one being scored.
        features = [splits[j] for j in range(num_splits) if j != i]
        if num_splits == 1:
            features = [target]

        X_train, Y_train = generate_training_data(pd.concat(features), balance=balance, q_max=positive_example_q_max)
        X_train, Y_train = np.nan_to_num(X_train, nan=0.0), np.nan_to_num(Y_train, nan=0.0)
        X, _ = get_features(target)
        X = np.nan_to_num(X, nan=0.0)

        lda = LinearDiscriminantAnalysis(solver="eigen", shrinkage="auto")
        lda.fit(scaler.transform(X_train), Y_train)

        # Orient scores so larger means more target-like: if the top-scoring
        # training example is not a target, flip the sign.
        try:
            top_idx = np.argmax(np.squeeze(lda.transform(scaler.transform(X_train))))
            score_flip = 1.0 if Y_train[top_idx] == 1.0 else -1.0
        except Exception:
            # Narrowed from a bare `except:` which would also swallow
            # KeyboardInterrupt/SystemExit. Default to no flip.
            score_flip = 1.0

        Y_pred = np.squeeze(lda.transform(scaler.transform(X))) * score_flip
        predictions.extend(Y_pred)

    return pd.DataFrame({
        "spec_idx": ids,
        "rank": ranks,
        "re_score": predictions
    })
169
+
170
+
171
def cosim_from_dict(observed, predicted):
    """Calculate cosine similarity between observed and predicted intensities.

    Keys present only in `observed` contribute 0 on the predicted side;
    keys present only in `predicted` are ignored.
    """
    keys = list(observed)
    observed_vec = np.array([observed[key] for key in keys])
    predicted_vec = np.array([predicted.get(key, 0) for key in keys])
    return 1 - distance.cosine(observed_vec, predicted_vec)
186
+
187
+
188
def row_to_fragment(r):
    """Convert a DataFrame row to a Fragments object.

    Expects the row to carry parallel fragment_* columns.
    Any ion type other than "b" is treated as a y-ion.
    """
    parsed_ion_types = [
        IonType("b") if ion == "b" else IonType("y")
        for ion in r.fragment_type
    ]
    return Fragments(
        r.fragment_charge,
        parsed_ion_types,
        r.fragment_ordinals,
        r.fragment_intensity,
        r.fragment_mz_calculated,
        r.fragment_mz_experimental,
    )
205
+
206
+
207
def remove_substrings(input_string: str) -> str:
    """Strip every bracketed substring (e.g. "[UNIMOD:4]") from the input.

    Uses a non-greedy match so adjacent brackets are removed independently.
    """
    return re.sub(r'\[.*?\]', '', input_string)
211
+
212
+
213
# Token replacements for modification annotation conversion.
# Maps bracketed mass-delta tags to UNIMOD accessions; both the rounded and
# the full-precision spelling of the same delta map to one accession.
# NOTE(review): presumably +42 -> acetyl, +57 -> carbamidomethyl,
# +79.97 -> phospho, +15.99 -> oxidation — confirm against UNIMOD
# before relying on those names.
replace_tokens = {
    "[+42]": "[UNIMOD:1]",
    "[+42.010565]": "[UNIMOD:1]",
    "[+57.0215]": "[UNIMOD:4]",
    "[+57.021464]": "[UNIMOD:4]",
    "[+79.9663]": "[UNIMOD:21]",
    "[+15.9949]": "[UNIMOD:35]",
    "[+15.994915]": "[UNIMOD:35]",
}
223
+
224
+
225
+ class PatternReplacer:
226
+ """Replace patterns in strings using a replacement dictionary."""
227
+
228
+ def __init__(
229
+ self,
230
+ replacements: dict[str, str],
231
+ pattern: str | re.Pattern = r"\[.*?\]",
232
+ ):
233
+ self.pattern = re.compile(pattern)
234
+ self.replacements = replacements
235
+ for _in, _out in replacements.items():
236
+ assert (
237
+ len(re.findall(self.pattern, _in)) > 0
238
+ ), f"The submitted replacement, `{_in}->{_out}`, cannot be used with pattern `{pattern}`."
239
+
240
+ def apply(self, string: str) -> str:
241
+ """Apply replacements to a string."""
242
+ out_sequence = string
243
+ for _in in set(re.findall(self.pattern, string)):
244
+ try:
245
+ _out = self.replacements[_in]
246
+ except KeyError:
247
+ raise KeyError(
248
+ f"Modification {_in} not among those specified in the replacements."
249
+ )
250
+ out_sequence = out_sequence.replace(_in, _out)
251
+ return out_sequence
252
+
253
+
254
def fragments_to_dict(fragments: "Fragments") -> dict:
    """Convert a Fragments object to a dictionary.

    Args:
        fragments: Object exposing parallel `charges`, `ion_types`,
            `fragment_ordinals` and `intensities` sequences

    Returns:
        Mapping of (charge, ion_type, ordinal) -> intensity
    """
    # String annotation keeps the sagepy type lazy at import time; a dict
    # comprehension replaces the manual build loop.
    return {
        (charge, ion_type, ordinal): intensity
        for charge, ion_type, ordinal, intensity in zip(
            fragments.charges, fragments.ion_types, fragments.fragment_ordinals, fragments.intensities
        )
    }
262
+
263
+
264
def plot_summary(TARGET, DECOY, save_path, dpi=300, file_format='png'):
    """Create and save a 3x2 summary plot for target and decoy PSMs.

    Panels: RT prediction scatter, IM prediction scatter, RT delta histogram,
    IM delta histogram, cosine similarity histogram, re-score histogram.

    Args:
        TARGET: DataFrame of target PSMs (needs projected_rt, rt_predicted,
            ion_mobility, inv_mob_predicted, cosine_similarity, re_score)
        DECOY: DataFrame of decoy PSMs with the same columns
        save_path: Where to write the figure
        dpi: Resolution of the saved figure
        file_format: Image format passed to savefig
    """
    fig, axs = plt.subplots(3, 2, figsize=(15, 18))

    # Plot 1 - RT prediction
    axs[0, 0].scatter(TARGET.projected_rt, TARGET.rt_predicted, s=1, alpha=.1, c="darkblue", label="Target")
    axs[0, 0].scatter(DECOY.projected_rt, DECOY.rt_predicted, s=1, alpha=.1, c="orange", label="Decoy")
    axs[0, 0].set_xlabel("Retention time observed")
    axs[0, 0].set_ylabel("Retention time predicted")
    axs[0, 0].legend()
    axs[0, 0].set_title("Retention Time Prediction")

    # Plot 2 - IM prediction
    axs[0, 1].scatter(TARGET.ion_mobility, TARGET.inv_mob_predicted, s=1, alpha=.1, c="darkblue", label="Target")
    axs[0, 1].scatter(DECOY.ion_mobility, DECOY.inv_mob_predicted, s=1, alpha=.1, c="orange", label="Decoy")
    axs[0, 1].set_xlabel("Ion mobility observed")
    axs[0, 1].set_ylabel("Ion mobility predicted")
    axs[0, 1].legend()
    axs[0, 1].set_title("Ion Mobility Prediction")

    # Plot 3 - RT delta
    axs[1, 0].hist(TARGET.projected_rt - TARGET.rt_predicted, alpha=.8, bins="auto", density=True, color="darkblue", label="Target")
    axs[1, 0].hist(DECOY.projected_rt - DECOY.rt_predicted, alpha=.5, bins="auto", density=True, color="orange", label="Decoy")
    axs[1, 0].set_xlabel("Retention time delta")
    axs[1, 0].set_ylabel("Density")
    axs[1, 0].legend()
    axs[1, 0].set_title("Retention Time Delta")

    # Plot 4 - IM delta
    axs[1, 1].hist(TARGET.ion_mobility - TARGET.inv_mob_predicted, alpha=.8, bins="auto", density=True, color="darkblue", label="Target")
    axs[1, 1].hist(DECOY.ion_mobility - DECOY.inv_mob_predicted, alpha=.5, bins="auto", density=True, color="orange", label="Decoy")
    axs[1, 1].set_xlim((-0.4, 0.4))
    axs[1, 1].set_xlabel("Ion mobility delta")
    axs[1, 1].set_ylabel("Density")
    axs[1, 1].legend()
    axs[1, 1].set_title("Ion Mobility Delta")

    # Plot 5 - Cosine similarity
    axs[2, 0].hist(TARGET.cosine_similarity, bins="auto", density=True, alpha=.8, color="darkblue", label="Target")
    axs[2, 0].hist(DECOY.cosine_similarity, bins="auto", density=True, alpha=.5, color="orange", label="Decoy")
    axs[2, 0].set_xlabel("Cosine similarity")
    axs[2, 0].set_ylabel("Density")
    axs[2, 0].legend()
    axs[2, 0].set_title("Cosine Similarity")

    # Plot 6 - Score
    axs[2, 1].hist(TARGET.re_score, bins="auto", density=True, alpha=.8, color="darkblue", label="Target")
    axs[2, 1].hist(DECOY.re_score, bins="auto", density=True, alpha=.5, color="orange", label="Decoy")
    axs[2, 1].set_xlabel("Score")
    axs[2, 1].set_ylabel("Density")
    axs[2, 1].legend()
    axs[2, 1].set_title("Score Information")

    plt.tight_layout()
    plt.savefig(save_path, dpi=dpi, format=file_format)
    # Release the figure: pyplot keeps a reference to every open figure,
    # so repeated calls would otherwise accumulate memory.
    plt.close(fig)