masster 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (55) hide show
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1685 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +393 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -736
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1406 -886
  41. masster/study/helpers.py +1713 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1231 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +161 -134
  48. masster/study/study.py +612 -522
  49. masster/study/study5_schema.json +253 -241
  50. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/METADATA +15 -10
  51. masster-0.3.1.dist-info/RECORD +59 -0
  52. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.5.dist-info/RECORD +0 -50
  54. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
  55. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
masster/sample/lib.py ADDED
@@ -0,0 +1,762 @@
1
+ """
2
+ lib.py
3
+
4
+ This module provides utility functions and algorithms for mass spectrometry data processing.
5
+ It contains core functionality for compound library management, target identification,
6
+ adduct handling, and various analytical operations used throughout the masster package.
7
+
8
+ Key Features:
9
+ - **Compound Libraries**: Load and manage compound databases with metadata.
10
+ - **Adduct Calculations**: Handle various ionization adducts and charge states.
11
+ - **Mass Calculations**: Precise mass calculations with adduct corrections.
12
+ - **Target Matching**: Match detected features against compound libraries.
13
+ - **Polarity Handling**: Support for positive and negative ionization modes.
14
+ - **Database Integration**: Interface with various compound database formats.
15
+
16
+ Dependencies:
17
+ - `pyopenms`: For mass spectrometry algorithms and data structures.
18
+ - `polars` and `pandas`: For efficient data manipulation and analysis.
19
+ - `numpy`: For numerical computations and array operations.
20
+ - `tqdm`: For progress tracking during batch operations.
21
+
22
+ Functions:
23
+ - `lib_load()`: Load compound libraries from CSV files.
24
+ - `load_lib()`: Alias for lib_load function.
25
+ - Various utility functions for mass calculations and library management.
26
+
27
+ Supported Adducts:
28
+ - Positive mode: [M+H]+, [M+Na]+, [M+K]+, [M+NH4]+, [M-H2O+H]+
29
+ - Negative mode: [M-H]-, [M+CH3COO]-, [M+HCOO]-, [M+Cl]-
30
+
31
+ Example Usage:
32
+ ```python
33
+ from masster.sample.lib import lib_load
34
+
35
+ # Load compound library
36
+ lib_load(self, csvfile="compounds.csv", polarity="positive")
37
+
38
+ # Access loaded library data
39
+ print(f"Loaded {len(self.lib_df)} compounds")
40
+ print(self.lib_df.head())
41
+ ```
42
+
43
+ See Also:
44
+ - `parameters._lib_parameters`: For library-specific parameter configuration.
45
+ - `single.py`: For applying library matching to detected features.
46
+
47
+ """
48
+
49
+ import os
50
+ import re
51
+
52
+ import numpy as np
53
+ import pandas as pd
54
+ import polars as pl
55
+ import pyopenms as oms
56
+
57
+ from tqdm import tqdm
58
+
59
+ from masster.chromatogram import Chromatogram
60
+ # Parameters removed - using hardcoded defaults
61
+
62
+
63
def load_lib(self, *args, **kwargs):
    """Alias of ``lib_load``: forward every positional and keyword argument unchanged."""
    lib_load(self, *args, **kwargs)
65
+
66
+
67
def lib_load(self, csvfile=None, polarity="positive"):
    """
    Load target compounds from a CSV file and expand them into adduct targets.

    Every CSV row is turned into one target per supported adduct. The result
    is stored in ``self.lib`` (a pandas DataFrame with the columns ``libid``,
    ``set``, ``name``, ``id``, ``smiles``, ``formula``, ``adduct``, ``m``,
    ``z``, ``mz``, ``rt`` and ``MS2spec``); nothing is returned.

    Recognised CSV columns:
        - ``Name`` / ``name`` / ``Compound`` / ``compound`` (required)
        - ``Formula`` / ``formula`` (required)
        - ``SMILES`` / ``smiles`` (required)
        - ``rt`` / ``RT`` (optional retention time)
        - ``rt2`` / ``RT2`` (optional second retention time; produces extra
          targets whose name carries the suffix " II")
        - ``id`` / ``ID`` (optional, defaults to the name column)
        - ``set`` / ``Set`` (optional)

    Parameters:
        csvfile (str): Path to the CSV file containing the target compounds.
        polarity (str | None): "positive" keeps only targets with z > 0,
            "negative" only those with z < 0, ``None`` keeps everything.

    Raises:
        ValueError: If no CSV file is given, a required column is missing, or
            ``polarity`` is neither "positive", "negative" nor ``None``.
    """
    # (mass shift of the ion relative to the neutral monoisotopic mass, charge).
    # Anion adducts ADD the mass of the attached anion, so their shift is positive;
    # only [M-H]- removes mass.
    adducts = {
        "[M+H]+": (1.007276, 1),
        "[M+Na]+": (22.989218, 1),
        "[M+K]+": (38.963158, 1),
        "[M+NH4]+": (18.033823, 1),
        "[M-H2O+H]+": (-17.00329, 1),
        "[M-H]-": (-1.007276, -1),
        "[M+CH3COO]-": (59.013852, -1),
        "[M+HCOO]-": (44.998203, -1),
        "[M+Cl]-": (34.969401, -1),
    }
    lib_columns = [
        "libid",
        "set",
        "name",
        "id",
        "smiles",
        "formula",
        "adduct",
        "m",
        "z",
        "mz",
        "rt",
        "MS2spec",
    ]

    self.lib = None
    # Validate the arguments before doing any work so a bad call fails fast
    # and never leaves a half-built library behind.
    if csvfile is None:
        raise ValueError("A CSV file must be provided.")
    if polarity is not None and polarity.lower() not in ("positive", "negative"):
        raise ValueError("Polarity must be 'positive' or 'negative'.")

    df = pd.read_csv(csvfile)
    df_cols = df.columns

    def pick(candidates, error=None):
        # Return the first candidate column present in the CSV.
        for candidate in candidates:
            if candidate in df_cols:
                return candidate
        if error is not None:
            raise ValueError(error)
        return None

    name_col = pick(
        ("Name", "name", "Compound", "compound"),
        "No column named 'Name', 'name', or 'Compound' found in the CSV file.",
    )
    formula_col = pick(
        ("Formula", "formula"),
        "No column named 'Formula' or 'formula' found in the CSV file.",
    )
    smiles_col = pick(
        ("SMILES", "smiles"),
        "No column named 'SMILES' or 'smiles' found in the CSV file.",
    )
    rt_col = pick(("rt", "RT"))
    rt_col2 = pick(("rt2", "RT2"))
    id_col = pick(("id", "ID")) or name_col
    set_col = pick(("set", "Set"))
    if set_col is None:
        print(
            "No column named 'set' or 'Set' found in the CSV file. Using all targets.",
        )

    def expand(row, libid, accurate_mass, name, rt):
        # One target per adduct for a single compound / retention time pair.
        expanded = []
        for adduct, (shift, charge) in adducts.items():
            ion_mass = accurate_mass + shift
            expanded.append(
                {
                    "libid": libid,
                    "set": row[set_col] if set_col is not None else None,
                    "name": name,
                    "id": row[id_col],
                    "smiles": row[smiles_col],
                    "formula": row[formula_col],
                    "adduct": adduct,
                    "m": ion_mass,
                    "z": charge,
                    # m/z is reported as a positive number for both polarities
                    "mz": abs(ion_mass / charge),
                    "rt": rt,
                    "MS2spec": None,
                },
            )
        return expanded

    targets = []
    libid = 0
    for _index, row in df.iterrows():
        # Parsing the formula can fail as well as the mass lookup, so both
        # live inside the try; a bad row is reported and skipped.
        try:
            accurate_mass = oms.EmpiricalFormula(row[formula_col]).getMonoWeight()
        except Exception as e:
            print(f"Error calculating accurate mass for {row[formula_col]}: {e}")
            continue

        rt = row[rt_col] if rt_col is not None else None
        targets.extend(expand(row, libid, accurate_mass, row[name_col], rt))

        # A second retention time yields a second set of targets. Rows without
        # a value in that column are skipped: a duplicate without RT would
        # match features at any retention time.
        if rt_col2 is not None and pd.notna(row[rt_col2]):
            targets.extend(
                expand(row, libid, accurate_mass, f"{row[name_col]} II", row[rt_col2]),
            )
        libid += 1

    # Explicit columns keep the frame usable even when no target was built.
    self.lib = pd.DataFrame(targets, columns=lib_columns)
    # Two passes kept from the original; together they turn every missing
    # value (NaN) into None so callers can test `value is None`.
    self.lib = self.lib.where(pd.notnull(self.lib), None)
    self.lib = self.lib.replace({np.nan: None})

    if polarity is not None:
        if polarity.lower() == "positive":
            self.lib = self.lib[self.lib["z"] > 0]
        else:
            self.lib = self.lib[self.lib["z"] < 0]
223
+
224
+
225
def link_lib(self, *args, **kwargs):
    """Alias of ``self.lib_link``: forward every positional and keyword argument unchanged."""
    self.lib_link(*args, **kwargs)
227
+
228
+
229
def lib_link(
    self,
    mz_tol=0.01,
    mz_tol_factor_lib=0.5,
    rt_tol=6.0,
    rt_tol_factor_lib=0.5,
    level=1,
):
    """
    Match library targets against detected features.

    For every row of ``self.lib`` all rows of ``self.features_df`` are
    collected whose m/z lies within ``mz_tol * mz_tol_factor_lib`` of the
    target m/z. When the target has a retention time, the feature RT must
    also lie within ``rt_tol * rt_tol_factor_lib`` of it. The matches are
    stored in ``self.lib_match`` (one row per target/feature pair) and
    ``self.lib_eic`` is then called with the unscaled ``mz_tol`` / ``rt_tol``.

    Parameters:
        mz_tol (float): Base m/z tolerance.
        mz_tol_factor_lib (float): Factor applied to ``mz_tol`` for matching.
        rt_tol (float): Base retention-time tolerance. Passing NaN disables
            the retention-time filter.
        rt_tol_factor_lib (float): Factor applied to ``rt_tol`` for matching.
        level (int): Only level 1 produces matches; any other value yields an
            empty ``self.lib_match``.
    """
    if getattr(self, "lib", None) is None:
        print("Please load the library first.")
        return

    match_columns = [
        "libid",
        "set",
        "name",
        "id",
        "formula",
        "adduct",
        "smiles",
        "z",
        "match_level",
        "feature_uid",
        "inty",
        "quality",
        "mz",
        "delta_mz",
        "rt",
        "delta_rt",
        "ms2_scans",
        "eic",
    ]

    mz_tol_lib = mz_tol * mz_tol_factor_lib
    rt_tol_lib = rt_tol * rt_tol_factor_lib
    # `x is not np.nan` is an identity test and is always true for a computed
    # float, so the NaN check has to be done by value.
    use_rt = not pd.isna(rt_tol_lib)

    features = self.features_df
    has_ms2 = "ms2_scans" in features.columns
    lib_matches = []

    for _index, row in self.lib.iterrows():
        lib_rt = row["rt"] if pd.notna(row["rt"]) else None

        mask = (features["mz"] >= row["mz"] - mz_tol_lib) & (features["mz"] <= row["mz"] + mz_tol_lib)
        if lib_rt is not None and use_rt:
            mask &= (features["rt"] >= lib_rt - rt_tol_lib) & (features["rt"] <= lib_rt + rt_tol_lib)

        if level != 1:
            continue

        # Read the matched rows once instead of re-filtering the whole
        # feature table for every matched feature_uid.
        hits = features[mask]
        ms2_values = hits["ms2_scans"].values if has_ms2 else [None] * len(hits)
        for feature_uid, inty, quality, mz, rt, ms2_scans in zip(
            hits["feature_uid"].to_list(),
            hits["inty"].values,
            hits["quality"].values,
            hits["mz"].values,
            hits["rt"].values,
            ms2_values,
        ):
            lib_matches.append(
                {
                    "libid": row["libid"],
                    "set": row["set"],
                    "name": row["name"],
                    "id": row["id"],
                    "formula": row["formula"],
                    "adduct": row["adduct"],
                    "smiles": row["smiles"],
                    "z": row["z"],
                    "match_level": 1,
                    "feature_uid": feature_uid,
                    "inty": inty,
                    "quality": quality,
                    "mz": mz,
                    "delta_mz": row["mz"] - mz,
                    "rt": rt,
                    "delta_rt": lib_rt - rt if lib_rt is not None else None,
                    "ms2_scans": ms2_scans,
                    # filled in by lib_eic()
                    "eic": None,
                },
            )

    # Explicit columns keep the frame well-formed when nothing matched.
    self.lib_match = pd.DataFrame(lib_matches, columns=match_columns)
    self.lib_eic(mz_tol=mz_tol, rt_tol=rt_tol)
283
+
284
+
285
def lib_eic(
    self,
    mz_tol=0.01,
    rt_tol=6.0,
):
    """
    Attach an extracted ion chromatogram (EIC) to every library match.

    For each row of ``self.lib_match`` the matching feature is looked up in
    ``self.features_df``. All MS1 points of ``self.ms1_df`` (a polars
    DataFrame) within ``mz_tol`` of the feature m/z and within the feature's
    ``rt_start`` / ``rt_end`` window widened by ``rt_tol`` are collected. The
    maximum intensity per retention time forms the EIC, which is stored as a
    ``Chromatogram`` in the ``eic`` column of ``self.lib_match``.

    Parameters:
        mz_tol (float): Half-width of the m/z window around the feature m/z.
        rt_tol (float): Amount by which the feature's RT window is widened on
            both sides.
    """
    lib_match = getattr(self, "lib_match", None)
    if lib_match is None:
        print("Please load and match the library first.")
        return
    if len(lib_match) == 0:
        print("No matches found.")
        return

    for index, row in lib_match.iterrows():
        hits = self.features_df[self.features_df["feature_uid"] == row["feature_uid"]]
        if hits.empty:
            continue
        feature = hits.iloc[0]
        rt_start = feature["rt_start"] - rt_tol
        rt_end = feature["rt_end"] + rt_tol

        window = self.ms1_df.filter(
            (pl.col("rt") >= rt_start)
            & (pl.col("rt") <= rt_end)
            & (pl.col("mz") >= feature["mz"] - mz_tol)
            & (pl.col("mz") <= feature["mz"] + mz_tol),
        )
        # group_by() does not guarantee the order of its groups, so sort by
        # retention time to obtain a chromatogram with increasing RT.
        eic_rt = window.group_by("rt").agg(pl.col("inty").max()).sort("rt")
        eic = Chromatogram(
            eic_rt["rt"].to_numpy(),
            eic_rt["inty"].to_numpy(),
            label=f"EIC mz={feature['mz']:.4f}; {row['name']} {row['adduct']}",
            feature_start=feature["rt_start"],
            feature_end=feature["rt_end"],
            lib_rt=row["rt"],
        )
        # .at is the scalar setter: the object is stored as one cell value and
        # is never interpreted as a sequence to broadcast.
        self.lib_match.at[index, "eic"] = eic
323
+
324
+
325
+ # TODO Should go in _export? (Almost the same method already there)
326
def save_lib_mgf(
    self,
    filename="lib_export.mgf",
    selection="best",
    split_energy=True,
    merge=False,
    centroid=True,
    inty_min=float("-inf"),
    q1_ratio_min=None,
    q1_ratio_max=None,
    eic_corr_min=None,
    deisotope=True,
    verbose=False,
    precursor_trim=-10.0,
    centroid_algo=None,
):
    """
    Export the spectra of all library matches to an MGF file.

    For every row of ``self.lib_match`` whose feature has MS2 scans, the MS1
    spectrum closest to the feature RT is written first (trimmed to
    precursor m/z -2 / +10), followed by the selected MS2 spectra.

    Parameters:
        filename (str): Path of the MGF file to write.
        selection (str): "best" exports the first MS2 scan (per energy when
            ``split_energy`` is true); "all" exports every scan. With
            ``split_energy=False`` any other value exports no MS2 spectrum.
        split_energy (bool): Group the spectra in the feature's ``ms2_specs``
            by their ``energy`` and export each group separately.
        merge (bool): With ``split_energy=False`` and ``selection="all"``,
            merge all MS2 scans into one spectrum before export.
        centroid (bool): Forwarded to ``self.get_spectrum``; also triggers
            denoising and centroiding of a merged spectrum.
        inty_min (float): Peaks below this intensity are dropped (only
            applied when greater than 0).
        q1_ratio_min, q1_ratio_max (float | None): Bounds on the spectrum's
            ``q1_ratio`` values, applied to MS2 spectra that carry them.
        eic_corr_min (float | None): Lower bound on the spectrum's
            ``eic_corr`` values, applied to MS2 spectra that carry them.
        deisotope (bool): Forwarded to ``self.get_spectrum``; also applied to
            a merged spectrum.
        verbose (bool): Print a short summary when done.
        precursor_trim (float): Forwarded to ``self.get_spectrum`` for MS2 scans.
        centroid_algo (str | None): Centroiding algorithm; defaults to
            ``self.parameters["centroid_algo"]`` when present, else "cr".
    """
    lib_match = getattr(self, "lib_match", None)
    if lib_match is None:
        print("Please load and match the library first.")
        return

    if len(lib_match) == 0:
        print("No matches found.")
        return

    def filter_peaks(spec, inty_min=None, q1_min=None, eic_min=None, q1_max=None):
        # Return a copy of `spec` keeping only the peaks that pass all thresholds.
        spec = spec.copy()
        n_peaks = len(spec.mz)
        mask = np.ones(n_peaks, dtype=bool)
        if inty_min is not None and inty_min > 0:
            mask &= np.asarray(spec.inty) >= inty_min
        q1_ratio = getattr(spec, "q1_ratio", None)
        if q1_ratio is not None:
            if q1_min is not None:
                mask &= np.asarray(q1_ratio) >= q1_min
            if q1_max is not None:
                mask &= np.asarray(q1_ratio) <= q1_max
        eic_corr = getattr(spec, "eic_corr", None)
        if eic_min is not None and eic_corr is not None:
            mask &= np.asarray(eic_corr) >= eic_min
        # Apply the mask to every per-peak attribute (same length as mz).
        for attr, value in list(vars(spec).items()):
            if isinstance(value, np.ndarray):
                if value.ndim >= 1 and len(value) == n_peaks:
                    setattr(spec, attr, value[mask])
            elif isinstance(value, list) and len(value) == n_peaks:
                # plain lists cannot be indexed with a boolean mask
                setattr(spec, attr, [v for v, keep in zip(value, mask) if keep])
        return spec

    def write_ion(handle, fields, spec):
        # Write one BEGIN IONS / END IONS record.
        if spec is None:
            return
        handle.write("BEGIN IONS\n")
        for key in fields:
            handle.write(f"{key.upper()}={fields[key]}\n")
        for mz, inty in zip(spec.mz, spec.inty, strict=False):
            handle.write(f"{mz:.5f} {inty:.0f}\n")
        handle.write("END IONS\n\n")

    def describe_activation(spec):
        # Derive (activation, collision energy, kinetic energy) for a spectrum.
        # Activation and kinetic energy are parsed from the data file name.
        ms_level = 1 if spec.ms_level is None else spec.ms_level
        if ms_level <= 1:
            return None, None, None
        has_ead = "EAD" in source_upper
        activation = None
        kineticenergy = None
        if "CID" in source_upper or "ZTS" in source_upper:
            activation = "CID-EAD" if has_ead else "CID"
        elif has_ead:
            activation = "EAD"
        if has_ead:
            # e.g. "10KE"; fall back to "10EV"
            found = re.search(r"(\d+)KE", source_upper) or re.search(r"(\d+)EV", source_upper)
            if found:
                kineticenergy = int(found.group(1))
        energy = spec.energy if hasattr(spec, "energy") else None
        return activation, energy, kineticenergy

    def export(handle, spec, matchrow, row, ms1_scan_uid, is_ms2):
        # Filter one spectrum, write it as an MGF record, return the filtered copy.
        if spec is None:
            return None
        if is_ms2:
            spec = filter_peaks(
                spec,
                inty_min=inty_min,
                q1_min=q1_ratio_min,
                eic_min=eic_corr_min,
                q1_max=q1_ratio_max,
            )
        else:
            spec = filter_peaks(spec, inty_min=inty_min)
        activation, energy, kineticenergy = describe_activation(spec)
        adduct = matchrow["adduct"]
        fields = {
            "PEPMASS": row["mz"],
            "RTINSECONDS": row["rt"],
            "IONMODE": "positive" if adduct[-1] == "+" else "negative",
            "CHARGE": "1" + adduct.split("]")[1],
            "NAME": f"{matchrow['name']}",
            "SMILES": matchrow["smiles"],
            "FORMULA": matchrow["formula"],
            "ADDUCT": adduct,
            "LIBID": matchrow["libid"],
            "ACTIVATION": activation,
            "COLLISIONENERGY": energy,
            "KINETICENERGY": kineticenergy,
            "FILENAME": source_name,
            # NOTE(review): MS2 records also carry the MS1 scan uid here,
            # as in the original code - confirm this is intended.
            "SCANS": ms1_scan_uid,
            "FID": row["feature_uid"],
            "MSLEVEL": 1 if spec.ms_level is None else spec.ms_level,
        }
        write_ion(handle, fields, spec)
        return spec

    def get_ms2(scan_uid):
        # Fetch one MS2 spectrum with the export settings.
        return self.get_spectrum(
            scan_uid,
            centroid=centroid,
            deisotope=deisotope,
            precursor_trim=precursor_trim,
            centroid_algo=centroid_algo,
        )

    def has_no_scans(scans):
        # True when the feature carries no MS2 scan ids.
        if scans is None:
            return True
        if isinstance(scans, float):
            return bool(np.isnan(scans))
        return len(scans) == 0

    if centroid_algo is None:
        if "centroid_algo" in self.parameters:
            centroid_algo = self.parameters["centroid_algo"]
        else:
            centroid_algo = "cr"

    # Name of the measured data file (not the MGF being written); it is
    # reported in every record and parsed for the activation method.
    source_name = os.path.basename(self.file_path)
    source_upper = source_name.upper()

    skip = 0
    with open(filename, "w", encoding="utf-8") as f:
        for _index, matchrow in tqdm(
            lib_match.iterrows(),
            total=len(lib_match),
            desc="Export MGF",
        ):
            candidates = self.features_df[self.features_df["feature_uid"] == matchrow["feature_uid"]]
            if candidates.empty:
                skip += 1
                continue
            row = candidates.iloc[0]
            if has_no_scans(row["ms2_scans"]):
                skip += 1
                continue

            # MS1 spectrum closest to the feature RT, trimmed to 2 Da below
            # and 10 Da above the precursor m/z.
            ms1_scan_uid = self.select_closest_scan(rt=row["rt"])["scan_uid"][0]
            ms1_spec = self.get_spectrum(
                ms1_scan_uid,
                centroid=centroid,
                deisotope=deisotope,
                centroid_algo=centroid_algo,
            )
            ms1_spec = ms1_spec.mz_trim(mz_min=row["mz"] - 2.0, mz_max=row["mz"] + 10.0)
            ms1_spec = export(f, ms1_spec, matchrow, row, ms1_scan_uid, is_ms2=False)

            if split_energy:
                ms2_specs = row["ms2_specs"]
                # dict.fromkeys keeps first-seen order -> deterministic output
                for e in dict.fromkeys(s.energy for s in ms2_specs):
                    scan_uids = [s.scan_uid for s in ms2_specs if s.energy == e]
                    if selection == "best":
                        # keep a one-element list so the loop below still works
                        scan_uids = scan_uids[:1]
                    for scan_uid in scan_uids:
                        export(f, get_ms2(scan_uid), matchrow, row, ms1_scan_uid, is_ms2=True)
            elif selection == "best":
                export(f, get_ms2(row["ms2_scans"][0]), matchrow, row, ms1_scan_uid, is_ms2=True)
            elif selection == "all":
                if merge:
                    specs = [
                        self.get_spectrum(
                            scan_uid,
                            centroid=centroid,
                            deisotope=deisotope,
                            precursor_trim=precursor_trim,
                        )
                        for scan_uid in row["ms2_scans"]
                    ]
                    # NOTE(review): merge_peaks is invoked on the MS1 spectrum
                    # object, exactly as in the original - confirm that it
                    # only merges `specs` and ignores the instance's peaks.
                    spec = ms1_spec.merge_peaks(specs)
                    if centroid:
                        spec = spec.denoise()
                        if spec.ms_level == 1:
                            spec = spec.centroid(
                                tolerance=self.parameters["mz_tol_ms1_da"],
                                ppm=self.parameters["mz_tol_ms1_ppm"],
                                min_points=self.parameters["centroid_min_points_ms1"],
                                algo=centroid_algo,
                            )
                        elif spec.ms_level == 2:
                            spec = spec.centroid(
                                tolerance=self.parameters["mz_tol_ms2_da"],
                                ppm=self.parameters["mz_tol_ms2_ppm"],
                                min_points=self.parameters["centroid_min_points_ms2"],
                                algo=centroid_algo,
                            )
                    if deisotope:
                        spec = spec.deisotope()
                    export(f, spec, matchrow, row, ms1_scan_uid, is_ms2=True)
                else:
                    for scan_uid in row["ms2_scans"]:
                        export(f, get_ms2(scan_uid), matchrow, row, ms1_scan_uid, is_ms2=True)

    if verbose:

        def fmt(value):
            # thresholds default to None, which cannot take a float format
            return "None" if value is None else f"{value:.3f}"

        print(
            f"MGF created with int>{fmt(inty_min)}, q1_ratio>{fmt(q1_ratio_min)}, eic_corr>{fmt(eic_corr_min)}",
        )
        print(
            f"- Skipped {skip} features because no MS2 peaks were left after filtering.",
        )