masster 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/save.py +219 -142
- {masster-0.2.3.dist-info → masster-0.2.5.dist-info}/METADATA +1 -1
- {masster-0.2.3.dist-info → masster-0.2.5.dist-info}/RECORD +7 -7
- {masster-0.2.3.dist-info → masster-0.2.5.dist-info}/WHEEL +0 -0
- {masster-0.2.3.dist-info → masster-0.2.5.dist-info}/entry_points.txt +0 -0
- {masster-0.2.3.dist-info → masster-0.2.5.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/sample/save.py
CHANGED
|
@@ -71,6 +71,23 @@ from masster.spectrum import combine_peaks
|
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
def save(self, filename=None):
|
|
74
|
+
"""
|
|
75
|
+
Save the current object to a file in the '.sample5' format.
|
|
76
|
+
|
|
77
|
+
If `filename` is not provided, the method attempts to use `self.file_path` as the base name,
|
|
78
|
+
replacing its extension with '.sample5'. If neither `filename` nor `self.file_path` is available,
|
|
79
|
+
a ValueError is raised.
|
|
80
|
+
|
|
81
|
+
If `filename` is provided and `self.file_path` is an absolute path, the extension of `filename`
|
|
82
|
+
is replaced with '.sample5'. Otherwise, if `self.file_path` is available, its extension is replaced
|
|
83
|
+
with '.sample5'. If neither is available, a ValueError is raised.
|
|
84
|
+
|
|
85
|
+
Parameters:
|
|
86
|
+
filename (str, optional): The name of the file to save to. If not provided, uses `self.file_path`.
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
None
|
|
90
|
+
"""
|
|
74
91
|
if filename is None:
|
|
75
92
|
# save to default file name
|
|
76
93
|
if self.file_path is not None:
|
|
@@ -98,20 +115,43 @@ def _save_featureXML(self, filename="features.featureXML"):
|
|
|
98
115
|
|
|
99
116
|
|
|
100
117
|
def export_features(self, filename="features.csv"):
|
|
101
|
-
|
|
102
|
-
|
|
118
|
+
"""
|
|
119
|
+
Export the features DataFrame to a CSV or Excel file.
|
|
120
|
+
|
|
121
|
+
This method clones the internal features DataFrame, adds a boolean column 'has_ms2' indicating
|
|
122
|
+
whether the 'ms2_scans' column is not null, and exports the resulting DataFrame to the specified file.
|
|
123
|
+
Columns with data types 'List' or 'Object' are excluded from the export.
|
|
124
|
+
|
|
125
|
+
Parameters:
|
|
126
|
+
filename (str): The path to the output file. If the filename ends with '.xls' or '.xlsx',
|
|
127
|
+
the data is exported in Excel format; otherwise, it is exported as CSV.
|
|
128
|
+
Defaults to 'features.csv'.
|
|
129
|
+
|
|
130
|
+
Side Effects:
|
|
131
|
+
Writes the exported data to the specified file and logs the export operation.
|
|
132
|
+
"""
|
|
133
|
+
# clone df
|
|
134
|
+
clean_df = self.features_df.clone()
|
|
135
|
+
filename = os.path.abspath(filename)
|
|
136
|
+
# add a column has_ms2=True if colum ms2_scans is not None
|
|
137
|
+
if "ms2_scans" in clean_df.columns:
|
|
138
|
+
clean_df = clean_df.with_columns(
|
|
139
|
+
(pl.col("ms2_scans").is_not_null()).alias("has_ms2")
|
|
140
|
+
)
|
|
103
141
|
clean_df = self.features_df.select([
|
|
104
|
-
col
|
|
105
|
-
for col in self.features_df.columns
|
|
106
|
-
if self.features_df[col].dtype not in (pl.List, pl.Object)
|
|
142
|
+
col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
|
|
107
143
|
])
|
|
108
|
-
|
|
109
|
-
|
|
144
|
+
if filename.lower().endswith((".xls", ".xlsx")):
|
|
145
|
+
clean_df.to_pandas().to_excel(filename, index=False)
|
|
146
|
+
self.logger.info(f"Features exported to {filename} (Excel format)")
|
|
147
|
+
else:
|
|
148
|
+
clean_df.write_csv(filename)
|
|
149
|
+
self.logger.info(f"Features exported to {filename}")
|
|
110
150
|
|
|
111
151
|
|
|
112
152
|
def export_mgf(
|
|
113
153
|
self,
|
|
114
|
-
filename:str="features.mgf",
|
|
154
|
+
filename: str = "features.mgf",
|
|
115
155
|
use_cache=True,
|
|
116
156
|
selection="best",
|
|
117
157
|
split_energy=True,
|
|
@@ -128,8 +168,7 @@ def export_mgf(
|
|
|
128
168
|
q1_ratio_max=None,
|
|
129
169
|
eic_corr_min=None,
|
|
130
170
|
deisotope=True,
|
|
131
|
-
|
|
132
|
-
precursor_trim=-(-10.0),
|
|
171
|
+
precursor_trim=10.0,
|
|
133
172
|
centroid_algo=None,
|
|
134
173
|
):
|
|
135
174
|
"""
|
|
@@ -175,14 +214,28 @@ def export_mgf(
|
|
|
175
214
|
return
|
|
176
215
|
else:
|
|
177
216
|
self.features_df = self.features.get_df()
|
|
217
|
+
|
|
218
|
+
# Apply filtering at DataFrame level for better performance
|
|
178
219
|
features = self.features_df
|
|
179
|
-
|
|
220
|
+
if mz_start is not None:
|
|
221
|
+
features = features.filter(pl.col("mz") >= mz_start)
|
|
222
|
+
if mz_end is not None:
|
|
223
|
+
features = features.filter(pl.col("mz") <= mz_end)
|
|
224
|
+
if rt_start is not None:
|
|
225
|
+
features = features.filter(pl.col("rt") >= rt_start)
|
|
226
|
+
if rt_end is not None:
|
|
227
|
+
features = features.filter(pl.col("rt") <= rt_end)
|
|
228
|
+
if not include_all_ms1:
|
|
229
|
+
features = features.filter(pl.col("ms2_scans").is_not_null())
|
|
230
|
+
|
|
231
|
+
# Convert to list of dictionaries for faster iteration
|
|
232
|
+
features_list = features.to_dicts()
|
|
180
233
|
|
|
181
234
|
def filter_peaks(spec, inty_min=None, q1_min=None, eic_min=None, q1_max=None):
|
|
182
235
|
# create a copy of the spectrum
|
|
183
236
|
spec = spec.copy()
|
|
184
|
-
|
|
185
|
-
mask = [True] *
|
|
237
|
+
spec_len = len(spec.mz)
|
|
238
|
+
mask = [True] * spec_len
|
|
186
239
|
if inty_min is not None and inty_min > 0:
|
|
187
240
|
mask = np.array(mask) & (spec.inty >= inty_min)
|
|
188
241
|
# check if q1_ratio is an attribute of spec
|
|
@@ -201,9 +254,9 @@ def export_mgf(
|
|
|
201
254
|
getattr(spec, attr),
|
|
202
255
|
np.ndarray,
|
|
203
256
|
):
|
|
204
|
-
# check if attr has attribute 0 and its length is equal to
|
|
257
|
+
# check if attr has attribute 0 and its length is equal to spec_len:
|
|
205
258
|
if hasattr(getattr(spec, attr), "__len__"):
|
|
206
|
-
if len(getattr(spec, attr)) ==
|
|
259
|
+
if len(getattr(spec, attr)) == spec_len:
|
|
207
260
|
setattr(spec, attr, getattr(spec, attr)[mask])
|
|
208
261
|
return spec
|
|
209
262
|
|
|
@@ -218,47 +271,54 @@ def export_mgf(
|
|
|
218
271
|
else:
|
|
219
272
|
f.write(f"MSLEVEL={spect.ms_level}\n")
|
|
220
273
|
if spect.ms_level is not None:
|
|
221
|
-
if spect.ms_level > 1 and "energy"
|
|
274
|
+
if spect.ms_level > 1 and hasattr(spect, "energy"):
|
|
222
275
|
f.write(f"ENERGY={spect.energy}\n")
|
|
223
|
-
|
|
224
|
-
|
|
276
|
+
# Use list comprehension for better performance
|
|
277
|
+
peak_lines = [f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)]
|
|
278
|
+
f.writelines(peak_lines)
|
|
225
279
|
f.write("END IONS\n\n")
|
|
226
280
|
|
|
227
281
|
if centroid_algo is None:
|
|
228
|
-
if "centroid_algo"
|
|
229
|
-
centroid_algo = self.parameters
|
|
282
|
+
if hasattr(self.parameters, "centroid_algo"):
|
|
283
|
+
centroid_algo = self.parameters.centroid_algo
|
|
230
284
|
else:
|
|
231
285
|
centroid_algo = "cr"
|
|
232
286
|
|
|
287
|
+
# count how many features have charge < 0
|
|
288
|
+
if self.features_df.filter(pl.col("charge") < 0).shape[0]- self.features_df.filter(pl.col("charge") > 0).shape[0] > 0:
|
|
289
|
+
preferred_charge = -1
|
|
290
|
+
else:
|
|
291
|
+
preferred_charge = 1
|
|
292
|
+
|
|
233
293
|
c = 0
|
|
234
294
|
skip = 0
|
|
235
295
|
# check if features is empty
|
|
236
|
-
if len(
|
|
296
|
+
if len(features_list) == 0:
|
|
237
297
|
self.logger.warning("No features found.")
|
|
238
298
|
return
|
|
299
|
+
filename = os.path.abspath(filename)
|
|
239
300
|
with open(filename, "w", encoding="utf-8") as f:
|
|
240
301
|
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
241
|
-
for
|
|
242
|
-
|
|
243
|
-
total=len(
|
|
302
|
+
for row in tqdm(
|
|
303
|
+
features_list,
|
|
304
|
+
total=len(features_list),
|
|
244
305
|
desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Export MGF",
|
|
245
306
|
disable=tdqm_disable,
|
|
246
307
|
):
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
continue
|
|
308
|
+
# Pre-calculate common values
|
|
309
|
+
feature_uid = row["feature_uid"]
|
|
310
|
+
mz = row["mz"]
|
|
311
|
+
rt = row["rt"]
|
|
312
|
+
rt_str = f"{rt:.2f}"
|
|
313
|
+
mz_str = f"{mz:.4f}"
|
|
314
|
+
|
|
315
|
+
# Filtering is now done at DataFrame level, so we can skip these checks
|
|
256
316
|
if row["ms2_scans"] is None and not include_all_ms1:
|
|
257
317
|
skip = skip + 1
|
|
258
318
|
continue
|
|
259
319
|
|
|
260
320
|
# write MS1 spectrum
|
|
261
|
-
ms1_scan_uid = self.find_closest_scan(rt=
|
|
321
|
+
ms1_scan_uid = self.find_closest_scan(rt=rt)["scan_uid"]
|
|
262
322
|
spect = self.get_spectrum(
|
|
263
323
|
ms1_scan_uid,
|
|
264
324
|
centroid=centroid,
|
|
@@ -271,17 +331,21 @@ def export_mgf(
|
|
|
271
331
|
if not full_ms1:
|
|
272
332
|
# trim spectrum to region around the precursor, it's wide to potentially identify adducts
|
|
273
333
|
spect = spect.trim(
|
|
274
|
-
mz_min=
|
|
275
|
-
mz_max=
|
|
334
|
+
mz_min=mz - 50,
|
|
335
|
+
mz_max=mz + 50,
|
|
276
336
|
)
|
|
277
337
|
|
|
338
|
+
charge = preferred_charge
|
|
339
|
+
if row["charge"] is not None and row["charge"] != 0:
|
|
340
|
+
charge = row["charge"]
|
|
341
|
+
|
|
278
342
|
write_ion(
|
|
279
343
|
f,
|
|
280
|
-
f"
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
344
|
+
f"feature_uid:{feature_uid}, rt:{rt_str}, mz:{mz_str}",
|
|
345
|
+
feature_uid,
|
|
346
|
+
mz,
|
|
347
|
+
rt,
|
|
348
|
+
charge,
|
|
285
349
|
spect,
|
|
286
350
|
)
|
|
287
351
|
|
|
@@ -290,85 +354,98 @@ def export_mgf(
|
|
|
290
354
|
elif use_cache:
|
|
291
355
|
spect = row["ms2_specs"]
|
|
292
356
|
if spect is None:
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
if
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
357
|
+
# No cached spectra, fall through to fetch from scan_uid
|
|
358
|
+
use_cache = False
|
|
359
|
+
else:
|
|
360
|
+
# check if spec is a list of spectra
|
|
361
|
+
if isinstance(spect, list):
|
|
362
|
+
if selection == "best":
|
|
363
|
+
s = spect[0]
|
|
364
|
+
scan_uid = row["ms2_scans"][0]
|
|
365
|
+
s.energy = self.get_spectrum(scan_uid).energy
|
|
366
|
+
spect = [s]
|
|
367
|
+
scan_uids = [scan_uid]
|
|
368
|
+
else:
|
|
369
|
+
scan_uids = row["ms2_scans"]
|
|
370
|
+
|
|
371
|
+
for i, s in enumerate(spect):
|
|
372
|
+
if s is None:
|
|
373
|
+
print(
|
|
374
|
+
f"No MS2 spectrum for feature {feature_uid} is cached.",
|
|
375
|
+
)
|
|
376
|
+
continue
|
|
377
|
+
# check if s is a spectrum
|
|
378
|
+
if type(s).__name__ == "Spectrum":
|
|
379
|
+
s = filter_peaks(
|
|
380
|
+
s,
|
|
381
|
+
inty_min=inty_min,
|
|
382
|
+
q1_min=q1_ratio_min,
|
|
383
|
+
eic_min=eic_corr_min,
|
|
384
|
+
q1_max=q1_ratio_max,
|
|
385
|
+
)
|
|
386
|
+
# Get the corresponding scan_uid from the list
|
|
387
|
+
current_scan_uid = scan_uids[i] if i < len(scan_uids) else "unknown"
|
|
388
|
+
write_ion(
|
|
389
|
+
f,
|
|
390
|
+
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{current_scan_uid}",
|
|
391
|
+
feature_uid,
|
|
392
|
+
mz,
|
|
393
|
+
rt,
|
|
394
|
+
charge,
|
|
395
|
+
s,
|
|
396
|
+
)
|
|
397
|
+
c += 1
|
|
398
|
+
continue # Skip the rest of the processing for this feature
|
|
399
|
+
|
|
400
|
+
# If we reach here, either use_cache=False or no cached spectra were available
|
|
401
|
+
if split_energy:
|
|
402
|
+
# get energy of all scans with scan_uid in ms2_scans by fetching them
|
|
403
|
+
ms2_scan_uids = row["ms2_scans"]
|
|
404
|
+
if isinstance(ms2_scan_uids, list) and len(ms2_scan_uids) > 0:
|
|
405
|
+
# Fetch spectra to get energy information
|
|
406
|
+
spectra_with_energy = []
|
|
407
|
+
for scan_uid in ms2_scan_uids:
|
|
408
|
+
spec = self.get_spectrum(scan_uid)
|
|
409
|
+
if spec is not None:
|
|
410
|
+
spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, 'energy') else 0))
|
|
411
|
+
|
|
412
|
+
# Group by energy
|
|
413
|
+
energy_groups: dict[float, list[int]] = {}
|
|
414
|
+
for scan_uid, energy in spectra_with_energy:
|
|
415
|
+
if energy not in energy_groups:
|
|
416
|
+
energy_groups[energy] = []
|
|
417
|
+
energy_groups[energy].append(scan_uid)
|
|
418
|
+
|
|
419
|
+
for energy, scan_uids_for_energy in energy_groups.items():
|
|
420
|
+
if selection == "best":
|
|
421
|
+
# Keep only the first scan for this energy
|
|
422
|
+
scan_uids_for_energy = [scan_uids_for_energy[0]]
|
|
423
|
+
|
|
424
|
+
for scan_uid in scan_uids_for_energy:
|
|
425
|
+
spect = self.get_spectrum(
|
|
426
|
+
scan_uid,
|
|
427
|
+
centroid=centroid,
|
|
428
|
+
deisotope=deisotope,
|
|
429
|
+
precursor_trim=precursor_trim,
|
|
430
|
+
centroid_algo=centroid_algo,
|
|
310
431
|
)
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
if type(s).__name__ == "spec":
|
|
314
|
-
s = filter_peaks(
|
|
315
|
-
s,
|
|
432
|
+
spect = filter_peaks(
|
|
433
|
+
spect,
|
|
316
434
|
inty_min=inty_min,
|
|
317
435
|
q1_min=q1_ratio_min,
|
|
318
436
|
eic_min=eic_corr_min,
|
|
319
437
|
q1_max=q1_ratio_max,
|
|
320
438
|
)
|
|
321
|
-
# Get the corresponding scan_uid from the list
|
|
322
|
-
current_scan_uid = (
|
|
323
|
-
scan_uids[i] if i < len(scan_uids) else "unknown"
|
|
324
|
-
)
|
|
325
439
|
write_ion(
|
|
326
440
|
f,
|
|
327
|
-
f"fid:{
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
441
|
+
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{scan_uid}, energy:{energy}",
|
|
442
|
+
feature_uid,
|
|
443
|
+
mz,
|
|
444
|
+
rt,
|
|
445
|
+
charge,
|
|
446
|
+
spect,
|
|
333
447
|
)
|
|
334
|
-
|
|
335
|
-
# get energy of all scans with scan_uid in ms2_scans
|
|
336
|
-
energy = [s.energy for s in row["ms2_specs"]]
|
|
337
|
-
# find unique energies
|
|
338
|
-
unique_energies = list(set(energy))
|
|
339
|
-
for e in unique_energies:
|
|
340
|
-
ms2_scans = [
|
|
341
|
-
row["ms2_scans"][i]
|
|
342
|
-
for i, s in enumerate(row["ms2_specs"])
|
|
343
|
-
if s.energy == e
|
|
344
|
-
]
|
|
345
|
-
if selection == "best":
|
|
346
|
-
# Keep as list with single element
|
|
347
|
-
ms2_scans = [ms2_scans[0]]
|
|
348
|
-
for scan_uid in ms2_scans:
|
|
349
|
-
spect = self.get_spectrum(
|
|
350
|
-
scan_uid,
|
|
351
|
-
centroid=centroid,
|
|
352
|
-
deisotope=deisotope,
|
|
353
|
-
precursor_trim=precursor_trim,
|
|
354
|
-
centroid_algo=centroid_algo,
|
|
355
|
-
)
|
|
356
|
-
spect = filter_peaks(
|
|
357
|
-
spect,
|
|
358
|
-
inty_min=inty_min,
|
|
359
|
-
q1_min=q1_ratio_min,
|
|
360
|
-
eic_min=eic_corr_min,
|
|
361
|
-
q1_max=q1_ratio_max,
|
|
362
|
-
)
|
|
363
|
-
write_ion(
|
|
364
|
-
f,
|
|
365
|
-
f"fid:{row['feature_uid']}, rt:{row['rt']:.2f}, mz:{row['mz']:.4f}, scan_uid:{scan_uid}, energy:{e}",
|
|
366
|
-
row["feature_uid"],
|
|
367
|
-
row["mz"],
|
|
368
|
-
row["rt"],
|
|
369
|
-
row["charge"],
|
|
370
|
-
spect,
|
|
371
|
-
)
|
|
448
|
+
c += 1
|
|
372
449
|
else:
|
|
373
450
|
if selection == "best":
|
|
374
451
|
ms2_scans = row["ms2_scans"][0]
|
|
@@ -388,13 +465,14 @@ def export_mgf(
|
|
|
388
465
|
)
|
|
389
466
|
write_ion(
|
|
390
467
|
f,
|
|
391
|
-
f"fid:{
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
468
|
+
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{ms2_scans}",
|
|
469
|
+
feature_uid,
|
|
470
|
+
mz,
|
|
471
|
+
rt,
|
|
472
|
+
charge,
|
|
396
473
|
spect,
|
|
397
474
|
)
|
|
475
|
+
c += 1
|
|
398
476
|
elif selection == "all":
|
|
399
477
|
if merge:
|
|
400
478
|
specs = []
|
|
@@ -414,23 +492,19 @@ def export_mgf(
|
|
|
414
492
|
spect = spect.centroid(
|
|
415
493
|
tolerance=self.parameters["mz_tol_ms1_da"],
|
|
416
494
|
ppm=self.parameters["mz_tol_ms1_ppm"],
|
|
417
|
-
min_points=self.parameters[
|
|
418
|
-
"centroid_min_points_ms1"
|
|
419
|
-
],
|
|
495
|
+
min_points=self.parameters["centroid_min_points_ms1"],
|
|
420
496
|
algo=centroid_algo,
|
|
421
497
|
)
|
|
422
498
|
elif spect.ms_level == 2:
|
|
423
499
|
spect = spect.centroid(
|
|
424
500
|
tolerance=self.parameters["mz_tol_ms2_da"],
|
|
425
501
|
ppm=self.parameters["mz_tol_ms2_ppm"],
|
|
426
|
-
min_points=self.parameters[
|
|
427
|
-
"centroid_min_points_ms2"
|
|
428
|
-
],
|
|
502
|
+
min_points=self.parameters["centroid_min_points_ms2"],
|
|
429
503
|
algo=centroid_algo,
|
|
430
504
|
)
|
|
431
505
|
if deisotope:
|
|
432
506
|
spect = spect.deisotope()
|
|
433
|
-
title = f"fid:{
|
|
507
|
+
title = f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, merged"
|
|
434
508
|
spect = filter_peaks(
|
|
435
509
|
spect,
|
|
436
510
|
inty_min=inty_min,
|
|
@@ -441,12 +515,13 @@ def export_mgf(
|
|
|
441
515
|
write_ion(
|
|
442
516
|
f,
|
|
443
517
|
title,
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
518
|
+
feature_uid,
|
|
519
|
+
mz,
|
|
520
|
+
rt,
|
|
521
|
+
charge,
|
|
448
522
|
spect,
|
|
449
523
|
)
|
|
524
|
+
c += 1
|
|
450
525
|
else:
|
|
451
526
|
for ms2_scans in row["ms2_scans"]:
|
|
452
527
|
spect = self.get_spectrum(
|
|
@@ -465,24 +540,30 @@ def export_mgf(
|
|
|
465
540
|
)
|
|
466
541
|
write_ion(
|
|
467
542
|
f,
|
|
468
|
-
f"fid:{
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
543
|
+
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{ms2_scans}",
|
|
544
|
+
feature_uid,
|
|
545
|
+
mz,
|
|
546
|
+
rt,
|
|
547
|
+
charge,
|
|
473
548
|
spect,
|
|
474
549
|
)
|
|
550
|
+
c += 1
|
|
475
551
|
|
|
476
|
-
self.logger.info(f"Exported {c
|
|
552
|
+
self.logger.info(f"Exported {c} features to {filename}")
|
|
477
553
|
|
|
554
|
+
# Handle None values in logging
|
|
555
|
+
inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
|
|
556
|
+
q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
|
|
557
|
+
eic_corr_min_str = f"{eic_corr_min:.3f}" if eic_corr_min is not None else "None"
|
|
558
|
+
|
|
478
559
|
self.logger.debug(
|
|
479
|
-
f"MGF created with int>{
|
|
560
|
+
f"MGF created with int>{inty_min_str}, q1_ratio>{q1_ratio_min_str}, eic_corr>{eic_corr_min_str}",
|
|
480
561
|
)
|
|
481
562
|
self.logger.debug(
|
|
482
|
-
f"- Exported {c} MS2
|
|
563
|
+
f"- Exported {c} MS2 spectra for {len(features_list) - skip} precursors. Average spectra/feature is {c / (len(features_list) - skip + 0.000000001):.0f}",
|
|
483
564
|
)
|
|
484
565
|
self.logger.debug(
|
|
485
|
-
f"- Skipped {skip} features because no MS2
|
|
566
|
+
f"- Skipped {skip} features because no MS2 scans were available.",
|
|
486
567
|
)
|
|
487
568
|
|
|
488
569
|
|
|
@@ -510,9 +591,7 @@ def export_dda_stats(self, filename="stats.csv"):
|
|
|
510
591
|
ms2_count = len(self.scans_df.filter(pl.col("ms_level") == 2))
|
|
511
592
|
features_count = len(self.features_df) if self.features_df is not None else 0
|
|
512
593
|
features_with_ms2 = (
|
|
513
|
-
self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
|
|
514
|
-
if self.features_df is not None
|
|
515
|
-
else 0
|
|
594
|
+
self.features_df.filter(pl.col("ms2_scans").is_not_null()).height if self.features_df is not None else 0
|
|
516
595
|
)
|
|
517
596
|
|
|
518
597
|
# Initialize a dictionary to hold statistics
|
|
@@ -527,9 +606,7 @@ def export_dda_stats(self, filename="stats.csv"):
|
|
|
527
606
|
if "time_cycle" in self.scans_df.columns:
|
|
528
607
|
ms1_df = self.scans_df.filter(pl.col("ms_level") == 1)
|
|
529
608
|
avg_cycle_time = ms1_df["time_cycle"].mean()
|
|
530
|
-
stats["Average_cycle_time"] =
|
|
531
|
-
avg_cycle_time if avg_cycle_time is not None else ""
|
|
532
|
-
)
|
|
609
|
+
stats["Average_cycle_time"] = avg_cycle_time if avg_cycle_time is not None else ""
|
|
533
610
|
else:
|
|
534
611
|
stats["Average_cycle_time"] = 0
|
|
535
612
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
masster/__init__.py,sha256=xeh-hwR_2umE0CpRXn8t22wbkt4IT-FBEzeJknL8J6c,670
|
|
2
|
-
masster/_version.py,sha256
|
|
2
|
+
masster/_version.py,sha256=XwisdBCXGK6Rlum0rwaZ6ZBJxS8eMw72i3m-LXgksaY,239
|
|
3
3
|
masster/chromatogram.py,sha256=f25rMrNvCQN0A93wp9QPdG3H4FiOlYPbRY3H4yd7Q5Y,18910
|
|
4
4
|
masster/logger.py,sha256=9uzuVEPwQkVlnsqT_eVvh33FZY_FIm3Wn2TaJcGhZP8,10674
|
|
5
5
|
masster/spectrum.py,sha256=XiClDcN1uiG-_2TIr7Bqp7x8gWvHPbC5oh3zUu3fr6Y,46789
|
|
@@ -17,7 +17,7 @@ masster/sample/plot.py,sha256=7iyBddUa8-4OAcNhbgNUR_mNJgh5KKLwIlIL0s14g9w,58110
|
|
|
17
17
|
masster/sample/processing.py,sha256=-kWNWTYpyqlD2adF5uQ3laGZ9Zg8h79RgL5DY6qEoxM,56972
|
|
18
18
|
masster/sample/sample.py,sha256=uTsLmm7aWvRZO1mIGD_9sYDlKO3Ws74aLp7fIeLn9Eo,15282
|
|
19
19
|
masster/sample/sample5_schema.json,sha256=pnQMOM4z0P6BdrXGOovVoVZwt-1gS-v8KkhTgbSH8R8,3405
|
|
20
|
-
masster/sample/save.py,sha256=
|
|
20
|
+
masster/sample/save.py,sha256=LCWYYhlq8K98eUB6EZIGep_RLujQ39ayqgbPHPuNYec,31198
|
|
21
21
|
masster/sample/defaults/__init__.py,sha256=aNVdfpiJnyQQ9Jm5KCaJm-ySsi6S4aPEWFglVdKYnag,432
|
|
22
22
|
masster/sample/defaults/find_adducts_def.py,sha256=dhk_F-cAd1lc39mmC7Xt5sZF20LmLugR8JS2hu0DHYE,11305
|
|
23
23
|
masster/sample/defaults/find_features_def.py,sha256=LoIc2qDSiw9tsSyDJyjn6I3GSbatdkuzYY_14QkTFxQ,13512
|
|
@@ -43,8 +43,8 @@ masster/study/defaults/find_consensus_def.py,sha256=artvErq4w07SfHB0WHi68ZjxGg0X
|
|
|
43
43
|
masster/study/defaults/find_ms2_def.py,sha256=k-GmnCKgQuVO6M-EAjzGOqgdFrqZviRaNAdiFmwVujY,4907
|
|
44
44
|
masster/study/defaults/integrate_chrom_def.py,sha256=FY9QdJpdWe18sYucrwNKoZYY0eoOo0a_hcdkZHm_W00,7107
|
|
45
45
|
masster/study/defaults/study_def.py,sha256=SzUzd2YTGDGCHNMR-Dw57j5PprEnPhpITonv7wx6HQA,9035
|
|
46
|
-
masster-0.2.
|
|
47
|
-
masster-0.2.
|
|
48
|
-
masster-0.2.
|
|
49
|
-
masster-0.2.
|
|
50
|
-
masster-0.2.
|
|
46
|
+
masster-0.2.5.dist-info/METADATA,sha256=TtRzNj321SMax7E0IiDAUPd-En2lmQST8nEqSwTr_mw,44324
|
|
47
|
+
masster-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
48
|
+
masster-0.2.5.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
49
|
+
masster-0.2.5.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
50
|
+
masster-0.2.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|