masster 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.2.3 → masster-0.2.4}/PKG-INFO +1 -1
- {masster-0.2.3 → masster-0.2.4}/pyproject.toml +1 -1
- {masster-0.2.3 → masster-0.2.4}/src/masster/_version.py +1 -1
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/save.py +149 -89
- {masster-0.2.3 → masster-0.2.4}/uv.lock +1 -1
- {masster-0.2.3 → masster-0.2.4}/.github/workflows/publish.yml +0 -0
- {masster-0.2.3 → masster-0.2.4}/.github/workflows/security.yml +0 -0
- {masster-0.2.3 → masster-0.2.4}/.github/workflows/test.yml +0 -0
- {masster-0.2.3 → masster-0.2.4}/.gitignore +0 -0
- {masster-0.2.3 → masster-0.2.4}/.pre-commit-config.yaml +0 -0
- {masster-0.2.3 → masster-0.2.4}/LICENSE +0 -0
- {masster-0.2.3 → masster-0.2.4}/Makefile +0 -0
- {masster-0.2.3 → masster-0.2.4}/README.md +0 -0
- {masster-0.2.3 → masster-0.2.4}/TESTING.md +0 -0
- {masster-0.2.3 → masster-0.2.4}/demo/example_batch_process.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/demo/example_sample_process.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/__init__.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/chromatogram.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/logger.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/__init__.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/h5.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/helpers.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/load.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/parameters.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/plot.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/processing.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/sample.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/sample/sample5_schema.json +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/spectrum.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/__init__.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/defaults/fill_chrom_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/export.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/h5.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/helpers.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/load.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/parameters.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/plot.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/processing.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/save.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/study.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/src/masster/study/study5_schema.json +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/conftest.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_chromatogram.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_defaults.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_imports.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_integration.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_logger.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_parameters.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_sample.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_spectrum.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_study.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tests/test_version.py +0 -0
- {masster-0.2.3 → masster-0.2.4}/tox.ini +0 -0
|
@@ -71,6 +71,23 @@ from masster.spectrum import combine_peaks
|
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
def save(self, filename=None):
|
|
74
|
+
"""
|
|
75
|
+
Save the current object to a file in the '.sample5' format.
|
|
76
|
+
|
|
77
|
+
If `filename` is not provided, the method attempts to use `self.file_path` as the base name,
|
|
78
|
+
replacing its extension with '.sample5'. If neither `filename` nor `self.file_path` is available,
|
|
79
|
+
a ValueError is raised.
|
|
80
|
+
|
|
81
|
+
If `filename` is provided and `self.file_path` is an absolute path, the extension of `filename`
|
|
82
|
+
is replaced with '.sample5'. Otherwise, if `self.file_path` is available, its extension is replaced
|
|
83
|
+
with '.sample5'. If neither is available, a ValueError is raised.
|
|
84
|
+
|
|
85
|
+
Parameters:
|
|
86
|
+
filename (str, optional): The name of the file to save to. If not provided, uses `self.file_path`.
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
None
|
|
90
|
+
"""
|
|
74
91
|
if filename is None:
|
|
75
92
|
# save to default file name
|
|
76
93
|
if self.file_path is not None:
|
|
@@ -98,20 +115,43 @@ def _save_featureXML(self, filename="features.featureXML"):
|
|
|
98
115
|
|
|
99
116
|
|
|
100
117
|
def export_features(self, filename="features.csv"):
|
|
101
|
-
|
|
102
|
-
|
|
118
|
+
"""
|
|
119
|
+
Export the features DataFrame to a CSV or Excel file.
|
|
120
|
+
|
|
121
|
+
This method clones the internal features DataFrame, adds a boolean column 'has_ms2' indicating
|
|
122
|
+
whether the 'ms2_scans' column is not null, and exports the resulting DataFrame to the specified file.
|
|
123
|
+
Columns with data types 'List' or 'Object' are excluded from the export.
|
|
124
|
+
|
|
125
|
+
Parameters:
|
|
126
|
+
filename (str): The path to the output file. If the filename ends with '.xls' or '.xlsx',
|
|
127
|
+
the data is exported in Excel format; otherwise, it is exported as CSV.
|
|
128
|
+
Defaults to 'features.csv'.
|
|
129
|
+
|
|
130
|
+
Side Effects:
|
|
131
|
+
Writes the exported data to the specified file and logs the export operation.
|
|
132
|
+
"""
|
|
133
|
+
# clone df
|
|
134
|
+
clean_df = self.features_df.clone()
|
|
135
|
+
filename = os.path.abspath(filename)
|
|
136
|
+
# add a column has_ms2=True if column ms2_scans is not None
|
|
137
|
+
if "ms2_scans" in clean_df.columns:
|
|
138
|
+
clean_df = clean_df.with_columns(
|
|
139
|
+
(pl.col("ms2_scans").is_not_null()).alias("has_ms2")
|
|
140
|
+
)
|
|
103
141
|
clean_df = self.features_df.select([
|
|
104
|
-
col
|
|
105
|
-
for col in self.features_df.columns
|
|
106
|
-
if self.features_df[col].dtype not in (pl.List, pl.Object)
|
|
142
|
+
col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
|
|
107
143
|
])
|
|
108
|
-
|
|
109
|
-
|
|
144
|
+
if filename.lower().endswith((".xls", ".xlsx")):
|
|
145
|
+
clean_df.to_pandas().to_excel(filename, index=False)
|
|
146
|
+
self.logger.info(f"Features exported to {filename} (Excel format)")
|
|
147
|
+
else:
|
|
148
|
+
clean_df.write_csv(filename)
|
|
149
|
+
self.logger.info(f"Features exported to {filename}")
|
|
110
150
|
|
|
111
151
|
|
|
112
152
|
def export_mgf(
|
|
113
153
|
self,
|
|
114
|
-
filename:str="features.mgf",
|
|
154
|
+
filename: str = "features.mgf",
|
|
115
155
|
use_cache=True,
|
|
116
156
|
selection="best",
|
|
117
157
|
split_energy=True,
|
|
@@ -128,7 +168,6 @@ def export_mgf(
|
|
|
128
168
|
q1_ratio_max=None,
|
|
129
169
|
eic_corr_min=None,
|
|
130
170
|
deisotope=True,
|
|
131
|
-
verbose=False,
|
|
132
171
|
precursor_trim=-(-10.0),
|
|
133
172
|
centroid_algo=None,
|
|
134
173
|
):
|
|
@@ -175,14 +214,28 @@ def export_mgf(
|
|
|
175
214
|
return
|
|
176
215
|
else:
|
|
177
216
|
self.features_df = self.features.get_df()
|
|
217
|
+
|
|
218
|
+
# Apply filtering at DataFrame level for better performance
|
|
178
219
|
features = self.features_df
|
|
179
|
-
|
|
220
|
+
if mz_start is not None:
|
|
221
|
+
features = features.filter(pl.col("mz") >= mz_start)
|
|
222
|
+
if mz_end is not None:
|
|
223
|
+
features = features.filter(pl.col("mz") <= mz_end)
|
|
224
|
+
if rt_start is not None:
|
|
225
|
+
features = features.filter(pl.col("rt") >= rt_start)
|
|
226
|
+
if rt_end is not None:
|
|
227
|
+
features = features.filter(pl.col("rt") <= rt_end)
|
|
228
|
+
if not include_all_ms1:
|
|
229
|
+
features = features.filter(pl.col("ms2_scans").is_not_null())
|
|
230
|
+
|
|
231
|
+
# Convert to list of dictionaries for faster iteration
|
|
232
|
+
features_list = features.to_dicts()
|
|
180
233
|
|
|
181
234
|
def filter_peaks(spec, inty_min=None, q1_min=None, eic_min=None, q1_max=None):
|
|
182
235
|
# create a copy of the spectrum
|
|
183
236
|
spec = spec.copy()
|
|
184
|
-
|
|
185
|
-
mask = [True] *
|
|
237
|
+
spec_len = len(spec.mz)
|
|
238
|
+
mask = [True] * spec_len
|
|
186
239
|
if inty_min is not None and inty_min > 0:
|
|
187
240
|
mask = np.array(mask) & (spec.inty >= inty_min)
|
|
188
241
|
# check if q1_ratio is an attribute of spec
|
|
@@ -201,9 +254,9 @@ def export_mgf(
|
|
|
201
254
|
getattr(spec, attr),
|
|
202
255
|
np.ndarray,
|
|
203
256
|
):
|
|
204
|
-
# check if attr has attribute 0 and its length is equal to
|
|
257
|
+
# check that attr is sized (has __len__) and its length is equal to spec_len:
|
|
205
258
|
if hasattr(getattr(spec, attr), "__len__"):
|
|
206
|
-
if len(getattr(spec, attr)) ==
|
|
259
|
+
if len(getattr(spec, attr)) == spec_len:
|
|
207
260
|
setattr(spec, attr, getattr(spec, attr)[mask])
|
|
208
261
|
return spec
|
|
209
262
|
|
|
@@ -218,47 +271,54 @@ def export_mgf(
|
|
|
218
271
|
else:
|
|
219
272
|
f.write(f"MSLEVEL={spect.ms_level}\n")
|
|
220
273
|
if spect.ms_level is not None:
|
|
221
|
-
if spect.ms_level > 1 and "energy"
|
|
274
|
+
if spect.ms_level > 1 and hasattr(spect, "energy"):
|
|
222
275
|
f.write(f"ENERGY={spect.energy}\n")
|
|
223
|
-
|
|
224
|
-
|
|
276
|
+
# Use list comprehension for better performance
|
|
277
|
+
peak_lines = [f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)]
|
|
278
|
+
f.writelines(peak_lines)
|
|
225
279
|
f.write("END IONS\n\n")
|
|
226
280
|
|
|
227
281
|
if centroid_algo is None:
|
|
228
|
-
if "centroid_algo"
|
|
229
|
-
centroid_algo = self.parameters
|
|
282
|
+
if hasattr(self.parameters, "centroid_algo"):
|
|
283
|
+
centroid_algo = self.parameters.centroid_algo
|
|
230
284
|
else:
|
|
231
285
|
centroid_algo = "cr"
|
|
232
286
|
|
|
287
|
+
# count how many features have charge < 0
|
|
288
|
+
if self.features_df.filter(pl.col("charge") < 0).shape[0]- self.features_df.filter(pl.col("charge") > 0).shape[0] > 0:
|
|
289
|
+
preferred_charge = -1
|
|
290
|
+
else:
|
|
291
|
+
preferred_charge = 1
|
|
292
|
+
|
|
233
293
|
c = 0
|
|
234
294
|
skip = 0
|
|
235
295
|
# check if features is empty
|
|
236
|
-
if len(
|
|
296
|
+
if len(features_list) == 0:
|
|
237
297
|
self.logger.warning("No features found.")
|
|
238
298
|
return
|
|
299
|
+
filename = os.path.abspath(filename)
|
|
239
300
|
with open(filename, "w", encoding="utf-8") as f:
|
|
240
301
|
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
241
|
-
for
|
|
242
|
-
|
|
243
|
-
total=len(
|
|
302
|
+
for row in tqdm(
|
|
303
|
+
features_list,
|
|
304
|
+
total=len(features_list),
|
|
244
305
|
desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Export MGF",
|
|
245
306
|
disable=tdqm_disable,
|
|
246
307
|
):
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
continue
|
|
308
|
+
# Pre-calculate common values
|
|
309
|
+
feature_uid = row["feature_uid"]
|
|
310
|
+
mz = row["mz"]
|
|
311
|
+
rt = row["rt"]
|
|
312
|
+
rt_str = f"{rt:.2f}"
|
|
313
|
+
mz_str = f"{mz:.4f}"
|
|
314
|
+
|
|
315
|
+
# Filtering is now done at DataFrame level, so the ms2_scans check below should no longer trigger
|
|
256
316
|
if row["ms2_scans"] is None and not include_all_ms1:
|
|
257
317
|
skip = skip + 1
|
|
258
318
|
continue
|
|
259
319
|
|
|
260
320
|
# write MS1 spectrum
|
|
261
|
-
ms1_scan_uid = self.find_closest_scan(rt=
|
|
321
|
+
ms1_scan_uid = self.find_closest_scan(rt=rt)["scan_uid"]
|
|
262
322
|
spect = self.get_spectrum(
|
|
263
323
|
ms1_scan_uid,
|
|
264
324
|
centroid=centroid,
|
|
@@ -271,17 +331,21 @@ def export_mgf(
|
|
|
271
331
|
if not full_ms1:
|
|
272
332
|
# trim spectrum to region around the precursor, it's wide to potentially identify adducts
|
|
273
333
|
spect = spect.trim(
|
|
274
|
-
mz_min=
|
|
275
|
-
mz_max=
|
|
334
|
+
mz_min=mz - 50,
|
|
335
|
+
mz_max=mz + 50,
|
|
276
336
|
)
|
|
277
337
|
|
|
338
|
+
charge = preferred_charge
|
|
339
|
+
if row["charge"] is not None and row["charge"] != 0:
|
|
340
|
+
charge = row["charge"]
|
|
341
|
+
|
|
278
342
|
write_ion(
|
|
279
343
|
f,
|
|
280
|
-
f"
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
344
|
+
f"feature_uid:{feature_uid}, rt:{rt_str}, mz:{mz_str}",
|
|
345
|
+
feature_uid,
|
|
346
|
+
mz,
|
|
347
|
+
rt,
|
|
348
|
+
charge,
|
|
285
349
|
spect,
|
|
286
350
|
)
|
|
287
351
|
|
|
@@ -319,29 +383,24 @@ def export_mgf(
|
|
|
319
383
|
q1_max=q1_ratio_max,
|
|
320
384
|
)
|
|
321
385
|
# Get the corresponding scan_uid from the list
|
|
322
|
-
current_scan_uid = (
|
|
323
|
-
scan_uids[i] if i < len(scan_uids) else "unknown"
|
|
324
|
-
)
|
|
386
|
+
current_scan_uid = scan_uids[i] if i < len(scan_uids) else "unknown"
|
|
325
387
|
write_ion(
|
|
326
388
|
f,
|
|
327
|
-
f"fid:{
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
389
|
+
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{current_scan_uid}",
|
|
390
|
+
feature_uid,
|
|
391
|
+
mz,
|
|
392
|
+
rt,
|
|
393
|
+
charge,
|
|
332
394
|
s,
|
|
333
395
|
)
|
|
396
|
+
c += 1
|
|
334
397
|
elif split_energy:
|
|
335
398
|
# get energy of all scans with scan_uid in ms2_scans
|
|
336
399
|
energy = [s.energy for s in row["ms2_specs"]]
|
|
337
400
|
# find unique energies
|
|
338
401
|
unique_energies = list(set(energy))
|
|
339
402
|
for e in unique_energies:
|
|
340
|
-
ms2_scans = [
|
|
341
|
-
row["ms2_scans"][i]
|
|
342
|
-
for i, s in enumerate(row["ms2_specs"])
|
|
343
|
-
if s.energy == e
|
|
344
|
-
]
|
|
403
|
+
ms2_scans = [row["ms2_scans"][i] for i, s in enumerate(row["ms2_specs"]) if s.energy == e]
|
|
345
404
|
if selection == "best":
|
|
346
405
|
# Keep as list with single element
|
|
347
406
|
ms2_scans = [ms2_scans[0]]
|
|
@@ -362,13 +421,14 @@ def export_mgf(
|
|
|
362
421
|
)
|
|
363
422
|
write_ion(
|
|
364
423
|
f,
|
|
365
|
-
f"fid:{
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
424
|
+
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{scan_uid}, energy:{e}",
|
|
425
|
+
feature_uid,
|
|
426
|
+
mz,
|
|
427
|
+
rt,
|
|
428
|
+
charge,
|
|
370
429
|
spect,
|
|
371
430
|
)
|
|
431
|
+
c += 1
|
|
372
432
|
else:
|
|
373
433
|
if selection == "best":
|
|
374
434
|
ms2_scans = row["ms2_scans"][0]
|
|
@@ -388,13 +448,14 @@ def export_mgf(
|
|
|
388
448
|
)
|
|
389
449
|
write_ion(
|
|
390
450
|
f,
|
|
391
|
-
f"fid:{
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
451
|
+
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{ms2_scans}",
|
|
452
|
+
feature_uid,
|
|
453
|
+
mz,
|
|
454
|
+
rt,
|
|
455
|
+
charge,
|
|
396
456
|
spect,
|
|
397
457
|
)
|
|
458
|
+
c += 1
|
|
398
459
|
elif selection == "all":
|
|
399
460
|
if merge:
|
|
400
461
|
specs = []
|
|
@@ -414,23 +475,19 @@ def export_mgf(
|
|
|
414
475
|
spect = spect.centroid(
|
|
415
476
|
tolerance=self.parameters["mz_tol_ms1_da"],
|
|
416
477
|
ppm=self.parameters["mz_tol_ms1_ppm"],
|
|
417
|
-
min_points=self.parameters[
|
|
418
|
-
"centroid_min_points_ms1"
|
|
419
|
-
],
|
|
478
|
+
min_points=self.parameters["centroid_min_points_ms1"],
|
|
420
479
|
algo=centroid_algo,
|
|
421
480
|
)
|
|
422
481
|
elif spect.ms_level == 2:
|
|
423
482
|
spect = spect.centroid(
|
|
424
483
|
tolerance=self.parameters["mz_tol_ms2_da"],
|
|
425
484
|
ppm=self.parameters["mz_tol_ms2_ppm"],
|
|
426
|
-
min_points=self.parameters[
|
|
427
|
-
"centroid_min_points_ms2"
|
|
428
|
-
],
|
|
485
|
+
min_points=self.parameters["centroid_min_points_ms2"],
|
|
429
486
|
algo=centroid_algo,
|
|
430
487
|
)
|
|
431
488
|
if deisotope:
|
|
432
489
|
spect = spect.deisotope()
|
|
433
|
-
title = f"fid:{
|
|
490
|
+
title = f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, merged"
|
|
434
491
|
spect = filter_peaks(
|
|
435
492
|
spect,
|
|
436
493
|
inty_min=inty_min,
|
|
@@ -441,12 +498,13 @@ def export_mgf(
|
|
|
441
498
|
write_ion(
|
|
442
499
|
f,
|
|
443
500
|
title,
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
501
|
+
feature_uid,
|
|
502
|
+
mz,
|
|
503
|
+
rt,
|
|
504
|
+
charge,
|
|
448
505
|
spect,
|
|
449
506
|
)
|
|
507
|
+
c += 1
|
|
450
508
|
else:
|
|
451
509
|
for ms2_scans in row["ms2_scans"]:
|
|
452
510
|
spect = self.get_spectrum(
|
|
@@ -465,24 +523,30 @@ def export_mgf(
|
|
|
465
523
|
)
|
|
466
524
|
write_ion(
|
|
467
525
|
f,
|
|
468
|
-
f"fid:{
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
526
|
+
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{ms2_scans}",
|
|
527
|
+
feature_uid,
|
|
528
|
+
mz,
|
|
529
|
+
rt,
|
|
530
|
+
charge,
|
|
473
531
|
spect,
|
|
474
532
|
)
|
|
533
|
+
c += 1
|
|
475
534
|
|
|
476
|
-
self.logger.info(f"Exported {c
|
|
535
|
+
self.logger.info(f"Exported {c} features to {filename}")
|
|
477
536
|
|
|
537
|
+
# Handle None values in logging
|
|
538
|
+
inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
|
|
539
|
+
q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
|
|
540
|
+
eic_corr_min_str = f"{eic_corr_min:.3f}" if eic_corr_min is not None else "None"
|
|
541
|
+
|
|
478
542
|
self.logger.debug(
|
|
479
|
-
f"MGF created with int>{
|
|
543
|
+
f"MGF created with int>{inty_min_str}, q1_ratio>{q1_ratio_min_str}, eic_corr>{eic_corr_min_str}",
|
|
480
544
|
)
|
|
481
545
|
self.logger.debug(
|
|
482
|
-
f"- Exported {c} MS2
|
|
546
|
+
f"- Exported {c} MS2 spectra for {len(features_list) - skip} precursors. Average spectra/feature is {c / (len(features_list) - skip + 0.000000001):.0f}",
|
|
483
547
|
)
|
|
484
548
|
self.logger.debug(
|
|
485
|
-
f"- Skipped {skip} features because no MS2
|
|
549
|
+
f"- Skipped {skip} features because no MS2 scans were available.",
|
|
486
550
|
)
|
|
487
551
|
|
|
488
552
|
|
|
@@ -510,9 +574,7 @@ def export_dda_stats(self, filename="stats.csv"):
|
|
|
510
574
|
ms2_count = len(self.scans_df.filter(pl.col("ms_level") == 2))
|
|
511
575
|
features_count = len(self.features_df) if self.features_df is not None else 0
|
|
512
576
|
features_with_ms2 = (
|
|
513
|
-
self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
|
|
514
|
-
if self.features_df is not None
|
|
515
|
-
else 0
|
|
577
|
+
self.features_df.filter(pl.col("ms2_scans").is_not_null()).height if self.features_df is not None else 0
|
|
516
578
|
)
|
|
517
579
|
|
|
518
580
|
# Initialize a dictionary to hold statistics
|
|
@@ -527,9 +589,7 @@ def export_dda_stats(self, filename="stats.csv"):
|
|
|
527
589
|
if "time_cycle" in self.scans_df.columns:
|
|
528
590
|
ms1_df = self.scans_df.filter(pl.col("ms_level") == 1)
|
|
529
591
|
avg_cycle_time = ms1_df["time_cycle"].mean()
|
|
530
|
-
stats["Average_cycle_time"] =
|
|
531
|
-
avg_cycle_time if avg_cycle_time is not None else ""
|
|
532
|
-
)
|
|
592
|
+
stats["Average_cycle_time"] = avg_cycle_time if avg_cycle_time is not None else ""
|
|
533
593
|
else:
|
|
534
594
|
stats["Average_cycle_time"] = 0
|
|
535
595
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|