masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

@@ -253,8 +253,7 @@ def get_spectrum(self, scan, **kwargs):
253
253
  spec=spect,
254
254
  scan_uid=scan_uid,
255
255
  feature_uid=scan_info["feature_uid"][0]
256
- if "feature_uid" in scan_info
257
- and scan_info["feature_uid"][0] is not None
256
+ if "feature_uid" in scan_info and scan_info["feature_uid"][0] is not None
258
257
  else feature_uid,
259
258
  q1_step=2,
260
259
  deisotope=deisotope,
@@ -447,9 +446,7 @@ def _spec_to_mat(
447
446
  closest_index = np.argmin(np.abs(ar2 - val1))
448
447
  closest_indices.append((i, closest_index))
449
448
  # filter out pairs that are not within the specified tolerance
450
- closest_indices = [
451
- (i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol
452
- ]
449
+ closest_indices = [(i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol]
453
450
  # remove duplicates from the list of indices
454
451
  closest_indices = list(set(closest_indices))
455
452
  # sort the list of indices by the first element (i) in ascending order
@@ -621,8 +618,7 @@ def find_features(self, **kwargs):
621
618
  mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
622
619
  mtd_par.setValue(
623
620
  "min_trace_length",
624
- float(params.get("min_trace_length_multiplier"))
625
- * float(params.get("chrom_fwhm_min")),
621
+ float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
626
622
  )
627
623
  mtd_par.setValue(
628
624
  "trace_termination_outliers",
@@ -801,7 +797,7 @@ def find_features(self, **kwargs):
801
797
  )
802
798
 
803
799
  self.features_df = df
804
- #self._features_sync()
800
+ # self._features_sync()
805
801
  self.logger.success(f"Feature detection completed. Total features: {len(df)}")
806
802
 
807
803
  # store params
@@ -1134,9 +1130,7 @@ def find_ms2(self, **kwargs):
1134
1130
  feature_rt_start = features_subset.select("rt_start").to_numpy().flatten()
1135
1131
  feature_rt_end = features_subset.select("rt_end").to_numpy().flatten()
1136
1132
  feature_uids = features_subset.select("feature_uid").to_numpy().flatten()
1137
- feature_indices = (
1138
- features_subset.with_row_index().select("index").to_numpy().flatten()
1139
- )
1133
+ feature_indices = features_subset.with_row_index().select("index").to_numpy().flatten()
1140
1134
 
1141
1135
  # Pre-compute RT radius for all features
1142
1136
  rt_radius = np.minimum(feature_rt - feature_rt_start, feature_rt_end - feature_rt)
@@ -1283,16 +1277,16 @@ def find_ms2(self, **kwargs):
1283
1277
 
1284
1278
  def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
1285
1279
  """Extract isotopic distributions from MS1 data and add to features_df.
1286
-
1280
+
1287
1281
  This method processes each feature to find isotopic distributions from MS1 data,
1288
1282
  similar to the study.find_iso() method but for individual samples. The method
1289
1283
  adds a new 'ms1_spec' column to features_df containing numpy arrays with
1290
1284
  isotopic distribution data.
1291
-
1285
+
1292
1286
  Args:
1293
1287
  rt_tolerance (float): RT tolerance in minutes for matching MS1 scans. Default 0.1.
1294
1288
  **kwargs: Additional parameters
1295
-
1289
+
1296
1290
  Notes:
1297
1291
  - Adds a new 'ms1_spec' column to features_df containing numpy arrays
1298
1292
  - Each array contains [mz, intensity] pairs for the isotopic distribution
@@ -1302,11 +1296,11 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
1302
1296
  if self.features_df is None or self.features_df.is_empty():
1303
1297
  self.logger.warning("No features found. Run find_features() first.")
1304
1298
  return
1305
-
1299
+
1306
1300
  if self.ms1_df is None or self.ms1_df.is_empty():
1307
1301
  self.logger.warning("No MS1 data found.")
1308
1302
  return
1309
-
1303
+
1310
1304
  # Check if ms1_spec column already exists
1311
1305
  if "ms1_spec" in self.features_df.columns:
1312
1306
  features_without_spec = self.features_df.filter(pl.col("ms1_spec").is_null())
@@ -1316,9 +1310,7 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
1316
1310
  self.logger.info(f"Processing {len(features_without_spec)} features without isotopic distributions.")
1317
1311
  else:
1318
1312
  # Add the ms1_spec column with None values
1319
- self.features_df = self.features_df.with_columns(
1320
- pl.lit(None, dtype=pl.Object).alias("ms1_spec")
1321
- )
1313
+ self.features_df = self.features_df.with_columns(pl.lit(None, dtype=pl.Object).alias("ms1_spec"))
1322
1314
  features_without_spec = self.features_df
1323
1315
  self.logger.info(f"Processing {len(features_without_spec)} features for isotopic distributions.")
1324
1316
 
@@ -1336,60 +1328,59 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
1336
1328
  6.02010,
1337
1329
  7.02345,
1338
1330
  ])
1339
-
1331
+
1340
1332
  # Convert rt_tolerance from minutes to seconds
1341
1333
  rt_tolerance_s = rt_tolerance * 60
1342
-
1334
+
1343
1335
  # Process each feature
1344
1336
  ms1_specs = []
1345
1337
  feature_indices = []
1346
-
1347
- for i, row in enumerate(tqdm(
1348
- features_without_spec.rows(named=True),
1349
- desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Extracting isotope patterns"
1350
- )):
1338
+
1339
+ for i, row in enumerate(
1340
+ tqdm(
1341
+ features_without_spec.rows(named=True),
1342
+ desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Extracting isotope patterns",
1343
+ )
1344
+ ):
1351
1345
  feature_rt = row["rt"]
1352
1346
  feature_mz = row["mz"]
1353
-
1347
+
1354
1348
  # Find MS1 scans within RT tolerance
1355
- rt_mask = (
1356
- (self.ms1_df["rt"] >= (feature_rt - rt_tolerance_s)) &
1357
- (self.ms1_df["rt"] <= (feature_rt + rt_tolerance_s))
1349
+ rt_mask = (self.ms1_df["rt"] >= (feature_rt - rt_tolerance_s)) & (
1350
+ self.ms1_df["rt"] <= (feature_rt + rt_tolerance_s)
1358
1351
  )
1359
1352
  ms1_in_range = self.ms1_df.filter(rt_mask)
1360
-
1353
+
1361
1354
  if ms1_in_range.is_empty():
1362
1355
  ms1_specs.append(None)
1363
1356
  feature_indices.append(row["feature_uid"])
1364
1357
  continue
1365
-
1358
+
1366
1359
  # Extract isotopic pattern
1367
1360
  isotope_pattern = []
1368
-
1361
+
1369
1362
  # Start with the monoisotopic peak (M+0)
1370
1363
  base_intensity = 0
1371
1364
  mz_tolerance = 0.01 # 10 ppm at 1000 Da
1372
-
1365
+
1373
1366
  # Find the base peak intensity
1374
- base_mask = (
1375
- (ms1_in_range["mz"] >= (feature_mz - mz_tolerance)) &
1376
- (ms1_in_range["mz"] <= (feature_mz + mz_tolerance))
1367
+ base_mask = (ms1_in_range["mz"] >= (feature_mz - mz_tolerance)) & (
1368
+ ms1_in_range["mz"] <= (feature_mz + mz_tolerance)
1377
1369
  )
1378
1370
  base_peaks = ms1_in_range.filter(base_mask)
1379
-
1371
+
1380
1372
  if not base_peaks.is_empty():
1381
1373
  base_intensity = base_peaks["inty"].max()
1382
1374
  isotope_pattern.append([feature_mz, base_intensity])
1383
-
1375
+
1384
1376
  # Look for isotope peaks
1385
1377
  for shift in isotope_shifts:
1386
1378
  isotope_mz = feature_mz + shift
1387
- isotope_mask = (
1388
- (ms1_in_range["mz"] >= (isotope_mz - mz_tolerance)) &
1389
- (ms1_in_range["mz"] <= (isotope_mz + mz_tolerance))
1379
+ isotope_mask = (ms1_in_range["mz"] >= (isotope_mz - mz_tolerance)) & (
1380
+ ms1_in_range["mz"] <= (isotope_mz + mz_tolerance)
1390
1381
  )
1391
1382
  isotope_peaks = ms1_in_range.filter(isotope_mask)
1392
-
1383
+
1393
1384
  if not isotope_peaks.is_empty():
1394
1385
  max_intensity = isotope_peaks["inty"].max()
1395
1386
  # Only keep isotope peaks that are at least 1% of base peak
@@ -1397,29 +1388,25 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
1397
1388
  # Get the mz of the most intense peak
1398
1389
  max_peak = isotope_peaks.filter(pl.col("inty") == max_intensity).row(0, named=True)
1399
1390
  isotope_pattern.append([max_peak["mz"], max_intensity])
1400
-
1391
+
1401
1392
  # Convert to numpy array or None if empty
1402
1393
  if len(isotope_pattern) > 1: # Need at least 2 points (monoisotopic + 1 isotope)
1403
1394
  ms1_spec = np.array(isotope_pattern, dtype=np.float64)
1404
1395
  else:
1405
1396
  ms1_spec = None
1406
-
1397
+
1407
1398
  ms1_specs.append(ms1_spec)
1408
1399
  feature_indices.append(row["feature_uid"])
1409
-
1400
+
1410
1401
  # Update the features_df with the isotopic spectra
1411
1402
  update_df = pl.DataFrame({
1412
1403
  "feature_uid": feature_indices,
1413
- "ms1_spec_new": pl.Series("ms1_spec_new", ms1_specs, dtype=pl.Object)
1404
+ "ms1_spec_new": pl.Series("ms1_spec_new", ms1_specs, dtype=pl.Object),
1414
1405
  })
1415
-
1406
+
1416
1407
  # Join and update
1417
1408
  self.features_df = (
1418
- self.features_df.join(
1419
- update_df,
1420
- on="feature_uid",
1421
- how="left"
1422
- )
1409
+ self.features_df.join(update_df, on="feature_uid", how="left")
1423
1410
  .with_columns([
1424
1411
  pl.when(pl.col("ms1_spec_new").is_not_null())
1425
1412
  .then(pl.col("ms1_spec_new"))
@@ -1428,11 +1415,11 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
1428
1415
  ])
1429
1416
  .drop("ms1_spec_new")
1430
1417
  )
1431
-
1418
+
1432
1419
  # Log results
1433
1420
  non_null_count = len([spec for spec in ms1_specs if spec is not None])
1434
1421
  self.logger.success(f"Extracted isotopic distributions for {non_null_count}/{len(ms1_specs)} features.")
1435
-
1422
+
1436
1423
  # Store parameters in history
1437
1424
  params_dict = {"rt_tolerance": rt_tolerance}
1438
1425
  params_dict.update(kwargs)
masster/sample/sample.py CHANGED
@@ -1,8 +1,8 @@
1
1
  """
2
2
  sample.py - Mass Spectrometry Sample Analysis Module
3
3
 
4
- This module provides comprehensive tools for processing and analyzing Data-Dependent Acquisition (DDA)
5
- mass spectrometry data. It defines the `Sample` class, which offers methods to load, process, analyze,
4
+ This module provides comprehensive tools for processing and analyzing Data-Dependent Acquisition (DDA)
5
+ mass spectrometry data. It defines the `Sample` class, which offers methods to load, process, analyze,
6
6
  and visualize mass spectrometry data from various file formats.
7
7
 
8
8
  Supported File Formats:
@@ -31,7 +31,7 @@ Core Dependencies:
31
31
  - `h5py`: HDF5 file format support for Sample5 files
32
32
 
33
33
  Classes:
34
- Sample: Main class for handling DDA mass spectrometry data, providing methods for
34
+ Sample: Main class for handling DDA mass spectrometry data, providing methods for
35
35
  data import, processing, analysis, and visualization.
36
36
 
37
37
  Typical Workflow:
@@ -43,43 +43,43 @@ Typical Workflow:
43
43
 
44
44
  Example Usage:
45
45
  Basic analysis workflow:
46
-
46
+
47
47
  ```python
48
48
  from masster.sample import Sample
49
-
49
+
50
50
  # Load a mass spectrometry file
51
51
  sample = Sample(filename="experiment.mzML")
52
-
52
+
53
53
  # Detect features
54
54
  sample.find_features()
55
-
55
+
56
56
  # Find MS2 spectra for features
57
57
  sample.find_ms2()
58
-
58
+
59
59
  # Generate 2D visualization
60
60
  sample.plot_2d()
61
-
61
+
62
62
  # Export results
63
63
  sample.export_features("features.xlsx")
64
64
  ```
65
-
65
+
66
66
  Advanced usage with custom parameters:
67
-
67
+
68
68
  ```python
69
69
  from masster.sample import Sample
70
70
  from masster.sample.defaults import sample_defaults, find_features_defaults
71
-
71
+
72
72
  # Create custom parameters
73
73
  params = sample_defaults(log_level="DEBUG", label="My Experiment")
74
74
  ff_params = find_features_defaults(noise_threshold_int=1000)
75
-
75
+
76
76
  # Initialize with custom parameters
77
77
  sample = Sample(params=params)
78
78
  sample.load("data.raw")
79
-
79
+
80
80
  # Feature detection with custom parameters
81
81
  sample.find_features(params=ff_params)
82
-
82
+
83
83
  # Generate comprehensive statistics
84
84
  stats = sample.get_dda_stats()
85
85
  sample.plot_dda_stats()
@@ -275,7 +275,7 @@ class Sample:
275
275
  save = save
276
276
  find_features = find_features
277
277
  find_adducts = find_adducts
278
- _get_adducts= _get_adducts
278
+ _get_adducts = _get_adducts
279
279
  find_iso = find_iso
280
280
  find_ms2 = find_ms2
281
281
  get_spectrum = get_spectrum
@@ -348,45 +348,44 @@ class Sample:
348
348
 
349
349
  def __dir__(self):
350
350
  """
351
- Custom __dir__ implementation to hide internal methods starting with '_'
352
- and backward compatibility aliases from tab completion and dir() calls,
351
+ Custom __dir__ implementation to hide internal methods starting with '_'
352
+ and backward compatibility aliases from tab completion and dir() calls,
353
353
  while keeping them accessible to class methods.
354
-
354
+
355
355
  Returns:
356
356
  list: List of public attribute and method names (excluding internal and deprecated methods)
357
357
  """
358
358
  # Define backward compatibility aliases to hide
359
359
  backward_compatibility_aliases = {
360
- 'load_study', # deprecated alias for _load_ms1
361
- 'filter_features', # alias for filter (deprecated naming)
362
- 'select_features', # alias for select (deprecated naming)
363
- 'features_filter', # confusing duplicate of filter
364
- 'features_select', # confusing duplicate of select
365
- 'merge_defaults', # alias for find_features_defaults (confusing)
366
- 'plot_feature_stats', # backward compatibility for plot_features_stats
367
- 'store_history', # deprecated alias for update_history
360
+ "load_study", # deprecated alias for _load_ms1
361
+ "filter_features", # alias for filter (deprecated naming)
362
+ "select_features", # alias for select (deprecated naming)
363
+ "features_filter", # confusing duplicate of filter
364
+ "features_select", # confusing duplicate of select
365
+ "merge_defaults", # alias for find_features_defaults (confusing)
366
+ "plot_feature_stats", # backward compatibility for plot_features_stats
367
+ "store_history", # deprecated alias for update_history
368
368
  }
369
-
369
+
370
370
  # Get all attributes from the class
371
371
  all_attrs = set()
372
-
372
+
373
373
  # Add attributes from the class and all its bases
374
374
  for cls in self.__class__.__mro__:
375
375
  all_attrs.update(cls.__dict__.keys())
376
-
376
+
377
377
  # Add instance attributes
378
378
  all_attrs.update(self.__dict__.keys())
379
-
379
+
380
380
  # Filter out attributes starting with '_' (but keep special methods like __init__, __str__, etc.)
381
381
  # Also filter out backward compatibility aliases
382
382
  public_attrs = [
383
- attr for attr in all_attrs
384
- if not attr.startswith('_') or attr.startswith('__') and attr.endswith('__')
383
+ attr for attr in all_attrs if not attr.startswith("_") or attr.startswith("__") and attr.endswith("__")
385
384
  ]
386
-
385
+
387
386
  # Remove backward compatibility aliases from the public attributes
388
387
  public_attrs = [attr for attr in public_attrs if attr not in backward_compatibility_aliases]
389
-
388
+
390
389
  return sorted(public_attrs)
391
390
 
392
391
  def logger_update(
@@ -442,10 +441,7 @@ class Sample:
442
441
 
443
442
  # Get all currently loaded modules that are part of the sample package
444
443
  for module_name in sys.modules:
445
- if (
446
- module_name.startswith(sample_module_prefix)
447
- and module_name != current_module
448
- ):
444
+ if module_name.startswith(sample_module_prefix) and module_name != current_module:
449
445
  sample_modules.append(module_name)
450
446
 
451
447
  # Add core masster modules
@@ -461,15 +457,10 @@ class Sample:
461
457
  study_modules = []
462
458
  study_module_prefix = f"{base_modname}.study."
463
459
  for module_name in sys.modules:
464
- if (
465
- module_name.startswith(study_module_prefix)
466
- and module_name != current_module
467
- ):
460
+ if module_name.startswith(study_module_prefix) and module_name != current_module:
468
461
  study_modules.append(module_name)
469
462
 
470
- all_modules_to_reload = (
471
- core_modules + sample_modules + study_modules
472
- )
463
+ all_modules_to_reload = core_modules + sample_modules + study_modules
473
464
 
474
465
  # Reload all discovered modules
475
466
  for full_module_name in all_modules_to_reload:
masster/sample/save.py CHANGED
@@ -105,7 +105,8 @@ def save(self, filename=None):
105
105
  self._save_sample5(filename=filename)
106
106
  self.file_path = filename
107
107
 
108
- '''
108
+
109
+ """
109
110
  def _save_featureXML(self, filename="features.featureXML"):
110
111
  if self._oms_features_map is None:
111
112
  self.logger.warning("No features found.")
@@ -114,7 +115,9 @@ def _save_featureXML(self, filename="features.featureXML"):
114
115
  fh.store(filename, self._oms_features_map)
115
116
  self.logger.debug(f"Features Map saved to {filename}")
116
117
 
117
- '''
118
+ """
119
+
120
+
118
121
  def export_features(self, filename="features.csv"):
119
122
  """
120
123
  Export the features DataFrame to a CSV or Excel file.
@@ -140,11 +143,7 @@ def export_features(self, filename="features.csv"):
140
143
  (pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
141
144
  )
142
145
  clean_df = self.features_df.select(
143
- [
144
- col
145
- for col in self.features_df.columns
146
- if self.features_df[col].dtype not in (pl.List, pl.Object)
147
- ],
146
+ [col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)],
148
147
  )
149
148
  if filename.lower().endswith((".xls", ".xlsx")):
150
149
  clean_df.to_pandas().to_excel(filename, index=False)
@@ -231,7 +230,7 @@ def export_mgf(
231
230
  if rt_end is not None:
232
231
  features = features.filter(pl.col("rt") <= rt_end)
233
232
  # Note: We no longer filter out features without MS2 data here since we want to export
234
- # MS1 spectra for ALL features with isotope data. The MS2 filtering is done in the
233
+ # MS1 spectra for ALL features with isotope data. The MS2 filtering is done in the
235
234
  # second pass where we specifically check for ms2_scans.
236
235
 
237
236
  # Convert to list of dictionaries for faster iteration
@@ -269,26 +268,26 @@ def export_mgf(
269
268
  def write_ion(f, title, fuid, fid, mz, rt, charge, spect):
270
269
  if spect is None:
271
270
  return "none"
272
-
271
+
273
272
  # For MSLEVEL=2 ions, don't write empty spectra
274
273
  ms_level = spect.ms_level if spect.ms_level is not None else 1
275
274
  if ms_level > 1 and (len(spect.mz) == 0 or len(spect.inty) == 0):
276
275
  return "empty_ms2"
277
-
276
+
278
277
  # Create dynamic title based on MS level
279
278
  if ms_level == 1:
280
279
  # MS1: uid, rt, mz
281
280
  dynamic_title = f"uid:{fuid}, rt:{rt:.2f}, mz:{mz:.4f}"
282
281
  else:
283
282
  # MS2: uid, rt, mz, energy
284
- energy = spect.energy if hasattr(spect, 'energy') else 0
283
+ energy = spect.energy if hasattr(spect, "energy") else 0
285
284
  dynamic_title = f"uid:{fuid}, rt:{rt:.2f}, mz:{mz:.4f}, energy:{energy}"
286
-
285
+
287
286
  f.write(f"BEGIN IONS\nTITLE={dynamic_title}\n")
288
287
  f.write(f"FEATURE_UID={fuid}\n")
289
288
  f.write(f"FEATURE_ID={fid}\n")
290
289
  f.write(f"CHARGE={charge}\nPEPMASS={mz}\nRTINSECONDS={rt}\n")
291
-
290
+
292
291
  if spect.ms_level is None:
293
292
  f.write("MSLEVEL=1\n")
294
293
  # Add PRECURSORINTENSITY for MS1 spectra
@@ -301,15 +300,12 @@ def export_mgf(
301
300
  if spect.ms_level == 1 and len(spect.inty) > 0:
302
301
  precursor_intensity = max(spect.inty)
303
302
  f.write(f"PRECURSORINTENSITY={precursor_intensity:.0f}\n")
304
-
303
+
305
304
  if spect.ms_level is not None:
306
305
  if spect.ms_level > 1 and hasattr(spect, "energy"):
307
306
  f.write(f"ENERGY={spect.energy}\n")
308
307
  # Use list comprehension for better performance
309
- peak_lines = [
310
- f"{mz_val:.5f} {inty_val:.0f}\n"
311
- for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)
312
- ]
308
+ peak_lines = [f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)]
313
309
  f.writelines(peak_lines)
314
310
  f.write("END IONS\n\n")
315
311
  return "written"
@@ -322,8 +318,7 @@ def export_mgf(
322
318
 
323
319
  # count how many features have charge < 0
324
320
  if (
325
- self.features_df.filter(pl.col("charge") < 0).shape[0]
326
- - self.features_df.filter(pl.col("charge") > 0).shape[0]
321
+ self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
327
322
  > 0
328
323
  ):
329
324
  preferred_charge = -1
@@ -342,7 +337,7 @@ def export_mgf(
342
337
  filename = os.path.abspath(filename)
343
338
  with open(filename, "w", encoding="utf-8") as f:
344
339
  tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
345
-
340
+
346
341
  # First pass: Export MS1 spectra for ALL features with ms1_spec data
347
342
  for row in tqdm(
348
343
  features_list,
@@ -362,19 +357,15 @@ def export_mgf(
362
357
  if "ms1_spec" in row and row["ms1_spec"] is not None:
363
358
  # Create spectrum from ms1_spec isotope pattern data
364
359
  from masster.spectrum import Spectrum
365
-
360
+
366
361
  iso_data = row["ms1_spec"]
367
362
  if len(iso_data) >= 2: # Ensure we have mz and intensity arrays
368
363
  ms1_mz = iso_data[0]
369
364
  ms1_inty = iso_data[1]
370
-
365
+
371
366
  # Create a Spectrum object from the isotope data
372
- spect = Spectrum(
373
- mz=np.array(ms1_mz),
374
- inty=np.array(ms1_inty),
375
- ms_level=1
376
- )
377
-
367
+ spect = Spectrum(mz=np.array(ms1_mz), inty=np.array(ms1_inty), ms_level=1)
368
+
378
369
  charge = preferred_charge
379
370
  if row["charge"] is not None and row["charge"] != 0:
380
371
  charge = row["charge"]
@@ -395,7 +386,7 @@ def export_mgf(
395
386
  else:
396
387
  # No MS1 spectrum exported for features without ms1_spec data
397
388
  ms1_fallback_count += 1
398
-
389
+
399
390
  # Second pass: Export MS2 spectra for features with MS2 data
400
391
  for row in tqdm(
401
392
  features_list,
@@ -453,9 +444,7 @@ def export_mgf(
453
444
  q1_max=q1_ratio_max,
454
445
  )
455
446
  # Get the corresponding scan_uid from the list
456
- current_scan_uid = (
457
- scan_uids[i] if i < len(scan_uids) else "unknown"
458
- )
447
+ current_scan_uid = scan_uids[i] if i < len(scan_uids) else "unknown"
459
448
  result = write_ion(
460
449
  f,
461
450
  f"uid:{feature_uid}",
@@ -580,18 +569,14 @@ def export_mgf(
580
569
  spect = spect.centroid(
581
570
  tolerance=self.parameters["mz_tol_ms1_da"],
582
571
  ppm=self.parameters["mz_tol_ms1_ppm"],
583
- min_points=self.parameters[
584
- "centroid_min_points_ms1"
585
- ],
572
+ min_points=self.parameters["centroid_min_points_ms1"],
586
573
  algo=centroid_algo,
587
574
  )
588
575
  elif spect.ms_level == 2:
589
576
  spect = spect.centroid(
590
577
  tolerance=self.parameters["mz_tol_ms2_da"],
591
578
  ppm=self.parameters["mz_tol_ms2_ppm"],
592
- min_points=self.parameters[
593
- "centroid_min_points_ms2"
594
- ],
579
+ min_points=self.parameters["centroid_min_points_ms2"],
595
580
  algo=centroid_algo,
596
581
  )
597
582
  if deisotope:
@@ -654,7 +639,7 @@ def export_mgf(
654
639
  self.logger.info(f"Skipped {empty_ms2_count} empty MS2 spectra")
655
640
  if ms1_fallback_count > 0:
656
641
  self.logger.info(f"Skipped MS1 export for {ms1_fallback_count} features without isotope patterns")
657
-
642
+
658
643
  # Handle None values in logging
659
644
  inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
660
645
  q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
@@ -695,9 +680,7 @@ def export_dda_stats(self, filename="stats.csv"):
695
680
  ms2_count = len(self.scans_df.filter(pl.col("ms_level") == 2))
696
681
  features_count = len(self.features_df) if self.features_df is not None else 0
697
682
  features_with_ms2 = (
698
- self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
699
- if self.features_df is not None
700
- else 0
683
+ self.features_df.filter(pl.col("ms2_scans").is_not_null()).height if self.features_df is not None else 0
701
684
  )
702
685
 
703
686
  # Initialize a dictionary to hold statistics
@@ -712,9 +695,7 @@ def export_dda_stats(self, filename="stats.csv"):
712
695
  if "time_cycle" in self.scans_df.columns:
713
696
  ms1_df = self.scans_df.filter(pl.col("ms_level") == 1)
714
697
  avg_cycle_time = ms1_df["time_cycle"].mean()
715
- stats["Average_cycle_time"] = (
716
- avg_cycle_time if avg_cycle_time is not None else ""
717
- )
698
+ stats["Average_cycle_time"] = avg_cycle_time if avg_cycle_time is not None else ""
718
699
  else:
719
700
  stats["Average_cycle_time"] = 0
720
701
 
@@ -851,32 +832,27 @@ def export_xlsx(self, filename="features.xlsx"):
851
832
  return
852
833
 
853
834
  # Validate filename extension
854
- if not filename.lower().endswith(('.xlsx', '.xls')):
835
+ if not filename.lower().endswith((".xlsx", ".xls")):
855
836
  raise ValueError("Filename must end with '.xlsx' or '.xls' for Excel export")
856
-
837
+
857
838
  filename = os.path.abspath(filename)
858
-
839
+
859
840
  # Clone the DataFrame to avoid modifying the original
860
841
  clean_df = self.features_df.clone()
861
-
842
+
862
843
  # Add a column has_ms2=True if column ms2_scans is not None
863
844
  if "ms2_scans" in clean_df.columns:
864
- clean_df = clean_df.with_columns(
865
- (pl.col("ms2_scans").is_not_null()).alias("has_ms2")
866
- )
867
-
845
+ clean_df = clean_df.with_columns((pl.col("ms2_scans").is_not_null()).alias("has_ms2"))
846
+
868
847
  # Filter out columns with List or Object data types that can't be exported to Excel
869
- exportable_columns = [
870
- col for col in clean_df.columns
871
- if clean_df[col].dtype not in (pl.List, pl.Object)
872
- ]
873
-
848
+ exportable_columns = [col for col in clean_df.columns if clean_df[col].dtype not in (pl.List, pl.Object)]
849
+
874
850
  clean_df = clean_df.select(exportable_columns)
875
-
851
+
876
852
  # Convert to pandas and export to Excel
877
853
  pandas_df = clean_df.to_pandas()
878
854
  pandas_df.to_excel(filename, index=False)
879
-
855
+
880
856
  self.logger.success(f"Features exported to {filename} (Excel format)")
881
857
  self.logger.debug(f"Exported {len(clean_df)} features with {len(exportable_columns)} columns")
882
858