masster 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. masster/__init__.py +8 -8
  2. masster/_version.py +1 -1
  3. masster/chromatogram.py +3 -9
  4. masster/data/libs/README.md +1 -1
  5. masster/data/libs/ccm.csv +120 -120
  6. masster/data/libs/ccm.py +116 -62
  7. masster/data/libs/central_carbon_README.md +1 -1
  8. masster/data/libs/urine.py +161 -65
  9. masster/data/libs/urine_metabolites.csv +4693 -4693
  10. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +2 -2
  11. masster/logger.py +43 -78
  12. masster/sample/__init__.py +1 -1
  13. masster/sample/adducts.py +264 -338
  14. masster/sample/defaults/find_adducts_def.py +8 -21
  15. masster/sample/defaults/find_features_def.py +1 -6
  16. masster/sample/defaults/get_spectrum_def.py +1 -5
  17. masster/sample/defaults/sample_def.py +1 -5
  18. masster/sample/h5.py +282 -561
  19. masster/sample/helpers.py +75 -131
  20. masster/sample/lib.py +17 -42
  21. masster/sample/load.py +17 -31
  22. masster/sample/parameters.py +2 -6
  23. masster/sample/plot.py +27 -88
  24. masster/sample/processing.py +87 -117
  25. masster/sample/quant.py +51 -57
  26. masster/sample/sample.py +90 -103
  27. masster/sample/sample5_schema.json +44 -44
  28. masster/sample/save.py +12 -35
  29. masster/sample/sciex.py +19 -66
  30. masster/spectrum.py +20 -58
  31. masster/study/__init__.py +1 -1
  32. masster/study/defaults/align_def.py +1 -5
  33. masster/study/defaults/fill_chrom_def.py +1 -5
  34. masster/study/defaults/fill_def.py +1 -5
  35. masster/study/defaults/integrate_chrom_def.py +1 -5
  36. masster/study/defaults/integrate_def.py +1 -5
  37. masster/study/defaults/study_def.py +25 -58
  38. masster/study/export.py +207 -233
  39. masster/study/h5.py +136 -470
  40. masster/study/helpers.py +202 -495
  41. masster/study/helpers_optimized.py +13 -40
  42. masster/study/id.py +110 -213
  43. masster/study/load.py +143 -230
  44. masster/study/plot.py +257 -518
  45. masster/study/processing.py +257 -469
  46. masster/study/save.py +5 -15
  47. masster/study/study.py +276 -379
  48. masster/study/study5_schema.json +96 -96
  49. {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/METADATA +1 -1
  50. masster-0.4.1.dist-info/RECORD +67 -0
  51. masster-0.4.0.dist-info/RECORD +0 -67
  52. {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/WHEEL +0 -0
  53. {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/entry_points.txt +0 -0
  54. {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/licenses/LICENSE +0 -0
masster/sample/sciex.py CHANGED
@@ -262,9 +262,7 @@ class SciexWiff2FileReader:
  self._alpharaw_reader.import_raw(self.filename)
 
  # Extract basic information (SciexWiffData doesn't have sample_names property)
- self.sample_names = [
- "Sample_0",
- ] # Default since WIFF2 format needs investigation
+ self.sample_names = ["Sample_0"] # Default since WIFF2 format needs investigation
  self.sample_count = 1
  self.initialization_method = "alpharaw_SciexWiffData"
 
@@ -356,9 +354,7 @@ class SciexWiff2FileReader:
  # Get metadata from alpharaw reader
  try:
  if hasattr(self._alpharaw_reader, "get_spectrum_count"):
- metadata["total_spectra"] = (
- self._alpharaw_reader.get_spectrum_count()
- )
+ metadata["total_spectra"] = self._alpharaw_reader.get_spectrum_count()
 
  # Add alpharaw-specific metadata
  for attr in ["creation_time", "instrument_model", "ms_levels"]:
@@ -388,9 +384,7 @@ class SciexWiff2FileReader:
  if hasattr(sample, "Details"):
  details = sample.Details
  if hasattr(details, "AcquisitionDateTime"):
- sample_info["acquisition_time"] = str(
- details.AcquisitionDateTime.ToString("O"),
- )
+ sample_info["acquisition_time"] = str(details.AcquisitionDateTime.ToString("O"))
 
  if hasattr(sample, "MassSpectrometerSample"):
  ms_sample = sample.MassSpectrometerSample
@@ -440,29 +434,15 @@ class SciexWiff2FileReader:
 
  # Convert to the expected format
  spectral_data = {
- "peak_indices": spectrum_df[
- ["peak_start_idx", "peak_stop_idx"]
- ].values.flatten(),
+ "peak_indices": spectrum_df[["peak_start_idx", "peak_stop_idx"]].values.flatten(),
  "peak_mz": peak_df["mz"].values,
  "peak_intensity": peak_df["intensity"].values,
  "rt": spectrum_df["rt"].values,
  "ms_level": spectrum_df["ms_level"].values,
- "precursor_mz": spectrum_df.get(
- "precursor_mz",
- np.full(len(spectrum_df), -1.0),
- ).values,
- "precursor_charge": spectrum_df.get(
- "precursor_charge",
- np.full(len(spectrum_df), 0),
- ).values,
- "isolation_lower_mz": spectrum_df.get(
- "isolation_lower_mz",
- np.full(len(spectrum_df), -1.0),
- ).values,
- "isolation_upper_mz": spectrum_df.get(
- "isolation_upper_mz",
- np.full(len(spectrum_df), -1.0),
- ).values,
+ "precursor_mz": spectrum_df.get("precursor_mz", np.full(len(spectrum_df), -1.0)).values,
+ "precursor_charge": spectrum_df.get("precursor_charge", np.full(len(spectrum_df), 0)).values,
+ "isolation_lower_mz": spectrum_df.get("isolation_lower_mz", np.full(len(spectrum_df), -1.0)).values,
+ "isolation_upper_mz": spectrum_df.get("isolation_upper_mz", np.full(len(spectrum_df), -1.0)).values,
  "nce": spectrum_df.get("nce", np.full(len(spectrum_df), 0.0)).values,
  "metadata": {
  "format": "WIFF2",
@@ -475,10 +455,7 @@ class SciexWiff2FileReader:
  "total_peaks": len(peak_df),
  "ms1_count": np.sum(spectrum_df["ms_level"] == 1),
  "ms2_count": np.sum(spectrum_df["ms_level"] > 1),
- "rt_range": [
- float(spectrum_df["rt"].min()),
- float(spectrum_df["rt"].max()),
- ]
+ "rt_range": [float(spectrum_df["rt"].min()), float(spectrum_df["rt"].max())]
  if len(spectrum_df) > 0
  else [0, 0],
  "reader_method": "alpharaw",
@@ -501,9 +478,7 @@ class SciexWiff2FileReader:
  }
 
  if sample_id < 0 or sample_id >= self.sample_count:
- raise ValueError(
- f"Sample ID {sample_id} out of range (0-{self.sample_count - 1})",
- )
+ raise ValueError(f"Sample ID {sample_id} out of range (0-{self.sample_count - 1})")
 
  # Use the same loading approach as SciexWiffFileReader but with enhancements
  sample = self._wiff_file.GetSample(sample_id)
@@ -521,9 +496,7 @@ class SciexWiff2FileReader:
  isolation_lower_list: list[float] = []
  isolation_upper_list: list[float] = []
 
- exp_list = [
- ms_sample.GetMSExperiment(i) for i in range(ms_sample.ExperimentCount)
- ]
+ exp_list = [ms_sample.GetMSExperiment(i) for i in range(ms_sample.ExperimentCount)]
 
  for j in range(exp_list[0].Details.NumberOfScans):
  for i in range(ms_sample.ExperimentCount):
@@ -542,9 +515,7 @@ class SciexWiff2FileReader:
  continue
 
  mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
- int_array = dot_net_array_to_np_array(
- mass_spectrum.GetActualYValues(),
- ).astype(np.float32)
+ int_array = dot_net_array_to_np_array(mass_spectrum.GetActualYValues()).astype(np.float32)
 
  if enhanced_params["centroid"]:
  mz_array, int_array = naive_centroid(
@@ -554,9 +525,7 @@ class SciexWiff2FileReader:
  )
 
  if len(mz_array) > enhanced_params["keep_k_peaks"]:
- top_indices = np.argsort(int_array)[
- -enhanced_params["keep_k_peaks"] :
- ]
+ top_indices = np.argsort(int_array)[-enhanced_params["keep_k_peaks"] :]
  top_indices = np.sort(top_indices)
  mz_array = mz_array[top_indices]
  int_array = int_array[top_indices]
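Note: the keep_k_peaks hunk above trims each scan to its k most intense peaks while preserving m/z order. A minimal standalone sketch of that pattern (generic array names, not the masster API):

```python
import numpy as np

def keep_top_k_peaks(mz: np.ndarray, inty: np.ndarray, k: int):
    """Keep the k most intense peaks, preserving ascending m/z order."""
    if len(mz) <= k:
        return mz, inty
    top = np.argsort(inty)[-k:]   # indices of the k largest intensities
    top = np.sort(top)            # restore original (m/z) ordering
    return mz[top], inty[top]

mz = np.array([100.0, 150.0, 200.0, 250.0])
inty = np.array([10.0, 500.0, 50.0, 300.0])
print(keep_top_k_peaks(mz, inty, 2))  # (array([150., 250.]), array([500., 300.]))
```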
@@ -578,9 +547,7 @@ class SciexWiff2FileReader:
  from WiffOps4Python import WiffOps as DotNetWiffOps
 
  center_mz = DotNetWiffOps.get_center_mz(details)
- isolation_window = DotNetWiffOps.get_isolation_window(
- details,
- )
+ isolation_window = DotNetWiffOps.get_isolation_window(details)
  except:
  center_mz = mass_spectrum_info.ParentMZ
  isolation_window = 3.0
@@ -610,9 +577,7 @@ class SciexWiff2FileReader:
  return {
  "peak_indices": peak_indices,
  "peak_mz": np.concatenate(peak_mz_list) if peak_mz_list else np.array([]),
- "peak_intensity": np.concatenate(peak_intensity_list)
- if peak_intensity_list
- else np.array([]),
+ "peak_intensity": np.concatenate(peak_intensity_list) if peak_intensity_list else np.array([]),
  "rt": np.array(rt_list, dtype=np.float64),
  "ms_level": np.array(ms_level_list, dtype=np.int8),
  "precursor_mz": np.array(precursor_mz_list, dtype=np.float64),
@@ -629,9 +594,7 @@ class SciexWiff2FileReader:
  "total_peaks": sum(_peak_indices),
  "ms1_count": np.sum(np.array(ms_level_list) == 1),
  "ms2_count": np.sum(np.array(ms_level_list) > 1),
- "rt_range": [float(np.min(rt_list)), float(np.max(rt_list))]
- if rt_list
- else [0, 0],
+ "rt_range": [float(np.min(rt_list)), float(np.max(rt_list))] if rt_list else [0, 0],
  "creation_time": str(sample.Details.AcquisitionDateTime.ToString("O"))
  if hasattr(sample, "Details")
  else "",
@@ -733,10 +696,7 @@ class SciexWiffFileReader:
  isolation_lower_mz_list: list[float] = []
  isolation_upper_mz_list: list[float] = []
 
- exp_list = [
- self.msSample.GetMSExperiment(i)
- for i in range(self.msSample.ExperimentCount)
- ]
+ exp_list = [self.msSample.GetMSExperiment(i) for i in range(self.msSample.ExperimentCount)]
 
  for j in range(exp_list[0].Details.NumberOfScans):
  for i in range(self.msSample.ExperimentCount):
@@ -746,12 +706,7 @@ class SciexWiffFileReader:
  details = exp.Details
  ms_level = mass_spectrum_info.MSLevel
 
- if (
- ms_level > 1
- and not details.IsSwath
- and mass_spectrum.NumDataPoints <= 0
- and ignore_empty_scans
- ):
+ if ms_level > 1 and not details.IsSwath and mass_spectrum.NumDataPoints <= 0 and ignore_empty_scans:
  continue
 
  mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
@@ -915,9 +870,7 @@ class SciexWiffData:
  ignore_empty_scans=self.ignore_empty_scans,
  keep_k_peaks=self.keep_k_peaks_per_spec,
  )
- self.creation_time = (
- wiff_reader.wiffSample.Details.AcquisitionDateTime.ToString("O")
- )
+ self.creation_time = wiff_reader.wiffSample.Details.AcquisitionDateTime.ToString("O")
  wiff_reader.close()
  return data_dict
 
masster/spectrum.py CHANGED
@@ -138,7 +138,7 @@ class Spectrum:
 
  Example Usage:
  >>> import numpy as np
- >>> from master import spec
+ >>> from masster import spec
  >>> mz = np.array([100.0, 150.0, 200.0, 250.0])
  >>> intensity = np.array([1000, 5000, 3000, 800])
  >>> spectrum = spec(mz=mz, inty=intensity, ms_level=1)
@@ -278,9 +278,7 @@ class Spectrum:
 
  def pandalize(self):
  data = {
- key: val
- for key, val in self.__dict__.items()
- if isinstance(val, np.ndarray) and val.size == self.mz.size
+ key: val for key, val in self.__dict__.items() if isinstance(val, np.ndarray) and val.size == self.mz.size
  }
  return pd.DataFrame(data)
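Note: pandalize collects every array attribute whose length matches mz into a DataFrame. A minimal sketch of that filter-by-size pattern on a plain object (hypothetical attribute names, not the masster API):

```python
import numpy as np
import pandas as pd

class Peaks:
    def __init__(self):
        self.mz = np.array([100.0, 150.0, 200.0])
        self.inty = np.array([10.0, 20.0, 5.0])
        self.label = "demo"           # not an ndarray -> skipped
        self.rt = np.array([1.0])     # wrong length -> skipped

    def pandalize(self) -> pd.DataFrame:
        # Keep only ndarray attributes that are peak-aligned (same length as mz)
        data = {k: v for k, v in self.__dict__.items()
                if isinstance(v, np.ndarray) and v.size == self.mz.size}
        return pd.DataFrame(data)

print(Peaks().pandalize())  # columns: mz, inty
```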
 
@@ -303,20 +301,14 @@ class Spectrum:
  self.mz = self.mz[mask]
  self.inty = self.inty[mask]
  for key in self.__dict__:
- if (
- isinstance(self.__dict__[key], np.ndarray)
- and self.__dict__[key].size == mask.size
- ):
+ if isinstance(self.__dict__[key], np.ndarray) and self.__dict__[key].size == mask.size:
  self.__dict__[key] = self.__dict__[key][mask]
  if mz_max is not None:
  mask = self.mz <= mz_max
  self.mz = self.mz[mask]
  self.inty = self.inty[mask]
  for key in self.__dict__:
- if (
- isinstance(self.__dict__[key], np.ndarray)
- and self.__dict__[key].size == mask.size
- ):
+ if isinstance(self.__dict__[key], np.ndarray) and self.__dict__[key].size == mask.size:
  self.__dict__[key] = self.__dict__[key][mask]
  return self
 
@@ -434,10 +426,7 @@ class Spectrum:
  mask = self_c.inty > threshold
  length = self_c.mz.size
  for key in self_c.__dict__:
- if (
- isinstance(self_c.__dict__[key], np.ndarray)
- and self_c.__dict__[key].size == length
- ):
+ if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[key].size == length:
  self_c.__dict__[key] = self_c.__dict__[key][mask]
  self_c.history_add("t[BL]")
  self_c.bl = threshold
@@ -478,10 +467,7 @@ class Spectrum:
  spec_obj.history_add("f[eic_corr_max]")
  mask_length = len(mask)
  for key in spec_obj.__dict__:
- if (
- isinstance(spec_obj.__dict__[key], np.ndarray)
- and spec_obj.__dict__[key].size == mask_length
- ):
+ if isinstance(spec_obj.__dict__[key], np.ndarray) and spec_obj.__dict__[key].size == mask_length:
  spec_obj.__dict__[key] = spec_obj.__dict__[key][mask]
  return spec_obj
 
@@ -564,16 +550,12 @@ class Spectrum:
  s.history_add("s[SG]")
  case "cumsum":
  cumsum_vec = np.cumsum(np.insert(s.inty, 0, 0))
- ma_vec = (
- cumsum_vec[window_length:] - cumsum_vec[:-window_length]
- ) / window_length
- s.inty = np.concatenate(
- (
- s.inty[: window_length // 2],
- ma_vec,
- s.inty[-window_length // 2 :],
- ),
- )
+ ma_vec = (cumsum_vec[window_length:] - cumsum_vec[:-window_length]) / window_length
+ s.inty = np.concatenate((
+ s.inty[: window_length // 2],
+ ma_vec,
+ s.inty[-window_length // 2 :],
+ ))
  s.history_add("s[CSM]")
  s.history_add("s[CSM]")
  return s
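Note: the "cumsum" smoothing branch above computes a running mean from a cumulative sum, which costs O(n) regardless of the window size. A minimal standalone sketch of just that step (not the Spectrum API):

```python
import numpy as np

def moving_average_cumsum(y: np.ndarray, window: int) -> np.ndarray:
    """Running mean of length len(y) - window + 1 computed via cumulative sums."""
    c = np.cumsum(np.insert(y, 0, 0.0))          # c[i] = sum of y[:i]
    return (c[window:] - c[:-window]) / window   # each entry averages `window` samples

y = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
print(moving_average_cumsum(y, 3))  # [2. 3. 4. 5.]
```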
@@ -707,9 +689,7 @@ class Spectrum:
  i += 1
  mask = np.where(is_isotopolog_of == 0)[0]
  for key in self_c.__dict__:
- if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[
- key
- ].size == len(is_isotopolog_of):
+ if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[key].size == len(is_isotopolog_of):
  self_c.__dict__[key] = self_c.__dict__[key][mask]
  if self_c.label is not None:
  self_c.label = self_c.label + " deiso."
@@ -749,9 +729,7 @@ class Spectrum:
  cvalues = (cvalues - cmap_min) / (cmap_max - cmap_min) * 255
  cm = process_cmap(cmap, ncolors=255, provider=cmap_provider)
  colors = [
- rgb2hex(cm[int(i * (len(cm) - 1) / 255)])
- if not np.isnan(i)
- else rgb2hex((0, 0, 0))
+ rgb2hex(cm[int(i * (len(cm) - 1) / 255)]) if not np.isnan(i) else rgb2hex((0, 0, 0))
  for i in cvalues
  ]
  p = figure(
@@ -801,11 +779,7 @@ class Spectrum:
  p.line(mz, inty, line_color="black", legend_label=label)
  else:
  data = self.to_dict()
- data = {
- key: val
- for key, val in data.items()
- if isinstance(val, np.ndarray) and val.size == mz.size
- }
+ data = {key: val for key, val in data.items() if isinstance(val, np.ndarray) and val.size == mz.size}
  if ylog:
  data["zeros"] = np.ones_like(mz)
  else:
@@ -844,9 +818,7 @@ class Spectrum:
  tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
  hover_tool = HoverTool(renderers=[sc], tooltips=tooltips)
  p.add_tools(hover_tool)
- box_zoom_tools = [
- tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)
- ]
+ box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
  if box_zoom_tools:
  p.toolbar.active_drag = box_zoom_tools[0]
  if colorby is not None:
@@ -1001,9 +973,7 @@ def combine_peaks(
  all_inty = np.concatenate([pm.inty for pm in spectra])
 
  # Track which spectrum each peak came from
- spectrum_indices = np.concatenate(
- [np.full(len(pm.mz), i) for i, pm in enumerate(spectra)],
- )
+ spectrum_indices = np.concatenate([np.full(len(pm.mz), i) for i, pm in enumerate(spectra)])
 
  if all_mz.size < 2:
  return Spectrum(
@@ -1137,9 +1107,7 @@ def plot_spectra(
  num_plots = len(spectra)
  cm = process_cmap(cmap, ncolors=num_plots, provider=cmap_provider)
  colors = [
- rgb2hex(cm[int(i * (len(cm) - 1) / (num_plots - 1))])
- if num_plots > 1
- else rgb2hex(cm[0])
+ rgb2hex(cm[int(i * (len(cm) - 1) / (num_plots - 1))]) if num_plots > 1 else rgb2hex(cm[0])
  for i in range(num_plots)
  ]
 
@@ -1237,11 +1205,7 @@ def plot_spectra(
  # For centroided spectra, build a data source that includes all available array attributes
  data = spec.to_dict()
  # remove all keys whose value does not have the size of mz
- data = {
- key: val
- for key, val in data.items()
- if isinstance(val, np.ndarray) and val.size == mz.size
- }
+ data = {key: val for key, val in data.items() if isinstance(val, np.ndarray) and val.size == mz.size}
  data["zeros"] = np.zeros_like(mz)
  if colorby is not None:
  data[colorby] = mcolors
@@ -1280,9 +1244,7 @@ def plot_spectra(
  tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
  hover_tool = HoverTool(renderers=[sc], tooltips=tooltips) # seg
  p.add_tools(hover_tool)
- box_zoom_tools = [
- tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)
- ]
+ box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
  if box_zoom_tools:
  p.toolbar.active_drag = box_zoom_tools[0]
  except Exception as e:
masster/study/__init__.py CHANGED
@@ -1,5 +1,5 @@
  """
- Study module for master.
+ Study module for masster.
 
  This module provides the Sample class for handling mass spectrometry data.
  """
masster/study/defaults/align_def.py CHANGED
@@ -298,11 +298,7 @@ class align_defaults:
  "dtype": str,
  "description": "Method to use for extrapolation outside the data range in LOWESS",
  "default": "four-point-linear",
- "allowed_values": [
- "two-point-linear",
- "four-point-linear",
- "global-linear",
- ],
+ "allowed_values": ["two-point-linear", "four-point-linear", "global-linear"],
  },
  },
  repr=False,
masster/study/defaults/fill_chrom_def.py CHANGED
@@ -168,11 +168,7 @@ class fill_chrom_defaults:
  expected_dtype = self._param_metadata[param_name]["dtype"]
 
  # Handle optional types
- if (
- isinstance(expected_dtype, str)
- and expected_dtype.startswith("Optional")
- and value is not None
- ):
+ if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
  if "int" in expected_dtype and not isinstance(value, int):
  try:
  value = int(value)
masster/study/defaults/fill_def.py CHANGED
@@ -168,11 +168,7 @@ class fill_defaults:
  expected_dtype = self._param_metadata[param_name]["dtype"]
 
  # Handle optional types
- if (
- isinstance(expected_dtype, str)
- and expected_dtype.startswith("Optional")
- and value is not None
- ):
+ if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
  if "int" in expected_dtype and not isinstance(value, int):
  try:
  value = int(value)
masster/study/defaults/integrate_chrom_def.py CHANGED
@@ -135,11 +135,7 @@ class integrate_chrom_defaults:
  expected_dtype = self._param_metadata[param_name]["dtype"]
 
  # Handle optional types
- if (
- isinstance(expected_dtype, str)
- and expected_dtype.startswith("Optional")
- and value is not None
- ):
+ if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
  if "float" in expected_dtype and not isinstance(value, float):
  try:
  value = float(value)
masster/study/defaults/integrate_def.py CHANGED
@@ -135,11 +135,7 @@ class integrate_defaults:
  expected_dtype = self._param_metadata[param_name]["dtype"]
 
  # Handle optional types
- if (
- isinstance(expected_dtype, str)
- and expected_dtype.startswith("Optional")
- and value is not None
- ):
+ if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
  if "float" in expected_dtype and not isinstance(value, float):
  try:
  value = float(value)
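Note: the defaults classes above all apply the same coercion rule: when a parameter's declared dtype is a string such as "Optional[int]" or "Optional[float]" and a non-None value is supplied, the value is cast to the underlying type. A minimal sketch of that rule in isolation (hypothetical helper and error handling, not the masster API):

```python
def coerce_optional(expected_dtype, value):
    """Cast value to int/float when the dtype is declared as e.g. 'Optional[int]'."""
    if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
        if "int" in expected_dtype and not isinstance(value, int):
            try:
                value = int(value)
            except (TypeError, ValueError):
                raise ValueError(f"expected {expected_dtype}, got {value!r}")
        elif "float" in expected_dtype and not isinstance(value, float):
            try:
                value = float(value)
            except (TypeError, ValueError):
                raise ValueError(f"expected {expected_dtype}, got {value!r}")
    return value

print(coerce_optional("Optional[int]", "5"))     # 5
print(coerce_optional("Optional[float]", None))  # None
```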
masster/study/defaults/study_def.py CHANGED
@@ -33,7 +33,7 @@ class study_defaults:
 
  eic_mz_tol: float = 0.01
  eic_rt_tol: float = 10.0
-
+
  polarity: str = "positive"
  adducts: list[str] | None = None
  adduct_min_probability: float = 0.04
@@ -54,14 +54,7 @@ class study_defaults:
  "dtype": str,
  "description": "Logging level to be set for the logger",
  "default": "INFO",
- "allowed_values": [
- "TRACE",
- "DEBUG",
- "INFO",
- "WARNING",
- "ERROR",
- "CRITICAL",
- ],
+ "allowed_values": ["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
  },
  "log_label": {
  "dtype": "Optional[str]",
@@ -99,19 +92,14 @@ class study_defaults:
  "default": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
  "examples": {
  "positive": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
- "negative": [
- "H-1:-:0.95",
- "Cl:-:0.05",
- "CH2O2:0:0.2",
- "H-2-O:0:0.2",
- ],
+ "negative": ["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2", "H-2-O:0:0.2"]
  },
  "validation_rules": [
  "Format: element:charge:probability",
  "Charge must be +, -, or 0 (neutral)",
  "Probability must be between 0.0 and 1.0",
- "Sum of all charged adduct probabilities must equal 1.0",
- ],
+ "Sum of all charged adduct probabilities must equal 1.0"
+ ]
  },
  "adduct_min_probability": {
  "dtype": float,
@@ -128,71 +116,54 @@ class study_defaults:
  """Set polarity-specific defaults for adducts if not explicitly provided."""
  # If adducts is None, set based on polarity
  if self.adducts is None:
- if self.polarity.lower() in ["positive", "pos"]:
- self.adducts = [
- "+H:1:0.65",
- "+Na:1:0.15",
- "+NH4:1:0.15",
- "+K:1:0.05",
- "-H2O:0:0.15",
- ]
- elif self.polarity.lower() in ["negative", "neg"]:
- self.adducts = [
- "-H:-1:0.9",
- "+Cl:-1:0.1",
- "+CH2O2:0:0.15",
- "-H2O:0:0.15",
- ]
+ if self.polarity.lower() in ['positive', 'pos']:
+ self.adducts = ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"]
+ elif self.polarity.lower() in ['negative', 'neg']:
+ self.adducts = ["-H:-1:0.9", "+Cl:-1:0.1", "+CH2O2:0:0.15", "-H2O:0:0.15"]
  else:
  # Default to positive if polarity is not recognized
- self.adducts = [
- "+H:1:0.65",
- "+Na:1:0.15",
- "+NH4:1:0.15",
- "+K:1:0.05",
- "-H2O:0:0.15",
- ]
+ self.adducts = ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"]
 
  def _validate_adducts(self, adduct_list: list[str]) -> bool:
  """
  Validate adducts according to OpenMS convention.
-
+
  Format: element:charge:probability
  - Elements can be molecular formulas (e.g., H, Na, NH4, H-1, CH2O2)
  - Charge must be +, -, or 0 (for neutral)
  - Probability must be a float between 0 and 1
  - Total probability of all charged adducts should sum to 1.0
-
+
  Args:
  adduct_list: List of adduct strings in OpenMS format
-
+
  Returns:
  True if all adducts are valid, False otherwise
  """
  if not adduct_list: # Empty list is valid
  return True
-
+
  charged_total_prob = 0.0
  neutral_total_prob = 0.0
-
+
  for adduct in adduct_list:
  if not isinstance(adduct, str):
  return False
-
+
  parts = adduct.split(":")
  if len(parts) != 3:
  return False
-
+
  element, charge, prob_str = parts
-
+
  # Validate element (non-empty string)
  if not element:
  return False
-
+
  # Validate charge
  if charge not in ["+", "-", "0"]:
  return False
-
+
  # Validate probability
  try:
  probability = float(prob_str)
@@ -200,20 +171,20 @@ class study_defaults:
  return False
  except (ValueError, TypeError):
  return False
-
+
  # Sum probabilities by charge type
  if charge in ["+", "-"]:
  charged_total_prob += probability
  else: # charge == "0" (neutral)
  neutral_total_prob += probability
-
+
  # Validate probability constraints
  # Charged adducts should sum to 1.0 (within tolerance)
  if charged_total_prob > 0 and abs(charged_total_prob - 1.0) > 1e-6:
  return False
-
+
  # Neutral adducts can have any total probability (they're optional)
-
+
  return True
 
  def get_info(self, param_name: str) -> dict[str, Any]:
@@ -345,11 +316,7 @@ class study_defaults:
  expected_dtype = self._param_metadata[param_name]["dtype"]
 
  # Handle optional types
- if (
- isinstance(expected_dtype, str)
- and expected_dtype.startswith("Optional")
- and value is not None
- ):
+ if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
  if "int" in expected_dtype and not isinstance(value, int):
  try:
  value = int(value)
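Note: the _validate_adducts hunks above describe the OpenMS-style adduct string format element:charge:probability and require the charged probabilities to sum to 1.0. A minimal standalone sketch of parsing and checking such a list (hypothetical helper that mirrors, but does not import, the masster code):

```python
def check_adducts(adducts: list[str]) -> bool:
    """True if every entry is 'element:charge:probability' and charged probabilities sum to 1.0 (or there are none)."""
    charged = 0.0
    for a in adducts:
        element, charge, prob = a.split(":")   # raises ValueError if the format is wrong
        if not element or charge not in ("+", "-", "0"):
            return False
        p = float(prob)
        if not 0.0 <= p <= 1.0:
            return False
        if charge in ("+", "-"):
            charged += p
    return charged == 0.0 or abs(charged - 1.0) < 1e-6

print(check_adducts(["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]))        # True
print(check_adducts(["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"]))  # True (neutral adduct not counted)
print(check_adducts(["H:+:0.7", "Na:+:0.1"]))                     # False (charged sum is 0.8)
```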