masster 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- masster/__init__.py +8 -8
- masster/_version.py +1 -1
- masster/chromatogram.py +3 -9
- masster/data/libs/README.md +1 -1
- masster/data/libs/ccm.csv +120 -120
- masster/data/libs/ccm.py +116 -62
- masster/data/libs/central_carbon_README.md +1 -1
- masster/data/libs/urine.py +161 -65
- masster/data/libs/urine_metabolites.csv +4693 -4693
- masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +2 -2
- masster/logger.py +43 -78
- masster/sample/__init__.py +1 -1
- masster/sample/adducts.py +264 -338
- masster/sample/defaults/find_adducts_def.py +8 -21
- masster/sample/defaults/find_features_def.py +1 -6
- masster/sample/defaults/get_spectrum_def.py +1 -5
- masster/sample/defaults/sample_def.py +1 -5
- masster/sample/h5.py +282 -561
- masster/sample/helpers.py +75 -131
- masster/sample/lib.py +17 -42
- masster/sample/load.py +17 -31
- masster/sample/parameters.py +2 -6
- masster/sample/plot.py +27 -88
- masster/sample/processing.py +87 -117
- masster/sample/quant.py +51 -57
- masster/sample/sample.py +90 -103
- masster/sample/sample5_schema.json +44 -44
- masster/sample/save.py +12 -35
- masster/sample/sciex.py +19 -66
- masster/spectrum.py +20 -58
- masster/study/__init__.py +1 -1
- masster/study/defaults/align_def.py +1 -5
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/fill_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/integrate_def.py +1 -5
- masster/study/defaults/study_def.py +25 -58
- masster/study/export.py +207 -233
- masster/study/h5.py +136 -470
- masster/study/helpers.py +202 -495
- masster/study/helpers_optimized.py +13 -40
- masster/study/id.py +110 -213
- masster/study/load.py +143 -230
- masster/study/plot.py +257 -518
- masster/study/processing.py +257 -469
- masster/study/save.py +5 -15
- masster/study/study.py +276 -379
- masster/study/study5_schema.json +96 -96
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/METADATA +1 -1
- masster-0.4.1.dist-info/RECORD +67 -0
- masster-0.4.0.dist-info/RECORD +0 -67
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/WHEEL +0 -0
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/entry_points.txt +0 -0
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/licenses/LICENSE +0 -0
masster/sample/sciex.py
CHANGED
|
@@ -262,9 +262,7 @@ class SciexWiff2FileReader:
|
|
|
262
262
|
self._alpharaw_reader.import_raw(self.filename)
|
|
263
263
|
|
|
264
264
|
# Extract basic information (SciexWiffData doesn't have sample_names property)
|
|
265
|
-
self.sample_names = [
|
|
266
|
-
"Sample_0",
|
|
267
|
-
] # Default since WIFF2 format needs investigation
|
|
265
|
+
self.sample_names = ["Sample_0"] # Default since WIFF2 format needs investigation
|
|
268
266
|
self.sample_count = 1
|
|
269
267
|
self.initialization_method = "alpharaw_SciexWiffData"
|
|
270
268
|
|
|
@@ -356,9 +354,7 @@ class SciexWiff2FileReader:
|
|
|
356
354
|
# Get metadata from alpharaw reader
|
|
357
355
|
try:
|
|
358
356
|
if hasattr(self._alpharaw_reader, "get_spectrum_count"):
|
|
359
|
-
metadata["total_spectra"] = (
|
|
360
|
-
self._alpharaw_reader.get_spectrum_count()
|
|
361
|
-
)
|
|
357
|
+
metadata["total_spectra"] = self._alpharaw_reader.get_spectrum_count()
|
|
362
358
|
|
|
363
359
|
# Add alpharaw-specific metadata
|
|
364
360
|
for attr in ["creation_time", "instrument_model", "ms_levels"]:
|
|
@@ -388,9 +384,7 @@ class SciexWiff2FileReader:
|
|
|
388
384
|
if hasattr(sample, "Details"):
|
|
389
385
|
details = sample.Details
|
|
390
386
|
if hasattr(details, "AcquisitionDateTime"):
|
|
391
|
-
sample_info["acquisition_time"] = str(
|
|
392
|
-
details.AcquisitionDateTime.ToString("O"),
|
|
393
|
-
)
|
|
387
|
+
sample_info["acquisition_time"] = str(details.AcquisitionDateTime.ToString("O"))
|
|
394
388
|
|
|
395
389
|
if hasattr(sample, "MassSpectrometerSample"):
|
|
396
390
|
ms_sample = sample.MassSpectrometerSample
|
|
@@ -440,29 +434,15 @@ class SciexWiff2FileReader:
|
|
|
440
434
|
|
|
441
435
|
# Convert to the expected format
|
|
442
436
|
spectral_data = {
|
|
443
|
-
"peak_indices": spectrum_df[
|
|
444
|
-
["peak_start_idx", "peak_stop_idx"]
|
|
445
|
-
].values.flatten(),
|
|
437
|
+
"peak_indices": spectrum_df[["peak_start_idx", "peak_stop_idx"]].values.flatten(),
|
|
446
438
|
"peak_mz": peak_df["mz"].values,
|
|
447
439
|
"peak_intensity": peak_df["intensity"].values,
|
|
448
440
|
"rt": spectrum_df["rt"].values,
|
|
449
441
|
"ms_level": spectrum_df["ms_level"].values,
|
|
450
|
-
"precursor_mz": spectrum_df.get(
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
).values,
|
|
454
|
-
"precursor_charge": spectrum_df.get(
|
|
455
|
-
"precursor_charge",
|
|
456
|
-
np.full(len(spectrum_df), 0),
|
|
457
|
-
).values,
|
|
458
|
-
"isolation_lower_mz": spectrum_df.get(
|
|
459
|
-
"isolation_lower_mz",
|
|
460
|
-
np.full(len(spectrum_df), -1.0),
|
|
461
|
-
).values,
|
|
462
|
-
"isolation_upper_mz": spectrum_df.get(
|
|
463
|
-
"isolation_upper_mz",
|
|
464
|
-
np.full(len(spectrum_df), -1.0),
|
|
465
|
-
).values,
|
|
442
|
+
"precursor_mz": spectrum_df.get("precursor_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
443
|
+
"precursor_charge": spectrum_df.get("precursor_charge", np.full(len(spectrum_df), 0)).values,
|
|
444
|
+
"isolation_lower_mz": spectrum_df.get("isolation_lower_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
445
|
+
"isolation_upper_mz": spectrum_df.get("isolation_upper_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
466
446
|
"nce": spectrum_df.get("nce", np.full(len(spectrum_df), 0.0)).values,
|
|
467
447
|
"metadata": {
|
|
468
448
|
"format": "WIFF2",
|
|
@@ -475,10 +455,7 @@ class SciexWiff2FileReader:
|
|
|
475
455
|
"total_peaks": len(peak_df),
|
|
476
456
|
"ms1_count": np.sum(spectrum_df["ms_level"] == 1),
|
|
477
457
|
"ms2_count": np.sum(spectrum_df["ms_level"] > 1),
|
|
478
|
-
"rt_range": [
|
|
479
|
-
float(spectrum_df["rt"].min()),
|
|
480
|
-
float(spectrum_df["rt"].max()),
|
|
481
|
-
]
|
|
458
|
+
"rt_range": [float(spectrum_df["rt"].min()), float(spectrum_df["rt"].max())]
|
|
482
459
|
if len(spectrum_df) > 0
|
|
483
460
|
else [0, 0],
|
|
484
461
|
"reader_method": "alpharaw",
|
|
@@ -501,9 +478,7 @@ class SciexWiff2FileReader:
|
|
|
501
478
|
}
|
|
502
479
|
|
|
503
480
|
if sample_id < 0 or sample_id >= self.sample_count:
|
|
504
|
-
raise ValueError(
|
|
505
|
-
f"Sample ID {sample_id} out of range (0-{self.sample_count - 1})",
|
|
506
|
-
)
|
|
481
|
+
raise ValueError(f"Sample ID {sample_id} out of range (0-{self.sample_count - 1})")
|
|
507
482
|
|
|
508
483
|
# Use the same loading approach as SciexWiffFileReader but with enhancements
|
|
509
484
|
sample = self._wiff_file.GetSample(sample_id)
|
|
@@ -521,9 +496,7 @@ class SciexWiff2FileReader:
|
|
|
521
496
|
isolation_lower_list: list[float] = []
|
|
522
497
|
isolation_upper_list: list[float] = []
|
|
523
498
|
|
|
524
|
-
exp_list = [
|
|
525
|
-
ms_sample.GetMSExperiment(i) for i in range(ms_sample.ExperimentCount)
|
|
526
|
-
]
|
|
499
|
+
exp_list = [ms_sample.GetMSExperiment(i) for i in range(ms_sample.ExperimentCount)]
|
|
527
500
|
|
|
528
501
|
for j in range(exp_list[0].Details.NumberOfScans):
|
|
529
502
|
for i in range(ms_sample.ExperimentCount):
|
|
@@ -542,9 +515,7 @@ class SciexWiff2FileReader:
|
|
|
542
515
|
continue
|
|
543
516
|
|
|
544
517
|
mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
|
|
545
|
-
int_array = dot_net_array_to_np_array(
|
|
546
|
-
mass_spectrum.GetActualYValues(),
|
|
547
|
-
).astype(np.float32)
|
|
518
|
+
int_array = dot_net_array_to_np_array(mass_spectrum.GetActualYValues()).astype(np.float32)
|
|
548
519
|
|
|
549
520
|
if enhanced_params["centroid"]:
|
|
550
521
|
mz_array, int_array = naive_centroid(
|
|
@@ -554,9 +525,7 @@ class SciexWiff2FileReader:
|
|
|
554
525
|
)
|
|
555
526
|
|
|
556
527
|
if len(mz_array) > enhanced_params["keep_k_peaks"]:
|
|
557
|
-
top_indices = np.argsort(int_array)[
|
|
558
|
-
-enhanced_params["keep_k_peaks"] :
|
|
559
|
-
]
|
|
528
|
+
top_indices = np.argsort(int_array)[-enhanced_params["keep_k_peaks"] :]
|
|
560
529
|
top_indices = np.sort(top_indices)
|
|
561
530
|
mz_array = mz_array[top_indices]
|
|
562
531
|
int_array = int_array[top_indices]
|
|
@@ -578,9 +547,7 @@ class SciexWiff2FileReader:
|
|
|
578
547
|
from WiffOps4Python import WiffOps as DotNetWiffOps
|
|
579
548
|
|
|
580
549
|
center_mz = DotNetWiffOps.get_center_mz(details)
|
|
581
|
-
isolation_window = DotNetWiffOps.get_isolation_window(
|
|
582
|
-
details,
|
|
583
|
-
)
|
|
550
|
+
isolation_window = DotNetWiffOps.get_isolation_window(details)
|
|
584
551
|
except:
|
|
585
552
|
center_mz = mass_spectrum_info.ParentMZ
|
|
586
553
|
isolation_window = 3.0
|
|
@@ -610,9 +577,7 @@ class SciexWiff2FileReader:
|
|
|
610
577
|
return {
|
|
611
578
|
"peak_indices": peak_indices,
|
|
612
579
|
"peak_mz": np.concatenate(peak_mz_list) if peak_mz_list else np.array([]),
|
|
613
|
-
"peak_intensity": np.concatenate(peak_intensity_list)
|
|
614
|
-
if peak_intensity_list
|
|
615
|
-
else np.array([]),
|
|
580
|
+
"peak_intensity": np.concatenate(peak_intensity_list) if peak_intensity_list else np.array([]),
|
|
616
581
|
"rt": np.array(rt_list, dtype=np.float64),
|
|
617
582
|
"ms_level": np.array(ms_level_list, dtype=np.int8),
|
|
618
583
|
"precursor_mz": np.array(precursor_mz_list, dtype=np.float64),
|
|
@@ -629,9 +594,7 @@ class SciexWiff2FileReader:
|
|
|
629
594
|
"total_peaks": sum(_peak_indices),
|
|
630
595
|
"ms1_count": np.sum(np.array(ms_level_list) == 1),
|
|
631
596
|
"ms2_count": np.sum(np.array(ms_level_list) > 1),
|
|
632
|
-
"rt_range": [float(np.min(rt_list)), float(np.max(rt_list))]
|
|
633
|
-
if rt_list
|
|
634
|
-
else [0, 0],
|
|
597
|
+
"rt_range": [float(np.min(rt_list)), float(np.max(rt_list))] if rt_list else [0, 0],
|
|
635
598
|
"creation_time": str(sample.Details.AcquisitionDateTime.ToString("O"))
|
|
636
599
|
if hasattr(sample, "Details")
|
|
637
600
|
else "",
|
|
@@ -733,10 +696,7 @@ class SciexWiffFileReader:
|
|
|
733
696
|
isolation_lower_mz_list: list[float] = []
|
|
734
697
|
isolation_upper_mz_list: list[float] = []
|
|
735
698
|
|
|
736
|
-
exp_list = [
|
|
737
|
-
self.msSample.GetMSExperiment(i)
|
|
738
|
-
for i in range(self.msSample.ExperimentCount)
|
|
739
|
-
]
|
|
699
|
+
exp_list = [self.msSample.GetMSExperiment(i) for i in range(self.msSample.ExperimentCount)]
|
|
740
700
|
|
|
741
701
|
for j in range(exp_list[0].Details.NumberOfScans):
|
|
742
702
|
for i in range(self.msSample.ExperimentCount):
|
|
@@ -746,12 +706,7 @@ class SciexWiffFileReader:
|
|
|
746
706
|
details = exp.Details
|
|
747
707
|
ms_level = mass_spectrum_info.MSLevel
|
|
748
708
|
|
|
749
|
-
if
|
|
750
|
-
ms_level > 1
|
|
751
|
-
and not details.IsSwath
|
|
752
|
-
and mass_spectrum.NumDataPoints <= 0
|
|
753
|
-
and ignore_empty_scans
|
|
754
|
-
):
|
|
709
|
+
if ms_level > 1 and not details.IsSwath and mass_spectrum.NumDataPoints <= 0 and ignore_empty_scans:
|
|
755
710
|
continue
|
|
756
711
|
|
|
757
712
|
mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
|
|
@@ -915,9 +870,7 @@ class SciexWiffData:
|
|
|
915
870
|
ignore_empty_scans=self.ignore_empty_scans,
|
|
916
871
|
keep_k_peaks=self.keep_k_peaks_per_spec,
|
|
917
872
|
)
|
|
918
|
-
self.creation_time = (
|
|
919
|
-
wiff_reader.wiffSample.Details.AcquisitionDateTime.ToString("O")
|
|
920
|
-
)
|
|
873
|
+
self.creation_time = wiff_reader.wiffSample.Details.AcquisitionDateTime.ToString("O")
|
|
921
874
|
wiff_reader.close()
|
|
922
875
|
return data_dict
|
|
923
876
|
|
masster/spectrum.py
CHANGED
|
@@ -138,7 +138,7 @@ class Spectrum:
|
|
|
138
138
|
|
|
139
139
|
Example Usage:
|
|
140
140
|
>>> import numpy as np
|
|
141
|
-
>>> from
|
|
141
|
+
>>> from masster import spec
|
|
142
142
|
>>> mz = np.array([100.0, 150.0, 200.0, 250.0])
|
|
143
143
|
>>> intensity = np.array([1000, 5000, 3000, 800])
|
|
144
144
|
>>> spectrum = spec(mz=mz, inty=intensity, ms_level=1)
|
|
@@ -278,9 +278,7 @@ class Spectrum:
|
|
|
278
278
|
|
|
279
279
|
def pandalize(self):
|
|
280
280
|
data = {
|
|
281
|
-
key: val
|
|
282
|
-
for key, val in self.__dict__.items()
|
|
283
|
-
if isinstance(val, np.ndarray) and val.size == self.mz.size
|
|
281
|
+
key: val for key, val in self.__dict__.items() if isinstance(val, np.ndarray) and val.size == self.mz.size
|
|
284
282
|
}
|
|
285
283
|
return pd.DataFrame(data)
|
|
286
284
|
|
|
@@ -303,20 +301,14 @@ class Spectrum:
|
|
|
303
301
|
self.mz = self.mz[mask]
|
|
304
302
|
self.inty = self.inty[mask]
|
|
305
303
|
for key in self.__dict__:
|
|
306
|
-
if (
|
|
307
|
-
isinstance(self.__dict__[key], np.ndarray)
|
|
308
|
-
and self.__dict__[key].size == mask.size
|
|
309
|
-
):
|
|
304
|
+
if isinstance(self.__dict__[key], np.ndarray) and self.__dict__[key].size == mask.size:
|
|
310
305
|
self.__dict__[key] = self.__dict__[key][mask]
|
|
311
306
|
if mz_max is not None:
|
|
312
307
|
mask = self.mz <= mz_max
|
|
313
308
|
self.mz = self.mz[mask]
|
|
314
309
|
self.inty = self.inty[mask]
|
|
315
310
|
for key in self.__dict__:
|
|
316
|
-
if (
|
|
317
|
-
isinstance(self.__dict__[key], np.ndarray)
|
|
318
|
-
and self.__dict__[key].size == mask.size
|
|
319
|
-
):
|
|
311
|
+
if isinstance(self.__dict__[key], np.ndarray) and self.__dict__[key].size == mask.size:
|
|
320
312
|
self.__dict__[key] = self.__dict__[key][mask]
|
|
321
313
|
return self
|
|
322
314
|
|
|
@@ -434,10 +426,7 @@ class Spectrum:
|
|
|
434
426
|
mask = self_c.inty > threshold
|
|
435
427
|
length = self_c.mz.size
|
|
436
428
|
for key in self_c.__dict__:
|
|
437
|
-
if (
|
|
438
|
-
isinstance(self_c.__dict__[key], np.ndarray)
|
|
439
|
-
and self_c.__dict__[key].size == length
|
|
440
|
-
):
|
|
429
|
+
if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[key].size == length:
|
|
441
430
|
self_c.__dict__[key] = self_c.__dict__[key][mask]
|
|
442
431
|
self_c.history_add("t[BL]")
|
|
443
432
|
self_c.bl = threshold
|
|
@@ -478,10 +467,7 @@ class Spectrum:
|
|
|
478
467
|
spec_obj.history_add("f[eic_corr_max]")
|
|
479
468
|
mask_length = len(mask)
|
|
480
469
|
for key in spec_obj.__dict__:
|
|
481
|
-
if (
|
|
482
|
-
isinstance(spec_obj.__dict__[key], np.ndarray)
|
|
483
|
-
and spec_obj.__dict__[key].size == mask_length
|
|
484
|
-
):
|
|
470
|
+
if isinstance(spec_obj.__dict__[key], np.ndarray) and spec_obj.__dict__[key].size == mask_length:
|
|
485
471
|
spec_obj.__dict__[key] = spec_obj.__dict__[key][mask]
|
|
486
472
|
return spec_obj
|
|
487
473
|
|
|
@@ -564,16 +550,12 @@ class Spectrum:
|
|
|
564
550
|
s.history_add("s[SG]")
|
|
565
551
|
case "cumsum":
|
|
566
552
|
cumsum_vec = np.cumsum(np.insert(s.inty, 0, 0))
|
|
567
|
-
ma_vec = (
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
ma_vec,
|
|
574
|
-
s.inty[-window_length // 2 :],
|
|
575
|
-
),
|
|
576
|
-
)
|
|
553
|
+
ma_vec = (cumsum_vec[window_length:] - cumsum_vec[:-window_length]) / window_length
|
|
554
|
+
s.inty = np.concatenate((
|
|
555
|
+
s.inty[: window_length // 2],
|
|
556
|
+
ma_vec,
|
|
557
|
+
s.inty[-window_length // 2 :],
|
|
558
|
+
))
|
|
577
559
|
s.history_add("s[CSM]")
|
|
578
560
|
s.history_add("s[CSM]")
|
|
579
561
|
return s
|
|
@@ -707,9 +689,7 @@ class Spectrum:
|
|
|
707
689
|
i += 1
|
|
708
690
|
mask = np.where(is_isotopolog_of == 0)[0]
|
|
709
691
|
for key in self_c.__dict__:
|
|
710
|
-
if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[
|
|
711
|
-
key
|
|
712
|
-
].size == len(is_isotopolog_of):
|
|
692
|
+
if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[key].size == len(is_isotopolog_of):
|
|
713
693
|
self_c.__dict__[key] = self_c.__dict__[key][mask]
|
|
714
694
|
if self_c.label is not None:
|
|
715
695
|
self_c.label = self_c.label + " deiso."
|
|
@@ -749,9 +729,7 @@ class Spectrum:
|
|
|
749
729
|
cvalues = (cvalues - cmap_min) / (cmap_max - cmap_min) * 255
|
|
750
730
|
cm = process_cmap(cmap, ncolors=255, provider=cmap_provider)
|
|
751
731
|
colors = [
|
|
752
|
-
rgb2hex(cm[int(i * (len(cm) - 1) / 255)])
|
|
753
|
-
if not np.isnan(i)
|
|
754
|
-
else rgb2hex((0, 0, 0))
|
|
732
|
+
rgb2hex(cm[int(i * (len(cm) - 1) / 255)]) if not np.isnan(i) else rgb2hex((0, 0, 0))
|
|
755
733
|
for i in cvalues
|
|
756
734
|
]
|
|
757
735
|
p = figure(
|
|
@@ -801,11 +779,7 @@ class Spectrum:
|
|
|
801
779
|
p.line(mz, inty, line_color="black", legend_label=label)
|
|
802
780
|
else:
|
|
803
781
|
data = self.to_dict()
|
|
804
|
-
data = {
|
|
805
|
-
key: val
|
|
806
|
-
for key, val in data.items()
|
|
807
|
-
if isinstance(val, np.ndarray) and val.size == mz.size
|
|
808
|
-
}
|
|
782
|
+
data = {key: val for key, val in data.items() if isinstance(val, np.ndarray) and val.size == mz.size}
|
|
809
783
|
if ylog:
|
|
810
784
|
data["zeros"] = np.ones_like(mz)
|
|
811
785
|
else:
|
|
@@ -844,9 +818,7 @@ class Spectrum:
|
|
|
844
818
|
tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
|
|
845
819
|
hover_tool = HoverTool(renderers=[sc], tooltips=tooltips)
|
|
846
820
|
p.add_tools(hover_tool)
|
|
847
|
-
box_zoom_tools = [
|
|
848
|
-
tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)
|
|
849
|
-
]
|
|
821
|
+
box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
|
|
850
822
|
if box_zoom_tools:
|
|
851
823
|
p.toolbar.active_drag = box_zoom_tools[0]
|
|
852
824
|
if colorby is not None:
|
|
@@ -1001,9 +973,7 @@ def combine_peaks(
|
|
|
1001
973
|
all_inty = np.concatenate([pm.inty for pm in spectra])
|
|
1002
974
|
|
|
1003
975
|
# Track which spectrum each peak came from
|
|
1004
|
-
spectrum_indices = np.concatenate(
|
|
1005
|
-
[np.full(len(pm.mz), i) for i, pm in enumerate(spectra)],
|
|
1006
|
-
)
|
|
976
|
+
spectrum_indices = np.concatenate([np.full(len(pm.mz), i) for i, pm in enumerate(spectra)])
|
|
1007
977
|
|
|
1008
978
|
if all_mz.size < 2:
|
|
1009
979
|
return Spectrum(
|
|
@@ -1137,9 +1107,7 @@ def plot_spectra(
|
|
|
1137
1107
|
num_plots = len(spectra)
|
|
1138
1108
|
cm = process_cmap(cmap, ncolors=num_plots, provider=cmap_provider)
|
|
1139
1109
|
colors = [
|
|
1140
|
-
rgb2hex(cm[int(i * (len(cm) - 1) / (num_plots - 1))])
|
|
1141
|
-
if num_plots > 1
|
|
1142
|
-
else rgb2hex(cm[0])
|
|
1110
|
+
rgb2hex(cm[int(i * (len(cm) - 1) / (num_plots - 1))]) if num_plots > 1 else rgb2hex(cm[0])
|
|
1143
1111
|
for i in range(num_plots)
|
|
1144
1112
|
]
|
|
1145
1113
|
|
|
@@ -1237,11 +1205,7 @@ def plot_spectra(
|
|
|
1237
1205
|
# For centroided spectra, build a data source that includes all available array attributes
|
|
1238
1206
|
data = spec.to_dict()
|
|
1239
1207
|
# remove all keys whose value does not have the size of mz
|
|
1240
|
-
data = {
|
|
1241
|
-
key: val
|
|
1242
|
-
for key, val in data.items()
|
|
1243
|
-
if isinstance(val, np.ndarray) and val.size == mz.size
|
|
1244
|
-
}
|
|
1208
|
+
data = {key: val for key, val in data.items() if isinstance(val, np.ndarray) and val.size == mz.size}
|
|
1245
1209
|
data["zeros"] = np.zeros_like(mz)
|
|
1246
1210
|
if colorby is not None:
|
|
1247
1211
|
data[colorby] = mcolors
|
|
@@ -1280,9 +1244,7 @@ def plot_spectra(
|
|
|
1280
1244
|
tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
|
|
1281
1245
|
hover_tool = HoverTool(renderers=[sc], tooltips=tooltips) # seg
|
|
1282
1246
|
p.add_tools(hover_tool)
|
|
1283
|
-
box_zoom_tools = [
|
|
1284
|
-
tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)
|
|
1285
|
-
]
|
|
1247
|
+
box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
|
|
1286
1248
|
if box_zoom_tools:
|
|
1287
1249
|
p.toolbar.active_drag = box_zoom_tools[0]
|
|
1288
1250
|
except Exception as e:
|
masster/study/__init__.py
CHANGED
|
@@ -298,11 +298,7 @@ class align_defaults:
|
|
|
298
298
|
"dtype": str,
|
|
299
299
|
"description": "Method to use for extrapolation outside the data range in LOWESS",
|
|
300
300
|
"default": "four-point-linear",
|
|
301
|
-
"allowed_values": [
|
|
302
|
-
"two-point-linear",
|
|
303
|
-
"four-point-linear",
|
|
304
|
-
"global-linear",
|
|
305
|
-
],
|
|
301
|
+
"allowed_values": ["two-point-linear", "four-point-linear", "global-linear"],
|
|
306
302
|
},
|
|
307
303
|
},
|
|
308
304
|
repr=False,
|
|
@@ -168,11 +168,7 @@ class fill_chrom_defaults:
|
|
|
168
168
|
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
169
169
|
|
|
170
170
|
# Handle optional types
|
|
171
|
-
if (
|
|
172
|
-
isinstance(expected_dtype, str)
|
|
173
|
-
and expected_dtype.startswith("Optional")
|
|
174
|
-
and value is not None
|
|
175
|
-
):
|
|
171
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
176
172
|
if "int" in expected_dtype and not isinstance(value, int):
|
|
177
173
|
try:
|
|
178
174
|
value = int(value)
|
|
@@ -168,11 +168,7 @@ class fill_defaults:
|
|
|
168
168
|
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
169
169
|
|
|
170
170
|
# Handle optional types
|
|
171
|
-
if (
|
|
172
|
-
isinstance(expected_dtype, str)
|
|
173
|
-
and expected_dtype.startswith("Optional")
|
|
174
|
-
and value is not None
|
|
175
|
-
):
|
|
171
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
176
172
|
if "int" in expected_dtype and not isinstance(value, int):
|
|
177
173
|
try:
|
|
178
174
|
value = int(value)
|
|
@@ -135,11 +135,7 @@ class integrate_chrom_defaults:
|
|
|
135
135
|
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
136
136
|
|
|
137
137
|
# Handle optional types
|
|
138
|
-
if (
|
|
139
|
-
isinstance(expected_dtype, str)
|
|
140
|
-
and expected_dtype.startswith("Optional")
|
|
141
|
-
and value is not None
|
|
142
|
-
):
|
|
138
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
143
139
|
if "float" in expected_dtype and not isinstance(value, float):
|
|
144
140
|
try:
|
|
145
141
|
value = float(value)
|
|
@@ -135,11 +135,7 @@ class integrate_defaults:
|
|
|
135
135
|
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
136
136
|
|
|
137
137
|
# Handle optional types
|
|
138
|
-
if (
|
|
139
|
-
isinstance(expected_dtype, str)
|
|
140
|
-
and expected_dtype.startswith("Optional")
|
|
141
|
-
and value is not None
|
|
142
|
-
):
|
|
138
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
143
139
|
if "float" in expected_dtype and not isinstance(value, float):
|
|
144
140
|
try:
|
|
145
141
|
value = float(value)
|
|
@@ -33,7 +33,7 @@ class study_defaults:
|
|
|
33
33
|
|
|
34
34
|
eic_mz_tol: float = 0.01
|
|
35
35
|
eic_rt_tol: float = 10.0
|
|
36
|
-
|
|
36
|
+
|
|
37
37
|
polarity: str = "positive"
|
|
38
38
|
adducts: list[str] | None = None
|
|
39
39
|
adduct_min_probability: float = 0.04
|
|
@@ -54,14 +54,7 @@ class study_defaults:
|
|
|
54
54
|
"dtype": str,
|
|
55
55
|
"description": "Logging level to be set for the logger",
|
|
56
56
|
"default": "INFO",
|
|
57
|
-
"allowed_values": [
|
|
58
|
-
"TRACE",
|
|
59
|
-
"DEBUG",
|
|
60
|
-
"INFO",
|
|
61
|
-
"WARNING",
|
|
62
|
-
"ERROR",
|
|
63
|
-
"CRITICAL",
|
|
64
|
-
],
|
|
57
|
+
"allowed_values": ["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
|
65
58
|
},
|
|
66
59
|
"log_label": {
|
|
67
60
|
"dtype": "Optional[str]",
|
|
@@ -99,19 +92,14 @@ class study_defaults:
|
|
|
99
92
|
"default": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
|
|
100
93
|
"examples": {
|
|
101
94
|
"positive": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
|
|
102
|
-
"negative": [
|
|
103
|
-
"H-1:-:0.95",
|
|
104
|
-
"Cl:-:0.05",
|
|
105
|
-
"CH2O2:0:0.2",
|
|
106
|
-
"H-2-O:0:0.2",
|
|
107
|
-
],
|
|
95
|
+
"negative": ["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2", "H-2-O:0:0.2"]
|
|
108
96
|
},
|
|
109
97
|
"validation_rules": [
|
|
110
98
|
"Format: element:charge:probability",
|
|
111
99
|
"Charge must be +, -, or 0 (neutral)",
|
|
112
100
|
"Probability must be between 0.0 and 1.0",
|
|
113
|
-
"Sum of all charged adduct probabilities must equal 1.0"
|
|
114
|
-
]
|
|
101
|
+
"Sum of all charged adduct probabilities must equal 1.0"
|
|
102
|
+
]
|
|
115
103
|
},
|
|
116
104
|
"adduct_min_probability": {
|
|
117
105
|
"dtype": float,
|
|
@@ -128,71 +116,54 @@ class study_defaults:
|
|
|
128
116
|
"""Set polarity-specific defaults for adducts if not explicitly provided."""
|
|
129
117
|
# If adducts is None, set based on polarity
|
|
130
118
|
if self.adducts is None:
|
|
131
|
-
if self.polarity.lower() in [
|
|
132
|
-
self.adducts = [
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
"+NH4:1:0.15",
|
|
136
|
-
"+K:1:0.05",
|
|
137
|
-
"-H2O:0:0.15",
|
|
138
|
-
]
|
|
139
|
-
elif self.polarity.lower() in ["negative", "neg"]:
|
|
140
|
-
self.adducts = [
|
|
141
|
-
"-H:-1:0.9",
|
|
142
|
-
"+Cl:-1:0.1",
|
|
143
|
-
"+CH2O2:0:0.15",
|
|
144
|
-
"-H2O:0:0.15",
|
|
145
|
-
]
|
|
119
|
+
if self.polarity.lower() in ['positive', 'pos']:
|
|
120
|
+
self.adducts = ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"]
|
|
121
|
+
elif self.polarity.lower() in ['negative', 'neg']:
|
|
122
|
+
self.adducts = ["-H:-1:0.9", "+Cl:-1:0.1", "+CH2O2:0:0.15", "-H2O:0:0.15"]
|
|
146
123
|
else:
|
|
147
124
|
# Default to positive if polarity is not recognized
|
|
148
|
-
self.adducts = [
|
|
149
|
-
"+H:1:0.65",
|
|
150
|
-
"+Na:1:0.15",
|
|
151
|
-
"+NH4:1:0.15",
|
|
152
|
-
"+K:1:0.05",
|
|
153
|
-
"-H2O:0:0.15",
|
|
154
|
-
]
|
|
125
|
+
self.adducts = ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"]
|
|
155
126
|
|
|
156
127
|
def _validate_adducts(self, adduct_list: list[str]) -> bool:
|
|
157
128
|
"""
|
|
158
129
|
Validate adducts according to OpenMS convention.
|
|
159
|
-
|
|
130
|
+
|
|
160
131
|
Format: element:charge:probability
|
|
161
132
|
- Elements can be molecular formulas (e.g., H, Na, NH4, H-1, CH2O2)
|
|
162
133
|
- Charge must be +, -, or 0 (for neutral)
|
|
163
134
|
- Probability must be a float between 0 and 1
|
|
164
135
|
- Total probability of all charged adducts should sum to 1.0
|
|
165
|
-
|
|
136
|
+
|
|
166
137
|
Args:
|
|
167
138
|
adduct_list: List of adduct strings in OpenMS format
|
|
168
|
-
|
|
139
|
+
|
|
169
140
|
Returns:
|
|
170
141
|
True if all adducts are valid, False otherwise
|
|
171
142
|
"""
|
|
172
143
|
if not adduct_list: # Empty list is valid
|
|
173
144
|
return True
|
|
174
|
-
|
|
145
|
+
|
|
175
146
|
charged_total_prob = 0.0
|
|
176
147
|
neutral_total_prob = 0.0
|
|
177
|
-
|
|
148
|
+
|
|
178
149
|
for adduct in adduct_list:
|
|
179
150
|
if not isinstance(adduct, str):
|
|
180
151
|
return False
|
|
181
|
-
|
|
152
|
+
|
|
182
153
|
parts = adduct.split(":")
|
|
183
154
|
if len(parts) != 3:
|
|
184
155
|
return False
|
|
185
|
-
|
|
156
|
+
|
|
186
157
|
element, charge, prob_str = parts
|
|
187
|
-
|
|
158
|
+
|
|
188
159
|
# Validate element (non-empty string)
|
|
189
160
|
if not element:
|
|
190
161
|
return False
|
|
191
|
-
|
|
162
|
+
|
|
192
163
|
# Validate charge
|
|
193
164
|
if charge not in ["+", "-", "0"]:
|
|
194
165
|
return False
|
|
195
|
-
|
|
166
|
+
|
|
196
167
|
# Validate probability
|
|
197
168
|
try:
|
|
198
169
|
probability = float(prob_str)
|
|
@@ -200,20 +171,20 @@ class study_defaults:
|
|
|
200
171
|
return False
|
|
201
172
|
except (ValueError, TypeError):
|
|
202
173
|
return False
|
|
203
|
-
|
|
174
|
+
|
|
204
175
|
# Sum probabilities by charge type
|
|
205
176
|
if charge in ["+", "-"]:
|
|
206
177
|
charged_total_prob += probability
|
|
207
178
|
else: # charge == "0" (neutral)
|
|
208
179
|
neutral_total_prob += probability
|
|
209
|
-
|
|
180
|
+
|
|
210
181
|
# Validate probability constraints
|
|
211
182
|
# Charged adducts should sum to 1.0 (within tolerance)
|
|
212
183
|
if charged_total_prob > 0 and abs(charged_total_prob - 1.0) > 1e-6:
|
|
213
184
|
return False
|
|
214
|
-
|
|
185
|
+
|
|
215
186
|
# Neutral adducts can have any total probability (they're optional)
|
|
216
|
-
|
|
187
|
+
|
|
217
188
|
return True
|
|
218
189
|
|
|
219
190
|
def get_info(self, param_name: str) -> dict[str, Any]:
|
|
@@ -345,11 +316,7 @@ class study_defaults:
|
|
|
345
316
|
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
346
317
|
|
|
347
318
|
# Handle optional types
|
|
348
|
-
if (
|
|
349
|
-
isinstance(expected_dtype, str)
|
|
350
|
-
and expected_dtype.startswith("Optional")
|
|
351
|
-
and value is not None
|
|
352
|
-
):
|
|
319
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
353
320
|
if "int" in expected_dtype and not isinstance(value, int):
|
|
354
321
|
try:
|
|
355
322
|
value = int(value)
|