masster-0.5.22-py3-none-any.whl → masster-0.5.24-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/logger.py +35 -19
- masster/sample/adducts.py +15 -29
- masster/sample/defaults/find_adducts_def.py +1 -3
- masster/sample/defaults/sample_def.py +4 -4
- masster/sample/h5.py +203 -361
- masster/sample/helpers.py +14 -30
- masster/sample/lib.py +3 -3
- masster/sample/load.py +21 -29
- masster/sample/plot.py +222 -132
- masster/sample/processing.py +42 -55
- masster/sample/sample.py +37 -46
- masster/sample/save.py +37 -61
- masster/sample/sciex.py +13 -11
- masster/sample/thermo.py +69 -74
- masster/spectrum.py +15 -15
- masster/study/analysis.py +650 -586
- masster/study/defaults/identify_def.py +1 -3
- masster/study/defaults/merge_def.py +6 -7
- masster/study/defaults/study_def.py +1 -5
- masster/study/export.py +35 -96
- masster/study/h5.py +134 -211
- masster/study/helpers.py +385 -459
- masster/study/id.py +239 -290
- masster/study/importers.py +84 -93
- masster/study/load.py +159 -178
- masster/study/merge.py +1112 -1098
- masster/study/plot.py +195 -149
- masster/study/processing.py +144 -191
- masster/study/save.py +14 -13
- masster/study/study.py +89 -130
- masster/wizard/wizard.py +764 -714
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0
masster/sample/helpers.py
CHANGED
|
@@ -113,9 +113,7 @@ def _get_scan_uids(self, scans=None, verbose=True):
|
|
|
113
113
|
scans_uids = self.scans_df.get_column("scan_uid").to_list()
|
|
114
114
|
elif isinstance(scans, list):
|
|
115
115
|
# if scans is a list, ensure all elements are valid scan_uids
|
|
116
|
-
scans_uids = [
|
|
117
|
-
s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()
|
|
118
|
-
]
|
|
116
|
+
scans_uids = [s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()]
|
|
119
117
|
if verbose and not scans_uids:
|
|
120
118
|
self.logger.error("No valid scan_uids provided.")
|
|
121
119
|
|
|
@@ -340,9 +338,7 @@ def get_eic(self, mz, mz_tol=None):
|
|
|
340
338
|
return None
|
|
341
339
|
|
|
342
340
|
# Aggregate intensities per retention time. Use sum in case multiple points per rt.
|
|
343
|
-
chrom = (
|
|
344
|
-
matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
|
|
345
|
-
)
|
|
341
|
+
chrom = matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
|
|
346
342
|
|
|
347
343
|
# Attach to Sample
|
|
348
344
|
self.chrom_df = chrom
|
|
@@ -401,16 +397,14 @@ def features_select(
|
|
|
401
397
|
# self.logger.info("No features found. R")
|
|
402
398
|
return
|
|
403
399
|
feats = self.features_df.clone()
|
|
404
|
-
|
|
400
|
+
|
|
405
401
|
# Filter by feature UIDs if provided
|
|
406
402
|
if uid is not None:
|
|
407
403
|
if isinstance(uid, tuple) and len(uid) == 2:
|
|
408
404
|
# Handle tuple as range of feature UIDs
|
|
409
405
|
min_uid, max_uid = uid
|
|
410
406
|
feats_len_before_filter = len(feats)
|
|
411
|
-
feats = feats.filter(
|
|
412
|
-
(pl.col("feature_uid") >= min_uid) & (pl.col("feature_uid") <= max_uid)
|
|
413
|
-
)
|
|
407
|
+
feats = feats.filter((pl.col("feature_uid") >= min_uid) & (pl.col("feature_uid") <= max_uid))
|
|
414
408
|
self.logger.debug(
|
|
415
409
|
f"Selected features by UID range ({min_uid}-{max_uid}). Features removed: {feats_len_before_filter - len(feats)}",
|
|
416
410
|
)
|
|
@@ -420,13 +414,13 @@ def features_select(
|
|
|
420
414
|
if not feature_uids_to_keep:
|
|
421
415
|
self.logger.warning("No valid feature UIDs provided.")
|
|
422
416
|
return feats.limit(0) # Return empty DataFrame with same structure
|
|
423
|
-
|
|
417
|
+
|
|
424
418
|
feats_len_before_filter = len(feats)
|
|
425
419
|
feats = feats.filter(pl.col("feature_uid").is_in(feature_uids_to_keep))
|
|
426
420
|
self.logger.debug(
|
|
427
421
|
f"Selected features by UIDs. Features removed: {feats_len_before_filter - len(feats)}",
|
|
428
422
|
)
|
|
429
|
-
|
|
423
|
+
|
|
430
424
|
if coherence is not None:
|
|
431
425
|
has_coherence = "chrom_coherence" in self.features_df.columns
|
|
432
426
|
if not has_coherence:
|
|
@@ -437,8 +431,7 @@ def features_select(
|
|
|
437
431
|
if isinstance(coherence, tuple) and len(coherence) == 2:
|
|
438
432
|
min_coherence, max_coherence = coherence
|
|
439
433
|
feats = feats.filter(
|
|
440
|
-
(pl.col("chrom_coherence") >= min_coherence)
|
|
441
|
-
& (pl.col("chrom_coherence") <= max_coherence),
|
|
434
|
+
(pl.col("chrom_coherence") >= min_coherence) & (pl.col("chrom_coherence") <= max_coherence),
|
|
442
435
|
)
|
|
443
436
|
else:
|
|
444
437
|
feats = feats.filter(pl.col("chrom_coherence") >= coherence)
|
|
@@ -489,8 +482,7 @@ def features_select(
|
|
|
489
482
|
if isinstance(rt_delta, tuple) and len(rt_delta) == 2:
|
|
490
483
|
min_rt_delta, max_rt_delta = rt_delta
|
|
491
484
|
feats = feats.filter(
|
|
492
|
-
(pl.col("rt_delta") >= min_rt_delta)
|
|
493
|
-
& (pl.col("rt_delta") <= max_rt_delta),
|
|
485
|
+
(pl.col("rt_delta") >= min_rt_delta) & (pl.col("rt_delta") <= max_rt_delta),
|
|
494
486
|
)
|
|
495
487
|
else:
|
|
496
488
|
feats = feats.filter(pl.col("rt_delta") >= rt_delta)
|
|
@@ -567,8 +559,7 @@ def features_select(
|
|
|
567
559
|
if isinstance(prominence, tuple) and len(prominence) == 2:
|
|
568
560
|
min_prominence, max_prominence = prominence
|
|
569
561
|
feats = feats.filter(
|
|
570
|
-
(pl.col("chrom_prominence") >= min_prominence)
|
|
571
|
-
& (pl.col("chrom_prominence") <= max_prominence),
|
|
562
|
+
(pl.col("chrom_prominence") >= min_prominence) & (pl.col("chrom_prominence") <= max_prominence),
|
|
572
563
|
)
|
|
573
564
|
else:
|
|
574
565
|
feats = feats.filter(pl.col("chrom_prominence") >= prominence)
|
|
@@ -579,9 +570,7 @@ def features_select(
|
|
|
579
570
|
if height is not None:
|
|
580
571
|
feats_len_before_filter = len(feats)
|
|
581
572
|
# Check if chrom_height column exists, if not use chrom_height_scaled
|
|
582
|
-
height_col = (
|
|
583
|
-
"chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
|
|
584
|
-
)
|
|
573
|
+
height_col = "chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
|
|
585
574
|
if isinstance(height, tuple) and len(height) == 2:
|
|
586
575
|
min_height, max_height = height
|
|
587
576
|
feats = feats.filter(
|
|
@@ -617,6 +606,7 @@ def features_select(
|
|
|
617
606
|
self.logger.info(f"Selected features. Features remaining: {len(feats)}")
|
|
618
607
|
return feats
|
|
619
608
|
|
|
609
|
+
|
|
620
610
|
'''
|
|
621
611
|
def _features_sync(self):
|
|
622
612
|
"""
|
|
@@ -725,6 +715,7 @@ def _features_sync(self):
|
|
|
725
715
|
self.logger.error(f"Error during feature synchronization: {e}")
|
|
726
716
|
'''
|
|
727
717
|
|
|
718
|
+
|
|
728
719
|
def features_delete(self, features: list | None = None):
|
|
729
720
|
"""
|
|
730
721
|
Delete features from both self.features_df and self._oms_features_map based on a list of feature UIDs.
|
|
@@ -841,10 +832,7 @@ def _delete_ms2(self):
|
|
|
841
832
|
|
|
842
833
|
# Update scans_df to remove feature_uid association for linked MS2 spectra
|
|
843
834
|
self.scans_df = self.scans_df.with_columns(
|
|
844
|
-
pl.when(pl.col("ms_level") == 2)
|
|
845
|
-
.then(None)
|
|
846
|
-
.otherwise(pl.col("feature_uid"))
|
|
847
|
-
.alias("feature_uid"),
|
|
835
|
+
pl.when(pl.col("ms_level") == 2).then(None).otherwise(pl.col("feature_uid")).alias("feature_uid"),
|
|
848
836
|
)
|
|
849
837
|
self.logger.info("MS2 spectra unlinked from features.")
|
|
850
838
|
|
|
@@ -930,11 +918,7 @@ def features_filter(self, features):
|
|
|
930
918
|
)
|
|
931
919
|
|
|
932
920
|
# Update scans_df to remove feature_uid associations for deleted features
|
|
933
|
-
if (
|
|
934
|
-
hasattr(self, "scans_df")
|
|
935
|
-
and self.scans_df is not None
|
|
936
|
-
and feature_uids_to_delete
|
|
937
|
-
):
|
|
921
|
+
if hasattr(self, "scans_df") and self.scans_df is not None and feature_uids_to_delete:
|
|
938
922
|
self.scans_df = self.scans_df.with_columns(
|
|
939
923
|
pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
|
|
940
924
|
.then(None)
|
masster/sample/lib.py
CHANGED
|
@@ -221,11 +221,11 @@ def lib_load(self, csvfile=None, polarity=None):
|
|
|
221
221
|
self.lib = self.lib.where(pd.notnull(self.lib), None)
|
|
222
222
|
# find all elements == nan and replace them with None
|
|
223
223
|
self.lib = self.lib.replace({np.nan: None})
|
|
224
|
-
|
|
224
|
+
|
|
225
225
|
# Use sample.polarity if polarity parameter is None
|
|
226
226
|
if polarity is None:
|
|
227
|
-
polarity = getattr(self,
|
|
228
|
-
|
|
227
|
+
polarity = getattr(self, "polarity", "positive")
|
|
228
|
+
|
|
229
229
|
if polarity is not None:
|
|
230
230
|
if polarity.lower() == "positive":
|
|
231
231
|
self.lib = self.lib[self.lib["z"] > 0]
|
masster/sample/load.py
CHANGED
|
@@ -13,11 +13,6 @@ Key Features:
|
|
|
13
13
|
- **Error Handling**: Comprehensive error reporting for file loading issues.
|
|
14
14
|
- **Raw Data Processing**: Handle centroided and profile data with signal smoothing.
|
|
15
15
|
|
|
16
|
-
Dependencies:
|
|
17
|
-
- `pyopenms`: For standard mass spectrometry file format support.
|
|
18
|
-
- `polars` and `pandas`: For efficient data handling and manipulation.
|
|
19
|
-
- `numpy`: For numerical array operations.
|
|
20
|
-
|
|
21
16
|
Functions:
|
|
22
17
|
- `load()`: Main file loading function with format detection.
|
|
23
18
|
- `_load_mzML()`: Specialized mzML file loader.
|
|
@@ -55,7 +50,9 @@ warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", categor
|
|
|
55
50
|
|
|
56
51
|
# Import pyopenms with suppressed warnings
|
|
57
52
|
with warnings.catch_warnings():
|
|
58
|
-
warnings.filterwarnings("ignore", message=".*OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning)
|
|
53
|
+
warnings.filterwarnings(
|
|
54
|
+
"ignore", message=".*OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning
|
|
55
|
+
)
|
|
59
56
|
warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", category=UserWarning)
|
|
60
57
|
import pyopenms as oms
|
|
61
58
|
|
|
@@ -163,6 +160,7 @@ def load_noms1(
|
|
|
163
160
|
_load_raw(self, filename)
|
|
164
161
|
elif filename.lower().endswith(".sample5"):
|
|
165
162
|
from masster.sample.h5 import _load_sample5_study
|
|
163
|
+
|
|
166
164
|
_load_sample5_study(self, filename) # Use optimized version for study loading
|
|
167
165
|
else:
|
|
168
166
|
raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
|
|
@@ -286,12 +284,8 @@ def _load_mzML(
|
|
|
286
284
|
if len(prec_mz) == 0:
|
|
287
285
|
continue
|
|
288
286
|
prec_mz = prec_mz[0].getMZ()
|
|
289
|
-
precursorIsolationWindowLowerMZ = s.getPrecursors()[
|
|
290
|
-
0
|
|
291
|
-
].getIsolationWindowLowerOffset()
|
|
292
|
-
precursorIsolationWindowUpperMZ = s.getPrecursors()[
|
|
293
|
-
0
|
|
294
|
-
].getIsolationWindowUpperOffset()
|
|
287
|
+
precursorIsolationWindowLowerMZ = s.getPrecursors()[0].getIsolationWindowLowerOffset()
|
|
288
|
+
precursorIsolationWindowUpperMZ = s.getPrecursors()[0].getIsolationWindowUpperOffset()
|
|
295
289
|
prec_inty = s.getPrecursors()[0].getIntensity()
|
|
296
290
|
# Try to get collision energy from meta values first, fallback to getActivationEnergy()
|
|
297
291
|
try:
|
|
@@ -416,7 +410,7 @@ def _load_raw(
|
|
|
416
410
|
- Updates instance attributes including self.file_path, self.file_obj, self.file_interface, and self.label.
|
|
417
411
|
- Initiates further analysis by invoking analyze_dda().
|
|
418
412
|
"""
|
|
419
|
-
#from alpharaw.thermo import ThermoRawData
|
|
413
|
+
# from alpharaw.thermo import ThermoRawData
|
|
420
414
|
from masster.sample.thermo import ThermoRawData
|
|
421
415
|
|
|
422
416
|
if not filename:
|
|
@@ -482,11 +476,11 @@ def _load_raw(
|
|
|
482
476
|
|
|
483
477
|
# try to get polarity
|
|
484
478
|
if self.polarity is None:
|
|
485
|
-
if s[
|
|
486
|
-
self.polarity =
|
|
487
|
-
elif s[
|
|
488
|
-
self.polarity =
|
|
489
|
-
|
|
479
|
+
if s["polarity"] == "positive":
|
|
480
|
+
self.polarity = "positive"
|
|
481
|
+
elif s["polarity"] == "negative":
|
|
482
|
+
self.polarity = "negative"
|
|
483
|
+
|
|
490
484
|
peak_start_idx = s["peak_start_idx"]
|
|
491
485
|
peak_stop_idx = s["peak_stop_idx"]
|
|
492
486
|
peaks = raw_data.peak_df.loc[peak_start_idx : peak_stop_idx - 1]
|
|
@@ -639,11 +633,11 @@ def _load_wiff(
|
|
|
639
633
|
ms_level = s["ms_level"]
|
|
640
634
|
# try to get polarity
|
|
641
635
|
if polarity is None:
|
|
642
|
-
if s[
|
|
643
|
-
polarity =
|
|
644
|
-
elif s[
|
|
645
|
-
polarity =
|
|
646
|
-
|
|
636
|
+
if s["polarity"] == "positive":
|
|
637
|
+
polarity = "positive"
|
|
638
|
+
elif s["polarity"] == "negative":
|
|
639
|
+
polarity = "negative"
|
|
640
|
+
|
|
647
641
|
if ms_level == 1:
|
|
648
642
|
cycle += 1
|
|
649
643
|
prec_mz = None
|
|
@@ -745,6 +739,7 @@ def _load_wiff(
|
|
|
745
739
|
self.file_source = filename
|
|
746
740
|
self.file_obj = raw_data
|
|
747
741
|
self.file_interface = "alpharaw"
|
|
742
|
+
self.polarity = polarity
|
|
748
743
|
self.label = os.path.basename(filename)
|
|
749
744
|
self.ms1_df = pl.DataFrame(ms1_df_records, schema=schema)
|
|
750
745
|
if self.type != "ztscan":
|
|
@@ -775,6 +770,7 @@ def _load_featureXML(
|
|
|
775
770
|
fh.load(filename, fm)
|
|
776
771
|
self._oms_features_map = fm
|
|
777
772
|
|
|
773
|
+
|
|
778
774
|
def _wiff_to_dict(
|
|
779
775
|
filename=None,
|
|
780
776
|
):
|
|
@@ -1161,9 +1157,7 @@ def chrom_extract(
|
|
|
1161
1157
|
scan_uid = trace["scan_uid"]
|
|
1162
1158
|
# find all ms1 data with scan_uid and mz between q1-mz_tol and q1+mz_tol
|
|
1163
1159
|
d = self.ms1_df.filter(
|
|
1164
|
-
(pl.col("scan_uid").is_in(scan_uid))
|
|
1165
|
-
& (pl.col("mz") >= q1 - mz_tol)
|
|
1166
|
-
& (pl.col("mz") <= q1 + mz_tol),
|
|
1160
|
+
(pl.col("scan_uid").is_in(scan_uid)) & (pl.col("mz") >= q1 - mz_tol) & (pl.col("mz") <= q1 + mz_tol),
|
|
1167
1161
|
)
|
|
1168
1162
|
# for all unique rt values, find the maximum inty
|
|
1169
1163
|
eic_rt = d.group_by("rt").agg(pl.col("inty").max())
|
|
@@ -1182,9 +1176,7 @@ def chrom_extract(
|
|
|
1182
1176
|
scan_uid = trace["scan_uid"]
|
|
1183
1177
|
# find all ms2 data with scan_uid and mz between q3-mz_tol and q3+mz_tol
|
|
1184
1178
|
d = self.ms2data.filter(
|
|
1185
|
-
(pl.col("scan_uid").is_in(scan_uid))
|
|
1186
|
-
& (pl.col("mz") >= q3 - mz_tol)
|
|
1187
|
-
& (pl.col("mz") <= q3 + mz_tol),
|
|
1179
|
+
(pl.col("scan_uid").is_in(scan_uid)) & (pl.col("mz") >= q3 - mz_tol) & (pl.col("mz") <= q3 + mz_tol),
|
|
1188
1180
|
)
|
|
1189
1181
|
# for all unique rt values, find the maximum inty
|
|
1190
1182
|
eic_rt = d.group_by("rt").agg(pl.col("inty").max())
|