masster 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between the package versions as they appear in their public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/chromatogram.py +2 -2
- masster/data/libs/urine.csv +3 -3
- masster/logger.py +8 -8
- masster/sample/adducts.py +337 -263
- masster/sample/defaults/find_adducts_def.py +21 -8
- masster/sample/h5.py +557 -278
- masster/sample/helpers.py +131 -75
- masster/sample/lib.py +2 -2
- masster/sample/load.py +25 -11
- masster/sample/plot.py +5 -5
- masster/sample/processing.py +115 -85
- masster/sample/sample.py +28 -15
- masster/sample/sample5_schema.json +44 -44
- masster/sample/save.py +34 -11
- masster/spectrum.py +2 -2
- masster/study/defaults/align_def.py +5 -1
- masster/study/defaults/identify_def.py +3 -1
- masster/study/defaults/study_def.py +58 -25
- masster/study/export.py +354 -204
- masster/study/h5.py +557 -155
- masster/study/helpers.py +487 -194
- masster/study/id.py +536 -347
- masster/study/load.py +228 -138
- masster/study/plot.py +68 -68
- masster/study/processing.py +455 -253
- masster/study/save.py +14 -4
- masster/study/study.py +122 -40
- masster/study/study5_schema.json +149 -149
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/METADATA +5 -3
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/RECORD +34 -34
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/WHEEL +0 -0
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/entry_points.txt +0 -0
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/licenses/LICENSE +0 -0
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/top_level.txt +0 -0
masster/sample/helpers.py
CHANGED
@@ -1,7 +1,6 @@
 from __future__ import annotations

 import polars as pl
-import numpy as np


 # Parameters removed - using hardcoded defaults
@@ -79,10 +78,14 @@ def _estimate_memory_usage(self):

     # Log the memory usage summary
     if hasattr(self, "logger"):
-        self.logger.debug(
+        self.logger.debug(
+            f"Total DataFrame memory usage: {memory_usage['total']['mb']:.2f} MB",
+        )
     for df_name, stats in memory_usage.items():
         if df_name != "total" and stats["bytes"] > 0:
-            self.logger.debug(
+            self.logger.debug(
+                f"{df_name}: {stats['rows']} rows, {stats['mb']:.2f} MB",
+            )

     return memory_usage["total"]["mb"]

@@ -110,7 +113,9 @@ def _get_scan_uids(self, scans=None, verbose=True):
         scans_uids = self.scans_df.get_column("scan_uid").to_list()
     elif isinstance(scans, list):
         # if scans is a list, ensure all elements are valid scan_uids
-        scans_uids = [
+        scans_uids = [
+            s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()
+        ]
     if verbose and not scans_uids:
         self.logger.error("No valid scan_uids provided.")

@@ -143,7 +148,9 @@ def _get_feature_uids(self, features=None, verbose=True):
     # If features is a list, ensure all elements are valid feature_uids
     if self.features_df is None:
         if verbose:
-            self.logger.warning(
+            self.logger.warning(
+                "No features_df available to validate feature UIDs.",
+            )
         return []

     valid_feature_uids = self.features_df.get_column("feature_uid").to_list()
@@ -164,7 +171,9 @@ def _get_feature_uids(self, features=None, verbose=True):

     if feature_column is None:
         if verbose:
-            self.logger.error(
+            self.logger.error(
+                "No 'feature_uid' or 'feature_id' column found in polars DataFrame.",
+            )
         return []

     # Get unique values from the column
@@ -190,7 +199,9 @@ def _get_feature_uids(self, features=None, verbose=True):

     if feature_column is None:
         if verbose:
-            self.logger.error(
+            self.logger.error(
+                "No 'feature_uid' or 'feature_id' column found in pandas DataFrame.",
+            )
         return []

     # Get unique values from the column
@@ -198,7 +209,9 @@ def _get_feature_uids(self, features=None, verbose=True):

     else:
         if verbose:
-            self.logger.error(
+            self.logger.error(
+                "Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.",
+            )
         return []

 except Exception as e:
@@ -315,7 +328,9 @@ def get_eic(self, mz, mz_tol=None):
     # Filter by mz window
     mz_min = mz - mz_tol
     mz_max = mz + mz_tol
-    matches = self.ms1_df.filter(
+    matches = self.ms1_df.filter(
+        (pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max),
+    )

     if len(matches) == 0:
         if hasattr(self, "logger"):
@@ -325,7 +340,9 @@ def get_eic(self, mz, mz_tol=None):
             return None

     # Aggregate intensities per retention time. Use sum in case multiple points per rt.
-    chrom =
+    chrom = (
+        matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
+    )

     # Attach to Sample
     self.chrom_df = chrom
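The get_eic() hunk above switches to an explicit filter-then-aggregate pipeline. A minimal standalone sketch of that pattern, with hypothetical toy data standing in for a real ms1_df:

import polars as pl

# Hypothetical MS1 point list: several m/z readings per retention time.
ms1_df = pl.DataFrame(
    {
        "rt": [10.0, 10.0, 10.5, 11.0, 11.0],
        "mz": [200.001, 200.002, 200.001, 199.999, 200.003],
        "inty": [1500.0, 300.0, 2200.0, 1800.0, 450.0],
    }
)

mz, mz_tol = 200.0, 0.005
matches = ms1_df.filter(
    (pl.col("mz") >= mz - mz_tol) & (pl.col("mz") <= mz + mz_tol),
)

# Sum multiple points per rt into one chromatogram intensity, sorted by rt.
chrom = matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
print(chrom)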
@@ -391,7 +408,8 @@ def select(
     if isinstance(coherence, tuple) and len(coherence) == 2:
         min_coherence, max_coherence = coherence
         feats = feats.filter(
-            (pl.col("chrom_coherence") >= min_coherence)
+            (pl.col("chrom_coherence") >= min_coherence)
+            & (pl.col("chrom_coherence") <= max_coherence),
         )
     else:
         feats = feats.filter(pl.col("chrom_coherence") >= coherence)
@@ -442,7 +460,8 @@ def select(
     if isinstance(rt_delta, tuple) and len(rt_delta) == 2:
         min_rt_delta, max_rt_delta = rt_delta
         feats = feats.filter(
-            (pl.col("rt_delta") >= min_rt_delta)
+            (pl.col("rt_delta") >= min_rt_delta)
+            & (pl.col("rt_delta") <= max_rt_delta),
         )
     else:
         feats = feats.filter(pl.col("rt_delta") >= rt_delta)
@@ -519,7 +538,8 @@ def select(
     if isinstance(prominence, tuple) and len(prominence) == 2:
         min_prominence, max_prominence = prominence
         feats = feats.filter(
-            (pl.col("chrom_prominence") >= min_prominence)
+            (pl.col("chrom_prominence") >= min_prominence)
+            & (pl.col("chrom_prominence") <= max_prominence),
         )
     else:
         feats = feats.filter(pl.col("chrom_prominence") >= prominence)
@@ -530,7 +550,9 @@ def select(
     if height is not None:
         feats_len_before_filter = len(feats)
         # Check if chrom_height column exists, if not use chrom_height_scaled
-        height_col =
+        height_col = (
+            "chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
+        )
         if isinstance(height, tuple) and len(height) == 2:
             min_height, max_height = height
             feats = feats.filter(
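select() repeats the same tuple-versus-scalar threshold logic for each metric. A hedged sketch of that dispatch as a reusable helper (the helper name and toy values are mine, not masster's):

import polars as pl


def filter_by_metric(df: pl.DataFrame, column: str, bound) -> pl.DataFrame:
    # A (min, max) tuple keeps rows inside the closed range;
    # a scalar acts as a minimum threshold, mirroring select().
    if isinstance(bound, tuple) and len(bound) == 2:
        lo, hi = bound
        return df.filter((pl.col(column) >= lo) & (pl.col(column) <= hi))
    return df.filter(pl.col(column) >= bound)


feats = pl.DataFrame({"chrom_coherence": [0.2, 0.6, 0.9]})
print(filter_by_metric(feats, "chrom_coherence", (0.5, 0.95)))  # keeps 0.6 and 0.9
print(filter_by_metric(feats, "chrom_coherence", 0.8))          # keeps 0.9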
@@ -551,23 +573,23 @@ def select(
 def _features_sync(self):
     """
     Synchronizes the cached FeatureMap with features_df.
-
-    This ensures that the cached FeatureMap (_oms_features_map) contains only features
-    that exist in both the FeatureMap and the features_df. This is important
+
+    This ensures that the cached FeatureMap (_oms_features_map) contains only features
+    that exist in both the FeatureMap and the features_df. This is important
     after operations that modify features_df but not the FeatureMap (like filtering).
-
+
     Side Effects:
         Updates self._oms_features_map and self.features_df to contain only common features.
         Logs information about removed features.
     """
     if self.features_df is None or len(self.features_df) == 0:
         self.logger.debug("No features_df to synchronize")
-        if hasattr(self,
+        if hasattr(self, "_oms_features_map"):
             self._oms_features_map = None
         return

     # Check if we have a cached feature map
-    if not hasattr(self,
+    if not hasattr(self, "_oms_features_map") or self._oms_features_map is None:
         self.logger.debug("No cached feature map to synchronize")
         return

@@ -576,20 +598,26 @@ def _features_sync(self):
     except ImportError:
         self.logger.warning("PyOpenMS not available, cannot sync FeatureMap")
         return
-
+
     try:
         # Get feature IDs from both sources
         if "feature_id" in self.features_df.columns:
-            df_feature_ids = set(
+            df_feature_ids = set(
+                self.features_df.get_column("feature_id").cast(str).to_list(),
+            )
         else:
-            self.logger.warning(
+            self.logger.warning(
+                "No feature_id column in features_df, cannot synchronize",
+            )
             return

         # Get feature IDs from FeatureMap
         feature_map_ids = set()
         for i in range(self._oms_features_map.size()):
             feature = self._oms_features_map[i]
-            unique_id = str(
+            unique_id = str(
+                feature.getUniqueId(),
+            )  # Convert to string to match DataFrame
             feature_map_ids.add(unique_id)

         # Find features that exist in both
@@ -687,7 +715,7 @@ def features_delete(self, features: list | None = None):
     )

     # Update the OpenMS FeatureMap by creating a new one with only features to keep
-    if hasattr(self,
+    if hasattr(self, "_oms_features_map") and self._oms_features_map is not None:
         try:
             # Import pyopenms
             import pyopenms as oms
@@ -696,7 +724,9 @@ def features_delete(self, features: list | None = None):
             filtered_map = oms.FeatureMap()

             # Get the feature UIDs that should remain after deletion
-            remaining_feature_uids = self.features_df.get_column(
+            remaining_feature_uids = self.features_df.get_column(
+                "feature_uid",
+            ).to_list()

             # Iterate through existing features and keep only those not in deletion list
             for i in range(self._oms_features_map.size()):
@@ -708,12 +738,16 @@ def features_delete(self, features: list | None = None):

             # Replace the original FeatureMap with the filtered one
             self._oms_features_map = filtered_map
-            self.logger.debug(
+            self.logger.debug(
+                f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.",
+            )

         except ImportError:
             self.logger.warning("PyOpenMS not available, only updating features_df")
         except Exception as e:
-            self.logger.warning(
+            self.logger.warning(
+                f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.",
+            )

     # Update scans_df to remove feature_uid associations for deleted features
     if hasattr(self, "scans_df") and self.scans_df is not None:
@@ -725,7 +759,9 @@ def features_delete(self, features: list | None = None):
     )

     deleted_count = original_count - len(self.features_df)
-    self.logger.info(
+    self.logger.info(
+        f"Deleted {deleted_count} features. Remaining features: {len(self.features_df)}",
+    )


 def _delete_ms2(self):
@@ -748,14 +784,19 @@ def _delete_ms2(self):
     self.logger.debug("Unlinking MS2 spectra from features...")

     # Set ms2_scans and ms2_specs to None using Polars syntax
-    self.features_df = self.features_df.with_columns(
-
-
-
+    self.features_df = self.features_df.with_columns(
+        [
+            pl.lit(None).alias("ms2_scans"),
+            pl.lit(None).alias("ms2_specs"),
+        ],
+    )

     # Update scans_df to remove feature_uid association for linked MS2 spectra
     self.scans_df = self.scans_df.with_columns(
-        pl.when(pl.col("ms_level") == 2)
+        pl.when(pl.col("ms_level") == 2)
+        .then(None)
+        .otherwise(pl.col("feature_uid"))
+        .alias("feature_uid"),
     )
     self.logger.info("MS2 spectra unlinked from features.")

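The _delete_ms2 hunk uses polars' conditional expression to null out feature links on MS2 rows only. A small self-contained sketch of that masking pattern, with made-up scan data:

import polars as pl

scans_df = pl.DataFrame(
    {
        "scan_uid": [1, 2, 3, 4],
        "ms_level": [1, 2, 1, 2],
        "feature_uid": [10, 10, 11, 11],
    }
)

# Null feature_uid where ms_level == 2, keep it everywhere else.
scans_df = scans_df.with_columns(
    pl.when(pl.col("ms_level") == 2)
    .then(None)
    .otherwise(pl.col("feature_uid"))
    .alias("feature_uid"),
)
print(scans_df)  # rows with ms_level == 2 now carry a null feature_uid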
@@ -787,7 +828,9 @@ def features_filter(self, features):
         return

     if features is None:
-        self.logger.warning(
+        self.logger.warning(
+            "No features specified to keep. Use features_delete() to delete all features.",
+        )
         return

     # Get the feature UIDs to keep
@@ -809,7 +852,7 @@ def features_filter(self, features):
     feature_uids_to_delete = list(all_feature_uids - set(feature_uids_to_keep))

     # Update the OpenMS FeatureMap by creating a new one with only features to keep
-    if hasattr(self,
+    if hasattr(self, "_oms_features_map") and self._oms_features_map is not None:
         try:
             # Import pyopenms
             import pyopenms as oms
@@ -827,15 +870,23 @@ def features_filter(self, features):

             # Replace the original FeatureMap with the filtered one
             self._oms_features_map = filtered_map
-            self.logger.debug(
+            self.logger.debug(
+                f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.",
+            )

         except ImportError:
             self.logger.warning("PyOpenMS not available, only updating features_df")
         except Exception as e:
-            self.logger.warning(
+            self.logger.warning(
+                f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.",
+            )

     # Update scans_df to remove feature_uid associations for deleted features
-    if
+    if (
+        hasattr(self, "scans_df")
+        and self.scans_df is not None
+        and feature_uids_to_delete
+    ):
         self.scans_df = self.scans_df.with_columns(
             pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
             .then(None)
@@ -845,7 +896,9 @@ def features_filter(self, features):

     kept_count = len(self.features_df)
     deleted_count = original_count - kept_count
-    self.logger.info(
+    self.logger.info(
+        f"Kept {kept_count} features, deleted {deleted_count} features. Remaining features: {kept_count}",
+    )


 def set_source(self, filename):
@@ -889,7 +942,9 @@ def set_source(self, filename):

     # Log the change
     if old_file_source is not None:
-        self.logger.info(
+        self.logger.info(
+            f"Updated file_source from {old_file_source} to {self.file_source}",
+        )
     else:
         self.logger.info(f"Set file_source to {self.file_source}")

@@ -897,89 +952,90 @@ def set_source(self, filename):
 def _recreate_feature_map(self):
     """
     Recreate OpenMS FeatureMap from features_df.
-
+
     This helper function creates a new OpenMS FeatureMap using the data from features_df.
     This allows us to avoid storing and loading featureXML files by default, while still
     being able to recreate the feature map when needed for OpenMS operations like
     find_features() or saving to featureXML format.
-
+
     Returns:
         oms.FeatureMap: A new FeatureMap with features from features_df, or None if no features
-
+
     Side Effects:
         Caches the created feature map in self._oms_features_map for reuse
     """
     if self.features_df is None or len(self.features_df) == 0:
         self.logger.debug("No features_df available to recreate feature map")
         return None
-
+
     try:
         import pyopenms as oms
     except ImportError:
         self.logger.warning("PyOpenMS not available, cannot recreate feature map")
         return None
-
+
     # Create new FeatureMap
     feature_map = oms.FeatureMap()
-
+
     # Set the primary MS run path if available
-    if hasattr(self,
+    if hasattr(self, "file_path") and self.file_path:
         feature_map.setPrimaryMSRunPath([self.file_path.encode()])
-
+
     # Convert DataFrame features to OpenMS Features
     for i, feature_row in enumerate(self.features_df.iter_rows(named=True)):
         feature = oms.Feature()
-
+
         # Set basic properties from DataFrame (handle missing values gracefully)
         try:
-            if feature_row.get(
-                feature.setUniqueId(int(feature_row[
+            if feature_row.get("feature_id") is not None:
+                feature.setUniqueId(int(feature_row["feature_id"]))
             else:
                 feature.setUniqueId(i)  # Use index as fallback
-
-            if feature_row.get(
-                feature.setMZ(float(feature_row[
-            if feature_row.get(
-                feature.setRT(float(feature_row[
-            if feature_row.get(
-                feature.setIntensity(float(feature_row[
-            if feature_row.get(
-                feature.setOverallQuality(float(feature_row[
-            if feature_row.get(
-                feature.setCharge(int(feature_row[
-
+
+            if feature_row.get("mz") is not None:
+                feature.setMZ(float(feature_row["mz"]))
+            if feature_row.get("rt") is not None:
+                feature.setRT(float(feature_row["rt"]))
+            if feature_row.get("inty") is not None:
+                feature.setIntensity(float(feature_row["inty"]))
+            if feature_row.get("quality") is not None:
+                feature.setOverallQuality(float(feature_row["quality"]))
+            if feature_row.get("charge") is not None:
+                feature.setCharge(int(feature_row["charge"]))
+
             # Add to feature map
             feature_map.push_back(feature)
-
+
         except (ValueError, TypeError) as e:
             self.logger.warning(f"Skipping feature due to conversion error: {e}")
             continue
-
+
     # Ensure unique IDs
     feature_map.ensureUniqueId()
-
+
     # Cache the feature map
     self._oms_features_map = feature_map
-
-    self.logger.debug(
+
+    self.logger.debug(
+        f"Recreated FeatureMap with {feature_map.size()} features from features_df",
+    )
     return feature_map


 def _get_feature_map(self):
     """
     Get the OpenMS FeatureMap, creating it from features_df if needed.
-
+
     This property-like method returns the cached feature map if available,
-    or recreates it from features_df if not. This allows lazy loading of
+    or recreates it from features_df if not. This allows lazy loading of
     feature maps only when needed for OpenMS operations.
-
+
     Returns:
         oms.FeatureMap or None: The feature map, or None if not available
     """
     # Return cached feature map if available
-    if hasattr(self,
+    if hasattr(self, "_oms_features_map") and self._oms_features_map is not None:
         return self._oms_features_map
-
+
     # Otherwise recreate from features_df
     return self._recreate_feature_map()
-
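_recreate_feature_map builds an OpenMS FeatureMap row by row. A hedged, self-contained sketch of the same construction outside masster, using only the pyopenms calls visible in the hunk above (the row values are toy data):

import pyopenms as oms

rows = [
    {"feature_id": 1, "mz": 200.05, "rt": 120.3, "inty": 1.5e5, "charge": 1},
    {"feature_id": 2, "mz": 301.12, "rt": 240.8, "inty": 8.2e4, "charge": 2},
]

feature_map = oms.FeatureMap()
for i, row in enumerate(rows):
    feature = oms.Feature()
    # Fall back to the row index when no explicit id is present.
    feature.setUniqueId(int(row.get("feature_id", i)))
    feature.setMZ(float(row["mz"]))
    feature.setRT(float(row["rt"]))
    feature.setIntensity(float(row["inty"]))
    feature.setCharge(int(row["charge"]))
    feature_map.push_back(feature)

feature_map.ensureUniqueId()
print(feature_map.size())  # 2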
masster/sample/lib.py
CHANGED
@@ -1,8 +1,8 @@
 """
 lib.py

-This module provides the Lib class and utility functions for mass spectrometry compound library
-management and feature annotation. It contains core functionality for compound library management,
+This module provides the Lib class and utility functions for mass spectrometry compound library
+management and feature annotation. It contains core functionality for compound library management,
 target identification, adduct handling, and various analytical operations.

 Key Features:
masster/sample/load.py
CHANGED
@@ -177,7 +177,7 @@ def load_study(
 ):
     """
     Backward compatibility alias for load_noms1().
-
+
     This method is deprecated. Use load_noms1() instead.
     """
     return self.load_noms1(filename=filename, ondisk=ondisk, type=type, label=label)
@@ -263,12 +263,16 @@ def _load_mzML(
             energy = None
         else:
             prec_mz = s.getPrecursors()[0].getMZ()
-            precursorIsolationWindowLowerMZ = s.getPrecursors()[
-
+            precursorIsolationWindowLowerMZ = s.getPrecursors()[
+                0
+            ].getIsolationWindowLowerOffset()
+            precursorIsolationWindowUpperMZ = s.getPrecursors()[
+                0
+            ].getIsolationWindowUpperOffset()
             prec_intyensity = s.getPrecursors()[0].getIntensity()
             # Try to get collision energy from meta values first, fallback to getActivationEnergy()
             try:
-                energy = s.getPrecursors()[0].getMetaValue(
+                energy = s.getPrecursors()[0].getMetaValue("collision energy")
                 if energy is None or energy == 0.0:
                     energy = s.getPrecursors()[0].getActivationEnergy()
             except Exception:
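This hunk reads precursor metadata per MS2 spectrum through pyOpenMS. A hedged sketch of the same calls against a file of your own ("example.mzML" is a placeholder path):

import pyopenms as oms

exp = oms.MSExperiment()
oms.MzMLFile().load("example.mzML", exp)  # placeholder path

for s in exp.getSpectra():
    if s.getMSLevel() != 2 or not s.getPrecursors():
        continue
    prec = s.getPrecursors()[0]
    # Isolation bounds are offsets around the precursor m/z.
    window = (
        prec.getMZ() - prec.getIsolationWindowLowerOffset(),
        prec.getMZ() + prec.getIsolationWindowUpperOffset(),
    )
    # Meta value first, activation energy as fallback, as in the hunk above.
    try:
        energy = prec.getMetaValue("collision energy")
        if energy is None or energy == 0.0:
            energy = prec.getActivationEnergy()
    except Exception:
        energy = None
    print(prec.getMZ(), window, energy)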
@@ -553,9 +557,9 @@ def _load_wiff(
 ):
     try:
         # Use masster's own implementation first
-        from masster.sample.sciex import SciexWiffData as
+        from masster.sample.sciex import SciexWiffData as MasterSciexWiffData

-        SciexWiffDataClass =
+        SciexWiffDataClass = MasterSciexWiffData
     except ImportError:
         # Fallback to alpharaw if masster implementation fails
         from alpharaw.sciex import SciexWiffData as AlpharawSciexWiffData
@@ -985,7 +989,9 @@ def index_file(self):
         self.logger.info("Index raw data...")
         raw_data.import_raw(self.file_source)
         self.file_obj = raw_data
-    elif os.path.exists(self.file_source) and self.file_source.lower().endswith(
+    elif os.path.exists(self.file_source) and self.file_source.lower().endswith(
+        ".mzml",
+    ):
         self.file_interface = "oms"
         omsexp: oms.OnDiscMSExperiment | oms.MSExperiment
         if self.ondisk:
@@ -995,7 +1001,9 @@ def index_file(self):
             omsexp = oms.MSExperiment()
             oms.MzMLFile().load(self.file_source, omsexp)
         self.file_obj = omsexp
-    elif os.path.exists(self.file_source) and self.file_source.lower().endswith(
+    elif os.path.exists(self.file_source) and self.file_source.lower().endswith(
+        ".sample5",
+    ):
         # this is an old save, try to see if
         if os.path.exists(self.file_source.replace(".sample5", ".wiff")):
             self.set_source(self.file_source.replace(".sample5", ".wiff"))
@@ -1009,7 +1017,9 @@ def index_file(self):
         )
         self.index_file()
     else:
-        raise FileNotFoundError(
+        raise FileNotFoundError(
+            f"File {self.file_source} not found. Did the path change? Consider running source().",
+        )


 def _load_ms2data(
@@ -1214,7 +1224,9 @@ def chrom_extract(
         scan_uid = trace["scan_uid"]
         # find all ms1 data with scan_uid and mz between q1-mz_tol and q1+mz_tol
         d = self.ms1_df.filter(
-            (pl.col("scan_uid").is_in(scan_uid))
+            (pl.col("scan_uid").is_in(scan_uid))
+            & (pl.col("mz") >= q1 - mz_tol)
+            & (pl.col("mz") <= q1 + mz_tol),
        )
         # for all unique rt values, find the maximum inty
         eic_rt = d.group_by("rt").agg(pl.col("inty").max())
@@ -1233,7 +1245,9 @@ def chrom_extract(
         scan_uid = trace["scan_uid"]
         # find all ms2 data with scan_uid and mz between q3-mz_tol and q3+mz_tol
         d = self.ms2data.filter(
-            (pl.col("scan_uid").is_in(scan_uid))
+            (pl.col("scan_uid").is_in(scan_uid))
+            & (pl.col("mz") >= q3 - mz_tol)
+            & (pl.col("mz") <= q3 + mz_tol),
         )
         # for all unique rt values, find the maximum inty
         eic_rt = d.group_by("rt").agg(pl.col("inty").max())
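chrom_extract narrows points to a scan set and an m/z window, then takes the per-rt maximum. A small sketch of that combination on invented data:

import polars as pl

ms1_df = pl.DataFrame(
    {
        "scan_uid": [1, 1, 2, 3, 3],
        "rt": [5.0, 5.0, 5.5, 6.0, 6.0],
        "mz": [150.01, 150.40, 150.02, 149.99, 150.03],
        "inty": [900.0, 50.0, 1200.0, 700.0, 950.0],
    }
)

scan_uid, q1, mz_tol = [1, 3], 150.0, 0.05
d = ms1_df.filter(
    (pl.col("scan_uid").is_in(scan_uid))
    & (pl.col("mz") >= q1 - mz_tol)
    & (pl.col("mz") <= q1 + mz_tol),
)
# Keep the most intense point per retention time.
eic_rt = d.group_by("rt").agg(pl.col("inty").max()).sort("rt")
print(eic_rt)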
masster/sample/plot.py
CHANGED
@@ -144,7 +144,7 @@ def _display_plot(plot_object, layout=None):
 def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
     """
     Helper function to handle consistent save/display behavior for sample plots.
-
+
     Parameters:
         plot_obj: The plot object (bokeh figure, holoviews layout, or panel object)
         filename: Optional filename to save the plot
@@ -155,10 +155,10 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
     import os
     if hasattr(self, 'folder') and self.folder and not os.path.isabs(filename):
         filename = os.path.join(self.folder, filename)
-
+
     # Convert to absolute path for logging
     abs_filename = os.path.abspath(filename)
-
+
     if filename.endswith(".html"):
         if plot_type == "panel":
             plot_obj.save(filename, embed=True)  # type: ignore[attr-defined]
@@ -375,7 +375,7 @@ def plot_chrom(

     layout = layout.cols(1)
     layout = panel.Column(layout)
-
+
     # Use consistent save/display behavior
     self._handle_sample_plot_output(layout, filename, "panel")

@@ -927,7 +927,7 @@ def plot_2d(
     layout = panel.Column(overlay)

     if filename is not None:
-        # Use consistent save/display behavior
+        # Use consistent save/display behavior
         self._handle_sample_plot_output(layout, filename, "panel")
         return None
     else: