masster 0.4.22__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/adducts.py +1 -1
- masster/sample/load.py +10 -9
- masster/sample/plot.py +1 -1
- masster/sample/processing.py +4 -4
- masster/sample/sample.py +29 -32
- masster/study/analysis.py +1762 -0
- masster/study/export.py +5 -3
- masster/study/helpers.py +153 -80
- masster/study/id.py +3 -3
- masster/study/load.py +17 -52
- masster/study/merge.py +316 -313
- masster/study/parameters.py +3 -3
- masster/study/plot.py +398 -43
- masster/study/processing.py +4 -4
- masster/study/save.py +8 -4
- masster/study/study.py +97 -139
- {masster-0.4.22.dist-info → masster-0.5.0.dist-info}/METADATA +54 -14
- {masster-0.4.22.dist-info → masster-0.5.0.dist-info}/RECORD +22 -21
- {masster-0.4.22.dist-info → masster-0.5.0.dist-info}/WHEEL +0 -0
- {masster-0.4.22.dist-info → masster-0.5.0.dist-info}/entry_points.txt +0 -0
- {masster-0.4.22.dist-info → masster-0.5.0.dist-info}/licenses/LICENSE +0 -0
masster/study/export.py
CHANGED
|
@@ -60,7 +60,7 @@ def _get_mgf_df(self, **kwargs):
|
|
|
60
60
|
# end of parameter initialization
|
|
61
61
|
|
|
62
62
|
# Store parameters in the Study object
|
|
63
|
-
self.
|
|
63
|
+
self.update_history(["get_mgf"], params.to_dict())
|
|
64
64
|
self.logger.debug("Parameters stored to get_mgf")
|
|
65
65
|
|
|
66
66
|
# Get parameter values for use in the method
|
|
@@ -442,7 +442,8 @@ def export_mgf(self, **kwargs):
|
|
|
442
442
|
None: Writes MGF file to disk.
|
|
443
443
|
"""
|
|
444
444
|
# Get mgf data as DataFrame
|
|
445
|
-
|
|
445
|
+
from masster.study.export import _get_mgf_df
|
|
446
|
+
mgf_data = _get_mgf_df(self, **kwargs)
|
|
446
447
|
|
|
447
448
|
if mgf_data is None or len(mgf_data) == 0:
|
|
448
449
|
self.logger.warning("No MGF data generated.")
|
|
@@ -559,7 +560,8 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
559
560
|
mgf_data = None
|
|
560
561
|
mgf_mapping: dict[str, list[int]] = {}
|
|
561
562
|
if include_mgf:
|
|
562
|
-
|
|
563
|
+
from masster.study.export import _get_mgf_df
|
|
564
|
+
mgf_data = _get_mgf_df(self, **kwargs)
|
|
563
565
|
# Create mapping from feature_uid to MGF indexes
|
|
564
566
|
if mgf_data is not None and len(mgf_data) > 0:
|
|
565
567
|
for row in mgf_data.iter_rows(named=True):
|
masster/study/helpers.py
CHANGED
|
@@ -49,7 +49,7 @@ def get_bpc(owner, sample=None, rt_unit="s", label=None, original=False):
|
|
|
49
49
|
s = owner
|
|
50
50
|
else:
|
|
51
51
|
# owner is expected to be a Study
|
|
52
|
-
s =
|
|
52
|
+
s = get_samples(owner, sample)
|
|
53
53
|
|
|
54
54
|
if s is None:
|
|
55
55
|
raise ValueError("Could not resolve sample for BPC computation")
|
|
@@ -189,7 +189,7 @@ def get_tic(owner, sample=None, label=None):
|
|
|
189
189
|
if hasattr(owner, "ms1_df"):
|
|
190
190
|
s = owner
|
|
191
191
|
else:
|
|
192
|
-
s =
|
|
192
|
+
s = get_samples(owner, sample)
|
|
193
193
|
|
|
194
194
|
if s is None:
|
|
195
195
|
raise ValueError("Could not resolve sample for TIC computation")
|
|
@@ -278,7 +278,7 @@ def get_eic(owner, sample=None, mz=None, mz_tol=None, rt_unit="s", label=None):
|
|
|
278
278
|
s = owner
|
|
279
279
|
else:
|
|
280
280
|
# owner is expected to be a Study
|
|
281
|
-
s =
|
|
281
|
+
s = get_samples(owner, sample)
|
|
282
282
|
|
|
283
283
|
if s is None:
|
|
284
284
|
raise ValueError("Could not resolve sample for EIC computation")
|
|
@@ -360,7 +360,7 @@ def get_chrom(self, uids=None, samples=None):
|
|
|
360
360
|
return None
|
|
361
361
|
|
|
362
362
|
ids = self._get_consensus_uids(uids)
|
|
363
|
-
sample_uids = self._get_sample_uids(samples)
|
|
363
|
+
sample_uids = self._get_samples_uids(samples)
|
|
364
364
|
|
|
365
365
|
if self.consensus_map is None:
|
|
366
366
|
self.logger.error("No consensus map found.")
|
|
@@ -467,7 +467,7 @@ def get_chrom(self, uids=None, samples=None):
|
|
|
467
467
|
# =====================================================================================
|
|
468
468
|
|
|
469
469
|
|
|
470
|
-
def
|
|
470
|
+
def set_study_folder(self, folder):
|
|
471
471
|
"""
|
|
472
472
|
Set the folder for saving and loading files.
|
|
473
473
|
"""
|
|
@@ -492,7 +492,8 @@ def align_reset(self):
|
|
|
492
492
|
)
|
|
493
493
|
|
|
494
494
|
# Ensure column order is maintained after with_columns operation
|
|
495
|
-
|
|
495
|
+
from masster.study.helpers import _ensure_features_df_schema_order
|
|
496
|
+
_ensure_features_df_schema_order(self)
|
|
496
497
|
|
|
497
498
|
|
|
498
499
|
# =====================================================================================
|
|
@@ -722,7 +723,7 @@ def fill_reset(self):
|
|
|
722
723
|
)
|
|
723
724
|
|
|
724
725
|
|
|
725
|
-
def
|
|
726
|
+
def _get_features_uids(self, uids=None, seed=42):
|
|
726
727
|
"""
|
|
727
728
|
Helper function to get feature_uids from features_df based on input uids.
|
|
728
729
|
If uids is None, returns all feature_uids.
|
|
@@ -806,7 +807,7 @@ def _get_consensus_uids(self, uids=None, seed=42):
|
|
|
806
807
|
return consensus_uids
|
|
807
808
|
|
|
808
809
|
|
|
809
|
-
def _get_sample_uids(self, samples=None, seed=42):
|
|
810
|
+
def _get_samples_uids(self, samples=None, seed=42):
|
|
810
811
|
"""
|
|
811
812
|
Helper function to get sample_uids from samples_df based on input samples.
|
|
812
813
|
If samples is None, returns all sample_uids.
|
|
@@ -847,7 +848,7 @@ def _get_sample_uids(self, samples=None, seed=42):
|
|
|
847
848
|
return sample_uids
|
|
848
849
|
|
|
849
850
|
|
|
850
|
-
def
|
|
851
|
+
def get_samples(self, sample):
|
|
851
852
|
"""
|
|
852
853
|
Return a `Sample` object corresponding to the provided sample identifier.
|
|
853
854
|
|
|
@@ -911,6 +912,138 @@ def get_orphans(self):
|
|
|
911
912
|
return not_in_consensus
|
|
912
913
|
|
|
913
914
|
|
|
915
|
+
def get_sample_stats(self):
|
|
916
|
+
"""
|
|
917
|
+
Get statistics for all samples in the study.
|
|
918
|
+
|
|
919
|
+
Returns:
|
|
920
|
+
pl.DataFrame: DataFrame with the following columns:
|
|
921
|
+
- sample_uid: Sample unique identifier
|
|
922
|
+
- num_features: Total number of features per sample
|
|
923
|
+
- num_ms1: Number of MS1 features per sample
|
|
924
|
+
- num_ms2: Number of MS2 features per sample
|
|
925
|
+
- num_linked_ms1: Number of non-filled features present in consensus_mapping_df
|
|
926
|
+
- num_orphans: Number of non-filled features not present in consensus_mapping_df
|
|
927
|
+
- max_rt_correction: Maximum RT correction applied
|
|
928
|
+
- average_rt_correction: Average RT correction applied
|
|
929
|
+
- num_linked_ms2: Number of linked MS2 spectra from consensus_ms2_df
|
|
930
|
+
"""
|
|
931
|
+
if self.samples_df is None or self.samples_df.is_empty():
|
|
932
|
+
self.logger.warning("No samples found in study.")
|
|
933
|
+
return pl.DataFrame()
|
|
934
|
+
|
|
935
|
+
if self.features_df is None or self.features_df.is_empty():
|
|
936
|
+
self.logger.warning("No features found in study.")
|
|
937
|
+
return pl.DataFrame()
|
|
938
|
+
|
|
939
|
+
# Get base sample information
|
|
940
|
+
sample_uids = self.samples_df["sample_uid"].to_list()
|
|
941
|
+
stats_data = []
|
|
942
|
+
|
|
943
|
+
for sample_uid in sample_uids:
|
|
944
|
+
# Filter features for this sample
|
|
945
|
+
sample_features = self.features_df.filter(pl.col("sample_uid") == sample_uid)
|
|
946
|
+
|
|
947
|
+
if sample_features.is_empty():
|
|
948
|
+
# Sample has no features
|
|
949
|
+
stats_data.append({
|
|
950
|
+
"sample_uid": sample_uid,
|
|
951
|
+
"num_features": 0,
|
|
952
|
+
"num_ms1": 0,
|
|
953
|
+
"num_ms2": 0,
|
|
954
|
+
"num_linked_ms1": 0,
|
|
955
|
+
"num_orphans": 0,
|
|
956
|
+
"max_rt_correction": None,
|
|
957
|
+
"average_rt_correction": None,
|
|
958
|
+
"num_linked_ms2": 0
|
|
959
|
+
})
|
|
960
|
+
continue
|
|
961
|
+
|
|
962
|
+
# Basic feature counts
|
|
963
|
+
num_features = len(sample_features)
|
|
964
|
+
|
|
965
|
+
# Count MS1 and MS2 features
|
|
966
|
+
# Assume features with ms_level=1 or missing ms_level are MS1
|
|
967
|
+
num_ms1 = sample_features.filter(
|
|
968
|
+
pl.col("ms_level").is_null() | (pl.col("ms_level") == 1)
|
|
969
|
+
).height if "ms_level" in sample_features.columns else num_features
|
|
970
|
+
|
|
971
|
+
num_ms2 = sample_features.filter(
|
|
972
|
+
pl.col("ms_level") == 2
|
|
973
|
+
).height if "ms_level" in sample_features.columns else 0
|
|
974
|
+
|
|
975
|
+
# Get non-filled features for this sample
|
|
976
|
+
if "filled" in sample_features.columns:
|
|
977
|
+
non_filled_features = sample_features.filter(~pl.col("filled") | pl.col("filled").is_null())
|
|
978
|
+
else:
|
|
979
|
+
non_filled_features = sample_features
|
|
980
|
+
|
|
981
|
+
# Count linked MS1 features (non-filled and present in consensus_mapping_df)
|
|
982
|
+
num_linked_ms1 = 0
|
|
983
|
+
if not self.consensus_mapping_df.is_empty() and not non_filled_features.is_empty():
|
|
984
|
+
linked_feature_uids = self.consensus_mapping_df.filter(
|
|
985
|
+
pl.col("sample_uid") == sample_uid
|
|
986
|
+
)["feature_uid"].to_list()
|
|
987
|
+
|
|
988
|
+
num_linked_ms1 = non_filled_features.filter(
|
|
989
|
+
pl.col("feature_uid").is_in(linked_feature_uids)
|
|
990
|
+
).height
|
|
991
|
+
|
|
992
|
+
# Count orphan features (non-filled and NOT present in consensus_mapping_df)
|
|
993
|
+
num_orphans = len(non_filled_features) - num_linked_ms1
|
|
994
|
+
|
|
995
|
+
# Calculate RT correction statistics
|
|
996
|
+
max_rt_correction = None
|
|
997
|
+
average_rt_correction = None
|
|
998
|
+
|
|
999
|
+
if "rt" in sample_features.columns and "rt_original" in sample_features.columns:
|
|
1000
|
+
rt_corrections = sample_features.with_columns(
|
|
1001
|
+
(pl.col("rt") - pl.col("rt_original")).alias("rt_correction")
|
|
1002
|
+
).filter(
|
|
1003
|
+
pl.col("rt_correction").is_not_null()
|
|
1004
|
+
)["rt_correction"]
|
|
1005
|
+
|
|
1006
|
+
if not rt_corrections.is_empty():
|
|
1007
|
+
max_rt_correction = rt_corrections.abs().max()
|
|
1008
|
+
average_rt_correction = rt_corrections.abs().mean()
|
|
1009
|
+
|
|
1010
|
+
# Count linked MS2 spectra from consensus_ms2_df
|
|
1011
|
+
num_linked_ms2 = 0
|
|
1012
|
+
if hasattr(self, 'consensus_ms2') and self.consensus_ms2 is not None and not self.consensus_ms2.is_empty():
|
|
1013
|
+
if "sample_uid" in self.consensus_ms2.columns:
|
|
1014
|
+
num_linked_ms2 = self.consensus_ms2.filter(
|
|
1015
|
+
pl.col("sample_uid") == sample_uid
|
|
1016
|
+
).height
|
|
1017
|
+
|
|
1018
|
+
stats_data.append({
|
|
1019
|
+
"sample_uid": sample_uid,
|
|
1020
|
+
"num_features": num_features,
|
|
1021
|
+
"num_ms1": num_ms1,
|
|
1022
|
+
"num_ms2": num_ms2,
|
|
1023
|
+
"num_linked_ms1": num_linked_ms1,
|
|
1024
|
+
"num_orphans": num_orphans,
|
|
1025
|
+
"max_rt_correction": max_rt_correction,
|
|
1026
|
+
"average_rt_correction": average_rt_correction,
|
|
1027
|
+
"num_linked_ms2": num_linked_ms2
|
|
1028
|
+
})
|
|
1029
|
+
|
|
1030
|
+
# Create DataFrame with proper schema
|
|
1031
|
+
return pl.DataFrame(
|
|
1032
|
+
stats_data,
|
|
1033
|
+
schema={
|
|
1034
|
+
"sample_uid": pl.UInt64,
|
|
1035
|
+
"num_features": pl.UInt32,
|
|
1036
|
+
"num_ms1": pl.UInt32,
|
|
1037
|
+
"num_ms2": pl.UInt32,
|
|
1038
|
+
"num_linked_ms1": pl.UInt32,
|
|
1039
|
+
"num_orphans": pl.UInt32,
|
|
1040
|
+
"max_rt_correction": pl.Float64,
|
|
1041
|
+
"average_rt_correction": pl.Float64,
|
|
1042
|
+
"num_linked_ms2": pl.UInt32
|
|
1043
|
+
}
|
|
1044
|
+
)
|
|
1045
|
+
|
|
1046
|
+
|
|
914
1047
|
# =====================================================================================
|
|
915
1048
|
# DATA COMPRESSION AND RESTORATION FUNCTIONS
|
|
916
1049
|
# =====================================================================================
|
|
@@ -995,7 +1128,7 @@ def restore_features(self, samples=None, maps=False):
|
|
|
995
1128
|
return
|
|
996
1129
|
|
|
997
1130
|
# Get sample_uids to process
|
|
998
|
-
sample_uids = self._get_sample_uids(samples)
|
|
1131
|
+
sample_uids = self._get_samples_uids(samples)
|
|
999
1132
|
|
|
1000
1133
|
if not sample_uids:
|
|
1001
1134
|
self.logger.warning("No valid samples specified.")
|
|
@@ -1154,7 +1287,7 @@ def restore_chrom(self, samples=None, mz_tol=0.010, rt_tol=10.0):
|
|
|
1154
1287
|
return
|
|
1155
1288
|
|
|
1156
1289
|
# Get sample_uids to process
|
|
1157
|
-
sample_uids = self._get_sample_uids(samples)
|
|
1290
|
+
sample_uids = self._get_samples_uids(samples)
|
|
1158
1291
|
if not sample_uids:
|
|
1159
1292
|
self.logger.warning("No valid samples specified.")
|
|
1160
1293
|
return
|
|
@@ -1610,7 +1743,7 @@ def sample_name_reset(self):
|
|
|
1610
1743
|
)
|
|
1611
1744
|
|
|
1612
1745
|
|
|
1613
|
-
def
|
|
1746
|
+
def set_samples_source(self, filename):
|
|
1614
1747
|
"""
|
|
1615
1748
|
Reassign file_source for all samples in samples_df. If filename contains only a path,
|
|
1616
1749
|
keep the current basename and build an absolute path. Check that the new file exists
|
|
@@ -3301,7 +3434,7 @@ def samples_delete(self, samples):
|
|
|
3301
3434
|
# =====================================================================================
|
|
3302
3435
|
|
|
3303
3436
|
|
|
3304
|
-
def sample_color(self, by=None, palette="Turbo256"):
|
|
3437
|
+
def set_samples_color(self, by=None, palette="Turbo256"):
|
|
3305
3438
|
"""
|
|
3306
3439
|
Set sample colors in the sample_color column of samples_df.
|
|
3307
3440
|
|
|
@@ -3344,13 +3477,13 @@ def sample_color(self, by=None, palette="Turbo256"):
|
|
|
3344
3477
|
|
|
3345
3478
|
Example:
|
|
3346
3479
|
# Set colors based on sample type
|
|
3347
|
-
study.
|
|
3480
|
+
study.set_samples_color(by='sample_type', palette='Set1')
|
|
3348
3481
|
|
|
3349
3482
|
# Set colors using a custom color list
|
|
3350
|
-
study.
|
|
3483
|
+
study.set_samples_color(by=['#FF0000', '#00FF00', '#0000FF'])
|
|
3351
3484
|
|
|
3352
3485
|
# Reset to default Turbo256 sequential colors
|
|
3353
|
-
study.
|
|
3486
|
+
study.set_samples_color()
|
|
3354
3487
|
"""
|
|
3355
3488
|
if self.samples_df is None or len(self.samples_df) == 0:
|
|
3356
3489
|
self.logger.warning("No samples found in study.")
|
|
@@ -3473,67 +3606,7 @@ def sample_color(self, by=None, palette="Turbo256"):
|
|
|
3473
3606
|
self.logger.debug(f"Set sample colors based on {by} using {palette} palette")
|
|
3474
3607
|
|
|
3475
3608
|
|
|
3476
|
-
def sample_color_reset(self):
|
|
3477
|
-
"""
|
|
3478
|
-
Reset sample colors to default coloring using the 'turbo' colormap.
|
|
3479
|
-
|
|
3480
|
-
This function assigns colors by distributing samples evenly across the full
|
|
3481
|
-
turbo colormap range, ensuring maximum color diversity and visual distinction
|
|
3482
|
-
between samples.
|
|
3483
|
-
|
|
3484
|
-
Returns:
|
|
3485
|
-
None (modifies self.samples_df in place)
|
|
3486
|
-
"""
|
|
3487
|
-
if self.samples_df is None or len(self.samples_df) == 0:
|
|
3488
|
-
self.logger.warning("No samples found in study.")
|
|
3489
|
-
return
|
|
3490
|
-
|
|
3491
|
-
try:
|
|
3492
|
-
from cmap import Colormap
|
|
3493
|
-
|
|
3494
|
-
# Use turbo colormap
|
|
3495
|
-
cm = Colormap("turbo")
|
|
3496
|
-
|
|
3497
|
-
# Get sample count and assign colors evenly distributed across colormap
|
|
3498
|
-
n_samples = len(self.samples_df)
|
|
3499
|
-
colors = []
|
|
3500
|
-
|
|
3501
|
-
# Distribute samples evenly across the full colormap range
|
|
3502
|
-
for i in range(n_samples):
|
|
3503
|
-
# Evenly distribute samples across colormap (avoiding endpoints to prevent white/black)
|
|
3504
|
-
normalized_value = (
|
|
3505
|
-
i + 0.5
|
|
3506
|
-
) / n_samples # +0.5 to center samples in their bins
|
|
3507
|
-
# Optionally, map to a subset of colormap to avoid extreme colors
|
|
3508
|
-
# Use 10% to 90% of colormap range for better color diversity
|
|
3509
|
-
normalized_value = 0.1 + (normalized_value * 0.8)
|
|
3510
|
-
|
|
3511
|
-
color_rgba = cm(normalized_value)
|
|
3512
3609
|
|
|
3513
|
-
# Convert RGBA to hex
|
|
3514
|
-
if len(color_rgba) >= 3:
|
|
3515
|
-
r, g, b = color_rgba[:3]
|
|
3516
|
-
# Convert to 0-255 range if needed
|
|
3517
|
-
if max(color_rgba[:3]) <= 1.0:
|
|
3518
|
-
r, g, b = int(r * 255), int(g * 255), int(b * 255)
|
|
3519
|
-
hex_color = f"#{r:02x}{g:02x}{b:02x}"
|
|
3520
|
-
colors.append(hex_color)
|
|
3521
|
-
|
|
3522
|
-
# Update the sample_color column
|
|
3523
|
-
self.samples_df = self.samples_df.with_columns(
|
|
3524
|
-
pl.Series("sample_color", colors).alias("sample_color"),
|
|
3525
|
-
)
|
|
3526
|
-
|
|
3527
|
-
self.logger.debug(
|
|
3528
|
-
f"Reset sample colors using turbo colormap with even distribution ({n_samples} samples)",
|
|
3529
|
-
)
|
|
3530
|
-
|
|
3531
|
-
except ImportError:
|
|
3532
|
-
self.logger.error(
|
|
3533
|
-
"cmap library is required for sample color reset. Install with: uv add cmap",
|
|
3534
|
-
)
|
|
3535
|
-
except Exception as e:
|
|
3536
|
-
self.logger.error(f"Failed to reset sample colors: {e}")
|
|
3537
3610
|
|
|
3538
3611
|
|
|
3539
3612
|
def _get_color_palette(palette_name):
|
|
@@ -3634,7 +3707,7 @@ def _get_color_palette(palette_name):
|
|
|
3634
3707
|
|
|
3635
3708
|
def _sample_colors_from_colormap(palette_name, n_colors):
|
|
3636
3709
|
"""
|
|
3637
|
-
Sample colors evenly from the whole colormap range, similar to
|
|
3710
|
+
Sample colors evenly from the whole colormap range, similar to set_samples_color(by=None).
|
|
3638
3711
|
|
|
3639
3712
|
Parameters:
|
|
3640
3713
|
palette_name (str): Name of the palette/colormap
|
|
@@ -3686,7 +3759,7 @@ def _sample_colors_from_colormap(palette_name, n_colors):
|
|
|
3686
3759
|
|
|
3687
3760
|
colors = []
|
|
3688
3761
|
|
|
3689
|
-
# Distribute samples evenly across the full colormap range (same approach as
|
|
3762
|
+
# Distribute samples evenly across the full colormap range (same approach as set_samples_color(by=None))
|
|
3690
3763
|
for i in range(n_colors):
|
|
3691
3764
|
# Evenly distribute samples across colormap (avoiding endpoints to prevent white/black)
|
|
3692
3765
|
normalized_value = (
|
|
@@ -3818,7 +3891,7 @@ def restore_ms2(self, samples=None, **kwargs):
|
|
|
3818
3891
|
return
|
|
3819
3892
|
|
|
3820
3893
|
# Get sample_uids to process
|
|
3821
|
-
sample_uids = self._get_sample_uids(samples)
|
|
3894
|
+
sample_uids = self._get_samples_uids(samples)
|
|
3822
3895
|
if not sample_uids:
|
|
3823
3896
|
self.logger.warning("No valid samples specified.")
|
|
3824
3897
|
return
|
|
@@ -3888,7 +3961,7 @@ def decompress(self, features=True, ms2=True, chrom=True, samples=None, **kwargs
|
|
|
3888
3961
|
return
|
|
3889
3962
|
|
|
3890
3963
|
# Get sample_uids to process
|
|
3891
|
-
sample_uids = self._get_sample_uids(samples)
|
|
3964
|
+
sample_uids = self._get_samples_uids(samples)
|
|
3892
3965
|
if not sample_uids:
|
|
3893
3966
|
self.logger.warning("No valid samples specified.")
|
|
3894
3967
|
return
|
masster/study/id.py
CHANGED
|
@@ -124,8 +124,8 @@ def lib_load(
|
|
|
124
124
|
study.lib_df = pl.DataFrame()
|
|
125
125
|
|
|
126
126
|
# Store this operation in history
|
|
127
|
-
if hasattr(study, "
|
|
128
|
-
study.
|
|
127
|
+
if hasattr(study, "update_history"):
|
|
128
|
+
study.update_history(
|
|
129
129
|
["lib_load"],
|
|
130
130
|
{"lib_source": str(lib_source), "polarity": polarity, "adducts": adducts},
|
|
131
131
|
)
|
|
@@ -385,7 +385,7 @@ def _store_identification_history(study, effective_mz_tol, effective_rt_tol, tar
|
|
|
385
385
|
history_params["params"] = params.to_dict()
|
|
386
386
|
if kwargs:
|
|
387
387
|
history_params["kwargs"] = kwargs
|
|
388
|
-
study.
|
|
388
|
+
study.update_history(["identify"], history_params)
|
|
389
389
|
|
|
390
390
|
|
|
391
391
|
def _validate_identify_inputs(study, logger=None):
|
masster/study/load.py
CHANGED
|
@@ -213,18 +213,19 @@ def load(self, filename=None):
|
|
|
213
213
|
return
|
|
214
214
|
|
|
215
215
|
# self.logger.info(f"Loading study from {filename}")
|
|
216
|
-
|
|
216
|
+
from masster.study.h5 import _load_study5
|
|
217
|
+
_load_study5(self, filename)
|
|
217
218
|
|
|
218
219
|
# After loading the study, check if we have consensus features before loading consensus XML
|
|
219
|
-
if (self.consensus_df is not None and not self.consensus_df.is_empty()):
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
220
|
+
#if (self.consensus_df is not None and not self.consensus_df.is_empty()):
|
|
221
|
+
# consensus_xml_path = filename.replace(".study5", ".consensusXML")
|
|
222
|
+
# if os.path.exists(consensus_xml_path):
|
|
223
|
+
# self._load_consensusXML(filename=consensus_xml_path)
|
|
223
224
|
# self.logger.info(f"Automatically loaded consensus from {consensus_xml_path}")
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
else:
|
|
227
|
-
|
|
225
|
+
# else:
|
|
226
|
+
# self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
|
|
227
|
+
#else:
|
|
228
|
+
# self.logger.debug("No consensus features found, skipping consensusXML loading")
|
|
228
229
|
|
|
229
230
|
self.filename = filename
|
|
230
231
|
|
|
@@ -559,7 +560,7 @@ def fill_single(self, **kwargs):
|
|
|
559
560
|
# end of parameter initialization
|
|
560
561
|
|
|
561
562
|
# Store parameters in the Study object
|
|
562
|
-
self.
|
|
563
|
+
self.update_history(["fill_single"], params.to_dict())
|
|
563
564
|
self.logger.debug("Parameters stored to fill_single")
|
|
564
565
|
|
|
565
566
|
# Call the original fill_chrom_single function with extracted parameters
|
|
@@ -979,7 +980,7 @@ def fill(self, **kwargs):
|
|
|
979
980
|
# end of parameter initialization
|
|
980
981
|
|
|
981
982
|
# Store parameters in the Study object
|
|
982
|
-
self.
|
|
983
|
+
self.update_history(["fill"], params.to_dict())
|
|
983
984
|
self.logger.debug("Parameters stored to fill")
|
|
984
985
|
|
|
985
986
|
# Call the original fill_chrom function with extracted parameters
|
|
@@ -1115,7 +1116,7 @@ def _get_missing_consensus_sample_combinations(self, uids):
|
|
|
1115
1116
|
return missing_combinations
|
|
1116
1117
|
|
|
1117
1118
|
|
|
1118
|
-
def sanitize(self):
|
|
1119
|
+
def _sanitize(self):
|
|
1119
1120
|
"""
|
|
1120
1121
|
Sanitize features DataFrame to ensure all complex objects are properly typed.
|
|
1121
1122
|
Convert serialized objects back to their proper types (Chromatogram, Spectrum).
|
|
@@ -1209,7 +1210,7 @@ def sanitize(self):
|
|
|
1209
1210
|
self.logger.error(f"Failed to recreate sanitized DataFrame: {e}")
|
|
1210
1211
|
|
|
1211
1212
|
|
|
1212
|
-
def
|
|
1213
|
+
def _load_features(self):
|
|
1213
1214
|
"""
|
|
1214
1215
|
Load features by reconstructing FeatureMaps from the processed features_df data.
|
|
1215
1216
|
|
|
@@ -1630,7 +1631,7 @@ def _add_sample_optimized(
|
|
|
1630
1631
|
# - No _ensure_features_df_schema_order()
|
|
1631
1632
|
# - No complex column alignment
|
|
1632
1633
|
# - No type casting loops
|
|
1633
|
-
# - No
|
|
1634
|
+
# - No set_samples_color(by=None) call needed
|
|
1634
1635
|
|
|
1635
1636
|
self.logger.debug(
|
|
1636
1637
|
f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (optimized)",
|
|
@@ -1914,42 +1915,6 @@ def _add_sample_standard(
|
|
|
1914
1915
|
|
|
1915
1916
|
def _sample_color_reset_optimized(self):
|
|
1916
1917
|
"""
|
|
1917
|
-
Optimized version of
|
|
1918
|
+
Optimized version of sample color reset using set_samples_color.
|
|
1918
1919
|
"""
|
|
1919
|
-
|
|
1920
|
-
self.logger.warning("No samples found in study.")
|
|
1921
|
-
return
|
|
1922
|
-
|
|
1923
|
-
# Cache the colormap if not already cached
|
|
1924
|
-
if not hasattr(self, "_cached_colormap"):
|
|
1925
|
-
try:
|
|
1926
|
-
from cmap import Colormap
|
|
1927
|
-
|
|
1928
|
-
self._cached_colormap = Colormap("turbo")
|
|
1929
|
-
except ImportError:
|
|
1930
|
-
self.logger.warning("cmap package not available, using default colors")
|
|
1931
|
-
return
|
|
1932
|
-
|
|
1933
|
-
cm = self._cached_colormap
|
|
1934
|
-
n_samples = len(self.samples_df)
|
|
1935
|
-
|
|
1936
|
-
# Pre-allocate colors list for better performance
|
|
1937
|
-
colors = [None] * n_samples
|
|
1938
|
-
|
|
1939
|
-
# Vectorized color generation
|
|
1940
|
-
for i in range(n_samples):
|
|
1941
|
-
normalized_value = 0.1 + ((i + 0.5) / n_samples) * 0.8
|
|
1942
|
-
color_rgba = cm(normalized_value)
|
|
1943
|
-
|
|
1944
|
-
if len(color_rgba) >= 3:
|
|
1945
|
-
r, g, b = color_rgba[:3]
|
|
1946
|
-
if max(color_rgba[:3]) <= 1.0:
|
|
1947
|
-
r, g, b = int(r * 255), int(g * 255), int(b * 255)
|
|
1948
|
-
colors[i] = f"#{r:02x}{g:02x}{b:02x}"
|
|
1949
|
-
|
|
1950
|
-
# Update the sample_color column efficiently
|
|
1951
|
-
self.samples_df = self.samples_df.with_columns(
|
|
1952
|
-
pl.Series("sample_color", colors).alias("sample_color"),
|
|
1953
|
-
)
|
|
1954
|
-
|
|
1955
|
-
self.logger.debug(f"Reset sample colors (cached) for {n_samples} samples")
|
|
1920
|
+
return self.set_samples_color(by=None)
|