masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/data/libs/aa.csv +22 -0
- masster/lib/lib.py +6 -0
- masster/sample/adducts.py +1 -1
- masster/sample/load.py +10 -9
- masster/sample/plot.py +1 -1
- masster/sample/processing.py +4 -4
- masster/sample/sample.py +29 -32
- masster/study/analysis.py +1762 -0
- masster/study/defaults/fill_def.py +1 -1
- masster/study/export.py +5 -3
- masster/study/h5.py +3 -0
- masster/study/helpers.py +153 -80
- masster/study/id.py +545 -4
- masster/study/load.py +33 -59
- masster/study/merge.py +413 -315
- masster/study/parameters.py +3 -3
- masster/study/plot.py +398 -43
- masster/study/processing.py +6 -14
- masster/study/save.py +8 -4
- masster/study/study.py +179 -139
- masster/study/study5_schema.json +9 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/METADATA +54 -14
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/RECORD +27 -25
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/WHEEL +0 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/entry_points.txt +0 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/licenses/LICENSE +0 -0
masster/study/load.py
CHANGED
|
@@ -213,18 +213,19 @@ def load(self, filename=None):
|
|
|
213
213
|
return
|
|
214
214
|
|
|
215
215
|
# self.logger.info(f"Loading study from {filename}")
|
|
216
|
-
|
|
216
|
+
from masster.study.h5 import _load_study5
|
|
217
|
+
_load_study5(self, filename)
|
|
217
218
|
|
|
218
219
|
# After loading the study, check if we have consensus features before loading consensus XML
|
|
219
|
-
if (self.consensus_df is not None and not self.consensus_df.is_empty()):
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
220
|
+
#if (self.consensus_df is not None and not self.consensus_df.is_empty()):
|
|
221
|
+
# consensus_xml_path = filename.replace(".study5", ".consensusXML")
|
|
222
|
+
# if os.path.exists(consensus_xml_path):
|
|
223
|
+
# self._load_consensusXML(filename=consensus_xml_path)
|
|
223
224
|
# self.logger.info(f"Automatically loaded consensus from {consensus_xml_path}")
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
else:
|
|
227
|
-
|
|
225
|
+
# else:
|
|
226
|
+
# self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
|
|
227
|
+
#else:
|
|
228
|
+
# self.logger.debug("No consensus features found, skipping consensusXML loading")
|
|
228
229
|
|
|
229
230
|
self.filename = filename
|
|
230
231
|
|
|
@@ -260,9 +261,14 @@ def _fill_chrom_single_impl(
|
|
|
260
261
|
min_number_abs = 1
|
|
261
262
|
if isinstance(min_samples_rel, float) and min_samples_rel > 0:
|
|
262
263
|
min_number_rel = int(min_samples_rel * len(self.samples_df))
|
|
263
|
-
if isinstance(min_samples_abs, int) and min_samples_abs
|
|
264
|
-
min_number_abs = int(min_samples_abs)
|
|
264
|
+
if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
|
|
265
|
+
min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
|
|
265
266
|
min_number = max(min_number_rel, min_number_abs)
|
|
267
|
+
|
|
268
|
+
# Special case: if min_samples_abs is explicitly 0, allow 0-sample features (like library features)
|
|
269
|
+
if isinstance(min_samples_abs, int) and min_samples_abs == 0:
|
|
270
|
+
min_number = 0
|
|
271
|
+
|
|
266
272
|
self.logger.debug(f"Threshold for gap filling: number_samples>={min_number}")
|
|
267
273
|
|
|
268
274
|
if min_number > 0:
|
|
@@ -276,7 +282,7 @@ def _fill_chrom_single_impl(
|
|
|
276
282
|
)
|
|
277
283
|
self.logger.debug("Identifying missing features...")
|
|
278
284
|
# Instead of building full chromatogram matrix, identify missing consensus/sample combinations directly
|
|
279
|
-
missing_combinations =
|
|
285
|
+
missing_combinations = _get_missing_consensus_sample_combinations(self,uids)
|
|
280
286
|
if not missing_combinations:
|
|
281
287
|
self.logger.info("No missing features found to fill.")
|
|
282
288
|
return
|
|
@@ -559,7 +565,7 @@ def fill_single(self, **kwargs):
|
|
|
559
565
|
# end of parameter initialization
|
|
560
566
|
|
|
561
567
|
# Store parameters in the Study object
|
|
562
|
-
self.
|
|
568
|
+
self.update_history(["fill_single"], params.to_dict())
|
|
563
569
|
self.logger.debug("Parameters stored to fill_single")
|
|
564
570
|
|
|
565
571
|
# Call the original fill_chrom_single function with extracted parameters
|
|
@@ -753,10 +759,14 @@ def _fill_chrom_impl(
|
|
|
753
759
|
min_number_abs = 1
|
|
754
760
|
if isinstance(min_samples_rel, float) and min_samples_rel > 0:
|
|
755
761
|
min_number_rel = int(min_samples_rel * len(self.samples_df))
|
|
756
|
-
if isinstance(min_samples_abs, int) and min_samples_abs
|
|
757
|
-
min_number_abs = int(min_samples_abs)
|
|
762
|
+
if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
|
|
763
|
+
min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
|
|
758
764
|
min_number = max(min_number_rel, min_number_abs)
|
|
759
765
|
|
|
766
|
+
# Special case: if min_samples_abs is explicitly 0, allow 0-sample features (like library features)
|
|
767
|
+
if isinstance(min_samples_abs, int) and min_samples_abs == 0:
|
|
768
|
+
min_number = 0
|
|
769
|
+
|
|
760
770
|
self.logger.debug(f"Threshold for gap filling: number_samples>={min_number}")
|
|
761
771
|
|
|
762
772
|
if min_number > 0:
|
|
@@ -769,7 +779,7 @@ def _fill_chrom_impl(
|
|
|
769
779
|
|
|
770
780
|
# Get missing consensus/sample combinations using the optimized method
|
|
771
781
|
self.logger.debug("Identifying missing features...")
|
|
772
|
-
missing_combinations =
|
|
782
|
+
missing_combinations = _get_missing_consensus_sample_combinations(self, uids)
|
|
773
783
|
|
|
774
784
|
if not missing_combinations or len(missing_combinations) == 0:
|
|
775
785
|
self.logger.info("No missing features found to fill.")
|
|
@@ -845,7 +855,7 @@ def _fill_chrom_impl(
|
|
|
845
855
|
future_to_sample = {}
|
|
846
856
|
for sample_info in samples_to_process:
|
|
847
857
|
future = executor.submit(
|
|
848
|
-
|
|
858
|
+
_process_sample_for_parallel_fill, self,
|
|
849
859
|
sample_info,
|
|
850
860
|
consensus_info,
|
|
851
861
|
uids,
|
|
@@ -979,7 +989,7 @@ def fill(self, **kwargs):
|
|
|
979
989
|
# end of parameter initialization
|
|
980
990
|
|
|
981
991
|
# Store parameters in the Study object
|
|
982
|
-
self.
|
|
992
|
+
self.update_history(["fill"], params.to_dict())
|
|
983
993
|
self.logger.debug("Parameters stored to fill")
|
|
984
994
|
|
|
985
995
|
# Call the original fill_chrom function with extracted parameters
|
|
@@ -1115,7 +1125,7 @@ def _get_missing_consensus_sample_combinations(self, uids):
|
|
|
1115
1125
|
return missing_combinations
|
|
1116
1126
|
|
|
1117
1127
|
|
|
1118
|
-
def
|
|
1128
|
+
def _sanitize(self):
|
|
1119
1129
|
"""
|
|
1120
1130
|
Sanitize features DataFrame to ensure all complex objects are properly typed.
|
|
1121
1131
|
Convert serialized objects back to their proper types (Chromatogram, Spectrum).
|
|
@@ -1209,7 +1219,7 @@ def sanitize(self):
|
|
|
1209
1219
|
self.logger.error(f"Failed to recreate sanitized DataFrame: {e}")
|
|
1210
1220
|
|
|
1211
1221
|
|
|
1212
|
-
def
|
|
1222
|
+
def _load_features(self):
|
|
1213
1223
|
"""
|
|
1214
1224
|
Load features by reconstructing FeatureMaps from the processed features_df data.
|
|
1215
1225
|
|
|
@@ -1630,7 +1640,7 @@ def _add_sample_optimized(
|
|
|
1630
1640
|
# - No _ensure_features_df_schema_order()
|
|
1631
1641
|
# - No complex column alignment
|
|
1632
1642
|
# - No type casting loops
|
|
1633
|
-
# - No
|
|
1643
|
+
# - No set_samples_color(by=None) call needed
|
|
1634
1644
|
|
|
1635
1645
|
self.logger.debug(
|
|
1636
1646
|
f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (optimized)",
|
|
@@ -1914,42 +1924,6 @@ def _add_sample_standard(
|
|
|
1914
1924
|
|
|
1915
1925
|
def _sample_color_reset_optimized(self):
|
|
1916
1926
|
"""
|
|
1917
|
-
Optimized version of
|
|
1927
|
+
Optimized version of sample color reset using set_samples_color.
|
|
1918
1928
|
"""
|
|
1919
|
-
|
|
1920
|
-
self.logger.warning("No samples found in study.")
|
|
1921
|
-
return
|
|
1922
|
-
|
|
1923
|
-
# Cache the colormap if not already cached
|
|
1924
|
-
if not hasattr(self, "_cached_colormap"):
|
|
1925
|
-
try:
|
|
1926
|
-
from cmap import Colormap
|
|
1927
|
-
|
|
1928
|
-
self._cached_colormap = Colormap("turbo")
|
|
1929
|
-
except ImportError:
|
|
1930
|
-
self.logger.warning("cmap package not available, using default colors")
|
|
1931
|
-
return
|
|
1932
|
-
|
|
1933
|
-
cm = self._cached_colormap
|
|
1934
|
-
n_samples = len(self.samples_df)
|
|
1935
|
-
|
|
1936
|
-
# Pre-allocate colors list for better performance
|
|
1937
|
-
colors = [None] * n_samples
|
|
1938
|
-
|
|
1939
|
-
# Vectorized color generation
|
|
1940
|
-
for i in range(n_samples):
|
|
1941
|
-
normalized_value = 0.1 + ((i + 0.5) / n_samples) * 0.8
|
|
1942
|
-
color_rgba = cm(normalized_value)
|
|
1943
|
-
|
|
1944
|
-
if len(color_rgba) >= 3:
|
|
1945
|
-
r, g, b = color_rgba[:3]
|
|
1946
|
-
if max(color_rgba[:3]) <= 1.0:
|
|
1947
|
-
r, g, b = int(r * 255), int(g * 255), int(b * 255)
|
|
1948
|
-
colors[i] = f"#{r:02x}{g:02x}{b:02x}"
|
|
1949
|
-
|
|
1950
|
-
# Update the sample_color column efficiently
|
|
1951
|
-
self.samples_df = self.samples_df.with_columns(
|
|
1952
|
-
pl.Series("sample_color", colors).alias("sample_color"),
|
|
1953
|
-
)
|
|
1954
|
-
|
|
1955
|
-
self.logger.debug(f"Reset sample colors (cached) for {n_samples} samples")
|
|
1929
|
+
return self.set_samples_color(by=None)
|