PyPI - masster - Versions diffs - 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (27)

masster/_version.py +1 -1
masster/data/libs/aa.csv +22 -0
masster/lib/lib.py +6 -0
masster/sample/adducts.py +1 -1
masster/sample/load.py +10 -9
masster/sample/plot.py +1 -1
masster/sample/processing.py +4 -4
masster/sample/sample.py +29 -32
masster/study/analysis.py +1762 -0
masster/study/defaults/fill_def.py +1 -1
masster/study/export.py +5 -3
masster/study/h5.py +3 -0
masster/study/helpers.py +153 -80
masster/study/id.py +545 -4
masster/study/load.py +33 -59
masster/study/merge.py +413 -315
masster/study/parameters.py +3 -3
masster/study/plot.py +398 -43
masster/study/processing.py +6 -14
masster/study/save.py +8 -4
masster/study/study.py +179 -139
masster/study/study5_schema.json +9 -0
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/METADATA +54 -14
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/RECORD +27 -25
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/WHEEL +0 -0
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/entry_points.txt +0 -0
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/licenses/LICENSE +0 -0

masster/study/load.py CHANGED Viewed

@@ -213,18 +213,19 @@ def load(self, filename=None):
             return
     # self.logger.info(f"Loading study from {filename}")
-    self._load_study5(filename)
+    from masster.study.h5 import _load_study5
+    _load_study5(self, filename)
     # After loading the study, check if we have consensus features before loading consensus XML
-    if (self.consensus_df is not None and not self.consensus_df.is_empty()):
-        consensus_xml_path = filename.replace(".study5", ".consensusXML")
-        if os.path.exists(consensus_xml_path):
-            self._load_consensusXML(filename=consensus_xml_path)
+    #if (self.consensus_df is not None and not self.consensus_df.is_empty()):
+    #    consensus_xml_path = filename.replace(".study5", ".consensusXML")
+    #    if os.path.exists(consensus_xml_path):
+    #        self._load_consensusXML(filename=consensus_xml_path)
             # self.logger.info(f"Automatically loaded consensus from {consensus_xml_path}")
-        else:
-            self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
-    else:
-        self.logger.debug("No consensus features found, skipping consensusXML loading")
+    #    else:
+    #        self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
+    #else:
+    #    self.logger.debug("No consensus features found, skipping consensusXML loading")
     self.filename = filename
@@ -260,9 +261,14 @@ def _fill_chrom_single_impl(
     min_number_abs = 1
     if isinstance(min_samples_rel, float) and min_samples_rel > 0:
         min_number_rel = int(min_samples_rel * len(self.samples_df))
-    if isinstance(min_samples_abs, int) and min_samples_abs > 0:
-        min_number_abs = int(min_samples_abs)
+    if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
+        min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
     min_number = max(min_number_rel, min_number_abs)
+    # Special case: if min_samples_abs is explicitly 0, allow 0-sample features (like library features)
+    if isinstance(min_samples_abs, int) and min_samples_abs == 0:
+        min_number = 0
     self.logger.debug(f"Threshold for gap filling: number_samples>={min_number}")
     if min_number > 0:
@@ -276,7 +282,7 @@ def _fill_chrom_single_impl(
         )
     self.logger.debug("Identifying missing features...")
     # Instead of building full chromatogram matrix, identify missing consensus/sample combinations directly
-    missing_combinations = self._get_missing_consensus_sample_combinations(uids)
+    missing_combinations = _get_missing_consensus_sample_combinations(self,uids)
     if not missing_combinations:
         self.logger.info("No missing features found to fill.")
         return
@@ -559,7 +565,7 @@ def fill_single(self, **kwargs):
     # end of parameter initialization
     # Store parameters in the Study object
-    self.store_history(["fill_single"], params.to_dict())
+    self.update_history(["fill_single"], params.to_dict())
     self.logger.debug("Parameters stored to fill_single")
     # Call the original fill_chrom_single function with extracted parameters
@@ -753,10 +759,14 @@ def _fill_chrom_impl(
     min_number_abs = 1
     if isinstance(min_samples_rel, float) and min_samples_rel > 0:
         min_number_rel = int(min_samples_rel * len(self.samples_df))
-    if isinstance(min_samples_abs, int) and min_samples_abs > 0:
-        min_number_abs = int(min_samples_abs)
+    if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
+        min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
     min_number = max(min_number_rel, min_number_abs)
+    # Special case: if min_samples_abs is explicitly 0, allow 0-sample features (like library features)
+    if isinstance(min_samples_abs, int) and min_samples_abs == 0:
+        min_number = 0
     self.logger.debug(f"Threshold for gap filling: number_samples>={min_number}")
     if min_number > 0:
@@ -769,7 +779,7 @@ def _fill_chrom_impl(
     # Get missing consensus/sample combinations using the optimized method
     self.logger.debug("Identifying missing features...")
-    missing_combinations = self._get_missing_consensus_sample_combinations(uids)
+    missing_combinations = _get_missing_consensus_sample_combinations(self, uids)
     if not missing_combinations or len(missing_combinations) == 0:
         self.logger.info("No missing features found to fill.")
@@ -845,7 +855,7 @@ def _fill_chrom_impl(
         future_to_sample = {}
         for sample_info in samples_to_process:
             future = executor.submit(
-                self._process_sample_for_parallel_fill,
+                _process_sample_for_parallel_fill, self,
                 sample_info,
                 consensus_info,
                 uids,
@@ -979,7 +989,7 @@ def fill(self, **kwargs):
     # end of parameter initialization
     # Store parameters in the Study object
-    self.store_history(["fill"], params.to_dict())
+    self.update_history(["fill"], params.to_dict())
     self.logger.debug("Parameters stored to fill")
     # Call the original fill_chrom function with extracted parameters
@@ -1115,7 +1125,7 @@ def _get_missing_consensus_sample_combinations(self, uids):
         return missing_combinations
-def sanitize(self):
+def _sanitize(self):
     """
     Sanitize features DataFrame to ensure all complex objects are properly typed.
     Convert serialized objects back to their proper types (Chromatogram, Spectrum).
@@ -1209,7 +1219,7 @@ def sanitize(self):
         self.logger.error(f"Failed to recreate sanitized DataFrame: {e}")
-def load_features(self):
+def _load_features(self):
     """
     Load features by reconstructing FeatureMaps from the processed features_df data.
@@ -1630,7 +1640,7 @@ def _add_sample_optimized(
     # - No _ensure_features_df_schema_order()
     # - No complex column alignment
     # - No type casting loops
-    # - No sample_color_reset()
+    # - No set_samples_color(by=None) call needed
     self.logger.debug(
         f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (optimized)",
@@ -1914,42 +1924,6 @@ def _add_sample_standard(
 def _sample_color_reset_optimized(self):
     """
-    Optimized version of sample_color_reset that caches colormap initialization.
+    Optimized version of sample color reset using set_samples_color.
     """
-    if self.samples_df is None or len(self.samples_df) == 0:
-        self.logger.warning("No samples found in study.")
-        return
-    # Cache the colormap if not already cached
-    if not hasattr(self, "_cached_colormap"):
-        try:
-            from cmap import Colormap
-            self._cached_colormap = Colormap("turbo")
-        except ImportError:
-            self.logger.warning("cmap package not available, using default colors")
-            return
-    cm = self._cached_colormap
-    n_samples = len(self.samples_df)
-    # Pre-allocate colors list for better performance
-    colors = [None] * n_samples
-    # Vectorized color generation
-    for i in range(n_samples):
-        normalized_value = 0.1 + ((i + 0.5) / n_samples) * 0.8
-        color_rgba = cm(normalized_value)
-        if len(color_rgba) >= 3:
-            r, g, b = color_rgba[:3]
-            if max(color_rgba[:3]) <= 1.0:
-                r, g, b = int(r * 255), int(g * 255), int(b * 255)
-            colors[i] = f"#{r:02x}{g:02x}{b:02x}"
-    # Update the sample_color column efficiently
-    self.samples_df = self.samples_df.with_columns(
-        pl.Series("sample_color", colors).alias("sample_color"),
-    )
-    self.logger.debug(f"Reset sample colors (cached) for {n_samples} samples")
+    return self.set_samples_color(by=None)

masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl