masster 0.3.19-py3-none-any.whl → 0.3.20-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/__init__.py +2 -0
- masster/_version.py +1 -1
- masster/data/libs/README.md +17 -0
- masster/data/libs/ccm.py +533 -0
- masster/data/libs/central_carbon_README.md +17 -0
- masster/data/libs/central_carbon_metabolites.csv +120 -0
- masster/data/libs/urine.py +333 -0
- masster/data/libs/urine_metabolites.csv +51 -0
- masster/sample/lib.py +32 -25
- masster/sample/load.py +7 -1
- masster/sample/plot.py +111 -26
- masster/study/helpers.py +230 -6
- masster/study/plot.py +457 -182
- masster/study/study.py +4 -0
- {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/METADATA +1 -1
- {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/RECORD +24 -18
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/WHEEL +0 -0
- {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/entry_points.txt +0 -0
- {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/licenses/LICENSE +0 -0
masster/sample/plot.py
CHANGED
@@ -141,6 +141,110 @@ def _display_plot(plot_object, layout=None):
     return None
 
 
+def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
+    """
+    Helper function to handle consistent save/display behavior for sample plots.
+
+    Parameters:
+        plot_obj: The plot object (bokeh figure, holoviews layout, or panel object)
+        filename: Optional filename to save the plot
+        plot_type: Type of plot object ("bokeh", "panel", "holoviews")
+    """
+    if filename is not None:
+        # Convert relative paths to absolute paths using sample folder as base
+        import os
+        if hasattr(self, 'folder') and self.folder and not os.path.isabs(filename):
+            filename = os.path.join(self.folder, filename)
+
+        # Convert to absolute path for logging
+        abs_filename = os.path.abspath(filename)
+
+        if filename.endswith(".html"):
+            if plot_type == "panel":
+                plot_obj.save(filename, embed=True)  # type: ignore[attr-defined]
+            elif plot_type == "holoviews":
+                import panel
+                panel.panel(plot_obj).save(filename, embed=True)  # type: ignore[attr-defined]
+            elif plot_type == "bokeh":
+                from bokeh.plotting import output_file
+                from bokeh.io import save
+                output_file(filename)
+                save(plot_obj)
+            self.logger.info(f"Plot saved to: {abs_filename}")
+        elif filename.endswith(".png"):
+            try:
+                if plot_type == "bokeh":
+                    from bokeh.io.export import export_png
+                    export_png(plot_obj, filename=filename)
+                elif plot_type in ["panel", "holoviews"]:
+                    import holoviews as hv
+                    hv.save(plot_obj, filename, fmt="png")
+                self.logger.info(f"Plot saved to: {abs_filename}")
+            except Exception:
+                # Fall back to HTML if PNG export not available
+                html_filename = filename.replace('.png', '.html')
+                abs_html_filename = os.path.abspath(html_filename)
+                if plot_type == "panel":
+                    plot_obj.save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "holoviews":
+                    import panel
+                    panel.panel(plot_obj).save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "bokeh":
+                    from bokeh.plotting import output_file
+                    from bokeh.io import save
+                    output_file(html_filename)
+                    save(plot_obj)
+                self.logger.warning(f"PNG export not available, saved as HTML instead: {abs_html_filename}")
+        elif filename.endswith(".pdf"):
+            # Try to save as PDF, fall back to HTML if not available
+            try:
+                if plot_type == "bokeh":
+                    from bokeh.io.export import export_pdf
+                    export_pdf(plot_obj, filename=filename)
+                elif plot_type in ["panel", "holoviews"]:
+                    import holoviews as hv
+                    hv.save(plot_obj, filename, fmt="pdf")
+                self.logger.info(f"Plot saved to: {abs_filename}")
+            except ImportError:
+                # Fall back to HTML if PDF export not available
+                html_filename = filename.replace('.pdf', '.html')
+                abs_html_filename = os.path.abspath(html_filename)
+                if plot_type == "panel":
+                    plot_obj.save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "holoviews":
+                    import panel
+                    panel.panel(plot_obj).save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "bokeh":
+                    from bokeh.plotting import output_file
+                    from bokeh.io import save
+                    output_file(html_filename)
+                    save(plot_obj)
+                self.logger.warning(f"PDF export not available, saved as HTML instead: {abs_html_filename}")
+        else:
+            # Default to HTML for unknown extensions
+            if plot_type == "panel":
+                plot_obj.save(filename, embed=True)  # type: ignore[attr-defined]
+            elif plot_type == "holoviews":
+                import panel
+                panel.panel(plot_obj).save(filename, embed=True)  # type: ignore[attr-defined]
+            elif plot_type == "bokeh":
+                from bokeh.plotting import output_file
+                from bokeh.io import save
+                output_file(filename)
+                save(plot_obj)
+            self.logger.info(f"Plot saved to: {abs_filename}")
+    else:
+        # Show in notebook when no filename provided
+        if plot_type == "panel":
+            plot_obj.show()  # type: ignore[attr-defined]
+        elif plot_type == "holoviews":
+            import panel
+            return panel.panel(plot_obj)
+        elif plot_type == "bokeh":
+            from bokeh.plotting import show
+            show(plot_obj)
+
+
 def plot_chrom(
     self,
     feature_uid=None,
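The helper above centralizes the save/display dispatch that was previously duplicated in each plot method: it resolves relative filenames against the sample folder, selects a writer from the file extension and plot_type, and falls back to HTML when PNG or PDF export is unavailable (bokeh's export_png/export_pdf need selenium plus a browser driver). A minimal sketch of exercising it directly, assuming `sample` is a loaded Sample instance; calling the underscore-prefixed helper by hand is for illustration only, since the public plot methods invoke it internally:

    import holoviews as hv

    curve = hv.Curve([(0, 0), (1, 1), (2, 4)])
    # Relative path: resolved against sample.folder, saved via panel with embed=True
    sample._handle_sample_plot_output(curve, "example.html", plot_type="holoviews")
    # No filename: returns a panel object for notebook display
    obj = sample._handle_sample_plot_output(curve, plot_type="holoviews")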
@@ -271,16 +375,9 @@ def plot_chrom(
 
     layout = layout.cols(1)
     layout = panel.Column(layout)
-
-
-
-            layout.save(filename, embed=True)
-        else:
-            # save the panel layout as a png
-            hv.save(layout, filename, fmt="png")
-    else:
-        # Check if we're in a notebook environment and display appropriately
-        return _display_plot(layout.object, layout)
+
+    # Use consistent save/display behavior
+    self._handle_sample_plot_output(layout, filename, "panel")
 
 
 def plot_2d(
@@ -830,12 +927,8 @@ def plot_2d(
     layout = panel.Column(overlay)
 
     if filename is not None:
-        #
-
-            layout.save(filename, embed=True)
-        else:
-            # save the panel layout as a png
-            hv.save(overlay, filename, fmt="png")
+        # Use consistent save/display behavior
+        self._handle_sample_plot_output(layout, filename, "panel")
         return None
     else:
         # Check if we're in a notebook environment and display appropriately
@@ -1960,16 +2053,8 @@ def plot_feature_stats(
     # Arrange the plots in a layout with three columns
     layout = hv.Layout(density_plots).cols(3).opts(shared_axes=False)
 
-        #
-
-        if filename.endswith(".html"):
-            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
-        else:
-            hv.save(layout, filename, fmt="png")
-    else:
-        # Check if we're in a notebook environment and display appropriately
-        layout_obj = panel.panel(layout)
-        return _display_plot(layout, layout_obj)
+    # Use consistent save/display behavior
+    self._handle_sample_plot_output(layout, filename, "holoviews")
 
 
 def plot_tic(
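The three hunks above replace per-method save logic with calls to the shared helper, so the filename extension alone now selects the output format across plot methods. A hedged usage sketch (assuming a loaded Sample `sample`; the methods' other parameters are omitted):

    sample.plot_chrom(filename="chrom.html")         # panel .save(..., embed=True)
    sample.plot_2d(filename="map.png")               # PNG, HTML fallback if export deps are missing
    sample.plot_feature_stats(filename="stats.pdf")  # PDF attempt, HTML fallback on ImportError
    sample.plot_2d()                                 # no filename: displayed in the notebook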
masster/study/helpers.py
CHANGED
@@ -969,13 +969,20 @@ def restore_features(self, samples=None, maps=False):
         # Load sample to get its features_df
         # Use a direct load call with map=False to prevent feature synchronization
         # which would remove filled features that don't exist in the original FeatureMap
-
+        # Use ERROR log level to suppress info messages
+        sample = Sample(log_level="ERROR")
         sample._load_sample5(sample_path, map=False)
 
         if sample.features_df is None or sample.features_df.is_empty():
             self.logger.warning(f"No features found in sample {sample_name}")
             continue
 
+        # Check which columns are actually available in the sample
+        available_columns = [col for col in columns_to_update if col in sample.features_df.columns]
+        if not available_columns:
+            self.logger.debug(f"No target columns found in sample {sample_name}")
+            continue
+
         # Create update data for this sample
         updates_made = 0
         for row in sample.features_df.iter_rows(named=True):
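The new `available_columns` guard is a plain intersection between the study's update targets and the columns a sample file actually carries, so restore_features() now skips rather than fails on samples written without those columns. A self-contained sketch of the pattern with toy data (`columns_to_update` is defined earlier in restore_features; the frame and column names here are illustrative):

    import polars as pl

    columns_to_update = ["chrom", "ms2_specs", "ms2_scans"]
    features_df = pl.DataFrame({"feature_id": [1, 2], "chrom": [None, None]})

    available_columns = [c for c in columns_to_update if c in features_df.columns]
    print(available_columns)  # ['chrom'] - absent columns are skipped, not an error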
@@ -987,8 +994,8 @@ def restore_features(self, samples=None, maps=False):
             if key in study_feature_mapping:
                 feature_uid = study_feature_mapping[key]
 
-                # Update the
-                for col in
+                # Update only the available columns in study.features_df
+                for col in available_columns:
                     if col in row and col in self.features_df.columns:
                         # Get the original column dtype to preserve it
                         original_dtype = self.features_df[col].dtype
@@ -1013,7 +1020,8 @@ def restore_features(self, samples=None, maps=False):
                         )
                         updates_made += 1
 
-
+        if updates_made > 0:
+            self.logger.debug(f"Updated {updates_made} features from sample {sample_name}")
 
         # If maps is True, load featureXML data
         if maps:
@@ -1096,13 +1104,18 @@ def restore_chrom(self, samples=None, mz_tol=0.010, rt_tol=10.0):
 
         try:
             # Load sample (with map=False to prevent feature synchronization)
-
+            # Use ERROR log level to suppress info messages
+            sample = Sample(log_level="ERROR")
             sample._load_sample5(sample_path, map=False)
 
             if sample.features_df is None or sample.features_df.is_empty():
                 self.logger.warning(f"No features found in sample {sample_name}")
                 continue
 
+            # Check if chrom column exists in sample
+            if "chrom" not in sample.features_df.columns:
+                continue
+
             # Update chromatograms from this sample
             for row in sample.features_df.iter_rows(named=True):
                 feature_id = row.get("feature_id")
@@ -1183,7 +1196,8 @@ def restore_chrom(self, samples=None, mz_tol=0.010, rt_tol=10.0):
 
         try:
             # Load sample for MS1 data extraction
-
+            # Use ERROR log level to suppress info messages
+            sample = Sample(log_level="ERROR")
             sample._load_sample5(sample_path, map=False)
 
             if not hasattr(sample, "ms1_df") or sample.ms1_df is None or sample.ms1_df.is_empty():
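All three loaders now construct the throwaway sample as Sample(log_level="ERROR"), which simply raises the per-sample logger threshold so bulk restore loops stop emitting one INFO line per file. The equivalent idea with the standard library, purely for illustration:

    import logging

    logger = logging.getLogger("masster.sample")  # logger name is illustrative
    logger.setLevel(logging.ERROR)
    logger.info("loaded sample")     # suppressed during bulk restore
    logger.error("failed to load")   # still emitted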
@@ -3233,3 +3247,213 @@ def migrate_map_id_to_index(self):
 
     self.logger.info(f"Successfully migrated {sample_count} samples to indexed map_id format")
     self.logger.info(f"map_id now ranges from 0 to {sample_count - 1}")
+
+
+def restore_ms2(self, samples=None, **kwargs):
+    """
+    Restore MS2 data by re-running find_ms2 on specified samples.
+
+    This function rebuilds the consensus_ms2 DataFrame by re-extracting MS2 spectra
+    from the original sample files. Use this to reverse the effects of compress_ms2().
+
+    Parameters:
+        samples (list, optional): List of sample_uids or sample_names to process.
+            If None, processes all samples.
+        **kwargs: Additional keyword arguments passed to find_ms2()
+            (e.g., mz_tol, centroid, deisotope, etc.)
+    """
+    if self.features_df is None or self.features_df.is_empty():
+        self.logger.error("No features_df found in study.")
+        return
+
+    if self.samples_df is None or self.samples_df.is_empty():
+        self.logger.error("No samples_df found in study.")
+        return
+
+    # Get sample_uids to process
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.warning("No valid samples specified.")
+        return
+
+    self.logger.info(f"Restoring MS2 data from {len(sample_uids)} samples...")
+
+    # Clear existing consensus_ms2 to rebuild from scratch
+    initial_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
+    self.consensus_ms2 = pl.DataFrame()
+
+    # Re-run find_ms2 which will rebuild consensus_ms2
+    try:
+        self.find_ms2(**kwargs)
+
+        final_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
+
+        self.logger.info(f"MS2 restoration completed: {initial_ms2_count} -> {final_ms2_count} MS2 spectra")
+
+    except Exception as e:
+        self.logger.error(f"Failed to restore MS2 data: {e}")
+        raise
+
+
+def decompress(self, features=True, ms2=True, chrom=True, samples=None, **kwargs):
+    """
+    Reverse any compression effects by restoring compressed data adaptively.
+
+    This function restores data that was compressed using compress(), compress_features(),
+    compress_ms2(), compress_chrom(), or study.save(compress=True). It optimizes the
+    decompression process for speed by only processing what actually needs restoration.
+
+    Parameters:
+        features (bool): Restore features data (ms2_specs, ms2_scans, chrom_area)
+        ms2 (bool): Restore MS2 spectra by re-running find_ms2()
+        chrom (bool): Restore chromatogram objects
+        samples (list, optional): List of sample_uids or sample_names to process.
+            If None, processes all samples.
+        **kwargs: Additional keyword arguments for restoration functions:
+            - For restore_chrom: mz_tol (default: 0.010), rt_tol (default: 10.0)
+            - For restore_ms2/find_ms2: mz_tol, centroid, deisotope, etc.
+
+    Performance Optimizations:
+        - Adaptive processing: Only restores what actually needs restoration
+        - Processes features and chromatograms together when possible (shared file I/O)
+        - Uses cached sample instances to avoid repeated file loading
+        - Processes MS2 restoration last as it's the most computationally expensive
+        - Provides detailed progress information for long-running operations
+
+    Example:
+        # Restore everything (but only what needs restoration)
+        study.decompress()
+
+        # Restore only chromatograms with custom tolerances
+        study.decompress(features=False, ms2=False, chrom=True, mz_tol=0.005, rt_tol=5.0)
+
+        # Restore specific samples only
+        study.decompress(samples=["sample1", "sample2"])
+    """
+    if not any([features, ms2, chrom]):
+        self.logger.warning("No decompression operations specified.")
+        return
+
+    # Get sample_uids to process
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.warning("No valid samples specified.")
+        return
+
+    # Adaptively check what actually needs to be done
+    import polars as pl
+
+    # Check if features need restoration (more sophisticated logic)
+    features_need_restoration = False
+    if features and not self.features_df.is_empty():
+        # Check for completely missing columns that should exist after feature processing
+        missing_cols = []
+        for col in ["ms2_scans", "ms2_specs"]:
+            if col not in self.features_df.columns:
+                missing_cols.append(col)
+
+        # If columns are missing entirely, we likely need restoration
+        if missing_cols:
+            features_need_restoration = True
+        else:
+            # If columns exist, check if they're mostly null (indicating compression)
+            # But be smart about it - only check if we have consensus features with MS2
+            if not self.consensus_ms2.is_empty():
+                # We have MS2 data, so ms2_specs should have some content
+                null_ms2_specs = self.features_df.filter(pl.col("ms2_specs").is_null()).height
+                total_features = len(self.features_df)
+                # If more than 90% are null but we have MS2 data, likely compressed
+                if null_ms2_specs > (total_features * 0.9):
+                    features_need_restoration = True
+
+    # Check if chromatograms need restoration
+    chrom_need_restoration = False
+    if chrom and not self.features_df.is_empty():
+        if "chrom" not in self.features_df.columns:
+            # Column completely missing
+            chrom_need_restoration = True
+        else:
+            null_chroms = self.features_df.filter(pl.col("chrom").is_null()).height
+            total_features = len(self.features_df)
+            # If more than 50% are null, likely need restoration
+            chrom_need_restoration = null_chroms > (total_features * 0.5)
+
+    # Check if MS2 data might need restoration (compare expected vs actual)
+    ms2_need_restoration = False
+    if ms2:
+        current_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
+        consensus_count = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
+
+        if consensus_count > 0:
+            # Calculate expected MS2 count based on consensus features with MS2 potential
+            # This is a heuristic - if we have very few MS2 compared to consensus, likely compressed
+            expected_ratio = 3.0  # Expect at least 3 MS2 per consensus on average
+            expected_ms2 = consensus_count * expected_ratio
+
+            if current_ms2_count < min(expected_ms2 * 0.3, consensus_count * 0.8):
+                ms2_need_restoration = True
+
+    # Build list of operations that actually need to be done
+    operations_needed = []
+    if features and features_need_restoration:
+        operations_needed.append("features")
+    if chrom and chrom_need_restoration:
+        operations_needed.append("chromatograms")
+    if ms2 and ms2_need_restoration:
+        operations_needed.append("MS2 spectra")
+
+    # Early exit if nothing needs to be done
+    if not operations_needed:
+        self.logger.info("All data appears to be already decompressed. No operations needed.")
+        return
+
+    self.logger.info(f"Starting adaptive decompression: {', '.join(operations_needed)} from {len(sample_uids)} samples")
+
+    try:
+        # Phase 1: Restore features and chromatograms together (shared file I/O)
+        if ("features" in operations_needed and "chromatograms" in operations_needed):
+            self.logger.info("Phase 1: Restoring features and chromatograms together...")
+
+            # Extract relevant kwargs for restore_features and restore_chrom
+            restore_kwargs = {}
+            if 'mz_tol' in kwargs:
+                restore_kwargs['mz_tol'] = kwargs['mz_tol']
+            if 'rt_tol' in kwargs:
+                restore_kwargs['rt_tol'] = kwargs['rt_tol']
+
+            # Restore features first (includes chrom column)
+            self.restore_features(samples=samples)
+
+            # Then do additional chrom gap-filling if needed
+            self.restore_chrom(samples=samples, **restore_kwargs)
+
+        elif ("features" in operations_needed and "chromatograms" not in operations_needed):
+            self.logger.info("Phase 1: Restoring features data...")
+            self.restore_features(samples=samples)
+
+        elif ("chromatograms" in operations_needed and "features" not in operations_needed):
+            self.logger.info("Phase 1: Restoring chromatograms...")
+            restore_kwargs = {}
+            if 'mz_tol' in kwargs:
+                restore_kwargs['mz_tol'] = kwargs['mz_tol']
+            if 'rt_tol' in kwargs:
+                restore_kwargs['rt_tol'] = kwargs['rt_tol']
+            self.restore_chrom(samples=samples, **restore_kwargs)
+
+        # Phase 2: Restore MS2 data (most computationally expensive, done last)
+        if "MS2 spectra" in operations_needed:
+            self.logger.info("Phase 2: Restoring MS2 spectra...")
+
+            # Extract MS2-specific kwargs
+            ms2_kwargs = {}
+            for key, value in kwargs.items():
+                if key in ['mz_tol', 'centroid', 'deisotope', 'dia_stats', 'feature_uid']:
+                    ms2_kwargs[key] = value
+
+            self.restore_ms2(samples=samples, **ms2_kwargs)
+
+        self.logger.info("Adaptive decompression completed successfully")
+
+    except Exception as e:
+        self.logger.error(f"Decompression failed: {e}")
+        raise
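Together, restore_ms2() and decompress() give the compression entry points named in the docstring (compress_ms2(), study.save(compress=True), and friends) a usable inverse. A hedged end-to-end sketch, assuming `study` is an existing Study with consensus features built; call signatures beyond those shown in this diff are taken from the docstring:

    study.save(compress=True)    # compressed save, drops bulky columns

    # later, after reloading the study:
    study.decompress()           # adaptive: runs only the restorations the heuristics flag
    study.decompress(features=False, ms2=False, chrom=True, mz_tol=0.005, rt_tol=5.0)
    study.restore_ms2(mz_tol=0.010, centroid=True)   # or rebuild MS2 directly via find_ms2

Note on the MS2 heuristic: with N consensus features, expected_ms2 = 3.0 * N, so the trigger threshold min(expected_ms2 * 0.3, N * 0.8) = min(0.9 * N, 0.8 * N) reduces to 0.8 * N stored spectra.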