PyPI - masster - Versions diffs - 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl - Mend

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (37) hide show

masster/_version.py +1 -1
masster/logger.py +35 -19
masster/sample/adducts.py +15 -29
masster/sample/defaults/find_adducts_def.py +1 -3
masster/sample/defaults/sample_def.py +4 -4
masster/sample/h5.py +203 -361
masster/sample/helpers.py +14 -30
masster/sample/lib.py +3 -3
masster/sample/load.py +21 -29
masster/sample/plot.py +222 -132
masster/sample/processing.py +42 -55
masster/sample/sample.py +37 -46
masster/sample/save.py +37 -61
masster/sample/sciex.py +13 -11
masster/sample/thermo.py +69 -74
masster/spectrum.py +15 -15
masster/study/analysis.py +650 -586
masster/study/defaults/identify_def.py +1 -3
masster/study/defaults/merge_def.py +6 -7
masster/study/defaults/study_def.py +1 -5
masster/study/export.py +35 -96
masster/study/h5.py +134 -211
masster/study/helpers.py +385 -459
masster/study/id.py +239 -290
masster/study/importers.py +84 -93
masster/study/load.py +159 -178
masster/study/merge.py +1112 -1098
masster/study/plot.py +195 -149
masster/study/processing.py +144 -191
masster/study/save.py +14 -13
masster/study/study.py +89 -130
masster/wizard/wizard.py +764 -714
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0

masster/study/plot.py CHANGED Viewed

@@ -11,6 +11,7 @@ from tqdm import tqdm
 # Import cmap for colormap handling
 from cmap import Colormap
 hv.extension("bokeh")
@@ -22,84 +23,93 @@ from bokeh.layouts import row as bokeh_row
 def _export_with_webdriver_manager(plot_obj, filename, format_type, logger=None):
     """
     Export plot to PNG or SVG using webdriver-manager for automatic driver management.
     Parameters:
         plot_obj: Bokeh plot object or holoviews object to export
-        filename: Output filename
+        filename: Output filename
         format_type: Either "png" or "svg"
         logger: Logger for error reporting (optional)
     Returns:
         bool: True if export successful, False otherwise
     """
     try:
         # Convert holoviews to bokeh if needed
-        if hasattr(plot_obj, 'opts'):  # Likely a holoviews object
+        if hasattr(plot_obj, "opts"):  # Likely a holoviews object
             import holoviews as hv
             bokeh_plot = hv.render(plot_obj)
         else:
             bokeh_plot = plot_obj
         # Try webdriver-manager export first
         try:
             from webdriver_manager.chrome import ChromeDriverManager
             from selenium import webdriver
             from selenium.webdriver.chrome.service import Service
             from selenium.webdriver.chrome.options import Options
             # Set up Chrome options for headless operation
             chrome_options = Options()
             chrome_options.add_argument("--headless")
             chrome_options.add_argument("--no-sandbox")
             chrome_options.add_argument("--disable-dev-shm-usage")
             chrome_options.add_argument("--disable-gpu")
             # Use webdriver-manager to automatically get the correct ChromeDriver
             service = Service(ChromeDriverManager().install())
             driver = webdriver.Chrome(service=service, options=chrome_options)
             # Export with managed webdriver
             if format_type == "png":
                 from bokeh.io import export_png
                 export_png(bokeh_plot, filename=filename, webdriver=driver)
             elif format_type == "svg":
                 from bokeh.io import export_svg
                 export_svg(bokeh_plot, filename=filename, webdriver=driver)
             else:
                 raise ValueError(f"Unsupported format: {format_type}")
             driver.quit()
             return True
         except ImportError:
             if logger:
                 logger.debug(f"webdriver-manager not available, using default {format_type.upper()} export")
             # Fall back to default export
             if format_type == "png":
                 from bokeh.io import export_png
                 export_png(bokeh_plot, filename=filename)
             elif format_type == "svg":
                 from bokeh.io import export_svg
                 export_svg(bokeh_plot, filename=filename)
             return True
         except Exception as e:
             if logger:
-                logger.debug(f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export")
+                logger.debug(
+                    f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export"
+                )
             try:
                 # Final fallback to default export
                 if format_type == "png":
                     from bokeh.io import export_png
                     export_png(bokeh_plot, filename=filename)
                 elif format_type == "svg":
                     from bokeh.io import export_svg
                     export_svg(bokeh_plot, filename=filename)
                 return True
             except Exception as e2:
                 if logger:
                     logger.error(f"{format_type.upper()} export failed: {e2}")
                 return False
     except Exception as e:
         if logger:
             logger.error(f"Export preparation failed: {e}")
@@ -117,11 +127,11 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
         from bokeh.embed import file_html
         # Create HTML content without affecting global state
-        resources = Resources(mode='cdn')
+        resources = Resources(mode="cdn")
         html = file_html(plot_object, resources, title=plot_title)
         # Write directly to file
-        with open(filename, 'w', encoding='utf-8') as f:
+        with open(filename, "w", encoding="utf-8") as f:
             f.write(html)
         logger.info(f"Plot saved to: {abs_filename}")
@@ -132,15 +142,15 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
             logger.info(f"Plot saved to: {abs_filename}")
         else:
             # Fall back to HTML if PNG export not available
-            html_filename = filename.replace('.png', '.html')
-            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.png', '.html')
+            html_filename = filename.replace(".png", ".html")
+            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace(".png", ".html")
             from bokeh.resources import Resources
             from bokeh.embed import file_html
-            resources = Resources(mode='cdn')
+            resources = Resources(mode="cdn")
             html = file_html(plot_object, resources, title=plot_title)
-            with open(html_filename, 'w', encoding='utf-8') as f:
+            with open(html_filename, "w", encoding="utf-8") as f:
                 f.write(html)
             logger.warning(f"PNG export not available. Saved as HTML instead: {abs_html_filename}")
@@ -150,21 +160,21 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
             logger.info(f"Plot saved to: {abs_filename}")
         else:
             # Fall back to HTML if SVG export not available
-            html_filename = filename.replace('.svg', '.html')
-            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.svg', '.html')
+            html_filename = filename.replace(".svg", ".html")
+            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace(".svg", ".html")
             from bokeh.resources import Resources
             from bokeh.embed import file_html
-            resources = Resources(mode='cdn')
+            resources = Resources(mode="cdn")
             html = file_html(plot_object, resources, title=plot_title)
-            with open(html_filename, 'w', encoding='utf-8') as f:
+            with open(html_filename, "w", encoding="utf-8") as f:
                 f.write(html)
             logger.warning(f"SVG export not available. Saved as HTML instead: {abs_html_filename}")
             html = file_html(plot_object, resources, title=plot_title)
-            with open(html_filename, 'w', encoding='utf-8') as f:
+            with open(html_filename, "w", encoding="utf-8") as f:
                 f.write(html)
             logger.warning(f"SVG export not available. Saved as HTML instead: {abs_html_filename}")
@@ -173,10 +183,10 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
         from bokeh.resources import Resources
         from bokeh.embed import file_html
-        resources = Resources(mode='cdn')
+        resources = Resources(mode="cdn")
         html = file_html(plot_object, resources, title=plot_title)
-        with open(filename, 'w', encoding='utf-8') as f:
+        with open(filename, "w", encoding="utf-8") as f:
             f.write(html)
         logger.info(f"Plot saved to: {abs_filename}")
@@ -194,7 +204,7 @@ def _isolated_show_notebook(plot_object):
     # Suppress both warnings and logging messages for the specific Bokeh callback warnings
     # that occur when Panel components with Python callbacks are converted to standalone Bokeh
-    bokeh_logger = logging.getLogger('bokeh.embed.util')
+    bokeh_logger = logging.getLogger("bokeh.embed.util")
     original_level = bokeh_logger.level
     bokeh_logger.setLevel(logging.ERROR)  # Suppress WARNING level messages
@@ -210,8 +220,8 @@ def _isolated_show_notebook(plot_object):
             output_notebook(hide_banner=True)
             # Reset Holoviews to notebook mode
-            hv.extension('bokeh', logo=False)
-            hv.output(backend='bokeh', mode='jupyter')
+            hv.extension("bokeh", logo=False)
+            hv.output(backend="bokeh", mode="jupyter")
             # Show in notebook
             show(plot_object)
@@ -245,13 +255,14 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
     elif filename.endswith(".png"):
         try:
             from panel.io.save import save_png
             # Convert Panel to Bokeh models before saving
             bokeh_layout = panel_obj.get_root()
             save_png(bokeh_layout, filename=filename)
             logger.info(f"{plot_title} saved to: {abs_filename}")
         except Exception:
             # Fall back to HTML if PNG export not available
-            html_filename = filename.replace('.png', '.html')
+            html_filename = filename.replace(".png", ".html")
             abs_html_filename = os.path.abspath(html_filename)
             try:
                 panel_obj.save(html_filename, embed=True)
@@ -263,12 +274,13 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
         # Try to save as PDF, fall back to HTML if not available
         try:
             from bokeh.io.export import export_pdf
             bokeh_layout = panel_obj.get_root()
             export_pdf(bokeh_layout, filename=filename)
             logger.info(f"{plot_title} saved to: {abs_filename}")
         except ImportError:
             # Fall back to HTML if PDF export not available
-            html_filename = filename.replace('.pdf', '.html')
+            html_filename = filename.replace(".pdf", ".html")
             abs_html_filename = os.path.abspath(html_filename)
             try:
                 panel_obj.save(html_filename, embed=True)
@@ -279,12 +291,13 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
         # Try to save as SVG, fall back to HTML if not available
         try:
             from bokeh.io.export import export_svg
             bokeh_layout = panel_obj.get_root()
             export_svg(bokeh_layout, filename=filename)
             logger.info(f"{plot_title} saved to: {abs_filename}")
         except Exception as e:
             # Fall back to HTML if SVG export not available
-            html_filename = filename.replace('.svg', '.html')
+            html_filename = filename.replace(".svg", ".html")
             abs_html_filename = os.path.abspath(html_filename)
             try:
                 panel_obj.save(html_filename, embed=True)
@@ -318,16 +331,18 @@ def _isolated_show_panel_notebook(panel_obj):
     output_notebook(hide_banner=True)
     # Reset Holoviews to notebook mode
-    hv.extension('bokeh', logo=False)
-    hv.output(backend='bokeh', mode='jupyter')
+    hv.extension("bokeh", logo=False)
+    hv.output(backend="bokeh", mode="jupyter")
     # For Panel objects in notebooks, use on.extension and display inline
     import panel as on
     try:
         # Configure Panel for notebook display
-        on.extension('bokeh', inline=True, comms='vscode')
+        on.extension("bokeh", inline=True, comms="vscode")
         # Use IPython display to show inline instead of show()
         from IPython.display import display
         display(panel_obj)
     except Exception:
         # Fallback to regular Panel show
@@ -344,8 +359,8 @@ def plot_alignment(
 ):
     """Visualize retention time alignment using two synchronized Bokeh scatter plots.
-    Uses ``features_df`` to create side-by-side plots showing Original RT (left)
-    and Current/Aligned RT (right). If no alignment has been performed yet,
+    Uses ``features_df`` to create side-by-side plots showing Original RT (left)
+    and Current/Aligned RT (right). If no alignment has been performed yet,
     both plots show the current RT values.
     Parameters:
@@ -409,27 +424,33 @@ def plot_alignment(
     for sample_idx, sample in enumerate(samples_list):
         # Filter sample data
         sample_data = features_df.filter(pl.col(sample_col) == sample)
         # Sample data if too large for performance
         max_points_per_sample = 10000
         if sample_data.height > max_points_per_sample:
-            self.logger.info(f"Sample {sample}: Sampling {max_points_per_sample} points from {sample_data.height} features for performance")
+            self.logger.info(
+                f"Sample {sample}: Sampling {max_points_per_sample} points from {sample_data.height} features for performance"
+            )
             sample_data = sample_data.sample(n=max_points_per_sample, seed=42)
         # Calculate max intensity for alpha scaling
         max_inty = sample_data.select(pl.col("inty").max()).item() or 1
         # Get sample information
-        sample_uid = sample if sample_col == "sample_uid" else sample_data.select(pl.col("sample_uid")).item() if "sample_uid" in sample_data.columns else sample
+        sample_uid = (
+            sample
+            if sample_col == "sample_uid"
+            else sample_data.select(pl.col("sample_uid")).item()
+            if "sample_uid" in sample_data.columns
+            else sample
+        )
         # Try to get actual sample name from samples_df if available
         sample_name = str(sample)  # fallback
         if hasattr(self, "samples_df") and self.samples_df is not None and sample_uid is not None:
             try:
                 sample_name_result = (
-                    self.samples_df.filter(pl.col("sample_uid") == sample_uid)
-                    .select("sample_name")
-                    .to_series()
+                    self.samples_df.filter(pl.col("sample_uid") == sample_uid).select("sample_name").to_series()
                 )
                 if len(sample_name_result) > 0 and sample_name_result[0] is not None:
                     sample_name = str(sample_name_result[0])
@@ -441,7 +462,7 @@ def plot_alignment(
         cols_to_select = ["rt", "mz", "inty"]
         if has_alignment:
             cols_to_select.append("rt_original")
         sample_dict = sample_data.select(cols_to_select).to_dicts()
         for row_dict in sample_dict:
@@ -490,7 +511,7 @@ def plot_alignment(
     # Get colors from samples_df if available
     sample_uids_list = list(sample_idx_to_uid.values())
     color_map: dict[int, str] = {}
     if sample_uids_list and hasattr(self, "samples_df") and self.samples_df is not None:
         try:
             sample_colors = (
@@ -499,7 +520,7 @@ def plot_alignment(
                 .to_dict(as_series=False)
             )
             uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
             for sample_idx, sample_uid in sample_idx_to_uid.items():
                 color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4")
         except Exception:
@@ -522,7 +543,7 @@ def plot_alignment(
     # Create Bokeh figures
     title_before = "Original RT" if has_alignment else "Current RT (No Alignment)"
     title_after = "Aligned RT" if has_alignment else "Current RT (Copy)"
     p1 = figure(
         width=width,
         height=height,
@@ -605,6 +626,7 @@ def plot_alignment(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -642,7 +664,7 @@ def plot_consensus_2d(
     Parameters:
         filename (str, optional): Path to save the plot
         colorby (str): Column name to use for color mapping (default: "number_samples")
-                      Automatically detects if column contains categorical (string) or
+                      Automatically detects if column contains categorical (string) or
                       numeric data and applies appropriate color mapping:
                       - Categorical: Uses factor_cmap with distinct colors and legend
                       - Numeric: Uses LinearColorMapper with continuous colorbar
@@ -657,7 +679,7 @@ def plot_consensus_2d(
         height (int): Plot height in pixels (default: 900)
         mz_range (tuple, optional): m/z range for filtering consensus features (min_mz, max_mz)
         rt_range (tuple, optional): Retention time range for filtering consensus features (min_rt, max_rt)
-        legend (str, optional): Legend position for categorical data. Options: 'top_right', 'top_left',
+        legend (str, optional): Legend position for categorical data. Options: 'top_right', 'top_left',
                                'bottom_right', 'bottom_left', 'right', 'left', 'top', 'bottom'.
                                If None, legend is hidden. Only applies to categorical coloring (default: "bottom_right")
         show_none (bool): Whether to display points with None values for colorby column (default: True)
@@ -742,7 +764,7 @@ def plot_consensus_2d(
     # Filter out None values for colorby column if show_none=False
     if not show_none and colorby in data.columns:
         data = data.filter(pl.col(colorby).is_not_null())
     # Convert Polars DataFrame to pandas for Bokeh compatibility
     data_pd = data.to_pandas()
     source = ColumnDataSource(data_pd)
@@ -786,20 +808,22 @@ def plot_consensus_2d(
     # Check if colorby column contains categorical data (string/object)
     colorby_values = data[colorby].to_list()
     is_categorical = (
-        data_pd[colorby].dtype in ["object", "string", "category"] or
-        isinstance(colorby_values[0], str) if colorby_values else False
+        data_pd[colorby].dtype in ["object", "string", "category"] or isinstance(colorby_values[0], str)
+        if colorby_values
+        else False
     )
     if is_categorical:
         # Handle categorical coloring
         # Use natural order of unique values - don't sort to preserve correct legend mapping
         # Sorting would break the correspondence between legend labels and point colors
         unique_values = [v for v in data_pd[colorby].unique() if v is not None]
         # Use the custom palette from cmap if available, otherwise fall back to defaults
         if len(palette) >= len(unique_values):
             # Use custom colormap palette - sample evenly across the palette
             import numpy as np
             indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
             categorical_palette = [palette[i] for i in indices]
         elif len(unique_values) <= 20:
@@ -808,7 +832,7 @@ def plot_consensus_2d(
         else:
             # For many categories, use a subset of the viridis palette
             categorical_palette = viridis(min(256, len(unique_values)))
         color_mapper = factor_cmap(colorby, categorical_palette, unique_values)
     else:
         # Handle numeric coloring with LinearColorMapper
@@ -832,11 +856,12 @@ def plot_consensus_2d(
         all_unique_values = list(data_pd[colorby].unique())
         unique_values = [v for v in all_unique_values if v is not None]
         has_none_values = None in all_unique_values
         # Use the custom palette from cmap if available, otherwise fall back to defaults
         if len(palette) >= len(unique_values):
             # Use custom colormap palette - sample evenly across the palette
             import numpy as np
             indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
             categorical_palette = [palette[i] for i in indices]
         elif len(unique_values) <= 20:
@@ -844,23 +869,23 @@ def plot_consensus_2d(
             categorical_palette = Category20[min(20, max(3, len(unique_values)))]
         else:
             categorical_palette = viridis(min(256, len(unique_values)))
         # Handle None values with black color FIRST so they appear in the background
         if has_none_values and show_none:
             # Filter data for None values
             none_data = data.filter(pl.col(colorby).is_null())
             none_data_pd = none_data.to_pandas()
             none_source = bp.ColumnDataSource(none_data_pd)
             if scaling.lower() in ["dyn", "dynamic"]:
                 # Calculate appropriate radius for dynamic scaling
                 rt_range = data["rt"].max() - data["rt"].min()
                 mz_range = data["mz"].max() - data["mz"].min()
                 dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
                 renderer = p.circle(
                     x="rt",
-                    y="mz",
+                    y="mz",
                     radius=dynamic_radius,
                     fill_color="lightgray",
                     line_color=None,
@@ -872,32 +897,32 @@ def plot_consensus_2d(
                 renderer = p.scatter(
                     x="rt",
                     y="mz",
-                    size="markersize",
+                    size="markersize",
                     fill_color="lightgray",
                     line_color=None,
                     alpha=alpha,
                     source=none_source,
                     legend_label="None",
                 )
         # Create a separate renderer for each non-None category (plotted on top of None values)
         for i, category in enumerate(unique_values):
             # Filter data for this category
             category_data = data.filter(pl.col(colorby) == category)
             category_data_pd = category_data.to_pandas()
             category_source = bp.ColumnDataSource(category_data_pd)
             color = categorical_palette[i % len(categorical_palette)]
             if scaling.lower() in ["dyn", "dynamic"]:
                 # Calculate appropriate radius for dynamic scaling
                 rt_range = data["rt"].max() - data["rt"].min()
                 mz_range = data["mz"].max() - data["mz"].min()
                 dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
                 renderer = p.circle(
                     x="rt",
-                    y="mz",
+                    y="mz",
                     radius=dynamic_radius,
                     fill_color=color,
                     line_color=None,
@@ -909,17 +934,17 @@ def plot_consensus_2d(
                 renderer = p.scatter(
                     x="rt",
                     y="mz",
-                    size="markersize",
+                    size="markersize",
                     fill_color=color,
                     line_color=None,
                     alpha=alpha,
                     source=category_source,
                     legend_label=str(category),
                 )
         # No single scatter_renderer for categorical data
         scatter_renderer = None
     else:
         # Handle numeric coloring - single renderer with color mapping
         if scaling.lower() in ["dyn", "dynamic"]:
@@ -927,7 +952,7 @@ def plot_consensus_2d(
             rt_range = data["rt"].max() - data["rt"].min()
             mz_range = data["mz"].max() - data["mz"].min()
             dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
             scatter_renderer = p.circle(
                 x="rt",
                 y="mz",
@@ -957,7 +982,7 @@ def plot_consensus_2d(
         ("number_ms2", "@number_ms2"),
         ("inty_mean", "@inty_mean"),
     ]
     # Add id_top_* columns if they exist and have non-null values
     id_top_columns = ["id_top_name", "id_top_adduct", "id_top_class", "id_top_score"]
     for col in id_top_columns:
@@ -969,7 +994,7 @@ def plot_consensus_2d(
                     tooltips.append((col, f"@{col}{{0.0}}"))
                 else:
                     tooltips.append((col, f"@{col}"))
     hover = HoverTool(
         tooltips=tooltips,
     )
@@ -977,7 +1002,7 @@ def plot_consensus_2d(
     # For numeric data, specify the single renderer
     if not is_categorical and scatter_renderer:
         hover.renderers = [scatter_renderer]
     p.add_tools(hover)
     # add colorbar only for numeric data (LinearColorMapper)
@@ -996,15 +1021,15 @@ def plot_consensus_2d(
             # Map legend position parameter to Bokeh legend position
             legend_position_map = {
                 "top_right": "top_right",
-                "top_left": "top_left",
+                "top_left": "top_left",
                 "bottom_right": "bottom_right",
                 "bottom_left": "bottom_left",
                 "right": "right",
                 "left": "left",
                 "top": "top",
-                "bottom": "bottom"
+                "bottom": "bottom",
             }
             bokeh_legend_pos = legend_position_map.get(legend, "bottom_right")
             p.legend.location = bokeh_legend_pos
             p.legend.click_policy = "hide"
@@ -1015,6 +1040,7 @@ def plot_consensus_2d(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -1249,6 +1275,7 @@ def plot_samples_2d(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -1422,6 +1449,7 @@ def plot_bpc(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -1593,6 +1621,7 @@ def plot_eic(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -1659,15 +1688,13 @@ def plot_rt_correction(
     sample_names_dict = {}
     if hasattr(self, "samples_df") and self.samples_df is not None:
         try:
-            sample_name_mapping = (
-                self.samples_df
-                .filter(pl.col("sample_uid").is_in(sample_uids))
-                .select(["sample_uid", "sample_name"])
+            sample_name_mapping = self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids)).select([
+                "sample_uid",
+                "sample_name",
+            ])
+            sample_names_dict = dict(
+                zip(sample_name_mapping["sample_uid"].to_list(), sample_name_mapping["sample_name"].to_list())
             )
-            sample_names_dict = dict(zip(
-                sample_name_mapping["sample_uid"].to_list(),
-                sample_name_mapping["sample_name"].to_list()
-            ))
         except Exception:
             pass
@@ -1686,10 +1713,8 @@ def plot_rt_correction(
     # OPTIMIZED: Filter once, group once instead of per-sample filtering
     try:
         # Filter all data once for selected samples and required conditions
-        all_sample_feats = self.features_df.filter(
-            pl.col(sample_id_col).is_in(sample_uids)
-        )
+        all_sample_feats = self.features_df.filter(pl.col(sample_id_col).is_in(sample_uids))
         if all_sample_feats.is_empty():
             self.logger.warning("No features found for the selected samples.")
             return
@@ -1708,14 +1733,8 @@ def plot_rt_correction(
         # Filter nulls, add delta column, and sort - all in one operation
         all_sample_feats = (
-            all_sample_feats
-            .filter(
-                pl.col("rt").is_not_null() &
-                pl.col("rt_original").is_not_null()
-            )
-            .with_columns([
-                (pl.col("rt") - pl.col("rt_original")).alias("delta")
-            ])
+            all_sample_feats.filter(pl.col("rt").is_not_null() & pl.col("rt_original").is_not_null())
+            .with_columns([(pl.col("rt") - pl.col("rt_original")).alias("delta")])
             .sort([sample_id_col, "rt"])
         )
@@ -1770,6 +1789,7 @@ def plot_rt_correction(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -1882,7 +1902,7 @@ def plot_chrom(
                     curve = hv.Curve(
                         (rt, inty, sample_names_array, sample_uids_array, sample_colors_array),
                         kdims=["RT"],
-                        vdims=["inty", "sample_name", "sample_uid", "sample_color"]
+                        vdims=["inty", "sample_name", "sample_uid", "sample_color"],
                     ).opts(
                         color=color_map[sample],
                         line_width=1,
@@ -1892,8 +1912,8 @@ def plot_chrom(
                             ("Intensity", "@inty{0,0}"),
                             ("Sample Name", "@sample_name"),
                             ("Sample UID", "@sample_uid"),
-                            ("Sample Color", "$color[swatch]:sample_color")
-                        ]
+                            ("Sample Color", "$color[swatch]:sample_color"),
+                        ],
                     )
                     curves.append(curve)
@@ -1957,6 +1977,7 @@ def plot_chrom(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -1989,7 +2010,7 @@ def plot_consensus_stats(
 ):
     """
     Plot histograms/distributions for specific consensus statistics in the requested order.
     Shows the following properties in order:
     1. rt: Retention time
     2. rt_delta_mean: Mean retention time delta
@@ -2003,7 +2024,7 @@ def plot_consensus_stats(
     10. chrom_coherence_mean: Mean chromatographic coherence
     11. chrom_height_scaled_mean: Mean scaled chromatographic height
     12. chrom_prominence_scaled_mean: Mean scaled chromatographic prominence
     Parameters:
         filename (str, optional): Output filename for saving the plot
         width (int): Overall width of the plot (default: 840)
@@ -2019,7 +2040,7 @@ def plot_consensus_stats(
     # Get the consensus statistics data using the new helper method
     data_df = self.get_consensus_stats()
     if data_df is None or data_df.is_empty():
         self.logger.error("No consensus statistics data available.")
         return
@@ -2032,39 +2053,52 @@ def plot_consensus_stats(
     # Define specific columns to plot in the exact order requested (excluding consensus_uid)
     desired_columns = [
-        "rt",
-        "rt_delta_mean",
-        "mz",
+        "rt",
+        "rt_delta_mean",
+        "mz",
         "mz_range",  # mz_max-mz_min
         "log10_inty_mean",  # log10(inty_mean)
-        "number_samples",
-        "number_ms2",
-        "charge_mean",
-        "quality",
-        "chrom_coherence_mean",
-        "chrom_height_scaled_mean",
-        "chrom_prominence_scaled_mean"
+        "number_samples",
+        "number_ms2",
+        "charge_mean",
+        "quality",
+        "chrom_coherence_mean",
+        "chrom_height_scaled_mean",
+        "chrom_prominence_scaled_mean",
     ]
     # Filter to only include columns that exist in the dataframe, preserving order
     numeric_columns = [col for col in desired_columns if col in data_df_clean.columns]
     # Check if the numeric columns are actually numeric
     final_numeric_columns = []
     for col in numeric_columns:
         dtype = data_df_clean[col].dtype
-        if dtype in [pl.Int8, pl.Int16, pl.Int32, pl.Int64,
-                    pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
-                    pl.Float32, pl.Float64]:
+        if dtype in [
+            pl.Int8,
+            pl.Int16,
+            pl.Int32,
+            pl.Int64,
+            pl.UInt8,
+            pl.UInt16,
+            pl.UInt32,
+            pl.UInt64,
+            pl.Float32,
+            pl.Float64,
+        ]:
             final_numeric_columns.append(col)
     numeric_columns = final_numeric_columns
     if len(numeric_columns) == 0:
-        self.logger.error(f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df_clean.columns)}")
+        self.logger.error(
+            f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df_clean.columns)}"
+        )
         return
-    self.logger.debug(f"Creating distribution plots for {len(numeric_columns)} specific consensus columns: {numeric_columns}")
+    self.logger.debug(
+        f"Creating distribution plots for {len(numeric_columns)} specific consensus columns: {numeric_columns}"
+    )
     # Select only the numeric columns for plotting
     data_df_clean = data_df_clean.select(numeric_columns)
@@ -2073,15 +2107,23 @@ def plot_consensus_stats(
     all_columns_empty = True
     for col in numeric_columns:
         # Check if column has any non-null, finite values
-        non_null_count = data_df_clean[col].filter(
-            data_df_clean[col].is_not_null() &
-            (data_df_clean[col].is_finite() if data_df_clean[col].dtype in [pl.Float32, pl.Float64] else pl.lit(True))
-        ).len()
+        non_null_count = (
+            data_df_clean[col]
+            .filter(
+                data_df_clean[col].is_not_null()
+                & (
+                    data_df_clean[col].is_finite()
+                    if data_df_clean[col].dtype in [pl.Float32, pl.Float64]
+                    else pl.lit(True)
+                )
+            )
+            .len()
+        )
         if non_null_count > 0:
             all_columns_empty = False
             break
     if all_columns_empty:
         self.logger.error("All numeric columns contain only NaN/infinite values.")
         return
@@ -2089,24 +2131,24 @@ def plot_consensus_stats(
     # Calculate grid dimensions
     n_plots = len(numeric_columns)
     n_rows = (n_plots + n_cols - 1) // n_cols  # Ceiling division
     # Auto-calculate height if not provided
     if height is None:
         plot_height = 210  # Reduced from 300 (30% smaller)
         height = plot_height * n_rows + 56  # Reduced from 80 (30% smaller)
     else:
         plot_height = (height - 56) // n_rows  # Reduced padding (30% smaller)
     plot_width = (width - 56) // n_cols  # Reduced padding (30% smaller)
     # Create plots grid
     plots = []
     current_row = []
     for i, col in enumerate(numeric_columns):
         # Check if this column should use log scale for y-axis
         y_axis_type = "log" if col in ["number_samples", "number_ms2"] else "linear"
         # Create histogram for this column
         p = figure(
             width=plot_width,
@@ -2114,30 +2156,28 @@ def plot_consensus_stats(
             title=col,
             toolbar_location="above",
             tools="pan,wheel_zoom,box_zoom,reset,save",
-            y_axis_type=y_axis_type
+            y_axis_type=y_axis_type,
         )
         # Set white background
         p.background_fill_color = "white"
         p.border_fill_color = "white"
         # Calculate histogram using Polars
         # Get valid (non-null, finite) values for this column
         if data_df_clean[col].dtype in [pl.Float32, pl.Float64]:
-            valid_values = data_df_clean.filter(
-                data_df_clean[col].is_not_null() & data_df_clean[col].is_finite()
-            )[col]
+            valid_values = data_df_clean.filter(data_df_clean[col].is_not_null() & data_df_clean[col].is_finite())[col]
         else:
             valid_values = data_df_clean.filter(data_df_clean[col].is_not_null())[col]
         if valid_values.len() == 0:
             self.logger.warning(f"No valid values for column {col}")
             continue
         # Convert to numpy for histogram calculation
         values_array = valid_values.to_numpy()
         hist, edges = np.histogram(values_array, bins=bins)
         # Handle log y-axis: replace zero counts with small positive values
         if y_axis_type == "log":
             # Replace zero counts with a small value (1e-1) to make them visible on log scale
@@ -2146,7 +2186,7 @@ def plot_consensus_stats(
         else:
             hist_log_safe = hist
             bottom_val = 0
         # Create histogram bars
         p.quad(
             top=hist_log_safe,
@@ -2157,7 +2197,7 @@ def plot_consensus_stats(
             line_color="white",
             alpha=alpha,
         )
         # Style the plot
         p.title.text_font_size = "10pt"  # Reduced from 12pt
         p.xaxis.axis_label = ""  # Remove x-axis title
@@ -2166,12 +2206,12 @@ def plot_consensus_stats(
         p.grid.grid_line_dash = [6, 4]  # Dashed grid lines
         p.xgrid.visible = False  # Hide x-axis grid
         p.outline_line_color = None  # Remove gray border around plot area
         # Remove y-axis label but keep y-axis visible
         p.yaxis.axis_label = ""
         current_row.append(p)
         # If we've filled a row or reached the end, add the row to plots
         if len(current_row) == n_cols or i == n_plots - 1:
             # Fill remaining spots in the last row with None if needed
@@ -2182,15 +2222,15 @@ def plot_consensus_stats(
     # Create grid layout with white background
     grid = gridplot(plots, toolbar_location="above", merge_tools=True)
     # The background should be white by default in Bokeh
     # Individual plots already have white backgrounds set above
     # Apply consistent save/display behavior
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -2456,6 +2496,7 @@ def plot_samples_pca(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -2503,7 +2544,7 @@ def plot_samples_umap(
         random_state (int or None): Random state for reproducibility (default: 42).
             - Use an integer (e.g., 42) for reproducible results (slower, single-threaded)
             - Use None for faster computation with multiple cores (non-reproducible)
     Note:
         Setting random_state forces single-threaded computation but ensures reproducible results.
         Set random_state=None to enable parallel processing for faster computation.
@@ -2574,7 +2615,7 @@ def plot_samples_umap(
         min_dist=min_dist,
         metric=metric,
         random_state=random_state,
-        n_jobs=1
+        n_jobs=1,
     )
     umap_result = reducer.fit_transform(matrix_scaled)
@@ -2743,6 +2784,7 @@ def plot_samples_umap(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -2897,6 +2939,7 @@ def plot_tic(
     if filename is not None:
         # Convert relative paths to absolute paths using study folder as base
         import os
         if not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
@@ -2915,11 +2958,14 @@ def plot_tic(
 def plot_pca(self, *args, **kwargs):
     """
     Deprecated alias kept for backward compatibility; use plot_samples_pca.
     Issues a DeprecationWarning (stacklevel=2, so it is attributed to the
     caller of this wrapper) and then forwards every positional and keyword
     argument unchanged to ``self.plot_samples_pca``.
     Returns:
         Whatever ``self.plot_samples_pca(*args, **kwargs)`` returns.
     """
     import warnings
     # Build the notice first so the warn() call stays short and readable.
     deprecation_note = "plot_pca is deprecated, use plot_samples_pca instead"
     warnings.warn(deprecation_note, category=DeprecationWarning, stacklevel=2)
     # Pure pass-through: no argument is inspected or altered here.
     delegated = self.plot_samples_pca(*args, **kwargs)
     return delegated
 def plot_umap(self, *args, **kwargs):
     """
     Deprecated alias kept for backward compatibility; use plot_samples_umap.
     Issues a DeprecationWarning (stacklevel=2, so it is attributed to the
     caller of this wrapper) and then forwards every positional and keyword
     argument unchanged to ``self.plot_samples_umap``.
     Returns:
         Whatever ``self.plot_samples_umap(*args, **kwargs)`` returns.
     """
     import warnings
     # Build the notice first so the warn() call stays short and readable.
     deprecation_note = "plot_umap is deprecated, use plot_samples_umap instead"
     warnings.warn(deprecation_note, category=DeprecationWarning, stacklevel=2)
     # Pure pass-through: no argument is inspected or altered here.
     delegated = self.plot_samples_umap(*args, **kwargs)
     return delegated

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

Potentially problematic release.

masster 0.5.22-py3-none-any.whl → 0.5.24-py3-none-any.whl