masster 0.3.17-py3-none-any.whl → 0.3.19-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of masster might be problematic.

masster/study/plot.py CHANGED
@@ -17,7 +17,18 @@ hv.extension("bokeh")
17
17
  from bokeh.layouts import row as bokeh_row
18
18
 
19
19
 
20
- def plot_alignment(self, maps: bool = True, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
+ def plot_alignment(
+     self,
+     maps: bool = True,
+     samples: int | list[int | str] | None = None,
+     filename: str | None = None,
+     width: int = 450,
+     height: int = 450,
+     markersize: int = 3,
+ ):
21
32
  """Visualize retention time alignment using two synchronized Bokeh scatter plots.
22
33
 
23
34
  - When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
@@ -27,6 +38,11 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
27
38
 
28
39
  Parameters
29
40
  - maps: whether to use feature maps (default True).
41
+ - samples: Sample selection parameter, interpreted like in plot_samples_2d:
42
+ - None: show all samples
43
+ - int: show a random subset of N samples
44
+ - list of ints: show samples with these sample_uids
45
+ - list of strings: show samples with these sample_names
30
46
  - filename: optional HTML file path to save the plot.
31
47
  - width/height: pixel size of each subplot.
32
48
  - markersize: base marker size.
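As a rough usage sketch of the new samples parameter (assuming a hypothetical Study instance named study with feature maps or features_df already loaded; the calls mirror the docstring above):

    study.plot_alignment()                            # all samples
    study.plot_alignment(samples=5)                   # random subset of 5 samples
    study.plot_alignment(samples=[101, 102])          # select by sample_uid
    study.plot_alignment(samples=["QC_01", "QC_02"],  # select by sample_name
                         filename="alignment.html")   # and save to HTML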
@@ -54,6 +70,32 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
54
70
  self.logger.error("No feature maps available for plotting.")
55
71
  return
56
72
 
73
+ # Get sample_uids to limit which samples to show
74
+ sample_uids_to_show = self._get_sample_uids(samples)
75
+
76
+ # Filter feature maps based on sample selection
77
+ if sample_uids_to_show is not None:
78
+ # Get sample indices for the selected sample_uids
79
+ selected_indices = []
80
+ if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
81
+ samples_info = self.samples_df.to_pandas()
82
+ for idx, row in samples_info.iterrows():
83
+ if row.get('sample_uid') in sample_uids_to_show:
84
+ selected_indices.append(idx)
85
+ else:
86
+ # If no samples_df, just limit to the first N samples
87
+ if isinstance(samples, int):
88
+ selected_indices = list(range(min(samples, len(fmaps))))
89
+ else:
90
+ selected_indices = list(range(len(fmaps)))
91
+
92
+ # Filter feature maps to only include selected indices
93
+ fmaps = [fmaps[i] for i in selected_indices if i < len(fmaps)]
94
+
95
+ if not fmaps:
96
+ self.logger.error("No feature maps match the selected samples.")
97
+ return
98
+
57
99
  # Reference (first) sample: use current RT for both before and after
58
100
  ref = fmaps[0]
59
101
  ref_rt = [f.getRT() for f in ref]
@@ -62,17 +104,39 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
62
104
  max_ref_inty = max(ref_inty) if ref_inty else 1
63
105
 
64
106
  # sample metadata
65
- if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
107
+ if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
66
108
  samples_info = self.samples_df.to_pandas()
67
- ref_sample_uid = samples_info.iloc[0]['sample_uid'] if 'sample_uid' in samples_info.columns else 'Reference_UID'
68
- ref_sample_name = samples_info.iloc[0]['sample_name'] if 'sample_name' in samples_info.columns else 'Reference'
109
+ ref_sample_uid = (
110
+ samples_info.iloc[0]["sample_uid"] if "sample_uid" in samples_info.columns else "Reference_UID"
111
+ )
112
+ ref_sample_name = (
113
+ samples_info.iloc[0]["sample_name"] if "sample_name" in samples_info.columns else "Reference"
114
+ )
69
115
  else:
70
- ref_sample_uid = 'Reference_UID'
71
- ref_sample_name = 'Reference'
116
+ ref_sample_uid = "Reference_UID"
117
+ ref_sample_name = "Reference"
72
118
 
73
119
  for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
74
- before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
75
- after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
120
+ before_data.append({
121
+ "rt": rt,
122
+ "mz": mz,
123
+ "inty": inty,
124
+ "alpha": inty / max_ref_inty,
125
+ "sample_idx": 0,
126
+ "sample_name": ref_sample_name,
127
+ "sample_uid": ref_sample_uid,
128
+ "size": markersize + 2,
129
+ })
130
+ after_data.append({
131
+ "rt": rt,
132
+ "mz": mz,
133
+ "inty": inty,
134
+ "alpha": inty / max_ref_inty,
135
+ "sample_idx": 0,
136
+ "sample_name": ref_sample_name,
137
+ "sample_uid": ref_sample_uid,
138
+ "size": markersize + 2,
139
+ })
76
140
 
77
141
  # Remaining samples
78
142
  for sample_idx, fm in enumerate(fmaps[1:], start=1):
@@ -83,7 +147,7 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
83
147
 
84
148
  for f in fm:
85
149
  try:
86
- orig = f.getMetaValue('original_RT')
150
+ orig = f.getMetaValue("original_RT")
87
151
  except Exception:
88
152
  orig = None
89
153
 
@@ -101,23 +165,41 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
101
165
 
102
166
  max_inty = max(inty_vals)
103
167
 
104
- if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
168
+ if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
105
169
  samples_info = self.samples_df.to_pandas()
106
170
  if sample_idx < len(samples_info):
107
- sample_name = samples_info.iloc[sample_idx].get('sample_name', f'Sample {sample_idx}')
108
- sample_uid = samples_info.iloc[sample_idx].get('sample_uid', f'Sample_{sample_idx}_UID')
171
+ sample_name = samples_info.iloc[sample_idx].get("sample_name", f"Sample {sample_idx}")
172
+ sample_uid = samples_info.iloc[sample_idx].get("sample_uid", f"Sample_{sample_idx}_UID")
109
173
  else:
110
- sample_name = f'Sample {sample_idx}'
111
- sample_uid = f'Sample_{sample_idx}_UID'
174
+ sample_name = f"Sample {sample_idx}"
175
+ sample_uid = f"Sample_{sample_idx}_UID"
112
176
  else:
113
- sample_name = f'Sample {sample_idx}'
114
- sample_uid = f'Sample_{sample_idx}_UID'
177
+ sample_name = f"Sample {sample_idx}"
178
+ sample_uid = f"Sample_{sample_idx}_UID"
115
179
 
116
180
  for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
117
- before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
181
+ before_data.append({
182
+ "rt": rt,
183
+ "mz": mz,
184
+ "inty": inty,
185
+ "alpha": inty / max_inty,
186
+ "sample_idx": sample_idx,
187
+ "sample_name": sample_name,
188
+ "sample_uid": sample_uid,
189
+ "size": markersize,
190
+ })
118
191
 
119
192
  for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
120
- after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
193
+ after_data.append({
194
+ "rt": rt,
195
+ "mz": mz,
196
+ "inty": inty,
197
+ "alpha": inty / max_inty,
198
+ "sample_idx": sample_idx,
199
+ "sample_name": sample_name,
200
+ "sample_uid": sample_uid,
201
+ "size": markersize,
202
+ })
121
203
 
122
204
  else:
123
205
  # Use features_df
@@ -125,66 +207,98 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
125
207
  self.logger.error("No features_df found. Load features first.")
126
208
  return
127
209
 
128
- required_cols = ['rt', 'mz', 'inty']
210
+ required_cols = ["rt", "mz", "inty"]
129
211
  missing = [c for c in required_cols if c not in self.features_df.columns]
130
212
  if missing:
131
213
  self.logger.error(f"Missing required columns in features_df: {missing}")
132
214
  return
133
215
 
134
- if 'rt_original' not in self.features_df.columns:
216
+ if "rt_original" not in self.features_df.columns:
135
217
  self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
136
218
  return
137
219
 
138
220
  # Use Polars instead of pandas
139
221
  features_df = self.features_df
140
222
 
141
- sample_col = 'sample_uid' if 'sample_uid' in features_df.columns else 'sample_name'
223
+ sample_col = "sample_uid" if "sample_uid" in features_df.columns else "sample_name"
142
224
  if sample_col not in features_df.columns:
143
225
  self.logger.error("No sample identifier column found in features_df.")
144
226
  return
145
227
 
228
+ # Get sample_uids to limit which samples to show
229
+ sample_uids_to_show = self._get_sample_uids(samples)
230
+
231
+ # Filter features_df based on sample selection if specified
232
+ if sample_uids_to_show is not None:
233
+ if sample_col == 'sample_uid':
234
+ features_df = features_df.filter(pl.col('sample_uid').is_in(sample_uids_to_show))
235
+ else:
236
+ # Need to convert sample names to sample_uids if using sample_name column
237
+ if 'sample_uid' in features_df.columns:
238
+ # Filter by sample_uid even though we're using sample_name as the primary column
239
+ features_df = features_df.filter(pl.col('sample_uid').is_in(sample_uids_to_show))
240
+ else:
241
+ # Convert sample_uids to sample_names and filter
242
+ sample_names_to_show = []
243
+ if hasattr(self, 'samples_df') and self.samples_df is not None:
244
+ for uid in sample_uids_to_show:
245
+ matching_rows = self.samples_df.filter(pl.col("sample_uid") == uid)
246
+ if not matching_rows.is_empty():
247
+ sample_names_to_show.append(matching_rows.row(0, named=True)["sample_name"])
248
+ features_df = features_df.filter(pl.col('sample_name').is_in(sample_names_to_show))
249
+
146
250
  # Get unique samples using Polars
147
251
  samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
148
252
 
149
253
  for sample_idx, sample in enumerate(samples):
150
254
  # Filter sample data using Polars
151
255
  sample_data = features_df.filter(pl.col(sample_col) == sample)
152
-
256
+
153
257
  # Calculate max intensity using Polars
154
- max_inty = sample_data.select(pl.col('inty').max()).item()
258
+ max_inty = sample_data.select(pl.col("inty").max()).item()
155
259
  max_inty = max_inty if max_inty and max_inty > 0 else 1
156
-
260
+
157
261
  sample_name = str(sample)
158
262
  # Get sample_uid - if sample_col is 'sample_uid', use sample directly
159
- if sample_col == 'sample_uid':
263
+ if sample_col == "sample_uid":
160
264
  sample_uid = sample
161
265
  else:
162
266
  # Try to get sample_uid from the first row if it exists
163
- if 'sample_uid' in sample_data.columns:
164
- sample_uid = sample_data.select(pl.col('sample_uid')).item()
267
+ if "sample_uid" in sample_data.columns:
268
+ sample_uid = sample_data.select(pl.col("sample_uid")).item()
165
269
  else:
166
270
  sample_uid = sample
167
271
 
168
272
  # Convert to dict for iteration - more efficient than row-by-row processing
169
- sample_dict = sample_data.select(['rt_original', 'rt', 'mz', 'inty']).to_dicts()
170
-
273
+ sample_dict = sample_data.select(["rt_original", "rt", "mz", "inty"]).to_dicts()
274
+
171
275
  for row_dict in sample_dict:
172
- rt_original = row_dict['rt_original']
173
- rt_current = row_dict['rt']
174
- mz = row_dict['mz']
175
- inty = row_dict['inty']
276
+ rt_original = row_dict["rt_original"]
277
+ rt_current = row_dict["rt"]
278
+ mz = row_dict["mz"]
279
+ inty = row_dict["inty"]
176
280
  alpha = inty / max_inty
177
281
  size = markersize + 2 if sample_idx == 0 else markersize
178
-
282
+
179
283
  before_data.append({
180
- 'rt': rt_original, 'mz': mz, 'inty': inty, 'alpha': alpha,
181
- 'sample_idx': sample_idx, 'sample_name': sample_name,
182
- 'sample_uid': sample_uid, 'size': size
284
+ "rt": rt_original,
285
+ "mz": mz,
286
+ "inty": inty,
287
+ "alpha": alpha,
288
+ "sample_idx": sample_idx,
289
+ "sample_name": sample_name,
290
+ "sample_uid": sample_uid,
291
+ "size": size,
183
292
  })
184
293
  after_data.append({
185
- 'rt': rt_current, 'mz': mz, 'inty': inty, 'alpha': alpha,
186
- 'sample_idx': sample_idx, 'sample_name': sample_name,
187
- 'sample_uid': sample_uid, 'size': size
294
+ "rt": rt_current,
295
+ "mz": mz,
296
+ "inty": inty,
297
+ "alpha": alpha,
298
+ "sample_idx": sample_idx,
299
+ "sample_name": sample_name,
300
+ "sample_uid": sample_uid,
301
+ "size": size,
188
302
  })
189
303
 
190
304
  # Get sample colors from samples_df using sample indices
@@ -193,17 +307,16 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
193
307
  # Create mapping from sample_idx to sample_uid more efficiently
194
308
  sample_idx_to_uid = {}
195
309
  for item in before_data:
196
- if item['sample_idx'] not in sample_idx_to_uid:
197
- sample_idx_to_uid[item['sample_idx']] = item['sample_uid']
310
+ if item["sample_idx"] not in sample_idx_to_uid:
311
+ sample_idx_to_uid[item["sample_idx"]] = item["sample_uid"]
198
312
  else:
199
313
  sample_idx_to_uid = {}
200
-
314
+
201
315
  # Get colors from samples_df
202
316
  sample_uids_list = list(sample_idx_to_uid.values())
203
- if sample_uids_list and hasattr(self, 'samples_df') and self.samples_df is not None:
317
+ if sample_uids_list and hasattr(self, "samples_df") and self.samples_df is not None:
204
318
  sample_colors = (
205
- self.samples_df
206
- .filter(pl.col("sample_uid").is_in(sample_uids_list))
319
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids_list))
207
320
  .select(["sample_uid", "sample_color"])
208
321
  .to_dict(as_series=False)
209
322
  )
@@ -219,68 +332,106 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
219
332
  # Add sample_color to data dictionaries before creating DataFrames
220
333
  if before_data:
221
334
  for item in before_data:
222
- item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
223
-
335
+ item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
336
+
224
337
  if after_data:
225
338
  for item in after_data:
226
- item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
227
-
339
+ item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
340
+
228
341
  # Now create DataFrames with the sample_color already included
229
342
  before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
230
343
  after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
231
344
 
232
345
  # Create Bokeh figures
233
- p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
346
+ p1 = figure(
347
+ width=width,
348
+ height=height,
349
+ title="Original RT",
350
+ x_axis_label="Retention Time (s)",
351
+ y_axis_label="m/z",
352
+ tools="pan,wheel_zoom,box_zoom,reset,save",
353
+ )
234
354
  p1.outline_line_color = None
235
- p1.background_fill_color = 'white'
236
- p1.border_fill_color = 'white'
355
+ p1.background_fill_color = "white"
356
+ p1.border_fill_color = "white"
237
357
  p1.min_border = 0
238
358
 
239
- p2 = figure(width=width, height=height, title='Current RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save', x_range=p1.x_range, y_range=p1.y_range)
359
+ p2 = figure(
360
+ width=width,
361
+ height=height,
362
+ title="Current RT",
363
+ x_axis_label="Retention Time (s)",
364
+ y_axis_label="m/z",
365
+ tools="pan,wheel_zoom,box_zoom,reset,save",
366
+ x_range=p1.x_range,
367
+ y_range=p1.y_range,
368
+ )
240
369
  p2.outline_line_color = None
241
- p2.background_fill_color = 'white'
242
- p2.border_fill_color = 'white'
370
+ p2.background_fill_color = "white"
371
+ p2.border_fill_color = "white"
243
372
  p2.min_border = 0
244
-
373
+
245
374
  # Get unique sample indices for iteration
246
- unique_samples = sorted(list(set(item['sample_idx'] for item in before_data))) if before_data else []
375
+ unique_samples = sorted(list({item["sample_idx"] for item in before_data})) if before_data else []
247
376
 
248
377
  renderers_before = []
249
378
  renderers_after = []
250
379
 
251
380
  for sample_idx in unique_samples:
252
- sb = before_df[before_df['sample_idx'] == sample_idx]
253
- sa = after_df[after_df['sample_idx'] == sample_idx]
254
- color = color_map.get(sample_idx, '#000000')
381
+ sb = before_df[before_df["sample_idx"] == sample_idx]
382
+ sa = after_df[after_df["sample_idx"] == sample_idx]
383
+ color = color_map.get(sample_idx, "#000000")
255
384
 
256
385
  if not sb.empty:
257
386
  src = ColumnDataSource(sb)
258
- r = p1.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
387
+ r = p1.scatter("rt", "mz", size="size", color=color, alpha="alpha", source=src)
259
388
  renderers_before.append(r)
260
389
 
261
390
  if not sa.empty:
262
391
  src = ColumnDataSource(sa)
263
- r = p2.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
392
+ r = p2.scatter("rt", "mz", size="size", color=color, alpha="alpha", source=src)
264
393
  renderers_after.append(r)
265
394
 
266
395
  # Add hover tools
267
- hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_before)
396
+ hover1 = HoverTool(
397
+ tooltips=[
398
+ ("Sample UID", "@sample_uid"),
399
+ ("Sample Name", "@sample_name"),
400
+ ("Sample Color", "$color[swatch]:sample_color"),
401
+ ("RT", "@rt{0.00}"),
402
+ ("m/z", "@mz{0.0000}"),
403
+ ("Intensity", "@inty{0.0e0}"),
404
+ ],
405
+ renderers=renderers_before,
406
+ )
268
407
  p1.add_tools(hover1)
269
408
 
270
- hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_after)
409
+ hover2 = HoverTool(
410
+ tooltips=[
411
+ ("Sample UID", "@sample_uid"),
412
+ ("Sample Name", "@sample_name"),
413
+ ("Sample Color", "$color[swatch]:sample_color"),
414
+ ("RT", "@rt{0.00}"),
415
+ ("m/z", "@mz{0.0000}"),
416
+ ("Intensity", "@inty{0.0e0}"),
417
+ ],
418
+ renderers=renderers_after,
419
+ )
271
420
  p2.add_tools(hover2)
272
421
 
273
422
  # Create layout with both plots side by side
274
423
  # Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
275
- layout = bokeh_row(p1, p2, sizing_mode='fixed', width=width, height=height)
424
+ layout = bokeh_row(p1, p2, sizing_mode="fixed", width=width, height=height)
276
425
 
277
426
  # Output and show
278
427
  if filename:
279
428
  from bokeh.plotting import output_file, show
429
+
280
430
  output_file(filename)
281
431
  show(layout)
282
432
  else:
283
433
  from bokeh.plotting import show
434
+
284
435
  show(layout)
285
436
 
286
437
  return layout
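The two panels above stay synchronized because the second figure reuses the first figure's ranges (x_range=p1.x_range, y_range=p1.y_range) before both are placed in a bokeh_row. A minimal, self-contained sketch of that linked-axes pattern, using made-up data rather than masster objects:

    import pandas as pd
    from bokeh.layouts import row
    from bokeh.models import ColumnDataSource
    from bokeh.plotting import figure, show

    # Toy stand-in for feature coordinates
    src = ColumnDataSource(pd.DataFrame({"rt": [10.0, 20.0, 30.0], "mz": [150.1, 250.2, 350.3]}))

    p1 = figure(width=450, height=450, title="Original RT", tools="pan,wheel_zoom,box_zoom,reset,save")
    p1.scatter("rt", "mz", source=src, size=5)

    # Sharing p1's ranges links pan/zoom across both panels
    p2 = figure(width=450, height=450, title="Current RT", tools="pan,wheel_zoom,box_zoom,reset,save",
                x_range=p1.x_range, y_range=p1.y_range)
    p2.scatter("rt", "mz", source=src, size=5)

    show(row(p1, p2, sizing_mode="fixed", width=900, height=450))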
@@ -392,14 +543,14 @@ def plot_consensus_2d(
392
543
  except ImportError:
393
544
  from bokeh.models.annotations import ColorBar
394
545
  from bokeh.palettes import viridis
395
-
546
+
396
547
  # Import cmap for colormap handling
397
548
  from cmap import Colormap
398
549
 
399
550
  # Convert Polars DataFrame to pandas for Bokeh compatibility
400
551
  data_pd = data.to_pandas()
401
552
  source = ColumnDataSource(data_pd)
402
-
553
+
403
554
  # Handle colormap using cmap.Colormap
404
555
  try:
405
556
  # Get colormap palette using cmap
@@ -408,6 +559,7 @@ def plot_consensus_2d(
408
559
  # Generate 256 colors and convert to hex
409
560
  import numpy as np
410
561
  import matplotlib.colors as mcolors
562
+
411
563
  colors = colormap(np.linspace(0, 1, 256))
412
564
  palette = [mcolors.rgb2hex(color) for color in colors]
413
565
  else:
@@ -420,19 +572,21 @@ def plot_consensus_2d(
420
572
  # Fall back to generating colors manually
421
573
  import numpy as np
422
574
  import matplotlib.colors as mcolors
575
+
423
576
  colors = colormap(np.linspace(0, 1, 256))
424
577
  palette = [mcolors.rgb2hex(color) for color in colors]
425
578
  except AttributeError:
426
579
  # Fall back to generating colors manually
427
580
  import numpy as np
428
581
  import matplotlib.colors as mcolors
582
+
429
583
  colors = colormap(np.linspace(0, 1, 256))
430
584
  palette = [mcolors.rgb2hex(color) for color in colors]
431
585
  except (AttributeError, ValueError, TypeError) as e:
432
586
  # Fallback to viridis if cmap interpretation fails
433
587
  self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
434
588
  palette = viridis(256)
435
-
589
+
436
590
  color_mapper = LinearColorMapper(
437
591
  palette=palette,
438
592
  low=data[colorby].min(),
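The palette handling above turns a cmap Colormap into a list of 256 hex colors that Bokeh's LinearColorMapper accepts, with viridis as the fallback. A minimal sketch of that conversion, assuming the cmap, numpy, matplotlib, and bokeh packages are available:

    import numpy as np
    import matplotlib.colors as mcolors
    from bokeh.models import LinearColorMapper
    from cmap import Colormap

    colormap = Colormap("viridis")                  # any colormap name cmap can interpret
    colors = colormap(np.linspace(0, 1, 256))       # RGBA values sampled along the colormap
    palette = [mcolors.rgb2hex(c) for c in colors]  # hex strings for Bokeh
    color_mapper = LinearColorMapper(palette=palette, low=0.0, high=1.0)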
@@ -550,8 +704,7 @@ def plot_samples_2d(
550
704
 
551
705
  # Get sample colors from samples_df
552
706
  sample_colors = (
553
- self.samples_df
554
- .filter(pl.col("sample_uid").is_in(sample_uids))
707
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
555
708
  .select(["sample_uid", "sample_color"])
556
709
  .to_dict(as_series=False)
557
710
  )
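This sample-color lookup recurs throughout the module: filter samples_df down to the selected sample_uids, pull the uid and color columns, and zip them into a dict. A small standalone Polars sketch with made-up values:

    import polars as pl

    samples_df = pl.DataFrame({
        "sample_uid": [1, 2, 3],
        "sample_color": ["#1f77b4", "#ff7f0e", "#2ca02c"],
    })
    sample_uids = [1, 3]

    sample_colors = (
        samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
        .select(["sample_uid", "sample_color"])
        .to_dict(as_series=False)
    )
    color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
    # {1: "#1f77b4", 3: "#2ca02c"}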
@@ -741,7 +894,7 @@ def plot_bpc(
741
894
  original: bool = False,
742
895
  ):
743
896
  """
744
- Plot Base Peak Chromatograms (BPC) for selected samples overlayed using Bokeh.
897
+ Plot Base Peak Chromatograms (BPC) for selected samples overlaid using Bokeh.
745
898
 
746
899
  This collects per-sample BPCs via `get_bpc(self, sample=uid)` and overlays them.
747
900
  Colors are mapped per-sample using the same Turbo256 palette as `plot_samples_2d`.
@@ -765,8 +918,7 @@ def plot_bpc(
765
918
 
766
919
  # Get sample colors from samples_df
767
920
  sample_colors = (
768
- self.samples_df
769
- .filter(pl.col("sample_uid").is_in(sample_uids))
921
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
770
922
  .select(["sample_uid", "sample_color"])
771
923
  .to_dict(as_series=False)
772
924
  )
@@ -783,7 +935,7 @@ def plot_bpc(
783
935
  for uid in sample_uids:
784
936
  try:
785
937
  first_chrom = get_bpc(self, sample=uid, label=None, original=original)
786
- if hasattr(first_chrom, 'rt_unit'):
938
+ if hasattr(first_chrom, "rt_unit"):
787
939
  rt_unit = first_chrom.rt_unit
788
940
  break
789
941
  except Exception:
@@ -814,7 +966,11 @@ def plot_bpc(
814
966
  # extract arrays
815
967
  try:
816
968
  # prefer Chromatogram API
817
- chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
969
+ chrom_dict = (
970
+ chrom.to_dict()
971
+ if hasattr(chrom, "to_dict")
972
+ else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
973
+ )
818
974
  rt = chrom_dict.get("rt")
819
975
  inty = chrom_dict.get("inty")
820
976
  except Exception:
@@ -854,7 +1010,7 @@ def plot_bpc(
854
1010
 
855
1011
  # Debug: log sample processing details
856
1012
  self.logger.debug(
857
- f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
1013
+ f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}",
858
1014
  )
859
1015
 
860
1016
  data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
@@ -868,7 +1024,15 @@ def plot_bpc(
868
1024
  self.logger.warning("No BPC curves to plot for the selected samples.")
869
1025
  return
870
1026
 
871
- hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
1027
+ hover = HoverTool(
1028
+ tooltips=[
1029
+ ("sample", "@sample"),
1030
+ ("sample_color", "$color[swatch]:sample_color"),
1031
+ ("rt", "@rt{0.00}"),
1032
+ ("inty", "@inty{0.00e0}"),
1033
+ ],
1034
+ renderers=renderers,
1035
+ )
872
1036
  p.add_tools(hover)
873
1037
 
874
1038
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -937,8 +1101,7 @@ def plot_eic(
937
1101
 
938
1102
  # Get sample colors from samples_df
939
1103
  sample_colors = (
940
- self.samples_df
941
- .filter(pl.col("sample_uid").is_in(sample_uids))
1104
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
942
1105
  .select(["sample_uid", "sample_color"])
943
1106
  .to_dict(as_series=False)
944
1107
  )
@@ -951,7 +1114,7 @@ def plot_eic(
951
1114
  for uid in sample_uids:
952
1115
  try:
953
1116
  first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
954
- if hasattr(first_chrom, 'rt_unit'):
1117
+ if hasattr(first_chrom, "rt_unit"):
955
1118
  rt_unit = first_chrom.rt_unit
956
1119
  break
957
1120
  except Exception:
@@ -982,7 +1145,11 @@ def plot_eic(
982
1145
  # extract arrays
983
1146
  try:
984
1147
  # prefer Chromatogram API
985
- chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
1148
+ chrom_dict = (
1149
+ chrom.to_dict()
1150
+ if hasattr(chrom, "to_dict")
1151
+ else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
1152
+ )
986
1153
  rt = chrom_dict.get("rt")
987
1154
  inty = chrom_dict.get("inty")
988
1155
  except Exception:
@@ -1030,7 +1197,15 @@ def plot_eic(
1030
1197
  self.logger.warning("No EIC curves to plot for the selected samples.")
1031
1198
  return
1032
1199
 
1033
- hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e0}")], renderers=renderers)
1200
+ hover = HoverTool(
1201
+ tooltips=[
1202
+ ("sample", "@sample"),
1203
+ ("sample_color", "$color[swatch]:sample_color"),
1204
+ ("rt", "@rt{0.00}"),
1205
+ ("inty", "@inty{0.0e0}"),
1206
+ ],
1207
+ renderers=renderers,
1208
+ )
1034
1209
  p.add_tools(hover)
1035
1210
 
1036
1211
  if getattr(p, "legend", None) and len(p.legend) > 0:
@@ -1064,7 +1239,7 @@ def plot_rt_correction(
1064
1239
  height: int = 300,
1065
1240
  ):
1066
1241
  """
1067
- Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
1242
+ Plot RT correction per sample: (rt - rt_original) vs rt overlaid for selected samples.
1068
1243
 
1069
1244
  This uses the same color mapping as `plot_bpc` so curves for the same samples match.
1070
1245
  """
@@ -1088,8 +1263,7 @@ def plot_rt_correction(
1088
1263
 
1089
1264
  # Get sample colors from samples_df
1090
1265
  sample_colors = (
1091
- self.samples_df
1092
- .filter(pl.col("sample_uid").is_in(sample_uids))
1266
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
1093
1267
  .select(["sample_uid", "sample_color"])
1094
1268
  .to_dict(as_series=False)
1095
1269
  )
@@ -1175,7 +1349,15 @@ def plot_rt_correction(
1175
1349
  self.logger.warning("No RT correction curves to plot for the selected samples.")
1176
1350
  return
1177
1351
 
1178
- hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
1352
+ hover = HoverTool(
1353
+ tooltips=[
1354
+ ("sample", "@sample"),
1355
+ ("sample_color", "$color[swatch]:sample_color"),
1356
+ ("rt", "@rt{0.00}"),
1357
+ ("rt - rt_original", "@delta{0.00}"),
1358
+ ],
1359
+ renderers=renderers,
1360
+ )
1179
1361
  p.add_tools(hover)
1180
1362
 
1181
1363
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -1227,7 +1409,7 @@ def plot_chrom(
1227
1409
  if not sample_names:
1228
1410
  self.logger.error("No sample names found in chromatogram data.")
1229
1411
  return
1230
-
1412
+
1231
1413
  # Create color mapping by getting sample_color for each sample_name
1232
1414
  samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
1233
1415
  sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
@@ -1649,11 +1831,19 @@ def plot_pca(
1649
1831
 
1650
1832
  self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
1651
1833
 
1652
- # Convert consensus matrix to numpy if it's not already
1653
- if hasattr(consensus_matrix, "values"):
1834
+ # Convert consensus matrix to numpy - handle both Polars and pandas DataFrames
1835
+ if hasattr(consensus_matrix, "to_numpy"):
1836
+ # Polars or pandas DataFrame
1837
+ if hasattr(consensus_matrix, "select"):
1838
+ # Polars DataFrame - exclude the consensus_uid column
1839
+ numeric_cols = [col for col in consensus_matrix.columns if col != "consensus_uid"]
1840
+ matrix_data = consensus_matrix.select(numeric_cols).to_numpy()
1841
+ else:
1842
+ # Pandas DataFrame
1843
+ matrix_data = consensus_matrix.to_numpy()
1844
+ elif hasattr(consensus_matrix, "values"):
1845
+ # Pandas DataFrame
1654
1846
  matrix_data = consensus_matrix.values
1655
- elif hasattr(consensus_matrix, "to_numpy"):
1656
- matrix_data = consensus_matrix.to_numpy()
1657
1847
  else:
1658
1848
  matrix_data = np.array(consensus_matrix)
1659
1849
 
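The branch above normalizes the consensus matrix to a NumPy array whether it arrives as a Polars or pandas DataFrame, dropping the consensus_uid identifier column in the Polars case. A simplified sketch of that dispatch with toy data (the column names are illustrative):

    import numpy as np
    import polars as pl

    consensus_matrix = pl.DataFrame({
        "consensus_uid": [10, 11],
        "sample_A": [1.0, 2.0],
        "sample_B": [3.0, 4.0],
    })

    if hasattr(consensus_matrix, "select"):  # Polars: drop the identifier column first
        numeric_cols = [c for c in consensus_matrix.columns if c != "consensus_uid"]
        matrix_data = consensus_matrix.select(numeric_cols).to_numpy()
    elif hasattr(consensus_matrix, "to_numpy") or hasattr(consensus_matrix, "values"):  # pandas
        matrix_data = consensus_matrix.to_numpy()
    else:
        matrix_data = np.array(consensus_matrix)

    print(matrix_data.shape)  # (2, 2)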
@@ -1692,7 +1882,7 @@ def plot_pca(
1692
1882
  else:
1693
1883
  self.logger.warning(
1694
1884
  f"Sample count mismatch: samples_df has {len(samples_pd)} rows, "
1695
- f"but consensus matrix has {len(pca_df)} samples"
1885
+ f"but consensus matrix has {len(pca_df)} samples",
1696
1886
  )
1697
1887
 
1698
1888
  # Prepare color mapping
@@ -1763,25 +1953,23 @@ def plot_pca(
1763
1953
  if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
1764
1954
  # Choose the identifier to map colors by
1765
1955
  id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
1766
-
1956
+
1767
1957
  # Get colors from samples_df based on the identifier
1768
1958
  if id_col == "sample_uid":
1769
1959
  sample_colors = (
1770
- self.samples_df
1771
- .filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
1960
+ self.samples_df.filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
1772
1961
  .select(["sample_uid", "sample_color"])
1773
1962
  .to_dict(as_series=False)
1774
1963
  )
1775
1964
  color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
1776
1965
  else: # sample_name
1777
1966
  sample_colors = (
1778
- self.samples_df
1779
- .filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
1967
+ self.samples_df.filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
1780
1968
  .select(["sample_name", "sample_color"])
1781
1969
  .to_dict(as_series=False)
1782
1970
  )
1783
1971
  color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
1784
-
1972
+
1785
1973
  # Map colors into dataframe
1786
1974
  pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]] # fallback to blue
1787
1975
  # Update the ColumnDataSource with new color column
@@ -1817,7 +2005,7 @@ def plot_pca(
1817
2005
  if col in pca_df.columns:
1818
2006
  if col == "sample_color":
1819
2007
  # Display sample_color as a colored swatch
1820
- tooltip_list.append(('color', "$color[swatch]:sample_color"))
2008
+ tooltip_list.append(("color", "$color[swatch]:sample_color"))
1821
2009
  elif pca_df[col].dtype in ["float64", "float32"]:
1822
2010
  tooltip_list.append((col, f"@{col}{{0.00}}"))
1823
2011
  else:
@@ -1843,6 +2031,7 @@ def plot_pca(
1843
2031
  show(p)
1844
2032
  return p
1845
2033
 
2034
+
1846
2035
  def plot_tic(
1847
2036
  self,
1848
2037
  samples=None,
@@ -1853,7 +2042,7 @@ def plot_tic(
1853
2042
  original: bool = False,
1854
2043
  ):
1855
2044
  """
1856
- Plot Total Ion Chromatograms (TIC) for selected samples overlayed using Bokeh.
2045
+ Plot Total Ion Chromatograms (TIC) for selected samples overlaid using Bokeh.
1857
2046
 
1858
2047
  Parameters and behavior mirror `plot_bpc` but use per-sample TICs (get_tic).
1859
2048
  """
@@ -1870,8 +2059,7 @@ def plot_tic(
1870
2059
 
1871
2060
  # Get sample colors from samples_df
1872
2061
  sample_colors = (
1873
- self.samples_df
1874
- .filter(pl.col("sample_uid").is_in(sample_uids))
2062
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
1875
2063
  .select(["sample_uid", "sample_color"])
1876
2064
  .to_dict(as_series=False)
1877
2065
  )
@@ -1884,7 +2072,7 @@ def plot_tic(
1884
2072
  for uid in sample_uids:
1885
2073
  try:
1886
2074
  first_chrom = get_tic(self, sample=uid, label=None)
1887
- if hasattr(first_chrom, 'rt_unit'):
2075
+ if hasattr(first_chrom, "rt_unit"):
1888
2076
  rt_unit = first_chrom.rt_unit
1889
2077
  break
1890
2078
  except Exception:
@@ -1913,7 +2101,11 @@ def plot_tic(
1913
2101
 
1914
2102
  # extract arrays
1915
2103
  try:
1916
- chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
2104
+ chrom_dict = (
2105
+ chrom.to_dict()
2106
+ if hasattr(chrom, "to_dict")
2107
+ else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
2108
+ )
1917
2109
  rt = chrom_dict.get("rt")
1918
2110
  inty = chrom_dict.get("inty")
1919
2111
  except Exception:
@@ -1961,7 +2153,15 @@ def plot_tic(
1961
2153
  self.logger.warning("No TIC curves to plot for the selected samples.")
1962
2154
  return
1963
2155
 
1964
- hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
2156
+ hover = HoverTool(
2157
+ tooltips=[
2158
+ ("sample", "@sample"),
2159
+ ("sample_color", "$color[swatch]:sample_color"),
2160
+ ("rt", "@rt{0.00}"),
2161
+ ("inty", "@inty{0.00e0}"),
2162
+ ],
2163
+ renderers=renderers,
2164
+ )
1965
2165
  p.add_tools(hover)
1966
2166
 
1967
2167
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings