PyPI - masster - Versions diffs - 0.3.13__py3-none-any.whl → 0.3.15__py3-none-any.whl - Mend

masster 0.3.13__py3-none-any.whl → 0.3.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (20) hide show

masster/sample/helpers.py +9 -2
masster/sample/load.py +11 -7
masster/sample/plot.py +43 -34
masster/study/defaults/study_def.py +20 -0
masster/study/h5.py +120 -23
masster/study/helpers.py +974 -13
masster/study/load.py +28 -15
masster/study/plot.py +270 -98
masster/study/processing.py +9 -0
masster/study/study.py +32 -38
masster/study/study5_schema.json +14 -5
{masster-0.3.13.dist-info → masster-0.3.15.dist-info}/METADATA +2 -1
{masster-0.3.13.dist-info → masster-0.3.15.dist-info}/RECORD +16 -20
masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +0 -199787
masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
masster/docs/SCX_API_Documentation.md +0 -0
masster/docs/SCX_DLL_Analysis.md +0 -0
{masster-0.3.13.dist-info → masster-0.3.15.dist-info}/WHEEL +0 -0
{masster-0.3.13.dist-info → masster-0.3.15.dist-info}/entry_points.txt +0 -0
{masster-0.3.13.dist-info → masster-0.3.15.dist-info}/licenses/LICENSE +0 -0

masster/study/plot.py CHANGED Viewed

@@ -37,7 +37,6 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
     # Local imports so the module can be used even if bokeh isn't needed elsewhere
     from bokeh.models import ColumnDataSource, HoverTool
     from bokeh.plotting import figure, show, output_file
-    from bokeh.palettes import Turbo256
     import pandas as pd
     # Build the before/after tabular data used for plotting
@@ -136,34 +135,99 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
             self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
             return
-        features_pd = self.features_df.to_pandas()
+        # Use Polars instead of pandas
+        features_df = self.features_df
-        sample_col = 'sample_uid' if 'sample_uid' in features_pd.columns else 'sample_name'
-        if sample_col not in features_pd.columns:
+        sample_col = 'sample_uid' if 'sample_uid' in features_df.columns else 'sample_name'
+        if sample_col not in features_df.columns:
             self.logger.error("No sample identifier column found in features_df.")
             return
-        samples = features_pd[sample_col].unique()
+        # Get unique samples using Polars
+        samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
         for sample_idx, sample in enumerate(samples):
-            sample_data = features_pd[features_pd[sample_col] == sample]
-            max_inty = sample_data['inty'].max() if sample_data['inty'].max() > 0 else 1
+            # Filter sample data using Polars
+            sample_data = features_df.filter(pl.col(sample_col) == sample)
+            # Calculate max intensity using Polars
+            max_inty = sample_data.select(pl.col('inty').max()).item()
+            max_inty = max_inty if max_inty and max_inty > 0 else 1
             sample_name = str(sample)
-            sample_uid = sample if sample_col == 'sample_uid' else (sample_data['sample_uid'].iloc[0] if 'sample_uid' in sample_data.columns else sample)
-            for _, row in sample_data.iterrows():
-                before_data.append({'rt': row['rt_original'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
-                after_data.append({'rt': row['rt'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
-    # Ensure dataframes exist even if empty
-    before_df = pd.DataFrame(before_data)
-    after_df = pd.DataFrame(after_data)
-    # Create ColumnDataSources (safe even for empty dfs)
-    from bokeh.models import ColumnDataSource
+            # Get sample_uid - if sample_col is 'sample_uid', use sample directly
+            if sample_col == 'sample_uid':
+                sample_uid = sample
+            else:
+                # Try to get sample_uid from the first row if it exists
+                if 'sample_uid' in sample_data.columns:
+                    sample_uid = sample_data.select(pl.col('sample_uid')).item()
+                else:
+                    sample_uid = sample
+            # Convert to dict for iteration - more efficient than row-by-row processing
+            sample_dict = sample_data.select(['rt_original', 'rt', 'mz', 'inty']).to_dicts()
+            for row_dict in sample_dict:
+                rt_original = row_dict['rt_original']
+                rt_current = row_dict['rt']
+                mz = row_dict['mz']
+                inty = row_dict['inty']
+                alpha = inty / max_inty
+                size = markersize + 2 if sample_idx == 0 else markersize
+                before_data.append({
+                    'rt': rt_original, 'mz': mz, 'inty': inty, 'alpha': alpha,
+                    'sample_idx': sample_idx, 'sample_name': sample_name,
+                    'sample_uid': sample_uid, 'size': size
+                })
+                after_data.append({
+                    'rt': rt_current, 'mz': mz, 'inty': inty, 'alpha': alpha,
+                    'sample_idx': sample_idx, 'sample_name': sample_name,
+                    'sample_uid': sample_uid, 'size': size
+                })
+    # Get sample colors from samples_df using sample indices
+    # Extract unique sample information from the dictionaries we created
+    if before_data:
+        # Create mapping from sample_idx to sample_uid more efficiently
+        sample_idx_to_uid = {}
+        for item in before_data:
+            if item['sample_idx'] not in sample_idx_to_uid:
+                sample_idx_to_uid[item['sample_idx']] = item['sample_uid']
+    else:
+        sample_idx_to_uid = {}
+    # Get colors from samples_df
+    sample_uids_list = list(sample_idx_to_uid.values())
+    if sample_uids_list and hasattr(self, 'samples_df') and self.samples_df is not None:
+        sample_colors = (
+            self.samples_df
+            .filter(pl.col("sample_uid").is_in(sample_uids_list))
+            .select(["sample_uid", "sample_color"])
+            .to_dict(as_series=False)
+        )
+        uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
+    else:
+        uid_to_color = {}
-    before_source = ColumnDataSource(before_df)
-    after_source = ColumnDataSource(after_df)
+    # Create color map for sample indices
+    color_map: dict[int, str] = {}
+    for sample_idx, sample_uid in sample_idx_to_uid.items():
+        color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4")  # fallback to blue
+    # Add sample_color to data dictionaries before creating DataFrames
+    if before_data:
+        for item in before_data:
+            item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
+    if after_data:
+        for item in after_data:
+            item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
+    # Now create DataFrames with the sample_color already included
+    before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
+    after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
     # Create Bokeh figures
     p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
@@ -177,15 +241,9 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
     p2.background_fill_color = 'white'
     p2.border_fill_color = 'white'
     p2.min_border = 0
-    # Color mapping using Turbo256
-    unique_samples = sorted(list(set(before_df['sample_idx'].tolist()))) if not before_df.empty else []
-    colors = Turbo256
-    color_map: dict[int, str] = {}
-    n = max(1, len(unique_samples))
-    step = max(1, 256 // n)
-    for i, sample_idx in enumerate(unique_samples):
-        color_map[sample_idx] = colors[(i * step) % 256]
+    # Get unique sample indices for iteration
+    unique_samples = sorted(list(set(item['sample_idx'] for item in before_data))) if before_data else []
     renderers_before = []
     renderers_after = []
@@ -206,10 +264,10 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
             renderers_after.append(r)
     # Add hover tools
-    hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_before)
+    hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_before)
     p1.add_tools(hover1)
-    hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_after)
+    hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_after)
     p2.add_tools(hover2)
     # Create layout with both plots side by side
@@ -232,13 +290,13 @@ def plot_consensus_2d(
     self,
     filename=None,
     colorby="number_samples",
+    cmap=None,
+    markersize=4,
     sizeby="inty_mean",
-    markersize=6,
-    size="dynamic",
+    scaling="dynamic",
     alpha=0.7,
-    cmap=None,
-    width=900,
-    height=900,
+    width=600,
+    height=450,
     mz_range=None,
     rt_range=None,
 ):
@@ -317,7 +375,7 @@ def plot_consensus_2d(
         ])
     if cmap is None:
-        cmap = "vi"
+        cmap = "viridis"
     elif cmap == "grey":
         cmap = "Greys256"
@@ -334,12 +392,49 @@ def plot_consensus_2d(
     except ImportError:
         from bokeh.models.annotations import ColorBar
     from bokeh.palettes import viridis
+    # Import cmap for colormap handling
+    from cmap import Colormap
     # Convert Polars DataFrame to pandas for Bokeh compatibility
     data_pd = data.to_pandas()
     source = ColumnDataSource(data_pd)
+    # Handle colormap using cmap.Colormap
+    try:
+        # Get colormap palette using cmap
+        if isinstance(cmap, str):
+            colormap = Colormap(cmap)
+            # Generate 256 colors and convert to hex
+            import numpy as np
+            import matplotlib.colors as mcolors
+            colors = colormap(np.linspace(0, 1, 256))
+            palette = [mcolors.rgb2hex(color) for color in colors]
+        else:
+            colormap = cmap
+            # Try to use to_bokeh() method first
+            try:
+                palette = colormap.to_bokeh()
+                # Ensure we got a color palette, not another mapper
+                if not isinstance(palette, (list, tuple)):
+                    # Fall back to generating colors manually
+                    import numpy as np
+                    import matplotlib.colors as mcolors
+                    colors = colormap(np.linspace(0, 1, 256))
+                    palette = [mcolors.rgb2hex(color) for color in colors]
+            except AttributeError:
+                # Fall back to generating colors manually
+                import numpy as np
+                import matplotlib.colors as mcolors
+                colors = colormap(np.linspace(0, 1, 256))
+                palette = [mcolors.rgb2hex(color) for color in colors]
+    except (AttributeError, ValueError, TypeError) as e:
+        # Fallback to viridis if cmap interpretation fails
+        self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
+        palette = viridis(256)
     color_mapper = LinearColorMapper(
-        palette=viridis(256),
+        palette=palette,
         low=data[colorby].min(),
         high=data[colorby].max(),
     )
@@ -352,11 +447,11 @@ def plot_consensus_2d(
     p.xaxis.axis_label = "Retention Time (min)"
     p.yaxis.axis_label = "m/z"
     scatter_renderer: Any = None
-    if size.lower() in ["dyn", "dynamic"]:
+    if scaling.lower() in ["dyn", "dynamic"]:
         scatter_renderer = p.circle(
             x="rt",
             y="mz",
-            radius=markersize / 10,
+            radius=markersize,
             fill_color={"field": colorby, "transform": color_mapper},
             line_color=None,
             alpha=alpha,
@@ -414,7 +509,6 @@ def plot_samples_2d(
     size="dynamic",
     alpha_max=0.8,
     alpha="inty",
-    cmap="Turbo256",
     max_features=50000,
     width=600,
     height=600,
@@ -447,7 +541,6 @@ def plot_samples_2d(
     from bokeh.plotting import figure, show, output_file
     from bokeh.io.export import export_png
     from bokeh.models import ColumnDataSource, HoverTool
-    from bokeh.palettes import Turbo256
     sample_uids = self._get_sample_uids(samples)
@@ -455,8 +548,14 @@ def plot_samples_2d(
         self.logger.error("No valid sample_uids provided.")
         return
-    colors = Turbo256
-    color_map = {uid: colors[i * (256 // max(1, len(sample_uids)))] for i, uid in enumerate(sample_uids)}
+    # Get sample colors from samples_df
+    sample_colors = (
+        self.samples_df
+        .filter(pl.col("sample_uid").is_in(sample_uids))
+        .select(["sample_uid", "sample_color"])
+        .to_dict(as_series=False)
+    )
+    color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
     p = figure(
         width=width,
@@ -569,6 +668,7 @@ def plot_samples_2d(
                 "inty": sample_data["inty"].values,
                 "alpha": sample_data["alpha"].values,
                 "sample": np.full(len(sample_data), sample_name, dtype=object),
+                "sample_color": np.full(len(sample_data), color_values[uid], dtype=object),
             },
         )
@@ -604,6 +704,7 @@ def plot_samples_2d(
         hover = HoverTool(
             tooltips=[
                 ("sample", "@sample"),
+                ("sample_color", "$color[swatch]:sample_color"),
                 ("rt", "@rt{0.00}"),
                 ("mz", "@mz{0.0000}"),
                 ("intensity", "@inty{0.0e+0}"),
@@ -637,7 +738,6 @@ def plot_bpc(
     filename: str | None = None,
     width: int = 1000,
     height: int = 300,
-    rt_unit: str = "s",
     original: bool = False,
 ):
     """
@@ -653,7 +753,6 @@ def plot_bpc(
     from bokeh.plotting import figure, show, output_file
     from bokeh.models import ColumnDataSource, HoverTool
     from bokeh.io.export import export_png
-    from bokeh.palettes import Turbo256
     from masster.study.helpers import get_bpc
     sample_uids = self._get_sample_uids(samples)
@@ -664,10 +763,14 @@ def plot_bpc(
     # Debug: show which sample_uids we will process
     self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
-    colors = Turbo256
-    n = max(1, len(sample_uids))
-    step = max(1, 256 // n)
-    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+    # Get sample colors from samples_df
+    sample_colors = (
+        self.samples_df
+        .filter(pl.col("sample_uid").is_in(sample_uids))
+        .select(["sample_uid", "sample_color"])
+        .to_dict(as_series=False)
+    )
+    color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
     # If plotting original (uncorrected) RTs, use the requested title.
     if original:
@@ -675,6 +778,17 @@ def plot_bpc(
     else:
         plot_title = title or "Base Peak Chromatograms"
+    # Get rt_unit from the first chromatogram, default to "s" if not available
+    rt_unit = "s"
+    for uid in sample_uids:
+        try:
+            first_chrom = get_bpc(self, sample=uid, label=None, original=original)
+            if hasattr(first_chrom, 'rt_unit'):
+                rt_unit = first_chrom.rt_unit
+                break
+        except Exception:
+            continue
     p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
     p.xaxis.axis_label = f"Retention Time ({rt_unit})"
     p.yaxis.axis_label = "Intensity"
@@ -691,7 +805,7 @@ def plot_bpc(
     for uid in sample_uids:
         try:
-            chrom = get_bpc(self, sample=uid, rt_unit=rt_unit, label=None, original=original)
+            chrom = get_bpc(self, sample=uid, label=None, original=original)
         except Exception as e:
             # log and skip samples we can't compute BPC for
             self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
@@ -743,7 +857,7 @@ def plot_bpc(
             f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
         )
-        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
         src = ColumnDataSource(data)
         r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -754,7 +868,7 @@ def plot_bpc(
         self.logger.warning("No BPC curves to plot for the selected samples.")
         return
-    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
     p.add_tools(hover)
     # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -784,13 +898,12 @@ def plot_bpc(
 def plot_eic(
     self,
     mz,
-    mz_tol=0.01,
+    mz_tol=None,
     samples=None,
     title: str | None = None,
     filename: str | None = None,
     width: int = 1000,
     height: int = 300,
-    rt_unit: str = "s",
     original: bool = False,
 ):
     """
@@ -799,14 +912,20 @@ def plot_eic(
     Parameters mirror `plot_bpc` with additional `mz` and `mz_tol` arguments. The function
     retrieves a Sample object for each sample UID, calls `sample.get_eic(mz, mz_tol)`, and
     overlays the resulting chromatograms.
+    Args:
+        mz_tol: m/z tolerance in Da. If None, uses study.parameters.eic_mz_tol as default.
     """
     # Local imports to avoid heavy top-level deps / circular imports
     from bokeh.plotting import figure, show, output_file
     from bokeh.models import ColumnDataSource, HoverTool
     from bokeh.io.export import export_png
-    from bokeh.palettes import Turbo256
     from masster.study.helpers import get_eic
+    # Use study's eic_mz_tol parameter as default if not provided
+    if mz_tol is None:
+        mz_tol = self.parameters.eic_mz_tol
     if mz is None:
         self.logger.error("mz must be provided for EIC plotting")
         return
@@ -816,13 +935,28 @@ def plot_eic(
         self.logger.error("No valid sample_uids provided for EIC plotting.")
         return
-    colors = Turbo256
-    n = max(1, len(sample_uids))
-    step = max(1, 256 // n)
-    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+    # Get sample colors from samples_df
+    sample_colors = (
+        self.samples_df
+        .filter(pl.col("sample_uid").is_in(sample_uids))
+        .select(["sample_uid", "sample_color"])
+        .to_dict(as_series=False)
+    )
+    color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
     plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
+    # Get rt_unit from the first chromatogram, default to "s" if not available
+    rt_unit = "s"
+    for uid in sample_uids:
+        try:
+            first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
+            if hasattr(first_chrom, 'rt_unit'):
+                rt_unit = first_chrom.rt_unit
+                break
+        except Exception:
+            continue
     p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
     p.xaxis.axis_label = f"Retention Time ({rt_unit})"
     p.yaxis.axis_label = "Intensity"
@@ -839,7 +973,7 @@ def plot_eic(
     for uid in sample_uids:
         try:
-            chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, rt_unit=rt_unit, label=None)
+            chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
         except Exception as e:
             # log and skip samples we can't compute EIC for
             self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
@@ -885,7 +1019,7 @@ def plot_eic(
         color = color_map.get(uid, "#000000")
-        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
         src = ColumnDataSource(data)
         r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -896,7 +1030,7 @@ def plot_eic(
         self.logger.warning("No EIC curves to plot for the selected samples.")
         return
-    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e0}")], renderers=renderers)
     p.add_tools(hover)
     if getattr(p, "legend", None) and len(p.legend) > 0:
@@ -928,7 +1062,6 @@ def plot_rt_correction(
     filename: str | None = None,
     width: int = 1000,
     height: int = 300,
-    rt_unit: str = "s",
 ):
     """
     Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
@@ -937,7 +1070,6 @@ def plot_rt_correction(
     """
     from bokeh.plotting import figure, show, output_file
     from bokeh.models import ColumnDataSource, HoverTool
-    from bokeh.palettes import Turbo256
     import numpy as _np
     # Validate features dataframe
@@ -954,11 +1086,17 @@ def plot_rt_correction(
         self.logger.error("No valid sample_uids provided for RT correction plotting.")
         return
-    # Color mapping like plot_bpc
-    colors = Turbo256
-    n = max(1, len(sample_uids))
-    step = max(1, 256 // n)
-    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+    # Get sample colors from samples_df
+    sample_colors = (
+        self.samples_df
+        .filter(pl.col("sample_uid").is_in(sample_uids))
+        .select(["sample_uid", "sample_color"])
+        .to_dict(as_series=False)
+    )
+    color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
+    # For RT correction plots, default to "s" since we're working with features_df directly
+    rt_unit = "s"
     p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
     p.xaxis.axis_label = f"Retention Time ({rt_unit})"
@@ -1026,7 +1164,7 @@ def plot_rt_correction(
         color = color_map.get(uid, "#000000")
-        data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
+        data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
         src = ColumnDataSource(data)
         r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
@@ -1037,7 +1175,7 @@ def plot_rt_correction(
         self.logger.warning("No RT correction curves to plot for the selected samples.")
         return
-    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
     p.add_tools(hover)
     # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -1083,15 +1221,17 @@ def plot_chrom(
         self.logger.error("No chromatogram data found.")
         return
-    # Local import for color palette
-    from bokeh.palettes import Turbo256
-    # Assign a fixed color to each sample/column
+    # Get sample colors for alignment plots
+    # Need to map sample names to colors since chromatogram data uses sample names as columns
     sample_names = [col for col in chroms.columns if col not in ["consensus_uid"]]
     if not sample_names:
         self.logger.error("No sample names found in chromatogram data.")
         return
-    color_map = {sample: Turbo256[i * (256 // max(1, len(sample_names)))] for i, sample in enumerate(sample_names)}
+    # Create color mapping by getting sample_color for each sample_name
+    samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
+    sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
+    color_map = {name: sample_name_to_color.get(name, "#1f77b4") for name in sample_names}  # fallback to blue
     plots = []
     self.logger.info(f"Plotting {chroms.shape[0]} chromatograms...")
@@ -1461,8 +1601,8 @@ def plot_consensus_stats(
 def plot_pca(
     self,
     filename=None,
-    width=400,
-    height=400,
+    width=500,
+    height=450,
     alpha=0.8,
     markersize=6,
     n_components=2,
@@ -1484,7 +1624,7 @@ def plot_pca(
     """
     from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
     from bokeh.plotting import figure, show, output_file
-    from bokeh.palettes import Category20, viridis, Turbo256
+    from bokeh.palettes import Category20, viridis
     from bokeh.transform import factor_cmap
     from sklearn.decomposition import PCA
     from sklearn.preprocessing import StandardScaler
@@ -1507,7 +1647,7 @@ def plot_pca(
         self.logger.error("No samples dataframe available.")
         return
-    self.logger.info(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
+    self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
     # Convert consensus matrix to numpy if it's not already
     if hasattr(consensus_matrix, "values"):
@@ -1534,7 +1674,7 @@ def plot_pca(
     # Get explained variance ratios
     explained_var = pca.explained_variance_ratio_
-    self.logger.info(f"PCA explained variance ratios: {explained_var}")
+    self.logger.debug(f"PCA explained variance ratios: {explained_var}")
     # Convert samples_df to pandas for easier manipulation
     samples_pd = samples_df.to_pandas()
@@ -1619,15 +1759,31 @@ def plot_pca(
                 legend_field=color_by,
             )
     else:
-        # If no color_by provided, color points by sample similar to plot_samples_2d
+        # If no color_by provided, use sample_color column from samples_df
         if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
             # Choose the identifier to map colors by
             id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
-            sample_ids = list(pd.unique(pca_df[id_col]))
-            colors = Turbo256
-            color_map = {uid: colors[i * (256 // max(1, len(sample_ids)))] for i, uid in enumerate(sample_ids)}
+            # Get colors from samples_df based on the identifier
+            if id_col == "sample_uid":
+                sample_colors = (
+                    self.samples_df
+                    .filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
+                    .select(["sample_uid", "sample_color"])
+                    .to_dict(as_series=False)
+                )
+                color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
+            else:  # sample_name
+                sample_colors = (
+                    self.samples_df
+                    .filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
+                    .select(["sample_name", "sample_color"])
+                    .to_dict(as_series=False)
+                )
+                color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
             # Map colors into dataframe
-            pca_df["color"] = [color_map[x] for x in pca_df[id_col]]
+            pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]]  # fallback to blue
             # Update the ColumnDataSource with new color column
             source = ColumnDataSource(pca_df)
             scatter = p.scatter(
@@ -1652,14 +1808,17 @@ def plot_pca(
     tooltip_list = []
     # Columns to exclude from tooltips (file paths and internal/plot fields)
-    excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}
+    excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2", "size"}
     # Add all sample dataframe columns to tooltips, skipping excluded ones
     for col in samples_pd.columns:
         if col in excluded_cols:
             continue
         if col in pca_df.columns:
-            if pca_df[col].dtype in ["float64", "float32"]:
+            if col == "sample_color":
+                # Display sample_color as a colored swatch
+                tooltip_list.append(('color', "$color[swatch]:sample_color"))
+            elif pca_df[col].dtype in ["float64", "float32"]:
                 tooltip_list.append((col, f"@{col}{{0.00}}"))
             else:
                 tooltip_list.append((col, f"@{col}"))
@@ -1691,7 +1850,6 @@ def plot_tic(
     filename: str | None = None,
     width: int = 1000,
     height: int = 300,
-    rt_unit: str = "s",
     original: bool = False,
 ):
     """
@@ -1703,7 +1861,6 @@ def plot_tic(
     from bokeh.plotting import figure, show, output_file
     from bokeh.models import ColumnDataSource, HoverTool
     from bokeh.io.export import export_png
-    from bokeh.palettes import Turbo256
     from masster.study.helpers import get_tic
     sample_uids = self._get_sample_uids(samples)
@@ -1711,13 +1868,28 @@ def plot_tic(
         self.logger.error("No valid sample_uids provided for TIC plotting.")
         return
-    colors = Turbo256
-    n = max(1, len(sample_uids))
-    step = max(1, 256 // n)
-    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+    # Get sample colors from samples_df
+    sample_colors = (
+        self.samples_df
+        .filter(pl.col("sample_uid").is_in(sample_uids))
+        .select(["sample_uid", "sample_color"])
+        .to_dict(as_series=False)
+    )
+    color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
     plot_title = title or "Total Ion Chromatograms"
+    # Get rt_unit from the first chromatogram, default to "s" if not available
+    rt_unit = "s"
+    for uid in sample_uids:
+        try:
+            first_chrom = get_tic(self, sample=uid, label=None)
+            if hasattr(first_chrom, 'rt_unit'):
+                rt_unit = first_chrom.rt_unit
+                break
+        except Exception:
+            continue
     p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
     p.xaxis.axis_label = f"Retention Time ({rt_unit})"
     p.yaxis.axis_label = "Intensity"
@@ -1778,7 +1950,7 @@ def plot_tic(
         color = color_map.get(uid, "#000000")
-        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
         src = ColumnDataSource(data)
         r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -1789,7 +1961,7 @@ def plot_tic(
         self.logger.warning("No TIC curves to plot for the selected samples.")
         return
-    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
     p.add_tools(hover)
     # Only set legend properties if a legend was actually created to avoid Bokeh warnings

masster 0.3.13__py3-none-any.whl → 0.3.15__py3-none-any.whl

Potentially problematic release.

masster 0.3.13__py3-none-any.whl → 0.3.15__py3-none-any.whl