PyPI - masster - Versions diffs - 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl - Mend

masster-0.3.11-py3-none-any.whl → masster-0.3.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (16)

masster/sample/helpers.py +53 -4
masster/sample/plot.py +100 -16
masster/sample/sample.py +6 -0
masster/sample/sample5_schema.json +43 -34
masster/study/defaults/align_def.py +10 -10
masster/study/helpers.py +466 -3
masster/study/load.py +6 -0
masster/study/plot.py +809 -130
masster/study/processing.py +35 -10
masster/study/study.py +60 -4
masster/study/study5_schema.json +83 -83
{masster-0.3.11.dist-info → masster-0.3.12.dist-info}/METADATA +1 -1
{masster-0.3.11.dist-info → masster-0.3.12.dist-info}/RECORD +16 -16
{masster-0.3.11.dist-info → masster-0.3.12.dist-info}/WHEEL +0 -0
{masster-0.3.11.dist-info → masster-0.3.12.dist-info}/entry_points.txt +0 -0
{masster-0.3.11.dist-info → masster-0.3.12.dist-info}/licenses/LICENSE +0 -0

masster/study/plot.py CHANGED Viewed

@@ -12,135 +12,220 @@ from tqdm import tqdm
 hv.extension("bokeh")
-def plot_alignment(self, filename=None):
-    import matplotlib.pyplot as plt
-    import numpy as np
+# Replace any unaliased import that could be shadowed:
+# from bokeh.layouts import row
+from bokeh.layouts import row as bokeh_row
-    if self.features_maps is None or len(self.features_maps) == 0:
-        self.load_features()
-    feature_maps = self.features_maps
-    ref_index = self.alignment_ref_index
-    if ref_index is None:
-        self.logger.error("No alignment performed yet.")
-        return
+def plot_alignment(self, maps: bool = True, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
+    """Visualize retention time alignment using two synchronized Bokeh scatter plots.
-    fmaps = [
-        feature_maps[ref_index],
-        *feature_maps[:ref_index],
-        *feature_maps[ref_index + 1 :],
-    ]
+    - When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
+      and builds two side-by-side plots: Original RT (left) and Current/Aligned RT (right).
+    - When ``maps=False`` the function uses ``self.features_df`` and expects an
+      ``rt_original`` column (before) and ``rt`` column (after).
-    fig = plt.figure(figsize=(12, 6))
+    Parameters
+    - maps: whether to use feature maps (default True).
+    - filename: optional HTML file path to save the plot.
+    - width/height: pixel size of each subplot.
+    - markersize: base marker size.
-    ax = fig.add_subplot(1, 2, 1)
-    ax.set_title("Feature maps before alignment")
-    ax.set_ylabel("m/z")
-    ax.set_xlabel("RT")
+    Returns
+    - Bokeh layout (row) containing the two synchronized plots.
+    """
+    # Local imports so the module can be used even if bokeh isn't needed elsewhere
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.palettes import Turbo256
+    import pandas as pd
-    # use alpha value to display feature intensity
-    ax.scatter(
-        [f.getRT() for f in fmaps[0]],
-        [f.getMZ() for f in fmaps[0]],
-        alpha=np.asarray([f.getIntensity() for f in fmaps[0]]) / max([f.getIntensity() for f in fmaps[0]]),
-        s=4,
-    )
+    # Build the before/after tabular data used for plotting
+    before_data: list[dict[str, Any]] = []
+    after_data: list[dict[str, Any]] = []
+    if maps:
+        # Ensure feature maps are loaded
+        if self.features_maps is None or len(self.features_maps) == 0:
+            self.load_features()
+        fmaps = self.features_maps or []
+        if not fmaps:
+            self.logger.error("No feature maps available for plotting.")
+            return
+        # Reference (first) sample: use current RT for both before and after
+        ref = fmaps[0]
+        ref_rt = [f.getRT() for f in ref]
+        ref_mz = [f.getMZ() for f in ref]
+        ref_inty = [f.getIntensity() for f in ref]
+        max_ref_inty = max(ref_inty) if ref_inty else 1
+        # sample metadata
+        if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
+            samples_info = self.samples_df.to_pandas()
+            ref_sample_uid = samples_info.iloc[0]['sample_uid'] if 'sample_uid' in samples_info.columns else 'Reference_UID'
+            ref_sample_name = samples_info.iloc[0]['sample_name'] if 'sample_name' in samples_info.columns else 'Reference'
+        else:
+            ref_sample_uid = 'Reference_UID'
+            ref_sample_name = 'Reference'
+        for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
+            before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
+            after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
+        # Remaining samples
+        for sample_idx, fm in enumerate(fmaps[1:], start=1):
+            mz_vals = []
+            inty_vals = []
+            original_rt = []
+            aligned_rt = []
+            for f in fm:
+                try:
+                    orig = f.getMetaValue('original_RT')
+                except Exception:
+                    orig = None
+                if orig is None:
+                    original_rt.append(f.getRT())
+                else:
+                    original_rt.append(orig)
+                aligned_rt.append(f.getRT())
+                mz_vals.append(f.getMZ())
+                inty_vals.append(f.getIntensity())
+            if not inty_vals:
+                continue
+            max_inty = max(inty_vals)
+            if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
+                samples_info = self.samples_df.to_pandas()
+                if sample_idx < len(samples_info):
+                    sample_name = samples_info.iloc[sample_idx].get('sample_name', f'Sample {sample_idx}')
+                    sample_uid = samples_info.iloc[sample_idx].get('sample_uid', f'Sample_{sample_idx}_UID')
+                else:
+                    sample_name = f'Sample {sample_idx}'
+                    sample_uid = f'Sample_{sample_idx}_UID'
+            else:
+                sample_name = f'Sample {sample_idx}'
+                sample_uid = f'Sample_{sample_idx}_UID'
-    for fm in fmaps[1:]:
-        ax.scatter(
-            [f.getMetaValue("original_RT") for f in fm],
-            [f.getMZ() for f in fm],
-            alpha=np.asarray([f.getIntensity() for f in fm]) / max([f.getIntensity() for f in fm]),
-            s=2,  # Set symbol size to 3
-        )
+            for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
+                before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
-    ax = fig.add_subplot(1, 2, 2)
-    ax.set_title("Feature maps after alignment")
-    ax.set_ylabel("m/z")
-    ax.set_xlabel("RT")
-    for fm in fmaps:
-        ax.scatter(
-            [f.getRT() for f in fm],
-            [f.getMZ() for f in fm],
-            alpha=np.asarray([f.getIntensity() for f in fm]) / max([f.getIntensity() for f in fm]),
-            s=2,  # Set symbol size to 3
-        )
+            for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
+                after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
-    fig.tight_layout()
+    else:
+        # Use features_df
+        if self.features_df is None or self.features_df.is_empty():
+            self.logger.error("No features_df found. Load features first.")
+            return
+        required_cols = ['rt', 'mz', 'inty']
+        missing = [c for c in required_cols if c not in self.features_df.columns]
+        if missing:
+            self.logger.error(f"Missing required columns in features_df: {missing}")
+            return
-def plot_alignment_bokeh(self, filename=None):
-    from bokeh.plotting import figure, show, output_file
-    from bokeh.layouts import gridplot
+        if 'rt_original' not in self.features_df.columns:
+            self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
+            return
-    feature_maps = self.features_maps
-    ref_index = self.alignment_ref_index
-    if ref_index is None:
-        self.logger.warning("No alignment performed yet.")
-        return
+        features_pd = self.features_df.to_pandas()
-    fmaps = [
-        feature_maps[ref_index],
-        *feature_maps[:ref_index],
-        *feature_maps[ref_index + 1 :],
-    ]
+        sample_col = 'sample_uid' if 'sample_uid' in features_pd.columns else 'sample_name'
+        if sample_col not in features_pd.columns:
+            self.logger.error("No sample identifier column found in features_df.")
+            return
+        samples = features_pd[sample_col].unique()
+        for sample_idx, sample in enumerate(samples):
+            sample_data = features_pd[features_pd[sample_col] == sample]
+            max_inty = sample_data['inty'].max() if sample_data['inty'].max() > 0 else 1
+            sample_name = str(sample)
+            sample_uid = sample if sample_col == 'sample_uid' else (sample_data['sample_uid'].iloc[0] if 'sample_uid' in sample_data.columns else sample)
+            for _, row in sample_data.iterrows():
+                before_data.append({'rt': row['rt_original'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
+                after_data.append({'rt': row['rt'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
+    # Ensure dataframes exist even if empty
+    before_df = pd.DataFrame(before_data)
+    after_df = pd.DataFrame(after_data)
+    # Create ColumnDataSources (safe even for empty dfs)
+    from bokeh.models import ColumnDataSource
+    before_source = ColumnDataSource(before_df)
+    after_source = ColumnDataSource(after_df)
     # Create Bokeh figures
-    p1 = figure(
-        title="Feature maps before alignment",
-        width=600,
-        height=400,
-    )
-    p1.xaxis.axis_label = "RT"
-    p1.yaxis.axis_label = "m/z"
-    p2 = figure(
-        title="Feature maps after alignment",
-        width=600,
-        height=400,
-    )
-    p2.xaxis.axis_label = "RT"
-    p2.yaxis.axis_label = "m/z"
-    # Plot before alignment
-    p1.scatter(
-        x=[f.getRT() for f in fmaps[0]],
-        y=[f.getMZ() for f in fmaps[0]],
-        size=4,
-        alpha=[f.getIntensity() / max([f.getIntensity() for f in fmaps[0]]) for f in fmaps[0]],
-        color="blue",
-    )
+    p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
+    p1.outline_line_color = None
+    p1.background_fill_color = 'white'
+    p1.border_fill_color = 'white'
+    p1.min_border = 0
+    p2 = figure(width=width, height=height, title='Current RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save', x_range=p1.x_range, y_range=p1.y_range)
+    p2.outline_line_color = None
+    p2.background_fill_color = 'white'
+    p2.border_fill_color = 'white'
+    p2.min_border = 0
+    # Color mapping using Turbo256
+    unique_samples = sorted(list(set(before_df['sample_idx'].tolist()))) if not before_df.empty else []
+    colors = Turbo256
+    color_map: dict[int, str] = {}
+    n = max(1, len(unique_samples))
+    step = max(1, 256 // n)
+    for i, sample_idx in enumerate(unique_samples):
+        color_map[sample_idx] = colors[(i * step) % 256]
-    for fm in fmaps[1:]:
-        p1.scatter(
-            x=[f.getMetaValue("original_RT") for f in fm],
-            y=[f.getMZ() for f in fm],
-            size=2,
-            alpha=[f.getIntensity() / max([f.getIntensity() for f in fm]) for f in fm],
-            color="green",
-        )
+    renderers_before = []
+    renderers_after = []
-    # Plot after alignment
-    for fm in fmaps:
-        p2.scatter(
-            x=[f.getRT() for f in fm],
-            y=[f.getMZ() for f in fm],
-            size=2,
-            alpha=[f.getIntensity() / max([f.getIntensity() for f in fm]) for f in fm],
-            color="red",
-        )
+    for sample_idx in unique_samples:
+        sb = before_df[before_df['sample_idx'] == sample_idx]
+        sa = after_df[after_df['sample_idx'] == sample_idx]
+        color = color_map.get(sample_idx, '#000000')
+        if not sb.empty:
+            src = ColumnDataSource(sb)
+            r = p1.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
+            renderers_before.append(r)
+        if not sa.empty:
+            src = ColumnDataSource(sa)
+            r = p2.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
+            renderers_after.append(r)
-    # Arrange plots in a grid
-    # Link the x_range and y_range of both plots for synchronized zooming/panning
-    p2.x_range = p1.x_range
-    p2.y_range = p1.y_range
+    # Add hover tools
+    hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_before)
+    p1.add_tools(hover1)
-    grid = gridplot([[p1, p2]])
+    hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_after)
+    p2.add_tools(hover2)
-    # Output to file and show
+    # Create layout with both plots side by side
+    # Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
+    layout = bokeh_row(p1, p2, sizing_mode='fixed', width=width, height=height)
+    # Output and show
     if filename:
+        from bokeh.plotting import output_file, show
         output_file(filename)
-    show(grid)
+        show(layout)
+    else:
+        from bokeh.plotting import show
+        show(layout)
+    return layout
 def plot_consensus_2d(
@@ -331,8 +416,8 @@ def plot_samples_2d(
     alpha="inty",
     cmap="Turbo256",
     max_features=50000,
-    width=900,
-    height=900,
+    width=600,
+    height=600,
     mz_range=None,
     rt_range=None,
 ):
@@ -455,7 +540,10 @@ def plot_samples_2d(
     color_values = {}
     sample_names = {}
-    for uid in sample_uids:
+    # Decide whether to show tqdm based on log level (show for INFO/DEBUG/TRACE)
+    tqdm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
+    for uid in tqdm(sample_uids, desc="Plotting BPCs", disable=tqdm_disable):
         sample_data = features_pd[features_pd["sample_uid"] == uid]
         if sample_data.empty:
             continue
@@ -525,7 +613,9 @@ def plot_samples_2d(
         p.add_tools(hover)
     # Remove legend from plot
-    p.legend.visible = False
+    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
     if filename:
         if filename.endswith(".html"):
             output_file(filename)
@@ -540,6 +630,441 @@ def plot_samples_2d(
     return
+def plot_bpc(
+    self,
+    samples=None,
+    title: str | None = None,
+    filename: str | None = None,
+    width: int = 1000,
+    height: int = 300,
+    rt_unit: str = "s",
+    original: bool = False,
+):
+    """
+    Plot Base Peak Chromatograms (BPC) for selected samples overlayed using Bokeh.
+    This collects per-sample BPCs via `get_bpc(self, sample=uid)` and overlays them.
+    Colors are mapped per-sample using the same Turbo256 palette as `plot_samples_2d`.
+    Parameters:
+        original (bool): If True, attempt to map RTs back to original RTs using `features_df`.
+                         If False (default), return current/aligned RTs.
+    """
+    # Local imports to avoid heavy top-level deps / circular imports
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.io.export import export_png
+    from bokeh.palettes import Turbo256
+    from masster.study.helpers import get_bpc
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.error("No valid sample_uids provided for BPC plotting.")
+        return
+    # Debug: show which sample_uids we will process
+    self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
+    colors = Turbo256
+    n = max(1, len(sample_uids))
+    step = max(1, 256 // n)
+    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+    # If plotting original (uncorrected) RTs, use the requested title.
+    if original:
+        plot_title = "Base Peak Chromatogarms (uncorrected)"
+    else:
+        plot_title = title or "Base Peak Chromatograms"
+    p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
+    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
+    p.yaxis.axis_label = "Intensity"
+    renderers = []
+    # Build sample name mapping once
+    samples_info = None
+    if hasattr(self, "samples_df") and self.samples_df is not None:
+        try:
+            samples_info = self.samples_df.to_pandas()
+        except Exception:
+            samples_info = None
+    for uid in sample_uids:
+        try:
+            chrom = get_bpc(self, sample=uid, rt_unit=rt_unit, label=None, original=original)
+        except Exception as e:
+            # log and skip samples we can't compute BPC for
+            self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
+            continue
+        # extract arrays
+        try:
+            # prefer Chromatogram API
+            chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
+            rt = chrom_dict.get("rt")
+            inty = chrom_dict.get("inty")
+        except Exception:
+            try:
+                rt = chrom.rt
+                inty = chrom.inty
+            except Exception as e:
+                self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
+                continue
+        if rt is None or inty is None:
+            continue
+        # Ensure numpy arrays
+        import numpy as _np
+        rt = _np.asarray(rt)
+        inty = _np.asarray(inty)
+        if rt.size == 0 or inty.size == 0:
+            continue
+        # Sort by rt
+        idx = _np.argsort(rt)
+        rt = rt[idx]
+        inty = inty[idx]
+        sample_name = str(uid)
+        if samples_info is not None:
+            try:
+                row = samples_info[samples_info["sample_uid"] == uid]
+                if not row.empty:
+                    sample_name = row.iloc[0].get("sample_name", sample_name)
+            except Exception:
+                pass
+        # Determine color for this sample early so we can log it
+        color = color_map.get(uid, "#000000")
+        # Debug: log sample processing details
+        self.logger.debug(
+            f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
+        )
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        src = ColumnDataSource(data)
+        r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
+        r_points = p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
+        renderers.append(r_line)
+    if not renderers:
+        self.logger.warning("No BPC curves to plot for the selected samples.")
+        return
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    p.add_tools(hover)
+    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
+    if filename:
+        if filename.endswith(".html"):
+            output_file(filename)
+            show(p)
+        elif filename.endswith(".png"):
+            try:
+                export_png(p, filename=filename)
+            except Exception:
+                # fallback to saving HTML
+                output_file(filename.replace(".png", ".html"))
+                show(p)
+        else:
+            output_file(filename)
+            show(p)
+    else:
+        show(p)
+    return p
+def plot_eic(
+    self,
+    mz,
+    mz_tol=0.01,
+    samples=None,
+    title: str | None = None,
+    filename: str | None = None,
+    width: int = 1000,
+    height: int = 300,
+    rt_unit: str = "s",
+    original: bool = False,
+):
+    """
+    Plot Extracted Ion Chromatograms (EIC) for a target m/z (± mz_tol) for selected samples.
+    Parameters mirror `plot_bpc` with additional `mz` and `mz_tol` arguments. The function
+    retrieves a Sample object for each sample UID, calls `sample.get_eic(mz, mz_tol)`, and
+    overlays the resulting chromatograms.
+    """
+    # Local imports to avoid heavy top-level deps / circular imports
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.io.export import export_png
+    from bokeh.palettes import Turbo256
+    from masster.study.helpers import get_eic
+    if mz is None:
+        self.logger.error("mz must be provided for EIC plotting")
+        return
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.error("No valid sample_uids provided for EIC plotting.")
+        return
+    colors = Turbo256
+    n = max(1, len(sample_uids))
+    step = max(1, 256 // n)
+    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+    plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
+    p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
+    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
+    p.yaxis.axis_label = "Intensity"
+    renderers = []
+    # Build sample name mapping once
+    samples_info = None
+    if hasattr(self, "samples_df") and self.samples_df is not None:
+        try:
+            samples_info = self.samples_df.to_pandas()
+        except Exception:
+            samples_info = None
+    for uid in sample_uids:
+        try:
+            chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, rt_unit=rt_unit, label=None)
+        except Exception as e:
+            # log and skip samples we can't compute EIC for
+            self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
+            continue
+        # extract arrays
+        try:
+            # prefer Chromatogram API
+            chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
+            rt = chrom_dict.get("rt")
+            inty = chrom_dict.get("inty")
+        except Exception:
+            try:
+                rt = chrom.rt
+                inty = chrom.inty
+            except Exception as e:
+                self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
+                continue
+        if rt is None or inty is None:
+            continue
+        import numpy as _np
+        rt = _np.asarray(rt)
+        inty = _np.asarray(inty)
+        if rt.size == 0 or inty.size == 0:
+            continue
+        # Sort by rt
+        idx = _np.argsort(rt)
+        rt = rt[idx]
+        inty = inty[idx]
+        sample_name = str(uid)
+        if samples_info is not None:
+            try:
+                row = samples_info[samples_info["sample_uid"] == uid]
+                if not row.empty:
+                    sample_name = row.iloc[0].get("sample_name", sample_name)
+            except Exception:
+                pass
+        color = color_map.get(uid, "#000000")
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        src = ColumnDataSource(data)
+        r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
+        p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
+        renderers.append(r_line)
+    if not renderers:
+        self.logger.warning("No EIC curves to plot for the selected samples.")
+        return
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    p.add_tools(hover)
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
+    if filename:
+        if filename.endswith(".html"):
+            output_file(filename)
+            show(p)
+        elif filename.endswith(".png"):
+            try:
+                export_png(p, filename=filename)
+            except Exception:
+                output_file(filename.replace(".png", ".html"))
+                show(p)
+        else:
+            output_file(filename)
+            show(p)
+    else:
+        show(p)
+    return p
+def plot_rt_correction(
+    self,
+    samples=None,
+    title: str | None = None,
+    filename: str | None = None,
+    width: int = 1000,
+    height: int = 300,
+    rt_unit: str = "s",
+):
+    """
+    Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
+    This uses the same color mapping as `plot_bpc` so curves for the same samples match.
+    """
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.palettes import Turbo256
+    import numpy as _np
+    # Validate features dataframe
+    if self.features_df is None or self.features_df.is_empty():
+        self.logger.error("No features_df found. Load features first.")
+        return
+    if "rt_original" not in self.features_df.columns:
+        self.logger.error("Column 'rt_original' not found in features_df. Alignment/backup RTs missing.")
+        return
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.error("No valid sample_uids provided for RT correction plotting.")
+        return
+    # Color mapping like plot_bpc
+    colors = Turbo256
+    n = max(1, len(sample_uids))
+    step = max(1, 256 // n)
+    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+    p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
+    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
+    p.yaxis.axis_label = "RT - RT_original (s)"
+    samples_info = None
+    if hasattr(self, "samples_df") and self.samples_df is not None:
+        try:
+            samples_info = self.samples_df.to_pandas()
+        except Exception:
+            samples_info = None
+    renderers = []
+    # Iterate samples and build curves
+    for uid in sample_uids:
+        # Select features belonging to this sample
+        try:
+            if "sample_uid" in self.features_df.columns:
+                sample_feats = self.features_df.filter(pl.col("sample_uid") == uid)
+            elif "sample_name" in self.features_df.columns:
+                sample_feats = self.features_df.filter(pl.col("sample_name") == uid)
+            else:
+                self.logger.debug("No sample identifier column in features_df; skipping sample filtering")
+                continue
+        except Exception as e:
+            self.logger.debug(f"Error filtering features for sample {uid}: {e}")
+            continue
+        if sample_feats.is_empty():
+            continue
+        # Convert to pandas for easy numeric handling
+        try:
+            df = sample_feats.to_pandas()
+        except Exception:
+            continue
+        # Need both rt and rt_original
+        if "rt" not in df.columns or "rt_original" not in df.columns:
+            continue
+        # Drop NA and ensure numeric arrays
+        df = df.dropna(subset=["rt", "rt_original"]).copy()
+        if df.empty:
+            continue
+        rt = _np.asarray(df["rt"], dtype=float)
+        rt_orig = _np.asarray(df["rt_original"], dtype=float)
+        delta = rt - rt_orig
+        # sort by rt
+        idx = _np.argsort(rt)
+        rt = rt[idx]
+        delta = delta[idx]
+        sample_name = str(uid)
+        if samples_info is not None:
+            try:
+                row = samples_info[samples_info["sample_uid"] == uid]
+                if not row.empty:
+                    sample_name = row.iloc[0].get("sample_name", sample_name)
+            except Exception:
+                pass
+        color = color_map.get(uid, "#000000")
+        data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
+        src = ColumnDataSource(data)
+        r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
+        p.scatter("rt", "delta", source=src, size=2, color=color, alpha=0.6)
+        renderers.append(r_line)
+    if not renderers:
+        self.logger.warning("No RT correction curves to plot for the selected samples.")
+        return
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
+    p.add_tools(hover)
+    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
+    if filename:
+        if filename.endswith(".html"):
+            output_file(filename)
+            show(p)
+        elif filename.endswith(".png"):
+            try:
+                from bokeh.io.export import export_png
+                export_png(p, filename=filename)
+            except Exception:
+                output_file(filename.replace(".png", ".html"))
+                show(p)
+        else:
+            output_file(filename)
+            show(p)
+    else:
+        show(p)
+    return p
 def plot_chrom(
     self,
     uids=None,
@@ -936,10 +1461,10 @@ def plot_consensus_stats(
 def plot_pca(
     self,
     filename=None,
-    width=600,
-    height=600,
+    width=400,
+    height=400,
     alpha=0.8,
-    markersize=8,
+    markersize=6,
     n_components=2,
     color_by=None,
     title="PCA of Consensus Matrix",
@@ -959,7 +1484,7 @@ def plot_pca(
     """
     from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
     from bokeh.plotting import figure, show, output_file
-    from bokeh.palettes import Category20, viridis
+    from bokeh.palettes import Category20, viridis, Turbo256
     from bokeh.transform import factor_cmap
     from sklearn.decomposition import PCA
     from sklearn.preprocessing import StandardScaler
@@ -1094,23 +1619,45 @@ def plot_pca(
                 legend_field=color_by,
             )
     else:
-        scatter = p.scatter(
-            "PC1",
-            "PC2",
-            size=markersize,
-            alpha=alpha,
-            color="blue",
-            source=source,
-        )
+        # If no color_by provided, color points by sample similar to plot_samples_2d
+        if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
+            # Choose the identifier to map colors by
+            id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
+            sample_ids = list(pd.unique(pca_df[id_col]))
+            colors = Turbo256
+            color_map = {uid: colors[i * (256 // max(1, len(sample_ids)))] for i, uid in enumerate(sample_ids)}
+            # Map colors into dataframe
+            pca_df["color"] = [color_map[x] for x in pca_df[id_col]]
+            # Update the ColumnDataSource with new color column
+            source = ColumnDataSource(pca_df)
+            scatter = p.scatter(
+                "PC1",
+                "PC2",
+                size=markersize,
+                alpha=alpha,
+                color="color",
+                source=source,
+            )
+        else:
+            scatter = p.scatter(
+                "PC1",
+                "PC2",
+                size=markersize,
+                alpha=alpha,
+                color="blue",
+                source=source,
+            )
     # Create comprehensive hover tooltips with all sample information
-    tooltip_list = [
-        ("PC1", "@PC1{0.00}"),
-        ("PC2", "@PC2{0.00}"),
-    ]
+    tooltip_list = []
-    # Add all sample dataframe columns to tooltips
+    # Columns to exclude from tooltips (file paths and internal/plot fields)
+    excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}
+    # Add all sample dataframe columns to tooltips, skipping excluded ones
     for col in samples_pd.columns:
+        if col in excluded_cols:
+            continue
         if col in pca_df.columns:
             if pca_df[col].dtype in ["float64", "float32"]:
                 tooltip_list.append((col, f"@{col}{{0.00}}"))
@@ -1125,8 +1672,10 @@ def plot_pca(
     # Add legend if using categorical coloring
     if color_mapper and not isinstance(color_mapper, LinearColorMapper) and color_by:
-        p.legend.location = "top_left"
-        p.legend.click_policy = "hide"
+        # Only set legend properties if legends exist (avoid Bokeh warning when none created)
+        if getattr(p, "legend", None) and len(p.legend) > 0:
+            p.legend.location = "top_left"
+            p.legend.click_policy = "hide"
     # Output and show
     if filename:
@@ -1134,3 +1683,133 @@ def plot_pca(
     show(p)
     return p
+def plot_tic(
+    self,
+    samples=None,
+    title: str | None = None,
+    filename: str | None = None,
+    width: int = 1000,
+    height: int = 300,
+    rt_unit: str = "s",
+    original: bool = False,
+):
+    """
+    Plot Total Ion Chromatograms (TIC) for selected samples overlaid using Bokeh.
+    Parameters and behavior mirror `plot_bpc` but use per-sample TICs (get_tic).
+    One line plus small scatter markers is drawn per sample; samples whose TIC
+    cannot be obtained or is empty/malformed are skipped with a debug message.
+    Parameters:
+        samples: Sample selection, passed unchanged to ``self._get_sample_uids``.
+        title: Plot title; defaults to "Total Ion Chromatograms".
+        filename: Optional output path. A ``.png`` path is exported with
+            ``export_png`` (falling back to an HTML file next to it if the export
+            fails); any other path is written as HTML and shown. When omitted the
+            figure is only shown.
+        width: Figure width handed to ``bokeh.plotting.figure``.
+        height: Figure height handed to ``bokeh.plotting.figure``.
+        rt_unit: Unit text used in the x-axis label only; values are not converted.
+        original: Not read by this function at present.
+            NOTE(review): presumably meant to select uncorrected retention times
+            as in `plot_bpc` - confirm and wire through or drop.
+    Returns:
+        The Bokeh figure, or None when no sample uid is selected or no TIC could
+        be plotted.
+    """
+    # Local imports to avoid heavy top-level deps / circular imports
+    import numpy as _np
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.io.export import export_png
+    from bokeh.palettes import Turbo256
+    from masster.study.helpers import get_tic
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.error("No valid sample_uids provided for TIC plotting.")
+        return
+    # Spread the samples evenly over the 256-entry palette; wrap around beyond 256 samples
+    step = max(1, 256 // len(sample_uids))
+    color_map = {uid: Turbo256[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+    plot_title = title or "Total Ion Chromatograms"
+    p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
+    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
+    p.yaxis.axis_label = "Intensity"
+    # Build the sample_uid -> sample_name lookup once instead of filtering the
+    # samples table for every sample; setdefault keeps the first row per uid.
+    name_by_uid = {}
+    samples_df = getattr(self, "samples_df", None)
+    if samples_df is not None:
+        try:
+            samples_info = samples_df.to_pandas()
+            if "sample_uid" in samples_info.columns and "sample_name" in samples_info.columns:
+                for known_uid, known_name in zip(samples_info["sample_uid"], samples_info["sample_name"]):
+                    name_by_uid.setdefault(known_uid, known_name)
+        except Exception as e:
+            # Names are cosmetic: fall back to the uid as label rather than failing the plot
+            self.logger.debug(f"Could not read sample names for TIC plot: {e}")
+    renderers = []
+    for uid in sample_uids:
+        try:
+            chrom = get_tic(self, sample=uid, label=None)
+        except Exception as e:
+            self.logger.debug(f"Skipping sample {uid} for TIC: {e}")
+            continue
+        # Extract arrays: prefer the dict representation, fall back to plain attributes
+        try:
+            if hasattr(chrom, "to_dict"):
+                chrom_dict = chrom.to_dict()
+                rt = chrom_dict.get("rt")
+                inty = chrom_dict.get("inty")
+            else:
+                rt = chrom.rt
+                inty = chrom.inty
+        except Exception:
+            try:
+                rt = chrom.rt
+                inty = chrom.inty
+            except Exception as e:
+                self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
+                continue
+        if rt is None or inty is None:
+            continue
+        rt = _np.asarray(rt)
+        inty = _np.asarray(inty)
+        if rt.size == 0 or inty.size == 0:
+            continue
+        if rt.shape != inty.shape:
+            # Reordering inty by the rt sort order below is only valid for paired arrays
+            self.logger.debug(f"Skipping sample {uid} for TIC: rt and inty differ in shape ({rt.shape} vs {inty.shape})")
+            continue
+        # Sort by rt
+        idx = _np.argsort(rt)
+        rt = rt[idx]
+        inty = inty[idx]
+        sample_name = name_by_uid.get(uid, str(uid))
+        color = color_map.get(uid, "#000000")
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        src = ColumnDataSource(data)
+        r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
+        p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
+        # Hover is attached to the line renderers only
+        renderers.append(r_line)
+    if not renderers:
+        self.logger.warning("No TIC curves to plot for the selected samples.")
+        return
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    p.add_tools(hover)
+    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
+    if filename and filename.endswith(".png"):
+        try:
+            export_png(p, filename=filename)
+        except Exception as e:
+            # Swap only the trailing extension; str.replace would also rewrite
+            # any ".png" occurring earlier in the path.
+            html_filename = filename[: -len(".png")] + ".html"
+            self.logger.warning(f"PNG export failed ({e}); writing {html_filename} instead.")
+            output_file(html_filename)
+            show(p)
+    else:
+        # ".html" and any other extension are written as HTML; no filename just shows the plot
+        if filename:
+            output_file(filename)
+        show(p)
+    return p

masster 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl

Potentially problematic release.

masster 0.3.11py3-none-any.whl → 0.3.12py3-none-any.whl