masster 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/plot.py CHANGED
@@ -12,135 +12,220 @@ from tqdm import tqdm
12
12
  hv.extension("bokeh")
13
13
 
14
14
 
15
- def plot_alignment(self, filename=None):
16
- import matplotlib.pyplot as plt
17
- import numpy as np
15
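+ # Import bokeh's `row` under the alias `bokeh_row` so local variables named `row` (e.g. DataFrame loop rows) cannot shadow it.
16
+ # This replaces the unaliased form: from bokeh.layouts import row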
17
+ from bokeh.layouts import row as bokeh_row
18
18
 
19
- if self.features_maps is None or len(self.features_maps) == 0:
20
- self.load_features()
21
19
 
22
- feature_maps = self.features_maps
23
- ref_index = self.alignment_ref_index
24
- if ref_index is None:
25
- self.logger.error("No alignment performed yet.")
26
- return
20
+ def plot_alignment(self, maps: bool = True, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
21
+ """Visualize retention time alignment using two synchronized Bokeh scatter plots.
27
22
 
28
- fmaps = [
29
- feature_maps[ref_index],
30
- *feature_maps[:ref_index],
31
- *feature_maps[ref_index + 1 :],
32
- ]
23
+ - When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
24
+ and builds two side-by-side plots: Original RT (left) and Current/Aligned RT (right).
25
+ - When ``maps=False`` the function uses ``self.features_df`` and expects an
26
+ ``rt_original`` column (before) and ``rt`` column (after).
33
27
 
34
- fig = plt.figure(figsize=(12, 6))
28
+ Parameters
29
+ - maps: whether to use feature maps (default True).
30
+ - filename: optional HTML file path to save the plot.
31
+ - width/height: pixel size of each subplot.
32
+ - markersize: base marker size.
35
33
 
36
- ax = fig.add_subplot(1, 2, 1)
37
- ax.set_title("Feature maps before alignment")
38
- ax.set_ylabel("m/z")
39
- ax.set_xlabel("RT")
34
+ Returns
35
+ - Bokeh layout (row) containing the two synchronized plots.
36
+ """
37
+ # Local imports so the module can be used even if bokeh isn't needed elsewhere
38
+ from bokeh.models import ColumnDataSource, HoverTool
39
+ from bokeh.plotting import figure, show, output_file
40
+ from bokeh.palettes import Turbo256
41
+ import pandas as pd
40
42
 
41
- # use alpha value to display feature intensity
42
- ax.scatter(
43
- [f.getRT() for f in fmaps[0]],
44
- [f.getMZ() for f in fmaps[0]],
45
- alpha=np.asarray([f.getIntensity() for f in fmaps[0]]) / max([f.getIntensity() for f in fmaps[0]]),
46
- s=4,
47
- )
43
+ # Build the before/after tabular data used for plotting
44
+ before_data: list[dict[str, Any]] = []
45
+ after_data: list[dict[str, Any]] = []
46
+
47
+ if maps:
48
+ # Ensure feature maps are loaded
49
+ if self.features_maps is None or len(self.features_maps) == 0:
50
+ self.load_features()
51
+
52
+ fmaps = self.features_maps or []
53
+
54
+ if not fmaps:
55
+ self.logger.error("No feature maps available for plotting.")
56
+ return
57
+
58
+ # Reference (first) sample: use current RT for both before and after
59
+ ref = fmaps[0]
60
+ ref_rt = [f.getRT() for f in ref]
61
+ ref_mz = [f.getMZ() for f in ref]
62
+ ref_inty = [f.getIntensity() for f in ref]
63
+ max_ref_inty = max(ref_inty) if ref_inty else 1
64
+
65
+ # sample metadata
66
+ if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
67
+ samples_info = self.samples_df.to_pandas()
68
+ ref_sample_uid = samples_info.iloc[0]['sample_uid'] if 'sample_uid' in samples_info.columns else 'Reference_UID'
69
+ ref_sample_name = samples_info.iloc[0]['sample_name'] if 'sample_name' in samples_info.columns else 'Reference'
70
+ else:
71
+ ref_sample_uid = 'Reference_UID'
72
+ ref_sample_name = 'Reference'
73
+
74
+ for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
75
+ before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
76
+ after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
77
+
78
+ # Remaining samples
79
+ for sample_idx, fm in enumerate(fmaps[1:], start=1):
80
+ mz_vals = []
81
+ inty_vals = []
82
+ original_rt = []
83
+ aligned_rt = []
84
+
85
+ for f in fm:
86
+ try:
87
+ orig = f.getMetaValue('original_RT')
88
+ except Exception:
89
+ orig = None
90
+
91
+ if orig is None:
92
+ original_rt.append(f.getRT())
93
+ else:
94
+ original_rt.append(orig)
95
+
96
+ aligned_rt.append(f.getRT())
97
+ mz_vals.append(f.getMZ())
98
+ inty_vals.append(f.getIntensity())
99
+
100
+ if not inty_vals:
101
+ continue
102
+
103
+ max_inty = max(inty_vals)
104
+
105
+ if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
106
+ samples_info = self.samples_df.to_pandas()
107
+ if sample_idx < len(samples_info):
108
+ sample_name = samples_info.iloc[sample_idx].get('sample_name', f'Sample {sample_idx}')
109
+ sample_uid = samples_info.iloc[sample_idx].get('sample_uid', f'Sample_{sample_idx}_UID')
110
+ else:
111
+ sample_name = f'Sample {sample_idx}'
112
+ sample_uid = f'Sample_{sample_idx}_UID'
113
+ else:
114
+ sample_name = f'Sample {sample_idx}'
115
+ sample_uid = f'Sample_{sample_idx}_UID'
48
116
 
49
- for fm in fmaps[1:]:
50
- ax.scatter(
51
- [f.getMetaValue("original_RT") for f in fm],
52
- [f.getMZ() for f in fm],
53
- alpha=np.asarray([f.getIntensity() for f in fm]) / max([f.getIntensity() for f in fm]),
54
- s=2, # Set symbol size to 3
55
- )
117
+ for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
118
+ before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
56
119
 
57
- ax = fig.add_subplot(1, 2, 2)
58
- ax.set_title("Feature maps after alignment")
59
- ax.set_ylabel("m/z")
60
- ax.set_xlabel("RT")
61
-
62
- for fm in fmaps:
63
- ax.scatter(
64
- [f.getRT() for f in fm],
65
- [f.getMZ() for f in fm],
66
- alpha=np.asarray([f.getIntensity() for f in fm]) / max([f.getIntensity() for f in fm]),
67
- s=2, # Set symbol size to 3
68
- )
120
+ for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
121
+ after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
69
122
 
70
- fig.tight_layout()
123
+ else:
124
+ # Use features_df
125
+ if self.features_df is None or self.features_df.is_empty():
126
+ self.logger.error("No features_df found. Load features first.")
127
+ return
71
128
 
129
+ required_cols = ['rt', 'mz', 'inty']
130
+ missing = [c for c in required_cols if c not in self.features_df.columns]
131
+ if missing:
132
+ self.logger.error(f"Missing required columns in features_df: {missing}")
133
+ return
72
134
 
73
- def plot_alignment_bokeh(self, filename=None):
74
- from bokeh.plotting import figure, show, output_file
75
- from bokeh.layouts import gridplot
135
+ if 'rt_original' not in self.features_df.columns:
136
+ self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
137
+ return
76
138
 
77
- feature_maps = self.features_maps
78
- ref_index = self.alignment_ref_index
79
- if ref_index is None:
80
- self.logger.warning("No alignment performed yet.")
81
- return
139
+ features_pd = self.features_df.to_pandas()
82
140
 
83
- fmaps = [
84
- feature_maps[ref_index],
85
- *feature_maps[:ref_index],
86
- *feature_maps[ref_index + 1 :],
87
- ]
141
+ sample_col = 'sample_uid' if 'sample_uid' in features_pd.columns else 'sample_name'
142
+ if sample_col not in features_pd.columns:
143
+ self.logger.error("No sample identifier column found in features_df.")
144
+ return
145
+
146
+ samples = features_pd[sample_col].unique()
147
+
148
+ for sample_idx, sample in enumerate(samples):
149
+ sample_data = features_pd[features_pd[sample_col] == sample]
150
+ max_inty = sample_data['inty'].max() if sample_data['inty'].max() > 0 else 1
151
+ sample_name = str(sample)
152
+ sample_uid = sample if sample_col == 'sample_uid' else (sample_data['sample_uid'].iloc[0] if 'sample_uid' in sample_data.columns else sample)
153
+
154
+ for _, row in sample_data.iterrows():
155
+ before_data.append({'rt': row['rt_original'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
156
+ after_data.append({'rt': row['rt'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
157
+
158
+ # Ensure dataframes exist even if empty
159
+ before_df = pd.DataFrame(before_data)
160
+ after_df = pd.DataFrame(after_data)
161
+
162
+ # Create ColumnDataSources (safe even for empty dfs)
163
+ from bokeh.models import ColumnDataSource
164
+
165
+ before_source = ColumnDataSource(before_df)
166
+ after_source = ColumnDataSource(after_df)
88
167
 
89
168
  # Create Bokeh figures
90
- p1 = figure(
91
- title="Feature maps before alignment",
92
- width=600,
93
- height=400,
94
- )
95
- p1.xaxis.axis_label = "RT"
96
- p1.yaxis.axis_label = "m/z"
97
- p2 = figure(
98
- title="Feature maps after alignment",
99
- width=600,
100
- height=400,
101
- )
102
- p2.xaxis.axis_label = "RT"
103
- p2.yaxis.axis_label = "m/z"
104
-
105
- # Plot before alignment
106
- p1.scatter(
107
- x=[f.getRT() for f in fmaps[0]],
108
- y=[f.getMZ() for f in fmaps[0]],
109
- size=4,
110
- alpha=[f.getIntensity() / max([f.getIntensity() for f in fmaps[0]]) for f in fmaps[0]],
111
- color="blue",
112
- )
169
+ p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
170
+ p1.outline_line_color = None
171
+ p1.background_fill_color = 'white'
172
+ p1.border_fill_color = 'white'
173
+ p1.min_border = 0
174
+
175
+ p2 = figure(width=width, height=height, title='Current RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save', x_range=p1.x_range, y_range=p1.y_range)
176
+ p2.outline_line_color = None
177
+ p2.background_fill_color = 'white'
178
+ p2.border_fill_color = 'white'
179
+ p2.min_border = 0
180
+
181
+ # Color mapping using Turbo256
182
+ unique_samples = sorted(list(set(before_df['sample_idx'].tolist()))) if not before_df.empty else []
183
+ colors = Turbo256
184
+ color_map: dict[int, str] = {}
185
+ n = max(1, len(unique_samples))
186
+ step = max(1, 256 // n)
187
+ for i, sample_idx in enumerate(unique_samples):
188
+ color_map[sample_idx] = colors[(i * step) % 256]
113
189
 
114
- for fm in fmaps[1:]:
115
- p1.scatter(
116
- x=[f.getMetaValue("original_RT") for f in fm],
117
- y=[f.getMZ() for f in fm],
118
- size=2,
119
- alpha=[f.getIntensity() / max([f.getIntensity() for f in fm]) for f in fm],
120
- color="green",
121
- )
190
+ renderers_before = []
191
+ renderers_after = []
122
192
 
123
- # Plot after alignment
124
- for fm in fmaps:
125
- p2.scatter(
126
- x=[f.getRT() for f in fm],
127
- y=[f.getMZ() for f in fm],
128
- size=2,
129
- alpha=[f.getIntensity() / max([f.getIntensity() for f in fm]) for f in fm],
130
- color="red",
131
- )
193
+ for sample_idx in unique_samples:
194
+ sb = before_df[before_df['sample_idx'] == sample_idx]
195
+ sa = after_df[after_df['sample_idx'] == sample_idx]
196
+ color = color_map.get(sample_idx, '#000000')
197
+
198
+ if not sb.empty:
199
+ src = ColumnDataSource(sb)
200
+ r = p1.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
201
+ renderers_before.append(r)
202
+
203
+ if not sa.empty:
204
+ src = ColumnDataSource(sa)
205
+ r = p2.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
206
+ renderers_after.append(r)
132
207
 
133
- # Arrange plots in a grid
134
- # Link the x_range and y_range of both plots for synchronized zooming/panning
135
- p2.x_range = p1.x_range
136
- p2.y_range = p1.y_range
208
+ # Add hover tools
209
+ hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_before)
210
+ p1.add_tools(hover1)
137
211
 
138
- grid = gridplot([[p1, p2]])
212
+ hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_after)
213
+ p2.add_tools(hover2)
139
214
 
140
- # Output to file and show
215
+ # Create layout with both plots side by side
216
+ # Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
217
+ layout = bokeh_row(p1, p2, sizing_mode='fixed', width=width, height=height)
218
+
219
+ # Output and show
141
220
  if filename:
221
+ from bokeh.plotting import output_file, show
142
222
  output_file(filename)
143
- show(grid)
223
+ show(layout)
224
+ else:
225
+ from bokeh.plotting import show
226
+ show(layout)
227
+
228
+ return layout
144
229
 
145
230
 
146
231
  def plot_consensus_2d(
@@ -331,8 +416,8 @@ def plot_samples_2d(
331
416
  alpha="inty",
332
417
  cmap="Turbo256",
333
418
  max_features=50000,
334
- width=900,
335
- height=900,
419
+ width=600,
420
+ height=600,
336
421
  mz_range=None,
337
422
  rt_range=None,
338
423
  ):
@@ -455,7 +540,10 @@ def plot_samples_2d(
455
540
  color_values = {}
456
541
  sample_names = {}
457
542
 
458
- for uid in sample_uids:
543
+ # Decide whether to show tqdm based on log level (show for INFO/DEBUG/TRACE)
544
+ tqdm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
545
+
546
+ for uid in tqdm(sample_uids, desc="Plotting BPCs", disable=tqdm_disable):
459
547
  sample_data = features_pd[features_pd["sample_uid"] == uid]
460
548
  if sample_data.empty:
461
549
  continue
@@ -525,7 +613,9 @@ def plot_samples_2d(
525
613
  p.add_tools(hover)
526
614
 
527
615
  # Remove legend from plot
528
- p.legend.visible = False
616
+ # Only set legend properties if a legend was actually created to avoid Bokeh warnings
617
+ if getattr(p, "legend", None) and len(p.legend) > 0:
618
+ p.legend.visible = False
529
619
  if filename:
530
620
  if filename.endswith(".html"):
531
621
  output_file(filename)
@@ -540,6 +630,441 @@ def plot_samples_2d(
540
630
  return
541
631
 
542
632
 
633
+ def plot_bpc(
634
+ self,
635
+ samples=None,
636
+ title: str | None = None,
637
+ filename: str | None = None,
638
+ width: int = 1000,
639
+ height: int = 300,
640
+ rt_unit: str = "s",
641
+ original: bool = False,
642
+ ):
643
+ """
644
+ Plot Base Peak Chromatograms (BPC) for selected samples overlaid using Bokeh.
645
+
646
+ This collects per-sample BPCs via `get_bpc(self, sample=uid)` and overlays them.
647
+ Colors are mapped per-sample using the same Turbo256 palette as `plot_samples_2d`.
648
+ Parameters:
649
+ original (bool): If True, attempt to map RTs back to original RTs using `features_df`.
650
+ If False (default), return current/aligned RTs.
651
+ """
652
+ # Local imports to avoid heavy top-level deps / circular imports
653
+ from bokeh.plotting import figure, show, output_file
654
+ from bokeh.models import ColumnDataSource, HoverTool
655
+ from bokeh.io.export import export_png
656
+ from bokeh.palettes import Turbo256
657
+ from masster.study.helpers import get_bpc
658
+
659
+ sample_uids = self._get_sample_uids(samples)
660
+ if not sample_uids:
661
+ self.logger.error("No valid sample_uids provided for BPC plotting.")
662
+ return
663
+
664
+ # Debug: show which sample_uids we will process
665
+ self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
666
+
667
+ colors = Turbo256
668
+ n = max(1, len(sample_uids))
669
+ step = max(1, 256 // n)
670
+ color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
671
+
672
+ # When plotting original (uncorrected) RTs a fixed title is used and `title` is ignored; otherwise use `title` or the default.
673
+ if original:
674
+ plot_title = "Base Peak Chromatogarms (uncorrected)"
675
+ else:
676
+ plot_title = title or "Base Peak Chromatograms"
677
+
678
+ p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
679
+ p.xaxis.axis_label = f"Retention Time ({rt_unit})"
680
+ p.yaxis.axis_label = "Intensity"
681
+
682
+ renderers = []
683
+
684
+ # Build sample name mapping once
685
+ samples_info = None
686
+ if hasattr(self, "samples_df") and self.samples_df is not None:
687
+ try:
688
+ samples_info = self.samples_df.to_pandas()
689
+ except Exception:
690
+ samples_info = None
691
+
692
+ for uid in sample_uids:
693
+ try:
694
+ chrom = get_bpc(self, sample=uid, rt_unit=rt_unit, label=None, original=original)
695
+ except Exception as e:
696
+ # log and skip samples we can't compute BPC for
697
+ self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
698
+ continue
699
+
700
+ # extract arrays
701
+ try:
702
+ # prefer Chromatogram API
703
+ chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
704
+ rt = chrom_dict.get("rt")
705
+ inty = chrom_dict.get("inty")
706
+ except Exception:
707
+ try:
708
+ rt = chrom.rt
709
+ inty = chrom.inty
710
+ except Exception as e:
711
+ self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
712
+ continue
713
+
714
+ if rt is None or inty is None:
715
+ continue
716
+
717
+ # Ensure numpy arrays
718
+ import numpy as _np
719
+
720
+ rt = _np.asarray(rt)
721
+ inty = _np.asarray(inty)
722
+ if rt.size == 0 or inty.size == 0:
723
+ continue
724
+
725
+ # Sort by rt
726
+ idx = _np.argsort(rt)
727
+ rt = rt[idx]
728
+ inty = inty[idx]
729
+
730
+ sample_name = str(uid)
731
+ if samples_info is not None:
732
+ try:
733
+ row = samples_info[samples_info["sample_uid"] == uid]
734
+ if not row.empty:
735
+ sample_name = row.iloc[0].get("sample_name", sample_name)
736
+ except Exception:
737
+ pass
738
+ # Determine color for this sample early so we can log it
739
+ color = color_map.get(uid, "#000000")
740
+
741
+ # Debug: log sample processing details
742
+ self.logger.debug(
743
+ f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
744
+ )
745
+
746
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
747
+ src = ColumnDataSource(data)
748
+
749
+ r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
750
+ r_points = p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
751
+ renderers.append(r_line)
752
+
753
+ if not renderers:
754
+ self.logger.warning("No BPC curves to plot for the selected samples.")
755
+ return
756
+
757
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
758
+ p.add_tools(hover)
759
+
760
+ # Only set legend properties if a legend was actually created to avoid Bokeh warnings
761
+ if getattr(p, "legend", None) and len(p.legend) > 0:
762
+ p.legend.visible = False
763
+
764
+ if filename:
765
+ if filename.endswith(".html"):
766
+ output_file(filename)
767
+ show(p)
768
+ elif filename.endswith(".png"):
769
+ try:
770
+ export_png(p, filename=filename)
771
+ except Exception:
772
+ # fallback to saving HTML
773
+ output_file(filename.replace(".png", ".html"))
774
+ show(p)
775
+ else:
776
+ output_file(filename)
777
+ show(p)
778
+ else:
779
+ show(p)
780
+
781
+ return p
782
+
783
+
784
+ def plot_eic(
785
+ self,
786
+ mz,
787
+ mz_tol=0.01,
788
+ samples=None,
789
+ title: str | None = None,
790
+ filename: str | None = None,
791
+ width: int = 1000,
792
+ height: int = 300,
793
+ rt_unit: str = "s",
794
+ original: bool = False,
795
+ ):
796
+ """
797
+ Plot Extracted Ion Chromatograms (EIC) for a target m/z (± mz_tol) for selected samples.
798
+
799
+ Parameters mirror `plot_bpc` with additional `mz` and `mz_tol` arguments. For each sample UID the
800
+ function calls the study helper `get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, rt_unit=rt_unit, label=None)`
801
+ and overlays the resulting chromatograms. Note: `original` is accepted but not used by this function.
802
+ """
803
+ # Local imports to avoid heavy top-level deps / circular imports
804
+ from bokeh.plotting import figure, show, output_file
805
+ from bokeh.models import ColumnDataSource, HoverTool
806
+ from bokeh.io.export import export_png
807
+ from bokeh.palettes import Turbo256
808
+ from masster.study.helpers import get_eic
809
+
810
+ if mz is None:
811
+ self.logger.error("mz must be provided for EIC plotting")
812
+ return
813
+
814
+ sample_uids = self._get_sample_uids(samples)
815
+ if not sample_uids:
816
+ self.logger.error("No valid sample_uids provided for EIC plotting.")
817
+ return
818
+
819
+ colors = Turbo256
820
+ n = max(1, len(sample_uids))
821
+ step = max(1, 256 // n)
822
+ color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
823
+
824
+ plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
825
+
826
+ p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
827
+ p.xaxis.axis_label = f"Retention Time ({rt_unit})"
828
+ p.yaxis.axis_label = "Intensity"
829
+
830
+ renderers = []
831
+
832
+ # Build sample name mapping once
833
+ samples_info = None
834
+ if hasattr(self, "samples_df") and self.samples_df is not None:
835
+ try:
836
+ samples_info = self.samples_df.to_pandas()
837
+ except Exception:
838
+ samples_info = None
839
+
840
+ for uid in sample_uids:
841
+ try:
842
+ chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, rt_unit=rt_unit, label=None)
843
+ except Exception as e:
844
+ # log and skip samples we can't compute EIC for
845
+ self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
846
+ continue
847
+
848
+ # extract arrays
849
+ try:
850
+ # prefer Chromatogram API
851
+ chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
852
+ rt = chrom_dict.get("rt")
853
+ inty = chrom_dict.get("inty")
854
+ except Exception:
855
+ try:
856
+ rt = chrom.rt
857
+ inty = chrom.inty
858
+ except Exception as e:
859
+ self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
860
+ continue
861
+
862
+ if rt is None or inty is None:
863
+ continue
864
+
865
+ import numpy as _np
866
+
867
+ rt = _np.asarray(rt)
868
+ inty = _np.asarray(inty)
869
+ if rt.size == 0 or inty.size == 0:
870
+ continue
871
+
872
+ # Sort by rt
873
+ idx = _np.argsort(rt)
874
+ rt = rt[idx]
875
+ inty = inty[idx]
876
+
877
+ sample_name = str(uid)
878
+ if samples_info is not None:
879
+ try:
880
+ row = samples_info[samples_info["sample_uid"] == uid]
881
+ if not row.empty:
882
+ sample_name = row.iloc[0].get("sample_name", sample_name)
883
+ except Exception:
884
+ pass
885
+
886
+ color = color_map.get(uid, "#000000")
887
+
888
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
889
+ src = ColumnDataSource(data)
890
+
891
+ r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
892
+ p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
893
+ renderers.append(r_line)
894
+
895
+ if not renderers:
896
+ self.logger.warning("No EIC curves to plot for the selected samples.")
897
+ return
898
+
899
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
900
+ p.add_tools(hover)
901
+
902
+ if getattr(p, "legend", None) and len(p.legend) > 0:
903
+ p.legend.visible = False
904
+
905
+ if filename:
906
+ if filename.endswith(".html"):
907
+ output_file(filename)
908
+ show(p)
909
+ elif filename.endswith(".png"):
910
+ try:
911
+ export_png(p, filename=filename)
912
+ except Exception:
913
+ output_file(filename.replace(".png", ".html"))
914
+ show(p)
915
+ else:
916
+ output_file(filename)
917
+ show(p)
918
+ else:
919
+ show(p)
920
+
921
+ return p
922
+
923
+
924
+ def plot_rt_correction(
925
+ self,
926
+ samples=None,
927
+ title: str | None = None,
928
+ filename: str | None = None,
929
+ width: int = 1000,
930
+ height: int = 300,
931
+ rt_unit: str = "s",
932
+ ):
933
+ """
934
+ Plot RT correction per sample: (rt - rt_original) vs rt, overlaid for selected samples.
935
+
936
+ This uses the same color mapping as `plot_bpc` so curves for the same samples match.
937
+ """
938
+ from bokeh.plotting import figure, show, output_file
939
+ from bokeh.models import ColumnDataSource, HoverTool
940
+ from bokeh.palettes import Turbo256
941
+ import numpy as _np
942
+
943
+ # Validate features dataframe
944
+ if self.features_df is None or self.features_df.is_empty():
945
+ self.logger.error("No features_df found. Load features first.")
946
+ return
947
+
948
+ if "rt_original" not in self.features_df.columns:
949
+ self.logger.error("Column 'rt_original' not found in features_df. Alignment/backup RTs missing.")
950
+ return
951
+
952
+ sample_uids = self._get_sample_uids(samples)
953
+ if not sample_uids:
954
+ self.logger.error("No valid sample_uids provided for RT correction plotting.")
955
+ return
956
+
957
+ # Color mapping like plot_bpc
958
+ colors = Turbo256
959
+ n = max(1, len(sample_uids))
960
+ step = max(1, 256 // n)
961
+ color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
962
+
963
+ p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
964
+ p.xaxis.axis_label = f"Retention Time ({rt_unit})"
965
+ p.yaxis.axis_label = "RT - RT_original (s)"
966
+
967
+ samples_info = None
968
+ if hasattr(self, "samples_df") and self.samples_df is not None:
969
+ try:
970
+ samples_info = self.samples_df.to_pandas()
971
+ except Exception:
972
+ samples_info = None
973
+
974
+ renderers = []
975
+
976
+ # Iterate samples and build curves
977
+ for uid in sample_uids:
978
+ # Select features belonging to this sample
979
+ try:
980
+ if "sample_uid" in self.features_df.columns:
981
+ sample_feats = self.features_df.filter(pl.col("sample_uid") == uid)
982
+ elif "sample_name" in self.features_df.columns:
983
+ sample_feats = self.features_df.filter(pl.col("sample_name") == uid)
984
+ else:
985
+ self.logger.debug("No sample identifier column in features_df; skipping sample filtering")
986
+ continue
987
+ except Exception as e:
988
+ self.logger.debug(f"Error filtering features for sample {uid}: {e}")
989
+ continue
990
+
991
+ if sample_feats.is_empty():
992
+ continue
993
+
994
+ # Convert to pandas for easy numeric handling
995
+ try:
996
+ df = sample_feats.to_pandas()
997
+ except Exception:
998
+ continue
999
+
1000
+ # Need both rt and rt_original
1001
+ if "rt" not in df.columns or "rt_original" not in df.columns:
1002
+ continue
1003
+
1004
+ # Drop NA and ensure numeric arrays
1005
+ df = df.dropna(subset=["rt", "rt_original"]).copy()
1006
+ if df.empty:
1007
+ continue
1008
+
1009
+ rt = _np.asarray(df["rt"], dtype=float)
1010
+ rt_orig = _np.asarray(df["rt_original"], dtype=float)
1011
+ delta = rt - rt_orig
1012
+
1013
+ # sort by rt
1014
+ idx = _np.argsort(rt)
1015
+ rt = rt[idx]
1016
+ delta = delta[idx]
1017
+
1018
+ sample_name = str(uid)
1019
+ if samples_info is not None:
1020
+ try:
1021
+ row = samples_info[samples_info["sample_uid"] == uid]
1022
+ if not row.empty:
1023
+ sample_name = row.iloc[0].get("sample_name", sample_name)
1024
+ except Exception:
1025
+ pass
1026
+
1027
+ color = color_map.get(uid, "#000000")
1028
+
1029
+ data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
1030
+ src = ColumnDataSource(data)
1031
+
1032
+ r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
1033
+ p.scatter("rt", "delta", source=src, size=2, color=color, alpha=0.6)
1034
+ renderers.append(r_line)
1035
+
1036
+ if not renderers:
1037
+ self.logger.warning("No RT correction curves to plot for the selected samples.")
1038
+ return
1039
+
1040
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
1041
+ p.add_tools(hover)
1042
+
1043
+ # Only set legend properties if a legend was actually created to avoid Bokeh warnings
1044
+ if getattr(p, "legend", None) and len(p.legend) > 0:
1045
+ p.legend.visible = False
1046
+
1047
+ if filename:
1048
+ if filename.endswith(".html"):
1049
+ output_file(filename)
1050
+ show(p)
1051
+ elif filename.endswith(".png"):
1052
+ try:
1053
+ from bokeh.io.export import export_png
1054
+
1055
+ export_png(p, filename=filename)
1056
+ except Exception:
1057
+ output_file(filename.replace(".png", ".html"))
1058
+ show(p)
1059
+ else:
1060
+ output_file(filename)
1061
+ show(p)
1062
+ else:
1063
+ show(p)
1064
+
1065
+ return p
1066
+
1067
+
543
1068
  def plot_chrom(
544
1069
  self,
545
1070
  uids=None,
@@ -936,10 +1461,10 @@ def plot_consensus_stats(
936
1461
  def plot_pca(
937
1462
  self,
938
1463
  filename=None,
939
- width=600,
940
- height=600,
1464
+ width=400,
1465
+ height=400,
941
1466
  alpha=0.8,
942
- markersize=8,
1467
+ markersize=6,
943
1468
  n_components=2,
944
1469
  color_by=None,
945
1470
  title="PCA of Consensus Matrix",
@@ -959,7 +1484,7 @@ def plot_pca(
959
1484
  """
960
1485
  from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
961
1486
  from bokeh.plotting import figure, show, output_file
962
- from bokeh.palettes import Category20, viridis
1487
+ from bokeh.palettes import Category20, viridis, Turbo256
963
1488
  from bokeh.transform import factor_cmap
964
1489
  from sklearn.decomposition import PCA
965
1490
  from sklearn.preprocessing import StandardScaler
@@ -1094,23 +1619,45 @@ def plot_pca(
1094
1619
  legend_field=color_by,
1095
1620
  )
1096
1621
  else:
1097
- scatter = p.scatter(
1098
- "PC1",
1099
- "PC2",
1100
- size=markersize,
1101
- alpha=alpha,
1102
- color="blue",
1103
- source=source,
1104
- )
1622
+ # If no color_by provided, color points by sample similar to plot_samples_2d
1623
+ if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
1624
+ # Choose the identifier to map colors by
1625
+ id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
1626
+ sample_ids = list(pd.unique(pca_df[id_col]))
1627
+ colors = Turbo256
1628
+ color_map = {uid: colors[i * (256 // max(1, len(sample_ids)))] for i, uid in enumerate(sample_ids)}
1629
+ # Map colors into dataframe
1630
+ pca_df["color"] = [color_map[x] for x in pca_df[id_col]]
1631
+ # Update the ColumnDataSource with new color column
1632
+ source = ColumnDataSource(pca_df)
1633
+ scatter = p.scatter(
1634
+ "PC1",
1635
+ "PC2",
1636
+ size=markersize,
1637
+ alpha=alpha,
1638
+ color="color",
1639
+ source=source,
1640
+ )
1641
+ else:
1642
+ scatter = p.scatter(
1643
+ "PC1",
1644
+ "PC2",
1645
+ size=markersize,
1646
+ alpha=alpha,
1647
+ color="blue",
1648
+ source=source,
1649
+ )
1105
1650
 
1106
1651
  # Create comprehensive hover tooltips with all sample information
1107
- tooltip_list = [
1108
- ("PC1", "@PC1{0.00}"),
1109
- ("PC2", "@PC2{0.00}"),
1110
- ]
1652
+ tooltip_list = []
1111
1653
 
1112
- # Add all sample dataframe columns to tooltips
1654
+ # Columns to exclude from tooltips (file paths and internal/plot fields)
1655
+ excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}
1656
+
1657
+ # Add all sample dataframe columns to tooltips, skipping excluded ones
1113
1658
  for col in samples_pd.columns:
1659
+ if col in excluded_cols:
1660
+ continue
1114
1661
  if col in pca_df.columns:
1115
1662
  if pca_df[col].dtype in ["float64", "float32"]:
1116
1663
  tooltip_list.append((col, f"@{col}{{0.00}}"))
@@ -1125,8 +1672,10 @@ def plot_pca(
1125
1672
 
1126
1673
  # Add legend if using categorical coloring
1127
1674
  if color_mapper and not isinstance(color_mapper, LinearColorMapper) and color_by:
1128
- p.legend.location = "top_left"
1129
- p.legend.click_policy = "hide"
1675
+ # Only set legend properties if legends exist (avoid Bokeh warning when none created)
1676
+ if getattr(p, "legend", None) and len(p.legend) > 0:
1677
+ p.legend.location = "top_left"
1678
+ p.legend.click_policy = "hide"
1130
1679
 
1131
1680
  # Output and show
1132
1681
  if filename:
@@ -1134,3 +1683,133 @@ def plot_pca(
1134
1683
 
1135
1684
  show(p)
1136
1685
  return p
1686
+
1687
+ def plot_tic(
1688
+ self,
1689
+ samples=None,
1690
+ title: str | None = None,
1691
+ filename: str | None = None,
1692
+ width: int = 1000,
1693
+ height: int = 300,
1694
+ rt_unit: str = "s",
1695
+ original: bool = False,
1696
+ ):
1697
+ """
1698
+ Plot Total Ion Chromatograms (TIC) for selected samples overlayed using Bokeh.
1699
+
1700
+ Parameters and behavior mirror `plot_bpc` but use per-sample TICs (get_tic).
1701
+ """
1702
+ # Local imports to avoid heavy top-level deps / circular imports
1703
+ from bokeh.plotting import figure, show, output_file
1704
+ from bokeh.models import ColumnDataSource, HoverTool
1705
+ from bokeh.io.export import export_png
1706
+ from bokeh.palettes import Turbo256
1707
+ from masster.study.helpers import get_tic
1708
+
1709
+ sample_uids = self._get_sample_uids(samples)
1710
+ if not sample_uids:
1711
+ self.logger.error("No valid sample_uids provided for TIC plotting.")
1712
+ return
1713
+
1714
+ colors = Turbo256
1715
+ n = max(1, len(sample_uids))
1716
+ step = max(1, 256 // n)
1717
+ color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
1718
+
1719
+ plot_title = title or "Total Ion Chromatograms"
1720
+
1721
+ p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
1722
+ p.xaxis.axis_label = f"Retention Time ({rt_unit})"
1723
+ p.yaxis.axis_label = "Intensity"
1724
+
1725
+ renderers = []
1726
+
1727
+ # Build sample name mapping once
1728
+ samples_info = None
1729
+ if hasattr(self, "samples_df") and self.samples_df is not None:
1730
+ try:
1731
+ samples_info = self.samples_df.to_pandas()
1732
+ except Exception:
1733
+ samples_info = None
1734
+
1735
+ for uid in sample_uids:
1736
+ try:
1737
+ chrom = get_tic(self, sample=uid, label=None)
1738
+ except Exception as e:
1739
+ self.logger.debug(f"Skipping sample {uid} for TIC: {e}")
1740
+ continue
1741
+
1742
+ # extract arrays
1743
+ try:
1744
+ chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
1745
+ rt = chrom_dict.get("rt")
1746
+ inty = chrom_dict.get("inty")
1747
+ except Exception:
1748
+ try:
1749
+ rt = chrom.rt
1750
+ inty = chrom.inty
1751
+ except Exception as e:
1752
+ self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
1753
+ continue
1754
+
1755
+ if rt is None or inty is None:
1756
+ continue
1757
+
1758
+ import numpy as _np
1759
+
1760
+ rt = _np.asarray(rt)
1761
+ inty = _np.asarray(inty)
1762
+ if rt.size == 0 or inty.size == 0:
1763
+ continue
1764
+
1765
+ # Sort by rt
1766
+ idx = _np.argsort(rt)
1767
+ rt = rt[idx]
1768
+ inty = inty[idx]
1769
+
1770
+ sample_name = str(uid)
1771
+ if samples_info is not None:
1772
+ try:
1773
+ row = samples_info[samples_info["sample_uid"] == uid]
1774
+ if not row.empty:
1775
+ sample_name = row.iloc[0].get("sample_name", sample_name)
1776
+ except Exception:
1777
+ pass
1778
+
1779
+ color = color_map.get(uid, "#000000")
1780
+
1781
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
1782
+ src = ColumnDataSource(data)
1783
+
1784
+ r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
1785
+ p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
1786
+ renderers.append(r_line)
1787
+
1788
+ if not renderers:
1789
+ self.logger.warning("No TIC curves to plot for the selected samples.")
1790
+ return
1791
+
1792
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
1793
+ p.add_tools(hover)
1794
+
1795
+ # Only set legend properties if a legend was actually created to avoid Bokeh warnings
1796
+ if getattr(p, "legend", None) and len(p.legend) > 0:
1797
+ p.legend.visible = False
1798
+
1799
+ if filename:
1800
+ if filename.endswith(".html"):
1801
+ output_file(filename)
1802
+ show(p)
1803
+ elif filename.endswith(".png"):
1804
+ try:
1805
+ export_png(p, filename=filename)
1806
+ except Exception:
1807
+ output_file(filename.replace(".png", ".html"))
1808
+ show(p)
1809
+ else:
1810
+ output_file(filename)
1811
+ show(p)
1812
+ else:
1813
+ show(p)
1814
+
1815
+ return p