masster 0.3.13__py3-none-any.whl → 0.3.15__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/plot.py CHANGED
@@ -37,7 +37,6 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
37
37
  # Local imports so the module can be used even if bokeh isn't needed elsewhere
38
38
  from bokeh.models import ColumnDataSource, HoverTool
39
39
  from bokeh.plotting import figure, show, output_file
40
- from bokeh.palettes import Turbo256
41
40
  import pandas as pd
42
41
 
43
42
  # Build the before/after tabular data used for plotting
@@ -136,34 +135,99 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
136
135
  self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
137
136
  return
138
137
 
139
- features_pd = self.features_df.to_pandas()
138
+ # Use Polars instead of pandas
139
+ features_df = self.features_df
140
140
 
141
- sample_col = 'sample_uid' if 'sample_uid' in features_pd.columns else 'sample_name'
142
- if sample_col not in features_pd.columns:
141
+ sample_col = 'sample_uid' if 'sample_uid' in features_df.columns else 'sample_name'
142
+ if sample_col not in features_df.columns:
143
143
  self.logger.error("No sample identifier column found in features_df.")
144
144
  return
145
145
 
146
- samples = features_pd[sample_col].unique()
146
+ # Get unique samples using Polars
147
+ samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
147
148
 
148
149
  for sample_idx, sample in enumerate(samples):
149
- sample_data = features_pd[features_pd[sample_col] == sample]
150
- max_inty = sample_data['inty'].max() if sample_data['inty'].max() > 0 else 1
150
+ # Filter sample data using Polars
151
+ sample_data = features_df.filter(pl.col(sample_col) == sample)
152
+
153
+ # Calculate max intensity using Polars
154
+ max_inty = sample_data.select(pl.col('inty').max()).item()
155
+ max_inty = max_inty if max_inty and max_inty > 0 else 1
156
+
151
157
  sample_name = str(sample)
152
- sample_uid = sample if sample_col == 'sample_uid' else (sample_data['sample_uid'].iloc[0] if 'sample_uid' in sample_data.columns else sample)
153
-
154
- for _, row in sample_data.iterrows():
155
- before_data.append({'rt': row['rt_original'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
156
- after_data.append({'rt': row['rt'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
157
-
158
- # Ensure dataframes exist even if empty
159
- before_df = pd.DataFrame(before_data)
160
- after_df = pd.DataFrame(after_data)
161
-
162
- # Create ColumnDataSources (safe even for empty dfs)
163
- from bokeh.models import ColumnDataSource
158
+ # Get sample_uid - if sample_col is 'sample_uid', use sample directly
159
+ if sample_col == 'sample_uid':
160
+ sample_uid = sample
161
+ else:
162
+ # Try to get sample_uid from the first row if it exists
163
+ if 'sample_uid' in sample_data.columns:
164
+ sample_uid = sample_data.select(pl.col('sample_uid')).item()
165
+ else:
166
+ sample_uid = sample
167
+
168
+ # Convert to dict for iteration - more efficient than row-by-row processing
169
+ sample_dict = sample_data.select(['rt_original', 'rt', 'mz', 'inty']).to_dicts()
170
+
171
+ for row_dict in sample_dict:
172
+ rt_original = row_dict['rt_original']
173
+ rt_current = row_dict['rt']
174
+ mz = row_dict['mz']
175
+ inty = row_dict['inty']
176
+ alpha = inty / max_inty
177
+ size = markersize + 2 if sample_idx == 0 else markersize
178
+
179
+ before_data.append({
180
+ 'rt': rt_original, 'mz': mz, 'inty': inty, 'alpha': alpha,
181
+ 'sample_idx': sample_idx, 'sample_name': sample_name,
182
+ 'sample_uid': sample_uid, 'size': size
183
+ })
184
+ after_data.append({
185
+ 'rt': rt_current, 'mz': mz, 'inty': inty, 'alpha': alpha,
186
+ 'sample_idx': sample_idx, 'sample_name': sample_name,
187
+ 'sample_uid': sample_uid, 'size': size
188
+ })
189
+
190
+ # Get sample colors from samples_df using sample indices
191
+ # Extract unique sample information from the dictionaries we created
192
+ if before_data:
193
+ # Create mapping from sample_idx to sample_uid more efficiently
194
+ sample_idx_to_uid = {}
195
+ for item in before_data:
196
+ if item['sample_idx'] not in sample_idx_to_uid:
197
+ sample_idx_to_uid[item['sample_idx']] = item['sample_uid']
198
+ else:
199
+ sample_idx_to_uid = {}
200
+
201
+ # Get colors from samples_df
202
+ sample_uids_list = list(sample_idx_to_uid.values())
203
+ if sample_uids_list and hasattr(self, 'samples_df') and self.samples_df is not None:
204
+ sample_colors = (
205
+ self.samples_df
206
+ .filter(pl.col("sample_uid").is_in(sample_uids_list))
207
+ .select(["sample_uid", "sample_color"])
208
+ .to_dict(as_series=False)
209
+ )
210
+ uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
211
+ else:
212
+ uid_to_color = {}
164
213
 
165
- before_source = ColumnDataSource(before_df)
166
- after_source = ColumnDataSource(after_df)
214
+ # Create color map for sample indices
215
+ color_map: dict[int, str] = {}
216
+ for sample_idx, sample_uid in sample_idx_to_uid.items():
217
+ color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4") # fallback to blue
218
+
219
+ # Add sample_color to data dictionaries before creating DataFrames
220
+ if before_data:
221
+ for item in before_data:
222
+ item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
223
+
224
+ if after_data:
225
+ for item in after_data:
226
+ item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
227
+
228
+ # Now create DataFrames with the sample_color already included
229
+ before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
230
+ after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
167
231
 
168
232
  # Create Bokeh figures
169
233
  p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
@@ -177,15 +241,9 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
177
241
  p2.background_fill_color = 'white'
178
242
  p2.border_fill_color = 'white'
179
243
  p2.min_border = 0
180
-
181
- # Color mapping using Turbo256
182
- unique_samples = sorted(list(set(before_df['sample_idx'].tolist()))) if not before_df.empty else []
183
- colors = Turbo256
184
- color_map: dict[int, str] = {}
185
- n = max(1, len(unique_samples))
186
- step = max(1, 256 // n)
187
- for i, sample_idx in enumerate(unique_samples):
188
- color_map[sample_idx] = colors[(i * step) % 256]
244
+
245
+ # Get unique sample indices for iteration
246
+ unique_samples = sorted(list(set(item['sample_idx'] for item in before_data))) if before_data else []
189
247
 
190
248
  renderers_before = []
191
249
  renderers_after = []
@@ -206,10 +264,10 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
206
264
  renderers_after.append(r)
207
265
 
208
266
  # Add hover tools
209
- hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_before)
267
+ hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_before)
210
268
  p1.add_tools(hover1)
211
269
 
212
- hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_after)
270
+ hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_after)
213
271
  p2.add_tools(hover2)
214
272
 
215
273
  # Create layout with both plots side by side
@@ -232,13 +290,13 @@ def plot_consensus_2d(
232
290
  self,
233
291
  filename=None,
234
292
  colorby="number_samples",
293
+ cmap=None,
294
+ markersize=4,
235
295
  sizeby="inty_mean",
236
- markersize=6,
237
- size="dynamic",
296
+ scaling="dynamic",
238
297
  alpha=0.7,
239
- cmap=None,
240
- width=900,
241
- height=900,
298
+ width=600,
299
+ height=450,
242
300
  mz_range=None,
243
301
  rt_range=None,
244
302
  ):
@@ -317,7 +375,7 @@ def plot_consensus_2d(
317
375
  ])
318
376
 
319
377
  if cmap is None:
320
- cmap = "vi"
378
+ cmap = "viridis"
321
379
  elif cmap == "grey":
322
380
  cmap = "Greys256"
323
381
 
@@ -334,12 +392,49 @@ def plot_consensus_2d(
334
392
  except ImportError:
335
393
  from bokeh.models.annotations import ColorBar
336
394
  from bokeh.palettes import viridis
395
+
396
+ # Import cmap for colormap handling
397
+ from cmap import Colormap
337
398
 
338
399
  # Convert Polars DataFrame to pandas for Bokeh compatibility
339
400
  data_pd = data.to_pandas()
340
401
  source = ColumnDataSource(data_pd)
402
+
403
+ # Handle colormap using cmap.Colormap
404
+ try:
405
+ # Get colormap palette using cmap
406
+ if isinstance(cmap, str):
407
+ colormap = Colormap(cmap)
408
+ # Generate 256 colors and convert to hex
409
+ import numpy as np
410
+ import matplotlib.colors as mcolors
411
+ colors = colormap(np.linspace(0, 1, 256))
412
+ palette = [mcolors.rgb2hex(color) for color in colors]
413
+ else:
414
+ colormap = cmap
415
+ # Try to use to_bokeh() method first
416
+ try:
417
+ palette = colormap.to_bokeh()
418
+ # Ensure we got a color palette, not another mapper
419
+ if not isinstance(palette, (list, tuple)):
420
+ # Fall back to generating colors manually
421
+ import numpy as np
422
+ import matplotlib.colors as mcolors
423
+ colors = colormap(np.linspace(0, 1, 256))
424
+ palette = [mcolors.rgb2hex(color) for color in colors]
425
+ except AttributeError:
426
+ # Fall back to generating colors manually
427
+ import numpy as np
428
+ import matplotlib.colors as mcolors
429
+ colors = colormap(np.linspace(0, 1, 256))
430
+ palette = [mcolors.rgb2hex(color) for color in colors]
431
+ except (AttributeError, ValueError, TypeError) as e:
432
+ # Fallback to viridis if cmap interpretation fails
433
+ self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
434
+ palette = viridis(256)
435
+
341
436
  color_mapper = LinearColorMapper(
342
- palette=viridis(256),
437
+ palette=palette,
343
438
  low=data[colorby].min(),
344
439
  high=data[colorby].max(),
345
440
  )
@@ -352,11 +447,11 @@ def plot_consensus_2d(
352
447
  p.xaxis.axis_label = "Retention Time (min)"
353
448
  p.yaxis.axis_label = "m/z"
354
449
  scatter_renderer: Any = None
355
- if size.lower() in ["dyn", "dynamic"]:
450
+ if scaling.lower() in ["dyn", "dynamic"]:
356
451
  scatter_renderer = p.circle(
357
452
  x="rt",
358
453
  y="mz",
359
- radius=markersize / 10,
454
+ radius=markersize,
360
455
  fill_color={"field": colorby, "transform": color_mapper},
361
456
  line_color=None,
362
457
  alpha=alpha,
@@ -414,7 +509,6 @@ def plot_samples_2d(
414
509
  size="dynamic",
415
510
  alpha_max=0.8,
416
511
  alpha="inty",
417
- cmap="Turbo256",
418
512
  max_features=50000,
419
513
  width=600,
420
514
  height=600,
@@ -447,7 +541,6 @@ def plot_samples_2d(
447
541
  from bokeh.plotting import figure, show, output_file
448
542
  from bokeh.io.export import export_png
449
543
  from bokeh.models import ColumnDataSource, HoverTool
450
- from bokeh.palettes import Turbo256
451
544
 
452
545
  sample_uids = self._get_sample_uids(samples)
453
546
 
@@ -455,8 +548,14 @@ def plot_samples_2d(
455
548
  self.logger.error("No valid sample_uids provided.")
456
549
  return
457
550
 
458
- colors = Turbo256
459
- color_map = {uid: colors[i * (256 // max(1, len(sample_uids)))] for i, uid in enumerate(sample_uids)}
551
+ # Get sample colors from samples_df
552
+ sample_colors = (
553
+ self.samples_df
554
+ .filter(pl.col("sample_uid").is_in(sample_uids))
555
+ .select(["sample_uid", "sample_color"])
556
+ .to_dict(as_series=False)
557
+ )
558
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
460
559
 
461
560
  p = figure(
462
561
  width=width,
@@ -569,6 +668,7 @@ def plot_samples_2d(
569
668
  "inty": sample_data["inty"].values,
570
669
  "alpha": sample_data["alpha"].values,
571
670
  "sample": np.full(len(sample_data), sample_name, dtype=object),
671
+ "sample_color": np.full(len(sample_data), color_values[uid], dtype=object),
572
672
  },
573
673
  )
574
674
 
@@ -604,6 +704,7 @@ def plot_samples_2d(
604
704
  hover = HoverTool(
605
705
  tooltips=[
606
706
  ("sample", "@sample"),
707
+ ("sample_color", "$color[swatch]:sample_color"),
607
708
  ("rt", "@rt{0.00}"),
608
709
  ("mz", "@mz{0.0000}"),
609
710
  ("intensity", "@inty{0.0e+0}"),
@@ -637,7 +738,6 @@ def plot_bpc(
637
738
  filename: str | None = None,
638
739
  width: int = 1000,
639
740
  height: int = 300,
640
- rt_unit: str = "s",
641
741
  original: bool = False,
642
742
  ):
643
743
  """
@@ -653,7 +753,6 @@ def plot_bpc(
653
753
  from bokeh.plotting import figure, show, output_file
654
754
  from bokeh.models import ColumnDataSource, HoverTool
655
755
  from bokeh.io.export import export_png
656
- from bokeh.palettes import Turbo256
657
756
  from masster.study.helpers import get_bpc
658
757
 
659
758
  sample_uids = self._get_sample_uids(samples)
@@ -664,10 +763,14 @@ def plot_bpc(
664
763
  # Debug: show which sample_uids we will process
665
764
  self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
666
765
 
667
- colors = Turbo256
668
- n = max(1, len(sample_uids))
669
- step = max(1, 256 // n)
670
- color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
766
+ # Get sample colors from samples_df
767
+ sample_colors = (
768
+ self.samples_df
769
+ .filter(pl.col("sample_uid").is_in(sample_uids))
770
+ .select(["sample_uid", "sample_color"])
771
+ .to_dict(as_series=False)
772
+ )
773
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
671
774
 
672
775
  # If plotting original (uncorrected) RTs, use the requested title.
673
776
  if original:
@@ -675,6 +778,17 @@ def plot_bpc(
675
778
  else:
676
779
  plot_title = title or "Base Peak Chromatograms"
677
780
 
781
+ # Get rt_unit from the first chromatogram, default to "s" if not available
782
+ rt_unit = "s"
783
+ for uid in sample_uids:
784
+ try:
785
+ first_chrom = get_bpc(self, sample=uid, label=None, original=original)
786
+ if hasattr(first_chrom, 'rt_unit'):
787
+ rt_unit = first_chrom.rt_unit
788
+ break
789
+ except Exception:
790
+ continue
791
+
678
792
  p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
679
793
  p.xaxis.axis_label = f"Retention Time ({rt_unit})"
680
794
  p.yaxis.axis_label = "Intensity"
@@ -691,7 +805,7 @@ def plot_bpc(
691
805
 
692
806
  for uid in sample_uids:
693
807
  try:
694
- chrom = get_bpc(self, sample=uid, rt_unit=rt_unit, label=None, original=original)
808
+ chrom = get_bpc(self, sample=uid, label=None, original=original)
695
809
  except Exception as e:
696
810
  # log and skip samples we can't compute BPC for
697
811
  self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
@@ -743,7 +857,7 @@ def plot_bpc(
743
857
  f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
744
858
  )
745
859
 
746
- data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
860
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
747
861
  src = ColumnDataSource(data)
748
862
 
749
863
  r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -754,7 +868,7 @@ def plot_bpc(
754
868
  self.logger.warning("No BPC curves to plot for the selected samples.")
755
869
  return
756
870
 
757
- hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
871
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
758
872
  p.add_tools(hover)
759
873
 
760
874
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -784,13 +898,12 @@ def plot_bpc(
784
898
  def plot_eic(
785
899
  self,
786
900
  mz,
787
- mz_tol=0.01,
901
+ mz_tol=None,
788
902
  samples=None,
789
903
  title: str | None = None,
790
904
  filename: str | None = None,
791
905
  width: int = 1000,
792
906
  height: int = 300,
793
- rt_unit: str = "s",
794
907
  original: bool = False,
795
908
  ):
796
909
  """
@@ -799,14 +912,20 @@ def plot_eic(
799
912
  Parameters mirror `plot_bpc` with additional `mz` and `mz_tol` arguments. The function
800
913
  retrieves a Sample object for each sample UID, calls `sample.get_eic(mz, mz_tol)`, and
801
914
  overlays the resulting chromatograms.
915
+
916
+ Args:
917
+ mz_tol: m/z tolerance in Da. If None, uses study.parameters.eic_mz_tol as default.
802
918
  """
803
919
  # Local imports to avoid heavy top-level deps / circular imports
804
920
  from bokeh.plotting import figure, show, output_file
805
921
  from bokeh.models import ColumnDataSource, HoverTool
806
922
  from bokeh.io.export import export_png
807
- from bokeh.palettes import Turbo256
808
923
  from masster.study.helpers import get_eic
809
924
 
925
+ # Use study's eic_mz_tol parameter as default if not provided
926
+ if mz_tol is None:
927
+ mz_tol = self.parameters.eic_mz_tol
928
+
810
929
  if mz is None:
811
930
  self.logger.error("mz must be provided for EIC plotting")
812
931
  return
@@ -816,13 +935,28 @@ def plot_eic(
816
935
  self.logger.error("No valid sample_uids provided for EIC plotting.")
817
936
  return
818
937
 
819
- colors = Turbo256
820
- n = max(1, len(sample_uids))
821
- step = max(1, 256 // n)
822
- color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
938
+ # Get sample colors from samples_df
939
+ sample_colors = (
940
+ self.samples_df
941
+ .filter(pl.col("sample_uid").is_in(sample_uids))
942
+ .select(["sample_uid", "sample_color"])
943
+ .to_dict(as_series=False)
944
+ )
945
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
823
946
 
824
947
  plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
825
948
 
949
+ # Get rt_unit from the first chromatogram, default to "s" if not available
950
+ rt_unit = "s"
951
+ for uid in sample_uids:
952
+ try:
953
+ first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
954
+ if hasattr(first_chrom, 'rt_unit'):
955
+ rt_unit = first_chrom.rt_unit
956
+ break
957
+ except Exception:
958
+ continue
959
+
826
960
  p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
827
961
  p.xaxis.axis_label = f"Retention Time ({rt_unit})"
828
962
  p.yaxis.axis_label = "Intensity"
@@ -839,7 +973,7 @@ def plot_eic(
839
973
 
840
974
  for uid in sample_uids:
841
975
  try:
842
- chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, rt_unit=rt_unit, label=None)
976
+ chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
843
977
  except Exception as e:
844
978
  # log and skip samples we can't compute EIC for
845
979
  self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
@@ -885,7 +1019,7 @@ def plot_eic(
885
1019
 
886
1020
  color = color_map.get(uid, "#000000")
887
1021
 
888
- data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
1022
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
889
1023
  src = ColumnDataSource(data)
890
1024
 
891
1025
  r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -896,7 +1030,7 @@ def plot_eic(
896
1030
  self.logger.warning("No EIC curves to plot for the selected samples.")
897
1031
  return
898
1032
 
899
- hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
1033
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e0}")], renderers=renderers)
900
1034
  p.add_tools(hover)
901
1035
 
902
1036
  if getattr(p, "legend", None) and len(p.legend) > 0:
@@ -928,7 +1062,6 @@ def plot_rt_correction(
928
1062
  filename: str | None = None,
929
1063
  width: int = 1000,
930
1064
  height: int = 300,
931
- rt_unit: str = "s",
932
1065
  ):
933
1066
  """
934
1067
  Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
@@ -937,7 +1070,6 @@ def plot_rt_correction(
937
1070
  """
938
1071
  from bokeh.plotting import figure, show, output_file
939
1072
  from bokeh.models import ColumnDataSource, HoverTool
940
- from bokeh.palettes import Turbo256
941
1073
  import numpy as _np
942
1074
 
943
1075
  # Validate features dataframe
@@ -954,11 +1086,17 @@ def plot_rt_correction(
954
1086
  self.logger.error("No valid sample_uids provided for RT correction plotting.")
955
1087
  return
956
1088
 
957
- # Color mapping like plot_bpc
958
- colors = Turbo256
959
- n = max(1, len(sample_uids))
960
- step = max(1, 256 // n)
961
- color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
1089
+ # Get sample colors from samples_df
1090
+ sample_colors = (
1091
+ self.samples_df
1092
+ .filter(pl.col("sample_uid").is_in(sample_uids))
1093
+ .select(["sample_uid", "sample_color"])
1094
+ .to_dict(as_series=False)
1095
+ )
1096
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
1097
+
1098
+ # For RT correction plots, default to "s" since we're working with features_df directly
1099
+ rt_unit = "s"
962
1100
 
963
1101
  p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
964
1102
  p.xaxis.axis_label = f"Retention Time ({rt_unit})"
@@ -1026,7 +1164,7 @@ def plot_rt_correction(
1026
1164
 
1027
1165
  color = color_map.get(uid, "#000000")
1028
1166
 
1029
- data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
1167
+ data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
1030
1168
  src = ColumnDataSource(data)
1031
1169
 
1032
1170
  r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
@@ -1037,7 +1175,7 @@ def plot_rt_correction(
1037
1175
  self.logger.warning("No RT correction curves to plot for the selected samples.")
1038
1176
  return
1039
1177
 
1040
- hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
1178
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
1041
1179
  p.add_tools(hover)
1042
1180
 
1043
1181
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -1083,15 +1221,17 @@ def plot_chrom(
1083
1221
  self.logger.error("No chromatogram data found.")
1084
1222
  return
1085
1223
 
1086
- # Local import for color palette
1087
- from bokeh.palettes import Turbo256
1088
-
1089
- # Assign a fixed color to each sample/column
1224
+ # Get sample colors for alignment plots
1225
+ # Need to map sample names to colors since chromatogram data uses sample names as columns
1090
1226
  sample_names = [col for col in chroms.columns if col not in ["consensus_uid"]]
1091
1227
  if not sample_names:
1092
1228
  self.logger.error("No sample names found in chromatogram data.")
1093
1229
  return
1094
- color_map = {sample: Turbo256[i * (256 // max(1, len(sample_names)))] for i, sample in enumerate(sample_names)}
1230
+
1231
+ # Create color mapping by getting sample_color for each sample_name
1232
+ samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
1233
+ sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
1234
+ color_map = {name: sample_name_to_color.get(name, "#1f77b4") for name in sample_names} # fallback to blue
1095
1235
 
1096
1236
  plots = []
1097
1237
  self.logger.info(f"Plotting {chroms.shape[0]} chromatograms...")
@@ -1461,8 +1601,8 @@ def plot_consensus_stats(
1461
1601
  def plot_pca(
1462
1602
  self,
1463
1603
  filename=None,
1464
- width=400,
1465
- height=400,
1604
+ width=500,
1605
+ height=450,
1466
1606
  alpha=0.8,
1467
1607
  markersize=6,
1468
1608
  n_components=2,
@@ -1484,7 +1624,7 @@ def plot_pca(
1484
1624
  """
1485
1625
  from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
1486
1626
  from bokeh.plotting import figure, show, output_file
1487
- from bokeh.palettes import Category20, viridis, Turbo256
1627
+ from bokeh.palettes import Category20, viridis
1488
1628
  from bokeh.transform import factor_cmap
1489
1629
  from sklearn.decomposition import PCA
1490
1630
  from sklearn.preprocessing import StandardScaler
@@ -1507,7 +1647,7 @@ def plot_pca(
1507
1647
  self.logger.error("No samples dataframe available.")
1508
1648
  return
1509
1649
 
1510
- self.logger.info(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
1650
+ self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
1511
1651
 
1512
1652
  # Convert consensus matrix to numpy if it's not already
1513
1653
  if hasattr(consensus_matrix, "values"):
@@ -1534,7 +1674,7 @@ def plot_pca(
1534
1674
  # Get explained variance ratios
1535
1675
  explained_var = pca.explained_variance_ratio_
1536
1676
 
1537
- self.logger.info(f"PCA explained variance ratios: {explained_var}")
1677
+ self.logger.debug(f"PCA explained variance ratios: {explained_var}")
1538
1678
 
1539
1679
  # Convert samples_df to pandas for easier manipulation
1540
1680
  samples_pd = samples_df.to_pandas()
@@ -1619,15 +1759,31 @@ def plot_pca(
1619
1759
  legend_field=color_by,
1620
1760
  )
1621
1761
  else:
1622
- # If no color_by provided, color points by sample similar to plot_samples_2d
1762
+ # If no color_by provided, use sample_color column from samples_df
1623
1763
  if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
1624
1764
  # Choose the identifier to map colors by
1625
1765
  id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
1626
- sample_ids = list(pd.unique(pca_df[id_col]))
1627
- colors = Turbo256
1628
- color_map = {uid: colors[i * (256 // max(1, len(sample_ids)))] for i, uid in enumerate(sample_ids)}
1766
+
1767
+ # Get colors from samples_df based on the identifier
1768
+ if id_col == "sample_uid":
1769
+ sample_colors = (
1770
+ self.samples_df
1771
+ .filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
1772
+ .select(["sample_uid", "sample_color"])
1773
+ .to_dict(as_series=False)
1774
+ )
1775
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
1776
+ else: # sample_name
1777
+ sample_colors = (
1778
+ self.samples_df
1779
+ .filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
1780
+ .select(["sample_name", "sample_color"])
1781
+ .to_dict(as_series=False)
1782
+ )
1783
+ color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
1784
+
1629
1785
  # Map colors into dataframe
1630
- pca_df["color"] = [color_map[x] for x in pca_df[id_col]]
1786
+ pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]] # fallback to blue
1631
1787
  # Update the ColumnDataSource with new color column
1632
1788
  source = ColumnDataSource(pca_df)
1633
1789
  scatter = p.scatter(
@@ -1652,14 +1808,17 @@ def plot_pca(
1652
1808
  tooltip_list = []
1653
1809
 
1654
1810
  # Columns to exclude from tooltips (file paths and internal/plot fields)
1655
- excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}
1811
+ excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2", "size"}
1656
1812
 
1657
1813
  # Add all sample dataframe columns to tooltips, skipping excluded ones
1658
1814
  for col in samples_pd.columns:
1659
1815
  if col in excluded_cols:
1660
1816
  continue
1661
1817
  if col in pca_df.columns:
1662
- if pca_df[col].dtype in ["float64", "float32"]:
1818
+ if col == "sample_color":
1819
+ # Display sample_color as a colored swatch
1820
+ tooltip_list.append(('color', "$color[swatch]:sample_color"))
1821
+ elif pca_df[col].dtype in ["float64", "float32"]:
1663
1822
  tooltip_list.append((col, f"@{col}{{0.00}}"))
1664
1823
  else:
1665
1824
  tooltip_list.append((col, f"@{col}"))
@@ -1691,7 +1850,6 @@ def plot_tic(
1691
1850
  filename: str | None = None,
1692
1851
  width: int = 1000,
1693
1852
  height: int = 300,
1694
- rt_unit: str = "s",
1695
1853
  original: bool = False,
1696
1854
  ):
1697
1855
  """
@@ -1703,7 +1861,6 @@ def plot_tic(
1703
1861
  from bokeh.plotting import figure, show, output_file
1704
1862
  from bokeh.models import ColumnDataSource, HoverTool
1705
1863
  from bokeh.io.export import export_png
1706
- from bokeh.palettes import Turbo256
1707
1864
  from masster.study.helpers import get_tic
1708
1865
 
1709
1866
  sample_uids = self._get_sample_uids(samples)
@@ -1711,13 +1868,28 @@ def plot_tic(
1711
1868
  self.logger.error("No valid sample_uids provided for TIC plotting.")
1712
1869
  return
1713
1870
 
1714
- colors = Turbo256
1715
- n = max(1, len(sample_uids))
1716
- step = max(1, 256 // n)
1717
- color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
1871
+ # Get sample colors from samples_df
1872
+ sample_colors = (
1873
+ self.samples_df
1874
+ .filter(pl.col("sample_uid").is_in(sample_uids))
1875
+ .select(["sample_uid", "sample_color"])
1876
+ .to_dict(as_series=False)
1877
+ )
1878
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
1718
1879
 
1719
1880
  plot_title = title or "Total Ion Chromatograms"
1720
1881
 
1882
+ # Get rt_unit from the first chromatogram, default to "s" if not available
1883
+ rt_unit = "s"
1884
+ for uid in sample_uids:
1885
+ try:
1886
+ first_chrom = get_tic(self, sample=uid, label=None)
1887
+ if hasattr(first_chrom, 'rt_unit'):
1888
+ rt_unit = first_chrom.rt_unit
1889
+ break
1890
+ except Exception:
1891
+ continue
1892
+
1721
1893
  p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
1722
1894
  p.xaxis.axis_label = f"Retention Time ({rt_unit})"
1723
1895
  p.yaxis.axis_label = "Intensity"
@@ -1778,7 +1950,7 @@ def plot_tic(
1778
1950
 
1779
1951
  color = color_map.get(uid, "#000000")
1780
1952
 
1781
- data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
1953
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
1782
1954
  src = ColumnDataSource(data)
1783
1955
 
1784
1956
  r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -1789,7 +1961,7 @@ def plot_tic(
1789
1961
  self.logger.warning("No TIC curves to plot for the selected samples.")
1790
1962
  return
1791
1963
 
1792
- hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
1964
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
1793
1965
  p.add_tools(hover)
1794
1966
 
1795
1967
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings