masster 0.3.13__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/plot.py CHANGED
@@ -37,7 +37,6 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
37
37
  # Local imports so the module can be used even if bokeh isn't needed elsewhere
38
38
  from bokeh.models import ColumnDataSource, HoverTool
39
39
  from bokeh.plotting import figure, show, output_file
40
- from bokeh.palettes import Turbo256
41
40
  import pandas as pd
42
41
 
43
42
  # Build the before/after tabular data used for plotting
@@ -136,34 +135,99 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
136
135
  self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
137
136
  return
138
137
 
139
- features_pd = self.features_df.to_pandas()
138
+ # Use Polars instead of pandas
139
+ features_df = self.features_df
140
140
 
141
- sample_col = 'sample_uid' if 'sample_uid' in features_pd.columns else 'sample_name'
142
- if sample_col not in features_pd.columns:
141
+ sample_col = 'sample_uid' if 'sample_uid' in features_df.columns else 'sample_name'
142
+ if sample_col not in features_df.columns:
143
143
  self.logger.error("No sample identifier column found in features_df.")
144
144
  return
145
145
 
146
- samples = features_pd[sample_col].unique()
146
+ # Get unique samples using Polars
147
+ samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
147
148
 
148
149
  for sample_idx, sample in enumerate(samples):
149
- sample_data = features_pd[features_pd[sample_col] == sample]
150
- max_inty = sample_data['inty'].max() if sample_data['inty'].max() > 0 else 1
150
+ # Filter sample data using Polars
151
+ sample_data = features_df.filter(pl.col(sample_col) == sample)
152
+
153
+ # Calculate max intensity using Polars
154
+ max_inty = sample_data.select(pl.col('inty').max()).item()
155
+ max_inty = max_inty if max_inty and max_inty > 0 else 1
156
+
151
157
  sample_name = str(sample)
152
- sample_uid = sample if sample_col == 'sample_uid' else (sample_data['sample_uid'].iloc[0] if 'sample_uid' in sample_data.columns else sample)
153
-
154
- for _, row in sample_data.iterrows():
155
- before_data.append({'rt': row['rt_original'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
156
- after_data.append({'rt': row['rt'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
157
-
158
- # Ensure dataframes exist even if empty
159
- before_df = pd.DataFrame(before_data)
160
- after_df = pd.DataFrame(after_data)
161
-
162
- # Create ColumnDataSources (safe even for empty dfs)
163
- from bokeh.models import ColumnDataSource
158
+ # Get sample_uid - if sample_col is 'sample_uid', use sample directly
159
+ if sample_col == 'sample_uid':
160
+ sample_uid = sample
161
+ else:
162
+ # Try to get sample_uid from the first row if it exists
163
+ if 'sample_uid' in sample_data.columns:
164
+ sample_uid = sample_data.select(pl.col('sample_uid')).item()
165
+ else:
166
+ sample_uid = sample
167
+
168
+ # Convert to dict for iteration - more efficient than row-by-row processing
169
+ sample_dict = sample_data.select(['rt_original', 'rt', 'mz', 'inty']).to_dicts()
170
+
171
+ for row_dict in sample_dict:
172
+ rt_original = row_dict['rt_original']
173
+ rt_current = row_dict['rt']
174
+ mz = row_dict['mz']
175
+ inty = row_dict['inty']
176
+ alpha = inty / max_inty
177
+ size = markersize + 2 if sample_idx == 0 else markersize
178
+
179
+ before_data.append({
180
+ 'rt': rt_original, 'mz': mz, 'inty': inty, 'alpha': alpha,
181
+ 'sample_idx': sample_idx, 'sample_name': sample_name,
182
+ 'sample_uid': sample_uid, 'size': size
183
+ })
184
+ after_data.append({
185
+ 'rt': rt_current, 'mz': mz, 'inty': inty, 'alpha': alpha,
186
+ 'sample_idx': sample_idx, 'sample_name': sample_name,
187
+ 'sample_uid': sample_uid, 'size': size
188
+ })
189
+
190
+ # Get sample colors from samples_df using sample indices
191
+ # Extract unique sample information from the dictionaries we created
192
+ if before_data:
193
+ # Create mapping from sample_idx to sample_uid more efficiently
194
+ sample_idx_to_uid = {}
195
+ for item in before_data:
196
+ if item['sample_idx'] not in sample_idx_to_uid:
197
+ sample_idx_to_uid[item['sample_idx']] = item['sample_uid']
198
+ else:
199
+ sample_idx_to_uid = {}
200
+
201
+ # Get colors from samples_df
202
+ sample_uids_list = list(sample_idx_to_uid.values())
203
+ if sample_uids_list and hasattr(self, 'samples_df') and self.samples_df is not None:
204
+ sample_colors = (
205
+ self.samples_df
206
+ .filter(pl.col("sample_uid").is_in(sample_uids_list))
207
+ .select(["sample_uid", "sample_color"])
208
+ .to_dict(as_series=False)
209
+ )
210
+ uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
211
+ else:
212
+ uid_to_color = {}
164
213
 
165
- before_source = ColumnDataSource(before_df)
166
- after_source = ColumnDataSource(after_df)
214
+ # Create color map for sample indices
215
+ color_map: dict[int, str] = {}
216
+ for sample_idx, sample_uid in sample_idx_to_uid.items():
217
+ color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4") # fallback to blue
218
+
219
+ # Add sample_color to data dictionaries before creating DataFrames
220
+ if before_data:
221
+ for item in before_data:
222
+ item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
223
+
224
+ if after_data:
225
+ for item in after_data:
226
+ item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
227
+
228
+ # Now create DataFrames with the sample_color already included
229
+ before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
230
+ after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
167
231
 
168
232
  # Create Bokeh figures
169
233
  p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
@@ -177,15 +241,9 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
177
241
  p2.background_fill_color = 'white'
178
242
  p2.border_fill_color = 'white'
179
243
  p2.min_border = 0
180
-
181
- # Color mapping using Turbo256
182
- unique_samples = sorted(list(set(before_df['sample_idx'].tolist()))) if not before_df.empty else []
183
- colors = Turbo256
184
- color_map: dict[int, str] = {}
185
- n = max(1, len(unique_samples))
186
- step = max(1, 256 // n)
187
- for i, sample_idx in enumerate(unique_samples):
188
- color_map[sample_idx] = colors[(i * step) % 256]
244
+
245
+ # Get unique sample indices for iteration
246
+ unique_samples = sorted(list(set(item['sample_idx'] for item in before_data))) if before_data else []
189
247
 
190
248
  renderers_before = []
191
249
  renderers_after = []
@@ -206,10 +264,10 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
206
264
  renderers_after.append(r)
207
265
 
208
266
  # Add hover tools
209
- hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_before)
267
+ hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_before)
210
268
  p1.add_tools(hover1)
211
269
 
212
- hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_after)
270
+ hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_after)
213
271
  p2.add_tools(hover2)
214
272
 
215
273
  # Create layout with both plots side by side
@@ -232,13 +290,13 @@ def plot_consensus_2d(
232
290
  self,
233
291
  filename=None,
234
292
  colorby="number_samples",
293
+ cmap=None,
294
+ markersize=4,
235
295
  sizeby="inty_mean",
236
- markersize=6,
237
- size="dynamic",
296
+ scaling="dynamic",
238
297
  alpha=0.7,
239
- cmap=None,
240
- width=900,
241
- height=900,
298
+ width=600,
299
+ height=450,
242
300
  mz_range=None,
243
301
  rt_range=None,
244
302
  ):
@@ -334,12 +392,49 @@ def plot_consensus_2d(
334
392
  except ImportError:
335
393
  from bokeh.models.annotations import ColorBar
336
394
  from bokeh.palettes import viridis
395
+
396
+ # Import cmap for colormap handling
397
+ from cmap import Colormap
337
398
 
338
399
  # Convert Polars DataFrame to pandas for Bokeh compatibility
339
400
  data_pd = data.to_pandas()
340
401
  source = ColumnDataSource(data_pd)
402
+
403
+ # Handle colormap using cmap.Colormap
404
+ try:
405
+ # Get colormap palette using cmap
406
+ if isinstance(cmap, str):
407
+ colormap = Colormap(cmap)
408
+ # Generate 256 colors and convert to hex
409
+ import numpy as np
410
+ import matplotlib.colors as mcolors
411
+ colors = colormap(np.linspace(0, 1, 256))
412
+ palette = [mcolors.rgb2hex(color) for color in colors]
413
+ else:
414
+ colormap = cmap
415
+ # Try to use to_bokeh() method first
416
+ try:
417
+ palette = colormap.to_bokeh()
418
+ # Ensure we got a color palette, not another mapper
419
+ if not isinstance(palette, (list, tuple)):
420
+ # Fall back to generating colors manually
421
+ import numpy as np
422
+ import matplotlib.colors as mcolors
423
+ colors = colormap(np.linspace(0, 1, 256))
424
+ palette = [mcolors.rgb2hex(color) for color in colors]
425
+ except AttributeError:
426
+ # Fall back to generating colors manually
427
+ import numpy as np
428
+ import matplotlib.colors as mcolors
429
+ colors = colormap(np.linspace(0, 1, 256))
430
+ palette = [mcolors.rgb2hex(color) for color in colors]
431
+ except (AttributeError, ValueError, TypeError) as e:
432
+ # Fallback to viridis if cmap interpretation fails
433
+ self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
434
+ palette = viridis(256)
435
+
341
436
  color_mapper = LinearColorMapper(
342
- palette=viridis(256),
437
+ palette=palette,
343
438
  low=data[colorby].min(),
344
439
  high=data[colorby].max(),
345
440
  )
@@ -352,11 +447,11 @@ def plot_consensus_2d(
352
447
  p.xaxis.axis_label = "Retention Time (min)"
353
448
  p.yaxis.axis_label = "m/z"
354
449
  scatter_renderer: Any = None
355
- if size.lower() in ["dyn", "dynamic"]:
450
+ if scaling.lower() in ["dyn", "dynamic"]:
356
451
  scatter_renderer = p.circle(
357
452
  x="rt",
358
453
  y="mz",
359
- radius=markersize / 10,
454
+ radius=markersize,
360
455
  fill_color={"field": colorby, "transform": color_mapper},
361
456
  line_color=None,
362
457
  alpha=alpha,
@@ -414,7 +509,6 @@ def plot_samples_2d(
414
509
  size="dynamic",
415
510
  alpha_max=0.8,
416
511
  alpha="inty",
417
- cmap="Turbo256",
418
512
  max_features=50000,
419
513
  width=600,
420
514
  height=600,
@@ -447,7 +541,6 @@ def plot_samples_2d(
447
541
  from bokeh.plotting import figure, show, output_file
448
542
  from bokeh.io.export import export_png
449
543
  from bokeh.models import ColumnDataSource, HoverTool
450
- from bokeh.palettes import Turbo256
451
544
 
452
545
  sample_uids = self._get_sample_uids(samples)
453
546
 
@@ -455,8 +548,14 @@ def plot_samples_2d(
455
548
  self.logger.error("No valid sample_uids provided.")
456
549
  return
457
550
 
458
- colors = Turbo256
459
- color_map = {uid: colors[i * (256 // max(1, len(sample_uids)))] for i, uid in enumerate(sample_uids)}
551
+ # Get sample colors from samples_df
552
+ sample_colors = (
553
+ self.samples_df
554
+ .filter(pl.col("sample_uid").is_in(sample_uids))
555
+ .select(["sample_uid", "sample_color"])
556
+ .to_dict(as_series=False)
557
+ )
558
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
460
559
 
461
560
  p = figure(
462
561
  width=width,
@@ -569,6 +668,7 @@ def plot_samples_2d(
569
668
  "inty": sample_data["inty"].values,
570
669
  "alpha": sample_data["alpha"].values,
571
670
  "sample": np.full(len(sample_data), sample_name, dtype=object),
671
+ "sample_color": np.full(len(sample_data), color_values[uid], dtype=object),
572
672
  },
573
673
  )
574
674
 
@@ -604,6 +704,7 @@ def plot_samples_2d(
604
704
  hover = HoverTool(
605
705
  tooltips=[
606
706
  ("sample", "@sample"),
707
+ ("sample_color", "$color[swatch]:sample_color"),
607
708
  ("rt", "@rt{0.00}"),
608
709
  ("mz", "@mz{0.0000}"),
609
710
  ("intensity", "@inty{0.0e+0}"),
@@ -637,7 +738,6 @@ def plot_bpc(
637
738
  filename: str | None = None,
638
739
  width: int = 1000,
639
740
  height: int = 300,
640
- rt_unit: str = "s",
641
741
  original: bool = False,
642
742
  ):
643
743
  """
@@ -653,7 +753,6 @@ def plot_bpc(
653
753
  from bokeh.plotting import figure, show, output_file
654
754
  from bokeh.models import ColumnDataSource, HoverTool
655
755
  from bokeh.io.export import export_png
656
- from bokeh.palettes import Turbo256
657
756
  from masster.study.helpers import get_bpc
658
757
 
659
758
  sample_uids = self._get_sample_uids(samples)
@@ -664,10 +763,14 @@ def plot_bpc(
664
763
  # Debug: show which sample_uids we will process
665
764
  self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
666
765
 
667
- colors = Turbo256
668
- n = max(1, len(sample_uids))
669
- step = max(1, 256 // n)
670
- color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
766
+ # Get sample colors from samples_df
767
+ sample_colors = (
768
+ self.samples_df
769
+ .filter(pl.col("sample_uid").is_in(sample_uids))
770
+ .select(["sample_uid", "sample_color"])
771
+ .to_dict(as_series=False)
772
+ )
773
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
671
774
 
672
775
  # If plotting original (uncorrected) RTs, use the requested title.
673
776
  if original:
@@ -675,6 +778,17 @@ def plot_bpc(
675
778
  else:
676
779
  plot_title = title or "Base Peak Chromatograms"
677
780
 
781
+ # Get rt_unit from the first chromatogram, default to "s" if not available
782
+ rt_unit = "s"
783
+ for uid in sample_uids:
784
+ try:
785
+ first_chrom = get_bpc(self, sample=uid, label=None, original=original)
786
+ if hasattr(first_chrom, 'rt_unit'):
787
+ rt_unit = first_chrom.rt_unit
788
+ break
789
+ except Exception:
790
+ continue
791
+
678
792
  p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
679
793
  p.xaxis.axis_label = f"Retention Time ({rt_unit})"
680
794
  p.yaxis.axis_label = "Intensity"
@@ -691,7 +805,7 @@ def plot_bpc(
691
805
 
692
806
  for uid in sample_uids:
693
807
  try:
694
- chrom = get_bpc(self, sample=uid, rt_unit=rt_unit, label=None, original=original)
808
+ chrom = get_bpc(self, sample=uid, label=None, original=original)
695
809
  except Exception as e:
696
810
  # log and skip samples we can't compute BPC for
697
811
  self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
@@ -743,7 +857,7 @@ def plot_bpc(
743
857
  f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
744
858
  )
745
859
 
746
- data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
860
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
747
861
  src = ColumnDataSource(data)
748
862
 
749
863
  r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -754,7 +868,7 @@ def plot_bpc(
754
868
  self.logger.warning("No BPC curves to plot for the selected samples.")
755
869
  return
756
870
 
757
- hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
871
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
758
872
  p.add_tools(hover)
759
873
 
760
874
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -790,7 +904,6 @@ def plot_eic(
790
904
  filename: str | None = None,
791
905
  width: int = 1000,
792
906
  height: int = 300,
793
- rt_unit: str = "s",
794
907
  original: bool = False,
795
908
  ):
796
909
  """
@@ -804,7 +917,6 @@ def plot_eic(
804
917
  from bokeh.plotting import figure, show, output_file
805
918
  from bokeh.models import ColumnDataSource, HoverTool
806
919
  from bokeh.io.export import export_png
807
- from bokeh.palettes import Turbo256
808
920
  from masster.study.helpers import get_eic
809
921
 
810
922
  if mz is None:
@@ -816,13 +928,28 @@ def plot_eic(
816
928
  self.logger.error("No valid sample_uids provided for EIC plotting.")
817
929
  return
818
930
 
819
- colors = Turbo256
820
- n = max(1, len(sample_uids))
821
- step = max(1, 256 // n)
822
- color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
931
+ # Get sample colors from samples_df
932
+ sample_colors = (
933
+ self.samples_df
934
+ .filter(pl.col("sample_uid").is_in(sample_uids))
935
+ .select(["sample_uid", "sample_color"])
936
+ .to_dict(as_series=False)
937
+ )
938
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
823
939
 
824
940
  plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
825
941
 
942
+ # Get rt_unit from the first chromatogram, default to "s" if not available
943
+ rt_unit = "s"
944
+ for uid in sample_uids:
945
+ try:
946
+ first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
947
+ if hasattr(first_chrom, 'rt_unit'):
948
+ rt_unit = first_chrom.rt_unit
949
+ break
950
+ except Exception:
951
+ continue
952
+
826
953
  p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
827
954
  p.xaxis.axis_label = f"Retention Time ({rt_unit})"
828
955
  p.yaxis.axis_label = "Intensity"
@@ -839,7 +966,7 @@ def plot_eic(
839
966
 
840
967
  for uid in sample_uids:
841
968
  try:
842
- chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, rt_unit=rt_unit, label=None)
969
+ chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
843
970
  except Exception as e:
844
971
  # log and skip samples we can't compute EIC for
845
972
  self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
@@ -885,7 +1012,7 @@ def plot_eic(
885
1012
 
886
1013
  color = color_map.get(uid, "#000000")
887
1014
 
888
- data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
1015
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
889
1016
  src = ColumnDataSource(data)
890
1017
 
891
1018
  r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -896,7 +1023,7 @@ def plot_eic(
896
1023
  self.logger.warning("No EIC curves to plot for the selected samples.")
897
1024
  return
898
1025
 
899
- hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
1026
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e0}")], renderers=renderers)
900
1027
  p.add_tools(hover)
901
1028
 
902
1029
  if getattr(p, "legend", None) and len(p.legend) > 0:
@@ -928,7 +1055,6 @@ def plot_rt_correction(
928
1055
  filename: str | None = None,
929
1056
  width: int = 1000,
930
1057
  height: int = 300,
931
- rt_unit: str = "s",
932
1058
  ):
933
1059
  """
934
1060
  Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
@@ -937,7 +1063,6 @@ def plot_rt_correction(
937
1063
  """
938
1064
  from bokeh.plotting import figure, show, output_file
939
1065
  from bokeh.models import ColumnDataSource, HoverTool
940
- from bokeh.palettes import Turbo256
941
1066
  import numpy as _np
942
1067
 
943
1068
  # Validate features dataframe
@@ -954,11 +1079,17 @@ def plot_rt_correction(
954
1079
  self.logger.error("No valid sample_uids provided for RT correction plotting.")
955
1080
  return
956
1081
 
957
- # Color mapping like plot_bpc
958
- colors = Turbo256
959
- n = max(1, len(sample_uids))
960
- step = max(1, 256 // n)
961
- color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
1082
+ # Get sample colors from samples_df
1083
+ sample_colors = (
1084
+ self.samples_df
1085
+ .filter(pl.col("sample_uid").is_in(sample_uids))
1086
+ .select(["sample_uid", "sample_color"])
1087
+ .to_dict(as_series=False)
1088
+ )
1089
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
1090
+
1091
+ # For RT correction plots, default to "s" since we're working with features_df directly
1092
+ rt_unit = "s"
962
1093
 
963
1094
  p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
964
1095
  p.xaxis.axis_label = f"Retention Time ({rt_unit})"
@@ -1026,7 +1157,7 @@ def plot_rt_correction(
1026
1157
 
1027
1158
  color = color_map.get(uid, "#000000")
1028
1159
 
1029
- data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
1160
+ data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
1030
1161
  src = ColumnDataSource(data)
1031
1162
 
1032
1163
  r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
@@ -1037,7 +1168,7 @@ def plot_rt_correction(
1037
1168
  self.logger.warning("No RT correction curves to plot for the selected samples.")
1038
1169
  return
1039
1170
 
1040
- hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
1171
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
1041
1172
  p.add_tools(hover)
1042
1173
 
1043
1174
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -1083,15 +1214,17 @@ def plot_chrom(
1083
1214
  self.logger.error("No chromatogram data found.")
1084
1215
  return
1085
1216
 
1086
- # Local import for color palette
1087
- from bokeh.palettes import Turbo256
1088
-
1089
- # Assign a fixed color to each sample/column
1217
+ # Get sample colors for alignment plots
1218
+ # Need to map sample names to colors since chromatogram data uses sample names as columns
1090
1219
  sample_names = [col for col in chroms.columns if col not in ["consensus_uid"]]
1091
1220
  if not sample_names:
1092
1221
  self.logger.error("No sample names found in chromatogram data.")
1093
1222
  return
1094
- color_map = {sample: Turbo256[i * (256 // max(1, len(sample_names)))] for i, sample in enumerate(sample_names)}
1223
+
1224
+ # Create color mapping by getting sample_color for each sample_name
1225
+ samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
1226
+ sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
1227
+ color_map = {name: sample_name_to_color.get(name, "#1f77b4") for name in sample_names} # fallback to blue
1095
1228
 
1096
1229
  plots = []
1097
1230
  self.logger.info(f"Plotting {chroms.shape[0]} chromatograms...")
@@ -1461,8 +1594,8 @@ def plot_consensus_stats(
1461
1594
  def plot_pca(
1462
1595
  self,
1463
1596
  filename=None,
1464
- width=400,
1465
- height=400,
1597
+ width=500,
1598
+ height=450,
1466
1599
  alpha=0.8,
1467
1600
  markersize=6,
1468
1601
  n_components=2,
@@ -1484,7 +1617,7 @@ def plot_pca(
1484
1617
  """
1485
1618
  from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
1486
1619
  from bokeh.plotting import figure, show, output_file
1487
- from bokeh.palettes import Category20, viridis, Turbo256
1620
+ from bokeh.palettes import Category20, viridis
1488
1621
  from bokeh.transform import factor_cmap
1489
1622
  from sklearn.decomposition import PCA
1490
1623
  from sklearn.preprocessing import StandardScaler
@@ -1507,7 +1640,7 @@ def plot_pca(
1507
1640
  self.logger.error("No samples dataframe available.")
1508
1641
  return
1509
1642
 
1510
- self.logger.info(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
1643
+ self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
1511
1644
 
1512
1645
  # Convert consensus matrix to numpy if it's not already
1513
1646
  if hasattr(consensus_matrix, "values"):
@@ -1534,7 +1667,7 @@ def plot_pca(
1534
1667
  # Get explained variance ratios
1535
1668
  explained_var = pca.explained_variance_ratio_
1536
1669
 
1537
- self.logger.info(f"PCA explained variance ratios: {explained_var}")
1670
+ self.logger.debug(f"PCA explained variance ratios: {explained_var}")
1538
1671
 
1539
1672
  # Convert samples_df to pandas for easier manipulation
1540
1673
  samples_pd = samples_df.to_pandas()
@@ -1619,15 +1752,31 @@ def plot_pca(
1619
1752
  legend_field=color_by,
1620
1753
  )
1621
1754
  else:
1622
- # If no color_by provided, color points by sample similar to plot_samples_2d
1755
+ # If no color_by provided, use sample_color column from samples_df
1623
1756
  if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
1624
1757
  # Choose the identifier to map colors by
1625
1758
  id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
1626
- sample_ids = list(pd.unique(pca_df[id_col]))
1627
- colors = Turbo256
1628
- color_map = {uid: colors[i * (256 // max(1, len(sample_ids)))] for i, uid in enumerate(sample_ids)}
1759
+
1760
+ # Get colors from samples_df based on the identifier
1761
+ if id_col == "sample_uid":
1762
+ sample_colors = (
1763
+ self.samples_df
1764
+ .filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
1765
+ .select(["sample_uid", "sample_color"])
1766
+ .to_dict(as_series=False)
1767
+ )
1768
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
1769
+ else: # sample_name
1770
+ sample_colors = (
1771
+ self.samples_df
1772
+ .filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
1773
+ .select(["sample_name", "sample_color"])
1774
+ .to_dict(as_series=False)
1775
+ )
1776
+ color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
1777
+
1629
1778
  # Map colors into dataframe
1630
- pca_df["color"] = [color_map[x] for x in pca_df[id_col]]
1779
+ pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]] # fallback to blue
1631
1780
  # Update the ColumnDataSource with new color column
1632
1781
  source = ColumnDataSource(pca_df)
1633
1782
  scatter = p.scatter(
@@ -1652,14 +1801,17 @@ def plot_pca(
1652
1801
  tooltip_list = []
1653
1802
 
1654
1803
  # Columns to exclude from tooltips (file paths and internal/plot fields)
1655
- excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}
1804
+ excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2", "size"}
1656
1805
 
1657
1806
  # Add all sample dataframe columns to tooltips, skipping excluded ones
1658
1807
  for col in samples_pd.columns:
1659
1808
  if col in excluded_cols:
1660
1809
  continue
1661
1810
  if col in pca_df.columns:
1662
- if pca_df[col].dtype in ["float64", "float32"]:
1811
+ if col == "sample_color":
1812
+ # Display sample_color as a colored swatch
1813
+ tooltip_list.append(('color', "$color[swatch]:sample_color"))
1814
+ elif pca_df[col].dtype in ["float64", "float32"]:
1663
1815
  tooltip_list.append((col, f"@{col}{{0.00}}"))
1664
1816
  else:
1665
1817
  tooltip_list.append((col, f"@{col}"))
@@ -1691,7 +1843,6 @@ def plot_tic(
1691
1843
  filename: str | None = None,
1692
1844
  width: int = 1000,
1693
1845
  height: int = 300,
1694
- rt_unit: str = "s",
1695
1846
  original: bool = False,
1696
1847
  ):
1697
1848
  """
@@ -1703,7 +1854,6 @@ def plot_tic(
1703
1854
  from bokeh.plotting import figure, show, output_file
1704
1855
  from bokeh.models import ColumnDataSource, HoverTool
1705
1856
  from bokeh.io.export import export_png
1706
- from bokeh.palettes import Turbo256
1707
1857
  from masster.study.helpers import get_tic
1708
1858
 
1709
1859
  sample_uids = self._get_sample_uids(samples)
@@ -1711,13 +1861,28 @@ def plot_tic(
1711
1861
  self.logger.error("No valid sample_uids provided for TIC plotting.")
1712
1862
  return
1713
1863
 
1714
- colors = Turbo256
1715
- n = max(1, len(sample_uids))
1716
- step = max(1, 256 // n)
1717
- color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
1864
+ # Get sample colors from samples_df
1865
+ sample_colors = (
1866
+ self.samples_df
1867
+ .filter(pl.col("sample_uid").is_in(sample_uids))
1868
+ .select(["sample_uid", "sample_color"])
1869
+ .to_dict(as_series=False)
1870
+ )
1871
+ color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
1718
1872
 
1719
1873
  plot_title = title or "Total Ion Chromatograms"
1720
1874
 
1875
+ # Get rt_unit from the first chromatogram, default to "s" if not available
1876
+ rt_unit = "s"
1877
+ for uid in sample_uids:
1878
+ try:
1879
+ first_chrom = get_tic(self, sample=uid, label=None)
1880
+ if hasattr(first_chrom, 'rt_unit'):
1881
+ rt_unit = first_chrom.rt_unit
1882
+ break
1883
+ except Exception:
1884
+ continue
1885
+
1721
1886
  p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
1722
1887
  p.xaxis.axis_label = f"Retention Time ({rt_unit})"
1723
1888
  p.yaxis.axis_label = "Intensity"
@@ -1778,7 +1943,7 @@ def plot_tic(
1778
1943
 
1779
1944
  color = color_map.get(uid, "#000000")
1780
1945
 
1781
- data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
1946
+ data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
1782
1947
  src = ColumnDataSource(data)
1783
1948
 
1784
1949
  r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
@@ -1789,7 +1954,7 @@ def plot_tic(
1789
1954
  self.logger.warning("No TIC curves to plot for the selected samples.")
1790
1955
  return
1791
1956
 
1792
- hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
1957
+ hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
1793
1958
  p.add_tools(hover)
1794
1959
 
1795
1960
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings