masster 0.3.17-py3-none-any.whl → 0.3.19-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of masster might be problematic.

masster/study/plot.py CHANGED
@@ -17,7 +17,18 @@ hv.extension("bokeh")
17
17
  from bokeh.layouts import row as bokeh_row
18
18
 
19
19
 
20
- def plot_alignment(self, maps: bool = True, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
+ def plot_alignment(
+     self,
+     maps: bool = True,
+     samples: int | list[int | str] | None = None,
+     filename: str | None = None,
+     width: int = 450,
+     height: int = 450,
+     markersize: int = 3,
+ ):
21
32
  """Visualize retention time alignment using two synchronized Bokeh scatter plots.
22
33
 
23
34
  - When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
@@ -27,6 +38,11 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
27
38
 
28
39
  Parameters
29
40
  - maps: whether to use feature maps (default True).
41
+ - samples: Sample selection parameter, interpreted like in plot_samples_2d:
42
+ - None: show all samples
43
+ - int: show a random subset of N samples
44
+ - list of ints: show samples with these sample_uids
45
+ - list of strings: show samples with these sample_names
30
46
  - filename: optional HTML file path to save the plot.
31
47
  - width/height: pixel size of each subplot.
32
48
  - markersize: base marker size.
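As a rough usage sketch of the new samples parameter (assuming a hypothetical Study instance named study with feature maps or features_df already loaded; the calls mirror the docstring above):

    study.plot_alignment()                            # all samples
    study.plot_alignment(samples=5)                   # random subset of 5 samples
    study.plot_alignment(samples=[101, 102])          # select by sample_uid
    study.plot_alignment(samples=["QC_01", "QC_02"],  # select by sample_name
                         filename="alignment.html")   # and save to HTML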
@@ -54,6 +70,32 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
54
70
  self.logger.error("No feature maps available for plotting.")
55
71
  return
56
72
 
73
+ # Get sample_uids to limit which samples to show
74
+ sample_uids_to_show = self._get_sample_uids(samples)
75
+
76
+ # Filter feature maps based on sample selection
77
+ if sample_uids_to_show is not None:
78
+ # Get sample indices for the selected sample_uids
79
+ selected_indices = []
80
+ if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
81
+ samples_info = self.samples_df.to_pandas()
82
+ for idx, row in samples_info.iterrows():
83
+ if row.get('sample_uid') in sample_uids_to_show:
84
+ selected_indices.append(idx)
85
+ else:
86
+ # If no samples_df, just limit to the first N samples
87
+ if isinstance(samples, int):
88
+ selected_indices = list(range(min(samples, len(fmaps))))
89
+ else:
90
+ selected_indices = list(range(len(fmaps)))
91
+
92
+ # Filter feature maps to only include selected indices
93
+ fmaps = [fmaps[i] for i in selected_indices if i < len(fmaps)]
94
+
95
+ if not fmaps:
96
+ self.logger.error("No feature maps match the selected samples.")
97
+ return
98
+
57
99
  # Reference (first) sample: use current RT for both before and after
58
100
  ref = fmaps[0]
59
101
  ref_rt = [f.getRT() for f in ref]
@@ -62,17 +104,39 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
62
104
  max_ref_inty = max(ref_inty) if ref_inty else 1
63
105
 
64
106
  # sample metadata
65
- if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
107
+ if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
66
108
  samples_info = self.samples_df.to_pandas()
67
- ref_sample_uid = samples_info.iloc[0]['sample_uid'] if 'sample_uid' in samples_info.columns else 'Reference_UID'
68
- ref_sample_name = samples_info.iloc[0]['sample_name'] if 'sample_name' in samples_info.columns else 'Reference'
109
+ ref_sample_uid = (
110
+ samples_info.iloc[0]["sample_uid"] if "sample_uid" in samples_info.columns else "Reference_UID"
111
+ )
112
+ ref_sample_name = (
113
+ samples_info.iloc[0]["sample_name"] if "sample_name" in samples_info.columns else "Reference"
114
+ )
69
115
  else:
70
- ref_sample_uid = 'Reference_UID'
71
- ref_sample_name = 'Reference'
116
+ ref_sample_uid = "Reference_UID"
117
+ ref_sample_name = "Reference"
72
118
 
73
119
  for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
74
- before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
75
- after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
120
+ before_data.append({
121
+ "rt": rt,
122
+ "mz": mz,
123
+ "inty": inty,
124
+ "alpha": inty / max_ref_inty,
125
+ "sample_idx": 0,
126
+ "sample_name": ref_sample_name,
127
+ "sample_uid": ref_sample_uid,
128
+ "size": markersize + 2,
129
+ })
130
+ after_data.append({
131
+ "rt": rt,
132
+ "mz": mz,
133
+ "inty": inty,
134
+ "alpha": inty / max_ref_inty,
135
+ "sample_idx": 0,
136
+ "sample_name": ref_sample_name,
137
+ "sample_uid": ref_sample_uid,
138
+ "size": markersize + 2,
139
+ })
76
140
 
77
141
  # Remaining samples
78
142
  for sample_idx, fm in enumerate(fmaps[1:], start=1):
@@ -83,7 +147,7 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
83
147
 
84
148
  for f in fm:
85
149
  try:
86
- orig = f.getMetaValue('original_RT')
150
+ orig = f.getMetaValue("original_RT")
87
151
  except Exception:
88
152
  orig = None
89
153
 
@@ -101,23 +165,41 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
101
165
 
102
166
  max_inty = max(inty_vals)
103
167
 
104
- if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
168
+ if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
105
169
  samples_info = self.samples_df.to_pandas()
106
170
  if sample_idx < len(samples_info):
107
- sample_name = samples_info.iloc[sample_idx].get('sample_name', f'Sample {sample_idx}')
108
- sample_uid = samples_info.iloc[sample_idx].get('sample_uid', f'Sample_{sample_idx}_UID')
171
+ sample_name = samples_info.iloc[sample_idx].get("sample_name", f"Sample {sample_idx}")
172
+ sample_uid = samples_info.iloc[sample_idx].get("sample_uid", f"Sample_{sample_idx}_UID")
109
173
  else:
110
- sample_name = f'Sample {sample_idx}'
111
- sample_uid = f'Sample_{sample_idx}_UID'
174
+ sample_name = f"Sample {sample_idx}"
175
+ sample_uid = f"Sample_{sample_idx}_UID"
112
176
  else:
113
- sample_name = f'Sample {sample_idx}'
114
- sample_uid = f'Sample_{sample_idx}_UID'
177
+ sample_name = f"Sample {sample_idx}"
178
+ sample_uid = f"Sample_{sample_idx}_UID"
115
179
 
116
180
  for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
117
- before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
181
+ before_data.append({
182
+ "rt": rt,
183
+ "mz": mz,
184
+ "inty": inty,
185
+ "alpha": inty / max_inty,
186
+ "sample_idx": sample_idx,
187
+ "sample_name": sample_name,
188
+ "sample_uid": sample_uid,
189
+ "size": markersize,
190
+ })
118
191
 
119
192
  for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
120
- after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
193
+ after_data.append({
194
+ "rt": rt,
195
+ "mz": mz,
196
+ "inty": inty,
197
+ "alpha": inty / max_inty,
198
+ "sample_idx": sample_idx,
199
+ "sample_name": sample_name,
200
+ "sample_uid": sample_uid,
201
+ "size": markersize,
202
+ })
121
203
 
122
204
  else:
123
205
  # Use features_df
@@ -125,66 +207,98 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
125
207
  self.logger.error("No features_df found. Load features first.")
126
208
  return
127
209
 
128
- required_cols = ['rt', 'mz', 'inty']
210
+ required_cols = ["rt", "mz", "inty"]
129
211
  missing = [c for c in required_cols if c not in self.features_df.columns]
130
212
  if missing:
131
213
  self.logger.error(f"Missing required columns in features_df: {missing}")
132
214
  return
133
215
 
134
- if 'rt_original' not in self.features_df.columns:
216
+ if "rt_original" not in self.features_df.columns:
135
217
  self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
136
218
  return
137
219
 
138
220
  # Use Polars instead of pandas
139
221
  features_df = self.features_df
140
222
 
141
- sample_col = 'sample_uid' if 'sample_uid' in features_df.columns else 'sample_name'
223
+ sample_col = "sample_uid" if "sample_uid" in features_df.columns else "sample_name"
142
224
  if sample_col not in features_df.columns:
143
225
  self.logger.error("No sample identifier column found in features_df.")
144
226
  return
145
227
 
228
+ # Get sample_uids to limit which samples to show
229
+ sample_uids_to_show = self._get_sample_uids(samples)
230
+
231
+ # Filter features_df based on sample selection if specified
232
+ if sample_uids_to_show is not None:
233
+ if sample_col == 'sample_uid':
234
+ features_df = features_df.filter(pl.col('sample_uid').is_in(sample_uids_to_show))
235
+ else:
236
+ # Need to convert sample names to sample_uids if using sample_name column
237
+ if 'sample_uid' in features_df.columns:
238
+ # Filter by sample_uid even though we're using sample_name as the primary column
239
+ features_df = features_df.filter(pl.col('sample_uid').is_in(sample_uids_to_show))
240
+ else:
241
+ # Convert sample_uids to sample_names and filter
242
+ sample_names_to_show = []
243
+ if hasattr(self, 'samples_df') and self.samples_df is not None:
244
+ for uid in sample_uids_to_show:
245
+ matching_rows = self.samples_df.filter(pl.col("sample_uid") == uid)
246
+ if not matching_rows.is_empty():
247
+ sample_names_to_show.append(matching_rows.row(0, named=True)["sample_name"])
248
+ features_df = features_df.filter(pl.col('sample_name').is_in(sample_names_to_show))
249
+
146
250
  # Get unique samples using Polars
147
251
  samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
148
252
 
149
253
  for sample_idx, sample in enumerate(samples):
150
254
  # Filter sample data using Polars
151
255
  sample_data = features_df.filter(pl.col(sample_col) == sample)
152
-
256
+
153
257
  # Calculate max intensity using Polars
154
- max_inty = sample_data.select(pl.col('inty').max()).item()
258
+ max_inty = sample_data.select(pl.col("inty").max()).item()
155
259
  max_inty = max_inty if max_inty and max_inty > 0 else 1
156
-
260
+
157
261
  sample_name = str(sample)
158
262
  # Get sample_uid - if sample_col is 'sample_uid', use sample directly
159
- if sample_col == 'sample_uid':
263
+ if sample_col == "sample_uid":
160
264
  sample_uid = sample
161
265
  else:
162
266
  # Try to get sample_uid from the first row if it exists
163
- if 'sample_uid' in sample_data.columns:
164
- sample_uid = sample_data.select(pl.col('sample_uid')).item()
267
+ if "sample_uid" in sample_data.columns:
268
+ sample_uid = sample_data.select(pl.col("sample_uid")).item()
165
269
  else:
166
270
  sample_uid = sample
167
271
 
168
272
  # Convert to dict for iteration - more efficient than row-by-row processing
169
- sample_dict = sample_data.select(['rt_original', 'rt', 'mz', 'inty']).to_dicts()
170
-
273
+ sample_dict = sample_data.select(["rt_original", "rt", "mz", "inty"]).to_dicts()
274
+
171
275
  for row_dict in sample_dict:
172
- rt_original = row_dict['rt_original']
173
- rt_current = row_dict['rt']
174
- mz = row_dict['mz']
175
- inty = row_dict['inty']
276
+ rt_original = row_dict["rt_original"]
277
+ rt_current = row_dict["rt"]
278
+ mz = row_dict["mz"]
279
+ inty = row_dict["inty"]
176
280
  alpha = inty / max_inty
177
281
  size = markersize + 2 if sample_idx == 0 else markersize
178
-
282
+
179
283
  before_data.append({
180
- 'rt': rt_original, 'mz': mz, 'inty': inty, 'alpha': alpha,
181
- 'sample_idx': sample_idx, 'sample_name': sample_name,
182
- 'sample_uid': sample_uid, 'size': size
284
+ "rt": rt_original,
285
+ "mz": mz,
286
+ "inty": inty,
287
+ "alpha": alpha,
288
+ "sample_idx": sample_idx,
289
+ "sample_name": sample_name,
290
+ "sample_uid": sample_uid,
291
+ "size": size,
183
292
  })
184
293
  after_data.append({
185
- 'rt': rt_current, 'mz': mz, 'inty': inty, 'alpha': alpha,
186
- 'sample_idx': sample_idx, 'sample_name': sample_name,
187
- 'sample_uid': sample_uid, 'size': size
294
+ "rt": rt_current,
295
+ "mz": mz,
296
+ "inty": inty,
297
+ "alpha": alpha,
298
+ "sample_idx": sample_idx,
299
+ "sample_name": sample_name,
300
+ "sample_uid": sample_uid,
301
+ "size": size,
188
302
  })
189
303
 
190
304
  # Get sample colors from samples_df using sample indices
@@ -193,17 +307,16 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
193
307
  # Create mapping from sample_idx to sample_uid more efficiently
194
308
  sample_idx_to_uid = {}
195
309
  for item in before_data:
196
- if item['sample_idx'] not in sample_idx_to_uid:
197
- sample_idx_to_uid[item['sample_idx']] = item['sample_uid']
310
+ if item["sample_idx"] not in sample_idx_to_uid:
311
+ sample_idx_to_uid[item["sample_idx"]] = item["sample_uid"]
198
312
  else:
199
313
  sample_idx_to_uid = {}
200
-
314
+
201
315
  # Get colors from samples_df
202
316
  sample_uids_list = list(sample_idx_to_uid.values())
203
- if sample_uids_list and hasattr(self, 'samples_df') and self.samples_df is not None:
317
+ if sample_uids_list and hasattr(self, "samples_df") and self.samples_df is not None:
204
318
  sample_colors = (
205
- self.samples_df
206
- .filter(pl.col("sample_uid").is_in(sample_uids_list))
319
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids_list))
207
320
  .select(["sample_uid", "sample_color"])
208
321
  .to_dict(as_series=False)
209
322
  )
@@ -219,68 +332,106 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
219
332
  # Add sample_color to data dictionaries before creating DataFrames
220
333
  if before_data:
221
334
  for item in before_data:
222
- item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
223
-
335
+ item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
336
+
224
337
  if after_data:
225
338
  for item in after_data:
226
- item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
227
-
339
+ item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
340
+
228
341
  # Now create DataFrames with the sample_color already included
229
342
  before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
230
343
  after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
231
344
 
232
345
  # Create Bokeh figures
233
- p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
346
+ p1 = figure(
347
+ width=width,
348
+ height=height,
349
+ title="Original RT",
350
+ x_axis_label="Retention Time (s)",
351
+ y_axis_label="m/z",
352
+ tools="pan,wheel_zoom,box_zoom,reset,save",
353
+ )
234
354
  p1.outline_line_color = None
235
- p1.background_fill_color = 'white'
236
- p1.border_fill_color = 'white'
355
+ p1.background_fill_color = "white"
356
+ p1.border_fill_color = "white"
237
357
  p1.min_border = 0
238
358
 
239
- p2 = figure(width=width, height=height, title='Current RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save', x_range=p1.x_range, y_range=p1.y_range)
359
+ p2 = figure(
360
+ width=width,
361
+ height=height,
362
+ title="Current RT",
363
+ x_axis_label="Retention Time (s)",
364
+ y_axis_label="m/z",
365
+ tools="pan,wheel_zoom,box_zoom,reset,save",
366
+ x_range=p1.x_range,
367
+ y_range=p1.y_range,
368
+ )
240
369
  p2.outline_line_color = None
241
- p2.background_fill_color = 'white'
242
- p2.border_fill_color = 'white'
370
+ p2.background_fill_color = "white"
371
+ p2.border_fill_color = "white"
243
372
  p2.min_border = 0
244
-
373
+
245
374
  # Get unique sample indices for iteration
246
- unique_samples = sorted(list(set(item['sample_idx'] for item in before_data))) if before_data else []
375
+ unique_samples = sorted(list({item["sample_idx"] for item in before_data})) if before_data else []
247
376
 
248
377
  renderers_before = []
249
378
  renderers_after = []
250
379
 
251
380
  for sample_idx in unique_samples:
252
- sb = before_df[before_df['sample_idx'] == sample_idx]
253
- sa = after_df[after_df['sample_idx'] == sample_idx]
254
- color = color_map.get(sample_idx, '#000000')
381
+ sb = before_df[before_df["sample_idx"] == sample_idx]
382
+ sa = after_df[after_df["sample_idx"] == sample_idx]
383
+ color = color_map.get(sample_idx, "#000000")
255
384
 
256
385
  if not sb.empty:
257
386
  src = ColumnDataSource(sb)
258
- r = p1.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
387
+ r = p1.scatter("rt", "mz", size="size", color=color, alpha="alpha", source=src)
259
388
  renderers_before.append(r)
260
389
 
261
390
  if not sa.empty:
262
391
  src = ColumnDataSource(sa)
263
- r = p2.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
392
+ r = p2.scatter("rt", "mz", size="size", color=color, alpha="alpha", source=src)
264
393
  renderers_after.append(r)
265
394
 
266
395
  # Add hover tools
267
- hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_before)
396
+ hover1 = HoverTool(
397
+ tooltips=[
398
+ ("Sample UID", "@sample_uid"),
399
+ ("Sample Name", "@sample_name"),
400
+ ("Sample Color", "$color[swatch]:sample_color"),
401
+ ("RT", "@rt{0.00}"),
402
+ ("m/z", "@mz{0.0000}"),
403
+ ("Intensity", "@inty{0.0e0}"),
404
+ ],
405
+ renderers=renderers_before,
406
+ )
268
407
  p1.add_tools(hover1)
269
408
 
270
- hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_after)
409
+ hover2 = HoverTool(
410
+ tooltips=[
411
+ ("Sample UID", "@sample_uid"),
412
+ ("Sample Name", "@sample_name"),
413
+ ("Sample Color", "$color[swatch]:sample_color"),
414
+ ("RT", "@rt{0.00}"),
415
+ ("m/z", "@mz{0.0000}"),
416
+ ("Intensity", "@inty{0.0e0}"),
417
+ ],
418
+ renderers=renderers_after,
419
+ )
271
420
  p2.add_tools(hover2)
272
421
 
273
422
  # Create layout with both plots side by side
274
423
  # Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
275
- layout = bokeh_row(p1, p2, sizing_mode='fixed', width=width, height=height)
424
+ layout = bokeh_row(p1, p2, sizing_mode="fixed", width=width, height=height)
276
425
 
277
426
  # Output and show
278
427
  if filename:
279
428
  from bokeh.plotting import output_file, show
429
+
280
430
  output_file(filename)
281
431
  show(layout)
282
432
  else:
283
433
  from bokeh.plotting import show
434
+
284
435
  show(layout)
285
436
 
286
437
  return layout
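The two panels above stay synchronized because the second figure reuses the first figure's ranges (x_range=p1.x_range, y_range=p1.y_range) before both are placed in a bokeh_row. A minimal, self-contained sketch of that linked-axes pattern, using made-up data rather than masster objects:

    import pandas as pd
    from bokeh.layouts import row
    from bokeh.models import ColumnDataSource
    from bokeh.plotting import figure, show

    # Toy stand-in for feature coordinates
    src = ColumnDataSource(pd.DataFrame({"rt": [10.0, 20.0, 30.0], "mz": [150.1, 250.2, 350.3]}))

    p1 = figure(width=450, height=450, title="Original RT", tools="pan,wheel_zoom,box_zoom,reset,save")
    p1.scatter("rt", "mz", source=src, size=5)

    # Sharing p1's ranges links pan/zoom across both panels
    p2 = figure(width=450, height=450, title="Current RT", tools="pan,wheel_zoom,box_zoom,reset,save",
                x_range=p1.x_range, y_range=p1.y_range)
    p2.scatter("rt", "mz", source=src, size=5)

    show(row(p1, p2, sizing_mode="fixed", width=900, height=450))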
@@ -392,14 +543,14 @@ def plot_consensus_2d(
392
543
  except ImportError:
393
544
  from bokeh.models.annotations import ColorBar
394
545
  from bokeh.palettes import viridis
395
-
546
+
396
547
  # Import cmap for colormap handling
397
548
  from cmap import Colormap
398
549
 
399
550
  # Convert Polars DataFrame to pandas for Bokeh compatibility
400
551
  data_pd = data.to_pandas()
401
552
  source = ColumnDataSource(data_pd)
402
-
553
+
403
554
  # Handle colormap using cmap.Colormap
404
555
  try:
405
556
  # Get colormap palette using cmap
@@ -408,6 +559,7 @@ def plot_consensus_2d(
408
559
  # Generate 256 colors and convert to hex
409
560
  import numpy as np
410
561
  import matplotlib.colors as mcolors
562
+
411
563
  colors = colormap(np.linspace(0, 1, 256))
412
564
  palette = [mcolors.rgb2hex(color) for color in colors]
413
565
  else:
@@ -420,19 +572,21 @@ def plot_consensus_2d(
420
572
  # Fall back to generating colors manually
421
573
  import numpy as np
422
574
  import matplotlib.colors as mcolors
575
+
423
576
  colors = colormap(np.linspace(0, 1, 256))
424
577
  palette = [mcolors.rgb2hex(color) for color in colors]
425
578
  except AttributeError:
426
579
  # Fall back to generating colors manually
427
580
  import numpy as np
428
581
  import matplotlib.colors as mcolors
582
+
429
583
  colors = colormap(np.linspace(0, 1, 256))
430
584
  palette = [mcolors.rgb2hex(color) for color in colors]
431
585
  except (AttributeError, ValueError, TypeError) as e:
432
586
  # Fallback to viridis if cmap interpretation fails
433
587
  self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
434
588
  palette = viridis(256)
435
-
589
+
436
590
  color_mapper = LinearColorMapper(
437
591
  palette=palette,
438
592
  low=data[colorby].min(),
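The palette handling above turns a cmap Colormap into a list of 256 hex colors that Bokeh's LinearColorMapper accepts, with viridis as the fallback. A minimal sketch of that conversion, assuming the cmap, numpy, matplotlib, and bokeh packages are available:

    import numpy as np
    import matplotlib.colors as mcolors
    from bokeh.models import LinearColorMapper
    from cmap import Colormap

    colormap = Colormap("viridis")                  # any colormap name cmap can interpret
    colors = colormap(np.linspace(0, 1, 256))       # RGBA values sampled along the colormap
    palette = [mcolors.rgb2hex(c) for c in colors]  # hex strings for Bokeh
    color_mapper = LinearColorMapper(palette=palette, low=0.0, high=1.0)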
@@ -550,8 +704,7 @@ def plot_samples_2d(
550
704
 
551
705
  # Get sample colors from samples_df
552
706
  sample_colors = (
553
- self.samples_df
554
- .filter(pl.col("sample_uid").is_in(sample_uids))
707
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
555
708
  .select(["sample_uid", "sample_color"])
556
709
  .to_dict(as_series=False)
557
710
  )
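This sample-color lookup recurs throughout the module: filter samples_df down to the selected sample_uids, pull the uid and color columns, and zip them into a dict. A small standalone Polars sketch with made-up values:

    import polars as pl

    samples_df = pl.DataFrame({
        "sample_uid": [1, 2, 3],
        "sample_color": ["#1f77b4", "#ff7f0e", "#2ca02c"],
    })
    sample_uids = [1, 3]

    sample_colors = (
        samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
        .select(["sample_uid", "sample_color"])
        .to_dict(as_series=False)
    )
    color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
    # {1: "#1f77b4", 3: "#2ca02c"}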
@@ -741,7 +894,7 @@ def plot_bpc(
741
894
  original: bool = False,
742
895
  ):
743
896
  """
744
- Plot Base Peak Chromatograms (BPC) for selected samples overlayed using Bokeh.
897
+ Plot Base Peak Chromatograms (BPC) for selected samples overlaid using Bokeh.
745
898
 
746
899
  This collects per-sample BPCs via `get_bpc(self, sample=uid)` and overlays them.
747
900
  Colors are mapped per-sample using the same Turbo256 palette as `plot_samples_2d`.
@@ -765,8 +918,7 @@ def plot_bpc(
765
918
 
766
919
  # Get sample colors from samples_df
767
920
  sample_colors = (
768
- self.samples_df
769
- .filter(pl.col("sample_uid").is_in(sample_uids))
921
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
770
922
  .select(["sample_uid", "sample_color"])
771
923
  .to_dict(as_series=False)
772
924
  )
@@ -783,7 +935,7 @@ def plot_bpc(
783
935
  for uid in sample_uids:
784
936
  try:
785
937
  first_chrom = get_bpc(self, sample=uid, label=None, original=original)
786
- if hasattr(first_chrom, 'rt_unit'):
938
+ if hasattr(first_chrom, "rt_unit"):
787
939
  rt_unit = first_chrom.rt_unit
788
940
  break
789
941
  except Exception:
@@ -814,7 +966,11 @@ def plot_bpc(
814
966
  # extract arrays
815
967
  try:
816
968
  # prefer Chromatogram API
817
- chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
969
+ chrom_dict = (
970
+ chrom.to_dict()
971
+ if hasattr(chrom, "to_dict")
972
+ else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
973
+ )
818
974
  rt = chrom_dict.get("rt")
819
975
  inty = chrom_dict.get("inty")
820
976
  except Exception:
@@ -854,7 +1010,7 @@ def plot_bpc(
854
1010
 
855
1011
  # Debug: log sample processing details
856
1012
  self.logger.debug(
857
- f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
1013
+ f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}",
858
1014
  )
859
1015
 
860
1016
  data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
@@ -868,7 +1024,15 @@ def plot_bpc(
868
1024
  self.logger.warning("No BPC curves to plot for the selected samples.")
869
1025
  return
870
1026
 
871
- hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
1027
+ hover = HoverTool(
1028
+ tooltips=[
1029
+ ("sample", "@sample"),
1030
+ ("sample_color", "$color[swatch]:sample_color"),
1031
+ ("rt", "@rt{0.00}"),
1032
+ ("inty", "@inty{0.00e0}"),
1033
+ ],
1034
+ renderers=renderers,
1035
+ )
872
1036
  p.add_tools(hover)
873
1037
 
874
1038
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -937,8 +1101,7 @@ def plot_eic(
937
1101
 
938
1102
  # Get sample colors from samples_df
939
1103
  sample_colors = (
940
- self.samples_df
941
- .filter(pl.col("sample_uid").is_in(sample_uids))
1104
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
942
1105
  .select(["sample_uid", "sample_color"])
943
1106
  .to_dict(as_series=False)
944
1107
  )
@@ -951,7 +1114,7 @@ def plot_eic(
951
1114
  for uid in sample_uids:
952
1115
  try:
953
1116
  first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
954
- if hasattr(first_chrom, 'rt_unit'):
1117
+ if hasattr(first_chrom, "rt_unit"):
955
1118
  rt_unit = first_chrom.rt_unit
956
1119
  break
957
1120
  except Exception:
@@ -982,7 +1145,11 @@ def plot_eic(
982
1145
  # extract arrays
983
1146
  try:
984
1147
  # prefer Chromatogram API
985
- chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
1148
+ chrom_dict = (
1149
+ chrom.to_dict()
1150
+ if hasattr(chrom, "to_dict")
1151
+ else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
1152
+ )
986
1153
  rt = chrom_dict.get("rt")
987
1154
  inty = chrom_dict.get("inty")
988
1155
  except Exception:
@@ -1030,7 +1197,15 @@ def plot_eic(
1030
1197
  self.logger.warning("No EIC curves to plot for the selected samples.")
1031
1198
  return
1032
1199
 
1033
- hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e0}")], renderers=renderers)
1200
+ hover = HoverTool(
1201
+ tooltips=[
1202
+ ("sample", "@sample"),
1203
+ ("sample_color", "$color[swatch]:sample_color"),
1204
+ ("rt", "@rt{0.00}"),
1205
+ ("inty", "@inty{0.0e0}"),
1206
+ ],
1207
+ renderers=renderers,
1208
+ )
1034
1209
  p.add_tools(hover)
1035
1210
 
1036
1211
  if getattr(p, "legend", None) and len(p.legend) > 0:
@@ -1064,7 +1239,7 @@ def plot_rt_correction(
1064
1239
  height: int = 300,
1065
1240
  ):
1066
1241
  """
1067
- Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
1242
+ Plot RT correction per sample: (rt - rt_original) vs rt overlaid for selected samples.
1068
1243
 
1069
1244
  This uses the same color mapping as `plot_bpc` so curves for the same samples match.
1070
1245
  """
@@ -1088,8 +1263,7 @@ def plot_rt_correction(
1088
1263
 
1089
1264
  # Get sample colors from samples_df
1090
1265
  sample_colors = (
1091
- self.samples_df
1092
- .filter(pl.col("sample_uid").is_in(sample_uids))
1266
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
1093
1267
  .select(["sample_uid", "sample_color"])
1094
1268
  .to_dict(as_series=False)
1095
1269
  )
@@ -1175,7 +1349,15 @@ def plot_rt_correction(
1175
1349
  self.logger.warning("No RT correction curves to plot for the selected samples.")
1176
1350
  return
1177
1351
 
1178
- hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
1352
+ hover = HoverTool(
1353
+ tooltips=[
1354
+ ("sample", "@sample"),
1355
+ ("sample_color", "$color[swatch]:sample_color"),
1356
+ ("rt", "@rt{0.00}"),
1357
+ ("rt - rt_original", "@delta{0.00}"),
1358
+ ],
1359
+ renderers=renderers,
1360
+ )
1179
1361
  p.add_tools(hover)
1180
1362
 
1181
1363
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings
@@ -1227,7 +1409,7 @@ def plot_chrom(
1227
1409
  if not sample_names:
1228
1410
  self.logger.error("No sample names found in chromatogram data.")
1229
1411
  return
1230
-
1412
+
1231
1413
  # Create color mapping by getting sample_color for each sample_name
1232
1414
  samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
1233
1415
  sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
@@ -1649,11 +1831,19 @@ def plot_pca(
1649
1831
 
1650
1832
  self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
1651
1833
 
1652
- # Convert consensus matrix to numpy if it's not already
1653
- if hasattr(consensus_matrix, "values"):
1834
+ # Convert consensus matrix to numpy - handle both Polars and pandas DataFrames
1835
+ if hasattr(consensus_matrix, "to_numpy"):
1836
+ # Polars or pandas DataFrame
1837
+ if hasattr(consensus_matrix, "select"):
1838
+ # Polars DataFrame - exclude the consensus_uid column
1839
+ numeric_cols = [col for col in consensus_matrix.columns if col != "consensus_uid"]
1840
+ matrix_data = consensus_matrix.select(numeric_cols).to_numpy()
1841
+ else:
1842
+ # Pandas DataFrame
1843
+ matrix_data = consensus_matrix.to_numpy()
1844
+ elif hasattr(consensus_matrix, "values"):
1845
+ # Pandas DataFrame
1654
1846
  matrix_data = consensus_matrix.values
1655
- elif hasattr(consensus_matrix, "to_numpy"):
1656
- matrix_data = consensus_matrix.to_numpy()
1657
1847
  else:
1658
1848
  matrix_data = np.array(consensus_matrix)
1659
1849
 
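The branch above normalizes the consensus matrix to a NumPy array whether it arrives as a Polars or pandas DataFrame, dropping the consensus_uid identifier column in the Polars case. A simplified sketch of that dispatch with toy data (the column names are illustrative):

    import numpy as np
    import polars as pl

    consensus_matrix = pl.DataFrame({
        "consensus_uid": [10, 11],
        "sample_A": [1.0, 2.0],
        "sample_B": [3.0, 4.0],
    })

    if hasattr(consensus_matrix, "select"):  # Polars: drop the identifier column first
        numeric_cols = [c for c in consensus_matrix.columns if c != "consensus_uid"]
        matrix_data = consensus_matrix.select(numeric_cols).to_numpy()
    elif hasattr(consensus_matrix, "to_numpy") or hasattr(consensus_matrix, "values"):  # pandas
        matrix_data = consensus_matrix.to_numpy()
    else:
        matrix_data = np.array(consensus_matrix)

    print(matrix_data.shape)  # (2, 2)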
@@ -1692,7 +1882,7 @@ def plot_pca(
1692
1882
  else:
1693
1883
  self.logger.warning(
1694
1884
  f"Sample count mismatch: samples_df has {len(samples_pd)} rows, "
1695
- f"but consensus matrix has {len(pca_df)} samples"
1885
+ f"but consensus matrix has {len(pca_df)} samples",
1696
1886
  )
1697
1887
 
1698
1888
  # Prepare color mapping
@@ -1763,25 +1953,23 @@ def plot_pca(
1763
1953
  if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
1764
1954
  # Choose the identifier to map colors by
1765
1955
  id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
1766
-
1956
+
1767
1957
  # Get colors from samples_df based on the identifier
1768
1958
  if id_col == "sample_uid":
1769
1959
  sample_colors = (
1770
- self.samples_df
1771
- .filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
1960
+ self.samples_df.filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
1772
1961
  .select(["sample_uid", "sample_color"])
1773
1962
  .to_dict(as_series=False)
1774
1963
  )
1775
1964
  color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
1776
1965
  else: # sample_name
1777
1966
  sample_colors = (
1778
- self.samples_df
1779
- .filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
1967
+ self.samples_df.filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
1780
1968
  .select(["sample_name", "sample_color"])
1781
1969
  .to_dict(as_series=False)
1782
1970
  )
1783
1971
  color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
1784
-
1972
+
1785
1973
  # Map colors into dataframe
1786
1974
  pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]] # fallback to blue
1787
1975
  # Update the ColumnDataSource with new color column
@@ -1817,7 +2005,7 @@ def plot_pca(
1817
2005
  if col in pca_df.columns:
1818
2006
  if col == "sample_color":
1819
2007
  # Display sample_color as a colored swatch
1820
- tooltip_list.append(('color', "$color[swatch]:sample_color"))
2008
+ tooltip_list.append(("color", "$color[swatch]:sample_color"))
1821
2009
  elif pca_df[col].dtype in ["float64", "float32"]:
1822
2010
  tooltip_list.append((col, f"@{col}{{0.00}}"))
1823
2011
  else:
@@ -1843,6 +2031,7 @@ def plot_pca(
1843
2031
  show(p)
1844
2032
  return p
1845
2033
 
2034
+
1846
2035
  def plot_tic(
1847
2036
  self,
1848
2037
  samples=None,
@@ -1853,7 +2042,7 @@ def plot_tic(
1853
2042
  original: bool = False,
1854
2043
  ):
1855
2044
  """
1856
- Plot Total Ion Chromatograms (TIC) for selected samples overlayed using Bokeh.
2045
+ Plot Total Ion Chromatograms (TIC) for selected samples overlaid using Bokeh.
1857
2046
 
1858
2047
  Parameters and behavior mirror `plot_bpc` but use per-sample TICs (get_tic).
1859
2048
  """
@@ -1870,8 +2059,7 @@ def plot_tic(
1870
2059
 
1871
2060
  # Get sample colors from samples_df
1872
2061
  sample_colors = (
1873
- self.samples_df
1874
- .filter(pl.col("sample_uid").is_in(sample_uids))
2062
+ self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
1875
2063
  .select(["sample_uid", "sample_color"])
1876
2064
  .to_dict(as_series=False)
1877
2065
  )
@@ -1884,7 +2072,7 @@ def plot_tic(
1884
2072
  for uid in sample_uids:
1885
2073
  try:
1886
2074
  first_chrom = get_tic(self, sample=uid, label=None)
1887
- if hasattr(first_chrom, 'rt_unit'):
2075
+ if hasattr(first_chrom, "rt_unit"):
1888
2076
  rt_unit = first_chrom.rt_unit
1889
2077
  break
1890
2078
  except Exception:
@@ -1913,7 +2101,11 @@ def plot_tic(
1913
2101
 
1914
2102
  # extract arrays
1915
2103
  try:
1916
- chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
2104
+ chrom_dict = (
2105
+ chrom.to_dict()
2106
+ if hasattr(chrom, "to_dict")
2107
+ else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
2108
+ )
1917
2109
  rt = chrom_dict.get("rt")
1918
2110
  inty = chrom_dict.get("inty")
1919
2111
  except Exception:
@@ -1961,7 +2153,15 @@ def plot_tic(
1961
2153
  self.logger.warning("No TIC curves to plot for the selected samples.")
1962
2154
  return
1963
2155
 
1964
- hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
2156
+ hover = HoverTool(
2157
+ tooltips=[
2158
+ ("sample", "@sample"),
2159
+ ("sample_color", "$color[swatch]:sample_color"),
2160
+ ("rt", "@rt{0.00}"),
2161
+ ("inty", "@inty{0.00e0}"),
2162
+ ],
2163
+ renderers=renderers,
2164
+ )
1965
2165
  p.add_tools(hover)
1966
2166
 
1967
2167
  # Only set legend properties if a legend was actually created to avoid Bokeh warnings