masster 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic.
- masster/_version.py +1 -1
- masster/lib/lib.py +45 -3
- masster/sample/sample5_schema.json +44 -44
- masster/study/h5.py +0 -13
- masster/study/helpers.py +263 -310
- masster/study/id.py +564 -324
- masster/study/plot.py +174 -312
- masster/study/processing.py +5 -0
- masster/study/study.py +95 -60
- masster/study/study5_schema.json +157 -145
- {masster-0.4.10.dist-info → masster-0.4.12.dist-info}/METADATA +1 -1
- {masster-0.4.10.dist-info → masster-0.4.12.dist-info}/RECORD +15 -15
- {masster-0.4.10.dist-info → masster-0.4.12.dist-info}/WHEEL +0 -0
- {masster-0.4.10.dist-info → masster-0.4.12.dist-info}/entry_points.txt +0 -0
- {masster-0.4.10.dist-info → masster-0.4.12.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
@@ -226,8 +226,7 @@ def _isolated_show_panel_notebook(panel_obj):

 def plot_alignment(
     self,
-    samples=
-    maps: bool = True,
+    samples=50,
     filename: str | None = None,
     width: int = 450,
     height: int = 450,
@@ -235,322 +234,172 @@ def plot_alignment(
 ):
     """Visualize retention time alignment using two synchronized Bokeh scatter plots.

-
-
-
-    ``rt_original`` column (before) and ``rt`` column (after).
+    Uses ``features_df`` to create side-by-side plots showing Original RT (left)
+    and Current/Aligned RT (right). If no alignment has been performed yet,
+    both plots show the current RT values.

-    Parameters
+    Parameters:
     - samples: List of sample identifiers (sample_uids or sample_names), or single int for random selection, or None for all samples.
-    - maps: whether to use feature maps (default True).
     - filename: optional HTML file path to save the plot.
     - width/height: pixel size of each subplot.
     - markersize: base marker size.

-    Returns
+    Returns:
     - Bokeh layout (row) containing the two synchronized plots.
     """
     # Local imports so the module can be used even if bokeh isn't needed elsewhere
     from bokeh.models import ColumnDataSource, HoverTool
-    from bokeh.plotting import figure
+    from bokeh.plotting import figure
     import pandas as pd

-    #
-
+    # Check if features_df exists
+    if self.features_df is None or self.features_df.is_empty():
+        self.logger.error("No features_df found. Load features first.")
+        return

-    #
-
-
+    # Check required columns
+    required_cols = ["rt", "mz", "inty"]
+    missing = [c for c in required_cols if c not in self.features_df.columns]
+    if missing:
+        self.logger.error(f"Missing required columns in features_df: {missing}")
+        return

-    if
-
-
-
+    # Check if alignment has been performed
+    has_alignment = "rt_original" in self.features_df.columns
+    if not has_alignment:
+        self.logger.warning("Column 'rt_original' not found - alignment has not been performed yet.")
+        self.logger.info("Showing current RT values for both plots. Run align() first to see alignment comparison.")
+
+    # Get sample_uids to filter by if specified
+    sample_uids = self._get_sample_uids(samples) if samples is not None else None

-
+    # Start with full features_df
+    features_df = self.features_df

-
-
+    # Filter by selected samples if specified
+    if sample_uids is not None:
+        features_df = features_df.filter(pl.col("sample_uid").is_in(sample_uids))
+        if features_df.is_empty():
+            self.logger.error("No features found for the selected samples.")
            return

-
-
-
-
-
-
-        # Filter samples_info to only selected sample_uids and get their map_ids
-        selected_samples = samples_info[samples_info["sample_uid"].isin(sample_uids)]
-        if selected_samples.empty:
-            self.logger.error("No matching samples found for the provided sample_uids.")
-            return
-
-        # Get the map_ids for selected samples
-        selected_map_ids = selected_samples["map_id"].tolist()
-
-        # Filter feature maps based on map_ids
-        filtered_maps = []
-        for map_id in selected_map_ids:
-            if 0 <= map_id < len(fmaps):
-                filtered_maps.append(fmaps[map_id])
-
-        fmaps = filtered_maps
-        samples_info = selected_samples.reset_index(drop=True)
-
-        if not fmaps:
-            self.logger.error("No feature maps found for the selected samples.")
-            return
-    else:
-        self.logger.warning("Cannot filter feature maps: no samples_df available")
+    # Determine sample column
+    sample_col = "sample_uid" if "sample_uid" in features_df.columns else "sample_name"
+    if sample_col not in features_df.columns:
+        self.logger.error("No sample identifier column found in features_df.")
+        return

-
-
-        return
+    # Get unique samples
+    samples_list = features_df.select(pl.col(sample_col)).unique().to_series().to_list()

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Build plotting data
+    before_data: list[dict[str, Any]] = []
+    after_data: list[dict[str, Any]] = []
+
+    for sample_idx, sample in enumerate(samples_list):
+        # Filter sample data
+        sample_data = features_df.filter(pl.col(sample_col) == sample)
+
+        # Sample data if too large for performance
+        max_points_per_sample = 10000
+        if sample_data.height > max_points_per_sample:
+            self.logger.info(f"Sample {sample}: Sampling {max_points_per_sample} points from {sample_data.height} features for performance")
+            sample_data = sample_data.sample(n=max_points_per_sample, seed=42)
+
+        # Calculate max intensity for alpha scaling
+        max_inty = sample_data.select(pl.col("inty").max()).item() or 1
+
+        # Get sample information
+        sample_name = str(sample)
+        sample_uid = sample if sample_col == "sample_uid" else sample_data.select(pl.col("sample_uid")).item() if "sample_uid" in sample_data.columns else sample
+
+        # Select columns to process
+        cols_to_select = ["rt", "mz", "inty"]
+        if has_alignment:
+            cols_to_select.append("rt_original")
+
+        sample_dict = sample_data.select(cols_to_select).to_dicts()
+
+        for row_dict in sample_dict:
+            rt_original = row_dict.get("rt_original", row_dict["rt"]) if has_alignment else row_dict["rt"]
+            rt_current = row_dict["rt"]
+            mz = row_dict["mz"]
+            inty = row_dict["inty"]
+            alpha = inty / max_inty
+            size = markersize + 2 if sample_idx == 0 else markersize

-        for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
             before_data.append({
-                "rt":
+                "rt": rt_original,
                 "mz": mz,
                 "inty": inty,
-                "alpha":
-                "sample_idx":
-                "sample_name":
-                "sample_uid":
-                "size":
+                "alpha": alpha,
+                "sample_idx": sample_idx,
+                "sample_name": sample_name,
+                "sample_uid": sample_uid,
+                "size": size,
             })
             after_data.append({
-                "rt":
+                "rt": rt_current,
                 "mz": mz,
                 "inty": inty,
-                "alpha":
-                "sample_idx":
-                "sample_name":
-                "sample_uid":
-                "size":
+                "alpha": alpha,
+                "sample_idx": sample_idx,
+                "sample_name": sample_name,
+                "sample_uid": sample_uid,
+                "size": size,
             })

-
-
-
-
-            original_rt = []
-            aligned_rt = []
-
-            for f in fm:
-                try:
-                    orig = f.getMetaValue("original_RT")
-                except Exception:
-                    orig = None
-
-                if orig is None:
-                    original_rt.append(f.getRT())
-                else:
-                    original_rt.append(orig)
-
-                aligned_rt.append(f.getRT())
-                mz_vals.append(f.getMZ())
-                inty_vals.append(f.getIntensity())
-
-            if not inty_vals:
-                continue
-
-            max_inty = max(inty_vals)
-
-            # Get sample metadata from filtered samples_info
-            if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
-                # Use filtered samples_info if it exists from the filtering above
-                if 'samples_info' in locals() and sample_idx < len(samples_info):
-                    sample_name = samples_info.iloc[sample_idx].get("sample_name", f"Sample {sample_idx}")
-                    sample_uid = samples_info.iloc[sample_idx].get("sample_uid", f"Sample_{sample_idx}_UID")
-                else:
-                    # Fallback to original samples_df if filtered samples_info is not available
-                    all_samples_info = self.samples_df.to_pandas()
-                    if sample_idx < len(all_samples_info):
-                        sample_name = all_samples_info.iloc[sample_idx].get("sample_name", f"Sample {sample_idx}")
-                        sample_uid = all_samples_info.iloc[sample_idx].get("sample_uid", f"Sample_{sample_idx}_UID")
-                    else:
-                        sample_name = f"Sample {sample_idx}"
-                        sample_uid = f"Sample_{sample_idx}_UID"
-            else:
-                sample_name = f"Sample {sample_idx}"
-                sample_uid = f"Sample_{sample_idx}_UID"
-
-            for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
-                before_data.append({
-                    "rt": rt,
-                    "mz": mz,
-                    "inty": inty,
-                    "alpha": inty / max_inty,
-                    "sample_idx": sample_idx,
-                    "sample_name": sample_name,
-                    "sample_uid": sample_uid,
-                    "size": markersize,
-                })
-
-            for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
-                after_data.append({
-                    "rt": rt,
-                    "mz": mz,
-                    "inty": inty,
-                    "alpha": inty / max_inty,
-                    "sample_idx": sample_idx,
-                    "sample_name": sample_name,
-                    "sample_uid": sample_uid,
-                    "size": markersize,
-                })
-
-    else:
-        # Use features_df
-        if self.features_df is None or self.features_df.is_empty():
-            self.logger.error("No features_df found. Load features first.")
-            return
-
-        required_cols = ["rt", "mz", "inty"]
-        missing = [c for c in required_cols if c not in self.features_df.columns]
-        if missing:
-            self.logger.error(f"Missing required columns in features_df: {missing}")
-            return
-
-        if "rt_original" not in self.features_df.columns:
-            self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
-            return
-
-        # Use Polars instead of pandas
-        features_df = self.features_df
-
-        # Filter by selected samples if specified
-        if sample_uids is not None:
-            features_df = features_df.filter(pl.col("sample_uid").is_in(sample_uids))
-            if features_df.is_empty():
-                self.logger.error("No features found for the selected samples.")
-                return
-
-        sample_col = "sample_uid" if "sample_uid" in features_df.columns else "sample_name"
-        if sample_col not in features_df.columns:
-            self.logger.error("No sample identifier column found in features_df.")
-            return
-
-        # Get unique samples using Polars
-        samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
-
-        for sample_idx, sample in enumerate(samples):
-            # Filter sample data using Polars
-            sample_data = features_df.filter(pl.col(sample_col) == sample)
-
-            # Calculate max intensity using Polars
-            max_inty = sample_data.select(pl.col("inty").max()).item()
-            max_inty = max_inty if max_inty and max_inty > 0 else 1
+    # Check if we have any data to plot
+    if not before_data:
+        self.logger.error("No data to plot.")
+        return

-
-
-
-
-
-            # Try to get sample_uid from the first row if it exists
-            if "sample_uid" in sample_data.columns:
-                sample_uid = sample_data.select(pl.col("sample_uid")).item()
-            else:
-                sample_uid = sample
-
-            # Convert to dict for iteration - more efficient than row-by-row processing
-            sample_dict = sample_data.select(["rt_original", "rt", "mz", "inty"]).to_dicts()
-
-            for row_dict in sample_dict:
-                rt_original = row_dict["rt_original"]
-                rt_current = row_dict["rt"]
-                mz = row_dict["mz"]
-                inty = row_dict["inty"]
-                alpha = inty / max_inty
-                size = markersize + 2 if sample_idx == 0 else markersize
-
-                before_data.append({
-                    "rt": rt_original,
-                    "mz": mz,
-                    "inty": inty,
-                    "alpha": alpha,
-                    "sample_idx": sample_idx,
-                    "sample_name": sample_name,
-                    "sample_uid": sample_uid,
-                    "size": size,
-                })
-                after_data.append({
-                    "rt": rt_current,
-                    "mz": mz,
-                    "inty": inty,
-                    "alpha": alpha,
-                    "sample_idx": sample_idx,
-                    "sample_name": sample_name,
-                    "sample_uid": sample_uid,
-                    "size": size,
-                })
-
-        # Get sample colors from samples_df using sample indices
-        # Extract unique sample information from the dictionaries we created
-        if before_data:
-            # Create mapping from sample_idx to sample_uid more efficiently
-            sample_idx_to_uid = {}
-            for item in before_data:
-                if item["sample_idx"] not in sample_idx_to_uid:
-                    sample_idx_to_uid[item["sample_idx"]] = item["sample_uid"]
-        else:
-            sample_idx_to_uid = {}
+    # Get sample colors from samples_df
+    sample_idx_to_uid = {}
+    for item in before_data:
+        if item["sample_idx"] not in sample_idx_to_uid:
+            sample_idx_to_uid[item["sample_idx"]] = item["sample_uid"]

-    # Get colors from samples_df
+    # Get colors from samples_df if available
     sample_uids_list = list(sample_idx_to_uid.values())
+    color_map: dict[int, str] = {}
+
     if sample_uids_list and hasattr(self, "samples_df") and self.samples_df is not None:
-
-
-
-
-
-
+        try:
+            sample_colors = (
+                self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids_list))
+                .select(["sample_uid", "sample_color"])
+                .to_dict(as_series=False)
+            )
+            uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
+
+            for sample_idx, sample_uid in sample_idx_to_uid.items():
+                color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4")
+        except Exception:
+            # Fallback to default colors if sample colors not available
+            for sample_idx in sample_idx_to_uid.keys():
+                color_map[sample_idx] = "#1f77b4"
     else:
-
+        # Default colors
+        for sample_idx in sample_idx_to_uid.keys():
+            color_map[sample_idx] = "#1f77b4"

-    #
-
-
-            color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4") # fallback to blue
+    # Add sample_color to data
+    for item in before_data + after_data:
+        item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")

-    #
-
-
-        item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
-
-    if after_data:
-        for item in after_data:
-            item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
-
-    # Now create DataFrames with the sample_color already included
-    before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
-    after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
+    # Create DataFrames
+    before_df = pd.DataFrame(before_data)
+    after_df = pd.DataFrame(after_data)

     # Create Bokeh figures
+    title_before = "Original RT" if has_alignment else "Current RT (No Alignment)"
+    title_after = "Aligned RT" if has_alignment else "Current RT (Copy)"
+
     p1 = figure(
         width=width,
         height=height,
-        title=
+        title=title_before,
         x_axis_label="Retention Time (s)",
         y_axis_label="m/z",
         tools="pan,wheel_zoom,box_zoom,reset,save",
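The rewritten loop above leans on a few Polars idioms: filter one sample at a time, cap the row count with a seeded sample(), take the per-sample intensity maximum, and convert to dicts for plotting. The following is a minimal, self-contained sketch of that pattern with invented column data; it is an illustration of the technique, not masster's own code.

import polars as pl

# Toy stand-in for features_df with two samples
features_df = pl.DataFrame({
    "sample_uid": ["s1"] * 5 + ["s2"] * 3,
    "rt": [10.0, 12.0, 15.0, 18.0, 20.0, 11.0, 14.0, 19.0],
    "mz": [100.1, 200.2, 300.3, 400.4, 500.5, 150.1, 250.2, 350.3],
    "inty": [1e4, 5e4, 2e5, 8e3, 3e4, 7e4, 9e4, 1e5],
})

max_points_per_sample = 4  # the diff above uses 10000 per sample
rows = []
for sample_uid in features_df["sample_uid"].unique().to_list():
    # Keep only this sample's features
    sample_data = features_df.filter(pl.col("sample_uid") == sample_uid)
    if sample_data.height > max_points_per_sample:
        # Deterministic downsampling for plotting performance
        sample_data = sample_data.sample(n=max_points_per_sample, seed=42)
    # Per-sample max intensity used to scale marker alpha
    max_inty = sample_data.select(pl.col("inty").max()).item() or 1
    for row in sample_data.to_dicts():
        rows.append({**row, "alpha": row["inty"] / max_inty})

print(len(rows), rows[0])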
@@ -563,7 +412,7 @@ def plot_alignment(
     p2 = figure(
         width=width,
         height=height,
-        title=
+        title=title_after,
         x_axis_label="Retention Time (s)",
         y_axis_label="m/z",
         tools="pan,wheel_zoom,box_zoom,reset,save",
@@ -575,16 +424,15 @@ def plot_alignment(
     p2.border_fill_color = "white"
     p2.min_border = 0

-    #
-    unique_samples = sorted(list({item["sample_idx"] for item in before_data}))
-
+    # Plot data by sample
+    unique_samples = sorted(list({item["sample_idx"] for item in before_data}))
     renderers_before = []
     renderers_after = []

     for sample_idx in unique_samples:
         sb = before_df[before_df["sample_idx"] == sample_idx]
         sa = after_df[after_df["sample_idx"] == sample_idx]
-        color = color_map.get(sample_idx, "#
+        color = color_map.get(sample_idx, "#1f77b4")

         if not sb.empty:
             src = ColumnDataSource(sb)
@@ -623,8 +471,7 @@ def plot_alignment(
     )
     p2.add_tools(hover2)

-    # Create layout
-    # Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
+    # Create layout
     layout = bokeh_row(p1, p2, sizing_mode="fixed", width=width, height=height)

     # Apply consistent save/display behavior
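For orientation, the "before/after" layout that plot_alignment assembles can be approximated in plain Bokeh as below. This is an illustrative sketch with invented data, not the package's code; it links the two panels by sharing ranges and omits masster's hover tools, per-sample coloring, and save handling.

from bokeh.layouts import row
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show

# Toy before/after retention times for a handful of features
source = ColumnDataSource(data={
    "rt_original": [100, 150, 200],
    "rt": [102, 149, 203],
    "mz": [250.1, 380.4, 512.9],
})

p1 = figure(width=450, height=450, title="Original RT",
            x_axis_label="Retention Time (s)", y_axis_label="m/z")
p2 = figure(width=450, height=450, title="Aligned RT",
            x_axis_label="Retention Time (s)", y_axis_label="m/z",
            x_range=p1.x_range, y_range=p1.y_range)  # shared ranges keep pan/zoom in sync

p1.scatter("rt_original", "mz", source=source, size=5, alpha=0.6)
p2.scatter("rt", "mz", source=source, size=5, alpha=0.6)

show(row(p1, p2, sizing_mode="fixed", width=900, height=450))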
@@ -832,19 +679,34 @@ def plot_consensus_2d(
         source=source,
     )
     # add hover tool
+    # Start with base tooltips
+    tooltips = [
+        ("consensus_uid", "@consensus_uid"),
+        ("consensus_id", "@consensus_id"),
+        ("number_samples", "@number_samples"),
+        ("number_ms2", "@number_ms2"),
+        ("rt", "@rt"),
+        ("mz", "@mz"),
+        ("inty_mean", "@inty_mean"),
+        ("iso_mean", "@iso_mean"),
+        ("coherence_mean", "@chrom_coherence_mean"),
+        ("prominence_scaled_mean", "@chrom_prominence_scaled_mean"),
+    ]
+
+    # Add id_top_* columns if they exist and have non-null values
+    id_top_columns = ["id_top_name", "id_top_class", "id_top_adduct", "id_top_score"]
+    for col in id_top_columns:
+        if col in data.columns:
+            # Check if the column has any non-null values
+            if data.filter(pl.col(col).is_not_null()).height > 0:
+                # Format score column with decimal places, others as strings
+                if col == "id_top_score":
+                    tooltips.append((col.replace("id_top_", "id_"), f"@{col}{{0.0000}}"))
+                else:
+                    tooltips.append((col.replace("id_top_", "id_"), f"@{col}"))
+
     hover = HoverTool(
-        tooltips=
-            ("consensus_uid", "@consensus_uid"),
-            ("consensus_id", "@consensus_id"),
-            ("number_samples", "@number_samples"),
-            ("number_ms2", "@number_ms2"),
-            ("rt", "@rt"),
-            ("mz", "@mz"),
-            ("inty_mean", "@inty_mean"),
-            ("iso_mean", "@iso_mean"),
-            ("coherence_mean", "@chrom_coherence_mean"),
-            ("prominence_mean", "@chrom_prominence_mean"),
-        ],
+        tooltips=tooltips,
         renderers=[scatter_renderer],
     )
     p.add_tools(hover)
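The tooltip change above builds the HoverTool tooltip list dynamically and formats the identification score with four decimals. A small standalone Bokeh sketch of the same idea, using hypothetical data rather than masster's objects:

from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, show

data = {"rt": [120.5, 240.8], "mz": [301.2, 455.7], "id_top_score": [0.9123, None]}
source = ColumnDataSource(data=data)

# Base tooltips that always apply
tooltips = [("rt", "@rt"), ("mz", "@mz")]
# Only add the score tooltip when the column exists and has non-null values
if "id_top_score" in data and any(v is not None for v in data["id_top_score"]):
    tooltips.append(("id_score", "@id_top_score{0.0000}"))  # 4-decimal tooltip format

p = figure(width=400, height=400)
r = p.scatter("rt", "mz", source=source, size=8)
p.add_tools(HoverTool(tooltips=tooltips, renderers=[r]))
show(p)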
@@ -878,7 +740,7 @@ def plot_consensus_2d(

 def plot_samples_2d(
     self,
-    samples=
+    samples=100,
     filename=None,
     markersize=2,
     size="dynamic",
@@ -1112,7 +974,7 @@ def plot_samples_2d(

 def plot_bpc(
     self,
-    samples=
+    samples=100,
     title: str | None = None,
     filename: str | None = None,
     width: int = 1000,
@@ -1288,7 +1150,7 @@ def plot_eic(
     self,
     mz,
     mz_tol=None,
-    samples=
+    samples=100,
     title: str | None = None,
     filename: str | None = None,
     width: int = 1000,
@@ -1457,7 +1319,7 @@ def plot_eic(

 def plot_rt_correction(
     self,
-    samples=
+    samples=200,
     title: str | None = None,
     filename: str | None = None,
     width: int = 1000,
@@ -1611,7 +1473,7 @@ def plot_rt_correction(
 def plot_chrom(
     self,
     uids=None,
-    samples=
+    samples=100,
     filename=None,
     aligned=True,
     width=800,
@@ -2051,7 +1913,7 @@ def plot_pca(
     alpha=0.8,
     markersize=6,
     n_components=2,
-
+    colorby=None,
     title="PCA of Consensus Matrix",
 ):
     """
@@ -2154,25 +2016,25 @@ def plot_pca(
     color_column = None
     color_mapper = None

-    if
-        color_column =
-        unique_values = pca_df[
+    if colorby and colorby in pca_df.columns:
+        color_column = colorby
+        unique_values = pca_df[colorby].unique()

         # Handle categorical vs numeric coloring
-        if pca_df[
+        if pca_df[colorby].dtype in ["object", "string", "category"]:
             # Categorical coloring
             if len(unique_values) <= 20:
                 palette = Category20[min(20, max(3, len(unique_values)))]
             else:
                 palette = viridis(min(256, len(unique_values)))
-            color_mapper = factor_cmap(
+            color_mapper = factor_cmap(colorby, palette, unique_values)
         else:
             # Numeric coloring
             palette = viridis(256)
             color_mapper = LinearColorMapper(
                 palette=palette,
-                low=pca_df[
-                high=pca_df[
+                low=pca_df[colorby].min(),
+                high=pca_df[colorby].max(),
             )

     # Create Bokeh plot
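The colorby handling above switches between categorical and numeric color mapping. A compact, self-contained sketch of that decision in plain Bokeh and pandas, with illustrative column names that are not masster's:

import pandas as pd
from bokeh.models import LinearColorMapper
from bokeh.palettes import Category20, viridis
from bokeh.transform import factor_cmap

pca_df = pd.DataFrame({
    "PC1": [0.1, -0.4, 0.7],
    "PC2": [1.2, 0.3, -0.8],
    "group": ["QC", "blank", "sample"],  # categorical column
    "batch_order": [1, 2, 3],            # numeric column
})

colorby = "group"
if pca_df[colorby].dtype in ["object", "string", "category"]:
    # Categorical: discrete palette plus a factor colormap
    values = pca_df[colorby].unique().tolist()
    palette = Category20[min(20, max(3, len(values)))]
    color_mapper = factor_cmap(colorby, palette, values)
else:
    # Numeric: continuous palette spanning the column's range
    color_mapper = LinearColorMapper(palette=viridis(256),
                                     low=pca_df[colorby].min(),
                                     high=pca_df[colorby].max())

# factor_cmap can be passed directly as scatter(..., color=color_mapper);
# a LinearColorMapper is wrapped as color={"field": colorby, "transform": color_mapper}.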
@@ -2197,7 +2059,7 @@ def plot_pca(
             "PC2",
             size=markersize,
             alpha=alpha,
-            color={"field":
+            color={"field": colorby, "transform": color_mapper},
             source=source,
         )
         # Add colorbar for numeric coloring
@@ -2211,7 +2073,7 @@ def plot_pca(
             alpha=alpha,
             color=color_mapper,
             source=source,
-            legend_field=
+            legend_field=colorby,
         )
     else:
         # If no color_by provided, use sample_color column from samples_df
@@ -2283,7 +2145,7 @@ def plot_pca(
     p.add_tools(hover)

     # Add legend if using categorical coloring
-    if color_mapper and not isinstance(color_mapper, LinearColorMapper) and
+    if color_mapper and not isinstance(color_mapper, LinearColorMapper) and colorby:
         # Only set legend properties if legends exist (avoid Bokeh warning when none created)
         if getattr(p, "legend", None) and len(p.legend) > 0:
             p.legend.location = "top_left"
@@ -2309,7 +2171,7 @@ def plot_pca(

 def plot_tic(
     self,
-    samples=
+    samples=100,
     title: str | None = None,
     filename: str | None = None,
     width: int = 1000,