masster 0.3.13__py3-none-any.whl → 0.3.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/sample/helpers.py +9 -2
- masster/sample/load.py +11 -7
- masster/sample/plot.py +43 -34
- masster/study/defaults/study_def.py +20 -0
- masster/study/h5.py +120 -23
- masster/study/helpers.py +974 -13
- masster/study/load.py +28 -15
- masster/study/plot.py +270 -98
- masster/study/processing.py +9 -0
- masster/study/study.py +32 -38
- masster/study/study5_schema.json +14 -5
- {masster-0.3.13.dist-info → masster-0.3.15.dist-info}/METADATA +2 -1
- {masster-0.3.13.dist-info → masster-0.3.15.dist-info}/RECORD +16 -20
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +0 -199787
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- {masster-0.3.13.dist-info → masster-0.3.15.dist-info}/WHEEL +0 -0
- {masster-0.3.13.dist-info → masster-0.3.15.dist-info}/entry_points.txt +0 -0
- {masster-0.3.13.dist-info → masster-0.3.15.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
|
@@ -37,7 +37,6 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
37
37
|
# Local imports so the module can be used even if bokeh isn't needed elsewhere
|
|
38
38
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
39
39
|
from bokeh.plotting import figure, show, output_file
|
|
40
|
-
from bokeh.palettes import Turbo256
|
|
41
40
|
import pandas as pd
|
|
42
41
|
|
|
43
42
|
# Build the before/after tabular data used for plotting
|
|
@@ -136,34 +135,99 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
136
135
|
self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
|
|
137
136
|
return
|
|
138
137
|
|
|
139
|
-
|
|
138
|
+
# Use Polars instead of pandas
|
|
139
|
+
features_df = self.features_df
|
|
140
140
|
|
|
141
|
-
sample_col = 'sample_uid' if 'sample_uid' in
|
|
142
|
-
if sample_col not in
|
|
141
|
+
sample_col = 'sample_uid' if 'sample_uid' in features_df.columns else 'sample_name'
|
|
142
|
+
if sample_col not in features_df.columns:
|
|
143
143
|
self.logger.error("No sample identifier column found in features_df.")
|
|
144
144
|
return
|
|
145
145
|
|
|
146
|
-
samples
|
|
146
|
+
# Get unique samples using Polars
|
|
147
|
+
samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
|
|
147
148
|
|
|
148
149
|
for sample_idx, sample in enumerate(samples):
|
|
149
|
-
|
|
150
|
-
|
|
150
|
+
# Filter sample data using Polars
|
|
151
|
+
sample_data = features_df.filter(pl.col(sample_col) == sample)
|
|
152
|
+
|
|
153
|
+
# Calculate max intensity using Polars
|
|
154
|
+
max_inty = sample_data.select(pl.col('inty').max()).item()
|
|
155
|
+
max_inty = max_inty if max_inty and max_inty > 0 else 1
|
|
156
|
+
|
|
151
157
|
sample_name = str(sample)
|
|
152
|
-
sample_uid
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
158
|
+
# Get sample_uid - if sample_col is 'sample_uid', use sample directly
|
|
159
|
+
if sample_col == 'sample_uid':
|
|
160
|
+
sample_uid = sample
|
|
161
|
+
else:
|
|
162
|
+
# Try to get sample_uid from the first row if it exists
|
|
163
|
+
if 'sample_uid' in sample_data.columns:
|
|
164
|
+
sample_uid = sample_data.select(pl.col('sample_uid')).item()
|
|
165
|
+
else:
|
|
166
|
+
sample_uid = sample
|
|
167
|
+
|
|
168
|
+
# Convert to dict for iteration - more efficient than row-by-row processing
|
|
169
|
+
sample_dict = sample_data.select(['rt_original', 'rt', 'mz', 'inty']).to_dicts()
|
|
170
|
+
|
|
171
|
+
for row_dict in sample_dict:
|
|
172
|
+
rt_original = row_dict['rt_original']
|
|
173
|
+
rt_current = row_dict['rt']
|
|
174
|
+
mz = row_dict['mz']
|
|
175
|
+
inty = row_dict['inty']
|
|
176
|
+
alpha = inty / max_inty
|
|
177
|
+
size = markersize + 2 if sample_idx == 0 else markersize
|
|
178
|
+
|
|
179
|
+
before_data.append({
|
|
180
|
+
'rt': rt_original, 'mz': mz, 'inty': inty, 'alpha': alpha,
|
|
181
|
+
'sample_idx': sample_idx, 'sample_name': sample_name,
|
|
182
|
+
'sample_uid': sample_uid, 'size': size
|
|
183
|
+
})
|
|
184
|
+
after_data.append({
|
|
185
|
+
'rt': rt_current, 'mz': mz, 'inty': inty, 'alpha': alpha,
|
|
186
|
+
'sample_idx': sample_idx, 'sample_name': sample_name,
|
|
187
|
+
'sample_uid': sample_uid, 'size': size
|
|
188
|
+
})
|
|
189
|
+
|
|
190
|
+
# Get sample colors from samples_df using sample indices
|
|
191
|
+
# Extract unique sample information from the dictionaries we created
|
|
192
|
+
if before_data:
|
|
193
|
+
# Create mapping from sample_idx to sample_uid more efficiently
|
|
194
|
+
sample_idx_to_uid = {}
|
|
195
|
+
for item in before_data:
|
|
196
|
+
if item['sample_idx'] not in sample_idx_to_uid:
|
|
197
|
+
sample_idx_to_uid[item['sample_idx']] = item['sample_uid']
|
|
198
|
+
else:
|
|
199
|
+
sample_idx_to_uid = {}
|
|
200
|
+
|
|
201
|
+
# Get colors from samples_df
|
|
202
|
+
sample_uids_list = list(sample_idx_to_uid.values())
|
|
203
|
+
if sample_uids_list and hasattr(self, 'samples_df') and self.samples_df is not None:
|
|
204
|
+
sample_colors = (
|
|
205
|
+
self.samples_df
|
|
206
|
+
.filter(pl.col("sample_uid").is_in(sample_uids_list))
|
|
207
|
+
.select(["sample_uid", "sample_color"])
|
|
208
|
+
.to_dict(as_series=False)
|
|
209
|
+
)
|
|
210
|
+
uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
211
|
+
else:
|
|
212
|
+
uid_to_color = {}
|
|
164
213
|
|
|
165
|
-
|
|
166
|
-
|
|
214
|
+
# Create color map for sample indices
|
|
215
|
+
color_map: dict[int, str] = {}
|
|
216
|
+
for sample_idx, sample_uid in sample_idx_to_uid.items():
|
|
217
|
+
color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4") # fallback to blue
|
|
218
|
+
|
|
219
|
+
# Add sample_color to data dictionaries before creating DataFrames
|
|
220
|
+
if before_data:
|
|
221
|
+
for item in before_data:
|
|
222
|
+
item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
|
|
223
|
+
|
|
224
|
+
if after_data:
|
|
225
|
+
for item in after_data:
|
|
226
|
+
item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
|
|
227
|
+
|
|
228
|
+
# Now create DataFrames with the sample_color already included
|
|
229
|
+
before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
|
|
230
|
+
after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
|
|
167
231
|
|
|
168
232
|
# Create Bokeh figures
|
|
169
233
|
p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
|
|
@@ -177,15 +241,9 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
177
241
|
p2.background_fill_color = 'white'
|
|
178
242
|
p2.border_fill_color = 'white'
|
|
179
243
|
p2.min_border = 0
|
|
180
|
-
|
|
181
|
-
#
|
|
182
|
-
unique_samples = sorted(list(set(
|
|
183
|
-
colors = Turbo256
|
|
184
|
-
color_map: dict[int, str] = {}
|
|
185
|
-
n = max(1, len(unique_samples))
|
|
186
|
-
step = max(1, 256 // n)
|
|
187
|
-
for i, sample_idx in enumerate(unique_samples):
|
|
188
|
-
color_map[sample_idx] = colors[(i * step) % 256]
|
|
244
|
+
|
|
245
|
+
# Get unique sample indices for iteration
|
|
246
|
+
unique_samples = sorted(list(set(item['sample_idx'] for item in before_data))) if before_data else []
|
|
189
247
|
|
|
190
248
|
renderers_before = []
|
|
191
249
|
renderers_after = []
|
|
@@ -206,10 +264,10 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
206
264
|
renderers_after.append(r)
|
|
207
265
|
|
|
208
266
|
# Add hover tools
|
|
209
|
-
hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.
|
|
267
|
+
hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_before)
|
|
210
268
|
p1.add_tools(hover1)
|
|
211
269
|
|
|
212
|
-
hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.
|
|
270
|
+
hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_after)
|
|
213
271
|
p2.add_tools(hover2)
|
|
214
272
|
|
|
215
273
|
# Create layout with both plots side by side
|
|
@@ -232,13 +290,13 @@ def plot_consensus_2d(
|
|
|
232
290
|
self,
|
|
233
291
|
filename=None,
|
|
234
292
|
colorby="number_samples",
|
|
293
|
+
cmap=None,
|
|
294
|
+
markersize=4,
|
|
235
295
|
sizeby="inty_mean",
|
|
236
|
-
|
|
237
|
-
size="dynamic",
|
|
296
|
+
scaling="dynamic",
|
|
238
297
|
alpha=0.7,
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
height=900,
|
|
298
|
+
width=600,
|
|
299
|
+
height=450,
|
|
242
300
|
mz_range=None,
|
|
243
301
|
rt_range=None,
|
|
244
302
|
):
|
|
@@ -317,7 +375,7 @@ def plot_consensus_2d(
|
|
|
317
375
|
])
|
|
318
376
|
|
|
319
377
|
if cmap is None:
|
|
320
|
-
cmap = "
|
|
378
|
+
cmap = "viridis"
|
|
321
379
|
elif cmap == "grey":
|
|
322
380
|
cmap = "Greys256"
|
|
323
381
|
|
|
@@ -334,12 +392,49 @@ def plot_consensus_2d(
|
|
|
334
392
|
except ImportError:
|
|
335
393
|
from bokeh.models.annotations import ColorBar
|
|
336
394
|
from bokeh.palettes import viridis
|
|
395
|
+
|
|
396
|
+
# Import cmap for colormap handling
|
|
397
|
+
from cmap import Colormap
|
|
337
398
|
|
|
338
399
|
# Convert Polars DataFrame to pandas for Bokeh compatibility
|
|
339
400
|
data_pd = data.to_pandas()
|
|
340
401
|
source = ColumnDataSource(data_pd)
|
|
402
|
+
|
|
403
|
+
# Handle colormap using cmap.Colormap
|
|
404
|
+
try:
|
|
405
|
+
# Get colormap palette using cmap
|
|
406
|
+
if isinstance(cmap, str):
|
|
407
|
+
colormap = Colormap(cmap)
|
|
408
|
+
# Generate 256 colors and convert to hex
|
|
409
|
+
import numpy as np
|
|
410
|
+
import matplotlib.colors as mcolors
|
|
411
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
412
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
413
|
+
else:
|
|
414
|
+
colormap = cmap
|
|
415
|
+
# Try to use to_bokeh() method first
|
|
416
|
+
try:
|
|
417
|
+
palette = colormap.to_bokeh()
|
|
418
|
+
# Ensure we got a color palette, not another mapper
|
|
419
|
+
if not isinstance(palette, (list, tuple)):
|
|
420
|
+
# Fall back to generating colors manually
|
|
421
|
+
import numpy as np
|
|
422
|
+
import matplotlib.colors as mcolors
|
|
423
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
424
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
425
|
+
except AttributeError:
|
|
426
|
+
# Fall back to generating colors manually
|
|
427
|
+
import numpy as np
|
|
428
|
+
import matplotlib.colors as mcolors
|
|
429
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
430
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
431
|
+
except (AttributeError, ValueError, TypeError) as e:
|
|
432
|
+
# Fallback to viridis if cmap interpretation fails
|
|
433
|
+
self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
|
|
434
|
+
palette = viridis(256)
|
|
435
|
+
|
|
341
436
|
color_mapper = LinearColorMapper(
|
|
342
|
-
palette=
|
|
437
|
+
palette=palette,
|
|
343
438
|
low=data[colorby].min(),
|
|
344
439
|
high=data[colorby].max(),
|
|
345
440
|
)
|
|
@@ -352,11 +447,11 @@ def plot_consensus_2d(
|
|
|
352
447
|
p.xaxis.axis_label = "Retention Time (min)"
|
|
353
448
|
p.yaxis.axis_label = "m/z"
|
|
354
449
|
scatter_renderer: Any = None
|
|
355
|
-
if
|
|
450
|
+
if scaling.lower() in ["dyn", "dynamic"]:
|
|
356
451
|
scatter_renderer = p.circle(
|
|
357
452
|
x="rt",
|
|
358
453
|
y="mz",
|
|
359
|
-
radius=markersize
|
|
454
|
+
radius=markersize,
|
|
360
455
|
fill_color={"field": colorby, "transform": color_mapper},
|
|
361
456
|
line_color=None,
|
|
362
457
|
alpha=alpha,
|
|
@@ -414,7 +509,6 @@ def plot_samples_2d(
|
|
|
414
509
|
size="dynamic",
|
|
415
510
|
alpha_max=0.8,
|
|
416
511
|
alpha="inty",
|
|
417
|
-
cmap="Turbo256",
|
|
418
512
|
max_features=50000,
|
|
419
513
|
width=600,
|
|
420
514
|
height=600,
|
|
@@ -447,7 +541,6 @@ def plot_samples_2d(
|
|
|
447
541
|
from bokeh.plotting import figure, show, output_file
|
|
448
542
|
from bokeh.io.export import export_png
|
|
449
543
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
450
|
-
from bokeh.palettes import Turbo256
|
|
451
544
|
|
|
452
545
|
sample_uids = self._get_sample_uids(samples)
|
|
453
546
|
|
|
@@ -455,8 +548,14 @@ def plot_samples_2d(
|
|
|
455
548
|
self.logger.error("No valid sample_uids provided.")
|
|
456
549
|
return
|
|
457
550
|
|
|
458
|
-
colors
|
|
459
|
-
|
|
551
|
+
# Get sample colors from samples_df
|
|
552
|
+
sample_colors = (
|
|
553
|
+
self.samples_df
|
|
554
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
555
|
+
.select(["sample_uid", "sample_color"])
|
|
556
|
+
.to_dict(as_series=False)
|
|
557
|
+
)
|
|
558
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
460
559
|
|
|
461
560
|
p = figure(
|
|
462
561
|
width=width,
|
|
@@ -569,6 +668,7 @@ def plot_samples_2d(
|
|
|
569
668
|
"inty": sample_data["inty"].values,
|
|
570
669
|
"alpha": sample_data["alpha"].values,
|
|
571
670
|
"sample": np.full(len(sample_data), sample_name, dtype=object),
|
|
671
|
+
"sample_color": np.full(len(sample_data), color_values[uid], dtype=object),
|
|
572
672
|
},
|
|
573
673
|
)
|
|
574
674
|
|
|
@@ -604,6 +704,7 @@ def plot_samples_2d(
|
|
|
604
704
|
hover = HoverTool(
|
|
605
705
|
tooltips=[
|
|
606
706
|
("sample", "@sample"),
|
|
707
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
607
708
|
("rt", "@rt{0.00}"),
|
|
608
709
|
("mz", "@mz{0.0000}"),
|
|
609
710
|
("intensity", "@inty{0.0e+0}"),
|
|
@@ -637,7 +738,6 @@ def plot_bpc(
|
|
|
637
738
|
filename: str | None = None,
|
|
638
739
|
width: int = 1000,
|
|
639
740
|
height: int = 300,
|
|
640
|
-
rt_unit: str = "s",
|
|
641
741
|
original: bool = False,
|
|
642
742
|
):
|
|
643
743
|
"""
|
|
@@ -653,7 +753,6 @@ def plot_bpc(
|
|
|
653
753
|
from bokeh.plotting import figure, show, output_file
|
|
654
754
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
655
755
|
from bokeh.io.export import export_png
|
|
656
|
-
from bokeh.palettes import Turbo256
|
|
657
756
|
from masster.study.helpers import get_bpc
|
|
658
757
|
|
|
659
758
|
sample_uids = self._get_sample_uids(samples)
|
|
@@ -664,10 +763,14 @@ def plot_bpc(
|
|
|
664
763
|
# Debug: show which sample_uids we will process
|
|
665
764
|
self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
|
|
666
765
|
|
|
667
|
-
colors
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
766
|
+
# Get sample colors from samples_df
|
|
767
|
+
sample_colors = (
|
|
768
|
+
self.samples_df
|
|
769
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
770
|
+
.select(["sample_uid", "sample_color"])
|
|
771
|
+
.to_dict(as_series=False)
|
|
772
|
+
)
|
|
773
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
671
774
|
|
|
672
775
|
# If plotting original (uncorrected) RTs, use the requested title.
|
|
673
776
|
if original:
|
|
@@ -675,6 +778,17 @@ def plot_bpc(
|
|
|
675
778
|
else:
|
|
676
779
|
plot_title = title or "Base Peak Chromatograms"
|
|
677
780
|
|
|
781
|
+
# Get rt_unit from the first chromatogram, default to "s" if not available
|
|
782
|
+
rt_unit = "s"
|
|
783
|
+
for uid in sample_uids:
|
|
784
|
+
try:
|
|
785
|
+
first_chrom = get_bpc(self, sample=uid, label=None, original=original)
|
|
786
|
+
if hasattr(first_chrom, 'rt_unit'):
|
|
787
|
+
rt_unit = first_chrom.rt_unit
|
|
788
|
+
break
|
|
789
|
+
except Exception:
|
|
790
|
+
continue
|
|
791
|
+
|
|
678
792
|
p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
679
793
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
680
794
|
p.yaxis.axis_label = "Intensity"
|
|
@@ -691,7 +805,7 @@ def plot_bpc(
|
|
|
691
805
|
|
|
692
806
|
for uid in sample_uids:
|
|
693
807
|
try:
|
|
694
|
-
chrom = get_bpc(self, sample=uid,
|
|
808
|
+
chrom = get_bpc(self, sample=uid, label=None, original=original)
|
|
695
809
|
except Exception as e:
|
|
696
810
|
# log and skip samples we can't compute BPC for
|
|
697
811
|
self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
|
|
@@ -743,7 +857,7 @@ def plot_bpc(
|
|
|
743
857
|
f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
|
|
744
858
|
)
|
|
745
859
|
|
|
746
|
-
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
|
|
860
|
+
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
747
861
|
src = ColumnDataSource(data)
|
|
748
862
|
|
|
749
863
|
r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
|
|
@@ -754,7 +868,7 @@ def plot_bpc(
|
|
|
754
868
|
self.logger.warning("No BPC curves to plot for the selected samples.")
|
|
755
869
|
return
|
|
756
870
|
|
|
757
|
-
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.
|
|
871
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
|
|
758
872
|
p.add_tools(hover)
|
|
759
873
|
|
|
760
874
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
@@ -784,13 +898,12 @@ def plot_bpc(
|
|
|
784
898
|
def plot_eic(
|
|
785
899
|
self,
|
|
786
900
|
mz,
|
|
787
|
-
mz_tol=
|
|
901
|
+
mz_tol=None,
|
|
788
902
|
samples=None,
|
|
789
903
|
title: str | None = None,
|
|
790
904
|
filename: str | None = None,
|
|
791
905
|
width: int = 1000,
|
|
792
906
|
height: int = 300,
|
|
793
|
-
rt_unit: str = "s",
|
|
794
907
|
original: bool = False,
|
|
795
908
|
):
|
|
796
909
|
"""
|
|
@@ -799,14 +912,20 @@ def plot_eic(
|
|
|
799
912
|
Parameters mirror `plot_bpc` with additional `mz` and `mz_tol` arguments. The function
|
|
800
913
|
retrieves a Sample object for each sample UID, calls `sample.get_eic(mz, mz_tol)`, and
|
|
801
914
|
overlays the resulting chromatograms.
|
|
915
|
+
|
|
916
|
+
Args:
|
|
917
|
+
mz_tol: m/z tolerance in Da. If None, uses study.parameters.eic_mz_tol as default.
|
|
802
918
|
"""
|
|
803
919
|
# Local imports to avoid heavy top-level deps / circular imports
|
|
804
920
|
from bokeh.plotting import figure, show, output_file
|
|
805
921
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
806
922
|
from bokeh.io.export import export_png
|
|
807
|
-
from bokeh.palettes import Turbo256
|
|
808
923
|
from masster.study.helpers import get_eic
|
|
809
924
|
|
|
925
|
+
# Use study's eic_mz_tol parameter as default if not provided
|
|
926
|
+
if mz_tol is None:
|
|
927
|
+
mz_tol = self.parameters.eic_mz_tol
|
|
928
|
+
|
|
810
929
|
if mz is None:
|
|
811
930
|
self.logger.error("mz must be provided for EIC plotting")
|
|
812
931
|
return
|
|
@@ -816,13 +935,28 @@ def plot_eic(
|
|
|
816
935
|
self.logger.error("No valid sample_uids provided for EIC plotting.")
|
|
817
936
|
return
|
|
818
937
|
|
|
819
|
-
colors
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
938
|
+
# Get sample colors from samples_df
|
|
939
|
+
sample_colors = (
|
|
940
|
+
self.samples_df
|
|
941
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
942
|
+
.select(["sample_uid", "sample_color"])
|
|
943
|
+
.to_dict(as_series=False)
|
|
944
|
+
)
|
|
945
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
823
946
|
|
|
824
947
|
plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
|
|
825
948
|
|
|
949
|
+
# Get rt_unit from the first chromatogram, default to "s" if not available
|
|
950
|
+
rt_unit = "s"
|
|
951
|
+
for uid in sample_uids:
|
|
952
|
+
try:
|
|
953
|
+
first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
|
|
954
|
+
if hasattr(first_chrom, 'rt_unit'):
|
|
955
|
+
rt_unit = first_chrom.rt_unit
|
|
956
|
+
break
|
|
957
|
+
except Exception:
|
|
958
|
+
continue
|
|
959
|
+
|
|
826
960
|
p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
827
961
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
828
962
|
p.yaxis.axis_label = "Intensity"
|
|
@@ -839,7 +973,7 @@ def plot_eic(
|
|
|
839
973
|
|
|
840
974
|
for uid in sample_uids:
|
|
841
975
|
try:
|
|
842
|
-
chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol,
|
|
976
|
+
chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
|
|
843
977
|
except Exception as e:
|
|
844
978
|
# log and skip samples we can't compute EIC for
|
|
845
979
|
self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
|
|
@@ -885,7 +1019,7 @@ def plot_eic(
|
|
|
885
1019
|
|
|
886
1020
|
color = color_map.get(uid, "#000000")
|
|
887
1021
|
|
|
888
|
-
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
|
|
1022
|
+
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
889
1023
|
src = ColumnDataSource(data)
|
|
890
1024
|
|
|
891
1025
|
r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
|
|
@@ -896,7 +1030,7 @@ def plot_eic(
|
|
|
896
1030
|
self.logger.warning("No EIC curves to plot for the selected samples.")
|
|
897
1031
|
return
|
|
898
1032
|
|
|
899
|
-
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.
|
|
1033
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e0}")], renderers=renderers)
|
|
900
1034
|
p.add_tools(hover)
|
|
901
1035
|
|
|
902
1036
|
if getattr(p, "legend", None) and len(p.legend) > 0:
|
|
@@ -928,7 +1062,6 @@ def plot_rt_correction(
|
|
|
928
1062
|
filename: str | None = None,
|
|
929
1063
|
width: int = 1000,
|
|
930
1064
|
height: int = 300,
|
|
931
|
-
rt_unit: str = "s",
|
|
932
1065
|
):
|
|
933
1066
|
"""
|
|
934
1067
|
Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
|
|
@@ -937,7 +1070,6 @@ def plot_rt_correction(
|
|
|
937
1070
|
"""
|
|
938
1071
|
from bokeh.plotting import figure, show, output_file
|
|
939
1072
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
940
|
-
from bokeh.palettes import Turbo256
|
|
941
1073
|
import numpy as _np
|
|
942
1074
|
|
|
943
1075
|
# Validate features dataframe
|
|
@@ -954,11 +1086,17 @@ def plot_rt_correction(
|
|
|
954
1086
|
self.logger.error("No valid sample_uids provided for RT correction plotting.")
|
|
955
1087
|
return
|
|
956
1088
|
|
|
957
|
-
#
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
1089
|
+
# Get sample colors from samples_df
|
|
1090
|
+
sample_colors = (
|
|
1091
|
+
self.samples_df
|
|
1092
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1093
|
+
.select(["sample_uid", "sample_color"])
|
|
1094
|
+
.to_dict(as_series=False)
|
|
1095
|
+
)
|
|
1096
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
1097
|
+
|
|
1098
|
+
# For RT correction plots, default to "s" since we're working with features_df directly
|
|
1099
|
+
rt_unit = "s"
|
|
962
1100
|
|
|
963
1101
|
p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
964
1102
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
@@ -1026,7 +1164,7 @@ def plot_rt_correction(
|
|
|
1026
1164
|
|
|
1027
1165
|
color = color_map.get(uid, "#000000")
|
|
1028
1166
|
|
|
1029
|
-
data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
|
|
1167
|
+
data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
1030
1168
|
src = ColumnDataSource(data)
|
|
1031
1169
|
|
|
1032
1170
|
r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
|
|
@@ -1037,7 +1175,7 @@ def plot_rt_correction(
|
|
|
1037
1175
|
self.logger.warning("No RT correction curves to plot for the selected samples.")
|
|
1038
1176
|
return
|
|
1039
1177
|
|
|
1040
|
-
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
|
|
1178
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
|
|
1041
1179
|
p.add_tools(hover)
|
|
1042
1180
|
|
|
1043
1181
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
@@ -1083,15 +1221,17 @@ def plot_chrom(
|
|
|
1083
1221
|
self.logger.error("No chromatogram data found.")
|
|
1084
1222
|
return
|
|
1085
1223
|
|
|
1086
|
-
#
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
# Assign a fixed color to each sample/column
|
|
1224
|
+
# Get sample colors for alignment plots
|
|
1225
|
+
# Need to map sample names to colors since chromatogram data uses sample names as columns
|
|
1090
1226
|
sample_names = [col for col in chroms.columns if col not in ["consensus_uid"]]
|
|
1091
1227
|
if not sample_names:
|
|
1092
1228
|
self.logger.error("No sample names found in chromatogram data.")
|
|
1093
1229
|
return
|
|
1094
|
-
|
|
1230
|
+
|
|
1231
|
+
# Create color mapping by getting sample_color for each sample_name
|
|
1232
|
+
samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
|
|
1233
|
+
sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
|
|
1234
|
+
color_map = {name: sample_name_to_color.get(name, "#1f77b4") for name in sample_names} # fallback to blue
|
|
1095
1235
|
|
|
1096
1236
|
plots = []
|
|
1097
1237
|
self.logger.info(f"Plotting {chroms.shape[0]} chromatograms...")
|
|
@@ -1461,8 +1601,8 @@ def plot_consensus_stats(
|
|
|
1461
1601
|
def plot_pca(
|
|
1462
1602
|
self,
|
|
1463
1603
|
filename=None,
|
|
1464
|
-
width=
|
|
1465
|
-
height=
|
|
1604
|
+
width=500,
|
|
1605
|
+
height=450,
|
|
1466
1606
|
alpha=0.8,
|
|
1467
1607
|
markersize=6,
|
|
1468
1608
|
n_components=2,
|
|
@@ -1484,7 +1624,7 @@ def plot_pca(
|
|
|
1484
1624
|
"""
|
|
1485
1625
|
from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
|
|
1486
1626
|
from bokeh.plotting import figure, show, output_file
|
|
1487
|
-
from bokeh.palettes import Category20, viridis
|
|
1627
|
+
from bokeh.palettes import Category20, viridis
|
|
1488
1628
|
from bokeh.transform import factor_cmap
|
|
1489
1629
|
from sklearn.decomposition import PCA
|
|
1490
1630
|
from sklearn.preprocessing import StandardScaler
|
|
@@ -1507,7 +1647,7 @@ def plot_pca(
|
|
|
1507
1647
|
self.logger.error("No samples dataframe available.")
|
|
1508
1648
|
return
|
|
1509
1649
|
|
|
1510
|
-
self.logger.
|
|
1650
|
+
self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
|
|
1511
1651
|
|
|
1512
1652
|
# Convert consensus matrix to numpy if it's not already
|
|
1513
1653
|
if hasattr(consensus_matrix, "values"):
|
|
@@ -1534,7 +1674,7 @@ def plot_pca(
|
|
|
1534
1674
|
# Get explained variance ratios
|
|
1535
1675
|
explained_var = pca.explained_variance_ratio_
|
|
1536
1676
|
|
|
1537
|
-
self.logger.
|
|
1677
|
+
self.logger.debug(f"PCA explained variance ratios: {explained_var}")
|
|
1538
1678
|
|
|
1539
1679
|
# Convert samples_df to pandas for easier manipulation
|
|
1540
1680
|
samples_pd = samples_df.to_pandas()
|
|
@@ -1619,15 +1759,31 @@ def plot_pca(
|
|
|
1619
1759
|
legend_field=color_by,
|
|
1620
1760
|
)
|
|
1621
1761
|
else:
|
|
1622
|
-
# If no color_by provided,
|
|
1762
|
+
# If no color_by provided, use sample_color column from samples_df
|
|
1623
1763
|
if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
|
|
1624
1764
|
# Choose the identifier to map colors by
|
|
1625
1765
|
id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
|
|
1626
|
-
|
|
1627
|
-
colors
|
|
1628
|
-
|
|
1766
|
+
|
|
1767
|
+
# Get colors from samples_df based on the identifier
|
|
1768
|
+
if id_col == "sample_uid":
|
|
1769
|
+
sample_colors = (
|
|
1770
|
+
self.samples_df
|
|
1771
|
+
.filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
|
|
1772
|
+
.select(["sample_uid", "sample_color"])
|
|
1773
|
+
.to_dict(as_series=False)
|
|
1774
|
+
)
|
|
1775
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
1776
|
+
else: # sample_name
|
|
1777
|
+
sample_colors = (
|
|
1778
|
+
self.samples_df
|
|
1779
|
+
.filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
|
|
1780
|
+
.select(["sample_name", "sample_color"])
|
|
1781
|
+
.to_dict(as_series=False)
|
|
1782
|
+
)
|
|
1783
|
+
color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
|
|
1784
|
+
|
|
1629
1785
|
# Map colors into dataframe
|
|
1630
|
-
pca_df["color"] = [color_map
|
|
1786
|
+
pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]] # fallback to blue
|
|
1631
1787
|
# Update the ColumnDataSource with new color column
|
|
1632
1788
|
source = ColumnDataSource(pca_df)
|
|
1633
1789
|
scatter = p.scatter(
|
|
@@ -1652,14 +1808,17 @@ def plot_pca(
|
|
|
1652
1808
|
tooltip_list = []
|
|
1653
1809
|
|
|
1654
1810
|
# Columns to exclude from tooltips (file paths and internal/plot fields)
|
|
1655
|
-
excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}
|
|
1811
|
+
excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2", "size"}
|
|
1656
1812
|
|
|
1657
1813
|
# Add all sample dataframe columns to tooltips, skipping excluded ones
|
|
1658
1814
|
for col in samples_pd.columns:
|
|
1659
1815
|
if col in excluded_cols:
|
|
1660
1816
|
continue
|
|
1661
1817
|
if col in pca_df.columns:
|
|
1662
|
-
if
|
|
1818
|
+
if col == "sample_color":
|
|
1819
|
+
# Display sample_color as a colored swatch
|
|
1820
|
+
tooltip_list.append(('color', "$color[swatch]:sample_color"))
|
|
1821
|
+
elif pca_df[col].dtype in ["float64", "float32"]:
|
|
1663
1822
|
tooltip_list.append((col, f"@{col}{{0.00}}"))
|
|
1664
1823
|
else:
|
|
1665
1824
|
tooltip_list.append((col, f"@{col}"))
|
|
@@ -1691,7 +1850,6 @@ def plot_tic(
|
|
|
1691
1850
|
filename: str | None = None,
|
|
1692
1851
|
width: int = 1000,
|
|
1693
1852
|
height: int = 300,
|
|
1694
|
-
rt_unit: str = "s",
|
|
1695
1853
|
original: bool = False,
|
|
1696
1854
|
):
|
|
1697
1855
|
"""
|
|
@@ -1703,7 +1861,6 @@ def plot_tic(
|
|
|
1703
1861
|
from bokeh.plotting import figure, show, output_file
|
|
1704
1862
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
1705
1863
|
from bokeh.io.export import export_png
|
|
1706
|
-
from bokeh.palettes import Turbo256
|
|
1707
1864
|
from masster.study.helpers import get_tic
|
|
1708
1865
|
|
|
1709
1866
|
sample_uids = self._get_sample_uids(samples)
|
|
@@ -1711,13 +1868,28 @@ def plot_tic(
|
|
|
1711
1868
|
self.logger.error("No valid sample_uids provided for TIC plotting.")
|
|
1712
1869
|
return
|
|
1713
1870
|
|
|
1714
|
-
colors
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1871
|
+
# Get sample colors from samples_df
|
|
1872
|
+
sample_colors = (
|
|
1873
|
+
self.samples_df
|
|
1874
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1875
|
+
.select(["sample_uid", "sample_color"])
|
|
1876
|
+
.to_dict(as_series=False)
|
|
1877
|
+
)
|
|
1878
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
1718
1879
|
|
|
1719
1880
|
plot_title = title or "Total Ion Chromatograms"
|
|
1720
1881
|
|
|
1882
|
+
# Get rt_unit from the first chromatogram, default to "s" if not available
|
|
1883
|
+
rt_unit = "s"
|
|
1884
|
+
for uid in sample_uids:
|
|
1885
|
+
try:
|
|
1886
|
+
first_chrom = get_tic(self, sample=uid, label=None)
|
|
1887
|
+
if hasattr(first_chrom, 'rt_unit'):
|
|
1888
|
+
rt_unit = first_chrom.rt_unit
|
|
1889
|
+
break
|
|
1890
|
+
except Exception:
|
|
1891
|
+
continue
|
|
1892
|
+
|
|
1721
1893
|
p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
1722
1894
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
1723
1895
|
p.yaxis.axis_label = "Intensity"
|
|
@@ -1778,7 +1950,7 @@ def plot_tic(
|
|
|
1778
1950
|
|
|
1779
1951
|
color = color_map.get(uid, "#000000")
|
|
1780
1952
|
|
|
1781
|
-
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
|
|
1953
|
+
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
1782
1954
|
src = ColumnDataSource(data)
|
|
1783
1955
|
|
|
1784
1956
|
r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
|
|
@@ -1789,7 +1961,7 @@ def plot_tic(
|
|
|
1789
1961
|
self.logger.warning("No TIC curves to plot for the selected samples.")
|
|
1790
1962
|
return
|
|
1791
1963
|
|
|
1792
|
-
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.
|
|
1964
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
|
|
1793
1965
|
p.add_tools(hover)
|
|
1794
1966
|
|
|
1795
1967
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|