masster 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/study/helpers.py +492 -2
- masster/study/load.py +35 -7
- masster/study/plot.py +261 -96
- masster/study/processing.py +9 -0
- masster/study/study.py +8 -25
- {masster-0.3.12.dist-info → masster-0.3.14.dist-info}/METADATA +2 -1
- {masster-0.3.12.dist-info → masster-0.3.14.dist-info}/RECORD +11 -15
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +0 -199787
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- {masster-0.3.12.dist-info → masster-0.3.14.dist-info}/WHEEL +0 -0
- {masster-0.3.12.dist-info → masster-0.3.14.dist-info}/entry_points.txt +0 -0
- {masster-0.3.12.dist-info → masster-0.3.14.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
|
@@ -37,7 +37,6 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
37
37
|
# Local imports so the module can be used even if bokeh isn't needed elsewhere
|
|
38
38
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
39
39
|
from bokeh.plotting import figure, show, output_file
|
|
40
|
-
from bokeh.palettes import Turbo256
|
|
41
40
|
import pandas as pd
|
|
42
41
|
|
|
43
42
|
# Build the before/after tabular data used for plotting
|
|
@@ -136,34 +135,99 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
136
135
|
self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
|
|
137
136
|
return
|
|
138
137
|
|
|
139
|
-
|
|
138
|
+
# Use Polars instead of pandas
|
|
139
|
+
features_df = self.features_df
|
|
140
140
|
|
|
141
|
-
sample_col = 'sample_uid' if 'sample_uid' in
|
|
142
|
-
if sample_col not in
|
|
141
|
+
sample_col = 'sample_uid' if 'sample_uid' in features_df.columns else 'sample_name'
|
|
142
|
+
if sample_col not in features_df.columns:
|
|
143
143
|
self.logger.error("No sample identifier column found in features_df.")
|
|
144
144
|
return
|
|
145
145
|
|
|
146
|
-
samples
|
|
146
|
+
# Get unique samples using Polars
|
|
147
|
+
samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
|
|
147
148
|
|
|
148
149
|
for sample_idx, sample in enumerate(samples):
|
|
149
|
-
|
|
150
|
-
|
|
150
|
+
# Filter sample data using Polars
|
|
151
|
+
sample_data = features_df.filter(pl.col(sample_col) == sample)
|
|
152
|
+
|
|
153
|
+
# Calculate max intensity using Polars
|
|
154
|
+
max_inty = sample_data.select(pl.col('inty').max()).item()
|
|
155
|
+
max_inty = max_inty if max_inty and max_inty > 0 else 1
|
|
156
|
+
|
|
151
157
|
sample_name = str(sample)
|
|
152
|
-
sample_uid
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
158
|
+
# Get sample_uid - if sample_col is 'sample_uid', use sample directly
|
|
159
|
+
if sample_col == 'sample_uid':
|
|
160
|
+
sample_uid = sample
|
|
161
|
+
else:
|
|
162
|
+
# Try to get sample_uid from the first row if it exists
|
|
163
|
+
if 'sample_uid' in sample_data.columns:
|
|
164
|
+
sample_uid = sample_data.select(pl.col('sample_uid')).item()
|
|
165
|
+
else:
|
|
166
|
+
sample_uid = sample
|
|
167
|
+
|
|
168
|
+
# Convert to dict for iteration - more efficient than row-by-row processing
|
|
169
|
+
sample_dict = sample_data.select(['rt_original', 'rt', 'mz', 'inty']).to_dicts()
|
|
170
|
+
|
|
171
|
+
for row_dict in sample_dict:
|
|
172
|
+
rt_original = row_dict['rt_original']
|
|
173
|
+
rt_current = row_dict['rt']
|
|
174
|
+
mz = row_dict['mz']
|
|
175
|
+
inty = row_dict['inty']
|
|
176
|
+
alpha = inty / max_inty
|
|
177
|
+
size = markersize + 2 if sample_idx == 0 else markersize
|
|
178
|
+
|
|
179
|
+
before_data.append({
|
|
180
|
+
'rt': rt_original, 'mz': mz, 'inty': inty, 'alpha': alpha,
|
|
181
|
+
'sample_idx': sample_idx, 'sample_name': sample_name,
|
|
182
|
+
'sample_uid': sample_uid, 'size': size
|
|
183
|
+
})
|
|
184
|
+
after_data.append({
|
|
185
|
+
'rt': rt_current, 'mz': mz, 'inty': inty, 'alpha': alpha,
|
|
186
|
+
'sample_idx': sample_idx, 'sample_name': sample_name,
|
|
187
|
+
'sample_uid': sample_uid, 'size': size
|
|
188
|
+
})
|
|
189
|
+
|
|
190
|
+
# Get sample colors from samples_df using sample indices
|
|
191
|
+
# Extract unique sample information from the dictionaries we created
|
|
192
|
+
if before_data:
|
|
193
|
+
# Create mapping from sample_idx to sample_uid more efficiently
|
|
194
|
+
sample_idx_to_uid = {}
|
|
195
|
+
for item in before_data:
|
|
196
|
+
if item['sample_idx'] not in sample_idx_to_uid:
|
|
197
|
+
sample_idx_to_uid[item['sample_idx']] = item['sample_uid']
|
|
198
|
+
else:
|
|
199
|
+
sample_idx_to_uid = {}
|
|
200
|
+
|
|
201
|
+
# Get colors from samples_df
|
|
202
|
+
sample_uids_list = list(sample_idx_to_uid.values())
|
|
203
|
+
if sample_uids_list and hasattr(self, 'samples_df') and self.samples_df is not None:
|
|
204
|
+
sample_colors = (
|
|
205
|
+
self.samples_df
|
|
206
|
+
.filter(pl.col("sample_uid").is_in(sample_uids_list))
|
|
207
|
+
.select(["sample_uid", "sample_color"])
|
|
208
|
+
.to_dict(as_series=False)
|
|
209
|
+
)
|
|
210
|
+
uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
211
|
+
else:
|
|
212
|
+
uid_to_color = {}
|
|
164
213
|
|
|
165
|
-
|
|
166
|
-
|
|
214
|
+
# Create color map for sample indices
|
|
215
|
+
color_map: dict[int, str] = {}
|
|
216
|
+
for sample_idx, sample_uid in sample_idx_to_uid.items():
|
|
217
|
+
color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4") # fallback to blue
|
|
218
|
+
|
|
219
|
+
# Add sample_color to data dictionaries before creating DataFrames
|
|
220
|
+
if before_data:
|
|
221
|
+
for item in before_data:
|
|
222
|
+
item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
|
|
223
|
+
|
|
224
|
+
if after_data:
|
|
225
|
+
for item in after_data:
|
|
226
|
+
item['sample_color'] = color_map.get(item['sample_idx'], '#1f77b4')
|
|
227
|
+
|
|
228
|
+
# Now create DataFrames with the sample_color already included
|
|
229
|
+
before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
|
|
230
|
+
after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
|
|
167
231
|
|
|
168
232
|
# Create Bokeh figures
|
|
169
233
|
p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
|
|
@@ -177,15 +241,9 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
177
241
|
p2.background_fill_color = 'white'
|
|
178
242
|
p2.border_fill_color = 'white'
|
|
179
243
|
p2.min_border = 0
|
|
180
|
-
|
|
181
|
-
#
|
|
182
|
-
unique_samples = sorted(list(set(
|
|
183
|
-
colors = Turbo256
|
|
184
|
-
color_map: dict[int, str] = {}
|
|
185
|
-
n = max(1, len(unique_samples))
|
|
186
|
-
step = max(1, 256 // n)
|
|
187
|
-
for i, sample_idx in enumerate(unique_samples):
|
|
188
|
-
color_map[sample_idx] = colors[(i * step) % 256]
|
|
244
|
+
|
|
245
|
+
# Get unique sample indices for iteration
|
|
246
|
+
unique_samples = sorted(list(set(item['sample_idx'] for item in before_data))) if before_data else []
|
|
189
247
|
|
|
190
248
|
renderers_before = []
|
|
191
249
|
renderers_after = []
|
|
@@ -206,10 +264,10 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
206
264
|
renderers_after.append(r)
|
|
207
265
|
|
|
208
266
|
# Add hover tools
|
|
209
|
-
hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.
|
|
267
|
+
hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_before)
|
|
210
268
|
p1.add_tools(hover1)
|
|
211
269
|
|
|
212
|
-
hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.
|
|
270
|
+
hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('Sample Color', '$color[swatch]:sample_color'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e0}')], renderers=renderers_after)
|
|
213
271
|
p2.add_tools(hover2)
|
|
214
272
|
|
|
215
273
|
# Create layout with both plots side by side
|
|
@@ -232,13 +290,13 @@ def plot_consensus_2d(
|
|
|
232
290
|
self,
|
|
233
291
|
filename=None,
|
|
234
292
|
colorby="number_samples",
|
|
293
|
+
cmap=None,
|
|
294
|
+
markersize=4,
|
|
235
295
|
sizeby="inty_mean",
|
|
236
|
-
|
|
237
|
-
size="dynamic",
|
|
296
|
+
scaling="dynamic",
|
|
238
297
|
alpha=0.7,
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
height=900,
|
|
298
|
+
width=600,
|
|
299
|
+
height=450,
|
|
242
300
|
mz_range=None,
|
|
243
301
|
rt_range=None,
|
|
244
302
|
):
|
|
@@ -334,12 +392,49 @@ def plot_consensus_2d(
|
|
|
334
392
|
except ImportError:
|
|
335
393
|
from bokeh.models.annotations import ColorBar
|
|
336
394
|
from bokeh.palettes import viridis
|
|
395
|
+
|
|
396
|
+
# Import cmap for colormap handling
|
|
397
|
+
from cmap import Colormap
|
|
337
398
|
|
|
338
399
|
# Convert Polars DataFrame to pandas for Bokeh compatibility
|
|
339
400
|
data_pd = data.to_pandas()
|
|
340
401
|
source = ColumnDataSource(data_pd)
|
|
402
|
+
|
|
403
|
+
# Handle colormap using cmap.Colormap
|
|
404
|
+
try:
|
|
405
|
+
# Get colormap palette using cmap
|
|
406
|
+
if isinstance(cmap, str):
|
|
407
|
+
colormap = Colormap(cmap)
|
|
408
|
+
# Generate 256 colors and convert to hex
|
|
409
|
+
import numpy as np
|
|
410
|
+
import matplotlib.colors as mcolors
|
|
411
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
412
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
413
|
+
else:
|
|
414
|
+
colormap = cmap
|
|
415
|
+
# Try to use to_bokeh() method first
|
|
416
|
+
try:
|
|
417
|
+
palette = colormap.to_bokeh()
|
|
418
|
+
# Ensure we got a color palette, not another mapper
|
|
419
|
+
if not isinstance(palette, (list, tuple)):
|
|
420
|
+
# Fall back to generating colors manually
|
|
421
|
+
import numpy as np
|
|
422
|
+
import matplotlib.colors as mcolors
|
|
423
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
424
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
425
|
+
except AttributeError:
|
|
426
|
+
# Fall back to generating colors manually
|
|
427
|
+
import numpy as np
|
|
428
|
+
import matplotlib.colors as mcolors
|
|
429
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
430
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
431
|
+
except (AttributeError, ValueError, TypeError) as e:
|
|
432
|
+
# Fallback to viridis if cmap interpretation fails
|
|
433
|
+
self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
|
|
434
|
+
palette = viridis(256)
|
|
435
|
+
|
|
341
436
|
color_mapper = LinearColorMapper(
|
|
342
|
-
palette=
|
|
437
|
+
palette=palette,
|
|
343
438
|
low=data[colorby].min(),
|
|
344
439
|
high=data[colorby].max(),
|
|
345
440
|
)
|
|
@@ -352,11 +447,11 @@ def plot_consensus_2d(
|
|
|
352
447
|
p.xaxis.axis_label = "Retention Time (min)"
|
|
353
448
|
p.yaxis.axis_label = "m/z"
|
|
354
449
|
scatter_renderer: Any = None
|
|
355
|
-
if
|
|
450
|
+
if scaling.lower() in ["dyn", "dynamic"]:
|
|
356
451
|
scatter_renderer = p.circle(
|
|
357
452
|
x="rt",
|
|
358
453
|
y="mz",
|
|
359
|
-
radius=markersize
|
|
454
|
+
radius=markersize,
|
|
360
455
|
fill_color={"field": colorby, "transform": color_mapper},
|
|
361
456
|
line_color=None,
|
|
362
457
|
alpha=alpha,
|
|
@@ -414,7 +509,6 @@ def plot_samples_2d(
|
|
|
414
509
|
size="dynamic",
|
|
415
510
|
alpha_max=0.8,
|
|
416
511
|
alpha="inty",
|
|
417
|
-
cmap="Turbo256",
|
|
418
512
|
max_features=50000,
|
|
419
513
|
width=600,
|
|
420
514
|
height=600,
|
|
@@ -447,7 +541,6 @@ def plot_samples_2d(
|
|
|
447
541
|
from bokeh.plotting import figure, show, output_file
|
|
448
542
|
from bokeh.io.export import export_png
|
|
449
543
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
450
|
-
from bokeh.palettes import Turbo256
|
|
451
544
|
|
|
452
545
|
sample_uids = self._get_sample_uids(samples)
|
|
453
546
|
|
|
@@ -455,8 +548,14 @@ def plot_samples_2d(
|
|
|
455
548
|
self.logger.error("No valid sample_uids provided.")
|
|
456
549
|
return
|
|
457
550
|
|
|
458
|
-
colors
|
|
459
|
-
|
|
551
|
+
# Get sample colors from samples_df
|
|
552
|
+
sample_colors = (
|
|
553
|
+
self.samples_df
|
|
554
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
555
|
+
.select(["sample_uid", "sample_color"])
|
|
556
|
+
.to_dict(as_series=False)
|
|
557
|
+
)
|
|
558
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
460
559
|
|
|
461
560
|
p = figure(
|
|
462
561
|
width=width,
|
|
@@ -569,6 +668,7 @@ def plot_samples_2d(
|
|
|
569
668
|
"inty": sample_data["inty"].values,
|
|
570
669
|
"alpha": sample_data["alpha"].values,
|
|
571
670
|
"sample": np.full(len(sample_data), sample_name, dtype=object),
|
|
671
|
+
"sample_color": np.full(len(sample_data), color_values[uid], dtype=object),
|
|
572
672
|
},
|
|
573
673
|
)
|
|
574
674
|
|
|
@@ -604,6 +704,7 @@ def plot_samples_2d(
|
|
|
604
704
|
hover = HoverTool(
|
|
605
705
|
tooltips=[
|
|
606
706
|
("sample", "@sample"),
|
|
707
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
607
708
|
("rt", "@rt{0.00}"),
|
|
608
709
|
("mz", "@mz{0.0000}"),
|
|
609
710
|
("intensity", "@inty{0.0e+0}"),
|
|
@@ -637,7 +738,6 @@ def plot_bpc(
|
|
|
637
738
|
filename: str | None = None,
|
|
638
739
|
width: int = 1000,
|
|
639
740
|
height: int = 300,
|
|
640
|
-
rt_unit: str = "s",
|
|
641
741
|
original: bool = False,
|
|
642
742
|
):
|
|
643
743
|
"""
|
|
@@ -653,7 +753,6 @@ def plot_bpc(
|
|
|
653
753
|
from bokeh.plotting import figure, show, output_file
|
|
654
754
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
655
755
|
from bokeh.io.export import export_png
|
|
656
|
-
from bokeh.palettes import Turbo256
|
|
657
756
|
from masster.study.helpers import get_bpc
|
|
658
757
|
|
|
659
758
|
sample_uids = self._get_sample_uids(samples)
|
|
@@ -664,10 +763,14 @@ def plot_bpc(
|
|
|
664
763
|
# Debug: show which sample_uids we will process
|
|
665
764
|
self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
|
|
666
765
|
|
|
667
|
-
colors
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
766
|
+
# Get sample colors from samples_df
|
|
767
|
+
sample_colors = (
|
|
768
|
+
self.samples_df
|
|
769
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
770
|
+
.select(["sample_uid", "sample_color"])
|
|
771
|
+
.to_dict(as_series=False)
|
|
772
|
+
)
|
|
773
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
671
774
|
|
|
672
775
|
# If plotting original (uncorrected) RTs, use the requested title.
|
|
673
776
|
if original:
|
|
@@ -675,6 +778,17 @@ def plot_bpc(
|
|
|
675
778
|
else:
|
|
676
779
|
plot_title = title or "Base Peak Chromatograms"
|
|
677
780
|
|
|
781
|
+
# Get rt_unit from the first chromatogram, default to "s" if not available
|
|
782
|
+
rt_unit = "s"
|
|
783
|
+
for uid in sample_uids:
|
|
784
|
+
try:
|
|
785
|
+
first_chrom = get_bpc(self, sample=uid, label=None, original=original)
|
|
786
|
+
if hasattr(first_chrom, 'rt_unit'):
|
|
787
|
+
rt_unit = first_chrom.rt_unit
|
|
788
|
+
break
|
|
789
|
+
except Exception:
|
|
790
|
+
continue
|
|
791
|
+
|
|
678
792
|
p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
679
793
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
680
794
|
p.yaxis.axis_label = "Intensity"
|
|
@@ -691,7 +805,7 @@ def plot_bpc(
|
|
|
691
805
|
|
|
692
806
|
for uid in sample_uids:
|
|
693
807
|
try:
|
|
694
|
-
chrom = get_bpc(self, sample=uid,
|
|
808
|
+
chrom = get_bpc(self, sample=uid, label=None, original=original)
|
|
695
809
|
except Exception as e:
|
|
696
810
|
# log and skip samples we can't compute BPC for
|
|
697
811
|
self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
|
|
@@ -743,7 +857,7 @@ def plot_bpc(
|
|
|
743
857
|
f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
|
|
744
858
|
)
|
|
745
859
|
|
|
746
|
-
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
|
|
860
|
+
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
747
861
|
src = ColumnDataSource(data)
|
|
748
862
|
|
|
749
863
|
r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
|
|
@@ -754,7 +868,7 @@ def plot_bpc(
|
|
|
754
868
|
self.logger.warning("No BPC curves to plot for the selected samples.")
|
|
755
869
|
return
|
|
756
870
|
|
|
757
|
-
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.
|
|
871
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
|
|
758
872
|
p.add_tools(hover)
|
|
759
873
|
|
|
760
874
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
@@ -790,7 +904,6 @@ def plot_eic(
|
|
|
790
904
|
filename: str | None = None,
|
|
791
905
|
width: int = 1000,
|
|
792
906
|
height: int = 300,
|
|
793
|
-
rt_unit: str = "s",
|
|
794
907
|
original: bool = False,
|
|
795
908
|
):
|
|
796
909
|
"""
|
|
@@ -804,7 +917,6 @@ def plot_eic(
|
|
|
804
917
|
from bokeh.plotting import figure, show, output_file
|
|
805
918
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
806
919
|
from bokeh.io.export import export_png
|
|
807
|
-
from bokeh.palettes import Turbo256
|
|
808
920
|
from masster.study.helpers import get_eic
|
|
809
921
|
|
|
810
922
|
if mz is None:
|
|
@@ -816,13 +928,28 @@ def plot_eic(
|
|
|
816
928
|
self.logger.error("No valid sample_uids provided for EIC plotting.")
|
|
817
929
|
return
|
|
818
930
|
|
|
819
|
-
colors
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
931
|
+
# Get sample colors from samples_df
|
|
932
|
+
sample_colors = (
|
|
933
|
+
self.samples_df
|
|
934
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
935
|
+
.select(["sample_uid", "sample_color"])
|
|
936
|
+
.to_dict(as_series=False)
|
|
937
|
+
)
|
|
938
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
823
939
|
|
|
824
940
|
plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
|
|
825
941
|
|
|
942
|
+
# Get rt_unit from the first chromatogram, default to "s" if not available
|
|
943
|
+
rt_unit = "s"
|
|
944
|
+
for uid in sample_uids:
|
|
945
|
+
try:
|
|
946
|
+
first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
|
|
947
|
+
if hasattr(first_chrom, 'rt_unit'):
|
|
948
|
+
rt_unit = first_chrom.rt_unit
|
|
949
|
+
break
|
|
950
|
+
except Exception:
|
|
951
|
+
continue
|
|
952
|
+
|
|
826
953
|
p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
827
954
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
828
955
|
p.yaxis.axis_label = "Intensity"
|
|
@@ -839,7 +966,7 @@ def plot_eic(
|
|
|
839
966
|
|
|
840
967
|
for uid in sample_uids:
|
|
841
968
|
try:
|
|
842
|
-
chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol,
|
|
969
|
+
chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
|
|
843
970
|
except Exception as e:
|
|
844
971
|
# log and skip samples we can't compute EIC for
|
|
845
972
|
self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
|
|
@@ -885,7 +1012,7 @@ def plot_eic(
|
|
|
885
1012
|
|
|
886
1013
|
color = color_map.get(uid, "#000000")
|
|
887
1014
|
|
|
888
|
-
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
|
|
1015
|
+
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
889
1016
|
src = ColumnDataSource(data)
|
|
890
1017
|
|
|
891
1018
|
r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
|
|
@@ -896,7 +1023,7 @@ def plot_eic(
|
|
|
896
1023
|
self.logger.warning("No EIC curves to plot for the selected samples.")
|
|
897
1024
|
return
|
|
898
1025
|
|
|
899
|
-
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.
|
|
1026
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e0}")], renderers=renderers)
|
|
900
1027
|
p.add_tools(hover)
|
|
901
1028
|
|
|
902
1029
|
if getattr(p, "legend", None) and len(p.legend) > 0:
|
|
@@ -928,7 +1055,6 @@ def plot_rt_correction(
|
|
|
928
1055
|
filename: str | None = None,
|
|
929
1056
|
width: int = 1000,
|
|
930
1057
|
height: int = 300,
|
|
931
|
-
rt_unit: str = "s",
|
|
932
1058
|
):
|
|
933
1059
|
"""
|
|
934
1060
|
Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
|
|
@@ -937,7 +1063,6 @@ def plot_rt_correction(
|
|
|
937
1063
|
"""
|
|
938
1064
|
from bokeh.plotting import figure, show, output_file
|
|
939
1065
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
940
|
-
from bokeh.palettes import Turbo256
|
|
941
1066
|
import numpy as _np
|
|
942
1067
|
|
|
943
1068
|
# Validate features dataframe
|
|
@@ -954,11 +1079,17 @@ def plot_rt_correction(
|
|
|
954
1079
|
self.logger.error("No valid sample_uids provided for RT correction plotting.")
|
|
955
1080
|
return
|
|
956
1081
|
|
|
957
|
-
#
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
1082
|
+
# Get sample colors from samples_df
|
|
1083
|
+
sample_colors = (
|
|
1084
|
+
self.samples_df
|
|
1085
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1086
|
+
.select(["sample_uid", "sample_color"])
|
|
1087
|
+
.to_dict(as_series=False)
|
|
1088
|
+
)
|
|
1089
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
1090
|
+
|
|
1091
|
+
# For RT correction plots, default to "s" since we're working with features_df directly
|
|
1092
|
+
rt_unit = "s"
|
|
962
1093
|
|
|
963
1094
|
p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
964
1095
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
@@ -1026,7 +1157,7 @@ def plot_rt_correction(
|
|
|
1026
1157
|
|
|
1027
1158
|
color = color_map.get(uid, "#000000")
|
|
1028
1159
|
|
|
1029
|
-
data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
|
|
1160
|
+
data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
1030
1161
|
src = ColumnDataSource(data)
|
|
1031
1162
|
|
|
1032
1163
|
r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
|
|
@@ -1037,7 +1168,7 @@ def plot_rt_correction(
|
|
|
1037
1168
|
self.logger.warning("No RT correction curves to plot for the selected samples.")
|
|
1038
1169
|
return
|
|
1039
1170
|
|
|
1040
|
-
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
|
|
1171
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
|
|
1041
1172
|
p.add_tools(hover)
|
|
1042
1173
|
|
|
1043
1174
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
@@ -1083,15 +1214,17 @@ def plot_chrom(
|
|
|
1083
1214
|
self.logger.error("No chromatogram data found.")
|
|
1084
1215
|
return
|
|
1085
1216
|
|
|
1086
|
-
#
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
# Assign a fixed color to each sample/column
|
|
1217
|
+
# Get sample colors for alignment plots
|
|
1218
|
+
# Need to map sample names to colors since chromatogram data uses sample names as columns
|
|
1090
1219
|
sample_names = [col for col in chroms.columns if col not in ["consensus_uid"]]
|
|
1091
1220
|
if not sample_names:
|
|
1092
1221
|
self.logger.error("No sample names found in chromatogram data.")
|
|
1093
1222
|
return
|
|
1094
|
-
|
|
1223
|
+
|
|
1224
|
+
# Create color mapping by getting sample_color for each sample_name
|
|
1225
|
+
samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
|
|
1226
|
+
sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
|
|
1227
|
+
color_map = {name: sample_name_to_color.get(name, "#1f77b4") for name in sample_names} # fallback to blue
|
|
1095
1228
|
|
|
1096
1229
|
plots = []
|
|
1097
1230
|
self.logger.info(f"Plotting {chroms.shape[0]} chromatograms...")
|
|
@@ -1461,8 +1594,8 @@ def plot_consensus_stats(
|
|
|
1461
1594
|
def plot_pca(
|
|
1462
1595
|
self,
|
|
1463
1596
|
filename=None,
|
|
1464
|
-
width=
|
|
1465
|
-
height=
|
|
1597
|
+
width=500,
|
|
1598
|
+
height=450,
|
|
1466
1599
|
alpha=0.8,
|
|
1467
1600
|
markersize=6,
|
|
1468
1601
|
n_components=2,
|
|
@@ -1484,7 +1617,7 @@ def plot_pca(
|
|
|
1484
1617
|
"""
|
|
1485
1618
|
from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
|
|
1486
1619
|
from bokeh.plotting import figure, show, output_file
|
|
1487
|
-
from bokeh.palettes import Category20, viridis
|
|
1620
|
+
from bokeh.palettes import Category20, viridis
|
|
1488
1621
|
from bokeh.transform import factor_cmap
|
|
1489
1622
|
from sklearn.decomposition import PCA
|
|
1490
1623
|
from sklearn.preprocessing import StandardScaler
|
|
@@ -1507,7 +1640,7 @@ def plot_pca(
|
|
|
1507
1640
|
self.logger.error("No samples dataframe available.")
|
|
1508
1641
|
return
|
|
1509
1642
|
|
|
1510
|
-
self.logger.
|
|
1643
|
+
self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
|
|
1511
1644
|
|
|
1512
1645
|
# Convert consensus matrix to numpy if it's not already
|
|
1513
1646
|
if hasattr(consensus_matrix, "values"):
|
|
@@ -1534,7 +1667,7 @@ def plot_pca(
|
|
|
1534
1667
|
# Get explained variance ratios
|
|
1535
1668
|
explained_var = pca.explained_variance_ratio_
|
|
1536
1669
|
|
|
1537
|
-
self.logger.
|
|
1670
|
+
self.logger.debug(f"PCA explained variance ratios: {explained_var}")
|
|
1538
1671
|
|
|
1539
1672
|
# Convert samples_df to pandas for easier manipulation
|
|
1540
1673
|
samples_pd = samples_df.to_pandas()
|
|
@@ -1619,15 +1752,31 @@ def plot_pca(
|
|
|
1619
1752
|
legend_field=color_by,
|
|
1620
1753
|
)
|
|
1621
1754
|
else:
|
|
1622
|
-
# If no color_by provided,
|
|
1755
|
+
# If no color_by provided, use sample_color column from samples_df
|
|
1623
1756
|
if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
|
|
1624
1757
|
# Choose the identifier to map colors by
|
|
1625
1758
|
id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
|
|
1626
|
-
|
|
1627
|
-
colors
|
|
1628
|
-
|
|
1759
|
+
|
|
1760
|
+
# Get colors from samples_df based on the identifier
|
|
1761
|
+
if id_col == "sample_uid":
|
|
1762
|
+
sample_colors = (
|
|
1763
|
+
self.samples_df
|
|
1764
|
+
.filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
|
|
1765
|
+
.select(["sample_uid", "sample_color"])
|
|
1766
|
+
.to_dict(as_series=False)
|
|
1767
|
+
)
|
|
1768
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
1769
|
+
else: # sample_name
|
|
1770
|
+
sample_colors = (
|
|
1771
|
+
self.samples_df
|
|
1772
|
+
.filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
|
|
1773
|
+
.select(["sample_name", "sample_color"])
|
|
1774
|
+
.to_dict(as_series=False)
|
|
1775
|
+
)
|
|
1776
|
+
color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
|
|
1777
|
+
|
|
1629
1778
|
# Map colors into dataframe
|
|
1630
|
-
pca_df["color"] = [color_map
|
|
1779
|
+
pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]] # fallback to blue
|
|
1631
1780
|
# Update the ColumnDataSource with new color column
|
|
1632
1781
|
source = ColumnDataSource(pca_df)
|
|
1633
1782
|
scatter = p.scatter(
|
|
@@ -1652,14 +1801,17 @@ def plot_pca(
|
|
|
1652
1801
|
tooltip_list = []
|
|
1653
1802
|
|
|
1654
1803
|
# Columns to exclude from tooltips (file paths and internal/plot fields)
|
|
1655
|
-
excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}
|
|
1804
|
+
excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2", "size"}
|
|
1656
1805
|
|
|
1657
1806
|
# Add all sample dataframe columns to tooltips, skipping excluded ones
|
|
1658
1807
|
for col in samples_pd.columns:
|
|
1659
1808
|
if col in excluded_cols:
|
|
1660
1809
|
continue
|
|
1661
1810
|
if col in pca_df.columns:
|
|
1662
|
-
if
|
|
1811
|
+
if col == "sample_color":
|
|
1812
|
+
# Display sample_color as a colored swatch
|
|
1813
|
+
tooltip_list.append(('color', "$color[swatch]:sample_color"))
|
|
1814
|
+
elif pca_df[col].dtype in ["float64", "float32"]:
|
|
1663
1815
|
tooltip_list.append((col, f"@{col}{{0.00}}"))
|
|
1664
1816
|
else:
|
|
1665
1817
|
tooltip_list.append((col, f"@{col}"))
|
|
@@ -1691,7 +1843,6 @@ def plot_tic(
|
|
|
1691
1843
|
filename: str | None = None,
|
|
1692
1844
|
width: int = 1000,
|
|
1693
1845
|
height: int = 300,
|
|
1694
|
-
rt_unit: str = "s",
|
|
1695
1846
|
original: bool = False,
|
|
1696
1847
|
):
|
|
1697
1848
|
"""
|
|
@@ -1703,7 +1854,6 @@ def plot_tic(
|
|
|
1703
1854
|
from bokeh.plotting import figure, show, output_file
|
|
1704
1855
|
from bokeh.models import ColumnDataSource, HoverTool
|
|
1705
1856
|
from bokeh.io.export import export_png
|
|
1706
|
-
from bokeh.palettes import Turbo256
|
|
1707
1857
|
from masster.study.helpers import get_tic
|
|
1708
1858
|
|
|
1709
1859
|
sample_uids = self._get_sample_uids(samples)
|
|
@@ -1711,13 +1861,28 @@ def plot_tic(
|
|
|
1711
1861
|
self.logger.error("No valid sample_uids provided for TIC plotting.")
|
|
1712
1862
|
return
|
|
1713
1863
|
|
|
1714
|
-
colors
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1864
|
+
# Get sample colors from samples_df
|
|
1865
|
+
sample_colors = (
|
|
1866
|
+
self.samples_df
|
|
1867
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1868
|
+
.select(["sample_uid", "sample_color"])
|
|
1869
|
+
.to_dict(as_series=False)
|
|
1870
|
+
)
|
|
1871
|
+
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
1718
1872
|
|
|
1719
1873
|
plot_title = title or "Total Ion Chromatograms"
|
|
1720
1874
|
|
|
1875
|
+
# Get rt_unit from the first chromatogram, default to "s" if not available
|
|
1876
|
+
rt_unit = "s"
|
|
1877
|
+
for uid in sample_uids:
|
|
1878
|
+
try:
|
|
1879
|
+
first_chrom = get_tic(self, sample=uid, label=None)
|
|
1880
|
+
if hasattr(first_chrom, 'rt_unit'):
|
|
1881
|
+
rt_unit = first_chrom.rt_unit
|
|
1882
|
+
break
|
|
1883
|
+
except Exception:
|
|
1884
|
+
continue
|
|
1885
|
+
|
|
1721
1886
|
p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
1722
1887
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
1723
1888
|
p.yaxis.axis_label = "Intensity"
|
|
@@ -1778,7 +1943,7 @@ def plot_tic(
|
|
|
1778
1943
|
|
|
1779
1944
|
color = color_map.get(uid, "#000000")
|
|
1780
1945
|
|
|
1781
|
-
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
|
|
1946
|
+
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
1782
1947
|
src = ColumnDataSource(data)
|
|
1783
1948
|
|
|
1784
1949
|
r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
|
|
@@ -1789,7 +1954,7 @@ def plot_tic(
|
|
|
1789
1954
|
self.logger.warning("No TIC curves to plot for the selected samples.")
|
|
1790
1955
|
return
|
|
1791
1956
|
|
|
1792
|
-
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.
|
|
1957
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("sample_color", "$color[swatch]:sample_color"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.00e0}")], renderers=renderers)
|
|
1793
1958
|
p.add_tools(hover)
|
|
1794
1959
|
|
|
1795
1960
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|