masster 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/sample/helpers.py +53 -4
- masster/sample/plot.py +100 -16
- masster/sample/sample.py +6 -0
- masster/sample/sample5_schema.json +43 -34
- masster/study/defaults/align_def.py +10 -10
- masster/study/helpers.py +466 -3
- masster/study/load.py +33 -1
- masster/study/plot.py +809 -130
- masster/study/processing.py +35 -10
- masster/study/study.py +60 -4
- masster/study/study5_schema.json +83 -83
- {masster-0.3.11.dist-info → masster-0.3.13.dist-info}/METADATA +1 -1
- {masster-0.3.11.dist-info → masster-0.3.13.dist-info}/RECORD +16 -16
- {masster-0.3.11.dist-info → masster-0.3.13.dist-info}/WHEEL +0 -0
- {masster-0.3.11.dist-info → masster-0.3.13.dist-info}/entry_points.txt +0 -0
- {masster-0.3.11.dist-info → masster-0.3.13.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
@@ -12,135 +12,220 @@ from tqdm import tqdm
 hv.extension("bokeh")
 
 
-
-
-
+# Replace any unaliased import that could be shadowed:
+# from bokeh.layouts import row
+from bokeh.layouts import row as bokeh_row
 
-    if self.features_maps is None or len(self.features_maps) == 0:
-        self.load_features()
 
-
-
-    if ref_index is None:
-        self.logger.error("No alignment performed yet.")
-        return
+def plot_alignment(self, maps: bool = True, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
+    """Visualize retention time alignment using two synchronized Bokeh scatter plots.
 
-
-
-
-
-    ]
+    - When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
+      and builds two side-by-side plots: Original RT (left) and Current/Aligned RT (right).
+    - When ``maps=False`` the function uses ``self.features_df`` and expects an
+      ``rt_original`` column (before) and ``rt`` column (after).
 
-
+    Parameters
+    - maps: whether to use feature maps (default True).
+    - filename: optional HTML file path to save the plot.
+    - width/height: pixel size of each subplot.
+    - markersize: base marker size.
 
-
-
-
-
+    Returns
+    - Bokeh layout (row) containing the two synchronized plots.
+    """
+    # Local imports so the module can be used even if bokeh isn't needed elsewhere
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.palettes import Turbo256
+    import pandas as pd
 
-    #
-
-
-
-
-
-
+    # Build the before/after tabular data used for plotting
+    before_data: list[dict[str, Any]] = []
+    after_data: list[dict[str, Any]] = []
+
+    if maps:
+        # Ensure feature maps are loaded
+        if self.features_maps is None or len(self.features_maps) == 0:
+            self.load_features()
+
+        fmaps = self.features_maps or []
+
+        if not fmaps:
+            self.logger.error("No feature maps available for plotting.")
+            return
+
+        # Reference (first) sample: use current RT for both before and after
+        ref = fmaps[0]
+        ref_rt = [f.getRT() for f in ref]
+        ref_mz = [f.getMZ() for f in ref]
+        ref_inty = [f.getIntensity() for f in ref]
+        max_ref_inty = max(ref_inty) if ref_inty else 1
+
+        # sample metadata
+        if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
+            samples_info = self.samples_df.to_pandas()
+            ref_sample_uid = samples_info.iloc[0]['sample_uid'] if 'sample_uid' in samples_info.columns else 'Reference_UID'
+            ref_sample_name = samples_info.iloc[0]['sample_name'] if 'sample_name' in samples_info.columns else 'Reference'
+        else:
+            ref_sample_uid = 'Reference_UID'
+            ref_sample_name = 'Reference'
+
+        for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
+            before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
+            after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
+
+        # Remaining samples
+        for sample_idx, fm in enumerate(fmaps[1:], start=1):
+            mz_vals = []
+            inty_vals = []
+            original_rt = []
+            aligned_rt = []
+
+            for f in fm:
+                try:
+                    orig = f.getMetaValue('original_RT')
+                except Exception:
+                    orig = None
+
+                if orig is None:
+                    original_rt.append(f.getRT())
+                else:
+                    original_rt.append(orig)
+
+                aligned_rt.append(f.getRT())
+                mz_vals.append(f.getMZ())
+                inty_vals.append(f.getIntensity())
+
+            if not inty_vals:
+                continue
+
+            max_inty = max(inty_vals)
+
+            if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
+                samples_info = self.samples_df.to_pandas()
+                if sample_idx < len(samples_info):
+                    sample_name = samples_info.iloc[sample_idx].get('sample_name', f'Sample {sample_idx}')
+                    sample_uid = samples_info.iloc[sample_idx].get('sample_uid', f'Sample_{sample_idx}_UID')
+                else:
+                    sample_name = f'Sample {sample_idx}'
+                    sample_uid = f'Sample_{sample_idx}_UID'
+            else:
+                sample_name = f'Sample {sample_idx}'
+                sample_uid = f'Sample_{sample_idx}_UID'
 
-
-
-        [f.getMetaValue("original_RT") for f in fm],
-        [f.getMZ() for f in fm],
-        alpha=np.asarray([f.getIntensity() for f in fm]) / max([f.getIntensity() for f in fm]),
-        s=2,  # Set symbol size to 3
-    )
+            for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
+                before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
 
-
-
-    ax.set_ylabel("m/z")
-    ax.set_xlabel("RT")
-
-    for fm in fmaps:
-        ax.scatter(
-            [f.getRT() for f in fm],
-            [f.getMZ() for f in fm],
-            alpha=np.asarray([f.getIntensity() for f in fm]) / max([f.getIntensity() for f in fm]),
-            s=2,  # Set symbol size to 3
-        )
+            for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
+                after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
 
-
+    else:
+        # Use features_df
+        if self.features_df is None or self.features_df.is_empty():
+            self.logger.error("No features_df found. Load features first.")
+            return
 
+        required_cols = ['rt', 'mz', 'inty']
+        missing = [c for c in required_cols if c not in self.features_df.columns]
+        if missing:
+            self.logger.error(f"Missing required columns in features_df: {missing}")
+            return
 
-
-
-
+        if 'rt_original' not in self.features_df.columns:
+            self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
+            return
 
-
-    ref_index = self.alignment_ref_index
-    if ref_index is None:
-        self.logger.warning("No alignment performed yet.")
-        return
+        features_pd = self.features_df.to_pandas()
 
-
-
-
-
-
+        sample_col = 'sample_uid' if 'sample_uid' in features_pd.columns else 'sample_name'
+        if sample_col not in features_pd.columns:
+            self.logger.error("No sample identifier column found in features_df.")
+            return
+
+        samples = features_pd[sample_col].unique()
+
+        for sample_idx, sample in enumerate(samples):
+            sample_data = features_pd[features_pd[sample_col] == sample]
+            max_inty = sample_data['inty'].max() if sample_data['inty'].max() > 0 else 1
+            sample_name = str(sample)
+            sample_uid = sample if sample_col == 'sample_uid' else (sample_data['sample_uid'].iloc[0] if 'sample_uid' in sample_data.columns else sample)
+
+            for _, row in sample_data.iterrows():
+                before_data.append({'rt': row['rt_original'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
+                after_data.append({'rt': row['rt'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
+
+    # Ensure dataframes exist even if empty
+    before_df = pd.DataFrame(before_data)
+    after_df = pd.DataFrame(after_data)
+
+    # Create ColumnDataSources (safe even for empty dfs)
+    from bokeh.models import ColumnDataSource
+
+    before_source = ColumnDataSource(before_df)
+    after_source = ColumnDataSource(after_df)
 
     # Create Bokeh figures
-    p1 = figure(
-
-
-
-
-
-
-    p2 =
-
-
-
-
-
-
-
-
-
-
-
-
-        alpha=[f.getIntensity() / max([f.getIntensity() for f in fmaps[0]]) for f in fmaps[0]],
-        color="blue",
-    )
+    p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
+    p1.outline_line_color = None
+    p1.background_fill_color = 'white'
+    p1.border_fill_color = 'white'
+    p1.min_border = 0
+
+    p2 = figure(width=width, height=height, title='Current RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save', x_range=p1.x_range, y_range=p1.y_range)
+    p2.outline_line_color = None
+    p2.background_fill_color = 'white'
+    p2.border_fill_color = 'white'
+    p2.min_border = 0
+
+    # Color mapping using Turbo256
+    unique_samples = sorted(list(set(before_df['sample_idx'].tolist()))) if not before_df.empty else []
+    colors = Turbo256
+    color_map: dict[int, str] = {}
+    n = max(1, len(unique_samples))
+    step = max(1, 256 // n)
+    for i, sample_idx in enumerate(unique_samples):
+        color_map[sample_idx] = colors[(i * step) % 256]
 
-
-
-        x=[f.getMetaValue("original_RT") for f in fm],
-        y=[f.getMZ() for f in fm],
-        size=2,
-        alpha=[f.getIntensity() / max([f.getIntensity() for f in fm]) for f in fm],
-        color="green",
-    )
+    renderers_before = []
+    renderers_after = []
 
-
-
-
-
-
-
-
-        color=
-
+    for sample_idx in unique_samples:
+        sb = before_df[before_df['sample_idx'] == sample_idx]
+        sa = after_df[after_df['sample_idx'] == sample_idx]
+        color = color_map.get(sample_idx, '#000000')
+
+        if not sb.empty:
+            src = ColumnDataSource(sb)
+            r = p1.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
+            renderers_before.append(r)
+
+        if not sa.empty:
+            src = ColumnDataSource(sa)
+            r = p2.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
+            renderers_after.append(r)
 
-    #
-
-
-    p2.y_range = p1.y_range
+    # Add hover tools
+    hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_before)
+    p1.add_tools(hover1)
 
-
+    hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_after)
+    p2.add_tools(hover2)
 
-    #
+    # Create layout with both plots side by side
+    # Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
+    layout = bokeh_row(p1, p2, sizing_mode='fixed', width=width, height=height)
+
+    # Output and show
     if filename:
+        from bokeh.plotting import output_file, show
         output_file(filename)
-
+        show(layout)
+    else:
+        from bokeh.plotting import show
+        show(layout)
+
+    return layout
 
 
 def plot_consensus_2d(
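For reference, a minimal usage sketch of the new plot_alignment function (the `study` object, the file name, and the assumption that the function is exposed as a Study method are illustrative, not taken from this diff):

    # Hypothetical Study instance with feature maps already loaded.
    layout = study.plot_alignment(maps=True, filename="alignment.html")
    # With maps=False the plot is built from study.features_df, which must
    # contain an 'rt_original' column in addition to 'rt'.
    layout = study.plot_alignment(maps=False, width=450, height=450, markersize=3)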
@@ -331,8 +416,8 @@ def plot_samples_2d(
     alpha="inty",
     cmap="Turbo256",
     max_features=50000,
-    width=
-    height=
+    width=600,
+    height=600,
     mz_range=None,
     rt_range=None,
 ):
@@ -455,7 +540,10 @@ def plot_samples_2d(
     color_values = {}
     sample_names = {}
 
-
+    # Decide whether to show tqdm based on log level (show for INFO/DEBUG/TRACE)
+    tqdm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
+
+    for uid in tqdm(sample_uids, desc="Plotting BPCs", disable=tqdm_disable):
         sample_data = features_pd[features_pd["sample_uid"] == uid]
         if sample_data.empty:
             continue
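The disable-by-log-level pattern introduced here, shown in isolation (a standalone sketch; `log_level` stands in for the study's configured logging level):

    from tqdm import tqdm

    log_level = "WARNING"  # progress bars are shown only for TRACE/DEBUG/INFO
    tqdm_disable = log_level not in ["TRACE", "DEBUG", "INFO"]
    for uid in tqdm(["s1", "s2", "s3"], desc="Plotting BPCs", disable=tqdm_disable):
        pass  # per-sample plotting work would go here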
@@ -525,7 +613,9 @@ def plot_samples_2d(
     p.add_tools(hover)
 
     # Remove legend from plot
-
+    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
     if filename:
         if filename.endswith(".html"):
             output_file(filename)
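The legend guard above as a self-contained sketch with a throwaway figure (any renderer added without legend_label leaves p.legend empty):

    from bokeh.plotting import figure

    p = figure()
    p.scatter([1, 2, 3], [3, 1, 2])  # no legend_label, so no legend is created
    # Setting legend attributes when no legend exists triggers Bokeh warnings,
    # hence the guard before hiding it:
    if getattr(p, "legend", None) and len(p.legend) > 0:
        p.legend.visible = False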
@@ -540,6 +630,441 @@ def plot_samples_2d(
         return
 
 
+def plot_bpc(
+    self,
+    samples=None,
+    title: str | None = None,
+    filename: str | None = None,
+    width: int = 1000,
+    height: int = 300,
+    rt_unit: str = "s",
+    original: bool = False,
+):
+    """
+    Plot Base Peak Chromatograms (BPC) for selected samples overlayed using Bokeh.
+
+    This collects per-sample BPCs via `get_bpc(self, sample=uid)` and overlays them.
+    Colors are mapped per-sample using the same Turbo256 palette as `plot_samples_2d`.
+    Parameters:
+        original (bool): If True, attempt to map RTs back to original RTs using `features_df`.
+            If False (default), return current/aligned RTs.
+    """
+    # Local imports to avoid heavy top-level deps / circular imports
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.io.export import export_png
+    from bokeh.palettes import Turbo256
+    from masster.study.helpers import get_bpc
+
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.error("No valid sample_uids provided for BPC plotting.")
+        return
+
+    # Debug: show which sample_uids we will process
+    self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
+
+    colors = Turbo256
+    n = max(1, len(sample_uids))
+    step = max(1, 256 // n)
+    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+
+    # If plotting original (uncorrected) RTs, use a dedicated title.
+    if original:
+        plot_title = "Base Peak Chromatograms (uncorrected)"
+    else:
+        plot_title = title or "Base Peak Chromatograms"
+
+    p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
+    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
+    p.yaxis.axis_label = "Intensity"
+
+    renderers = []
+
+    # Build sample name mapping once
+    samples_info = None
+    if hasattr(self, "samples_df") and self.samples_df is not None:
+        try:
+            samples_info = self.samples_df.to_pandas()
+        except Exception:
+            samples_info = None
+
+    for uid in sample_uids:
+        try:
+            chrom = get_bpc(self, sample=uid, rt_unit=rt_unit, label=None, original=original)
+        except Exception as e:
+            # log and skip samples we can't compute BPC for
+            self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
+            continue
+
+        # extract arrays
+        try:
+            # prefer Chromatogram API
+            chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
+            rt = chrom_dict.get("rt")
+            inty = chrom_dict.get("inty")
+        except Exception:
+            try:
+                rt = chrom.rt
+                inty = chrom.inty
+            except Exception as e:
+                self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
+                continue
+
+        if rt is None or inty is None:
+            continue
+
+        # Ensure numpy arrays
+        import numpy as _np
+
+        rt = _np.asarray(rt)
+        inty = _np.asarray(inty)
+        if rt.size == 0 or inty.size == 0:
+            continue
+
+        # Sort by rt
+        idx = _np.argsort(rt)
+        rt = rt[idx]
+        inty = inty[idx]
+
+        sample_name = str(uid)
+        if samples_info is not None:
+            try:
+                row = samples_info[samples_info["sample_uid"] == uid]
+                if not row.empty:
+                    sample_name = row.iloc[0].get("sample_name", sample_name)
+            except Exception:
+                pass
+        # Determine color for this sample early so we can log it
+        color = color_map.get(uid, "#000000")
+
+        # Debug: log sample processing details
+        self.logger.debug(
+            f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
+        )
+
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        src = ColumnDataSource(data)
+
+        r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
+        r_points = p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
+        renderers.append(r_line)
+
+    if not renderers:
+        self.logger.warning("No BPC curves to plot for the selected samples.")
+        return
+
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    p.add_tools(hover)
+
+    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
+
+    if filename:
+        if filename.endswith(".html"):
+            output_file(filename)
+            show(p)
+        elif filename.endswith(".png"):
+            try:
+                export_png(p, filename=filename)
+            except Exception:
+                # fallback to saving HTML
+                output_file(filename.replace(".png", ".html"))
+                show(p)
+        else:
+            output_file(filename)
+            show(p)
+    else:
+        show(p)
+
+    return p
+
+
+def plot_eic(
+    self,
+    mz,
+    mz_tol=0.01,
+    samples=None,
+    title: str | None = None,
+    filename: str | None = None,
+    width: int = 1000,
+    height: int = 300,
+    rt_unit: str = "s",
+    original: bool = False,
+):
+    """
+    Plot Extracted Ion Chromatograms (EIC) for a target m/z (± mz_tol) for selected samples.
+
+    Parameters mirror `plot_bpc` with additional `mz` and `mz_tol` arguments. The function
+    retrieves a Sample object for each sample UID, calls `sample.get_eic(mz, mz_tol)`, and
+    overlays the resulting chromatograms.
+    """
+    # Local imports to avoid heavy top-level deps / circular imports
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.io.export import export_png
+    from bokeh.palettes import Turbo256
+    from masster.study.helpers import get_eic
+
+    if mz is None:
+        self.logger.error("mz must be provided for EIC plotting")
+        return
+
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.error("No valid sample_uids provided for EIC plotting.")
+        return
+
+    colors = Turbo256
+    n = max(1, len(sample_uids))
+    step = max(1, 256 // n)
+    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+
+    plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
+
+    p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
+    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
+    p.yaxis.axis_label = "Intensity"
+
+    renderers = []
+
+    # Build sample name mapping once
+    samples_info = None
+    if hasattr(self, "samples_df") and self.samples_df is not None:
+        try:
+            samples_info = self.samples_df.to_pandas()
+        except Exception:
+            samples_info = None
+
+    for uid in sample_uids:
+        try:
+            chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, rt_unit=rt_unit, label=None)
+        except Exception as e:
+            # log and skip samples we can't compute EIC for
+            self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
+            continue
+
+        # extract arrays
+        try:
+            # prefer Chromatogram API
+            chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
+            rt = chrom_dict.get("rt")
+            inty = chrom_dict.get("inty")
+        except Exception:
+            try:
+                rt = chrom.rt
+                inty = chrom.inty
+            except Exception as e:
+                self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
+                continue
+
+        if rt is None or inty is None:
+            continue
+
+        import numpy as _np
+
+        rt = _np.asarray(rt)
+        inty = _np.asarray(inty)
+        if rt.size == 0 or inty.size == 0:
+            continue
+
+        # Sort by rt
+        idx = _np.argsort(rt)
+        rt = rt[idx]
+        inty = inty[idx]
+
+        sample_name = str(uid)
+        if samples_info is not None:
+            try:
+                row = samples_info[samples_info["sample_uid"] == uid]
+                if not row.empty:
+                    sample_name = row.iloc[0].get("sample_name", sample_name)
+            except Exception:
+                pass
+
+        color = color_map.get(uid, "#000000")
+
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        src = ColumnDataSource(data)
+
+        r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
+        p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
+        renderers.append(r_line)
+
+    if not renderers:
+        self.logger.warning("No EIC curves to plot for the selected samples.")
+        return
+
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    p.add_tools(hover)
+
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
+
+    if filename:
+        if filename.endswith(".html"):
+            output_file(filename)
+            show(p)
+        elif filename.endswith(".png"):
+            try:
+                export_png(p, filename=filename)
+            except Exception:
+                output_file(filename.replace(".png", ".html"))
+                show(p)
+        else:
+            output_file(filename)
+            show(p)
+    else:
+        show(p)
+
+    return p
+
+
+def plot_rt_correction(
+    self,
+    samples=None,
+    title: str | None = None,
+    filename: str | None = None,
+    width: int = 1000,
+    height: int = 300,
+    rt_unit: str = "s",
+):
+    """
+    Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.
+
+    This uses the same color mapping as `plot_bpc` so curves for the same samples match.
+    """
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.palettes import Turbo256
+    import numpy as _np
+
+    # Validate features dataframe
+    if self.features_df is None or self.features_df.is_empty():
+        self.logger.error("No features_df found. Load features first.")
+        return
+
+    if "rt_original" not in self.features_df.columns:
+        self.logger.error("Column 'rt_original' not found in features_df. Alignment/backup RTs missing.")
+        return
+
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.error("No valid sample_uids provided for RT correction plotting.")
+        return
+
+    # Color mapping like plot_bpc
+    colors = Turbo256
+    n = max(1, len(sample_uids))
+    step = max(1, 256 // n)
+    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+
+    p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
+    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
+    p.yaxis.axis_label = "RT - RT_original (s)"
+
+    samples_info = None
+    if hasattr(self, "samples_df") and self.samples_df is not None:
+        try:
+            samples_info = self.samples_df.to_pandas()
+        except Exception:
+            samples_info = None
+
+    renderers = []
+
+    # Iterate samples and build curves
+    for uid in sample_uids:
+        # Select features belonging to this sample
+        try:
+            if "sample_uid" in self.features_df.columns:
+                sample_feats = self.features_df.filter(pl.col("sample_uid") == uid)
+            elif "sample_name" in self.features_df.columns:
+                sample_feats = self.features_df.filter(pl.col("sample_name") == uid)
+            else:
+                self.logger.debug("No sample identifier column in features_df; skipping sample filtering")
+                continue
+        except Exception as e:
+            self.logger.debug(f"Error filtering features for sample {uid}: {e}")
+            continue
+
+        if sample_feats.is_empty():
+            continue
+
+        # Convert to pandas for easy numeric handling
+        try:
+            df = sample_feats.to_pandas()
+        except Exception:
+            continue
+
+        # Need both rt and rt_original
+        if "rt" not in df.columns or "rt_original" not in df.columns:
+            continue
+
+        # Drop NA and ensure numeric arrays
+        df = df.dropna(subset=["rt", "rt_original"]).copy()
+        if df.empty:
+            continue
+
+        rt = _np.asarray(df["rt"], dtype=float)
+        rt_orig = _np.asarray(df["rt_original"], dtype=float)
+        delta = rt - rt_orig
+
+        # sort by rt
+        idx = _np.argsort(rt)
+        rt = rt[idx]
+        delta = delta[idx]
+
+        sample_name = str(uid)
+        if samples_info is not None:
+            try:
+                row = samples_info[samples_info["sample_uid"] == uid]
+                if not row.empty:
+                    sample_name = row.iloc[0].get("sample_name", sample_name)
+            except Exception:
+                pass
+
+        color = color_map.get(uid, "#000000")
+
+        data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
+        src = ColumnDataSource(data)
+
+        r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
+        p.scatter("rt", "delta", source=src, size=2, color=color, alpha=0.6)
+        renderers.append(r_line)
+
+    if not renderers:
+        self.logger.warning("No RT correction curves to plot for the selected samples.")
+        return
+
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
+    p.add_tools(hover)
+
+    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
+
+    if filename:
+        if filename.endswith(".html"):
+            output_file(filename)
+            show(p)
+        elif filename.endswith(".png"):
+            try:
+                from bokeh.io.export import export_png
+
+                export_png(p, filename=filename)
+            except Exception:
+                output_file(filename.replace(".png", ".html"))
+                show(p)
+        else:
+            output_file(filename)
+            show(p)
+    else:
+        show(p)
+
+    return p
+
+
 def plot_chrom(
     self,
     uids=None,
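A hedged usage sketch for the three new overlay plots (the `study` object, file name, and m/z value are illustrative assumptions):

    # Overlay base peak chromatograms for all samples and save to HTML.
    study.plot_bpc(filename="bpc.html", rt_unit="s")
    # Extracted ion chromatogram for a target m/z with a +/- 0.01 window.
    study.plot_eic(mz=445.12, mz_tol=0.01)
    # Per-sample RT correction (rt - rt_original); requires an aligned features_df.
    study.plot_rt_correction()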
@@ -936,10 +1461,10 @@ def plot_consensus_stats(
 def plot_pca(
     self,
     filename=None,
-    width=
-    height=
+    width=400,
+    height=400,
     alpha=0.8,
-    markersize=
+    markersize=6,
     n_components=2,
     color_by=None,
     title="PCA of Consensus Matrix",
@@ -959,7 +1484,7 @@ def plot_pca(
     """
     from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
     from bokeh.plotting import figure, show, output_file
-    from bokeh.palettes import Category20, viridis
+    from bokeh.palettes import Category20, viridis, Turbo256
     from bokeh.transform import factor_cmap
     from sklearn.decomposition import PCA
     from sklearn.preprocessing import StandardScaler
@@ -1094,23 +1619,45 @@ def plot_pca(
             legend_field=color_by,
         )
     else:
-
-
-
-
-
-
-
-
+        # If no color_by provided, color points by sample similar to plot_samples_2d
+        if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
+            # Choose the identifier to map colors by
+            id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
+            sample_ids = list(pd.unique(pca_df[id_col]))
+            colors = Turbo256
+            color_map = {uid: colors[i * (256 // max(1, len(sample_ids)))] for i, uid in enumerate(sample_ids)}
+            # Map colors into dataframe
+            pca_df["color"] = [color_map[x] for x in pca_df[id_col]]
+            # Update the ColumnDataSource with new color column
+            source = ColumnDataSource(pca_df)
+            scatter = p.scatter(
+                "PC1",
+                "PC2",
+                size=markersize,
+                alpha=alpha,
+                color="color",
+                source=source,
+            )
+        else:
+            scatter = p.scatter(
+                "PC1",
+                "PC2",
+                size=markersize,
+                alpha=alpha,
+                color="blue",
+                source=source,
+            )
 
     # Create comprehensive hover tooltips with all sample information
-    tooltip_list = [
-        ("PC1", "@PC1{0.00}"),
-        ("PC2", "@PC2{0.00}"),
-    ]
+    tooltip_list = []
 
-    #
+    # Columns to exclude from tooltips (file paths and internal/plot fields)
+    excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}
+
+    # Add all sample dataframe columns to tooltips, skipping excluded ones
     for col in samples_pd.columns:
+        if col in excluded_cols:
+            continue
         if col in pca_df.columns:
             if pca_df[col].dtype in ["float64", "float32"]:
                 tooltip_list.append((col, f"@{col}{{0.00}}"))
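The Turbo256 index-spacing idea used above for per-sample colors, as a standalone sketch (sample identifiers are made up):

    from bokeh.palettes import Turbo256

    sample_ids = ["S1", "S2", "S3", "S4"]
    # Spread the 256 palette entries evenly across the samples, as in plot_bpc.
    step = max(1, 256 // max(1, len(sample_ids)))
    color_map = {uid: Turbo256[(i * step) % 256] for i, uid in enumerate(sample_ids)}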
@@ -1125,8 +1672,10 @@ def plot_pca(
 
     # Add legend if using categorical coloring
     if color_mapper and not isinstance(color_mapper, LinearColorMapper) and color_by:
-
-        p.legend
+        # Only set legend properties if legends exist (avoid Bokeh warning when none created)
+        if getattr(p, "legend", None) and len(p.legend) > 0:
+            p.legend.location = "top_left"
+            p.legend.click_policy = "hide"
 
     # Output and show
     if filename:
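A usage sketch for the updated plot_pca defaults and coloring (the `study` object and the 'group' metadata column are hypothetical):

    # Categorical coloring; the legend block above only runs when a legend was created.
    study.plot_pca(color_by="group", width=400, height=400, markersize=6)
    # Without color_by, points fall back to per-sample Turbo256 colors.
    study.plot_pca()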
@@ -1134,3 +1683,133 @@ def plot_pca(
 
     show(p)
     return p
+
+def plot_tic(
+    self,
+    samples=None,
+    title: str | None = None,
+    filename: str | None = None,
+    width: int = 1000,
+    height: int = 300,
+    rt_unit: str = "s",
+    original: bool = False,
+):
+    """
+    Plot Total Ion Chromatograms (TIC) for selected samples overlayed using Bokeh.
+
+    Parameters and behavior mirror `plot_bpc` but use per-sample TICs (get_tic).
+    """
+    # Local imports to avoid heavy top-level deps / circular imports
+    from bokeh.plotting import figure, show, output_file
+    from bokeh.models import ColumnDataSource, HoverTool
+    from bokeh.io.export import export_png
+    from bokeh.palettes import Turbo256
+    from masster.study.helpers import get_tic
+
+    sample_uids = self._get_sample_uids(samples)
+    if not sample_uids:
+        self.logger.error("No valid sample_uids provided for TIC plotting.")
+        return
+
+    colors = Turbo256
+    n = max(1, len(sample_uids))
+    step = max(1, 256 // n)
+    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
+
+    plot_title = title or "Total Ion Chromatograms"
+
+    p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
+    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
+    p.yaxis.axis_label = "Intensity"
+
+    renderers = []
+
+    # Build sample name mapping once
+    samples_info = None
+    if hasattr(self, "samples_df") and self.samples_df is not None:
+        try:
+            samples_info = self.samples_df.to_pandas()
+        except Exception:
+            samples_info = None
+
+    for uid in sample_uids:
+        try:
+            chrom = get_tic(self, sample=uid, label=None)
+        except Exception as e:
+            self.logger.debug(f"Skipping sample {uid} for TIC: {e}")
+            continue
+
+        # extract arrays
+        try:
+            chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
+            rt = chrom_dict.get("rt")
+            inty = chrom_dict.get("inty")
+        except Exception:
+            try:
+                rt = chrom.rt
+                inty = chrom.inty
+            except Exception as e:
+                self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
+                continue
+
+        if rt is None or inty is None:
+            continue
+
+        import numpy as _np
+
+        rt = _np.asarray(rt)
+        inty = _np.asarray(inty)
+        if rt.size == 0 or inty.size == 0:
+            continue
+
+        # Sort by rt
+        idx = _np.argsort(rt)
+        rt = rt[idx]
+        inty = inty[idx]
+
+        sample_name = str(uid)
+        if samples_info is not None:
+            try:
+                row = samples_info[samples_info["sample_uid"] == uid]
+                if not row.empty:
+                    sample_name = row.iloc[0].get("sample_name", sample_name)
+            except Exception:
+                pass
+
+        color = color_map.get(uid, "#000000")
+
+        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
+        src = ColumnDataSource(data)
+
+        r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
+        p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
+        renderers.append(r_line)
+
+    if not renderers:
+        self.logger.warning("No TIC curves to plot for the selected samples.")
+        return
+
+    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
+    p.add_tools(hover)
+
+    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
+    if getattr(p, "legend", None) and len(p.legend) > 0:
+        p.legend.visible = False
+
+    if filename:
+        if filename.endswith(".html"):
+            output_file(filename)
+            show(p)
+        elif filename.endswith(".png"):
+            try:
+                export_png(p, filename=filename)
+            except Exception:
+                output_file(filename.replace(".png", ".html"))
+                show(p)
+        else:
+            output_file(filename)
+            show(p)
+    else:
+        show(p)
+
+    return p