masster 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +16 -6
- masster/sample/defaults/sample_def.py +1 -1
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +190 -140
- masster/sample/load.py +13 -9
- masster/sample/plot.py +256 -147
- masster/sample/processing.py +18 -12
- masster/sample/sample.py +10 -4
- masster/sample/sample5_schema.json +38 -29
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +231 -13
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +757 -246
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +57 -25
- masster/study/plot.py +1244 -129
- masster/study/processing.py +194 -86
- masster/study/save.py +7 -7
- masster/study/study.py +154 -89
- masster/study/study5_schema.json +15 -15
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/METADATA +1 -1
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/RECORD +33 -31
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/WHEEL +0 -0
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/entry_points.txt +0 -0
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
|
@@ -7,148 +7,225 @@ import holoviews as hv
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import panel
|
|
9
9
|
import polars as pl
|
|
10
|
-
|
|
11
|
-
from bokeh.io.export import export_png
|
|
12
|
-
from bokeh.models import ColumnDataSource
|
|
13
|
-
from bokeh.models import HoverTool
|
|
14
|
-
from bokeh.palettes import Turbo256
|
|
15
|
-
from bokeh.plotting import figure
|
|
16
|
-
from bokeh.plotting import output_file
|
|
17
|
-
from bokeh.plotting import show
|
|
18
10
|
from tqdm import tqdm
|
|
19
11
|
|
|
20
12
|
hv.extension("bokeh")
|
|
21
13
|
|
|
22
14
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
15
|
+
# Replace any unaliased import that could be shadowed:
|
|
16
|
+
# from bokeh.layouts import row
|
|
17
|
+
from bokeh.layouts import row as bokeh_row
|
|
26
18
|
|
|
27
|
-
if self.features_maps is None or len(self.features_maps) == 0:
|
|
28
|
-
self.load_features()
|
|
29
19
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if ref_index is None:
|
|
33
|
-
self.logger.error("No alignment performed yet.")
|
|
34
|
-
return
|
|
20
|
+
def plot_alignment(self, maps: bool = True, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
|
|
21
|
+
"""Visualize retention time alignment using two synchronized Bokeh scatter plots.
|
|
35
22
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
]
|
|
23
|
+
- When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
|
|
24
|
+
and builds two side-by-side plots: Original RT (left) and Current/Aligned RT (right).
|
|
25
|
+
- When ``maps=False`` the function uses ``self.features_df`` and expects an
|
|
26
|
+
``rt_original`` column (before) and ``rt`` column (after).
|
|
41
27
|
|
|
42
|
-
|
|
28
|
+
Parameters
|
|
29
|
+
- maps: whether to use feature maps (default True).
|
|
30
|
+
- filename: optional HTML file path to save the plot.
|
|
31
|
+
- width/height: pixel size of each subplot.
|
|
32
|
+
- markersize: base marker size.
|
|
43
33
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
34
|
+
Returns
|
|
35
|
+
- Bokeh layout (row) containing the two synchronized plots.
|
|
36
|
+
"""
|
|
37
|
+
# Local imports so the module can be used even if bokeh isn't needed elsewhere
|
|
38
|
+
from bokeh.models import ColumnDataSource, HoverTool
|
|
39
|
+
from bokeh.plotting import figure, show, output_file
|
|
40
|
+
from bokeh.palettes import Turbo256
|
|
41
|
+
import pandas as pd
|
|
42
|
+
|
|
43
|
+
# Build the before/after tabular data used for plotting
|
|
44
|
+
before_data: list[dict[str, Any]] = []
|
|
45
|
+
after_data: list[dict[str, Any]] = []
|
|
46
|
+
|
|
47
|
+
if maps:
|
|
48
|
+
# Ensure feature maps are loaded
|
|
49
|
+
if self.features_maps is None or len(self.features_maps) == 0:
|
|
50
|
+
self.load_features()
|
|
51
|
+
|
|
52
|
+
fmaps = self.features_maps or []
|
|
53
|
+
|
|
54
|
+
if not fmaps:
|
|
55
|
+
self.logger.error("No feature maps available for plotting.")
|
|
56
|
+
return
|
|
57
|
+
|
|
58
|
+
# Reference (first) sample: use current RT for both before and after
|
|
59
|
+
ref = fmaps[0]
|
|
60
|
+
ref_rt = [f.getRT() for f in ref]
|
|
61
|
+
ref_mz = [f.getMZ() for f in ref]
|
|
62
|
+
ref_inty = [f.getIntensity() for f in ref]
|
|
63
|
+
max_ref_inty = max(ref_inty) if ref_inty else 1
|
|
64
|
+
|
|
65
|
+
# sample metadata
|
|
66
|
+
if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
|
|
67
|
+
samples_info = self.samples_df.to_pandas()
|
|
68
|
+
ref_sample_uid = samples_info.iloc[0]['sample_uid'] if 'sample_uid' in samples_info.columns else 'Reference_UID'
|
|
69
|
+
ref_sample_name = samples_info.iloc[0]['sample_name'] if 'sample_name' in samples_info.columns else 'Reference'
|
|
70
|
+
else:
|
|
71
|
+
ref_sample_uid = 'Reference_UID'
|
|
72
|
+
ref_sample_name = 'Reference'
|
|
73
|
+
|
|
74
|
+
for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
|
|
75
|
+
before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
|
|
76
|
+
after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_ref_inty, 'sample_idx': 0, 'sample_name': ref_sample_name, 'sample_uid': ref_sample_uid, 'size': markersize + 2})
|
|
77
|
+
|
|
78
|
+
# Remaining samples
|
|
79
|
+
for sample_idx, fm in enumerate(fmaps[1:], start=1):
|
|
80
|
+
mz_vals = []
|
|
81
|
+
inty_vals = []
|
|
82
|
+
original_rt = []
|
|
83
|
+
aligned_rt = []
|
|
84
|
+
|
|
85
|
+
for f in fm:
|
|
86
|
+
try:
|
|
87
|
+
orig = f.getMetaValue('original_RT')
|
|
88
|
+
except Exception:
|
|
89
|
+
orig = None
|
|
90
|
+
|
|
91
|
+
if orig is None:
|
|
92
|
+
original_rt.append(f.getRT())
|
|
93
|
+
else:
|
|
94
|
+
original_rt.append(orig)
|
|
95
|
+
|
|
96
|
+
aligned_rt.append(f.getRT())
|
|
97
|
+
mz_vals.append(f.getMZ())
|
|
98
|
+
inty_vals.append(f.getIntensity())
|
|
99
|
+
|
|
100
|
+
if not inty_vals:
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
max_inty = max(inty_vals)
|
|
104
|
+
|
|
105
|
+
if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
|
|
106
|
+
samples_info = self.samples_df.to_pandas()
|
|
107
|
+
if sample_idx < len(samples_info):
|
|
108
|
+
sample_name = samples_info.iloc[sample_idx].get('sample_name', f'Sample {sample_idx}')
|
|
109
|
+
sample_uid = samples_info.iloc[sample_idx].get('sample_uid', f'Sample_{sample_idx}_UID')
|
|
110
|
+
else:
|
|
111
|
+
sample_name = f'Sample {sample_idx}'
|
|
112
|
+
sample_uid = f'Sample_{sample_idx}_UID'
|
|
113
|
+
else:
|
|
114
|
+
sample_name = f'Sample {sample_idx}'
|
|
115
|
+
sample_uid = f'Sample_{sample_idx}_UID'
|
|
48
116
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
[f.getRT() for f in fmaps[0]],
|
|
52
|
-
[f.getMZ() for f in fmaps[0]],
|
|
53
|
-
alpha=np.asarray([f.getIntensity() for f in fmaps[0]]) / max([f.getIntensity() for f in fmaps[0]]),
|
|
54
|
-
s=4,
|
|
55
|
-
)
|
|
117
|
+
for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
|
|
118
|
+
before_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
|
|
56
119
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
[f.getMetaValue("original_RT") for f in fm],
|
|
60
|
-
[f.getMZ() for f in fm],
|
|
61
|
-
alpha=np.asarray([f.getIntensity() for f in fm]) / max([f.getIntensity() for f in fm]),
|
|
62
|
-
s=2, # Set symbol size to 3
|
|
63
|
-
)
|
|
120
|
+
for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
|
|
121
|
+
after_data.append({'rt': rt, 'mz': mz, 'inty': inty, 'alpha': inty / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize})
|
|
64
122
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
for fm in fmaps:
|
|
71
|
-
ax.scatter(
|
|
72
|
-
[f.getRT() for f in fm],
|
|
73
|
-
[f.getMZ() for f in fm],
|
|
74
|
-
alpha=np.asarray([f.getIntensity() for f in fm]) / max([f.getIntensity() for f in fm]),
|
|
75
|
-
s=2, # Set symbol size to 3
|
|
76
|
-
)
|
|
123
|
+
else:
|
|
124
|
+
# Use features_df
|
|
125
|
+
if self.features_df is None or self.features_df.is_empty():
|
|
126
|
+
self.logger.error("No features_df found. Load features first.")
|
|
127
|
+
return
|
|
77
128
|
|
|
78
|
-
|
|
129
|
+
required_cols = ['rt', 'mz', 'inty']
|
|
130
|
+
missing = [c for c in required_cols if c not in self.features_df.columns]
|
|
131
|
+
if missing:
|
|
132
|
+
self.logger.error(f"Missing required columns in features_df: {missing}")
|
|
133
|
+
return
|
|
79
134
|
|
|
135
|
+
if 'rt_original' not in self.features_df.columns:
|
|
136
|
+
self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
|
|
137
|
+
return
|
|
80
138
|
|
|
81
|
-
|
|
82
|
-
from bokeh.plotting import figure, show, output_file
|
|
83
|
-
from bokeh.layouts import gridplot
|
|
139
|
+
features_pd = self.features_df.to_pandas()
|
|
84
140
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
return
|
|
141
|
+
sample_col = 'sample_uid' if 'sample_uid' in features_pd.columns else 'sample_name'
|
|
142
|
+
if sample_col not in features_pd.columns:
|
|
143
|
+
self.logger.error("No sample identifier column found in features_df.")
|
|
144
|
+
return
|
|
90
145
|
|
|
91
|
-
|
|
92
|
-
feature_maps[ref_index],
|
|
93
|
-
*feature_maps[:ref_index],
|
|
94
|
-
*feature_maps[ref_index + 1 :],
|
|
95
|
-
]
|
|
146
|
+
samples = features_pd[sample_col].unique()
|
|
96
147
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
)
|
|
103
|
-
p1.xaxis.axis_label = "RT"
|
|
104
|
-
p1.yaxis.axis_label = "m/z"
|
|
105
|
-
p2 = figure(
|
|
106
|
-
title="Feature maps after alignment",
|
|
107
|
-
width=600,
|
|
108
|
-
height=400,
|
|
109
|
-
)
|
|
110
|
-
p2.xaxis.axis_label = "RT"
|
|
111
|
-
p2.yaxis.axis_label = "m/z"
|
|
112
|
-
|
|
113
|
-
# Plot before alignment
|
|
114
|
-
p1.scatter(
|
|
115
|
-
x=[f.getRT() for f in fmaps[0]],
|
|
116
|
-
y=[f.getMZ() for f in fmaps[0]],
|
|
117
|
-
size=4,
|
|
118
|
-
alpha=[f.getIntensity() / max([f.getIntensity() for f in fmaps[0]]) for f in fmaps[0]],
|
|
119
|
-
color="blue",
|
|
120
|
-
)
|
|
148
|
+
for sample_idx, sample in enumerate(samples):
|
|
149
|
+
sample_data = features_pd[features_pd[sample_col] == sample]
|
|
150
|
+
max_inty = sample_data['inty'].max() if sample_data['inty'].max() > 0 else 1
|
|
151
|
+
sample_name = str(sample)
|
|
152
|
+
sample_uid = sample if sample_col == 'sample_uid' else (sample_data['sample_uid'].iloc[0] if 'sample_uid' in sample_data.columns else sample)
|
|
121
153
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
y=[f.getMZ() for f in fm],
|
|
126
|
-
size=2,
|
|
127
|
-
alpha=[f.getIntensity() / max([f.getIntensity() for f in fm]) for f in fm],
|
|
128
|
-
color="green",
|
|
129
|
-
)
|
|
154
|
+
for _, row in sample_data.iterrows():
|
|
155
|
+
before_data.append({'rt': row['rt_original'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
|
|
156
|
+
after_data.append({'rt': row['rt'], 'mz': row['mz'], 'inty': row['inty'], 'alpha': row['inty'] / max_inty, 'sample_idx': sample_idx, 'sample_name': sample_name, 'sample_uid': sample_uid, 'size': markersize + 2 if sample_idx == 0 else markersize})
|
|
130
157
|
|
|
131
|
-
#
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
x=[f.getRT() for f in fm],
|
|
135
|
-
y=[f.getMZ() for f in fm],
|
|
136
|
-
size=2,
|
|
137
|
-
alpha=[f.getIntensity() / max([f.getIntensity() for f in fm]) for f in fm],
|
|
138
|
-
color="red",
|
|
139
|
-
)
|
|
158
|
+
# Ensure dataframes exist even if empty
|
|
159
|
+
before_df = pd.DataFrame(before_data)
|
|
160
|
+
after_df = pd.DataFrame(after_data)
|
|
140
161
|
|
|
141
|
-
#
|
|
142
|
-
|
|
143
|
-
p2.x_range = p1.x_range
|
|
144
|
-
p2.y_range = p1.y_range
|
|
162
|
+
# Create ColumnDataSources (safe even for empty dfs)
|
|
163
|
+
from bokeh.models import ColumnDataSource
|
|
145
164
|
|
|
146
|
-
|
|
165
|
+
before_source = ColumnDataSource(before_df)
|
|
166
|
+
after_source = ColumnDataSource(after_df)
|
|
147
167
|
|
|
148
|
-
#
|
|
168
|
+
# Create Bokeh figures
|
|
169
|
+
p1 = figure(width=width, height=height, title='Original RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save')
|
|
170
|
+
p1.outline_line_color = None
|
|
171
|
+
p1.background_fill_color = 'white'
|
|
172
|
+
p1.border_fill_color = 'white'
|
|
173
|
+
p1.min_border = 0
|
|
174
|
+
|
|
175
|
+
p2 = figure(width=width, height=height, title='Current RT', x_axis_label='Retention Time (s)', y_axis_label='m/z', tools='pan,wheel_zoom,box_zoom,reset,save', x_range=p1.x_range, y_range=p1.y_range)
|
|
176
|
+
p2.outline_line_color = None
|
|
177
|
+
p2.background_fill_color = 'white'
|
|
178
|
+
p2.border_fill_color = 'white'
|
|
179
|
+
p2.min_border = 0
|
|
180
|
+
|
|
181
|
+
# Color mapping using Turbo256
|
|
182
|
+
unique_samples = sorted(list(set(before_df['sample_idx'].tolist()))) if not before_df.empty else []
|
|
183
|
+
colors = Turbo256
|
|
184
|
+
color_map: dict[int, str] = {}
|
|
185
|
+
n = max(1, len(unique_samples))
|
|
186
|
+
step = max(1, 256 // n)
|
|
187
|
+
for i, sample_idx in enumerate(unique_samples):
|
|
188
|
+
color_map[sample_idx] = colors[(i * step) % 256]
|
|
189
|
+
|
|
190
|
+
renderers_before = []
|
|
191
|
+
renderers_after = []
|
|
192
|
+
|
|
193
|
+
for sample_idx in unique_samples:
|
|
194
|
+
sb = before_df[before_df['sample_idx'] == sample_idx]
|
|
195
|
+
sa = after_df[after_df['sample_idx'] == sample_idx]
|
|
196
|
+
color = color_map.get(sample_idx, '#000000')
|
|
197
|
+
|
|
198
|
+
if not sb.empty:
|
|
199
|
+
src = ColumnDataSource(sb)
|
|
200
|
+
r = p1.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
|
|
201
|
+
renderers_before.append(r)
|
|
202
|
+
|
|
203
|
+
if not sa.empty:
|
|
204
|
+
src = ColumnDataSource(sa)
|
|
205
|
+
r = p2.scatter('rt', 'mz', size='size', color=color, alpha='alpha', source=src)
|
|
206
|
+
renderers_after.append(r)
|
|
207
|
+
|
|
208
|
+
# Add hover tools
|
|
209
|
+
hover1 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_before)
|
|
210
|
+
p1.add_tools(hover1)
|
|
211
|
+
|
|
212
|
+
hover2 = HoverTool(tooltips=[('Sample UID', '@sample_uid'), ('Sample Name', '@sample_name'), ('RT', '@rt{0.00}'), ('m/z', '@mz{0.0000}'), ('Intensity', '@inty{0.0e+0}')], renderers=renderers_after)
|
|
213
|
+
p2.add_tools(hover2)
|
|
214
|
+
|
|
215
|
+
# Create layout with both plots side by side
|
|
216
|
+
# Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
|
|
217
|
+
layout = bokeh_row(p1, p2, sizing_mode='fixed', width=width, height=height)
|
|
218
|
+
|
|
219
|
+
# Output and show
|
|
149
220
|
if filename:
|
|
221
|
+
from bokeh.plotting import output_file, show
|
|
150
222
|
output_file(filename)
|
|
151
|
-
|
|
223
|
+
show(layout)
|
|
224
|
+
else:
|
|
225
|
+
from bokeh.plotting import show
|
|
226
|
+
show(layout)
|
|
227
|
+
|
|
228
|
+
return layout
|
|
152
229
|
|
|
153
230
|
|
|
154
231
|
def plot_consensus_2d(
|
|
@@ -163,11 +240,11 @@ def plot_consensus_2d(
|
|
|
163
240
|
width=900,
|
|
164
241
|
height=900,
|
|
165
242
|
mz_range=None,
|
|
166
|
-
rt_range=None
|
|
243
|
+
rt_range=None,
|
|
167
244
|
):
|
|
168
245
|
"""
|
|
169
246
|
Plot consensus features in a 2D scatter plot with retention time vs m/z.
|
|
170
|
-
|
|
247
|
+
|
|
171
248
|
Parameters:
|
|
172
249
|
filename (str, optional): Path to save the plot
|
|
173
250
|
colorby (str): Column name to use for color mapping (default: "number_samples")
|
|
@@ -187,13 +264,13 @@ def plot_consensus_2d(
|
|
|
187
264
|
self.logger.error("No consensus map found.")
|
|
188
265
|
return
|
|
189
266
|
data = self.consensus_df.clone()
|
|
190
|
-
|
|
267
|
+
|
|
191
268
|
# Filter by mz_range and rt_range if provided
|
|
192
269
|
if mz_range is not None:
|
|
193
270
|
data = data.filter((pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]))
|
|
194
271
|
if rt_range is not None:
|
|
195
272
|
data = data.filter((pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]))
|
|
196
|
-
|
|
273
|
+
|
|
197
274
|
if colorby not in data.columns:
|
|
198
275
|
self.logger.error(f"Column {colorby} not found in consensus_df.")
|
|
199
276
|
return
|
|
@@ -339,16 +416,16 @@ def plot_samples_2d(
|
|
|
339
416
|
alpha="inty",
|
|
340
417
|
cmap="Turbo256",
|
|
341
418
|
max_features=50000,
|
|
342
|
-
width=
|
|
343
|
-
height=
|
|
419
|
+
width=600,
|
|
420
|
+
height=600,
|
|
344
421
|
mz_range=None,
|
|
345
|
-
rt_range=None
|
|
422
|
+
rt_range=None,
|
|
346
423
|
):
|
|
347
424
|
"""
|
|
348
425
|
Plot all feature maps for sample_uid in parameter uids in an overlaid scatter plot.
|
|
349
426
|
Each sample is a different color. Alpha scales with intensity.
|
|
350
427
|
OPTIMIZED VERSION: Uses vectorized operations and batch processing.
|
|
351
|
-
|
|
428
|
+
|
|
352
429
|
Parameters:
|
|
353
430
|
samples: Sample UIDs to plot
|
|
354
431
|
filename (str, optional): Path to save the plot
|
|
@@ -366,6 +443,12 @@ def plot_samples_2d(
|
|
|
366
443
|
rt_range (tuple, optional): Retention time range for filtering features (min_rt, max_rt)
|
|
367
444
|
"""
|
|
368
445
|
|
|
446
|
+
# Local bokeh imports to avoid heavy top-level dependency
|
|
447
|
+
from bokeh.plotting import figure, show, output_file
|
|
448
|
+
from bokeh.io.export import export_png
|
|
449
|
+
from bokeh.models import ColumnDataSource, HoverTool
|
|
450
|
+
from bokeh.palettes import Turbo256
|
|
451
|
+
|
|
369
452
|
sample_uids = self._get_sample_uids(samples)
|
|
370
453
|
|
|
371
454
|
if not sample_uids:
|
|
@@ -385,7 +468,7 @@ def plot_samples_2d(
|
|
|
385
468
|
|
|
386
469
|
# OPTIMIZATION 1: Batch filter all features for selected samples at once
|
|
387
470
|
features_batch = self.features_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
388
|
-
|
|
471
|
+
|
|
389
472
|
# Filter by mz_range and rt_range if provided
|
|
390
473
|
if mz_range is not None:
|
|
391
474
|
features_batch = features_batch.filter((pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]))
|
|
@@ -457,7 +540,10 @@ def plot_samples_2d(
|
|
|
457
540
|
color_values = {}
|
|
458
541
|
sample_names = {}
|
|
459
542
|
|
|
460
|
-
|
|
543
|
+
# Decide whether to show tqdm based on log level (show for INFO/DEBUG/TRACE)
|
|
544
|
+
tqdm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
545
|
+
|
|
546
|
+
for uid in tqdm(sample_uids, desc="Plotting BPCs", disable=tqdm_disable):
|
|
461
547
|
sample_data = features_pd[features_pd["sample_uid"] == uid]
|
|
462
548
|
if sample_data.empty:
|
|
463
549
|
continue
|
|
@@ -527,7 +613,9 @@ def plot_samples_2d(
|
|
|
527
613
|
p.add_tools(hover)
|
|
528
614
|
|
|
529
615
|
# Remove legend from plot
|
|
530
|
-
|
|
616
|
+
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
617
|
+
if getattr(p, "legend", None) and len(p.legend) > 0:
|
|
618
|
+
p.legend.visible = False
|
|
531
619
|
if filename:
|
|
532
620
|
if filename.endswith(".html"):
|
|
533
621
|
output_file(filename)
|
|
@@ -542,6 +630,441 @@ def plot_samples_2d(
|
|
|
542
630
|
return
|
|
543
631
|
|
|
544
632
|
|
|
633
|
+
def plot_bpc(
|
|
634
|
+
self,
|
|
635
|
+
samples=None,
|
|
636
|
+
title: str | None = None,
|
|
637
|
+
filename: str | None = None,
|
|
638
|
+
width: int = 1000,
|
|
639
|
+
height: int = 300,
|
|
640
|
+
rt_unit: str = "s",
|
|
641
|
+
original: bool = False,
|
|
642
|
+
):
|
|
643
|
+
"""
|
|
644
|
+
Plot Base Peak Chromatograms (BPC) for selected samples overlayed using Bokeh.
|
|
645
|
+
|
|
646
|
+
This collects per-sample BPCs via `get_bpc(self, sample=uid)` and overlays them.
|
|
647
|
+
Colors are mapped per-sample using the same Turbo256 palette as `plot_samples_2d`.
|
|
648
|
+
Parameters:
|
|
649
|
+
original (bool): If True, attempt to map RTs back to original RTs using `features_df`.
|
|
650
|
+
If False (default), return current/aligned RTs.
|
|
651
|
+
"""
|
|
652
|
+
# Local imports to avoid heavy top-level deps / circular imports
|
|
653
|
+
from bokeh.plotting import figure, show, output_file
|
|
654
|
+
from bokeh.models import ColumnDataSource, HoverTool
|
|
655
|
+
from bokeh.io.export import export_png
|
|
656
|
+
from bokeh.palettes import Turbo256
|
|
657
|
+
from masster.study.helpers import get_bpc
|
|
658
|
+
|
|
659
|
+
sample_uids = self._get_sample_uids(samples)
|
|
660
|
+
if not sample_uids:
|
|
661
|
+
self.logger.error("No valid sample_uids provided for BPC plotting.")
|
|
662
|
+
return
|
|
663
|
+
|
|
664
|
+
# Debug: show which sample_uids we will process
|
|
665
|
+
self.logger.debug(f"plot_bpc: sample_uids={sample_uids}")
|
|
666
|
+
|
|
667
|
+
colors = Turbo256
|
|
668
|
+
n = max(1, len(sample_uids))
|
|
669
|
+
step = max(1, 256 // n)
|
|
670
|
+
color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
|
|
671
|
+
|
|
672
|
+
# If plotting original (uncorrected) RTs, use the requested title.
|
|
673
|
+
if original:
|
|
674
|
+
plot_title = "Base Peak Chromatogarms (uncorrected)"
|
|
675
|
+
else:
|
|
676
|
+
plot_title = title or "Base Peak Chromatograms"
|
|
677
|
+
|
|
678
|
+
p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
679
|
+
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
680
|
+
p.yaxis.axis_label = "Intensity"
|
|
681
|
+
|
|
682
|
+
renderers = []
|
|
683
|
+
|
|
684
|
+
# Build sample name mapping once
|
|
685
|
+
samples_info = None
|
|
686
|
+
if hasattr(self, "samples_df") and self.samples_df is not None:
|
|
687
|
+
try:
|
|
688
|
+
samples_info = self.samples_df.to_pandas()
|
|
689
|
+
except Exception:
|
|
690
|
+
samples_info = None
|
|
691
|
+
|
|
692
|
+
for uid in sample_uids:
|
|
693
|
+
try:
|
|
694
|
+
chrom = get_bpc(self, sample=uid, rt_unit=rt_unit, label=None, original=original)
|
|
695
|
+
except Exception as e:
|
|
696
|
+
# log and skip samples we can't compute BPC for
|
|
697
|
+
self.logger.debug(f"Skipping sample {uid} for BPC: {e}")
|
|
698
|
+
continue
|
|
699
|
+
|
|
700
|
+
# extract arrays
|
|
701
|
+
try:
|
|
702
|
+
# prefer Chromatogram API
|
|
703
|
+
chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
|
|
704
|
+
rt = chrom_dict.get("rt")
|
|
705
|
+
inty = chrom_dict.get("inty")
|
|
706
|
+
except Exception:
|
|
707
|
+
try:
|
|
708
|
+
rt = chrom.rt
|
|
709
|
+
inty = chrom.inty
|
|
710
|
+
except Exception as e:
|
|
711
|
+
self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
|
|
712
|
+
continue
|
|
713
|
+
|
|
714
|
+
if rt is None or inty is None:
|
|
715
|
+
continue
|
|
716
|
+
|
|
717
|
+
# Ensure numpy arrays
|
|
718
|
+
import numpy as _np
|
|
719
|
+
|
|
720
|
+
rt = _np.asarray(rt)
|
|
721
|
+
inty = _np.asarray(inty)
|
|
722
|
+
if rt.size == 0 or inty.size == 0:
|
|
723
|
+
continue
|
|
724
|
+
|
|
725
|
+
# Sort by rt
|
|
726
|
+
idx = _np.argsort(rt)
|
|
727
|
+
rt = rt[idx]
|
|
728
|
+
inty = inty[idx]
|
|
729
|
+
|
|
730
|
+
sample_name = str(uid)
|
|
731
|
+
if samples_info is not None:
|
|
732
|
+
try:
|
|
733
|
+
row = samples_info[samples_info["sample_uid"] == uid]
|
|
734
|
+
if not row.empty:
|
|
735
|
+
sample_name = row.iloc[0].get("sample_name", sample_name)
|
|
736
|
+
except Exception:
|
|
737
|
+
pass
|
|
738
|
+
# Determine color for this sample early so we can log it
|
|
739
|
+
color = color_map.get(uid, "#000000")
|
|
740
|
+
|
|
741
|
+
# Debug: log sample processing details
|
|
742
|
+
self.logger.debug(
|
|
743
|
+
f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
|
|
744
|
+
)
|
|
745
|
+
|
|
746
|
+
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
|
|
747
|
+
src = ColumnDataSource(data)
|
|
748
|
+
|
|
749
|
+
r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
|
|
750
|
+
r_points = p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
|
|
751
|
+
renderers.append(r_line)
|
|
752
|
+
|
|
753
|
+
if not renderers:
|
|
754
|
+
self.logger.warning("No BPC curves to plot for the selected samples.")
|
|
755
|
+
return
|
|
756
|
+
|
|
757
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
|
|
758
|
+
p.add_tools(hover)
|
|
759
|
+
|
|
760
|
+
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
761
|
+
if getattr(p, "legend", None) and len(p.legend) > 0:
|
|
762
|
+
p.legend.visible = False
|
|
763
|
+
|
|
764
|
+
if filename:
|
|
765
|
+
if filename.endswith(".html"):
|
|
766
|
+
output_file(filename)
|
|
767
|
+
show(p)
|
|
768
|
+
elif filename.endswith(".png"):
|
|
769
|
+
try:
|
|
770
|
+
export_png(p, filename=filename)
|
|
771
|
+
except Exception:
|
|
772
|
+
# fallback to saving HTML
|
|
773
|
+
output_file(filename.replace(".png", ".html"))
|
|
774
|
+
show(p)
|
|
775
|
+
else:
|
|
776
|
+
output_file(filename)
|
|
777
|
+
show(p)
|
|
778
|
+
else:
|
|
779
|
+
show(p)
|
|
780
|
+
|
|
781
|
+
return p
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
def plot_eic(
|
|
785
|
+
self,
|
|
786
|
+
mz,
|
|
787
|
+
mz_tol=0.01,
|
|
788
|
+
samples=None,
|
|
789
|
+
title: str | None = None,
|
|
790
|
+
filename: str | None = None,
|
|
791
|
+
width: int = 1000,
|
|
792
|
+
height: int = 300,
|
|
793
|
+
rt_unit: str = "s",
|
|
794
|
+
original: bool = False,
|
|
795
|
+
):
|
|
796
|
+
"""
|
|
797
|
+
Plot Extracted Ion Chromatograms (EIC) for a target m/z (± mz_tol) for selected samples.
|
|
798
|
+
|
|
799
|
+
Parameters mirror `plot_bpc` with additional `mz` and `mz_tol` arguments. The function
|
|
800
|
+
retrieves a Sample object for each sample UID, calls `sample.get_eic(mz, mz_tol)`, and
|
|
801
|
+
overlays the resulting chromatograms.
|
|
802
|
+
"""
|
|
803
|
+
# Local imports to avoid heavy top-level deps / circular imports
|
|
804
|
+
from bokeh.plotting import figure, show, output_file
|
|
805
|
+
from bokeh.models import ColumnDataSource, HoverTool
|
|
806
|
+
from bokeh.io.export import export_png
|
|
807
|
+
from bokeh.palettes import Turbo256
|
|
808
|
+
from masster.study.helpers import get_eic
|
|
809
|
+
|
|
810
|
+
if mz is None:
|
|
811
|
+
self.logger.error("mz must be provided for EIC plotting")
|
|
812
|
+
return
|
|
813
|
+
|
|
814
|
+
sample_uids = self._get_sample_uids(samples)
|
|
815
|
+
if not sample_uids:
|
|
816
|
+
self.logger.error("No valid sample_uids provided for EIC plotting.")
|
|
817
|
+
return
|
|
818
|
+
|
|
819
|
+
colors = Turbo256
|
|
820
|
+
n = max(1, len(sample_uids))
|
|
821
|
+
step = max(1, 256 // n)
|
|
822
|
+
color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}
|
|
823
|
+
|
|
824
|
+
plot_title = title or f"Extracted Ion Chromatograms (m/z={mz:.4f} ± {mz_tol})"
|
|
825
|
+
|
|
826
|
+
p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
|
|
827
|
+
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
828
|
+
p.yaxis.axis_label = "Intensity"
|
|
829
|
+
|
|
830
|
+
renderers = []
|
|
831
|
+
|
|
832
|
+
# Build sample name mapping once
|
|
833
|
+
samples_info = None
|
|
834
|
+
if hasattr(self, "samples_df") and self.samples_df is not None:
|
|
835
|
+
try:
|
|
836
|
+
samples_info = self.samples_df.to_pandas()
|
|
837
|
+
except Exception:
|
|
838
|
+
samples_info = None
|
|
839
|
+
|
|
840
|
+
for uid in sample_uids:
|
|
841
|
+
try:
|
|
842
|
+
chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, rt_unit=rt_unit, label=None)
|
|
843
|
+
except Exception as e:
|
|
844
|
+
# log and skip samples we can't compute EIC for
|
|
845
|
+
self.logger.debug(f"Skipping sample {uid} for EIC: {e}")
|
|
846
|
+
continue
|
|
847
|
+
|
|
848
|
+
# extract arrays
|
|
849
|
+
try:
|
|
850
|
+
# prefer Chromatogram API
|
|
851
|
+
chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
|
|
852
|
+
rt = chrom_dict.get("rt")
|
|
853
|
+
inty = chrom_dict.get("inty")
|
|
854
|
+
except Exception:
|
|
855
|
+
try:
|
|
856
|
+
rt = chrom.rt
|
|
857
|
+
inty = chrom.inty
|
|
858
|
+
except Exception as e:
|
|
859
|
+
self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
|
|
860
|
+
continue
|
|
861
|
+
|
|
862
|
+
if rt is None or inty is None:
|
|
863
|
+
continue
|
|
864
|
+
|
|
865
|
+
import numpy as _np
|
|
866
|
+
|
|
867
|
+
rt = _np.asarray(rt)
|
|
868
|
+
inty = _np.asarray(inty)
|
|
869
|
+
if rt.size == 0 or inty.size == 0:
|
|
870
|
+
continue
|
|
871
|
+
|
|
872
|
+
# Sort by rt
|
|
873
|
+
idx = _np.argsort(rt)
|
|
874
|
+
rt = rt[idx]
|
|
875
|
+
inty = inty[idx]
|
|
876
|
+
|
|
877
|
+
sample_name = str(uid)
|
|
878
|
+
if samples_info is not None:
|
|
879
|
+
try:
|
|
880
|
+
row = samples_info[samples_info["sample_uid"] == uid]
|
|
881
|
+
if not row.empty:
|
|
882
|
+
sample_name = row.iloc[0].get("sample_name", sample_name)
|
|
883
|
+
except Exception:
|
|
884
|
+
pass
|
|
885
|
+
|
|
886
|
+
color = color_map.get(uid, "#000000")
|
|
887
|
+
|
|
888
|
+
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
|
|
889
|
+
src = ColumnDataSource(data)
|
|
890
|
+
|
|
891
|
+
r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
|
|
892
|
+
p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
|
|
893
|
+
renderers.append(r_line)
|
|
894
|
+
|
|
895
|
+
if not renderers:
|
|
896
|
+
self.logger.warning("No EIC curves to plot for the selected samples.")
|
|
897
|
+
return
|
|
898
|
+
|
|
899
|
+
hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
|
|
900
|
+
p.add_tools(hover)
|
|
901
|
+
|
|
902
|
+
if getattr(p, "legend", None) and len(p.legend) > 0:
|
|
903
|
+
p.legend.visible = False
|
|
904
|
+
|
|
905
|
+
if filename:
|
|
906
|
+
if filename.endswith(".html"):
|
|
907
|
+
output_file(filename)
|
|
908
|
+
show(p)
|
|
909
|
+
elif filename.endswith(".png"):
|
|
910
|
+
try:
|
|
911
|
+
export_png(p, filename=filename)
|
|
912
|
+
except Exception:
|
|
913
|
+
output_file(filename.replace(".png", ".html"))
|
|
914
|
+
show(p)
|
|
915
|
+
else:
|
|
916
|
+
output_file(filename)
|
|
917
|
+
show(p)
|
|
918
|
+
else:
|
|
919
|
+
show(p)
|
|
920
|
+
|
|
921
|
+
return p
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
def plot_rt_correction(
    self,
    samples=None,
    title: str | None = None,
    filename: str | None = None,
    width: int = 1000,
    height: int = 300,
    rt_unit: str = "s",
):
    """
    Plot RT correction per sample: (rt - rt_original) vs rt overlayed for selected samples.

    This uses the same color mapping as `plot_bpc` so curves for the same samples match.

    Parameters:
        samples: Sample selector passed to `self._get_sample_uids` (None = all samples).
        title (str, optional): Plot title; defaults to "RT correction".
        filename (str, optional): Output file. ".html" saves HTML, ".png" tries PNG export
            (falling back to HTML on failure); any other extension is treated as HTML.
        width (int): Plot width in pixels.
        height (int): Plot height in pixels.
        rt_unit (str): Unit label used on the x-axis (display only; no conversion is done).

    Returns:
        The bokeh figure, or None if there is nothing to plot.
    """
    # Local imports to avoid heavy top-level deps / circular imports
    from bokeh.plotting import figure, show, output_file
    from bokeh.models import ColumnDataSource, HoverTool
    from bokeh.palettes import Turbo256
    import numpy as _np

    # Validate features dataframe
    if self.features_df is None or self.features_df.is_empty():
        self.logger.error("No features_df found. Load features first.")
        return

    # rt_original is written during alignment; without it no delta can be computed
    if "rt_original" not in self.features_df.columns:
        self.logger.error("Column 'rt_original' not found in features_df. Alignment/backup RTs missing.")
        return

    sample_uids = self._get_sample_uids(samples)
    if not sample_uids:
        self.logger.error("No valid sample_uids provided for RT correction plotting.")
        return

    # Color mapping like plot_bpc: spread sample colors evenly over Turbo256
    colors = Turbo256
    n = max(1, len(sample_uids))
    step = max(1, 256 // n)
    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}

    p = figure(width=width, height=height, title=title or "RT correction", tools="pan,wheel_zoom,box_zoom,reset,save")
    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
    p.yaxis.axis_label = "RT - RT_original (s)"

    # Optional sample_uid -> sample_name mapping for nicer hover labels
    samples_info = None
    if hasattr(self, "samples_df") and self.samples_df is not None:
        try:
            samples_info = self.samples_df.to_pandas()
        except Exception:
            samples_info = None

    renderers = []

    # Iterate samples and build curves
    for uid in sample_uids:
        # Select features belonging to this sample
        try:
            if "sample_uid" in self.features_df.columns:
                sample_feats = self.features_df.filter(pl.col("sample_uid") == uid)
            elif "sample_name" in self.features_df.columns:
                sample_feats = self.features_df.filter(pl.col("sample_name") == uid)
            else:
                self.logger.debug("No sample identifier column in features_df; skipping sample filtering")
                continue
        except Exception as e:
            self.logger.debug(f"Error filtering features for sample {uid}: {e}")
            continue

        if sample_feats.is_empty():
            continue

        # Convert to pandas for easy numeric handling
        try:
            df = sample_feats.to_pandas()
        except Exception:
            continue

        # Need both rt and rt_original
        if "rt" not in df.columns or "rt_original" not in df.columns:
            continue

        # Drop NA and ensure numeric arrays
        df = df.dropna(subset=["rt", "rt_original"]).copy()
        if df.empty:
            continue

        rt = _np.asarray(df["rt"], dtype=float)
        rt_orig = _np.asarray(df["rt_original"], dtype=float)
        delta = rt - rt_orig

        # sort by rt so the line renders left-to-right
        idx = _np.argsort(rt)
        rt = rt[idx]
        delta = delta[idx]

        # Prefer the human-readable sample name when available
        sample_name = str(uid)
        if samples_info is not None:
            try:
                row = samples_info[samples_info["sample_uid"] == uid]
                if not row.empty:
                    sample_name = row.iloc[0].get("sample_name", sample_name)
            except Exception:
                pass

        color = color_map.get(uid, "#000000")

        data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt)}
        src = ColumnDataSource(data)

        # Hover is attached to the line renderer only; scatter is decorative
        r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
        p.scatter("rt", "delta", source=src, size=2, color=color, alpha=0.6)
        renderers.append(r_line)

    if not renderers:
        self.logger.warning("No RT correction curves to plot for the selected samples.")
        return

    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("rt - rt_original", "@delta{0.00}")], renderers=renderers)
    p.add_tools(hover)

    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
    if getattr(p, "legend", None) and len(p.legend) > 0:
        p.legend.visible = False

    if filename:
        if filename.endswith(".html"):
            output_file(filename)
            show(p)
        elif filename.endswith(".png"):
            try:
                from bokeh.io.export import export_png

                export_png(p, filename=filename)
            except Exception:
                # PNG export needs optional deps (selenium/webdriver); fall back to HTML
                output_file(filename.replace(".png", ".html"))
                show(p)
        else:
            output_file(filename)
            show(p)
    else:
        show(p)

    return p
545
1068
|
def plot_chrom(
|
|
546
1069
|
self,
|
|
547
1070
|
uids=None,
|
|
@@ -560,6 +1083,9 @@ def plot_chrom(
|
|
|
560
1083
|
self.logger.error("No chromatogram data found.")
|
|
561
1084
|
return
|
|
562
1085
|
|
|
1086
|
+
# Local import for color palette
|
|
1087
|
+
from bokeh.palettes import Turbo256
|
|
1088
|
+
|
|
563
1089
|
# Assign a fixed color to each sample/column
|
|
564
1090
|
sample_names = [col for col in chroms.columns if col not in ["consensus_uid"]]
|
|
565
1091
|
if not sample_names:
|
|
@@ -569,12 +1095,12 @@ def plot_chrom(
|
|
|
569
1095
|
|
|
570
1096
|
plots = []
|
|
571
1097
|
self.logger.info(f"Plotting {chroms.shape[0]} chromatograms...")
|
|
572
|
-
|
|
1098
|
+
tqdm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
573
1099
|
for row in tqdm(
|
|
574
1100
|
chroms.iter_rows(named=True),
|
|
575
1101
|
total=chroms.shape[0],
|
|
576
1102
|
desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Plot chromatograms",
|
|
577
|
-
disable=
|
|
1103
|
+
disable=tqdm_disable,
|
|
578
1104
|
):
|
|
579
1105
|
consensus_uid = row["consensus_uid"] # Get consensus_uid from the row
|
|
580
1106
|
consensus_id = consensus_uid # Use the same value for consensus_id
|
|
@@ -698,3 +1224,592 @@ def plot_chrom(
|
|
|
698
1224
|
# In a server context, return the panel object instead of showing or saving directly
|
|
699
1225
|
# return panel.panel(layout)
|
|
700
1226
|
panel.panel(layout).show()
|
|
1227
|
+
|
|
1228
|
+
|
|
1229
|
+
def plot_consensus_stats(
    self,
    filename=None,
    width=1200,
    height=1200,
    alpha=0.6,
    markersize=3,
):
    """
    Plot a scatter plot matrix (SPLOM) of consensus statistics using Bokeh.

    Diagonal cells show histograms of each statistic; off-diagonal cells show
    pairwise scatter plots with hover tooltips (consensus_uid, rt, mz).

    Parameters:
        filename (str, optional): Output filename for saving the plot
        width (int): Accepted for API compatibility but currently unused —
            the layout uses fixed per-subplot sizes (default: 1200)
        height (int): Accepted for API compatibility but currently unused (default: 1200)
        alpha (float): Point transparency (default: 0.6)
        markersize (int): Size of points (default: 3)

    Returns:
        The bokeh gridplot layout, or None on error.
    """
    from bokeh.layouts import gridplot
    from bokeh.models import ColumnDataSource, HoverTool
    from bokeh.plotting import figure, show, output_file

    # Check if consensus_df exists and has data
    if self.consensus_df is None or self.consensus_df.is_empty():
        self.logger.error("No consensus data available. Run merge/find_consensus first.")
        return

    # Statistics to include in the SPLOM (only those present are plotted)
    columns = [
        "rt",
        "mz",
        "number_samples",
        "log10_quality",
        "mz_delta_mean",
        "rt_delta_mean",
        "chrom_coherence_mean",
        "chrom_prominence_scaled_mean",
        "inty_mean",
        "number_ms2",
    ]

    # Check which columns exist in the dataframe and compute missing ones
    available_columns = self.consensus_df.columns
    data_df = self.consensus_df.clone()

    # Derive log10_quality from quality when not precomputed
    if "quality" in available_columns and "log10_quality" not in available_columns:
        data_df = data_df.with_columns(
            pl.col("quality").log10().alias("log10_quality"),
        )

    # Filter columns that actually exist
    final_columns = [col for col in columns if col in data_df.columns]

    if len(final_columns) < 2:
        self.logger.error(f"Need at least 2 columns for SPLOM. Available: {final_columns}")
        return

    self.logger.debug(f"Creating SPLOM with columns: {final_columns}")

    # Add important ID columns for tooltips even if not plotting them
    tooltip_columns = []
    for id_col in ["consensus_uid", "consensus_id"]:
        if id_col in data_df.columns and id_col not in final_columns:
            tooltip_columns.append(id_col)

    # Select plotting columns plus tooltip columns
    all_columns = final_columns + tooltip_columns
    data_pd = data_df.select(all_columns).to_pandas()

    # Remove any infinite or NaN values (histograms and hover break on them)
    data_pd = data_pd.replace([np.inf, -np.inf], np.nan).dropna()

    if data_pd.empty:
        self.logger.error("No valid data after removing NaN/infinite values.")
        return

    source = ColumnDataSource(data_pd)

    n_vars = len(final_columns)

    # Per-subplot sizes chosen so inner plot areas stay uniform:
    # the first column and last row carry axis labels and need extra room.
    plot_width_first = 180  # Wider to account for y-axis labels
    plot_width_others = 120  # Standard width for other columns
    plot_height_normal = 120  # Standard height
    plot_height_last = 155  # Taller last row to accommodate x-axis labels while keeping inner plot area same size

    # Create grid of plots with variable outer sizes but equal inner areas
    plots = []

    for i, y_var in enumerate(final_columns):
        row = []
        for j, x_var in enumerate(final_columns):
            # Determine if this plot needs axis labels
            has_x_label = i == n_vars - 1  # bottom row
            has_y_label = j == 0  # left column

            # First column wider to accommodate y-axis labels, ensuring equal inner plot areas
            current_width = plot_width_first if has_y_label else plot_width_others
            current_height = plot_height_last if has_x_label else plot_height_normal

            p = figure(
                width=current_width,
                height=current_height,
                title=None,  # No title on any plot
                toolbar_location=None,
                # Adjusted borders - first column has more space, others minimal
                min_border_left=70 if has_y_label else 15,
                min_border_bottom=50 if has_x_label else 15,
                min_border_right=15,
                min_border_top=15,
            )

            # Ensure subplot background and border are explicitly white so the plot looks
            # correct in dark and light themes.
            p.outline_line_color = None
            p.border_fill_color = "white"
            p.border_fill_alpha = 1.0
            p.background_fill_color = "white"

            # Remove axis lines to eliminate black lines between plots
            p.xaxis.axis_line_color = None
            p.yaxis.axis_line_color = None

            # Keep subtle grid lines for data reference
            p.grid.visible = True
            p.grid.grid_line_color = "#E0E0E0"  # Light gray grid lines

            # Set axis labels and formatting
            if has_x_label:  # bottom row
                p.xaxis.axis_label = x_var
                p.xaxis.axis_label_text_font_size = "12pt"
                p.xaxis.major_label_text_font_size = "9pt"
                p.xaxis.axis_label_standoff = 15
            else:
                p.xaxis.major_label_text_font_size = "0pt"
                p.xaxis.minor_tick_line_color = None
                p.xaxis.major_tick_line_color = None

            if has_y_label:  # left column
                p.yaxis.axis_label = y_var
                p.yaxis.axis_label_text_font_size = "10pt"  # Smaller y-axis title
                p.yaxis.major_label_text_font_size = "8pt"
                p.yaxis.axis_label_standoff = 12
            else:
                p.yaxis.major_label_text_font_size = "0pt"
                p.yaxis.minor_tick_line_color = None
                p.yaxis.major_tick_line_color = None

            if i == j:
                # Diagonal: histogram
                hist, edges = np.histogram(data_pd[x_var], bins=30)
                p.quad(
                    top=hist,
                    bottom=0,
                    left=edges[:-1],
                    right=edges[1:],
                    fill_color="green",
                    line_color="white",
                    alpha=alpha,
                )
            else:
                # Off-diagonal: scatter plot
                scatter = p.scatter(
                    x=x_var,
                    y=y_var,
                    size=markersize,
                    alpha=alpha,
                    color="blue",
                    source=source,
                )

                # Add hover tool
                hover = HoverTool(
                    tooltips=[
                        (x_var, f"@{x_var}{{0.0000}}"),
                        (y_var, f"@{y_var}{{0.0000}}"),
                        (
                            "consensus_uid",
                            "@consensus_uid"
                            if "consensus_uid" in data_pd.columns
                            else "@consensus_id"
                            if "consensus_id" in data_pd.columns
                            else "N/A",
                        ),
                        ("rt", "@rt{0.00}" if "rt" in data_pd.columns else "N/A"),
                        ("mz", "@mz{0.0000}" if "mz" in data_pd.columns else "N/A"),
                    ],
                    renderers=[scatter],
                )
                p.add_tools(hover)

            row.append(p)
        plots.append(row)

    # Link axes for same variables.
    # NOTE(review): each assignment replaces the plot's range object, so after the
    # inner loop a plot shares ranges only with the last linked k; kept as-is to
    # preserve existing behavior.
    for i in range(n_vars):
        for j in range(n_vars):
            if i != j:  # Don't link diagonal plots
                # Link x-axis to other plots in same column
                for k in range(n_vars):
                    if k != i and k != j:
                        plots[i][j].x_range = plots[k][j].x_range

                # Link y-axis to other plots in same row
                for k in range(n_vars):
                    if k != j and k != i:
                        plots[i][j].y_range = plots[i][k].y_range

    # Create grid layout and force overall background/border to white so the outer
    # container doesn't show dark UI colors in night mode.
    grid = gridplot(plots)

    # Set overall background and border to white when supported
    if hasattr(grid, "background_fill_color"):
        grid.background_fill_color = "white"
    if hasattr(grid, "border_fill_color"):
        grid.border_fill_color = "white"

    # Output and show
    if filename:
        output_file(filename)

    show(grid)
    return grid
1461
|
+
def plot_pca(
    self,
    filename=None,
    width=400,
    height=400,
    alpha=0.8,
    markersize=6,
    n_components=2,
    color_by=None,
    title="PCA of Consensus Matrix",
):
    """
    Plot PCA (Principal Component Analysis) of the consensus matrix using Bokeh.

    Samples are rows (the consensus matrix is transposed), features are columns;
    data is standardized before PCA. With n_components=1 the second axis is zero.

    Parameters:
        filename (str, optional): Output filename for saving the plot
        width (int): Plot width (default: 400)
        height (int): Plot height (default: 400)
        alpha (float): Point transparency (default: 0.8)
        markersize (int): Size of points (default: 6)
        n_components (int): Number of PCA components to compute (default: 2)
        color_by (str, optional): Column from samples_df to color points by
        title (str): Plot title (default: "PCA of Consensus Matrix")

    Returns:
        The bokeh figure, or None on error.
    """
    from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
    from bokeh.plotting import figure, show, output_file
    from bokeh.palettes import Category20, viridis, Turbo256
    from bokeh.transform import factor_cmap
    from sklearn.decomposition import PCA
    from sklearn.preprocessing import StandardScaler
    import pandas as pd
    import numpy as np

    # Check if consensus matrix and samples_df exist
    try:
        consensus_matrix = self.get_consensus_matrix()
        samples_df = self.samples_df
    except Exception as e:
        self.logger.error(f"Error getting consensus matrix or samples_df: {e}")
        return

    if consensus_matrix is None or consensus_matrix.shape[0] == 0:
        self.logger.error("No consensus matrix available. Run merge/find_consensus first.")
        return

    if samples_df is None or samples_df.is_empty():
        self.logger.error("No samples dataframe available.")
        return

    self.logger.info(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")

    # Convert consensus matrix to numpy if it's not already
    if hasattr(consensus_matrix, "values"):
        matrix_data = consensus_matrix.values
    elif hasattr(consensus_matrix, "to_numpy"):
        matrix_data = consensus_matrix.to_numpy()
    else:
        matrix_data = np.array(consensus_matrix)

    # Transpose matrix so samples are rows and features are columns
    matrix_data = matrix_data.T

    # Handle missing values by replacing with 0
    matrix_data = np.nan_to_num(matrix_data, nan=0.0, posinf=0.0, neginf=0.0)

    # Standardize the data
    scaler = StandardScaler()
    matrix_scaled = scaler.fit_transform(matrix_data)

    # Perform PCA
    pca = PCA(n_components=n_components)
    pca_result = pca.fit_transform(matrix_scaled)

    # Get explained variance ratios; pad so PC2 labels are safe when n_components == 1
    explained_var = pca.explained_variance_ratio_
    if explained_var.shape[0] < 2:
        explained_var = np.append(explained_var, 0.0)

    self.logger.info(f"PCA explained variance ratios: {explained_var}")

    # Convert samples_df to pandas for easier manipulation
    samples_pd = samples_df.to_pandas()

    # Create dataframe with PCA results and sample information
    pca_df = pd.DataFrame({
        "PC1": pca_result[:, 0],
        "PC2": pca_result[:, 1] if n_components > 1 else np.zeros(len(pca_result)),
    })

    # Add sample information to PCA dataframe
    if len(samples_pd) == len(pca_df):
        for col in samples_pd.columns:
            pca_df[col] = samples_pd[col].values
    else:
        self.logger.warning(
            f"Sample count mismatch: samples_df has {len(samples_pd)} rows, "
            f"but consensus matrix has {len(pca_df)} samples"
        )

    # Prepare color mapping
    color_column = None
    color_mapper = None

    if color_by and color_by in pca_df.columns:
        color_column = color_by
        unique_values = pca_df[color_by].unique()

        # Handle categorical vs numeric coloring
        if pca_df[color_by].dtype in ["object", "string", "category"]:
            # Categorical coloring
            if len(unique_values) <= 20:
                palette = Category20[min(20, max(3, len(unique_values)))]
            else:
                palette = viridis(min(256, len(unique_values)))
            color_mapper = factor_cmap(color_by, palette, unique_values)
        else:
            # Numeric coloring
            palette = viridis(256)
            color_mapper = LinearColorMapper(
                palette=palette,
                low=pca_df[color_by].min(),
                high=pca_df[color_by].max(),
            )

    # Create Bokeh plot
    p = figure(
        width=width,
        height=height,
        title=f"{title} (PC1: {explained_var[0]:.1%}, PC2: {explained_var[1]:.1%})",
        tools="pan,wheel_zoom,box_zoom,reset,save",
    )

    p.xaxis.axis_label = f"PC1 ({explained_var[0]:.1%} variance)"
    p.yaxis.axis_label = f"PC2 ({explained_var[1]:.1%} variance)"

    # Create data source
    source = ColumnDataSource(pca_df)

    # Create scatter plot
    if color_mapper:
        if isinstance(color_mapper, LinearColorMapper):
            scatter = p.scatter(
                "PC1",
                "PC2",
                size=markersize,
                alpha=alpha,
                color={"field": color_by, "transform": color_mapper},
                source=source,
            )
            # Add colorbar for numeric coloring
            color_bar = ColorBar(color_mapper=color_mapper, width=8, location=(0, 0))
            p.add_layout(color_bar, "right")
        else:
            scatter = p.scatter(
                "PC1",
                "PC2",
                size=markersize,
                alpha=alpha,
                color=color_mapper,
                source=source,
                legend_field=color_by,
            )
    else:
        # If no color_by provided, color points by sample similar to plot_samples_2d
        if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
            # Choose the identifier to map colors by
            id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
            sample_ids = list(pd.unique(pca_df[id_col]))
            colors = Turbo256
            color_map = {uid: colors[i * (256 // max(1, len(sample_ids)))] for i, uid in enumerate(sample_ids)}
            # Map colors into dataframe
            pca_df["color"] = [color_map[x] for x in pca_df[id_col]]
            # Update the ColumnDataSource with new color column
            source = ColumnDataSource(pca_df)
            scatter = p.scatter(
                "PC1",
                "PC2",
                size=markersize,
                alpha=alpha,
                color="color",
                source=source,
            )
        else:
            scatter = p.scatter(
                "PC1",
                "PC2",
                size=markersize,
                alpha=alpha,
                color="blue",
                source=source,
            )

    # Create comprehensive hover tooltips with all sample information
    tooltip_list = []

    # Columns to exclude from tooltips (file paths and internal/plot fields)
    excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "PC1", "PC2", "ms1", "ms2"}

    # Add all sample dataframe columns to tooltips, skipping excluded ones
    for col in samples_pd.columns:
        if col in excluded_cols:
            continue
        if col in pca_df.columns:
            if pca_df[col].dtype in ["float64", "float32"]:
                tooltip_list.append((col, f"@{col}{{0.00}}"))
            else:
                tooltip_list.append((col, f"@{col}"))

    hover = HoverTool(
        tooltips=tooltip_list,
        renderers=[scatter],
    )
    p.add_tools(hover)

    # Add legend if using categorical coloring
    if color_mapper and not isinstance(color_mapper, LinearColorMapper) and color_by:
        # Only set legend properties if legends exist (avoid Bokeh warning when none created)
        if getattr(p, "legend", None) and len(p.legend) > 0:
            p.legend.location = "top_left"
            p.legend.click_policy = "hide"

    # Output and show
    if filename:
        output_file(filename)

    show(p)
    return p
|
1687
|
+
def plot_tic(
    self,
    samples=None,
    title: str | None = None,
    filename: str | None = None,
    width: int = 1000,
    height: int = 300,
    rt_unit: str = "s",
    original: bool = False,
):
    """
    Plot Total Ion Chromatograms (TIC) for selected samples overlayed using Bokeh.

    Parameters and behavior mirror `plot_bpc` but use per-sample TICs (get_tic).

    Parameters:
        samples: Sample selector passed to `self._get_sample_uids` (None = all samples).
        title (str, optional): Plot title; defaults to "Total Ion Chromatograms".
        filename (str, optional): Output file. ".html" saves HTML, ".png" tries PNG export
            (falling back to HTML on failure); any other extension is treated as HTML.
        width (int): Plot width in pixels.
        height (int): Plot height in pixels.
        rt_unit (str): Unit label used on the x-axis (display only; no conversion is done).
        original (bool): Accepted for API compatibility but currently unused.

    Returns:
        The bokeh figure, or None if there is nothing to plot.
    """
    # Local imports to avoid heavy top-level deps / circular imports
    from bokeh.plotting import figure, show, output_file
    from bokeh.models import ColumnDataSource, HoverTool
    from bokeh.io.export import export_png
    from bokeh.palettes import Turbo256
    from masster.study.helpers import get_tic
    import numpy as _np

    sample_uids = self._get_sample_uids(samples)
    if not sample_uids:
        self.logger.error("No valid sample_uids provided for TIC plotting.")
        return

    # Spread sample colors evenly over Turbo256 (same scheme as plot_bpc)
    colors = Turbo256
    n = max(1, len(sample_uids))
    step = max(1, 256 // n)
    color_map = {uid: colors[(i * step) % 256] for i, uid in enumerate(sample_uids)}

    plot_title = title or "Total Ion Chromatograms"

    p = figure(width=width, height=height, title=plot_title, tools="pan,wheel_zoom,box_zoom,reset,save")
    p.xaxis.axis_label = f"Retention Time ({rt_unit})"
    p.yaxis.axis_label = "Intensity"

    renderers = []

    # Build sample name mapping once
    samples_info = None
    if hasattr(self, "samples_df") and self.samples_df is not None:
        try:
            samples_info = self.samples_df.to_pandas()
        except Exception:
            samples_info = None

    for uid in sample_uids:
        try:
            chrom = get_tic(self, sample=uid, label=None)
        except Exception as e:
            self.logger.debug(f"Skipping sample {uid} for TIC: {e}")
            continue

        # extract arrays, tolerating both dict-convertible and attribute-style objects
        try:
            chrom_dict = chrom.to_dict() if hasattr(chrom, "to_dict") else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
            rt = chrom_dict.get("rt")
            inty = chrom_dict.get("inty")
        except Exception:
            try:
                rt = chrom.rt
                inty = chrom.inty
            except Exception as e:
                self.logger.debug(f"Invalid chromatogram for sample {uid}: {e}")
                continue

        if rt is None or inty is None:
            continue

        rt = _np.asarray(rt)
        inty = _np.asarray(inty)
        if rt.size == 0 or inty.size == 0:
            continue

        # Sort by rt so the line renders left-to-right
        idx = _np.argsort(rt)
        rt = rt[idx]
        inty = inty[idx]

        # Prefer the human-readable sample name when available
        sample_name = str(uid)
        if samples_info is not None:
            try:
                row = samples_info[samples_info["sample_uid"] == uid]
                if not row.empty:
                    sample_name = row.iloc[0].get("sample_name", sample_name)
            except Exception:
                pass

        color = color_map.get(uid, "#000000")

        data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt)}
        src = ColumnDataSource(data)

        # Hover is attached to the line renderer only; scatter is decorative
        r_line = p.line("rt", "inty", source=src, line_width=1, color=color, legend_label=str(sample_name))
        p.scatter("rt", "inty", source=src, size=2, color=color, alpha=0.6)
        renderers.append(r_line)

    if not renderers:
        self.logger.warning("No TIC curves to plot for the selected samples.")
        return

    hover = HoverTool(tooltips=[("sample", "@sample"), ("rt", "@rt{0.00}"), ("inty", "@inty{0.0e+0}")], renderers=renderers)
    p.add_tools(hover)

    # Only set legend properties if a legend was actually created to avoid Bokeh warnings
    if getattr(p, "legend", None) and len(p.legend) > 0:
        p.legend.visible = False

    if filename:
        if filename.endswith(".html"):
            output_file(filename)
            show(p)
        elif filename.endswith(".png"):
            try:
                export_png(p, filename=filename)
            except Exception:
                # PNG export needs optional deps (selenium/webdriver); fall back to HTML
                output_file(filename.replace(".png", ".html"))
                show(p)
        else:
            output_file(filename)
            show(p)
    else:
        show(p)

    return p
|