masster 0.3.17__py3-none-any.whl → 0.3.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic (the diff below includes unresolved Git merge-conflict markers shipped inside masster/study/plot.py); consult the advisory details for more information.
- masster/_version.py +1 -1
- masster/sample/h5.py +1 -1
- masster/sample/helpers.py +3 -7
- masster/sample/load.py +2 -2
- masster/sample/plot.py +2 -1
- masster/study/export.py +27 -10
- masster/study/h5.py +58 -40
- masster/study/helpers.py +275 -225
- masster/study/helpers_optimized.py +5 -5
- masster/study/load.py +148 -121
- masster/study/plot.py +306 -106
- masster/study/processing.py +9 -5
- masster/study/study.py +2 -6
- {masster-0.3.17.dist-info → masster-0.3.19.dist-info}/METADATA +1 -1
- {masster-0.3.17.dist-info → masster-0.3.19.dist-info}/RECORD +18 -18
- {masster-0.3.17.dist-info → masster-0.3.19.dist-info}/WHEEL +0 -0
- {masster-0.3.17.dist-info → masster-0.3.19.dist-info}/entry_points.txt +0 -0
- {masster-0.3.17.dist-info → masster-0.3.19.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
|
@@ -17,7 +17,18 @@ hv.extension("bokeh")
|
|
|
17
17
|
from bokeh.layouts import row as bokeh_row
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
<<<<<<< Updated upstream
|
|
21
|
+
def plot_alignment(
|
|
22
|
+
self,
|
|
23
|
+
maps: bool = True,
|
|
24
|
+
filename: str | None = None,
|
|
25
|
+
width: int = 450,
|
|
26
|
+
height: int = 450,
|
|
27
|
+
markersize: int = 3,
|
|
28
|
+
):
|
|
29
|
+
=======
|
|
30
|
+
def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | None = None, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
|
|
31
|
+
>>>>>>> Stashed changes
|
|
21
32
|
"""Visualize retention time alignment using two synchronized Bokeh scatter plots.
|
|
22
33
|
|
|
23
34
|
- When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
|
|
@@ -27,6 +38,11 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
27
38
|
|
|
28
39
|
Parameters
|
|
29
40
|
- maps: whether to use feature maps (default True).
|
|
41
|
+
- samples: Sample selection parameter, interpreted like in plot_samples_2d:
|
|
42
|
+
- None: show all samples
|
|
43
|
+
- int: show a random subset of N samples
|
|
44
|
+
- list of ints: show samples with these sample_uids
|
|
45
|
+
- list of strings: show samples with these sample_names
|
|
30
46
|
- filename: optional HTML file path to save the plot.
|
|
31
47
|
- width/height: pixel size of each subplot.
|
|
32
48
|
- markersize: base marker size.
|
|
@@ -54,6 +70,32 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
54
70
|
self.logger.error("No feature maps available for plotting.")
|
|
55
71
|
return
|
|
56
72
|
|
|
73
|
+
# Get sample_uids to limit which samples to show
|
|
74
|
+
sample_uids_to_show = self._get_sample_uids(samples)
|
|
75
|
+
|
|
76
|
+
# Filter feature maps based on sample selection
|
|
77
|
+
if sample_uids_to_show is not None:
|
|
78
|
+
# Get sample indices for the selected sample_uids
|
|
79
|
+
selected_indices = []
|
|
80
|
+
if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
|
|
81
|
+
samples_info = self.samples_df.to_pandas()
|
|
82
|
+
for idx, row in samples_info.iterrows():
|
|
83
|
+
if row.get('sample_uid') in sample_uids_to_show:
|
|
84
|
+
selected_indices.append(idx)
|
|
85
|
+
else:
|
|
86
|
+
# If no samples_df, just limit to the first N samples
|
|
87
|
+
if isinstance(samples, int):
|
|
88
|
+
selected_indices = list(range(min(samples, len(fmaps))))
|
|
89
|
+
else:
|
|
90
|
+
selected_indices = list(range(len(fmaps)))
|
|
91
|
+
|
|
92
|
+
# Filter feature maps to only include selected indices
|
|
93
|
+
fmaps = [fmaps[i] for i in selected_indices if i < len(fmaps)]
|
|
94
|
+
|
|
95
|
+
if not fmaps:
|
|
96
|
+
self.logger.error("No feature maps match the selected samples.")
|
|
97
|
+
return
|
|
98
|
+
|
|
57
99
|
# Reference (first) sample: use current RT for both before and after
|
|
58
100
|
ref = fmaps[0]
|
|
59
101
|
ref_rt = [f.getRT() for f in ref]
|
|
@@ -62,17 +104,39 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
62
104
|
max_ref_inty = max(ref_inty) if ref_inty else 1
|
|
63
105
|
|
|
64
106
|
# sample metadata
|
|
65
|
-
if hasattr(self,
|
|
107
|
+
if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
|
|
66
108
|
samples_info = self.samples_df.to_pandas()
|
|
67
|
-
ref_sample_uid =
|
|
68
|
-
|
|
109
|
+
ref_sample_uid = (
|
|
110
|
+
samples_info.iloc[0]["sample_uid"] if "sample_uid" in samples_info.columns else "Reference_UID"
|
|
111
|
+
)
|
|
112
|
+
ref_sample_name = (
|
|
113
|
+
samples_info.iloc[0]["sample_name"] if "sample_name" in samples_info.columns else "Reference"
|
|
114
|
+
)
|
|
69
115
|
else:
|
|
70
|
-
ref_sample_uid =
|
|
71
|
-
ref_sample_name =
|
|
116
|
+
ref_sample_uid = "Reference_UID"
|
|
117
|
+
ref_sample_name = "Reference"
|
|
72
118
|
|
|
73
119
|
for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
|
|
74
|
-
before_data.append({
|
|
75
|
-
|
|
120
|
+
before_data.append({
|
|
121
|
+
"rt": rt,
|
|
122
|
+
"mz": mz,
|
|
123
|
+
"inty": inty,
|
|
124
|
+
"alpha": inty / max_ref_inty,
|
|
125
|
+
"sample_idx": 0,
|
|
126
|
+
"sample_name": ref_sample_name,
|
|
127
|
+
"sample_uid": ref_sample_uid,
|
|
128
|
+
"size": markersize + 2,
|
|
129
|
+
})
|
|
130
|
+
after_data.append({
|
|
131
|
+
"rt": rt,
|
|
132
|
+
"mz": mz,
|
|
133
|
+
"inty": inty,
|
|
134
|
+
"alpha": inty / max_ref_inty,
|
|
135
|
+
"sample_idx": 0,
|
|
136
|
+
"sample_name": ref_sample_name,
|
|
137
|
+
"sample_uid": ref_sample_uid,
|
|
138
|
+
"size": markersize + 2,
|
|
139
|
+
})
|
|
76
140
|
|
|
77
141
|
# Remaining samples
|
|
78
142
|
for sample_idx, fm in enumerate(fmaps[1:], start=1):
|
|
@@ -83,7 +147,7 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
83
147
|
|
|
84
148
|
for f in fm:
|
|
85
149
|
try:
|
|
86
|
-
orig = f.getMetaValue(
|
|
150
|
+
orig = f.getMetaValue("original_RT")
|
|
87
151
|
except Exception:
|
|
88
152
|
orig = None
|
|
89
153
|
|
|
@@ -101,23 +165,41 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
101
165
|
|
|
102
166
|
max_inty = max(inty_vals)
|
|
103
167
|
|
|
104
|
-
if hasattr(self,
|
|
168
|
+
if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
|
|
105
169
|
samples_info = self.samples_df.to_pandas()
|
|
106
170
|
if sample_idx < len(samples_info):
|
|
107
|
-
sample_name = samples_info.iloc[sample_idx].get(
|
|
108
|
-
sample_uid = samples_info.iloc[sample_idx].get(
|
|
171
|
+
sample_name = samples_info.iloc[sample_idx].get("sample_name", f"Sample {sample_idx}")
|
|
172
|
+
sample_uid = samples_info.iloc[sample_idx].get("sample_uid", f"Sample_{sample_idx}_UID")
|
|
109
173
|
else:
|
|
110
|
-
sample_name = f
|
|
111
|
-
sample_uid = f
|
|
174
|
+
sample_name = f"Sample {sample_idx}"
|
|
175
|
+
sample_uid = f"Sample_{sample_idx}_UID"
|
|
112
176
|
else:
|
|
113
|
-
sample_name = f
|
|
114
|
-
sample_uid = f
|
|
177
|
+
sample_name = f"Sample {sample_idx}"
|
|
178
|
+
sample_uid = f"Sample_{sample_idx}_UID"
|
|
115
179
|
|
|
116
180
|
for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
|
|
117
|
-
before_data.append({
|
|
181
|
+
before_data.append({
|
|
182
|
+
"rt": rt,
|
|
183
|
+
"mz": mz,
|
|
184
|
+
"inty": inty,
|
|
185
|
+
"alpha": inty / max_inty,
|
|
186
|
+
"sample_idx": sample_idx,
|
|
187
|
+
"sample_name": sample_name,
|
|
188
|
+
"sample_uid": sample_uid,
|
|
189
|
+
"size": markersize,
|
|
190
|
+
})
|
|
118
191
|
|
|
119
192
|
for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
|
|
120
|
-
after_data.append({
|
|
193
|
+
after_data.append({
|
|
194
|
+
"rt": rt,
|
|
195
|
+
"mz": mz,
|
|
196
|
+
"inty": inty,
|
|
197
|
+
"alpha": inty / max_inty,
|
|
198
|
+
"sample_idx": sample_idx,
|
|
199
|
+
"sample_name": sample_name,
|
|
200
|
+
"sample_uid": sample_uid,
|
|
201
|
+
"size": markersize,
|
|
202
|
+
})
|
|
121
203
|
|
|
122
204
|
else:
|
|
123
205
|
# Use features_df
|
|
@@ -125,66 +207,98 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
125
207
|
self.logger.error("No features_df found. Load features first.")
|
|
126
208
|
return
|
|
127
209
|
|
|
128
|
-
required_cols = [
|
|
210
|
+
required_cols = ["rt", "mz", "inty"]
|
|
129
211
|
missing = [c for c in required_cols if c not in self.features_df.columns]
|
|
130
212
|
if missing:
|
|
131
213
|
self.logger.error(f"Missing required columns in features_df: {missing}")
|
|
132
214
|
return
|
|
133
215
|
|
|
134
|
-
if
|
|
216
|
+
if "rt_original" not in self.features_df.columns:
|
|
135
217
|
self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
|
|
136
218
|
return
|
|
137
219
|
|
|
138
220
|
# Use Polars instead of pandas
|
|
139
221
|
features_df = self.features_df
|
|
140
222
|
|
|
141
|
-
sample_col =
|
|
223
|
+
sample_col = "sample_uid" if "sample_uid" in features_df.columns else "sample_name"
|
|
142
224
|
if sample_col not in features_df.columns:
|
|
143
225
|
self.logger.error("No sample identifier column found in features_df.")
|
|
144
226
|
return
|
|
145
227
|
|
|
228
|
+
# Get sample_uids to limit which samples to show
|
|
229
|
+
sample_uids_to_show = self._get_sample_uids(samples)
|
|
230
|
+
|
|
231
|
+
# Filter features_df based on sample selection if specified
|
|
232
|
+
if sample_uids_to_show is not None:
|
|
233
|
+
if sample_col == 'sample_uid':
|
|
234
|
+
features_df = features_df.filter(pl.col('sample_uid').is_in(sample_uids_to_show))
|
|
235
|
+
else:
|
|
236
|
+
# Need to convert sample names to sample_uids if using sample_name column
|
|
237
|
+
if 'sample_uid' in features_df.columns:
|
|
238
|
+
# Filter by sample_uid even though we're using sample_name as the primary column
|
|
239
|
+
features_df = features_df.filter(pl.col('sample_uid').is_in(sample_uids_to_show))
|
|
240
|
+
else:
|
|
241
|
+
# Convert sample_uids to sample_names and filter
|
|
242
|
+
sample_names_to_show = []
|
|
243
|
+
if hasattr(self, 'samples_df') and self.samples_df is not None:
|
|
244
|
+
for uid in sample_uids_to_show:
|
|
245
|
+
matching_rows = self.samples_df.filter(pl.col("sample_uid") == uid)
|
|
246
|
+
if not matching_rows.is_empty():
|
|
247
|
+
sample_names_to_show.append(matching_rows.row(0, named=True)["sample_name"])
|
|
248
|
+
features_df = features_df.filter(pl.col('sample_name').is_in(sample_names_to_show))
|
|
249
|
+
|
|
146
250
|
# Get unique samples using Polars
|
|
147
251
|
samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
|
|
148
252
|
|
|
149
253
|
for sample_idx, sample in enumerate(samples):
|
|
150
254
|
# Filter sample data using Polars
|
|
151
255
|
sample_data = features_df.filter(pl.col(sample_col) == sample)
|
|
152
|
-
|
|
256
|
+
|
|
153
257
|
# Calculate max intensity using Polars
|
|
154
|
-
max_inty = sample_data.select(pl.col(
|
|
258
|
+
max_inty = sample_data.select(pl.col("inty").max()).item()
|
|
155
259
|
max_inty = max_inty if max_inty and max_inty > 0 else 1
|
|
156
|
-
|
|
260
|
+
|
|
157
261
|
sample_name = str(sample)
|
|
158
262
|
# Get sample_uid - if sample_col is 'sample_uid', use sample directly
|
|
159
|
-
if sample_col ==
|
|
263
|
+
if sample_col == "sample_uid":
|
|
160
264
|
sample_uid = sample
|
|
161
265
|
else:
|
|
162
266
|
# Try to get sample_uid from the first row if it exists
|
|
163
|
-
if
|
|
164
|
-
sample_uid = sample_data.select(pl.col(
|
|
267
|
+
if "sample_uid" in sample_data.columns:
|
|
268
|
+
sample_uid = sample_data.select(pl.col("sample_uid")).item()
|
|
165
269
|
else:
|
|
166
270
|
sample_uid = sample
|
|
167
271
|
|
|
168
272
|
# Convert to dict for iteration - more efficient than row-by-row processing
|
|
169
|
-
sample_dict = sample_data.select([
|
|
170
|
-
|
|
273
|
+
sample_dict = sample_data.select(["rt_original", "rt", "mz", "inty"]).to_dicts()
|
|
274
|
+
|
|
171
275
|
for row_dict in sample_dict:
|
|
172
|
-
rt_original = row_dict[
|
|
173
|
-
rt_current = row_dict[
|
|
174
|
-
mz = row_dict[
|
|
175
|
-
inty = row_dict[
|
|
276
|
+
rt_original = row_dict["rt_original"]
|
|
277
|
+
rt_current = row_dict["rt"]
|
|
278
|
+
mz = row_dict["mz"]
|
|
279
|
+
inty = row_dict["inty"]
|
|
176
280
|
alpha = inty / max_inty
|
|
177
281
|
size = markersize + 2 if sample_idx == 0 else markersize
|
|
178
|
-
|
|
282
|
+
|
|
179
283
|
before_data.append({
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
284
|
+
"rt": rt_original,
|
|
285
|
+
"mz": mz,
|
|
286
|
+
"inty": inty,
|
|
287
|
+
"alpha": alpha,
|
|
288
|
+
"sample_idx": sample_idx,
|
|
289
|
+
"sample_name": sample_name,
|
|
290
|
+
"sample_uid": sample_uid,
|
|
291
|
+
"size": size,
|
|
183
292
|
})
|
|
184
293
|
after_data.append({
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
294
|
+
"rt": rt_current,
|
|
295
|
+
"mz": mz,
|
|
296
|
+
"inty": inty,
|
|
297
|
+
"alpha": alpha,
|
|
298
|
+
"sample_idx": sample_idx,
|
|
299
|
+
"sample_name": sample_name,
|
|
300
|
+
"sample_uid": sample_uid,
|
|
301
|
+
"size": size,
|
|
188
302
|
})
|
|
189
303
|
|
|
190
304
|
# Get sample colors from samples_df using sample indices
|
|
@@ -193,17 +307,16 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
193
307
|
# Create mapping from sample_idx to sample_uid more efficiently
|
|
194
308
|
sample_idx_to_uid = {}
|
|
195
309
|
for item in before_data:
|
|
196
|
-
if item[
|
|
197
|
-
sample_idx_to_uid[item[
|
|
310
|
+
if item["sample_idx"] not in sample_idx_to_uid:
|
|
311
|
+
sample_idx_to_uid[item["sample_idx"]] = item["sample_uid"]
|
|
198
312
|
else:
|
|
199
313
|
sample_idx_to_uid = {}
|
|
200
|
-
|
|
314
|
+
|
|
201
315
|
# Get colors from samples_df
|
|
202
316
|
sample_uids_list = list(sample_idx_to_uid.values())
|
|
203
|
-
if sample_uids_list and hasattr(self,
|
|
317
|
+
if sample_uids_list and hasattr(self, "samples_df") and self.samples_df is not None:
|
|
204
318
|
sample_colors = (
|
|
205
|
-
self.samples_df
|
|
206
|
-
.filter(pl.col("sample_uid").is_in(sample_uids_list))
|
|
319
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids_list))
|
|
207
320
|
.select(["sample_uid", "sample_color"])
|
|
208
321
|
.to_dict(as_series=False)
|
|
209
322
|
)
|
|
@@ -219,68 +332,106 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
|
|
|
219
332
|
# Add sample_color to data dictionaries before creating DataFrames
|
|
220
333
|
if before_data:
|
|
221
334
|
for item in before_data:
|
|
222
|
-
item[
|
|
223
|
-
|
|
335
|
+
item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
|
|
336
|
+
|
|
224
337
|
if after_data:
|
|
225
338
|
for item in after_data:
|
|
226
|
-
item[
|
|
227
|
-
|
|
339
|
+
item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
|
|
340
|
+
|
|
228
341
|
# Now create DataFrames with the sample_color already included
|
|
229
342
|
before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
|
|
230
343
|
after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
|
|
231
344
|
|
|
232
345
|
# Create Bokeh figures
|
|
233
|
-
p1 = figure(
|
|
346
|
+
p1 = figure(
|
|
347
|
+
width=width,
|
|
348
|
+
height=height,
|
|
349
|
+
title="Original RT",
|
|
350
|
+
x_axis_label="Retention Time (s)",
|
|
351
|
+
y_axis_label="m/z",
|
|
352
|
+
tools="pan,wheel_zoom,box_zoom,reset,save",
|
|
353
|
+
)
|
|
234
354
|
p1.outline_line_color = None
|
|
235
|
-
p1.background_fill_color =
|
|
236
|
-
p1.border_fill_color =
|
|
355
|
+
p1.background_fill_color = "white"
|
|
356
|
+
p1.border_fill_color = "white"
|
|
237
357
|
p1.min_border = 0
|
|
238
358
|
|
|
239
|
-
p2 = figure(
|
|
359
|
+
p2 = figure(
|
|
360
|
+
width=width,
|
|
361
|
+
height=height,
|
|
362
|
+
title="Current RT",
|
|
363
|
+
x_axis_label="Retention Time (s)",
|
|
364
|
+
y_axis_label="m/z",
|
|
365
|
+
tools="pan,wheel_zoom,box_zoom,reset,save",
|
|
366
|
+
x_range=p1.x_range,
|
|
367
|
+
y_range=p1.y_range,
|
|
368
|
+
)
|
|
240
369
|
p2.outline_line_color = None
|
|
241
|
-
p2.background_fill_color =
|
|
242
|
-
p2.border_fill_color =
|
|
370
|
+
p2.background_fill_color = "white"
|
|
371
|
+
p2.border_fill_color = "white"
|
|
243
372
|
p2.min_border = 0
|
|
244
|
-
|
|
373
|
+
|
|
245
374
|
# Get unique sample indices for iteration
|
|
246
|
-
unique_samples = sorted(list(
|
|
375
|
+
unique_samples = sorted(list({item["sample_idx"] for item in before_data})) if before_data else []
|
|
247
376
|
|
|
248
377
|
renderers_before = []
|
|
249
378
|
renderers_after = []
|
|
250
379
|
|
|
251
380
|
for sample_idx in unique_samples:
|
|
252
|
-
sb = before_df[before_df[
|
|
253
|
-
sa = after_df[after_df[
|
|
254
|
-
color = color_map.get(sample_idx,
|
|
381
|
+
sb = before_df[before_df["sample_idx"] == sample_idx]
|
|
382
|
+
sa = after_df[after_df["sample_idx"] == sample_idx]
|
|
383
|
+
color = color_map.get(sample_idx, "#000000")
|
|
255
384
|
|
|
256
385
|
if not sb.empty:
|
|
257
386
|
src = ColumnDataSource(sb)
|
|
258
|
-
r = p1.scatter(
|
|
387
|
+
r = p1.scatter("rt", "mz", size="size", color=color, alpha="alpha", source=src)
|
|
259
388
|
renderers_before.append(r)
|
|
260
389
|
|
|
261
390
|
if not sa.empty:
|
|
262
391
|
src = ColumnDataSource(sa)
|
|
263
|
-
r = p2.scatter(
|
|
392
|
+
r = p2.scatter("rt", "mz", size="size", color=color, alpha="alpha", source=src)
|
|
264
393
|
renderers_after.append(r)
|
|
265
394
|
|
|
266
395
|
# Add hover tools
|
|
267
|
-
hover1 = HoverTool(
|
|
396
|
+
hover1 = HoverTool(
|
|
397
|
+
tooltips=[
|
|
398
|
+
("Sample UID", "@sample_uid"),
|
|
399
|
+
("Sample Name", "@sample_name"),
|
|
400
|
+
("Sample Color", "$color[swatch]:sample_color"),
|
|
401
|
+
("RT", "@rt{0.00}"),
|
|
402
|
+
("m/z", "@mz{0.0000}"),
|
|
403
|
+
("Intensity", "@inty{0.0e0}"),
|
|
404
|
+
],
|
|
405
|
+
renderers=renderers_before,
|
|
406
|
+
)
|
|
268
407
|
p1.add_tools(hover1)
|
|
269
408
|
|
|
270
|
-
hover2 = HoverTool(
|
|
409
|
+
hover2 = HoverTool(
|
|
410
|
+
tooltips=[
|
|
411
|
+
("Sample UID", "@sample_uid"),
|
|
412
|
+
("Sample Name", "@sample_name"),
|
|
413
|
+
("Sample Color", "$color[swatch]:sample_color"),
|
|
414
|
+
("RT", "@rt{0.00}"),
|
|
415
|
+
("m/z", "@mz{0.0000}"),
|
|
416
|
+
("Intensity", "@inty{0.0e0}"),
|
|
417
|
+
],
|
|
418
|
+
renderers=renderers_after,
|
|
419
|
+
)
|
|
271
420
|
p2.add_tools(hover2)
|
|
272
421
|
|
|
273
422
|
# Create layout with both plots side by side
|
|
274
423
|
# Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
|
|
275
|
-
layout = bokeh_row(p1, p2, sizing_mode=
|
|
424
|
+
layout = bokeh_row(p1, p2, sizing_mode="fixed", width=width, height=height)
|
|
276
425
|
|
|
277
426
|
# Output and show
|
|
278
427
|
if filename:
|
|
279
428
|
from bokeh.plotting import output_file, show
|
|
429
|
+
|
|
280
430
|
output_file(filename)
|
|
281
431
|
show(layout)
|
|
282
432
|
else:
|
|
283
433
|
from bokeh.plotting import show
|
|
434
|
+
|
|
284
435
|
show(layout)
|
|
285
436
|
|
|
286
437
|
return layout
|
|
@@ -392,14 +543,14 @@ def plot_consensus_2d(
|
|
|
392
543
|
except ImportError:
|
|
393
544
|
from bokeh.models.annotations import ColorBar
|
|
394
545
|
from bokeh.palettes import viridis
|
|
395
|
-
|
|
546
|
+
|
|
396
547
|
# Import cmap for colormap handling
|
|
397
548
|
from cmap import Colormap
|
|
398
549
|
|
|
399
550
|
# Convert Polars DataFrame to pandas for Bokeh compatibility
|
|
400
551
|
data_pd = data.to_pandas()
|
|
401
552
|
source = ColumnDataSource(data_pd)
|
|
402
|
-
|
|
553
|
+
|
|
403
554
|
# Handle colormap using cmap.Colormap
|
|
404
555
|
try:
|
|
405
556
|
# Get colormap palette using cmap
|
|
@@ -408,6 +559,7 @@ def plot_consensus_2d(
|
|
|
408
559
|
# Generate 256 colors and convert to hex
|
|
409
560
|
import numpy as np
|
|
410
561
|
import matplotlib.colors as mcolors
|
|
562
|
+
|
|
411
563
|
colors = colormap(np.linspace(0, 1, 256))
|
|
412
564
|
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
413
565
|
else:
|
|
@@ -420,19 +572,21 @@ def plot_consensus_2d(
|
|
|
420
572
|
# Fall back to generating colors manually
|
|
421
573
|
import numpy as np
|
|
422
574
|
import matplotlib.colors as mcolors
|
|
575
|
+
|
|
423
576
|
colors = colormap(np.linspace(0, 1, 256))
|
|
424
577
|
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
425
578
|
except AttributeError:
|
|
426
579
|
# Fall back to generating colors manually
|
|
427
580
|
import numpy as np
|
|
428
581
|
import matplotlib.colors as mcolors
|
|
582
|
+
|
|
429
583
|
colors = colormap(np.linspace(0, 1, 256))
|
|
430
584
|
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
431
585
|
except (AttributeError, ValueError, TypeError) as e:
|
|
432
586
|
# Fallback to viridis if cmap interpretation fails
|
|
433
587
|
self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
|
|
434
588
|
palette = viridis(256)
|
|
435
|
-
|
|
589
|
+
|
|
436
590
|
color_mapper = LinearColorMapper(
|
|
437
591
|
palette=palette,
|
|
438
592
|
low=data[colorby].min(),
|
|
@@ -550,8 +704,7 @@ def plot_samples_2d(
|
|
|
550
704
|
|
|
551
705
|
# Get sample colors from samples_df
|
|
552
706
|
sample_colors = (
|
|
553
|
-
self.samples_df
|
|
554
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
707
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
555
708
|
.select(["sample_uid", "sample_color"])
|
|
556
709
|
.to_dict(as_series=False)
|
|
557
710
|
)
|
|
@@ -741,7 +894,7 @@ def plot_bpc(
|
|
|
741
894
|
original: bool = False,
|
|
742
895
|
):
|
|
743
896
|
"""
|
|
744
|
-
Plot Base Peak Chromatograms (BPC) for selected samples
|
|
897
|
+
Plot Base Peak Chromatograms (BPC) for selected samples overlaid using Bokeh.
|
|
745
898
|
|
|
746
899
|
This collects per-sample BPCs via `get_bpc(self, sample=uid)` and overlays them.
|
|
747
900
|
Colors are mapped per-sample using the same Turbo256 palette as `plot_samples_2d`.
|
|
@@ -765,8 +918,7 @@ def plot_bpc(
|
|
|
765
918
|
|
|
766
919
|
# Get sample colors from samples_df
|
|
767
920
|
sample_colors = (
|
|
768
|
-
self.samples_df
|
|
769
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
921
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
770
922
|
.select(["sample_uid", "sample_color"])
|
|
771
923
|
.to_dict(as_series=False)
|
|
772
924
|
)
|
|
@@ -783,7 +935,7 @@ def plot_bpc(
|
|
|
783
935
|
for uid in sample_uids:
|
|
784
936
|
try:
|
|
785
937
|
first_chrom = get_bpc(self, sample=uid, label=None, original=original)
|
|
786
|
-
if hasattr(first_chrom,
|
|
938
|
+
if hasattr(first_chrom, "rt_unit"):
|
|
787
939
|
rt_unit = first_chrom.rt_unit
|
|
788
940
|
break
|
|
789
941
|
except Exception:
|
|
@@ -814,7 +966,11 @@ def plot_bpc(
|
|
|
814
966
|
# extract arrays
|
|
815
967
|
try:
|
|
816
968
|
# prefer Chromatogram API
|
|
817
|
-
chrom_dict =
|
|
969
|
+
chrom_dict = (
|
|
970
|
+
chrom.to_dict()
|
|
971
|
+
if hasattr(chrom, "to_dict")
|
|
972
|
+
else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
|
|
973
|
+
)
|
|
818
974
|
rt = chrom_dict.get("rt")
|
|
819
975
|
inty = chrom_dict.get("inty")
|
|
820
976
|
except Exception:
|
|
@@ -854,7 +1010,7 @@ def plot_bpc(
|
|
|
854
1010
|
|
|
855
1011
|
# Debug: log sample processing details
|
|
856
1012
|
self.logger.debug(
|
|
857
|
-
f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
|
|
1013
|
+
f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}",
|
|
858
1014
|
)
|
|
859
1015
|
|
|
860
1016
|
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
@@ -868,7 +1024,15 @@ def plot_bpc(
|
|
|
868
1024
|
self.logger.warning("No BPC curves to plot for the selected samples.")
|
|
869
1025
|
return
|
|
870
1026
|
|
|
871
|
-
hover = HoverTool(
|
|
1027
|
+
hover = HoverTool(
|
|
1028
|
+
tooltips=[
|
|
1029
|
+
("sample", "@sample"),
|
|
1030
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
1031
|
+
("rt", "@rt{0.00}"),
|
|
1032
|
+
("inty", "@inty{0.00e0}"),
|
|
1033
|
+
],
|
|
1034
|
+
renderers=renderers,
|
|
1035
|
+
)
|
|
872
1036
|
p.add_tools(hover)
|
|
873
1037
|
|
|
874
1038
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
@@ -937,8 +1101,7 @@ def plot_eic(
|
|
|
937
1101
|
|
|
938
1102
|
# Get sample colors from samples_df
|
|
939
1103
|
sample_colors = (
|
|
940
|
-
self.samples_df
|
|
941
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1104
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
942
1105
|
.select(["sample_uid", "sample_color"])
|
|
943
1106
|
.to_dict(as_series=False)
|
|
944
1107
|
)
|
|
@@ -951,7 +1114,7 @@ def plot_eic(
|
|
|
951
1114
|
for uid in sample_uids:
|
|
952
1115
|
try:
|
|
953
1116
|
first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
|
|
954
|
-
if hasattr(first_chrom,
|
|
1117
|
+
if hasattr(first_chrom, "rt_unit"):
|
|
955
1118
|
rt_unit = first_chrom.rt_unit
|
|
956
1119
|
break
|
|
957
1120
|
except Exception:
|
|
@@ -982,7 +1145,11 @@ def plot_eic(
|
|
|
982
1145
|
# extract arrays
|
|
983
1146
|
try:
|
|
984
1147
|
# prefer Chromatogram API
|
|
985
|
-
chrom_dict =
|
|
1148
|
+
chrom_dict = (
|
|
1149
|
+
chrom.to_dict()
|
|
1150
|
+
if hasattr(chrom, "to_dict")
|
|
1151
|
+
else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
|
|
1152
|
+
)
|
|
986
1153
|
rt = chrom_dict.get("rt")
|
|
987
1154
|
inty = chrom_dict.get("inty")
|
|
988
1155
|
except Exception:
|
|
@@ -1030,7 +1197,15 @@ def plot_eic(
|
|
|
1030
1197
|
self.logger.warning("No EIC curves to plot for the selected samples.")
|
|
1031
1198
|
return
|
|
1032
1199
|
|
|
1033
|
-
hover = HoverTool(
|
|
1200
|
+
hover = HoverTool(
|
|
1201
|
+
tooltips=[
|
|
1202
|
+
("sample", "@sample"),
|
|
1203
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
1204
|
+
("rt", "@rt{0.00}"),
|
|
1205
|
+
("inty", "@inty{0.0e0}"),
|
|
1206
|
+
],
|
|
1207
|
+
renderers=renderers,
|
|
1208
|
+
)
|
|
1034
1209
|
p.add_tools(hover)
|
|
1035
1210
|
|
|
1036
1211
|
if getattr(p, "legend", None) and len(p.legend) > 0:
|
|
@@ -1064,7 +1239,7 @@ def plot_rt_correction(
|
|
|
1064
1239
|
height: int = 300,
|
|
1065
1240
|
):
|
|
1066
1241
|
"""
|
|
1067
|
-
Plot RT correction per sample: (rt - rt_original) vs rt
|
|
1242
|
+
Plot RT correction per sample: (rt - rt_original) vs rt overlaid for selected samples.
|
|
1068
1243
|
|
|
1069
1244
|
This uses the same color mapping as `plot_bpc` so curves for the same samples match.
|
|
1070
1245
|
"""
|
|
@@ -1088,8 +1263,7 @@ def plot_rt_correction(
|
|
|
1088
1263
|
|
|
1089
1264
|
# Get sample colors from samples_df
|
|
1090
1265
|
sample_colors = (
|
|
1091
|
-
self.samples_df
|
|
1092
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1266
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1093
1267
|
.select(["sample_uid", "sample_color"])
|
|
1094
1268
|
.to_dict(as_series=False)
|
|
1095
1269
|
)
|
|
@@ -1175,7 +1349,15 @@ def plot_rt_correction(
|
|
|
1175
1349
|
self.logger.warning("No RT correction curves to plot for the selected samples.")
|
|
1176
1350
|
return
|
|
1177
1351
|
|
|
1178
|
-
hover = HoverTool(
|
|
1352
|
+
hover = HoverTool(
|
|
1353
|
+
tooltips=[
|
|
1354
|
+
("sample", "@sample"),
|
|
1355
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
1356
|
+
("rt", "@rt{0.00}"),
|
|
1357
|
+
("rt - rt_original", "@delta{0.00}"),
|
|
1358
|
+
],
|
|
1359
|
+
renderers=renderers,
|
|
1360
|
+
)
|
|
1179
1361
|
p.add_tools(hover)
|
|
1180
1362
|
|
|
1181
1363
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
@@ -1227,7 +1409,7 @@ def plot_chrom(
|
|
|
1227
1409
|
if not sample_names:
|
|
1228
1410
|
self.logger.error("No sample names found in chromatogram data.")
|
|
1229
1411
|
return
|
|
1230
|
-
|
|
1412
|
+
|
|
1231
1413
|
# Create color mapping by getting sample_color for each sample_name
|
|
1232
1414
|
samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
|
|
1233
1415
|
sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
|
|
@@ -1649,11 +1831,19 @@ def plot_pca(
|
|
|
1649
1831
|
|
|
1650
1832
|
self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
|
|
1651
1833
|
|
|
1652
|
-
# Convert consensus matrix to numpy
|
|
1653
|
-
if hasattr(consensus_matrix, "
|
|
1834
|
+
# Convert consensus matrix to numpy - handle both Polars and pandas DataFrames
|
|
1835
|
+
if hasattr(consensus_matrix, "to_numpy"):
|
|
1836
|
+
# Polars or pandas DataFrame
|
|
1837
|
+
if hasattr(consensus_matrix, "select"):
|
|
1838
|
+
# Polars DataFrame - exclude the consensus_uid column
|
|
1839
|
+
numeric_cols = [col for col in consensus_matrix.columns if col != "consensus_uid"]
|
|
1840
|
+
matrix_data = consensus_matrix.select(numeric_cols).to_numpy()
|
|
1841
|
+
else:
|
|
1842
|
+
# Pandas DataFrame
|
|
1843
|
+
matrix_data = consensus_matrix.to_numpy()
|
|
1844
|
+
elif hasattr(consensus_matrix, "values"):
|
|
1845
|
+
# Pandas DataFrame
|
|
1654
1846
|
matrix_data = consensus_matrix.values
|
|
1655
|
-
elif hasattr(consensus_matrix, "to_numpy"):
|
|
1656
|
-
matrix_data = consensus_matrix.to_numpy()
|
|
1657
1847
|
else:
|
|
1658
1848
|
matrix_data = np.array(consensus_matrix)
|
|
1659
1849
|
|
|
@@ -1692,7 +1882,7 @@ def plot_pca(
|
|
|
1692
1882
|
else:
|
|
1693
1883
|
self.logger.warning(
|
|
1694
1884
|
f"Sample count mismatch: samples_df has {len(samples_pd)} rows, "
|
|
1695
|
-
f"but consensus matrix has {len(pca_df)} samples"
|
|
1885
|
+
f"but consensus matrix has {len(pca_df)} samples",
|
|
1696
1886
|
)
|
|
1697
1887
|
|
|
1698
1888
|
# Prepare color mapping
|
|
@@ -1763,25 +1953,23 @@ def plot_pca(
|
|
|
1763
1953
|
if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
|
|
1764
1954
|
# Choose the identifier to map colors by
|
|
1765
1955
|
id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
|
|
1766
|
-
|
|
1956
|
+
|
|
1767
1957
|
# Get colors from samples_df based on the identifier
|
|
1768
1958
|
if id_col == "sample_uid":
|
|
1769
1959
|
sample_colors = (
|
|
1770
|
-
self.samples_df
|
|
1771
|
-
.filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
|
|
1960
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
|
|
1772
1961
|
.select(["sample_uid", "sample_color"])
|
|
1773
1962
|
.to_dict(as_series=False)
|
|
1774
1963
|
)
|
|
1775
1964
|
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
1776
1965
|
else: # sample_name
|
|
1777
1966
|
sample_colors = (
|
|
1778
|
-
self.samples_df
|
|
1779
|
-
.filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
|
|
1967
|
+
self.samples_df.filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
|
|
1780
1968
|
.select(["sample_name", "sample_color"])
|
|
1781
1969
|
.to_dict(as_series=False)
|
|
1782
1970
|
)
|
|
1783
1971
|
color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
|
|
1784
|
-
|
|
1972
|
+
|
|
1785
1973
|
# Map colors into dataframe
|
|
1786
1974
|
pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]] # fallback to blue
|
|
1787
1975
|
# Update the ColumnDataSource with new color column
|
|
@@ -1817,7 +2005,7 @@ def plot_pca(
|
|
|
1817
2005
|
if col in pca_df.columns:
|
|
1818
2006
|
if col == "sample_color":
|
|
1819
2007
|
# Display sample_color as a colored swatch
|
|
1820
|
-
tooltip_list.append((
|
|
2008
|
+
tooltip_list.append(("color", "$color[swatch]:sample_color"))
|
|
1821
2009
|
elif pca_df[col].dtype in ["float64", "float32"]:
|
|
1822
2010
|
tooltip_list.append((col, f"@{col}{{0.00}}"))
|
|
1823
2011
|
else:
|
|
@@ -1843,6 +2031,7 @@ def plot_pca(
|
|
|
1843
2031
|
show(p)
|
|
1844
2032
|
return p
|
|
1845
2033
|
|
|
2034
|
+
|
|
1846
2035
|
def plot_tic(
|
|
1847
2036
|
self,
|
|
1848
2037
|
samples=None,
|
|
@@ -1853,7 +2042,7 @@ def plot_tic(
|
|
|
1853
2042
|
original: bool = False,
|
|
1854
2043
|
):
|
|
1855
2044
|
"""
|
|
1856
|
-
Plot Total Ion Chromatograms (TIC) for selected samples
|
|
2045
|
+
Plot Total Ion Chromatograms (TIC) for selected samples overlaid using Bokeh.
|
|
1857
2046
|
|
|
1858
2047
|
Parameters and behavior mirror `plot_bpc` but use per-sample TICs (get_tic).
|
|
1859
2048
|
"""
|
|
@@ -1870,8 +2059,7 @@ def plot_tic(
|
|
|
1870
2059
|
|
|
1871
2060
|
# Get sample colors from samples_df
|
|
1872
2061
|
sample_colors = (
|
|
1873
|
-
self.samples_df
|
|
1874
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
2062
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1875
2063
|
.select(["sample_uid", "sample_color"])
|
|
1876
2064
|
.to_dict(as_series=False)
|
|
1877
2065
|
)
|
|
@@ -1884,7 +2072,7 @@ def plot_tic(
|
|
|
1884
2072
|
for uid in sample_uids:
|
|
1885
2073
|
try:
|
|
1886
2074
|
first_chrom = get_tic(self, sample=uid, label=None)
|
|
1887
|
-
if hasattr(first_chrom,
|
|
2075
|
+
if hasattr(first_chrom, "rt_unit"):
|
|
1888
2076
|
rt_unit = first_chrom.rt_unit
|
|
1889
2077
|
break
|
|
1890
2078
|
except Exception:
|
|
@@ -1913,7 +2101,11 @@ def plot_tic(
|
|
|
1913
2101
|
|
|
1914
2102
|
# extract arrays
|
|
1915
2103
|
try:
|
|
1916
|
-
chrom_dict =
|
|
2104
|
+
chrom_dict = (
|
|
2105
|
+
chrom.to_dict()
|
|
2106
|
+
if hasattr(chrom, "to_dict")
|
|
2107
|
+
else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
|
|
2108
|
+
)
|
|
1917
2109
|
rt = chrom_dict.get("rt")
|
|
1918
2110
|
inty = chrom_dict.get("inty")
|
|
1919
2111
|
except Exception:
|
|
@@ -1961,7 +2153,15 @@ def plot_tic(
|
|
|
1961
2153
|
self.logger.warning("No TIC curves to plot for the selected samples.")
|
|
1962
2154
|
return
|
|
1963
2155
|
|
|
1964
|
-
hover = HoverTool(
|
|
2156
|
+
hover = HoverTool(
|
|
2157
|
+
tooltips=[
|
|
2158
|
+
("sample", "@sample"),
|
|
2159
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
2160
|
+
("rt", "@rt{0.00}"),
|
|
2161
|
+
("inty", "@inty{0.00e0}"),
|
|
2162
|
+
],
|
|
2163
|
+
renderers=renderers,
|
|
2164
|
+
)
|
|
1965
2165
|
p.add_tools(hover)
|
|
1966
2166
|
|
|
1967
2167
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|