masster 0.3.18__py3-none-any.whl → 0.3.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/h5.py +1 -1
- masster/sample/helpers.py +3 -7
- masster/sample/load.py +2 -2
- masster/sample/plot.py +2 -1
- masster/study/export.py +27 -10
- masster/study/h5.py +58 -40
- masster/study/helpers.py +220 -190
- masster/study/helpers_optimized.py +5 -5
- masster/study/load.py +144 -118
- masster/study/plot.py +240 -101
- masster/study/processing.py +9 -5
- masster/study/study.py +2 -6
- {masster-0.3.18.dist-info → masster-0.3.19.dist-info}/METADATA +1 -1
- {masster-0.3.18.dist-info → masster-0.3.19.dist-info}/RECORD +18 -18
- {masster-0.3.18.dist-info → masster-0.3.19.dist-info}/WHEEL +0 -0
- {masster-0.3.18.dist-info → masster-0.3.19.dist-info}/entry_points.txt +0 -0
- {masster-0.3.18.dist-info → masster-0.3.19.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
|
@@ -17,7 +17,18 @@ hv.extension("bokeh")
|
|
|
17
17
|
from bokeh.layouts import row as bokeh_row
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
<<<<<<< Updated upstream
|
|
21
|
+
def plot_alignment(
|
|
22
|
+
self,
|
|
23
|
+
maps: bool = True,
|
|
24
|
+
filename: str | None = None,
|
|
25
|
+
width: int = 450,
|
|
26
|
+
height: int = 450,
|
|
27
|
+
markersize: int = 3,
|
|
28
|
+
):
|
|
29
|
+
=======
|
|
20
30
|
def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | None = None, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
|
|
31
|
+
>>>>>>> Stashed changes
|
|
21
32
|
"""Visualize retention time alignment using two synchronized Bokeh scatter plots.
|
|
22
33
|
|
|
23
34
|
- When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
|
|
@@ -93,17 +104,39 @@ def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | Non
|
|
|
93
104
|
max_ref_inty = max(ref_inty) if ref_inty else 1
|
|
94
105
|
|
|
95
106
|
# sample metadata
|
|
96
|
-
if hasattr(self,
|
|
107
|
+
if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
|
|
97
108
|
samples_info = self.samples_df.to_pandas()
|
|
98
|
-
ref_sample_uid =
|
|
99
|
-
|
|
109
|
+
ref_sample_uid = (
|
|
110
|
+
samples_info.iloc[0]["sample_uid"] if "sample_uid" in samples_info.columns else "Reference_UID"
|
|
111
|
+
)
|
|
112
|
+
ref_sample_name = (
|
|
113
|
+
samples_info.iloc[0]["sample_name"] if "sample_name" in samples_info.columns else "Reference"
|
|
114
|
+
)
|
|
100
115
|
else:
|
|
101
|
-
ref_sample_uid =
|
|
102
|
-
ref_sample_name =
|
|
116
|
+
ref_sample_uid = "Reference_UID"
|
|
117
|
+
ref_sample_name = "Reference"
|
|
103
118
|
|
|
104
119
|
for rt, mz, inty in zip(ref_rt, ref_mz, ref_inty):
|
|
105
|
-
before_data.append({
|
|
106
|
-
|
|
120
|
+
before_data.append({
|
|
121
|
+
"rt": rt,
|
|
122
|
+
"mz": mz,
|
|
123
|
+
"inty": inty,
|
|
124
|
+
"alpha": inty / max_ref_inty,
|
|
125
|
+
"sample_idx": 0,
|
|
126
|
+
"sample_name": ref_sample_name,
|
|
127
|
+
"sample_uid": ref_sample_uid,
|
|
128
|
+
"size": markersize + 2,
|
|
129
|
+
})
|
|
130
|
+
after_data.append({
|
|
131
|
+
"rt": rt,
|
|
132
|
+
"mz": mz,
|
|
133
|
+
"inty": inty,
|
|
134
|
+
"alpha": inty / max_ref_inty,
|
|
135
|
+
"sample_idx": 0,
|
|
136
|
+
"sample_name": ref_sample_name,
|
|
137
|
+
"sample_uid": ref_sample_uid,
|
|
138
|
+
"size": markersize + 2,
|
|
139
|
+
})
|
|
107
140
|
|
|
108
141
|
# Remaining samples
|
|
109
142
|
for sample_idx, fm in enumerate(fmaps[1:], start=1):
|
|
@@ -114,7 +147,7 @@ def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | Non
|
|
|
114
147
|
|
|
115
148
|
for f in fm:
|
|
116
149
|
try:
|
|
117
|
-
orig = f.getMetaValue(
|
|
150
|
+
orig = f.getMetaValue("original_RT")
|
|
118
151
|
except Exception:
|
|
119
152
|
orig = None
|
|
120
153
|
|
|
@@ -132,23 +165,41 @@ def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | Non
|
|
|
132
165
|
|
|
133
166
|
max_inty = max(inty_vals)
|
|
134
167
|
|
|
135
|
-
if hasattr(self,
|
|
168
|
+
if hasattr(self, "samples_df") and self.samples_df is not None and not self.samples_df.is_empty():
|
|
136
169
|
samples_info = self.samples_df.to_pandas()
|
|
137
170
|
if sample_idx < len(samples_info):
|
|
138
|
-
sample_name = samples_info.iloc[sample_idx].get(
|
|
139
|
-
sample_uid = samples_info.iloc[sample_idx].get(
|
|
171
|
+
sample_name = samples_info.iloc[sample_idx].get("sample_name", f"Sample {sample_idx}")
|
|
172
|
+
sample_uid = samples_info.iloc[sample_idx].get("sample_uid", f"Sample_{sample_idx}_UID")
|
|
140
173
|
else:
|
|
141
|
-
sample_name = f
|
|
142
|
-
sample_uid = f
|
|
174
|
+
sample_name = f"Sample {sample_idx}"
|
|
175
|
+
sample_uid = f"Sample_{sample_idx}_UID"
|
|
143
176
|
else:
|
|
144
|
-
sample_name = f
|
|
145
|
-
sample_uid = f
|
|
177
|
+
sample_name = f"Sample {sample_idx}"
|
|
178
|
+
sample_uid = f"Sample_{sample_idx}_UID"
|
|
146
179
|
|
|
147
180
|
for rt, mz, inty in zip(original_rt, mz_vals, inty_vals):
|
|
148
|
-
before_data.append({
|
|
181
|
+
before_data.append({
|
|
182
|
+
"rt": rt,
|
|
183
|
+
"mz": mz,
|
|
184
|
+
"inty": inty,
|
|
185
|
+
"alpha": inty / max_inty,
|
|
186
|
+
"sample_idx": sample_idx,
|
|
187
|
+
"sample_name": sample_name,
|
|
188
|
+
"sample_uid": sample_uid,
|
|
189
|
+
"size": markersize,
|
|
190
|
+
})
|
|
149
191
|
|
|
150
192
|
for rt, mz, inty in zip(aligned_rt, mz_vals, inty_vals):
|
|
151
|
-
after_data.append({
|
|
193
|
+
after_data.append({
|
|
194
|
+
"rt": rt,
|
|
195
|
+
"mz": mz,
|
|
196
|
+
"inty": inty,
|
|
197
|
+
"alpha": inty / max_inty,
|
|
198
|
+
"sample_idx": sample_idx,
|
|
199
|
+
"sample_name": sample_name,
|
|
200
|
+
"sample_uid": sample_uid,
|
|
201
|
+
"size": markersize,
|
|
202
|
+
})
|
|
152
203
|
|
|
153
204
|
else:
|
|
154
205
|
# Use features_df
|
|
@@ -156,20 +207,20 @@ def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | Non
|
|
|
156
207
|
self.logger.error("No features_df found. Load features first.")
|
|
157
208
|
return
|
|
158
209
|
|
|
159
|
-
required_cols = [
|
|
210
|
+
required_cols = ["rt", "mz", "inty"]
|
|
160
211
|
missing = [c for c in required_cols if c not in self.features_df.columns]
|
|
161
212
|
if missing:
|
|
162
213
|
self.logger.error(f"Missing required columns in features_df: {missing}")
|
|
163
214
|
return
|
|
164
215
|
|
|
165
|
-
if
|
|
216
|
+
if "rt_original" not in self.features_df.columns:
|
|
166
217
|
self.logger.error("Column 'rt_original' not found in features_df. Alignment may not have been performed.")
|
|
167
218
|
return
|
|
168
219
|
|
|
169
220
|
# Use Polars instead of pandas
|
|
170
221
|
features_df = self.features_df
|
|
171
222
|
|
|
172
|
-
sample_col =
|
|
223
|
+
sample_col = "sample_uid" if "sample_uid" in features_df.columns else "sample_name"
|
|
173
224
|
if sample_col not in features_df.columns:
|
|
174
225
|
self.logger.error("No sample identifier column found in features_df.")
|
|
175
226
|
return
|
|
@@ -202,42 +253,52 @@ def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | Non
|
|
|
202
253
|
for sample_idx, sample in enumerate(samples):
|
|
203
254
|
# Filter sample data using Polars
|
|
204
255
|
sample_data = features_df.filter(pl.col(sample_col) == sample)
|
|
205
|
-
|
|
256
|
+
|
|
206
257
|
# Calculate max intensity using Polars
|
|
207
|
-
max_inty = sample_data.select(pl.col(
|
|
258
|
+
max_inty = sample_data.select(pl.col("inty").max()).item()
|
|
208
259
|
max_inty = max_inty if max_inty and max_inty > 0 else 1
|
|
209
|
-
|
|
260
|
+
|
|
210
261
|
sample_name = str(sample)
|
|
211
262
|
# Get sample_uid - if sample_col is 'sample_uid', use sample directly
|
|
212
|
-
if sample_col ==
|
|
263
|
+
if sample_col == "sample_uid":
|
|
213
264
|
sample_uid = sample
|
|
214
265
|
else:
|
|
215
266
|
# Try to get sample_uid from the first row if it exists
|
|
216
|
-
if
|
|
217
|
-
sample_uid = sample_data.select(pl.col(
|
|
267
|
+
if "sample_uid" in sample_data.columns:
|
|
268
|
+
sample_uid = sample_data.select(pl.col("sample_uid")).item()
|
|
218
269
|
else:
|
|
219
270
|
sample_uid = sample
|
|
220
271
|
|
|
221
272
|
# Convert to dict for iteration - more efficient than row-by-row processing
|
|
222
|
-
sample_dict = sample_data.select([
|
|
223
|
-
|
|
273
|
+
sample_dict = sample_data.select(["rt_original", "rt", "mz", "inty"]).to_dicts()
|
|
274
|
+
|
|
224
275
|
for row_dict in sample_dict:
|
|
225
|
-
rt_original = row_dict[
|
|
226
|
-
rt_current = row_dict[
|
|
227
|
-
mz = row_dict[
|
|
228
|
-
inty = row_dict[
|
|
276
|
+
rt_original = row_dict["rt_original"]
|
|
277
|
+
rt_current = row_dict["rt"]
|
|
278
|
+
mz = row_dict["mz"]
|
|
279
|
+
inty = row_dict["inty"]
|
|
229
280
|
alpha = inty / max_inty
|
|
230
281
|
size = markersize + 2 if sample_idx == 0 else markersize
|
|
231
|
-
|
|
282
|
+
|
|
232
283
|
before_data.append({
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
284
|
+
"rt": rt_original,
|
|
285
|
+
"mz": mz,
|
|
286
|
+
"inty": inty,
|
|
287
|
+
"alpha": alpha,
|
|
288
|
+
"sample_idx": sample_idx,
|
|
289
|
+
"sample_name": sample_name,
|
|
290
|
+
"sample_uid": sample_uid,
|
|
291
|
+
"size": size,
|
|
236
292
|
})
|
|
237
293
|
after_data.append({
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
294
|
+
"rt": rt_current,
|
|
295
|
+
"mz": mz,
|
|
296
|
+
"inty": inty,
|
|
297
|
+
"alpha": alpha,
|
|
298
|
+
"sample_idx": sample_idx,
|
|
299
|
+
"sample_name": sample_name,
|
|
300
|
+
"sample_uid": sample_uid,
|
|
301
|
+
"size": size,
|
|
241
302
|
})
|
|
242
303
|
|
|
243
304
|
# Get sample colors from samples_df using sample indices
|
|
@@ -246,17 +307,16 @@ def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | Non
|
|
|
246
307
|
# Create mapping from sample_idx to sample_uid more efficiently
|
|
247
308
|
sample_idx_to_uid = {}
|
|
248
309
|
for item in before_data:
|
|
249
|
-
if item[
|
|
250
|
-
sample_idx_to_uid[item[
|
|
310
|
+
if item["sample_idx"] not in sample_idx_to_uid:
|
|
311
|
+
sample_idx_to_uid[item["sample_idx"]] = item["sample_uid"]
|
|
251
312
|
else:
|
|
252
313
|
sample_idx_to_uid = {}
|
|
253
|
-
|
|
314
|
+
|
|
254
315
|
# Get colors from samples_df
|
|
255
316
|
sample_uids_list = list(sample_idx_to_uid.values())
|
|
256
|
-
if sample_uids_list and hasattr(self,
|
|
317
|
+
if sample_uids_list and hasattr(self, "samples_df") and self.samples_df is not None:
|
|
257
318
|
sample_colors = (
|
|
258
|
-
self.samples_df
|
|
259
|
-
.filter(pl.col("sample_uid").is_in(sample_uids_list))
|
|
319
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids_list))
|
|
260
320
|
.select(["sample_uid", "sample_color"])
|
|
261
321
|
.to_dict(as_series=False)
|
|
262
322
|
)
|
|
@@ -272,68 +332,106 @@ def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | Non
|
|
|
272
332
|
# Add sample_color to data dictionaries before creating DataFrames
|
|
273
333
|
if before_data:
|
|
274
334
|
for item in before_data:
|
|
275
|
-
item[
|
|
276
|
-
|
|
335
|
+
item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
|
|
336
|
+
|
|
277
337
|
if after_data:
|
|
278
338
|
for item in after_data:
|
|
279
|
-
item[
|
|
280
|
-
|
|
339
|
+
item["sample_color"] = color_map.get(item["sample_idx"], "#1f77b4")
|
|
340
|
+
|
|
281
341
|
# Now create DataFrames with the sample_color already included
|
|
282
342
|
before_df = pd.DataFrame(before_data) if before_data else pd.DataFrame()
|
|
283
343
|
after_df = pd.DataFrame(after_data) if after_data else pd.DataFrame()
|
|
284
344
|
|
|
285
345
|
# Create Bokeh figures
|
|
286
|
-
p1 = figure(
|
|
346
|
+
p1 = figure(
|
|
347
|
+
width=width,
|
|
348
|
+
height=height,
|
|
349
|
+
title="Original RT",
|
|
350
|
+
x_axis_label="Retention Time (s)",
|
|
351
|
+
y_axis_label="m/z",
|
|
352
|
+
tools="pan,wheel_zoom,box_zoom,reset,save",
|
|
353
|
+
)
|
|
287
354
|
p1.outline_line_color = None
|
|
288
|
-
p1.background_fill_color =
|
|
289
|
-
p1.border_fill_color =
|
|
355
|
+
p1.background_fill_color = "white"
|
|
356
|
+
p1.border_fill_color = "white"
|
|
290
357
|
p1.min_border = 0
|
|
291
358
|
|
|
292
|
-
p2 = figure(
|
|
359
|
+
p2 = figure(
|
|
360
|
+
width=width,
|
|
361
|
+
height=height,
|
|
362
|
+
title="Current RT",
|
|
363
|
+
x_axis_label="Retention Time (s)",
|
|
364
|
+
y_axis_label="m/z",
|
|
365
|
+
tools="pan,wheel_zoom,box_zoom,reset,save",
|
|
366
|
+
x_range=p1.x_range,
|
|
367
|
+
y_range=p1.y_range,
|
|
368
|
+
)
|
|
293
369
|
p2.outline_line_color = None
|
|
294
|
-
p2.background_fill_color =
|
|
295
|
-
p2.border_fill_color =
|
|
370
|
+
p2.background_fill_color = "white"
|
|
371
|
+
p2.border_fill_color = "white"
|
|
296
372
|
p2.min_border = 0
|
|
297
|
-
|
|
373
|
+
|
|
298
374
|
# Get unique sample indices for iteration
|
|
299
|
-
unique_samples = sorted(list(
|
|
375
|
+
unique_samples = sorted(list({item["sample_idx"] for item in before_data})) if before_data else []
|
|
300
376
|
|
|
301
377
|
renderers_before = []
|
|
302
378
|
renderers_after = []
|
|
303
379
|
|
|
304
380
|
for sample_idx in unique_samples:
|
|
305
|
-
sb = before_df[before_df[
|
|
306
|
-
sa = after_df[after_df[
|
|
307
|
-
color = color_map.get(sample_idx,
|
|
381
|
+
sb = before_df[before_df["sample_idx"] == sample_idx]
|
|
382
|
+
sa = after_df[after_df["sample_idx"] == sample_idx]
|
|
383
|
+
color = color_map.get(sample_idx, "#000000")
|
|
308
384
|
|
|
309
385
|
if not sb.empty:
|
|
310
386
|
src = ColumnDataSource(sb)
|
|
311
|
-
r = p1.scatter(
|
|
387
|
+
r = p1.scatter("rt", "mz", size="size", color=color, alpha="alpha", source=src)
|
|
312
388
|
renderers_before.append(r)
|
|
313
389
|
|
|
314
390
|
if not sa.empty:
|
|
315
391
|
src = ColumnDataSource(sa)
|
|
316
|
-
r = p2.scatter(
|
|
392
|
+
r = p2.scatter("rt", "mz", size="size", color=color, alpha="alpha", source=src)
|
|
317
393
|
renderers_after.append(r)
|
|
318
394
|
|
|
319
395
|
# Add hover tools
|
|
320
|
-
hover1 = HoverTool(
|
|
396
|
+
hover1 = HoverTool(
|
|
397
|
+
tooltips=[
|
|
398
|
+
("Sample UID", "@sample_uid"),
|
|
399
|
+
("Sample Name", "@sample_name"),
|
|
400
|
+
("Sample Color", "$color[swatch]:sample_color"),
|
|
401
|
+
("RT", "@rt{0.00}"),
|
|
402
|
+
("m/z", "@mz{0.0000}"),
|
|
403
|
+
("Intensity", "@inty{0.0e0}"),
|
|
404
|
+
],
|
|
405
|
+
renderers=renderers_before,
|
|
406
|
+
)
|
|
321
407
|
p1.add_tools(hover1)
|
|
322
408
|
|
|
323
|
-
hover2 = HoverTool(
|
|
409
|
+
hover2 = HoverTool(
|
|
410
|
+
tooltips=[
|
|
411
|
+
("Sample UID", "@sample_uid"),
|
|
412
|
+
("Sample Name", "@sample_name"),
|
|
413
|
+
("Sample Color", "$color[swatch]:sample_color"),
|
|
414
|
+
("RT", "@rt{0.00}"),
|
|
415
|
+
("m/z", "@mz{0.0000}"),
|
|
416
|
+
("Intensity", "@inty{0.0e0}"),
|
|
417
|
+
],
|
|
418
|
+
renderers=renderers_after,
|
|
419
|
+
)
|
|
324
420
|
p2.add_tools(hover2)
|
|
325
421
|
|
|
326
422
|
# Create layout with both plots side by side
|
|
327
423
|
# Use the aliased bokeh_row and set sizing_mode, width and height to avoid validation warnings.
|
|
328
|
-
layout = bokeh_row(p1, p2, sizing_mode=
|
|
424
|
+
layout = bokeh_row(p1, p2, sizing_mode="fixed", width=width, height=height)
|
|
329
425
|
|
|
330
426
|
# Output and show
|
|
331
427
|
if filename:
|
|
332
428
|
from bokeh.plotting import output_file, show
|
|
429
|
+
|
|
333
430
|
output_file(filename)
|
|
334
431
|
show(layout)
|
|
335
432
|
else:
|
|
336
433
|
from bokeh.plotting import show
|
|
434
|
+
|
|
337
435
|
show(layout)
|
|
338
436
|
|
|
339
437
|
return layout
|
|
@@ -445,14 +543,14 @@ def plot_consensus_2d(
|
|
|
445
543
|
except ImportError:
|
|
446
544
|
from bokeh.models.annotations import ColorBar
|
|
447
545
|
from bokeh.palettes import viridis
|
|
448
|
-
|
|
546
|
+
|
|
449
547
|
# Import cmap for colormap handling
|
|
450
548
|
from cmap import Colormap
|
|
451
549
|
|
|
452
550
|
# Convert Polars DataFrame to pandas for Bokeh compatibility
|
|
453
551
|
data_pd = data.to_pandas()
|
|
454
552
|
source = ColumnDataSource(data_pd)
|
|
455
|
-
|
|
553
|
+
|
|
456
554
|
# Handle colormap using cmap.Colormap
|
|
457
555
|
try:
|
|
458
556
|
# Get colormap palette using cmap
|
|
@@ -461,6 +559,7 @@ def plot_consensus_2d(
|
|
|
461
559
|
# Generate 256 colors and convert to hex
|
|
462
560
|
import numpy as np
|
|
463
561
|
import matplotlib.colors as mcolors
|
|
562
|
+
|
|
464
563
|
colors = colormap(np.linspace(0, 1, 256))
|
|
465
564
|
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
466
565
|
else:
|
|
@@ -473,19 +572,21 @@ def plot_consensus_2d(
|
|
|
473
572
|
# Fall back to generating colors manually
|
|
474
573
|
import numpy as np
|
|
475
574
|
import matplotlib.colors as mcolors
|
|
575
|
+
|
|
476
576
|
colors = colormap(np.linspace(0, 1, 256))
|
|
477
577
|
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
478
578
|
except AttributeError:
|
|
479
579
|
# Fall back to generating colors manually
|
|
480
580
|
import numpy as np
|
|
481
581
|
import matplotlib.colors as mcolors
|
|
582
|
+
|
|
482
583
|
colors = colormap(np.linspace(0, 1, 256))
|
|
483
584
|
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
484
585
|
except (AttributeError, ValueError, TypeError) as e:
|
|
485
586
|
# Fallback to viridis if cmap interpretation fails
|
|
486
587
|
self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
|
|
487
588
|
palette = viridis(256)
|
|
488
|
-
|
|
589
|
+
|
|
489
590
|
color_mapper = LinearColorMapper(
|
|
490
591
|
palette=palette,
|
|
491
592
|
low=data[colorby].min(),
|
|
@@ -603,8 +704,7 @@ def plot_samples_2d(
|
|
|
603
704
|
|
|
604
705
|
# Get sample colors from samples_df
|
|
605
706
|
sample_colors = (
|
|
606
|
-
self.samples_df
|
|
607
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
707
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
608
708
|
.select(["sample_uid", "sample_color"])
|
|
609
709
|
.to_dict(as_series=False)
|
|
610
710
|
)
|
|
@@ -794,7 +894,7 @@ def plot_bpc(
|
|
|
794
894
|
original: bool = False,
|
|
795
895
|
):
|
|
796
896
|
"""
|
|
797
|
-
Plot Base Peak Chromatograms (BPC) for selected samples
|
|
897
|
+
Plot Base Peak Chromatograms (BPC) for selected samples overlaid using Bokeh.
|
|
798
898
|
|
|
799
899
|
This collects per-sample BPCs via `get_bpc(self, sample=uid)` and overlays them.
|
|
800
900
|
Colors are mapped per-sample using the same Turbo256 palette as `plot_samples_2d`.
|
|
@@ -818,8 +918,7 @@ def plot_bpc(
|
|
|
818
918
|
|
|
819
919
|
# Get sample colors from samples_df
|
|
820
920
|
sample_colors = (
|
|
821
|
-
self.samples_df
|
|
822
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
921
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
823
922
|
.select(["sample_uid", "sample_color"])
|
|
824
923
|
.to_dict(as_series=False)
|
|
825
924
|
)
|
|
@@ -836,7 +935,7 @@ def plot_bpc(
|
|
|
836
935
|
for uid in sample_uids:
|
|
837
936
|
try:
|
|
838
937
|
first_chrom = get_bpc(self, sample=uid, label=None, original=original)
|
|
839
|
-
if hasattr(first_chrom,
|
|
938
|
+
if hasattr(first_chrom, "rt_unit"):
|
|
840
939
|
rt_unit = first_chrom.rt_unit
|
|
841
940
|
break
|
|
842
941
|
except Exception:
|
|
@@ -867,7 +966,11 @@ def plot_bpc(
|
|
|
867
966
|
# extract arrays
|
|
868
967
|
try:
|
|
869
968
|
# prefer Chromatogram API
|
|
870
|
-
chrom_dict =
|
|
969
|
+
chrom_dict = (
|
|
970
|
+
chrom.to_dict()
|
|
971
|
+
if hasattr(chrom, "to_dict")
|
|
972
|
+
else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
|
|
973
|
+
)
|
|
871
974
|
rt = chrom_dict.get("rt")
|
|
872
975
|
inty = chrom_dict.get("inty")
|
|
873
976
|
except Exception:
|
|
@@ -907,7 +1010,7 @@ def plot_bpc(
|
|
|
907
1010
|
|
|
908
1011
|
# Debug: log sample processing details
|
|
909
1012
|
self.logger.debug(
|
|
910
|
-
f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}"
|
|
1013
|
+
f"Processing BPC for sample_uid={uid}, sample_name={sample_name}, rt_len={rt.size}, color={color}",
|
|
911
1014
|
)
|
|
912
1015
|
|
|
913
1016
|
data = {"rt": rt, "inty": inty, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
@@ -921,7 +1024,15 @@ def plot_bpc(
|
|
|
921
1024
|
self.logger.warning("No BPC curves to plot for the selected samples.")
|
|
922
1025
|
return
|
|
923
1026
|
|
|
924
|
-
hover = HoverTool(
|
|
1027
|
+
hover = HoverTool(
|
|
1028
|
+
tooltips=[
|
|
1029
|
+
("sample", "@sample"),
|
|
1030
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
1031
|
+
("rt", "@rt{0.00}"),
|
|
1032
|
+
("inty", "@inty{0.00e0}"),
|
|
1033
|
+
],
|
|
1034
|
+
renderers=renderers,
|
|
1035
|
+
)
|
|
925
1036
|
p.add_tools(hover)
|
|
926
1037
|
|
|
927
1038
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
@@ -990,8 +1101,7 @@ def plot_eic(
|
|
|
990
1101
|
|
|
991
1102
|
# Get sample colors from samples_df
|
|
992
1103
|
sample_colors = (
|
|
993
|
-
self.samples_df
|
|
994
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1104
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
995
1105
|
.select(["sample_uid", "sample_color"])
|
|
996
1106
|
.to_dict(as_series=False)
|
|
997
1107
|
)
|
|
@@ -1004,7 +1114,7 @@ def plot_eic(
|
|
|
1004
1114
|
for uid in sample_uids:
|
|
1005
1115
|
try:
|
|
1006
1116
|
first_chrom = get_eic(self, sample=uid, mz=mz, mz_tol=mz_tol, label=None)
|
|
1007
|
-
if hasattr(first_chrom,
|
|
1117
|
+
if hasattr(first_chrom, "rt_unit"):
|
|
1008
1118
|
rt_unit = first_chrom.rt_unit
|
|
1009
1119
|
break
|
|
1010
1120
|
except Exception:
|
|
@@ -1035,7 +1145,11 @@ def plot_eic(
|
|
|
1035
1145
|
# extract arrays
|
|
1036
1146
|
try:
|
|
1037
1147
|
# prefer Chromatogram API
|
|
1038
|
-
chrom_dict =
|
|
1148
|
+
chrom_dict = (
|
|
1149
|
+
chrom.to_dict()
|
|
1150
|
+
if hasattr(chrom, "to_dict")
|
|
1151
|
+
else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
|
|
1152
|
+
)
|
|
1039
1153
|
rt = chrom_dict.get("rt")
|
|
1040
1154
|
inty = chrom_dict.get("inty")
|
|
1041
1155
|
except Exception:
|
|
@@ -1083,7 +1197,15 @@ def plot_eic(
|
|
|
1083
1197
|
self.logger.warning("No EIC curves to plot for the selected samples.")
|
|
1084
1198
|
return
|
|
1085
1199
|
|
|
1086
|
-
hover = HoverTool(
|
|
1200
|
+
hover = HoverTool(
|
|
1201
|
+
tooltips=[
|
|
1202
|
+
("sample", "@sample"),
|
|
1203
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
1204
|
+
("rt", "@rt{0.00}"),
|
|
1205
|
+
("inty", "@inty{0.0e0}"),
|
|
1206
|
+
],
|
|
1207
|
+
renderers=renderers,
|
|
1208
|
+
)
|
|
1087
1209
|
p.add_tools(hover)
|
|
1088
1210
|
|
|
1089
1211
|
if getattr(p, "legend", None) and len(p.legend) > 0:
|
|
@@ -1117,7 +1239,7 @@ def plot_rt_correction(
|
|
|
1117
1239
|
height: int = 300,
|
|
1118
1240
|
):
|
|
1119
1241
|
"""
|
|
1120
|
-
Plot RT correction per sample: (rt - rt_original) vs rt
|
|
1242
|
+
Plot RT correction per sample: (rt - rt_original) vs rt overlaid for selected samples.
|
|
1121
1243
|
|
|
1122
1244
|
This uses the same color mapping as `plot_bpc` so curves for the same samples match.
|
|
1123
1245
|
"""
|
|
@@ -1141,8 +1263,7 @@ def plot_rt_correction(
|
|
|
1141
1263
|
|
|
1142
1264
|
# Get sample colors from samples_df
|
|
1143
1265
|
sample_colors = (
|
|
1144
|
-
self.samples_df
|
|
1145
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1266
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1146
1267
|
.select(["sample_uid", "sample_color"])
|
|
1147
1268
|
.to_dict(as_series=False)
|
|
1148
1269
|
)
|
|
@@ -1228,7 +1349,15 @@ def plot_rt_correction(
|
|
|
1228
1349
|
self.logger.warning("No RT correction curves to plot for the selected samples.")
|
|
1229
1350
|
return
|
|
1230
1351
|
|
|
1231
|
-
hover = HoverTool(
|
|
1352
|
+
hover = HoverTool(
|
|
1353
|
+
tooltips=[
|
|
1354
|
+
("sample", "@sample"),
|
|
1355
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
1356
|
+
("rt", "@rt{0.00}"),
|
|
1357
|
+
("rt - rt_original", "@delta{0.00}"),
|
|
1358
|
+
],
|
|
1359
|
+
renderers=renderers,
|
|
1360
|
+
)
|
|
1232
1361
|
p.add_tools(hover)
|
|
1233
1362
|
|
|
1234
1363
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|
|
@@ -1280,7 +1409,7 @@ def plot_chrom(
|
|
|
1280
1409
|
if not sample_names:
|
|
1281
1410
|
self.logger.error("No sample names found in chromatogram data.")
|
|
1282
1411
|
return
|
|
1283
|
-
|
|
1412
|
+
|
|
1284
1413
|
# Create color mapping by getting sample_color for each sample_name
|
|
1285
1414
|
samples_info = self.samples_df.select(["sample_name", "sample_color"]).to_dict(as_series=False)
|
|
1286
1415
|
sample_name_to_color = dict(zip(samples_info["sample_name"], samples_info["sample_color"]))
|
|
@@ -1753,7 +1882,7 @@ def plot_pca(
|
|
|
1753
1882
|
else:
|
|
1754
1883
|
self.logger.warning(
|
|
1755
1884
|
f"Sample count mismatch: samples_df has {len(samples_pd)} rows, "
|
|
1756
|
-
f"but consensus matrix has {len(pca_df)} samples"
|
|
1885
|
+
f"but consensus matrix has {len(pca_df)} samples",
|
|
1757
1886
|
)
|
|
1758
1887
|
|
|
1759
1888
|
# Prepare color mapping
|
|
@@ -1824,25 +1953,23 @@ def plot_pca(
|
|
|
1824
1953
|
if "sample_uid" in pca_df.columns or "sample_name" in pca_df.columns:
|
|
1825
1954
|
# Choose the identifier to map colors by
|
|
1826
1955
|
id_col = "sample_uid" if "sample_uid" in pca_df.columns else "sample_name"
|
|
1827
|
-
|
|
1956
|
+
|
|
1828
1957
|
# Get colors from samples_df based on the identifier
|
|
1829
1958
|
if id_col == "sample_uid":
|
|
1830
1959
|
sample_colors = (
|
|
1831
|
-
self.samples_df
|
|
1832
|
-
.filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
|
|
1960
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(pca_df[id_col].unique()))
|
|
1833
1961
|
.select(["sample_uid", "sample_color"])
|
|
1834
1962
|
.to_dict(as_series=False)
|
|
1835
1963
|
)
|
|
1836
1964
|
color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
1837
1965
|
else: # sample_name
|
|
1838
1966
|
sample_colors = (
|
|
1839
|
-
self.samples_df
|
|
1840
|
-
.filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
|
|
1967
|
+
self.samples_df.filter(pl.col("sample_name").is_in(pca_df[id_col].unique()))
|
|
1841
1968
|
.select(["sample_name", "sample_color"])
|
|
1842
1969
|
.to_dict(as_series=False)
|
|
1843
1970
|
)
|
|
1844
1971
|
color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
|
|
1845
|
-
|
|
1972
|
+
|
|
1846
1973
|
# Map colors into dataframe
|
|
1847
1974
|
pca_df["color"] = [color_map.get(x, "#1f77b4") for x in pca_df[id_col]] # fallback to blue
|
|
1848
1975
|
# Update the ColumnDataSource with new color column
|
|
@@ -1878,7 +2005,7 @@ def plot_pca(
|
|
|
1878
2005
|
if col in pca_df.columns:
|
|
1879
2006
|
if col == "sample_color":
|
|
1880
2007
|
# Display sample_color as a colored swatch
|
|
1881
|
-
tooltip_list.append((
|
|
2008
|
+
tooltip_list.append(("color", "$color[swatch]:sample_color"))
|
|
1882
2009
|
elif pca_df[col].dtype in ["float64", "float32"]:
|
|
1883
2010
|
tooltip_list.append((col, f"@{col}{{0.00}}"))
|
|
1884
2011
|
else:
|
|
@@ -1904,6 +2031,7 @@ def plot_pca(
|
|
|
1904
2031
|
show(p)
|
|
1905
2032
|
return p
|
|
1906
2033
|
|
|
2034
|
+
|
|
1907
2035
|
def plot_tic(
|
|
1908
2036
|
self,
|
|
1909
2037
|
samples=None,
|
|
@@ -1914,7 +2042,7 @@ def plot_tic(
|
|
|
1914
2042
|
original: bool = False,
|
|
1915
2043
|
):
|
|
1916
2044
|
"""
|
|
1917
|
-
Plot Total Ion Chromatograms (TIC) for selected samples
|
|
2045
|
+
Plot Total Ion Chromatograms (TIC) for selected samples overlaid using Bokeh.
|
|
1918
2046
|
|
|
1919
2047
|
Parameters and behavior mirror `plot_bpc` but use per-sample TICs (get_tic).
|
|
1920
2048
|
"""
|
|
@@ -1931,8 +2059,7 @@ def plot_tic(
|
|
|
1931
2059
|
|
|
1932
2060
|
# Get sample colors from samples_df
|
|
1933
2061
|
sample_colors = (
|
|
1934
|
-
self.samples_df
|
|
1935
|
-
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
2062
|
+
self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1936
2063
|
.select(["sample_uid", "sample_color"])
|
|
1937
2064
|
.to_dict(as_series=False)
|
|
1938
2065
|
)
|
|
@@ -1945,7 +2072,7 @@ def plot_tic(
|
|
|
1945
2072
|
for uid in sample_uids:
|
|
1946
2073
|
try:
|
|
1947
2074
|
first_chrom = get_tic(self, sample=uid, label=None)
|
|
1948
|
-
if hasattr(first_chrom,
|
|
2075
|
+
if hasattr(first_chrom, "rt_unit"):
|
|
1949
2076
|
rt_unit = first_chrom.rt_unit
|
|
1950
2077
|
break
|
|
1951
2078
|
except Exception:
|
|
@@ -1974,7 +2101,11 @@ def plot_tic(
|
|
|
1974
2101
|
|
|
1975
2102
|
# extract arrays
|
|
1976
2103
|
try:
|
|
1977
|
-
chrom_dict =
|
|
2104
|
+
chrom_dict = (
|
|
2105
|
+
chrom.to_dict()
|
|
2106
|
+
if hasattr(chrom, "to_dict")
|
|
2107
|
+
else {"rt": getattr(chrom, "rt"), "inty": getattr(chrom, "inty")}
|
|
2108
|
+
)
|
|
1978
2109
|
rt = chrom_dict.get("rt")
|
|
1979
2110
|
inty = chrom_dict.get("inty")
|
|
1980
2111
|
except Exception:
|
|
@@ -2022,7 +2153,15 @@ def plot_tic(
|
|
|
2022
2153
|
self.logger.warning("No TIC curves to plot for the selected samples.")
|
|
2023
2154
|
return
|
|
2024
2155
|
|
|
2025
|
-
hover = HoverTool(
|
|
2156
|
+
hover = HoverTool(
|
|
2157
|
+
tooltips=[
|
|
2158
|
+
("sample", "@sample"),
|
|
2159
|
+
("sample_color", "$color[swatch]:sample_color"),
|
|
2160
|
+
("rt", "@rt{0.00}"),
|
|
2161
|
+
("inty", "@inty{0.00e0}"),
|
|
2162
|
+
],
|
|
2163
|
+
renderers=renderers,
|
|
2164
|
+
)
|
|
2026
2165
|
p.add_tools(hover)
|
|
2027
2166
|
|
|
2028
2167
|
# Only set legend properties if a legend was actually created to avoid Bokeh warnings
|