masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/logger.py +35 -19
- masster/sample/adducts.py +15 -29
- masster/sample/defaults/find_adducts_def.py +1 -3
- masster/sample/defaults/sample_def.py +4 -4
- masster/sample/h5.py +203 -361
- masster/sample/helpers.py +14 -30
- masster/sample/lib.py +3 -3
- masster/sample/load.py +21 -29
- masster/sample/plot.py +222 -132
- masster/sample/processing.py +42 -55
- masster/sample/sample.py +37 -46
- masster/sample/save.py +37 -61
- masster/sample/sciex.py +13 -11
- masster/sample/thermo.py +69 -74
- masster/spectrum.py +15 -15
- masster/study/analysis.py +650 -586
- masster/study/defaults/identify_def.py +1 -3
- masster/study/defaults/merge_def.py +6 -7
- masster/study/defaults/study_def.py +1 -5
- masster/study/export.py +35 -96
- masster/study/h5.py +134 -211
- masster/study/helpers.py +385 -459
- masster/study/id.py +239 -290
- masster/study/importers.py +84 -93
- masster/study/load.py +159 -178
- masster/study/merge.py +1112 -1098
- masster/study/plot.py +195 -149
- masster/study/processing.py +144 -191
- masster/study/save.py +14 -13
- masster/study/study.py +89 -130
- masster/wizard/wizard.py +764 -714
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
|
@@ -11,6 +11,7 @@ from tqdm import tqdm
|
|
|
11
11
|
|
|
12
12
|
# Import cmap for colormap handling
|
|
13
13
|
from cmap import Colormap
|
|
14
|
+
|
|
14
15
|
hv.extension("bokeh")
|
|
15
16
|
|
|
16
17
|
|
|
@@ -22,84 +23,93 @@ from bokeh.layouts import row as bokeh_row
|
|
|
22
23
|
def _export_with_webdriver_manager(plot_obj, filename, format_type, logger=None):
|
|
23
24
|
"""
|
|
24
25
|
Export plot to PNG or SVG using webdriver-manager for automatic driver management.
|
|
25
|
-
|
|
26
|
+
|
|
26
27
|
Parameters:
|
|
27
28
|
plot_obj: Bokeh plot object or holoviews object to export
|
|
28
|
-
filename: Output filename
|
|
29
|
+
filename: Output filename
|
|
29
30
|
format_type: Either "png" or "svg"
|
|
30
31
|
logger: Logger for error reporting (optional)
|
|
31
|
-
|
|
32
|
+
|
|
32
33
|
Returns:
|
|
33
34
|
bool: True if export successful, False otherwise
|
|
34
35
|
"""
|
|
35
36
|
try:
|
|
36
37
|
# Convert holoviews to bokeh if needed
|
|
37
|
-
if hasattr(plot_obj,
|
|
38
|
+
if hasattr(plot_obj, "opts"): # Likely a holoviews object
|
|
38
39
|
import holoviews as hv
|
|
40
|
+
|
|
39
41
|
bokeh_plot = hv.render(plot_obj)
|
|
40
42
|
else:
|
|
41
43
|
bokeh_plot = plot_obj
|
|
42
|
-
|
|
44
|
+
|
|
43
45
|
# Try webdriver-manager export first
|
|
44
46
|
try:
|
|
45
47
|
from webdriver_manager.chrome import ChromeDriverManager
|
|
46
48
|
from selenium import webdriver
|
|
47
49
|
from selenium.webdriver.chrome.service import Service
|
|
48
50
|
from selenium.webdriver.chrome.options import Options
|
|
49
|
-
|
|
51
|
+
|
|
50
52
|
# Set up Chrome options for headless operation
|
|
51
53
|
chrome_options = Options()
|
|
52
54
|
chrome_options.add_argument("--headless")
|
|
53
55
|
chrome_options.add_argument("--no-sandbox")
|
|
54
56
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
|
55
57
|
chrome_options.add_argument("--disable-gpu")
|
|
56
|
-
|
|
58
|
+
|
|
57
59
|
# Use webdriver-manager to automatically get the correct ChromeDriver
|
|
58
60
|
service = Service(ChromeDriverManager().install())
|
|
59
61
|
driver = webdriver.Chrome(service=service, options=chrome_options)
|
|
60
|
-
|
|
62
|
+
|
|
61
63
|
# Export with managed webdriver
|
|
62
64
|
if format_type == "png":
|
|
63
65
|
from bokeh.io import export_png
|
|
66
|
+
|
|
64
67
|
export_png(bokeh_plot, filename=filename, webdriver=driver)
|
|
65
68
|
elif format_type == "svg":
|
|
66
69
|
from bokeh.io import export_svg
|
|
70
|
+
|
|
67
71
|
export_svg(bokeh_plot, filename=filename, webdriver=driver)
|
|
68
72
|
else:
|
|
69
73
|
raise ValueError(f"Unsupported format: {format_type}")
|
|
70
|
-
|
|
74
|
+
|
|
71
75
|
driver.quit()
|
|
72
76
|
return True
|
|
73
|
-
|
|
77
|
+
|
|
74
78
|
except ImportError:
|
|
75
79
|
if logger:
|
|
76
80
|
logger.debug(f"webdriver-manager not available, using default {format_type.upper()} export")
|
|
77
81
|
# Fall back to default export
|
|
78
82
|
if format_type == "png":
|
|
79
83
|
from bokeh.io import export_png
|
|
84
|
+
|
|
80
85
|
export_png(bokeh_plot, filename=filename)
|
|
81
86
|
elif format_type == "svg":
|
|
82
87
|
from bokeh.io import export_svg
|
|
88
|
+
|
|
83
89
|
export_svg(bokeh_plot, filename=filename)
|
|
84
90
|
return True
|
|
85
|
-
|
|
91
|
+
|
|
86
92
|
except Exception as e:
|
|
87
93
|
if logger:
|
|
88
|
-
logger.debug(
|
|
94
|
+
logger.debug(
|
|
95
|
+
f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export"
|
|
96
|
+
)
|
|
89
97
|
try:
|
|
90
98
|
# Final fallback to default export
|
|
91
99
|
if format_type == "png":
|
|
92
100
|
from bokeh.io import export_png
|
|
101
|
+
|
|
93
102
|
export_png(bokeh_plot, filename=filename)
|
|
94
103
|
elif format_type == "svg":
|
|
95
104
|
from bokeh.io import export_svg
|
|
105
|
+
|
|
96
106
|
export_svg(bokeh_plot, filename=filename)
|
|
97
107
|
return True
|
|
98
108
|
except Exception as e2:
|
|
99
109
|
if logger:
|
|
100
110
|
logger.error(f"{format_type.upper()} export failed: {e2}")
|
|
101
111
|
return False
|
|
102
|
-
|
|
112
|
+
|
|
103
113
|
except Exception as e:
|
|
104
114
|
if logger:
|
|
105
115
|
logger.error(f"Export preparation failed: {e}")
|
|
@@ -117,11 +127,11 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
|
|
|
117
127
|
from bokeh.embed import file_html
|
|
118
128
|
|
|
119
129
|
# Create HTML content without affecting global state
|
|
120
|
-
resources = Resources(mode=
|
|
130
|
+
resources = Resources(mode="cdn")
|
|
121
131
|
html = file_html(plot_object, resources, title=plot_title)
|
|
122
132
|
|
|
123
133
|
# Write directly to file
|
|
124
|
-
with open(filename,
|
|
134
|
+
with open(filename, "w", encoding="utf-8") as f:
|
|
125
135
|
f.write(html)
|
|
126
136
|
|
|
127
137
|
logger.info(f"Plot saved to: {abs_filename}")
|
|
@@ -132,15 +142,15 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
|
|
|
132
142
|
logger.info(f"Plot saved to: {abs_filename}")
|
|
133
143
|
else:
|
|
134
144
|
# Fall back to HTML if PNG export not available
|
|
135
|
-
html_filename = filename.replace(
|
|
136
|
-
abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace(
|
|
145
|
+
html_filename = filename.replace(".png", ".html")
|
|
146
|
+
abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace(".png", ".html")
|
|
137
147
|
from bokeh.resources import Resources
|
|
138
148
|
from bokeh.embed import file_html
|
|
139
149
|
|
|
140
|
-
resources = Resources(mode=
|
|
150
|
+
resources = Resources(mode="cdn")
|
|
141
151
|
html = file_html(plot_object, resources, title=plot_title)
|
|
142
152
|
|
|
143
|
-
with open(html_filename,
|
|
153
|
+
with open(html_filename, "w", encoding="utf-8") as f:
|
|
144
154
|
f.write(html)
|
|
145
155
|
|
|
146
156
|
logger.warning(f"PNG export not available. Saved as HTML instead: {abs_html_filename}")
|
|
@@ -150,21 +160,21 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
|
|
|
150
160
|
logger.info(f"Plot saved to: {abs_filename}")
|
|
151
161
|
else:
|
|
152
162
|
# Fall back to HTML if SVG export not available
|
|
153
|
-
html_filename = filename.replace(
|
|
154
|
-
abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace(
|
|
163
|
+
html_filename = filename.replace(".svg", ".html")
|
|
164
|
+
abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace(".svg", ".html")
|
|
155
165
|
from bokeh.resources import Resources
|
|
156
166
|
from bokeh.embed import file_html
|
|
157
167
|
|
|
158
|
-
resources = Resources(mode=
|
|
168
|
+
resources = Resources(mode="cdn")
|
|
159
169
|
html = file_html(plot_object, resources, title=plot_title)
|
|
160
170
|
|
|
161
|
-
with open(html_filename,
|
|
171
|
+
with open(html_filename, "w", encoding="utf-8") as f:
|
|
162
172
|
f.write(html)
|
|
163
173
|
|
|
164
174
|
logger.warning(f"SVG export not available. Saved as HTML instead: {abs_html_filename}")
|
|
165
175
|
html = file_html(plot_object, resources, title=plot_title)
|
|
166
176
|
|
|
167
|
-
with open(html_filename,
|
|
177
|
+
with open(html_filename, "w", encoding="utf-8") as f:
|
|
168
178
|
f.write(html)
|
|
169
179
|
|
|
170
180
|
logger.warning(f"SVG export not available. Saved as HTML instead: {abs_html_filename}")
|
|
@@ -173,10 +183,10 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
|
|
|
173
183
|
from bokeh.resources import Resources
|
|
174
184
|
from bokeh.embed import file_html
|
|
175
185
|
|
|
176
|
-
resources = Resources(mode=
|
|
186
|
+
resources = Resources(mode="cdn")
|
|
177
187
|
html = file_html(plot_object, resources, title=plot_title)
|
|
178
188
|
|
|
179
|
-
with open(filename,
|
|
189
|
+
with open(filename, "w", encoding="utf-8") as f:
|
|
180
190
|
f.write(html)
|
|
181
191
|
|
|
182
192
|
logger.info(f"Plot saved to: {abs_filename}")
|
|
@@ -194,7 +204,7 @@ def _isolated_show_notebook(plot_object):
|
|
|
194
204
|
|
|
195
205
|
# Suppress both warnings and logging messages for the specific Bokeh callback warnings
|
|
196
206
|
# that occur when Panel components with Python callbacks are converted to standalone Bokeh
|
|
197
|
-
bokeh_logger = logging.getLogger(
|
|
207
|
+
bokeh_logger = logging.getLogger("bokeh.embed.util")
|
|
198
208
|
original_level = bokeh_logger.level
|
|
199
209
|
bokeh_logger.setLevel(logging.ERROR) # Suppress WARNING level messages
|
|
200
210
|
|
|
@@ -210,8 +220,8 @@ def _isolated_show_notebook(plot_object):
|
|
|
210
220
|
output_notebook(hide_banner=True)
|
|
211
221
|
|
|
212
222
|
# Reset Holoviews to notebook mode
|
|
213
|
-
hv.extension(
|
|
214
|
-
hv.output(backend=
|
|
223
|
+
hv.extension("bokeh", logo=False)
|
|
224
|
+
hv.output(backend="bokeh", mode="jupyter")
|
|
215
225
|
|
|
216
226
|
# Show in notebook
|
|
217
227
|
show(plot_object)
|
|
@@ -245,13 +255,14 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
|
|
|
245
255
|
elif filename.endswith(".png"):
|
|
246
256
|
try:
|
|
247
257
|
from panel.io.save import save_png
|
|
258
|
+
|
|
248
259
|
# Convert Panel to Bokeh models before saving
|
|
249
260
|
bokeh_layout = panel_obj.get_root()
|
|
250
261
|
save_png(bokeh_layout, filename=filename)
|
|
251
262
|
logger.info(f"{plot_title} saved to: {abs_filename}")
|
|
252
263
|
except Exception:
|
|
253
264
|
# Fall back to HTML if PNG export not available
|
|
254
|
-
html_filename = filename.replace(
|
|
265
|
+
html_filename = filename.replace(".png", ".html")
|
|
255
266
|
abs_html_filename = os.path.abspath(html_filename)
|
|
256
267
|
try:
|
|
257
268
|
panel_obj.save(html_filename, embed=True)
|
|
@@ -263,12 +274,13 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
|
|
|
263
274
|
# Try to save as PDF, fall back to HTML if not available
|
|
264
275
|
try:
|
|
265
276
|
from bokeh.io.export import export_pdf
|
|
277
|
+
|
|
266
278
|
bokeh_layout = panel_obj.get_root()
|
|
267
279
|
export_pdf(bokeh_layout, filename=filename)
|
|
268
280
|
logger.info(f"{plot_title} saved to: {abs_filename}")
|
|
269
281
|
except ImportError:
|
|
270
282
|
# Fall back to HTML if PDF export not available
|
|
271
|
-
html_filename = filename.replace(
|
|
283
|
+
html_filename = filename.replace(".pdf", ".html")
|
|
272
284
|
abs_html_filename = os.path.abspath(html_filename)
|
|
273
285
|
try:
|
|
274
286
|
panel_obj.save(html_filename, embed=True)
|
|
@@ -279,12 +291,13 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
|
|
|
279
291
|
# Try to save as SVG, fall back to HTML if not available
|
|
280
292
|
try:
|
|
281
293
|
from bokeh.io.export import export_svg
|
|
294
|
+
|
|
282
295
|
bokeh_layout = panel_obj.get_root()
|
|
283
296
|
export_svg(bokeh_layout, filename=filename)
|
|
284
297
|
logger.info(f"{plot_title} saved to: {abs_filename}")
|
|
285
298
|
except Exception as e:
|
|
286
299
|
# Fall back to HTML if SVG export not available
|
|
287
|
-
html_filename = filename.replace(
|
|
300
|
+
html_filename = filename.replace(".svg", ".html")
|
|
288
301
|
abs_html_filename = os.path.abspath(html_filename)
|
|
289
302
|
try:
|
|
290
303
|
panel_obj.save(html_filename, embed=True)
|
|
@@ -318,16 +331,18 @@ def _isolated_show_panel_notebook(panel_obj):
|
|
|
318
331
|
output_notebook(hide_banner=True)
|
|
319
332
|
|
|
320
333
|
# Reset Holoviews to notebook mode
|
|
321
|
-
hv.extension(
|
|
322
|
-
hv.output(backend=
|
|
334
|
+
hv.extension("bokeh", logo=False)
|
|
335
|
+
hv.output(backend="bokeh", mode="jupyter")
|
|
323
336
|
|
|
324
337
|
# For Panel objects in notebooks, use on.extension and display inline
|
|
325
338
|
import panel as on
|
|
339
|
+
|
|
326
340
|
try:
|
|
327
341
|
# Configure Panel for notebook display
|
|
328
|
-
on.extension(
|
|
342
|
+
on.extension("bokeh", inline=True, comms="vscode")
|
|
329
343
|
# Use IPython display to show inline instead of show()
|
|
330
344
|
from IPython.display import display
|
|
345
|
+
|
|
331
346
|
display(panel_obj)
|
|
332
347
|
except Exception:
|
|
333
348
|
# Fallback to regular Panel show
|
|
@@ -344,8 +359,8 @@ def plot_alignment(
|
|
|
344
359
|
):
|
|
345
360
|
"""Visualize retention time alignment using two synchronized Bokeh scatter plots.
|
|
346
361
|
|
|
347
|
-
Uses ``features_df`` to create side-by-side plots showing Original RT (left)
|
|
348
|
-
and Current/Aligned RT (right). If no alignment has been performed yet,
|
|
362
|
+
Uses ``features_df`` to create side-by-side plots showing Original RT (left)
|
|
363
|
+
and Current/Aligned RT (right). If no alignment has been performed yet,
|
|
349
364
|
both plots show the current RT values.
|
|
350
365
|
|
|
351
366
|
Parameters:
|
|
@@ -409,27 +424,33 @@ def plot_alignment(
|
|
|
409
424
|
for sample_idx, sample in enumerate(samples_list):
|
|
410
425
|
# Filter sample data
|
|
411
426
|
sample_data = features_df.filter(pl.col(sample_col) == sample)
|
|
412
|
-
|
|
427
|
+
|
|
413
428
|
# Sample data if too large for performance
|
|
414
429
|
max_points_per_sample = 10000
|
|
415
430
|
if sample_data.height > max_points_per_sample:
|
|
416
|
-
self.logger.info(
|
|
431
|
+
self.logger.info(
|
|
432
|
+
f"Sample {sample}: Sampling {max_points_per_sample} points from {sample_data.height} features for performance"
|
|
433
|
+
)
|
|
417
434
|
sample_data = sample_data.sample(n=max_points_per_sample, seed=42)
|
|
418
435
|
|
|
419
436
|
# Calculate max intensity for alpha scaling
|
|
420
437
|
max_inty = sample_data.select(pl.col("inty").max()).item() or 1
|
|
421
438
|
|
|
422
439
|
# Get sample information
|
|
423
|
-
sample_uid =
|
|
424
|
-
|
|
440
|
+
sample_uid = (
|
|
441
|
+
sample
|
|
442
|
+
if sample_col == "sample_uid"
|
|
443
|
+
else sample_data.select(pl.col("sample_uid")).item()
|
|
444
|
+
if "sample_uid" in sample_data.columns
|
|
445
|
+
else sample
|
|
446
|
+
)
|
|
447
|
+
|
|
425
448
|
# Try to get actual sample name from samples_df if available
|
|
426
449
|
sample_name = str(sample) # fallback
|
|
427
450
|
if hasattr(self, "samples_df") and self.samples_df is not None and sample_uid is not None:
|
|
428
451
|
try:
|
|
429
452
|
sample_name_result = (
|
|
430
|
-
self.samples_df.filter(pl.col("sample_uid") == sample_uid)
|
|
431
|
-
.select("sample_name")
|
|
432
|
-
.to_series()
|
|
453
|
+
self.samples_df.filter(pl.col("sample_uid") == sample_uid).select("sample_name").to_series()
|
|
433
454
|
)
|
|
434
455
|
if len(sample_name_result) > 0 and sample_name_result[0] is not None:
|
|
435
456
|
sample_name = str(sample_name_result[0])
|
|
@@ -441,7 +462,7 @@ def plot_alignment(
|
|
|
441
462
|
cols_to_select = ["rt", "mz", "inty"]
|
|
442
463
|
if has_alignment:
|
|
443
464
|
cols_to_select.append("rt_original")
|
|
444
|
-
|
|
465
|
+
|
|
445
466
|
sample_dict = sample_data.select(cols_to_select).to_dicts()
|
|
446
467
|
|
|
447
468
|
for row_dict in sample_dict:
|
|
@@ -490,7 +511,7 @@ def plot_alignment(
|
|
|
490
511
|
# Get colors from samples_df if available
|
|
491
512
|
sample_uids_list = list(sample_idx_to_uid.values())
|
|
492
513
|
color_map: dict[int, str] = {}
|
|
493
|
-
|
|
514
|
+
|
|
494
515
|
if sample_uids_list and hasattr(self, "samples_df") and self.samples_df is not None:
|
|
495
516
|
try:
|
|
496
517
|
sample_colors = (
|
|
@@ -499,7 +520,7 @@ def plot_alignment(
|
|
|
499
520
|
.to_dict(as_series=False)
|
|
500
521
|
)
|
|
501
522
|
uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
|
|
502
|
-
|
|
523
|
+
|
|
503
524
|
for sample_idx, sample_uid in sample_idx_to_uid.items():
|
|
504
525
|
color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4")
|
|
505
526
|
except Exception:
|
|
@@ -522,7 +543,7 @@ def plot_alignment(
|
|
|
522
543
|
# Create Bokeh figures
|
|
523
544
|
title_before = "Original RT" if has_alignment else "Current RT (No Alignment)"
|
|
524
545
|
title_after = "Aligned RT" if has_alignment else "Current RT (Copy)"
|
|
525
|
-
|
|
546
|
+
|
|
526
547
|
p1 = figure(
|
|
527
548
|
width=width,
|
|
528
549
|
height=height,
|
|
@@ -605,6 +626,7 @@ def plot_alignment(
|
|
|
605
626
|
if filename is not None:
|
|
606
627
|
# Convert relative paths to absolute paths using study folder as base
|
|
607
628
|
import os
|
|
629
|
+
|
|
608
630
|
if not os.path.isabs(filename):
|
|
609
631
|
filename = os.path.join(self.folder, filename)
|
|
610
632
|
|
|
@@ -642,7 +664,7 @@ def plot_consensus_2d(
|
|
|
642
664
|
Parameters:
|
|
643
665
|
filename (str, optional): Path to save the plot
|
|
644
666
|
colorby (str): Column name to use for color mapping (default: "number_samples")
|
|
645
|
-
Automatically detects if column contains categorical (string) or
|
|
667
|
+
Automatically detects if column contains categorical (string) or
|
|
646
668
|
numeric data and applies appropriate color mapping:
|
|
647
669
|
- Categorical: Uses factor_cmap with distinct colors and legend
|
|
648
670
|
- Numeric: Uses LinearColorMapper with continuous colorbar
|
|
@@ -657,7 +679,7 @@ def plot_consensus_2d(
|
|
|
657
679
|
height (int): Plot height in pixels (default: 900)
|
|
658
680
|
mz_range (tuple, optional): m/z range for filtering consensus features (min_mz, max_mz)
|
|
659
681
|
rt_range (tuple, optional): Retention time range for filtering consensus features (min_rt, max_rt)
|
|
660
|
-
legend (str, optional): Legend position for categorical data. Options: 'top_right', 'top_left',
|
|
682
|
+
legend (str, optional): Legend position for categorical data. Options: 'top_right', 'top_left',
|
|
661
683
|
'bottom_right', 'bottom_left', 'right', 'left', 'top', 'bottom'.
|
|
662
684
|
If None, legend is hidden. Only applies to categorical coloring (default: "bottom_right")
|
|
663
685
|
show_none (bool): Whether to display points with None values for colorby column (default: True)
|
|
@@ -742,7 +764,7 @@ def plot_consensus_2d(
|
|
|
742
764
|
# Filter out None values for colorby column if show_none=False
|
|
743
765
|
if not show_none and colorby in data.columns:
|
|
744
766
|
data = data.filter(pl.col(colorby).is_not_null())
|
|
745
|
-
|
|
767
|
+
|
|
746
768
|
# Convert Polars DataFrame to pandas for Bokeh compatibility
|
|
747
769
|
data_pd = data.to_pandas()
|
|
748
770
|
source = ColumnDataSource(data_pd)
|
|
@@ -786,20 +808,22 @@ def plot_consensus_2d(
|
|
|
786
808
|
# Check if colorby column contains categorical data (string/object)
|
|
787
809
|
colorby_values = data[colorby].to_list()
|
|
788
810
|
is_categorical = (
|
|
789
|
-
data_pd[colorby].dtype in ["object", "string", "category"] or
|
|
790
|
-
|
|
811
|
+
data_pd[colorby].dtype in ["object", "string", "category"] or isinstance(colorby_values[0], str)
|
|
812
|
+
if colorby_values
|
|
813
|
+
else False
|
|
791
814
|
)
|
|
792
|
-
|
|
815
|
+
|
|
793
816
|
if is_categorical:
|
|
794
817
|
# Handle categorical coloring
|
|
795
818
|
# Use natural order of unique values - don't sort to preserve correct legend mapping
|
|
796
819
|
# Sorting would break the correspondence between legend labels and point colors
|
|
797
820
|
unique_values = [v for v in data_pd[colorby].unique() if v is not None]
|
|
798
|
-
|
|
821
|
+
|
|
799
822
|
# Use the custom palette from cmap if available, otherwise fall back to defaults
|
|
800
823
|
if len(palette) >= len(unique_values):
|
|
801
824
|
# Use custom colormap palette - sample evenly across the palette
|
|
802
825
|
import numpy as np
|
|
826
|
+
|
|
803
827
|
indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
|
|
804
828
|
categorical_palette = [palette[i] for i in indices]
|
|
805
829
|
elif len(unique_values) <= 20:
|
|
@@ -808,7 +832,7 @@ def plot_consensus_2d(
|
|
|
808
832
|
else:
|
|
809
833
|
# For many categories, use a subset of the viridis palette
|
|
810
834
|
categorical_palette = viridis(min(256, len(unique_values)))
|
|
811
|
-
|
|
835
|
+
|
|
812
836
|
color_mapper = factor_cmap(colorby, categorical_palette, unique_values)
|
|
813
837
|
else:
|
|
814
838
|
# Handle numeric coloring with LinearColorMapper
|
|
@@ -832,11 +856,12 @@ def plot_consensus_2d(
|
|
|
832
856
|
all_unique_values = list(data_pd[colorby].unique())
|
|
833
857
|
unique_values = [v for v in all_unique_values if v is not None]
|
|
834
858
|
has_none_values = None in all_unique_values
|
|
835
|
-
|
|
859
|
+
|
|
836
860
|
# Use the custom palette from cmap if available, otherwise fall back to defaults
|
|
837
861
|
if len(palette) >= len(unique_values):
|
|
838
862
|
# Use custom colormap palette - sample evenly across the palette
|
|
839
863
|
import numpy as np
|
|
864
|
+
|
|
840
865
|
indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
|
|
841
866
|
categorical_palette = [palette[i] for i in indices]
|
|
842
867
|
elif len(unique_values) <= 20:
|
|
@@ -844,23 +869,23 @@ def plot_consensus_2d(
|
|
|
844
869
|
categorical_palette = Category20[min(20, max(3, len(unique_values)))]
|
|
845
870
|
else:
|
|
846
871
|
categorical_palette = viridis(min(256, len(unique_values)))
|
|
847
|
-
|
|
872
|
+
|
|
848
873
|
# Handle None values with black color FIRST so they appear in the background
|
|
849
874
|
if has_none_values and show_none:
|
|
850
875
|
# Filter data for None values
|
|
851
876
|
none_data = data.filter(pl.col(colorby).is_null())
|
|
852
877
|
none_data_pd = none_data.to_pandas()
|
|
853
878
|
none_source = bp.ColumnDataSource(none_data_pd)
|
|
854
|
-
|
|
879
|
+
|
|
855
880
|
if scaling.lower() in ["dyn", "dynamic"]:
|
|
856
881
|
# Calculate appropriate radius for dynamic scaling
|
|
857
882
|
rt_range = data["rt"].max() - data["rt"].min()
|
|
858
883
|
mz_range = data["mz"].max() - data["mz"].min()
|
|
859
884
|
dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
|
|
860
|
-
|
|
885
|
+
|
|
861
886
|
renderer = p.circle(
|
|
862
887
|
x="rt",
|
|
863
|
-
y="mz",
|
|
888
|
+
y="mz",
|
|
864
889
|
radius=dynamic_radius,
|
|
865
890
|
fill_color="lightgray",
|
|
866
891
|
line_color=None,
|
|
@@ -872,32 +897,32 @@ def plot_consensus_2d(
|
|
|
872
897
|
renderer = p.scatter(
|
|
873
898
|
x="rt",
|
|
874
899
|
y="mz",
|
|
875
|
-
size="markersize",
|
|
900
|
+
size="markersize",
|
|
876
901
|
fill_color="lightgray",
|
|
877
902
|
line_color=None,
|
|
878
903
|
alpha=alpha,
|
|
879
904
|
source=none_source,
|
|
880
905
|
legend_label="None",
|
|
881
906
|
)
|
|
882
|
-
|
|
907
|
+
|
|
883
908
|
# Create a separate renderer for each non-None category (plotted on top of None values)
|
|
884
909
|
for i, category in enumerate(unique_values):
|
|
885
910
|
# Filter data for this category
|
|
886
911
|
category_data = data.filter(pl.col(colorby) == category)
|
|
887
912
|
category_data_pd = category_data.to_pandas()
|
|
888
913
|
category_source = bp.ColumnDataSource(category_data_pd)
|
|
889
|
-
|
|
914
|
+
|
|
890
915
|
color = categorical_palette[i % len(categorical_palette)]
|
|
891
|
-
|
|
916
|
+
|
|
892
917
|
if scaling.lower() in ["dyn", "dynamic"]:
|
|
893
918
|
# Calculate appropriate radius for dynamic scaling
|
|
894
919
|
rt_range = data["rt"].max() - data["rt"].min()
|
|
895
920
|
mz_range = data["mz"].max() - data["mz"].min()
|
|
896
921
|
dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
|
|
897
|
-
|
|
922
|
+
|
|
898
923
|
renderer = p.circle(
|
|
899
924
|
x="rt",
|
|
900
|
-
y="mz",
|
|
925
|
+
y="mz",
|
|
901
926
|
radius=dynamic_radius,
|
|
902
927
|
fill_color=color,
|
|
903
928
|
line_color=None,
|
|
@@ -909,17 +934,17 @@ def plot_consensus_2d(
|
|
|
909
934
|
renderer = p.scatter(
|
|
910
935
|
x="rt",
|
|
911
936
|
y="mz",
|
|
912
|
-
size="markersize",
|
|
937
|
+
size="markersize",
|
|
913
938
|
fill_color=color,
|
|
914
939
|
line_color=None,
|
|
915
940
|
alpha=alpha,
|
|
916
941
|
source=category_source,
|
|
917
942
|
legend_label=str(category),
|
|
918
943
|
)
|
|
919
|
-
|
|
944
|
+
|
|
920
945
|
# No single scatter_renderer for categorical data
|
|
921
946
|
scatter_renderer = None
|
|
922
|
-
|
|
947
|
+
|
|
923
948
|
else:
|
|
924
949
|
# Handle numeric coloring - single renderer with color mapping
|
|
925
950
|
if scaling.lower() in ["dyn", "dynamic"]:
|
|
@@ -927,7 +952,7 @@ def plot_consensus_2d(
|
|
|
927
952
|
rt_range = data["rt"].max() - data["rt"].min()
|
|
928
953
|
mz_range = data["mz"].max() - data["mz"].min()
|
|
929
954
|
dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
|
|
930
|
-
|
|
955
|
+
|
|
931
956
|
scatter_renderer = p.circle(
|
|
932
957
|
x="rt",
|
|
933
958
|
y="mz",
|
|
@@ -957,7 +982,7 @@ def plot_consensus_2d(
|
|
|
957
982
|
("number_ms2", "@number_ms2"),
|
|
958
983
|
("inty_mean", "@inty_mean"),
|
|
959
984
|
]
|
|
960
|
-
|
|
985
|
+
|
|
961
986
|
# Add id_top_* columns if they exist and have non-null values
|
|
962
987
|
id_top_columns = ["id_top_name", "id_top_adduct", "id_top_class", "id_top_score"]
|
|
963
988
|
for col in id_top_columns:
|
|
@@ -969,7 +994,7 @@ def plot_consensus_2d(
|
|
|
969
994
|
tooltips.append((col, f"@{col}{{0.0}}"))
|
|
970
995
|
else:
|
|
971
996
|
tooltips.append((col, f"@{col}"))
|
|
972
|
-
|
|
997
|
+
|
|
973
998
|
hover = HoverTool(
|
|
974
999
|
tooltips=tooltips,
|
|
975
1000
|
)
|
|
@@ -977,7 +1002,7 @@ def plot_consensus_2d(
|
|
|
977
1002
|
# For numeric data, specify the single renderer
|
|
978
1003
|
if not is_categorical and scatter_renderer:
|
|
979
1004
|
hover.renderers = [scatter_renderer]
|
|
980
|
-
|
|
1005
|
+
|
|
981
1006
|
p.add_tools(hover)
|
|
982
1007
|
|
|
983
1008
|
# add colorbar only for numeric data (LinearColorMapper)
|
|
@@ -996,15 +1021,15 @@ def plot_consensus_2d(
|
|
|
996
1021
|
# Map legend position parameter to Bokeh legend position
|
|
997
1022
|
legend_position_map = {
|
|
998
1023
|
"top_right": "top_right",
|
|
999
|
-
"top_left": "top_left",
|
|
1024
|
+
"top_left": "top_left",
|
|
1000
1025
|
"bottom_right": "bottom_right",
|
|
1001
1026
|
"bottom_left": "bottom_left",
|
|
1002
1027
|
"right": "right",
|
|
1003
1028
|
"left": "left",
|
|
1004
1029
|
"top": "top",
|
|
1005
|
-
"bottom": "bottom"
|
|
1030
|
+
"bottom": "bottom",
|
|
1006
1031
|
}
|
|
1007
|
-
|
|
1032
|
+
|
|
1008
1033
|
bokeh_legend_pos = legend_position_map.get(legend, "bottom_right")
|
|
1009
1034
|
p.legend.location = bokeh_legend_pos
|
|
1010
1035
|
p.legend.click_policy = "hide"
|
|
@@ -1015,6 +1040,7 @@ def plot_consensus_2d(
|
|
|
1015
1040
|
if filename is not None:
|
|
1016
1041
|
# Convert relative paths to absolute paths using study folder as base
|
|
1017
1042
|
import os
|
|
1043
|
+
|
|
1018
1044
|
if not os.path.isabs(filename):
|
|
1019
1045
|
filename = os.path.join(self.folder, filename)
|
|
1020
1046
|
|
|
@@ -1249,6 +1275,7 @@ def plot_samples_2d(
|
|
|
1249
1275
|
if filename is not None:
|
|
1250
1276
|
# Convert relative paths to absolute paths using study folder as base
|
|
1251
1277
|
import os
|
|
1278
|
+
|
|
1252
1279
|
if not os.path.isabs(filename):
|
|
1253
1280
|
filename = os.path.join(self.folder, filename)
|
|
1254
1281
|
|
|
@@ -1422,6 +1449,7 @@ def plot_bpc(
|
|
|
1422
1449
|
if filename is not None:
|
|
1423
1450
|
# Convert relative paths to absolute paths using study folder as base
|
|
1424
1451
|
import os
|
|
1452
|
+
|
|
1425
1453
|
if not os.path.isabs(filename):
|
|
1426
1454
|
filename = os.path.join(self.folder, filename)
|
|
1427
1455
|
|
|
@@ -1593,6 +1621,7 @@ def plot_eic(
|
|
|
1593
1621
|
if filename is not None:
|
|
1594
1622
|
# Convert relative paths to absolute paths using study folder as base
|
|
1595
1623
|
import os
|
|
1624
|
+
|
|
1596
1625
|
if not os.path.isabs(filename):
|
|
1597
1626
|
filename = os.path.join(self.folder, filename)
|
|
1598
1627
|
|
|
@@ -1659,15 +1688,13 @@ def plot_rt_correction(
|
|
|
1659
1688
|
sample_names_dict = {}
|
|
1660
1689
|
if hasattr(self, "samples_df") and self.samples_df is not None:
|
|
1661
1690
|
try:
|
|
1662
|
-
sample_name_mapping = (
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1691
|
+
sample_name_mapping = self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids)).select([
|
|
1692
|
+
"sample_uid",
|
|
1693
|
+
"sample_name",
|
|
1694
|
+
])
|
|
1695
|
+
sample_names_dict = dict(
|
|
1696
|
+
zip(sample_name_mapping["sample_uid"].to_list(), sample_name_mapping["sample_name"].to_list())
|
|
1666
1697
|
)
|
|
1667
|
-
sample_names_dict = dict(zip(
|
|
1668
|
-
sample_name_mapping["sample_uid"].to_list(),
|
|
1669
|
-
sample_name_mapping["sample_name"].to_list()
|
|
1670
|
-
))
|
|
1671
1698
|
except Exception:
|
|
1672
1699
|
pass
|
|
1673
1700
|
|
|
@@ -1686,10 +1713,8 @@ def plot_rt_correction(
|
|
|
1686
1713
|
# OPTIMIZED: Filter once, group once instead of per-sample filtering
|
|
1687
1714
|
try:
|
|
1688
1715
|
# Filter all data once for selected samples and required conditions
|
|
1689
|
-
all_sample_feats = self.features_df.filter(
|
|
1690
|
-
|
|
1691
|
-
)
|
|
1692
|
-
|
|
1716
|
+
all_sample_feats = self.features_df.filter(pl.col(sample_id_col).is_in(sample_uids))
|
|
1717
|
+
|
|
1693
1718
|
if all_sample_feats.is_empty():
|
|
1694
1719
|
self.logger.warning("No features found for the selected samples.")
|
|
1695
1720
|
return
|
|
@@ -1708,14 +1733,8 @@ def plot_rt_correction(
|
|
|
1708
1733
|
|
|
1709
1734
|
# Filter nulls, add delta column, and sort - all in one operation
|
|
1710
1735
|
all_sample_feats = (
|
|
1711
|
-
all_sample_feats
|
|
1712
|
-
.
|
|
1713
|
-
pl.col("rt").is_not_null() &
|
|
1714
|
-
pl.col("rt_original").is_not_null()
|
|
1715
|
-
)
|
|
1716
|
-
.with_columns([
|
|
1717
|
-
(pl.col("rt") - pl.col("rt_original")).alias("delta")
|
|
1718
|
-
])
|
|
1736
|
+
all_sample_feats.filter(pl.col("rt").is_not_null() & pl.col("rt_original").is_not_null())
|
|
1737
|
+
.with_columns([(pl.col("rt") - pl.col("rt_original")).alias("delta")])
|
|
1719
1738
|
.sort([sample_id_col, "rt"])
|
|
1720
1739
|
)
|
|
1721
1740
|
|
|
@@ -1770,6 +1789,7 @@ def plot_rt_correction(
|
|
|
1770
1789
|
if filename is not None:
|
|
1771
1790
|
# Convert relative paths to absolute paths using study folder as base
|
|
1772
1791
|
import os
|
|
1792
|
+
|
|
1773
1793
|
if not os.path.isabs(filename):
|
|
1774
1794
|
filename = os.path.join(self.folder, filename)
|
|
1775
1795
|
|
|
@@ -1882,7 +1902,7 @@ def plot_chrom(
|
|
|
1882
1902
|
curve = hv.Curve(
|
|
1883
1903
|
(rt, inty, sample_names_array, sample_uids_array, sample_colors_array),
|
|
1884
1904
|
kdims=["RT"],
|
|
1885
|
-
vdims=["inty", "sample_name", "sample_uid", "sample_color"]
|
|
1905
|
+
vdims=["inty", "sample_name", "sample_uid", "sample_color"],
|
|
1886
1906
|
).opts(
|
|
1887
1907
|
color=color_map[sample],
|
|
1888
1908
|
line_width=1,
|
|
@@ -1892,8 +1912,8 @@ def plot_chrom(
|
|
|
1892
1912
|
("Intensity", "@inty{0,0}"),
|
|
1893
1913
|
("Sample Name", "@sample_name"),
|
|
1894
1914
|
("Sample UID", "@sample_uid"),
|
|
1895
|
-
("Sample Color", "$color[swatch]:sample_color")
|
|
1896
|
-
]
|
|
1915
|
+
("Sample Color", "$color[swatch]:sample_color"),
|
|
1916
|
+
],
|
|
1897
1917
|
)
|
|
1898
1918
|
curves.append(curve)
|
|
1899
1919
|
|
|
@@ -1957,6 +1977,7 @@ def plot_chrom(
|
|
|
1957
1977
|
if filename is not None:
|
|
1958
1978
|
# Convert relative paths to absolute paths using study folder as base
|
|
1959
1979
|
import os
|
|
1980
|
+
|
|
1960
1981
|
if not os.path.isabs(filename):
|
|
1961
1982
|
filename = os.path.join(self.folder, filename)
|
|
1962
1983
|
|
|
@@ -1989,7 +2010,7 @@ def plot_consensus_stats(
|
|
|
1989
2010
|
):
|
|
1990
2011
|
"""
|
|
1991
2012
|
Plot histograms/distributions for specific consensus statistics in the requested order.
|
|
1992
|
-
|
|
2013
|
+
|
|
1993
2014
|
Shows the following properties in order:
|
|
1994
2015
|
1. rt: Retention time
|
|
1995
2016
|
2. rt_delta_mean: Mean retention time delta
|
|
@@ -2003,7 +2024,7 @@ def plot_consensus_stats(
|
|
|
2003
2024
|
10. chrom_coherence_mean: Mean chromatographic coherence
|
|
2004
2025
|
11. chrom_height_scaled_mean: Mean scaled chromatographic height
|
|
2005
2026
|
12. chrom_prominence_scaled_mean: Mean scaled chromatographic prominence
|
|
2006
|
-
|
|
2027
|
+
|
|
2007
2028
|
Parameters:
|
|
2008
2029
|
filename (str, optional): Output filename for saving the plot
|
|
2009
2030
|
width (int): Overall width of the plot (default: 840)
|
|
@@ -2019,7 +2040,7 @@ def plot_consensus_stats(
|
|
|
2019
2040
|
|
|
2020
2041
|
# Get the consensus statistics data using the new helper method
|
|
2021
2042
|
data_df = self.get_consensus_stats()
|
|
2022
|
-
|
|
2043
|
+
|
|
2023
2044
|
if data_df is None or data_df.is_empty():
|
|
2024
2045
|
self.logger.error("No consensus statistics data available.")
|
|
2025
2046
|
return
|
|
@@ -2032,39 +2053,52 @@ def plot_consensus_stats(
|
|
|
2032
2053
|
|
|
2033
2054
|
# Define specific columns to plot in the exact order requested (excluding consensus_uid)
|
|
2034
2055
|
desired_columns = [
|
|
2035
|
-
"rt",
|
|
2036
|
-
"rt_delta_mean",
|
|
2037
|
-
"mz",
|
|
2056
|
+
"rt",
|
|
2057
|
+
"rt_delta_mean",
|
|
2058
|
+
"mz",
|
|
2038
2059
|
"mz_range", # mz_max-mz_min
|
|
2039
2060
|
"log10_inty_mean", # log10(inty_mean)
|
|
2040
|
-
"number_samples",
|
|
2041
|
-
"number_ms2",
|
|
2042
|
-
"charge_mean",
|
|
2043
|
-
"quality",
|
|
2044
|
-
"chrom_coherence_mean",
|
|
2045
|
-
"chrom_height_scaled_mean",
|
|
2046
|
-
"chrom_prominence_scaled_mean"
|
|
2061
|
+
"number_samples",
|
|
2062
|
+
"number_ms2",
|
|
2063
|
+
"charge_mean",
|
|
2064
|
+
"quality",
|
|
2065
|
+
"chrom_coherence_mean",
|
|
2066
|
+
"chrom_height_scaled_mean",
|
|
2067
|
+
"chrom_prominence_scaled_mean",
|
|
2047
2068
|
]
|
|
2048
|
-
|
|
2069
|
+
|
|
2049
2070
|
# Filter to only include columns that exist in the dataframe, preserving order
|
|
2050
2071
|
numeric_columns = [col for col in desired_columns if col in data_df_clean.columns]
|
|
2051
|
-
|
|
2072
|
+
|
|
2052
2073
|
# Check if the numeric columns are actually numeric
|
|
2053
2074
|
final_numeric_columns = []
|
|
2054
2075
|
for col in numeric_columns:
|
|
2055
2076
|
dtype = data_df_clean[col].dtype
|
|
2056
|
-
if dtype in [
|
|
2057
|
-
|
|
2058
|
-
|
|
2077
|
+
if dtype in [
|
|
2078
|
+
pl.Int8,
|
|
2079
|
+
pl.Int16,
|
|
2080
|
+
pl.Int32,
|
|
2081
|
+
pl.Int64,
|
|
2082
|
+
pl.UInt8,
|
|
2083
|
+
pl.UInt16,
|
|
2084
|
+
pl.UInt32,
|
|
2085
|
+
pl.UInt64,
|
|
2086
|
+
pl.Float32,
|
|
2087
|
+
pl.Float64,
|
|
2088
|
+
]:
|
|
2059
2089
|
final_numeric_columns.append(col)
|
|
2060
|
-
|
|
2090
|
+
|
|
2061
2091
|
numeric_columns = final_numeric_columns
|
|
2062
2092
|
|
|
2063
2093
|
if len(numeric_columns) == 0:
|
|
2064
|
-
self.logger.error(
|
|
2094
|
+
self.logger.error(
|
|
2095
|
+
f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df_clean.columns)}"
|
|
2096
|
+
)
|
|
2065
2097
|
return
|
|
2066
2098
|
|
|
2067
|
-
self.logger.debug(
|
|
2099
|
+
self.logger.debug(
|
|
2100
|
+
f"Creating distribution plots for {len(numeric_columns)} specific consensus columns: {numeric_columns}"
|
|
2101
|
+
)
|
|
2068
2102
|
|
|
2069
2103
|
# Select only the numeric columns for plotting
|
|
2070
2104
|
data_df_clean = data_df_clean.select(numeric_columns)
|
|
@@ -2073,15 +2107,23 @@ def plot_consensus_stats(
|
|
|
2073
2107
|
all_columns_empty = True
|
|
2074
2108
|
for col in numeric_columns:
|
|
2075
2109
|
# Check if column has any non-null, finite values
|
|
2076
|
-
non_null_count =
|
|
2077
|
-
data_df_clean[col]
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2110
|
+
non_null_count = (
|
|
2111
|
+
data_df_clean[col]
|
|
2112
|
+
.filter(
|
|
2113
|
+
data_df_clean[col].is_not_null()
|
|
2114
|
+
& (
|
|
2115
|
+
data_df_clean[col].is_finite()
|
|
2116
|
+
if data_df_clean[col].dtype in [pl.Float32, pl.Float64]
|
|
2117
|
+
else pl.lit(True)
|
|
2118
|
+
)
|
|
2119
|
+
)
|
|
2120
|
+
.len()
|
|
2121
|
+
)
|
|
2122
|
+
|
|
2081
2123
|
if non_null_count > 0:
|
|
2082
2124
|
all_columns_empty = False
|
|
2083
2125
|
break
|
|
2084
|
-
|
|
2126
|
+
|
|
2085
2127
|
if all_columns_empty:
|
|
2086
2128
|
self.logger.error("All numeric columns contain only NaN/infinite values.")
|
|
2087
2129
|
return
|
|
@@ -2089,24 +2131,24 @@ def plot_consensus_stats(
|
|
|
2089
2131
|
# Calculate grid dimensions
|
|
2090
2132
|
n_plots = len(numeric_columns)
|
|
2091
2133
|
n_rows = (n_plots + n_cols - 1) // n_cols # Ceiling division
|
|
2092
|
-
|
|
2134
|
+
|
|
2093
2135
|
# Auto-calculate height if not provided
|
|
2094
2136
|
if height is None:
|
|
2095
2137
|
plot_height = 210 # Reduced from 300 (30% smaller)
|
|
2096
2138
|
height = plot_height * n_rows + 56 # Reduced from 80 (30% smaller)
|
|
2097
2139
|
else:
|
|
2098
2140
|
plot_height = (height - 56) // n_rows # Reduced padding (30% smaller)
|
|
2099
|
-
|
|
2141
|
+
|
|
2100
2142
|
plot_width = (width - 56) // n_cols # Reduced padding (30% smaller)
|
|
2101
2143
|
|
|
2102
2144
|
# Create plots grid
|
|
2103
2145
|
plots = []
|
|
2104
2146
|
current_row = []
|
|
2105
|
-
|
|
2147
|
+
|
|
2106
2148
|
for i, col in enumerate(numeric_columns):
|
|
2107
2149
|
# Check if this column should use log scale for y-axis
|
|
2108
2150
|
y_axis_type = "log" if col in ["number_samples", "number_ms2"] else "linear"
|
|
2109
|
-
|
|
2151
|
+
|
|
2110
2152
|
# Create histogram for this column
|
|
2111
2153
|
p = figure(
|
|
2112
2154
|
width=plot_width,
|
|
@@ -2114,30 +2156,28 @@ def plot_consensus_stats(
|
|
|
2114
2156
|
title=col,
|
|
2115
2157
|
toolbar_location="above",
|
|
2116
2158
|
tools="pan,wheel_zoom,box_zoom,reset,save",
|
|
2117
|
-
y_axis_type=y_axis_type
|
|
2159
|
+
y_axis_type=y_axis_type,
|
|
2118
2160
|
)
|
|
2119
|
-
|
|
2161
|
+
|
|
2120
2162
|
# Set white background
|
|
2121
2163
|
p.background_fill_color = "white"
|
|
2122
2164
|
p.border_fill_color = "white"
|
|
2123
|
-
|
|
2165
|
+
|
|
2124
2166
|
# Calculate histogram using Polars
|
|
2125
2167
|
# Get valid (non-null, finite) values for this column
|
|
2126
2168
|
if data_df_clean[col].dtype in [pl.Float32, pl.Float64]:
|
|
2127
|
-
valid_values = data_df_clean.filter(
|
|
2128
|
-
data_df_clean[col].is_not_null() & data_df_clean[col].is_finite()
|
|
2129
|
-
)[col]
|
|
2169
|
+
valid_values = data_df_clean.filter(data_df_clean[col].is_not_null() & data_df_clean[col].is_finite())[col]
|
|
2130
2170
|
else:
|
|
2131
2171
|
valid_values = data_df_clean.filter(data_df_clean[col].is_not_null())[col]
|
|
2132
|
-
|
|
2172
|
+
|
|
2133
2173
|
if valid_values.len() == 0:
|
|
2134
2174
|
self.logger.warning(f"No valid values for column {col}")
|
|
2135
2175
|
continue
|
|
2136
|
-
|
|
2176
|
+
|
|
2137
2177
|
# Convert to numpy for histogram calculation
|
|
2138
2178
|
values_array = valid_values.to_numpy()
|
|
2139
2179
|
hist, edges = np.histogram(values_array, bins=bins)
|
|
2140
|
-
|
|
2180
|
+
|
|
2141
2181
|
# Handle log y-axis: replace zero counts with small positive values
|
|
2142
2182
|
if y_axis_type == "log":
|
|
2143
2183
|
# Replace zero counts with a small value (1e-1) to make them visible on log scale
|
|
@@ -2146,7 +2186,7 @@ def plot_consensus_stats(
|
|
|
2146
2186
|
else:
|
|
2147
2187
|
hist_log_safe = hist
|
|
2148
2188
|
bottom_val = 0
|
|
2149
|
-
|
|
2189
|
+
|
|
2150
2190
|
# Create histogram bars
|
|
2151
2191
|
p.quad(
|
|
2152
2192
|
top=hist_log_safe,
|
|
@@ -2157,7 +2197,7 @@ def plot_consensus_stats(
|
|
|
2157
2197
|
line_color="white",
|
|
2158
2198
|
alpha=alpha,
|
|
2159
2199
|
)
|
|
2160
|
-
|
|
2200
|
+
|
|
2161
2201
|
# Style the plot
|
|
2162
2202
|
p.title.text_font_size = "10pt" # Reduced from 12pt
|
|
2163
2203
|
p.xaxis.axis_label = "" # Remove x-axis title
|
|
@@ -2166,12 +2206,12 @@ def plot_consensus_stats(
|
|
|
2166
2206
|
p.grid.grid_line_dash = [6, 4] # Dashed grid lines
|
|
2167
2207
|
p.xgrid.visible = False # Hide x-axis grid
|
|
2168
2208
|
p.outline_line_color = None # Remove gray border around plot area
|
|
2169
|
-
|
|
2209
|
+
|
|
2170
2210
|
# Remove y-axis label but keep y-axis visible
|
|
2171
2211
|
p.yaxis.axis_label = ""
|
|
2172
|
-
|
|
2212
|
+
|
|
2173
2213
|
current_row.append(p)
|
|
2174
|
-
|
|
2214
|
+
|
|
2175
2215
|
# If we've filled a row or reached the end, add the row to plots
|
|
2176
2216
|
if len(current_row) == n_cols or i == n_plots - 1:
|
|
2177
2217
|
# Fill remaining spots in the last row with None if needed
|
|
@@ -2182,15 +2222,15 @@ def plot_consensus_stats(
|
|
|
2182
2222
|
|
|
2183
2223
|
# Create grid layout with white background
|
|
2184
2224
|
grid = gridplot(plots, toolbar_location="above", merge_tools=True)
|
|
2185
|
-
|
|
2225
|
+
|
|
2186
2226
|
# The background should be white by default in Bokeh
|
|
2187
2227
|
# Individual plots already have white backgrounds set above
|
|
2188
2228
|
|
|
2189
|
-
|
|
2190
2229
|
# Apply consistent save/display behavior
|
|
2191
2230
|
if filename is not None:
|
|
2192
2231
|
# Convert relative paths to absolute paths using study folder as base
|
|
2193
2232
|
import os
|
|
2233
|
+
|
|
2194
2234
|
if not os.path.isabs(filename):
|
|
2195
2235
|
filename = os.path.join(self.folder, filename)
|
|
2196
2236
|
|
|
@@ -2456,6 +2496,7 @@ def plot_samples_pca(
|
|
|
2456
2496
|
if filename is not None:
|
|
2457
2497
|
# Convert relative paths to absolute paths using study folder as base
|
|
2458
2498
|
import os
|
|
2499
|
+
|
|
2459
2500
|
if not os.path.isabs(filename):
|
|
2460
2501
|
filename = os.path.join(self.folder, filename)
|
|
2461
2502
|
|
|
@@ -2503,7 +2544,7 @@ def plot_samples_umap(
|
|
|
2503
2544
|
random_state (int or None): Random state for reproducibility (default: 42).
|
|
2504
2545
|
- Use an integer (e.g., 42) for reproducible results (slower, single-threaded)
|
|
2505
2546
|
- Use None for faster computation with multiple cores (non-reproducible)
|
|
2506
|
-
|
|
2547
|
+
|
|
2507
2548
|
Note:
|
|
2508
2549
|
Setting random_state forces single-threaded computation but ensures reproducible results.
|
|
2509
2550
|
Set random_state=None to enable parallel processing for faster computation.
|
|
@@ -2574,7 +2615,7 @@ def plot_samples_umap(
|
|
|
2574
2615
|
min_dist=min_dist,
|
|
2575
2616
|
metric=metric,
|
|
2576
2617
|
random_state=random_state,
|
|
2577
|
-
n_jobs=1
|
|
2618
|
+
n_jobs=1,
|
|
2578
2619
|
)
|
|
2579
2620
|
umap_result = reducer.fit_transform(matrix_scaled)
|
|
2580
2621
|
|
|
@@ -2743,6 +2784,7 @@ def plot_samples_umap(
|
|
|
2743
2784
|
if filename is not None:
|
|
2744
2785
|
# Convert relative paths to absolute paths using study folder as base
|
|
2745
2786
|
import os
|
|
2787
|
+
|
|
2746
2788
|
if not os.path.isabs(filename):
|
|
2747
2789
|
filename = os.path.join(self.folder, filename)
|
|
2748
2790
|
|
|
@@ -2897,6 +2939,7 @@ def plot_tic(
|
|
|
2897
2939
|
if filename is not None:
|
|
2898
2940
|
# Convert relative paths to absolute paths using study folder as base
|
|
2899
2941
|
import os
|
|
2942
|
+
|
|
2900
2943
|
if not os.path.isabs(filename):
|
|
2901
2944
|
filename = os.path.join(self.folder, filename)
|
|
2902
2945
|
|
|
@@ -2915,11 +2958,14 @@ def plot_tic(
|
|
|
2915
2958
|
def plot_pca(self, *args, **kwargs):
|
|
2916
2959
|
"""Deprecated: Use plot_samples_pca instead."""
|
|
2917
2960
|
import warnings
|
|
2961
|
+
|
|
2918
2962
|
warnings.warn("plot_pca is deprecated, use plot_samples_pca instead", DeprecationWarning, stacklevel=2)
|
|
2919
2963
|
return self.plot_samples_pca(*args, **kwargs)
|
|
2920
2964
|
|
|
2965
|
+
|
|
2921
2966
|
def plot_umap(self, *args, **kwargs):
|
|
2922
2967
|
"""Deprecated: Use plot_samples_umap instead."""
|
|
2923
2968
|
import warnings
|
|
2969
|
+
|
|
2924
2970
|
warnings.warn("plot_umap is deprecated, use plot_samples_umap instead", DeprecationWarning, stacklevel=2)
|
|
2925
2971
|
return self.plot_samples_umap(*args, **kwargs)
|