masster 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/sample/plot.py +317 -50
- masster/sample/sample.py +8 -6
- masster/study/h5.py +8 -0
- masster/study/helpers.py +87 -24
- masster/study/load.py +38 -8
- masster/study/plot.py +62 -15
- masster/study/save.py +22 -10
- masster/study/study.py +11 -5
- masster/study/study5_schema.json +6 -0
- {masster-0.3.0.dist-info → masster-0.3.2.dist-info}/METADATA +1 -1
- {masster-0.3.0.dist-info → masster-0.3.2.dist-info}/RECORD +14 -14
- {masster-0.3.0.dist-info → masster-0.3.2.dist-info}/WHEEL +0 -0
- {masster-0.3.0.dist-info → masster-0.3.2.dist-info}/entry_points.txt +0 -0
- {masster-0.3.0.dist-info → masster-0.3.2.dist-info}/licenses/LICENSE +0 -0
masster/sample/plot.py
CHANGED
|
@@ -22,13 +22,13 @@ Dependencies:
|
|
|
22
22
|
- `numpy`: For numerical computations.
|
|
23
23
|
|
|
24
24
|
Functions:
|
|
25
|
-
- `
|
|
25
|
+
- `plot_chrom()`: Generate chromatograms with feature overlays.
|
|
26
26
|
- `plot_2d()`: Create 2D mass spectrometry data visualizations.
|
|
27
27
|
- `plot_features()`: Visualize detected features in retention time vs m/z space.
|
|
28
28
|
- Various utility functions for plot styling and configuration.
|
|
29
29
|
|
|
30
30
|
Supported Plot Types:
|
|
31
|
-
-
|
|
31
|
+
- Chromatograms
|
|
32
32
|
- Total Ion Chromatograms (TIC)
|
|
33
33
|
- Base Peak Chromatograms (BPC)
|
|
34
34
|
- 2D intensity maps (RT vs m/z)
|
|
@@ -63,7 +63,74 @@ from matplotlib.colors import rgb2hex
|
|
|
63
63
|
hv.extension("bokeh")
|
|
64
64
|
|
|
65
65
|
|
|
66
|
-
def
|
|
66
|
+
def _is_notebook_environment():
|
|
67
|
+
"""
|
|
68
|
+
Detect if code is running in a notebook environment (Jupyter, JupyterLab, or Marimo).
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
bool: True if running in a notebook, False otherwise
|
|
72
|
+
"""
|
|
73
|
+
try:
|
|
74
|
+
# Check for Jupyter/JupyterLab
|
|
75
|
+
from IPython import get_ipython
|
|
76
|
+
if get_ipython() is not None:
|
|
77
|
+
# Check if we're in a notebook context
|
|
78
|
+
shell = get_ipython().__class__.__name__
|
|
79
|
+
if shell in ['ZMQInteractiveShell', 'Shell']: # Jupyter notebook/lab
|
|
80
|
+
return True
|
|
81
|
+
|
|
82
|
+
# Check for Marimo
|
|
83
|
+
import sys
|
|
84
|
+
if 'marimo' in sys.modules:
|
|
85
|
+
return True
|
|
86
|
+
|
|
87
|
+
# Additional check for notebook environments
|
|
88
|
+
if hasattr(__builtins__, '__IPYTHON__') or hasattr(__builtins__, '_ih'):
|
|
89
|
+
return True
|
|
90
|
+
|
|
91
|
+
except ImportError:
|
|
92
|
+
pass
|
|
93
|
+
|
|
94
|
+
return False
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _display_plot(plot_object, layout=None):
|
|
98
|
+
"""
|
|
99
|
+
Display a plot object in the appropriate way based on the environment.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
plot_object: The plot object to display (holoviews overlay, etc.)
|
|
103
|
+
layout: Optional panel layout object
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
The plot object (or, as a fallback, a panel layout) if in a notebook environment; None otherwise, after the plot has been shown in the browser
|
|
107
|
+
"""
|
|
108
|
+
if _is_notebook_environment():
|
|
109
|
+
# Display inline in notebook
|
|
110
|
+
try:
|
|
111
|
+
# For Jupyter notebooks, just return the plot object -
|
|
112
|
+
# holoviews will handle the display automatically
|
|
113
|
+
return plot_object
|
|
114
|
+
except Exception:
|
|
115
|
+
# Fallback to panel display for other notebook environments
|
|
116
|
+
if layout is not None:
|
|
117
|
+
return layout
|
|
118
|
+
else:
|
|
119
|
+
# Create a simple layout if none provided
|
|
120
|
+
simple_layout = panel.Column(plot_object)
|
|
121
|
+
return simple_layout
|
|
122
|
+
else:
|
|
123
|
+
# Display in browser (original behavior)
|
|
124
|
+
if layout is not None:
|
|
125
|
+
layout.show()
|
|
126
|
+
else:
|
|
127
|
+
# Create a simple layout for browser display
|
|
128
|
+
simple_layout = panel.Column(plot_object)
|
|
129
|
+
simple_layout.show()
|
|
130
|
+
return None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def plot_chrom(
|
|
67
134
|
self,
|
|
68
135
|
feature_uid=None,
|
|
69
136
|
filename=None,
|
|
@@ -74,16 +141,16 @@ def plot_eic(
|
|
|
74
141
|
link_x=False,
|
|
75
142
|
):
|
|
76
143
|
"""
|
|
77
|
-
Plot
|
|
144
|
+
Plot chromatograms for one or more features using MS1 data and feature metadata.
|
|
78
145
|
|
|
79
146
|
This function filters MS1 data based on retention time (rt) and mass-to-charge ratio (mz) windows
|
|
80
|
-
derived from feature information in `features_df`. It then generates interactive
|
|
147
|
+
derived from feature information in `features_df`. It then generates interactive chromatogram plots using
|
|
81
148
|
HoloViews, with feature retention time windows annotated. Plots can be displayed interactively or
|
|
82
149
|
saved to a file.
|
|
83
150
|
|
|
84
151
|
Parameters:
|
|
85
152
|
feature_uid (int or list of int, optional):
|
|
86
|
-
Feature identifier(s) for
|
|
153
|
+
Feature identifier(s) for chromatogram generation. If None, chromatograms for all features in `features_df` are plotted.
|
|
87
154
|
filename (str, optional):
|
|
88
155
|
Output file path. If ending with `.html`, saves as interactive HTML; otherwise, saves as PNG.
|
|
89
156
|
If not provided, displays the plot interactively.
|
|
@@ -96,7 +163,7 @@ def plot_eic(
|
|
|
96
163
|
mz_tol_factor_plot (float, default=1):
|
|
97
164
|
Multiplier applied to `mz_tol` to widen the m/z window used for plotting.
|
|
98
165
|
link_x (bool, default=False):
|
|
99
|
-
If True, links the x-axes (retention time) across all
|
|
166
|
+
If True, links the x-axes (retention time) across all chromatogram subplots.
|
|
100
167
|
|
|
101
168
|
Returns:
|
|
102
169
|
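None when the plot is saved to a file or shown in the browser; the plot object when it is displayed inline in a notebook.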
|
|
@@ -106,7 +173,7 @@ def plot_eic(
|
|
|
106
173
|
- Aggregates MS1 intensities by retention time.
|
|
107
174
|
- Utilizes HoloViews for visualization and Panel for layout/display.
|
|
108
175
|
"""
|
|
109
|
-
# plots the
|
|
176
|
+
# plots the chromatogram for a given feature id
|
|
110
177
|
# If rt or mz are not provided, they are extracted from features_df using the supplied feature id (feature_uid)
|
|
111
178
|
|
|
112
179
|
feature_uids = feature_uid
|
|
@@ -121,7 +188,7 @@ def plot_eic(
|
|
|
121
188
|
|
|
122
189
|
# make sure feature_uid is a list of integers
|
|
123
190
|
|
|
124
|
-
|
|
191
|
+
chrom_plots = []
|
|
125
192
|
feature_uids = feats["feature_uid"].values.tolist()
|
|
126
193
|
mz_tol_plot = mz_tol * mz_tol_factor_plot
|
|
127
194
|
rt_tol_plot = rt_tol * rt_tol_factor_plot
|
|
@@ -137,29 +204,29 @@ def plot_eic(
|
|
|
137
204
|
mz_end = feature_row["mz_end"].values[0]
|
|
138
205
|
|
|
139
206
|
# filter self.ms1_df with rt_start, rt_end, mz_start, mz_end
|
|
140
|
-
|
|
207
|
+
chrom_df = self.ms1_df.filter(
|
|
141
208
|
pl.col("rt") >= rt_start - rt_tol_plot,
|
|
142
209
|
pl.col("rt") <= rt_end + rt_tol_plot,
|
|
143
210
|
)
|
|
144
|
-
|
|
211
|
+
chrom_df = chrom_df.filter(
|
|
145
212
|
pl.col("mz") >= mz_start - mz_tol_plot,
|
|
146
213
|
pl.col("mz") <= mz_end + mz_tol_plot,
|
|
147
214
|
)
|
|
148
215
|
|
|
149
|
-
if
|
|
216
|
+
if chrom_df.is_empty():
|
|
150
217
|
print("No MS1 data found in the specified window.")
|
|
151
218
|
continue
|
|
152
219
|
|
|
153
220
|
# convert to pandas DataFrame
|
|
154
|
-
|
|
221
|
+
chrom_df = chrom_df.to_pandas()
|
|
155
222
|
# aggregate all points with the same rt using the sum of inty
|
|
156
|
-
|
|
223
|
+
chrom_df = chrom_df.groupby("rt").agg({"inty": "sum"}).reset_index()
|
|
157
224
|
yname = f"inty_{feature_uid}"
|
|
158
|
-
|
|
225
|
+
chrom_df.rename(columns={"inty": yname}, inplace=True)
|
|
159
226
|
|
|
160
|
-
# Plot the
|
|
161
|
-
|
|
162
|
-
title=f"
|
|
227
|
+
# Plot the chromatogram using bokeh and ensure axes are independent by setting axiswise=True
|
|
228
|
+
chrom = hv.Curve(chrom_df, kdims=["rt"], vdims=[yname]).opts(
|
|
229
|
+
title=f"Chromatogram for feature {feature_uid}, mz = {mz:.4f}",
|
|
163
230
|
xlabel="Retention time (s)",
|
|
164
231
|
ylabel="Intensity",
|
|
165
232
|
width=1000,
|
|
@@ -170,13 +237,13 @@ def plot_eic(
|
|
|
170
237
|
)
|
|
171
238
|
|
|
172
239
|
# Add vertical lines at the start and end of the retention time
|
|
173
|
-
|
|
240
|
+
chrom = chrom * hv.VLine(rt_start).opts(
|
|
174
241
|
color="blue",
|
|
175
242
|
line_width=1,
|
|
176
243
|
line_dash="dashed",
|
|
177
244
|
axiswise=True,
|
|
178
245
|
)
|
|
179
|
-
|
|
246
|
+
chrom = chrom * hv.VLine(rt_end).opts(
|
|
180
247
|
color="blue",
|
|
181
248
|
line_width=1,
|
|
182
249
|
line_dash="dashed",
|
|
@@ -184,12 +251,12 @@ def plot_eic(
|
|
|
184
251
|
)
|
|
185
252
|
|
|
186
253
|
# Append the subplot without linking axes
|
|
187
|
-
|
|
254
|
+
chrom_plots.append(chrom)
|
|
188
255
|
if link_x:
|
|
189
|
-
# Create a layout with shared x-axis for all
|
|
190
|
-
layout = hv.Layout(
|
|
256
|
+
# Create a layout with shared x-axis for all chromatogram plots
|
|
257
|
+
layout = hv.Layout(chrom_plots).opts(shared_axes=True)
|
|
191
258
|
else:
|
|
192
|
-
layout = hv.Layout(
|
|
259
|
+
layout = hv.Layout(chrom_plots).opts(shared_axes=False)
|
|
193
260
|
|
|
194
261
|
layout = layout.cols(1)
|
|
195
262
|
layout = panel.Column(layout)
|
|
@@ -201,8 +268,8 @@ def plot_eic(
|
|
|
201
268
|
# save the panel layout as a png
|
|
202
269
|
hv.save(layout, filename, fmt="png")
|
|
203
270
|
else:
|
|
204
|
-
#
|
|
205
|
-
layout.
|
|
271
|
+
# Check if we're in a notebook environment and display appropriately
|
|
272
|
+
return _display_plot(layout.object, layout)
|
|
206
273
|
|
|
207
274
|
|
|
208
275
|
def plot_2d(
|
|
@@ -216,9 +283,12 @@ def plot_2d(
|
|
|
216
283
|
cmap=None,
|
|
217
284
|
marker="circle",
|
|
218
285
|
markersize=10,
|
|
286
|
+
size="dynamic",
|
|
219
287
|
raster_dynamic=True,
|
|
220
288
|
raster_max_px=8,
|
|
221
289
|
raster_threshold=0.8,
|
|
290
|
+
height=600,
|
|
291
|
+
width=800,
|
|
222
292
|
mz_range=None,
|
|
223
293
|
rt_range=None,
|
|
224
294
|
):
|
|
@@ -251,6 +321,11 @@ def plot_2d(
|
|
|
251
321
|
Marker type to use for feature and MS2 points.
|
|
252
322
|
markersize (int, default 10):
|
|
253
323
|
Base size of the markers used for plotting points.
|
|
324
|
+
size (str, default 'dynamic'):
|
|
325
|
+
Controls marker sizing behavior. Options: 'dynamic', 'static', or 'slider'.
|
|
326
|
+
- 'dynamic': Uses coordinate-based sizing that scales with zoom level (markers get larger when zooming in)
|
|
327
|
+
- 'static': Uses screen-based sizing that remains constant regardless of zoom level
|
|
328
|
+
- 'slider': Provides an interactive slider to dynamically adjust marker size
|
|
254
329
|
raster_dynamic (bool, default True):
|
|
255
330
|
Whether to use dynamic rasterization for the background point cloud.
|
|
256
331
|
raster_max_px (int, default 8):
|
|
@@ -290,9 +365,9 @@ def plot_2d(
|
|
|
290
365
|
# keep only rt, mz, and inty
|
|
291
366
|
spectradf = spectradf.select(["rt", "mz", "inty"])
|
|
292
367
|
if mz_range is not None:
|
|
293
|
-
spectradf = spectradf
|
|
368
|
+
spectradf = spectradf.filter((pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]))
|
|
294
369
|
if rt_range is not None:
|
|
295
|
-
spectradf = spectradf
|
|
370
|
+
spectradf = spectradf.filter((pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]))
|
|
296
371
|
maxrt = spectradf["rt"].max()
|
|
297
372
|
minrt = spectradf["rt"].min()
|
|
298
373
|
maxmz = spectradf["mz"].max()
|
|
@@ -317,19 +392,81 @@ def plot_2d(
|
|
|
317
392
|
tools=["hover"],
|
|
318
393
|
)
|
|
319
394
|
|
|
320
|
-
|
|
395
|
+
# Configure marker and size behavior based on size parameter
|
|
396
|
+
use_dynamic_sizing = size.lower() in ["dyn", "dynamic"]
|
|
397
|
+
use_slider_sizing = size.lower() == "slider"
|
|
398
|
+
|
|
399
|
+
def dynamic_sizing_hook(plot, element):
|
|
400
|
+
"""Hook to convert size-based markers to radius-based for dynamic behavior"""
|
|
401
|
+
try:
|
|
402
|
+
if use_dynamic_sizing and hasattr(plot, 'state') and hasattr(plot.state, 'renderers'):
|
|
403
|
+
from bokeh.models import Circle
|
|
404
|
+
for renderer in plot.state.renderers:
|
|
405
|
+
if hasattr(renderer, 'glyph'):
|
|
406
|
+
glyph = renderer.glyph
|
|
407
|
+
# Check if it's a circle/scatter glyph that we can convert
|
|
408
|
+
if hasattr(glyph, 'size') and marker_type == "circle":
|
|
409
|
+
# Create a new Circle glyph with radius instead of size
|
|
410
|
+
new_glyph = Circle(
|
|
411
|
+
x=glyph.x,
|
|
412
|
+
y=glyph.y,
|
|
413
|
+
radius=base_radius,
|
|
414
|
+
fill_color=glyph.fill_color,
|
|
415
|
+
line_color=glyph.line_color,
|
|
416
|
+
fill_alpha=glyph.fill_alpha,
|
|
417
|
+
line_alpha=glyph.line_alpha,
|
|
418
|
+
)
|
|
419
|
+
renderer.glyph = new_glyph
|
|
420
|
+
except Exception:
|
|
421
|
+
# Silently fail and use regular sizing if hook doesn't work
|
|
422
|
+
pass
|
|
423
|
+
|
|
424
|
+
if use_dynamic_sizing:
|
|
425
|
+
# Dynamic sizing: use coordinate-based sizing that scales with zoom
|
|
426
|
+
marker_type = "circle"
|
|
427
|
+
# Calculate radius based on data range for coordinate-based sizing
|
|
428
|
+
rtrange = maxrt - minrt
|
|
429
|
+
mzrange = maxmz - minmz
|
|
430
|
+
# Use a fraction of the smaller dimension for radius
|
|
431
|
+
base_radius = min(rtrange, mzrange) * 0.0005 * markersize
|
|
432
|
+
size_1 = markersize # Use regular size initially, hook will convert to radius
|
|
433
|
+
size_2 = markersize
|
|
434
|
+
hooks = [dynamic_sizing_hook]
|
|
435
|
+
elif use_slider_sizing:
|
|
436
|
+
# Slider sizing: create an interactive slider for marker size
|
|
437
|
+
marker_type = marker # Use the original marker parameter
|
|
438
|
+
size_1 = markersize # Use markersize initially, will be updated by slider
|
|
439
|
+
size_2 = markersize
|
|
440
|
+
base_radius = None # Not used in slider mode
|
|
441
|
+
hooks = []
|
|
442
|
+
else:
|
|
443
|
+
# Static sizing: use pixel-based sizing that stays fixed
|
|
444
|
+
marker_type = marker # Use the original marker parameter
|
|
445
|
+
size_1 = markersize
|
|
446
|
+
size_2 = markersize
|
|
447
|
+
base_radius = None # Not used in static mode
|
|
448
|
+
hooks = []
|
|
449
|
+
|
|
321
450
|
color_1 = "forestgreen"
|
|
322
|
-
size_2 = 1 * markersize
|
|
323
451
|
color_2 = "darkorange"
|
|
324
452
|
if filename is not None:
|
|
325
453
|
dyn = False
|
|
326
454
|
if not filename.endswith(".html"):
|
|
327
|
-
|
|
455
|
+
if use_dynamic_sizing:
|
|
456
|
+
# For exported files, use smaller coordinate-based size
|
|
457
|
+
size_1 = 2
|
|
458
|
+
size_2 = 2
|
|
459
|
+
else:
|
|
460
|
+
size_1 = 2
|
|
461
|
+
size_2 = 2
|
|
328
462
|
color_1 = "forestgreen"
|
|
329
|
-
size_2 = 2
|
|
330
463
|
color_2 = "darkorange"
|
|
331
464
|
raster_dynamic = False
|
|
332
465
|
|
|
466
|
+
# For slider functionality, disable raster dynamic to avoid DynamicMap nesting
|
|
467
|
+
if use_slider_sizing:
|
|
468
|
+
raster_dynamic = False
|
|
469
|
+
|
|
333
470
|
dyn = raster_dynamic
|
|
334
471
|
raster = hd.rasterize(
|
|
335
472
|
points,
|
|
@@ -341,8 +478,8 @@ def plot_2d(
|
|
|
341
478
|
cmap=process_cmap(cmap, provider="bokeh"), # blues
|
|
342
479
|
tools=["hover"],
|
|
343
480
|
hooks=[new_bounds_hook],
|
|
344
|
-
width=
|
|
345
|
-
height=
|
|
481
|
+
width=width,
|
|
482
|
+
height=height,
|
|
346
483
|
cnorm="log",
|
|
347
484
|
xlabel="Retention time (s)",
|
|
348
485
|
ylabel="m/z",
|
|
@@ -381,6 +518,7 @@ def plot_2d(
|
|
|
381
518
|
feats = feats[feats["iso"] == 0]
|
|
382
519
|
# find features with ms2_scans not None and iso==0
|
|
383
520
|
features_df = feats[feats["ms2_scans"].notnull()]
|
|
521
|
+
# Create feature points with proper sizing method
|
|
384
522
|
feature_points_1 = hv.Points(
|
|
385
523
|
features_df,
|
|
386
524
|
kdims=["rt", "mz"],
|
|
@@ -396,9 +534,10 @@ def plot_2d(
|
|
|
396
534
|
label="Features with MS2 data",
|
|
397
535
|
).options(
|
|
398
536
|
color=color_1,
|
|
399
|
-
marker=
|
|
537
|
+
marker=marker_type,
|
|
400
538
|
size=size_1,
|
|
401
539
|
tools=["hover"],
|
|
540
|
+
hooks=hooks,
|
|
402
541
|
)
|
|
403
542
|
# find features without MS2 data
|
|
404
543
|
features_df = feats[feats["ms2_scans"].isnull()]
|
|
@@ -416,9 +555,10 @@ def plot_2d(
|
|
|
416
555
|
label="Features without MS2 data",
|
|
417
556
|
).options(
|
|
418
557
|
color="red",
|
|
558
|
+
marker=marker_type,
|
|
419
559
|
size=size_2,
|
|
420
|
-
marker=marker,
|
|
421
560
|
tools=["hover"],
|
|
561
|
+
hooks=hooks,
|
|
422
562
|
)
|
|
423
563
|
|
|
424
564
|
if show_isotopes:
|
|
@@ -443,9 +583,10 @@ def plot_2d(
|
|
|
443
583
|
label="Isotopes",
|
|
444
584
|
).options(
|
|
445
585
|
color="violet",
|
|
446
|
-
marker=
|
|
586
|
+
marker=marker_type,
|
|
447
587
|
size=size_1,
|
|
448
588
|
tools=["hover"],
|
|
589
|
+
hooks=hooks,
|
|
449
590
|
)
|
|
450
591
|
if show_ms2:
|
|
451
592
|
# find all self.scans_df with mslevel 2 that are not linked to a feature
|
|
@@ -502,8 +643,119 @@ def plot_2d(
|
|
|
502
643
|
if title is not None:
|
|
503
644
|
overlay = overlay.opts(title=title)
|
|
504
645
|
|
|
505
|
-
#
|
|
506
|
-
|
|
646
|
+
# Handle slider functionality
|
|
647
|
+
if use_slider_sizing:
|
|
648
|
+
# For slider functionality, we need to work with the feature points directly
|
|
649
|
+
# and not nest DynamicMaps. We'll create the slider using param and panel.
|
|
650
|
+
import param
|
|
651
|
+
import panel as pn
|
|
652
|
+
|
|
653
|
+
class MarkerSizeController(param.Parameterized):
|
|
654
|
+
size_slider = param.Number(default=markersize, bounds=(1, 20), step=0.5)
|
|
655
|
+
|
|
656
|
+
controller = MarkerSizeController()
|
|
657
|
+
|
|
658
|
+
# Create a function that generates just the feature overlays with different sizes
|
|
659
|
+
def create_feature_overlay(size_val):
|
|
660
|
+
feature_overlay = None
|
|
661
|
+
|
|
662
|
+
if feature_points_4 is not None:
|
|
663
|
+
updated_points_4 = feature_points_4.opts(size=size_val)
|
|
664
|
+
feature_overlay = updated_points_4 if feature_overlay is None else feature_overlay * updated_points_4
|
|
665
|
+
if feature_points_3 is not None:
|
|
666
|
+
updated_points_3 = feature_points_3.opts(size=size_val)
|
|
667
|
+
feature_overlay = updated_points_3 if feature_overlay is None else feature_overlay * updated_points_3
|
|
668
|
+
if feature_points_1 is not None:
|
|
669
|
+
updated_points_1 = feature_points_1.opts(size=size_val)
|
|
670
|
+
feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
|
|
671
|
+
if not show_only_features_with_ms2 and feature_points_2 is not None:
|
|
672
|
+
updated_points_2 = feature_points_2.opts(size=size_val)
|
|
673
|
+
feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
|
|
674
|
+
if feature_points_iso is not None:
|
|
675
|
+
updated_points_iso = feature_points_iso.opts(size=size_val)
|
|
676
|
+
feature_overlay = updated_points_iso if feature_overlay is None else feature_overlay * updated_points_iso
|
|
677
|
+
|
|
678
|
+
# Combine with the static raster background
|
|
679
|
+
if feature_overlay is not None:
|
|
680
|
+
combined_overlay = raster * feature_overlay
|
|
681
|
+
else:
|
|
682
|
+
combined_overlay = raster
|
|
683
|
+
|
|
684
|
+
if title is not None:
|
|
685
|
+
combined_overlay = combined_overlay.opts(title=title)
|
|
686
|
+
|
|
687
|
+
return combined_overlay
|
|
688
|
+
|
|
689
|
+
# Create a horizontal control widget on top of the plot
|
|
690
|
+
# Create the slider widget with explicit visibility
|
|
691
|
+
size_slider = pn.widgets.FloatSlider(
|
|
692
|
+
name="Marker Size",
|
|
693
|
+
start=1.0,
|
|
694
|
+
end=20.0,
|
|
695
|
+
step=0.5,
|
|
696
|
+
value=markersize,
|
|
697
|
+
width=300,
|
|
698
|
+
height=40,
|
|
699
|
+
margin=(5, 5),
|
|
700
|
+
show_value=True
|
|
701
|
+
)
|
|
702
|
+
|
|
703
|
+
# Create the slider widget row with clear styling
|
|
704
|
+
slider_widget = pn.Row(
|
|
705
|
+
pn.pane.HTML("<b>Marker Size Control:</b>", width=150, height=40, margin=(5, 10)),
|
|
706
|
+
size_slider,
|
|
707
|
+
height=60,
|
|
708
|
+
margin=10
|
|
709
|
+
)
|
|
710
|
+
|
|
711
|
+
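# NOTE(review): this re-creates size_slider and slider_widget, replacing the ones built just above; one of the two copies is redundant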
|
|
712
|
+
size_slider = pn.widgets.FloatSlider(
|
|
713
|
+
name="Marker Size",
|
|
714
|
+
start=1.0,
|
|
715
|
+
end=20.0,
|
|
716
|
+
step=0.5,
|
|
717
|
+
value=markersize,
|
|
718
|
+
width=300,
|
|
719
|
+
height=40,
|
|
720
|
+
margin=(5, 5),
|
|
721
|
+
show_value=True
|
|
722
|
+
)
|
|
723
|
+
|
|
724
|
+
slider_widget = pn.Row(
|
|
725
|
+
pn.pane.HTML("<b>Marker Size:</b>", width=100, height=40, margin=(5, 10)),
|
|
726
|
+
size_slider,
|
|
727
|
+
height=60,
|
|
728
|
+
margin=10
|
|
729
|
+
)
|
|
730
|
+
|
|
731
|
+
# Simple reactive plot - slider mode doesn't use dynamic rasterization
|
|
732
|
+
@pn.depends(size_slider.param.value)
|
|
733
|
+
def reactive_plot(size_val):
|
|
734
|
+
overlay = create_feature_overlay(float(size_val))
|
|
735
|
+
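# Apply static rasterization for slider mode. NOTE(review): raster_dynamic appears to be forced to False earlier whenever slider sizing is used, so this branch looks unreachable - confirm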
|
|
736
|
+
if raster_dynamic:
|
|
737
|
+
return hd.rasterize(
|
|
738
|
+
overlay,
|
|
739
|
+
aggregator=ds.count(),
|
|
740
|
+
width=raster_max_px,
|
|
741
|
+
height=raster_max_px,
|
|
742
|
+
dynamic=False # Static raster for slider mode
|
|
743
|
+
).opts(
|
|
744
|
+
cnorm='eq_hist',
|
|
745
|
+
tools=['hover'],
|
|
746
|
+
width=width,
|
|
747
|
+
height=height
|
|
748
|
+
)
|
|
749
|
+
else:
|
|
750
|
+
return overlay
|
|
751
|
+
|
|
752
|
+
# Create layout
|
|
753
|
+
layout = pn.Column(slider_widget, reactive_plot, sizing_mode='stretch_width')
|
|
754
|
+
|
|
755
|
+
return layout
|
|
756
|
+
else:
|
|
757
|
+
# Create a panel layout without slider
|
|
758
|
+
layout = panel.Column(overlay)
|
|
507
759
|
|
|
508
760
|
if filename is not None:
|
|
509
761
|
# if filename includes .html, save the panel layout to an HTML file
|
|
@@ -511,10 +763,17 @@ def plot_2d(
|
|
|
511
763
|
layout.save(filename, embed=True)
|
|
512
764
|
else:
|
|
513
765
|
# save the panel layout as a png
|
|
514
|
-
|
|
766
|
+
if use_slider_sizing:
|
|
767
|
+
# For slider plots, save a static overlay rebuilt at the default markersize
|
|
768
|
+
hv.save(create_feature_overlay(markersize), filename, fmt="png")
|
|
769
|
+
else:
|
|
770
|
+
hv.save(overlay, filename, fmt="png")
|
|
515
771
|
else:
|
|
516
|
-
#
|
|
517
|
-
|
|
772
|
+
# Check if we're in a notebook environment and display appropriately
|
|
773
|
+
if use_slider_sizing:
|
|
774
|
+
return _display_plot(layout, layout)
|
|
775
|
+
else:
|
|
776
|
+
return _display_plot(overlay, layout)
|
|
518
777
|
|
|
519
778
|
|
|
520
779
|
def plot_2d_oracle(
|
|
@@ -922,8 +1181,8 @@ def plot_2d_oracle(
|
|
|
922
1181
|
# save the panel layout as a png
|
|
923
1182
|
hv.save(overlay, filename, fmt="png")
|
|
924
1183
|
else:
|
|
925
|
-
#
|
|
926
|
-
layout
|
|
1184
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1185
|
+
return _display_plot(overlay, layout)
|
|
927
1186
|
|
|
928
1187
|
|
|
929
1188
|
def plot_ms2_eic(
|
|
@@ -1070,7 +1329,9 @@ def plot_ms2_eic(
|
|
|
1070
1329
|
else:
|
|
1071
1330
|
hv.save(layout, filename, fmt="png")
|
|
1072
1331
|
else:
|
|
1073
|
-
|
|
1332
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1333
|
+
layout_obj = panel.panel(layout)
|
|
1334
|
+
return _display_plot(layout, layout_obj)
|
|
1074
1335
|
|
|
1075
1336
|
|
|
1076
1337
|
def plot_ms2_cycle(
|
|
@@ -1290,8 +1551,8 @@ def plot_ms2_cycle(
|
|
|
1290
1551
|
# save the panel layout as a png
|
|
1291
1552
|
hv.save(overlay, filename, fmt="png")
|
|
1292
1553
|
else:
|
|
1293
|
-
#
|
|
1294
|
-
layout
|
|
1554
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1555
|
+
return _display_plot(overlay, layout)
|
|
1295
1556
|
|
|
1296
1557
|
|
|
1297
1558
|
def plot_ms2_q1(
|
|
@@ -1393,7 +1654,9 @@ def plot_ms2_q1(
|
|
|
1393
1654
|
else:
|
|
1394
1655
|
hv.save(layout, filename, fmt="png")
|
|
1395
1656
|
else:
|
|
1396
|
-
|
|
1657
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1658
|
+
layout_obj = panel.panel(layout)
|
|
1659
|
+
return _display_plot(layout, layout_obj)
|
|
1397
1660
|
|
|
1398
1661
|
|
|
1399
1662
|
def plot_dda_stats(
|
|
@@ -1468,7 +1731,9 @@ def plot_dda_stats(
|
|
|
1468
1731
|
else:
|
|
1469
1732
|
hv.save(layout, filename, fmt="png")
|
|
1470
1733
|
else:
|
|
1471
|
-
|
|
1734
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1735
|
+
layout_obj = panel.panel(layout)
|
|
1736
|
+
return _display_plot(layout, layout_obj)
|
|
1472
1737
|
|
|
1473
1738
|
|
|
1474
1739
|
def plot_feature_stats(
|
|
@@ -1584,7 +1849,9 @@ def plot_feature_stats(
|
|
|
1584
1849
|
else:
|
|
1585
1850
|
hv.save(layout, filename, fmt="png")
|
|
1586
1851
|
else:
|
|
1587
|
-
|
|
1852
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1853
|
+
layout_obj = panel.panel(layout)
|
|
1854
|
+
return _display_plot(layout, layout_obj)
|
|
1588
1855
|
|
|
1589
1856
|
|
|
1590
1857
|
def plot_tic(
|
masster/sample/sample.py
CHANGED
|
@@ -71,7 +71,7 @@ from masster.logger import MassterLogger
|
|
|
71
71
|
from masster.sample.plot import plot_2d
|
|
72
72
|
from masster.sample.plot import plot_2d_oracle
|
|
73
73
|
from masster.sample.plot import plot_dda_stats
|
|
74
|
-
from masster.sample.plot import
|
|
74
|
+
from masster.sample.plot import plot_chrom
|
|
75
75
|
from masster.sample.plot import plot_feature_stats
|
|
76
76
|
from masster.sample.plot import plot_ms2_cycle
|
|
77
77
|
from masster.sample.plot import plot_ms2_eic
|
|
@@ -221,7 +221,7 @@ class Sample:
|
|
|
221
221
|
plot_2d = plot_2d
|
|
222
222
|
plot_2d_oracle = plot_2d_oracle
|
|
223
223
|
plot_dda_stats = plot_dda_stats
|
|
224
|
-
|
|
224
|
+
plot_chrom = plot_chrom
|
|
225
225
|
plot_feature_stats = plot_feature_stats
|
|
226
226
|
plot_ms2_cycle = plot_ms2_cycle
|
|
227
227
|
plot_ms2_eic = plot_ms2_eic
|
|
@@ -287,7 +287,7 @@ class Sample:
|
|
|
287
287
|
"""
|
|
288
288
|
# Reset logger configuration flags to allow proper reconfiguration after reload
|
|
289
289
|
try:
|
|
290
|
-
import masster.
|
|
290
|
+
import masster.logger as logger_module
|
|
291
291
|
|
|
292
292
|
if hasattr(logger_module, "_SAMPLE_LOGGER_CONFIGURED"):
|
|
293
293
|
logger_module._SAMPLE_LOGGER_CONFIGURED = False
|
|
@@ -312,6 +312,7 @@ class Sample:
|
|
|
312
312
|
f"{base_modname}._version",
|
|
313
313
|
f"{base_modname}.chromatogram",
|
|
314
314
|
f"{base_modname}.spectrum",
|
|
315
|
+
f"{base_modname}.logger",
|
|
315
316
|
]
|
|
316
317
|
|
|
317
318
|
# Add study submodules
|
|
@@ -321,14 +322,15 @@ class Sample:
|
|
|
321
322
|
if module_name.startswith(study_module_prefix) and module_name != current_module:
|
|
322
323
|
study_modules.append(module_name)
|
|
323
324
|
|
|
324
|
-
# Add parameters submodules
|
|
325
|
+
''' # Add parameters submodules
|
|
325
326
|
parameters_modules = []
|
|
326
327
|
parameters_module_prefix = f"{base_modname}.parameters."
|
|
327
328
|
for module_name in sys.modules:
|
|
328
329
|
if module_name.startswith(parameters_module_prefix) and module_name != current_module:
|
|
329
330
|
parameters_modules.append(module_name)
|
|
330
|
-
|
|
331
|
-
|
|
331
|
+
'''
|
|
332
|
+
|
|
333
|
+
all_modules_to_reload = core_modules + sample_modules + study_modules #+ parameters_modules
|
|
332
334
|
|
|
333
335
|
# Reload all discovered modules
|
|
334
336
|
for full_module_name in all_modules_to_reload:
|
masster/study/h5.py
CHANGED
|
@@ -1289,6 +1289,8 @@ def _load_study5(self, filename=None):
|
|
|
1289
1289
|
"size": [],
|
|
1290
1290
|
"map_id": [],
|
|
1291
1291
|
"file_source": [],
|
|
1292
|
+
"ms1": [],
|
|
1293
|
+
"ms2": [],
|
|
1292
1294
|
},
|
|
1293
1295
|
schema={
|
|
1294
1296
|
"sample_uid": pl.Int64,
|
|
@@ -1298,6 +1300,8 @@ def _load_study5(self, filename=None):
|
|
|
1298
1300
|
"size": pl.Int64,
|
|
1299
1301
|
"map_id": pl.Utf8,
|
|
1300
1302
|
"file_source": pl.Utf8,
|
|
1303
|
+
"ms1": pl.Int64,
|
|
1304
|
+
"ms2": pl.Int64,
|
|
1301
1305
|
},
|
|
1302
1306
|
)
|
|
1303
1307
|
pbar.update(1)
|
|
@@ -1317,6 +1321,8 @@ def _load_study5(self, filename=None):
|
|
|
1317
1321
|
"size": [],
|
|
1318
1322
|
"map_id": [],
|
|
1319
1323
|
"file_source": [],
|
|
1324
|
+
"ms1": [],
|
|
1325
|
+
"ms2": [],
|
|
1320
1326
|
},
|
|
1321
1327
|
schema={
|
|
1322
1328
|
"sample_uid": pl.Int64,
|
|
@@ -1326,6 +1332,8 @@ def _load_study5(self, filename=None):
|
|
|
1326
1332
|
"size": pl.Int64,
|
|
1327
1333
|
"map_id": pl.Utf8,
|
|
1328
1334
|
"file_source": pl.Utf8,
|
|
1335
|
+
"ms1": pl.Int64,
|
|
1336
|
+
"ms2": pl.Int64,
|
|
1329
1337
|
},
|
|
1330
1338
|
)
|
|
1331
1339
|
pbar.update(1)
|
masster/study/helpers.py
CHANGED
|
@@ -1197,24 +1197,24 @@ def features_select(
|
|
|
1197
1197
|
if final_count == 0:
|
|
1198
1198
|
self.logger.warning("No features remaining after applying selection criteria.")
|
|
1199
1199
|
else:
|
|
1200
|
-
removed_count = initial_count - final_count
|
|
1201
|
-
self.logger.info(f"Features selected: {final_count} (
|
|
1202
|
-
|
|
1200
|
+
#removed_count = initial_count - final_count
|
|
1201
|
+
self.logger.info(f"Features selected: {final_count} (out of {initial_count})")
|
|
1202
|
+
|
|
1203
1203
|
return feats
|
|
1204
1204
|
|
|
1205
1205
|
|
|
1206
1206
|
def features_filter(self, features):
|
|
1207
1207
|
"""
|
|
1208
|
-
Filter features_df by
|
|
1209
|
-
This
|
|
1208
|
+
Filter features_df by keeping only features that match the given criteria.
|
|
1209
|
+
This keeps only the specified features and removes all others.
|
|
1210
1210
|
|
|
1211
1211
|
OPTIMIZED VERSION: Batch operations and reduced overhead for better performance.
|
|
1212
1212
|
|
|
1213
1213
|
Parameters:
|
|
1214
|
-
features: Features to
|
|
1214
|
+
features: Features to keep. Can be:
|
|
1215
1215
|
- polars.DataFrame: Features DataFrame (will use feature_uid column)
|
|
1216
|
-
- list: List of feature_uids to
|
|
1217
|
-
- int: Single feature_uid to
|
|
1216
|
+
- list: List of feature_uids to keep
|
|
1217
|
+
- int: Single feature_uid to keep
|
|
1218
1218
|
|
|
1219
1219
|
Returns:
|
|
1220
1220
|
None (modifies self.features_df in place)
|
|
@@ -1230,34 +1230,34 @@ def features_filter(self, features):
|
|
|
1230
1230
|
|
|
1231
1231
|
initial_count = len(self.features_df)
|
|
1232
1232
|
|
|
1233
|
-
# Determine feature_uids to
|
|
1233
|
+
# Determine feature_uids to keep - optimized type checking
|
|
1234
1234
|
if isinstance(features, pl.DataFrame):
|
|
1235
1235
|
if "feature_uid" not in features.columns:
|
|
1236
1236
|
self.logger.error("features DataFrame must contain 'feature_uid' column")
|
|
1237
1237
|
return
|
|
1238
|
-
|
|
1238
|
+
feature_uids_to_keep = features["feature_uid"].to_list()
|
|
1239
1239
|
elif isinstance(features, (list, tuple)):
|
|
1240
|
-
|
|
1240
|
+
feature_uids_to_keep = list(features) # Convert tuple to list if needed
|
|
1241
1241
|
elif isinstance(features, int):
|
|
1242
|
-
|
|
1242
|
+
feature_uids_to_keep = [features]
|
|
1243
1243
|
else:
|
|
1244
1244
|
self.logger.error("features parameter must be a DataFrame, list, tuple, or int")
|
|
1245
1245
|
return
|
|
1246
1246
|
|
|
1247
|
-
# Early return if no UIDs to
|
|
1248
|
-
if not
|
|
1247
|
+
# Early return if no UIDs to keep
|
|
1248
|
+
if not feature_uids_to_keep:
|
|
1249
1249
|
self.logger.warning("No feature UIDs provided for filtering.")
|
|
1250
1250
|
return
|
|
1251
1251
|
|
|
1252
1252
|
# For large inputs (more than 100 UIDs), build a set to detect duplicate UIDs
|
|
1253
|
-
if len(
|
|
1254
|
-
feature_uids_set = set(
|
|
1253
|
+
if len(feature_uids_to_keep) > 100:
|
|
1254
|
+
feature_uids_set = set(feature_uids_to_keep)
|
|
1255
1255
|
# Replace the list with its deduplicated form only when more than 20% of the provided UIDs are duplicates
|
|
1256
|
-
if len(feature_uids_set) < len(
|
|
1257
|
-
|
|
1256
|
+
if len(feature_uids_set) < len(feature_uids_to_keep) * 0.8:
|
|
1257
|
+
feature_uids_to_keep = list(feature_uids_set)
|
|
1258
1258
|
|
|
1259
|
-
# Create filter condition once
|
|
1260
|
-
filter_condition =
|
|
1259
|
+
# Create filter condition once - keep only the specified features
|
|
1260
|
+
filter_condition = pl.col("feature_uid").is_in(feature_uids_to_keep)
|
|
1261
1261
|
|
|
1262
1262
|
# Apply filter to features_df using lazy evaluation for better performance
|
|
1263
1263
|
self.features_df = self.features_df.lazy().filter(filter_condition).collect()
|
|
@@ -1280,15 +1280,15 @@ def features_filter(self, features):
|
|
|
1280
1280
|
|
|
1281
1281
|
# Single comprehensive log message
|
|
1282
1282
|
if mapping_removed_count > 0:
|
|
1283
|
-
self.logger.info(f"
|
|
1283
|
+
self.logger.info(f"Kept {final_count} features and removed {mapping_removed_count} consensus mappings. Filtered out {removed_count} features.")
|
|
1284
1284
|
else:
|
|
1285
|
-
self.logger.info(f"
|
|
1285
|
+
self.logger.info(f"Kept {final_count} features. Filtered out {removed_count} features.")
|
|
1286
1286
|
|
|
1287
1287
|
|
|
1288
1288
|
def features_delete(self, features):
|
|
1289
1289
|
"""
|
|
1290
1290
|
Delete features from features_df based on feature identifiers.
|
|
1291
|
-
This
|
|
1291
|
+
This removes the specified features and keeps all others (opposite of features_filter).
|
|
1292
1292
|
|
|
1293
1293
|
Parameters:
|
|
1294
1294
|
features: Features to delete. Can be:
|
|
@@ -1299,7 +1299,70 @@ def features_delete(self, features):
|
|
|
1299
1299
|
Returns:
|
|
1300
1300
|
None (modifies self.features_df in place)
|
|
1301
1301
|
"""
|
|
1302
|
-
self.
|
|
1302
|
+
if self.features_df is None or self.features_df.is_empty():
|
|
1303
|
+
self.logger.warning("No features found in study.")
|
|
1304
|
+
return
|
|
1305
|
+
|
|
1306
|
+
# Early return if no features provided
|
|
1307
|
+
if features is None:
|
|
1308
|
+
self.logger.warning("No features provided for deletion.")
|
|
1309
|
+
return
|
|
1310
|
+
|
|
1311
|
+
initial_count = len(self.features_df)
|
|
1312
|
+
|
|
1313
|
+
# Determine feature_uids to remove - optimized type checking
|
|
1314
|
+
if isinstance(features, pl.DataFrame):
|
|
1315
|
+
if "feature_uid" not in features.columns:
|
|
1316
|
+
self.logger.error("features DataFrame must contain 'feature_uid' column")
|
|
1317
|
+
return
|
|
1318
|
+
feature_uids_to_remove = features["feature_uid"].to_list()
|
|
1319
|
+
elif isinstance(features, (list, tuple)):
|
|
1320
|
+
feature_uids_to_remove = list(features) # Convert tuple to list if needed
|
|
1321
|
+
elif isinstance(features, int):
|
|
1322
|
+
feature_uids_to_remove = [features]
|
|
1323
|
+
else:
|
|
1324
|
+
self.logger.error("features parameter must be a DataFrame, list, tuple, or int")
|
|
1325
|
+
return
|
|
1326
|
+
|
|
1327
|
+
# Early return if no UIDs to remove
|
|
1328
|
+
if not feature_uids_to_remove:
|
|
1329
|
+
self.logger.warning("No feature UIDs provided for deletion.")
|
|
1330
|
+
return
|
|
1331
|
+
|
|
1332
|
+
# Convert to set for faster lookup if list is large
|
|
1333
|
+
if len(feature_uids_to_remove) > 100:
|
|
1334
|
+
feature_uids_set = set(feature_uids_to_remove)
|
|
1335
|
+
# Use the set for filtering if it's significantly smaller
|
|
1336
|
+
if len(feature_uids_set) < len(feature_uids_to_remove) * 0.8:
|
|
1337
|
+
feature_uids_to_remove = list(feature_uids_set)
|
|
1338
|
+
|
|
1339
|
+
# Create filter condition - remove specified features
|
|
1340
|
+
filter_condition = ~pl.col("feature_uid").is_in(feature_uids_to_remove)
|
|
1341
|
+
|
|
1342
|
+
# Apply filter to features_df using lazy evaluation for better performance
|
|
1343
|
+
self.features_df = self.features_df.lazy().filter(filter_condition).collect()
|
|
1344
|
+
|
|
1345
|
+
# Apply filter to consensus_mapping_df if it exists - batch operation
|
|
1346
|
+
mapping_removed_count = 0
|
|
1347
|
+
if self.consensus_mapping_df is not None and not self.consensus_mapping_df.is_empty():
|
|
1348
|
+
initial_mapping_count = len(self.consensus_mapping_df)
|
|
1349
|
+
self.consensus_mapping_df = (
|
|
1350
|
+
self.consensus_mapping_df
|
|
1351
|
+
.lazy()
|
|
1352
|
+
.filter(filter_condition)
|
|
1353
|
+
.collect()
|
|
1354
|
+
)
|
|
1355
|
+
mapping_removed_count = initial_mapping_count - len(self.consensus_mapping_df)
|
|
1356
|
+
|
|
1357
|
+
# Calculate results once and log efficiently
|
|
1358
|
+
final_count = len(self.features_df)
|
|
1359
|
+
removed_count = initial_count - final_count
|
|
1360
|
+
|
|
1361
|
+
# Single comprehensive log message
|
|
1362
|
+
if mapping_removed_count > 0:
|
|
1363
|
+
self.logger.info(f"Deleted {removed_count} features and {mapping_removed_count} consensus mappings. Remaining features: {final_count}")
|
|
1364
|
+
else:
|
|
1365
|
+
self.logger.info(f"Deleted {removed_count} features. Remaining features: {final_count}")
|
|
1303
1366
|
|
|
1304
1367
|
|
|
1305
1368
|
def consensus_select(
|
masster/study/load.py
CHANGED
|
@@ -189,15 +189,50 @@ def add_sample(self, file, type=None, reset=False, adducts=None):
|
|
|
189
189
|
sample_type = "blank"
|
|
190
190
|
map_id_value = str(ddaobj.features.getUniqueId())
|
|
191
191
|
|
|
192
|
+
# Determine the final sample path based on file type
|
|
193
|
+
if file.endswith(".sample5"):
|
|
194
|
+
# If input is already .sample5, keep it in original location
|
|
195
|
+
final_sample_path = file
|
|
196
|
+
self.logger.debug(f"Using existing .sample5 file at original location: {final_sample_path}")
|
|
197
|
+
|
|
198
|
+
# Check if there's a corresponding featureXML file in the same directory
|
|
199
|
+
featurexml_path = file.replace(".sample5", ".featureXML")
|
|
200
|
+
if os.path.exists(featurexml_path):
|
|
201
|
+
self.logger.debug(f"Found corresponding featureXML file: {featurexml_path}")
|
|
202
|
+
else:
|
|
203
|
+
self.logger.debug(f"No corresponding featureXML file found at: {featurexml_path}")
|
|
204
|
+
else:
|
|
205
|
+
# For .wiff, .mzML, .raw files, save to study folder (original behavior)
|
|
206
|
+
if self.folder is not None:
|
|
207
|
+
if not os.path.exists(self.folder):
|
|
208
|
+
os.makedirs(self.folder)
|
|
209
|
+
final_sample_path = os.path.join(self.folder, sample_name + ".sample5")
|
|
210
|
+
ddaobj.save(final_sample_path)
|
|
211
|
+
self.logger.debug(f"Saved converted sample to study folder: {final_sample_path}")
|
|
212
|
+
else:
|
|
213
|
+
# If no study folder is set, save in current directory
|
|
214
|
+
final_sample_path = os.path.join(os.getcwd(), sample_name + ".sample5")
|
|
215
|
+
ddaobj.save(final_sample_path)
|
|
216
|
+
self.logger.debug(f"Saved converted sample to current directory: {final_sample_path}")
|
|
217
|
+
|
|
218
|
+
# Count MS1 and MS2 scans from the loaded sample
|
|
219
|
+
ms1_count = 0
|
|
220
|
+
ms2_count = 0
|
|
221
|
+
if hasattr(ddaobj, 'scans_df') and ddaobj.scans_df is not None and not ddaobj.scans_df.is_empty():
|
|
222
|
+
ms1_count = int(ddaobj.scans_df.filter(pl.col("ms_level") == 1).height)
|
|
223
|
+
ms2_count = int(ddaobj.scans_df.filter(pl.col("ms_level") == 2).height)
|
|
224
|
+
|
|
192
225
|
new_sample = pl.DataFrame(
|
|
193
226
|
{
|
|
194
227
|
"sample_uid": [int(len(self.samples_df) + 1)],
|
|
195
228
|
"sample_name": [sample_name],
|
|
196
|
-
"sample_path": [
|
|
229
|
+
"sample_path": [final_sample_path], # Use the determined path
|
|
197
230
|
"sample_type": [sample_type],
|
|
198
231
|
"size": [int(ddaobj.features.size())],
|
|
199
232
|
"map_id": [map_id_value],
|
|
200
233
|
"file_source": [getattr(ddaobj, 'file_source', file)],
|
|
234
|
+
"ms1": [ms1_count],
|
|
235
|
+
"ms2": [ms2_count],
|
|
201
236
|
},
|
|
202
237
|
schema={
|
|
203
238
|
"sample_uid": pl.Int64,
|
|
@@ -207,15 +242,10 @@ def add_sample(self, file, type=None, reset=False, adducts=None):
|
|
|
207
242
|
"size": pl.Int64,
|
|
208
243
|
"map_id": pl.Utf8,
|
|
209
244
|
"file_source": pl.Utf8,
|
|
245
|
+
"ms1": pl.Int64,
|
|
246
|
+
"ms2": pl.Int64,
|
|
210
247
|
},
|
|
211
248
|
)
|
|
212
|
-
# save ddaobj to folder if it is set
|
|
213
|
-
if self.folder is not None:
|
|
214
|
-
if not os.path.exists(self.folder):
|
|
215
|
-
os.makedirs(self.folder)
|
|
216
|
-
basename = os.path.basename(file)
|
|
217
|
-
sample_name = os.path.splitext(basename)[0]
|
|
218
|
-
ddaobj.save(os.path.join(self.folder, sample_name + ".sample5"))
|
|
219
249
|
self.samples_df = pl.concat([self.samples_df, new_sample])
|
|
220
250
|
|
|
221
251
|
# Optimized DataFrame operations - chain operations instead of multiple clones
|
masster/study/plot.py
CHANGED
|
@@ -157,9 +157,28 @@ def plot_consensus_2d(
|
|
|
157
157
|
colorby="number_samples",
|
|
158
158
|
sizeby="inty_mean",
|
|
159
159
|
markersize=6,
|
|
160
|
+
size="dynamic",
|
|
160
161
|
alpha=0.7,
|
|
161
162
|
cmap=None,
|
|
163
|
+
width=900,
|
|
164
|
+
height=900
|
|
162
165
|
):
|
|
166
|
+
"""
|
|
167
|
+
Plot consensus features in a 2D scatter plot with retention time vs m/z.
|
|
168
|
+
|
|
169
|
+
Parameters:
|
|
170
|
+
filename (str, optional): Path to save the plot
|
|
171
|
+
colorby (str): Column name to use for color mapping (default: "number_samples")
|
|
172
|
+
sizeby (str): Column name to use for size mapping (default: "inty_mean")
|
|
173
|
+
markersize (int): Base marker size (default: 6)
|
|
174
|
+
size (str): Controls whether points scale with zoom. Options:
|
|
175
|
+
'dynamic' - points use circle() and scale with zoom
|
|
176
|
+
'static' - points use scatter() and maintain fixed pixel size
|
|
177
|
+
alpha (float): Transparency level (default: 0.7)
|
|
178
|
+
cmap (str, optional): Color map name
|
|
179
|
+
width (int): Plot width in pixels (default: 900)
|
|
180
|
+
height (int): Plot height in pixels (default: 900)
|
|
181
|
+
"""
|
|
163
182
|
if self.consensus_df is None:
|
|
164
183
|
self.logger.error("No consensus map found.")
|
|
165
184
|
return
|
|
@@ -238,21 +257,33 @@ def plot_consensus_2d(
|
|
|
238
257
|
)
|
|
239
258
|
# scatter plot rt vs mz
|
|
240
259
|
p = bp.figure(
|
|
241
|
-
width=
|
|
242
|
-
height=
|
|
260
|
+
width=width,
|
|
261
|
+
height=height,
|
|
243
262
|
title="Consensus map",
|
|
244
263
|
)
|
|
245
264
|
p.xaxis.axis_label = "Retention Time (min)"
|
|
246
265
|
p.yaxis.axis_label = "m/z"
|
|
247
|
-
scatter_renderer =
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
266
|
+
scatter_renderer: Any = None
|
|
267
|
+
if size.lower() in ["dyn", "dynamic"]:
|
|
268
|
+
scatter_renderer = p.circle(
|
|
269
|
+
x="rt",
|
|
270
|
+
y="mz",
|
|
271
|
+
radius=markersize / 10,
|
|
272
|
+
fill_color={"field": colorby, "transform": color_mapper},
|
|
273
|
+
line_color=None,
|
|
274
|
+
alpha=alpha,
|
|
275
|
+
source=source,
|
|
276
|
+
)
|
|
277
|
+
else:
|
|
278
|
+
scatter_renderer = p.scatter(
|
|
279
|
+
x="rt",
|
|
280
|
+
y="mz",
|
|
281
|
+
size="markersize",
|
|
282
|
+
fill_color={"field": colorby, "transform": color_mapper},
|
|
283
|
+
line_color=None,
|
|
284
|
+
alpha=alpha,
|
|
285
|
+
source=source,
|
|
286
|
+
)
|
|
256
287
|
# add hover tool
|
|
257
288
|
hover = HoverTool(
|
|
258
289
|
tooltips=[
|
|
@@ -292,16 +323,32 @@ def plot_samples_2d(
|
|
|
292
323
|
samples=None,
|
|
293
324
|
filename=None,
|
|
294
325
|
markersize=2,
|
|
295
|
-
size="
|
|
326
|
+
size="dynamic",
|
|
296
327
|
alpha_max=0.8,
|
|
297
328
|
alpha="inty",
|
|
298
329
|
cmap="Turbo256",
|
|
299
|
-
max_features=50000,
|
|
330
|
+
max_features=50000,
|
|
331
|
+
width=900,
|
|
332
|
+
height=900
|
|
300
333
|
):
|
|
301
334
|
"""
|
|
302
335
|
Plot all feature maps for sample_uid in parameter uids in an overlaid scatter plot.
|
|
303
336
|
Each sample is a different color. Alpha scales with intensity.
|
|
304
337
|
OPTIMIZED VERSION: Uses vectorized operations and batch processing.
|
|
338
|
+
|
|
339
|
+
Parameters:
|
|
340
|
+
samples: Sample UIDs to plot
|
|
341
|
+
filename (str, optional): Path to save the plot
|
|
342
|
+
markersize (int): Base marker size (default: 2)
|
|
343
|
+
size (str): Controls whether points scale with zoom. Options:
|
|
344
|
+
'dynamic' or 'dyn' - points use circle() and scale with zoom
|
|
345
|
+
'const', 'static' or other - points use scatter() and maintain fixed pixel size
|
|
346
|
+
alpha_max (float): Maximum transparency level (default: 0.8)
|
|
347
|
+
alpha (str): Column name to use for alpha mapping (default: "inty")
|
|
348
|
+
cmap (str): Color map name (default: "Turbo256")
|
|
349
|
+
max_features (int): Maximum number of features to plot (default: 50000)
|
|
350
|
+
width (int): Plot width in pixels (default: 900)
|
|
351
|
+
height (int): Plot height in pixels (default: 900)
|
|
305
352
|
"""
|
|
306
353
|
|
|
307
354
|
sample_uids = self._get_sample_uids(samples)
|
|
@@ -314,8 +361,8 @@ def plot_samples_2d(
|
|
|
314
361
|
color_map = {uid: colors[i * (256 // max(1, len(sample_uids)))] for i, uid in enumerate(sample_uids)}
|
|
315
362
|
|
|
316
363
|
p = figure(
|
|
317
|
-
width=
|
|
318
|
-
height=
|
|
364
|
+
width=width,
|
|
365
|
+
height=height,
|
|
319
366
|
title="Sample Features",
|
|
320
367
|
)
|
|
321
368
|
p.xaxis.axis_label = "Retention Time (RT)"
|
masster/study/save.py
CHANGED
|
@@ -105,6 +105,8 @@ def save_samples(self, samples=None):
|
|
|
105
105
|
# save ddaobj
|
|
106
106
|
ddaobj.save()
|
|
107
107
|
sample_name = sample_row.row(0, named=True)["sample_name"]
|
|
108
|
+
sample_path = sample_row.row(0, named=True)["sample_path"]
|
|
109
|
+
|
|
108
110
|
# Find the index of this sample in the original order for features_maps
|
|
109
111
|
sample_index = next(
|
|
110
112
|
(
|
|
@@ -114,19 +116,29 @@ def save_samples(self, samples=None):
|
|
|
114
116
|
),
|
|
115
117
|
None,
|
|
116
118
|
)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
)
|
|
119
|
+
|
|
120
|
+
# Determine where to save the featureXML file based on sample_path location
|
|
121
|
+
if sample_path.endswith(".sample5"):
|
|
122
|
+
# If sample_path is a .sample5 file, save featureXML in the same directory
|
|
123
|
+
featurexml_filename = sample_path.replace(".sample5", ".featureXML")
|
|
124
|
+
self.logger.debug(f"Saving featureXML alongside .sample5 file: {featurexml_filename}")
|
|
122
125
|
else:
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
126
|
+
# Fallback to study folder or current directory (original behavior)
|
|
127
|
+
if self.folder is not None:
|
|
128
|
+
featurexml_filename = os.path.join(
|
|
129
|
+
self.folder,
|
|
130
|
+
sample_name + ".featureXML",
|
|
131
|
+
)
|
|
132
|
+
else:
|
|
133
|
+
featurexml_filename = os.path.join(
|
|
134
|
+
os.getcwd(),
|
|
135
|
+
sample_name + ".featureXML",
|
|
136
|
+
)
|
|
137
|
+
self.logger.debug(f"Saving featureXML to default location: {featurexml_filename}")
|
|
138
|
+
|
|
127
139
|
fh = oms.FeatureXMLFile()
|
|
128
140
|
if sample_index is not None and sample_index < len(self.features_maps):
|
|
129
|
-
fh.store(
|
|
141
|
+
fh.store(featurexml_filename, self.features_maps[sample_index])
|
|
130
142
|
|
|
131
143
|
self.logger.debug("All samples saved successfully.")
|
|
132
144
|
|
masster/study/study.py
CHANGED
|
@@ -243,6 +243,8 @@ class Study:
|
|
|
243
243
|
"size": [],
|
|
244
244
|
"map_id": [],
|
|
245
245
|
"file_source": [],
|
|
246
|
+
"ms1": [],
|
|
247
|
+
"ms2": [],
|
|
246
248
|
},
|
|
247
249
|
schema={
|
|
248
250
|
"sample_uid": pl.Int64,
|
|
@@ -252,6 +254,8 @@ class Study:
|
|
|
252
254
|
"size": pl.Int64,
|
|
253
255
|
"map_id": pl.Utf8,
|
|
254
256
|
"file_source": pl.Utf8,
|
|
257
|
+
"ms1": pl.Int64,
|
|
258
|
+
"ms2": pl.Int64,
|
|
255
259
|
},
|
|
256
260
|
)
|
|
257
261
|
self.features_maps = []
|
|
@@ -387,15 +391,17 @@ class Study:
|
|
|
387
391
|
f"{base_modname}._version",
|
|
388
392
|
f"{base_modname}.chromatogram",
|
|
389
393
|
f"{base_modname}.spectrum",
|
|
390
|
-
f"{base_modname}.
|
|
394
|
+
f"{base_modname}.logger",
|
|
391
395
|
]
|
|
392
396
|
|
|
393
|
-
# Add
|
|
397
|
+
# Add sample submodules
|
|
398
|
+
sample_modules = []
|
|
399
|
+
sample_module_prefix = f"{base_modname}.sample."
|
|
394
400
|
for module_name in sys.modules:
|
|
395
|
-
if module_name.startswith(
|
|
396
|
-
|
|
401
|
+
if module_name.startswith(sample_module_prefix) and module_name != current_module:
|
|
402
|
+
sample_modules.append(module_name)
|
|
397
403
|
|
|
398
|
-
all_modules_to_reload = core_modules + study_modules
|
|
404
|
+
all_modules_to_reload = core_modules + sample_modules + study_modules
|
|
399
405
|
|
|
400
406
|
# Reload all discovered modules
|
|
401
407
|
for full_module_name in all_modules_to_reload:
|
masster/study/study5_schema.json
CHANGED
|
@@ -16,10 +16,10 @@ masster/sample/helpers.py,sha256=OEgvR3bptA-tEqHAFVPjWpbagKXAU1h0bePPi9ttHa4,348
|
|
|
16
16
|
masster/sample/lib.py,sha256=9r2XlF_BaJ4WNAsQo8hElieRLwsAv0yrbYq4DJ0iVOM,33496
|
|
17
17
|
masster/sample/load.py,sha256=y-KUJ2nCFX_06FHPUOh-CzRRvaTx14xNcXoL19bU8qY,47562
|
|
18
18
|
masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
|
|
19
|
-
masster/sample/plot.py,sha256=
|
|
19
|
+
masster/sample/plot.py,sha256=uUJAd2qxhVG6Ev2hLuU406zFA2TDkkBz2MG12P9fLik,71449
|
|
20
20
|
masster/sample/processing.py,sha256=NjNLt47Fy0UF3Xs35NBhADg57qTC6Lfa4Xz8Y30v83A,58250
|
|
21
21
|
masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
|
|
22
|
-
masster/sample/sample.py,sha256=
|
|
22
|
+
masster/sample/sample.py,sha256=7ivuAMb3JlFikLOxZjTGwYmuqGehLz9d47gQxfSRtf4,16178
|
|
23
23
|
masster/sample/sample5_schema.json,sha256=3SPFQZH4SooLYUt_lW-PCOE9rHnl56Vhc2XG-r1nyEQ,3586
|
|
24
24
|
masster/sample/save.py,sha256=o9eFSqqr7KYwvCD3gOJt_nZ4h3pkflWqs0n0oSLM-sU,31970
|
|
25
25
|
masster/sample/sciex.py,sha256=q6PdcjCtV2PWnJiXuvfISu09zjkaTR_fvHvWN9OvOcM,46870
|
|
@@ -31,16 +31,16 @@ masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2At
|
|
|
31
31
|
masster/sample/defaults/sample_def.py,sha256=t8vrb8MoBBsFQcRzlaT0-q0hAssOxWO7vhCAJU3_THs,14068
|
|
32
32
|
masster/study/__init__.py,sha256=Zspv6U8jFqjkHGYdNdDy1rfUnCSolCzUdgSSg98PRgE,166
|
|
33
33
|
masster/study/export.py,sha256=bm3e6AEwkXqBO6Pwd-2pWhxOmzQTFlOSauXFnaiSJDI,29019
|
|
34
|
-
masster/study/h5.py,sha256=
|
|
35
|
-
masster/study/helpers.py,sha256=
|
|
34
|
+
masster/study/h5.py,sha256=EcpyYfMknDzzdA6XTyMU_ppY92_DsPSPYGE0kpVN7T8,66429
|
|
35
|
+
masster/study/helpers.py,sha256=SeW17rA3BIM2I2Whiye6wegRRSCabIpQoCsjOCafjKw,74888
|
|
36
36
|
masster/study/helpers_optimized.py,sha256=EgOgPaL3c2LA8jDhnlEHvzb7O9Um-vnMIcnNaoH90gA,13620
|
|
37
|
-
masster/study/load.py,sha256=
|
|
37
|
+
masster/study/load.py,sha256=TLxVhXu0HHb51lGggXitQLtfNxz2JJfKMkAXJbxhvhM,46880
|
|
38
38
|
masster/study/parameters.py,sha256=0elaF7YspTsB7qyajWAbRNL2VfKlGz5GJLifmO8IGkk,3276
|
|
39
|
-
masster/study/plot.py,sha256=
|
|
39
|
+
masster/study/plot.py,sha256=4i3u4geOinCefsambnEGVPF4XuyKTK-_eT5xAWgC7Ik,24045
|
|
40
40
|
masster/study/processing.py,sha256=BQuSBO7O8iTlCjXenECyg0_PAsPF1NNiUllypuemPZI,46101
|
|
41
|
-
masster/study/save.py,sha256=
|
|
42
|
-
masster/study/study.py,sha256=
|
|
43
|
-
masster/study/study5_schema.json,sha256=
|
|
41
|
+
masster/study/save.py,sha256=bcRADWTvhTER9WRkT9zNU5mDUPQZkZB2cuJwpRsYmrM,6589
|
|
42
|
+
masster/study/study.py,sha256=5TZgG7tr7mzqHh1tm48V8SEcvRcWiFYG9iDqz0U9ACc,27073
|
|
43
|
+
masster/study/study5_schema.json,sha256=A_xDPzB97xt2EFeQsX9j8Ut7yC4_DS7BZ24ucotOXIw,5103
|
|
44
44
|
masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
|
|
45
45
|
masster/study/defaults/align_def.py,sha256=9aM7kY4_ecgG8QC6v57AASiRRkPxwG77r3-PlQ2BkHk,9139
|
|
46
46
|
masster/study/defaults/export_def.py,sha256=eXl3h4aoLX88XkHTpqahLd-QZ2gjUqrmjq8IJULXeWo,1203
|
|
@@ -52,8 +52,8 @@ masster/study/defaults/integrate_chrom_def.py,sha256=Rih3-vat7fHGVfIvRitjNJJI3zL
|
|
|
52
52
|
masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
|
|
53
53
|
masster/study/defaults/merge_def.py,sha256=EBsKE3hsAkTEzN9dpdRD5W3_suTKy_WZ_96rwS0uBuE,8572
|
|
54
54
|
masster/study/defaults/study_def.py,sha256=hj8bYtEPwzdowC95yfyoCFt6fZkQePLjpJtmpNz9Z5M,9533
|
|
55
|
-
masster-0.3.
|
|
56
|
-
masster-0.3.
|
|
57
|
-
masster-0.3.
|
|
58
|
-
masster-0.3.
|
|
59
|
-
masster-0.3.
|
|
55
|
+
masster-0.3.2.dist-info/METADATA,sha256=LTK6jfDeryui93xgzncpurTiYrc_iuzRMao82DQ0eMI,44356
|
|
56
|
+
masster-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
57
|
+
masster-0.3.2.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
58
|
+
masster-0.3.2.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
|
|
59
|
+
masster-0.3.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|