masster 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/sample/plot.py +111 -36
- masster/sample/sample.py +7 -5
- masster/study/h5.py +8 -0
- masster/study/helpers.py +87 -24
- masster/study/load.py +38 -8
- masster/study/save.py +22 -10
- masster/study/study.py +11 -5
- masster/study/study5_schema.json +6 -0
- {masster-0.3.0.dist-info → masster-0.3.1.dist-info}/METADATA +1 -1
- {masster-0.3.0.dist-info → masster-0.3.1.dist-info}/RECORD +13 -13
- {masster-0.3.0.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
- {masster-0.3.0.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
- {masster-0.3.0.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +0 -0
masster/sample/plot.py
CHANGED
|
@@ -22,13 +22,13 @@ Dependencies:
|
|
|
22
22
|
- `numpy`: For numerical computations.
|
|
23
23
|
|
|
24
24
|
Functions:
|
|
25
|
-
- `
|
|
25
|
+
- `plot_chrom()`: Generate chromatograms with feature overlays.
|
|
26
26
|
- `plot_2d()`: Create 2D mass spectrometry data visualizations.
|
|
27
27
|
- `plot_features()`: Visualize detected features in retention time vs m/z space.
|
|
28
28
|
- Various utility functions for plot styling and configuration.
|
|
29
29
|
|
|
30
30
|
Supported Plot Types:
|
|
31
|
-
-
|
|
31
|
+
- Chromatograms
|
|
32
32
|
- Total Ion Chromatograms (TIC)
|
|
33
33
|
- Base Peak Chromatograms (BPC)
|
|
34
34
|
- 2D intensity maps (RT vs m/z)
|
|
@@ -63,7 +63,74 @@ from matplotlib.colors import rgb2hex
|
|
|
63
63
|
hv.extension("bokeh")
|
|
64
64
|
|
|
65
65
|
|
|
66
|
-
def
|
|
66
|
+
def _is_notebook_environment():
|
|
67
|
+
"""
|
|
68
|
+
Detect if code is running in a notebook environment (Jupyter, JupyterLab, or Marimo).
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
bool: True if running in a notebook, False otherwise
|
|
72
|
+
"""
|
|
73
|
+
try:
|
|
74
|
+
# Check for Jupyter/JupyterLab
|
|
75
|
+
from IPython import get_ipython
|
|
76
|
+
if get_ipython() is not None:
|
|
77
|
+
# Check if we're in a notebook context
|
|
78
|
+
shell = get_ipython().__class__.__name__
|
|
79
|
+
if shell in ['ZMQInteractiveShell', 'Shell']: # Jupyter notebook/lab
|
|
80
|
+
return True
|
|
81
|
+
|
|
82
|
+
# Check for Marimo
|
|
83
|
+
import sys
|
|
84
|
+
if 'marimo' in sys.modules:
|
|
85
|
+
return True
|
|
86
|
+
|
|
87
|
+
# Additional check for notebook environments
|
|
88
|
+
if hasattr(__builtins__, '__IPYTHON__') or hasattr(__builtins__, '_ih'):
|
|
89
|
+
return True
|
|
90
|
+
|
|
91
|
+
except ImportError:
|
|
92
|
+
pass
|
|
93
|
+
|
|
94
|
+
return False
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _display_plot(plot_object, layout=None):
|
|
98
|
+
"""
|
|
99
|
+
Display a plot object in the appropriate way based on the environment.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
plot_object: The plot object to display (holoviews overlay, etc.)
|
|
103
|
+
layout: Optional panel layout object
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
The plot object if in a notebook environment, None otherwise (the plot is then opened in a browser instead)
|
|
107
|
+
"""
|
|
108
|
+
if _is_notebook_environment():
|
|
109
|
+
# Display inline in notebook
|
|
110
|
+
try:
|
|
111
|
+
# For Jupyter notebooks, just return the plot object -
|
|
112
|
+
# holoviews will handle the display automatically
|
|
113
|
+
return plot_object
|
|
114
|
+
except Exception:
|
|
115
|
+
# Fallback to panel display for other notebook environments
|
|
116
|
+
if layout is not None:
|
|
117
|
+
return layout
|
|
118
|
+
else:
|
|
119
|
+
# Create a simple layout if none provided
|
|
120
|
+
simple_layout = panel.Column(plot_object)
|
|
121
|
+
return simple_layout
|
|
122
|
+
else:
|
|
123
|
+
# Display in browser (original behavior)
|
|
124
|
+
if layout is not None:
|
|
125
|
+
layout.show()
|
|
126
|
+
else:
|
|
127
|
+
# Create a simple layout for browser display
|
|
128
|
+
simple_layout = panel.Column(plot_object)
|
|
129
|
+
simple_layout.show()
|
|
130
|
+
return None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def plot_chrom(
|
|
67
134
|
self,
|
|
68
135
|
feature_uid=None,
|
|
69
136
|
filename=None,
|
|
@@ -74,16 +141,16 @@ def plot_eic(
|
|
|
74
141
|
link_x=False,
|
|
75
142
|
):
|
|
76
143
|
"""
|
|
77
|
-
Plot
|
|
144
|
+
Plot chromatograms for one or more features using MS1 data and feature metadata.
|
|
78
145
|
|
|
79
146
|
This function filters MS1 data based on retention time (rt) and mass-to-charge ratio (mz) windows
|
|
80
|
-
derived from feature information in `features_df`. It then generates interactive
|
|
147
|
+
derived from feature information in `features_df`. It then generates interactive chromatogram plots using
|
|
81
148
|
HoloViews, with feature retention time windows annotated. Plots can be displayed interactively or
|
|
82
149
|
saved to a file.
|
|
83
150
|
|
|
84
151
|
Parameters:
|
|
85
152
|
feature_uid (int or list of int, optional):
|
|
86
|
-
Feature identifier(s) for
|
|
153
|
+
Feature identifier(s) for chromatogram generation. If None, chromatograms for all features in `features_df` are plotted.
|
|
87
154
|
filename (str, optional):
|
|
88
155
|
Output file path. If ending with `.html`, saves as interactive HTML; otherwise, saves as PNG.
|
|
89
156
|
If not provided, displays the plot interactively.
|
|
@@ -96,7 +163,7 @@ def plot_eic(
|
|
|
96
163
|
mz_tol_factor_plot (float, default=1):
|
|
97
164
|
Multiplier applied to `mz_tol` to widen the m/z window used for plotting.
|
|
98
165
|
link_x (bool, default=False):
|
|
99
|
-
If True, links the x-axes (retention time) across all
|
|
166
|
+
If True, links the x-axes (retention time) across all chromatogram subplots.
|
|
100
167
|
|
|
101
168
|
Returns:
|
|
102
169
|
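The HoloViews plot object when displayed inline in a notebook; None when the plot is opened in a browser or saved to `filename`.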
|
|
@@ -106,7 +173,7 @@ def plot_eic(
|
|
|
106
173
|
- Aggregates MS1 intensities by retention time.
|
|
107
174
|
- Utilizes HoloViews for visualization and Panel for layout/display.
|
|
108
175
|
"""
|
|
109
|
-
# plots the
|
|
176
|
+
# plots the chromatogram for a given feature id
|
|
110
177
|
# If rt or mz are not provided, they are extracted from features_df using the supplied feature id (feature_uid)
|
|
111
178
|
|
|
112
179
|
feature_uids = feature_uid
|
|
@@ -121,7 +188,7 @@ def plot_eic(
|
|
|
121
188
|
|
|
122
189
|
# make sure feature_uid is a list of integers
|
|
123
190
|
|
|
124
|
-
|
|
191
|
+
chrom_plots = []
|
|
125
192
|
feature_uids = feats["feature_uid"].values.tolist()
|
|
126
193
|
mz_tol_plot = mz_tol * mz_tol_factor_plot
|
|
127
194
|
rt_tol_plot = rt_tol * rt_tol_factor_plot
|
|
@@ -137,29 +204,29 @@ def plot_eic(
|
|
|
137
204
|
mz_end = feature_row["mz_end"].values[0]
|
|
138
205
|
|
|
139
206
|
# filter self.ms1_df with rt_start, rt_end, mz_start, mz_end
|
|
140
|
-
|
|
207
|
+
chrom_df = self.ms1_df.filter(
|
|
141
208
|
pl.col("rt") >= rt_start - rt_tol_plot,
|
|
142
209
|
pl.col("rt") <= rt_end + rt_tol_plot,
|
|
143
210
|
)
|
|
144
|
-
|
|
211
|
+
chrom_df = chrom_df.filter(
|
|
145
212
|
pl.col("mz") >= mz_start - mz_tol_plot,
|
|
146
213
|
pl.col("mz") <= mz_end + mz_tol_plot,
|
|
147
214
|
)
|
|
148
215
|
|
|
149
|
-
if
|
|
216
|
+
if chrom_df.is_empty():
|
|
150
217
|
print("No MS1 data found in the specified window.")
|
|
151
218
|
continue
|
|
152
219
|
|
|
153
220
|
# convert to pandas DataFrame
|
|
154
|
-
|
|
221
|
+
chrom_df = chrom_df.to_pandas()
|
|
155
222
|
# aggregate all points with the same rt using the sum of inty
|
|
156
|
-
|
|
223
|
+
chrom_df = chrom_df.groupby("rt").agg({"inty": "sum"}).reset_index()
|
|
157
224
|
yname = f"inty_{feature_uid}"
|
|
158
|
-
|
|
225
|
+
chrom_df.rename(columns={"inty": yname}, inplace=True)
|
|
159
226
|
|
|
160
|
-
# Plot the
|
|
161
|
-
|
|
162
|
-
title=f"
|
|
227
|
+
# Plot the chromatogram using bokeh and ensure axes are independent by setting axiswise=True
|
|
228
|
+
chrom = hv.Curve(chrom_df, kdims=["rt"], vdims=[yname]).opts(
|
|
229
|
+
title=f"Chromatogram for feature {feature_uid}, mz = {mz:.4f}",
|
|
163
230
|
xlabel="Retention time (s)",
|
|
164
231
|
ylabel="Intensity",
|
|
165
232
|
width=1000,
|
|
@@ -170,13 +237,13 @@ def plot_eic(
|
|
|
170
237
|
)
|
|
171
238
|
|
|
172
239
|
# Add vertical lines at the start and end of the retention time
|
|
173
|
-
|
|
240
|
+
chrom = chrom * hv.VLine(rt_start).opts(
|
|
174
241
|
color="blue",
|
|
175
242
|
line_width=1,
|
|
176
243
|
line_dash="dashed",
|
|
177
244
|
axiswise=True,
|
|
178
245
|
)
|
|
179
|
-
|
|
246
|
+
chrom = chrom * hv.VLine(rt_end).opts(
|
|
180
247
|
color="blue",
|
|
181
248
|
line_width=1,
|
|
182
249
|
line_dash="dashed",
|
|
@@ -184,12 +251,12 @@ def plot_eic(
|
|
|
184
251
|
)
|
|
185
252
|
|
|
186
253
|
# Append the subplot without linking axes
|
|
187
|
-
|
|
254
|
+
chrom_plots.append(chrom)
|
|
188
255
|
if link_x:
|
|
189
|
-
# Create a layout with shared x-axis for all
|
|
190
|
-
layout = hv.Layout(
|
|
256
|
+
# Create a layout with shared x-axis for all chromatogram plots
|
|
257
|
+
layout = hv.Layout(chrom_plots).opts(shared_axes=True)
|
|
191
258
|
else:
|
|
192
|
-
layout = hv.Layout(
|
|
259
|
+
layout = hv.Layout(chrom_plots).opts(shared_axes=False)
|
|
193
260
|
|
|
194
261
|
layout = layout.cols(1)
|
|
195
262
|
layout = panel.Column(layout)
|
|
@@ -201,8 +268,8 @@ def plot_eic(
|
|
|
201
268
|
# save the panel layout as a png
|
|
202
269
|
hv.save(layout, filename, fmt="png")
|
|
203
270
|
else:
|
|
204
|
-
#
|
|
205
|
-
layout.
|
|
271
|
+
# Check if we're in a notebook environment and display appropriately
|
|
272
|
+
return _display_plot(layout.object, layout)
|
|
206
273
|
|
|
207
274
|
|
|
208
275
|
def plot_2d(
|
|
@@ -513,8 +580,8 @@ def plot_2d(
|
|
|
513
580
|
# save the panel layout as a png
|
|
514
581
|
hv.save(overlay, filename, fmt="png")
|
|
515
582
|
else:
|
|
516
|
-
#
|
|
517
|
-
layout
|
|
583
|
+
# Check if we're in a notebook environment and display appropriately
|
|
584
|
+
return _display_plot(overlay, layout)
|
|
518
585
|
|
|
519
586
|
|
|
520
587
|
def plot_2d_oracle(
|
|
@@ -922,8 +989,8 @@ def plot_2d_oracle(
|
|
|
922
989
|
# save the panel layout as a png
|
|
923
990
|
hv.save(overlay, filename, fmt="png")
|
|
924
991
|
else:
|
|
925
|
-
#
|
|
926
|
-
layout
|
|
992
|
+
# Check if we're in a notebook environment and display appropriately
|
|
993
|
+
return _display_plot(overlay, layout)
|
|
927
994
|
|
|
928
995
|
|
|
929
996
|
def plot_ms2_eic(
|
|
@@ -1070,7 +1137,9 @@ def plot_ms2_eic(
|
|
|
1070
1137
|
else:
|
|
1071
1138
|
hv.save(layout, filename, fmt="png")
|
|
1072
1139
|
else:
|
|
1073
|
-
|
|
1140
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1141
|
+
layout_obj = panel.panel(layout)
|
|
1142
|
+
return _display_plot(layout, layout_obj)
|
|
1074
1143
|
|
|
1075
1144
|
|
|
1076
1145
|
def plot_ms2_cycle(
|
|
@@ -1290,8 +1359,8 @@ def plot_ms2_cycle(
|
|
|
1290
1359
|
# save the panel layout as a png
|
|
1291
1360
|
hv.save(overlay, filename, fmt="png")
|
|
1292
1361
|
else:
|
|
1293
|
-
#
|
|
1294
|
-
layout
|
|
1362
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1363
|
+
return _display_plot(overlay, layout)
|
|
1295
1364
|
|
|
1296
1365
|
|
|
1297
1366
|
def plot_ms2_q1(
|
|
@@ -1393,7 +1462,9 @@ def plot_ms2_q1(
|
|
|
1393
1462
|
else:
|
|
1394
1463
|
hv.save(layout, filename, fmt="png")
|
|
1395
1464
|
else:
|
|
1396
|
-
|
|
1465
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1466
|
+
layout_obj = panel.panel(layout)
|
|
1467
|
+
return _display_plot(layout, layout_obj)
|
|
1397
1468
|
|
|
1398
1469
|
|
|
1399
1470
|
def plot_dda_stats(
|
|
@@ -1468,7 +1539,9 @@ def plot_dda_stats(
|
|
|
1468
1539
|
else:
|
|
1469
1540
|
hv.save(layout, filename, fmt="png")
|
|
1470
1541
|
else:
|
|
1471
|
-
|
|
1542
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1543
|
+
layout_obj = panel.panel(layout)
|
|
1544
|
+
return _display_plot(layout, layout_obj)
|
|
1472
1545
|
|
|
1473
1546
|
|
|
1474
1547
|
def plot_feature_stats(
|
|
@@ -1584,7 +1657,9 @@ def plot_feature_stats(
|
|
|
1584
1657
|
else:
|
|
1585
1658
|
hv.save(layout, filename, fmt="png")
|
|
1586
1659
|
else:
|
|
1587
|
-
|
|
1660
|
+
# Check if we're in a notebook environment and display appropriately
|
|
1661
|
+
layout_obj = panel.panel(layout)
|
|
1662
|
+
return _display_plot(layout, layout_obj)
|
|
1588
1663
|
|
|
1589
1664
|
|
|
1590
1665
|
def plot_tic(
|
masster/sample/sample.py
CHANGED
|
@@ -71,7 +71,7 @@ from masster.logger import MassterLogger
|
|
|
71
71
|
from masster.sample.plot import plot_2d
|
|
72
72
|
from masster.sample.plot import plot_2d_oracle
|
|
73
73
|
from masster.sample.plot import plot_dda_stats
|
|
74
|
-
from masster.sample.plot import
|
|
74
|
+
from masster.sample.plot import plot_chrom
|
|
75
75
|
from masster.sample.plot import plot_feature_stats
|
|
76
76
|
from masster.sample.plot import plot_ms2_cycle
|
|
77
77
|
from masster.sample.plot import plot_ms2_eic
|
|
@@ -221,7 +221,7 @@ class Sample:
|
|
|
221
221
|
plot_2d = plot_2d
|
|
222
222
|
plot_2d_oracle = plot_2d_oracle
|
|
223
223
|
plot_dda_stats = plot_dda_stats
|
|
224
|
-
|
|
224
|
+
plot_chrom = plot_chrom
|
|
225
225
|
plot_feature_stats = plot_feature_stats
|
|
226
226
|
plot_ms2_cycle = plot_ms2_cycle
|
|
227
227
|
plot_ms2_eic = plot_ms2_eic
|
|
@@ -312,6 +312,7 @@ class Sample:
|
|
|
312
312
|
f"{base_modname}._version",
|
|
313
313
|
f"{base_modname}.chromatogram",
|
|
314
314
|
f"{base_modname}.spectrum",
|
|
315
|
+
f"{base_modname}.logger",
|
|
315
316
|
]
|
|
316
317
|
|
|
317
318
|
# Add study submodules
|
|
@@ -321,14 +322,15 @@ class Sample:
|
|
|
321
322
|
if module_name.startswith(study_module_prefix) and module_name != current_module:
|
|
322
323
|
study_modules.append(module_name)
|
|
323
324
|
|
|
324
|
-
# Add parameters submodules
|
|
325
|
+
''' # Add parameters submodules
|
|
325
326
|
parameters_modules = []
|
|
326
327
|
parameters_module_prefix = f"{base_modname}.parameters."
|
|
327
328
|
for module_name in sys.modules:
|
|
328
329
|
if module_name.startswith(parameters_module_prefix) and module_name != current_module:
|
|
329
330
|
parameters_modules.append(module_name)
|
|
330
|
-
|
|
331
|
-
|
|
331
|
+
'''
|
|
332
|
+
|
|
333
|
+
all_modules_to_reload = core_modules + sample_modules + study_modules #+ parameters_modules
|
|
332
334
|
|
|
333
335
|
# Reload all discovered modules
|
|
334
336
|
for full_module_name in all_modules_to_reload:
|
masster/study/h5.py
CHANGED
|
@@ -1289,6 +1289,8 @@ def _load_study5(self, filename=None):
|
|
|
1289
1289
|
"size": [],
|
|
1290
1290
|
"map_id": [],
|
|
1291
1291
|
"file_source": [],
|
|
1292
|
+
"ms1": [],
|
|
1293
|
+
"ms2": [],
|
|
1292
1294
|
},
|
|
1293
1295
|
schema={
|
|
1294
1296
|
"sample_uid": pl.Int64,
|
|
@@ -1298,6 +1300,8 @@ def _load_study5(self, filename=None):
|
|
|
1298
1300
|
"size": pl.Int64,
|
|
1299
1301
|
"map_id": pl.Utf8,
|
|
1300
1302
|
"file_source": pl.Utf8,
|
|
1303
|
+
"ms1": pl.Int64,
|
|
1304
|
+
"ms2": pl.Int64,
|
|
1301
1305
|
},
|
|
1302
1306
|
)
|
|
1303
1307
|
pbar.update(1)
|
|
@@ -1317,6 +1321,8 @@ def _load_study5(self, filename=None):
|
|
|
1317
1321
|
"size": [],
|
|
1318
1322
|
"map_id": [],
|
|
1319
1323
|
"file_source": [],
|
|
1324
|
+
"ms1": [],
|
|
1325
|
+
"ms2": [],
|
|
1320
1326
|
},
|
|
1321
1327
|
schema={
|
|
1322
1328
|
"sample_uid": pl.Int64,
|
|
@@ -1326,6 +1332,8 @@ def _load_study5(self, filename=None):
|
|
|
1326
1332
|
"size": pl.Int64,
|
|
1327
1333
|
"map_id": pl.Utf8,
|
|
1328
1334
|
"file_source": pl.Utf8,
|
|
1335
|
+
"ms1": pl.Int64,
|
|
1336
|
+
"ms2": pl.Int64,
|
|
1329
1337
|
},
|
|
1330
1338
|
)
|
|
1331
1339
|
pbar.update(1)
|
masster/study/helpers.py
CHANGED
|
@@ -1197,24 +1197,24 @@ def features_select(
|
|
|
1197
1197
|
if final_count == 0:
|
|
1198
1198
|
self.logger.warning("No features remaining after applying selection criteria.")
|
|
1199
1199
|
else:
|
|
1200
|
-
removed_count = initial_count - final_count
|
|
1201
|
-
self.logger.info(f"Features selected: {final_count} (
|
|
1202
|
-
|
|
1200
|
+
#removed_count = initial_count - final_count
|
|
1201
|
+
self.logger.info(f"Features selected: {final_count} (out of {initial_count})")
|
|
1202
|
+
|
|
1203
1203
|
return feats
|
|
1204
1204
|
|
|
1205
1205
|
|
|
1206
1206
|
def features_filter(self, features):
|
|
1207
1207
|
"""
|
|
1208
|
-
Filter features_df by
|
|
1209
|
-
This
|
|
1208
|
+
Filter features_df by keeping only features that match the given criteria.
|
|
1209
|
+
This keeps only the specified features and removes all others.
|
|
1210
1210
|
|
|
1211
1211
|
OPTIMIZED VERSION: Batch operations and reduced overhead for better performance.
|
|
1212
1212
|
|
|
1213
1213
|
Parameters:
|
|
1214
|
-
features: Features to
|
|
1214
|
+
features: Features to keep. Can be:
|
|
1215
1215
|
- polars.DataFrame: Features DataFrame (will use feature_uid column)
|
|
1216
|
-
- list: List of feature_uids to
|
|
1217
|
-
- int: Single feature_uid to
|
|
1216
|
+
- list: List of feature_uids to keep
|
|
1217
|
+
- int: Single feature_uid to keep
|
|
1218
1218
|
|
|
1219
1219
|
Returns:
|
|
1220
1220
|
None (modifies self.features_df in place)
|
|
@@ -1230,34 +1230,34 @@ def features_filter(self, features):
|
|
|
1230
1230
|
|
|
1231
1231
|
initial_count = len(self.features_df)
|
|
1232
1232
|
|
|
1233
|
-
# Determine feature_uids to
|
|
1233
|
+
# Determine feature_uids to keep - optimized type checking
|
|
1234
1234
|
if isinstance(features, pl.DataFrame):
|
|
1235
1235
|
if "feature_uid" not in features.columns:
|
|
1236
1236
|
self.logger.error("features DataFrame must contain 'feature_uid' column")
|
|
1237
1237
|
return
|
|
1238
|
-
|
|
1238
|
+
feature_uids_to_keep = features["feature_uid"].to_list()
|
|
1239
1239
|
elif isinstance(features, (list, tuple)):
|
|
1240
|
-
|
|
1240
|
+
feature_uids_to_keep = list(features) # Convert tuple to list if needed
|
|
1241
1241
|
elif isinstance(features, int):
|
|
1242
|
-
|
|
1242
|
+
feature_uids_to_keep = [features]
|
|
1243
1243
|
else:
|
|
1244
1244
|
self.logger.error("features parameter must be a DataFrame, list, tuple, or int")
|
|
1245
1245
|
return
|
|
1246
1246
|
|
|
1247
|
-
# Early return if no UIDs to
|
|
1248
|
-
if not
|
|
1247
|
+
# Early return if no UIDs to keep
|
|
1248
|
+
if not feature_uids_to_keep:
|
|
1249
1249
|
self.logger.warning("No feature UIDs provided for filtering.")
|
|
1250
1250
|
return
|
|
1251
1251
|
|
|
1252
1252
|
# Convert to set for faster lookup if list is large
|
|
1253
|
-
if len(
|
|
1254
|
-
feature_uids_set = set(
|
|
1253
|
+
if len(feature_uids_to_keep) > 100:
|
|
1254
|
+
feature_uids_set = set(feature_uids_to_keep)
|
|
1255
1255
|
# Deduplicate the UID list when the set is significantly smaller (i.e. many repeated UIDs)
|
|
1256
|
-
if len(feature_uids_set) < len(
|
|
1257
|
-
|
|
1256
|
+
if len(feature_uids_set) < len(feature_uids_to_keep) * 0.8:
|
|
1257
|
+
feature_uids_to_keep = list(feature_uids_set)
|
|
1258
1258
|
|
|
1259
|
-
# Create filter condition once
|
|
1260
|
-
filter_condition =
|
|
1259
|
+
# Create filter condition once - keep only the specified features
|
|
1260
|
+
filter_condition = pl.col("feature_uid").is_in(feature_uids_to_keep)
|
|
1261
1261
|
|
|
1262
1262
|
# Apply filter to features_df using lazy evaluation for better performance
|
|
1263
1263
|
self.features_df = self.features_df.lazy().filter(filter_condition).collect()
|
|
@@ -1280,15 +1280,15 @@ def features_filter(self, features):
|
|
|
1280
1280
|
|
|
1281
1281
|
# Single comprehensive log message
|
|
1282
1282
|
if mapping_removed_count > 0:
|
|
1283
|
-
self.logger.info(f"
|
|
1283
|
+
self.logger.info(f"Kept {final_count} features and removed {mapping_removed_count} consensus mappings. Filtered out {removed_count} features.")
|
|
1284
1284
|
else:
|
|
1285
|
-
self.logger.info(f"
|
|
1285
|
+
self.logger.info(f"Kept {final_count} features. Filtered out {removed_count} features.")
|
|
1286
1286
|
|
|
1287
1287
|
|
|
1288
1288
|
def features_delete(self, features):
|
|
1289
1289
|
"""
|
|
1290
1290
|
Delete features from features_df based on feature identifiers.
|
|
1291
|
-
This
|
|
1291
|
+
This removes the specified features and keeps all others (opposite of features_filter).
|
|
1292
1292
|
|
|
1293
1293
|
Parameters:
|
|
1294
1294
|
features: Features to delete. Can be:
|
|
@@ -1299,7 +1299,70 @@ def features_delete(self, features):
|
|
|
1299
1299
|
Returns:
|
|
1300
1300
|
None (modifies self.features_df in place)
|
|
1301
1301
|
"""
|
|
1302
|
-
self.
|
|
1302
|
+
if self.features_df is None or self.features_df.is_empty():
|
|
1303
|
+
self.logger.warning("No features found in study.")
|
|
1304
|
+
return
|
|
1305
|
+
|
|
1306
|
+
# Early return if no features provided
|
|
1307
|
+
if features is None:
|
|
1308
|
+
self.logger.warning("No features provided for deletion.")
|
|
1309
|
+
return
|
|
1310
|
+
|
|
1311
|
+
initial_count = len(self.features_df)
|
|
1312
|
+
|
|
1313
|
+
# Determine feature_uids to remove - optimized type checking
|
|
1314
|
+
if isinstance(features, pl.DataFrame):
|
|
1315
|
+
if "feature_uid" not in features.columns:
|
|
1316
|
+
self.logger.error("features DataFrame must contain 'feature_uid' column")
|
|
1317
|
+
return
|
|
1318
|
+
feature_uids_to_remove = features["feature_uid"].to_list()
|
|
1319
|
+
elif isinstance(features, (list, tuple)):
|
|
1320
|
+
feature_uids_to_remove = list(features) # Convert tuple to list if needed
|
|
1321
|
+
elif isinstance(features, int):
|
|
1322
|
+
feature_uids_to_remove = [features]
|
|
1323
|
+
else:
|
|
1324
|
+
self.logger.error("features parameter must be a DataFrame, list, tuple, or int")
|
|
1325
|
+
return
|
|
1326
|
+
|
|
1327
|
+
# Early return if no UIDs to remove
|
|
1328
|
+
if not feature_uids_to_remove:
|
|
1329
|
+
self.logger.warning("No feature UIDs provided for deletion.")
|
|
1330
|
+
return
|
|
1331
|
+
|
|
1332
|
+
# Convert to set for faster lookup if list is large
|
|
1333
|
+
if len(feature_uids_to_remove) > 100:
|
|
1334
|
+
feature_uids_set = set(feature_uids_to_remove)
|
|
1335
|
+
# Deduplicate the UID list when the set is significantly smaller (i.e. many repeated UIDs)
|
|
1336
|
+
if len(feature_uids_set) < len(feature_uids_to_remove) * 0.8:
|
|
1337
|
+
feature_uids_to_remove = list(feature_uids_set)
|
|
1338
|
+
|
|
1339
|
+
# Create filter condition - remove specified features
|
|
1340
|
+
filter_condition = ~pl.col("feature_uid").is_in(feature_uids_to_remove)
|
|
1341
|
+
|
|
1342
|
+
# Apply filter to features_df using lazy evaluation for better performance
|
|
1343
|
+
self.features_df = self.features_df.lazy().filter(filter_condition).collect()
|
|
1344
|
+
|
|
1345
|
+
# Apply filter to consensus_mapping_df if it exists - batch operation
|
|
1346
|
+
mapping_removed_count = 0
|
|
1347
|
+
if self.consensus_mapping_df is not None and not self.consensus_mapping_df.is_empty():
|
|
1348
|
+
initial_mapping_count = len(self.consensus_mapping_df)
|
|
1349
|
+
self.consensus_mapping_df = (
|
|
1350
|
+
self.consensus_mapping_df
|
|
1351
|
+
.lazy()
|
|
1352
|
+
.filter(filter_condition)
|
|
1353
|
+
.collect()
|
|
1354
|
+
)
|
|
1355
|
+
mapping_removed_count = initial_mapping_count - len(self.consensus_mapping_df)
|
|
1356
|
+
|
|
1357
|
+
# Calculate results once and log efficiently
|
|
1358
|
+
final_count = len(self.features_df)
|
|
1359
|
+
removed_count = initial_count - final_count
|
|
1360
|
+
|
|
1361
|
+
# Single comprehensive log message
|
|
1362
|
+
if mapping_removed_count > 0:
|
|
1363
|
+
self.logger.info(f"Deleted {removed_count} features and {mapping_removed_count} consensus mappings. Remaining features: {final_count}")
|
|
1364
|
+
else:
|
|
1365
|
+
self.logger.info(f"Deleted {removed_count} features. Remaining features: {final_count}")
|
|
1303
1366
|
|
|
1304
1367
|
|
|
1305
1368
|
def consensus_select(
|
masster/study/load.py
CHANGED
|
@@ -189,15 +189,50 @@ def add_sample(self, file, type=None, reset=False, adducts=None):
|
|
|
189
189
|
sample_type = "blank"
|
|
190
190
|
map_id_value = str(ddaobj.features.getUniqueId())
|
|
191
191
|
|
|
192
|
+
# Determine the final sample path based on file type
|
|
193
|
+
if file.endswith(".sample5"):
|
|
194
|
+
# If input is already .sample5, keep it in original location
|
|
195
|
+
final_sample_path = file
|
|
196
|
+
self.logger.debug(f"Using existing .sample5 file at original location: {final_sample_path}")
|
|
197
|
+
|
|
198
|
+
# Check if there's a corresponding featureXML file in the same directory
|
|
199
|
+
featurexml_path = file.replace(".sample5", ".featureXML")
|
|
200
|
+
if os.path.exists(featurexml_path):
|
|
201
|
+
self.logger.debug(f"Found corresponding featureXML file: {featurexml_path}")
|
|
202
|
+
else:
|
|
203
|
+
self.logger.debug(f"No corresponding featureXML file found at: {featurexml_path}")
|
|
204
|
+
else:
|
|
205
|
+
# For .wiff, .mzML, .raw files, save to study folder (original behavior)
|
|
206
|
+
if self.folder is not None:
|
|
207
|
+
if not os.path.exists(self.folder):
|
|
208
|
+
os.makedirs(self.folder)
|
|
209
|
+
final_sample_path = os.path.join(self.folder, sample_name + ".sample5")
|
|
210
|
+
ddaobj.save(final_sample_path)
|
|
211
|
+
self.logger.debug(f"Saved converted sample to study folder: {final_sample_path}")
|
|
212
|
+
else:
|
|
213
|
+
# If no study folder is set, save in current directory
|
|
214
|
+
final_sample_path = os.path.join(os.getcwd(), sample_name + ".sample5")
|
|
215
|
+
ddaobj.save(final_sample_path)
|
|
216
|
+
self.logger.debug(f"Saved converted sample to current directory: {final_sample_path}")
|
|
217
|
+
|
|
218
|
+
# Count MS1 and MS2 scans from the loaded sample
|
|
219
|
+
ms1_count = 0
|
|
220
|
+
ms2_count = 0
|
|
221
|
+
if hasattr(ddaobj, 'scans_df') and ddaobj.scans_df is not None and not ddaobj.scans_df.is_empty():
|
|
222
|
+
ms1_count = int(ddaobj.scans_df.filter(pl.col("ms_level") == 1).height)
|
|
223
|
+
ms2_count = int(ddaobj.scans_df.filter(pl.col("ms_level") == 2).height)
|
|
224
|
+
|
|
192
225
|
new_sample = pl.DataFrame(
|
|
193
226
|
{
|
|
194
227
|
"sample_uid": [int(len(self.samples_df) + 1)],
|
|
195
228
|
"sample_name": [sample_name],
|
|
196
|
-
"sample_path": [
|
|
229
|
+
"sample_path": [final_sample_path], # Use the determined path
|
|
197
230
|
"sample_type": [sample_type],
|
|
198
231
|
"size": [int(ddaobj.features.size())],
|
|
199
232
|
"map_id": [map_id_value],
|
|
200
233
|
"file_source": [getattr(ddaobj, 'file_source', file)],
|
|
234
|
+
"ms1": [ms1_count],
|
|
235
|
+
"ms2": [ms2_count],
|
|
201
236
|
},
|
|
202
237
|
schema={
|
|
203
238
|
"sample_uid": pl.Int64,
|
|
@@ -207,15 +242,10 @@ def add_sample(self, file, type=None, reset=False, adducts=None):
|
|
|
207
242
|
"size": pl.Int64,
|
|
208
243
|
"map_id": pl.Utf8,
|
|
209
244
|
"file_source": pl.Utf8,
|
|
245
|
+
"ms1": pl.Int64,
|
|
246
|
+
"ms2": pl.Int64,
|
|
210
247
|
},
|
|
211
248
|
)
|
|
212
|
-
# save ddaobj to folder if it is set
|
|
213
|
-
if self.folder is not None:
|
|
214
|
-
if not os.path.exists(self.folder):
|
|
215
|
-
os.makedirs(self.folder)
|
|
216
|
-
basename = os.path.basename(file)
|
|
217
|
-
sample_name = os.path.splitext(basename)[0]
|
|
218
|
-
ddaobj.save(os.path.join(self.folder, sample_name + ".sample5"))
|
|
219
249
|
self.samples_df = pl.concat([self.samples_df, new_sample])
|
|
220
250
|
|
|
221
251
|
# Optimized DataFrame operations - chain operations instead of multiple clones
|
masster/study/save.py
CHANGED
|
@@ -105,6 +105,8 @@ def save_samples(self, samples=None):
|
|
|
105
105
|
# save ddaobj
|
|
106
106
|
ddaobj.save()
|
|
107
107
|
sample_name = sample_row.row(0, named=True)["sample_name"]
|
|
108
|
+
sample_path = sample_row.row(0, named=True)["sample_path"]
|
|
109
|
+
|
|
108
110
|
# Find the index of this sample in the original order for features_maps
|
|
109
111
|
sample_index = next(
|
|
110
112
|
(
|
|
@@ -114,19 +116,29 @@ def save_samples(self, samples=None):
|
|
|
114
116
|
),
|
|
115
117
|
None,
|
|
116
118
|
)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
)
|
|
119
|
+
|
|
120
|
+
# Determine where to save the featureXML file based on sample_path location
|
|
121
|
+
if sample_path.endswith(".sample5"):
|
|
122
|
+
# If sample_path is a .sample5 file, save featureXML in the same directory
|
|
123
|
+
featurexml_filename = sample_path.replace(".sample5", ".featureXML")
|
|
124
|
+
self.logger.debug(f"Saving featureXML alongside .sample5 file: {featurexml_filename}")
|
|
122
125
|
else:
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
126
|
+
# Fallback to study folder or current directory (original behavior)
|
|
127
|
+
if self.folder is not None:
|
|
128
|
+
featurexml_filename = os.path.join(
|
|
129
|
+
self.folder,
|
|
130
|
+
sample_name + ".featureXML",
|
|
131
|
+
)
|
|
132
|
+
else:
|
|
133
|
+
featurexml_filename = os.path.join(
|
|
134
|
+
os.getcwd(),
|
|
135
|
+
sample_name + ".featureXML",
|
|
136
|
+
)
|
|
137
|
+
self.logger.debug(f"Saving featureXML to default location: {featurexml_filename}")
|
|
138
|
+
|
|
127
139
|
fh = oms.FeatureXMLFile()
|
|
128
140
|
if sample_index is not None and sample_index < len(self.features_maps):
|
|
129
|
-
fh.store(
|
|
141
|
+
fh.store(featurexml_filename, self.features_maps[sample_index])
|
|
130
142
|
|
|
131
143
|
self.logger.debug("All samples saved successfully.")
|
|
132
144
|
|
masster/study/study.py
CHANGED
|
@@ -243,6 +243,8 @@ class Study:
|
|
|
243
243
|
"size": [],
|
|
244
244
|
"map_id": [],
|
|
245
245
|
"file_source": [],
|
|
246
|
+
"ms1": [],
|
|
247
|
+
"ms2": [],
|
|
246
248
|
},
|
|
247
249
|
schema={
|
|
248
250
|
"sample_uid": pl.Int64,
|
|
@@ -252,6 +254,8 @@ class Study:
|
|
|
252
254
|
"size": pl.Int64,
|
|
253
255
|
"map_id": pl.Utf8,
|
|
254
256
|
"file_source": pl.Utf8,
|
|
257
|
+
"ms1": pl.Int64,
|
|
258
|
+
"ms2": pl.Int64,
|
|
255
259
|
},
|
|
256
260
|
)
|
|
257
261
|
self.features_maps = []
|
|
@@ -387,15 +391,17 @@ class Study:
|
|
|
387
391
|
f"{base_modname}._version",
|
|
388
392
|
f"{base_modname}.chromatogram",
|
|
389
393
|
f"{base_modname}.spectrum",
|
|
390
|
-
f"{base_modname}.
|
|
394
|
+
f"{base_modname}.logger",
|
|
391
395
|
]
|
|
392
396
|
|
|
393
|
-
# Add
|
|
397
|
+
# Add sample submodules
|
|
398
|
+
sample_modules = []
|
|
399
|
+
sample_module_prefix = f"{base_modname}.sample."
|
|
394
400
|
for module_name in sys.modules:
|
|
395
|
-
if module_name.startswith(
|
|
396
|
-
|
|
401
|
+
if module_name.startswith(sample_module_prefix) and module_name != current_module:
|
|
402
|
+
sample_modules.append(module_name)
|
|
397
403
|
|
|
398
|
-
all_modules_to_reload = core_modules + study_modules
|
|
404
|
+
all_modules_to_reload = core_modules + sample_modules + study_modules
|
|
399
405
|
|
|
400
406
|
# Reload all discovered modules
|
|
401
407
|
for full_module_name in all_modules_to_reload:
|
masster/study/study5_schema.json
CHANGED
|
@@ -16,10 +16,10 @@ masster/sample/helpers.py,sha256=OEgvR3bptA-tEqHAFVPjWpbagKXAU1h0bePPi9ttHa4,348
|
|
|
16
16
|
masster/sample/lib.py,sha256=9r2XlF_BaJ4WNAsQo8hElieRLwsAv0yrbYq4DJ0iVOM,33496
|
|
17
17
|
masster/sample/load.py,sha256=y-KUJ2nCFX_06FHPUOh-CzRRvaTx14xNcXoL19bU8qY,47562
|
|
18
18
|
masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
|
|
19
|
-
masster/sample/plot.py,sha256=
|
|
19
|
+
masster/sample/plot.py,sha256=wd-4OosFT8MoO0fM8PSMskZK_yg8i8vfbiTieAzgrv4,62831
|
|
20
20
|
masster/sample/processing.py,sha256=NjNLt47Fy0UF3Xs35NBhADg57qTC6Lfa4Xz8Y30v83A,58250
|
|
21
21
|
masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
|
|
22
|
-
masster/sample/sample.py,sha256=
|
|
22
|
+
masster/sample/sample.py,sha256=UlyA7cZtV_IMO8PRaYaUqf8cfAGfavVVfNDo0g_6OJw,16185
|
|
23
23
|
masster/sample/sample5_schema.json,sha256=3SPFQZH4SooLYUt_lW-PCOE9rHnl56Vhc2XG-r1nyEQ,3586
|
|
24
24
|
masster/sample/save.py,sha256=o9eFSqqr7KYwvCD3gOJt_nZ4h3pkflWqs0n0oSLM-sU,31970
|
|
25
25
|
masster/sample/sciex.py,sha256=q6PdcjCtV2PWnJiXuvfISu09zjkaTR_fvHvWN9OvOcM,46870
|
|
@@ -31,16 +31,16 @@ masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2At
|
|
|
31
31
|
masster/sample/defaults/sample_def.py,sha256=t8vrb8MoBBsFQcRzlaT0-q0hAssOxWO7vhCAJU3_THs,14068
|
|
32
32
|
masster/study/__init__.py,sha256=Zspv6U8jFqjkHGYdNdDy1rfUnCSolCzUdgSSg98PRgE,166
|
|
33
33
|
masster/study/export.py,sha256=bm3e6AEwkXqBO6Pwd-2pWhxOmzQTFlOSauXFnaiSJDI,29019
|
|
34
|
-
masster/study/h5.py,sha256=
|
|
35
|
-
masster/study/helpers.py,sha256=
|
|
34
|
+
masster/study/h5.py,sha256=EcpyYfMknDzzdA6XTyMU_ppY92_DsPSPYGE0kpVN7T8,66429
|
|
35
|
+
masster/study/helpers.py,sha256=SeW17rA3BIM2I2Whiye6wegRRSCabIpQoCsjOCafjKw,74888
|
|
36
36
|
masster/study/helpers_optimized.py,sha256=EgOgPaL3c2LA8jDhnlEHvzb7O9Um-vnMIcnNaoH90gA,13620
|
|
37
|
-
masster/study/load.py,sha256=
|
|
37
|
+
masster/study/load.py,sha256=TLxVhXu0HHb51lGggXitQLtfNxz2JJfKMkAXJbxhvhM,46880
|
|
38
38
|
masster/study/parameters.py,sha256=0elaF7YspTsB7qyajWAbRNL2VfKlGz5GJLifmO8IGkk,3276
|
|
39
39
|
masster/study/plot.py,sha256=hOG8bBT3mYV63FieEk-gYKtOyIXWppkTu21VeGbRnGk,21918
|
|
40
40
|
masster/study/processing.py,sha256=BQuSBO7O8iTlCjXenECyg0_PAsPF1NNiUllypuemPZI,46101
|
|
41
|
-
masster/study/save.py,sha256=
|
|
42
|
-
masster/study/study.py,sha256=
|
|
43
|
-
masster/study/study5_schema.json,sha256=
|
|
41
|
+
masster/study/save.py,sha256=bcRADWTvhTER9WRkT9zNU5mDUPQZkZB2cuJwpRsYmrM,6589
|
|
42
|
+
masster/study/study.py,sha256=5TZgG7tr7mzqHh1tm48V8SEcvRcWiFYG9iDqz0U9ACc,27073
|
|
43
|
+
masster/study/study5_schema.json,sha256=A_xDPzB97xt2EFeQsX9j8Ut7yC4_DS7BZ24ucotOXIw,5103
|
|
44
44
|
masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
|
|
45
45
|
masster/study/defaults/align_def.py,sha256=9aM7kY4_ecgG8QC6v57AASiRRkPxwG77r3-PlQ2BkHk,9139
|
|
46
46
|
masster/study/defaults/export_def.py,sha256=eXl3h4aoLX88XkHTpqahLd-QZ2gjUqrmjq8IJULXeWo,1203
|
|
@@ -52,8 +52,8 @@ masster/study/defaults/integrate_chrom_def.py,sha256=Rih3-vat7fHGVfIvRitjNJJI3zL
|
|
|
52
52
|
masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
|
|
53
53
|
masster/study/defaults/merge_def.py,sha256=EBsKE3hsAkTEzN9dpdRD5W3_suTKy_WZ_96rwS0uBuE,8572
|
|
54
54
|
masster/study/defaults/study_def.py,sha256=hj8bYtEPwzdowC95yfyoCFt6fZkQePLjpJtmpNz9Z5M,9533
|
|
55
|
-
masster-0.3.
|
|
56
|
-
masster-0.3.
|
|
57
|
-
masster-0.3.
|
|
58
|
-
masster-0.3.
|
|
59
|
-
masster-0.3.
|
|
55
|
+
masster-0.3.1.dist-info/METADATA,sha256=VLzNZSby0weoT9QUfjleppVOtuvt_GtZu6AfLRM9MSg,44356
|
|
56
|
+
masster-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
57
|
+
masster-0.3.1.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
58
|
+
masster-0.3.1.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
|
|
59
|
+
masster-0.3.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|