masster 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/sample/helpers.py +53 -4
- masster/sample/plot.py +100 -16
- masster/sample/sample.py +6 -0
- masster/sample/sample5_schema.json +43 -34
- masster/study/defaults/align_def.py +10 -10
- masster/study/helpers.py +466 -3
- masster/study/load.py +6 -0
- masster/study/plot.py +809 -130
- masster/study/processing.py +35 -10
- masster/study/study.py +60 -4
- masster/study/study5_schema.json +83 -83
- {masster-0.3.11.dist-info → masster-0.3.12.dist-info}/METADATA +1 -1
- {masster-0.3.11.dist-info → masster-0.3.12.dist-info}/RECORD +16 -16
- {masster-0.3.11.dist-info → masster-0.3.12.dist-info}/WHEEL +0 -0
- {masster-0.3.11.dist-info → masster-0.3.12.dist-info}/entry_points.txt +0 -0
- {masster-0.3.11.dist-info → masster-0.3.12.dist-info}/licenses/LICENSE +0 -0
masster/sample/helpers.py
CHANGED
|
@@ -92,9 +92,6 @@ def get_dda_stats(self):
|
|
|
92
92
|
return ms1
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
# TODO
|
|
96
|
-
|
|
97
|
-
|
|
98
95
|
def get_feature(self, feature_uid):
|
|
99
96
|
# get the feature with feature_uid == feature_uid
|
|
100
97
|
feature = self.features_df.filter(pl.col("feature_uid") == feature_uid)
|
|
@@ -284,7 +281,59 @@ def select_closest_scan(
|
|
|
284
281
|
return scan
|
|
285
282
|
|
|
286
283
|
|
|
287
|
-
|
|
284
|
+
def get_eic(self, mz, mz_tol=0.01):
    """
    Build an extracted ion chromatogram (EIC) from `ms1_df` for a target m/z ± mz_tol.

    Rows of `self.ms1_df` whose `mz` lies in [mz - mz_tol, mz + mz_tol] (both bounds
    inclusive) are grouped by `rt`, their `inty` values are summed per group, and the
    result is sorted by `rt`. The chromatogram is stored in `self.chrom_df` and returned.

    `self.chrom_df` is reset to None when no points match or when building the EIC
    fails, so a call that returns None never leaves the EIC of an earlier call behind.
    It is left untouched when `ms1_df` itself is missing.

    Parameters:
        mz (float): target m/z value
        mz_tol (float): tolerance around mz (default 0.01)

    Returns:
        polars.DataFrame or None: chromatogram with columns ['rt', 'inty'], or None if
        `ms1_df` is missing, no points fall inside the window, or an error occurred.
    """
    # The logger is optional: this function is also usable on objects without one.
    logger = getattr(self, "logger", None)

    ms1_df = getattr(self, "ms1_df", None)
    if ms1_df is None:
        if logger is not None:
            logger.warning("No ms1_df available to build EIC.")
        return None

    try:
        # Filter by mz window
        mz_min = mz - mz_tol
        mz_max = mz + mz_tol
        matches = ms1_df.filter((pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max))

        if len(matches) == 0:
            if logger is not None:
                logger.debug(f"No ms1 points found for mz={mz} ± {mz_tol}.")
            # ensure chrom_df is None when nothing found
            self.chrom_df = None
            return None

        # Aggregate intensities per retention time. Use sum in case multiple points per rt.
        chrom = (
            matches.group_by("rt")
            .agg([pl.col("inty").sum().alias("inty")])
            .sort("rt")
        )
    except Exception as e:
        # Best-effort API: report and return None, but drop any previous EIC so the
        # stored state agrees with the returned value.
        self.chrom_df = None
        if logger is not None:
            logger.error(f"Error building EIC for mz={mz}: {e}")
        return None

    # Attach to Sample
    self.chrom_df = chrom

    if logger is not None:
        logger.debug(f"Built EIC for mz={mz} ± {mz_tol}: {len(chrom)} points.")

    return chrom
|
|
288
337
|
|
|
289
338
|
|
|
290
339
|
def select(
|
masster/sample/plot.py
CHANGED
|
@@ -56,6 +56,7 @@ from bokeh.models import HoverTool
|
|
|
56
56
|
from holoviews import dim
|
|
57
57
|
from holoviews.plotting.util import process_cmap
|
|
58
58
|
from matplotlib.colors import rgb2hex
|
|
59
|
+
from masster.chromatogram import Chromatogram
|
|
59
60
|
|
|
60
61
|
# Parameters removed - using hardcoded defaults
|
|
61
62
|
|
|
@@ -1966,19 +1967,102 @@ def plot_tic(
|
|
|
1966
1967
|
title=None,
|
|
1967
1968
|
filename=None,
|
|
1968
1969
|
):
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
|
|
1973
|
-
|
|
1974
|
-
|
|
1975
|
-
|
|
1976
|
-
|
|
1977
|
-
|
|
1978
|
-
|
|
1979
|
-
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
|
|
1984
|
-
|
|
1970
|
+
"""
|
|
1971
|
+
Plot Total Ion Chromatogram (TIC) by summing MS1 peak intensities at each retention time.
|
|
1972
|
+
|
|
1973
|
+
Uses `self.ms1_df` (Polars DataFrame) and aggregates intensities by `rt` (sum).
|
|
1974
|
+
Creates a `Chromatogram` object and uses its `plot()` method to display the result.
|
|
1975
|
+
"""
|
|
1976
|
+
if self.ms1_df is None:
|
|
1977
|
+
self.logger.error("No MS1 data available.")
|
|
1978
|
+
return
|
|
1979
|
+
|
|
1980
|
+
# Import helper locally to avoid circular imports
|
|
1981
|
+
from masster.study.helpers import get_tic
|
|
1982
|
+
|
|
1983
|
+
# Delegate TIC computation to study helper which handles ms1_df and scans_df fallbacks
|
|
1984
|
+
try:
|
|
1985
|
+
chrom = get_tic(self, label=title)
|
|
1986
|
+
except Exception as e:
|
|
1987
|
+
self.logger.exception("Failed to compute TIC via helper: %s", e)
|
|
1988
|
+
return
|
|
1989
|
+
|
|
1990
|
+
if filename is not None:
|
|
1991
|
+
try:
|
|
1992
|
+
chrom.plot(width=1000, height=250)
|
|
1993
|
+
except Exception:
|
|
1994
|
+
import matplotlib.pyplot as plt
|
|
1995
|
+
|
|
1996
|
+
plt.figure(figsize=(10, 3))
|
|
1997
|
+
plt.plot(chrom.rt, chrom.inty, color="black")
|
|
1998
|
+
plt.xlabel("Retention time (s)")
|
|
1999
|
+
plt.ylabel("Intensity")
|
|
2000
|
+
if title:
|
|
2001
|
+
plt.title(title)
|
|
2002
|
+
plt.tight_layout()
|
|
2003
|
+
plt.savefig(filename)
|
|
2004
|
+
return None
|
|
2005
|
+
|
|
2006
|
+
chrom.plot(width=1000, height=250)
|
|
2007
|
+
return None
|
|
2008
|
+
|
|
2009
|
+
|
|
2010
|
+
def plot_bpc(
    self,
    title=None,
    filename=None,
    rt_unit="s",
):
    """
    Plot the Base Peak Chromatogram (BPC) of this sample.

    The chromatogram is computed by `masster.study.helpers.get_bpc`; this method only
    decides whether to display it or to write it to disk.

    Parameters:
        title (str, optional): Plot title. Also passed to `get_bpc` as `label`.
        filename (str, optional): Output path. If it ends with `.html` an interactive
            bokeh document is written; otherwise the plot is rendered with matplotlib
            and saved in the format matplotlib infers from the extension (e.g. png).
            If None, the chromatogram is displayed via its own `plot()` method.
        rt_unit (str, optional): Unit label for the x-axis, default 's' (seconds).
            Also passed to `get_bpc`.

    Returns:
        None
    """
    if self.ms1_df is None:
        self.logger.error("No MS1 data available.")
        return None

    # Import helper locally to avoid circular imports
    from masster.study.helpers import get_bpc

    # Delegate BPC computation to study helper
    try:
        chrom = get_bpc(self, rt_unit=rt_unit, label=title)
    except Exception as e:
        self.logger.exception("Failed to compute BPC via helper: %s", e)
        return None

    # No filename: display using the chromatogram's built-in plotting
    if filename is None:
        chrom.plot(width=1000, height=250)
        return None

    x_label = f"Retention time ({rt_unit})"

    # A filename was requested: always write the file. Previously the plot was only
    # displayed and a file was written solely when displaying raised an exception.
    # NOTE(review): assumes the chromatogram exposes `rt` and `inty` sequences, as the
    # original matplotlib fallback did - confirm against Chromatogram.
    if str(filename).lower().endswith(".html"):
        from bokeh.io import save as bokeh_save
        from bokeh.plotting import figure
        from bokeh.resources import CDN

        fig = figure(
            title=title,
            width=1000,
            height=250,
            x_axis_label=x_label,
            y_axis_label="Intensity",
        )
        fig.line(list(chrom.rt), list(chrom.inty), line_color="black")
        bokeh_save(
            fig,
            filename=str(filename),
            resources=CDN,
            title=title or "Base Peak Chromatogram",
        )
        return None

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(10, 3))
    try:
        ax.plot(chrom.rt, chrom.inty, color="black")
        ax.set_xlabel(x_label)
        ax.set_ylabel("Intensity")
        if title:
            ax.set_title(title)
        fig.tight_layout()
        fig.savefig(filename)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    return None
|
masster/sample/sample.py
CHANGED
|
@@ -62,6 +62,7 @@ from masster.sample.helpers import select_closest_scan
|
|
|
62
62
|
from masster.sample.helpers import get_dda_stats
|
|
63
63
|
from masster.sample.helpers import get_feature
|
|
64
64
|
from masster.sample.helpers import get_scan
|
|
65
|
+
from masster.sample.helpers import get_eic
|
|
65
66
|
from masster.sample.helpers import set_source
|
|
66
67
|
from masster.sample.load import _load_featureXML
|
|
67
68
|
from masster.sample.load import _load_ms2data
|
|
@@ -80,6 +81,8 @@ from masster.sample.plot import plot_feature_stats
|
|
|
80
81
|
from masster.sample.plot import plot_ms2_cycle
|
|
81
82
|
from masster.sample.plot import plot_ms2_eic
|
|
82
83
|
from masster.sample.plot import plot_ms2_q1
|
|
84
|
+
from masster.sample.plot import plot_bpc
|
|
85
|
+
from masster.sample.plot import plot_tic
|
|
83
86
|
from masster.sample.processing import _clean_features_df
|
|
84
87
|
from masster.sample.processing import _features_deisotope
|
|
85
88
|
from masster.sample.processing import _get_ztscan_stats
|
|
@@ -229,6 +232,9 @@ class Sample:
|
|
|
229
232
|
plot_ms2_cycle = plot_ms2_cycle
|
|
230
233
|
plot_ms2_eic = plot_ms2_eic
|
|
231
234
|
plot_ms2_q1 = plot_ms2_q1
|
|
235
|
+
plot_bpc = plot_bpc
|
|
236
|
+
plot_tic = plot_tic
|
|
237
|
+
get_eic = get_eic
|
|
232
238
|
get_feature = get_feature
|
|
233
239
|
get_scan = get_scan
|
|
234
240
|
get_dda_stats = get_dda_stats
|
|
@@ -1,80 +1,89 @@
|
|
|
1
1
|
{
|
|
2
2
|
"features_df": {
|
|
3
3
|
"columns": {
|
|
4
|
-
"
|
|
5
|
-
"dtype": "pl.Utf8"
|
|
6
|
-
},
|
|
7
|
-
"adduct_group": {
|
|
4
|
+
"feature_uid": {
|
|
8
5
|
"dtype": "pl.Int64"
|
|
9
6
|
},
|
|
10
|
-
"
|
|
11
|
-
"dtype": "pl.
|
|
7
|
+
"feature_id": {
|
|
8
|
+
"dtype": "pl.Utf8"
|
|
12
9
|
},
|
|
13
|
-
"
|
|
10
|
+
"sample_uid": {
|
|
14
11
|
"dtype": "pl.Int32"
|
|
15
12
|
},
|
|
16
|
-
"
|
|
17
|
-
"dtype": "pl.
|
|
13
|
+
"mz": {
|
|
14
|
+
"dtype": "pl.Float64"
|
|
18
15
|
},
|
|
19
|
-
"
|
|
16
|
+
"rt": {
|
|
20
17
|
"dtype": "pl.Float64"
|
|
21
18
|
},
|
|
22
|
-
"
|
|
19
|
+
"rt_original": {
|
|
23
20
|
"dtype": "pl.Float64"
|
|
24
21
|
},
|
|
25
|
-
"
|
|
22
|
+
"rt_start": {
|
|
26
23
|
"dtype": "pl.Float64"
|
|
27
24
|
},
|
|
28
|
-
"
|
|
25
|
+
"rt_end": {
|
|
29
26
|
"dtype": "pl.Float64"
|
|
30
27
|
},
|
|
31
|
-
"
|
|
32
|
-
"dtype": "pl.
|
|
28
|
+
"rt_delta": {
|
|
29
|
+
"dtype": "pl.Float64"
|
|
33
30
|
},
|
|
34
|
-
"
|
|
35
|
-
"dtype": "pl.
|
|
31
|
+
"mz_start": {
|
|
32
|
+
"dtype": "pl.Float64"
|
|
33
|
+
},
|
|
34
|
+
"mz_end": {
|
|
35
|
+
"dtype": "pl.Float64"
|
|
36
36
|
},
|
|
37
37
|
"inty": {
|
|
38
38
|
"dtype": "pl.Float64"
|
|
39
39
|
},
|
|
40
|
+
"quality": {
|
|
41
|
+
"dtype": "pl.Float64"
|
|
42
|
+
},
|
|
43
|
+
"charge": {
|
|
44
|
+
"dtype": "pl.Int32"
|
|
45
|
+
},
|
|
40
46
|
"iso": {
|
|
41
47
|
"dtype": "pl.Int64"
|
|
42
48
|
},
|
|
43
49
|
"iso_of": {
|
|
44
50
|
"dtype": "pl.Int64"
|
|
45
51
|
},
|
|
46
|
-
"
|
|
47
|
-
"dtype": "pl.
|
|
48
|
-
},
|
|
49
|
-
"ms2_specs": {
|
|
50
|
-
"dtype": "pl.Object"
|
|
52
|
+
"adduct": {
|
|
53
|
+
"dtype": "pl.Utf8"
|
|
51
54
|
},
|
|
52
|
-
"
|
|
55
|
+
"adduct_mass": {
|
|
53
56
|
"dtype": "pl.Float64"
|
|
54
57
|
},
|
|
55
|
-
"
|
|
56
|
-
"dtype": "pl.
|
|
58
|
+
"adduct_group": {
|
|
59
|
+
"dtype": "pl.Int64"
|
|
57
60
|
},
|
|
58
|
-
"
|
|
59
|
-
"dtype": "pl.
|
|
61
|
+
"chrom": {
|
|
62
|
+
"dtype": "pl.Object"
|
|
60
63
|
},
|
|
61
|
-
"
|
|
62
|
-
"dtype": "pl.
|
|
64
|
+
"filled": {
|
|
65
|
+
"dtype": "pl.Boolean"
|
|
63
66
|
},
|
|
64
|
-
"
|
|
67
|
+
"chrom_area": {
|
|
65
68
|
"dtype": "pl.Float64"
|
|
66
69
|
},
|
|
67
|
-
"
|
|
70
|
+
"chrom_coherence": {
|
|
68
71
|
"dtype": "pl.Float64"
|
|
69
72
|
},
|
|
70
|
-
"
|
|
73
|
+
"chrom_prominence": {
|
|
71
74
|
"dtype": "pl.Float64"
|
|
72
75
|
},
|
|
73
|
-
"
|
|
76
|
+
"chrom_prominence_scaled": {
|
|
74
77
|
"dtype": "pl.Float64"
|
|
75
78
|
},
|
|
76
|
-
"
|
|
79
|
+
"chrom_height_scaled": {
|
|
77
80
|
"dtype": "pl.Float64"
|
|
81
|
+
},
|
|
82
|
+
"ms2_scans": {
|
|
83
|
+
"dtype": "pl.Object"
|
|
84
|
+
},
|
|
85
|
+
"ms2_specs": {
|
|
86
|
+
"dtype": "pl.Object"
|
|
78
87
|
}
|
|
79
88
|
}
|
|
80
89
|
},
|
|
@@ -49,9 +49,9 @@ class align_defaults:
|
|
|
49
49
|
LOWESS_extrapolation_type (str): Method for extrapolation outside data range. Default is "four-point-linear".
|
|
50
50
|
"""
|
|
51
51
|
|
|
52
|
-
rt_max_diff: float =
|
|
53
|
-
mz_max_diff: float = 0.
|
|
54
|
-
rt_pair_distance_frac: float = 0.
|
|
52
|
+
rt_max_diff: float = 5.0
|
|
53
|
+
mz_max_diff: float = 0.01
|
|
54
|
+
rt_pair_distance_frac: float = 0.5
|
|
55
55
|
mz_pair_max_distance: float = 0.01
|
|
56
56
|
num_used_points: int = 1000
|
|
57
57
|
save_features: bool = False
|
|
@@ -88,16 +88,16 @@ class align_defaults:
|
|
|
88
88
|
"rt_max_diff": {
|
|
89
89
|
"dtype": float,
|
|
90
90
|
"description": "Maximum retention time difference for alignment (seconds)",
|
|
91
|
-
"default":
|
|
91
|
+
"default": 5.0,
|
|
92
92
|
"min_value": 1.0,
|
|
93
|
-
"max_value":
|
|
93
|
+
"max_value": 30.0,
|
|
94
94
|
},
|
|
95
95
|
"mz_max_diff": {
|
|
96
96
|
"dtype": float,
|
|
97
97
|
"description": "Maximum m/z difference for alignment (Da)",
|
|
98
98
|
"default": 0.01,
|
|
99
99
|
"min_value": 0.001,
|
|
100
|
-
"max_value":
|
|
100
|
+
"max_value": 0.05,
|
|
101
101
|
},
|
|
102
102
|
"rt_pair_distance_frac": {
|
|
103
103
|
"dtype": float,
|
|
@@ -111,24 +111,24 @@ class align_defaults:
|
|
|
111
111
|
"description": "Maximum m/z pair distance (Da)",
|
|
112
112
|
"default": 0.01,
|
|
113
113
|
"min_value": 0.001,
|
|
114
|
-
"max_value": 0.
|
|
114
|
+
"max_value": 0.2,
|
|
115
115
|
},
|
|
116
116
|
"num_used_points": {
|
|
117
117
|
"dtype": int,
|
|
118
118
|
"description": "Number of points used for alignment",
|
|
119
119
|
"default": 1000,
|
|
120
120
|
"min_value": 10,
|
|
121
|
-
"max_value":
|
|
121
|
+
"max_value": 10000,
|
|
122
122
|
},
|
|
123
123
|
"save_features": {
|
|
124
124
|
"dtype": bool,
|
|
125
125
|
"description": "Whether to save features after alignment",
|
|
126
|
-
"default":
|
|
126
|
+
"default": False,
|
|
127
127
|
},
|
|
128
128
|
"skip_blanks": {
|
|
129
129
|
"dtype": bool,
|
|
130
130
|
"description": "Whether to skip blank samples during alignment",
|
|
131
|
-
"default":
|
|
131
|
+
"default": False,
|
|
132
132
|
},
|
|
133
133
|
"algo": {
|
|
134
134
|
"dtype": str,
|