masster 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/sample/defaults/find_features_def.py +82 -96
- masster/sample/defaults/sample_def.py +15 -0
- masster/sample/lib.py +11 -11
- masster/sample/plot.py +93 -11
- masster/sample/processing.py +150 -153
- masster/study/processing.py +39 -48
- {masster-0.3.8.dist-info → masster-0.3.10.dist-info}/METADATA +1 -1
- {masster-0.3.8.dist-info → masster-0.3.10.dist-info}/RECORD +11 -11
- {masster-0.3.8.dist-info → masster-0.3.10.dist-info}/WHEEL +0 -0
- {masster-0.3.8.dist-info → masster-0.3.10.dist-info}/entry_points.txt +0 -0
- {masster-0.3.8.dist-info → masster-0.3.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -17,102 +17,100 @@ from typing import Any
|
|
|
17
17
|
|
|
18
18
|
@dataclass
|
|
19
19
|
class find_features_defaults:
|
|
20
|
+
"""Configuration defaults for the feature-finding pipeline.
|
|
21
|
+
|
|
22
|
+
This dataclass centralizes parameters used by the `find_features()` routine
|
|
23
|
+
(mass-trace detection, elution-peak detection and feature assembly). The
|
|
24
|
+
purpose of this docstring is to explain the role and impact of the main
|
|
25
|
+
parameters users commonly tune.
|
|
26
|
+
|
|
27
|
+
Main parameters (what they mean, units and guidance):
|
|
28
|
+
|
|
29
|
+
- chrom_fwhm (float, seconds):
|
|
30
|
+
Expected chromatographic peak full-width at half-maximum (FWHM) in
|
|
31
|
+
seconds. This value informs the peak detection algorithms about the
|
|
32
|
+
typical temporal width of chromatographic peaks. It is used for
|
|
33
|
+
smoothing, window sizes when searching for local maxima and when
|
|
34
|
+
calculating RT-based tolerances. Use a value that matches your LC
|
|
35
|
+
method: smaller values for sharp, fast chromatography and larger values
|
|
36
|
+
for broader peaks. Default: 1.0 s.
|
|
37
|
+
|
|
38
|
+
- noise (float, intensity units):
|
|
39
|
+
Intensity threshold used to filter out low-intensity signals before
|
|
40
|
+
mass-trace and peak detection. Points with intensity below this
|
|
41
|
+
threshold are treated as background and typically ignored. Raising
|
|
42
|
+
`noise` reduces false positives from background fluctuations but may
|
|
43
|
+
remove low-abundance true peaks; lowering it increases sensitivity at
|
|
44
|
+
the cost of more noise. Default: 200.0 (instrument-dependent).
|
|
45
|
+
|
|
46
|
+
- chrom_peak_snr (float, unitless):
|
|
47
|
+
Minimum signal-to-noise ratio required to accept a detected
|
|
48
|
+
chromatographic peak. SNR is typically computed as peak height
|
|
49
|
+
(or crest intensity) divided by an estimate of local noise. A higher
|
|
50
|
+
`chrom_peak_snr` makes detection stricter (fewer false positives),
|
|
51
|
+
while a lower value makes detection more permissive (more low-SNR
|
|
52
|
+
peaks accepted). Typical values range from ~3 (relaxed) to >10
|
|
53
|
+
(stringent). Default: 10.0.
|
|
54
|
+
|
|
55
|
+
Use these three parameters together to balance sensitivity and
|
|
56
|
+
specificity for your dataset: tune `chrom_fwhm` to match chromatographic
|
|
57
|
+
peak shapes, set `noise` to a conservative background level for your
|
|
58
|
+
instrument, then adjust `chrom_peak_snr` to control how aggressively
|
|
59
|
+
peaks are accepted or rejected.
|
|
60
|
+
|
|
61
|
+
The class also contains many other configuration options (mass tolerances,
|
|
62
|
+
isotope handling, post-processing and reporting flags). See individual
|
|
63
|
+
parameter metadata (`_param_metadata`) for allowed ranges and types.
|
|
20
64
|
"""
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
Mass Trace Detection (MTD) Parameters:
|
|
28
|
-
tol_ppm: Mass error tolerance in parts-per-million for mass trace detection.
|
|
29
|
-
noise: Noise threshold intensity to filter out low-intensity signals.
|
|
30
|
-
min_trace_length_multiplier: Multiplier for minimum trace length (multiplied by chrom_fwhm_min).
|
|
31
|
-
trace_termination_outliers: Number of outliers allowed before terminating a trace.
|
|
32
|
-
|
|
33
|
-
Elution Peak Detection (EPD) Parameters:
|
|
34
|
-
chrom_fwhm: Full width at half maximum for chromatographic peak shape.
|
|
35
|
-
chrom_fwhm_min: Minimum FWHM for chromatographic peak detection.
|
|
36
|
-
chrom_peak_snr: Signal-to-noise ratio required for chromatographic peaks.
|
|
37
|
-
masstrace_snr_filtering: Whether to apply SNR filtering to mass traces.
|
|
38
|
-
mz_scoring_13C: Whether to enable scoring of 13C isotopic patterns.
|
|
39
|
-
width_filtering: Width filtering method for mass traces.
|
|
40
|
-
|
|
41
|
-
Feature Finding (FFM) Parameters:
|
|
42
|
-
remove_single_traces: Whether to remove mass traces without satellite isotopic traces.
|
|
43
|
-
report_convex_hulls: Whether to report convex hulls for features.
|
|
44
|
-
report_summed_ints: Whether to report summed intensities.
|
|
45
|
-
report_chromatograms: Whether to report chromatograms.
|
|
46
|
-
|
|
47
|
-
Post-processing Parameters:
|
|
48
|
-
deisotope: Whether to perform deisotoping of detected features.
|
|
49
|
-
deisotope_mz_tol: m/z tolerance for deisotoping.
|
|
50
|
-
deisotope_rt_tol_factor: RT tolerance factor for deisotoping (multiplied by chrom_fwhm_min/4).
|
|
51
|
-
eic_mz_tol: m/z tolerance for EIC extraction.
|
|
52
|
-
eic_rt_tol: RT tolerance for EIC extraction.
|
|
53
|
-
|
|
54
|
-
Available Methods:
|
|
55
|
-
- validate(param_name, value): Validate a single parameter value
|
|
56
|
-
- validate_all(): Validate all parameters at once
|
|
57
|
-
- to_dict(): Convert parameters to dictionary
|
|
58
|
-
- set_from_dict(param_dict, validate=True): Update multiple parameters from dict
|
|
59
|
-
- set(param_name, value, validate=True): Set parameter value with validation
|
|
60
|
-
- get(param_name): Get parameter value
|
|
61
|
-
- get_description(param_name): Get parameter description
|
|
62
|
-
- get_info(param_name): Get full parameter metadata
|
|
63
|
-
- list_parameters(): Get list of all parameter names
|
|
64
|
-
"""
|
|
65
|
-
|
|
65
|
+
|
|
66
|
+
# Main params
|
|
67
|
+
noise: float = 200.0
|
|
68
|
+
chrom_fwhm: float = 1.0
|
|
69
|
+
chrom_peak_snr: float = 10.0
|
|
70
|
+
|
|
66
71
|
# Mass Trace Detection parameters
|
|
67
72
|
tol_ppm: float = 30.0
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
73
|
+
reestimate_mt_sd: bool = True
|
|
74
|
+
quant_method: str = "area"
|
|
75
|
+
trace_termination_criterion: str = "outlier"
|
|
76
|
+
trace_termination_outliers: int = 5
|
|
77
|
+
min_sample_rate: float = 0.5
|
|
78
|
+
|
|
79
|
+
min_trace_length: float = 0.5
|
|
80
|
+
min_trace_length_multiplier: float = 0.2
|
|
81
|
+
max_trace_length: float = -1.0
|
|
71
82
|
|
|
72
83
|
# Elution Peak Detection parameters
|
|
73
|
-
|
|
74
|
-
chrom_fwhm_min: float = 0.
|
|
75
|
-
|
|
76
|
-
masstrace_snr_filtering: bool = False
|
|
77
|
-
mz_scoring_13C: bool = False
|
|
84
|
+
enabled: bool = True
|
|
85
|
+
chrom_fwhm_min: float = 0.2
|
|
86
|
+
chrom_fwhm_max: float = 60.0
|
|
78
87
|
width_filtering: str = "fixed"
|
|
88
|
+
masstrace_snr_filtering: bool = False
|
|
79
89
|
|
|
80
90
|
# Feature Finding parameters
|
|
91
|
+
local_rt_range: float = 1.0
|
|
92
|
+
local_mz_range: float = 5.0
|
|
93
|
+
charge_lower_bound: int = 0
|
|
94
|
+
charge_upper_bound: int = 5
|
|
95
|
+
|
|
96
|
+
report_smoothed_intensities: bool = False
|
|
81
97
|
remove_single_traces: bool = False
|
|
82
98
|
report_convex_hulls: bool = True
|
|
83
99
|
report_summed_ints: bool = False
|
|
84
100
|
report_chromatograms: bool = True
|
|
101
|
+
mz_scoring_13C: bool = False
|
|
102
|
+
|
|
103
|
+
threads: int = 1
|
|
104
|
+
no_progress: bool = False
|
|
105
|
+
debug: bool = False
|
|
85
106
|
|
|
86
107
|
# Post-processing parameters
|
|
87
108
|
deisotope: bool = True
|
|
88
109
|
deisotope_mz_tol: float = 0.02
|
|
89
|
-
deisotope_rt_tol_factor: float = 0.
|
|
90
|
-
eic_mz_tol: float = 0.01
|
|
91
|
-
eic_rt_tol: float = 10.0
|
|
110
|
+
deisotope_rt_tol_factor: float = 0.5 # Will be multiplied by chrom_fwhm
|
|
92
111
|
|
|
93
|
-
#
|
|
94
|
-
|
|
95
|
-
no_progress: bool = False
|
|
96
|
-
debug: bool = False
|
|
97
|
-
min_sample_rate: float = 0.5
|
|
98
|
-
min_trace_length: int = 5
|
|
99
|
-
min_fwhm: float = 1.0
|
|
100
|
-
max_fwhm: float = 60.0
|
|
101
|
-
|
|
102
|
-
# Additional Mass Trace Detection parameters
|
|
103
|
-
trace_termination_criterion: str = "outlier"
|
|
104
|
-
reestimate_mt_sd: bool = True
|
|
105
|
-
quant_method: str = "area"
|
|
106
|
-
|
|
107
|
-
# Additional Elution Peak Detection parameters
|
|
108
|
-
enabled: bool = True
|
|
109
|
-
|
|
110
|
-
# Additional Feature Finding parameters
|
|
111
|
-
local_rt_range: float = 10.0
|
|
112
|
-
local_mz_range: float = 6.5
|
|
113
|
-
charge_lower_bound: int = 1
|
|
114
|
-
charge_upper_bound: int = 3
|
|
115
|
-
report_smoothed_intensities: bool = False
|
|
112
|
+
# chrom extraction parameters
|
|
113
|
+
|
|
116
114
|
|
|
117
115
|
# Parameter metadata for validation and description
|
|
118
116
|
_param_metadata: dict[str, dict[str, Any]] = field(
|
|
@@ -132,8 +130,8 @@ class find_features_defaults:
|
|
|
132
130
|
"min_trace_length_multiplier": {
|
|
133
131
|
"dtype": float,
|
|
134
132
|
"description": "Multiplier for minimum trace length calculation (multiplied by chrom_fwhm_min)",
|
|
135
|
-
"min_value": 1
|
|
136
|
-
"max_value":
|
|
133
|
+
"min_value": 0.1,
|
|
134
|
+
"max_value": 2.0,
|
|
137
135
|
},
|
|
138
136
|
"trace_termination_outliers": {
|
|
139
137
|
"dtype": int,
|
|
@@ -204,18 +202,6 @@ class find_features_defaults:
|
|
|
204
202
|
"min_value": 0.1,
|
|
205
203
|
"max_value": 2.0,
|
|
206
204
|
},
|
|
207
|
-
"eic_mz_tol": {
|
|
208
|
-
"dtype": float,
|
|
209
|
-
"description": "m/z tolerance for EIC extraction (Da)",
|
|
210
|
-
"min_value": 0.001,
|
|
211
|
-
"max_value": 0.1,
|
|
212
|
-
},
|
|
213
|
-
"eic_rt_tol": {
|
|
214
|
-
"dtype": float,
|
|
215
|
-
"description": "RT tolerance for EIC extraction (seconds)",
|
|
216
|
-
"min_value": 1.0,
|
|
217
|
-
"max_value": 60.0,
|
|
218
|
-
},
|
|
219
205
|
"threads": {
|
|
220
206
|
"dtype": int,
|
|
221
207
|
"description": "Number of threads to use for parallel processing",
|
|
@@ -242,13 +228,13 @@ class find_features_defaults:
|
|
|
242
228
|
"min_value": 2,
|
|
243
229
|
"max_value": 100,
|
|
244
230
|
},
|
|
245
|
-
"min_fwhm": {
|
|
231
|
+
''' "min_fwhm": {
|
|
246
232
|
"dtype": float,
|
|
247
233
|
"description": "Minimum full width at half maximum for peaks (seconds)",
|
|
248
234
|
"min_value": 0.1,
|
|
249
235
|
"max_value": 10.0,
|
|
250
|
-
},
|
|
251
|
-
"
|
|
236
|
+
},'''
|
|
237
|
+
"chrom_fwhm_max": {
|
|
252
238
|
"dtype": float,
|
|
253
239
|
"description": "Maximum full width at half maximum for peaks (seconds)",
|
|
254
240
|
"min_value": 1.0,
|
|
@@ -53,6 +53,9 @@ class sample_defaults:
|
|
|
53
53
|
centroid_prominence: int = -1
|
|
54
54
|
max_points_per_spectrum: int = 50000
|
|
55
55
|
dia_window: float | None = None
|
|
56
|
+
|
|
57
|
+
eic_mz_tol: float = 0.01
|
|
58
|
+
eic_rt_tol: float = 10.0
|
|
56
59
|
|
|
57
60
|
_param_metadata: dict[str, dict[str, Any]] = field(
|
|
58
61
|
default_factory=lambda: {
|
|
@@ -163,6 +166,18 @@ class sample_defaults:
|
|
|
163
166
|
"default": None,
|
|
164
167
|
"min_value": 0.0,
|
|
165
168
|
},
|
|
169
|
+
"eic_mz_tol": {
|
|
170
|
+
"dtype": float,
|
|
171
|
+
"description": "m/z tolerance for EIC extraction (Da)",
|
|
172
|
+
"min_value": 0.001,
|
|
173
|
+
"max_value": 1.0,
|
|
174
|
+
},
|
|
175
|
+
"eic_rt_tol": {
|
|
176
|
+
"dtype": float,
|
|
177
|
+
"description": "RT tolerance for EIC extraction (seconds)",
|
|
178
|
+
"min_value": 0.2,
|
|
179
|
+
"max_value": 60.0,
|
|
180
|
+
},
|
|
166
181
|
},
|
|
167
182
|
repr=False,
|
|
168
183
|
)
|
masster/sample/lib.py
CHANGED
|
@@ -421,14 +421,14 @@ def save_lib_mgf(
|
|
|
421
421
|
# trim spectrum 2 Da lower and 10 Da higher than precursor m/z
|
|
422
422
|
spec = spec.mz_trim(mz_min=row["mz"] - 2.0, mz_max=row["mz"] + 10.0)
|
|
423
423
|
|
|
424
|
-
|
|
424
|
+
file_basename: str = os.path.basename(self.file_path)
|
|
425
425
|
mslevel = 1 if spec.ms_level is None else spec.ms_level
|
|
426
426
|
activation = None
|
|
427
427
|
energy = None
|
|
428
428
|
kineticenergy = None
|
|
429
429
|
if mslevel > 1:
|
|
430
|
-
if "CID" in
|
|
431
|
-
if "EAD" in
|
|
430
|
+
if "CID" in file_basename.upper() or "ZTS" in file_basename.upper():
|
|
431
|
+
if "EAD" in file_basename.upper():
|
|
432
432
|
activation = "CID-EAD"
|
|
433
433
|
# search ([0-9]*KE) in filename.upper() using regex
|
|
434
434
|
match = re.search(r"(\d+)KE", str(filename.upper()))
|
|
@@ -440,14 +440,14 @@ def save_lib_mgf(
|
|
|
440
440
|
kineticenergy = int(match.group(1))
|
|
441
441
|
else:
|
|
442
442
|
activation = "CID"
|
|
443
|
-
elif "EAD" in
|
|
443
|
+
elif "EAD" in file_basename.upper():
|
|
444
444
|
activation = "EAD"
|
|
445
445
|
# search ([0-9]*KE) in file_basename.upper() using regex
|
|
446
|
-
match = re.search(r"(\d+)KE",
|
|
446
|
+
match = re.search(r"(\d+)KE", file_basename.upper())
|
|
447
447
|
if match:
|
|
448
448
|
kineticenergy = int(match.group(1))
|
|
449
449
|
else:
|
|
450
|
-
match = re.search(r"(\d+)EV",
|
|
450
|
+
match = re.search(r"(\d+)EV", file_basename.upper())
|
|
451
451
|
if match:
|
|
452
452
|
kineticenergy = int(match.group(1))
|
|
453
453
|
energy = spec.energy if hasattr(spec, "energy") else None
|
|
@@ -515,14 +515,14 @@ def save_lib_mgf(
|
|
|
515
515
|
kineticenergy = int(match.group(1))
|
|
516
516
|
else:
|
|
517
517
|
activation = "CID"
|
|
518
|
-
elif "EAD" in
|
|
518
|
+
elif "EAD" in file_basename.upper():
|
|
519
519
|
activation = "EAD"
|
|
520
|
-
# search ([0-9]*KE) in
|
|
521
|
-
match = re.search(r"(\d+)KE",
|
|
520
|
+
# search ([0-9]*KE) in file_basename.upper() using regex
|
|
521
|
+
match = re.search(r"(\d+)KE", file_basename.upper())
|
|
522
522
|
if match:
|
|
523
523
|
kineticenergy = int(match.group(1))
|
|
524
524
|
else:
|
|
525
|
-
match = re.search(r"(\d+)EV",
|
|
525
|
+
match = re.search(r"(\d+)EV", file_basename.upper())
|
|
526
526
|
if match:
|
|
527
527
|
kineticenergy = int(match.group(1))
|
|
528
528
|
energy = spec.energy if hasattr(spec, "energy") else None
|
|
@@ -541,7 +541,7 @@ def save_lib_mgf(
|
|
|
541
541
|
"ACTIVATION": activation,
|
|
542
542
|
"COLLISIONENERGY": energy,
|
|
543
543
|
"KINETICENERGY": kineticenergy,
|
|
544
|
-
"FILENAME":
|
|
544
|
+
"FILENAME": file_basename,
|
|
545
545
|
"SCANS": ms1_scan_uid,
|
|
546
546
|
"FID": row["feature_uid"],
|
|
547
547
|
"MSLEVEL": 1 if spec.ms_level is None else spec.ms_level,
|
masster/sample/plot.py
CHANGED
|
@@ -519,6 +519,14 @@ def plot_2d(
|
|
|
519
519
|
# find features with ms2_scans not None and iso==0
|
|
520
520
|
features_df = feats[feats["ms2_scans"].notnull()]
|
|
521
521
|
# Create feature points with proper sizing method
|
|
522
|
+
feature_hover_1 = HoverTool(tooltips=[
|
|
523
|
+
("rt", "@rt"),
|
|
524
|
+
("m/z", "@mz{0.0000}"),
|
|
525
|
+
("feature_uid", "@feature_uid"),
|
|
526
|
+
("inty", "@inty"),
|
|
527
|
+
("quality", "@quality"),
|
|
528
|
+
("rt_delta", "@rt_delta"),
|
|
529
|
+
])
|
|
522
530
|
feature_points_1 = hv.Points(
|
|
523
531
|
features_df,
|
|
524
532
|
kdims=["rt", "mz"],
|
|
@@ -536,11 +544,19 @@ def plot_2d(
|
|
|
536
544
|
color=color_1,
|
|
537
545
|
marker=marker_type,
|
|
538
546
|
size=size_1,
|
|
539
|
-
tools=[
|
|
547
|
+
tools=[feature_hover_1],
|
|
540
548
|
hooks=hooks,
|
|
541
549
|
)
|
|
542
550
|
# find features without MS2 data
|
|
543
551
|
features_df = feats[feats["ms2_scans"].isnull()]
|
|
552
|
+
feature_hover_2 = HoverTool(tooltips=[
|
|
553
|
+
("rt", "@rt"),
|
|
554
|
+
("m/z", "@mz{0.0000}"),
|
|
555
|
+
("feature_uid", "@feature_uid"),
|
|
556
|
+
("inty", "@inty"),
|
|
557
|
+
("quality", "@quality"),
|
|
558
|
+
("rt_delta", "@rt_delta"),
|
|
559
|
+
])
|
|
544
560
|
feature_points_2 = hv.Points(
|
|
545
561
|
features_df,
|
|
546
562
|
kdims=["rt", "mz"],
|
|
@@ -557,7 +573,7 @@ def plot_2d(
|
|
|
557
573
|
color="red",
|
|
558
574
|
marker=marker_type,
|
|
559
575
|
size=size_2,
|
|
560
|
-
tools=[
|
|
576
|
+
tools=[feature_hover_2],
|
|
561
577
|
hooks=hooks,
|
|
562
578
|
)
|
|
563
579
|
|
|
@@ -567,6 +583,16 @@ def plot_2d(
|
|
|
567
583
|
# Convert to pandas for plotting compatibility
|
|
568
584
|
if hasattr(features_df, "to_pandas"):
|
|
569
585
|
features_df = features_df.to_pandas()
|
|
586
|
+
feature_hover_iso = HoverTool(tooltips=[
|
|
587
|
+
("rt", "@rt"),
|
|
588
|
+
("m/z", "@mz{0.0000}"),
|
|
589
|
+
("feature_uid", "@feature_uid"),
|
|
590
|
+
("inty", "@inty"),
|
|
591
|
+
("quality", "@quality"),
|
|
592
|
+
("rt_delta", "@rt_delta"),
|
|
593
|
+
("iso", "@iso"),
|
|
594
|
+
("iso_of", "@iso_of"),
|
|
595
|
+
])
|
|
570
596
|
feature_points_iso = hv.Points(
|
|
571
597
|
features_df,
|
|
572
598
|
kdims=["rt", "mz"],
|
|
@@ -585,7 +611,7 @@ def plot_2d(
|
|
|
585
611
|
color="violet",
|
|
586
612
|
marker=marker_type,
|
|
587
613
|
size=size_1,
|
|
588
|
-
tools=[
|
|
614
|
+
tools=[feature_hover_iso],
|
|
589
615
|
hooks=hooks,
|
|
590
616
|
)
|
|
591
617
|
if show_ms2:
|
|
@@ -597,6 +623,13 @@ def plot_2d(
|
|
|
597
623
|
if len(ms2_orphan) > 0:
|
|
598
624
|
# pandalize
|
|
599
625
|
ms2 = ms2_orphan.to_pandas()
|
|
626
|
+
ms2_hover_3 = HoverTool(tooltips=[
|
|
627
|
+
("rt", "@rt"),
|
|
628
|
+
("prec_mz", "@prec_mz{0.0000}"),
|
|
629
|
+
("index", "@index"),
|
|
630
|
+
("inty_tot", "@inty_tot"),
|
|
631
|
+
("bl", "@bl"),
|
|
632
|
+
])
|
|
600
633
|
feature_points_3 = hv.Points(
|
|
601
634
|
ms2,
|
|
602
635
|
kdims=["rt", "prec_mz"],
|
|
@@ -606,7 +639,7 @@ def plot_2d(
|
|
|
606
639
|
color=color_2,
|
|
607
640
|
marker="x",
|
|
608
641
|
size=size_2,
|
|
609
|
-
tools=[
|
|
642
|
+
tools=[ms2_hover_3],
|
|
610
643
|
)
|
|
611
644
|
|
|
612
645
|
ms2_linked = self.scans_df.filter(pl.col("ms_level") == 2).filter(
|
|
@@ -615,6 +648,13 @@ def plot_2d(
|
|
|
615
648
|
if len(ms2_linked) > 0:
|
|
616
649
|
# pandalize
|
|
617
650
|
ms2 = ms2_linked.to_pandas()
|
|
651
|
+
ms2_hover_4 = HoverTool(tooltips=[
|
|
652
|
+
("rt", "@rt"),
|
|
653
|
+
("prec_mz", "@prec_mz{0.0000}"),
|
|
654
|
+
("index", "@index"),
|
|
655
|
+
("inty_tot", "@inty_tot"),
|
|
656
|
+
("bl", "@bl"),
|
|
657
|
+
])
|
|
618
658
|
feature_points_4 = hv.Points(
|
|
619
659
|
ms2,
|
|
620
660
|
kdims=["rt", "prec_mz"],
|
|
@@ -624,7 +664,7 @@ def plot_2d(
|
|
|
624
664
|
color=color_1,
|
|
625
665
|
marker="x",
|
|
626
666
|
size=size_2,
|
|
627
|
-
tools=[
|
|
667
|
+
tools=[ms2_hover_4],
|
|
628
668
|
)
|
|
629
669
|
|
|
630
670
|
overlay = raster
|
|
@@ -1041,6 +1081,18 @@ def plot_2d_oracle(
|
|
|
1041
1081
|
feat_df = feats.copy()
|
|
1042
1082
|
feat_df = feat_df[feat_df["id_level"] == 2]
|
|
1043
1083
|
|
|
1084
|
+
oracle_hover_1 = HoverTool(tooltips=[
|
|
1085
|
+
("rt", "@rt"),
|
|
1086
|
+
("m/z", "@mz{0.0000}"),
|
|
1087
|
+
("feature_uid", "@feature_uid"),
|
|
1088
|
+
("id_level", "@id_level"),
|
|
1089
|
+
("id_class", "@id_class"),
|
|
1090
|
+
("id_label", "@id_label"),
|
|
1091
|
+
("id_ion", "@id_ion"),
|
|
1092
|
+
("id_evidence", "@id_evidence"),
|
|
1093
|
+
("score", "@score"),
|
|
1094
|
+
("score2", "@score2"),
|
|
1095
|
+
])
|
|
1044
1096
|
feature_points_1 = hv.Points(
|
|
1045
1097
|
feat_df,
|
|
1046
1098
|
kdims=["rt", "mz"],
|
|
@@ -1062,7 +1114,7 @@ def plot_2d_oracle(
|
|
|
1062
1114
|
marker="circle",
|
|
1063
1115
|
size=markersize,
|
|
1064
1116
|
fill_alpha=1.0,
|
|
1065
|
-
tools=[
|
|
1117
|
+
tools=[oracle_hover_1],
|
|
1066
1118
|
)
|
|
1067
1119
|
|
|
1068
1120
|
# feature_points_2 are all features that have ms2_scans not null and id_level ==1
|
|
@@ -1070,6 +1122,15 @@ def plot_2d_oracle(
|
|
|
1070
1122
|
feat_df = feats.copy()
|
|
1071
1123
|
feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] == 1)]
|
|
1072
1124
|
if len(feat_df) > 0:
|
|
1125
|
+
oracle_hover_2 = HoverTool(tooltips=[
|
|
1126
|
+
("rt", "@rt"),
|
|
1127
|
+
("m/z", "@mz{0.0000}"),
|
|
1128
|
+
("feature_uid", "@feature_uid"),
|
|
1129
|
+
("id_level", "@id_level"),
|
|
1130
|
+
("id_label", "@id_label"),
|
|
1131
|
+
("id_ion", "@id_ion"),
|
|
1132
|
+
("id_class", "@id_class"),
|
|
1133
|
+
])
|
|
1073
1134
|
feature_points_2 = hv.Points(
|
|
1074
1135
|
feat_df,
|
|
1075
1136
|
kdims=["rt", "mz"],
|
|
@@ -1088,7 +1149,7 @@ def plot_2d_oracle(
|
|
|
1088
1149
|
marker="circle",
|
|
1089
1150
|
size=markersize,
|
|
1090
1151
|
fill_alpha=0.0,
|
|
1091
|
-
tools=[
|
|
1152
|
+
tools=[oracle_hover_2],
|
|
1092
1153
|
)
|
|
1093
1154
|
|
|
1094
1155
|
# feature_points_3 are all features that have ms2_scans null and id_level ==1
|
|
@@ -1096,6 +1157,15 @@ def plot_2d_oracle(
|
|
|
1096
1157
|
feat_df = feats.copy()
|
|
1097
1158
|
feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] == 1)]
|
|
1098
1159
|
if len(feat_df) > 0:
|
|
1160
|
+
oracle_hover_3 = HoverTool(tooltips=[
|
|
1161
|
+
("rt", "@rt"),
|
|
1162
|
+
("m/z", "@mz{0.0000}"),
|
|
1163
|
+
("feature_uid", "@feature_uid"),
|
|
1164
|
+
("id_level", "@id_level"),
|
|
1165
|
+
("id_label", "@id_label"),
|
|
1166
|
+
("id_ion", "@id_ion"),
|
|
1167
|
+
("id_class", "@id_class"),
|
|
1168
|
+
])
|
|
1099
1169
|
feature_points_3 = hv.Points(
|
|
1100
1170
|
feat_df,
|
|
1101
1171
|
kdims=["rt", "mz"],
|
|
@@ -1114,7 +1184,7 @@ def plot_2d_oracle(
|
|
|
1114
1184
|
marker="diamond",
|
|
1115
1185
|
size=markersize,
|
|
1116
1186
|
fill_alpha=0.0,
|
|
1117
|
-
tools=[
|
|
1187
|
+
tools=[oracle_hover_3],
|
|
1118
1188
|
)
|
|
1119
1189
|
|
|
1120
1190
|
# feature_points_4 are all features that have ms2_scans not null and id_level < 1
|
|
@@ -1122,6 +1192,12 @@ def plot_2d_oracle(
|
|
|
1122
1192
|
feat_df = feats.copy()
|
|
1123
1193
|
feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] < 1)]
|
|
1124
1194
|
if len(feat_df) > 0:
|
|
1195
|
+
oracle_hover_4 = HoverTool(tooltips=[
|
|
1196
|
+
("rt", "@rt"),
|
|
1197
|
+
("m/z", "@mz{0.0000}"),
|
|
1198
|
+
("feature_uid", "@feature_uid"),
|
|
1199
|
+
("inty", "@inty"),
|
|
1200
|
+
])
|
|
1125
1201
|
feature_points_4 = hv.Points(
|
|
1126
1202
|
feat_df,
|
|
1127
1203
|
kdims=["rt", "mz"],
|
|
@@ -1132,14 +1208,20 @@ def plot_2d_oracle(
|
|
|
1132
1208
|
marker="circle",
|
|
1133
1209
|
size=markersize,
|
|
1134
1210
|
fill_alpha=0.0,
|
|
1135
|
-
tools=[
|
|
1211
|
+
tools=[oracle_hover_4],
|
|
1136
1212
|
)
|
|
1137
1213
|
|
|
1138
|
-
#
|
|
1214
|
+
# feature_points_5 are all features that have ms2_scans null and id_level < 1
|
|
1139
1215
|
feature_points_5 = None
|
|
1140
1216
|
feat_df = feats.copy()
|
|
1141
1217
|
feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] < 1)]
|
|
1142
1218
|
if len(feat_df) > 0:
|
|
1219
|
+
oracle_hover_5 = HoverTool(tooltips=[
|
|
1220
|
+
("rt", "@rt"),
|
|
1221
|
+
("m/z", "@mz{0.0000}"),
|
|
1222
|
+
("feature_uid", "@feature_uid"),
|
|
1223
|
+
("inty", "@inty"),
|
|
1224
|
+
])
|
|
1143
1225
|
feature_points_5 = hv.Points(
|
|
1144
1226
|
feat_df,
|
|
1145
1227
|
kdims=["rt", "mz"],
|
|
@@ -1150,7 +1232,7 @@ def plot_2d_oracle(
|
|
|
1150
1232
|
marker="diamond",
|
|
1151
1233
|
fill_alpha=0.0,
|
|
1152
1234
|
size=markersize,
|
|
1153
|
-
tools=[
|
|
1235
|
+
tools=[oracle_hover_5],
|
|
1154
1236
|
)
|
|
1155
1237
|
|
|
1156
1238
|
overlay = raster
|
masster/sample/processing.py
CHANGED
|
@@ -19,55 +19,34 @@ from .defaults.get_spectrum_def import get_spectrum_defaults
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def get_spectrum(self, scan, **kwargs):
|
|
22
|
-
"""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
centroid (bool, optional): Flag indicating whether the spectrum should be centroided.
|
|
43
|
-
If True and the spectrum is not already centroided, the method
|
|
44
|
-
applies denoising followed by centroiding using parameters from self.parameters.
|
|
45
|
-
Default is True.
|
|
46
|
-
deisotope (bool, optional): Flag indicating whether deisotoping should be performed. Default is False.
|
|
47
|
-
dia_stats (optional): Flag or parameter for processing DIA (data-independent acquisition)
|
|
48
|
-
statistics. If provided (and if applicable to the file type), additional
|
|
49
|
-
statistics will be computed for 'ztscan' files. Default is None.
|
|
50
|
-
feature (optional): An optional identifier used when computing DIA statistics. Default is None.
|
|
51
|
-
label (str, optional): Optional label to assign to the spectrum. If not provided,
|
|
52
|
-
a default name is generated based on the MS level and retention time.
|
|
53
|
-
Default is None.
|
|
54
|
-
centroid_algo (str, optional): Algorithm to use for centroiding. Default is None.
|
|
22
|
+
"""Retrieve a single spectrum and optionally post-process it.
|
|
23
|
+
|
|
24
|
+
The function locates the requested scan in ``self.scans_df`` and returns a
|
|
25
|
+
:class:`Spectrum` object. Processing steps (centroiding, deisotoping,
|
|
26
|
+
trimming and optional DIA statistics) are controlled by parameters defined
|
|
27
|
+
in :class:`get_spectrum_defaults`. Pass an instance of that class via
|
|
28
|
+
``**kwargs`` or override individual parameters (they will be validated
|
|
29
|
+
against the defaults class).
|
|
30
|
+
|
|
31
|
+
Main parameters (from ``get_spectrum_defaults``):
|
|
32
|
+
|
|
33
|
+
- scan (list[int]): Scan id(s) to retrieve. A single integer or a list is accepted.
|
|
34
|
+
- precursor_trim (int): m/z window used to trim precursor region for MS2 (default: -10).
|
|
35
|
+
- max_peaks (int | None): Maximum number of peaks to keep; ``None`` keeps all.
|
|
36
|
+
- centroid (bool): Whether to centroid the spectrum (default: True).
|
|
37
|
+
- deisotope (bool): Whether to apply deisotoping (default: True).
|
|
38
|
+
- dia_stats (bool | None): Collect DIA/ztscan statistics when applicable (default: False).
|
|
39
|
+
- feature (int | None): Optional feature id used for computing DIA statistics.
|
|
40
|
+
- label (str | None): Optional label to assign to the returned Spectrum.
|
|
41
|
+
- centroid_algo (str | None): Centroiding algorithm to use (allowed: 'lmp', 'cwt', 'gaussian').
|
|
55
42
|
|
|
56
43
|
Returns:
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
- metadata such as MS level, retention time, energy, and an assigned label
|
|
60
|
-
Depending on the processing steps (centroiding, trimming, deisotoping, etc.), the
|
|
61
|
-
returned spectrum is modified accordingly.
|
|
62
|
-
Returns None or an empty spectrum if the scan is not found or if an error occurs.
|
|
44
|
+
Spectrum or None: Processed spectrum object (may be an empty Spectrum if
|
|
45
|
+
the scan is missing or on error).
|
|
63
46
|
|
|
64
47
|
Notes:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
- For the 'alpharaw' file interface, the method uses internal DataFrame attributes to locate the
|
|
68
|
-
scan and its associated peaks.
|
|
69
|
-
- The method applies additional processing (denoising, centroiding, deisotoping, trimming) based on
|
|
70
|
-
the input flags and the MS level of the spectrum.
|
|
48
|
+
This wrapper validates provided parameters against ``get_spectrum_defaults``.
|
|
49
|
+
Use the defaults class to discover parameter constraints and allowed values.
|
|
71
50
|
"""
|
|
72
51
|
|
|
73
52
|
# parameters initialization
|
|
@@ -510,40 +489,47 @@ def _spec_to_mat(
|
|
|
510
489
|
|
|
511
490
|
|
|
512
491
|
def find_features(self, **kwargs):
|
|
513
|
-
"""
|
|
514
|
-
Detect features in mass spectrometry data by processing MS1 spectra, performing mass trace detection,
|
|
515
|
-
elution peak detection, and feature detection. Optionally, deisotope features and remove low-quality peaks.
|
|
492
|
+
"""Detect features from MS1 data (mass-trace detection, peak deconvolution, feature assembly).
|
|
516
493
|
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
resulting feature map is cleaned, deisotoped (if enabled), and assigned unique IDs before being stored.
|
|
494
|
+
The method converts internal MS1 data into an MSExperiment (one MSSpectrum per cycle), runs mass-trace
|
|
495
|
+
detection, deconvolutes mass traces to find chromatographic peaks, and assembles features. Results are
|
|
496
|
+
cleaned, optionally deisotoped, assigned unique IDs and stored in ``self.features`` / ``self.features_df``.
|
|
521
497
|
|
|
522
498
|
Parameters:
|
|
523
|
-
**kwargs: Keyword
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
499
|
+
**kwargs: Keyword overrides for any parameter available in :class:`find_features_defaults`.
|
|
500
|
+
You may pass a full ``find_features_defaults`` instance or individual parameter values.
|
|
501
|
+
|
|
502
|
+
Main parameters (what they mean, units and tuning guidance):
|
|
503
|
+
|
|
504
|
+
- chrom_fwhm (float, seconds):
|
|
505
|
+
Expected chromatographic peak full-width at half-maximum (FWHM) in seconds. This guides smoothing,
|
|
506
|
+
peak-finding window sizes and RT-based tolerances. Choose a value that matches your LC peak widths:
|
|
507
|
+
small values (e.g. 0.2–0.8 s) for sharp/fast separations, larger values (several seconds) for broad peaks.
|
|
508
|
+
Default: 1.0 s.
|
|
509
|
+
|
|
510
|
+
- noise (float, intensity units):
|
|
511
|
+
Intensity threshold used to ignore background points before mass-trace and peak detection. Raising
|
|
512
|
+
``noise`` reduces false positives from baseline fluctuations but may discard low-abundance true signals;
|
|
513
|
+
lowering it increases sensitivity but raises the false-positive rate. Set this to a conservative estimate of
|
|
514
|
+
your instrument baseline (default: 200.0, instrument-dependent).
|
|
515
|
+
|
|
516
|
+
- chrom_peak_snr (float, unitless):
|
|
517
|
+
Minimum signal-to-noise ratio required to accept an elution peak during peak deconvolution. SNR is usually
|
|
518
|
+
computed as peak height divided by a local noise estimate. Higher values make detection stricter (fewer
|
|
519
|
+
low-quality peaks), lower values make it more permissive. Typical tuning range: ~3 (relaxed) to >10
|
|
520
|
+
(stringent). Default: 10.0.
|
|
521
|
+
|
|
522
|
+
Tuning recommendation: first set ``chrom_fwhm`` to match your LC peak shape, then set ``noise`` to a baseline
|
|
523
|
+
intensity filter for your data, and finally adjust ``chrom_peak_snr`` to reach the desired balance between
|
|
524
|
+
sensitivity and specificity.
|
|
536
525
|
|
|
537
526
|
Attributes set:
|
|
538
|
-
self.features:
|
|
539
|
-
self.features_df:
|
|
540
|
-
detected features.
|
|
527
|
+
self.features: OpenMS FeatureMap produced by the routine (after ensureUniqueId).
|
|
528
|
+
self.features_df: cleaned polars DataFrame of detected features (zero-quality peaks removed).
|
|
541
529
|
|
|
542
530
|
Notes:
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
FeatureFindingMetabo) are used throughout the processing.
|
|
546
|
-
- After feature detection, additional cleaning is performed via internal helper methods.
|
|
531
|
+
The implementation relies on OpenMS components (MassTraceDetection, ElutionPeakDetection,
|
|
532
|
+
FeatureFindingMetabo). See ``find_features_defaults`` for the full list of adjustable parameters.
|
|
547
533
|
"""
|
|
548
534
|
if self.ms1_df is None:
|
|
549
535
|
self.logger.error("No MS1 data found. Please load a file first.")
|
|
@@ -570,14 +556,20 @@ def find_features(self, **kwargs):
|
|
|
570
556
|
self.logger.warning(f"Unknown parameter {key} ignored")
|
|
571
557
|
|
|
572
558
|
# Set global parameters
|
|
573
|
-
if params
|
|
574
|
-
|
|
559
|
+
if hasattr(params, 'threads') and params.threads is not None:
|
|
560
|
+
try:
|
|
561
|
+
# Request the thread count via the OMP_NUM_THREADS environment variable (best effort; a failure only logs a warning)
|
|
562
|
+
import os
|
|
563
|
+
os.environ['OMP_NUM_THREADS'] = str(params.threads)
|
|
564
|
+
self.logger.debug(f"Set thread count to {params.threads} via OMP_NUM_THREADS")
|
|
565
|
+
except Exception:
|
|
566
|
+
self.logger.warning(f"Could not set thread count to {params.threads} - using default")
|
|
575
567
|
|
|
576
568
|
# Set debug mode if enabled
|
|
577
|
-
if params
|
|
578
|
-
|
|
579
|
-
elif params
|
|
580
|
-
|
|
569
|
+
if hasattr(params, 'debug') and params.debug:
|
|
570
|
+
self.logger.debug("Debug mode enabled")
|
|
571
|
+
elif hasattr(params, 'no_progress') and params.no_progress:
|
|
572
|
+
self.logger.debug("No progress mode enabled")
|
|
581
573
|
|
|
582
574
|
self.logger.info("Starting feature detection...")
|
|
583
575
|
self.logger.debug(
|
|
@@ -610,8 +602,7 @@ def find_features(self, **kwargs):
|
|
|
610
602
|
# Apply MTD parameters
|
|
611
603
|
mtd_par.setValue("mass_error_ppm", float(params.get("tol_ppm")))
|
|
612
604
|
mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
|
|
613
|
-
mtd_par.setValue(
|
|
614
|
-
"min_trace_length",
|
|
605
|
+
mtd_par.setValue("min_trace_length",
|
|
615
606
|
float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
|
|
616
607
|
)
|
|
617
608
|
mtd_par.setValue(
|
|
@@ -622,7 +613,7 @@ def find_features(self, **kwargs):
|
|
|
622
613
|
|
|
623
614
|
# Additional MTD parameters
|
|
624
615
|
mtd_par.setValue("min_sample_rate", float(params.get("min_sample_rate")))
|
|
625
|
-
mtd_par.setValue("min_trace_length",
|
|
616
|
+
mtd_par.setValue("min_trace_length", float(params.get("min_trace_length")))
|
|
626
617
|
mtd_par.setValue("trace_termination_criterion", params.get("trace_termination_criterion"))
|
|
627
618
|
mtd_par.setValue("reestimate_mt_sd", "true" if params.get("reestimate_mt_sd") else "false")
|
|
628
619
|
mtd_par.setValue("quant_method", params.get("quant_method"))
|
|
@@ -638,6 +629,7 @@ def find_features(self, **kwargs):
|
|
|
638
629
|
# Apply EPD parameters using our parameter class
|
|
639
630
|
epd_par.setValue("width_filtering", params.get("width_filtering"))
|
|
640
631
|
epd_par.setValue("min_fwhm", float(params.get("chrom_fwhm_min")))
|
|
632
|
+
epd_par.setValue("max_fwhm", float(params.get("chrom_fwhm_max")))
|
|
641
633
|
epd_par.setValue("chrom_fwhm", float(params.get("chrom_fwhm")))
|
|
642
634
|
epd_par.setValue("chrom_peak_snr", float(params.get("chrom_peak_snr")))
|
|
643
635
|
if params.get("masstrace_snr_filtering"):
|
|
@@ -648,10 +640,6 @@ def find_features(self, **kwargs):
|
|
|
648
640
|
# Additional EPD parameters
|
|
649
641
|
epd_par.setValue("enabled", "true" if params.get("enabled") else "false")
|
|
650
642
|
|
|
651
|
-
# Set min/max FWHM parameters
|
|
652
|
-
epd_par.setValue("min_fwhm", float(params.get("min_fwhm")))
|
|
653
|
-
epd_par.setValue("max_fwhm", float(params.get("max_fwhm")))
|
|
654
|
-
|
|
655
643
|
epd.setParameters(epd_par)
|
|
656
644
|
epd.detectPeaks(mass_traces, mass_traces_deconvol)
|
|
657
645
|
|
|
@@ -678,18 +666,19 @@ def find_features(self, **kwargs):
|
|
|
678
666
|
"report_chromatograms",
|
|
679
667
|
"true" if params.get("report_chromatograms") else "false",
|
|
680
668
|
)
|
|
681
|
-
|
|
669
|
+
ffm_par.setValue(
|
|
670
|
+
"report_smoothed_intensities",
|
|
671
|
+
"true" if params.get("report_smoothed_intensities") else "false",
|
|
672
|
+
)
|
|
682
673
|
# Additional FFM parameters
|
|
683
674
|
ffm_par.setValue("local_rt_range", float(params.get("local_rt_range")))
|
|
684
675
|
ffm_par.setValue("local_mz_range", float(params.get("local_mz_range")))
|
|
685
676
|
ffm_par.setValue("charge_lower_bound", int(params.get("charge_lower_bound")))
|
|
686
677
|
ffm_par.setValue("charge_upper_bound", int(params.get("charge_upper_bound")))
|
|
687
|
-
|
|
688
|
-
"report_smoothed_intensities",
|
|
689
|
-
"true" if params.get("report_smoothed_intensities") else "false",
|
|
690
|
-
)
|
|
678
|
+
|
|
691
679
|
|
|
692
680
|
ffm.setParameters(ffm_par)
|
|
681
|
+
|
|
693
682
|
self.logger.debug("Running feature finding with parameters:")
|
|
694
683
|
self.logger.debug(ffm_par)
|
|
695
684
|
ffm.run(mass_traces_deconvol, feature_map, chrom_out)
|
|
@@ -706,7 +695,7 @@ def find_features(self, **kwargs):
|
|
|
706
695
|
df = self._features_deisotope(
|
|
707
696
|
df,
|
|
708
697
|
mz_tol=params.get("deisotope_mz_tol"),
|
|
709
|
-
rt_tol=params.get("
|
|
698
|
+
rt_tol=params.get("chrom_fwhm") * params.get("deisotope_rt_tol_factor"),
|
|
710
699
|
)
|
|
711
700
|
if params.get("deisotope"):
|
|
712
701
|
# record size before deisotoping
|
|
@@ -723,8 +712,8 @@ def find_features(self, **kwargs):
|
|
|
723
712
|
prominence_scaleds: list[float] = []
|
|
724
713
|
height_scaleds: list[float] = []
|
|
725
714
|
|
|
726
|
-
mz_tol =
|
|
727
|
-
rt_tol =
|
|
715
|
+
mz_tol = self.parameters.get("eic_mz_tol")
|
|
716
|
+
rt_tol = self.parameters.get("eic_rt_tol")
|
|
728
717
|
|
|
729
718
|
# iterate over all rows in df using polars iteration
|
|
730
719
|
self.logger.debug("Extracting EICs...")
|
|
@@ -801,27 +790,44 @@ def find_features(self, **kwargs):
|
|
|
801
790
|
|
|
802
791
|
|
|
803
792
|
def find_adducts(self, **kwargs):
|
|
804
|
-
"""
|
|
805
|
-
Detect adducts in mass spectrometry features using OpenMS MetaboliteFeatureDeconvolution.
|
|
793
|
+
"""Detect adduct relationships among detected features.
|
|
806
794
|
|
|
807
|
-
This method
|
|
808
|
-
|
|
809
|
-
|
|
795
|
+
This method groups features that are likely adducts of the same molecular entity
|
|
796
|
+
using OpenMS MetaboliteFeatureDeconvolution. Parameters are taken from the
|
|
797
|
+
:class:`find_adducts_defaults` dataclass; you can pass an instance of that class
|
|
798
|
+
via ``**kwargs`` or override individual parameter names (they will be validated
|
|
799
|
+
against the defaults class).
|
|
810
800
|
|
|
811
|
-
|
|
812
|
-
**kwargs: Keyword arguments for adduct detection parameters. Can include:
|
|
813
|
-
- A find_adducts_defaults instance to set all parameters at once
|
|
814
|
-
- Individual parameter names and values (see find_adducts_defaults for details)
|
|
801
|
+
Main parameters (from ``find_adducts_defaults``):
|
|
815
802
|
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
retention_max_diff (float): Maximum retention time difference for grouping (default: 1.0).
|
|
803
|
+
- adducts (list[str] | str | None):
|
|
804
|
+
List of potential adduct strings formatted for OpenMS, or a short ionization
|
|
805
|
+
mode string (``'pos'``/``'neg'``). When ``None`` a sensible positive-mode
|
|
806
|
+
default set is used.
|
|
821
807
|
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
808
|
+
- charge_min (int):
|
|
809
|
+
Minimum allowed charge state for grouping (default: 1).
|
|
810
|
+
|
|
811
|
+
- charge_max (int):
|
|
812
|
+
Maximum allowed charge state for grouping (default: 2).
|
|
813
|
+
|
|
814
|
+
- charge_span_max (int):
|
|
815
|
+
Maximum span between different charge states within the same adduct group
|
|
816
|
+
(default: 2).
|
|
817
|
+
|
|
818
|
+
- retention_max_diff (float, minutes):
|
|
819
|
+
Global maximum retention-time difference allowed for grouping (default: 1.0).
|
|
820
|
+
|
|
821
|
+
- retention_max_diff_local (float, minutes):
|
|
822
|
+
A tighter, local RT tolerance used for fine-grained grouping (default: 1.0).
|
|
823
|
+
|
|
824
|
+
Side effects:
|
|
825
|
+
Updates ``self.features_df`` with columns ``adduct``, ``adduct_mass`` and
|
|
826
|
+
``adduct_group`` populated from the OpenMS results.
|
|
827
|
+
|
|
828
|
+
Notes:
|
|
829
|
+
Use ``find_adducts_defaults`` to inspect available parameters and their
|
|
830
|
+
canonical descriptions/constraints.
|
|
825
831
|
"""
|
|
826
832
|
params = find_adducts_defaults()
|
|
827
833
|
for key, value in kwargs.items():
|
|
@@ -1171,54 +1177,44 @@ def analyze_dda(self):
|
|
|
1171
1177
|
|
|
1172
1178
|
|
|
1173
1179
|
def find_ms2(self, **kwargs):
|
|
1174
|
-
"""
|
|
1175
|
-
Link MS2 spectra to features in the dataset.
|
|
1176
|
-
This method matches MS2 spectra from the scans dataframe with features in the features dataframe
|
|
1177
|
-
based on retention time (RT) and precursor m/z tolerance criteria. For each feature in the provided
|
|
1178
|
-
or inferred list of feature ids (feature_uid), it computes the RT difference between the feature and available
|
|
1179
|
-
MS2 spectra. It then selects MS2 spectra that fall within a computed RT radius (based on the feature's
|
|
1180
|
-
start and end times) and a specified m/z tolerance. For each feature, it chooses one MS2 spectrum per
|
|
1181
|
-
unique cycle based on the closest RT difference, and it updates the feature with the list of matched
|
|
1182
|
-
scan ids and the spectrum corresponding to the first matching scan id. Additionally, the scan dataframe
|
|
1183
|
-
is updated to associate matched scan ids with the corresponding feature id.
|
|
1180
|
+
"""Link MS2 spectra to detected features.
|
|
1184
1181
|
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
Key Parameters:
|
|
1191
|
-
features (int or list of int, optional): A specific feature id or a list of feature ids to process.
|
|
1192
|
-
If an individual feature_uid is provided and equals -1, all features with no associated MS2 data will be processed.
|
|
1193
|
-
If None, all features in the features dataframe are processed.
|
|
1194
|
-
mz_tol (float, optional): The precursor m/z tolerance to consider when matching MS2 spectra. If not provided,
|
|
1195
|
-
it defaults to 0.5, except for certain file types ('ztscan' or 'dia') which set it to 4.
|
|
1196
|
-
centroid (bool, optional): If True, the returned spectrum will be centroided. Default is True.
|
|
1197
|
-
deisotope (bool, optional): Flag indicating whether deisotoping should be performed. Default is False.
|
|
1198
|
-
dia_stats (bool, optional): A flag to collect additional DIA-related statistics when retrieving a spectrum.
|
|
1199
|
-
Default is False.
|
|
1182
|
+
Matches MS2 scans from ``self.scans_df`` to features in ``self.features_df`` using
|
|
1183
|
+
retention time and precursor m/z criteria. Parameters are defined in
|
|
1184
|
+
:class:`find_ms2_defaults`; pass an instance via ``**kwargs`` or override
|
|
1185
|
+
individual parameters (they will be validated against the defaults class).
|
|
1200
1186
|
|
|
1201
|
-
|
|
1202
|
-
|
|
1187
|
+
Main parameters (from ``find_ms2_defaults``):
|
|
1188
|
+
|
|
1189
|
+
- mz_tol (float):
|
|
1190
|
+
Precursor m/z tolerance used for matching. The effective tolerance may be
|
|
1191
|
+
adjusted by file type (the defaults class provides ``get_mz_tolerance(file_type)``).
|
|
1192
|
+
Default: 0.5 (for ztscan/DIA file types ``mz_tol_ztscan`` is used instead; default 4.0).
|
|
1193
|
+
|
|
1194
|
+
- centroid (bool):
|
|
1195
|
+
If True, retrieved spectra will be centroided (default: True).
|
|
1203
1196
|
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1197
|
+
- deisotope (bool):
|
|
1198
|
+
If True, spectra will be deisotoped before returning (default: False).
|
|
1199
|
+
|
|
1200
|
+
- dia_stats (bool):
|
|
1201
|
+
Collect additional DIA/ztscan statistics when retrieving spectra (default: False).
|
|
1202
|
+
|
|
1203
|
+
- features (int | list[int] | None):
|
|
1204
|
+
Specific feature uid or list of uids to process. Use ``None`` to process all
|
|
1205
|
+
features. An empty list is treated as ``None``.
|
|
1206
|
+
|
|
1207
|
+
- mz_tol_ztscan (float):
|
|
1208
|
+
m/z tolerance used for ztscan/DIA file types (default: 4.0).
|
|
1209
|
+
|
|
1210
|
+
Side effects:
|
|
1211
|
+
Updates ``self.features_df`` with columns ``ms2_scans`` and ``ms2_specs`` and
|
|
1212
|
+
updates ``self.scans_df`` to set the ``feature_uid`` for matched scans.
|
|
1208
1213
|
|
|
1209
1214
|
Notes:
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
- The function assumes that self.features_df and self.scans_df are already set up and contain the expected
|
|
1214
|
-
columns ('feature_uid', 'rt', 'rt_start', 'rt_end', 'mz' for features and 'scan_uid', 'rt', 'prec_mz', 'cycle', 'ms_level'
|
|
1215
|
-
for scans).
|
|
1216
|
-
|
|
1217
|
-
Examples:
|
|
1218
|
-
Assume the current instance has features and scans data loaded, then to link MS2 spectra for all features:
|
|
1219
|
-
instance.find_ms2()
|
|
1220
|
-
To link MS2 spectra for a specific list of feature ids:
|
|
1221
|
-
instance.find_ms2(feature_uid=[1, 3, 5])
|
|
1215
|
+
The function is implemented to be efficient by vectorizing the matching
|
|
1216
|
+
and performing batch updates. Use ``find_ms2_defaults`` to inspect all
|
|
1217
|
+
available parameters and their canonical descriptions.
|
|
1222
1218
|
"""
|
|
1223
1219
|
|
|
1224
1220
|
# parameters initialization
|
|
@@ -1368,6 +1364,7 @@ def find_ms2(self, **kwargs):
|
|
|
1368
1364
|
|
|
1369
1365
|
self.logger.debug("Update features.")
|
|
1370
1366
|
# Convert to polars if needed and batch update features_df
|
|
1367
|
+
# Convert to polars if needed and batch update features_df
|
|
1371
1368
|
if not isinstance(features_df, pl.DataFrame):
|
|
1372
1369
|
features_df = pl.from_pandas(features_df)
|
|
1373
1370
|
|
masster/study/processing.py
CHANGED
|
@@ -17,22 +17,19 @@ from masster.study.defaults import (
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def align(self, **kwargs):
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
num_used_points (int): Number of points used for superimposer (default: 1000).
|
|
34
|
-
save_features (bool): Whether to save features after alignment (default: True).
|
|
35
|
-
skip_blanks (bool): Whether to skip blank samples during alignment (default: True).
|
|
20
|
+
"""Align feature maps using pose clustering and update feature RTs.
|
|
21
|
+
|
|
22
|
+
Parameters can be provided as an ``align_defaults`` instance or as
|
|
23
|
+
individual keyword arguments; they are validated against the defaults class.
|
|
24
|
+
|
|
25
|
+
Key parameters (from ``align_defaults``):
|
|
26
|
+
- rt_max_diff (float): Maximum RT difference for pair finding (seconds).
|
|
27
|
+
- mz_max_diff (float): Maximum m/z difference for pair finding (Da).
|
|
28
|
+
- rt_pair_distance_frac (float): RT fraction used by the superimposer.
|
|
29
|
+
- mz_pair_max_distance (float): Max m/z distance for pair selection.
|
|
30
|
+
- num_used_points (int): Number of points to use for alignment estimation.
|
|
31
|
+
- save_features (bool): If True, save updated features after alignment.
|
|
32
|
+
- skip_blanks (bool): If True, skip blank samples during alignment.
|
|
36
33
|
"""
|
|
37
34
|
# parameters initialization
|
|
38
35
|
params = align_defaults()
|
|
@@ -225,20 +222,17 @@ def align(self, **kwargs):
|
|
|
225
222
|
|
|
226
223
|
|
|
227
224
|
def merge(self, **kwargs):
|
|
228
|
-
"""
|
|
229
|
-
Groups features across samples into consensus features using the specified algorithm.
|
|
225
|
+
"""Group features across samples into consensus features.
|
|
230
226
|
|
|
231
|
-
Parameters
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
mz_tol (float): m/z tolerance for grouping (default: 0.01).
|
|
241
|
-
rt_tol (float): RT tolerance for grouping (default: 1.0).
|
|
227
|
+
Parameters can be provided as a ``merge_defaults`` instance or as
|
|
228
|
+
individual keyword arguments; they are validated against the defaults class.
|
|
229
|
+
|
|
230
|
+
Key parameters (from ``merge_defaults``):
|
|
231
|
+
- algorithm (str): Grouping algorithm to use ('qt', 'kd', 'unlabeled', 'sequential').
|
|
232
|
+
- min_samples (int): Minimum number of samples required for a consensus feature.
|
|
233
|
+
- link_ms2 (bool): Whether to attach/link MS2 spectra to consensus features.
|
|
234
|
+
- mz_tol (float): m/z tolerance for grouping (Da).
|
|
235
|
+
- rt_tol (float): RT tolerance for grouping (seconds).
|
|
242
236
|
"""
|
|
243
237
|
# Reset consensus-related DataFrames at the start
|
|
244
238
|
self.consensus_df = pl.DataFrame()
|
|
@@ -768,17 +762,20 @@ def filter_consensus(
|
|
|
768
762
|
|
|
769
763
|
## TODO is uid supposed to be a list? rt_tol 0?
|
|
770
764
|
def _integrate_chrom_impl(self, **kwargs):
|
|
771
|
-
"""
|
|
772
|
-
Given a consensus_id, integrate the intensity of all features in the consensus map.
|
|
765
|
+
"""Integrate chromatogram intensities for consensus features.
|
|
773
766
|
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
767
|
+
Integrates EICs for consensus features using parameters defined in
|
|
768
|
+
:class:`integrate_defaults`. Pass an ``integrate_defaults`` instance via
|
|
769
|
+
``**kwargs`` or override individual parameters (they will be validated
|
|
770
|
+
against the defaults class).
|
|
771
|
+
|
|
772
|
+
Main parameters (from ``integrate_defaults``):
|
|
773
|
+
|
|
774
|
+
- uids (Optional[list]): List of consensus UIDs to integrate; ``None`` means all.
|
|
775
|
+
- rt_tol (float): RT tolerance (seconds) used when locating integration boundaries.
|
|
778
776
|
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
rt_tol: RT tolerance for integration boundaries.
|
|
777
|
+
Notes:
|
|
778
|
+
This function batches updates to the study's feature table for efficiency.
|
|
782
779
|
"""
|
|
783
780
|
# parameters initialization
|
|
784
781
|
params = integrate_defaults()
|
|
@@ -984,17 +981,11 @@ def _integrate_chrom_impl(self, **kwargs):
|
|
|
984
981
|
|
|
985
982
|
|
|
986
983
|
def integrate(self, **kwargs):
|
|
987
|
-
"""
|
|
988
|
-
Integrate chromatograms across consensus features.
|
|
989
|
-
|
|
990
|
-
Parameters:
|
|
991
|
-
**kwargs: Keyword arguments for integration parameters. Can include:
|
|
992
|
-
- An integrate_defaults instance to set all parameters at once
|
|
993
|
-
- Individual parameter names and values (see integrate_defaults for details)
|
|
984
|
+
"""Integrate chromatograms across consensus features.
|
|
994
985
|
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
986
|
+
Wrapper that extracts parameters from :class:`integrate_defaults` and
|
|
987
|
+
calls the underlying implementation. See ``integrate_defaults`` for
|
|
988
|
+
the canonical parameter list and descriptions.
|
|
998
989
|
"""
|
|
999
990
|
# parameters initialization
|
|
1000
991
|
params = integrate_defaults()
|
|
@@ -13,11 +13,11 @@ masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005m
|
|
|
13
13
|
masster/sample/__init__.py,sha256=HL0m1ept0PMAYUCQtDDnkdOS12IFl6oLAq4TZQz83uY,170
|
|
14
14
|
masster/sample/h5.py,sha256=aGj3vJsM08wiWFKryRCn5ROKBSRX85vE0Z-jiPSqLjI,63817
|
|
15
15
|
masster/sample/helpers.py,sha256=OEgvR3bptA-tEqHAFVPjWpbagKXAU1h0bePPi9ttHa4,34842
|
|
16
|
-
masster/sample/lib.py,sha256=
|
|
16
|
+
masster/sample/lib.py,sha256=l5YdU9TxEWJI0kJxXxrRCxgDDwbzO5zBf1_Qi_HY87w,33556
|
|
17
17
|
masster/sample/load.py,sha256=y-KUJ2nCFX_06FHPUOh-CzRRvaTx14xNcXoL19bU8qY,47562
|
|
18
18
|
masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
|
|
19
|
-
masster/sample/plot.py,sha256=
|
|
20
|
-
masster/sample/processing.py,sha256=
|
|
19
|
+
masster/sample/plot.py,sha256=2fEHy3rQDj6JOZEev__xyA7_6de1y4s_U2hj2xiAU5w,74439
|
|
20
|
+
masster/sample/processing.py,sha256=NOx8c_y6oXVY3LCnxY9Rw4ekr_nqbIEf8LLlsYva_oY,57503
|
|
21
21
|
masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
|
|
22
22
|
masster/sample/sample.py,sha256=ekS-qBfzx3_Lb2KLMu-s90owRlk3lK4616mmiAiiHTc,16726
|
|
23
23
|
masster/sample/sample5_schema.json,sha256=3SPFQZH4SooLYUt_lW-PCOE9rHnl56Vhc2XG-r1nyEQ,3586
|
|
@@ -25,10 +25,10 @@ masster/sample/save.py,sha256=o9eFSqqr7KYwvCD3gOJt_nZ4h3pkflWqs0n0oSLM-sU,31970
|
|
|
25
25
|
masster/sample/sciex.py,sha256=q6PdcjCtV2PWnJiXuvfISu09zjkaTR_fvHvWN9OvOcM,46870
|
|
26
26
|
masster/sample/defaults/__init__.py,sha256=A09AOP44cxD_oYohyt7XFUho0zndRcrzVD4DUaGnKH4,447
|
|
27
27
|
masster/sample/defaults/find_adducts_def.py,sha256=6CcGRlz4VeosoBT_W0bkR1Kjj11Rq3QvuuOnkizIZyk,11630
|
|
28
|
-
masster/sample/defaults/find_features_def.py,sha256=
|
|
28
|
+
masster/sample/defaults/find_features_def.py,sha256=qTf9G3h6zhfxUk7TEPT5hAwgQ3gZthavoN89okhVVeQ,17193
|
|
29
29
|
masster/sample/defaults/find_ms2_def.py,sha256=KTELMAnioGLYbhzAwOgK14TZqboPEvzeBN0HC-v0Z5A,9872
|
|
30
30
|
masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2AtQDHcPu-O-YoQPs,11460
|
|
31
|
-
masster/sample/defaults/sample_def.py,sha256=
|
|
31
|
+
masster/sample/defaults/sample_def.py,sha256=Q_yal7iYBnGEty4uPlOiAp9OKX5392HsAKezA3xCb34,14583
|
|
32
32
|
masster/study/__init__.py,sha256=Zspv6U8jFqjkHGYdNdDy1rfUnCSolCzUdgSSg98PRgE,166
|
|
33
33
|
masster/study/export.py,sha256=bm3e6AEwkXqBO6Pwd-2pWhxOmzQTFlOSauXFnaiSJDI,29019
|
|
34
34
|
masster/study/h5.py,sha256=EcpyYfMknDzzdA6XTyMU_ppY92_DsPSPYGE0kpVN7T8,66429
|
|
@@ -37,7 +37,7 @@ masster/study/helpers_optimized.py,sha256=EgOgPaL3c2LA8jDhnlEHvzb7O9Um-vnMIcnNao
|
|
|
37
37
|
masster/study/load.py,sha256=TLxVhXu0HHb51lGggXitQLtfNxz2JJfKMkAXJbxhvhM,46880
|
|
38
38
|
masster/study/parameters.py,sha256=0elaF7YspTsB7qyajWAbRNL2VfKlGz5GJLifmO8IGkk,3276
|
|
39
39
|
masster/study/plot.py,sha256=NW31XdM9Bf5wNvIAs-56AIoPA8VLTqBzr6qJInfZmhc,25159
|
|
40
|
-
masster/study/processing.py,sha256=
|
|
40
|
+
masster/study/processing.py,sha256=c7rMc5LA6lcGkj9qfQSr9pMFbHWPa9spMUUV4LQPm6M,45670
|
|
41
41
|
masster/study/save.py,sha256=bcRADWTvhTER9WRkT9zNU5mDUPQZkZB2cuJwpRsYmrM,6589
|
|
42
42
|
masster/study/study.py,sha256=4jItwNGUZUv9uDwAcfFYT3GRh-ACJ1bddSGQaiQs6YM,29002
|
|
43
43
|
masster/study/study5_schema.json,sha256=A_xDPzB97xt2EFeQsX9j8Ut7yC4_DS7BZ24ucotOXIw,5103
|
|
@@ -52,8 +52,8 @@ masster/study/defaults/integrate_chrom_def.py,sha256=Rih3-vat7fHGVfIvRitjNJJI3zL
|
|
|
52
52
|
masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
|
|
53
53
|
masster/study/defaults/merge_def.py,sha256=EBsKE3hsAkTEzN9dpdRD5W3_suTKy_WZ_96rwS0uBuE,8572
|
|
54
54
|
masster/study/defaults/study_def.py,sha256=hj8bYtEPwzdowC95yfyoCFt6fZkQePLjpJtmpNz9Z5M,9533
|
|
55
|
-
masster-0.3.
|
|
56
|
-
masster-0.3.
|
|
57
|
-
masster-0.3.
|
|
58
|
-
masster-0.3.
|
|
59
|
-
masster-0.3.
|
|
55
|
+
masster-0.3.10.dist-info/METADATA,sha256=cC-8Ghfpd_dNFIzSiuEj3hx5F81GW6S0oOR0cE37rDs,44293
|
|
56
|
+
masster-0.3.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
57
|
+
masster-0.3.10.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
58
|
+
masster-0.3.10.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
|
|
59
|
+
masster-0.3.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|