masster 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +16 -6
- masster/sample/defaults/sample_def.py +1 -1
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +190 -140
- masster/sample/load.py +13 -9
- masster/sample/plot.py +256 -147
- masster/sample/processing.py +18 -12
- masster/sample/sample.py +10 -4
- masster/sample/sample5_schema.json +38 -29
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +231 -13
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +757 -246
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +57 -25
- masster/study/plot.py +1244 -129
- masster/study/processing.py +194 -86
- masster/study/save.py +7 -7
- masster/study/study.py +154 -89
- masster/study/study5_schema.json +15 -15
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/METADATA +1 -1
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/RECORD +33 -31
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/WHEEL +0 -0
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/entry_points.txt +0 -0
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/licenses/LICENSE +0 -0
masster/sample/processing.py
CHANGED
|
@@ -519,6 +519,10 @@ def find_features(self, **kwargs):
|
|
|
519
519
|
low-quality peaks), lower values make it more permissive. Typical tuning range: ~3 (relaxed) to >10
|
|
520
520
|
(stringent). Default: 10.0.
|
|
521
521
|
|
|
522
|
+
- isotope_filtering_model (str):
|
|
523
|
+
Isotope filtering model ('metabolites (2% RMS)', 'metabolites (5% RMS)', 'peptides', 'none').
|
|
524
|
+
Default: 'metabolites (5% RMS)'.
|
|
525
|
+
|
|
522
526
|
Tuning recommendation: first set ``chrom_fwhm`` to match your LC peak shape, then set ``noise`` to a baseline
|
|
523
527
|
intensity filter for your data, and finally adjust ``chrom_peak_snr`` to reach the desired balance between
|
|
524
528
|
sensitivity and specificity.
|
|
@@ -556,24 +560,25 @@ def find_features(self, **kwargs):
|
|
|
556
560
|
self.logger.warning(f"Unknown parameter {key} ignored")
|
|
557
561
|
|
|
558
562
|
# Set global parameters
|
|
559
|
-
if hasattr(params,
|
|
563
|
+
if hasattr(params, "threads") and params.threads is not None:
|
|
560
564
|
try:
|
|
561
565
|
# Try setting via OpenMP environment variable first (newer approach)
|
|
562
566
|
import os
|
|
563
|
-
|
|
567
|
+
|
|
568
|
+
os.environ["OMP_NUM_THREADS"] = str(params.threads)
|
|
564
569
|
self.logger.debug(f"Set thread count to {params.threads} via OMP_NUM_THREADS")
|
|
565
570
|
except Exception:
|
|
566
571
|
self.logger.warning(f"Could not set thread count to {params.threads} - using default")
|
|
567
|
-
|
|
572
|
+
|
|
568
573
|
# Set debug mode if enabled
|
|
569
|
-
if hasattr(params,
|
|
574
|
+
if hasattr(params, "debug") and params.debug:
|
|
570
575
|
self.logger.debug("Debug mode enabled")
|
|
571
|
-
elif hasattr(params,
|
|
576
|
+
elif hasattr(params, "no_progress") and params.no_progress:
|
|
572
577
|
self.logger.debug("No progress mode enabled")
|
|
573
|
-
|
|
578
|
+
|
|
574
579
|
self.logger.info("Starting feature detection...")
|
|
575
580
|
self.logger.debug(
|
|
576
|
-
f"Parameters: chrom_fwhm={params.get('chrom_fwhm')}, noise={params.get('noise')}, tol_ppm={params.get('tol_ppm')}",
|
|
581
|
+
f"Parameters: chrom_fwhm={params.get('chrom_fwhm')}, noise={params.get('noise')}, tol_ppm={params.get('tol_ppm')}, isotope_filtering_model={params.get('isotope_filtering_model')}",
|
|
577
582
|
)
|
|
578
583
|
|
|
579
584
|
exp = oms.MSExperiment()
|
|
@@ -602,7 +607,8 @@ def find_features(self, **kwargs):
|
|
|
602
607
|
# Apply MTD parameters
|
|
603
608
|
mtd_par.setValue("mass_error_ppm", float(params.get("tol_ppm")))
|
|
604
609
|
mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
|
|
605
|
-
mtd_par.setValue(
|
|
610
|
+
mtd_par.setValue(
|
|
611
|
+
"min_trace_length",
|
|
606
612
|
float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
|
|
607
613
|
)
|
|
608
614
|
mtd_par.setValue(
|
|
@@ -610,7 +616,7 @@ def find_features(self, **kwargs):
|
|
|
610
616
|
int(params.get("trace_termination_outliers")),
|
|
611
617
|
)
|
|
612
618
|
mtd_par.setValue("chrom_peak_snr", float(params.get("chrom_peak_snr")))
|
|
613
|
-
|
|
619
|
+
|
|
614
620
|
# Additional MTD parameters
|
|
615
621
|
mtd_par.setValue("min_sample_rate", float(params.get("min_sample_rate")))
|
|
616
622
|
mtd_par.setValue("min_trace_length", float(params.get("min_trace_length")))
|
|
@@ -636,10 +642,10 @@ def find_features(self, **kwargs):
|
|
|
636
642
|
epd_par.setValue("masstrace_snr_filtering", "true")
|
|
637
643
|
if params.get("mz_scoring_13C"):
|
|
638
644
|
epd_par.setValue("mz_scoring_13C", "true")
|
|
639
|
-
|
|
645
|
+
|
|
640
646
|
# Additional EPD parameters
|
|
641
647
|
epd_par.setValue("enabled", "true" if params.get("enabled") else "false")
|
|
642
|
-
|
|
648
|
+
|
|
643
649
|
epd.setParameters(epd_par)
|
|
644
650
|
epd.detectPeaks(mass_traces, mass_traces_deconvol)
|
|
645
651
|
|
|
@@ -675,7 +681,7 @@ def find_features(self, **kwargs):
|
|
|
675
681
|
ffm_par.setValue("local_mz_range", float(params.get("local_mz_range")))
|
|
676
682
|
ffm_par.setValue("charge_lower_bound", int(params.get("charge_lower_bound")))
|
|
677
683
|
ffm_par.setValue("charge_upper_bound", int(params.get("charge_upper_bound")))
|
|
678
|
-
|
|
684
|
+
ffm_par.setValue("isotope_filtering_model", params.get("isotope_filtering_model"))
|
|
679
685
|
|
|
680
686
|
ffm.setParameters(ffm_par)
|
|
681
687
|
|
masster/sample/sample.py
CHANGED
|
@@ -62,6 +62,7 @@ from masster.sample.helpers import select_closest_scan
|
|
|
62
62
|
from masster.sample.helpers import get_dda_stats
|
|
63
63
|
from masster.sample.helpers import get_feature
|
|
64
64
|
from masster.sample.helpers import get_scan
|
|
65
|
+
from masster.sample.helpers import get_eic
|
|
65
66
|
from masster.sample.helpers import set_source
|
|
66
67
|
from masster.sample.load import _load_featureXML
|
|
67
68
|
from masster.sample.load import _load_ms2data
|
|
@@ -80,6 +81,8 @@ from masster.sample.plot import plot_feature_stats
|
|
|
80
81
|
from masster.sample.plot import plot_ms2_cycle
|
|
81
82
|
from masster.sample.plot import plot_ms2_eic
|
|
82
83
|
from masster.sample.plot import plot_ms2_q1
|
|
84
|
+
from masster.sample.plot import plot_bpc
|
|
85
|
+
from masster.sample.plot import plot_tic
|
|
83
86
|
from masster.sample.processing import _clean_features_df
|
|
84
87
|
from masster.sample.processing import _features_deisotope
|
|
85
88
|
from masster.sample.processing import _get_ztscan_stats
|
|
@@ -229,6 +232,9 @@ class Sample:
|
|
|
229
232
|
plot_ms2_cycle = plot_ms2_cycle
|
|
230
233
|
plot_ms2_eic = plot_ms2_eic
|
|
231
234
|
plot_ms2_q1 = plot_ms2_q1
|
|
235
|
+
plot_bpc = plot_bpc
|
|
236
|
+
plot_tic = plot_tic
|
|
237
|
+
get_eic = get_eic
|
|
232
238
|
get_feature = get_feature
|
|
233
239
|
get_scan = get_scan
|
|
234
240
|
get_dda_stats = get_dda_stats
|
|
@@ -333,15 +339,15 @@ class Sample:
|
|
|
333
339
|
if module_name.startswith(study_module_prefix) and module_name != current_module:
|
|
334
340
|
study_modules.append(module_name)
|
|
335
341
|
|
|
336
|
-
|
|
342
|
+
""" # Add parameters submodules
|
|
337
343
|
parameters_modules = []
|
|
338
344
|
parameters_module_prefix = f"{base_modname}.parameters."
|
|
339
345
|
for module_name in sys.modules:
|
|
340
346
|
if module_name.startswith(parameters_module_prefix) and module_name != current_module:
|
|
341
347
|
parameters_modules.append(module_name)
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
all_modules_to_reload = core_modules + sample_modules + study_modules
|
|
348
|
+
"""
|
|
349
|
+
|
|
350
|
+
all_modules_to_reload = core_modules + sample_modules + study_modules # + parameters_modules
|
|
345
351
|
|
|
346
352
|
# Reload all discovered modules
|
|
347
353
|
for full_module_name in all_modules_to_reload:
|
|
masster/sample/sample5_schema.json
CHANGED
|
@@ -7,6 +7,9 @@
|
|
|
7
7
|
"feature_id": {
|
|
8
8
|
"dtype": "pl.Utf8"
|
|
9
9
|
},
|
|
10
|
+
"sample_uid": {
|
|
11
|
+
"dtype": "pl.Int32"
|
|
12
|
+
},
|
|
10
13
|
"mz": {
|
|
11
14
|
"dtype": "pl.Float64"
|
|
12
15
|
},
|
|
@@ -46,18 +49,24 @@
|
|
|
46
49
|
"iso_of": {
|
|
47
50
|
"dtype": "pl.Int64"
|
|
48
51
|
},
|
|
49
|
-
"adduct_group": {
|
|
50
|
-
"dtype": "pl.Int64"
|
|
51
|
-
},
|
|
52
52
|
"adduct": {
|
|
53
53
|
"dtype": "pl.Utf8"
|
|
54
54
|
},
|
|
55
55
|
"adduct_mass": {
|
|
56
56
|
"dtype": "pl.Float64"
|
|
57
57
|
},
|
|
58
|
+
"adduct_group": {
|
|
59
|
+
"dtype": "pl.Int64"
|
|
60
|
+
},
|
|
58
61
|
"chrom": {
|
|
59
62
|
"dtype": "pl.Object"
|
|
60
63
|
},
|
|
64
|
+
"filled": {
|
|
65
|
+
"dtype": "pl.Boolean"
|
|
66
|
+
},
|
|
67
|
+
"chrom_area": {
|
|
68
|
+
"dtype": "pl.Float64"
|
|
69
|
+
},
|
|
61
70
|
"chrom_coherence": {
|
|
62
71
|
"dtype": "pl.Float64"
|
|
63
72
|
},
|
|
@@ -100,64 +109,64 @@
|
|
|
100
109
|
},
|
|
101
110
|
"scans_df": {
|
|
102
111
|
"columns": {
|
|
103
|
-
"
|
|
104
|
-
"dtype": "pl.
|
|
112
|
+
"bl": {
|
|
113
|
+
"dtype": "pl.Float64"
|
|
114
|
+
},
|
|
115
|
+
"comment": {
|
|
116
|
+
"dtype": "pl.Utf8"
|
|
105
117
|
},
|
|
106
118
|
"cycle": {
|
|
107
119
|
"dtype": "pl.Int64"
|
|
108
120
|
},
|
|
109
|
-
"
|
|
121
|
+
"energy": {
|
|
122
|
+
"dtype": "pl.Float64"
|
|
123
|
+
},
|
|
124
|
+
"feature_uid": {
|
|
110
125
|
"dtype": "pl.Int64"
|
|
111
126
|
},
|
|
112
|
-
"
|
|
113
|
-
"dtype": "pl.
|
|
127
|
+
"id": {
|
|
128
|
+
"dtype": "pl.Utf8"
|
|
114
129
|
},
|
|
115
|
-
"
|
|
130
|
+
"inty_max": {
|
|
116
131
|
"dtype": "pl.Float64"
|
|
117
132
|
},
|
|
118
133
|
"inty_min": {
|
|
119
134
|
"dtype": "pl.Float64"
|
|
120
135
|
},
|
|
121
|
-
"
|
|
136
|
+
"inty_tot": {
|
|
122
137
|
"dtype": "pl.Float64"
|
|
123
138
|
},
|
|
124
|
-
"
|
|
125
|
-
"dtype": "pl.
|
|
139
|
+
"ms2_n": {
|
|
140
|
+
"dtype": "pl.Int64"
|
|
126
141
|
},
|
|
127
|
-
"
|
|
128
|
-
"dtype": "pl.
|
|
142
|
+
"ms_level": {
|
|
143
|
+
"dtype": "pl.Int64"
|
|
129
144
|
},
|
|
130
145
|
"mz_max": {
|
|
131
146
|
"dtype": "pl.Float64"
|
|
132
147
|
},
|
|
133
|
-
"
|
|
134
|
-
"dtype": "pl.
|
|
148
|
+
"mz_min": {
|
|
149
|
+
"dtype": "pl.Float64"
|
|
135
150
|
},
|
|
136
151
|
"name": {
|
|
137
152
|
"dtype": "pl.Utf8"
|
|
138
153
|
},
|
|
139
|
-
"
|
|
140
|
-
"dtype": "pl.Utf8"
|
|
141
|
-
},
|
|
142
|
-
"prec_mz": {
|
|
154
|
+
"prec_inty": {
|
|
143
155
|
"dtype": "pl.Float64"
|
|
144
156
|
},
|
|
145
|
-
"
|
|
157
|
+
"prec_mz": {
|
|
146
158
|
"dtype": "pl.Float64"
|
|
147
159
|
},
|
|
148
160
|
"prec_mz_max": {
|
|
149
161
|
"dtype": "pl.Float64"
|
|
150
162
|
},
|
|
151
|
-
"
|
|
163
|
+
"prec_mz_min": {
|
|
152
164
|
"dtype": "pl.Float64"
|
|
153
165
|
},
|
|
154
|
-
"
|
|
166
|
+
"rt": {
|
|
155
167
|
"dtype": "pl.Float64"
|
|
156
168
|
},
|
|
157
|
-
"
|
|
158
|
-
"dtype": "pl.Int64"
|
|
159
|
-
},
|
|
160
|
-
"ms2_n": {
|
|
169
|
+
"scan_uid": {
|
|
161
170
|
"dtype": "pl.Int64"
|
|
162
171
|
},
|
|
163
172
|
"time_cycle": {
|
|
@@ -169,10 +178,10 @@
|
|
|
169
178
|
"time_ms1_to_ms2": {
|
|
170
179
|
"dtype": "pl.Float64"
|
|
171
180
|
},
|
|
172
|
-
"
|
|
181
|
+
"time_ms2_to_ms1": {
|
|
173
182
|
"dtype": "pl.Float64"
|
|
174
183
|
},
|
|
175
|
-
"
|
|
184
|
+
"time_ms2_to_ms2": {
|
|
176
185
|
"dtype": "pl.Float64"
|
|
177
186
|
}
|
|
178
187
|
}
|
masster/sample/save.py
CHANGED
|
@@ -134,10 +134,10 @@ def export_features(self, filename="features.csv"):
|
|
|
134
134
|
# clone df
|
|
135
135
|
clean_df = self.features_df.clone()
|
|
136
136
|
filename = os.path.abspath(filename)
|
|
137
|
-
# add a column has_ms2=True if
|
|
137
|
+
# add a column has_ms2=True if column ms2_scans is not None
|
|
138
138
|
if "ms2_scans" in clean_df.columns:
|
|
139
139
|
clean_df = clean_df.with_columns(
|
|
140
|
-
(pl.col("ms2_scans").is_not_null()).alias("has_ms2")
|
|
140
|
+
(pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
|
|
141
141
|
)
|
|
142
142
|
clean_df = self.features_df.select([
|
|
143
143
|
col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
|
|
@@ -215,7 +215,7 @@ def export_mgf(
|
|
|
215
215
|
return
|
|
216
216
|
else:
|
|
217
217
|
self.features_df = self.features.get_df()
|
|
218
|
-
|
|
218
|
+
|
|
219
219
|
# Apply filtering at DataFrame level for better performance
|
|
220
220
|
features = self.features_df
|
|
221
221
|
if mz_start is not None:
|
|
@@ -228,7 +228,7 @@ def export_mgf(
|
|
|
228
228
|
features = features.filter(pl.col("rt") <= rt_end)
|
|
229
229
|
if not include_all_ms1:
|
|
230
230
|
features = features.filter(pl.col("ms2_scans").is_not_null())
|
|
231
|
-
|
|
231
|
+
|
|
232
232
|
# Convert to list of dictionaries for faster iteration
|
|
233
233
|
features_list = features.to_dicts()
|
|
234
234
|
|
|
@@ -286,7 +286,10 @@ def export_mgf(
|
|
|
286
286
|
centroid_algo = "cr"
|
|
287
287
|
|
|
288
288
|
# count how many features have charge < 0
|
|
289
|
-
if
|
|
289
|
+
if (
|
|
290
|
+
self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
|
|
291
|
+
> 0
|
|
292
|
+
):
|
|
290
293
|
preferred_charge = -1
|
|
291
294
|
else:
|
|
292
295
|
preferred_charge = 1
|
|
@@ -312,7 +315,7 @@ def export_mgf(
|
|
|
312
315
|
rt = row["rt"]
|
|
313
316
|
rt_str = f"{rt:.2f}"
|
|
314
317
|
mz_str = f"{mz:.4f}"
|
|
315
|
-
|
|
318
|
+
|
|
316
319
|
# Filtering is now done at DataFrame level, so we can skip these checks
|
|
317
320
|
if row["ms2_scans"] is None and not include_all_ms1:
|
|
318
321
|
skip = skip + 1
|
|
@@ -338,7 +341,7 @@ def export_mgf(
|
|
|
338
341
|
|
|
339
342
|
charge = preferred_charge
|
|
340
343
|
if row["charge"] is not None and row["charge"] != 0:
|
|
341
|
-
|
|
344
|
+
charge = row["charge"]
|
|
342
345
|
|
|
343
346
|
write_ion(
|
|
344
347
|
f,
|
|
@@ -397,7 +400,7 @@ def export_mgf(
|
|
|
397
400
|
)
|
|
398
401
|
c += 1
|
|
399
402
|
continue # Skip the rest of the processing for this feature
|
|
400
|
-
|
|
403
|
+
|
|
401
404
|
# If we reach here, either use_cache=False or no cached spectra were available
|
|
402
405
|
if split_energy:
|
|
403
406
|
# get energy of all scans with scan_uid in ms2_scans by fetching them
|
|
@@ -408,20 +411,20 @@ def export_mgf(
|
|
|
408
411
|
for scan_uid in ms2_scan_uids:
|
|
409
412
|
spec = self.get_spectrum(scan_uid)
|
|
410
413
|
if spec is not None:
|
|
411
|
-
spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec,
|
|
412
|
-
|
|
414
|
+
spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, "energy") else 0))
|
|
415
|
+
|
|
413
416
|
# Group by energy
|
|
414
417
|
energy_groups: dict[float, list[int]] = {}
|
|
415
418
|
for scan_uid, energy in spectra_with_energy:
|
|
416
419
|
if energy not in energy_groups:
|
|
417
420
|
energy_groups[energy] = []
|
|
418
421
|
energy_groups[energy].append(scan_uid)
|
|
419
|
-
|
|
422
|
+
|
|
420
423
|
for energy, scan_uids_for_energy in energy_groups.items():
|
|
421
424
|
if selection == "best":
|
|
422
425
|
# Keep only the first scan for this energy
|
|
423
426
|
scan_uids_for_energy = [scan_uids_for_energy[0]]
|
|
424
|
-
|
|
427
|
+
|
|
425
428
|
for scan_uid in scan_uids_for_energy:
|
|
426
429
|
spect = self.get_spectrum(
|
|
427
430
|
scan_uid,
|
|
@@ -556,7 +559,7 @@ def export_mgf(
|
|
|
556
559
|
inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
|
|
557
560
|
q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
|
|
558
561
|
eic_corr_min_str = f"{eic_corr_min:.3f}" if eic_corr_min is not None else "None"
|
|
559
|
-
|
|
562
|
+
|
|
560
563
|
self.logger.debug(
|
|
561
564
|
f"MGF created with int>{inty_min_str}, q1_ratio>{q1_ratio_min_str}, eic_corr>{eic_corr_min_str}",
|
|
562
565
|
)
|