masster 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +90 -94
- masster/sample/defaults/sample_def.py +15 -0
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +137 -136
- masster/sample/lib.py +11 -11
- masster/sample/load.py +13 -9
- masster/sample/plot.py +167 -60
- masster/sample/processing.py +150 -153
- masster/sample/sample.py +4 -4
- masster/sample/sample5_schema.json +62 -62
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +224 -6
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +293 -245
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +51 -25
- masster/study/plot.py +453 -17
- masster/study/processing.py +197 -123
- masster/study/save.py +7 -7
- masster/study/study.py +97 -88
- masster/study/study5_schema.json +82 -82
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/METADATA +1 -1
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/RECORD +34 -32
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/WHEEL +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/entry_points.txt +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,44 +1,41 @@
|
|
|
1
1
|
{
|
|
2
2
|
"features_df": {
|
|
3
3
|
"columns": {
|
|
4
|
-
"
|
|
5
|
-
"dtype": "pl.Int64"
|
|
6
|
-
},
|
|
7
|
-
"feature_id": {
|
|
4
|
+
"adduct": {
|
|
8
5
|
"dtype": "pl.Utf8"
|
|
9
6
|
},
|
|
10
|
-
"
|
|
11
|
-
"dtype": "pl.
|
|
7
|
+
"adduct_group": {
|
|
8
|
+
"dtype": "pl.Int64"
|
|
12
9
|
},
|
|
13
|
-
"
|
|
10
|
+
"adduct_mass": {
|
|
14
11
|
"dtype": "pl.Float64"
|
|
15
12
|
},
|
|
16
|
-
"
|
|
17
|
-
"dtype": "pl.
|
|
13
|
+
"charge": {
|
|
14
|
+
"dtype": "pl.Int32"
|
|
18
15
|
},
|
|
19
|
-
"
|
|
20
|
-
"dtype": "pl.
|
|
16
|
+
"chrom": {
|
|
17
|
+
"dtype": "pl.Object"
|
|
21
18
|
},
|
|
22
|
-
"
|
|
19
|
+
"chrom_coherence": {
|
|
23
20
|
"dtype": "pl.Float64"
|
|
24
21
|
},
|
|
25
|
-
"
|
|
22
|
+
"chrom_height_scaled": {
|
|
26
23
|
"dtype": "pl.Float64"
|
|
27
24
|
},
|
|
28
|
-
"
|
|
25
|
+
"chrom_prominence": {
|
|
29
26
|
"dtype": "pl.Float64"
|
|
30
27
|
},
|
|
31
|
-
"
|
|
28
|
+
"chrom_prominence_scaled": {
|
|
32
29
|
"dtype": "pl.Float64"
|
|
33
30
|
},
|
|
34
|
-
"
|
|
35
|
-
"dtype": "pl.
|
|
31
|
+
"feature_id": {
|
|
32
|
+
"dtype": "pl.Utf8"
|
|
36
33
|
},
|
|
37
|
-
"
|
|
38
|
-
"dtype": "pl.
|
|
34
|
+
"feature_uid": {
|
|
35
|
+
"dtype": "pl.Int64"
|
|
39
36
|
},
|
|
40
|
-
"
|
|
41
|
-
"dtype": "pl.
|
|
37
|
+
"inty": {
|
|
38
|
+
"dtype": "pl.Float64"
|
|
42
39
|
},
|
|
43
40
|
"iso": {
|
|
44
41
|
"dtype": "pl.Int64"
|
|
@@ -46,35 +43,38 @@
|
|
|
46
43
|
"iso_of": {
|
|
47
44
|
"dtype": "pl.Int64"
|
|
48
45
|
},
|
|
49
|
-
"
|
|
50
|
-
"dtype": "pl.
|
|
46
|
+
"ms2_scans": {
|
|
47
|
+
"dtype": "pl.Object"
|
|
51
48
|
},
|
|
52
|
-
"
|
|
53
|
-
"dtype": "pl.
|
|
49
|
+
"ms2_specs": {
|
|
50
|
+
"dtype": "pl.Object"
|
|
54
51
|
},
|
|
55
|
-
"
|
|
52
|
+
"mz": {
|
|
56
53
|
"dtype": "pl.Float64"
|
|
57
54
|
},
|
|
58
|
-
"
|
|
59
|
-
"dtype": "pl.
|
|
55
|
+
"mz_end": {
|
|
56
|
+
"dtype": "pl.Float64"
|
|
60
57
|
},
|
|
61
|
-
"
|
|
58
|
+
"mz_start": {
|
|
62
59
|
"dtype": "pl.Float64"
|
|
63
60
|
},
|
|
64
|
-
"
|
|
61
|
+
"quality": {
|
|
65
62
|
"dtype": "pl.Float64"
|
|
66
63
|
},
|
|
67
|
-
"
|
|
64
|
+
"rt": {
|
|
68
65
|
"dtype": "pl.Float64"
|
|
69
66
|
},
|
|
70
|
-
"
|
|
67
|
+
"rt_delta": {
|
|
71
68
|
"dtype": "pl.Float64"
|
|
72
69
|
},
|
|
73
|
-
"
|
|
74
|
-
"dtype": "pl.
|
|
70
|
+
"rt_end": {
|
|
71
|
+
"dtype": "pl.Float64"
|
|
75
72
|
},
|
|
76
|
-
"
|
|
77
|
-
"dtype": "pl.
|
|
73
|
+
"rt_original": {
|
|
74
|
+
"dtype": "pl.Float64"
|
|
75
|
+
},
|
|
76
|
+
"rt_start": {
|
|
77
|
+
"dtype": "pl.Float64"
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
},
|
|
@@ -100,64 +100,64 @@
|
|
|
100
100
|
},
|
|
101
101
|
"scans_df": {
|
|
102
102
|
"columns": {
|
|
103
|
-
"
|
|
104
|
-
"dtype": "pl.
|
|
103
|
+
"bl": {
|
|
104
|
+
"dtype": "pl.Float64"
|
|
105
|
+
},
|
|
106
|
+
"comment": {
|
|
107
|
+
"dtype": "pl.Utf8"
|
|
105
108
|
},
|
|
106
109
|
"cycle": {
|
|
107
110
|
"dtype": "pl.Int64"
|
|
108
111
|
},
|
|
109
|
-
"
|
|
112
|
+
"energy": {
|
|
113
|
+
"dtype": "pl.Float64"
|
|
114
|
+
},
|
|
115
|
+
"feature_uid": {
|
|
110
116
|
"dtype": "pl.Int64"
|
|
111
117
|
},
|
|
112
|
-
"
|
|
113
|
-
"dtype": "pl.
|
|
118
|
+
"id": {
|
|
119
|
+
"dtype": "pl.Utf8"
|
|
114
120
|
},
|
|
115
|
-
"
|
|
121
|
+
"inty_max": {
|
|
116
122
|
"dtype": "pl.Float64"
|
|
117
123
|
},
|
|
118
124
|
"inty_min": {
|
|
119
125
|
"dtype": "pl.Float64"
|
|
120
126
|
},
|
|
121
|
-
"
|
|
127
|
+
"inty_tot": {
|
|
122
128
|
"dtype": "pl.Float64"
|
|
123
129
|
},
|
|
124
|
-
"
|
|
125
|
-
"dtype": "pl.
|
|
130
|
+
"ms2_n": {
|
|
131
|
+
"dtype": "pl.Int64"
|
|
126
132
|
},
|
|
127
|
-
"
|
|
128
|
-
"dtype": "pl.
|
|
133
|
+
"ms_level": {
|
|
134
|
+
"dtype": "pl.Int64"
|
|
129
135
|
},
|
|
130
136
|
"mz_max": {
|
|
131
137
|
"dtype": "pl.Float64"
|
|
132
138
|
},
|
|
133
|
-
"
|
|
134
|
-
"dtype": "pl.
|
|
139
|
+
"mz_min": {
|
|
140
|
+
"dtype": "pl.Float64"
|
|
135
141
|
},
|
|
136
142
|
"name": {
|
|
137
143
|
"dtype": "pl.Utf8"
|
|
138
144
|
},
|
|
139
|
-
"
|
|
140
|
-
"dtype": "pl.Utf8"
|
|
141
|
-
},
|
|
142
|
-
"prec_mz": {
|
|
145
|
+
"prec_inty": {
|
|
143
146
|
"dtype": "pl.Float64"
|
|
144
147
|
},
|
|
145
|
-
"
|
|
148
|
+
"prec_mz": {
|
|
146
149
|
"dtype": "pl.Float64"
|
|
147
150
|
},
|
|
148
151
|
"prec_mz_max": {
|
|
149
152
|
"dtype": "pl.Float64"
|
|
150
153
|
},
|
|
151
|
-
"
|
|
154
|
+
"prec_mz_min": {
|
|
152
155
|
"dtype": "pl.Float64"
|
|
153
156
|
},
|
|
154
|
-
"
|
|
157
|
+
"rt": {
|
|
155
158
|
"dtype": "pl.Float64"
|
|
156
159
|
},
|
|
157
|
-
"
|
|
158
|
-
"dtype": "pl.Int64"
|
|
159
|
-
},
|
|
160
|
-
"ms2_n": {
|
|
160
|
+
"scan_uid": {
|
|
161
161
|
"dtype": "pl.Int64"
|
|
162
162
|
},
|
|
163
163
|
"time_cycle": {
|
|
@@ -169,10 +169,10 @@
|
|
|
169
169
|
"time_ms1_to_ms2": {
|
|
170
170
|
"dtype": "pl.Float64"
|
|
171
171
|
},
|
|
172
|
-
"
|
|
172
|
+
"time_ms2_to_ms1": {
|
|
173
173
|
"dtype": "pl.Float64"
|
|
174
174
|
},
|
|
175
|
-
"
|
|
175
|
+
"time_ms2_to_ms2": {
|
|
176
176
|
"dtype": "pl.Float64"
|
|
177
177
|
}
|
|
178
178
|
}
|
masster/sample/save.py
CHANGED
|
@@ -134,10 +134,10 @@ def export_features(self, filename="features.csv"):
|
|
|
134
134
|
# clone df
|
|
135
135
|
clean_df = self.features_df.clone()
|
|
136
136
|
filename = os.path.abspath(filename)
|
|
137
|
-
# add a column has_ms2=True if
|
|
137
|
+
# add a column has_ms2=True if column ms2_scans is not None
|
|
138
138
|
if "ms2_scans" in clean_df.columns:
|
|
139
139
|
clean_df = clean_df.with_columns(
|
|
140
|
-
(pl.col("ms2_scans").is_not_null()).alias("has_ms2")
|
|
140
|
+
(pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
|
|
141
141
|
)
|
|
142
142
|
clean_df = self.features_df.select([
|
|
143
143
|
col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
|
|
@@ -215,7 +215,7 @@ def export_mgf(
|
|
|
215
215
|
return
|
|
216
216
|
else:
|
|
217
217
|
self.features_df = self.features.get_df()
|
|
218
|
-
|
|
218
|
+
|
|
219
219
|
# Apply filtering at DataFrame level for better performance
|
|
220
220
|
features = self.features_df
|
|
221
221
|
if mz_start is not None:
|
|
@@ -228,7 +228,7 @@ def export_mgf(
|
|
|
228
228
|
features = features.filter(pl.col("rt") <= rt_end)
|
|
229
229
|
if not include_all_ms1:
|
|
230
230
|
features = features.filter(pl.col("ms2_scans").is_not_null())
|
|
231
|
-
|
|
231
|
+
|
|
232
232
|
# Convert to list of dictionaries for faster iteration
|
|
233
233
|
features_list = features.to_dicts()
|
|
234
234
|
|
|
@@ -286,7 +286,10 @@ def export_mgf(
|
|
|
286
286
|
centroid_algo = "cr"
|
|
287
287
|
|
|
288
288
|
# count how many features have charge < 0
|
|
289
|
-
if
|
|
289
|
+
if (
|
|
290
|
+
self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
|
|
291
|
+
> 0
|
|
292
|
+
):
|
|
290
293
|
preferred_charge = -1
|
|
291
294
|
else:
|
|
292
295
|
preferred_charge = 1
|
|
@@ -312,7 +315,7 @@ def export_mgf(
|
|
|
312
315
|
rt = row["rt"]
|
|
313
316
|
rt_str = f"{rt:.2f}"
|
|
314
317
|
mz_str = f"{mz:.4f}"
|
|
315
|
-
|
|
318
|
+
|
|
316
319
|
# Filtering is now done at DataFrame level, so we can skip these checks
|
|
317
320
|
if row["ms2_scans"] is None and not include_all_ms1:
|
|
318
321
|
skip = skip + 1
|
|
@@ -338,7 +341,7 @@ def export_mgf(
|
|
|
338
341
|
|
|
339
342
|
charge = preferred_charge
|
|
340
343
|
if row["charge"] is not None and row["charge"] != 0:
|
|
341
|
-
|
|
344
|
+
charge = row["charge"]
|
|
342
345
|
|
|
343
346
|
write_ion(
|
|
344
347
|
f,
|
|
@@ -397,7 +400,7 @@ def export_mgf(
|
|
|
397
400
|
)
|
|
398
401
|
c += 1
|
|
399
402
|
continue # Skip the rest of the processing for this feature
|
|
400
|
-
|
|
403
|
+
|
|
401
404
|
# If we reach here, either use_cache=False or no cached spectra were available
|
|
402
405
|
if split_energy:
|
|
403
406
|
# get energy of all scans with scan_uid in ms2_scans by fetching them
|
|
@@ -408,20 +411,20 @@ def export_mgf(
|
|
|
408
411
|
for scan_uid in ms2_scan_uids:
|
|
409
412
|
spec = self.get_spectrum(scan_uid)
|
|
410
413
|
if spec is not None:
|
|
411
|
-
spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec,
|
|
412
|
-
|
|
414
|
+
spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, "energy") else 0))
|
|
415
|
+
|
|
413
416
|
# Group by energy
|
|
414
417
|
energy_groups: dict[float, list[int]] = {}
|
|
415
418
|
for scan_uid, energy in spectra_with_energy:
|
|
416
419
|
if energy not in energy_groups:
|
|
417
420
|
energy_groups[energy] = []
|
|
418
421
|
energy_groups[energy].append(scan_uid)
|
|
419
|
-
|
|
422
|
+
|
|
420
423
|
for energy, scan_uids_for_energy in energy_groups.items():
|
|
421
424
|
if selection == "best":
|
|
422
425
|
# Keep only the first scan for this energy
|
|
423
426
|
scan_uids_for_energy = [scan_uids_for_energy[0]]
|
|
424
|
-
|
|
427
|
+
|
|
425
428
|
for scan_uid in scan_uids_for_energy:
|
|
426
429
|
spect = self.get_spectrum(
|
|
427
430
|
scan_uid,
|
|
@@ -556,7 +559,7 @@ def export_mgf(
|
|
|
556
559
|
inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
|
|
557
560
|
q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
|
|
558
561
|
eic_corr_min_str = f"{eic_corr_min:.3f}" if eic_corr_min is not None else "None"
|
|
559
|
-
|
|
562
|
+
|
|
560
563
|
self.logger.debug(
|
|
561
564
|
f"MGF created with int>{inty_min_str}, q1_ratio>{q1_ratio_min_str}, eic_corr>{eic_corr_min_str}",
|
|
562
565
|
)
|