masster 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/chromatogram.py +2 -2
- masster/data/libs/urine.csv +3 -3
- masster/logger.py +8 -8
- masster/sample/adducts.py +337 -263
- masster/sample/defaults/find_adducts_def.py +21 -8
- masster/sample/h5.py +557 -278
- masster/sample/helpers.py +131 -75
- masster/sample/lib.py +2 -2
- masster/sample/load.py +25 -11
- masster/sample/plot.py +5 -5
- masster/sample/processing.py +115 -85
- masster/sample/sample.py +28 -15
- masster/sample/sample5_schema.json +44 -44
- masster/sample/save.py +34 -11
- masster/spectrum.py +2 -2
- masster/study/defaults/align_def.py +5 -1
- masster/study/defaults/identify_def.py +3 -1
- masster/study/defaults/study_def.py +58 -25
- masster/study/export.py +354 -204
- masster/study/h5.py +557 -155
- masster/study/helpers.py +487 -194
- masster/study/id.py +536 -347
- masster/study/load.py +228 -138
- masster/study/plot.py +68 -68
- masster/study/processing.py +455 -253
- masster/study/save.py +14 -4
- masster/study/study.py +122 -40
- masster/study/study5_schema.json +149 -149
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/METADATA +5 -3
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/RECORD +34 -34
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/WHEEL +0 -0
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/entry_points.txt +0 -0
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/licenses/LICENSE +0 -0
- {masster-0.4.4.dist-info → masster-0.4.6.dist-info}/top_level.txt +0 -0
masster/sample/save.py
CHANGED
|
@@ -139,9 +139,13 @@ def export_features(self, filename="features.csv"):
|
|
|
139
139
|
clean_df = clean_df.with_columns(
|
|
140
140
|
(pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
|
|
141
141
|
)
|
|
142
|
-
clean_df = self.features_df.select(
|
|
143
|
-
|
|
144
|
-
|
|
142
|
+
clean_df = self.features_df.select(
|
|
143
|
+
[
|
|
144
|
+
col
|
|
145
|
+
for col in self.features_df.columns
|
|
146
|
+
if self.features_df[col].dtype not in (pl.List, pl.Object)
|
|
147
|
+
],
|
|
148
|
+
)
|
|
145
149
|
if filename.lower().endswith((".xls", ".xlsx")):
|
|
146
150
|
clean_df.to_pandas().to_excel(filename, index=False)
|
|
147
151
|
self.logger.info(f"Features exported to {filename} (Excel format)")
|
|
@@ -275,7 +279,10 @@ def export_mgf(
|
|
|
275
279
|
if spect.ms_level > 1 and hasattr(spect, "energy"):
|
|
276
280
|
f.write(f"ENERGY={spect.energy}\n")
|
|
277
281
|
# Use list comprehension for better performance
|
|
278
|
-
peak_lines = [
|
|
282
|
+
peak_lines = [
|
|
283
|
+
f"{mz_val:.5f} {inty_val:.0f}\n"
|
|
284
|
+
for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)
|
|
285
|
+
]
|
|
279
286
|
f.writelines(peak_lines)
|
|
280
287
|
f.write("END IONS\n\n")
|
|
281
288
|
|
|
@@ -287,7 +294,8 @@ def export_mgf(
|
|
|
287
294
|
|
|
288
295
|
# count how many features have charge < 0
|
|
289
296
|
if (
|
|
290
|
-
self.features_df.filter(pl.col("charge") < 0).shape[0]
|
|
297
|
+
self.features_df.filter(pl.col("charge") < 0).shape[0]
|
|
298
|
+
- self.features_df.filter(pl.col("charge") > 0).shape[0]
|
|
291
299
|
> 0
|
|
292
300
|
):
|
|
293
301
|
preferred_charge = -1
|
|
@@ -388,7 +396,9 @@ def export_mgf(
|
|
|
388
396
|
q1_max=q1_ratio_max,
|
|
389
397
|
)
|
|
390
398
|
# Get the corresponding scan_uid from the list
|
|
391
|
-
current_scan_uid =
|
|
399
|
+
current_scan_uid = (
|
|
400
|
+
scan_uids[i] if i < len(scan_uids) else "unknown"
|
|
401
|
+
)
|
|
392
402
|
write_ion(
|
|
393
403
|
f,
|
|
394
404
|
f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{current_scan_uid}",
|
|
@@ -411,7 +421,12 @@ def export_mgf(
|
|
|
411
421
|
for scan_uid in ms2_scan_uids:
|
|
412
422
|
spec = self.get_spectrum(scan_uid)
|
|
413
423
|
if spec is not None:
|
|
414
|
-
spectra_with_energy.append(
|
|
424
|
+
spectra_with_energy.append(
|
|
425
|
+
(
|
|
426
|
+
scan_uid,
|
|
427
|
+
spec.energy if hasattr(spec, "energy") else 0,
|
|
428
|
+
),
|
|
429
|
+
)
|
|
415
430
|
|
|
416
431
|
# Group by energy
|
|
417
432
|
energy_groups: dict[float, list[int]] = {}
|
|
@@ -496,14 +511,18 @@ def export_mgf(
|
|
|
496
511
|
spect = spect.centroid(
|
|
497
512
|
tolerance=self.parameters["mz_tol_ms1_da"],
|
|
498
513
|
ppm=self.parameters["mz_tol_ms1_ppm"],
|
|
499
|
-
min_points=self.parameters[
|
|
514
|
+
min_points=self.parameters[
|
|
515
|
+
"centroid_min_points_ms1"
|
|
516
|
+
],
|
|
500
517
|
algo=centroid_algo,
|
|
501
518
|
)
|
|
502
519
|
elif spect.ms_level == 2:
|
|
503
520
|
spect = spect.centroid(
|
|
504
521
|
tolerance=self.parameters["mz_tol_ms2_da"],
|
|
505
522
|
ppm=self.parameters["mz_tol_ms2_ppm"],
|
|
506
|
-
min_points=self.parameters[
|
|
523
|
+
min_points=self.parameters[
|
|
524
|
+
"centroid_min_points_ms2"
|
|
525
|
+
],
|
|
507
526
|
algo=centroid_algo,
|
|
508
527
|
)
|
|
509
528
|
if deisotope:
|
|
@@ -595,7 +614,9 @@ def export_dda_stats(self, filename="stats.csv"):
|
|
|
595
614
|
ms2_count = len(self.scans_df.filter(pl.col("ms_level") == 2))
|
|
596
615
|
features_count = len(self.features_df) if self.features_df is not None else 0
|
|
597
616
|
features_with_ms2 = (
|
|
598
|
-
self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
|
|
617
|
+
self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
|
|
618
|
+
if self.features_df is not None
|
|
619
|
+
else 0
|
|
599
620
|
)
|
|
600
621
|
|
|
601
622
|
# Initialize a dictionary to hold statistics
|
|
@@ -610,7 +631,9 @@ def export_dda_stats(self, filename="stats.csv"):
|
|
|
610
631
|
if "time_cycle" in self.scans_df.columns:
|
|
611
632
|
ms1_df = self.scans_df.filter(pl.col("ms_level") == 1)
|
|
612
633
|
avg_cycle_time = ms1_df["time_cycle"].mean()
|
|
613
|
-
stats["Average_cycle_time"] =
|
|
634
|
+
stats["Average_cycle_time"] = (
|
|
635
|
+
avg_cycle_time if avg_cycle_time is not None else ""
|
|
636
|
+
)
|
|
614
637
|
else:
|
|
615
638
|
stats["Average_cycle_time"] = 0
|
|
616
639
|
|
masster/spectrum.py
CHANGED
|
@@ -138,10 +138,10 @@ class Spectrum:
|
|
|
138
138
|
|
|
139
139
|
Example Usage:
|
|
140
140
|
>>> import numpy as np
|
|
141
|
-
>>> from masster import
|
|
141
|
+
>>> from masster import Spectrum
|
|
142
142
|
>>> mz = np.array([100.0, 150.0, 200.0, 250.0])
|
|
143
143
|
>>> intensity = np.array([1000, 5000, 3000, 800])
|
|
144
|
-
>>> spectrum =
|
|
144
|
+
>>> spectrum = Spectrum(mz=mz, inty=intensity, ms_level=1)
|
|
145
145
|
>>> spectrum.find_peaks()
|
|
146
146
|
>>> spectrum.plot()
|
|
147
147
|
|
|
@@ -298,7 +298,11 @@ class align_defaults:
|
|
|
298
298
|
"dtype": str,
|
|
299
299
|
"description": "Method to use for extrapolation outside the data range in LOWESS",
|
|
300
300
|
"default": "four-point-linear",
|
|
301
|
-
"allowed_values": [
|
|
301
|
+
"allowed_values": [
|
|
302
|
+
"two-point-linear",
|
|
303
|
+
"four-point-linear",
|
|
304
|
+
"global-linear",
|
|
305
|
+
],
|
|
302
306
|
},
|
|
303
307
|
},
|
|
304
308
|
repr=False,
|
|
@@ -158,7 +158,9 @@ class identify_defaults:
|
|
|
158
158
|
if not isinstance(value, list):
|
|
159
159
|
return False
|
|
160
160
|
# For heteroatoms, ensure all elements are strings
|
|
161
|
-
if param_name == "heteroatoms" and not all(
|
|
161
|
+
if param_name == "heteroatoms" and not all(
|
|
162
|
+
isinstance(item, str) for item in value
|
|
163
|
+
):
|
|
162
164
|
return False
|
|
163
165
|
|
|
164
166
|
# Range validation for numeric types
|
|
@@ -33,7 +33,7 @@ class study_defaults:
|
|
|
33
33
|
|
|
34
34
|
eic_mz_tol: float = 0.01
|
|
35
35
|
eic_rt_tol: float = 10.0
|
|
36
|
-
|
|
36
|
+
|
|
37
37
|
polarity: str = "positive"
|
|
38
38
|
adducts: list[str] | None = None
|
|
39
39
|
adduct_min_probability: float = 0.04
|
|
@@ -54,7 +54,14 @@ class study_defaults:
|
|
|
54
54
|
"dtype": str,
|
|
55
55
|
"description": "Logging level to be set for the logger",
|
|
56
56
|
"default": "INFO",
|
|
57
|
-
"allowed_values": [
|
|
57
|
+
"allowed_values": [
|
|
58
|
+
"TRACE",
|
|
59
|
+
"DEBUG",
|
|
60
|
+
"INFO",
|
|
61
|
+
"WARNING",
|
|
62
|
+
"ERROR",
|
|
63
|
+
"CRITICAL",
|
|
64
|
+
],
|
|
58
65
|
},
|
|
59
66
|
"log_label": {
|
|
60
67
|
"dtype": "Optional[str]",
|
|
@@ -92,14 +99,19 @@ class study_defaults:
|
|
|
92
99
|
"default": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
|
|
93
100
|
"examples": {
|
|
94
101
|
"positive": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
|
|
95
|
-
"negative": [
|
|
102
|
+
"negative": [
|
|
103
|
+
"H-1:-:0.95",
|
|
104
|
+
"Cl:-:0.05",
|
|
105
|
+
"CH2O2:0:0.2",
|
|
106
|
+
"H-2-O:0:0.2",
|
|
107
|
+
],
|
|
96
108
|
},
|
|
97
109
|
"validation_rules": [
|
|
98
110
|
"Format: element:charge:probability",
|
|
99
111
|
"Charge must be +, -, or 0 (neutral)",
|
|
100
112
|
"Probability must be between 0.0 and 1.0",
|
|
101
|
-
"Sum of all charged adduct probabilities must equal 1.0"
|
|
102
|
-
]
|
|
113
|
+
"Sum of all charged adduct probabilities must equal 1.0",
|
|
114
|
+
],
|
|
103
115
|
},
|
|
104
116
|
"adduct_min_probability": {
|
|
105
117
|
"dtype": float,
|
|
@@ -116,54 +128,71 @@ class study_defaults:
|
|
|
116
128
|
"""Set polarity-specific defaults for adducts if not explicitly provided."""
|
|
117
129
|
# If adducts is None, set based on polarity
|
|
118
130
|
if self.adducts is None:
|
|
119
|
-
if self.polarity.lower() in [
|
|
120
|
-
self.adducts = [
|
|
121
|
-
|
|
122
|
-
|
|
131
|
+
if self.polarity.lower() in ["positive", "pos"]:
|
|
132
|
+
self.adducts = [
|
|
133
|
+
"+H:1:0.65",
|
|
134
|
+
"+Na:1:0.15",
|
|
135
|
+
"+NH4:1:0.15",
|
|
136
|
+
"+K:1:0.05",
|
|
137
|
+
"-H2O:0:0.15",
|
|
138
|
+
]
|
|
139
|
+
elif self.polarity.lower() in ["negative", "neg"]:
|
|
140
|
+
self.adducts = [
|
|
141
|
+
"-H:-1:0.9",
|
|
142
|
+
"+Cl:-1:0.1",
|
|
143
|
+
"+CH2O2:0:0.15",
|
|
144
|
+
"-H2O:0:0.15",
|
|
145
|
+
]
|
|
123
146
|
else:
|
|
124
147
|
# Default to positive if polarity is not recognized
|
|
125
|
-
self.adducts = [
|
|
148
|
+
self.adducts = [
|
|
149
|
+
"+H:1:0.65",
|
|
150
|
+
"+Na:1:0.15",
|
|
151
|
+
"+NH4:1:0.15",
|
|
152
|
+
"+K:1:0.05",
|
|
153
|
+
"-H2O:0:0.15",
|
|
154
|
+
]
|
|
126
155
|
|
|
127
156
|
def _validate_adducts(self, adduct_list: list[str]) -> bool:
|
|
128
157
|
"""
|
|
129
158
|
Validate adducts according to OpenMS convention.
|
|
130
|
-
|
|
159
|
+
|
|
131
160
|
Format: element:charge:probability
|
|
132
161
|
- Elements can be molecular formulas (e.g., H, Na, NH4, H-1, CH2O2)
|
|
133
162
|
- Charge must be +, -, or 0 (for neutral)
|
|
134
163
|
- Probability must be a float between 0 and 1
|
|
135
164
|
- Total probability of all charged adducts should sum to 1.0
|
|
136
|
-
|
|
165
|
+
|
|
137
166
|
Args:
|
|
138
167
|
adduct_list: List of adduct strings in OpenMS format
|
|
139
|
-
|
|
168
|
+
|
|
140
169
|
Returns:
|
|
141
170
|
True if all adducts are valid, False otherwise
|
|
142
171
|
"""
|
|
143
172
|
if not adduct_list: # Empty list is valid
|
|
144
173
|
return True
|
|
145
|
-
|
|
174
|
+
|
|
146
175
|
charged_total_prob = 0.0
|
|
147
176
|
neutral_total_prob = 0.0
|
|
148
|
-
|
|
177
|
+
|
|
149
178
|
for adduct in adduct_list:
|
|
150
179
|
if not isinstance(adduct, str):
|
|
151
180
|
return False
|
|
152
|
-
|
|
181
|
+
|
|
153
182
|
parts = adduct.split(":")
|
|
154
183
|
if len(parts) != 3:
|
|
155
184
|
return False
|
|
156
|
-
|
|
185
|
+
|
|
157
186
|
element, charge, prob_str = parts
|
|
158
|
-
|
|
187
|
+
|
|
159
188
|
# Validate element (non-empty string)
|
|
160
189
|
if not element:
|
|
161
190
|
return False
|
|
162
|
-
|
|
191
|
+
|
|
163
192
|
# Validate charge
|
|
164
193
|
if charge not in ["+", "-", "0"]:
|
|
165
194
|
return False
|
|
166
|
-
|
|
195
|
+
|
|
167
196
|
# Validate probability
|
|
168
197
|
try:
|
|
169
198
|
probability = float(prob_str)
|
|
@@ -171,20 +200,20 @@ class study_defaults:
|
|
|
171
200
|
return False
|
|
172
201
|
except (ValueError, TypeError):
|
|
173
202
|
return False
|
|
174
|
-
|
|
203
|
+
|
|
175
204
|
# Sum probabilities by charge type
|
|
176
205
|
if charge in ["+", "-"]:
|
|
177
206
|
charged_total_prob += probability
|
|
178
207
|
else: # charge == "0" (neutral)
|
|
179
208
|
neutral_total_prob += probability
|
|
180
|
-
|
|
209
|
+
|
|
181
210
|
# Validate probability constraints
|
|
182
211
|
# Charged adducts should sum to 1.0 (within tolerance)
|
|
183
212
|
if charged_total_prob > 0 and abs(charged_total_prob - 1.0) > 1e-6:
|
|
184
213
|
return False
|
|
185
|
-
|
|
214
|
+
|
|
186
215
|
# Neutral adducts can have any total probability (they're optional)
|
|
187
|
-
|
|
216
|
+
|
|
188
217
|
return True
|
|
189
218
|
|
|
190
219
|
def get_info(self, param_name: str) -> dict[str, Any]:
|
|
@@ -316,7 +345,11 @@ class study_defaults:
|
|
|
316
345
|
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
317
346
|
|
|
318
347
|
# Handle optional types
|
|
319
|
-
if
|
|
348
|
+
if (
|
|
349
|
+
isinstance(expected_dtype, str)
|
|
350
|
+
and expected_dtype.startswith("Optional")
|
|
351
|
+
and value is not None
|
|
352
|
+
):
|
|
320
353
|
if "int" in expected_dtype and not isinstance(value, int):
|
|
321
354
|
try:
|
|
322
355
|
value = int(value)
|