masster 0.5.17__py3-none-any.whl → 0.5.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/adducts.py +12 -0
- masster/sample/defaults/find_ms2_def.py +5 -5
- masster/sample/defaults/sample_def.py +30 -6
- masster/sample/h5.py +59 -13
- masster/sample/lib.py +9 -3
- masster/sample/load.py +47 -120
- masster/sample/processing.py +3 -3
- masster/sample/sample.py +5 -3
- masster/sample/sciex.py +62 -648
- masster/sample/thermo.py +801 -0
- masster/study/id.py +3 -1
- masster/study/load.py +15 -792
- masster/study/study.py +1 -0
- masster/wizard/README.md +15 -15
- masster/wizard/wizard.py +82 -28
- {masster-0.5.17.dist-info → masster-0.5.19.dist-info}/METADATA +3 -2
- {masster-0.5.17.dist-info → masster-0.5.19.dist-info}/RECORD +21 -20
- {masster-0.5.17.dist-info → masster-0.5.19.dist-info}/WHEEL +0 -0
- {masster-0.5.17.dist-info → masster-0.5.19.dist-info}/entry_points.txt +0 -0
- {masster-0.5.17.dist-info → masster-0.5.19.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/sample/adducts.py
CHANGED
|
@@ -403,6 +403,7 @@ def find_adducts(self, **kwargs):
|
|
|
403
403
|
|
|
404
404
|
Main parameters (from ``find_adducts_defaults``):
|
|
405
405
|
- adducts (list[str] | str | None): List of potential adduct strings or ionization mode ('pos'/'neg').
|
|
406
|
+
If None, automatically uses sample.polarity to select appropriate default adducts.
|
|
406
407
|
- charge_min (int): Minimum allowed charge state (default: -4).
|
|
407
408
|
- charge_max (int): Maximum allowed charge state (default: 4).
|
|
408
409
|
- retention_max_diff (float): Maximum RT difference in seconds (default: 1.0).
|
|
@@ -431,6 +432,17 @@ def find_adducts(self, **kwargs):
|
|
|
431
432
|
else:
|
|
432
433
|
self.logger.warning(f"Unknown parameter {key} ignored")
|
|
433
434
|
|
|
435
|
+
# Auto-set adducts based on sample polarity if not explicitly provided
|
|
436
|
+
if params.adducts is None and hasattr(self, 'polarity') and self.polarity is not None:
|
|
437
|
+
if self.polarity.lower() in ['positive', 'pos']:
|
|
438
|
+
params.set('adducts', 'positive', validate=True)
|
|
439
|
+
self.logger.debug(f"Auto-set adducts to 'positive' based on sample polarity: {self.polarity}")
|
|
440
|
+
elif self.polarity.lower() in ['negative', 'neg']:
|
|
441
|
+
params.set('adducts', 'negative', validate=True)
|
|
442
|
+
self.logger.debug(f"Auto-set adducts to 'negative' based on sample polarity: {self.polarity}")
|
|
443
|
+
else:
|
|
444
|
+
self.logger.debug(f"Unknown sample polarity '{self.polarity}', using default adducts")
|
|
445
|
+
|
|
434
446
|
# Check if features_df exists and has data
|
|
435
447
|
if not hasattr(self, "features_df") or len(self.features_df) == 0:
|
|
436
448
|
self.logger.warning(
|
|
@@ -42,7 +42,7 @@ class find_ms2_defaults:
|
|
|
42
42
|
- get_description(param_name): Get parameter description
|
|
43
43
|
- get_info(param_name): Get full parameter metadata
|
|
44
44
|
- list_parameters(): Get list of all parameter names
|
|
45
|
-
- get_mz_tolerance(
|
|
45
|
+
- get_mz_tolerance(type): Get appropriate m/z tolerance based on type
|
|
46
46
|
"""
|
|
47
47
|
|
|
48
48
|
# Core MS2 linking parameters
|
|
@@ -270,16 +270,16 @@ class find_ms2_defaults:
|
|
|
270
270
|
|
|
271
271
|
return len(invalid_params) == 0, invalid_params
|
|
272
272
|
|
|
273
|
-
def get_mz_tolerance(self,
|
|
273
|
+
def get_mz_tolerance(self, type=None):
|
|
274
274
|
"""
|
|
275
|
-
Get the appropriate m/z tolerance based on
|
|
275
|
+
Get the appropriate m/z tolerance based on type.
|
|
276
276
|
|
|
277
277
|
Args:
|
|
278
|
-
|
|
278
|
+
type (str, optional): Acquisition type ('ztscan', 'dia', or other)
|
|
279
279
|
|
|
280
280
|
Returns:
|
|
281
281
|
float: Appropriate m/z tolerance value
|
|
282
282
|
"""
|
|
283
|
-
if
|
|
283
|
+
if type is not None and type.lower() in ["ztscan", "dia"]:
|
|
284
284
|
return self.get("mz_tol_ztscan")
|
|
285
285
|
return self.get("mz_tol")
|
|
@@ -32,30 +32,42 @@ class sample_defaults:
|
|
|
32
32
|
centroid_prominence (int): Prominence parameter for centroiding. Default is -1.
|
|
33
33
|
max_points_per_spectrum (int): Maximum number of points per spectrum. Default is 50000.
|
|
34
34
|
dia_window (Optional[float]): DIA window size. Default is None.
|
|
35
|
+
type (str): Acquisition type/mode. Options are 'dda', 'swath', 'ztscan', 'fia'. Default is 'dda'.
|
|
36
|
+
polarity (Optional[str]): Ionization polarity. Options are None, 'positive', 'negative'. Default is None.
|
|
35
37
|
"""
|
|
36
38
|
|
|
37
39
|
filename: Optional[str] = None
|
|
38
|
-
ondisk: bool = False
|
|
39
40
|
label: str | None = None
|
|
40
41
|
log_level: str = "INFO"
|
|
41
42
|
log_label: Optional[str] = ""
|
|
42
43
|
log_sink: str = "sys.stdout"
|
|
43
|
-
|
|
44
|
+
ondisk: bool = False
|
|
45
|
+
|
|
46
|
+
# file and data handling settings
|
|
47
|
+
type: str = "dda"
|
|
48
|
+
polarity: str | None = None
|
|
49
|
+
|
|
50
|
+
# chromatographic settings
|
|
51
|
+
#chrom_fwhm: float = 1.0
|
|
52
|
+
eic_mz_tol: float = 0.01
|
|
53
|
+
eic_rt_tol: float = 10.0
|
|
54
|
+
|
|
55
|
+
# mz tolerances
|
|
44
56
|
mz_tol_ms1_da: float = 0.002
|
|
45
57
|
mz_tol_ms2_da: float = 0.005
|
|
46
58
|
mz_tol_ms1_ppm: float = 5.0
|
|
47
59
|
mz_tol_ms2_ppm: float = 10.0
|
|
60
|
+
|
|
61
|
+
# centroiding settings
|
|
48
62
|
centroid_algo: str = "lmp"
|
|
49
63
|
centroid_min_points_ms1: int = 5
|
|
50
64
|
centroid_min_points_ms2: int = 4
|
|
51
65
|
centroid_smooth: int = 5
|
|
52
66
|
centroid_refine: bool = True
|
|
53
67
|
centroid_prominence: int = -1
|
|
68
|
+
|
|
69
|
+
# data retrieval settings
|
|
54
70
|
max_points_per_spectrum: int = 50000
|
|
55
|
-
dia_window: float | None = None
|
|
56
|
-
|
|
57
|
-
eic_mz_tol: float = 0.01
|
|
58
|
-
eic_rt_tol: float = 10.0
|
|
59
71
|
|
|
60
72
|
_param_metadata: dict[str, dict[str, Any]] = field(
|
|
61
73
|
default_factory=lambda: {
|
|
@@ -178,6 +190,18 @@ class sample_defaults:
|
|
|
178
190
|
"min_value": 0.2,
|
|
179
191
|
"max_value": 60.0,
|
|
180
192
|
},
|
|
193
|
+
"type": {
|
|
194
|
+
"dtype": str,
|
|
195
|
+
"description": "Acquisition type/mode",
|
|
196
|
+
"default": "dda",
|
|
197
|
+
"allowed_values": ["dda", "swath", "ztscan", "fia"],
|
|
198
|
+
},
|
|
199
|
+
"polarity": {
|
|
200
|
+
"dtype": "Optional[str]",
|
|
201
|
+
"description": "Ionization polarity",
|
|
202
|
+
"default": None,
|
|
203
|
+
"allowed_values": ["positive", "negative"],
|
|
204
|
+
},
|
|
181
205
|
},
|
|
182
206
|
repr=False,
|
|
183
207
|
)
|
masster/sample/h5.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import json
|
|
1
|
+
import json
|
|
2
2
|
import os
|
|
3
3
|
|
|
4
4
|
import h5py
|
|
@@ -94,8 +94,8 @@ def _save_sample5(
|
|
|
94
94
|
metadata_group.attrs["file_source"] = str(self.file_source)
|
|
95
95
|
else:
|
|
96
96
|
metadata_group.attrs["file_source"] = ""
|
|
97
|
-
if self.
|
|
98
|
-
metadata_group.attrs["file_type"] = str(self.
|
|
97
|
+
if hasattr(self, 'type') and self.type is not None:
|
|
98
|
+
metadata_group.attrs["file_type"] = str(self.type)
|
|
99
99
|
else:
|
|
100
100
|
metadata_group.attrs["file_type"] = ""
|
|
101
101
|
if self.label is not None:
|
|
@@ -287,11 +287,41 @@ def _save_sample5(
|
|
|
287
287
|
compression="gzip",
|
|
288
288
|
)
|
|
289
289
|
|
|
290
|
-
# Store parameters as JSON
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
290
|
+
# Store parameters/history as JSON
|
|
291
|
+
# Always ensure we sync instance attributes to parameters before saving
|
|
292
|
+
if hasattr(self, 'parameters') and self.parameters is not None:
|
|
293
|
+
if hasattr(self, 'polarity') and self.polarity is not None:
|
|
294
|
+
self.parameters.polarity = self.polarity
|
|
295
|
+
if hasattr(self, 'type') and self.type is not None:
|
|
296
|
+
self.parameters.type = self.type
|
|
297
|
+
|
|
298
|
+
# Prepare save data
|
|
299
|
+
save_data = {}
|
|
300
|
+
|
|
301
|
+
# Add parameters as a dictionary
|
|
302
|
+
if hasattr(self, 'parameters') and self.parameters is not None:
|
|
303
|
+
save_data["sample"] = self.parameters.to_dict()
|
|
304
|
+
|
|
305
|
+
# Add history data (but ensure it's JSON serializable)
|
|
306
|
+
if hasattr(self, 'history') and self.history is not None:
|
|
307
|
+
# Convert any non-JSON-serializable objects to strings/dicts
|
|
308
|
+
serializable_history = {}
|
|
309
|
+
for key, value in self.history.items():
|
|
310
|
+
if key == "sample":
|
|
311
|
+
# Use our properly serialized parameters
|
|
312
|
+
continue # Skip, we'll add it from parameters above
|
|
313
|
+
try:
|
|
314
|
+
# Test if value is JSON serializable
|
|
315
|
+
json.dumps(value)
|
|
316
|
+
serializable_history[key] = value
|
|
317
|
+
except (TypeError, ValueError):
|
|
318
|
+
# Convert to string if not serializable
|
|
319
|
+
serializable_history[key] = str(value)
|
|
320
|
+
save_data.update(serializable_history)
|
|
321
|
+
|
|
322
|
+
# Save as JSON
|
|
323
|
+
params_json = json.dumps(save_data, indent=2)
|
|
324
|
+
metadata_group.attrs["parameters"] = params_json
|
|
295
325
|
|
|
296
326
|
# Store lib and lib_match - removed (no longer saving lib data)
|
|
297
327
|
|
|
@@ -363,7 +393,7 @@ def _load_sample5(self, filename: str, map: bool = False):
|
|
|
363
393
|
else:
|
|
364
394
|
self.file_source = self.file_path
|
|
365
395
|
|
|
366
|
-
self.
|
|
396
|
+
self.type = decode_metadata_attr(
|
|
367
397
|
metadata_group.attrs.get("file_type", ""),
|
|
368
398
|
)
|
|
369
399
|
self.label = decode_metadata_attr(metadata_group.attrs.get("label", ""))
|
|
@@ -1072,6 +1102,14 @@ def _load_sample5(self, filename: str, map: bool = False):
|
|
|
1072
1102
|
# set self.label to basename without extension
|
|
1073
1103
|
if self.label is None or self.label == "":
|
|
1074
1104
|
self.label = os.path.splitext(os.path.basename(filename))[0]
|
|
1105
|
+
|
|
1106
|
+
# Sync instance attributes from loaded parameters
|
|
1107
|
+
if hasattr(self, 'parameters') and self.parameters is not None:
|
|
1108
|
+
if hasattr(self.parameters, 'polarity') and self.parameters.polarity is not None:
|
|
1109
|
+
self.polarity = self.parameters.polarity
|
|
1110
|
+
if hasattr(self.parameters, 'type') and self.parameters.type is not None:
|
|
1111
|
+
self.type = self.parameters.type
|
|
1112
|
+
|
|
1075
1113
|
self.logger.info(f"Sample loaded from {filename}")
|
|
1076
1114
|
|
|
1077
1115
|
|
|
@@ -1122,7 +1160,7 @@ def _load_sample5_study(self, filename: str, map: bool = False):
|
|
|
1122
1160
|
else:
|
|
1123
1161
|
self.file_source = self.file_path
|
|
1124
1162
|
|
|
1125
|
-
self.
|
|
1163
|
+
self.type = decode_metadata_attr(
|
|
1126
1164
|
metadata_group.attrs.get("file_type", ""),
|
|
1127
1165
|
)
|
|
1128
1166
|
self.label = decode_metadata_attr(metadata_group.attrs.get("label", ""))
|
|
@@ -1810,6 +1848,14 @@ def _load_sample5_study(self, filename: str, map: bool = False):
|
|
|
1810
1848
|
# set self.label to basename without extension
|
|
1811
1849
|
if self.label is None or self.label == "":
|
|
1812
1850
|
self.label = os.path.splitext(os.path.basename(filename))[0]
|
|
1851
|
+
|
|
1852
|
+
# Sync instance attributes from loaded parameters
|
|
1853
|
+
if hasattr(self, 'parameters') and self.parameters is not None:
|
|
1854
|
+
if hasattr(self.parameters, 'polarity') and self.parameters.polarity is not None:
|
|
1855
|
+
self.polarity = self.parameters.polarity
|
|
1856
|
+
if hasattr(self.parameters, 'type') and self.parameters.type is not None:
|
|
1857
|
+
self.type = self.parameters.type
|
|
1858
|
+
|
|
1813
1859
|
self.logger.info(
|
|
1814
1860
|
f"Sample loaded successfully from {filename} (optimized for study)",
|
|
1815
1861
|
)
|
|
@@ -2256,7 +2302,7 @@ def create_h5_metadata_group(
|
|
|
2256
2302
|
f: h5py.File,
|
|
2257
2303
|
file_path: Optional[str],
|
|
2258
2304
|
file_source: Optional[str],
|
|
2259
|
-
|
|
2305
|
+
type: Optional[str],
|
|
2260
2306
|
label: Optional[str],
|
|
2261
2307
|
) -> None:
|
|
2262
2308
|
"""
|
|
@@ -2266,7 +2312,7 @@ def create_h5_metadata_group(
|
|
|
2266
2312
|
f: The HDF5 file object
|
|
2267
2313
|
file_path: Source file path
|
|
2268
2314
|
file_source: Original source file path
|
|
2269
|
-
|
|
2315
|
+
type: Source file type
|
|
2270
2316
|
label: Sample label
|
|
2271
2317
|
"""
|
|
2272
2318
|
metadata_group = f.create_group("metadata")
|
|
@@ -2275,5 +2321,5 @@ def create_h5_metadata_group(
|
|
|
2275
2321
|
metadata_group.attrs["file_source"] = (
|
|
2276
2322
|
str(file_source) if file_source is not None else ""
|
|
2277
2323
|
)
|
|
2278
|
-
metadata_group.attrs["file_type"] = str(
|
|
2324
|
+
metadata_group.attrs["file_type"] = str(type) if type is not None else ""
|
|
2279
2325
|
metadata_group.attrs["label"] = str(label) if label is not None else ""
|
masster/sample/lib.py
CHANGED
|
@@ -71,7 +71,7 @@ def load_lib(self, *args, **kwargs):
|
|
|
71
71
|
lib_load(self, *args, **kwargs)
|
|
72
72
|
|
|
73
73
|
|
|
74
|
-
def lib_load(self, csvfile=None, polarity=
|
|
74
|
+
def lib_load(self, csvfile=None, polarity=None):
|
|
75
75
|
delta_m = {
|
|
76
76
|
"[M+H]+": 1.007276,
|
|
77
77
|
"[M+Na]+": 22.989218,
|
|
@@ -97,10 +97,11 @@ def lib_load(self, csvfile=None, polarity="positive"):
|
|
|
97
97
|
"""
|
|
98
98
|
Load target compounds from a CSV file.
|
|
99
99
|
This method reads a CSV file containing target compounds and their properties, such as m/z, retention time (RT),
|
|
100
|
-
and adducts. It filters the targets based on the specified
|
|
100
|
+
and adducts. It filters the targets based on the specified polarity and returns a DataFrame of the targets.
|
|
101
101
|
Parameters:
|
|
102
102
|
csvfile (str): The path to the CSV file containing target compounds.
|
|
103
|
-
|
|
103
|
+
polarity (str, optional): Ion polarity to filter adducts ('positive' or 'negative').
|
|
104
|
+
If None, uses the sample's polarity property. Default is None.
|
|
104
105
|
Returns:
|
|
105
106
|
pd.DataFrame: A DataFrame containing the filtered target compounds with columns 'mz', 'rt', 'adduct'.
|
|
106
107
|
"""
|
|
@@ -220,6 +221,11 @@ def lib_load(self, csvfile=None, polarity="positive"):
|
|
|
220
221
|
self.lib = self.lib.where(pd.notnull(self.lib), None)
|
|
221
222
|
# find all elements == nan and replace them with None
|
|
222
223
|
self.lib = self.lib.replace({np.nan: None})
|
|
224
|
+
|
|
225
|
+
# Use sample.polarity if polarity parameter is None
|
|
226
|
+
if polarity is None:
|
|
227
|
+
polarity = getattr(self, 'polarity', 'positive')
|
|
228
|
+
|
|
223
229
|
if polarity is not None:
|
|
224
230
|
if polarity.lower() == "positive":
|
|
225
231
|
self.lib = self.lib[self.lib["z"] > 0]
|
masster/sample/load.py
CHANGED
|
@@ -73,7 +73,7 @@ def load(
|
|
|
73
73
|
filename (str): The path to the file to load. The file must exist and have one of the following extensions:
|
|
74
74
|
.mzML, .wiff, or .raw.
|
|
75
75
|
ondisk (bool, optional): Indicates whether the file should be treated as on disk. Defaults to False.
|
|
76
|
-
type (str, optional): Specifies the type of file. If provided and set to 'ztscan' (case-insensitive), the
|
|
76
|
+
type (str, optional): Specifies the type of file. If provided and set to 'ztscan' (case-insensitive), the type
|
|
77
77
|
attribute will be adjusted accordingly. Defaults to None.
|
|
78
78
|
label (Any, optional): An optional label to associate with the loaded file. Defaults to None.
|
|
79
79
|
Raises:
|
|
@@ -84,7 +84,7 @@ def load(
|
|
|
84
84
|
- ".mzml": Calls _load_mzML(filename)
|
|
85
85
|
- ".wiff": Calls _load_wiff(filename)
|
|
86
86
|
- ".raw": Calls _load_raw(filename)
|
|
87
|
-
After loading, the
|
|
87
|
+
After loading, the type attribute is set to 'dda', unless the optional 'type' parameter is provided as 'ztscan',
|
|
88
88
|
in which case it is updated to 'ztscan'. The label attribute is updated if a label is provided.
|
|
89
89
|
"""
|
|
90
90
|
|
|
@@ -109,9 +109,9 @@ def load(
|
|
|
109
109
|
else:
|
|
110
110
|
raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
|
|
111
111
|
|
|
112
|
-
self.
|
|
112
|
+
self.type = "dda"
|
|
113
113
|
if type is not None and type.lower() in ["ztscan"]:
|
|
114
|
-
self.
|
|
114
|
+
self.type = "ztscan"
|
|
115
115
|
|
|
116
116
|
if label is not None:
|
|
117
117
|
self.label = label
|
|
@@ -167,9 +167,9 @@ def load_noms1(
|
|
|
167
167
|
else:
|
|
168
168
|
raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
|
|
169
169
|
|
|
170
|
-
self.
|
|
170
|
+
self.type = "dda"
|
|
171
171
|
if type is not None and type.lower() in ["ztscan"]:
|
|
172
|
-
self.
|
|
172
|
+
self.type = "ztscan"
|
|
173
173
|
|
|
174
174
|
if label is not None:
|
|
175
175
|
self.label = label
|
|
@@ -255,6 +255,7 @@ def _load_mzML(
|
|
|
255
255
|
)
|
|
256
256
|
|
|
257
257
|
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
258
|
+
polarity = None
|
|
258
259
|
# iterate over all spectra
|
|
259
260
|
for i, s in tqdm(
|
|
260
261
|
enumerate(omsexp.getSpectra()), # type: ignore[union-attr]
|
|
@@ -262,23 +263,36 @@ def _load_mzML(
|
|
|
262
263
|
desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Scans",
|
|
263
264
|
disable=tdqm_disable,
|
|
264
265
|
):
|
|
266
|
+
# try to get polarity
|
|
267
|
+
if polarity is None:
|
|
268
|
+
try:
|
|
269
|
+
pol = s.getInstrumentSettings().getPolarity()
|
|
270
|
+
if pol == 1:
|
|
271
|
+
polarity = "positive"
|
|
272
|
+
elif pol == 2:
|
|
273
|
+
polarity = "negative"
|
|
274
|
+
except Exception:
|
|
275
|
+
pass
|
|
265
276
|
# create a dict
|
|
266
277
|
if s.getMSLevel() == 1:
|
|
267
278
|
cycle += 1
|
|
268
279
|
prec_mz = None
|
|
269
280
|
precursorIsolationWindowLowerMZ = None
|
|
270
281
|
precursorIsolationWindowUpperMZ = None
|
|
271
|
-
|
|
282
|
+
prec_inty = None
|
|
272
283
|
energy = None
|
|
273
284
|
else:
|
|
274
|
-
prec_mz = s.getPrecursors()
|
|
285
|
+
prec_mz = s.getPrecursors()
|
|
286
|
+
if len(prec_mz) == 0:
|
|
287
|
+
continue
|
|
288
|
+
prec_mz = prec_mz[0].getMZ()
|
|
275
289
|
precursorIsolationWindowLowerMZ = s.getPrecursors()[
|
|
276
290
|
0
|
|
277
291
|
].getIsolationWindowLowerOffset()
|
|
278
292
|
precursorIsolationWindowUpperMZ = s.getPrecursors()[
|
|
279
293
|
0
|
|
280
294
|
].getIsolationWindowUpperOffset()
|
|
281
|
-
|
|
295
|
+
prec_inty = s.getPrecursors()[0].getIntensity()
|
|
282
296
|
# Try to get collision energy from meta values first, fallback to getActivationEnergy()
|
|
283
297
|
try:
|
|
284
298
|
energy = s.getPrecursors()[0].getMetaValue("collision energy")
|
|
@@ -321,7 +335,7 @@ def _load_mzML(
|
|
|
321
335
|
"prec_mz": prec_mz,
|
|
322
336
|
"prec_mz_min": precursorIsolationWindowLowerMZ,
|
|
323
337
|
"prec_mz_max": precursorIsolationWindowUpperMZ,
|
|
324
|
-
"prec_inty":
|
|
338
|
+
"prec_inty": prec_inty,
|
|
325
339
|
"energy": energy,
|
|
326
340
|
"feature_uid": -1,
|
|
327
341
|
}
|
|
@@ -367,10 +381,11 @@ def _load_mzML(
|
|
|
367
381
|
},
|
|
368
382
|
infer_schema_length=None,
|
|
369
383
|
)
|
|
384
|
+
self.polarity = polarity
|
|
370
385
|
self.file_interface = "oms"
|
|
371
386
|
self.ms1_df = ms1_df
|
|
372
387
|
self.label = os.path.basename(filename)
|
|
373
|
-
if self.
|
|
388
|
+
if self.type != "ztscan":
|
|
374
389
|
self.analyze_dda()
|
|
375
390
|
|
|
376
391
|
|
|
@@ -401,7 +416,8 @@ def _load_raw(
|
|
|
401
416
|
- Updates instance attributes including self.file_path, self.file_obj, self.file_interface, and self.label.
|
|
402
417
|
- Initiates further analysis by invoking analyze_dda().
|
|
403
418
|
"""
|
|
404
|
-
from alpharaw.thermo import ThermoRawData
|
|
419
|
+
#from alpharaw.thermo import ThermoRawData
|
|
420
|
+
from masster.sample.thermo import ThermoRawData
|
|
405
421
|
|
|
406
422
|
if not filename:
|
|
407
423
|
raise ValueError("Filename must be provided.")
|
|
@@ -464,6 +480,13 @@ def _load_raw(
|
|
|
464
480
|
prec_intyensity = None
|
|
465
481
|
energy = s["nce"]
|
|
466
482
|
|
|
483
|
+
# try to get polarity
|
|
484
|
+
if self.polarity is None:
|
|
485
|
+
if s['polarity'] == 'positive':
|
|
486
|
+
self.polarity = 'positive'
|
|
487
|
+
elif s['polarity'] == 'negative':
|
|
488
|
+
self.polarity = 'negative'
|
|
489
|
+
|
|
467
490
|
peak_start_idx = s["peak_start_idx"]
|
|
468
491
|
peak_stop_idx = s["peak_stop_idx"]
|
|
469
492
|
peaks = raw_data.peak_df.loc[peak_start_idx : peak_stop_idx - 1]
|
|
@@ -564,16 +587,10 @@ def _load_wiff(
|
|
|
564
587
|
self,
|
|
565
588
|
filename=None,
|
|
566
589
|
):
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
from masster.sample.sciex import SciexWiffData as MassterSciexWiffData
|
|
590
|
+
# Use masster's own implementation first
|
|
591
|
+
from masster.sample.sciex import SciexWiffData as MassterSciexWiffData
|
|
570
592
|
|
|
571
|
-
|
|
572
|
-
except ImportError:
|
|
573
|
-
# Fallback to alpharaw if masster implementation fails
|
|
574
|
-
from alpharaw.sciex import SciexWiffData as AlpharawSciexWiffData
|
|
575
|
-
|
|
576
|
-
SciexWiffDataClass = AlpharawSciexWiffData
|
|
593
|
+
SciexWiffDataClass = MassterSciexWiffData
|
|
577
594
|
|
|
578
595
|
if not filename:
|
|
579
596
|
raise ValueError("Filename must be provided.")
|
|
@@ -610,7 +627,7 @@ def _load_wiff(
|
|
|
610
627
|
"mz": pl.Float64,
|
|
611
628
|
"inty": pl.Float64,
|
|
612
629
|
}
|
|
613
|
-
|
|
630
|
+
polarity = None
|
|
614
631
|
# iterate over rows of specs
|
|
615
632
|
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
616
633
|
for i, s in tqdm(
|
|
@@ -620,6 +637,13 @@ def _load_wiff(
|
|
|
620
637
|
disable=tdqm_disable,
|
|
621
638
|
):
|
|
622
639
|
ms_level = s["ms_level"]
|
|
640
|
+
# try to get polarity
|
|
641
|
+
if polarity is None:
|
|
642
|
+
if s['polarity'] == 'positive':
|
|
643
|
+
polarity = 'positive'
|
|
644
|
+
elif s['polarity'] == 'negative':
|
|
645
|
+
polarity = 'negative'
|
|
646
|
+
|
|
623
647
|
if ms_level == 1:
|
|
624
648
|
cycle += 1
|
|
625
649
|
prec_mz = None
|
|
@@ -723,7 +747,7 @@ def _load_wiff(
|
|
|
723
747
|
self.file_interface = "alpharaw"
|
|
724
748
|
self.label = os.path.basename(filename)
|
|
725
749
|
self.ms1_df = pl.DataFrame(ms1_df_records, schema=schema)
|
|
726
|
-
if self.
|
|
750
|
+
if self.type != "ztscan":
|
|
727
751
|
self.analyze_dda()
|
|
728
752
|
|
|
729
753
|
|
|
@@ -750,103 +774,6 @@ def _load_featureXML(
|
|
|
750
774
|
fm = oms.FeatureMap()
|
|
751
775
|
fh.load(filename, fm)
|
|
752
776
|
self._oms_features_map = fm
|
|
753
|
-
"""if self.features_df is None:
|
|
754
|
-
df = self._oms_features_map.get_df(export_peptide_identifications=False)
|
|
755
|
-
df = self._clean_features_df(df)
|
|
756
|
-
|
|
757
|
-
# desotope features
|
|
758
|
-
df = self._features_deisotope(df, mz_tol=0.02, rt_tol=0.5)
|
|
759
|
-
|
|
760
|
-
# update eic
|
|
761
|
-
df["chrom"] = None
|
|
762
|
-
mz_tol = 0.01
|
|
763
|
-
rt_tol = 10
|
|
764
|
-
# iterate over all rows in df
|
|
765
|
-
for i, row in df.iterrows():
|
|
766
|
-
# select data in ms1_df with mz in range [mz_start - mz_tol, mz_end + mz_tol] and rt in range [rt_start - rt_tol, rt_end + rt_tol]
|
|
767
|
-
d = self.ms1_df.filter(
|
|
768
|
-
(pl.col("rt") >= row["rt_start"] - rt_tol)
|
|
769
|
-
& (pl.col("rt") <= row["rt_end"] + rt_tol)
|
|
770
|
-
& (pl.col("mz") >= row["mz"] - mz_tol)
|
|
771
|
-
& (pl.col("mz") <= row["mz"] + mz_tol)
|
|
772
|
-
)
|
|
773
|
-
# for all unique rt values, find the maximum inty
|
|
774
|
-
eic_rt = d.group_by("rt").agg(pl.col("inty").max())
|
|
775
|
-
if len(eic_rt) < 4:
|
|
776
|
-
continue
|
|
777
|
-
eic = Chromatogram(
|
|
778
|
-
eic_rt["rt"].to_numpy(),
|
|
779
|
-
eic_rt["inty"].to_numpy(),
|
|
780
|
-
label=f"EIC mz={row['mz']:.4f}",
|
|
781
|
-
feature_start=row["rt_start"],
|
|
782
|
-
feature_end=row["rt_end"],
|
|
783
|
-
feature_apex=row["rt"],
|
|
784
|
-
).find_peaks()
|
|
785
|
-
# set eic in df
|
|
786
|
-
df.at[i, "chrom"] = eic
|
|
787
|
-
if len(eic.peak_widths) > 0:
|
|
788
|
-
df.at[i, "chrom_coherence"] = round(eic.feature_coherence, 3)
|
|
789
|
-
df.at[i, "chrom_prominence"] = round(
|
|
790
|
-
eic.peak_prominences[0], 3
|
|
791
|
-
) # eic.peak_prominences[0]
|
|
792
|
-
df.at[i, "chrom_prominence_scaled"] = round(
|
|
793
|
-
eic.peak_prominences[0] / (np.mean(eic.inty) + 1e-10), 3
|
|
794
|
-
)
|
|
795
|
-
df.at[i, "chrom_height_scaled"] = round(
|
|
796
|
-
eic.peak_heights[0] / (np.mean(eic.inty) + 1e-10), 3
|
|
797
|
-
)
|
|
798
|
-
|
|
799
|
-
self.features_df = df"""
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
'''
|
|
803
|
-
def _load_mzpkl(
|
|
804
|
-
self,
|
|
805
|
-
filename="sample.mzpkl",
|
|
806
|
-
ondisk=False,
|
|
807
|
-
):
|
|
808
|
-
"""
|
|
809
|
-
Load the mzpkl data file, initialize the experiment attributes, and set up the file object.
|
|
810
|
-
Parameters:
|
|
811
|
-
filename (str, optional): The path to the .mzpkl file to be loaded. Defaults to "data.mzpkl".
|
|
812
|
-
ondisk (bool, optional): A flag indicating whether the data should be loaded for on-disk usage.
|
|
813
|
-
If True, self.ondisk is set to True and an OnDiscMSExperiment is used.
|
|
814
|
-
Otherwise, an MSExperiment is used.
|
|
815
|
-
Side Effects:
|
|
816
|
-
- Decompresses and unpickles the specified file.
|
|
817
|
-
- Sets attributes on self for each key in the loaded data dictionary, except for keys named 'format'.
|
|
818
|
-
- Renames the attribute 'spectra_df' to 'scans_df' if present.
|
|
819
|
-
- Initializes self.file_obj as either an OnDiscMSExperiment or MSExperiment based on the ondisk flag.
|
|
820
|
-
- Checks for an associated featureXML file (with the same base name as the input file) and loads it if found.
|
|
821
|
-
"""
|
|
822
|
-
|
|
823
|
-
if ondisk is True:
|
|
824
|
-
self.ondisk = True
|
|
825
|
-
|
|
826
|
-
with bz2.BZ2File(filename, "rb") as f:
|
|
827
|
-
data = pickle.load(f)
|
|
828
|
-
|
|
829
|
-
for k, v in data.items():
|
|
830
|
-
if k in ["format"]:
|
|
831
|
-
continue
|
|
832
|
-
if k == "spectra_df":
|
|
833
|
-
k = "scans_df"
|
|
834
|
-
setattr(self, k, v)
|
|
835
|
-
|
|
836
|
-
# Import and call internal sanitize function
|
|
837
|
-
from masster.study.load import _sanitize
|
|
838
|
-
_sanitize(self) if self.ondisk:
|
|
839
|
-
self.file_obj = oms.OnDiscMSExperiment()
|
|
840
|
-
else:
|
|
841
|
-
self.file_obj = oms.MSExperiment()
|
|
842
|
-
|
|
843
|
-
# check if *.featureXML exists
|
|
844
|
-
featureXML = filename.replace(".mzpkl", ".featureXML")
|
|
845
|
-
if os.path.exists(featureXML):
|
|
846
|
-
self._load_featureXML(featureXML)
|
|
847
|
-
|
|
848
|
-
'''
|
|
849
|
-
|
|
850
777
|
|
|
851
778
|
def _wiff_to_dict(
|
|
852
779
|
filename=None,
|
masster/sample/processing.py
CHANGED
|
@@ -248,7 +248,7 @@ def get_spectrum(self, scan, **kwargs):
|
|
|
248
248
|
spect = spect.keep_top(max_peaks)
|
|
249
249
|
|
|
250
250
|
if dia_stats:
|
|
251
|
-
if self.
|
|
251
|
+
if self.type in ["ztscan", "dia", "swath"]:
|
|
252
252
|
spect = self._get_ztscan_stats(
|
|
253
253
|
spec=spect,
|
|
254
254
|
scan_uid=scan_uid,
|
|
@@ -1028,7 +1028,7 @@ def find_ms2(self, **kwargs):
|
|
|
1028
1028
|
|
|
1029
1029
|
- mz_tol (float):
|
|
1030
1030
|
Precursor m/z tolerance used for matching. The effective tolerance may be
|
|
1031
|
-
adjusted by
|
|
1031
|
+
adjusted by type (the defaults class provides ``get_mz_tolerance(type)``).
|
|
1032
1032
|
Default: 0.5 (ztscan/DIA defaults may be larger).
|
|
1033
1033
|
|
|
1034
1034
|
- centroid (bool):
|
|
@@ -1077,7 +1077,7 @@ def find_ms2(self, **kwargs):
|
|
|
1077
1077
|
|
|
1078
1078
|
# Extract parameter values
|
|
1079
1079
|
features = params.get("features")
|
|
1080
|
-
mz_tol = params.get_mz_tolerance(self.
|
|
1080
|
+
mz_tol = params.get_mz_tolerance(self.type)
|
|
1081
1081
|
centroid = params.get("centroid")
|
|
1082
1082
|
deisotope = params.get("deisotope")
|
|
1083
1083
|
dia_stats = params.get("dia_stats")
|
masster/sample/sample.py
CHANGED
|
@@ -240,12 +240,13 @@ class Sample:
|
|
|
240
240
|
# Initialize label from parameters
|
|
241
241
|
self.label = params.label
|
|
242
242
|
|
|
243
|
+
self.type = params.type # dda, dia, ztscan
|
|
244
|
+
self.polarity = params.polarity # Initialize from parameters, may be overridden during raw file loading
|
|
245
|
+
|
|
243
246
|
# this is the path to the original file. It's never sample5
|
|
244
247
|
self.file_source = None
|
|
245
248
|
# this is the path to the object that was loaded. It could be sample5
|
|
246
249
|
self.file_path = None
|
|
247
|
-
# Type of the file (e.g., mzML, RAW, WIFF, mzpkl)
|
|
248
|
-
self.file_type = None
|
|
249
250
|
# Interface to handle the file operations (e.g., oms, alpharaw)
|
|
250
251
|
self.file_interface = None
|
|
251
252
|
# The file object once loaded, can be oms.MzMLFile or alpharaw.AlphaRawFile
|
|
@@ -327,7 +328,6 @@ class Sample:
|
|
|
327
328
|
_save_sample5 = _save_sample5
|
|
328
329
|
_load_sample5 = _load_sample5
|
|
329
330
|
|
|
330
|
-
|
|
331
331
|
# Removed internal-only load methods: _load_featureXML, _load_ms2data, _load_mzML, _load_raw, _load_wiff
|
|
332
332
|
chrom_extract = chrom_extract
|
|
333
333
|
_index_file = _index_file # Renamed from index_file to be internal-only
|
|
@@ -503,6 +503,8 @@ class Sample:
|
|
|
503
503
|
str = f"File: {os.path.basename(self.file_path)}\n"
|
|
504
504
|
str += f"Path: {os.path.dirname(self.file_path)}\n"
|
|
505
505
|
str += f"Source: {self.file_source}\n"
|
|
506
|
+
str += f"Type: {self.type}\n"
|
|
507
|
+
str += f"Polarity: {self.polarity}\n"
|
|
506
508
|
str += f"MS1 scans: {len(self.scans_df.filter(pl.col('ms_level') == 1))}\n"
|
|
507
509
|
str += f"MS2 scans: {len(self.scans_df.filter(pl.col('ms_level') == 2))}\n"
|
|
508
510
|
if self.features_df is not None:
|