masster 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +27 -27
- masster/_version.py +17 -17
- masster/chromatogram.py +497 -503
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
- masster/logger.py +318 -244
- masster/sample/__init__.py +9 -9
- masster/sample/defaults/__init__.py +15 -15
- masster/sample/defaults/find_adducts_def.py +325 -325
- masster/sample/defaults/find_features_def.py +366 -366
- masster/sample/defaults/find_ms2_def.py +285 -285
- masster/sample/defaults/get_spectrum_def.py +314 -318
- masster/sample/defaults/sample_def.py +374 -378
- masster/sample/h5.py +1321 -1297
- masster/sample/helpers.py +833 -364
- masster/sample/lib.py +762 -0
- masster/sample/load.py +1220 -1187
- masster/sample/parameters.py +131 -131
- masster/sample/plot.py +1610 -1622
- masster/sample/processing.py +1402 -1416
- masster/sample/quant.py +209 -0
- masster/sample/sample.py +391 -387
- masster/sample/sample5_schema.json +181 -181
- masster/sample/save.py +737 -736
- masster/sample/sciex.py +1213 -0
- masster/spectrum.py +1287 -1319
- masster/study/__init__.py +9 -9
- masster/study/defaults/__init__.py +21 -19
- masster/study/defaults/align_def.py +267 -267
- masster/study/defaults/export_def.py +41 -40
- masster/study/defaults/fill_chrom_def.py +264 -264
- masster/study/defaults/fill_def.py +260 -0
- masster/study/defaults/find_consensus_def.py +256 -256
- masster/study/defaults/find_ms2_def.py +163 -163
- masster/study/defaults/integrate_chrom_def.py +225 -225
- masster/study/defaults/integrate_def.py +221 -0
- masster/study/defaults/merge_def.py +256 -0
- masster/study/defaults/study_def.py +272 -269
- masster/study/export.py +674 -287
- masster/study/h5.py +1398 -886
- masster/study/helpers.py +1650 -433
- masster/study/helpers_optimized.py +317 -0
- masster/study/load.py +1201 -1078
- masster/study/parameters.py +99 -99
- masster/study/plot.py +632 -645
- masster/study/processing.py +1057 -1046
- masster/study/save.py +149 -134
- masster/study/study.py +606 -522
- masster/study/study5_schema.json +247 -241
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/METADATA +15 -10
- masster-0.3.0.dist-info/RECORD +59 -0
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/licenses/LICENSE +661 -661
- masster-0.2.5.dist-info/RECORD +0 -50
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/WHEEL +0 -0
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,378 +1,374 @@
|
|
|
1
|
-
"""Parameter class for Sample core parameters."""
|
|
2
|
-
|
|
3
|
-
from dataclasses import dataclass, field
|
|
4
|
-
from typing import Optional, Any
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@dataclass
|
|
8
|
-
class sample_defaults:
|
|
9
|
-
"""
|
|
10
|
-
Parameter class for Sample core parameters.
|
|
11
|
-
|
|
12
|
-
This class encapsulates parameters for sample loading, logging configuration,
|
|
13
|
-
mass tolerance, centroiding, and data-independent acquisition (DIA) settings.
|
|
14
|
-
|
|
15
|
-
Attributes:
|
|
16
|
-
filename (Optional[str]): Path to the file to be loaded. Default is None.
|
|
17
|
-
ondisk (bool): Whether to keep data on disk or load into memory. Default is False.
|
|
18
|
-
label (Optional[str]): Optional label to identify the file or dataset. Default is None.
|
|
19
|
-
log_level (str): Logging level to be set for the logger. Default is "INFO".
|
|
20
|
-
log_label (Optional[str]): Optional label for the logger. Default is None.
|
|
21
|
-
log_sink (str): Output sink for logging. Default is "sys.stdout".
|
|
22
|
-
chrom_fwhm (float): Full width at half maximum for chromatographic peaks. Default is 1.0.
|
|
23
|
-
mz_tol_ms1_da (float): Mass tolerance in Daltons for MS1 spectra. Default is 0.002.
|
|
24
|
-
mz_tol_ms2_da (float): Mass tolerance in Daltons for MS2 spectra. Default is 0.005.
|
|
25
|
-
mz_tol_ms1_ppm (float): Mass tolerance in parts per million for MS1 spectra. Default is 5.0.
|
|
26
|
-
mz_tol_ms2_ppm (float): Mass tolerance in parts per million for MS2 spectra. Default is 10.0.
|
|
27
|
-
centroid_algo (str): Algorithm used for centroiding. Default is "lmp".
|
|
28
|
-
centroid_min_points_ms1 (int): Minimum points required for MS1 centroiding. Default is 5.
|
|
29
|
-
centroid_min_points_ms2 (int): Minimum points required for MS2 centroiding. Default is 4.
|
|
30
|
-
centroid_smooth (int): Smoothing parameter for centroiding. Default is 5.
|
|
31
|
-
centroid_refine (bool): Whether to refine centroiding results. Default is True.
|
|
32
|
-
centroid_prominence (int): Prominence parameter for centroiding. Default is -1.
|
|
33
|
-
max_points_per_spectrum (int): Maximum number of points per spectrum. Default is 50000.
|
|
34
|
-
dia_window (Optional[float]): DIA window size. Default is None.
|
|
35
|
-
"""
|
|
36
|
-
|
|
37
|
-
filename: Optional[str] = None
|
|
38
|
-
ondisk: bool = False
|
|
39
|
-
label: str | None = None
|
|
40
|
-
log_level: str = "INFO"
|
|
41
|
-
log_label: Optional[str] = ""
|
|
42
|
-
log_sink: str = "sys.stdout"
|
|
43
|
-
chrom_fwhm: float = 1.0
|
|
44
|
-
mz_tol_ms1_da: float = 0.002
|
|
45
|
-
mz_tol_ms2_da: float = 0.005
|
|
46
|
-
mz_tol_ms1_ppm: float = 5.0
|
|
47
|
-
mz_tol_ms2_ppm: float = 10.0
|
|
48
|
-
centroid_algo: str = "lmp"
|
|
49
|
-
centroid_min_points_ms1: int = 5
|
|
50
|
-
centroid_min_points_ms2: int = 4
|
|
51
|
-
centroid_smooth: int = 5
|
|
52
|
-
centroid_refine: bool = True
|
|
53
|
-
centroid_prominence: int = -1
|
|
54
|
-
max_points_per_spectrum: int = 50000
|
|
55
|
-
dia_window: float | None = None
|
|
56
|
-
|
|
57
|
-
_param_metadata: dict[str, dict[str, Any]] = field(
|
|
58
|
-
default_factory=lambda: {
|
|
59
|
-
"filename": {
|
|
60
|
-
"dtype": "Optional[str]",
|
|
61
|
-
"description": "Path to the file to be loaded",
|
|
62
|
-
"default": None,
|
|
63
|
-
},
|
|
64
|
-
"ondisk": {
|
|
65
|
-
"dtype": bool,
|
|
66
|
-
"description": "Whether to keep data on disk or load into memory",
|
|
67
|
-
"default": False,
|
|
68
|
-
},
|
|
69
|
-
"label": {
|
|
70
|
-
"dtype": "Optional[str]",
|
|
71
|
-
"description": "Optional label to identify the file or dataset",
|
|
72
|
-
"default": None,
|
|
73
|
-
},
|
|
74
|
-
"log_level": {
|
|
75
|
-
"dtype": str,
|
|
76
|
-
"description": "Logging level to be set for the logger",
|
|
77
|
-
"default": "INFO",
|
|
78
|
-
"allowed_values": ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
|
79
|
-
},
|
|
80
|
-
"log_label": {
|
|
81
|
-
"dtype": "Optional[str]",
|
|
82
|
-
"description": "Optional label for the logger",
|
|
83
|
-
"default": None,
|
|
84
|
-
},
|
|
85
|
-
"log_sink": {
|
|
86
|
-
"dtype": str,
|
|
87
|
-
"description": "Output sink for logging. Use 'sys.stdout' for console output, or a file path",
|
|
88
|
-
"default": "sys.stdout",
|
|
89
|
-
},
|
|
90
|
-
"chrom_fwhm": {
|
|
91
|
-
"dtype": float,
|
|
92
|
-
"description": "Full width at half maximum for chromatographic peaks",
|
|
93
|
-
"default": 1.0,
|
|
94
|
-
"min_value": 0.0,
|
|
95
|
-
},
|
|
96
|
-
"mz_tol_ms1_da": {
|
|
97
|
-
"dtype": float,
|
|
98
|
-
"description": "Mass tolerance in Daltons for MS1 spectra",
|
|
99
|
-
"default": 0.002,
|
|
100
|
-
"min_value": 0.0,
|
|
101
|
-
},
|
|
102
|
-
"mz_tol_ms2_da": {
|
|
103
|
-
"dtype": float,
|
|
104
|
-
"description": "Mass tolerance in Daltons for MS2 spectra",
|
|
105
|
-
"default": 0.005,
|
|
106
|
-
"min_value": 0.0,
|
|
107
|
-
},
|
|
108
|
-
"mz_tol_ms1_ppm": {
|
|
109
|
-
"dtype": float,
|
|
110
|
-
"description": "Mass tolerance in parts per million for MS1 spectra",
|
|
111
|
-
"default": 5.0,
|
|
112
|
-
"min_value": 0.0,
|
|
113
|
-
},
|
|
114
|
-
"mz_tol_ms2_ppm": {
|
|
115
|
-
"dtype": float,
|
|
116
|
-
"description": "Mass tolerance in parts per million for MS2 spectra",
|
|
117
|
-
"default": 10.0,
|
|
118
|
-
"min_value": 0.0,
|
|
119
|
-
},
|
|
120
|
-
"centroid_algo": {
|
|
121
|
-
"dtype": str,
|
|
122
|
-
"description": "Algorithm used for centroiding",
|
|
123
|
-
"default": "lmp",
|
|
124
|
-
"allowed_values": ["lmp", "other"],
|
|
125
|
-
},
|
|
126
|
-
"centroid_min_points_ms1": {
|
|
127
|
-
"dtype": int,
|
|
128
|
-
"description": "Minimum points required for MS1 centroiding",
|
|
129
|
-
"default": 5,
|
|
130
|
-
"min_value": 1,
|
|
131
|
-
},
|
|
132
|
-
"centroid_min_points_ms2": {
|
|
133
|
-
"dtype": int,
|
|
134
|
-
"description": "Minimum points required for MS2 centroiding",
|
|
135
|
-
"default": 4,
|
|
136
|
-
"min_value": 1,
|
|
137
|
-
},
|
|
138
|
-
"centroid_smooth": {
|
|
139
|
-
"dtype": int,
|
|
140
|
-
"description": "Smoothing parameter for centroiding",
|
|
141
|
-
"default": 5,
|
|
142
|
-
"min_value": 0,
|
|
143
|
-
},
|
|
144
|
-
"centroid_refine": {
|
|
145
|
-
"dtype": bool,
|
|
146
|
-
"description": "Whether to refine centroiding results",
|
|
147
|
-
"default": True,
|
|
148
|
-
},
|
|
149
|
-
"centroid_prominence": {
|
|
150
|
-
"dtype": int,
|
|
151
|
-
"description": "Prominence parameter for centroiding",
|
|
152
|
-
"default": -1,
|
|
153
|
-
},
|
|
154
|
-
"max_points_per_spectrum": {
|
|
155
|
-
"dtype": int,
|
|
156
|
-
"description": "Maximum number of points per spectrum",
|
|
157
|
-
"default": 50000,
|
|
158
|
-
"min_value": 1,
|
|
159
|
-
},
|
|
160
|
-
"dia_window": {
|
|
161
|
-
"dtype": "Optional[float]",
|
|
162
|
-
"description": "DIA window size",
|
|
163
|
-
"default": None,
|
|
164
|
-
"min_value": 0.0,
|
|
165
|
-
},
|
|
166
|
-
},
|
|
167
|
-
repr=False,
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
def get_info(self, param_name: str) -> dict[str, Any]:
|
|
171
|
-
"""
|
|
172
|
-
Get information about a specific parameter.
|
|
173
|
-
|
|
174
|
-
Args:
|
|
175
|
-
param_name: Name of the parameter
|
|
176
|
-
|
|
177
|
-
Returns:
|
|
178
|
-
Dictionary containing parameter metadata
|
|
179
|
-
|
|
180
|
-
Raises:
|
|
181
|
-
KeyError: If parameter name is not found
|
|
182
|
-
"""
|
|
183
|
-
if param_name not in self._param_metadata:
|
|
184
|
-
raise KeyError(f"Parameter '{param_name}' not found")
|
|
185
|
-
return self._param_metadata[param_name]
|
|
186
|
-
|
|
187
|
-
def get_description(self, param_name: str) -> str:
|
|
188
|
-
"""
|
|
189
|
-
Get description for a specific parameter.
|
|
190
|
-
|
|
191
|
-
Args:
|
|
192
|
-
param_name: Name of the parameter
|
|
193
|
-
|
|
194
|
-
Returns:
|
|
195
|
-
Parameter description string
|
|
196
|
-
"""
|
|
197
|
-
return str(self.get_info(param_name)["description"])
|
|
198
|
-
|
|
199
|
-
def validate(self, param_name: str, value: Any) -> bool:
|
|
200
|
-
"""
|
|
201
|
-
Validate a parameter value against its constraints.
|
|
202
|
-
|
|
203
|
-
Args:
|
|
204
|
-
param_name: Name of the parameter
|
|
205
|
-
value: Value to validate
|
|
206
|
-
|
|
207
|
-
Returns:
|
|
208
|
-
True if value is valid, False otherwise
|
|
209
|
-
"""
|
|
210
|
-
if param_name not in self._param_metadata:
|
|
211
|
-
return False
|
|
212
|
-
|
|
213
|
-
metadata = self._param_metadata[param_name]
|
|
214
|
-
expected_dtype = metadata["dtype"]
|
|
215
|
-
|
|
216
|
-
# Handle optional types
|
|
217
|
-
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional"):
|
|
218
|
-
if value is None:
|
|
219
|
-
return True
|
|
220
|
-
# Extract the inner type for validation
|
|
221
|
-
if "str" in expected_dtype:
|
|
222
|
-
expected_dtype = str
|
|
223
|
-
elif "float" in expected_dtype:
|
|
224
|
-
expected_dtype = float
|
|
225
|
-
elif "int" in expected_dtype:
|
|
226
|
-
expected_dtype = int
|
|
227
|
-
|
|
228
|
-
# Type checking
|
|
229
|
-
if expected_dtype is int:
|
|
230
|
-
if not isinstance(value, int):
|
|
231
|
-
try:
|
|
232
|
-
value = int(value)
|
|
233
|
-
except (ValueError, TypeError):
|
|
234
|
-
return False
|
|
235
|
-
elif expected_dtype is float:
|
|
236
|
-
if not isinstance(value, (int, float)):
|
|
237
|
-
try:
|
|
238
|
-
value = float(value)
|
|
239
|
-
except (ValueError, TypeError):
|
|
240
|
-
return False
|
|
241
|
-
elif expected_dtype is bool:
|
|
242
|
-
if not isinstance(value, bool):
|
|
243
|
-
return False
|
|
244
|
-
elif expected_dtype is str:
|
|
245
|
-
if not isinstance(value, str):
|
|
246
|
-
return False
|
|
247
|
-
|
|
248
|
-
# Range validation for numeric types
|
|
249
|
-
if expected_dtype in (int, float) and isinstance(value, (int, float)):
|
|
250
|
-
if "min_value" in metadata and value < metadata["min_value"]:
|
|
251
|
-
return False
|
|
252
|
-
if "max_value" in metadata and value > metadata["max_value"]:
|
|
253
|
-
return False
|
|
254
|
-
|
|
255
|
-
# Allowed values validation for strings
|
|
256
|
-
if expected_dtype is str and "allowed_values" in metadata:
|
|
257
|
-
if value not in metadata["allowed_values"]:
|
|
258
|
-
return False
|
|
259
|
-
|
|
260
|
-
return True
|
|
261
|
-
|
|
262
|
-
def set(self, param_name: str, value: Any, validate: bool = True) -> bool:
|
|
263
|
-
"""
|
|
264
|
-
Set a parameter value with optional validation.
|
|
265
|
-
|
|
266
|
-
Args:
|
|
267
|
-
param_name: Name of the parameter
|
|
268
|
-
value: New value for the parameter
|
|
269
|
-
validate: Whether to validate the value before setting
|
|
270
|
-
|
|
271
|
-
Returns:
|
|
272
|
-
True if parameter was set successfully, False otherwise
|
|
273
|
-
"""
|
|
274
|
-
if not hasattr(self, param_name):
|
|
275
|
-
return False
|
|
276
|
-
|
|
277
|
-
if validate and not self.validate(param_name, value):
|
|
278
|
-
return False
|
|
279
|
-
|
|
280
|
-
# Convert to expected type if needed
|
|
281
|
-
if param_name in self._param_metadata:
|
|
282
|
-
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
283
|
-
|
|
284
|
-
# Handle optional types
|
|
285
|
-
if (
|
|
286
|
-
isinstance(
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
"""
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
"""
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
if not self.validate(param_name, current_value):
|
|
376
|
-
invalid_params.append(param_name)
|
|
377
|
-
|
|
378
|
-
return len(invalid_params) == 0, invalid_params
|
|
1
|
+
"""Parameter class for Sample core parameters."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional, Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class sample_defaults:
|
|
9
|
+
"""
|
|
10
|
+
Parameter class for Sample core parameters.
|
|
11
|
+
|
|
12
|
+
This class encapsulates parameters for sample loading, logging configuration,
|
|
13
|
+
mass tolerance, centroiding, and data-independent acquisition (DIA) settings.
|
|
14
|
+
|
|
15
|
+
Attributes:
|
|
16
|
+
filename (Optional[str]): Path to the file to be loaded. Default is None.
|
|
17
|
+
ondisk (bool): Whether to keep data on disk or load into memory. Default is False.
|
|
18
|
+
label (Optional[str]): Optional label to identify the file or dataset. Default is None.
|
|
19
|
+
log_level (str): Logging level to be set for the logger. Default is "INFO".
|
|
20
|
+
log_label (Optional[str]): Optional label for the logger. Default is None.
|
|
21
|
+
log_sink (str): Output sink for logging. Default is "sys.stdout".
|
|
22
|
+
chrom_fwhm (float): Full width at half maximum for chromatographic peaks. Default is 1.0.
|
|
23
|
+
mz_tol_ms1_da (float): Mass tolerance in Daltons for MS1 spectra. Default is 0.002.
|
|
24
|
+
mz_tol_ms2_da (float): Mass tolerance in Daltons for MS2 spectra. Default is 0.005.
|
|
25
|
+
mz_tol_ms1_ppm (float): Mass tolerance in parts per million for MS1 spectra. Default is 5.0.
|
|
26
|
+
mz_tol_ms2_ppm (float): Mass tolerance in parts per million for MS2 spectra. Default is 10.0.
|
|
27
|
+
centroid_algo (str): Algorithm used for centroiding. Default is "lmp".
|
|
28
|
+
centroid_min_points_ms1 (int): Minimum points required for MS1 centroiding. Default is 5.
|
|
29
|
+
centroid_min_points_ms2 (int): Minimum points required for MS2 centroiding. Default is 4.
|
|
30
|
+
centroid_smooth (int): Smoothing parameter for centroiding. Default is 5.
|
|
31
|
+
centroid_refine (bool): Whether to refine centroiding results. Default is True.
|
|
32
|
+
centroid_prominence (int): Prominence parameter for centroiding. Default is -1.
|
|
33
|
+
max_points_per_spectrum (int): Maximum number of points per spectrum. Default is 50000.
|
|
34
|
+
dia_window (Optional[float]): DIA window size. Default is None.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
filename: Optional[str] = None
|
|
38
|
+
ondisk: bool = False
|
|
39
|
+
label: str | None = None
|
|
40
|
+
log_level: str = "INFO"
|
|
41
|
+
log_label: Optional[str] = ""
|
|
42
|
+
log_sink: str = "sys.stdout"
|
|
43
|
+
chrom_fwhm: float = 1.0
|
|
44
|
+
mz_tol_ms1_da: float = 0.002
|
|
45
|
+
mz_tol_ms2_da: float = 0.005
|
|
46
|
+
mz_tol_ms1_ppm: float = 5.0
|
|
47
|
+
mz_tol_ms2_ppm: float = 10.0
|
|
48
|
+
centroid_algo: str = "lmp"
|
|
49
|
+
centroid_min_points_ms1: int = 5
|
|
50
|
+
centroid_min_points_ms2: int = 4
|
|
51
|
+
centroid_smooth: int = 5
|
|
52
|
+
centroid_refine: bool = True
|
|
53
|
+
centroid_prominence: int = -1
|
|
54
|
+
max_points_per_spectrum: int = 50000
|
|
55
|
+
dia_window: float | None = None
|
|
56
|
+
|
|
57
|
+
_param_metadata: dict[str, dict[str, Any]] = field(
|
|
58
|
+
default_factory=lambda: {
|
|
59
|
+
"filename": {
|
|
60
|
+
"dtype": "Optional[str]",
|
|
61
|
+
"description": "Path to the file to be loaded",
|
|
62
|
+
"default": None,
|
|
63
|
+
},
|
|
64
|
+
"ondisk": {
|
|
65
|
+
"dtype": bool,
|
|
66
|
+
"description": "Whether to keep data on disk or load into memory",
|
|
67
|
+
"default": False,
|
|
68
|
+
},
|
|
69
|
+
"label": {
|
|
70
|
+
"dtype": "Optional[str]",
|
|
71
|
+
"description": "Optional label to identify the file or dataset",
|
|
72
|
+
"default": None,
|
|
73
|
+
},
|
|
74
|
+
"log_level": {
|
|
75
|
+
"dtype": str,
|
|
76
|
+
"description": "Logging level to be set for the logger",
|
|
77
|
+
"default": "INFO",
|
|
78
|
+
"allowed_values": ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
|
79
|
+
},
|
|
80
|
+
"log_label": {
|
|
81
|
+
"dtype": "Optional[str]",
|
|
82
|
+
"description": "Optional label for the logger",
|
|
83
|
+
"default": None,
|
|
84
|
+
},
|
|
85
|
+
"log_sink": {
|
|
86
|
+
"dtype": str,
|
|
87
|
+
"description": "Output sink for logging. Use 'sys.stdout' for console output, or a file path",
|
|
88
|
+
"default": "sys.stdout",
|
|
89
|
+
},
|
|
90
|
+
"chrom_fwhm": {
|
|
91
|
+
"dtype": float,
|
|
92
|
+
"description": "Full width at half maximum for chromatographic peaks",
|
|
93
|
+
"default": 1.0,
|
|
94
|
+
"min_value": 0.0,
|
|
95
|
+
},
|
|
96
|
+
"mz_tol_ms1_da": {
|
|
97
|
+
"dtype": float,
|
|
98
|
+
"description": "Mass tolerance in Daltons for MS1 spectra",
|
|
99
|
+
"default": 0.002,
|
|
100
|
+
"min_value": 0.0,
|
|
101
|
+
},
|
|
102
|
+
"mz_tol_ms2_da": {
|
|
103
|
+
"dtype": float,
|
|
104
|
+
"description": "Mass tolerance in Daltons for MS2 spectra",
|
|
105
|
+
"default": 0.005,
|
|
106
|
+
"min_value": 0.0,
|
|
107
|
+
},
|
|
108
|
+
"mz_tol_ms1_ppm": {
|
|
109
|
+
"dtype": float,
|
|
110
|
+
"description": "Mass tolerance in parts per million for MS1 spectra",
|
|
111
|
+
"default": 5.0,
|
|
112
|
+
"min_value": 0.0,
|
|
113
|
+
},
|
|
114
|
+
"mz_tol_ms2_ppm": {
|
|
115
|
+
"dtype": float,
|
|
116
|
+
"description": "Mass tolerance in parts per million for MS2 spectra",
|
|
117
|
+
"default": 10.0,
|
|
118
|
+
"min_value": 0.0,
|
|
119
|
+
},
|
|
120
|
+
"centroid_algo": {
|
|
121
|
+
"dtype": str,
|
|
122
|
+
"description": "Algorithm used for centroiding",
|
|
123
|
+
"default": "lmp",
|
|
124
|
+
"allowed_values": ["lmp", "other"],
|
|
125
|
+
},
|
|
126
|
+
"centroid_min_points_ms1": {
|
|
127
|
+
"dtype": int,
|
|
128
|
+
"description": "Minimum points required for MS1 centroiding",
|
|
129
|
+
"default": 5,
|
|
130
|
+
"min_value": 1,
|
|
131
|
+
},
|
|
132
|
+
"centroid_min_points_ms2": {
|
|
133
|
+
"dtype": int,
|
|
134
|
+
"description": "Minimum points required for MS2 centroiding",
|
|
135
|
+
"default": 4,
|
|
136
|
+
"min_value": 1,
|
|
137
|
+
},
|
|
138
|
+
"centroid_smooth": {
|
|
139
|
+
"dtype": int,
|
|
140
|
+
"description": "Smoothing parameter for centroiding",
|
|
141
|
+
"default": 5,
|
|
142
|
+
"min_value": 0,
|
|
143
|
+
},
|
|
144
|
+
"centroid_refine": {
|
|
145
|
+
"dtype": bool,
|
|
146
|
+
"description": "Whether to refine centroiding results",
|
|
147
|
+
"default": True,
|
|
148
|
+
},
|
|
149
|
+
"centroid_prominence": {
|
|
150
|
+
"dtype": int,
|
|
151
|
+
"description": "Prominence parameter for centroiding",
|
|
152
|
+
"default": -1,
|
|
153
|
+
},
|
|
154
|
+
"max_points_per_spectrum": {
|
|
155
|
+
"dtype": int,
|
|
156
|
+
"description": "Maximum number of points per spectrum",
|
|
157
|
+
"default": 50000,
|
|
158
|
+
"min_value": 1,
|
|
159
|
+
},
|
|
160
|
+
"dia_window": {
|
|
161
|
+
"dtype": "Optional[float]",
|
|
162
|
+
"description": "DIA window size",
|
|
163
|
+
"default": None,
|
|
164
|
+
"min_value": 0.0,
|
|
165
|
+
},
|
|
166
|
+
},
|
|
167
|
+
repr=False,
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
def get_info(self, param_name: str) -> dict[str, Any]:
|
|
171
|
+
"""
|
|
172
|
+
Get information about a specific parameter.
|
|
173
|
+
|
|
174
|
+
Args:
|
|
175
|
+
param_name: Name of the parameter
|
|
176
|
+
|
|
177
|
+
Returns:
|
|
178
|
+
Dictionary containing parameter metadata
|
|
179
|
+
|
|
180
|
+
Raises:
|
|
181
|
+
KeyError: If parameter name is not found
|
|
182
|
+
"""
|
|
183
|
+
if param_name not in self._param_metadata:
|
|
184
|
+
raise KeyError(f"Parameter '{param_name}' not found")
|
|
185
|
+
return self._param_metadata[param_name]
|
|
186
|
+
|
|
187
|
+
def get_description(self, param_name: str) -> str:
|
|
188
|
+
"""
|
|
189
|
+
Get description for a specific parameter.
|
|
190
|
+
|
|
191
|
+
Args:
|
|
192
|
+
param_name: Name of the parameter
|
|
193
|
+
|
|
194
|
+
Returns:
|
|
195
|
+
Parameter description string
|
|
196
|
+
"""
|
|
197
|
+
return str(self.get_info(param_name)["description"])
|
|
198
|
+
|
|
199
|
+
def validate(self, param_name: str, value: Any) -> bool:
|
|
200
|
+
"""
|
|
201
|
+
Validate a parameter value against its constraints.
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
param_name: Name of the parameter
|
|
205
|
+
value: Value to validate
|
|
206
|
+
|
|
207
|
+
Returns:
|
|
208
|
+
True if value is valid, False otherwise
|
|
209
|
+
"""
|
|
210
|
+
if param_name not in self._param_metadata:
|
|
211
|
+
return False
|
|
212
|
+
|
|
213
|
+
metadata = self._param_metadata[param_name]
|
|
214
|
+
expected_dtype = metadata["dtype"]
|
|
215
|
+
|
|
216
|
+
# Handle optional types
|
|
217
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional"):
|
|
218
|
+
if value is None:
|
|
219
|
+
return True
|
|
220
|
+
# Extract the inner type for validation
|
|
221
|
+
if "str" in expected_dtype:
|
|
222
|
+
expected_dtype = str
|
|
223
|
+
elif "float" in expected_dtype:
|
|
224
|
+
expected_dtype = float
|
|
225
|
+
elif "int" in expected_dtype:
|
|
226
|
+
expected_dtype = int
|
|
227
|
+
|
|
228
|
+
# Type checking
|
|
229
|
+
if expected_dtype is int:
|
|
230
|
+
if not isinstance(value, int):
|
|
231
|
+
try:
|
|
232
|
+
value = int(value)
|
|
233
|
+
except (ValueError, TypeError):
|
|
234
|
+
return False
|
|
235
|
+
elif expected_dtype is float:
|
|
236
|
+
if not isinstance(value, (int, float)):
|
|
237
|
+
try:
|
|
238
|
+
value = float(value)
|
|
239
|
+
except (ValueError, TypeError):
|
|
240
|
+
return False
|
|
241
|
+
elif expected_dtype is bool:
|
|
242
|
+
if not isinstance(value, bool):
|
|
243
|
+
return False
|
|
244
|
+
elif expected_dtype is str:
|
|
245
|
+
if not isinstance(value, str):
|
|
246
|
+
return False
|
|
247
|
+
|
|
248
|
+
# Range validation for numeric types
|
|
249
|
+
if expected_dtype in (int, float) and isinstance(value, (int, float)):
|
|
250
|
+
if "min_value" in metadata and value < metadata["min_value"]:
|
|
251
|
+
return False
|
|
252
|
+
if "max_value" in metadata and value > metadata["max_value"]:
|
|
253
|
+
return False
|
|
254
|
+
|
|
255
|
+
# Allowed values validation for strings
|
|
256
|
+
if expected_dtype is str and "allowed_values" in metadata:
|
|
257
|
+
if value not in metadata["allowed_values"]:
|
|
258
|
+
return False
|
|
259
|
+
|
|
260
|
+
return True
|
|
261
|
+
|
|
262
|
+
def set(self, param_name: str, value: Any, validate: bool = True) -> bool:
|
|
263
|
+
"""
|
|
264
|
+
Set a parameter value with optional validation.
|
|
265
|
+
|
|
266
|
+
Args:
|
|
267
|
+
param_name: Name of the parameter
|
|
268
|
+
value: New value for the parameter
|
|
269
|
+
validate: Whether to validate the value before setting
|
|
270
|
+
|
|
271
|
+
Returns:
|
|
272
|
+
True if parameter was set successfully, False otherwise
|
|
273
|
+
"""
|
|
274
|
+
if not hasattr(self, param_name):
|
|
275
|
+
return False
|
|
276
|
+
|
|
277
|
+
if validate and not self.validate(param_name, value):
|
|
278
|
+
return False
|
|
279
|
+
|
|
280
|
+
# Convert to expected type if needed
|
|
281
|
+
if param_name in self._param_metadata:
|
|
282
|
+
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
283
|
+
|
|
284
|
+
# Handle optional types
|
|
285
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
286
|
+
if "int" in expected_dtype and not isinstance(value, int):
|
|
287
|
+
try:
|
|
288
|
+
value = int(value)
|
|
289
|
+
except (ValueError, TypeError):
|
|
290
|
+
if validate:
|
|
291
|
+
return False
|
|
292
|
+
elif "float" in expected_dtype and not isinstance(value, float):
|
|
293
|
+
try:
|
|
294
|
+
value = float(value)
|
|
295
|
+
except (ValueError, TypeError):
|
|
296
|
+
if validate:
|
|
297
|
+
return False
|
|
298
|
+
|
|
299
|
+
setattr(self, param_name, value)
|
|
300
|
+
return True
|
|
301
|
+
|
|
302
|
+
def get(self, param_name: str) -> Any:
|
|
303
|
+
"""
|
|
304
|
+
Get the value of a parameter by name.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
param_name: Name of the parameter
|
|
308
|
+
|
|
309
|
+
Returns:
|
|
310
|
+
Current value of the parameter
|
|
311
|
+
"""
|
|
312
|
+
if not hasattr(self, param_name):
|
|
313
|
+
raise KeyError(f"Parameter '{param_name}' not found")
|
|
314
|
+
return getattr(self, param_name)
|
|
315
|
+
|
|
316
|
+
def set_from_dict(
|
|
317
|
+
self,
|
|
318
|
+
param_dict: dict[str, Any],
|
|
319
|
+
validate: bool = True,
|
|
320
|
+
) -> list[str]:
|
|
321
|
+
"""
|
|
322
|
+
Update multiple parameters from a dictionary.
|
|
323
|
+
|
|
324
|
+
Args:
|
|
325
|
+
param_dict: Dictionary of parameter names and values
|
|
326
|
+
validate: Whether to validate values before setting
|
|
327
|
+
|
|
328
|
+
Returns:
|
|
329
|
+
List of parameter names that could not be set
|
|
330
|
+
"""
|
|
331
|
+
failed_params = []
|
|
332
|
+
|
|
333
|
+
for param_name, value in param_dict.items():
|
|
334
|
+
if not self.set(param_name, value, validate):
|
|
335
|
+
failed_params.append(param_name)
|
|
336
|
+
|
|
337
|
+
return failed_params
|
|
338
|
+
|
|
339
|
+
def to_dict(self) -> dict[str, Any]:
|
|
340
|
+
"""
|
|
341
|
+
Convert parameters to dictionary, excluding metadata.
|
|
342
|
+
|
|
343
|
+
Returns:
|
|
344
|
+
Dictionary of parameter names and values
|
|
345
|
+
"""
|
|
346
|
+
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
|
|
347
|
+
|
|
348
|
+
def list_parameters(self) -> list[str]:
|
|
349
|
+
"""
|
|
350
|
+
Get list of all parameter names.
|
|
351
|
+
|
|
352
|
+
Returns:
|
|
353
|
+
List of parameter names
|
|
354
|
+
"""
|
|
355
|
+
return [k for k in self.__dict__.keys() if not k.startswith("_")]
|
|
356
|
+
|
|
357
|
+
def validate_all(self) -> tuple[bool, list[str]]:
|
|
358
|
+
"""
|
|
359
|
+
Validate all parameters in the instance.
|
|
360
|
+
|
|
361
|
+
Returns:
|
|
362
|
+
Tuple of (all_valid, list_of_invalid_params)
|
|
363
|
+
- all_valid: True if all parameters are valid, False otherwise
|
|
364
|
+
- list_of_invalid_params: List of parameter names that failed validation
|
|
365
|
+
"""
|
|
366
|
+
invalid_params = []
|
|
367
|
+
|
|
368
|
+
for param_name in self.list_parameters():
|
|
369
|
+
if param_name in self._param_metadata:
|
|
370
|
+
current_value = getattr(self, param_name)
|
|
371
|
+
if not self.validate(param_name, current_value):
|
|
372
|
+
invalid_params.append(param_name)
|
|
373
|
+
|
|
374
|
+
return len(invalid_params) == 0, invalid_params
|