masster 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +16 -6
- masster/sample/defaults/sample_def.py +1 -1
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +137 -136
- masster/sample/load.py +13 -9
- masster/sample/plot.py +156 -131
- masster/sample/processing.py +18 -12
- masster/sample/sample.py +4 -4
- masster/sample/sample5_schema.json +62 -62
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +224 -6
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +293 -245
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +51 -25
- masster/study/plot.py +453 -17
- masster/study/processing.py +159 -76
- masster/study/save.py +7 -7
- masster/study/study.py +97 -88
- masster/study/study5_schema.json +82 -82
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/METADATA +1 -1
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/RECORD +33 -31
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/WHEEL +0 -0
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/entry_points.txt +0 -0
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,22 +14,74 @@ class align_defaults:
|
|
|
14
14
|
alignment algorithm settings.
|
|
15
15
|
|
|
16
16
|
Attributes:
|
|
17
|
+
algo (str): Alignment algorithm ('pc' for PoseClustering, 'kd' for KD). Default is 'pc'.
|
|
17
18
|
rt_max_diff (float): Maximum retention time difference for alignment. Default is 60.0.
|
|
18
|
-
mz_max_diff (float): Maximum m/z difference for alignment. Default is 0.
|
|
19
|
+
mz_max_diff (float): Maximum m/z difference for alignment. Default is 0.02.
|
|
19
20
|
rt_pair_distance_frac (float): Fraction of RT difference for pair distance. Default is 0.2.
|
|
20
21
|
mz_pair_max_distance (float): Maximum m/z pair distance. Default is 0.01.
|
|
21
22
|
num_used_points (int): Number of points used for alignment. Default is 1000.
|
|
22
|
-
save_features (bool): Whether to save features after alignment. Default is
|
|
23
|
-
skip_blanks (bool): Whether to skip blank samples. Default is
|
|
23
|
+
save_features (bool): Whether to save features after alignment. Default is False.
|
|
24
|
+
skip_blanks (bool): Whether to skip blank samples. Default is False.
|
|
25
|
+
|
|
26
|
+
KD algorithm specific parameters:
|
|
27
|
+
nr_partitions (int): Number of partitions in m/z dimension. Default is 100.
|
|
28
|
+
warp_enabled (bool): Enable non-linear retention time transformation. Default is True.
|
|
29
|
+
warp_rt_tol (float): RT tolerance for the LOWESS fit. Default is 5.0.
|
|
30
|
+
warp_mz_tol (float): m/z tolerance for the LOWESS fit. Default is 0.05.
|
|
31
|
+
warp_max_pairwise_log_fc (float): Maximum absolute log10 fold change threshold for pairing. Default is 0.5.
|
|
32
|
+
warp_min_rel_cc_size (float): Minimum relative connected component size. Default is 0.5.
|
|
33
|
+
warp_max_nr_conflicts (int): Allow up to this many conflicts per connected component for alignment. Default is 0.
|
|
34
|
+
link_rt_tol (float): Width of RT tolerance window for linking (seconds). Default is 30.0.
|
|
35
|
+
link_mz_tol (float): m/z tolerance for linking features (ppm or Da). Default is 10.0.
|
|
36
|
+
link_charge_merging (str): Charge merging strategy for linking. Default is "With_charge_zero".
|
|
37
|
+
link_adduct_merging (str): Adduct merging strategy for linking. Default is "Any".
|
|
38
|
+
distance_RT_exponent (float): Exponent for normalized RT differences. Default is 1.0.
|
|
39
|
+
distance_RT_weight (float): Weight factor for final RT distances. Default is 1.0.
|
|
40
|
+
distance_MZ_exponent (float): Exponent for normalized m/z differences. Default is 2.0.
|
|
41
|
+
distance_MZ_weight (float): Weight factor for final m/z distances. Default is 1.0.
|
|
42
|
+
distance_intensity_exponent (float): Exponent for differences in relative intensity. Default is 1.0.
|
|
43
|
+
distance_intensity_weight (float): Weight factor for final intensity distances. Default is 1.0.
|
|
44
|
+
distance_intensity_log_transform (str): Log-transform intensities. Default is "enabled".
|
|
45
|
+
LOWESS_span (float): Fraction of datapoints for each local regression. Default is 0.666666666666667.
|
|
46
|
+
LOWESS_num_iterations (int): Number of robustifying iterations for LOWESS fitting. Default is 3.
|
|
47
|
+
LOWESS_delta (float): Parameter for LOWESS computations (negative auto-computes). Default is -1.0.
|
|
48
|
+
LOWESS_interpolation_type (str): Method for interpolation between datapoints. Default is "cspline".
|
|
49
|
+
LOWESS_extrapolation_type (str): Method for extrapolation outside data range. Default is "four-point-linear".
|
|
24
50
|
"""
|
|
25
51
|
|
|
26
52
|
rt_max_diff: float = 60.0
|
|
27
|
-
mz_max_diff: float = 0.
|
|
53
|
+
mz_max_diff: float = 0.02
|
|
28
54
|
rt_pair_distance_frac: float = 0.2
|
|
29
55
|
mz_pair_max_distance: float = 0.01
|
|
30
56
|
num_used_points: int = 1000
|
|
31
|
-
save_features: bool =
|
|
32
|
-
skip_blanks: bool =
|
|
57
|
+
save_features: bool = False
|
|
58
|
+
skip_blanks: bool = False
|
|
59
|
+
algo: str = "pc"
|
|
60
|
+
|
|
61
|
+
# KD algorithm specific parameters
|
|
62
|
+
nr_partitions: int = 100
|
|
63
|
+
warp_enabled: bool = True
|
|
64
|
+
warp_rt_tol: float = 5.0
|
|
65
|
+
warp_mz_tol: float = 0.05
|
|
66
|
+
warp_max_pairwise_log_fc: float = 0.5
|
|
67
|
+
warp_min_rel_cc_size: float = 0.5
|
|
68
|
+
warp_max_nr_conflicts: int = 0
|
|
69
|
+
link_rt_tol: float = 30.0
|
|
70
|
+
link_mz_tol: float = 10.0
|
|
71
|
+
link_charge_merging: str = "With_charge_zero"
|
|
72
|
+
link_adduct_merging: str = "Any"
|
|
73
|
+
distance_RT_exponent: float = 1.0
|
|
74
|
+
distance_RT_weight: float = 1.0
|
|
75
|
+
distance_MZ_exponent: float = 2.0
|
|
76
|
+
distance_MZ_weight: float = 1.0
|
|
77
|
+
distance_intensity_exponent: float = 1.0
|
|
78
|
+
distance_intensity_weight: float = 1.0
|
|
79
|
+
distance_intensity_log_transform: str = "enabled"
|
|
80
|
+
LOWESS_span: float = 0.666666666666667
|
|
81
|
+
LOWESS_num_iterations: int = 3
|
|
82
|
+
LOWESS_delta: float = -1.0
|
|
83
|
+
LOWESS_interpolation_type: str = "cspline"
|
|
84
|
+
LOWESS_extrapolation_type: str = "four-point-linear"
|
|
33
85
|
|
|
34
86
|
_param_metadata: dict[str, dict[str, Any]] = field(
|
|
35
87
|
default_factory=lambda: {
|
|
@@ -78,6 +130,167 @@ class align_defaults:
|
|
|
78
130
|
"description": "Whether to skip blank samples during alignment",
|
|
79
131
|
"default": True,
|
|
80
132
|
},
|
|
133
|
+
"algo": {
|
|
134
|
+
"dtype": str,
|
|
135
|
+
"description": "Alignment algorithm to use",
|
|
136
|
+
"default": "pc",
|
|
137
|
+
"allowed_values": ["pc", "kd"],
|
|
138
|
+
},
|
|
139
|
+
# KD algorithm specific parameters
|
|
140
|
+
"nr_partitions": {
|
|
141
|
+
"dtype": int,
|
|
142
|
+
"description": "Number of partitions in m/z dimension for KD algorithm",
|
|
143
|
+
"default": 100,
|
|
144
|
+
"min_value": 1,
|
|
145
|
+
"max_value": 1000,
|
|
146
|
+
},
|
|
147
|
+
"warp_enabled": {
|
|
148
|
+
"dtype": bool,
|
|
149
|
+
"description": "Enable non-linear retention time transformation for KD algorithm",
|
|
150
|
+
"default": True,
|
|
151
|
+
},
|
|
152
|
+
"warp_rt_tol": {
|
|
153
|
+
"dtype": float,
|
|
154
|
+
"description": "RT tolerance for the LOWESS fit in KD algorithm (seconds)",
|
|
155
|
+
"default": 5.0,
|
|
156
|
+
"min_value": 0.1,
|
|
157
|
+
"max_value": 60.0,
|
|
158
|
+
},
|
|
159
|
+
"warp_mz_tol": {
|
|
160
|
+
"dtype": float,
|
|
161
|
+
"description": "m/z tolerance for the LOWESS fit in KD algorithm (Da)",
|
|
162
|
+
"default": 0.05,
|
|
163
|
+
"min_value": 0.001,
|
|
164
|
+
"max_value": 1.0,
|
|
165
|
+
},
|
|
166
|
+
"warp_max_pairwise_log_fc": {
|
|
167
|
+
"dtype": float,
|
|
168
|
+
"description": "Maximum absolute log10 fold change between two compatible signals during compatibility graph construction in KD algorithm",
|
|
169
|
+
"default": 0.5,
|
|
170
|
+
"min_value": -1.0,
|
|
171
|
+
"max_value": 10.0,
|
|
172
|
+
},
|
|
173
|
+
"warp_min_rel_cc_size": {
|
|
174
|
+
"dtype": float,
|
|
175
|
+
"description": "Minimum relative connected component size for KD algorithm",
|
|
176
|
+
"default": 0.5,
|
|
177
|
+
"min_value": 0.0,
|
|
178
|
+
"max_value": 1.0,
|
|
179
|
+
},
|
|
180
|
+
"warp_max_nr_conflicts": {
|
|
181
|
+
"dtype": int,
|
|
182
|
+
"description": "Allow up to this many conflicts (features from the same map) per connected component to be used for alignment (-1 means allow any number of conflicts)",
|
|
183
|
+
"default": 0,
|
|
184
|
+
"min_value": -1,
|
|
185
|
+
"max_value": 1000,
|
|
186
|
+
},
|
|
187
|
+
"link_rt_tol": {
|
|
188
|
+
"dtype": float,
|
|
189
|
+
"description": "Width of RT tolerance window for linking in KD algorithm (seconds)",
|
|
190
|
+
"default": 30.0,
|
|
191
|
+
"min_value": 0.0,
|
|
192
|
+
"max_value": 300.0,
|
|
193
|
+
},
|
|
194
|
+
"link_mz_tol": {
|
|
195
|
+
"dtype": float,
|
|
196
|
+
"description": "m/z tolerance for linking features in KD algorithm (ppm or Da)",
|
|
197
|
+
"default": 10.0,
|
|
198
|
+
"min_value": 0.0,
|
|
199
|
+
"max_value": 100.0,
|
|
200
|
+
},
|
|
201
|
+
"link_charge_merging": {
|
|
202
|
+
"dtype": str,
|
|
203
|
+
"description": "Charge merging strategy for linking features in KD algorithm",
|
|
204
|
+
"default": "With_charge_zero",
|
|
205
|
+
"allowed_values": ["Identical", "With_charge_zero", "Any"],
|
|
206
|
+
},
|
|
207
|
+
"link_adduct_merging": {
|
|
208
|
+
"dtype": str,
|
|
209
|
+
"description": "Adduct merging strategy for linking features in KD algorithm",
|
|
210
|
+
"default": "Any",
|
|
211
|
+
"allowed_values": ["Identical", "With_unknown_adducts", "Any"],
|
|
212
|
+
},
|
|
213
|
+
"distance_RT_exponent": {
|
|
214
|
+
"dtype": float,
|
|
215
|
+
"description": "Normalized RT differences are raised to this power in KD algorithm",
|
|
216
|
+
"default": 1.0,
|
|
217
|
+
"min_value": 0.0,
|
|
218
|
+
"max_value": 10.0,
|
|
219
|
+
},
|
|
220
|
+
"distance_RT_weight": {
|
|
221
|
+
"dtype": float,
|
|
222
|
+
"description": "Final RT distances are weighted by this factor in KD algorithm",
|
|
223
|
+
"default": 1.0,
|
|
224
|
+
"min_value": 0.0,
|
|
225
|
+
"max_value": 100.0,
|
|
226
|
+
},
|
|
227
|
+
"distance_MZ_exponent": {
|
|
228
|
+
"dtype": float,
|
|
229
|
+
"description": "Normalized m/z differences are raised to this power in KD algorithm",
|
|
230
|
+
"default": 2.0,
|
|
231
|
+
"min_value": 0.0,
|
|
232
|
+
"max_value": 10.0,
|
|
233
|
+
},
|
|
234
|
+
"distance_MZ_weight": {
|
|
235
|
+
"dtype": float,
|
|
236
|
+
"description": "Final m/z distances are weighted by this factor in KD algorithm",
|
|
237
|
+
"default": 1.0,
|
|
238
|
+
"min_value": 0.0,
|
|
239
|
+
"max_value": 100.0,
|
|
240
|
+
},
|
|
241
|
+
"distance_intensity_exponent": {
|
|
242
|
+
"dtype": float,
|
|
243
|
+
"description": "Differences in relative intensity are raised to this power in KD algorithm",
|
|
244
|
+
"default": 1.0,
|
|
245
|
+
"min_value": 0.0,
|
|
246
|
+
"max_value": 10.0,
|
|
247
|
+
},
|
|
248
|
+
"distance_intensity_weight": {
|
|
249
|
+
"dtype": float,
|
|
250
|
+
"description": "Final intensity distances are weighted by this factor in KD algorithm",
|
|
251
|
+
"default": 1.0,
|
|
252
|
+
"min_value": 0.0,
|
|
253
|
+
"max_value": 100.0,
|
|
254
|
+
},
|
|
255
|
+
"distance_intensity_log_transform": {
|
|
256
|
+
"dtype": str,
|
|
257
|
+
"description": "Log-transform intensities in KD algorithm distance calculation",
|
|
258
|
+
"default": "enabled",
|
|
259
|
+
"allowed_values": ["enabled", "disabled"],
|
|
260
|
+
},
|
|
261
|
+
"LOWESS_span": {
|
|
262
|
+
"dtype": float,
|
|
263
|
+
"description": "Fraction of datapoints to use for each local regression in LOWESS fitting",
|
|
264
|
+
"default": 0.666666666666667,
|
|
265
|
+
"min_value": 0.0,
|
|
266
|
+
"max_value": 1.0,
|
|
267
|
+
},
|
|
268
|
+
"LOWESS_num_iterations": {
|
|
269
|
+
"dtype": int,
|
|
270
|
+
"description": "Number of robustifying iterations for LOWESS fitting",
|
|
271
|
+
"default": 3,
|
|
272
|
+
"min_value": 0,
|
|
273
|
+
"max_value": 10,
|
|
274
|
+
},
|
|
275
|
+
"LOWESS_delta": {
|
|
276
|
+
"dtype": float,
|
|
277
|
+
"description": "Nonnegative parameter for LOWESS computations (negative value auto-computes)",
|
|
278
|
+
"default": -1.0,
|
|
279
|
+
"min_value": -1.0,
|
|
280
|
+
"max_value": 1000.0,
|
|
281
|
+
},
|
|
282
|
+
"LOWESS_interpolation_type": {
|
|
283
|
+
"dtype": str,
|
|
284
|
+
"description": "Method to use for interpolation between datapoints computed by LOWESS",
|
|
285
|
+
"default": "cspline",
|
|
286
|
+
"allowed_values": ["linear", "cspline", "akima"],
|
|
287
|
+
},
|
|
288
|
+
"LOWESS_extrapolation_type": {
|
|
289
|
+
"dtype": str,
|
|
290
|
+
"description": "Method to use for extrapolation outside the data range in LOWESS",
|
|
291
|
+
"default": "four-point-linear",
|
|
292
|
+
"allowed_values": ["two-point-linear", "four-point-linear", "global-linear"],
|
|
293
|
+
},
|
|
81
294
|
},
|
|
82
295
|
repr=False,
|
|
83
296
|
)
|
|
@@ -152,6 +365,11 @@ class align_defaults:
|
|
|
152
365
|
if "max_value" in metadata and value > metadata["max_value"]:
|
|
153
366
|
return False
|
|
154
367
|
|
|
368
|
+
# Allowed values validation for string types
|
|
369
|
+
if expected_dtype is str and "allowed_values" in metadata:
|
|
370
|
+
if value not in metadata["allowed_values"]:
|
|
371
|
+
return False
|
|
372
|
+
|
|
155
373
|
return True
|
|
156
374
|
|
|
157
375
|
def set(self, param_name: str, value: Any, validate: bool = True) -> bool:
|
|
@@ -168,11 +168,7 @@ class fill_chrom_defaults:
|
|
|
168
168
|
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
169
169
|
|
|
170
170
|
# Handle optional types
|
|
171
|
-
if (
|
|
172
|
-
isinstance(expected_dtype, str)
|
|
173
|
-
and expected_dtype.startswith("Optional")
|
|
174
|
-
and value is not None
|
|
175
|
-
):
|
|
171
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
176
172
|
if "int" in expected_dtype and not isinstance(value, int):
|
|
177
173
|
try:
|
|
178
174
|
value = int(value)
|
|
@@ -135,11 +135,7 @@ class integrate_chrom_defaults:
|
|
|
135
135
|
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
136
136
|
|
|
137
137
|
# Handle optional types
|
|
138
|
-
if (
|
|
139
|
-
isinstance(expected_dtype, str)
|
|
140
|
-
and expected_dtype.startswith("Optional")
|
|
141
|
-
and value is not None
|
|
142
|
-
):
|
|
138
|
+
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
143
139
|
if "float" in expected_dtype and not isinstance(value, float):
|
|
144
140
|
try:
|
|
145
141
|
value = float(value)
|
|
@@ -53,13 +53,13 @@ class study_defaults:
|
|
|
53
53
|
"log_sink": {
|
|
54
54
|
"dtype": str,
|
|
55
55
|
"description": "Output sink for logging. Use 'sys.stdout' for console output, or a file path",
|
|
56
|
-
"default": "sys.stdout",
|
|
56
|
+
"default": "sys.stdout",
|
|
57
57
|
},
|
|
58
58
|
"polarity": {
|
|
59
59
|
"dtype": str,
|
|
60
60
|
"description": "Polarity of the study (positive/negative)",
|
|
61
61
|
"default": "positive",
|
|
62
|
-
"allowed_values": ["positive", "negative","pos","neg"],
|
|
62
|
+
"allowed_values": ["positive", "negative", "pos", "neg"],
|
|
63
63
|
},
|
|
64
64
|
},
|
|
65
65
|
repr=False,
|