masster 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster has been flagged as potentially problematic in its registry.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +90 -94
- masster/sample/defaults/sample_def.py +15 -0
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +137 -136
- masster/sample/lib.py +11 -11
- masster/sample/load.py +13 -9
- masster/sample/plot.py +167 -60
- masster/sample/processing.py +150 -153
- masster/sample/sample.py +4 -4
- masster/sample/sample5_schema.json +62 -62
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +224 -6
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +293 -245
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +51 -25
- masster/study/plot.py +453 -17
- masster/study/processing.py +197 -123
- masster/study/save.py +7 -7
- masster/study/study.py +97 -88
- masster/study/study5_schema.json +82 -82
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/METADATA +1 -1
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/RECORD +34 -32
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/WHEEL +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/entry_points.txt +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/licenses/LICENSE +0 -0
masster/study/processing.py
CHANGED
@@ -17,22 +17,45 @@ from masster.study.defaults import (
 
 
 def align(self, **kwargs):
-    """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    """Align feature maps using pose clustering or KD algorithm and update feature RTs.
+
+    Parameters can be provided as an ``align_defaults`` instance or as
+    individual keyword arguments; they are validated against the defaults class.
+
+    Key parameters (from ``align_defaults``):
+    - rt_max_diff (float): Maximum RT difference for pair finding (seconds).
+    - mz_max_diff (float): Maximum m/z difference for pair finding (Da).
+    - rt_pair_distance_frac (float): RT fraction used by the superimposer.
+    - mz_pair_max_distance (float): Max m/z distance for pair selection.
+    - num_used_points (int): Number of points to use for alignment estimation.
+    - save_features (bool): If True, save updated features after alignment.
+    - skip_blanks (bool): If True, skip blank samples during alignment.
+    - algo (str): Alignment algorithm ('pc' for PoseClustering, 'kd' for KD).
+
+    KD algorithm specific parameters:
+    - nr_partitions (int): Number of partitions in m/z dimension.
+    - warp_enabled (bool): Enable non-linear retention time transformation.
+    - warp_rt_tol (float): RT tolerance for the LOWESS fit.
+    - warp_mz_tol (float): m/z tolerance for the LOWESS fit.
+    - warp_max_pairwise_log_fc (float): Maximum absolute log10 fold-change threshold for pairing.
+    - warp_min_rel_cc_size (float): Minimum relative connected component size.
+    - warp_max_nr_conflicts (int): Allow up to this many conflicts per connected component for alignment.
+    - link_rt_tol (float): Width of RT tolerance window for linking features.
+    - link_mz_tol (float): m/z tolerance for linking features.
+    - link_charge_merging (str): Charge merging strategy for linking features.
+    - link_adduct_merging (str): Adduct merging strategy for linking features.
+    - distance_RT_exponent (float): Exponent for normalized RT differences.
+    - distance_RT_weight (float): Weight factor for final RT distances.
+    - distance_MZ_exponent (float): Exponent for normalized m/z differences.
+    - distance_MZ_weight (float): Weight factor for final m/z distances.
+    - distance_intensity_exponent (float): Exponent for differences in relative intensity.
+    - distance_intensity_weight (float): Weight factor for final intensity distances.
+    - distance_intensity_log_transform (str): Log-transform intensities.
+    - LOWESS_span (float): Fraction of datapoints for each local regression.
+    - LOWESS_num_iterations (int): Number of robustifying iterations for LOWESS fitting.
+    - LOWESS_delta (float): Parameter for LOWESS computations (negative auto-computes).
+    - LOWESS_interpolation_type (str): Method for interpolation between datapoints.
+    - LOWESS_extrapolation_type (str): Method for extrapolation outside data range.
     """
     # parameters initialization
     params = align_defaults()
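
The new docstring fully determines the keyword interface, so a call might look like the sketch below. This is illustrative only: the pre-existing `study` object and all numeric values are assumptions, while the parameter names and the 'pc'/'kd' switch come from the docstring above.

    # Hypothetical usage; `study` is an already-loaded masster Study object
    # and the values are examples, not the align_defaults defaults.
    study.align(algo="pc", rt_max_diff=30.0, mz_max_diff=0.01)

    study.align(               # non-linear KD alignment with LOWESS warping
        algo="kd",
        warp_enabled=True,
        warp_rt_tol=60.0,
        link_rt_tol=30.0,
        link_mz_tol=0.01,
        skip_blanks=True,
    )
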
@@ -60,78 +83,135 @@ def align(self, **kwargs):
     self.features_maps = []
     self.load_features()
 
-    self.logger.debug("Starting alignment")
+    # self.logger.debug("Starting alignment")
 
     fmaps = self.features_maps
-    # set ref_index to feature map index with largest number of features
-    ref_index = [i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])][-1]
-
-    self.logger.info(
-        f"Align on {self.samples_df.row(ref_index, named=True)['sample_name']}",
-    )
-
-    aligner = oms.MapAlignmentAlgorithmPoseClustering()
 
+    # Initialize OpenMS parameters
     params_oms = oms.Param()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    if
-
-
-
-
-
-
-
+    # Choose alignment algorithm based on parameter
+    algo = params.get("algo").lower()
+
+    # Set common parameters for both algorithms
+    if algo == "pc":
+        # Parameters specific to PoseClustering
+        params_oms.setValue("pairfinder:distance_intensity:log_transform", "disabled")
+        params_oms.setValue("pairfinder:ignore_charge", "true")
+        params_oms.setValue("max_num_peaks_considered", 1000)
+        params_oms.setValue("pairfinder:distance_RT:max_difference", params.get("rt_max_diff"))
+        params_oms.setValue("pairfinder:distance_MZ:max_difference", params.get("mz_max_diff"))
+        params_oms.setValue("superimposer:rt_pair_distance_fraction", params.get("rt_pair_distance_frac"))
+        params_oms.setValue("superimposer:mz_pair_max_distance", params.get("mz_pair_max_distance"))
+        params_oms.setValue("superimposer:num_used_points", params.get("num_used_points"))
+        params_oms.setValue("pairfinder:distance_MZ:exponent", 3.0)
+        params_oms.setValue("pairfinder:distance_RT:exponent", 2.0)
+
+        """
+        {b'max_num_peaks_considered': 1000,
+         b'superimposer:mz_pair_max_distance': 0.5,
+         b'superimposer:rt_pair_distance_fraction': 0.1,
+         b'superimposer:num_used_points': 2000,
+         b'superimposer:scaling_bucket_size': 0.005,
+         b'superimposer:shift_bucket_size': 3.0,
+         b'superimposer:max_shift': 1000.0,
+         b'superimposer:max_scaling': 2.0,
+         b'superimposer:dump_buckets': '',
+         b'superimposer:dump_pairs': '',
+         b'pairfinder:second_nearest_gap': 2.0,
+         b'pairfinder:use_identifications': 'false',
+         b'pairfinder:ignore_charge': 'false',
+         b'pairfinder:ignore_adduct': 'true',
+         b'pairfinder:distance_RT:max_difference': 100.0,
+         b'pairfinder:distance_RT:exponent': 1.0,
+         b'pairfinder:distance_RT:weight': 1.0,
+         b'pairfinder:distance_MZ:max_difference': 0.3,
+         b'pairfinder:distance_MZ:unit': 'Da',
+         b'pairfinder:distance_MZ:exponent': 2.0,
+         b'pairfinder:distance_MZ:weight': 1.0,
+         b'pairfinder:distance_intensity:exponent': 1.0,
+         b'pairfinder:distance_intensity:weight': 0.0,
+         b'pairfinder:distance_intensity:log_transform': 'disabled'}
+        """
+    elif algo == "kd":
+        # Parameters specific to KD algorithm
+        params_oms.setValue("mz_unit", "Da")
+        params_oms.setValue("nr_partitions", params.get("nr_partitions"))
+
+        # Warp parameters for non-linear RT transformation
+        params_oms.setValue("warp:enabled", "true" if params.get("warp_enabled") else "false")
+        params_oms.setValue("warp:rt_tol", params.get("warp_rt_tol"))
+        params_oms.setValue("warp:mz_tol", params.get("warp_mz_tol"))
+        params_oms.setValue("warp:max_pairwise_log_fc", params.get("warp_max_pairwise_log_fc"))
+        params_oms.setValue("warp:min_rel_cc_size", params.get("warp_min_rel_cc_size"))
+        params_oms.setValue("warp:max_nr_conflicts", params.get("warp_max_nr_conflicts"))
+
+        # Link parameters
+        params_oms.setValue("link:rt_tol", params.get("link_rt_tol"))
+        params_oms.setValue("link:mz_tol", params.get("link_mz_tol"))
+        params_oms.setValue("link:charge_merging", params.get("link_charge_merging"))
+        params_oms.setValue("link:adduct_merging", params.get("link_adduct_merging"))
+
+        # Distance parameters
+        params_oms.setValue("distance_RT:exponent", params.get("distance_RT_exponent"))
+        params_oms.setValue("distance_RT:weight", params.get("distance_RT_weight"))
+        params_oms.setValue("distance_MZ:exponent", params.get("distance_MZ_exponent"))
+        params_oms.setValue("distance_MZ:weight", params.get("distance_MZ_weight"))
+        params_oms.setValue("distance_intensity:exponent", params.get("distance_intensity_exponent"))
+        params_oms.setValue("distance_intensity:weight", params.get("distance_intensity_weight"))
+        params_oms.setValue("distance_intensity:log_transform", params.get("distance_intensity_log_transform"))
+
+        # LOWESS parameters
+        params_oms.setValue("LOWESS:span", params.get("LOWESS_span"))
+        params_oms.setValue("LOWESS:num_iterations", params.get("LOWESS_num_iterations"))
+        params_oms.setValue("LOWESS:delta", params.get("LOWESS_delta"))
+        params_oms.setValue("LOWESS:interpolation_type", params.get("LOWESS_interpolation_type"))
+        params_oms.setValue("LOWESS:extrapolation_type", params.get("LOWESS_extrapolation_type"))
+
+    if algo == "pc":
+        aligner = oms.MapAlignmentAlgorithmPoseClustering()
+        self.logger.info("Starting alignment with PoseClustering")
+        # set ref_index to feature map index with largest number of features
+        ref_index = [i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])][-1]
+        self.logger.debug(
+            f"Reference map is {self.samples_df.row(ref_index, named=True)['sample_name']}",
+        )
+        aligner.setParameters(params_oms)
+        aligner.setReference(fmaps[ref_index])
+        self.logger.debug(f"Parameters for alignment: {params}")
+        # perform alignment and transformation of feature maps to the reference map (exclude reference map)
+        tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
+        for index, fm in tqdm(
+            list(enumerate(fmaps)),
+            total=len(fmaps),
+            desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Align feature maps",
+            disable=tdqm_disable,
+        ):
+            if index == ref_index:
+                continue
+            if params.get("skip_blanks") and self.samples_df.row(index, named=True)["sample_type"] == "blank":
+                continue
+            trafo = oms.TransformationDescription()
+            aligner.align(fm, trafo)
+            transformer = oms.MapAlignmentTransformer()
+            transformer.transformRetentionTimes(fm, trafo, True)
+
+        self.alignment_ref_index = ref_index
+
+    elif algo == "kd":
+        # KD algorithm requires num_maps and Param parameters
+        num_maps = len(fmaps)
+        aligner = oms.MapAlignmentAlgorithmKD(3, params_oms)
+        self.logger.info(f"Starting alignment with KD algorithm using {num_maps} maps")
+
+        kdtree = oms.KDTreeFeatureMaps()
+        kdtree.addMaps(fmaps)  # Add all feature maps to the KDTree
+        # kdtree.optimizeTree()
+        aligner.addRTFitData(kdtree)
+        aligner.fitLOWESS()
+        aligner.transform(kdtree)
 
-
+    else:
+        self.logger.error(f"Unknown alignment algorithm '{algo}'")
 
     # check if rt_original exists in features_df, if not, add it after rt
     if "rt_original" not in self.features_df.columns:
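
Stripped of the study bookkeeping, the new KD branch reduces to the pyopenms call sequence below (note that the released code passes a literal 3 to the constructor even though num_maps is computed). This is a sketch under the assumption that `feature_maps` is a list of oms.FeatureMap objects and `params_oms` is an oms.Param populated with the mz_unit/nr_partitions/warp:*/link:*/distance_*/LOWESS:* keys exactly as in the hunk above; it is not a drop-in replacement for the method.

    import pyopenms as oms

    def kd_align(feature_maps, params_oms):
        # Build the aligner for the given number of maps and parameter set
        aligner = oms.MapAlignmentAlgorithmKD(len(feature_maps), params_oms)
        # Index every feature of every map in one m/z-RT KD-tree
        kdtree = oms.KDTreeFeatureMaps()
        kdtree.addMaps(feature_maps)
        # Collect RT fit points, fit the LOWESS transformations, warp RTs in place
        aligner.addRTFitData(kdtree)
        aligner.fitLOWESS()
        aligner.transform(kdtree)
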
@@ -225,27 +305,24 @@ def align(self, **kwargs):
 
 
 def merge(self, **kwargs):
-    """
-    Groups features across samples into consensus features using the specified algorithm.
+    """Group features across samples into consensus features.
 
-    Parameters
-
-
-
-
-
-
-
-
-        mz_tol (float): m/z tolerance for grouping (default: 0.01).
-        rt_tol (float): RT tolerance for grouping (default: 1.0).
+    Parameters can be provided as a ``merge_defaults`` instance or as
+    individual keyword arguments; they are validated against the defaults class.
+
+    Key parameters (from ``merge_defaults``):
+    - algorithm (str): Grouping algorithm to use ('qt', 'kd', 'unlabeled', 'sequential').
+    - min_samples (int): Minimum number of samples required for a consensus feature.
+    - link_ms2 (bool): Whether to attach/link MS2 spectra to consensus features.
+    - mz_tol (float): m/z tolerance for grouping (Da).
+    - rt_tol (float): RT tolerance for grouping (seconds).
     """
     # Reset consensus-related DataFrames at the start
     self.consensus_df = pl.DataFrame()
     self.consensus_ms2 = pl.DataFrame()
     self.consensus_mapping_df = pl.DataFrame()
-
-    self.logger.info(
+
+    self.logger.info("Merging...")
     # parameters initialization
     params = merge_defaults()
     for key, value in kwargs.items():
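
Given the docstring, grouping with the QT algorithm might be invoked as in the sketch below; the values are illustrative examples, not the merge_defaults defaults.

    # Hypothetical usage on an aligned study
    study.merge(algorithm="qt", min_samples=3, link_ms2=True, mz_tol=0.01, rt_tol=1.0)
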
@@ -488,17 +565,17 @@ def merge(self, **kwargs):
     # Collect all adducts from feature_data_list to create consensus adduct information
     all_adducts = []
     adduct_masses = {}
-
+
     for fd in feature_data_list:
         # Get individual adduct and mass from each feature data (fd)
         adduct = fd.get("adduct")
         adduct_mass = fd.get("adduct_mass")
-
+
         if adduct is not None:
             all_adducts.append(adduct)
             if adduct_mass is not None:
                 adduct_masses[adduct] = adduct_mass
-
+
     # Calculate adduct_values for the consensus feature
     adduct_values = []
     if all_adducts:
@@ -512,9 +589,9 @@ def merge(self, **kwargs):
             "adduct": str(adduct),
             "count": int(count),
             "percentage": float(round(percentage, 2)),
-            "mass": float(mass) if mass is not None else None
+            "mass": float(mass) if mass is not None else None,
         })
-
+
     # Sort adduct_values by count in descending order
     adduct_values.sort(key=lambda x: x["count"], reverse=True)  # type: ignore[arg-type,return-value]
     # Store adduct_values for use in metadata
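
The two hunks above collect per-feature adducts and then rank them for the consensus feature. The standalone sketch below reproduces that aggregation; the percentage formula (count over total, times 100) is an assumption consistent with the sorted dict structure shown in the diff.

    from collections import Counter

    def build_adduct_values(all_adducts, adduct_masses):
        # all_adducts: list of adduct strings across the merged features
        # adduct_masses: adduct -> representative mass, as built in the diff
        total = len(all_adducts)
        adduct_values = []
        for adduct, count in Counter(all_adducts).items():
            mass = adduct_masses.get(adduct)
            adduct_values.append({
                "adduct": str(adduct),
                "count": int(count),
                "percentage": float(round(100.0 * count / total, 2)),
                "mass": float(mass) if mass is not None else None,
            })
        # Most frequent adduct first, matching the diff's sort
        adduct_values.sort(key=lambda x: x["count"], reverse=True)
        return adduct_values
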
@@ -619,7 +696,7 @@ def find_ms2(self, **kwargs):
     """
     # Reset consensus_ms2 DataFrame at the start
     self.consensus_ms2 = pl.DataFrame()
-
+
     # parameters initialization
     params = find_ms2_defaults()
     for key, value in kwargs.items():
@@ -768,17 +845,20 @@ def filter_consensus(
 
 ## TODO is uid supposed to be a list? rt_tol 0?
 def _integrate_chrom_impl(self, **kwargs):
-    """
-    Given a consensus_id, integrate the intensity of all features in the consensus map.
+    """Integrate chromatogram intensities for consensus features.
 
-
-
-
-
+    Integrates EICs for consensus features using parameters defined in
+    :class:`integrate_defaults`. Pass an ``integrate_defaults`` instance via
+    ``**kwargs`` or override individual parameters (they will be validated
+    against the defaults class).
+
+    Main parameters (from ``integrate_defaults``):
 
-
-
-
+    - uids (Optional[list]): List of consensus UIDs to integrate; ``None`` means all.
+    - rt_tol (float): RT tolerance (seconds) used when locating integration boundaries.
+
+    Notes:
+        This function batches updates to the study's feature table for efficiency.
     """
     # parameters initialization
     params = integrate_defaults()
@@ -984,17 +1064,11 @@ def _integrate_chrom_impl(self, **kwargs):
 
 
 def integrate(self, **kwargs):
-    """
-    Integrate chromatograms across consensus features.
-
-    Parameters:
-        **kwargs: Keyword arguments for integration parameters. Can include:
-            - An integrate_defaults instance to set all parameters at once
-            - Individual parameter names and values (see integrate_defaults for details)
+    """Integrate chromatograms across consensus features.
 
-
-
-
+    Wrapper that extracts parameters from :class:`integrate_defaults` and
+    calls the underlying implementation. See ``integrate_defaults`` for
+    the canonical parameter list and descriptions.
     """
     # parameters initialization
     params = integrate_defaults()
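
Per the rewritten docstrings, integration might be driven as in this sketch; the UID values and tolerance are made up for illustration.

    study.integrate()                             # integrate all consensus features
    study.integrate(uids=[101, 205], rt_tol=0.5)  # hypothetical subset, tighter RT window
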
masster/study/save.py
CHANGED
@@ -21,7 +21,7 @@ def save(self, filename=None, add_timestamp=True, compress=False):
         filename (str, optional): Target file name. If None, uses default.
         add_timestamp (bool, optional): If True, appends timestamp to avoid overwriting.
             Default True for safety (original behavior).
-        compress (bool, optional): If True, uses compressed mode and skips 
+        compress (bool, optional): If True, uses compressed mode and skips
            some heavy columns for maximum speed. Default False.
     """
 
@@ -46,11 +46,11 @@ def save(self, filename=None, add_timestamp=True, compress=False):
         filename = f"{filename.replace('.study5', '')}_{timestamp}.study5"
 
     # Log file size information for performance monitoring
-    if hasattr(self,
+    if hasattr(self, "features_df") and not self.features_df.is_empty():
         feature_count = len(self.features_df)
-        sample_count = len(self.samples_df) if hasattr(self,
+        sample_count = len(self.samples_df) if hasattr(self, "samples_df") and not self.samples_df.is_empty() else 0
         self.logger.info(f"Saving study with {sample_count} samples and {feature_count} features to {filename}")
-
+
     # Use compressed mode for large datasets
     if compress:
         self._save_study5_compressed(filename)
@@ -106,7 +106,7 @@ def save_samples(self, samples=None):
         ddaobj.save()
         sample_name = sample_row.row(0, named=True)["sample_name"]
         sample_path = sample_row.row(0, named=True)["sample_path"]
-
+
         # Find the index of this sample in the original order for features_maps
         sample_index = next(
             (
@@ -116,7 +116,7 @@ def save_samples(self, samples=None):
             ),
             None,
         )
-
+
         # Determine where to save the featureXML file based on sample_path location
         if sample_path.endswith(".sample5"):
             # If sample_path is a .sample5 file, save featureXML in the same directory
@@ -135,7 +135,7 @@ def save_samples(self, samples=None):
                 sample_name + ".featureXML",
             )
             self.logger.debug(f"Saving featureXML to default location: {featurexml_filename}")
-
+
         fh = oms.FeatureXMLFile()
         if sample_index is not None and sample_index < len(self.features_maps):
             fh.store(featurexml_filename, self.features_maps[sample_index])