masster-0.4.21-py3-none-any.whl → masster-0.5.0-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/adducts.py +1 -1
- masster/sample/load.py +10 -9
- masster/sample/plot.py +1 -1
- masster/sample/processing.py +4 -4
- masster/sample/sample.py +29 -32
- masster/sample/save.py +0 -2
- masster/study/analysis.py +1762 -0
- masster/study/export.py +8 -6
- masster/study/helpers.py +153 -80
- masster/study/id.py +3 -3
- masster/study/load.py +56 -55
- masster/study/merge.py +316 -313
- masster/study/parameters.py +3 -3
- masster/study/plot.py +491 -203
- masster/study/processing.py +109 -15
- masster/study/save.py +8 -4
- masster/study/study.py +97 -139
- masster/wizard/wizard.py +8 -8
- {masster-0.4.21.dist-info → masster-0.5.0.dist-info}/METADATA +54 -14
- {masster-0.4.21.dist-info → masster-0.5.0.dist-info}/RECORD +24 -23
- {masster-0.4.21.dist-info → masster-0.5.0.dist-info}/WHEEL +0 -0
- {masster-0.4.21.dist-info → masster-0.5.0.dist-info}/entry_points.txt +0 -0
- {masster-0.4.21.dist-info → masster-0.5.0.dist-info}/licenses/LICENSE +0 -0
masster/study/processing.py
CHANGED
|
@@ -15,6 +15,85 @@ from masster.study.defaults import (
|
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
def _generate_feature_maps_on_demand_for_align(study):
|
|
19
|
+
"""
|
|
20
|
+
Generate feature maps on-demand from study.features_df for alignment operations.
|
|
21
|
+
Returns temporary feature maps that are not cached in the study.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
study: Study object containing features_df and samples_df
|
|
25
|
+
|
|
26
|
+
Returns:
|
|
27
|
+
list: List of temporary FeatureMap objects
|
|
28
|
+
"""
|
|
29
|
+
import polars as pl
|
|
30
|
+
import pyopenms as oms
|
|
31
|
+
|
|
32
|
+
if study.features_df is None or len(study.features_df) == 0:
|
|
33
|
+
study.logger.error("No features_df available for generating feature maps")
|
|
34
|
+
return []
|
|
35
|
+
|
|
36
|
+
temp_feature_maps = []
|
|
37
|
+
|
|
38
|
+
# Process each sample in order
|
|
39
|
+
for sample_index, row_dict in enumerate(study.samples_df.iter_rows(named=True)):
|
|
40
|
+
sample_uid = row_dict["sample_uid"]
|
|
41
|
+
sample_name = row_dict["sample_name"]
|
|
42
|
+
|
|
43
|
+
# Get features for this sample from features_df
|
|
44
|
+
sample_features = study.features_df.filter(pl.col("sample_uid") == sample_uid)
|
|
45
|
+
|
|
46
|
+
# Create new FeatureMap
|
|
47
|
+
feature_map = oms.FeatureMap()
|
|
48
|
+
|
|
49
|
+
# Convert DataFrame features to OpenMS Features
|
|
50
|
+
for feature_row in sample_features.iter_rows(named=True):
|
|
51
|
+
feature = oms.Feature()
|
|
52
|
+
|
|
53
|
+
# Set properties from DataFrame (handle missing values gracefully)
|
|
54
|
+
try:
|
|
55
|
+
# Skip features with missing critical data
|
|
56
|
+
if feature_row["mz"] is None:
|
|
57
|
+
study.logger.warning("Skipping feature due to missing mz")
|
|
58
|
+
continue
|
|
59
|
+
if feature_row["rt"] is None:
|
|
60
|
+
study.logger.warning("Skipping feature due to missing rt")
|
|
61
|
+
continue
|
|
62
|
+
if feature_row["inty"] is None:
|
|
63
|
+
study.logger.warning("Skipping feature due to missing inty")
|
|
64
|
+
continue
|
|
65
|
+
|
|
66
|
+
# Handle missing feature_id by generating a new one
|
|
67
|
+
if feature_row["feature_id"] is None:
|
|
68
|
+
# Use a simple incremental ID for alignment purposes
|
|
69
|
+
feature_id = len(temp_feature_maps) * 100000 + feature_map.size() + 1
|
|
70
|
+
study.logger.debug(f"Generated new feature_id {feature_id} for feature with missing ID in sample {sample_name}")
|
|
71
|
+
else:
|
|
72
|
+
feature_id = int(feature_row["feature_id"])
|
|
73
|
+
|
|
74
|
+
feature.setUniqueId(feature_id)
|
|
75
|
+
feature.setMZ(float(feature_row["mz"]))
|
|
76
|
+
feature.setRT(float(feature_row["rt"]))
|
|
77
|
+
feature.setIntensity(float(feature_row["inty"]))
|
|
78
|
+
|
|
79
|
+
# Handle optional fields that might be None
|
|
80
|
+
if feature_row.get("quality") is not None:
|
|
81
|
+
feature.setOverallQuality(float(feature_row["quality"]))
|
|
82
|
+
if feature_row.get("charge") is not None:
|
|
83
|
+
feature.setCharge(int(feature_row["charge"]))
|
|
84
|
+
|
|
85
|
+
# Add to feature map
|
|
86
|
+
feature_map.push_back(feature)
|
|
87
|
+
except (ValueError, TypeError) as e:
|
|
88
|
+
study.logger.warning(f"Skipping feature due to conversion error: {e}")
|
|
89
|
+
continue
|
|
90
|
+
|
|
91
|
+
temp_feature_maps.append(feature_map)
|
|
92
|
+
|
|
93
|
+
study.logger.debug(f"Generated {len(temp_feature_maps)} temporary feature maps from features_df for alignment")
|
|
94
|
+
return temp_feature_maps
|
|
95
|
+
|
|
96
|
+
|
|
18
97
|
def align(self, **kwargs):
|
|
19
98
|
"""Align feature maps using pose clustering or KD algorithm and update feature RTs.
|
|
20
99
|
|
|
@@ -87,16 +166,12 @@ def align(self, **kwargs):
|
|
|
87
166
|
# end of parameter initialization
|
|
88
167
|
|
|
89
168
|
# Store parameters in the Study object
|
|
90
|
-
self.
|
|
169
|
+
self.update_history(["align"], params.to_dict())
|
|
91
170
|
self.logger.debug("Parameters stored to align")
|
|
92
171
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
# self.logger.debug("Starting alignment")
|
|
98
|
-
|
|
99
|
-
fmaps = self.features_maps
|
|
172
|
+
# Generate temporary feature maps on-demand from features_df instead of using cached data
|
|
173
|
+
self.logger.debug("Generating feature maps on-demand from features_df for alignment")
|
|
174
|
+
fmaps = _generate_feature_maps_on_demand_for_align(self)
|
|
100
175
|
|
|
101
176
|
# Choose alignment algorithm
|
|
102
177
|
algorithm = params.get("algorithm").lower()
|
|
@@ -108,6 +183,9 @@ def align(self, **kwargs):
|
|
|
108
183
|
_align_kd_algorithm(self, fmaps, params)
|
|
109
184
|
else:
|
|
110
185
|
self.logger.error(f"Unknown alignment algorithm '{algorithm}'")
|
|
186
|
+
# Clean up temporary feature maps to release memory
|
|
187
|
+
del fmaps
|
|
188
|
+
return
|
|
111
189
|
|
|
112
190
|
# check if rt_original exists in features_df, if not, add it after rt
|
|
113
191
|
if "rt_original" not in self.features_df.columns:
|
|
@@ -256,6 +334,10 @@ def align(self, **kwargs):
|
|
|
256
334
|
if params.get("save_features"):
|
|
257
335
|
self.save_samples()
|
|
258
336
|
|
|
337
|
+
# Clean up temporary feature maps to release memory
|
|
338
|
+
del fmaps
|
|
339
|
+
self.logger.debug("Temporary feature maps deleted to release memory")
|
|
340
|
+
|
|
259
341
|
|
|
260
342
|
def find_ms2(self, **kwargs):
|
|
261
343
|
"""
|
|
@@ -288,7 +370,7 @@ def find_ms2(self, **kwargs):
|
|
|
288
370
|
# end of parameter initialization
|
|
289
371
|
|
|
290
372
|
# Store parameters in the Study object
|
|
291
|
-
self.
|
|
373
|
+
self.update_history(["find_ms2"], params.to_dict())
|
|
292
374
|
self.logger.debug("Parameters stored to find_ms2")
|
|
293
375
|
|
|
294
376
|
data = []
|
|
@@ -469,7 +551,7 @@ def _integrate_chrom_impl(self, **kwargs):
|
|
|
469
551
|
# end of parameter initialization
|
|
470
552
|
|
|
471
553
|
# Store parameters in the Study object
|
|
472
|
-
self.
|
|
554
|
+
self.update_history(["integrate_chrom"], params.to_dict())
|
|
473
555
|
self.logger.debug("Parameters stored to integrate_chrom")
|
|
474
556
|
|
|
475
557
|
# Get parameter values for use in the method
|
|
@@ -687,7 +769,7 @@ def integrate(self, **kwargs):
|
|
|
687
769
|
# end of parameter initialization
|
|
688
770
|
|
|
689
771
|
# Store parameters in the Study object
|
|
690
|
-
self.
|
|
772
|
+
self.update_history(["integrate"], params.to_dict())
|
|
691
773
|
self.logger.debug("Parameters stored to integrate")
|
|
692
774
|
|
|
693
775
|
# Call the original integrate_chrom function with extracted parameters
|
|
@@ -787,10 +869,22 @@ def _align_pose_clustering(study_obj, fmaps, params):
|
|
|
787
869
|
and study_obj.samples_df.row(index, named=True)["sample_type"] == "blank"
|
|
788
870
|
):
|
|
789
871
|
continue
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
872
|
+
|
|
873
|
+
# Skip feature maps with insufficient data points for alignment
|
|
874
|
+
if fm.size() < 2:
|
|
875
|
+
sample_name = study_obj.samples_df.row(index, named=True)["sample_name"]
|
|
876
|
+
study_obj.logger.warning(f"Skipping alignment for sample '{sample_name}' - insufficient features ({fm.size()} features)")
|
|
877
|
+
continue
|
|
878
|
+
|
|
879
|
+
try:
|
|
880
|
+
trafo = oms.TransformationDescription()
|
|
881
|
+
aligner.align(fm, trafo)
|
|
882
|
+
transformer = oms.MapAlignmentTransformer()
|
|
883
|
+
transformer.transformRetentionTimes(fm, trafo, True)
|
|
884
|
+
except RuntimeError as e:
|
|
885
|
+
sample_name = study_obj.samples_df.row(index, named=True)["sample_name"]
|
|
886
|
+
study_obj.logger.warning(f"Failed to align sample '{sample_name}': {e}")
|
|
887
|
+
continue
|
|
794
888
|
|
|
795
889
|
study_obj.alignment_ref_index = ref_index
|
|
796
890
|
|
masster/study/save.py
CHANGED
|
@@ -59,13 +59,16 @@ def save(self, filename=None, add_timestamp=True, compress=False):
|
|
|
59
59
|
|
|
60
60
|
# Use compressed mode for large datasets
|
|
61
61
|
if compress:
|
|
62
|
-
|
|
62
|
+
from masster.study.h5 import _save_study5_compressed
|
|
63
|
+
_save_study5_compressed(self, filename)
|
|
63
64
|
else:
|
|
64
|
-
|
|
65
|
+
from masster.study.h5 import _save_study5
|
|
66
|
+
_save_study5(self, filename)
|
|
65
67
|
|
|
66
68
|
if self.consensus_map is not None:
|
|
67
69
|
# save the features as a separate file
|
|
68
|
-
|
|
70
|
+
from masster.study.save import _save_consensusXML
|
|
71
|
+
_save_consensusXML(self, filename=filename.replace(".study5", ".consensusXML"))
|
|
69
72
|
self.filename = filename
|
|
70
73
|
|
|
71
74
|
|
|
@@ -211,4 +214,5 @@ def save_consensus(self, **kwargs):
|
|
|
211
214
|
if self.consensus_map is None:
|
|
212
215
|
self.logger.error("No consensus map found.")
|
|
213
216
|
return
|
|
214
|
-
|
|
217
|
+
from masster.study.save import _save_consensusXML
|
|
218
|
+
_save_consensusXML(self, **kwargs)
|
masster/study/study.py
CHANGED
|
@@ -52,36 +52,26 @@ import sys
|
|
|
52
52
|
import polars as pl
|
|
53
53
|
|
|
54
54
|
# Study-specific imports
|
|
55
|
-
from masster.study.
|
|
56
|
-
from masster.study.h5 import _save_study5
|
|
57
|
-
from masster.study.h5 import _save_study5_compressed
|
|
58
|
-
from masster.study.h5 import _load_ms1
|
|
55
|
+
from masster.study.analysis import analyze_umap
|
|
59
56
|
from masster.study.helpers import _get_consensus_uids
|
|
60
|
-
from masster.study.helpers import
|
|
61
|
-
from masster.study.helpers import
|
|
62
|
-
from masster.study.helpers import _ensure_features_df_schema_order
|
|
57
|
+
from masster.study.helpers import _get_features_uids
|
|
58
|
+
from masster.study.helpers import _get_samples_uids
|
|
63
59
|
from masster.study.helpers import compress
|
|
64
|
-
from masster.study.helpers import compress_features
|
|
65
|
-
from masster.study.helpers import compress_ms2
|
|
66
|
-
from masster.study.helpers import compress_chrom
|
|
67
|
-
from masster.study.helpers import restore_features
|
|
68
|
-
from masster.study.helpers import restore_chrom
|
|
69
|
-
from masster.study.helpers import restore_ms2
|
|
70
60
|
from masster.study.helpers import decompress
|
|
71
61
|
from masster.study.helpers import fill_reset
|
|
72
62
|
from masster.study.helpers import get_chrom
|
|
73
|
-
from masster.study.helpers import
|
|
63
|
+
from masster.study.helpers import get_samples
|
|
74
64
|
from masster.study.helpers import get_consensus
|
|
75
65
|
from masster.study.helpers import get_consensus_matches
|
|
76
66
|
from masster.study.helpers import get_consensus_matrix
|
|
77
67
|
from masster.study.helpers import get_orphans
|
|
68
|
+
from masster.study.helpers import get_sample_stats
|
|
78
69
|
from masster.study.helpers import get_gaps_matrix
|
|
79
70
|
from masster.study.helpers import get_gaps_stats
|
|
80
71
|
from masster.study.helpers import align_reset
|
|
81
|
-
from masster.study.helpers import
|
|
82
|
-
from masster.study.helpers import
|
|
83
|
-
from masster.study.helpers import
|
|
84
|
-
from masster.study.helpers import sample_color_reset
|
|
72
|
+
from masster.study.helpers import set_study_folder
|
|
73
|
+
from masster.study.helpers import set_samples_source
|
|
74
|
+
from masster.study.helpers import set_samples_color
|
|
85
75
|
from masster.study.helpers import sample_name_replace
|
|
86
76
|
from masster.study.helpers import sample_name_reset
|
|
87
77
|
from masster.study.helpers import samples_select
|
|
@@ -94,61 +84,34 @@ from masster.study.helpers import consensus_filter
|
|
|
94
84
|
from masster.study.helpers import consensus_delete
|
|
95
85
|
from masster.study.load import add
|
|
96
86
|
from masster.study.load import add_sample
|
|
97
|
-
from masster.study.load import _add_samples_batch
|
|
98
|
-
from masster.study.load import _add_sample_optimized
|
|
99
|
-
from masster.study.load import _add_sample_standard
|
|
100
|
-
from masster.study.load import _sample_color_reset_optimized
|
|
101
|
-
from masster.study.load import fill_single
|
|
102
87
|
from masster.study.load import fill
|
|
103
|
-
from masster.study.load import _process_sample_for_parallel_fill
|
|
104
|
-
from masster.study.load import _get_missing_consensus_sample_combinations
|
|
105
88
|
from masster.study.load import load
|
|
106
|
-
from masster.study.load import
|
|
107
|
-
from masster.study.load import load_features
|
|
108
|
-
from masster.study.load import sanitize
|
|
89
|
+
from masster.study.load import _load_features
|
|
109
90
|
from masster.study.plot import plot_alignment
|
|
110
91
|
from masster.study.plot import plot_consensus_2d
|
|
111
92
|
from masster.study.plot import plot_samples_2d
|
|
112
93
|
from masster.study.plot import plot_consensus_stats
|
|
113
94
|
from masster.study.plot import plot_chrom
|
|
114
|
-
from masster.study.plot import
|
|
95
|
+
from masster.study.plot import plot_samples_pca
|
|
96
|
+
from masster.study.plot import plot_samples_umap
|
|
115
97
|
from masster.study.plot import plot_bpc
|
|
116
98
|
from masster.study.plot import plot_tic
|
|
117
99
|
from masster.study.plot import plot_eic
|
|
118
100
|
from masster.study.plot import plot_rt_correction
|
|
119
101
|
from masster.study.processing import align
|
|
120
102
|
from masster.study.merge import merge
|
|
121
|
-
from masster.study.merge import _reset_consensus_data
|
|
122
|
-
from masster.study.merge import _extract_consensus_features
|
|
123
|
-
from masster.study.merge import _perform_adduct_grouping
|
|
124
|
-
from masster.study.merge import _consensus_cleanup
|
|
125
|
-
from masster.study.merge import _identify_adduct_by_mass_shift
|
|
126
|
-
from masster.study.merge import _finalize_merge
|
|
127
|
-
from masster.study.merge import _count_tight_clusters
|
|
128
103
|
from masster.study.processing import integrate
|
|
129
104
|
from masster.study.processing import find_ms2
|
|
130
105
|
from masster.study.processing import find_iso
|
|
131
106
|
from masster.study.processing import reset_iso
|
|
132
|
-
from masster.study.parameters import
|
|
107
|
+
from masster.study.parameters import update_history
|
|
133
108
|
from masster.study.parameters import get_parameters
|
|
134
109
|
from masster.study.parameters import update_parameters
|
|
135
110
|
from masster.study.parameters import get_parameters_property
|
|
136
111
|
from masster.study.parameters import set_parameters_property
|
|
137
|
-
from masster.study.save import save, save_consensus,
|
|
138
|
-
from masster.study.export import
|
|
139
|
-
export_mgf,
|
|
140
|
-
export_mztab,
|
|
141
|
-
export_xlsx,
|
|
142
|
-
export_parquet,
|
|
143
|
-
_get_mgf_df,
|
|
144
|
-
)
|
|
112
|
+
from masster.study.save import save, save_consensus, save_samples
|
|
113
|
+
from masster.study.export import export_mgf, export_mztab, export_xlsx, export_parquet
|
|
145
114
|
from masster.study.id import lib_load, identify, get_id, id_reset, lib_reset
|
|
146
|
-
from masster.study.id import (
|
|
147
|
-
_get_adducts,
|
|
148
|
-
_calculate_formula_mass_shift,
|
|
149
|
-
_format_adduct_name,
|
|
150
|
-
_parse_element_counts,
|
|
151
|
-
)
|
|
152
115
|
|
|
153
116
|
from masster.logger import MassterLogger
|
|
154
117
|
from masster.study.defaults.study_def import study_defaults
|
|
@@ -253,8 +216,24 @@ class Study:
|
|
|
253
216
|
For backward compatibility, original signature is supported:
|
|
254
217
|
Study(folder=..., label=..., log_level=..., log_label=..., log_sink=...)
|
|
255
218
|
"""
|
|
256
|
-
#
|
|
219
|
+
# ===== PARAMETER INITIALIZATION =====
|
|
220
|
+
auto_load_filename = self._init_parameters(filename, kwargs)
|
|
221
|
+
|
|
222
|
+
# ===== DATA STRUCTURES INITIALIZATION =====
|
|
223
|
+
self._init_data_structures()
|
|
224
|
+
|
|
225
|
+
# ===== LOGGER INITIALIZATION =====
|
|
226
|
+
self._init_logger()
|
|
227
|
+
|
|
228
|
+
# ===== AUTO-LOAD FILE IF PROVIDED =====
|
|
229
|
+
if auto_load_filename is not None:
|
|
230
|
+
self.load(filename=auto_load_filename)
|
|
231
|
+
|
|
232
|
+
# ===== SAMPLE CACHE =====
|
|
233
|
+
self._samples_cache = {}
|
|
257
234
|
|
|
235
|
+
def _init_parameters(self, filename, kwargs):
|
|
236
|
+
"""Initialize parameters and handle filename for auto-loading."""
|
|
258
237
|
# Handle filename parameter for automatic loading
|
|
259
238
|
auto_load_filename = None
|
|
260
239
|
if filename is not None:
|
|
@@ -281,13 +260,11 @@ class Study:
|
|
|
281
260
|
if hasattr(params, key):
|
|
282
261
|
params.set(key, value, validate=True)
|
|
283
262
|
|
|
284
|
-
#
|
|
285
|
-
self.filename = None
|
|
286
|
-
|
|
287
|
-
# Store parameter instance for method access
|
|
263
|
+
# Store parameter instance and initialize history
|
|
264
|
+
self.filename = None # Keeps a pointer to study5 whenever it's saved or loaded
|
|
288
265
|
self.parameters = params
|
|
289
266
|
self.history = {}
|
|
290
|
-
self.
|
|
267
|
+
self.update_history(["study"], params.to_dict())
|
|
291
268
|
|
|
292
269
|
# Set instance attributes (ensure proper string values for logger)
|
|
293
270
|
self.folder = params.folder
|
|
@@ -301,10 +278,15 @@ class Study:
|
|
|
301
278
|
self.log_label = params.log_label + " | " if params.log_label else ""
|
|
302
279
|
self.log_sink = params.log_sink
|
|
303
280
|
|
|
281
|
+
# Create folder if it doesn't exist
|
|
304
282
|
if self.folder is not None and not os.path.exists(self.folder):
|
|
305
|
-
# create the folder if it does not exist
|
|
306
283
|
os.makedirs(self.folder)
|
|
284
|
+
|
|
285
|
+
return auto_load_filename
|
|
307
286
|
|
|
287
|
+
def _init_data_structures(self):
|
|
288
|
+
"""Initialize all data structures used by the Study."""
|
|
289
|
+
# Sample information DataFrame
|
|
308
290
|
self.samples_df = pl.DataFrame(
|
|
309
291
|
{
|
|
310
292
|
"sample_uid": [],
|
|
@@ -337,21 +319,24 @@ class Study:
|
|
|
337
319
|
"num_ms2": pl.Int64,
|
|
338
320
|
},
|
|
339
321
|
)
|
|
322
|
+
|
|
323
|
+
# Feature-related data structures
|
|
340
324
|
self.features_maps = []
|
|
341
325
|
self.features_df = pl.DataFrame()
|
|
326
|
+
|
|
327
|
+
# Consensus-related data structures
|
|
342
328
|
self.consensus_ms2 = pl.DataFrame()
|
|
343
329
|
self.consensus_df = pl.DataFrame()
|
|
344
330
|
self.consensus_map = None
|
|
345
331
|
self.consensus_mapping_df = pl.DataFrame()
|
|
346
332
|
self.alignment_ref_index = None
|
|
347
333
|
|
|
348
|
-
# Library
|
|
349
|
-
self.lib_df = pl.DataFrame()
|
|
334
|
+
# Library and identification data structures
|
|
335
|
+
self.lib_df = pl.DataFrame() # populated by lib_load
|
|
336
|
+
self.id_df = pl.DataFrame() # populated by identify
|
|
350
337
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
# Initialize independent logger
|
|
338
|
+
def _init_logger(self):
|
|
339
|
+
"""Initialize the logger for this Study instance."""
|
|
355
340
|
self.logger = MassterLogger(
|
|
356
341
|
instance_type="study",
|
|
357
342
|
level=self.log_level.upper(),
|
|
@@ -361,85 +346,68 @@ class Study:
|
|
|
361
346
|
self.logger.debug(f"Study folder: {self.folder}")
|
|
362
347
|
self.logger.debug(f"Polarity: {self.polarity}")
|
|
363
348
|
|
|
364
|
-
# Auto-load study file if filename was provided
|
|
365
|
-
if auto_load_filename is not None:
|
|
366
|
-
self.load(filename=auto_load_filename)
|
|
367
|
-
|
|
368
|
-
# cache for Sample instances created/loaded by this Study
|
|
369
|
-
self._samples_cache = {}
|
|
370
|
-
|
|
371
|
-
# ===== ATTACH MODULE FUNCTIONS AS CLASS METHODS =====
|
|
372
|
-
|
|
373
349
|
# === File I/O Operations ===
|
|
374
350
|
load = load
|
|
375
351
|
save = save
|
|
376
352
|
save_consensus = save_consensus
|
|
377
353
|
save_samples = save_samples
|
|
378
|
-
|
|
379
|
-
set_folder = set_folder
|
|
354
|
+
set_study_folder = set_study_folder
|
|
380
355
|
|
|
381
356
|
# === Sample Management ===
|
|
382
357
|
add = add
|
|
383
|
-
add_folder = add # backward compatibility alias
|
|
384
358
|
add_sample = add_sample
|
|
385
359
|
|
|
386
360
|
# === Core Processing Operations ===
|
|
387
361
|
align = align
|
|
388
|
-
merge = merge
|
|
389
|
-
|
|
362
|
+
merge = merge
|
|
363
|
+
|
|
390
364
|
find_ms2 = find_ms2
|
|
391
365
|
find_iso = find_iso
|
|
392
366
|
reset_iso = reset_iso
|
|
393
367
|
iso_reset = reset_iso
|
|
394
368
|
integrate = integrate
|
|
395
|
-
|
|
369
|
+
|
|
396
370
|
fill = fill
|
|
397
|
-
|
|
398
|
-
fill_single = fill_single
|
|
399
|
-
fill_chrom_single = fill_single # Backward compatibility alias
|
|
371
|
+
|
|
400
372
|
# === Data Retrieval and Access ===
|
|
401
373
|
get_consensus = get_consensus
|
|
402
374
|
get_chrom = get_chrom
|
|
403
|
-
|
|
375
|
+
get_samples = get_samples
|
|
404
376
|
get_consensus_matches = get_consensus_matches
|
|
405
377
|
get_consensus_matrix = get_consensus_matrix
|
|
406
378
|
get_gaps_matrix = get_gaps_matrix
|
|
407
379
|
get_gaps_stats = get_gaps_stats
|
|
408
380
|
get_orphans = get_orphans
|
|
381
|
+
get_sample_stats = get_sample_stats
|
|
409
382
|
|
|
410
383
|
# === Data Selection and Filtering ===
|
|
411
384
|
samples_select = samples_select
|
|
412
385
|
samples_delete = samples_delete
|
|
386
|
+
|
|
413
387
|
features_select = features_select
|
|
414
388
|
features_filter = features_filter
|
|
415
389
|
features_delete = features_delete
|
|
416
390
|
consensus_select = consensus_select
|
|
417
391
|
consensus_filter = consensus_filter
|
|
418
392
|
consensus_delete = consensus_delete
|
|
419
|
-
# Backward compatibility aliases
|
|
420
|
-
filter_consensus = consensus_filter
|
|
421
|
-
select_consensus = consensus_select
|
|
422
|
-
filter_features = features_filter
|
|
423
|
-
select_features = features_select
|
|
424
|
-
consensus_find = merge
|
|
425
|
-
|
|
426
|
-
# === Sample Metadata and Styling ===
|
|
427
|
-
set_source = set_source
|
|
428
|
-
sample_color = sample_color
|
|
429
|
-
sample_color_reset = sample_color_reset
|
|
430
|
-
reset_sample_color = sample_color_reset
|
|
431
|
-
name_replace = sample_name_replace
|
|
432
|
-
name_reset = sample_name_reset
|
|
433
|
-
reset_name = sample_name_reset
|
|
434
393
|
|
|
394
|
+
# === Sample Metadata and Styling ===
|
|
395
|
+
set_samples_source = set_samples_source
|
|
396
|
+
set_samples_color = set_samples_color
|
|
397
|
+
|
|
398
|
+
samples_name_replace = sample_name_replace
|
|
399
|
+
samples_name_reset = sample_name_reset
|
|
400
|
+
|
|
401
|
+
# Backward compatibility aliases for renamed methods
|
|
402
|
+
set_folder = set_study_folder
|
|
403
|
+
set_source = set_samples_source
|
|
404
|
+
#sample_color = set_samples_color
|
|
405
|
+
#get_sample = get_samples
|
|
406
|
+
#load_features = _load_features
|
|
407
|
+
store_history = update_history
|
|
408
|
+
|
|
435
409
|
# === Data Compression and Storage ===
|
|
436
410
|
compress = compress
|
|
437
|
-
compress_features = compress_features
|
|
438
|
-
compress_ms2 = compress_ms2
|
|
439
|
-
compress_chrom = compress_chrom
|
|
440
|
-
restore_features = restore_features
|
|
441
|
-
restore_chrom = restore_chrom
|
|
442
|
-
restore_ms2 = restore_ms2
|
|
443
411
|
decompress = decompress
|
|
444
412
|
|
|
445
413
|
# === Reset Operations ===
|
|
@@ -453,12 +421,16 @@ class Study:
|
|
|
453
421
|
plot_chrom = plot_chrom
|
|
454
422
|
plot_consensus_2d = plot_consensus_2d
|
|
455
423
|
plot_consensus_stats = plot_consensus_stats
|
|
456
|
-
|
|
424
|
+
plot_samples_pca = plot_samples_pca
|
|
425
|
+
plot_samples_umap = plot_samples_umap
|
|
457
426
|
plot_samples_2d = plot_samples_2d
|
|
458
427
|
plot_bpc = plot_bpc
|
|
459
428
|
plot_rt_correction = plot_rt_correction
|
|
460
429
|
plot_tic = plot_tic
|
|
461
430
|
plot_eic = plot_eic
|
|
431
|
+
|
|
432
|
+
# === Analysis Operations ===
|
|
433
|
+
analyze_umap = analyze_umap
|
|
462
434
|
|
|
463
435
|
# === Export Operations ===
|
|
464
436
|
export_mgf = export_mgf
|
|
@@ -476,45 +448,23 @@ class Study:
|
|
|
476
448
|
reset_lib = lib_reset
|
|
477
449
|
|
|
478
450
|
# === Parameter Management ===
|
|
479
|
-
|
|
451
|
+
update_history = update_history
|
|
480
452
|
get_parameters = get_parameters
|
|
481
453
|
update_parameters = update_parameters
|
|
482
454
|
get_parameters_property = get_parameters_property
|
|
483
455
|
set_parameters_property = set_parameters_property
|
|
484
456
|
|
|
485
457
|
# === Private/Internal Methods ===
|
|
486
|
-
_add_samples_batch = _add_samples_batch
|
|
487
|
-
_add_sample_optimized = _add_sample_optimized
|
|
488
|
-
_add_sample_standard = _add_sample_standard
|
|
489
|
-
_sample_color_reset_optimized = _sample_color_reset_optimized
|
|
490
|
-
_load_study5 = _load_study5
|
|
491
|
-
_save_study5 = _save_study5
|
|
492
|
-
_save_study5_compressed = _save_study5_compressed
|
|
493
|
-
_load_ms1 = _load_ms1
|
|
494
458
|
_get_consensus_uids = _get_consensus_uids
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
)
|
|
502
|
-
_load_consensusXML = _load_consensusXML
|
|
503
|
-
load_features = load_features
|
|
504
|
-
_save_consensusXML = _save_consensusXML
|
|
505
|
-
_get_mgf_df = _get_mgf_df
|
|
506
|
-
_get_adducts = _get_adducts
|
|
507
|
-
_calculate_formula_mass_shift = _calculate_formula_mass_shift
|
|
508
|
-
_format_adduct_name = _format_adduct_name
|
|
509
|
-
_parse_element_counts = _parse_element_counts
|
|
459
|
+
_get_features_uids = _get_features_uids
|
|
460
|
+
_get_samples_uids = _get_samples_uids
|
|
461
|
+
_load_features = _load_features
|
|
462
|
+
|
|
463
|
+
# Note: _load_study5 and _save_study5 are not exposed as class methods
|
|
464
|
+
# They are used internally by load() and save() methods only
|
|
510
465
|
|
|
511
466
|
# === Merge Helper Methods ===
|
|
512
|
-
|
|
513
|
-
_extract_consensus_features = _extract_consensus_features
|
|
514
|
-
_perform_adduct_grouping = _perform_adduct_grouping
|
|
515
|
-
_consensus_cleanup = _consensus_cleanup
|
|
516
|
-
_identify_adduct_by_mass_shift = _identify_adduct_by_mass_shift
|
|
517
|
-
_finalize_merge = _finalize_merge
|
|
467
|
+
# (All merge helper methods are now internal to the merge module)
|
|
518
468
|
|
|
519
469
|
# === Default Parameters ===
|
|
520
470
|
study_defaults = study_defaults
|
|
@@ -627,16 +577,24 @@ class Study:
|
|
|
627
577
|
'find_consensus', # alias for merge
|
|
628
578
|
'integrate_chrom', # alias for integrate
|
|
629
579
|
'fill_chrom', # alias for fill
|
|
630
|
-
'fill_chrom_single', # alias for fill_single
|
|
631
580
|
'filter_consensus', # alias for consensus_filter
|
|
632
581
|
'select_consensus', # alias for consensus_select
|
|
633
582
|
'filter_features', # alias for features_filter
|
|
634
583
|
'select_features', # alias for features_select
|
|
635
584
|
'consensus_find', # alias for merge
|
|
585
|
+
# Backward compatibility for renamed methods
|
|
586
|
+
'set_folder', # alias for set_study_folder
|
|
587
|
+
'set_source', # alias for set_samples_source
|
|
588
|
+
'sample_color', # alias for set_samples_color
|
|
589
|
+
'get_sample', # alias for get_samples
|
|
590
|
+
'load_features', # alias for _load_features
|
|
591
|
+
'store_history', # alias for update_history
|
|
592
|
+
'sample_color_reset', # alias for set_samples_color(by=None)
|
|
593
|
+
'reset_sample_color', # alias for sample_color_reset
|
|
636
594
|
}
|
|
637
595
|
|
|
638
596
|
# Get all attributes from the class
|
|
639
|
-
all_attrs = set()
|
|
597
|
+
all_attrs: set[str] = set()
|
|
640
598
|
|
|
641
599
|
# Add attributes from the class and all its bases
|
|
642
600
|
for cls in self.__class__.__mro__:
|
|
@@ -895,8 +853,9 @@ class Study:
|
|
|
895
853
|
tight_clusters_count = 0
|
|
896
854
|
if consensus_df_len > 0:
|
|
897
855
|
try:
|
|
856
|
+
from masster.study.merge import _count_tight_clusters
|
|
898
857
|
tight_clusters_count = _count_tight_clusters(self, mz_tol=0.04, rt_tol=0.3)
|
|
899
|
-
except Exception
|
|
858
|
+
except Exception:
|
|
900
859
|
# If tight clusters calculation fails, just use 0
|
|
901
860
|
tight_clusters_count = 0
|
|
902
861
|
|
|
@@ -952,7 +911,6 @@ class Study:
|
|
|
952
911
|
|
|
953
912
|
print(summary)
|
|
954
913
|
|
|
955
|
-
|
|
956
914
|
if __name__ == "__main__":
|
|
957
915
|
# This block is executed when the script is run directly
|
|
958
916
|
pass
|
masster/wizard/wizard.py
CHANGED
|
@@ -127,7 +127,7 @@ class wizard_def:
|
|
|
127
127
|
|
|
128
128
|
# === Feature Detection ===
|
|
129
129
|
chrom_fwhm: float = 0.5
|
|
130
|
-
|
|
130
|
+
noise: float = 50.0
|
|
131
131
|
chrom_peak_snr: float = 5.0
|
|
132
132
|
tol_ppm: float = 10.0
|
|
133
133
|
detector_type: str = "unknown" # Detected detector type ("orbitrap", "quadrupole", "unknown")
|
|
@@ -307,15 +307,15 @@ class Wizard:
|
|
|
307
307
|
"""
|
|
308
308
|
try:
|
|
309
309
|
# Find first file
|
|
310
|
-
for extension in ['.wiff', '.raw', '.mzML'
|
|
310
|
+
for extension in ['.wiff', '.raw', '.mzML']:
|
|
311
311
|
pattern = f"**/*{extension}" if True else f"*{extension}" # search_subfolders=True
|
|
312
312
|
files = list(self.source_path.rglob(pattern))
|
|
313
313
|
if files:
|
|
314
314
|
first_file = files[0]
|
|
315
315
|
break
|
|
316
316
|
else:
|
|
317
|
-
return
|
|
318
|
-
|
|
317
|
+
return 'positive'
|
|
318
|
+
|
|
319
319
|
# Only implement for .wiff files initially (most common format)
|
|
320
320
|
if first_file.suffix.lower() == '.wiff':
|
|
321
321
|
from masster.sample.load import _wiff_to_dict
|
|
@@ -337,7 +337,7 @@ class Wizard:
|
|
|
337
337
|
# Silently fall back to default if inference fails
|
|
338
338
|
pass
|
|
339
339
|
|
|
340
|
-
return
|
|
340
|
+
return 'positive'
|
|
341
341
|
|
|
342
342
|
@property
|
|
343
343
|
def polarity(self) -> str:
|
|
@@ -543,9 +543,9 @@ class Wizard:
|
|
|
543
543
|
' sample = Sample(log_label=sample_name)',
|
|
544
544
|
' sample.load(filename=str(raw_file))',
|
|
545
545
|
' sample.find_features(',
|
|
546
|
-
' noise=PARAMS[\'
|
|
547
|
-
' chrom_fwhm=PARAMS[\'
|
|
548
|
-
' chrom_peak_snr=PARAMS[\'
|
|
546
|
+
' noise=PARAMS[\'noise\'],',
|
|
547
|
+
' chrom_fwhm=PARAMS[\'chrom_fwhm\'],',
|
|
548
|
+
' chrom_peak_snr=PARAMS[\'chrom_peak_snr\']',
|
|
549
549
|
' )',
|
|
550
550
|
' sample.find_adducts(adducts=PARAMS[\'adducts\'])',
|
|
551
551
|
' sample.find_ms2()',
|