masster 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +27 -27
- masster/_version.py +17 -17
- masster/chromatogram.py +497 -503
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
- masster/logger.py +318 -244
- masster/sample/__init__.py +9 -9
- masster/sample/defaults/__init__.py +15 -15
- masster/sample/defaults/find_adducts_def.py +325 -325
- masster/sample/defaults/find_features_def.py +366 -366
- masster/sample/defaults/find_ms2_def.py +285 -285
- masster/sample/defaults/get_spectrum_def.py +314 -318
- masster/sample/defaults/sample_def.py +374 -378
- masster/sample/h5.py +1321 -1297
- masster/sample/helpers.py +833 -364
- masster/sample/lib.py +762 -0
- masster/sample/load.py +1220 -1187
- masster/sample/parameters.py +131 -131
- masster/sample/plot.py +1610 -1622
- masster/sample/processing.py +1402 -1416
- masster/sample/quant.py +209 -0
- masster/sample/sample.py +391 -387
- masster/sample/sample5_schema.json +181 -181
- masster/sample/save.py +737 -736
- masster/sample/sciex.py +1213 -0
- masster/spectrum.py +1287 -1319
- masster/study/__init__.py +9 -9
- masster/study/defaults/__init__.py +21 -19
- masster/study/defaults/align_def.py +267 -267
- masster/study/defaults/export_def.py +41 -40
- masster/study/defaults/fill_chrom_def.py +264 -264
- masster/study/defaults/fill_def.py +260 -0
- masster/study/defaults/find_consensus_def.py +256 -256
- masster/study/defaults/find_ms2_def.py +163 -163
- masster/study/defaults/integrate_chrom_def.py +225 -225
- masster/study/defaults/integrate_def.py +221 -0
- masster/study/defaults/merge_def.py +256 -0
- masster/study/defaults/study_def.py +272 -269
- masster/study/export.py +674 -287
- masster/study/h5.py +1398 -886
- masster/study/helpers.py +1650 -433
- masster/study/helpers_optimized.py +317 -0
- masster/study/load.py +1201 -1078
- masster/study/parameters.py +99 -99
- masster/study/plot.py +632 -645
- masster/study/processing.py +1057 -1046
- masster/study/save.py +149 -134
- masster/study/study.py +606 -522
- masster/study/study5_schema.json +247 -241
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/METADATA +15 -10
- masster-0.3.0.dist-info/RECORD +59 -0
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/licenses/LICENSE +661 -661
- masster-0.2.5.dist-info/RECORD +0 -50
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/WHEEL +0 -0
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/entry_points.txt +0 -0
masster/study/save.py
CHANGED
|
@@ -1,134 +1,149 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
|
|
7
|
-
import polars as pl
|
|
8
|
-
import pyopenms as oms
|
|
9
|
-
|
|
10
|
-
from tqdm import tqdm
|
|
11
|
-
|
|
12
|
-
from masster.sample.sample import Sample
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def save(self, filename=None):
|
|
16
|
-
"""
|
|
17
|
-
Save the study to an HDF5 file with proper serialization of complex objects.
|
|
18
|
-
|
|
19
|
-
Args:
|
|
20
|
-
study: The study object to save
|
|
21
|
-
filename (str, optional): Target file name. If None, uses default.
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
ddaobj.
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
import pyopenms as oms
|
|
9
|
+
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
12
|
+
from masster.sample.sample import Sample
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def save(self, filename=None, add_timestamp=True, compress=False):
    """
    Save the study to a ``.study5`` file and, if present, its consensus map.

    Args:
        filename (str, optional): Target file name. If None, ``data.study5``
            inside ``self.folder`` is used. A relative name is resolved against
            ``self.folder`` when it is set, otherwise against the current
            working directory.
        add_timestamp (bool, optional): If True (default), ``_YYYYMMDD-HHMMSS``
            is inserted before the ``.study5`` extension so that an existing
            file is not overwritten.
        compress (bool, optional): If True, the study is written through
            ``self._save_study5_compressed`` instead of ``self._save_study5``.
            Default False.

    Returns:
        None. When neither ``filename`` nor ``self.folder`` is available an
        error is logged and nothing is written. On success ``self.filename``
        is set to the path that was written.
    """
    if filename is None:
        if self.folder is None:
            self.logger.error("either filename or folder must be provided")
            return
        # save to default file name in folder
        filename = os.path.join(self.folder, "data.study5")
    elif not os.path.isabs(filename):
        base_dir = self.folder if self.folder is not None else os.getcwd()
        filename = os.path.join(base_dir, filename)

    # Only a *trailing* ".study5" is the extension. A blanket str.replace()
    # would also rewrite directory names that happen to contain ".study5".
    suffix = ".study5"
    stem = filename[: -len(suffix)] if filename.endswith(suffix) else filename

    # Add timestamp by default to avoid overwriting an earlier save
    if add_timestamp:
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        stem = f"{stem}_{timestamp}"
        filename = stem + suffix

    # Log size information for performance monitoring
    features = getattr(self, "features_df", None)
    if features is not None and not features.is_empty():
        feature_count = len(features)
        samples = getattr(self, "samples_df", None)
        sample_count = len(samples) if samples is not None and not samples.is_empty() else 0
        self.logger.info(f"Saving study with {sample_count} samples and {feature_count} features to {filename}")

    if compress:
        self._save_study5_compressed(filename)
    else:
        self._save_study5(filename)

    if self.consensus_map is not None:
        # Save the consensus map next to the study file. Building the name
        # from the stem guarantees it differs from the study file even when
        # the caller's name has no ".study5" extension.
        self._save_consensusXML(filename=stem + ".consensusXML")
    self.filename = filename
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def save_samples(self, samples=None):
    """
    Re-save individual samples and write their feature maps as featureXML.

    For every requested sample the sample file referenced by ``sample_path``
    is loaded, an ``rt_original`` column is added if missing, the ``rt``
    column is replaced by the study-level ``rt`` values when the row counts
    agree, and the sample is saved again. The matching entry of
    ``self.features_maps`` (same position as the sample's row in
    ``samples_df``) is then stored as ``<sample_name>.featureXML`` in
    ``self.folder``, or in the current working directory if no folder is set.

    Args:
        samples (list, optional): ``sample_uid`` values to save. If None, all
            uids found in ``self.samples_df`` are used. Unknown uids are
            skipped with a warning.
    """
    if samples is None:
        # get all sample_uids from samples_df
        samples = self.samples_df["sample_uid"].to_list()

    self.logger.info(f"Saving features for {len(samples)} samples...")

    # Index samples_df once (uid -> (row position, row dict)) instead of
    # rebuilding a list and rescanning the frame for every sample.
    # setdefault keeps the first row for a uid, as a first-match scan would.
    sample_lookup = {}
    for position, row_dict in enumerate(self.samples_df.iter_rows(named=True)):
        sample_lookup.setdefault(row_dict["sample_uid"], (position, row_dict))

    out_dir = self.folder if self.folder is not None else os.getcwd()

    tqdm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
    for sample_uid in tqdm(
        samples,
        total=len(samples),
        desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Save samples",
        disable=tqdm_disable,
    ):
        entry = sample_lookup.get(sample_uid)
        if entry is None:
            self.logger.warning(
                f"Sample with uid {sample_uid} not found in samples_df.",
            )
            continue
        sample_index, sample_row = entry

        # load the sample file
        ddaobj = Sample(filename=sample_row["sample_path"])
        if "rt_original" not in ddaobj.features_df.columns:
            # keep the current rt values under 'rt_original'
            ddaobj.features_df = ddaobj.features_df.with_columns(
                pl.col("rt").alias("rt_original"),
            )

        # find the rows in features_df that match the sample_uid
        matching_rows = self.features_df.filter(pl.col("sample_uid") == sample_uid)
        if not matching_rows.is_empty() and len(matching_rows) == len(ddaobj.features_df):
            # Pass the column as a Series: pl.lit() on a Python list may be
            # treated as one List-typed literal rather than a column of values.
            # NOTE(review): rows are matched by position, as before — confirm
            # both frames share the same feature order.
            ddaobj.features_df = ddaobj.features_df.with_columns(
                matching_rows.get_column("rt").alias("rt"),
            )

        # save ddaobj
        ddaobj.save()

        # features_maps is addressed by the sample's row position in samples_df
        if sample_index < len(self.features_maps):
            feature_xml_path = os.path.join(out_dir, sample_row["sample_name"] + ".featureXML")
            oms.FeatureXMLFile().store(feature_xml_path, self.features_maps[sample_index])

    self.logger.debug("All samples saved successfully.")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _save_consensusXML(self, filename: str):
|
|
135
|
+
if self.consensus_map is None:
|
|
136
|
+
self.logger.error("No consensus map found.")
|
|
137
|
+
return
|
|
138
|
+
|
|
139
|
+
fh = oms.ConsensusXMLFile()
|
|
140
|
+
fh.store(filename, self.consensus_map)
|
|
141
|
+
self.logger.info(f"Saved consensus map to {filename}")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def save_consensus(self, **kwargs):
    """
    Save the consensus map to a file.

    All keyword arguments are forwarded unchanged to
    ``self._save_consensusXML``. If the study has no consensus map, an
    error is logged and nothing is written.
    """
    if self.consensus_map is not None:
        self._save_consensusXML(**kwargs)
        return
    self.logger.error("No consensus map found.")
|