masster-0.2.5-py3-none-any.whl → masster-0.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. More details are linked from the original release page.

Files changed (55):
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1685 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +393 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -736
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1406 -886
  41. masster/study/helpers.py +1713 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1231 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +161 -134
  48. masster/study/study.py +612 -522
  49. masster/study/study5_schema.json +253 -241
  50. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/METADATA +15 -10
  51. masster-0.3.1.dist-info/RECORD +59 -0
  52. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.5.dist-info/RECORD +0 -50
  54. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
  55. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
masster/study/save.py CHANGED
@@ -1,134 +1,161 @@
1
- from __future__ import annotations
2
-
3
- import os
4
-
5
- from datetime import datetime
6
-
7
- import polars as pl
8
- import pyopenms as oms
9
-
10
- from tqdm import tqdm
11
-
12
- from masster.sample.sample import Sample
13
-
14
-
15
- def save(self, filename=None):
16
- """
17
- Save the study to an HDF5 file with proper serialization of complex objects.
18
-
19
- Args:
20
- study: The study object to save
21
- filename (str, optional): Target file name. If None, uses default.
22
- """
23
-
24
- if filename is None:
25
- # save to default file name in default_folder
26
- if self.default_folder is not None:
27
- filename = os.path.join(self.default_folder, "data.study5")
28
- else:
29
- self.logger.error("either filename or default_folder must be provided")
30
- return
31
- else:
32
- # check if filename includes any path
33
- if not os.path.isabs(filename):
34
- if self.default_folder is not None:
35
- filename = os.path.join(self.default_folder, filename)
36
- else:
37
- filename = os.path.join(os.getcwd(), filename)
38
-
39
- # if filename exists, append a timestamp to avoid overwriting
40
- #if os.path.exists(filename):
41
- timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
42
- filename = f"{filename.replace('.study5', '')}_{timestamp}.study5"
43
-
44
- self._save_study5(filename)
45
-
46
- if self.consensus_map is not None:
47
- # save the features as a separate file
48
- self._save_consensusXML(filename=filename.replace(".study5", ".consensusXML"))
49
-
50
-
51
- def save_samples(self, samples=None):
52
- if samples is None:
53
- # get all sample_uids from samples_df
54
- samples = self.samples_df["sample_uid"].to_list()
55
-
56
- self.logger.info(f"Saving features for {len(samples)} samples...")
57
-
58
- tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
59
- for sample_uid in tqdm(
60
- samples,
61
- total=len(samples),
62
- desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Save samples",
63
- disable=tdqm_disable,
64
- ):
65
- # check if sample_uid is in samples_df
66
- if sample_uid not in self.samples_df.get_column("sample_uid").to_list():
67
- self.logger.warning(
68
- f"Sample with uid {sample_uid} not found in samples_df.",
69
- )
70
- continue
71
- # load the mzpkl file
72
- sample_row = self.samples_df.filter(pl.col("sample_uid") == sample_uid)
73
- if sample_row.is_empty():
74
- continue
75
- ddaobj = Sample(filename=sample_row.row(0, named=True)["sample_path"])
76
- if "rt_original" not in ddaobj.features_df.columns:
77
- # add column 'rt_original' with rt values
78
- ddaobj.features_df = ddaobj.features_df.with_columns(
79
- pl.col("rt").alias("rt_original"),
80
- )
81
- # find the rows in features_df that match the sample_uid
82
- matching_rows = self.features_df.filter(pl.col("sample_uid") == sample_uid)
83
- if not matching_rows.is_empty():
84
- # Update rt values in ddaobj.features_df based on matching_rows
85
- rt_values = matching_rows["rt"].to_list()
86
- if len(rt_values) == len(ddaobj.features_df):
87
- ddaobj.features_df = ddaobj.features_df.with_columns(
88
- pl.lit(rt_values).alias("rt"),
89
- )
90
- # save ddaobj
91
- ddaobj.save()
92
- sample_name = sample_row.row(0, named=True)["sample_name"]
93
- # Find the index of this sample in the original order for features_maps
94
- sample_index = next(
95
- (
96
- i
97
- for i, row_dict in enumerate(self.samples_df.iter_rows(named=True))
98
- if row_dict["sample_uid"] == sample_uid
99
- ),
100
- None,
101
- )
102
- if self.default_folder is not None:
103
- filename = os.path.join(
104
- self.default_folder,
105
- sample_name + ".featureXML",
106
- )
107
- else:
108
- filename = os.path.join(
109
- os.getcwd(),
110
- sample_name + ".featureXML",
111
- )
112
- fh = oms.FeatureXMLFile()
113
- if sample_index is not None and sample_index < len(self.features_maps):
114
- fh.store(filename, self.features_maps[sample_index])
115
-
116
- self.logger.debug("All samples saved successfully.")
117
-
118
-
119
- def _save_consensusXML(self, filename:str):
120
- if self.consensus_map is None:
121
- self.logger.error("No consensus map found.")
122
- return
123
-
124
- fh = oms.ConsensusXMLFile()
125
- fh.store(filename, self.consensus_map)
126
- self.logger.info(f"Saved consensus map to {filename}")
127
-
128
-
129
- def save_consensus(self, **kwargs):
130
- """Save the consensus map to a file."""
131
- if self.consensus_map is None:
132
- self.logger.error("No consensus map found.")
133
- return
134
- self._save_consensusXML(**kwargs)
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ from datetime import datetime
6
+
7
+ import polars as pl
8
+ import pyopenms as oms
9
+
10
+ from tqdm import tqdm
11
+
12
+ from masster.sample.sample import Sample
13
+
14
+
15
+ def save(self, filename=None, add_timestamp=True, compress=False):
16
+ """
17
+ Save the study to an HDF5 file with proper serialization of complex objects.
18
+
19
+ Args:
20
+ study: The study object to save
21
+ filename (str, optional): Target file name. If None, uses default.
22
+ add_timestamp (bool, optional): If True, appends timestamp to avoid overwriting.
23
+ Default True for safety (original behavior).
24
+ compress (bool, optional): If True, uses compressed mode and skips
25
+ some heavy columns for maximum speed. Default False.
26
+ """
27
+
28
+ if filename is None:
29
+ # save to default file name in folder
30
+ if self.folder is not None:
31
+ filename = os.path.join(self.folder, "data.study5")
32
+ else:
33
+ self.logger.error("either filename or folder must be provided")
34
+ return
35
+ else:
36
+ # check if filename includes any path
37
+ if not os.path.isabs(filename):
38
+ if self.folder is not None:
39
+ filename = os.path.join(self.folder, filename)
40
+ else:
41
+ filename = os.path.join(os.getcwd(), filename)
42
+
43
+ # Add timestamp by default to avoid overwriting (original behavior restored)
44
+ if add_timestamp:
45
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
46
+ filename = f"{filename.replace('.study5', '')}_{timestamp}.study5"
47
+
48
+ # Log file size information for performance monitoring
49
+ if hasattr(self, 'features_df') and not self.features_df.is_empty():
50
+ feature_count = len(self.features_df)
51
+ sample_count = len(self.samples_df) if hasattr(self, 'samples_df') and not self.samples_df.is_empty() else 0
52
+ self.logger.info(f"Saving study with {sample_count} samples and {feature_count} features to {filename}")
53
+
54
+ # Use compressed mode for large datasets
55
+ if compress:
56
+ self._save_study5_compressed(filename)
57
+ else:
58
+ self._save_study5(filename)
59
+
60
+ if self.consensus_map is not None:
61
+ # save the features as a separate file
62
+ self._save_consensusXML(filename=filename.replace(".study5", ".consensusXML"))
63
+ self.filename = filename
64
+
65
+
66
+ def save_samples(self, samples=None):
67
+ if samples is None:
68
+ # get all sample_uids from samples_df
69
+ samples = self.samples_df["sample_uid"].to_list()
70
+
71
+ self.logger.info(f"Saving features for {len(samples)} samples...")
72
+
73
+ tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
74
+ for sample_uid in tqdm(
75
+ samples,
76
+ total=len(samples),
77
+ desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Save samples",
78
+ disable=tdqm_disable,
79
+ ):
80
+ # check if sample_uid is in samples_df
81
+ if sample_uid not in self.samples_df.get_column("sample_uid").to_list():
82
+ self.logger.warning(
83
+ f"Sample with uid {sample_uid} not found in samples_df.",
84
+ )
85
+ continue
86
+ # load the mzpkl file
87
+ sample_row = self.samples_df.filter(pl.col("sample_uid") == sample_uid)
88
+ if sample_row.is_empty():
89
+ continue
90
+ ddaobj = Sample(filename=sample_row.row(0, named=True)["sample_path"])
91
+ if "rt_original" not in ddaobj.features_df.columns:
92
+ # add column 'rt_original' with rt values
93
+ ddaobj.features_df = ddaobj.features_df.with_columns(
94
+ pl.col("rt").alias("rt_original"),
95
+ )
96
+ # find the rows in features_df that match the sample_uid
97
+ matching_rows = self.features_df.filter(pl.col("sample_uid") == sample_uid)
98
+ if not matching_rows.is_empty():
99
+ # Update rt values in ddaobj.features_df based on matching_rows
100
+ rt_values = matching_rows["rt"].to_list()
101
+ if len(rt_values) == len(ddaobj.features_df):
102
+ ddaobj.features_df = ddaobj.features_df.with_columns(
103
+ pl.lit(rt_values).alias("rt"),
104
+ )
105
+ # save ddaobj
106
+ ddaobj.save()
107
+ sample_name = sample_row.row(0, named=True)["sample_name"]
108
+ sample_path = sample_row.row(0, named=True)["sample_path"]
109
+
110
+ # Find the index of this sample in the original order for features_maps
111
+ sample_index = next(
112
+ (
113
+ i
114
+ for i, row_dict in enumerate(self.samples_df.iter_rows(named=True))
115
+ if row_dict["sample_uid"] == sample_uid
116
+ ),
117
+ None,
118
+ )
119
+
120
+ # Determine where to save the featureXML file based on sample_path location
121
+ if sample_path.endswith(".sample5"):
122
+ # If sample_path is a .sample5 file, save featureXML in the same directory
123
+ featurexml_filename = sample_path.replace(".sample5", ".featureXML")
124
+ self.logger.debug(f"Saving featureXML alongside .sample5 file: {featurexml_filename}")
125
+ else:
126
+ # Fallback to study folder or current directory (original behavior)
127
+ if self.folder is not None:
128
+ featurexml_filename = os.path.join(
129
+ self.folder,
130
+ sample_name + ".featureXML",
131
+ )
132
+ else:
133
+ featurexml_filename = os.path.join(
134
+ os.getcwd(),
135
+ sample_name + ".featureXML",
136
+ )
137
+ self.logger.debug(f"Saving featureXML to default location: {featurexml_filename}")
138
+
139
+ fh = oms.FeatureXMLFile()
140
+ if sample_index is not None and sample_index < len(self.features_maps):
141
+ fh.store(featurexml_filename, self.features_maps[sample_index])
142
+
143
+ self.logger.debug("All samples saved successfully.")
144
+
145
+
146
+ def _save_consensusXML(self, filename: str):
147
+ if self.consensus_map is None:
148
+ self.logger.error("No consensus map found.")
149
+ return
150
+
151
+ fh = oms.ConsensusXMLFile()
152
+ fh.store(filename, self.consensus_map)
153
+ self.logger.info(f"Saved consensus map to {filename}")
154
+
155
+
156
+ def save_consensus(self, **kwargs):
157
+ """Save the consensus map to a file."""
158
+ if self.consensus_map is None:
159
+ self.logger.error("No consensus map found.")
160
+ return
161
+ self._save_consensusXML(**kwargs)