masster 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (55):
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1610 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +391 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -736
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1398 -886
  41. masster/study/helpers.py +1650 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1201 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +149 -134
  48. masster/study/study.py +606 -522
  49. masster/study/study5_schema.json +247 -241
  50. {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/METADATA +15 -10
  51. masster-0.3.0.dist-info/RECORD +59 -0
  52. {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.5.dist-info/RECORD +0 -50
  54. {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/WHEEL +0 -0
  55. {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/entry_points.txt +0 -0
masster/sample/sample.py CHANGED
@@ -1,387 +1,391 @@
1
- """
2
- sample.py
3
-
4
- This module provides tools for processing and analyzing Data-Dependent Acquisition (DDA) mass spectrometry data.
5
- It defines the `Sample` class, which offers methods to load, process, analyze, and visualize mass spectrometry data
6
- from various file formats, including mzML, Thermo RAW, and Sciex WIFF formats.
7
-
8
- Key Features:
9
- - **File Handling**: Load and save data in multiple formats.
10
- - **Feature Detection**: Detect and process mass spectrometry features.
11
- - **Spectrum Analysis**: Retrieve and analyze MS1/MS2 spectra.
12
- - **Visualization**: Generate interactive and static plots for spectra and chromatograms.
13
- - **Statistics**: Compute and export detailed DDA run statistics.
14
-
15
- Dependencies:
16
- - `pyopenms`: For file handling and feature detection.
17
- - `polars` and `pandas`: For data manipulation.
18
- - `numpy`: For numerical computations.
19
- - `bokeh`, `panel`, `holoviews`, `datashader`: For interactive visualizations.
20
-
21
- Classes:
22
- - `Sample`: Main class for handling DDA data, providing methods for data import, processing, and visualization.
23
-
24
- Example Usage:
25
- ```python
26
- from masster.sample import Sample
27
-
28
- sample = Sample(file="example.mzML")
29
- sample.find_features()
30
- sample.plot_2d()
31
- ```
32
-
33
- """
34
-
35
- import importlib
36
- import os
37
- import sys
38
-
39
- import polars as pl
40
-
41
- from masster._version import get_version
42
-
43
- from masster.sample.defaults.sample_def import sample_defaults
44
-
45
- # Sample-specific imports
46
- from masster.sample.h5 import _load_sample5
47
- from masster.sample.h5 import _save_sample5
48
- from masster.sample.helpers import _delete_ms2
49
- from masster.sample.helpers import _get_scan_uids
50
- from masster.sample.helpers import _get_feature_uids
51
- from masster.sample.helpers import filter_features
52
- from masster.sample.helpers import find_closest_scan
53
- from masster.sample.helpers import get_dda_stats
54
- from masster.sample.helpers import get_feature
55
- from masster.sample.helpers import get_scan
56
- from masster.sample.load import _load_featureXML
57
- from masster.sample.load import _load_ms2data
58
- from masster.sample.load import _load_mzML
59
- from masster.sample.load import _load_raw
60
- from masster.sample.load import _load_wiff
61
- from masster.sample.load import chrom_extract
62
- from masster.sample.load import index_file
63
- from masster.sample.load import load
64
- from masster.sample.load import sanitize
65
- from masster.logger import MassterLogger
66
- from masster.sample.plot import plot_2d
67
- from masster.sample.plot import plot_2d_oracle
68
- from masster.sample.plot import plot_dda_stats
69
- from masster.sample.plot import plot_eic
70
- from masster.sample.plot import plot_feature_stats
71
- from masster.sample.plot import plot_ms2_cycle
72
- from masster.sample.plot import plot_ms2_eic
73
- from masster.sample.plot import plot_ms2_q1
74
- from masster.sample.processing import _clean_features_df
75
- from masster.sample.processing import _features_deisotope
76
- from masster.sample.processing import _get_ztscan_stats
77
- from masster.sample.processing import _spec_to_mat
78
- from masster.sample.processing import analyze_dda
79
- from masster.sample.processing import find_adducts
80
- from masster.sample.processing import find_features
81
- from masster.sample.processing import find_ms2
82
- from masster.sample.processing import get_spectrum
83
- from masster.sample.parameters import store_history
84
- from masster.sample.parameters import get_parameters
85
- from masster.sample.parameters import update_parameters
86
- from masster.sample.parameters import get_parameters_property
87
- from masster.sample.parameters import set_parameters_property
88
- from masster.sample.defaults.find_features_def import find_features_defaults
89
- from masster.sample.defaults.find_adducts_def import find_adducts_defaults
90
- from masster.sample.defaults.find_ms2_def import find_ms2_defaults
91
- from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
92
- from masster.sample.save import _save_featureXML
93
- from masster.sample.save import export_chrom
94
- from masster.sample.save import export_dda_stats
95
- from masster.sample.save import export_features
96
- from masster.sample.save import export_mgf
97
- from masster.sample.save import save
98
-
99
-
100
- class Sample:
101
- """
102
- Main class for handling mass spectrometry sample data analysis.
103
-
104
- This class provides comprehensive functionality for loading, processing,
105
- and analyzing DDA (data-dependent acquisition) mass spectrometry data.
106
- """
107
-
108
- def __init__(
109
- self,
110
- **kwargs,
111
- ):
112
- """
113
- Initialize a DDA (data-dependent acquisition) instance.
114
-
115
- This constructor initializes various attributes related to file handling,
116
- data storage, and processing parameters used for mass spectrometry data analysis.
117
-
118
- Parameters:
119
- **kwargs: Keyword arguments for setting sample parameters. Can include:
120
- - A sample_defaults instance to set all parameters at once (pass as params=sample_defaults(...))
121
- - Individual parameter names and values (see sample_defaults for available parameters)
122
-
123
- Core initialization parameters:
124
- - file (str, optional): The file path or file object to be loaded
125
- - ondisk (bool): Whether to keep data on disk or load into memory. Default is False
126
- - label (str, optional): An optional label to identify the file or dataset
127
- - log_level (str): The logging level to be set for the logger. Defaults to 'INFO'
128
- - log_label (str, optional): Optional label for the logger
129
-
130
- Processing parameters:
131
- - All parameters from sample_defaults class (see class documentation)
132
-
133
- For backward compatibility, original signature is supported:
134
- Sample(file=..., ondisk=..., label=..., log_level=..., log_label=...)
135
- """
136
- # Initialize default parameters
137
-
138
- # Check if a sample_defaults instance was passed
139
- if "params" in kwargs and isinstance(kwargs["params"], sample_defaults):
140
- params = kwargs.pop("params")
141
- else:
142
- # Create default parameters and update with provided values
143
- params = sample_defaults()
144
-
145
- # Update with any provided parameters
146
- for key, value in kwargs.items():
147
- if hasattr(params, key):
148
- params.set(key, value, validate=True)
149
-
150
- # Store parameter instance for method access
151
- self.parameters = params
152
-
153
- # Set instance attributes for logger
154
- self.log_level = params.log_level
155
- self.log_label = (params.log_label + " | " if params.log_label else "")
156
- self.log_sink = params.log_sink
157
-
158
- # Initialize independent logger
159
- from masster.logger import MassterLogger
160
- self.logger = MassterLogger(
161
- instance_type="sample",
162
- level=params.log_level,
163
- label=params.log_label if params.log_label else "",
164
- sink=params.log_sink
165
- )
166
-
167
- # Initialize history as dict to keep track of processing parameters
168
- self.history = {}
169
- self.store_history(["sample"], params.to_dict())
170
-
171
- # these are sample attributes
172
- self.file_path = None # Path to the file
173
- # Type of the file (e.g., mzML, RAW, WIFF, mzpkl)
174
- self.file_type = None
175
- # Interface to handle the file operations (e.g., oms, alpharaw)
176
- self.file_interface = None
177
- # The file object once loaded, can be oms.MzMLFile or alpharaw.AlphaRawFile
178
- self.file_obj = None
179
-
180
- self.features = None # the feature map as obtained by openMS
181
- self.features_df = None # the polars data frame with features
182
- # the polars data frame with metadata of all scans in the file
183
- self.scans_df = pl.DataFrame()
184
- # the polars data frame with MS1 level data
185
- self.ms1_df = pl.DataFrame()
186
-
187
- # lightweight lib data for matching, targeted analyses, etc. > superseded by study methods
188
- self.lib = None
189
- self.lib_match = None
190
- self.chrom_df = None
191
-
192
- if params.filename is not None:
193
- self.load(params.filename, ondisk=params.ondisk)
194
-
195
-
196
- # Attach module functions as class methods
197
- load = load
198
- save = save
199
- find_features = find_features
200
- find_adducts = find_adducts
201
- find_ms2 = find_ms2
202
- get_spectrum = get_spectrum
203
- filter_features = filter_features
204
- analyze_dda = analyze_dda
205
- store_history = store_history
206
- get_parameters = get_parameters
207
- update_parameters = update_parameters
208
- get_parameters_property = get_parameters_property
209
- set_parameters_property = set_parameters_property
210
- export_features = export_features
211
- export_mgf = export_mgf
212
- export_chrom = export_chrom
213
- export_dda_stats = export_dda_stats
214
- plot_2d = plot_2d
215
- plot_2d_oracle = plot_2d_oracle
216
- plot_dda_stats = plot_dda_stats
217
- plot_eic = plot_eic
218
- plot_feature_stats = plot_feature_stats
219
- plot_ms2_cycle = plot_ms2_cycle
220
- plot_ms2_eic = plot_ms2_eic
221
- plot_ms2_q1 = plot_ms2_q1
222
- get_feature = get_feature
223
- get_scan = get_scan
224
- get_dda_stats = get_dda_stats
225
- find_closest_scan = find_closest_scan
226
-
227
- # Additional method assignments for all imported functions
228
- _load_sample5 = _load_sample5
229
- _save_sample5 = _save_sample5
230
- _delete_ms2 = _delete_ms2
231
- _get_scan_uids = _get_scan_uids
232
- _get_feature_uids = _get_feature_uids
233
- _load_featureXML = _load_featureXML
234
- _load_ms2data = _load_ms2data
235
- _load_mzML = _load_mzML
236
- _load_raw = _load_raw
237
- _load_wiff = _load_wiff
238
- chrom_extract = chrom_extract
239
- index_file = index_file
240
- sanitize = sanitize
241
- _clean_features_df = _clean_features_df
242
- _features_deisotope = _features_deisotope
243
- _get_ztscan_stats = _get_ztscan_stats
244
- _spec_to_mat = _spec_to_mat
245
- _save_featureXML = _save_featureXML
246
-
247
-
248
- def logger_update(self, level: str | None = None, label: str | None = None, sink: str | None = None):
249
- """Update the logging configuration for this Sample instance.
250
-
251
- Args:
252
- level: New logging level ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
253
- label: New label for log messages
254
- sink: New output sink (file path, file object, or "sys.stdout")
255
- """
256
- if level is not None:
257
- self.log_level = level.upper()
258
- self.logger.update_level(level)
259
-
260
- if label is not None:
261
- self.log_label = label + " | " if len(label) > 0 else ""
262
- self.logger.update_label(self.log_label)
263
-
264
- if sink is not None:
265
- if sink == "sys.stdout":
266
- self.log_sink = sys.stdout
267
- else:
268
- self.log_sink = sink
269
- self.logger.update_sink(self.log_sink)
270
-
271
- def reload(self):
272
- """
273
- Reloads all masster modules to pick up any changes to their source code,
274
- and updates the instance's class reference to the newly reloaded class version.
275
- This ensures that the instance uses the latest implementation without restarting the interpreter.
276
- """
277
- # Reset logger configuration flags to allow proper reconfiguration after reload
278
- try:
279
- import masster.sample.logger as logger_module
280
-
281
- if hasattr(logger_module, "_SAMPLE_LOGGER_CONFIGURED"):
282
- logger_module._SAMPLE_LOGGER_CONFIGURED = False
283
- except Exception:
284
- pass
285
-
286
- # Get the base module name (masster)
287
- base_modname = self.__class__.__module__.split(".")[0]
288
- current_module = self.__class__.__module__
289
-
290
- # Dynamically find all sample submodules
291
- sample_modules = []
292
- sample_module_prefix = f"{base_modname}.sample."
293
-
294
- # Get all currently loaded modules that are part of the sample package
295
- for module_name in sys.modules:
296
- if (
297
- module_name.startswith(sample_module_prefix)
298
- and module_name != current_module
299
- ):
300
- sample_modules.append(module_name)
301
-
302
- # Add core masster modules
303
- core_modules = [
304
- f"{base_modname}._version",
305
- f"{base_modname}.chromatogram",
306
- f"{base_modname}.spectrum",
307
- ]
308
-
309
- # Add study submodules
310
- study_modules = []
311
- study_module_prefix = f"{base_modname}.study."
312
- for module_name in sys.modules:
313
- if (
314
- module_name.startswith(study_module_prefix)
315
- and module_name != current_module
316
- ):
317
- study_modules.append(module_name)
318
-
319
- # Add parameters submodules
320
- parameters_modules = []
321
- parameters_module_prefix = f"{base_modname}.parameters."
322
- for module_name in sys.modules:
323
- if (
324
- module_name.startswith(parameters_module_prefix)
325
- and module_name != current_module
326
- ):
327
- parameters_modules.append(module_name)
328
-
329
- all_modules_to_reload = (
330
- core_modules + sample_modules + study_modules + parameters_modules
331
- )
332
-
333
- # Reload all discovered modules
334
- for full_module_name in all_modules_to_reload:
335
- try:
336
- if full_module_name in sys.modules:
337
- mod = sys.modules[full_module_name]
338
- importlib.reload(mod)
339
- self.logger.debug(f"Reloaded module: {full_module_name}")
340
- except Exception as e:
341
- self.logger.warning(f"Failed to reload module {full_module_name}: {e}")
342
-
343
- # Finally, reload the current module (sample.py)
344
- try:
345
- mod = __import__(current_module, fromlist=[current_module.split(".")[0]])
346
- importlib.reload(mod)
347
-
348
- # Get the updated class reference from the reloaded module
349
- new = getattr(mod, self.__class__.__name__)
350
- # Update the class reference of the instance
351
- self.__class__ = new
352
-
353
- self.logger.debug("Module reload completed")
354
- except Exception as e:
355
- self.logger.error(f"Failed to reload current module {current_module}: {e}")
356
-
357
- def get_version(self):
358
- return get_version()
359
-
360
- def info(self):
361
- # show the key attributes of the object
362
- str = f"File: {os.path.basename(self.file_path)}\n"
363
- str += f"Path: {os.path.dirname(self.file_path)}\n"
364
- str += f"MS1 scans: {len(self.scans_df.filter(pl.col('ms_level') == 1))}\n"
365
- str += f"MS2 scans: {len(self.scans_df.filter(pl.col('ms_level') == 2))}\n"
366
- if self.features_df is not None:
367
- str += f"Features: {len(self.features_df) if self.features_df is not None else 0}\n"
368
- str += f"Features with MS2 spectra: {len(self.features_df.filter(pl.col('ms2_scans').is_not_null()))}\n"
369
- else:
370
- str += "Features: 0\n"
371
- str += "Features with MS2 spectra: 0\n"
372
-
373
- print(str)
374
-
375
- def __str__(self):
376
- if self.features_df is None:
377
- str = f"masster Sample, source: {os.path.basename(self.file_path)}, features: 0"
378
- else:
379
- str = f"masster Sample, source: {os.path.basename(self.file_path)}, features: {len(self.features_df)}"
380
- return str
381
-
382
-
383
- if __name__ == "__main__":
384
- print(
385
- "This module is not meant to be run directly. Please import it in your script.",
386
- )
387
-
1
+ """
2
+ sample.py
3
+
4
+ This module provides tools for processing and analyzing Data-Dependent Acquisition (DDA) mass spectrometry data.
5
+ It defines the `Sample` class, which offers methods to load, process, analyze, and visualize mass spectrometry data
6
+ from various file formats, including mzML, Thermo RAW, and Sciex WIFF formats.
7
+
8
+ Key Features:
9
+ - **File Handling**: Load and save data in multiple formats.
10
+ - **Feature Detection**: Detect and process mass spectrometry features.
11
+ - **Spectrum Analysis**: Retrieve and analyze MS1/MS2 spectra.
12
+ - **Visualization**: Generate interactive and static plots for spectra and chromatograms.
13
+ - **Statistics**: Compute and export detailed DDA run statistics.
14
+
15
+ Dependencies:
16
+ - `pyopenms`: For file handling and feature detection.
17
+ - `polars` and `pandas`: For data manipulation.
18
+ - `numpy`: For numerical computations.
19
+ - `bokeh`, `panel`, `holoviews`, `datashader`: For interactive visualizations.
20
+
21
+ Classes:
22
+ - `Sample`: Main class for handling DDA data, providing methods for data import, processing, and visualization.
23
+
24
+ Example Usage:
25
+ ```python
26
+ from masster.sample import Sample
27
+
28
+ sample = Sample(file="example.mzML")
29
+ sample.find_features()
30
+ sample.plot_2d()
31
+ ```
32
+
33
+ """
34
+
35
+ import importlib
36
+ import os
37
+ import sys
38
+
39
+ import polars as pl
40
+
41
+ from masster._version import get_version
42
+
43
+ from masster.sample.defaults.sample_def import sample_defaults
44
+
45
+ # Sample-specific imports
46
+ from masster.sample.h5 import _load_sample5
47
+ from masster.sample.h5 import _save_sample5
48
+ from masster.sample.helpers import _delete_ms2
49
+ from masster.sample.helpers import _estimate_memory_usage
50
+ from masster.sample.helpers import _get_scan_uids
51
+ from masster.sample.helpers import _get_feature_uids
52
+ from masster.sample.helpers import _features_sync
53
+ from masster.sample.helpers import features_delete
54
+ from masster.sample.helpers import features_filter
55
+ from masster.sample.helpers import select
56
+ from masster.sample.helpers import select_closest_scan
57
+ from masster.sample.helpers import get_dda_stats
58
+ from masster.sample.helpers import get_feature
59
+ from masster.sample.helpers import get_scan
60
+ from masster.sample.helpers import set_source
61
+ from masster.sample.load import _load_featureXML
62
+ from masster.sample.load import _load_ms2data
63
+ from masster.sample.load import _load_mzML
64
+ from masster.sample.load import _load_raw
65
+ from masster.sample.load import _load_wiff
66
+ from masster.sample.load import chrom_extract
67
+ from masster.sample.load import index_file
68
+ from masster.sample.load import load
69
+ from masster.sample.load import sanitize
70
+ from masster.logger import MassterLogger
71
+ from masster.sample.plot import plot_2d
72
+ from masster.sample.plot import plot_2d_oracle
73
+ from masster.sample.plot import plot_dda_stats
74
+ from masster.sample.plot import plot_eic
75
+ from masster.sample.plot import plot_feature_stats
76
+ from masster.sample.plot import plot_ms2_cycle
77
+ from masster.sample.plot import plot_ms2_eic
78
+ from masster.sample.plot import plot_ms2_q1
79
+ from masster.sample.processing import _clean_features_df
80
+ from masster.sample.processing import _features_deisotope
81
+ from masster.sample.processing import _get_ztscan_stats
82
+ from masster.sample.processing import _spec_to_mat
83
+ from masster.sample.processing import analyze_dda
84
+ from masster.sample.processing import find_adducts
85
+ from masster.sample.processing import find_features
86
+ from masster.sample.processing import find_ms2
87
+ from masster.sample.processing import get_spectrum
88
+ from masster.sample.parameters import store_history
89
+ from masster.sample.parameters import get_parameters
90
+ from masster.sample.parameters import update_parameters
91
+ from masster.sample.parameters import get_parameters_property
92
+ from masster.sample.parameters import set_parameters_property
93
+ from masster.sample.save import _save_featureXML
94
+ from masster.sample.save import export_chrom
95
+ from masster.sample.save import export_dda_stats
96
+ from masster.sample.save import export_features
97
+ from masster.sample.save import export_mgf
98
+ from masster.sample.save import save
99
+
100
+
101
+ class Sample:
102
+ """
103
+ Main class for handling mass spectrometry sample data analysis.
104
+
105
+ This class provides comprehensive functionality for loading, processing,
106
+ and analyzing DDA (data-dependent acquisition) mass spectrometry data.
107
+ """
108
+
109
+ def __init__(
110
+ self,
111
+ **kwargs,
112
+ ):
113
+ """
114
+ Initialize a DDA (data-dependent acquisition) instance.
115
+
116
+ This constructor initializes various attributes related to file handling,
117
+ data storage, and processing parameters used for mass spectrometry data analysis.
118
+
119
+ Parameters:
120
+ **kwargs: Keyword arguments for setting sample parameters. Can include:
121
+ - A sample_defaults instance to set all parameters at once (pass as params=sample_defaults(...))
122
+ - Individual parameter names and values (see sample_defaults for available parameters)
123
+
124
+ Core initialization parameters:
125
+ - file (str, optional): The file path or file object to be loaded
126
+ - ondisk (bool): Whether to keep data on disk or load into memory. Default is False
127
+ - label (str, optional): An optional label to identify the file or dataset
128
+ - log_level (str): The logging level to be set for the logger. Defaults to 'INFO'
129
+ - log_label (str, optional): Optional label for the logger
130
+
131
+ Processing parameters:
132
+ - All parameters from sample_defaults class (see class documentation)
133
+
134
+ For backward compatibility, original signature is supported:
135
+ Sample(file=..., ondisk=..., label=..., log_level=..., log_label=...)
136
+ """
137
+ # Initialize default parameters
138
+
139
+ # Check if a sample_defaults instance was passed
140
+ if "params" in kwargs and isinstance(kwargs["params"], sample_defaults):
141
+ params = kwargs.pop("params")
142
+ else:
143
+ # Create default parameters and update with provided values
144
+ params = sample_defaults()
145
+
146
+ # Update with any provided parameters
147
+ for key, value in kwargs.items():
148
+ if hasattr(params, key):
149
+ params.set(key, value, validate=True)
150
+
151
+ # Store parameter instance for method access
152
+ self.parameters = params
153
+
154
+ # Set instance attributes for logger
155
+ self.log_level = params.log_level.upper()
156
+ self.log_label = params.log_label + " | " if params.log_label else ""
157
+ self.log_sink = params.log_sink
158
+
159
+ # Initialize independent logger
160
+ from masster.logger import MassterLogger
161
+ self.logger = MassterLogger(
162
+ instance_type="sample",
163
+ level=params.log_level.upper(),
164
+ label=params.log_label if params.log_label else "",
165
+ sink=params.log_sink,
166
+ )
167
+
168
+ # Initialize history as dict to keep track of processing parameters
169
+ self.history = {}
170
+ self.store_history(["sample"], params.to_dict())
171
+
172
+ # this is the path to the original file. It's never sample5
173
+ self.file_source = None
174
+ # this is the path to the object that was loaded. It could be sample5
175
+ self.file_path = None
176
+ # Type of the file (e.g., mzML, RAW, WIFF, mzpkl)
177
+ self.file_type = None
178
+ # Interface to handle the file operations (e.g., oms, alpharaw)
179
+ self.file_interface = None
180
+ # The file object once loaded, can be oms.MzMLFile or alpharaw.AlphaRawFile
181
+ self.file_obj = None
182
+
183
+ self.features = None # the feature map as obtained by openMS
184
+ self.features_df = None # the polars data frame with features
185
+ # the polars data frame with metadata of all scans in the file
186
+ self.scans_df = pl.DataFrame()
187
+ # the polars data frame with MS1 level data
188
+ self.ms1_df = pl.DataFrame()
189
+
190
+ # lightweight lib data for matching, targeted analyses, etc. > superseded by study methods
191
+ self.lib = None
192
+ self.lib_match = None
193
+ self.chrom_df = None
194
+
195
+ if params.filename is not None:
196
+ self.load(params.filename, ondisk=params.ondisk)
197
+
198
+ # Attach module functions as class methods
199
+ load = load
200
+ save = save
201
+ find_features = find_features
202
+ find_adducts = find_adducts
203
+ find_ms2 = find_ms2
204
+ get_spectrum = get_spectrum
205
+ filter = filter
206
+ select = select
207
+ features_filter = features_filter # New function that keeps only specified features
208
+ filter_features = filter
209
+ features_select = select
210
+ select_features = select
211
+ analyze_dda = analyze_dda
212
+ store_history = store_history
213
+ get_parameters = get_parameters
214
+ update_parameters = update_parameters
215
+ get_parameters_property = get_parameters_property
216
+ set_parameters_property = set_parameters_property
217
+ export_features = export_features
218
+ export_mgf = export_mgf
219
+ export_chrom = export_chrom
220
+ export_dda_stats = export_dda_stats
221
+ plot_2d = plot_2d
222
+ plot_2d_oracle = plot_2d_oracle
223
+ plot_dda_stats = plot_dda_stats
224
+ plot_eic = plot_eic
225
+ plot_feature_stats = plot_feature_stats
226
+ plot_ms2_cycle = plot_ms2_cycle
227
+ plot_ms2_eic = plot_ms2_eic
228
+ plot_ms2_q1 = plot_ms2_q1
229
+ get_feature = get_feature
230
+ get_scan = get_scan
231
+ get_dda_stats = get_dda_stats
232
+ select_closest_scan = select_closest_scan
233
+ set_source = set_source
234
+
235
+ # Additional method assignments for all imported functions
236
+ _load_sample5 = _load_sample5
237
+ _save_sample5 = _save_sample5
238
+ _delete_ms2 = _delete_ms2
239
+ _estimate_memory_usage = _estimate_memory_usage
240
+ _get_scan_uids = _get_scan_uids
241
+ _get_feature_uids = _get_feature_uids
242
+ _features_sync = _features_sync
243
+ features_delete = features_delete
244
+ features_filter = features_filter
245
+ _load_featureXML = _load_featureXML
246
+ _load_ms2data = _load_ms2data
247
+ _load_mzML = _load_mzML
248
+ _load_raw = _load_raw
249
+ _load_wiff = _load_wiff
250
+ chrom_extract = chrom_extract
251
+ index_file = index_file
252
+ sanitize = sanitize
253
+ _clean_features_df = _clean_features_df
254
+ _features_deisotope = _features_deisotope
255
+ _get_ztscan_stats = _get_ztscan_stats
256
+ _spec_to_mat = _spec_to_mat
257
+ _save_featureXML = _save_featureXML
258
+
259
+ def logger_update(self, level: str | None = None, label: str | None = None, sink: str | None = None):
260
+ """Update the logging configuration for this Sample instance.
261
+
262
+ Args:
263
+ level: New logging level ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
264
+ label: New label for log messages
265
+ sink: New output sink (file path, file object, or "sys.stdout")
266
+ """
267
+ if level is not None:
268
+ self.log_level = level.upper()
269
+ self.logger.update_level(level)
270
+
271
+ if label is not None:
272
+ self.log_label = label + " | " if len(label) > 0 else ""
273
+ self.logger.update_label(self.log_label)
274
+
275
+ if sink is not None:
276
+ if sink == "sys.stdout":
277
+ self.log_sink = sys.stdout
278
+ else:
279
+ self.log_sink = sink
280
+ self.logger.update_sink(self.log_sink)
281
+
282
+ def _reload(self):
283
+ """
284
+ Reloads all masster modules to pick up any changes to their source code,
285
+ and updates the instance's class reference to the newly reloaded class version.
286
+ This ensures that the instance uses the latest implementation without restarting the interpreter.
287
+ """
288
+ # Reset logger configuration flags to allow proper reconfiguration after reload
289
+ try:
290
+ import masster.sample.logger as logger_module
291
+
292
+ if hasattr(logger_module, "_SAMPLE_LOGGER_CONFIGURED"):
293
+ logger_module._SAMPLE_LOGGER_CONFIGURED = False
294
+ except Exception:
295
+ pass
296
+
297
+ # Get the base module name (masster)
298
+ base_modname = self.__class__.__module__.split(".")[0]
299
+ current_module = self.__class__.__module__
300
+
301
+ # Dynamically find all sample submodules
302
+ sample_modules = []
303
+ sample_module_prefix = f"{base_modname}.sample."
304
+
305
+ # Get all currently loaded modules that are part of the sample package
306
+ for module_name in sys.modules:
307
+ if module_name.startswith(sample_module_prefix) and module_name != current_module:
308
+ sample_modules.append(module_name)
309
+
310
+ # Add core masster modules
311
+ core_modules = [
312
+ f"{base_modname}._version",
313
+ f"{base_modname}.chromatogram",
314
+ f"{base_modname}.spectrum",
315
+ ]
316
+
317
+ # Add study submodules
318
+ study_modules = []
319
+ study_module_prefix = f"{base_modname}.study."
320
+ for module_name in sys.modules:
321
+ if module_name.startswith(study_module_prefix) and module_name != current_module:
322
+ study_modules.append(module_name)
323
+
324
+ # Add parameters submodules
325
+ parameters_modules = []
326
+ parameters_module_prefix = f"{base_modname}.parameters."
327
+ for module_name in sys.modules:
328
+ if module_name.startswith(parameters_module_prefix) and module_name != current_module:
329
+ parameters_modules.append(module_name)
330
+
331
+ all_modules_to_reload = core_modules + sample_modules + study_modules + parameters_modules
332
+
333
+ # Reload all discovered modules
334
+ for full_module_name in all_modules_to_reload:
335
+ try:
336
+ if full_module_name in sys.modules:
337
+ mod = sys.modules[full_module_name]
338
+ importlib.reload(mod)
339
+ self.logger.debug(f"Reloaded module: {full_module_name}")
340
+ except Exception as e:
341
+ self.logger.warning(f"Failed to reload module {full_module_name}: {e}")
342
+
343
+ # Finally, reload the current module (sample.py)
344
+ try:
345
+ mod = __import__(current_module, fromlist=[current_module.split(".")[0]])
346
+ importlib.reload(mod)
347
+
348
+ # Get the updated class reference from the reloaded module
349
+ new = getattr(mod, self.__class__.__name__)
350
+ # Update the class reference of the instance
351
+ self.__class__ = new
352
+
353
+ self.logger.debug("Module reload completed")
354
+ except Exception as e:
355
+ self.logger.error(f"Failed to reload current module {current_module}: {e}")
356
+
357
+ def get_version(self):
358
+ return get_version()
359
+
360
+ def info(self):
361
+ # show the key attributes of the object
362
+ str = f"File: {os.path.basename(self.file_path)}\n"
363
+ str += f"Path: {os.path.dirname(self.file_path)}\n"
364
+ str += f"Source: {self.file_source}\n"
365
+ str += f"MS1 scans: {len(self.scans_df.filter(pl.col('ms_level') == 1))}\n"
366
+ str += f"MS2 scans: {len(self.scans_df.filter(pl.col('ms_level') == 2))}\n"
367
+ if self.features_df is not None:
368
+ str += f"Features: {len(self.features_df) if self.features_df is not None else 0}\n"
369
+ str += f"Features with MS2 spectra: {len(self.features_df.filter(pl.col('ms2_scans').is_not_null()))}\n"
370
+ else:
371
+ str += "Features: 0\n"
372
+ str += "Features with MS2 spectra: 0\n"
373
+
374
+ # estimate memory usage
375
+ mem_usage = self._estimate_memory_usage()
376
+ str += f"Estimated memory usage: {mem_usage:.2f} MB\n"
377
+
378
+ print(str)
379
+
380
+ def __str__(self):
381
+ if self.features_df is None:
382
+ str = f"masster Sample, source: {os.path.basename(self.file_path)}, features: 0"
383
+ else:
384
+ str = f"masster Sample, source: {os.path.basename(self.file_path)}, features: {len(self.features_df)}"
385
+ return str
386
+
387
+
388
+ if __name__ == "__main__":
389
+ print(
390
+ "This module is not meant to be run directly. Please import it in your script.",
391
+ )