masster 0.5.16__tar.gz → 0.5.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.5.16 → masster-0.5.18}/PKG-INFO +3 -4
- {masster-0.5.16 → masster-0.5.18}/README.md +2 -1
- {masster-0.5.16 → masster-0.5.18}/pyproject.toml +1 -3
- {masster-0.5.16 → masster-0.5.18}/src/masster/_version.py +1 -1
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/adducts.py +12 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/sample_def.py +30 -6
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/h5.py +52 -6
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/lib.py +9 -3
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/load.py +47 -120
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/processing.py +1 -1
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/sample.py +5 -3
- masster-0.5.18/src/masster/sample/sciex.py +638 -0
- masster-0.5.18/src/masster/sample/thermo.py +801 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/id.py +3 -1
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/load.py +15 -792
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/study.py +1 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/wizard/wizard.py +178 -225
- {masster-0.5.16 → masster-0.5.18}/uv.lock +1 -179
- masster-0.5.16/src/masster/sample/sciex.py +0 -1224
- {masster-0.5.16 → masster-0.5.18}/.github/workflows/publish.yml +0 -0
- {masster-0.5.16 → masster-0.5.18}/.github/workflows/security.yml +0 -0
- {masster-0.5.16 → masster-0.5.18}/.github/workflows/test.yml +0 -0
- {masster-0.5.16 → masster-0.5.18}/.gitignore +0 -0
- {masster-0.5.16 → masster-0.5.18}/.pre-commit-config.yaml +0 -0
- {masster-0.5.16 → masster-0.5.18}/LICENSE +0 -0
- {masster-0.5.16 → masster-0.5.18}/Makefile +0 -0
- {masster-0.5.16 → masster-0.5.18}/TESTING.md +0 -0
- {masster-0.5.16 → masster-0.5.18}/demo/example_batch_process.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/demo/example_sample_process.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/__init__.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/chromatogram.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/libs/aa.csv +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/libs/ccm.csv +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/libs/hilic.csv +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/libs/urine.csv +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/lib/__init__.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/lib/lib.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/logger.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/__init__.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/helpers.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/parameters.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/plot.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/quant.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/sample5_schema.json +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/sample/save.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/spectrum.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/__init__.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/analysis.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/fill_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/identify_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/integrate_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/merge_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/export.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/h5.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/helpers.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/importers.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/merge.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/parameters.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/plot.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/processing.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/save.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/study/study5_schema.json +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/wizard/README.md +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/wizard/__init__.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/src/masster/wizard/example.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/conftest.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_chromatogram.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_defaults.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_imports.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_integration.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_logger.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_parameters.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_sample.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_spectrum.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_study.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tests/test_version.py +0 -0
- {masster-0.5.16 → masster-0.5.18}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: masster
|
|
3
|
-
Version: 0.5.16
|
|
3
|
+
Version: 0.5.18
|
|
4
4
|
Summary: Mass spectrometry data analysis package
|
|
5
5
|
Project-URL: homepage, https://github.com/zamboni-lab/masster
|
|
6
6
|
Project-URL: repository, https://github.com/zamboni-lab/masster
|
|
@@ -681,7 +681,6 @@ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
|
681
681
|
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
682
682
|
Requires-Python: >=3.11
|
|
683
683
|
Requires-Dist: alpharaw>=0.4.8
|
|
684
|
-
Requires-Dist: altair>=5.5.0
|
|
685
684
|
Requires-Dist: bokeh>=3.7.3
|
|
686
685
|
Requires-Dist: cmap>=0.6.2
|
|
687
686
|
Requires-Dist: datashader>=0.18.1
|
|
@@ -698,7 +697,6 @@ Requires-Dist: pandas>=2.2.0
|
|
|
698
697
|
Requires-Dist: panel>=1.7.0
|
|
699
698
|
Requires-Dist: polars>=1.0.0
|
|
700
699
|
Requires-Dist: pyopenms>=3.3.0
|
|
701
|
-
Requires-Dist: pyteomics>=4.7.0
|
|
702
700
|
Requires-Dist: pythonnet>=3.0.0
|
|
703
701
|
Requires-Dist: scikit-learn>=1.7.1
|
|
704
702
|
Requires-Dist: scipy>=1.12.0
|
|
@@ -749,10 +747,11 @@ pip install masster
|
|
|
749
747
|
|
|
750
748
|
```python
|
|
751
749
|
import masster
|
|
752
|
-
masster.wizard.
|
|
750
|
+
wiz = masster.wizard.create_scripts(
|
|
753
751
|
source=r'..\..\folder_with_raw_data',
|
|
754
752
|
folder=r'..\..folder_to_store_results'
|
|
755
753
|
)
|
|
754
|
+
wiz.run()
|
|
756
755
|
```
|
|
757
756
|
|
|
758
757
|
This will run a wizard that should perform all key steps and save the results to the `folder`.
|
|
@@ -19,10 +19,11 @@ pip install masster
|
|
|
19
19
|
|
|
20
20
|
```python
|
|
21
21
|
import masster
|
|
22
|
-
masster.wizard.
|
|
22
|
+
wiz = masster.wizard.create_scripts(
|
|
23
23
|
source=r'..\..\folder_with_raw_data',
|
|
24
24
|
folder=r'..\..folder_to_store_results'
|
|
25
25
|
)
|
|
26
|
+
wiz.run()
|
|
26
27
|
```
|
|
27
28
|
|
|
28
29
|
This will run a wizard that should perform all key steps and save the results to the `folder`.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
[project]
|
|
3
3
|
name = "masster"
|
|
4
|
-
version = "0.5.16"
|
|
4
|
+
version = "0.5.18"
|
|
5
5
|
description = "Mass spectrometry data analysis package"
|
|
6
6
|
authors = [
|
|
7
7
|
{ name = "Zamboni Lab" }
|
|
@@ -27,7 +27,6 @@ urls.repository = "https://github.com/zamboni-lab/masster"
|
|
|
27
27
|
urls.documentation = "https://github.com/zamboni-lab/masster#readme"
|
|
28
28
|
dependencies = [
|
|
29
29
|
"alpharaw>=0.4.8",
|
|
30
|
-
"altair>=5.5.0",
|
|
31
30
|
"bokeh>=3.7.3",
|
|
32
31
|
"datashader>=0.18.1",
|
|
33
32
|
"holoviews>=1.21.0",
|
|
@@ -40,7 +39,6 @@ dependencies = [
|
|
|
40
39
|
"panel>=1.7.0",
|
|
41
40
|
"polars>=1.0.0",
|
|
42
41
|
"pyopenms>=3.3.0",
|
|
43
|
-
"pyteomics>=4.7.0",
|
|
44
42
|
"pythonnet>=3.0.0",
|
|
45
43
|
"scipy>=1.12.0",
|
|
46
44
|
"tqdm>=4.65.0",
|
|
@@ -403,6 +403,7 @@ def find_adducts(self, **kwargs):
|
|
|
403
403
|
|
|
404
404
|
Main parameters (from ``find_adducts_defaults``):
|
|
405
405
|
- adducts (list[str] | str | None): List of potential adduct strings or ionization mode ('pos'/'neg').
|
|
406
|
+
If None, automatically uses sample.polarity to select appropriate default adducts.
|
|
406
407
|
- charge_min (int): Minimum allowed charge state (default: -4).
|
|
407
408
|
- charge_max (int): Maximum allowed charge state (default: 4).
|
|
408
409
|
- retention_max_diff (float): Maximum RT difference in seconds (default: 1.0).
|
|
@@ -431,6 +432,17 @@ def find_adducts(self, **kwargs):
|
|
|
431
432
|
else:
|
|
432
433
|
self.logger.warning(f"Unknown parameter {key} ignored")
|
|
433
434
|
|
|
435
|
+
# Auto-set adducts based on sample polarity if not explicitly provided
|
|
436
|
+
if params.adducts is None and hasattr(self, 'polarity') and self.polarity is not None:
|
|
437
|
+
if self.polarity.lower() in ['positive', 'pos']:
|
|
438
|
+
params.set('adducts', 'positive', validate=True)
|
|
439
|
+
self.logger.debug(f"Auto-set adducts to 'positive' based on sample polarity: {self.polarity}")
|
|
440
|
+
elif self.polarity.lower() in ['negative', 'neg']:
|
|
441
|
+
params.set('adducts', 'negative', validate=True)
|
|
442
|
+
self.logger.debug(f"Auto-set adducts to 'negative' based on sample polarity: {self.polarity}")
|
|
443
|
+
else:
|
|
444
|
+
self.logger.debug(f"Unknown sample polarity '{self.polarity}', using default adducts")
|
|
445
|
+
|
|
434
446
|
# Check if features_df exists and has data
|
|
435
447
|
if not hasattr(self, "features_df") or len(self.features_df) == 0:
|
|
436
448
|
self.logger.warning(
|
|
@@ -32,30 +32,42 @@ class sample_defaults:
|
|
|
32
32
|
centroid_prominence (int): Prominence parameter for centroiding. Default is -1.
|
|
33
33
|
max_points_per_spectrum (int): Maximum number of points per spectrum. Default is 50000.
|
|
34
34
|
dia_window (Optional[float]): DIA window size. Default is None.
|
|
35
|
+
type (str): Acquisition type/mode. Options are 'dda', 'swath', 'ztscan', 'fia'. Default is 'dda'.
|
|
36
|
+
polarity (Optional[str]): Ionization polarity. Options are None, 'positive', 'negative'. Default is None.
|
|
35
37
|
"""
|
|
36
38
|
|
|
37
39
|
filename: Optional[str] = None
|
|
38
|
-
ondisk: bool = False
|
|
39
40
|
label: str | None = None
|
|
40
41
|
log_level: str = "INFO"
|
|
41
42
|
log_label: Optional[str] = ""
|
|
42
43
|
log_sink: str = "sys.stdout"
|
|
43
|
-
|
|
44
|
+
ondisk: bool = False
|
|
45
|
+
|
|
46
|
+
# file and data handling settings
|
|
47
|
+
type: str = "dda"
|
|
48
|
+
polarity: str | None = None
|
|
49
|
+
|
|
50
|
+
# chromatographic settings
|
|
51
|
+
#chrom_fwhm: float = 1.0
|
|
52
|
+
eic_mz_tol: float = 0.01
|
|
53
|
+
eic_rt_tol: float = 10.0
|
|
54
|
+
|
|
55
|
+
# mz tolerances
|
|
44
56
|
mz_tol_ms1_da: float = 0.002
|
|
45
57
|
mz_tol_ms2_da: float = 0.005
|
|
46
58
|
mz_tol_ms1_ppm: float = 5.0
|
|
47
59
|
mz_tol_ms2_ppm: float = 10.0
|
|
60
|
+
|
|
61
|
+
# centroiding settings
|
|
48
62
|
centroid_algo: str = "lmp"
|
|
49
63
|
centroid_min_points_ms1: int = 5
|
|
50
64
|
centroid_min_points_ms2: int = 4
|
|
51
65
|
centroid_smooth: int = 5
|
|
52
66
|
centroid_refine: bool = True
|
|
53
67
|
centroid_prominence: int = -1
|
|
68
|
+
|
|
69
|
+
# data retrieval settings
|
|
54
70
|
max_points_per_spectrum: int = 50000
|
|
55
|
-
dia_window: float | None = None
|
|
56
|
-
|
|
57
|
-
eic_mz_tol: float = 0.01
|
|
58
|
-
eic_rt_tol: float = 10.0
|
|
59
71
|
|
|
60
72
|
_param_metadata: dict[str, dict[str, Any]] = field(
|
|
61
73
|
default_factory=lambda: {
|
|
@@ -178,6 +190,18 @@ class sample_defaults:
|
|
|
178
190
|
"min_value": 0.2,
|
|
179
191
|
"max_value": 60.0,
|
|
180
192
|
},
|
|
193
|
+
"type": {
|
|
194
|
+
"dtype": str,
|
|
195
|
+
"description": "Acquisition type/mode",
|
|
196
|
+
"default": "dda",
|
|
197
|
+
"allowed_values": ["dda", "swath", "ztscan", "fia"],
|
|
198
|
+
},
|
|
199
|
+
"polarity": {
|
|
200
|
+
"dtype": "Optional[str]",
|
|
201
|
+
"description": "Ionization polarity",
|
|
202
|
+
"default": None,
|
|
203
|
+
"allowed_values": ["positive", "negative"],
|
|
204
|
+
},
|
|
181
205
|
},
|
|
182
206
|
repr=False,
|
|
183
207
|
)
|
|
@@ -94,7 +94,7 @@ def _save_sample5(
|
|
|
94
94
|
metadata_group.attrs["file_source"] = str(self.file_source)
|
|
95
95
|
else:
|
|
96
96
|
metadata_group.attrs["file_source"] = ""
|
|
97
|
-
if self.file_type is not None:
|
|
97
|
+
if hasattr(self, 'file_type') and self.file_type is not None:
|
|
98
98
|
metadata_group.attrs["file_type"] = str(self.file_type)
|
|
99
99
|
else:
|
|
100
100
|
metadata_group.attrs["file_type"] = ""
|
|
@@ -287,11 +287,41 @@ def _save_sample5(
|
|
|
287
287
|
compression="gzip",
|
|
288
288
|
)
|
|
289
289
|
|
|
290
|
-
# Store parameters as JSON
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
290
|
+
# Store parameters/history as JSON
|
|
291
|
+
# Always ensure we sync instance attributes to parameters before saving
|
|
292
|
+
if hasattr(self, 'parameters') and self.parameters is not None:
|
|
293
|
+
if hasattr(self, 'polarity') and self.polarity is not None:
|
|
294
|
+
self.parameters.polarity = self.polarity
|
|
295
|
+
if hasattr(self, 'type') and self.type is not None:
|
|
296
|
+
self.parameters.type = self.type
|
|
297
|
+
|
|
298
|
+
# Prepare save data
|
|
299
|
+
save_data = {}
|
|
300
|
+
|
|
301
|
+
# Add parameters as a dictionary
|
|
302
|
+
if hasattr(self, 'parameters') and self.parameters is not None:
|
|
303
|
+
save_data["sample"] = self.parameters.to_dict()
|
|
304
|
+
|
|
305
|
+
# Add history data (but ensure it's JSON serializable)
|
|
306
|
+
if hasattr(self, 'history') and self.history is not None:
|
|
307
|
+
# Convert any non-JSON-serializable objects to strings/dicts
|
|
308
|
+
serializable_history = {}
|
|
309
|
+
for key, value in self.history.items():
|
|
310
|
+
if key == "sample":
|
|
311
|
+
# Use our properly serialized parameters
|
|
312
|
+
continue # Skip, we'll add it from parameters above
|
|
313
|
+
try:
|
|
314
|
+
# Test if value is JSON serializable
|
|
315
|
+
json.dumps(value)
|
|
316
|
+
serializable_history[key] = value
|
|
317
|
+
except (TypeError, ValueError):
|
|
318
|
+
# Convert to string if not serializable
|
|
319
|
+
serializable_history[key] = str(value)
|
|
320
|
+
save_data.update(serializable_history)
|
|
321
|
+
|
|
322
|
+
# Save as JSON
|
|
323
|
+
params_json = json.dumps(save_data, indent=2)
|
|
324
|
+
metadata_group.attrs["parameters"] = params_json
|
|
295
325
|
|
|
296
326
|
# Store lib and lib_match - removed (no longer saving lib data)
|
|
297
327
|
|
|
@@ -1072,6 +1102,14 @@ def _load_sample5(self, filename: str, map: bool = False):
|
|
|
1072
1102
|
# set self.label to basename without extension
|
|
1073
1103
|
if self.label is None or self.label == "":
|
|
1074
1104
|
self.label = os.path.splitext(os.path.basename(filename))[0]
|
|
1105
|
+
|
|
1106
|
+
# Sync instance attributes from loaded parameters
|
|
1107
|
+
if hasattr(self, 'parameters') and self.parameters is not None:
|
|
1108
|
+
if hasattr(self.parameters, 'polarity') and self.parameters.polarity is not None:
|
|
1109
|
+
self.polarity = self.parameters.polarity
|
|
1110
|
+
if hasattr(self.parameters, 'type') and self.parameters.type is not None:
|
|
1111
|
+
self.type = self.parameters.type
|
|
1112
|
+
|
|
1075
1113
|
self.logger.info(f"Sample loaded from {filename}")
|
|
1076
1114
|
|
|
1077
1115
|
|
|
@@ -1810,6 +1848,14 @@ def _load_sample5_study(self, filename: str, map: bool = False):
|
|
|
1810
1848
|
# set self.label to basename without extension
|
|
1811
1849
|
if self.label is None or self.label == "":
|
|
1812
1850
|
self.label = os.path.splitext(os.path.basename(filename))[0]
|
|
1851
|
+
|
|
1852
|
+
# Sync instance attributes from loaded parameters
|
|
1853
|
+
if hasattr(self, 'parameters') and self.parameters is not None:
|
|
1854
|
+
if hasattr(self.parameters, 'polarity') and self.parameters.polarity is not None:
|
|
1855
|
+
self.polarity = self.parameters.polarity
|
|
1856
|
+
if hasattr(self.parameters, 'type') and self.parameters.type is not None:
|
|
1857
|
+
self.type = self.parameters.type
|
|
1858
|
+
|
|
1813
1859
|
self.logger.info(
|
|
1814
1860
|
f"Sample loaded successfully from {filename} (optimized for study)",
|
|
1815
1861
|
)
|
|
@@ -71,7 +71,7 @@ def load_lib(self, *args, **kwargs):
|
|
|
71
71
|
lib_load(self, *args, **kwargs)
|
|
72
72
|
|
|
73
73
|
|
|
74
|
-
def lib_load(self, csvfile=None, polarity="positive"):
|
|
74
|
+
def lib_load(self, csvfile=None, polarity=None):
|
|
75
75
|
delta_m = {
|
|
76
76
|
"[M+H]+": 1.007276,
|
|
77
77
|
"[M+Na]+": 22.989218,
|
|
@@ -97,10 +97,11 @@ def lib_load(self, csvfile=None, polarity="positive"):
|
|
|
97
97
|
"""
|
|
98
98
|
Load target compounds from a CSV file.
|
|
99
99
|
This method reads a CSV file containing target compounds and their properties, such as m/z, retention time (RT),
|
|
100
|
-
and adducts. It filters the targets based on the specified
|
|
100
|
+
and adducts. It filters the targets based on the specified polarity and returns a DataFrame of the targets.
|
|
101
101
|
Parameters:
|
|
102
102
|
csvfile (str): The path to the CSV file containing target compounds.
|
|
103
|
-
|
|
103
|
+
polarity (str, optional): Ion polarity to filter adducts ('positive' or 'negative').
|
|
104
|
+
If None, uses the sample's polarity property. Default is None.
|
|
104
105
|
Returns:
|
|
105
106
|
pd.DataFrame: A DataFrame containing the filtered target compounds with columns 'mz', 'rt', 'adduct'.
|
|
106
107
|
"""
|
|
@@ -220,6 +221,11 @@ def lib_load(self, csvfile=None, polarity="positive"):
|
|
|
220
221
|
self.lib = self.lib.where(pd.notnull(self.lib), None)
|
|
221
222
|
# find all elements == nan and replace them with None
|
|
222
223
|
self.lib = self.lib.replace({np.nan: None})
|
|
224
|
+
|
|
225
|
+
# Use sample.polarity if polarity parameter is None
|
|
226
|
+
if polarity is None:
|
|
227
|
+
polarity = getattr(self, 'polarity', 'positive')
|
|
228
|
+
|
|
223
229
|
if polarity is not None:
|
|
224
230
|
if polarity.lower() == "positive":
|
|
225
231
|
self.lib = self.lib[self.lib["z"] > 0]
|
|
@@ -73,7 +73,7 @@ def load(
|
|
|
73
73
|
filename (str): The path to the file to load. The file must exist and have one of the following extensions:
|
|
74
74
|
.mzML, .wiff, or .raw.
|
|
75
75
|
ondisk (bool, optional): Indicates whether the file should be treated as on disk. Defaults to False.
|
|
76
|
-
type (str, optional): Specifies the type of file. If provided and set to 'ztscan' (case-insensitive), the
|
|
76
|
+
type (str, optional): Specifies the type of file. If provided and set to 'ztscan' (case-insensitive), the type
|
|
77
77
|
attribute will be adjusted accordingly. Defaults to None.
|
|
78
78
|
label (Any, optional): An optional label to associate with the loaded file. Defaults to None.
|
|
79
79
|
Raises:
|
|
@@ -84,7 +84,7 @@ def load(
|
|
|
84
84
|
- ".mzml": Calls _load_mzML(filename)
|
|
85
85
|
- ".wiff": Calls _load_wiff(filename)
|
|
86
86
|
- ".raw": Calls _load_raw(filename)
|
|
87
|
-
After loading, the
|
|
87
|
+
After loading, the type attribute is set to 'dda', unless the optional 'type' parameter is provided as 'ztscan',
|
|
88
88
|
in which case it is updated to 'ztscan'. The label attribute is updated if a label is provided.
|
|
89
89
|
"""
|
|
90
90
|
|
|
@@ -109,9 +109,9 @@ def load(
|
|
|
109
109
|
else:
|
|
110
110
|
raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
|
|
111
111
|
|
|
112
|
-
self.
|
|
112
|
+
self.type = "dda"
|
|
113
113
|
if type is not None and type.lower() in ["ztscan"]:
|
|
114
|
-
self.
|
|
114
|
+
self.type = "ztscan"
|
|
115
115
|
|
|
116
116
|
if label is not None:
|
|
117
117
|
self.label = label
|
|
@@ -167,9 +167,9 @@ def load_noms1(
|
|
|
167
167
|
else:
|
|
168
168
|
raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
|
|
169
169
|
|
|
170
|
-
self.
|
|
170
|
+
self.type = "dda"
|
|
171
171
|
if type is not None and type.lower() in ["ztscan"]:
|
|
172
|
-
self.
|
|
172
|
+
self.type = "ztscan"
|
|
173
173
|
|
|
174
174
|
if label is not None:
|
|
175
175
|
self.label = label
|
|
@@ -255,6 +255,7 @@ def _load_mzML(
|
|
|
255
255
|
)
|
|
256
256
|
|
|
257
257
|
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
258
|
+
polarity = None
|
|
258
259
|
# iterate over all spectra
|
|
259
260
|
for i, s in tqdm(
|
|
260
261
|
enumerate(omsexp.getSpectra()), # type: ignore[union-attr]
|
|
@@ -262,23 +263,36 @@ def _load_mzML(
|
|
|
262
263
|
desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Scans",
|
|
263
264
|
disable=tdqm_disable,
|
|
264
265
|
):
|
|
266
|
+
# try to get polarity
|
|
267
|
+
if polarity is None:
|
|
268
|
+
try:
|
|
269
|
+
pol = s.getInstrumentSettings().getPolarity()
|
|
270
|
+
if pol == 1:
|
|
271
|
+
polarity = "positive"
|
|
272
|
+
elif pol == 2:
|
|
273
|
+
polarity = "negative"
|
|
274
|
+
except Exception:
|
|
275
|
+
pass
|
|
265
276
|
# create a dict
|
|
266
277
|
if s.getMSLevel() == 1:
|
|
267
278
|
cycle += 1
|
|
268
279
|
prec_mz = None
|
|
269
280
|
precursorIsolationWindowLowerMZ = None
|
|
270
281
|
precursorIsolationWindowUpperMZ = None
|
|
271
|
-
|
|
282
|
+
prec_inty = None
|
|
272
283
|
energy = None
|
|
273
284
|
else:
|
|
274
|
-
prec_mz = s.getPrecursors()
|
|
285
|
+
prec_mz = s.getPrecursors()
|
|
286
|
+
if len(prec_mz) == 0:
|
|
287
|
+
continue
|
|
288
|
+
prec_mz = prec_mz[0].getMZ()
|
|
275
289
|
precursorIsolationWindowLowerMZ = s.getPrecursors()[
|
|
276
290
|
0
|
|
277
291
|
].getIsolationWindowLowerOffset()
|
|
278
292
|
precursorIsolationWindowUpperMZ = s.getPrecursors()[
|
|
279
293
|
0
|
|
280
294
|
].getIsolationWindowUpperOffset()
|
|
281
|
-
|
|
295
|
+
prec_inty = s.getPrecursors()[0].getIntensity()
|
|
282
296
|
# Try to get collision energy from meta values first, fallback to getActivationEnergy()
|
|
283
297
|
try:
|
|
284
298
|
energy = s.getPrecursors()[0].getMetaValue("collision energy")
|
|
@@ -321,7 +335,7 @@ def _load_mzML(
|
|
|
321
335
|
"prec_mz": prec_mz,
|
|
322
336
|
"prec_mz_min": precursorIsolationWindowLowerMZ,
|
|
323
337
|
"prec_mz_max": precursorIsolationWindowUpperMZ,
|
|
324
|
-
"prec_inty":
|
|
338
|
+
"prec_inty": prec_inty,
|
|
325
339
|
"energy": energy,
|
|
326
340
|
"feature_uid": -1,
|
|
327
341
|
}
|
|
@@ -367,10 +381,11 @@ def _load_mzML(
|
|
|
367
381
|
},
|
|
368
382
|
infer_schema_length=None,
|
|
369
383
|
)
|
|
384
|
+
self.polarity = polarity
|
|
370
385
|
self.file_interface = "oms"
|
|
371
386
|
self.ms1_df = ms1_df
|
|
372
387
|
self.label = os.path.basename(filename)
|
|
373
|
-
if self.
|
|
388
|
+
if self.type != "ztscan":
|
|
374
389
|
self.analyze_dda()
|
|
375
390
|
|
|
376
391
|
|
|
@@ -401,7 +416,8 @@ def _load_raw(
|
|
|
401
416
|
- Updates instance attributes including self.file_path, self.file_obj, self.file_interface, and self.label.
|
|
402
417
|
- Initiates further analysis by invoking analyze_dda().
|
|
403
418
|
"""
|
|
404
|
-
from alpharaw.thermo import ThermoRawData
|
|
419
|
+
#from alpharaw.thermo import ThermoRawData
|
|
420
|
+
from masster.sample.thermo import ThermoRawData
|
|
405
421
|
|
|
406
422
|
if not filename:
|
|
407
423
|
raise ValueError("Filename must be provided.")
|
|
@@ -464,6 +480,13 @@ def _load_raw(
|
|
|
464
480
|
prec_intyensity = None
|
|
465
481
|
energy = s["nce"]
|
|
466
482
|
|
|
483
|
+
# try to get polarity
|
|
484
|
+
if self.polarity is None:
|
|
485
|
+
if s['polarity'] == 'positive':
|
|
486
|
+
self.polarity = 'positive'
|
|
487
|
+
elif s['polarity'] == 'negative':
|
|
488
|
+
self.polarity = 'negative'
|
|
489
|
+
|
|
467
490
|
peak_start_idx = s["peak_start_idx"]
|
|
468
491
|
peak_stop_idx = s["peak_stop_idx"]
|
|
469
492
|
peaks = raw_data.peak_df.loc[peak_start_idx : peak_stop_idx - 1]
|
|
@@ -564,16 +587,10 @@ def _load_wiff(
|
|
|
564
587
|
self,
|
|
565
588
|
filename=None,
|
|
566
589
|
):
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
from masster.sample.sciex import SciexWiffData as MassterSciexWiffData
|
|
590
|
+
# Use masster's own implementation first
|
|
591
|
+
from masster.sample.sciex import SciexWiffData as MassterSciexWiffData
|
|
570
592
|
|
|
571
|
-
|
|
572
|
-
except ImportError:
|
|
573
|
-
# Fallback to alpharaw if masster implementation fails
|
|
574
|
-
from alpharaw.sciex import SciexWiffData as AlpharawSciexWiffData
|
|
575
|
-
|
|
576
|
-
SciexWiffDataClass = AlpharawSciexWiffData
|
|
593
|
+
SciexWiffDataClass = MassterSciexWiffData
|
|
577
594
|
|
|
578
595
|
if not filename:
|
|
579
596
|
raise ValueError("Filename must be provided.")
|
|
@@ -610,7 +627,7 @@ def _load_wiff(
|
|
|
610
627
|
"mz": pl.Float64,
|
|
611
628
|
"inty": pl.Float64,
|
|
612
629
|
}
|
|
613
|
-
|
|
630
|
+
polarity = None
|
|
614
631
|
# iterate over rows of specs
|
|
615
632
|
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
616
633
|
for i, s in tqdm(
|
|
@@ -620,6 +637,13 @@ def _load_wiff(
|
|
|
620
637
|
disable=tdqm_disable,
|
|
621
638
|
):
|
|
622
639
|
ms_level = s["ms_level"]
|
|
640
|
+
# try to get polarity
|
|
641
|
+
if polarity is None:
|
|
642
|
+
if s['polarity'] == 'positive':
|
|
643
|
+
polarity = 'positive'
|
|
644
|
+
elif s['polarity'] == 'negative':
|
|
645
|
+
polarity = 'negative'
|
|
646
|
+
|
|
623
647
|
if ms_level == 1:
|
|
624
648
|
cycle += 1
|
|
625
649
|
prec_mz = None
|
|
@@ -723,7 +747,7 @@ def _load_wiff(
|
|
|
723
747
|
self.file_interface = "alpharaw"
|
|
724
748
|
self.label = os.path.basename(filename)
|
|
725
749
|
self.ms1_df = pl.DataFrame(ms1_df_records, schema=schema)
|
|
726
|
-
if self.
|
|
750
|
+
if self.type != "ztscan":
|
|
727
751
|
self.analyze_dda()
|
|
728
752
|
|
|
729
753
|
|
|
@@ -750,103 +774,6 @@ def _load_featureXML(
|
|
|
750
774
|
fm = oms.FeatureMap()
|
|
751
775
|
fh.load(filename, fm)
|
|
752
776
|
self._oms_features_map = fm
|
|
753
|
-
"""if self.features_df is None:
|
|
754
|
-
df = self._oms_features_map.get_df(export_peptide_identifications=False)
|
|
755
|
-
df = self._clean_features_df(df)
|
|
756
|
-
|
|
757
|
-
# desotope features
|
|
758
|
-
df = self._features_deisotope(df, mz_tol=0.02, rt_tol=0.5)
|
|
759
|
-
|
|
760
|
-
# update eic
|
|
761
|
-
df["chrom"] = None
|
|
762
|
-
mz_tol = 0.01
|
|
763
|
-
rt_tol = 10
|
|
764
|
-
# iterate over all rows in df
|
|
765
|
-
for i, row in df.iterrows():
|
|
766
|
-
# select data in ms1_df with mz in range [mz_start - mz_tol, mz_end + mz_tol] and rt in range [rt_start - rt_tol, rt_end + rt_tol]
|
|
767
|
-
d = self.ms1_df.filter(
|
|
768
|
-
(pl.col("rt") >= row["rt_start"] - rt_tol)
|
|
769
|
-
& (pl.col("rt") <= row["rt_end"] + rt_tol)
|
|
770
|
-
& (pl.col("mz") >= row["mz"] - mz_tol)
|
|
771
|
-
& (pl.col("mz") <= row["mz"] + mz_tol)
|
|
772
|
-
)
|
|
773
|
-
# for all unique rt values, find the maximum inty
|
|
774
|
-
eic_rt = d.group_by("rt").agg(pl.col("inty").max())
|
|
775
|
-
if len(eic_rt) < 4:
|
|
776
|
-
continue
|
|
777
|
-
eic = Chromatogram(
|
|
778
|
-
eic_rt["rt"].to_numpy(),
|
|
779
|
-
eic_rt["inty"].to_numpy(),
|
|
780
|
-
label=f"EIC mz={row['mz']:.4f}",
|
|
781
|
-
feature_start=row["rt_start"],
|
|
782
|
-
feature_end=row["rt_end"],
|
|
783
|
-
feature_apex=row["rt"],
|
|
784
|
-
).find_peaks()
|
|
785
|
-
# set eic in df
|
|
786
|
-
df.at[i, "chrom"] = eic
|
|
787
|
-
if len(eic.peak_widths) > 0:
|
|
788
|
-
df.at[i, "chrom_coherence"] = round(eic.feature_coherence, 3)
|
|
789
|
-
df.at[i, "chrom_prominence"] = round(
|
|
790
|
-
eic.peak_prominences[0], 3
|
|
791
|
-
) # eic.peak_prominences[0]
|
|
792
|
-
df.at[i, "chrom_prominence_scaled"] = round(
|
|
793
|
-
eic.peak_prominences[0] / (np.mean(eic.inty) + 1e-10), 3
|
|
794
|
-
)
|
|
795
|
-
df.at[i, "chrom_height_scaled"] = round(
|
|
796
|
-
eic.peak_heights[0] / (np.mean(eic.inty) + 1e-10), 3
|
|
797
|
-
)
|
|
798
|
-
|
|
799
|
-
self.features_df = df"""
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
'''
|
|
803
|
-
def _load_mzpkl(
|
|
804
|
-
self,
|
|
805
|
-
filename="sample.mzpkl",
|
|
806
|
-
ondisk=False,
|
|
807
|
-
):
|
|
808
|
-
"""
|
|
809
|
-
Load the mzpkl data file, initialize the experiment attributes, and set up the file object.
|
|
810
|
-
Parameters:
|
|
811
|
-
filename (str, optional): The path to the .mzpkl file to be loaded. Defaults to "data.mzpkl".
|
|
812
|
-
ondisk (bool, optional): A flag indicating whether the data should be loaded for on-disk usage.
|
|
813
|
-
If True, self.ondisk is set to True and an OnDiscMSExperiment is used.
|
|
814
|
-
Otherwise, an MSExperiment is used.
|
|
815
|
-
Side Effects:
|
|
816
|
-
- Decompresses and unpickles the specified file.
|
|
817
|
-
- Sets attributes on self for each key in the loaded data dictionary, except for keys named 'format'.
|
|
818
|
-
- Renames the attribute 'spectra_df' to 'scans_df' if present.
|
|
819
|
-
- Initializes self.file_obj as either an OnDiscMSExperiment or MSExperiment based on the ondisk flag.
|
|
820
|
-
- Checks for an associated featureXML file (with the same base name as the input file) and loads it if found.
|
|
821
|
-
"""
|
|
822
|
-
|
|
823
|
-
if ondisk is True:
|
|
824
|
-
self.ondisk = True
|
|
825
|
-
|
|
826
|
-
with bz2.BZ2File(filename, "rb") as f:
|
|
827
|
-
data = pickle.load(f)
|
|
828
|
-
|
|
829
|
-
for k, v in data.items():
|
|
830
|
-
if k in ["format"]:
|
|
831
|
-
continue
|
|
832
|
-
if k == "spectra_df":
|
|
833
|
-
k = "scans_df"
|
|
834
|
-
setattr(self, k, v)
|
|
835
|
-
|
|
836
|
-
# Import and call internal sanitize function
|
|
837
|
-
from masster.study.load import _sanitize
|
|
838
|
-
_sanitize(self) if self.ondisk:
|
|
839
|
-
self.file_obj = oms.OnDiscMSExperiment()
|
|
840
|
-
else:
|
|
841
|
-
self.file_obj = oms.MSExperiment()
|
|
842
|
-
|
|
843
|
-
# check if *.featureXML exists
|
|
844
|
-
featureXML = filename.replace(".mzpkl", ".featureXML")
|
|
845
|
-
if os.path.exists(featureXML):
|
|
846
|
-
self._load_featureXML(featureXML)
|
|
847
|
-
|
|
848
|
-
'''
|
|
849
|
-
|
|
850
777
|
|
|
851
778
|
def _wiff_to_dict(
|
|
852
779
|
filename=None,
|
|
@@ -248,7 +248,7 @@ def get_spectrum(self, scan, **kwargs):
|
|
|
248
248
|
spect = spect.keep_top(max_peaks)
|
|
249
249
|
|
|
250
250
|
if dia_stats:
|
|
251
|
-
if self.
|
|
251
|
+
if self.type in ["ztscan", "dia", "swath"]:
|
|
252
252
|
spect = self._get_ztscan_stats(
|
|
253
253
|
spec=spect,
|
|
254
254
|
scan_uid=scan_uid,
|
|
@@ -240,12 +240,13 @@ class Sample:
|
|
|
240
240
|
# Initialize label from parameters
|
|
241
241
|
self.label = params.label
|
|
242
242
|
|
|
243
|
+
self.type = params.type # dda, dia, ztscan
|
|
244
|
+
self.polarity = params.polarity # Initialize from parameters, may be overridden during raw file loading
|
|
245
|
+
|
|
243
246
|
# this is the path to the original file. It's never sample5
|
|
244
247
|
self.file_source = None
|
|
245
248
|
# this is the path to the object that was loaded. It could be sample5
|
|
246
249
|
self.file_path = None
|
|
247
|
-
# Type of the file (e.g., mzML, RAW, WIFF, mzpkl)
|
|
248
|
-
self.file_type = None
|
|
249
250
|
# Interface to handle the file operations (e.g., oms, alpharaw)
|
|
250
251
|
self.file_interface = None
|
|
251
252
|
# The file object once loaded, can be oms.MzMLFile or alpharaw.AlphaRawFile
|
|
@@ -327,7 +328,6 @@ class Sample:
|
|
|
327
328
|
_save_sample5 = _save_sample5
|
|
328
329
|
_load_sample5 = _load_sample5
|
|
329
330
|
|
|
330
|
-
|
|
331
331
|
# Removed internal-only load methods: _load_featureXML, _load_ms2data, _load_mzML, _load_raw, _load_wiff
|
|
332
332
|
chrom_extract = chrom_extract
|
|
333
333
|
_index_file = _index_file # Renamed from index_file to be internal-only
|
|
@@ -503,6 +503,8 @@ class Sample:
|
|
|
503
503
|
str = f"File: {os.path.basename(self.file_path)}\n"
|
|
504
504
|
str += f"Path: {os.path.dirname(self.file_path)}\n"
|
|
505
505
|
str += f"Source: {self.file_source}\n"
|
|
506
|
+
str += f"Type: {self.type}\n"
|
|
507
|
+
str += f"Polarity: {self.polarity}\n"
|
|
506
508
|
str += f"MS1 scans: {len(self.scans_df.filter(pl.col('ms_level') == 1))}\n"
|
|
507
509
|
str += f"MS2 scans: {len(self.scans_df.filter(pl.col('ms_level') == 2))}\n"
|
|
508
510
|
if self.features_df is not None:
|