masster 0.4.22__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.4.22 → masster-0.5.1}/PKG-INFO +54 -14
- masster-0.5.1/README.md +101 -0
- {masster-0.4.22 → masster-0.5.1}/pyproject.toml +3 -1
- {masster-0.4.22 → masster-0.5.1}/src/masster/_version.py +1 -1
- masster-0.5.1/src/masster/data/libs/aa.csv +22 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/lib/lib.py +6 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/adducts.py +1 -1
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/load.py +10 -9
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/plot.py +1 -1
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/processing.py +4 -4
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/sample.py +29 -32
- masster-0.5.1/src/masster/study/analysis.py +1762 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/fill_def.py +1 -1
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/export.py +5 -3
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/h5.py +3 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/helpers.py +153 -80
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/id.py +545 -4
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/load.py +33 -59
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/merge.py +413 -315
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/parameters.py +3 -3
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/plot.py +398 -43
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/processing.py +6 -14
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/save.py +8 -4
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/study.py +179 -139
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/study5_schema.json +9 -0
- {masster-0.4.22 → masster-0.5.1}/uv.lock +57 -1
- masster-0.4.22/README.md +0 -63
- {masster-0.4.22 → masster-0.5.1}/.github/workflows/publish.yml +0 -0
- {masster-0.4.22 → masster-0.5.1}/.github/workflows/security.yml +0 -0
- {masster-0.4.22 → masster-0.5.1}/.github/workflows/test.yml +0 -0
- {masster-0.4.22 → masster-0.5.1}/.gitignore +0 -0
- {masster-0.4.22 → masster-0.5.1}/.pre-commit-config.yaml +0 -0
- {masster-0.4.22 → masster-0.5.1}/LICENSE +0 -0
- {masster-0.4.22 → masster-0.5.1}/Makefile +0 -0
- {masster-0.4.22 → masster-0.5.1}/TESTING.md +0 -0
- {masster-0.4.22 → masster-0.5.1}/demo/example_batch_process.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/demo/example_sample_process.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/__init__.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/chromatogram.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/libs/ccm.csv +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/libs/urine.csv +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/lib/__init__.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/logger.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/__init__.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/h5.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/helpers.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/lib.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/parameters.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/quant.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/sample5_schema.json +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/save.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/sample/sciex.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/spectrum.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/__init__.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/fill_chrom_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/identify_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/integrate_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/merge_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/wizard/README.md +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/wizard/__init__.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/wizard/example.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/src/masster/wizard/wizard.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/conftest.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_chromatogram.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_defaults.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_imports.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_integration.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_logger.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_parameters.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_sample.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_spectrum.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_study.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tests/test_version.py +0 -0
- {masster-0.4.22 → masster-0.5.1}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: masster
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: Mass spectrometry data analysis package
|
|
5
5
|
Project-URL: homepage, https://github.com/zamboni-lab/masster
|
|
6
6
|
Project-URL: repository, https://github.com/zamboni-lab/masster
|
|
@@ -686,6 +686,7 @@ Requires-Dist: bokeh>=3.7.3
|
|
|
686
686
|
Requires-Dist: cmap>=0.6.2
|
|
687
687
|
Requires-Dist: datashader>=0.18.1
|
|
688
688
|
Requires-Dist: h5py>=3.14.0
|
|
689
|
+
Requires-Dist: hdbscan>=0.8.40
|
|
689
690
|
Requires-Dist: holoviews>=1.21.0
|
|
690
691
|
Requires-Dist: hvplot>=0.11.3
|
|
691
692
|
Requires-Dist: loguru>=0.7.3
|
|
@@ -701,6 +702,7 @@ Requires-Dist: pythonnet>=3.0.0
|
|
|
701
702
|
Requires-Dist: scikit-learn>=1.7.1
|
|
702
703
|
Requires-Dist: scipy>=1.12.0
|
|
703
704
|
Requires-Dist: tqdm>=4.65.0
|
|
705
|
+
Requires-Dist: umap-learn>=0.5.9.post2
|
|
704
706
|
Provides-Extra: dev
|
|
705
707
|
Requires-Dist: bandit>=1.7.0; extra == 'dev'
|
|
706
708
|
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
@@ -726,12 +728,13 @@ Description-Content-Type: text/markdown
|
|
|
726
728
|
|
|
727
729
|
# MASSter
|
|
728
730
|
|
|
729
|
-
**MASSter** is a
|
|
731
|
+
**MASSter** is a Python package for the analysis of mass spectrometry data, tailored for the purpose of metabolomics and LC-MS data processing. It is designed to deal with DDA data, and includes hidden functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS. All other functionalities, e.g. centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, and quantification, were redesigned and engineered to maximize scalability (tested with 3000 LC-MS samples), speed, and quality of results.
|
|
730
732
|
|
|
731
733
|
This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab.
|
|
732
734
|
|
|
733
|
-
|
|
735
|
+
## Prerequisites
|
|
734
736
|
|
|
737
|
+
**MASSter** reads raw (Thermo), wiff (SCIEX), or mzML data. It's recommended to provide raw, profile data.
|
|
735
738
|
|
|
736
739
|
## Installation
|
|
737
740
|
|
|
@@ -739,20 +742,60 @@ Some of the core processing functions are derived from OpenMS. We use the same n
|
|
|
739
742
|
pip install masster
|
|
740
743
|
```
|
|
741
744
|
|
|
742
|
-
|
|
745
|
+
## Basic usage
|
|
746
|
+
### Quick start: use the wizard
|
|
747
|
+
|
|
748
|
+
```python
|
|
749
|
+
import masster
|
|
750
|
+
masster.wizard.execute(
|
|
751
|
+
source=r'..\..\folder_with_raw_data',
|
|
752
|
+
folder=r'..\..\folder_to_store_results'
|
|
753
|
+
)
|
|
754
|
+
```
|
|
743
755
|
|
|
756
|
+
This will run a wizard that should perform all key steps and save the results to the `folder`.
|
|
757
|
+
|
|
758
|
+
### Basic workflow for analyzing a single sample
|
|
744
759
|
```python
|
|
745
760
|
import masster
|
|
761
|
+
sample = masster.Sample(filename='...') # full path to a *.raw, *.wiff, or *.mzML file
|
|
762
|
+
# process
|
|
763
|
+
sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
|
|
764
|
+
sample.find_adducts()
|
|
765
|
+
sample.find_ms2()
|
|
766
|
+
|
|
767
|
+
# access data
|
|
768
|
+
sample.features_df
|
|
769
|
+
|
|
770
|
+
# save results
|
|
771
|
+
sample.save() # stores to *.sample5, our custom hdf5 format
|
|
772
|
+
sample.export_mgf()
|
|
773
|
+
|
|
774
|
+
# some plots
|
|
775
|
+
sample.plot_bpc()
|
|
776
|
+
sample.plot_tic()
|
|
777
|
+
sample.plot_2d()
|
|
778
|
+
sample.plot_features_stats()
|
|
779
|
+
|
|
780
|
+
# explore methods
|
|
781
|
+
dir(sample)
|
|
782
|
+
```
|
|
783
|
+
|
|
784
|
+
### Basic workflow for analyzing an LC-MS study with two or more samples
|
|
746
785
|
|
|
786
|
+
```python
|
|
787
|
+
import masster
|
|
747
788
|
# Initialize the Study object with the default folder
|
|
748
|
-
study = masster.Study(
|
|
789
|
+
study = masster.Study(folder=r'D:\...\mylcms')
|
|
749
790
|
|
|
750
791
|
# Load data from folder with raw data, here: WIFF
|
|
751
792
|
study.add(r'D:\...\...\...\*.wiff')
|
|
752
793
|
|
|
753
794
|
# Perform retention time correction
|
|
754
|
-
study.align(
|
|
795
|
+
study.align(rt_tol=2.0)
|
|
755
796
|
study.plot_alignment()
|
|
797
|
+
study.plot_bpc()
|
|
798
|
+
study.plot_rt_correction()
|
|
756
799
|
|
|
757
800
|
# Find consensus features
|
|
758
801
|
study.merge(min_samples=3)
|
|
@@ -772,18 +815,15 @@ study.export_parquet()
|
|
|
772
815
|
|
|
773
816
|
# Save the study to .study5
|
|
774
817
|
study.save()
|
|
775
|
-
```
|
|
776
|
-
|
|
777
|
-
## Requirements
|
|
778
818
|
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
819
|
+
# Some of the plots...
|
|
820
|
+
study.plot_samples_pca()
|
|
821
|
+
study.plot_samples_umap()
|
|
822
|
+
study.plot_samples_2d()
|
|
823
|
+
```
|
|
782
824
|
|
|
783
825
|
## License
|
|
784
|
-
|
|
785
826
|
GNU Affero General Public License v3
|
|
786
827
|
|
|
787
828
|
## Citation
|
|
788
|
-
|
|
789
829
|
If you use Masster in your research, please cite this repository.
|
masster-0.5.1/README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# MASSter
|
|
2
|
+
|
|
3
|
+
**MASSter** is a Python package for the analysis of mass spectrometry data, tailored for the purpose of metabolomics and LC-MS data processing. It is designed to deal with DDA data, and includes hidden functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS. All other functionalities, e.g. centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, and quantification, were redesigned and engineered to maximize scalability (tested with 3000 LC-MS samples), speed, and quality of results.
|
|
4
|
+
|
|
5
|
+
This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab.
|
|
6
|
+
|
|
7
|
+
## Prerequisites
|
|
8
|
+
|
|
9
|
+
**MASSter** reads raw (Thermo), wiff (SCIEX), or mzML data. It's recommended to provide raw, profile data.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install masster
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Basic usage
|
|
18
|
+
### Quick start: use the wizard
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
import masster
|
|
22
|
+
masster.wizard.execute(
|
|
23
|
+
source=r'..\..\folder_with_raw_data',
|
|
24
|
+
folder=r'..\..\folder_to_store_results'
|
|
25
|
+
)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
This will run a wizard that should perform all key steps and save the results to the `folder`.
|
|
29
|
+
|
|
30
|
+
### Basic workflow for analyzing a single sample
|
|
31
|
+
```python
|
|
32
|
+
import masster
|
|
33
|
+
sample = masster.Sample(filename='...') # full path to a *.raw, *.wiff, or *.mzML file
|
|
34
|
+
# process
|
|
35
|
+
sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
|
|
36
|
+
sample.find_adducts()
|
|
37
|
+
sample.find_ms2()
|
|
38
|
+
|
|
39
|
+
# access data
|
|
40
|
+
sample.features_df
|
|
41
|
+
|
|
42
|
+
# save results
|
|
43
|
+
sample.save() # stores to *.sample5, our custom hdf5 format
|
|
44
|
+
sample.export_mgf()
|
|
45
|
+
|
|
46
|
+
# some plots
|
|
47
|
+
sample.plot_bpc()
|
|
48
|
+
sample.plot_tic()
|
|
49
|
+
sample.plot_2d()
|
|
50
|
+
sample.plot_features_stats()
|
|
51
|
+
|
|
52
|
+
# explore methods
|
|
53
|
+
dir(sample)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Basic workflow for analyzing an LC-MS study with two or more samples
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import masster
|
|
60
|
+
# Initialize the Study object with the default folder
|
|
61
|
+
study = masster.Study(folder=r'D:\...\mylcms')
|
|
62
|
+
|
|
63
|
+
# Load data from folder with raw data, here: WIFF
|
|
64
|
+
study.add(r'D:\...\...\...\*.wiff')
|
|
65
|
+
|
|
66
|
+
# Perform retention time correction
|
|
67
|
+
study.align(rt_tol=2.0)
|
|
68
|
+
study.plot_alignment()
|
|
69
|
+
study.plot_bpc()
|
|
70
|
+
study.plot_rt_correction()
|
|
71
|
+
|
|
72
|
+
# Find consensus features
|
|
73
|
+
study.merge(min_samples=3)
|
|
74
|
+
study.plot_consensus_2d()
|
|
75
|
+
|
|
76
|
+
# Retrieve missing data for quantification
|
|
77
|
+
study.fill()
|
|
78
|
+
|
|
79
|
+
# Integrate according to consensus metadata
|
|
80
|
+
study.integrate()
|
|
81
|
+
|
|
82
|
+
# export results
|
|
83
|
+
study.export_mgf()
|
|
84
|
+
study.export_mztab()
|
|
85
|
+
study.export_xlsx()
|
|
86
|
+
study.export_parquet()
|
|
87
|
+
|
|
88
|
+
# Save the study to .study5
|
|
89
|
+
study.save()
|
|
90
|
+
|
|
91
|
+
# Some of the plots...
|
|
92
|
+
study.plot_samples_pca()
|
|
93
|
+
study.plot_samples_umap()
|
|
94
|
+
study.plot_samples_2d()
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## License
|
|
98
|
+
GNU Affero General Public License v3
|
|
99
|
+
|
|
100
|
+
## Citation
|
|
101
|
+
If you use Masster in your research, please cite this repository.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
[project]
|
|
3
3
|
name = "masster"
|
|
4
|
-
version = "0.
|
|
4
|
+
version = "0.5.1"
|
|
5
5
|
description = "Mass spectrometry data analysis package"
|
|
6
6
|
authors = [
|
|
7
7
|
{ name = "Zamboni Lab" }
|
|
@@ -47,6 +47,8 @@ dependencies = [
|
|
|
47
47
|
"openpyxl>=3.1.5",
|
|
48
48
|
"cmap>=0.6.2",
|
|
49
49
|
"scikit-learn>=1.7.1",
|
|
50
|
+
"umap-learn>=0.5.9.post2",
|
|
51
|
+
"hdbscan>=0.8.40",
|
|
50
52
|
]
|
|
51
53
|
|
|
52
54
|
[project.optional-dependencies]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name,smiles,inchikey,formula,db_id,db
|
|
2
|
+
L-Glutamic acid,N[C@@H](CCC(O)=O)C(O)=O,WHUUTDBJXJRKMK-VKHMYHEASA-N,C5H9NO4,CID:33032,pubchem
|
|
3
|
+
L-Tyrosine,N[C@@H](CC1=CC=C(O)C=C1)C(O)=O,OUYCCCASQSFEME-QMMMGPOBSA-N,C9H11NO3,CID:6057,pubchem
|
|
4
|
+
L-Phenylalanine,N[C@@H](CC1=CC=CC=C1)C(O)=O,COLNVLDHVKWLRT-QMMMGPOBSA-N,C9H11NO2,CID:6140,pubchem
|
|
5
|
+
L-Alanine,C[C@H](N)C(O)=O,QNAYBMKLOCPYGJ-REOHCLBHSA-N,C3H7NO2,CID:5950,pubchem
|
|
6
|
+
L-Proline,OC(=O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N,C5H9NO2,CID:145742,pubchem
|
|
7
|
+
L-Threonine,C[C@@H](O)[C@H](N)C(O)=O,AYFVYJQAPQTCCC-GBXIJSLDSA-N,C4H9NO3,CID:6288,pubchem
|
|
8
|
+
L-Asparagine,N[C@@H](CC(N)=O)C(O)=O,DCXYFEDJOCDNAF-REOHCLBHSA-N,C4H8N2O3,CID:6267,pubchem
|
|
9
|
+
L-Isoleucine,CC[C@H](C)[C@H](N)C(O)=O,AGPKZVBTJJNPAG-WHFBIAKZSA-N,C6H13NO2,CID:6306,pubchem
|
|
10
|
+
L-Histidine,N[C@@H](CC1=CN=CN1)C(O)=O,HNDVDQJCIGZPNO-YFKPBYRVSA-N,C6H9N3O2,CID:6274,pubchem
|
|
11
|
+
L-Lysine,NCCCC[C@H](N)C(O)=O,KDXKERNSBIXSRK-YFKPBYRVSA-N,C6H14N2O2,CID:5962,pubchem
|
|
12
|
+
L-Serine,N[C@@H](CO)C(O)=O,MTCFGRXMJLQNBG-REOHCLBHSA-N,C3H7NO3,CID:5951,pubchem
|
|
13
|
+
L-Aspartic acid,N[C@@H](CC(O)=O)C(O)=O,CKLJMWTZIZZHCS-REOHCLBHSA-N,C4H7NO4,CID:5960,pubchem
|
|
14
|
+
L-Cystine,N[C@@H](CSSC[C@H](N)C(O)=O)C(O)=O,LEVWYRKDKASIDU-IMJSIDKUSA-N,C6H12N2O4S2,CID:67678,pubchem
|
|
15
|
+
L-Arginine,N[C@@H](CCCNC(N)=N)C(O)=O,ODKSFYDXXFIFQN-BYPYZUCNSA-N,C6H14N4O2,CID:6322,pubchem
|
|
16
|
+
L-Cysteine,N[C@@H](CS)C(O)=O,XUJNEKJLAYXESH-REOHCLBHSA-N,C3H7NO2S,CID:5862,pubchem
|
|
17
|
+
L-Glutamine,N[C@@H](CCC(N)=O)C(O)=O,ZDXPYRJPNDTMRX-VKHMYHEASA-N,C5H10N2O3,CID:5961,pubchem
|
|
18
|
+
L-Leucine,CC(C)C[C@H](N)C(O)=O,ROHFNLRQFUQHCH-YFKPBYRVSA-N,C6H13NO2,CID:6106,pubchem
|
|
19
|
+
L-Methionine,CSCC[C@H](N)C(O)=O,FFEARJCKVFRZRR-BYPYZUCNSA-N,C5H11NO2S,CID:6137,pubchem
|
|
20
|
+
L-Valine,CC(C)[C@H](N)C(O)=O,KZSNJWFQEVHDMF-BYPYZUCNSA-N,C5H11NO2,CID:6287,pubchem
|
|
21
|
+
L-Tryptophan,N[C@@H](CC1=CNC2=C1C=CC=C2)C(O)=O,QIVBCDIJIAJPQS-VIFPVBQESA-N,C11H12N2O2,CID:6305,pubchem
|
|
22
|
+
Glycine,NCC(O)=O,DHMQDGOQFOQNFH-UHFFFAOYSA-N,C2H5NO2,CID:750,pubchem
|
|
@@ -123,11 +123,13 @@ class Lib:
|
|
|
123
123
|
"inchi": pl.Series([], dtype=pl.Utf8),
|
|
124
124
|
"inchikey": pl.Series([], dtype=pl.Utf8),
|
|
125
125
|
"formula": pl.Series([], dtype=pl.Utf8),
|
|
126
|
+
"iso": pl.Series([], dtype=pl.Int64),
|
|
126
127
|
"adduct": pl.Series([], dtype=pl.Utf8),
|
|
127
128
|
"m": pl.Series([], dtype=pl.Float64),
|
|
128
129
|
"z": pl.Series([], dtype=pl.Int8),
|
|
129
130
|
"mz": pl.Series([], dtype=pl.Float64),
|
|
130
131
|
"rt": pl.Series([], dtype=pl.Float64),
|
|
132
|
+
"quant_group": pl.Series([], dtype=pl.Int64),
|
|
131
133
|
"db_id": pl.Series([], dtype=pl.Utf8),
|
|
132
134
|
"db": pl.Series([], dtype=pl.Utf8),
|
|
133
135
|
})
|
|
@@ -245,11 +247,13 @@ class Lib:
|
|
|
245
247
|
"inchi": compound_data.get("inchi", ""),
|
|
246
248
|
"inchikey": compound_data.get("inchikey", ""),
|
|
247
249
|
"formula": compound_data["formula"],
|
|
250
|
+
"iso": 0, # Default to zero
|
|
248
251
|
"adduct": adduct,
|
|
249
252
|
"m": adducted_mass,
|
|
250
253
|
"z": charge,
|
|
251
254
|
"mz": mz,
|
|
252
255
|
"rt": compound_data.get("rt", None),
|
|
256
|
+
"quant_group": counter, # Use same as lib_uid for default
|
|
253
257
|
"db_id": compound_data.get("db_id", None),
|
|
254
258
|
"db": compound_data.get("db", None),
|
|
255
259
|
}
|
|
@@ -526,12 +530,14 @@ class Lib:
|
|
|
526
530
|
"source_id": match_row.get("source_id"),
|
|
527
531
|
"name": match_row["name"],
|
|
528
532
|
"formula": match_row["formula"],
|
|
533
|
+
"iso": match_row.get("iso", 0),
|
|
529
534
|
"adduct": match_row["adduct"],
|
|
530
535
|
"smiles": match_row["smiles"],
|
|
531
536
|
"inchi": match_row["inchi"],
|
|
532
537
|
"inchikey": match_row["inchikey"],
|
|
533
538
|
"lib_mz": match_row["mz"],
|
|
534
539
|
"lib_rt": match_row["rt"],
|
|
540
|
+
"quant_group": match_row.get("quant_group"),
|
|
535
541
|
"delta_mz": abs(feature_mz - match_row["mz"]),
|
|
536
542
|
"delta_rt": abs(feature_rt - match_row["rt"]) if feature_rt is not None and match_row["rt"] is not None else None,
|
|
537
543
|
}
|
|
@@ -809,5 +809,5 @@ def find_adducts(self, **kwargs):
|
|
|
809
809
|
["name", "charge", "mass_shift", "probability"],
|
|
810
810
|
).to_dicts()
|
|
811
811
|
|
|
812
|
-
self.
|
|
812
|
+
self.update_history(["find_adducts"], history_params)
|
|
813
813
|
self.logger.debug("Parameters stored successfully")
|
|
@@ -173,8 +173,8 @@ def load_noms1(
|
|
|
173
173
|
self.label = label
|
|
174
174
|
|
|
175
175
|
|
|
176
|
-
#
|
|
177
|
-
def
|
|
176
|
+
# Renamed for clarity and internal use
|
|
177
|
+
def _load_ms1(
|
|
178
178
|
self,
|
|
179
179
|
filename=None,
|
|
180
180
|
ondisk=False,
|
|
@@ -182,7 +182,8 @@ def load_study(
|
|
|
182
182
|
label=None,
|
|
183
183
|
):
|
|
184
184
|
"""
|
|
185
|
-
|
|
185
|
+
Load MS1-only data (renamed from load_study for clarity).
|
|
186
|
+
Optimized version for study loading that excludes MS2 data.
|
|
186
187
|
|
|
187
188
|
This method is deprecated. Use load_noms1() instead.
|
|
188
189
|
"""
|
|
@@ -828,11 +829,11 @@ def _load_mzpkl(
|
|
|
828
829
|
continue
|
|
829
830
|
if k == "spectra_df":
|
|
830
831
|
k = "scans_df"
|
|
831
|
-
|
|
832
|
+
setattr(self, k, v)
|
|
832
833
|
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
if self.ondisk:
|
|
834
|
+
# Import and call internal sanitize function
|
|
835
|
+
from masster.study.load import _sanitize
|
|
836
|
+
_sanitize(self) if self.ondisk:
|
|
836
837
|
self.file_obj = oms.OnDiscMSExperiment()
|
|
837
838
|
else:
|
|
838
839
|
self.file_obj = oms.MSExperiment()
|
|
@@ -955,7 +956,7 @@ def sanitize(self):
|
|
|
955
956
|
self.features_df.at[_i, "ms2_specs"][_j] = new_ms2_specs
|
|
956
957
|
|
|
957
958
|
|
|
958
|
-
def
|
|
959
|
+
def _index_file(self):
|
|
959
960
|
"""
|
|
960
961
|
Reload raw data from a file based on its extension.
|
|
961
962
|
|
|
@@ -1022,7 +1023,7 @@ def index_file(self):
|
|
|
1022
1023
|
raise FileNotFoundError(
|
|
1023
1024
|
f"File {self.file_source} not found. Did the path change? Consider running source().",
|
|
1024
1025
|
)
|
|
1025
|
-
self.
|
|
1026
|
+
self._index_file()
|
|
1026
1027
|
else:
|
|
1027
1028
|
raise FileNotFoundError(
|
|
1028
1029
|
f"File {self.file_source} not found. Did the path change? Consider running source().",
|
|
@@ -799,7 +799,7 @@ def find_features(self, **kwargs):
|
|
|
799
799
|
self.logger.info(f"Feature detection completed. Total features: {len(df)}")
|
|
800
800
|
|
|
801
801
|
# store params
|
|
802
|
-
self.
|
|
802
|
+
self.update_history(["find_features"], params.to_dict())
|
|
803
803
|
self.logger.debug(
|
|
804
804
|
"Parameters stored to find_features",
|
|
805
805
|
)
|
|
@@ -1114,7 +1114,7 @@ def find_ms2(self, **kwargs):
|
|
|
1114
1114
|
c = 0
|
|
1115
1115
|
|
|
1116
1116
|
if self.file_interface is None:
|
|
1117
|
-
self.
|
|
1117
|
+
self._index_file()
|
|
1118
1118
|
|
|
1119
1119
|
# Vectorize the entire operation for better performance
|
|
1120
1120
|
features_subset = features_df.filter(pl.col("feature_uid").is_in(feature_uid_list))
|
|
@@ -1269,7 +1269,7 @@ def find_ms2(self, **kwargs):
|
|
|
1269
1269
|
self.features_df = features_df
|
|
1270
1270
|
|
|
1271
1271
|
# store params
|
|
1272
|
-
self.
|
|
1272
|
+
self.update_history(["find_ms2"], params.to_dict())
|
|
1273
1273
|
self.logger.debug(
|
|
1274
1274
|
"Parameters stored to find_ms2",
|
|
1275
1275
|
)
|
|
@@ -1430,4 +1430,4 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
|
|
|
1430
1430
|
# Store parameters in history
|
|
1431
1431
|
params_dict = {"rt_tolerance": rt_tolerance}
|
|
1432
1432
|
params_dict.update(kwargs)
|
|
1433
|
-
self.
|
|
1433
|
+
self.update_history(["find_iso"], params_dict)
|
|
@@ -47,17 +47,17 @@ from masster.sample.defaults.find_adducts_def import find_adducts_defaults
|
|
|
47
47
|
from masster.sample.defaults.find_ms2_def import find_ms2_defaults
|
|
48
48
|
from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
|
|
49
49
|
|
|
50
|
-
# Sample-specific imports
|
|
51
|
-
from masster.sample.h5 import _load_sample5
|
|
52
|
-
from masster.sample.h5 import _load_sample5_study
|
|
53
|
-
from masster.sample.h5 import _save_sample5
|
|
54
|
-
from masster.sample.helpers import _delete_ms2
|
|
50
|
+
# Sample-specific imports - keeping these private, only for internal use
|
|
51
|
+
# from masster.sample.h5 import _load_sample5
|
|
52
|
+
# from masster.sample.h5 import _load_sample5_study
|
|
53
|
+
# from masster.sample.h5 import _save_sample5
|
|
54
|
+
# from masster.sample.helpers import _delete_ms2
|
|
55
55
|
from masster.sample.helpers import _estimate_memory_usage
|
|
56
56
|
from masster.sample.helpers import _get_scan_uids
|
|
57
57
|
from masster.sample.helpers import _get_feature_uids
|
|
58
|
-
from masster.sample.helpers import _features_sync
|
|
59
|
-
from masster.sample.adducts import _get_adducts
|
|
58
|
+
# from masster.sample.helpers import _features_sync - made internal only
|
|
60
59
|
from masster.sample.adducts import find_adducts
|
|
60
|
+
# Removed _get_adducts - only used in study modules
|
|
61
61
|
from masster.sample.helpers import features_delete
|
|
62
62
|
from masster.sample.helpers import features_filter
|
|
63
63
|
from masster.sample.helpers import select
|
|
@@ -69,22 +69,23 @@ from masster.sample.helpers import get_eic
|
|
|
69
69
|
from masster.sample.helpers import set_source
|
|
70
70
|
from masster.sample.helpers import _recreate_feature_map
|
|
71
71
|
from masster.sample.helpers import _get_feature_map
|
|
72
|
-
|
|
73
|
-
from masster.sample.load import
|
|
74
|
-
from masster.sample.load import
|
|
75
|
-
from masster.sample.load import
|
|
76
|
-
from masster.sample.load import
|
|
72
|
+
# Load functions - keeping only specific ones needed for external API
|
|
73
|
+
# from masster.sample.load import _load_featureXML - made internal only
|
|
74
|
+
# from masster.sample.load import _load_ms2data - made internal only
|
|
75
|
+
# from masster.sample.load import _load_mzML - made internal only
|
|
76
|
+
# from masster.sample.load import _load_raw - made internal only
|
|
77
|
+
# from masster.sample.load import _load_wiff - made internal only
|
|
77
78
|
from masster.sample.load import chrom_extract
|
|
78
|
-
from masster.sample.load import
|
|
79
|
+
from masster.sample.load import _index_file
|
|
79
80
|
from masster.sample.load import load
|
|
80
81
|
from masster.sample.load import load_noms1
|
|
81
|
-
from masster.sample.load import load_study
|
|
82
|
+
from masster.sample.load import _load_ms1 # Renamed from load_study
|
|
82
83
|
from masster.sample.load import sanitize
|
|
83
84
|
from masster.sample.plot import plot_2d
|
|
84
85
|
from masster.sample.plot import plot_2d_oracle
|
|
85
86
|
from masster.sample.plot import plot_dda_stats
|
|
86
87
|
from masster.sample.plot import plot_chrom
|
|
87
|
-
from masster.sample.plot import plot_feature_stats
|
|
88
|
+
from masster.sample.plot import plot_features_stats # Renamed from plot_feature_stats
|
|
88
89
|
from masster.sample.plot import plot_ms2_cycle
|
|
89
90
|
from masster.sample.plot import plot_ms2_eic
|
|
90
91
|
from masster.sample.plot import plot_ms2_q1
|
|
@@ -105,12 +106,12 @@ from masster.sample.parameters import get_parameters
|
|
|
105
106
|
from masster.sample.parameters import update_parameters
|
|
106
107
|
from masster.sample.parameters import get_parameters_property
|
|
107
108
|
from masster.sample.parameters import set_parameters_property
|
|
108
|
-
from masster.sample.save import _save_featureXML
|
|
109
109
|
from masster.sample.save import export_chrom
|
|
110
110
|
from masster.sample.save import export_dda_stats
|
|
111
111
|
from masster.sample.save import export_features
|
|
112
112
|
from masster.sample.save import export_mgf
|
|
113
113
|
from masster.sample.save import save
|
|
114
|
+
# Removed internal-only import: _save_featureXML
|
|
114
115
|
|
|
115
116
|
|
|
116
117
|
class Sample:
|
|
@@ -215,7 +216,8 @@ class Sample:
|
|
|
215
216
|
# Attach module functions as class methods
|
|
216
217
|
load = load
|
|
217
218
|
load_noms1 = load_noms1
|
|
218
|
-
|
|
219
|
+
_load_ms1 = _load_ms1 # Renamed from load_study for clarity
|
|
220
|
+
load_study = _load_ms1 # Backward compatibility alias
|
|
219
221
|
save = save
|
|
220
222
|
find_features = find_features
|
|
221
223
|
find_adducts = find_adducts
|
|
@@ -230,6 +232,7 @@ class Sample:
|
|
|
230
232
|
select_features = select
|
|
231
233
|
analyze_dda = analyze_dda
|
|
232
234
|
store_history = store_history
|
|
235
|
+
update_history = store_history # Preferred name for consistency with Study class
|
|
233
236
|
get_parameters = get_parameters
|
|
234
237
|
update_parameters = update_parameters
|
|
235
238
|
get_parameters_property = get_parameters_property
|
|
@@ -242,7 +245,8 @@ class Sample:
|
|
|
242
245
|
plot_2d_oracle = plot_2d_oracle
|
|
243
246
|
plot_dda_stats = plot_dda_stats
|
|
244
247
|
plot_chrom = plot_chrom
|
|
245
|
-
|
|
248
|
+
plot_features_stats = plot_features_stats # Renamed from plot_feature_stats
|
|
249
|
+
plot_feature_stats = plot_features_stats # Backward compatibility alias
|
|
246
250
|
plot_ms2_cycle = plot_ms2_cycle
|
|
247
251
|
plot_ms2_eic = plot_ms2_eic
|
|
248
252
|
plot_ms2_q1 = plot_ms2_q1
|
|
@@ -259,30 +263,21 @@ class Sample:
|
|
|
259
263
|
_get_feature_map = _get_feature_map
|
|
260
264
|
|
|
261
265
|
# Additional method assignments for all imported functions
|
|
262
|
-
_load_sample5
|
|
263
|
-
_load_sample5_study = _load_sample5_study
|
|
264
|
-
_save_sample5 = _save_sample5
|
|
265
|
-
_delete_ms2 = _delete_ms2
|
|
266
|
+
# Removed internal-only methods: _load_sample5, _load_sample5_study, _save_sample5, _delete_ms2, _features_sync
|
|
266
267
|
_estimate_memory_usage = _estimate_memory_usage
|
|
267
268
|
_get_scan_uids = _get_scan_uids
|
|
268
269
|
_get_feature_uids = _get_feature_uids
|
|
269
|
-
_features_sync = _features_sync
|
|
270
270
|
features_delete = features_delete
|
|
271
271
|
features_filter = features_filter
|
|
272
|
-
_load_featureXML
|
|
273
|
-
_load_ms2data = _load_ms2data
|
|
274
|
-
_load_mzML = _load_mzML
|
|
275
|
-
_load_raw = _load_raw
|
|
276
|
-
_load_wiff = _load_wiff
|
|
272
|
+
# Removed internal-only load methods: _load_featureXML, _load_ms2data, _load_mzML, _load_raw, _load_wiff
|
|
277
273
|
chrom_extract = chrom_extract
|
|
278
|
-
|
|
274
|
+
_index_file = _index_file # Renamed from index_file to be internal-only
|
|
279
275
|
sanitize = sanitize
|
|
280
276
|
_clean_features_df = _clean_features_df
|
|
281
277
|
_features_deisotope = _features_deisotope
|
|
282
278
|
_get_ztscan_stats = _get_ztscan_stats
|
|
283
279
|
_spec_to_mat = _spec_to_mat
|
|
284
|
-
_save_featureXML
|
|
285
|
-
_get_adducts = _get_adducts
|
|
280
|
+
# Removed internal-only methods: _save_featureXML, _get_adducts (used only in study modules)
|
|
286
281
|
|
|
287
282
|
# defaults
|
|
288
283
|
sample_defaults = sample_defaults
|
|
@@ -303,12 +298,14 @@ class Sample:
|
|
|
303
298
|
"""
|
|
304
299
|
# Define backward compatibility aliases to hide
|
|
305
300
|
backward_compatibility_aliases = {
|
|
306
|
-
'load_study', # deprecated alias for
|
|
301
|
+
'load_study', # deprecated alias for _load_ms1
|
|
307
302
|
'filter_features', # alias for filter (deprecated naming)
|
|
308
303
|
'select_features', # alias for select (deprecated naming)
|
|
309
304
|
'features_filter', # confusing duplicate of filter
|
|
310
305
|
'features_select', # confusing duplicate of select
|
|
311
306
|
'merge_defaults', # alias for find_features_defaults (confusing)
|
|
307
|
+
'plot_feature_stats', # backward compatibility for plot_features_stats
|
|
308
|
+
'store_history', # deprecated alias for update_history
|
|
312
309
|
}
|
|
313
310
|
|
|
314
311
|
# Get all attributes from the class
|