masster 0.5.27__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.5.27 → masster-0.6.0}/PKG-INFO +99 -60
- masster-0.6.0/README.md +172 -0
- {masster-0.5.27 → masster-0.6.0}/pyproject.toml +4 -2
- {masster-0.5.27 → masster-0.6.0}/src/masster/_version.py +1 -1
- masster-0.6.0/src/masster/data/libs/aa_nort.json +240 -0
- masster-0.6.0/src/masster/data/libs/ccm_nort.json +1319 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/lib/lib.py +1 -1
- {masster-0.5.27 → masster-0.6.0}/src/masster/logger.py +0 -6
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/adducts.py +1 -1
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/defaults/find_adducts_def.py +1 -1
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/h5.py +152 -2
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/helpers.py +91 -5
- masster-0.6.0/src/masster/sample/id.py +1160 -0
- masster-0.6.0/src/masster/sample/importers.py +316 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/plot.py +175 -71
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/sample.py +18 -3
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/sample5_schema.json +99 -1
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/study_def.py +8 -12
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/id.py +59 -12
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/load.py +0 -11
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/merge.py +153 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/plot.py +197 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/study.py +3 -1
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/study5_schema.json +15 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/wizard/wizard.py +11 -12
- masster-0.5.27/README.md +0 -133
- masster-0.5.27/src/masster/data/libs/aa.csv +0 -22
- masster-0.5.27/src/masster/data/libs/ccm.csv +0 -120
- masster-0.5.27/src/masster/data/libs/urine.csv +0 -4693
- masster-0.5.27/tests/conftest.py +0 -12
- masster-0.5.27/tests/test_chromatogram.py +0 -193
- masster-0.5.27/tests/test_defaults.py +0 -384
- masster-0.5.27/tests/test_imports.py +0 -76
- masster-0.5.27/tests/test_integration.py +0 -132
- masster-0.5.27/tests/test_logger.py +0 -268
- masster-0.5.27/tests/test_parameters.py +0 -109
- masster-0.5.27/tests/test_sample.py +0 -170
- masster-0.5.27/tests/test_spectrum.py +0 -143
- masster-0.5.27/tests/test_study.py +0 -133
- masster-0.5.27/tests/test_version.py +0 -51
- {masster-0.5.27 → masster-0.6.0}/.gitignore +0 -0
- {masster-0.5.27 → masster-0.6.0}/LICENSE +0 -0
- {masster-0.5.27 → masster-0.6.0}/THIRD_PARTY_NOTICES.md +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/__init__.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/chromatogram.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/lib/__init__.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/__init__.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/lib.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/load.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/parameters.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/processing.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/quant.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/save.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/sciex.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/sample/thermo.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/spectrum.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/__init__.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/analysis.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/fill_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/identify_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/integrate_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/defaults/merge_def.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/export.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/h5.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/helpers.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/importers.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/parameters.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/processing.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/study/save.py +0 -0
- {masster-0.5.27 → masster-0.6.0}/src/masster/wizard/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: masster
|
|
3
|
-
Version: 0.5.27
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Mass spectrometry data analysis package
|
|
5
5
|
Project-URL: homepage, https://github.com/zamboni-lab/masster
|
|
6
6
|
Project-URL: repository, https://github.com/zamboni-lab/masster
|
|
@@ -726,17 +726,39 @@ Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
|
|
|
726
726
|
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
|
727
727
|
Description-Content-Type: text/markdown
|
|
728
728
|
|
|
729
|
-
#
|
|
729
|
+
# masster
|
|
730
730
|
[](https://badge.fury.io/py/masster)
|
|
731
731
|
[](https://badge.fury.io/py/masster)
|
|
732
732
|
|
|
733
|
-
**MASSter** is a Python package for the analysis of
|
|
733
|
+
**MASSter** is a Python package for the analysis of metabolomics experiments by LC-MS/MS data, with a main focus on the challenging tasks of untargeted and large-scale studies.
|
|
734
734
|
|
|
735
|
-
|
|
735
|
+
## Background and motivation
|
|
736
|
+
|
|
737
|
+
MASSter is actively used, maintained, and developed by the Zamboni Lab at ETH Zurich. The project started because many needs were unmet by the "usual" software packages (mzMine, MS-DIAL, Workflow4Metabolomics (W4M), ...), for example performance, scalability, sensitivity, robustness, speed, rapid implementation of new features, and embedding in ETL systems.
|
|
738
|
+
|
|
739
|
+
All methods include many parameters and may wrap alternative algorithms. These options are primarily relevant for advanced users. We recommend running the processing methods with the defaults or using the Wizard.
|
|
740
|
+
|
|
741
|
+
## Content
|
|
742
|
+
|
|
743
|
+
MASSter is designed to deal with DDA data, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS, which is both accurate and fast, and it was wrapped with additional code to improve isotope and adduct detection. All other functionalities are our own implementations: centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc.
|
|
744
|
+
|
|
745
|
+
MASSter was engineered to maximize result quality, sensitivity, scalability, and speed. Yes, it's Python, which can be slower than other languages, but considerable effort was spent on optimizations, including the systematic use of [Polars](https://pola.rs/), NumPy vectorization, multiprocessing, and chunking. MASSter has been tested on studies with 3,000+ LC–MS/MS samples (≈1 million MS2 spectra) and autonomously completed analyses within a few hours.
|
|
746
|
+
|
|
747
|
+
## Architecture
|
|
748
|
+
|
|
749
|
+
MASSter defines classes for Spectra, Chromatograms, Libraries, Samples, and Studies (a Study is a collection of samples, i.e. an LC–MS sequence). Users will typically work with a single `Study` object at a time. `Sample` objects are created when analyzing a batch (and saved for caching), or used for development, troubleshooting, or generating illustrations.
|
|
750
|
+
|
|
751
|
+
The analysis can be done in scripts (without user intervention, e.g. by the integrated Wizard), or interactively in notebooks, i.e. [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/).
|
|
736
752
|
|
|
737
753
|
## Prerequisites
|
|
738
754
|
|
|
739
|
-
|
|
755
|
+
You'll need to install Python (3.10-3.13, 3.14 has not been tested yet).
|
|
756
|
+
|
|
757
|
+
MASSter reads raw (Thermo), wiff (SCIEX), or mzML data. Reading vendor formats relies on .NET libraries, and is only possible on Windows. On Linux or macOS, you'll be forced to use mzML data.
|
|
758
|
+
|
|
759
|
+
**It's recommended to use data in either the vendor's raw formats (WIFF and Thermo RAW) or mzML in profile mode.** MASSter includes a sophisticated and sufficiently fast centroiding algorithm that works well across the full dynamic range and will only act on spectra that are relevant. In our tests with data from different vendors, the centroiding performed much better than most vendor implementations (which are primarily proteomics-centric).
|
|
760
|
+
|
|
761
|
+
If you still want to convert raw data to centroided mzML, please use CentroidR: https://github.com/Adafede/CentroidR/tree/0.0.0.9001
|
|
740
762
|
|
|
741
763
|
## Installation
|
|
742
764
|
|
|
@@ -744,48 +766,33 @@ This is a poorly documented, stable branch of the development codebase in use in
|
|
|
744
766
|
pip install masster
|
|
745
767
|
```
|
|
746
768
|
|
|
747
|
-
##
|
|
748
|
-
|
|
769
|
+
## Getting started
|
|
770
|
+
**The quickest way to use, or learn how to use, MASSter is the integrated Wizard**, which ideally takes care of everything automatically.
|
|
749
771
|
|
|
772
|
+
The Wizard only needs to know where to find the MS files and where to store the results.
|
|
750
773
|
```python
|
|
751
|
-
import
|
|
752
|
-
wiz =
|
|
753
|
-
source=r'..\..\folder_with_raw_data',
|
|
754
|
-
folder=r'..\..folder_to_store_results'
|
|
774
|
+
from masster import Wizard
|
|
775
|
+
wiz = Wizard(
|
|
776
|
+
source=r'..\..\folder_with_raw_data', # where to find the data
|
|
777
|
+
folder=r'..\..\folder_to_store_results', # where to save the results
|
|
778
|
+
ncores=10 # this is optional
|
|
755
779
|
)
|
|
756
|
-
wiz.
|
|
780
|
+
wiz.test_and_run()
|
|
757
781
|
```
|
|
758
782
|
|
|
759
|
-
This will
|
|
760
|
-
|
|
761
|
-
### Basic workflow for analyzing a single sample
|
|
762
|
-
```python
|
|
763
|
-
import masster
|
|
764
|
-
sample = masster.Sample(filename='...') # full path to a *.raw, *.wiff, or *.mzML file
|
|
765
|
-
# process
|
|
766
|
-
sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
|
|
767
|
-
sample.find_adducts()
|
|
768
|
-
sample.find_ms2()
|
|
769
|
-
|
|
770
|
-
# access data
|
|
771
|
-
sample.features_df
|
|
783
|
+
This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if successful, run the full workflow using parallel processes. Once the processing is over, navigate to `folder` to see what happened...
|
|
772
784
|
|
|
773
|
-
|
|
774
|
-
sample.save() # stores to *.sample5, our custom hdf5 format
|
|
775
|
-
sample.export_mgf()
|
|
776
|
-
|
|
777
|
-
# some plots
|
|
778
|
-
sample.plot_bpc()
|
|
779
|
-
sample.plot_tic()
|
|
780
|
-
sample.plot_2d()
|
|
781
|
-
sample.plot_features_stats()
|
|
785
|
+
If you want to interact with your data, we recommend using [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/) to open the `*.study5` file, for example:
|
|
782
786
|
|
|
783
|
-
|
|
784
|
-
|
|
787
|
+
```bash
|
|
788
|
+
# use marimo to open the script created by the Wizard
|
|
789
|
+
marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
|
|
790
|
+
# or, if you use uv to manage an environment with masster
|
|
791
|
+
uv run marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
|
|
785
792
|
```
|
|
786
793
|
|
|
787
|
-
### Basic Workflow for analyzing LC-MS study with
|
|
788
|
-
|
|
794
|
+
### Basic Workflow for analyzing an LC-MS study with 1-1000+ samples
|
|
795
|
+
In MASSter, the main object for data analysis is a `Study`, which consists of a bunch of `Samples`.
|
|
789
796
|
```python
|
|
790
797
|
import masster
|
|
791
798
|
# Initialize the Study object with the default folder
|
|
@@ -797,17 +804,20 @@ study.add(r'D:\...\...\...\*.wiff')
|
|
|
797
804
|
# Perform retention time correction
|
|
798
805
|
study.align(rt_tol=2.0)
|
|
799
806
|
study.plot_alignment()
|
|
800
|
-
study.plot_bpc()
|
|
801
807
|
study.plot_rt_correction()
|
|
808
|
+
study.plot_bpc()
|
|
802
809
|
|
|
803
810
|
# Find consensus features
|
|
804
|
-
study.merge(min_samples=3)
|
|
811
|
+
study.merge(min_samples=3) # this will keep only the features that were found in 3 or more samples
|
|
805
812
|
study.plot_consensus_2d()
|
|
806
813
|
|
|
807
|
-
#
|
|
814
|
+
# retrieve information
|
|
815
|
+
study.info()
|
|
816
|
+
|
|
817
|
+
# Retrieve EICs for quantification
|
|
808
818
|
study.fill()
|
|
809
819
|
|
|
810
|
-
# Integrate according to consensus metadata
|
|
820
|
+
# Integrate EICs according to consensus metadata
|
|
811
821
|
study.integrate()
|
|
812
822
|
|
|
813
823
|
# export results
|
|
@@ -823,32 +833,61 @@ study.save()
|
|
|
823
833
|
study.plot_samples_pca()
|
|
824
834
|
study.plot_samples_umap()
|
|
825
835
|
study.plot_samples_2d()
|
|
826
|
-
```
|
|
827
836
|
|
|
828
|
-
|
|
829
|
-
|
|
837
|
+
# To know more about the available methods...
|
|
838
|
+
dir(study)
|
|
839
|
+
```
|
|
840
|
+
The information is stored in Polars data frames, in particular:
|
|
841
|
+
```python
|
|
842
|
+
# information on samples
|
|
843
|
+
study.samples_df
|
|
844
|
+
# information on consensus features
|
|
845
|
+
study.consensus_df
|
|
846
|
+
# information on original features from ALL samples, including MS2 and EICs
|
|
847
|
+
study.features_df
|
|
848
|
+
```
|
|
830
849
|
|
|
850
|
+
### Analysis of a single sample
|
|
851
|
+
For troubleshooting, exploration, or just to create a figure on a single file, you might want to open and process a single file:
|
|
831
852
|
```python
|
|
832
|
-
from masster import
|
|
853
|
+
from masster import Sample
|
|
854
|
+
sample = Sample(filename='...') # full path to a *.raw, *.wiff, *.mzML, or *.sample5 file
|
|
855
|
+
# peek into sample
|
|
856
|
+
sample.info()
|
|
857
|
+
|
|
858
|
+
# process
|
|
859
|
+
sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
|
|
860
|
+
sample.find_adducts()
|
|
861
|
+
sample.find_ms2()
|
|
833
862
|
|
|
834
|
-
#
|
|
835
|
-
|
|
836
|
-
folder="./output",
|
|
837
|
-
num_cores=8)
|
|
863
|
+
# access data
|
|
864
|
+
sample.features_df
|
|
838
865
|
|
|
839
|
-
#
|
|
840
|
-
|
|
866
|
+
# save results
|
|
867
|
+
sample.save() # stores to *.sample5, our custom hdf5 format
|
|
868
|
+
sample.export_mgf()
|
|
841
869
|
|
|
842
|
-
#
|
|
843
|
-
|
|
844
|
-
|
|
870
|
+
# some plots
|
|
871
|
+
sample.plot_bpc()
|
|
872
|
+
sample.plot_tic()
|
|
873
|
+
sample.plot_2d()
|
|
874
|
+
sample.plot_features_stats()
|
|
845
875
|
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
```bash
|
|
849
|
-
python -c "from masster import Wizard; wiz = Wizard(source='D:/Data/studies/my_study/raw', folder='D:/Data/studies/my_study/masster'); wiz.create_scripts(); wiz.test_and_run()"
|
|
876
|
+
# explore methods
|
|
877
|
+
dir(sample)
|
|
850
878
|
```
|
|
851
879
|
|
|
880
|
+
## Disclaimer
|
|
881
|
+
|
|
882
|
+
**MASSter is research software under active development.** While we use it extensively in our lab and strive for quality and reliability, please be aware:
|
|
883
|
+
|
|
884
|
+
- **No warranties**: The software is provided "as is" without any warranty of any kind, express or implied
|
|
885
|
+
- **Backward compatibility**: We do not guarantee backward compatibility between versions. Breaking changes may occur as we improve the software
|
|
886
|
+
- **Performance**: While optimized for our workflows, performance may vary depending on your data and system configuration
|
|
887
|
+
- **Results**: We do our best to ensure accuracy, but you should validate results independently for your research
|
|
888
|
+
- **Support**: This is an academic project with limited resources. At the moment, we do not provide external user support.
|
|
889
|
+
- **Production use**: If you plan to use MASSter in production or critical workflows, thorough testing with your data is recommended
|
|
890
|
+
|
|
852
891
|
## License
|
|
853
892
|
GNU Affero General Public License v3
|
|
854
893
|
|
|
@@ -858,4 +897,4 @@ See the [LICENSE](LICENSE) file for details.
|
|
|
858
897
|
This project uses several third-party libraries, including pyOpenMS which is licensed under the BSD 3-Clause License. For complete information about third-party dependencies and their licenses, see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md).
|
|
859
898
|
|
|
860
899
|
## Citation
|
|
861
|
-
If you use
|
|
900
|
+
If you use MASSter in your research, please cite this repository.
|
masster-0.6.0/README.md
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# masster
|
|
2
|
+
[](https://badge.fury.io/py/masster)
|
|
3
|
+
[](https://badge.fury.io/py/masster)
|
|
4
|
+
|
|
5
|
+
**MASSter** is a Python package for the analysis of metabolomics experiments by LC-MS/MS data, with a main focus on the challenging tasks of untargeted and large-scale studies.
|
|
6
|
+
|
|
7
|
+
## Background and motivation
|
|
8
|
+
|
|
9
|
+
MASSter is actively used, maintained, and developed by the Zamboni Lab at ETH Zurich. The project started because many needs were unmet by the "usual" software packages (mzMine, MS-DIAL, Workflow4Metabolomics (W4M), ...), for example performance, scalability, sensitivity, robustness, speed, rapid implementation of new features, and embedding in ETL systems.
|
|
10
|
+
|
|
11
|
+
All methods include many parameters and may wrap alternative algorithms. These options are primarily relevant for advanced users. We recommend running the processing methods with the defaults or using the Wizard.
|
|
12
|
+
|
|
13
|
+
## Content
|
|
14
|
+
|
|
15
|
+
MASSter is designed to deal with DDA data, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS, which is both accurate and fast, and it was wrapped with additional code to improve isotope and adduct detection. All other functionalities are our own implementations: centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc.
|
|
16
|
+
|
|
17
|
+
MASSter was engineered to maximize result quality, sensitivity, scalability, and speed. Yes, it's Python, which can be slower than other languages, but considerable effort was spent on optimizations, including the systematic use of [Polars](https://pola.rs/), NumPy vectorization, multiprocessing, and chunking. MASSter has been tested on studies with 3,000+ LC–MS/MS samples (≈1 million MS2 spectra) and autonomously completed analyses within a few hours.
|
|
18
|
+
|
|
19
|
+
## Architecture
|
|
20
|
+
|
|
21
|
+
MASSter defines classes for Spectra, Chromatograms, Libraries, Samples, and Studies (a Study is a collection of samples, i.e. an LC–MS sequence). Users will typically work with a single `Study` object at a time. `Sample` objects are created when analyzing a batch (and saved for caching), or used for development, troubleshooting, or generating illustrations.
|
|
22
|
+
|
|
23
|
+
The analysis can be done in scripts (without user intervention, e.g. by the integrated Wizard), or interactively in notebooks, i.e. [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/).
|
|
24
|
+
|
|
25
|
+
## Prerequisites
|
|
26
|
+
|
|
27
|
+
You'll need to install Python (3.10-3.13, 3.14 has not been tested yet).
|
|
28
|
+
|
|
29
|
+
MASSter reads raw (Thermo), wiff (SCIEX), or mzML data. Reading vendor formats relies on .NET libraries, and is only possible on Windows. On Linux or macOS, you'll be forced to use mzML data.
|
|
30
|
+
|
|
31
|
+
**It's recommended to use data in either the vendor's raw formats (WIFF and Thermo RAW) or mzML in profile mode.** MASSter includes a sophisticated and sufficiently fast centroiding algorithm that works well across the full dynamic range and will only act on spectra that are relevant. In our tests with data from different vendors, the centroiding performed much better than most vendor implementations (which are primarily proteomics-centric).
|
|
32
|
+
|
|
33
|
+
If you still want to convert raw data to centroided mzML, please use CentroidR: https://github.com/Adafede/CentroidR/tree/0.0.0.9001
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install masster
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Getting started
|
|
42
|
+
**The quickest way to use, or learn how to use, MASSter is the integrated Wizard**, which ideally takes care of everything automatically.
|
|
43
|
+
|
|
44
|
+
The Wizard only needs to know where to find the MS files and where to store the results.
|
|
45
|
+
```python
|
|
46
|
+
from masster import Wizard
|
|
47
|
+
wiz = Wizard(
|
|
48
|
+
source=r'..\..\folder_with_raw_data', # where to find the data
|
|
49
|
+
folder=r'..\..\folder_to_store_results', # where to save the results
|
|
50
|
+
ncores=10 # this is optional
|
|
51
|
+
)
|
|
52
|
+
wiz.test_and_run()
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if successful, run the full workflow using parallel processes. Once the processing is over, navigate to `folder` to see what happened...
|
|
56
|
+
|
|
57
|
+
If you want to interact with your data, we recommend using [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/) to open the `*.study5` file, for example:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# use marimo to open the script created by the Wizard
|
|
61
|
+
marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
|
|
62
|
+
# or, if you use uv to manage an environment with masster
|
|
63
|
+
uv run marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Basic Workflow for analyzing an LC-MS study with 1-1000+ samples
|
|
67
|
+
In MASSter, the main object for data analysis is a `Study`, which consists of a bunch of `Samples`.
|
|
68
|
+
```python
|
|
69
|
+
import masster
|
|
70
|
+
# Initialize the Study object with the default folder
|
|
71
|
+
study = masster.Study(folder=r'D:\...\mylcms')
|
|
72
|
+
|
|
73
|
+
# Load data from folder with raw data, here: WIFF
|
|
74
|
+
study.add(r'D:\...\...\...\*.wiff')
|
|
75
|
+
|
|
76
|
+
# Perform retention time correction
|
|
77
|
+
study.align(rt_tol=2.0)
|
|
78
|
+
study.plot_alignment()
|
|
79
|
+
study.plot_rt_correction()
|
|
80
|
+
study.plot_bpc()
|
|
81
|
+
|
|
82
|
+
# Find consensus features
|
|
83
|
+
study.merge(min_samples=3) # this will keep only the features that were found in 3 or more samples
|
|
84
|
+
study.plot_consensus_2d()
|
|
85
|
+
|
|
86
|
+
# retrieve information
|
|
87
|
+
study.info()
|
|
88
|
+
|
|
89
|
+
# Retrieve EICs for quantification
|
|
90
|
+
study.fill()
|
|
91
|
+
|
|
92
|
+
# Integrate EICs according to consensus metadata
|
|
93
|
+
study.integrate()
|
|
94
|
+
|
|
95
|
+
# export results
|
|
96
|
+
study.export_mgf()
|
|
97
|
+
study.export_mztab()
|
|
98
|
+
study.export_xlsx()
|
|
99
|
+
study.export_parquet()
|
|
100
|
+
|
|
101
|
+
# Save the study to .study5
|
|
102
|
+
study.save()
|
|
103
|
+
|
|
104
|
+
# Some of the plots...
|
|
105
|
+
study.plot_samples_pca()
|
|
106
|
+
study.plot_samples_umap()
|
|
107
|
+
study.plot_samples_2d()
|
|
108
|
+
|
|
109
|
+
# To know more about the available methods...
|
|
110
|
+
dir(study)
|
|
111
|
+
```
|
|
112
|
+
The information is stored in Polars data frames, in particular:
|
|
113
|
+
```python
|
|
114
|
+
# information on samples
|
|
115
|
+
study.samples_df
|
|
116
|
+
# information on consensus features
|
|
117
|
+
study.consensus_df
|
|
118
|
+
# information on original features from ALL samples, including MS2 and EICs
|
|
119
|
+
study.features_df
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Analysis of a single sample
|
|
123
|
+
For troubleshooting, exploration, or just to create a figure on a single file, you might want to open and process a single file:
|
|
124
|
+
```python
|
|
125
|
+
from masster import Sample
|
|
126
|
+
sample = Sample(filename='...') # full path to a *.raw, *.wiff, *.mzML, or *.sample5 file
|
|
127
|
+
# peek into sample
|
|
128
|
+
sample.info()
|
|
129
|
+
|
|
130
|
+
# process
|
|
131
|
+
sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
|
|
132
|
+
sample.find_adducts()
|
|
133
|
+
sample.find_ms2()
|
|
134
|
+
|
|
135
|
+
# access data
|
|
136
|
+
sample.features_df
|
|
137
|
+
|
|
138
|
+
# save results
|
|
139
|
+
sample.save() # stores to *.sample5, our custom hdf5 format
|
|
140
|
+
sample.export_mgf()
|
|
141
|
+
|
|
142
|
+
# some plots
|
|
143
|
+
sample.plot_bpc()
|
|
144
|
+
sample.plot_tic()
|
|
145
|
+
sample.plot_2d()
|
|
146
|
+
sample.plot_features_stats()
|
|
147
|
+
|
|
148
|
+
# explore methods
|
|
149
|
+
dir(sample)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Disclaimer
|
|
153
|
+
|
|
154
|
+
**MASSter is research software under active development.** While we use it extensively in our lab and strive for quality and reliability, please be aware:
|
|
155
|
+
|
|
156
|
+
- **No warranties**: The software is provided "as is" without any warranty of any kind, express or implied
|
|
157
|
+
- **Backward compatibility**: We do not guarantee backward compatibility between versions. Breaking changes may occur as we improve the software
|
|
158
|
+
- **Performance**: While optimized for our workflows, performance may vary depending on your data and system configuration
|
|
159
|
+
- **Results**: We do our best to ensure accuracy, but you should validate results independently for your research
|
|
160
|
+
- **Support**: This is an academic project with limited resources. At the moment, we do not provide external user support.
|
|
161
|
+
- **Production use**: If you plan to use MASSter in production or critical workflows, thorough testing with your data is recommended
|
|
162
|
+
|
|
163
|
+
## License
|
|
164
|
+
GNU Affero General Public License v3
|
|
165
|
+
|
|
166
|
+
See the [LICENSE](LICENSE) file for details.
|
|
167
|
+
|
|
168
|
+
### Third-Party Licenses
|
|
169
|
+
This project uses several third-party libraries, including pyOpenMS which is licensed under the BSD 3-Clause License. For complete information about third-party dependencies and their licenses, see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md).
|
|
170
|
+
|
|
171
|
+
## Citation
|
|
172
|
+
If you use MASSter in your research, please cite this repository.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
[project]
|
|
3
3
|
name = "masster"
|
|
4
|
-
version = "0.5.27"
|
|
4
|
+
version = "0.6.0"
|
|
5
5
|
description = "Mass spectrometry data analysis package"
|
|
6
6
|
authors = [
|
|
7
7
|
{ name = "Zamboni Lab" }
|
|
@@ -88,7 +88,6 @@ build-backend = "hatchling.build"
|
|
|
88
88
|
[tool.hatch.build.targets.sdist]
|
|
89
89
|
include = [
|
|
90
90
|
"/src",
|
|
91
|
-
"/tests",
|
|
92
91
|
"/LICENSE",
|
|
93
92
|
"/README.md",
|
|
94
93
|
"/THIRD_PARTY_NOTICES.md",
|
|
@@ -100,6 +99,9 @@ packages = ["src/masster"]
|
|
|
100
99
|
include = [
|
|
101
100
|
"/THIRD_PARTY_NOTICES.md",
|
|
102
101
|
]
|
|
102
|
+
exclude = [
|
|
103
|
+
"/tests",
|
|
104
|
+
]
|
|
103
105
|
|
|
104
106
|
# Testing configuration
|
|
105
107
|
[tool.pytest.ini_options]
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1.0",
|
|
3
|
+
"creation_date": "2025-10-30T14:38:00.595771",
|
|
4
|
+
"description": "Converted from CSV file aa.csv containing 21 records",
|
|
5
|
+
"source_file": "aa.csv",
|
|
6
|
+
"record_count": 21,
|
|
7
|
+
"data": [
|
|
8
|
+
{
|
|
9
|
+
"Name": "L-Glutamic acid",
|
|
10
|
+
"Formula": "C5H9NO4",
|
|
11
|
+
"SMILES": "N[C@@H](CCC(O)=O)C(O)=O",
|
|
12
|
+
"InChIKey": "WHUUTDBJXJRKMK-VKHMYHEASA-N",
|
|
13
|
+
"db_id": "CID:33032",
|
|
14
|
+
"db": "pubchem",
|
|
15
|
+
"rt": "",
|
|
16
|
+
"rt_min": "",
|
|
17
|
+
"rt_max": ""
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"Name": "L-Tyrosine",
|
|
21
|
+
"Formula": "C9H11NO3",
|
|
22
|
+
"SMILES": "N[C@@H](CC1=CC=C(O)C=C1)C(O)=O",
|
|
23
|
+
"InChIKey": "OUYCCCASQSFEME-QMMMGPOBSA-N",
|
|
24
|
+
"db_id": "CID:6057",
|
|
25
|
+
"db": "pubchem",
|
|
26
|
+
"rt": "",
|
|
27
|
+
"rt_min": "",
|
|
28
|
+
"rt_max": ""
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"Name": "L-Phenylalanine",
|
|
32
|
+
"Formula": "C9H11NO2",
|
|
33
|
+
"SMILES": "N[C@@H](CC1=CC=CC=C1)C(O)=O",
|
|
34
|
+
"InChIKey": "COLNVLDHVKWLRT-QMMMGPOBSA-N",
|
|
35
|
+
"db_id": "CID:6140",
|
|
36
|
+
"db": "pubchem",
|
|
37
|
+
"rt": "",
|
|
38
|
+
"rt_min": "",
|
|
39
|
+
"rt_max": ""
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"Name": "L-Alanine",
|
|
43
|
+
"Formula": "C3H7NO2",
|
|
44
|
+
"SMILES": "C[C@H](N)C(O)=O",
|
|
45
|
+
"InChIKey": "QNAYBMKLOCPYGJ-REOHCLBHSA-N",
|
|
46
|
+
"db_id": "CID:5950",
|
|
47
|
+
"db": "pubchem",
|
|
48
|
+
"rt": "",
|
|
49
|
+
"rt_min": "",
|
|
50
|
+
"rt_max": ""
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"Name": "L-Proline",
|
|
54
|
+
"Formula": "C5H9NO2",
|
|
55
|
+
"SMILES": "OC(=O)[C@@H]1CCCN1",
|
|
56
|
+
"InChIKey": "ONIBWKKTOPOVIA-BYPYZUCNSA-N",
|
|
57
|
+
"db_id": "CID:145742",
|
|
58
|
+
"db": "pubchem",
|
|
59
|
+
"rt": "",
|
|
60
|
+
"rt_min": "",
|
|
61
|
+
"rt_max": ""
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"Name": "L-Threonine",
|
|
65
|
+
"Formula": "C4H9NO3",
|
|
66
|
+
"SMILES": "C[C@@H](O)[C@H](N)C(O)=O",
|
|
67
|
+
"InChIKey": "AYFVYJQAPQTCCC-GBXIJSLDSA-N",
|
|
68
|
+
"db_id": "CID:6288",
|
|
69
|
+
"db": "pubchem",
|
|
70
|
+
"rt": "",
|
|
71
|
+
"rt_min": "",
|
|
72
|
+
"rt_max": ""
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
"Name": "L-Asparagine",
|
|
76
|
+
"Formula": "C4H8N2O3",
|
|
77
|
+
"SMILES": "N[C@@H](CC(N)=O)C(O)=O",
|
|
78
|
+
"InChIKey": "DCXYFEDJOCDNAF-REOHCLBHSA-N",
|
|
79
|
+
"db_id": "CID:6267",
|
|
80
|
+
"db": "pubchem",
|
|
81
|
+
"rt": "",
|
|
82
|
+
"rt_min": "",
|
|
83
|
+
"rt_max": ""
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
"Name": "L-Isoleucine",
|
|
87
|
+
"Formula": "C6H13NO2",
|
|
88
|
+
"SMILES": "CC[C@H](C)[C@H](N)C(O)=O",
|
|
89
|
+
"InChIKey": "AGPKZVBTJJNPAG-WHFBIAKZSA-N",
|
|
90
|
+
"db_id": "CID:6306",
|
|
91
|
+
"db": "pubchem",
|
|
92
|
+
"rt": "",
|
|
93
|
+
"rt_min": "",
|
|
94
|
+
"rt_max": ""
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"Name": "L-Histidine",
|
|
98
|
+
"Formula": "C6H9N3O2",
|
|
99
|
+
"SMILES": "N[C@@H](CC1=CN=CN1)C(O)=O",
|
|
100
|
+
"InChIKey": "HNDVDQJCIGZPNO-YFKPBYRVSA-N",
|
|
101
|
+
"db_id": "CID:6274",
|
|
102
|
+
"db": "pubchem",
|
|
103
|
+
"rt": "",
|
|
104
|
+
"rt_min": "",
|
|
105
|
+
"rt_max": ""
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
"Name": "L-Lysine",
|
|
109
|
+
"Formula": "C6H14N2O2",
|
|
110
|
+
"SMILES": "NCCCC[C@H](N)C(O)=O",
|
|
111
|
+
"InChIKey": "KDXKERNSBIXSRK-YFKPBYRVSA-N",
|
|
112
|
+
"db_id": "CID:5962",
|
|
113
|
+
"db": "pubchem",
|
|
114
|
+
"rt": "",
|
|
115
|
+
"rt_min": "",
|
|
116
|
+
"rt_max": ""
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"Name": "L-Serine",
|
|
120
|
+
"Formula": "C3H7NO3",
|
|
121
|
+
"SMILES": "N[C@@H](CO)C(O)=O",
|
|
122
|
+
"InChIKey": "MTCFGRXMJLQNBG-REOHCLBHSA-N",
|
|
123
|
+
"db_id": "CID:5951",
|
|
124
|
+
"db": "pubchem",
|
|
125
|
+
"rt": "",
|
|
126
|
+
"rt_min": "",
|
|
127
|
+
"rt_max": ""
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
"Name": "L-Aspartic acid",
|
|
131
|
+
"Formula": "C4H7NO4",
|
|
132
|
+
"SMILES": "N[C@@H](CC(O)=O)C(O)=O",
|
|
133
|
+
"InChIKey": "CKLJMWTZIZZHCS-REOHCLBHSA-N",
|
|
134
|
+
"db_id": "CID:5960",
|
|
135
|
+
"db": "pubchem",
|
|
136
|
+
"rt": "",
|
|
137
|
+
"rt_min": "",
|
|
138
|
+
"rt_max": ""
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"Name": "L-Cystine",
|
|
142
|
+
"Formula": "C6H12N2O4S2",
|
|
143
|
+
"SMILES": "N[C@@H](CSSC[C@H](N)C(O)=O)C(O)=O",
|
|
144
|
+
"InChIKey": "LEVWYRKDKASIDU-IMJSIDKUSA-N",
|
|
145
|
+
"db_id": "CID:67678",
|
|
146
|
+
"db": "pubchem",
|
|
147
|
+
"rt": "",
|
|
148
|
+
"rt_min": "",
|
|
149
|
+
"rt_max": ""
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
"Name": "L-Arginine",
|
|
153
|
+
"Formula": "C6H14N4O2",
|
|
154
|
+
"SMILES": "N[C@@H](CCCNC(N)=N)C(O)=O",
|
|
155
|
+
"InChIKey": "ODKSFYDXXFIFQN-BYPYZUCNSA-N",
|
|
156
|
+
"db_id": "CID:6322",
|
|
157
|
+
"db": "pubchem",
|
|
158
|
+
"rt": "",
|
|
159
|
+
"rt_min": "",
|
|
160
|
+
"rt_max": ""
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"Name": "L-Cysteine",
|
|
164
|
+
"Formula": "C3H7NO2S",
|
|
165
|
+
"SMILES": "N[C@@H](CS)C(O)=O",
|
|
166
|
+
"InChIKey": "XUJNEKJLAYXESH-REOHCLBHSA-N",
|
|
167
|
+
"db_id": "CID:5862",
|
|
168
|
+
"db": "pubchem",
|
|
169
|
+
"rt": "",
|
|
170
|
+
"rt_min": "",
|
|
171
|
+
"rt_max": ""
|
|
172
|
+
},
|
|
173
|
+
{
|
|
174
|
+
"Name": "L-Glutamine",
|
|
175
|
+
"Formula": "C5H10N2O3",
|
|
176
|
+
"SMILES": "N[C@@H](CCC(N)=O)C(O)=O",
|
|
177
|
+
"InChIKey": "ZDXPYRJPNDTMRX-VKHMYHEASA-N",
|
|
178
|
+
"db_id": "CID:5961",
|
|
179
|
+
"db": "pubchem",
|
|
180
|
+
"rt": "",
|
|
181
|
+
"rt_min": "",
|
|
182
|
+
"rt_max": ""
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
"Name": "L-Leucine",
|
|
186
|
+
"Formula": "C6H13NO2",
|
|
187
|
+
"SMILES": "CC(C)C[C@H](N)C(O)=O",
|
|
188
|
+
"InChIKey": "ROHFNLRQFUQHCH-YFKPBYRVSA-N",
|
|
189
|
+
"db_id": "CID:6106",
|
|
190
|
+
"db": "pubchem",
|
|
191
|
+
"rt": "",
|
|
192
|
+
"rt_min": "",
|
|
193
|
+
"rt_max": ""
|
|
194
|
+
},
|
|
195
|
+
{
|
|
196
|
+
"Name": "L-Methionine",
|
|
197
|
+
"Formula": "C5H11NO2S",
|
|
198
|
+
"SMILES": "CSCC[C@H](N)C(O)=O",
|
|
199
|
+
"InChIKey": "FFEARJCKVFRZRR-BYPYZUCNSA-N",
|
|
200
|
+
"db_id": "CID:6137",
|
|
201
|
+
"db": "pubchem",
|
|
202
|
+
"rt": "",
|
|
203
|
+
"rt_min": "",
|
|
204
|
+
"rt_max": ""
|
|
205
|
+
},
|
|
206
|
+
{
|
|
207
|
+
"Name": "L-Valine",
|
|
208
|
+
"Formula": "C5H11NO2",
|
|
209
|
+
"SMILES": "CC(C)[C@H](N)C(O)=O",
|
|
210
|
+
"InChIKey": "KZSNJWFQEVHDMF-BYPYZUCNSA-N",
|
|
211
|
+
"db_id": "CID:6287",
|
|
212
|
+
"db": "pubchem",
|
|
213
|
+
"rt": "",
|
|
214
|
+
"rt_min": "",
|
|
215
|
+
"rt_max": ""
|
|
216
|
+
},
|
|
217
|
+
{
|
|
218
|
+
"Name": "L-Tryptophan",
|
|
219
|
+
"Formula": "C11H12N2O2",
|
|
220
|
+
"SMILES": "N[C@@H](CC1=CNC2=C1C=CC=C2)C(O)=O",
|
|
221
|
+
"InChIKey": "QIVBCDIJIAJPQS-VIFPVBQESA-N",
|
|
222
|
+
"db_id": "CID:6305",
|
|
223
|
+
"db": "pubchem",
|
|
224
|
+
"rt": "",
|
|
225
|
+
"rt_min": "",
|
|
226
|
+
"rt_max": ""
|
|
227
|
+
},
|
|
228
|
+
{
|
|
229
|
+
"Name": "Glycine",
|
|
230
|
+
"Formula": "C2H5NO2",
|
|
231
|
+
"SMILES": "NCC(O)=O",
|
|
232
|
+
"InChIKey": "DHMQDGOQFOQNFH-UHFFFAOYSA-N",
|
|
233
|
+
"db_id": "CID:750",
|
|
234
|
+
"db": "pubchem",
|
|
235
|
+
"rt": "",
|
|
236
|
+
"rt_min": "",
|
|
237
|
+
"rt_max": ""
|
|
238
|
+
}
|
|
239
|
+
]
|
|
240
|
+
}
|