masster 0.5.27__tar.gz → 0.5.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (81) hide show
  1. {masster-0.5.27 → masster-0.5.28}/PKG-INFO +101 -60
  2. masster-0.5.28/README.md +174 -0
  3. {masster-0.5.27 → masster-0.5.28}/pyproject.toml +4 -2
  4. masster-0.5.27/README.md +0 -133
  5. masster-0.5.27/tests/conftest.py +0 -12
  6. masster-0.5.27/tests/test_chromatogram.py +0 -193
  7. masster-0.5.27/tests/test_defaults.py +0 -384
  8. masster-0.5.27/tests/test_imports.py +0 -76
  9. masster-0.5.27/tests/test_integration.py +0 -132
  10. masster-0.5.27/tests/test_logger.py +0 -268
  11. masster-0.5.27/tests/test_parameters.py +0 -109
  12. masster-0.5.27/tests/test_sample.py +0 -170
  13. masster-0.5.27/tests/test_spectrum.py +0 -143
  14. masster-0.5.27/tests/test_study.py +0 -133
  15. masster-0.5.27/tests/test_version.py +0 -51
  16. {masster-0.5.27 → masster-0.5.28}/.gitignore +0 -0
  17. {masster-0.5.27 → masster-0.5.28}/LICENSE +0 -0
  18. {masster-0.5.27 → masster-0.5.28}/THIRD_PARTY_NOTICES.md +0 -0
  19. {masster-0.5.27 → masster-0.5.28}/src/masster/__init__.py +0 -0
  20. {masster-0.5.27 → masster-0.5.28}/src/masster/_version.py +0 -0
  21. {masster-0.5.27 → masster-0.5.28}/src/masster/chromatogram.py +0 -0
  22. {masster-0.5.27 → masster-0.5.28}/src/masster/data/libs/aa.csv +0 -0
  23. {masster-0.5.27 → masster-0.5.28}/src/masster/data/libs/ccm.csv +0 -0
  24. {masster-0.5.27 → masster-0.5.28}/src/masster/data/libs/urine.csv +0 -0
  25. {masster-0.5.27 → masster-0.5.28}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  26. {masster-0.5.27 → masster-0.5.28}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  27. {masster-0.5.27 → masster-0.5.28}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  28. {masster-0.5.27 → masster-0.5.28}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  29. {masster-0.5.27 → masster-0.5.28}/src/masster/lib/__init__.py +0 -0
  30. {masster-0.5.27 → masster-0.5.28}/src/masster/lib/lib.py +0 -0
  31. {masster-0.5.27 → masster-0.5.28}/src/masster/logger.py +0 -0
  32. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/__init__.py +0 -0
  33. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/adducts.py +0 -0
  34. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/defaults/__init__.py +0 -0
  35. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  36. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/defaults/find_features_def.py +0 -0
  37. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  38. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  39. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/defaults/sample_def.py +0 -0
  40. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/h5.py +0 -0
  41. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/helpers.py +0 -0
  42. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/lib.py +0 -0
  43. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/load.py +0 -0
  44. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/parameters.py +0 -0
  45. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/plot.py +0 -0
  46. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/processing.py +0 -0
  47. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/quant.py +0 -0
  48. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/sample.py +0 -0
  49. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/sample5_schema.json +0 -0
  50. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/save.py +0 -0
  51. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/sciex.py +0 -0
  52. {masster-0.5.27 → masster-0.5.28}/src/masster/sample/thermo.py +0 -0
  53. {masster-0.5.27 → masster-0.5.28}/src/masster/spectrum.py +0 -0
  54. {masster-0.5.27 → masster-0.5.28}/src/masster/study/__init__.py +0 -0
  55. {masster-0.5.27 → masster-0.5.28}/src/masster/study/analysis.py +0 -0
  56. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/__init__.py +0 -0
  57. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/align_def.py +0 -0
  58. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/export_def.py +0 -0
  59. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/fill_def.py +0 -0
  60. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/find_consensus_def.py +0 -0
  61. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/find_ms2_def.py +0 -0
  62. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/identify_def.py +0 -0
  63. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  64. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/integrate_def.py +0 -0
  65. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/merge_def.py +0 -0
  66. {masster-0.5.27 → masster-0.5.28}/src/masster/study/defaults/study_def.py +0 -0
  67. {masster-0.5.27 → masster-0.5.28}/src/masster/study/export.py +0 -0
  68. {masster-0.5.27 → masster-0.5.28}/src/masster/study/h5.py +0 -0
  69. {masster-0.5.27 → masster-0.5.28}/src/masster/study/helpers.py +0 -0
  70. {masster-0.5.27 → masster-0.5.28}/src/masster/study/id.py +0 -0
  71. {masster-0.5.27 → masster-0.5.28}/src/masster/study/importers.py +0 -0
  72. {masster-0.5.27 → masster-0.5.28}/src/masster/study/load.py +0 -0
  73. {masster-0.5.27 → masster-0.5.28}/src/masster/study/merge.py +0 -0
  74. {masster-0.5.27 → masster-0.5.28}/src/masster/study/parameters.py +0 -0
  75. {masster-0.5.27 → masster-0.5.28}/src/masster/study/plot.py +0 -0
  76. {masster-0.5.27 → masster-0.5.28}/src/masster/study/processing.py +0 -0
  77. {masster-0.5.27 → masster-0.5.28}/src/masster/study/save.py +0 -0
  78. {masster-0.5.27 → masster-0.5.28}/src/masster/study/study.py +0 -0
  79. {masster-0.5.27 → masster-0.5.28}/src/masster/study/study5_schema.json +0 -0
  80. {masster-0.5.27 → masster-0.5.28}/src/masster/wizard/__init__.py +0 -0
  81. {masster-0.5.27 → masster-0.5.28}/src/masster/wizard/wizard.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.27
3
+ Version: 0.5.28
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -726,17 +726,39 @@ Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
726
726
  Requires-Dist: pytest>=7.0.0; extra == 'test'
727
727
  Description-Content-Type: text/markdown
728
728
 
729
- # MASSter
729
+ # masster
730
730
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/masster)](https://badge.fury.io/py/masster)
731
731
  [![PyPI version](https://badge.fury.io/py/masster.svg)](https://badge.fury.io/py/masster)
732
732
 
733
- **MASSter** is a Python package for the analysis of mass spectrometry data, tailored for the purpose of metabolomics and LC-MS data processing. It is designed to deal with DDA, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS. All other functionalities for e.g. centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc. were redesigned and engineered to maximize scalability (tested with 3000 LC-MS), speed, quality, and results.
733
+ **MASSter** is a Python package for the analysis of metabolomics experiments by LC-MS/MS data, with a main focus on the challenging tasks of untargeted and large-scale studies.
734
734
 
735
- This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab.
735
+ ## Background and motivation
736
+
737
+ MASSter is actively used, maintained, and developed by the Zamboni Lab at ETH Zurich. The project started because many of our needs were unmet by the "usual" software packages (mzmine, msdial, W4M, ...), e.g. performance, scalability, sensitivity, robustness, speed, rapid implementation of new features, embedding in ETL systems, and so on.
738
+
739
+ All methods include a long list of parameters, and might wrap alternative algorithms. These are only relevant for advanced users. We recommend running the processing methods with defaults, or using the Wizard.
740
+
741
+ ## Content
742
+
743
+ MASSter is designed to deal with DDA data, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS, which is both accurate and fast, and it was wrapped with additional code to improve isotope and adduct detection. All other functionalities are our own implementations: centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc.
744
+
745
+ MASSter was engineered to maximize quality of results, sensitivity, scalability, and also speed. Yes, it's Python which is notoriously slower than other languages, but considerable time was spent in speeding up everything, including the systematic use of [polars](https://pola.rs/), numpy vectorization, multiprocessing, chunking, etc. MASSter was tested with studies with 3000+ LC-MS/MS samples (1 Mio MS2 spectra), and it autonomously completed analysis within a few hours.
746
+
747
+ ## Architecture
748
+
749
+ MASSter defines its own classes for Spectra, Chromatograms, Libraries, Samples, and Studies (= bunch of samples, i.e. an LC-MS sequence). Users will deal mostly with one Study() object at a time. Sample() objects are created when analyzing a batch - and saved for caching -, or will be used only for development, troubleshooting, or to generate illustrations.
750
+
751
+ The analysis can be done in scripts (without user intervention, e.g. by the integrated Wizard), or interactively in notebooks, i.e. [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/).
736
752
 
737
753
  ## Prerequisites
738
754
 
739
- **MASSter** reads raw (Thermo), wiff (SCIEX), or mzML data. It's recommended to provide raw, profile data.
755
+ You'll need to install Python (3.10-3.13, 3.14 has not been tested yet).
756
+
757
+ MASSter reads raw (Thermo), wiff (SCIEX), or mzML data. Reading vendor formats relies on .NET libraries, and is only possible in Windows. On Linux or MacOS, you'll be forced to use mzML data.
758
+
759
+ **It's recommended to use data in either the vendor's raw format (wiff and raw) or mzML in profile mode.** MASSter includes a sophisticated and sufficiently fast centroiding algorithm that works well across the full dynamic range and will only act on the spectra that are relevant. In our tests with data from different vendors, the centroiding performed much better than most vendors' implementations (that are primarily proteomics-centric).
760
+
761
+ If you still want to convert raw data to centroided mzML, please use [CentroidR](https://github.com/Adafede/CentroidR/tree/0.0.0.9001).
740
762
 
741
763
  ## Installation
742
764
 
@@ -744,48 +766,33 @@ This is a poorly documented, stable branch of the development codebase in use in
744
766
  pip install masster
745
767
  ```
746
768
 
747
- ## Basic usage
748
- ### Quick start: use the wizard
769
+ ## Getting started
770
+ **The quickest way to use, or learn how to use MASSter, is to use the Wizard** which we integrated and, ideally, takes care of everything automatically.
749
771
 
772
+ The Wizard only needs to know where to find the MS files and where to store the results.
750
773
  ```python
751
- import masster
752
- wiz = masster.wizard.create_scripts(
753
- source=r'..\..\folder_with_raw_data',
754
- folder=r'..\..folder_to_store_results'
774
+ from masster import Wizard
775
+ wiz = Wizard(
776
+ source=r'..\..\folder_with_raw_data', # where to find the data
777
+ folder=r'..\..folder_to_store_results', # where to save the results
778
+ ncores=10 # this is optional
755
779
  )
756
- wiz.run()
780
+ wiz.test_and_run()
757
781
  ```
758
782
 
759
- This will run a wizard that should perform all key steps and save the results to the `folder`.
783
+ This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if successful, run the full workflow using parallel processes. Once the processing is over, navigate to `folder` to see what happened...
760
784
 
761
- ### Basic workflow for analyzing a single sample
762
- ```python
763
- import masster
764
- sample = masster.Sample(filename='...') # full path to a *.raw, *.wiff, or *.mzML file
765
- # process
766
- sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
767
- sample.find_adducts()
768
- sample.find_ms2()
769
-
770
- # access data
771
- sample.features_df
772
-
773
- # save results
774
- sample.save() # stores to *.sample5, our custom hdf5 format
775
- sample.export_mgf()
785
+ If you want to interact with your data, we recommend using [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/) and open the `*.study5` file, for example:
776
786
 
777
- # some plots
778
- sample.plot_bpc()
779
- sample.plot_tic()
780
- sample.plot_2d()
781
- sample.plot_features_stats()
782
-
783
- # explore methods
784
- dir(study)
787
+ ```bash
788
+ # use marimo to open the script created by the Wizard
789
+ marimo edit '..\..folder_to_store_results\2_interactive_analysis.py'
790
+ # or, if you use uv to manage an environment with masster
791
+ uv run marimo edit '..\..folder_to_store_results\2_interactive_analysis.py'
785
792
  ```
786
793
 
787
- ### Basic Workflow for analyzing LC-MS study with 2-... samples
788
-
794
+ ### Basic Workflow for analyzing LC-MS study with 1-1000+ samples
795
+ In MASSter, the main object for data analysis is a `Study`, which consists of a bunch of `Samples`.
789
796
  ```python
790
797
  import masster
791
798
  # Initialize the Study object with the default folder
@@ -797,17 +804,20 @@ study.add(r'D:\...\...\...\*.wiff')
797
804
  # Perform retention time correction
798
805
  study.align(rt_tol=2.0)
799
806
  study.plot_alignment()
800
- study.plot_bpc()
801
807
  study.plot_rt_correction()
808
+ study.plot_bpc()
802
809
 
803
810
  # Find consensus features
804
- study.merge(min_samples=3)
811
+ study.merge(min_samples=3) # this will keep only the features that were found in 3 or more samples
805
812
  study.plot_consensus_2d()
806
813
 
807
- # Retrieve missing data for quantification
814
+ # retrieve information
815
+ study.info()
816
+
817
+ # Retrieve EICs for quantification
808
818
  study.fill()
809
819
 
810
- # Integrate according to consensus metadata
820
+ # Integrate EICs according to consensus metadata
811
821
  study.integrate()
812
822
 
813
823
  # export results
@@ -823,32 +833,63 @@ study.save()
823
833
  study.plot_samples_pca()
824
834
  study.plot_samples_umap()
825
835
  study.plot_samples_2d()
826
- ```
827
836
 
828
- ### Quick Start with Wizard
829
- MASSter includes a Wizard to automatically process everything:
837
+ # To know more about the available methods...
838
+ dir(study)
839
+ ```
840
+ The information is stored in Polars data frames, in particular:
841
+ ```python
842
+ # information on samples
843
+ study.samples_df
844
+ # information on consensus features
845
+ study.consensus_df
846
+ # information on original features from ALL samples, including MS2 and EICs
847
+ study.features_df
848
+ ```
830
849
 
850
+ ### Analysis of a single sample
851
+ For troubleshooting, exploration, or just to create a figure on a single file, you might want to open and process a single file:
831
852
  ```python
832
- from masster import Wizard
853
+ from masster import Sample
854
+ sample = Sample(filename='...') # full path to a *.raw, *.wiff, *.mzML, or *.sample5 file
855
+ # peek into sample
856
+ sample.info()
833
857
 
834
- # Create wizard instance
835
- wiz = Wizard(source="./raw_data",
836
- folder="./output",
837
- num_cores=8)
858
+ # process
859
+ sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
860
+ sample.find_adducts()
861
+ sample.find_ms2()
838
862
 
839
- # Generate analysis scripts
840
- wiz.create_scripts()
863
+ # access data
864
+ sample.features_df
841
865
 
842
- # Test with single file, then run full batch
843
- wiz.test_and_run()
844
- ```
866
+ # save results
867
+ sample.save() # stores to *.sample5, our custom hdf5 format
868
+ sample.export_mgf()
845
869
 
846
- ### One-Line Command Processing
847
- Or, from the command-line:
848
- ```bash
849
- python -c "from masster import Wizard; wiz = Wizard(source='D:/Data/studies/my_study/raw', folder='D:/Data/studies/my_study/masster'); wiz.create_scripts(); wiz.test_and_run()"
870
+ # some plots
871
+ sample.plot_bpc()
872
+ sample.plot_tic()
873
+ sample.plot_2d()
874
+ sample.plot_features_stats()
875
+
876
+ # explore methods
877
+ dir(sample)
850
878
  ```
851
879
 
880
+ ## Disclaimer
881
+
882
+ **MASSter is research software under active development.** While we use it extensively in our lab and strive for quality and reliability, please be aware:
883
+
884
+ - **No warranties**: The software is provided "as is" without any warranty of any kind, express or implied
885
+ - **Backward compatibility**: We do not guarantee backward compatibility between versions. Breaking changes may occur as we improve the software
886
+ - **Performance**: While optimized for our workflows, performance may vary depending on your data and system configuration
887
+ - **Results**: We do our best to ensure accuracy, but you should validate results independently for your research
888
+ - **Support**: This is an academic project with limited resources. Community support is available through GitHub issues, but we cannot guarantee response times
889
+ - **Production use**: If you plan to use MASSter in production or critical workflows, thorough testing with your data is recommended
890
+
891
+ We welcome feedback, bug reports, and contributions via GitHub!
892
+
852
893
  ## License
853
894
  GNU Affero General Public License v3
854
895
 
@@ -858,4 +899,4 @@ See the [LICENSE](LICENSE) file for details.
858
899
  This project uses several third-party libraries, including pyOpenMS which is licensed under the BSD 3-Clause License. For complete information about third-party dependencies and their licenses, see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md).
859
900
 
860
901
  ## Citation
861
- If you use Masster in your research, please cite this repository.
902
+ If you use MASSter in your research, please cite this repository.
@@ -0,0 +1,174 @@
1
+ # masster
2
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/masster)](https://badge.fury.io/py/masster)
3
+ [![PyPI version](https://badge.fury.io/py/masster.svg)](https://badge.fury.io/py/masster)
4
+
5
+ **MASSter** is a Python package for the analysis of metabolomics experiments by LC-MS/MS data, with a main focus on the challenging tasks of untargeted and large-scale studies.
6
+
7
+ ## Background and motivation
8
+
9
+ MASSter is actively used, maintained, and developed by the Zamboni Lab at ETH Zurich. The project started because many of our needs were unmet by the "usual" software packages (mzmine, msdial, W4M, ...), e.g. performance, scalability, sensitivity, robustness, speed, rapid implementation of new features, embedding in ETL systems, and so on.
10
+
11
+ All methods include a long list of parameters, and might wrap alternative algorithms. These are only relevant for advanced users. We recommend running the processing methods with defaults, or using the Wizard.
12
+
13
+ ## Content
14
+
15
+ MASSter is designed to deal with DDA data, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS, which is both accurate and fast, and it was wrapped with additional code to improve isotope and adduct detection. All other functionalities are our own implementations: centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc.
16
+
17
+ MASSter was engineered to maximize quality of results, sensitivity, scalability, and also speed. Yes, it's Python which is notoriously slower than other languages, but considerable time was spent in speeding up everything, including the systematic use of [polars](https://pola.rs/), numpy vectorization, multiprocessing, chunking, etc. MASSter was tested with studies with 3000+ LC-MS/MS samples (1 Mio MS2 spectra), and it autonomously completed analysis within a few hours.
18
+
19
+ ## Architecture
20
+
21
+ MASSter defines its own classes for Spectra, Chromatograms, Libraries, Samples, and Studies (= bunch of samples, i.e. an LC-MS sequence). Users will deal mostly with one Study() object at a time. Sample() objects are created when analyzing a batch - and saved for caching -, or will be used only for development, troubleshooting, or to generate illustrations.
22
+
23
+ The analysis can be done in scripts (without user intervention, e.g. by the integrated Wizard), or interactively in notebooks, i.e. [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/).
24
+
25
+ ## Prerequisites
26
+
27
+ You'll need to install Python (3.10-3.13, 3.14 has not been tested yet).
28
+
29
+ MASSter reads raw (Thermo), wiff (SCIEX), or mzML data. Reading vendor formats relies on .NET libraries, and is only possible in Windows. On Linux or MacOS, you'll be forced to use mzML data.
30
+
31
+ **It's recommended to use data in either the vendor's raw format (wiff and raw) or mzML in profile mode.** MASSter includes a sophisticated and sufficiently fast centroiding algorithm that works well across the full dynamic range and will only act on the spectra that are relevant. In our tests with data from different vendors, the centroiding performed much better than most vendors' implementations (that are primarily proteomics-centric).
32
+
33
+ If you still want to convert raw data to centroided mzML, please use [CentroidR](https://github.com/Adafede/CentroidR/tree/0.0.0.9001).
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install masster
39
+ ```
40
+
41
+ ## Getting started
42
+ **The quickest way to use, or learn how to use MASSter, is to use the Wizard** which we integrated and, ideally, takes care of everything automatically.
43
+
44
+ The Wizard only needs to know where to find the MS files and where to store the results.
45
+ ```python
46
+ from masster import Wizard
47
+ wiz = Wizard(
48
+ source=r'..\..\folder_with_raw_data', # where to find the data
49
+ folder=r'..\..folder_to_store_results', # where to save the results
50
+ ncores=10 # this is optional
51
+ )
52
+ wiz.test_and_run()
53
+ ```
54
+
55
+ This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if successful, run the full workflow using parallel processes. Once the processing is over, navigate to `folder` to see what happened...
56
+
57
+ If you want to interact with your data, we recommend using [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/) and open the `*.study5` file, for example:
58
+
59
+ ```bash
60
+ # use marimo to open the script created by the Wizard
61
+ marimo edit '..\..folder_to_store_results\2_interactive_analysis.py'
62
+ # or, if you use uv to manage an environment with masster
63
+ uv run marimo edit '..\..folder_to_store_results\2_interactive_analysis.py'
64
+ ```
65
+
66
+ ### Basic Workflow for analyzing LC-MS study with 1-1000+ samples
67
+ In MASSter, the main object for data analysis is a `Study`, which consists of a bunch of `Samples`.
68
+ ```python
69
+ import masster
70
+ # Initialize the Study object with the default folder
71
+ study = masster.Study(folder=r'D:\...\mylcms')
72
+
73
+ # Load data from folder with raw data, here: WIFF
74
+ study.add(r'D:\...\...\...\*.wiff')
75
+
76
+ # Perform retention time correction
77
+ study.align(rt_tol=2.0)
78
+ study.plot_alignment()
79
+ study.plot_rt_correction()
80
+ study.plot_bpc()
81
+
82
+ # Find consensus features
83
+ study.merge(min_samples=3) # this will keep only the features that were found in 3 or more samples
84
+ study.plot_consensus_2d()
85
+
86
+ # retrieve information
87
+ study.info()
88
+
89
+ # Retrieve EICs for quantification
90
+ study.fill()
91
+
92
+ # Integrate EICs according to consensus metadata
93
+ study.integrate()
94
+
95
+ # export results
96
+ study.export_mgf()
97
+ study.export_mztab()
98
+ study.export_xlsx()
99
+ study.export_parquet()
100
+
101
+ # Save the study to .study5
102
+ study.save()
103
+
104
+ # Some of the plots...
105
+ study.plot_samples_pca()
106
+ study.plot_samples_umap()
107
+ study.plot_samples_2d()
108
+
109
+ # To know more about the available methods...
110
+ dir(study)
111
+ ```
112
+ The information is stored in Polars data frames, in particular:
113
+ ```python
114
+ # information on samples
115
+ study.samples_df
116
+ # information on consensus features
117
+ study.consensus_df
118
+ # information on original features from ALL samples, including MS2 and EICs
119
+ study.features_df
120
+ ```
121
+
122
+ ### Analysis of a single sample
123
+ For troubleshooting, exploration, or just to create a figure on a single file, you might want to open and process a single file:
124
+ ```python
125
+ from masster import Sample
126
+ sample = Sample(filename='...') # full path to a *.raw, *.wiff, *.mzML, or *.sample5 file
127
+ # peek into sample
128
+ sample.info()
129
+
130
+ # process
131
+ sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
132
+ sample.find_adducts()
133
+ sample.find_ms2()
134
+
135
+ # access data
136
+ sample.features_df
137
+
138
+ # save results
139
+ sample.save() # stores to *.sample5, our custom hdf5 format
140
+ sample.export_mgf()
141
+
142
+ # some plots
143
+ sample.plot_bpc()
144
+ sample.plot_tic()
145
+ sample.plot_2d()
146
+ sample.plot_features_stats()
147
+
148
+ # explore methods
149
+ dir(sample)
150
+ ```
151
+
152
+ ## Disclaimer
153
+
154
+ **MASSter is research software under active development.** While we use it extensively in our lab and strive for quality and reliability, please be aware:
155
+
156
+ - **No warranties**: The software is provided "as is" without any warranty of any kind, express or implied
157
+ - **Backward compatibility**: We do not guarantee backward compatibility between versions. Breaking changes may occur as we improve the software
158
+ - **Performance**: While optimized for our workflows, performance may vary depending on your data and system configuration
159
+ - **Results**: We do our best to ensure accuracy, but you should validate results independently for your research
160
+ - **Support**: This is an academic project with limited resources. Community support is available through GitHub issues, but we cannot guarantee response times
161
+ - **Production use**: If you plan to use MASSter in production or critical workflows, thorough testing with your data is recommended
162
+
163
+ We welcome feedback, bug reports, and contributions via GitHub!
164
+
165
+ ## License
166
+ GNU Affero General Public License v3
167
+
168
+ See the [LICENSE](LICENSE) file for details.
169
+
170
+ ### Third-Party Licenses
171
+ This project uses several third-party libraries, including pyOpenMS which is licensed under the BSD 3-Clause License. For complete information about third-party dependencies and their licenses, see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md).
172
+
173
+ ## Citation
174
+ If you use MASSter in your research, please cite this repository.
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.5.27"
4
+ version = "0.5.28"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -88,7 +88,6 @@ build-backend = "hatchling.build"
88
88
  [tool.hatch.build.targets.sdist]
89
89
  include = [
90
90
  "/src",
91
- "/tests",
92
91
  "/LICENSE",
93
92
  "/README.md",
94
93
  "/THIRD_PARTY_NOTICES.md",
@@ -100,6 +99,9 @@ packages = ["src/masster"]
100
99
  include = [
101
100
  "/THIRD_PARTY_NOTICES.md",
102
101
  ]
102
+ exclude = [
103
+ "/tests",
104
+ ]
103
105
 
104
106
  # Testing configuration
105
107
  [tool.pytest.ini_options]
masster-0.5.27/README.md DELETED
@@ -1,133 +0,0 @@
1
- # MASSter
2
- [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/masster)](https://badge.fury.io/py/masster)
3
- [![PyPI version](https://badge.fury.io/py/masster.svg)](https://badge.fury.io/py/masster)
4
-
5
- **MASSter** is a Python package for the analysis of mass spectrometry data, tailored for the purpose of metabolomics and LC-MS data processing. It is designed to deal with DDA, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS. All other functionalities for e.g. centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc. were redesigned and engineered to maximize scalability (tested with 3000 LC-MS), speed, quality, and results.
6
-
7
- This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab.
8
-
9
- ## Prerequisites
10
-
11
- **MASSter** reads raw (Thermo), wiff (SCIEX), or mzML data. It's recommended to provide raw, profile data.
12
-
13
- ## Installation
14
-
15
- ```bash
16
- pip install masster
17
- ```
18
-
19
- ## Basic usage
20
- ### Quick start: use the wizard
21
-
22
- ```python
23
- import masster
24
- wiz = masster.wizard.create_scripts(
25
- source=r'..\..\folder_with_raw_data',
26
- folder=r'..\..folder_to_store_results'
27
- )
28
- wiz.run()
29
- ```
30
-
31
- This will run a wizard that should perform all key steps and save the results to the `folder`.
32
-
33
- ### Basic workflow for analyzing a single sample
34
- ```python
35
- import masster
36
- sample = masster.Sample(filename='...') # full path to a *.raw, *.wiff, or *.mzML file
37
- # process
38
- sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
39
- sample.find_adducts()
40
- sample.find_ms2()
41
-
42
- # access data
43
- sample.features_df
44
-
45
- # save results
46
- sample.save() # stores to *.sample5, our custom hdf5 format
47
- sample.export_mgf()
48
-
49
- # some plots
50
- sample.plot_bpc()
51
- sample.plot_tic()
52
- sample.plot_2d()
53
- sample.plot_features_stats()
54
-
55
- # explore methods
56
- dir(study)
57
- ```
58
-
59
- ### Basic Workflow for analyzing LC-MS study with 2-... samples
60
-
61
- ```python
62
- import masster
63
- # Initialize the Study object with the default folder
64
- study = masster.Study(folder=r'D:\...\mylcms')
65
-
66
- # Load data from folder with raw data, here: WIFF
67
- study.add(r'D:\...\...\...\*.wiff')
68
-
69
- # Perform retention time correction
70
- study.align(rt_tol=2.0)
71
- study.plot_alignment()
72
- study.plot_bpc()
73
- study.plot_rt_correction()
74
-
75
- # Find consensus features
76
- study.merge(min_samples=3)
77
- study.plot_consensus_2d()
78
-
79
- # Retrieve missing data for quantification
80
- study.fill()
81
-
82
- # Integrate according to consensus metadata
83
- study.integrate()
84
-
85
- # export results
86
- study.export_mgf()
87
- study.export_mztab()
88
- study.export_xlsx()
89
- study.export_parquet()
90
-
91
- # Save the study to .study5
92
- study.save()
93
-
94
- # Some of the plots...
95
- study.plot_samples_pca()
96
- study.plot_samples_umap()
97
- study.plot_samples_2d()
98
- ```
99
-
100
- ### Quick Start with Wizard
101
- MASSter includes a Wizard to automatically process everything:
102
-
103
- ```python
104
- from masster import Wizard
105
-
106
- # Create wizard instance
107
- wiz = Wizard(source="./raw_data",
108
- folder="./output",
109
- num_cores=8)
110
-
111
- # Generate analysis scripts
112
- wiz.create_scripts()
113
-
114
- # Test with single file, then run full batch
115
- wiz.test_and_run()
116
- ```
117
-
118
- ### One-Line Command Processing
119
- Or, from the command-line:
120
- ```bash
121
- python -c "from masster import Wizard; wiz = Wizard(source='D:/Data/studies/my_study/raw', folder='D:/Data/studies/my_study/masster'); wiz.create_scripts(); wiz.test_and_run()"
122
- ```
123
-
124
- ## License
125
- GNU Affero General Public License v3
126
-
127
- See the [LICENSE](LICENSE) file for details.
128
-
129
- ### Third-Party Licenses
130
- This project uses several third-party libraries, including pyOpenMS which is licensed under the BSD 3-Clause License. For complete information about third-party dependencies and their licenses, see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md).
131
-
132
- ## Citation
133
- If you use Masster in your research, please cite this repository.
@@ -1,12 +0,0 @@
1
- """Test configuration for pytest."""
2
-
3
- import sys
4
- from pathlib import Path
5
-
6
- # Add src directory to path for testing
7
- src_path = Path(__file__).parent.parent / "src"
8
- sys.path.insert(0, str(src_path))
9
-
10
- # Test data directory
11
- TEST_DATA_DIR = Path(__file__).parent / "data"
12
- TEST_DATA_DIR.mkdir(exist_ok=True)