masster 0.5.27__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/plot.py CHANGED
@@ -2955,6 +2955,203 @@ def plot_tic(
2955
2955
  return p
2956
2956
 
2957
2957
 
2958
def plot_heatmap(
    self,
    filename=None,
    width=800,
    height=600,
    cmap="viridis",
    title="Consensus Matrix Heatmap",
    quant="chrom_area",
    samples=None,
):
    """
    Plot a heatmap of the consensus matrix data.

    Samples are ordered from left to right, features are ordered by m/z from top to bottom
    (lowest m/z in the top row). Values are log10(value + 1) transformed for better
    visualization.

    Parameters:
        filename (str, optional): Path to save the plot. Relative paths are resolved
            against the study folder. When None, the plot is shown in the notebook.
        width (int): Plot width in pixels (default: 800)
        height (int): Plot height in pixels (default: 600)
        cmap: Colormap name (default: "viridis"), or a colormap object that either
            provides ``to_bokeh()`` or can be called with an array of positions in [0, 1].
            Falls back to Bokeh's viridis palette if it cannot be interpreted.
        title (str): Plot title (default: "Consensus Matrix Heatmap")
        quant (str): Quantification method column name (default: "chrom_area")
        samples: Sample identifier(s) to include. Can be:
            - None: include all samples (default)
            - int: single sample_uid
            - str: single sample_name
            - list: multiple sample_uids or sample_names

    Returns:
        The Bokeh figure, or None if no consensus matrix or consensus_df is available.
    """
    import os

    import numpy as np
    import pandas as pd
    from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper
    from bokeh.plotting import figure
    from bokeh.transform import transform

    def _sample_palette(colormap, n_colors=256):
        """Sample a callable colormap at evenly spaced positions and return hex colors."""
        import matplotlib.colors as mcolors

        return [mcolors.rgb2hex(color) for color in colormap(np.linspace(0, 1, n_colors))]

    # Get consensus matrix
    matrix_df = self.get_consensus_matrix(quant=quant, samples=samples)
    if matrix_df is None or matrix_df.is_empty():
        self.logger.error("No consensus matrix available for heatmap.")
        return None

    # consensus_df supplies the m/z values used to order the features
    if self.consensus_df is None or self.consensus_df.is_empty():
        self.logger.error("No consensus_df available for sorting features by m/z.")
        return None

    # Attach m/z, sort ascending, then drop the helper column again
    matrix_sorted = (
        matrix_df.join(
            self.consensus_df.select(["consensus_uid", "mz"]),
            on="consensus_uid",
            how="left",
        )
        .sort("mz")
        .drop("mz")
    )

    consensus_uids = matrix_sorted["consensus_uid"].to_list()
    sample_cols = [col for col in matrix_sorted.columns if col != "consensus_uid"]
    n_features = len(consensus_uids)
    n_samples = len(sample_cols)

    # Coerce to float so that missing cells become NaN instead of breaking the "+ 1"
    matrix_values = matrix_sorted.select(sample_cols).to_pandas().to_numpy(dtype=float)

    # Apply log10 transformation (add 1 to avoid log(0))
    matrix_log = np.log10(matrix_values + 1)

    # Long-form table with one row per (feature, sample) cell, in row-major order:
    # the feature columns repeat per sample, the sample column cycles per feature.
    row_labels = [str(i) for i in range(n_features)]
    heatmap_df = pd.DataFrame(
        {
            "sample": np.tile(np.asarray(sample_cols, dtype=object), n_features),
            "feature": np.repeat(
                np.asarray([str(uid) for uid in consensus_uids], dtype=object),
                n_samples,
            ),
            # String row index is used as the categorical y-axis position
            "feature_idx": np.repeat(np.asarray(row_labels, dtype=object), n_samples),
            "value": matrix_log.ravel(),
        },
    )
    source = ColumnDataSource(heatmap_df)

    # Resolve the colormap into a list of hex colors
    try:
        if isinstance(cmap, str):
            palette = _sample_palette(Colormap(cmap))
        else:
            # Prefer the object's own Bokeh export when it has one
            try:
                palette = cmap.to_bokeh()
            except AttributeError:
                palette = None
            # to_bokeh() may return a mapper rather than a palette; sample manually then
            if not isinstance(palette, (list, tuple)):
                palette = _sample_palette(cmap)
    except (AttributeError, ValueError, TypeError) as e:
        # Fallback to viridis if cmap interpretation fails
        self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
        from bokeh.palettes import viridis

        palette = viridis(256)

    # Create color mapper spanning the observed value range
    color_mapper = LinearColorMapper(
        palette=palette,
        low=heatmap_df["value"].min(),
        high=heatmap_df["value"].max(),
    )

    # Create figure with categorical ranges for both axes.
    # Bokeh lays out categorical y factors from bottom to top, so the factor list is
    # reversed to put the lowest m/z (row "0") at the top, as documented.
    p = figure(
        width=width,
        height=height,
        title=title,
        x_range=sample_cols,
        y_range=row_labels[::-1],
        toolbar_location="above",
        tools="pan,wheel_zoom,box_zoom,reset,save,hover",
        tooltips=[
            ("Sample", "@sample"),
            ("Feature UID", "@feature"),
            ("log10(Value+1)", "@value{0.00}"),
        ],
    )

    # Draw rectangles for heatmap
    p.rect(
        x="sample",
        y="feature_idx",
        width=1,
        height=1,
        source=source,
        fill_color=transform("value", color_mapper),
        line_color=None,
    )

    # Add colorbar
    color_bar = ColorBar(
        color_mapper=color_mapper,
        width=8,
        location=(0, 0),
        title=f"log10({quant}+1)",
        ticker=BasicTicker(desired_num_ticks=8),
    )
    p.add_layout(color_bar, "right")

    # Style the plot
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.grid.grid_line_color = None
    # Numeric orientations are interpreted in radians: pi/4 is the intended 45 degrees
    p.xaxis.major_label_orientation = np.pi / 4
    p.yaxis.axis_label = "Features (sorted by m/z)"
    p.xaxis.axis_label = "Samples"

    # Apply consistent save/display behavior
    if filename is not None:
        # Convert relative paths to absolute paths using study folder as base
        if not os.path.isabs(filename):
            filename = os.path.join(self.folder, filename)

        # Convert to absolute path for logging
        abs_filename = os.path.abspath(filename)

        # Use isolated file saving
        _isolated_save_plot(p, filename, abs_filename, self.logger, "Heatmap Plot")
    else:
        # Show in notebook when no filename provided
        _isolated_show_notebook(p)

    return p
3153
+
3154
+
2958
3155
  def plot_pca(self, *args, **kwargs):
2959
3156
  """Deprecated: Use plot_samples_pca instead."""
2960
3157
  import warnings
masster/study/study.py CHANGED
@@ -14,7 +14,7 @@ Main class:
14
14
  consensus_select/filter/delete
15
15
  - Retrieval: get_consensus, get_chrom, get_samples, get_*_stats, get_*_matrix
16
16
  - Plotting: plot_alignment, plot_samples_pca/umap/2d, plot_tic/bpc/eic, plot_chrom,
17
- plot_rt_correction, plot_consensus_2d/stats
17
+ plot_rt_correction, plot_consensus_2d/stats, plot_heatmap
18
18
  - Export: export_mgf, export_mztab, export_xlsx, export_parquet
19
19
  - Identification: lib_load, identify, get_id, id_reset, lib_reset
20
20
  - Parameters: get/update parameters, update_history
@@ -96,6 +96,7 @@ from masster.study.plot import plot_bpc
96
96
  from masster.study.plot import plot_tic
97
97
  from masster.study.plot import plot_eic
98
98
  from masster.study.plot import plot_rt_correction
99
+ from masster.study.plot import plot_heatmap
99
100
  from masster.study.processing import align
100
101
  from masster.study.merge import merge
101
102
  from masster.study.processing import integrate
@@ -429,6 +430,7 @@ class Study:
429
430
  plot_rt_correction = plot_rt_correction
430
431
  plot_tic = plot_tic
431
432
  plot_eic = plot_eic
433
+ plot_heatmap = plot_heatmap
432
434
 
433
435
  # === Analysis Operations ===
434
436
  analyze_umap = analyze_umap
@@ -261,6 +261,21 @@
261
261
  },
262
262
  "ms1_spec": {
263
263
  "dtype": "pl.Object"
264
+ },
265
+ "id_top_name": {
266
+ "dtype": "pl.Utf8"
267
+ },
268
+ "id_top_class": {
269
+ "dtype": "pl.Utf8"
270
+ },
271
+ "id_top_adduct": {
272
+ "dtype": "pl.Utf8"
273
+ },
274
+ "id_top_score": {
275
+ "dtype": "pl.Float64"
276
+ },
277
+ "id_source": {
278
+ "dtype": "pl.Utf8"
264
279
  }
265
280
  }
266
281
  },
masster/wizard/wizard.py CHANGED
@@ -200,12 +200,12 @@ class wizard_def:
200
200
  # Set default adducts based on polarity if not provided
201
201
  if not self.adducts:
202
202
  if self.polarity and self.polarity.lower() in ["positive", "pos"]:
203
- self.adducts = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]
203
+ self.adducts = ["+H:1:0.8", "+Na:1:0.1", "+NH4:1:0.1"]
204
204
  elif self.polarity and self.polarity.lower() in ["negative", "neg"]:
205
- self.adducts = ["H-1:-:1.0", "CH2O2:0:0.5"]
205
+ self.adducts = ["-H:-1:1.0", "+CH2O2:0:0.5"]
206
206
  else:
207
207
  # Default to positive if polarity is None or unknown
208
- self.adducts = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]
208
+ self.adducts = ["+H:1:0.8", "+Na:1:0.1", "+NH4:1:0.1"]
209
209
 
210
210
  # Validate num_cores
211
211
  max_cores = multiprocessing.cpu_count()
@@ -676,9 +676,7 @@ class Wizard:
676
676
  " ",
677
677
  " # Step 3: Create and configure study",
678
678
  ' print("\\nStep 3/7: Initializing study...")',
679
- " study = Study(folder=PARAMS['folder'])",
680
- " study.polarity = PARAMS['polarity']",
681
- " study.adducts = PARAMS['adducts']",
679
+ " study = Study(folder=PARAMS['folder'], polarity=PARAMS['polarity'], adducts=PARAMS['adducts'])",
682
680
  " ",
683
681
  " # Step 4: Add sample5 files to study",
684
682
  ' print("\\nStep 4/7: Adding samples to study...")',
@@ -692,6 +690,12 @@ class Wizard:
692
690
  " rt_tol=PARAMS['rt_tol']",
693
691
  " )",
694
692
  " ",
693
+ " # Check that more than 1 file has been loaded",
694
+ " if len(study.samples) <= 1:",
695
+ ' print("\\nWARNING: Study merging requires more than 1 sample file.")',
696
+ ' print(f"Only {len(study.samples)} sample(s) loaded. Terminating execution.")',
697
+ " return False",
698
+ " ",
695
699
  " study.merge(",
696
700
  ' method="qt",',
697
701
  " min_samples=PARAMS['min_samples_per_feature'],",
@@ -764,14 +768,9 @@ class Wizard:
764
768
  'app = marimo.App(width="medium")',
765
769
  "",
766
770
  "@app.cell",
767
- "def __():",
768
- " import marimo as mo",
769
- " return (mo,)",
770
- "",
771
- "@app.cell",
772
771
  "def __(mo):",
773
772
  ' mo.md(r"""',
774
- " # MASSter Interactive Analysis",
773
+ " ## MASSter Interactive Analysis",
775
774
  " ",
776
775
  f" **Source:** {source_info.get('number_of_files', 0)} files detected",
777
776
  f" **Polarity:** {source_info.get('polarity', 'unknown')}",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.27
3
+ Version: 0.6.0
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -726,17 +726,39 @@ Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
726
726
  Requires-Dist: pytest>=7.0.0; extra == 'test'
727
727
  Description-Content-Type: text/markdown
728
728
 
729
- # MASSter
729
+ # masster
730
730
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/masster)](https://badge.fury.io/py/masster)
731
731
  [![PyPI version](https://badge.fury.io/py/masster.svg)](https://badge.fury.io/py/masster)
732
732
 
733
- **MASSter** is a Python package for the analysis of mass spectrometry data, tailored for the purpose of metabolomics and LC-MS data processing. It is designed to deal with DDA, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS. All other functionalities for e.g. centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc. were redesigned and engineered to maximize scalability (tested with 3000 LC-MS), speed, quality, and results.
733
+ **MASSter** is a Python package for the analysis of LC-MS/MS data from metabolomics experiments, with a main focus on the challenging tasks of untargeted and large-scale studies.
734
734
 
735
- This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab.
735
+ ## Background and motivation
736
+
737
+ MASSter is actively used, maintained, and developed by the Zamboni Lab at ETH Zurich. The project started because many needs were unmet by the "usual" software packages (mzMine, MS-DIAL, Workflow4Metabolomics (W4M), ...), for example performance, scalability, sensitivity, robustness, speed, rapid implementation of new features, and embedding in ETL systems.
738
+
739
+ All methods include many parameters and may wrap alternative algorithms. These options are primarily relevant for advanced users. We recommend running the processing methods with the defaults or using the Wizard.
740
+
741
+ ## Content
742
+
743
+ MASSter is designed to deal with DDA data, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS, which is both accurate and fast, and it was wrapped with additional code to improve isotope and adduct detection. All other functionalities are our own implementations: centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc.
744
+
745
+ MASSter was engineered to maximize result quality, sensitivity, scalability, and speed. Yes, it's Python, which can be slower than other languages, but considerable effort was spent on optimizations, including the systematic use of [Polars](https://pola.rs/), NumPy vectorization, multiprocessing, and chunking. MASSter has been tested on studies with 3,000+ LC–MS/MS samples (≈1 million MS2 spectra) and autonomously completed analyses within a few hours.
746
+
747
+ ## Architecture
748
+
749
+ MASSter defines classes for Spectra, Chromatograms, Libraries, Samples, and Studies (a Study is a collection of samples, i.e. an LC–MS sequence). Users will typically work with a single `Study` object at a time. `Sample` objects are created when analyzing a batch (and saved for caching), or used for development, troubleshooting, or generating illustrations.
750
+
751
+ The analysis can be done in scripts (without user intervention, e.g. by the integrated Wizard), or interactively in notebooks, i.e. [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/).
736
752
 
737
753
  ## Prerequisites
738
754
 
739
- **MASSter** reads raw (Thermo), wiff (SCIEX), or mzML data. It's recommended to provide raw, profile data.
755
+ You'll need to install Python (3.10-3.13, 3.14 has not been tested yet).
756
+
757
+ MASSter reads raw (Thermo), wiff (SCIEX), or mzML data. Reading vendor formats relies on .NET libraries and is only possible on Windows. On Linux or macOS, you'll have to use mzML data.
758
+
759
+ **It's recommended to use data in either the vendor's raw formats (WIFF and Thermo RAW) or mzML in profile mode.** MASSter includes a sophisticated and sufficiently fast centroiding algorithm that works well across the full dynamic range and will only act on spectra that are relevant. In our tests with data from different vendors, the centroiding performed much better than most vendor implementations (which are primarily proteomics-centric).
760
+
761
+ If you still want to convert raw data to centroided mzML, please use CentroidR: https://github.com/Adafede/CentroidR/tree/0.0.0.9001
740
762
 
741
763
  ## Installation
742
764
 
@@ -744,48 +766,33 @@ This is a poorly documented, stable branch of the development codebase in use in
744
766
  pip install masster
745
767
  ```
746
768
 
747
- ## Basic usage
748
- ### Quick start: use the wizard
769
+ ## Getting started
770
+ **The quickest way to use MASSter, or to learn how to use it, is the integrated Wizard**, which ideally takes care of everything automatically.
749
771
 
772
+ The Wizard only needs to know where to find the MS files and where to store the results.
750
773
  ```python
751
- import masster
752
- wiz = masster.wizard.create_scripts(
753
- source=r'..\..\folder_with_raw_data',
754
- folder=r'..\..folder_to_store_results'
774
+ from masster import Wizard
775
+ wiz = Wizard(
776
+ source=r'..\..\folder_with_raw_data', # where to find the data
777
+ folder=r'..\..folder_to_store_results', # where to save the results
778
+ ncores=10 # this is optional
755
779
  )
756
- wiz.run()
780
+ wiz.test_and_run()
757
781
  ```
758
782
 
759
- This will run a wizard that should perform all key steps and save the results to the `folder`.
760
-
761
- ### Basic workflow for analyzing a single sample
762
- ```python
763
- import masster
764
- sample = masster.Sample(filename='...') # full path to a *.raw, *.wiff, or *.mzML file
765
- # process
766
- sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
767
- sample.find_adducts()
768
- sample.find_ms2()
769
-
770
- # access data
771
- sample.features_df
783
+ This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if successful, run the full workflow using parallel processes. Once the processing is over, navigate to `folder` to see what happened.
772
784
 
773
- # save results
774
- sample.save() # stores to *.sample5, our custom hdf5 format
775
- sample.export_mgf()
776
-
777
- # some plots
778
- sample.plot_bpc()
779
- sample.plot_tic()
780
- sample.plot_2d()
781
- sample.plot_features_stats()
785
+ If you want to interact with your data, we recommend using [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/) and open the `*.study5` file, for example:
782
786
 
783
- # explore methods
784
- dir(study)
787
+ ```bash
788
+ # use marimo to open the script created by the Wizard
789
+ marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
790
+ # or, if you use uv to manage an environment with masster
791
+ uv run marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
785
792
  ```
786
793
 
787
- ### Basic Workflow for analyzing LC-MS study with 2-... samples
788
-
794
+ ### Basic Workflow for analyzing LC-MS study with 1-1000+ samples
795
+ In MASSter, the main object for data analysis is a `Study`, which consists of a bunch of `Samples`.
789
796
  ```python
790
797
  import masster
791
798
  # Initialize the Study object with the default folder
@@ -797,17 +804,20 @@ study.add(r'D:\...\...\...\*.wiff')
797
804
  # Perform retention time correction
798
805
  study.align(rt_tol=2.0)
799
806
  study.plot_alignment()
800
- study.plot_bpc()
801
807
  study.plot_rt_correction()
808
+ study.plot_bpc()
802
809
 
803
810
  # Find consensus features
804
- study.merge(min_samples=3)
811
+ study.merge(min_samples=3) # this will keep only the features that were found in 3 or more samples
805
812
  study.plot_consensus_2d()
806
813
 
807
- # Retrieve missing data for quantification
814
+ # retrieve information
815
+ study.info()
816
+
817
+ # Retrieve EICs for quantification
808
818
  study.fill()
809
819
 
810
- # Integrate according to consensus metadata
820
+ # Integrate EICs according to consensus metadata
811
821
  study.integrate()
812
822
 
813
823
  # export results
@@ -823,32 +833,61 @@ study.save()
823
833
  study.plot_samples_pca()
824
834
  study.plot_samples_umap()
825
835
  study.plot_samples_2d()
826
- ```
827
836
 
828
- ### Quick Start with Wizard
829
- MASSter includes a Wizard to automatically process everything:
837
+ # To know more about the available methods...
838
+ dir(study)
839
+ ```
840
+ The information is stored in Polars data frames, in particular:
841
+ ```python
842
+ # information on samples
843
+ study.samples_df
844
+ # information on consensus features
845
+ study.consensus_df
846
+ # information on original features from ALL samples, including MS2 and EICs
847
+ study.features_df
848
+ ```
830
849
 
850
+ ### Analysis of a single sample
851
+ For troubleshooting, exploration, or just to create a figure on a single file, you might want to open and process a single file:
831
852
  ```python
832
- from masster import Wizard
853
+ from masster import Sample
854
+ sample = Sample(filename='...') # full path to a *.raw, *.wiff, *.mzML, or *.sample5 file
855
+ # peek into sample
856
+ sample.info()
857
+
858
+ # process
859
+ sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
860
+ sample.find_adducts()
861
+ sample.find_ms2()
833
862
 
834
- # Create wizard instance
835
- wiz = Wizard(source="./raw_data",
836
- folder="./output",
837
- num_cores=8)
863
+ # access data
864
+ sample.features_df
838
865
 
839
- # Generate analysis scripts
840
- wiz.create_scripts()
866
+ # save results
867
+ sample.save() # stores to *.sample5, our custom hdf5 format
868
+ sample.export_mgf()
841
869
 
842
- # Test with single file, then run full batch
843
- wiz.test_and_run()
844
- ```
870
+ # some plots
871
+ sample.plot_bpc()
872
+ sample.plot_tic()
873
+ sample.plot_2d()
874
+ sample.plot_features_stats()
845
875
 
846
- ### One-Line Command Processing
847
- Or, from the command-line:
848
- ```bash
849
- python -c "from masster import Wizard; wiz = Wizard(source='D:/Data/studies/my_study/raw', folder='D:/Data/studies/my_study/masster'); wiz.create_scripts(); wiz.test_and_run()"
876
+ # explore methods
877
+ dir(sample)
850
878
  ```
851
879
 
880
+ ## Disclaimer
881
+
882
+ **MASSter is research software under active development.** While we use it extensively in our lab and strive for quality and reliability, please be aware:
883
+
884
+ - **No warranties**: The software is provided "as is" without any warranty of any kind, express or implied
885
+ - **Backward compatibility**: We do not guarantee backward compatibility between versions. Breaking changes may occur as we improve the software
886
+ - **Performance**: While optimized for our workflows, performance may vary depending on your data and system configuration
887
+ - **Results**: We do our best to ensure accuracy, but you should validate results independently for your research
888
+ - **Support**: This is an academic project with limited resources. At the moment, we do not provide external user support.
889
+ - **Production use**: If you plan to use MASSter in production or critical workflows, thorough testing with your data is recommended
890
+
852
891
  ## License
853
892
  GNU Affero General Public License v3
854
893
 
@@ -858,4 +897,4 @@ See the [LICENSE](LICENSE) file for details.
858
897
  This project uses several third-party libraries, including pyOpenMS which is licensed under the BSD 3-Clause License. For complete information about third-party dependencies and their licenses, see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md).
859
898
 
860
899
  ## Citation
861
- If you use Masster in your research, please cite this repository.
900
+ If you use MASSter in your research, please cite this repository.
@@ -1,34 +1,35 @@
1
1
  masster/__init__.py,sha256=B7zftzdElF2Wb5B7KvkD6TONnMIY-Jxeen3s49dgmzs,1029
2
- masster/_version.py,sha256=1ymsIuIjSvggBFAIWxPqWntiqdxjPZEjAmDJy3vRsLE,257
2
+ masster/_version.py,sha256=3NDsZkQSczLt43gO7ybvmieIAj9lFBUU6zYYdJIGsc0,257
3
3
  masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
4
- masster/logger.py,sha256=oHEFPH1LzBHbmeP9WFaRZCWyqd14GasJjZMLI8a4O3I,19439
4
+ masster/logger.py,sha256=MbQ3uLI3N0G3tnvtGIJZN_HEwjKRfIhBVZmIzUev7oc,18827
5
5
  masster/spectrum.py,sha256=LlmxrI5MFS3aPrGSdqUSKVY0rJnKeBh3Frdh6a4dPvA,49722
6
- masster/data/libs/aa.csv,sha256=Sja1DyMsiaM2NfLcct4kAAcXYwPCukJJW8sDkup9w_c,1924
7
- masster/data/libs/ccm.csv,sha256=Q6nylV1152uTpX-ydqWeGrc6L9kgv45xN_fBZ4f7Tvo,12754
8
- masster/data/libs/urine.csv,sha256=iRrR4N8Wzb8KDhHJA4LqoQC35pp93FSaOKvXPrgFHis,653736
6
+ masster/data/libs/aa_nort.json,sha256=tadOrusSrcAIxTcvME-Vy_PVNQskCU4jN9XVDeYhq4o,6052
7
+ masster/data/libs/ccm_nort.json,sha256=9Dq_JqLqQ700Nri6LDe7stBUQkiTGXukK2GqsFaOrTw,35425
9
8
  masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data,sha256=01vC6m__Qqm2rLvlTMZoeKIKowFvovBTUnrNl8Uav3E,24576
10
9
  masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff,sha256=go5N9gAM1rn4PZAVaoCmdteY9f7YGEM9gyPdSmkQ8PE,1447936
11
10
  masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan,sha256=ahi1Y3UhAj9Bj4Q2MlbgPekNdkJvMOoMXVOoR6CeIxc,13881220
12
11
  masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2,sha256=TFB0HW4Agkig6yht7FtgjUdbXax8jjKaHpSZSvuU5vs,3252224
13
12
  masster/lib/__init__.py,sha256=TcePNx3SYZHz6763TL9Sg4gUNXaRWjlrOtyS6vsu-hg,178
14
- masster/lib/lib.py,sha256=SGWuiCTHc65khmLndC2cFBCO1rk8-SS6BkG4C_nOf-o,44984
13
+ masster/lib/lib.py,sha256=j3aFmS_xohvjgVi2XrfIOcNA5v3-slusDIOqHTij-Og,45016
15
14
  masster/sample/__init__.py,sha256=HL0m1ept0PMAYUCQtDDnkdOS12IFl6oLAq4TZQz83uY,170
16
- masster/sample/adducts.py,sha256=ALUSeY8fDf4oWl-g1wEffBDRk_apKTDwd_FOdxtP5Es,33511
17
- masster/sample/h5.py,sha256=uEbsfaMgRWgADLhw3j6rdXjHqRz4bmLQKjAzXXGA12M,109285
18
- masster/sample/helpers.py,sha256=opitF12aS3KNfwM0GFwiITX129AAi0HXvEW32oljTL4,43627
15
+ masster/sample/adducts.py,sha256=kVsVZTUIQsVGDgmzvkhPn-9XdilGjuNe-xqUZG--Huc,33519
16
+ masster/sample/h5.py,sha256=80ClWBCZH8eY5hLmUIy0GRvHshq0FvT4LWKmQ9Hn7L4,116017
17
+ masster/sample/helpers.py,sha256=ldQ05ha4whONSB-5YouZEXf0E9v90AnAN7fePAc3Y4s,48094
18
+ masster/sample/id.py,sha256=f2Y3JFn_0PPAscnfpllDR_82tiHH1j-SuxahiGsBjjU,46428
19
+ masster/sample/importers.py,sha256=F7hcOaDKPVYsT1iYLHseWSpOYa586u17jsQOPpXLS1I,13286
19
20
  masster/sample/lib.py,sha256=YIeG9nBiSMllu3xkqcQXnMe6pXJ9sJSN7un8SORgVJ4,33968
20
21
  masster/sample/load.py,sha256=tpPqRZtmfOY1AibHBBqcYfPD8SoI8Uue-IiDOA20DYc,48402
21
22
  masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
22
- masster/sample/plot.py,sha256=fQWWG-BeJz3XdtIVYTlV1_mM5xqZw5cYjQEmJU68t2U,105244
23
+ masster/sample/plot.py,sha256=CHfsAIeTxxjXwCd4E6rAKTZdtjtq_cpDNoYAo0Gi0zs,110454
23
24
  masster/sample/processing.py,sha256=7FmlDO_vsVbUfI62QSoHXKkgGtfjMLHLRdvaB4KdmP4,56018
24
25
  masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
25
- masster/sample/sample.py,sha256=-xliZfE_6I0tRkMZa8yChiMtdbKXdLnF4xnD-LtIgZY,22078
26
- masster/sample/sample5_schema.json,sha256=H5e2T6rHIDzul2kp_yP-ILUUWUpW08wP2pEQjMR0nSk,3977
26
+ masster/sample/sample.py,sha256=EfB4CweOCkuxseHVxLIlxaL6hGWd6k9J5LKT5wHyId0,22672
27
+ masster/sample/sample5_schema.json,sha256=szuRsrs2o50jEjXOAT7T6zLQhZauN_0nc_605uQjgVA,5839
27
28
  masster/sample/save.py,sha256=RD3tRoTNy2ANKoU-oZSfu47nQ4ATSAB-Io2EN0RUZaI,37994
28
29
  masster/sample/sciex.py,sha256=jzMrw5iKzbCFVgmgbU65eIr10eegzKng9WKx1Inl8Dg,21740
29
30
  masster/sample/thermo.py,sha256=zcH4aZg2hQnZp9rM618ZUiQXRuUqFRmVFdbCg0SnIOQ,27775
30
31
  masster/sample/defaults/__init__.py,sha256=A09AOP44cxD_oYohyt7XFUho0zndRcrzVD4DUaGnKH4,447
31
- masster/sample/defaults/find_adducts_def.py,sha256=bK05FcACdj0t-T8x6zWUbCzxRxbR7P0u8o8U23RIFrc,13552
32
+ masster/sample/defaults/find_adducts_def.py,sha256=RFZGaP3VeVEnSxlqvUaHq6wl1m5mfr7yvyf7pHWgtJI,13553
32
33
  masster/sample/defaults/find_features_def.py,sha256=Bcd39uav1BniwKgrsB-I1maF3ljf4Wb1f5yv0pDYfts,17745
33
34
  masster/sample/defaults/find_ms2_def.py,sha256=mr_XtzlhYfXt7kYWVFPt1ReGqHZQDPXe-1pGND2VvV8,9844
34
35
  masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2AtQDHcPu-O-YoQPs,11460
@@ -38,16 +39,16 @@ masster/study/analysis.py,sha256=bf2o_ywvwdPz1mZAHPETCPjWbvhoL9eEl1rLaz46Rp4,820
38
39
  masster/study/export.py,sha256=oRgM4F4hL3-nBRr_xd4KTin8WoH8QqCJnz3K_S1M14E,60258
39
40
  masster/study/h5.py,sha256=gJRWNQxBTyFKD3qRmEbM24YZ-HdyUk-veYgwQbK0eoE,99104
40
41
  masster/study/helpers.py,sha256=pRcVvGmm6NX-GEvWfYZXZjGc_C0WyklqSQx1PdpYn2E,189694
41
- masster/study/id.py,sha256=dTMNdBE7eOQZdFSU0KZwd7vFpqOmNlQnLI0VGW98y8w,89570
42
+ masster/study/id.py,sha256=iKMcxEzFpCKSBWWrBoOHWWM43tYJSRFvDqmYJTIQ1eU,92348
42
43
  masster/study/importers.py,sha256=iOe9w6uEn39ShosRms8n_zIrsSBczb51CAMoMrxSUw4,13587
43
- masster/study/load.py,sha256=-qz9tAVYzI5is7_-z43FndcJJk95BmuMUOIxYzZoO_I,71314
44
- masster/study/merge.py,sha256=FEya_v2cMNvzNBtxzTv57KxfwNxT1R63LDrvnYIHORY,164557
44
+ masster/study/load.py,sha256=GrdXuY7EFrmkqnzNk4gdqE41ufFE39pUvBrQaA7RPXA,70649
45
+ masster/study/merge.py,sha256=6jZPaCFp4Z8lnG8ztgEeoOzoxRmULIS4SKBYoPnEaTc,171480
45
46
  masster/study/parameters.py,sha256=bTvmcwX9INxzcrEAmTiFH8qeWVhwkvMTZjuP394pz5o,3279
46
- masster/study/plot.py,sha256=HIz8GMxN988KDXP425cpYohCgrgMrMCWBQ2s5ncmLoI,113816
47
+ masster/study/plot.py,sha256=2tRjZFViYq_I5ih1TruelzIdnGynKsOpnHyhTzGUr-k,120731
47
48
  masster/study/processing.py,sha256=oQGepG5-AXP9GGk8NTzl_i1ztgDp3TPwE2x15BmnpMw,57353
48
49
  masster/study/save.py,sha256=tF00anwwQDe2qxumv_4AP_1UOo0-f28Tkd2WXGyNHbI,9182
49
- masster/study/study.py,sha256=_IL6pCw29JI0p2ujdaMr_gsfaFkq3PFWO5NPuLpshYA,37867
50
- masster/study/study5_schema.json,sha256=uLA9TIb5loki87_BEdwSNyPYahwOanacYisXdIK0_Z0,8044
50
+ masster/study/study.py,sha256=sIoi03DBTzdcJjqvXZTt15kWK_pHiyQXJQSkR1cwCII,37959
51
+ masster/study/study5_schema.json,sha256=tvlQZezS4bwRRxlUi8cpoiPIE3qTzk2WDQfZw8mE724,8359
51
52
  masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
52
53
  masster/study/defaults/align_def.py,sha256=Du0F592ej2einT8kOx8EUs610axSvur8_-6N19O-uJY,10209
53
54
  masster/study/defaults/export_def.py,sha256=eXl3h4aoLX88XkHTpqahLd-QZ2gjUqrmjq8IJULXeWo,1203
@@ -58,11 +59,11 @@ masster/study/defaults/identify_def.py,sha256=nFj-pv6q1eRgSgoRr78YEnqulPPMWo2Ju5
58
59
  masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVqEY3x1x8pK0mPwYak,7264
59
60
  masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
60
61
  masster/study/defaults/merge_def.py,sha256=99TJtIk7mSoq8NMJMJ4b-cy7gUUixQN69krxttBnkfA,12899
61
- masster/study/defaults/study_def.py,sha256=xXOAcb8hez0woWwA1_T3fcokjiLJkq3hwA3OS6elb6I,15965
62
+ masster/study/defaults/study_def.py,sha256=kSvhiqpFp8b84vUsE8608LQsSXwz9lAPcU2BqK0T8z0,16095
62
63
  masster/wizard/__init__.py,sha256=L9G_datyGSFJjrBVklEVpZVLGXzUhDiWobtiygBH8vQ,669
63
- masster/wizard/wizard.py,sha256=11utDrZSt7R8D16Sl-NbRKHcgzhQEu8gW_q2V02-Qi0,66483
64
- masster-0.5.27.dist-info/METADATA,sha256=PnpTZ5qf1lH8R7eOZ4IACjRXoH_67n8qPgv3SfL-7U4,46360
65
- masster-0.5.27.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
66
- masster-0.5.27.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
67
- masster-0.5.27.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
68
- masster-0.5.27.dist-info/RECORD,,
64
+ masster/wizard/wizard.py,sha256=yAcEK7aPzWV9fILY4TQcwAhmJKpKE0q9BK8Ur9Eu9Og,66677
65
+ masster-0.6.0.dist-info/METADATA,sha256=BA9Ncf_p9wjyeZSlZDtxExrPBMaIcTKDblsGW4Gk2K0,50785
66
+ masster-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
67
+ masster-0.6.0.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
68
+ masster-0.6.0.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
69
+ masster-0.6.0.dist-info/RECORD,,
masster/data/libs/aa.csv DELETED
@@ -1,22 +0,0 @@
1
- name,smiles,inchikey,formula,db_id,db
2
- L-Glutamic acid,N[C@@H](CCC(O)=O)C(O)=O,WHUUTDBJXJRKMK-VKHMYHEASA-N,C5H9NO4,CID:33032,pubchem
3
- L-Tyrosine,N[C@@H](CC1=CC=C(O)C=C1)C(O)=O,OUYCCCASQSFEME-QMMMGPOBSA-N,C9H11NO3,CID:6057,pubchem
4
- L-Phenylalanine,N[C@@H](CC1=CC=CC=C1)C(O)=O,COLNVLDHVKWLRT-QMMMGPOBSA-N,C9H11NO2,CID:6140,pubchem
5
- L-Alanine,C[C@H](N)C(O)=O,QNAYBMKLOCPYGJ-REOHCLBHSA-N,C3H7NO2,CID:5950,pubchem
6
- L-Proline,OC(=O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N,C5H9NO2,CID:145742,pubchem
7
- L-Threonine,C[C@@H](O)[C@H](N)C(O)=O,AYFVYJQAPQTCCC-GBXIJSLDSA-N,C4H9NO3,CID:6288,pubchem
8
- L-Asparagine,N[C@@H](CC(N)=O)C(O)=O,DCXYFEDJOCDNAF-REOHCLBHSA-N,C4H8N2O3,CID:6267,pubchem
9
- L-Isoleucine,CC[C@H](C)[C@H](N)C(O)=O,AGPKZVBTJJNPAG-WHFBIAKZSA-N,C6H13NO2,CID:6306,pubchem
10
- L-Histidine,N[C@@H](CC1=CN=CN1)C(O)=O,HNDVDQJCIGZPNO-YFKPBYRVSA-N,C6H9N3O2,CID:6274,pubchem
11
- L-Lysine,NCCCC[C@H](N)C(O)=O,KDXKERNSBIXSRK-YFKPBYRVSA-N,C6H14N2O2,CID:5962,pubchem
12
- L-Serine,N[C@@H](CO)C(O)=O,MTCFGRXMJLQNBG-REOHCLBHSA-N,C3H7NO3,CID:5951,pubchem
13
- L-Aspartic acid,N[C@@H](CC(O)=O)C(O)=O,CKLJMWTZIZZHCS-REOHCLBHSA-N,C4H7NO4,CID:5960,pubchem
14
- L-Cystine,N[C@@H](CSSC[C@H](N)C(O)=O)C(O)=O,LEVWYRKDKASIDU-IMJSIDKUSA-N,C6H12N2O4S2,CID:67678,pubchem
15
- L-Arginine,N[C@@H](CCCNC(N)=N)C(O)=O,ODKSFYDXXFIFQN-BYPYZUCNSA-N,C6H14N4O2,CID:6322,pubchem
16
- L-Cysteine,N[C@@H](CS)C(O)=O,XUJNEKJLAYXESH-REOHCLBHSA-N,C3H7NO2S,CID:5862,pubchem
17
- L-Glutamine,N[C@@H](CCC(N)=O)C(O)=O,ZDXPYRJPNDTMRX-VKHMYHEASA-N,C5H10N2O3,CID:5961,pubchem
18
- L-Leucine,CC(C)C[C@H](N)C(O)=O,ROHFNLRQFUQHCH-YFKPBYRVSA-N,C6H13NO2,CID:6106,pubchem
19
- L-Methionine,CSCC[C@H](N)C(O)=O,FFEARJCKVFRZRR-BYPYZUCNSA-N,C5H11NO2S,CID:6137,pubchem
20
- L-Valine,CC(C)[C@H](N)C(O)=O,KZSNJWFQEVHDMF-BYPYZUCNSA-N,C5H11NO2,CID:6287,pubchem
21
- L-Tryptophan,N[C@@H](CC1=CNC2=C1C=CC=C2)C(O)=O,QIVBCDIJIAJPQS-VIFPVBQESA-N,C11H12N2O2,CID:6305,pubchem
22
- Glycine,NCC(O)=O,QNAYBMKLOCPYGJ-UHFFFAOYSA-N,C2H5NO2,CID:750,Glycine