masster 0.5.28__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/data/libs/aa_nort.json +240 -0
- masster/data/libs/ccm_nort.json +1319 -0
- masster/lib/lib.py +1 -1
- masster/logger.py +0 -6
- masster/sample/adducts.py +1 -1
- masster/sample/defaults/find_adducts_def.py +1 -1
- masster/sample/h5.py +152 -2
- masster/sample/helpers.py +91 -5
- masster/sample/id.py +1160 -0
- masster/sample/importers.py +316 -0
- masster/sample/plot.py +175 -71
- masster/sample/sample.py +18 -3
- masster/sample/sample5_schema.json +99 -1
- masster/study/defaults/study_def.py +8 -12
- masster/study/id.py +59 -12
- masster/study/load.py +0 -11
- masster/study/merge.py +153 -0
- masster/study/plot.py +197 -0
- masster/study/study.py +3 -1
- masster/study/study5_schema.json +15 -0
- masster/wizard/wizard.py +11 -12
- {masster-0.5.28.dist-info → masster-0.6.0.dist-info}/METADATA +15 -17
- {masster-0.5.28.dist-info → masster-0.6.0.dist-info}/RECORD +27 -26
- masster/data/libs/aa.csv +0 -22
- masster/data/libs/ccm.csv +0 -120
- masster/data/libs/urine.csv +0 -4693
- {masster-0.5.28.dist-info → masster-0.6.0.dist-info}/WHEEL +0 -0
- {masster-0.5.28.dist-info → masster-0.6.0.dist-info}/entry_points.txt +0 -0
- {masster-0.5.28.dist-info → masster-0.6.0.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
|
@@ -2955,6 +2955,203 @@ def plot_tic(
|
|
|
2955
2955
|
return p
|
|
2956
2956
|
|
|
2957
2957
|
|
|
2958
|
+
def _heatmap_hex_palette(colormap, n_colors=256):
    """Sample a callable colormap at evenly spaced points and return hex strings.

    Parameters:
        colormap: Callable that accepts an array of positions in [0, 1] and
            returns one colour per position in a form accepted by
            ``matplotlib.colors.rgb2hex``.
        n_colors (int): Number of colours to sample (default: 256)

    Returns:
        list[str]: One hex colour string per sampled position.
    """
    import matplotlib.colors as mcolors
    import numpy as np

    return [mcolors.rgb2hex(color) for color in colormap(np.linspace(0, 1, n_colors))]


def plot_heatmap(
    self,
    filename=None,
    width=800,
    height=600,
    cmap="viridis",
    title="Consensus Matrix Heatmap",
    quant="chrom_area",
    samples=None,
):
    """
    Plot a heatmap of the consensus matrix data.

    Samples are ordered from left to right, features are ordered by m/z from top to bottom
    (lowest m/z in the top row).
    Values are log10 transformed for better visualization.

    Parameters:
        filename (str, optional): Path to save the plot. Relative paths are resolved
            against the study folder. When None, the plot is shown in the notebook.
        width (int): Plot width in pixels (default: 800)
        height (int): Plot height in pixels (default: 600)
        cmap (str): Colormap name (default: "viridis"). A colormap object exposing
            ``to_bokeh()`` or being callable is accepted as well. If the colormap
            cannot be interpreted, a warning is logged and viridis is used.
        title (str): Plot title (default: "Consensus Matrix Heatmap")
        quant (str): Quantification method column name (default: "chrom_area")
        samples: Sample identifier(s) to include. Can be:
            - None: include all samples (default)
            - int: single sample_uid
            - str: single sample_name
            - list: multiple sample_uids or sample_names

    Returns:
        The Bokeh figure, or None when there is no data to plot.
    """
    import os

    import numpy as np
    import pandas as pd
    from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper
    from bokeh.plotting import figure
    from bokeh.transform import transform

    # Get consensus matrix
    matrix_df = self.get_consensus_matrix(quant=quant, samples=samples)
    if matrix_df is None or matrix_df.is_empty():
        self.logger.error("No consensus matrix available for heatmap.")
        return None

    # m/z values live in consensus_df; they are needed to order the rows
    if self.consensus_df is None or self.consensus_df.is_empty():
        self.logger.error("No consensus_df available for sorting features by m/z.")
        return None

    # Attach m/z, sort ascending by it, then drop the helper column again
    matrix_sorted = (
        matrix_df.join(
            self.consensus_df.select(["consensus_uid", "mz"]),
            on="consensus_uid",
            how="left",
        )
        .sort("mz")
        .drop("mz")
    )

    consensus_uids = matrix_sorted["consensus_uid"].to_list()
    sample_cols = [col for col in matrix_sorted.columns if col != "consensus_uid"]
    if not sample_cols:
        # Without sample columns there are no cells to draw; the colour-mapper
        # limits below could not be computed either.
        self.logger.error("No sample columns available for heatmap.")
        return None

    n_features = len(consensus_uids)
    n_samples = len(sample_cols)

    # Apply log10 transformation (add 1 to avoid log(0))
    matrix_log = np.log10(matrix_sorted.select(sample_cols).to_pandas().values + 1)

    # Long-format table with one row per (feature, sample) cell. Rows are
    # feature-major / sample-minor, which matches the row-major ravel() of
    # matrix_log.
    feature_labels = np.array([str(uid) for uid in consensus_uids], dtype=object)
    row_positions = [str(i) for i in range(n_features)]
    heatmap_df = pd.DataFrame(
        {
            "sample": sample_cols * n_features,
            "feature": np.repeat(feature_labels, n_samples),
            # String index used as the categorical y-axis position
            "feature_idx": np.repeat(np.array(row_positions, dtype=object), n_samples),
            "value": matrix_log.ravel(),
        }
    )
    source = ColumnDataSource(heatmap_df)

    # Resolve the colormap into a list of hex colours.
    # NOTE(review): `Colormap` is not imported in this function; it is assumed
    # to be available at module level (cmap package) - confirm.
    try:
        if isinstance(cmap, str):
            palette = _heatmap_hex_palette(Colormap(cmap))
        else:
            try:
                palette = cmap.to_bokeh()
            except AttributeError:
                palette = None
            # to_bokeh() may return something other than a plain palette (or
            # not exist at all); in both cases sample the colours manually.
            if not isinstance(palette, (list, tuple)):
                palette = _heatmap_hex_palette(cmap)
    except (AttributeError, ValueError, TypeError) as e:
        # Fallback to viridis if cmap interpretation fails
        self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
        from bokeh.palettes import viridis

        palette = viridis(256)

    color_mapper = LinearColorMapper(
        palette=palette,
        low=heatmap_df["value"].min(),
        high=heatmap_df["value"].max(),
    )

    # Categorical ranges on both axes. Bokeh lays out categorical factors from
    # bottom to top, so the y factors are reversed to put the lowest m/z
    # (row position "0") in the top row, as documented.
    p = figure(
        width=width,
        height=height,
        title=title,
        x_range=sample_cols,
        y_range=row_positions[::-1],
        toolbar_location="above",
        tools="pan,wheel_zoom,box_zoom,reset,save,hover",
        tooltips=[
            ("Sample", "@sample"),
            ("Feature UID", "@feature"),
            ("log10(Value+1)", "@value{0.00}"),
        ],
    )

    # One unit-sized rectangle per cell, coloured by the log-transformed value
    p.rect(
        x="sample",
        y="feature_idx",
        width=1,
        height=1,
        source=source,
        fill_color=transform("value", color_mapper),
        line_color=None,
    )

    color_bar = ColorBar(
        color_mapper=color_mapper,
        width=8,
        location=(0, 0),
        title=f"log10({quant}+1)",
        ticker=BasicTicker(desired_num_ticks=8),
    )
    p.add_layout(color_bar, "right")

    # Style the plot
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.grid.grid_line_color = None
    p.xaxis.major_label_orientation = 45
    p.yaxis.axis_label = "Features (sorted by m/z)"
    p.xaxis.axis_label = "Samples"

    # Apply consistent save/display behavior
    if filename is not None:
        # Convert relative paths to absolute paths using study folder as base
        if not os.path.isabs(filename):
            filename = os.path.join(self.folder, filename)

        # Absolute path is passed along for logging
        abs_filename = os.path.abspath(filename)
        _isolated_save_plot(p, filename, abs_filename, self.logger, "Heatmap Plot")
    else:
        # Show in notebook when no filename provided
        _isolated_show_notebook(p)

    return p
|
|
3153
|
+
|
|
3154
|
+
|
|
2958
3155
|
def plot_pca(self, *args, **kwargs):
|
|
2959
3156
|
"""Deprecated: Use plot_samples_pca instead."""
|
|
2960
3157
|
import warnings
|
masster/study/study.py
CHANGED
|
@@ -14,7 +14,7 @@ Main class:
|
|
|
14
14
|
consensus_select/filter/delete
|
|
15
15
|
- Retrieval: get_consensus, get_chrom, get_samples, get_*_stats, get_*_matrix
|
|
16
16
|
- Plotting: plot_alignment, plot_samples_pca/umap/2d, plot_tic/bpc/eic, plot_chrom,
|
|
17
|
-
plot_rt_correction, plot_consensus_2d/stats
|
|
17
|
+
plot_rt_correction, plot_consensus_2d/stats, plot_heatmap
|
|
18
18
|
- Export: export_mgf, export_mztab, export_xlsx, export_parquet
|
|
19
19
|
- Identification: lib_load, identify, get_id, id_reset, lib_reset
|
|
20
20
|
- Parameters: get/update parameters, update_history
|
|
@@ -96,6 +96,7 @@ from masster.study.plot import plot_bpc
|
|
|
96
96
|
from masster.study.plot import plot_tic
|
|
97
97
|
from masster.study.plot import plot_eic
|
|
98
98
|
from masster.study.plot import plot_rt_correction
|
|
99
|
+
from masster.study.plot import plot_heatmap
|
|
99
100
|
from masster.study.processing import align
|
|
100
101
|
from masster.study.merge import merge
|
|
101
102
|
from masster.study.processing import integrate
|
|
@@ -429,6 +430,7 @@ class Study:
|
|
|
429
430
|
plot_rt_correction = plot_rt_correction
|
|
430
431
|
plot_tic = plot_tic
|
|
431
432
|
plot_eic = plot_eic
|
|
433
|
+
plot_heatmap = plot_heatmap
|
|
432
434
|
|
|
433
435
|
# === Analysis Operations ===
|
|
434
436
|
analyze_umap = analyze_umap
|
masster/study/study5_schema.json
CHANGED
|
@@ -261,6 +261,21 @@
|
|
|
261
261
|
},
|
|
262
262
|
"ms1_spec": {
|
|
263
263
|
"dtype": "pl.Object"
|
|
264
|
+
},
|
|
265
|
+
"id_top_name": {
|
|
266
|
+
"dtype": "pl.Utf8"
|
|
267
|
+
},
|
|
268
|
+
"id_top_class": {
|
|
269
|
+
"dtype": "pl.Utf8"
|
|
270
|
+
},
|
|
271
|
+
"id_top_adduct": {
|
|
272
|
+
"dtype": "pl.Utf8"
|
|
273
|
+
},
|
|
274
|
+
"id_top_score": {
|
|
275
|
+
"dtype": "pl.Float64"
|
|
276
|
+
},
|
|
277
|
+
"id_source": {
|
|
278
|
+
"dtype": "pl.Utf8"
|
|
264
279
|
}
|
|
265
280
|
}
|
|
266
281
|
},
|
masster/wizard/wizard.py
CHANGED
|
@@ -200,12 +200,12 @@ class wizard_def:
|
|
|
200
200
|
# Set default adducts based on polarity if not provided
|
|
201
201
|
if not self.adducts:
|
|
202
202
|
if self.polarity and self.polarity.lower() in ["positive", "pos"]:
|
|
203
|
-
self.adducts = ["H
|
|
203
|
+
self.adducts = ["+H:1:0.8", "+Na:1:0.1", "+NH4:1:0.1"]
|
|
204
204
|
elif self.polarity and self.polarity.lower() in ["negative", "neg"]:
|
|
205
|
-
self.adducts = ["H
|
|
205
|
+
self.adducts = ["-H:-1:1.0", "+CH2O2:0:0.5"]
|
|
206
206
|
else:
|
|
207
207
|
# Default to positive if polarity is None or unknown
|
|
208
|
-
self.adducts = ["H
|
|
208
|
+
self.adducts = ["+H:1:0.8", "+Na:1:0.1", "+NH4:1:0.1"]
|
|
209
209
|
|
|
210
210
|
# Validate num_cores
|
|
211
211
|
max_cores = multiprocessing.cpu_count()
|
|
@@ -676,9 +676,7 @@ class Wizard:
|
|
|
676
676
|
" ",
|
|
677
677
|
" # Step 3: Create and configure study",
|
|
678
678
|
' print("\\nStep 3/7: Initializing study...")',
|
|
679
|
-
" study = Study(folder=PARAMS['folder'])",
|
|
680
|
-
" study.polarity = PARAMS['polarity']",
|
|
681
|
-
" study.adducts = PARAMS['adducts']",
|
|
679
|
+
" study = Study(folder=PARAMS['folder'], polarity=PARAMS['polarity'], adducts=PARAMS['adducts'])",
|
|
682
680
|
" ",
|
|
683
681
|
" # Step 4: Add sample5 files to study",
|
|
684
682
|
' print("\\nStep 4/7: Adding samples to study...")',
|
|
@@ -692,6 +690,12 @@ class Wizard:
|
|
|
692
690
|
" rt_tol=PARAMS['rt_tol']",
|
|
693
691
|
" )",
|
|
694
692
|
" ",
|
|
693
|
+
" # Check that more than 1 file has been loaded",
|
|
694
|
+
" if len(study.samples) <= 1:",
|
|
695
|
+
' print("\\nWARNING: Study merging requires more than 1 sample file.")',
|
|
696
|
+
' print(f"Only {len(study.samples)} sample(s) loaded. Terminating execution.")',
|
|
697
|
+
" return False",
|
|
698
|
+
" ",
|
|
695
699
|
" study.merge(",
|
|
696
700
|
' method="qt",',
|
|
697
701
|
" min_samples=PARAMS['min_samples_per_feature'],",
|
|
@@ -764,14 +768,9 @@ class Wizard:
|
|
|
764
768
|
'app = marimo.App(width="medium")',
|
|
765
769
|
"",
|
|
766
770
|
"@app.cell",
|
|
767
|
-
"def __():",
|
|
768
|
-
" import marimo as mo",
|
|
769
|
-
" return (mo,)",
|
|
770
|
-
"",
|
|
771
|
-
"@app.cell",
|
|
772
771
|
"def __(mo):",
|
|
773
772
|
' mo.md(r"""',
|
|
774
|
-
"
|
|
773
|
+
" ## MASSter Interactive Analysis",
|
|
775
774
|
" ",
|
|
776
775
|
f" **Source:** {source_info.get('number_of_files', 0)} files detected",
|
|
777
776
|
f" **Polarity:** {source_info.get('polarity', 'unknown')}",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: masster
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Mass spectrometry data analysis package
|
|
5
5
|
Project-URL: homepage, https://github.com/zamboni-lab/masster
|
|
6
6
|
Project-URL: repository, https://github.com/zamboni-lab/masster
|
|
@@ -734,19 +734,19 @@ Description-Content-Type: text/markdown
|
|
|
734
734
|
|
|
735
735
|
## Background and motivation
|
|
736
736
|
|
|
737
|
-
MASSter is actively used,
|
|
737
|
+
MASSter is actively used, maintained, and developed by the Zamboni Lab at ETH Zurich. The project started because many needs were unmet by the "usual" software packages (mzMine, MS-DIAL, Workflow4Metabolomics (W4M), ...), for example performance, scalability, sensitivity, robustness, speed, rapid implementation of new features, and embedding in ETL systems.
|
|
738
738
|
|
|
739
|
-
All methods include
|
|
739
|
+
All methods include many parameters and may wrap alternative algorithms. These options are primarily relevant for advanced users. We recommend running the processing methods with the defaults or using the Wizard.
|
|
740
740
|
|
|
741
741
|
## Content
|
|
742
742
|
|
|
743
743
|
MASSter is designed to deal with DDA data, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS, which is both accurate and fast, and it was wrapped with additional code to improve isotope and adduct detection. All other functionalities are own implementations: centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc.
|
|
744
744
|
|
|
745
|
-
MASSter was engineered to maximize quality
|
|
745
|
+
MASSter was engineered to maximize result quality, sensitivity, scalability, and speed. Yes, it's Python, which can be slower than other languages, but considerable effort was spent on optimizations, including the systematic use of [Polars](https://pola.rs/), NumPy vectorization, multiprocessing, and chunking. MASSter has been tested on studies with 3,000+ LC–MS/MS samples (≈1 million MS2 spectra) and autonomously completed analyses within a few hours.
|
|
746
746
|
|
|
747
747
|
## Architecture
|
|
748
748
|
|
|
749
|
-
MASSter defines
|
|
749
|
+
MASSter defines classes for Spectra, Chromatograms, Libraries, Samples, and Studies (a Study is a collection of samples, i.e. an LC–MS sequence). Users will typically work with a single `Study` object at a time. `Sample` objects are created when analyzing a batch (and saved for caching), or used for development, troubleshooting, or generating illustrations.
|
|
750
750
|
|
|
751
751
|
The analysis can be done in scripts (without user intervention, e.g. by the integrated Wizard), or interactively in notebooks, i.e. [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/).
|
|
752
752
|
|
|
@@ -756,9 +756,9 @@ You'll need to install Python (3.10-3.13, 3.14 has not been tested yet).
|
|
|
756
756
|
|
|
757
757
|
MASSter reads raw (Thermo), wiff (SCIEX), or mzML data. Reading vendor formats relies on .NET libraries and is only possible on Windows. On Linux or macOS, you will need to use mzML data.
|
|
758
758
|
|
|
759
|
-
**It's recommended to use data in either vendor's raw
|
|
759
|
+
**It's recommended to use data in either the vendor's raw formats (WIFF and Thermo RAW) or mzML in profile mode.** MASSter includes a sophisticated and sufficiently fast centroiding algorithm that works well across the full dynamic range and will only act on spectra that are relevant. In our tests with data from different vendors, the centroiding performed much better than most vendor implementations (which are primarily proteomics-centric).
|
|
760
760
|
|
|
761
|
-
If still want to convert raw data to centroided mzML, please use
|
|
761
|
+
If you still want to convert raw data to centroided mzML, please use CentroidR: https://github.com/Adafede/CentroidR/tree/0.0.0.9001
|
|
762
762
|
|
|
763
763
|
## Installation
|
|
764
764
|
|
|
@@ -769,7 +769,7 @@ pip install masster
|
|
|
769
769
|
## Getting started
|
|
770
770
|
**The quickest way to use, or learn how to use MASSter, is to use the Wizard** which we integrated and, ideally, takes care of everything automatically.
|
|
771
771
|
|
|
772
|
-
The Wizard only needs to know where to find the MS files and
|
|
772
|
+
The Wizard only needs to know where to find the MS files and where to store the results.
|
|
773
773
|
```python
|
|
774
774
|
from masster import Wizard
|
|
775
775
|
wiz = Wizard(
|
|
@@ -780,15 +780,15 @@ wiz = Wizard(
|
|
|
780
780
|
wiz.test_and_run()
|
|
781
781
|
```
|
|
782
782
|
|
|
783
|
-
This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if
|
|
783
|
+
This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if successful, run the full workflow using parallel processes. Once the processing is over, navigate to `folder` to see what happened...
|
|
784
784
|
|
|
785
785
|
If you want to interact with your data, we recommend using [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/) and open the `*.study5` file, for example:
|
|
786
786
|
|
|
787
787
|
```bash
|
|
788
|
-
# use marimo to open the script created by
|
|
789
|
-
marimo edit '
|
|
790
|
-
# or, if you use uv to manage an environment with masster
|
|
791
|
-
uv run marimo edit '
|
|
788
|
+
# use marimo to open the script created by the Wizard
|
|
789
|
+
marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
|
|
790
|
+
# or, if you use uv to manage an environment with masster
|
|
791
|
+
uv run marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
|
|
792
792
|
```
|
|
793
793
|
|
|
794
794
|
### Basic Workflow for analyzing LC-MS study with 1-1000+ samples
|
|
@@ -874,7 +874,7 @@ sample.plot_2d()
|
|
|
874
874
|
sample.plot_features_stats()
|
|
875
875
|
|
|
876
876
|
# explore methods
|
|
877
|
-
dir(
|
|
877
|
+
dir(sample)
|
|
878
878
|
```
|
|
879
879
|
|
|
880
880
|
## Disclaimer
|
|
@@ -885,11 +885,9 @@ dir(study)
|
|
|
885
885
|
- **Backward compatibility**: We do not guarantee backward compatibility between versions. Breaking changes may occur as we improve the software
|
|
886
886
|
- **Performance**: While optimized for our workflows, performance may vary depending on your data and system configuration
|
|
887
887
|
- **Results**: We do our best to ensure accuracy, but you should validate results independently for your research
|
|
888
|
-
- **Support**: This is an academic project with limited resources.
|
|
888
|
+
- **Support**: This is an academic project with limited resources. At the moment, we do not provide external user support.
|
|
889
889
|
- **Production use**: If you plan to use MASSter in production or critical workflows, thorough testing with your data is recommended
|
|
890
890
|
|
|
891
|
-
We welcome feedback, bug reports, and contributions via GitHub!
|
|
892
|
-
|
|
893
891
|
## License
|
|
894
892
|
GNU Affero General Public License v3
|
|
895
893
|
|
|
@@ -1,34 +1,35 @@
|
|
|
1
1
|
masster/__init__.py,sha256=B7zftzdElF2Wb5B7KvkD6TONnMIY-Jxeen3s49dgmzs,1029
|
|
2
|
-
masster/_version.py,sha256=
|
|
2
|
+
masster/_version.py,sha256=3NDsZkQSczLt43gO7ybvmieIAj9lFBUU6zYYdJIGsc0,257
|
|
3
3
|
masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
|
|
4
|
-
masster/logger.py,sha256=
|
|
4
|
+
masster/logger.py,sha256=MbQ3uLI3N0G3tnvtGIJZN_HEwjKRfIhBVZmIzUev7oc,18827
|
|
5
5
|
masster/spectrum.py,sha256=LlmxrI5MFS3aPrGSdqUSKVY0rJnKeBh3Frdh6a4dPvA,49722
|
|
6
|
-
masster/data/libs/
|
|
7
|
-
masster/data/libs/
|
|
8
|
-
masster/data/libs/urine.csv,sha256=iRrR4N8Wzb8KDhHJA4LqoQC35pp93FSaOKvXPrgFHis,653736
|
|
6
|
+
masster/data/libs/aa_nort.json,sha256=tadOrusSrcAIxTcvME-Vy_PVNQskCU4jN9XVDeYhq4o,6052
|
|
7
|
+
masster/data/libs/ccm_nort.json,sha256=9Dq_JqLqQ700Nri6LDe7stBUQkiTGXukK2GqsFaOrTw,35425
|
|
9
8
|
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data,sha256=01vC6m__Qqm2rLvlTMZoeKIKowFvovBTUnrNl8Uav3E,24576
|
|
10
9
|
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff,sha256=go5N9gAM1rn4PZAVaoCmdteY9f7YGEM9gyPdSmkQ8PE,1447936
|
|
11
10
|
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan,sha256=ahi1Y3UhAj9Bj4Q2MlbgPekNdkJvMOoMXVOoR6CeIxc,13881220
|
|
12
11
|
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2,sha256=TFB0HW4Agkig6yht7FtgjUdbXax8jjKaHpSZSvuU5vs,3252224
|
|
13
12
|
masster/lib/__init__.py,sha256=TcePNx3SYZHz6763TL9Sg4gUNXaRWjlrOtyS6vsu-hg,178
|
|
14
|
-
masster/lib/lib.py,sha256=
|
|
13
|
+
masster/lib/lib.py,sha256=j3aFmS_xohvjgVi2XrfIOcNA5v3-slusDIOqHTij-Og,45016
|
|
15
14
|
masster/sample/__init__.py,sha256=HL0m1ept0PMAYUCQtDDnkdOS12IFl6oLAq4TZQz83uY,170
|
|
16
|
-
masster/sample/adducts.py,sha256=
|
|
17
|
-
masster/sample/h5.py,sha256=
|
|
18
|
-
masster/sample/helpers.py,sha256=
|
|
15
|
+
masster/sample/adducts.py,sha256=kVsVZTUIQsVGDgmzvkhPn-9XdilGjuNe-xqUZG--Huc,33519
|
|
16
|
+
masster/sample/h5.py,sha256=80ClWBCZH8eY5hLmUIy0GRvHshq0FvT4LWKmQ9Hn7L4,116017
|
|
17
|
+
masster/sample/helpers.py,sha256=ldQ05ha4whONSB-5YouZEXf0E9v90AnAN7fePAc3Y4s,48094
|
|
18
|
+
masster/sample/id.py,sha256=f2Y3JFn_0PPAscnfpllDR_82tiHH1j-SuxahiGsBjjU,46428
|
|
19
|
+
masster/sample/importers.py,sha256=F7hcOaDKPVYsT1iYLHseWSpOYa586u17jsQOPpXLS1I,13286
|
|
19
20
|
masster/sample/lib.py,sha256=YIeG9nBiSMllu3xkqcQXnMe6pXJ9sJSN7un8SORgVJ4,33968
|
|
20
21
|
masster/sample/load.py,sha256=tpPqRZtmfOY1AibHBBqcYfPD8SoI8Uue-IiDOA20DYc,48402
|
|
21
22
|
masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
|
|
22
|
-
masster/sample/plot.py,sha256=
|
|
23
|
+
masster/sample/plot.py,sha256=CHfsAIeTxxjXwCd4E6rAKTZdtjtq_cpDNoYAo0Gi0zs,110454
|
|
23
24
|
masster/sample/processing.py,sha256=7FmlDO_vsVbUfI62QSoHXKkgGtfjMLHLRdvaB4KdmP4,56018
|
|
24
25
|
masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
|
|
25
|
-
masster/sample/sample.py,sha256
|
|
26
|
-
masster/sample/sample5_schema.json,sha256=
|
|
26
|
+
masster/sample/sample.py,sha256=EfB4CweOCkuxseHVxLIlxaL6hGWd6k9J5LKT5wHyId0,22672
|
|
27
|
+
masster/sample/sample5_schema.json,sha256=szuRsrs2o50jEjXOAT7T6zLQhZauN_0nc_605uQjgVA,5839
|
|
27
28
|
masster/sample/save.py,sha256=RD3tRoTNy2ANKoU-oZSfu47nQ4ATSAB-Io2EN0RUZaI,37994
|
|
28
29
|
masster/sample/sciex.py,sha256=jzMrw5iKzbCFVgmgbU65eIr10eegzKng9WKx1Inl8Dg,21740
|
|
29
30
|
masster/sample/thermo.py,sha256=zcH4aZg2hQnZp9rM618ZUiQXRuUqFRmVFdbCg0SnIOQ,27775
|
|
30
31
|
masster/sample/defaults/__init__.py,sha256=A09AOP44cxD_oYohyt7XFUho0zndRcrzVD4DUaGnKH4,447
|
|
31
|
-
masster/sample/defaults/find_adducts_def.py,sha256=
|
|
32
|
+
masster/sample/defaults/find_adducts_def.py,sha256=RFZGaP3VeVEnSxlqvUaHq6wl1m5mfr7yvyf7pHWgtJI,13553
|
|
32
33
|
masster/sample/defaults/find_features_def.py,sha256=Bcd39uav1BniwKgrsB-I1maF3ljf4Wb1f5yv0pDYfts,17745
|
|
33
34
|
masster/sample/defaults/find_ms2_def.py,sha256=mr_XtzlhYfXt7kYWVFPt1ReGqHZQDPXe-1pGND2VvV8,9844
|
|
34
35
|
masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2AtQDHcPu-O-YoQPs,11460
|
|
@@ -38,16 +39,16 @@ masster/study/analysis.py,sha256=bf2o_ywvwdPz1mZAHPETCPjWbvhoL9eEl1rLaz46Rp4,820
|
|
|
38
39
|
masster/study/export.py,sha256=oRgM4F4hL3-nBRr_xd4KTin8WoH8QqCJnz3K_S1M14E,60258
|
|
39
40
|
masster/study/h5.py,sha256=gJRWNQxBTyFKD3qRmEbM24YZ-HdyUk-veYgwQbK0eoE,99104
|
|
40
41
|
masster/study/helpers.py,sha256=pRcVvGmm6NX-GEvWfYZXZjGc_C0WyklqSQx1PdpYn2E,189694
|
|
41
|
-
masster/study/id.py,sha256=
|
|
42
|
+
masster/study/id.py,sha256=iKMcxEzFpCKSBWWrBoOHWWM43tYJSRFvDqmYJTIQ1eU,92348
|
|
42
43
|
masster/study/importers.py,sha256=iOe9w6uEn39ShosRms8n_zIrsSBczb51CAMoMrxSUw4,13587
|
|
43
|
-
masster/study/load.py,sha256
|
|
44
|
-
masster/study/merge.py,sha256=
|
|
44
|
+
masster/study/load.py,sha256=GrdXuY7EFrmkqnzNk4gdqE41ufFE39pUvBrQaA7RPXA,70649
|
|
45
|
+
masster/study/merge.py,sha256=6jZPaCFp4Z8lnG8ztgEeoOzoxRmULIS4SKBYoPnEaTc,171480
|
|
45
46
|
masster/study/parameters.py,sha256=bTvmcwX9INxzcrEAmTiFH8qeWVhwkvMTZjuP394pz5o,3279
|
|
46
|
-
masster/study/plot.py,sha256=
|
|
47
|
+
masster/study/plot.py,sha256=2tRjZFViYq_I5ih1TruelzIdnGynKsOpnHyhTzGUr-k,120731
|
|
47
48
|
masster/study/processing.py,sha256=oQGepG5-AXP9GGk8NTzl_i1ztgDp3TPwE2x15BmnpMw,57353
|
|
48
49
|
masster/study/save.py,sha256=tF00anwwQDe2qxumv_4AP_1UOo0-f28Tkd2WXGyNHbI,9182
|
|
49
|
-
masster/study/study.py,sha256=
|
|
50
|
-
masster/study/study5_schema.json,sha256=
|
|
50
|
+
masster/study/study.py,sha256=sIoi03DBTzdcJjqvXZTt15kWK_pHiyQXJQSkR1cwCII,37959
|
|
51
|
+
masster/study/study5_schema.json,sha256=tvlQZezS4bwRRxlUi8cpoiPIE3qTzk2WDQfZw8mE724,8359
|
|
51
52
|
masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
|
|
52
53
|
masster/study/defaults/align_def.py,sha256=Du0F592ej2einT8kOx8EUs610axSvur8_-6N19O-uJY,10209
|
|
53
54
|
masster/study/defaults/export_def.py,sha256=eXl3h4aoLX88XkHTpqahLd-QZ2gjUqrmjq8IJULXeWo,1203
|
|
@@ -58,11 +59,11 @@ masster/study/defaults/identify_def.py,sha256=nFj-pv6q1eRgSgoRr78YEnqulPPMWo2Ju5
|
|
|
58
59
|
masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVqEY3x1x8pK0mPwYak,7264
|
|
59
60
|
masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
|
|
60
61
|
masster/study/defaults/merge_def.py,sha256=99TJtIk7mSoq8NMJMJ4b-cy7gUUixQN69krxttBnkfA,12899
|
|
61
|
-
masster/study/defaults/study_def.py,sha256=
|
|
62
|
+
masster/study/defaults/study_def.py,sha256=kSvhiqpFp8b84vUsE8608LQsSXwz9lAPcU2BqK0T8z0,16095
|
|
62
63
|
masster/wizard/__init__.py,sha256=L9G_datyGSFJjrBVklEVpZVLGXzUhDiWobtiygBH8vQ,669
|
|
63
|
-
masster/wizard/wizard.py,sha256=
|
|
64
|
-
masster-0.
|
|
65
|
-
masster-0.
|
|
66
|
-
masster-0.
|
|
67
|
-
masster-0.
|
|
68
|
-
masster-0.
|
|
64
|
+
masster/wizard/wizard.py,sha256=yAcEK7aPzWV9fILY4TQcwAhmJKpKE0q9BK8Ur9Eu9Og,66677
|
|
65
|
+
masster-0.6.0.dist-info/METADATA,sha256=BA9Ncf_p9wjyeZSlZDtxExrPBMaIcTKDblsGW4Gk2K0,50785
|
|
66
|
+
masster-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
67
|
+
masster-0.6.0.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
68
|
+
masster-0.6.0.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
|
|
69
|
+
masster-0.6.0.dist-info/RECORD,,
|
masster/data/libs/aa.csv
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
name,smiles,inchikey,formula,db_id,db
|
|
2
|
-
L-Glutamic acid,N[C@@H](CCC(O)=O)C(O)=O,WHUUTDBJXJRKMK-VKHMYHEASA-N,C5H9NO4,CID:33032,pubchem
|
|
3
|
-
L-Tyrosine,N[C@@H](CC1=CC=C(O)C=C1)C(O)=O,OUYCCCASQSFEME-QMMMGPOBSA-N,C9H11NO3,CID:6057,pubchem
|
|
4
|
-
L-Phenylalanine,N[C@@H](CC1=CC=CC=C1)C(O)=O,COLNVLDHVKWLRT-QMMMGPOBSA-N,C9H11NO2,CID:6140,pubchem
|
|
5
|
-
L-Alanine,C[C@H](N)C(O)=O,QNAYBMKLOCPYGJ-REOHCLBHSA-N,C3H7NO2,CID:5950,pubchem
|
|
6
|
-
L-Proline,OC(=O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N,C5H9NO2,CID:145742,pubchem
|
|
7
|
-
L-Threonine,C[C@@H](O)[C@H](N)C(O)=O,AYFVYJQAPQTCCC-GBXIJSLDSA-N,C4H9NO3,CID:6288,pubchem
|
|
8
|
-
L-Asparagine,N[C@@H](CC(N)=O)C(O)=O,DCXYFEDJOCDNAF-REOHCLBHSA-N,C4H8N2O3,CID:6267,pubchem
|
|
9
|
-
L-Isoleucine,CC[C@H](C)[C@H](N)C(O)=O,AGPKZVBTJJNPAG-WHFBIAKZSA-N,C6H13NO2,CID:6306,pubchem
|
|
10
|
-
L-Histidine,N[C@@H](CC1=CN=CN1)C(O)=O,HNDVDQJCIGZPNO-YFKPBYRVSA-N,C6H9N3O2,CID:6274,pubchem
|
|
11
|
-
L-Lysine,NCCCC[C@H](N)C(O)=O,KDXKERNSBIXSRK-YFKPBYRVSA-N,C6H14N2O2,CID:5962,pubchem
|
|
12
|
-
L-Serine,N[C@@H](CO)C(O)=O,MTCFGRXMJLQNBG-REOHCLBHSA-N,C3H7NO3,CID:5951,pubchem
|
|
13
|
-
L-Aspartic acid,N[C@@H](CC(O)=O)C(O)=O,CKLJMWTZIZZHCS-REOHCLBHSA-N,C4H7NO4,CID:5960,pubchem
|
|
14
|
-
L-Cystine,N[C@@H](CSSC[C@H](N)C(O)=O)C(O)=O,LEVWYRKDKASIDU-IMJSIDKUSA-N,C6H12N2O4S2,CID:67678,pubchem
|
|
15
|
-
L-Arginine,N[C@@H](CCCNC(N)=N)C(O)=O,ODKSFYDXXFIFQN-BYPYZUCNSA-N,C6H14N4O2,CID:6322,pubchem
|
|
16
|
-
L-Cysteine,N[C@@H](CS)C(O)=O,XUJNEKJLAYXESH-REOHCLBHSA-N,C3H7NO2S,CID:5862,pubchem
|
|
17
|
-
L-Glutamine,N[C@@H](CCC(N)=O)C(O)=O,ZDXPYRJPNDTMRX-VKHMYHEASA-N,C5H10N2O3,CID:5961,pubchem
|
|
18
|
-
L-Leucine,CC(C)C[C@H](N)C(O)=O,ROHFNLRQFUQHCH-YFKPBYRVSA-N,C6H13NO2,CID:6106,pubchem
|
|
19
|
-
L-Methionine,CSCC[C@H](N)C(O)=O,FFEARJCKVFRZRR-BYPYZUCNSA-N,C5H11NO2S,CID:6137,pubchem
|
|
20
|
-
L-Valine,CC(C)[C@H](N)C(O)=O,KZSNJWFQEVHDMF-BYPYZUCNSA-N,C5H11NO2,CID:6287,pubchem
|
|
21
|
-
L-Tryptophan,N[C@@H](CC1=CNC2=C1C=CC=C2)C(O)=O,QIVBCDIJIAJPQS-VIFPVBQESA-N,C11H12N2O2,CID:6305,pubchem
|
|
22
|
-
Glycine,NCC(O)=O,QNAYBMKLOCPYGJ-UHFFFAOYSA-N,C2H5NO2,CID:750,Glycine
|