masster-0.5.28-py3-none-any.whl → masster-0.6.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


masster/study/merge.py CHANGED
@@ -441,9 +441,15 @@ def merge(study, **kwargs) -> None:
      cached_valid_adducts = None
      try:
          cached_adducts_df = study._get_adducts()
+         # Remove all adducts with wrong polarity
+         if study.polarity == "positive":
+             cached_adducts_df = cached_adducts_df.filter(pl.col("charge") >= 0)
+         else:
+             cached_adducts_df = cached_adducts_df.filter(pl.col("charge") <= 0)
          if not cached_adducts_df.is_empty():
              cached_valid_adducts = set(cached_adducts_df["name"].to_list())
          else:
+             study.logger.warning(f"No valid adducts found for polarity '{study.polarity}'")
              cached_valid_adducts = set()
      except Exception as e:
          study.logger.warning(f"Could not retrieve study adducts: {e}")
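The new polarity guard reduces to a simple Polars filter over the adducts table. A minimal standalone sketch, assuming a table with the `name` and `charge` columns used above (the values are illustrative):

```python
import polars as pl

# Hypothetical adducts table with the columns used in the hunk above
adducts_df = pl.DataFrame({
    "name": ["[M+H]1+", "[M+Na]1+", "[M-H]1-", "[M]"],
    "charge": [1, 1, -1, 0],
})

polarity = "positive"
# Keep only adducts whose charge sign matches the study polarity;
# charge 0 passes either branch, matching the >= / <= comparisons above
if polarity == "positive":
    adducts_df = adducts_df.filter(pl.col("charge") >= 0)
else:
    adducts_df = adducts_df.filter(pl.col("charge") <= 0)

valid_adducts = set(adducts_df["name"].to_list())
valid_adducts.add("?")  # '?' is always allowed, as in merge()
print(valid_adducts)    # {'[M+H]1+', '[M+Na]1+', '[M]', '?'}
```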
@@ -452,6 +458,13 @@ def merge(study, **kwargs) -> None:
      # Always allow '?' adducts
      cached_valid_adducts.add("?")

+     # Bypass for single sample case
+     if len(study.samples_df) == 1:
+         study.logger.info("Single sample detected - bypassing merge algorithm and using direct feature mapping")
+         _handle_single_sample_merge(study, cached_adducts_df, cached_valid_adducts)
+         # Skip all post-processing for single sample case
+         return
+
      # Route to algorithm implementation
      if params.method == "kd":
          consensus_map = _merge_kd(study, params)
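The observable effect of the bypass, as a hedged usage sketch (a `study` already holding exactly one processed sample is assumed; attribute names are taken from the hunks in this diff):

```python
# `study` is assumed to be a masster Study with exactly one sample loaded
if len(study.samples_df) == 1:
    study.merge()  # logs the bypass and returns after direct feature mapping
    # Every consensus feature then comes from that single sample
    assert (study.consensus_df["number_samples"] == 1).all()
    assert len(study.consensus_mapping_df) == len(study.consensus_df)
```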
@@ -1719,6 +1732,10 @@ def _calculate_consensus_statistics(
          mz_values: m/z values from chunk consensus features
          intensity_values: Intensity values from chunk consensus features
          quality_values: Quality values from chunk consensus features
+         number_features: Number of unique features contributing
+         number_samples: Number of unique samples contributing
+         cached_adducts_df: Cached DataFrame of valid adducts for the study
+         cached_valid_adducts: Cached set of valid adduct names for the study

      Returns:
          Dictionary with consensus feature metadata
@@ -3612,6 +3629,142 @@ def __merge_adduct_grouping(study, consensus_data, rt_tol, mz_tol):
      return adduct_group_list, adduct_of_list


+ def _handle_single_sample_merge(study, cached_adducts_df=None, cached_valid_adducts=None):
+     """
+     Handle merge for the special case of a single sample.
+     Directly populate consensus_df from the sample's features_df without any filtering.
+
+     Args:
+         study: Study object with single sample
+         cached_adducts_df: Pre-computed adducts DataFrame (optional)
+         cached_valid_adducts: Set of valid adduct names (optional)
+     """
+     import polars as pl
+     import uuid
+
+     if len(study.samples_df) != 1:
+         raise ValueError("_handle_single_sample_merge should only be called with exactly one sample")
+
+     # Get the single sample's features
+     sample_row = study.samples_df.row(0, named=True)
+     sample_uid = sample_row["sample_uid"]
+
+     # Filter features for this sample
+     sample_features = study.features_df.filter(pl.col("sample_uid") == sample_uid)
+
+     if len(sample_features) == 0:
+         study.logger.warning("No features found for single sample")
+         study.consensus_df = pl.DataFrame()
+         study.consensus_mapping_df = pl.DataFrame()
+         return
+
+     study.logger.info(f"Creating consensus from {len(sample_features)} features in single sample")
+
+     # Create consensus features directly from sample features
+     consensus_list = []
+     mapping_list = []
+
+     # Cache valid adducts
+     valid_adducts = cached_valid_adducts if cached_valid_adducts is not None else set()
+     valid_adducts.add("?")  # Always allow '?' adducts
+
+     for i, feature_row in enumerate(sample_features.iter_rows(named=True)):
+         # Generate unique consensus ID
+         consensus_id_str = str(uuid.uuid4()).replace("-", "")[:16]
+
+         # Handle adduct information
+         adduct = feature_row.get("adduct")
+         if adduct is None or adduct not in valid_adducts:
+             # Set default adduct based on study polarity
+             study_polarity = getattr(study, "polarity", "positive")
+             if study_polarity in ["negative", "neg"]:
+                 adduct = "[M-?]1-"
+                 adduct_charge = -1
+                 adduct_mass_shift = -1.007825
+             else:
+                 adduct = "[M+?]1+"
+                 adduct_charge = 1
+                 adduct_mass_shift = 1.007825
+         else:
+             # Try to get charge and mass shift from cached adducts
+             adduct_charge = 1
+             adduct_mass_shift = 1.007825
+             if cached_adducts_df is not None and not cached_adducts_df.is_empty():
+                 matching_adduct = cached_adducts_df.filter(pl.col("name") == adduct)
+                 if not matching_adduct.is_empty():
+                     adduct_row = matching_adduct.row(0, named=True)
+                     adduct_charge = adduct_row["charge"]
+                     adduct_mass_shift = adduct_row["mass_shift"]
+
+         # Calculate neutral mass
+         mz = feature_row.get("mz", 0.0)
+         if adduct_charge and adduct_mass_shift is not None:
+             adduct_mass_neutral = mz * abs(adduct_charge) - adduct_mass_shift
+         else:
+             adduct_mass_neutral = None
+
+         # Count MS2 scans
+         ms2_scans = feature_row.get("ms2_scans", [])
+         ms2_count = len(ms2_scans) if ms2_scans else 0
+
+         # Create consensus feature metadata
+         consensus_feature = {
+             "consensus_uid": i,
+             "consensus_id": consensus_id_str,
+             "quality": feature_row.get("quality", 1.0),
+             "number_samples": 1,  # Always 1 for single sample
+             "rt": feature_row.get("rt", 0.0),
+             "mz": mz,
+             "rt_min": feature_row.get("rt", 0.0),
+             "rt_max": feature_row.get("rt", 0.0),
+             "rt_mean": feature_row.get("rt", 0.0),
+             "rt_start_mean": feature_row.get("rt_start", 0.0),
+             "rt_end_mean": feature_row.get("rt_end", 0.0),
+             "rt_delta_mean": feature_row.get("rt_delta", 0.0),
+             "mz_min": mz,
+             "mz_max": mz,
+             "mz_mean": mz,
+             "mz_start_mean": feature_row.get("mz_start", 0.0),
+             "mz_end_mean": feature_row.get("mz_end", 0.0),
+             "inty_mean": feature_row.get("inty", 0.0),
+             "bl": -1.0,
+             "chrom_coherence_mean": feature_row.get("chrom_coherence", 0.0),
+             "chrom_prominence_mean": feature_row.get("chrom_prominence", 0.0),
+             "chrom_prominence_scaled_mean": feature_row.get("chrom_prominence_scaled", 0.0),
+             "chrom_height_scaled_mean": feature_row.get("chrom_height_scaled", 0.0),
+             "iso": None,  # Will be filled by find_iso() function
+             "iso_mean": feature_row.get("iso", 0.0),
+             "charge_mean": feature_row.get("charge", 0.0),
+             "number_ms2": ms2_count,
+             "adducts": [[adduct, 1, 100.0]],  # Single adduct with 100% frequency
+             "adduct_top": adduct,
+             "adduct_charge_top": adduct_charge,
+             "adduct_mass_neutral_top": adduct_mass_neutral,
+             "adduct_mass_shift_top": adduct_mass_shift,
+             "id_top_name": None,
+             "id_top_class": None,
+             "id_top_adduct": None,
+             "id_top_score": None,
+             "id_source": None,
+         }
+
+         consensus_list.append(consensus_feature)
+
+         # Create mapping entry
+         mapping_entry = {
+             "consensus_uid": i,
+             "sample_uid": sample_uid,
+             "feature_uid": feature_row.get("feature_uid"),
+         }
+         mapping_list.append(mapping_entry)
+
+     # Create DataFrames
+     study.consensus_df = pl.DataFrame(consensus_list, strict=False)
+     study.consensus_mapping_df = pl.DataFrame(mapping_list, strict=False)
+
+     study.logger.info(f"Created {len(consensus_list)} consensus features from single sample")
+
+
  def _fast_correlation(x, y):
      """
      Fast correlation coefficient calculation for consensus matrix data.
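The neutral-mass step in the new helper is plain arithmetic, `M = mz * |z| - mass_shift`. A worked example using the function's default proton shift (the m/z value is hypothetical):

```python
mz = 181.070665               # hypothetical [M+H]1+ ion
adduct_charge = 1
adduct_mass_shift = 1.007825  # default shift used in the helper

# Same formula as in _handle_single_sample_merge
adduct_mass_neutral = mz * abs(adduct_charge) - adduct_mass_shift
print(round(adduct_mass_neutral, 6))  # 180.06284
```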
masster/study/plot.py CHANGED
@@ -2955,6 +2955,203 @@ def plot_tic(
      return p


+ def plot_heatmap(
+     self,
+     filename=None,
+     width=800,
+     height=600,
+     cmap="viridis",
+     title="Consensus Matrix Heatmap",
+     quant="chrom_area",
+     samples=None,
+ ):
+     """
+     Plot a heatmap of the consensus matrix data.
+
+     Samples are ordered from left to right, features are ordered by m/z from top to bottom.
+     Values are log10 transformed for better visualization.
+
+     Parameters:
+         filename (str, optional): Path to save the plot
+         width (int): Plot width in pixels (default: 800)
+         height (int): Plot height in pixels (default: 600)
+         cmap (str): Colormap name (default: "viridis")
+         title (str): Plot title (default: "Consensus Matrix Heatmap")
+         quant (str): Quantification method column name (default: "chrom_area")
+         samples: Sample identifier(s) to include. Can be:
+             - None: include all samples (default)
+             - int: single sample_uid
+             - str: single sample_name
+             - list: multiple sample_uids or sample_names
+     """
+     from bokeh.plotting import figure
+     from bokeh.models import LinearColorMapper, ColorBar, BasicTicker
+     from bokeh.transform import transform
+     import numpy as np
+     import pandas as pd
+
+     # Get consensus matrix
+     matrix_df = self.get_consensus_matrix(quant=quant, samples=samples)
+
+     if matrix_df is None or matrix_df.is_empty():
+         self.logger.error("No consensus matrix available for heatmap.")
+         return
+
+     # Get m/z values for each consensus_uid to sort by
+     if self.consensus_df is None or self.consensus_df.is_empty():
+         self.logger.error("No consensus_df available for sorting features by m/z.")
+         return
+
+     # Join with consensus_df to get m/z values
+     matrix_with_mz = matrix_df.join(
+         self.consensus_df.select(["consensus_uid", "mz"]),
+         on="consensus_uid",
+         how="left",
+     )
+
+     # Sort by m/z (ascending - lowest m/z at top)
+     matrix_with_mz = matrix_with_mz.sort("mz")
+
+     # Remove the m/z column after sorting
+     matrix_sorted = matrix_with_mz.drop("mz")
+
+     # Extract consensus_uid and sample columns
+     consensus_uids = matrix_sorted["consensus_uid"].to_list()
+     sample_cols = [col for col in matrix_sorted.columns if col != "consensus_uid"]
+
+     # Convert to pandas for easier heatmap processing
+     matrix_pd = matrix_sorted.select(sample_cols).to_pandas()
+
+     # Apply log10 transformation (add 1 to avoid log(0))
+     matrix_log = np.log10(matrix_pd.values + 1)
+
+     # Prepare data for Bokeh heatmap
+     # Create a list of (sample, feature, value) tuples
+     heatmap_data = []
+     for i, feature_idx in enumerate(range(len(consensus_uids))):
+         for j, sample in enumerate(sample_cols):
+             value = matrix_log[feature_idx, j]
+             heatmap_data.append({
+                 "sample": sample,
+                 "feature": str(consensus_uids[feature_idx]),
+                 "feature_idx": str(i),  # Use string index for y-axis position
+                 "value": value,
+             })
+
+     # Convert to DataFrame for Bokeh ColumnDataSource
+     heatmap_df = pd.DataFrame(heatmap_data)
+
+     from bokeh.models import ColumnDataSource
+
+     source = ColumnDataSource(heatmap_df)
+
+     # Handle colormap using cmap.Colormap
+     try:
+         # Get colormap palette using cmap
+         if isinstance(cmap, str):
+             colormap = Colormap(cmap)
+             # Generate 256 colors and convert to hex
+             import matplotlib.colors as mcolors
+
+             colors = colormap(np.linspace(0, 1, 256))
+             palette = [mcolors.rgb2hex(color) for color in colors]
+         else:
+             colormap = cmap
+             # Try to use to_bokeh() method first
+             try:
+                 palette = colormap.to_bokeh()
+                 # Ensure we got a color palette, not another mapper
+                 if not isinstance(palette, (list, tuple)):
+                     # Fall back to generating colors manually
+                     import matplotlib.colors as mcolors
+
+                     colors = colormap(np.linspace(0, 1, 256))
+                     palette = [mcolors.rgb2hex(color) for color in colors]
+             except AttributeError:
+                 # Fall back to generating colors manually
+                 import matplotlib.colors as mcolors
+
+                 colors = colormap(np.linspace(0, 1, 256))
+                 palette = [mcolors.rgb2hex(color) for color in colors]
+     except (AttributeError, ValueError, TypeError) as e:
+         # Fallback to viridis if cmap interpretation fails
+         self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
+         from bokeh.palettes import viridis
+
+         palette = viridis(256)
+
+     # Create color mapper
+     color_mapper = LinearColorMapper(
+         palette=palette,
+         low=heatmap_df["value"].min(),
+         high=heatmap_df["value"].max(),
+     )
+
+     # Create figure with categorical ranges for both axes
+     p = figure(
+         width=width,
+         height=height,
+         title=title,
+         x_range=sample_cols,
+         y_range=[str(i) for i in range(len(consensus_uids))],
+         toolbar_location="above",
+         tools="pan,wheel_zoom,box_zoom,reset,save,hover",
+         tooltips=[
+             ("Sample", "@sample"),
+             ("Feature UID", "@feature"),
+             ("log10(Value+1)", "@value{0.00}"),
+         ],
+     )
+
+     # Draw rectangles for heatmap
+     p.rect(
+         x="sample",
+         y="feature_idx",
+         width=1,
+         height=1,
+         source=source,
+         fill_color=transform("value", color_mapper),
+         line_color=None,
+     )
+
+     # Add colorbar
+     color_bar = ColorBar(
+         color_mapper=color_mapper,
+         width=8,
+         location=(0, 0),
+         title=f"log10({quant}+1)",
+         ticker=BasicTicker(desired_num_ticks=8),
+     )
+     p.add_layout(color_bar, "right")
+
+     # Style the plot
+     p.axis.axis_line_color = None
+     p.axis.major_tick_line_color = None
+     p.grid.grid_line_color = None
+     p.xaxis.major_label_orientation = 45
+     p.yaxis.axis_label = "Features (sorted by m/z)"
+     p.xaxis.axis_label = "Samples"
+
+     # Apply consistent save/display behavior
+     if filename is not None:
+         # Convert relative paths to absolute paths using study folder as base
+         import os
+
+         if not os.path.isabs(filename):
+             filename = os.path.join(self.folder, filename)
+
+         # Convert to absolute path for logging
+         abs_filename = os.path.abspath(filename)
+
+         # Use isolated file saving
+         _isolated_save_plot(p, filename, abs_filename, self.logger, "Heatmap Plot")
+     else:
+         # Show in notebook when no filename provided
+         _isolated_show_notebook(p)
+
+     return p
+
+
  def plot_pca(self, *args, **kwargs):
      """Deprecated: Use plot_samples_pca instead."""
      import warnings
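A hedged usage sketch for the new method; argument values come from the docstring defaults above, and the sample names are illustrative:

```python
# Inline display (no filename): returns the Bokeh figure
p = study.plot_heatmap(quant="chrom_area", cmap="viridis")

# Subset of samples, saved under the study folder
# (relative filenames are resolved against study.folder, per the code above)
study.plot_heatmap(
    samples=["QC_01", "QC_02"],   # hypothetical sample_names
    title="QC consensus heatmap",
    filename="plots/qc_heatmap.html",
)
```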
masster/study/study.py CHANGED
@@ -14,7 +14,7 @@ Main class:
      consensus_select/filter/delete
  - Retrieval: get_consensus, get_chrom, get_samples, get_*_stats, get_*_matrix
  - Plotting: plot_alignment, plot_samples_pca/umap/2d, plot_tic/bpc/eic, plot_chrom,
-     plot_rt_correction, plot_consensus_2d/stats
+     plot_rt_correction, plot_consensus_2d/stats, plot_heatmap
  - Export: export_mgf, export_mztab, export_xlsx, export_parquet
  - Identification: lib_load, identify, get_id, id_reset, lib_reset
  - Parameters: get/update parameters, update_history
@@ -96,6 +96,7 @@ from masster.study.plot import plot_bpc
  from masster.study.plot import plot_tic
  from masster.study.plot import plot_eic
  from masster.study.plot import plot_rt_correction
+ from masster.study.plot import plot_heatmap
  from masster.study.processing import align
  from masster.study.merge import merge
  from masster.study.processing import integrate
@@ -429,6 +430,7 @@ class Study:
      plot_rt_correction = plot_rt_correction
      plot_tic = plot_tic
      plot_eic = plot_eic
+     plot_heatmap = plot_heatmap

      # === Analysis Operations ===
      analyze_umap = analyze_umap
@@ -261,6 +261,21 @@
      },
      "ms1_spec": {
          "dtype": "pl.Object"
+     },
+     "id_top_name": {
+         "dtype": "pl.Utf8"
+     },
+     "id_top_class": {
+         "dtype": "pl.Utf8"
+     },
+     "id_top_adduct": {
+         "dtype": "pl.Utf8"
+     },
+     "id_top_score": {
+         "dtype": "pl.Float64"
+     },
+     "id_source": {
+         "dtype": "pl.Utf8"
      }
  }
},
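The five new identification columns declare Polars dtypes as strings. A minimal sketch of what they resolve to (the dict below is illustrative, not the package's schema loader):

```python
import polars as pl

# Dtype strings from the schema hunk above, resolved to Polars types
ID_COLUMNS = {
    "id_top_name": pl.Utf8,
    "id_top_class": pl.Utf8,
    "id_top_adduct": pl.Utf8,
    "id_top_score": pl.Float64,
    "id_source": pl.Utf8,
}

# An empty frame carrying just the new consensus_df columns
empty = pl.DataFrame(schema=ID_COLUMNS)
print(empty.schema)
```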
masster/wizard/wizard.py CHANGED
@@ -200,12 +200,12 @@ class wizard_def:
      # Set default adducts based on polarity if not provided
      if not self.adducts:
          if self.polarity and self.polarity.lower() in ["positive", "pos"]:
-             self.adducts = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]
+             self.adducts = ["+H:1:0.8", "+Na:1:0.1", "+NH4:1:0.1"]
          elif self.polarity and self.polarity.lower() in ["negative", "neg"]:
-             self.adducts = ["H-1:-:1.0", "CH2O2:0:0.5"]
+             self.adducts = ["-H:-1:1.0", "+CH2O2:0:0.5"]
          else:
              # Default to positive if polarity is None or unknown
-             self.adducts = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]
+             self.adducts = ["+H:1:0.8", "+Na:1:0.1", "+NH4:1:0.1"]

      # Validate num_cores
      max_cores = multiprocessing.cpu_count()
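The default adduct specs switch from a sign-based to a signed-integer-charge format. Reading the triplets as `<adduct>:<charge>:<weight>` is an inference from the values, not something documented in this hunk; a hedged parsing sketch:

```python
# Hedged reading of the new spec strings: "<adduct>:<charge>:<weight>"
# (interpretation inferred from the values; parse_adduct_spec is illustrative)
def parse_adduct_spec(spec: str) -> tuple[str, int, float]:
    name, charge, weight = spec.rsplit(":", 2)
    return name, int(charge), float(weight)

for spec in ["+H:1:0.8", "+Na:1:0.1", "-H:-1:1.0", "+CH2O2:0:0.5"]:
    print(parse_adduct_spec(spec))
# ('+H', 1, 0.8)  ('+Na', 1, 0.1)  ('-H', -1, 1.0)  ('+CH2O2', 0, 0.5)
```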
@@ -676,9 +676,7 @@ class Wizard:
      " ",
      "    # Step 3: Create and configure study",
      '    print("\\nStep 3/7: Initializing study...")',
-     "    study = Study(folder=PARAMS['folder'])",
-     "    study.polarity = PARAMS['polarity']",
-     "    study.adducts = PARAMS['adducts']",
+     "    study = Study(folder=PARAMS['folder'], polarity=PARAMS['polarity'], adducts=PARAMS['adducts'])",
      " ",
      "    # Step 4: Add sample5 files to study",
      '    print("\\nStep 4/7: Adding samples to study...")',
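The generated script now configures the study entirely through the constructor instead of mutating the object afterwards. Side by side, with parameter names exactly as in the hunk (the folder value is illustrative):

```python
from masster import Study

# Before: construct, then mutate attributes
study = Study(folder="out")
study.polarity = "positive"
study.adducts = ["+H:1:0.8", "+Na:1:0.1", "+NH4:1:0.1"]

# After: one call, matching the rewritten generated line
study = Study(
    folder="out",
    polarity="positive",
    adducts=["+H:1:0.8", "+Na:1:0.1", "+NH4:1:0.1"],
)
```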
@@ -692,6 +690,12 @@ class Wizard:
      "        rt_tol=PARAMS['rt_tol']",
      "    )",
      " ",
+     "    # Check that more than 1 file has been loaded",
+     "    if len(study.samples) <= 1:",
+     '        print("\\nWARNING: Study merging requires more than 1 sample file.")',
+     '        print(f"Only {len(study.samples)} sample(s) loaded. Terminating execution.")',
+     "        return False",
+     " ",
      "    study.merge(",
      '        method="qt",',
      "        min_samples=PARAMS['min_samples_per_feature'],",
@@ -764,14 +768,9 @@ class Wizard:
      'app = marimo.App(width="medium")',
      "",
      "@app.cell",
-     "def __():",
-     "    import marimo as mo",
-     "    return (mo,)",
-     "",
-     "@app.cell",
      "def __(mo):",
      '    mo.md(r"""',
-     "    # MASSter Interactive Analysis",
+     "    ## MASSter Interactive Analysis",
      "    ",
      f"    **Source:** {source_info.get('number_of_files', 0)} files detected",
      f"    **Polarity:** {source_info.get('polarity', 'unknown')}",
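marimo allows each global name to be defined by exactly one cell, so the removed cell (a second `def __()` returning `mo`) would clash with an existing import cell in the generated notebook. A minimal sketch of the corrected skeleton (markdown content abbreviated; assumes the standard marimo file layout):

```python
import marimo

app = marimo.App(width="medium")

@app.cell
def __():
    # Single definition of `mo`; a duplicate cell defining it again
    # would raise a multiple-definition error when the notebook runs
    import marimo as mo
    return (mo,)

@app.cell
def __(mo):
    mo.md(r"""## MASSter Interactive Analysis""")
    return

if __name__ == "__main__":
    app.run()
```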
masster-0.5.28.dist-info/METADATA → masster-0.6.1.dist-info/METADATA CHANGED
@@ -1,12 +1,12 @@
  Metadata-Version: 2.4
  Name: masster
- Version: 0.5.28
+ Version: 0.6.1
  Summary: Mass spectrometry data analysis package
  Project-URL: homepage, https://github.com/zamboni-lab/masster
  Project-URL: repository, https://github.com/zamboni-lab/masster
  Project-URL: documentation, https://github.com/zamboni-lab/masster#readme
  Project-URL: Third-Party Licenses, https://github.com/zamboni-lab/masster/blob/main/THIRD_PARTY_NOTICES.md
- Author: Zamboni Lab
+ Author: Zamboni Lab, ETH Zurich
  License: GNU AFFERO GENERAL PUBLIC LICENSE
          Version 3, 19 November 2007
@@ -734,19 +734,19 @@ Description-Content-Type: text/markdown
  ## Background and motivation

- MASSter is actively used, maintainted, and developed by the Zamboni Lab at ETH Zurich. The project started because many needs of were unmatched by the "usual" software packages (mzmine, msdial, W4M, ...), e.g. performance, scalability, sensitivity, robustness, speed, rapid implementation of new features, embedding in ETL systems, and so on.
+ MASSter is actively used, maintained, and developed by the Zamboni Lab at ETH Zurich. The project started because many needs were unmet by the "usual" software packages (mzMine, MS-DIAL, Workflow4Metabolomics (W4M), ...), for example performance, scalability, sensitivity, robustness, speed, rapid implementation of new features, and embedding in ETL systems.

- All methods include a long list of parameters, and might wrap alternative algorithms. These are only relevant for advanced users. We recommend running the processing methods with defaults, or using the Wizard.
+ All methods include many parameters and may wrap alternative algorithms. These options are primarily relevant for advanced users. We recommend running the processing methods with the defaults or using the Wizard.

  ## Content

  MASSter is designed to deal with DDA data, and hides functionalities for DIA and ZTScan DIA data. The sample-centric feature detection uses OpenMS, which is both accurate and fast, and it was wrapped with additional code to improve isotope and adduct detection. All other functionalities are own implementations: centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, quantification, etc.

- MASSter was engineered to maximize quality of results, sensitivity, scalability, and also speed. Yes, it's Python which is notoriously slower than other languages, but considerable time was spent in speeding up everything, including the systematic use of [polars](https://pola.rs/), numpy vectorization, multiprocessing, chunking, etc. MASSter was tested with studies with 3000+ LC-MS/MS samples (1 Mio MS2 spectra), and it autonomously completed analysis within a few hours.
+ MASSter was engineered to maximize result quality, sensitivity, scalability, and speed. Yes, it's Python, which can be slower than other languages, but considerable effort was spent on optimizations, including the systematic use of [Polars](https://pola.rs/), NumPy vectorization, multiprocessing, and chunking. MASSter has been tested on studies with 3,000+ LCMS/MS samples (1 million MS2 spectra) and autonomously completed analyses within a few hours.

  ## Architecture

- MASSter defines own classes for Spectra, Chromatograms, Libraries, Samples, and Studies (= bunch of samples, i.e. a LC-MS sequence). Users will deal mostly with one Study() object at the time. Sample() objects are created when analyzing a batch - and saved for caching -, or will be used only for development, troubleshooting, or to generate illustrations.
+ MASSter defines classes for Spectra, Chromatograms, Libraries, Samples, and Studies (a Study is a collection of samples, i.e. an LCMS sequence). Users will typically work with a single `Study` object at a time. `Sample` objects are created when analyzing a batch (and saved for caching), or used for development, troubleshooting, or generating illustrations.

  The analysis can be done in scripts (without user intervention, e.g. by the integrated Wizard), or interactively in notebooks, i.e. [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/).
@@ -756,9 +756,9 @@ You'll need to install Python (3.10-3.13, 3.14 has not been tested yet).
  MASSter reads raw (Thermo), wiff (SCIEX), or mzML data. Reading vendor formats relies on .NET libraries, and is only possible in Windows. On Linux or MacOS, you'll be forced to use mzML data.

- **It's recommended to use data in either vendor's raw format (wiff and raw) or mzML in profile data.** MASSter includes a sophisticated and sufficiently fast centroiding algorithm that works well across the full dynamic range and will only act on the spectra that are relevant. In our tests with data from different vendors, the centroiding performed much better than most Vendor's implementations (that are primarily proteomics-centric).
+ **It's recommended to use data in either the vendor's raw formats (WIFF and Thermo RAW) or mzML in profile mode.** MASSter includes a sophisticated and sufficiently fast centroiding algorithm that works well across the full dynamic range and will only act on spectra that are relevant. In our tests with data from different vendors, the centroiding performed much better than most vendor implementations (which are primarily proteomics-centric).

- If still want to convert raw data to centroided mzML, please use (CentroidR)[https://github.com/Adafede/CentroidR/tree/0.0.0.9001].
+ If you still want to convert raw data to centroided mzML, please use CentroidR: https://github.com/Adafede/CentroidR/tree/0.0.0.9001

  ## Installation
@@ -769,7 +769,7 @@ pip install masster
  ## Getting started
  **The quickest way to use, or learn how to use MASSter, is to use the Wizard** which we integrated and, ideally, takes care of everything automatically.

- The Wizard only needs to know where to find the MS files and were the store the results.
+ The Wizard only needs to know where to find the MS files and where to store the results.
  ```python
  from masster import Wizard
  wiz = Wizard(
@@ -780,15 +780,15 @@ wiz = Wizard(
  wiz.test_and_run()
  ```

- This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if successull, run the full workflow using parallel processes. Once the processing is over you, navigate to `folder` to see what happened...
+ This will trigger the analysis of raw data, and the creation of a script to process all samples and then assemble the study. The whole processing will be stored as `1_masster_workflow.py` in the output folder. The wizard will test once and, if successful, run the full workflow using parallel processes. Once the processing is over you, navigate to `folder` to see what happened...

  If you want to interact with your data, we recommend using [marimo](https://marimo.io/) or [jupyter](https://jupyter.org/) and open the `*.study5` file, for example:

  ```bash
- # use marimo to open the script created by marino
- marimo edit '..\..folder_to_store_results\2_interactive_analysis.py'
- # or, if you use uv to manage an environment with masster
- uv run marimo edit '..\..folder_to_store_results\2_interactive_analysis.py'
+ # use marimo to open the script created by marimo
+ marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
+ # or, if you use uv to manage an environment with masster
+ uv run marimo edit '..\\..\\folder_to_store_results\\2_interactive_analysis.py'
  ```

  ### Basic Workflow for analyzing LC-MS study with 1-1000+ samples
@@ -833,6 +833,7 @@ study.save()
  study.plot_samples_pca()
  study.plot_samples_umap()
  study.plot_samples_2d()
+ study.plot_heatmap()

  # To know more about the available methods...
  dir(study)
@@ -874,7 +875,7 @@ sample.plot_2d()
  sample.plot_features_stats()

  # explore methods
- dir(study)
+ dir(sample)
  ```

  ## Disclaimer
@@ -885,11 +886,9 @@ dir(study)
  - **Backward compatibility**: We do not guarantee backward compatibility between versions. Breaking changes may occur as we improve the software
  - **Performance**: While optimized for our workflows, performance may vary depending on your data and system configuration
  - **Results**: We do our best to ensure accuracy, but you should validate results independently for your research
- - **Support**: This is an academic project with limited resources. Community support is available through GitHub issues, but we cannot guarantee response times
+ - **Support**: This is an academic project with limited resources. At the moment, we do not provide external user support.
  - **Production use**: If you plan to use MASSter in production or critical workflows, thorough testing with your data is recommended

- We welcome feedback, bug reports, and contributions via GitHub!
-
  ## License
  GNU Affero General Public License v3