masster-0.5.4-py3-none-any.whl → masster-0.5.5-py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/save.py CHANGED
@@ -411,6 +411,11 @@ def export_mgf(
411
411
  rt_str = f"{rt:.2f}"
412
412
  mz_str = f"{mz:.4f}"
413
413
 
414
+ # Initialize charge for this feature
415
+ charge = preferred_charge
416
+ if row["charge"] is not None and row["charge"] != 0:
417
+ charge = row["charge"]
418
+
414
419
  # Skip features without MS2 data (unless include_all_ms1 is True, but we already handled MS1 above)
415
420
  if row["ms2_scans"] is None:
416
421
  skip = skip + 1
masster/study/helpers.py CHANGED
@@ -500,7 +500,7 @@ def align_reset(self):
500
500
  # TODO I don't get this param
501
501
  def get_consensus(self, quant="chrom_area"):
502
502
  if self.consensus_df is None:
503
- self.logger.error("No consensus map found.")
503
+ self.logger.error("No consensus found.")
504
504
  return None
505
505
 
506
506
  # Convert Polars DataFrame to pandas for this operation since the result is used for export
@@ -613,7 +613,7 @@ def get_gaps_matrix(self, uids=None, samples=None):
613
613
  import polars as pl
614
614
 
615
615
  if self.consensus_df is None or self.consensus_df.is_empty():
616
- self.logger.error("No consensus map found.")
616
+ self.logger.error("No consensus found.")
617
617
  return None
618
618
 
619
619
  if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
masster/study/plot.py CHANGED
@@ -564,6 +564,10 @@ def plot_consensus_2d(
564
564
  Parameters:
565
565
  filename (str, optional): Path to save the plot
566
566
  colorby (str): Column name to use for color mapping (default: "number_samples")
567
+ Automatically detects if column contains categorical (string) or
568
+ numeric data and applies appropriate color mapping:
569
+ - Categorical: Uses factor_cmap with distinct colors and legend
570
+ - Numeric: Uses LinearColorMapper with continuous colorbar
567
571
  sizeby (str): Column name to use for size mapping (default: "inty_mean")
568
572
  markersize (int): Base marker size (default: 6)
569
573
  scaling (str): Controls whether points scale with zoom. Options:
@@ -645,12 +649,13 @@ def plot_consensus_2d(
645
649
  from bokeh.models import HoverTool
646
650
  from bokeh.models import LinearColorMapper
647
651
  from bokeh.io.export import export_png
652
+ from bokeh.transform import factor_cmap
648
653
 
649
654
  try:
650
655
  from bokeh.models import ColorBar # type: ignore[attr-defined]
651
656
  except ImportError:
652
657
  from bokeh.models.annotations import ColorBar
653
- from bokeh.palettes import viridis
658
+ from bokeh.palettes import viridis, Category20
654
659
 
655
660
  # Import cmap for colormap handling
656
661
  from cmap import Colormap
@@ -695,61 +700,144 @@ def plot_consensus_2d(
695
700
  self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
696
701
  palette = viridis(256)
697
702
 
698
- color_mapper = LinearColorMapper(
699
- palette=palette,
700
- low=data[colorby].min(),
701
- high=data[colorby].max(),
703
+ # Check if colorby column contains categorical data (string/object)
704
+ colorby_values = data[colorby].to_list()
705
+ is_categorical = (
706
+ data_pd[colorby].dtype in ["object", "string", "category"] or
707
+ isinstance(colorby_values[0], str) if colorby_values else False
702
708
  )
709
+
710
+ if is_categorical:
711
+ # Handle categorical coloring
712
+ # Use natural order of unique values - don't sort to preserve correct legend mapping
713
+ # Sorting would break the correspondence between legend labels and point colors
714
+ unique_values = [v for v in data_pd[colorby].unique() if v is not None]
715
+
716
+ if len(unique_values) <= 20:
717
+ palette = Category20[min(20, max(3, len(unique_values)))]
718
+ else:
719
+ # For many categories, use a subset of the viridis palette
720
+ palette = viridis(min(256, len(unique_values)))
721
+
722
+ color_mapper = factor_cmap(colorby, palette, unique_values)
723
+ else:
724
+ # Handle numeric coloring with LinearColorMapper
725
+ color_mapper = LinearColorMapper(
726
+ palette=palette,
727
+ low=data[colorby].min(),
728
+ high=data[colorby].max(),
729
+ )
703
730
  # scatter plot rt vs mz
704
731
  p = bp.figure(
705
732
  width=width,
706
733
  height=height,
707
- title="Consensus map",
734
+ title=f"Consensus features, colored by {colorby}",
708
735
  )
709
- p.xaxis.axis_label = "Retention Time (min)"
710
- p.yaxis.axis_label = "m/z"
736
+ p.xaxis.axis_label = "RT [s]"
737
+ p.yaxis.axis_label = "m/z [Th]"
711
738
  scatter_renderer: Any = None
712
- if scaling.lower() in ["dyn", "dynamic"]:
713
- # Calculate appropriate radius for dynamic scaling based on data range
714
- rt_range = data["rt"].max() - data["rt"].min()
715
- mz_range = data["mz"].max() - data["mz"].min()
716
- # Use a fraction of the smaller dimension for radius, similar to sample plotting
717
- dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
739
+ if is_categorical:
740
+ # For categorical data, create separate renderers for each category
741
+ # This enables proper legend interactivity where each category can be toggled independently
742
+ unique_values = [v for v in data_pd[colorby].unique() if v is not None]
743
+
744
+ if len(unique_values) <= 20:
745
+ palette = Category20[min(20, max(3, len(unique_values)))]
746
+ else:
747
+ palette = viridis(min(256, len(unique_values)))
748
+
749
+ # Create a separate renderer for each category
750
+ for i, category in enumerate(unique_values):
751
+ # Filter data for this category
752
+ category_data = data.filter(pl.col(colorby) == category)
753
+ category_data_pd = category_data.to_pandas()
754
+ category_source = bp.ColumnDataSource(category_data_pd)
755
+
756
+ color = palette[i % len(palette)]
757
+
758
+ if scaling.lower() in ["dyn", "dynamic"]:
759
+ # Calculate appropriate radius for dynamic scaling
760
+ rt_range = data["rt"].max() - data["rt"].min()
761
+ mz_range = data["mz"].max() - data["mz"].min()
762
+ dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
763
+
764
+ renderer = p.circle(
765
+ x="rt",
766
+ y="mz",
767
+ radius=dynamic_radius,
768
+ fill_color=color,
769
+ line_color=None,
770
+ alpha=alpha,
771
+ source=category_source,
772
+ legend_label=str(category),
773
+ )
774
+ else:
775
+ renderer = p.scatter(
776
+ x="rt",
777
+ y="mz",
778
+ size="markersize",
779
+ fill_color=color,
780
+ line_color=None,
781
+ alpha=alpha,
782
+ source=category_source,
783
+ legend_label=str(category),
784
+ )
785
+
786
+ # No single scatter_renderer for categorical data
787
+ scatter_renderer = None
718
788
 
719
- scatter_renderer = p.circle(
720
- x="rt",
721
- y="mz",
722
- radius=dynamic_radius,
723
- fill_color={"field": colorby, "transform": color_mapper},
724
- line_color=None,
725
- alpha=alpha,
726
- source=source,
727
- )
728
789
  else:
729
- scatter_renderer = p.scatter(
730
- x="rt",
731
- y="mz",
732
- size="markersize",
733
- fill_color={"field": colorby, "transform": color_mapper},
734
- line_color=None,
735
- alpha=alpha,
736
- source=source,
737
- )
790
+ # Handle numeric coloring - single renderer with color mapping
791
+ if scaling.lower() in ["dyn", "dynamic"]:
792
+ # Calculate appropriate radius for dynamic scaling
793
+ rt_range = data["rt"].max() - data["rt"].min()
794
+ mz_range = data["mz"].max() - data["mz"].min()
795
+ dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
796
+
797
+ scatter_renderer = p.circle(
798
+ x="rt",
799
+ y="mz",
800
+ radius=dynamic_radius,
801
+ fill_color={"field": colorby, "transform": color_mapper},
802
+ line_color=None,
803
+ alpha=alpha,
804
+ source=source,
805
+ )
806
+ else:
807
+ scatter_renderer = p.scatter(
808
+ x="rt",
809
+ y="mz",
810
+ size="markersize",
811
+ fill_color={"field": colorby, "transform": color_mapper},
812
+ line_color=None,
813
+ alpha=alpha,
814
+ source=source,
815
+ )
738
816
  # add hover tool
739
- # Start with base tooltips
817
+ # Start with base tooltips - rt and mz moved to top, removed consensus_id and iso_mean
740
818
  tooltips = [
819
+ ("rt", "@rt"),
820
+ ("mz", "@mz"),
741
821
  ("consensus_uid", "@consensus_uid"),
742
- ("consensus_id", "@consensus_id"),
743
822
  ("number_samples", "@number_samples"),
744
823
  ("number_ms2", "@number_ms2"),
745
- ("rt", "@rt"),
746
- ("mz", "@mz"),
747
824
  ("inty_mean", "@inty_mean"),
748
- ("iso_mean", "@iso_mean"),
749
825
  ("coherence_mean", "@chrom_coherence_mean"),
750
826
  ("prominence_scaled_mean", "@chrom_prominence_scaled_mean"),
751
827
  ]
752
828
 
829
+ # Add adduct_top if it exists in data
830
+ if "adduct_top" in data.columns:
831
+ tooltips.append(("adduct_top", "@adduct_top"))
832
+
833
+ # Add id_top_name if it exists in data
834
+ if "id_top_name" in data.columns:
835
+ tooltips.append(("id_top_name", "@id_top_name"))
836
+
837
+ # Add id_top_adduct if it exists in data
838
+ if "id_top_adduct" in data.columns:
839
+ tooltips.append(("id_top_adduct", "@id_top_adduct"))
840
+
753
841
  # Add id_top_* columns if they exist and have non-null values
754
842
  id_top_columns = ["id_top_name", "id_top_class", "id_top_adduct", "id_top_score"]
755
843
  for col in id_top_columns:
@@ -764,19 +852,28 @@ def plot_consensus_2d(
764
852
 
765
853
  hover = HoverTool(
766
854
  tooltips=tooltips,
767
- renderers=[scatter_renderer],
768
855
  )
856
+ # For categorical data, hover will work on all renderers automatically
857
+ # For numeric data, specify the single renderer
858
+ if not is_categorical and scatter_renderer:
859
+ hover.renderers = [scatter_renderer]
860
+
769
861
  p.add_tools(hover)
770
862
 
771
- # add colorbar
772
- color_bar = ColorBar(
773
- color_mapper=color_mapper,
774
- label_standoff=12,
775
- location=(0, 0),
776
- title=colorby,
777
- ticker=BasicTicker(desired_num_ticks=8),
778
- )
779
- p.add_layout(color_bar, "right")
863
+ # add colorbar only for numeric data (LinearColorMapper)
864
+ if not is_categorical:
865
+ color_bar = ColorBar(
866
+ color_mapper=color_mapper,
867
+ label_standoff=12,
868
+ location=(0, 0),
869
+ title=colorby,
870
+ ticker=BasicTicker(desired_num_ticks=8),
871
+ )
872
+ p.add_layout(color_bar, "right")
873
+ else:
874
+ # For categorical data, configure the legend that was automatically created
875
+ p.legend.location = "top_right"
876
+ p.legend.click_policy = "hide"
780
877
 
781
878
  if filename is not None:
782
879
  # Convert relative paths to absolute paths using study folder as base
masster/study/processing.py CHANGED
@@ -341,9 +341,6 @@ def _integrate_chrom_impl(self, **kwargs):
341
341
  uids = params.get("uids")
342
342
  rt_tol = params.get("rt_tol")
343
343
 
344
- if self.consensus_map is None:
345
- self.logger.error("No consensus map found.")
346
- return
347
344
  if uids is None:
348
345
  # get all consensus_id from consensus_df
349
346
  ids = self.consensus_df["consensus_uid"].to_list()
masster/wizard/wizard.py CHANGED
@@ -455,6 +455,9 @@ class Wizard:
455
455
  params_lines.append(' # === Processing Parameters ===')
456
456
  params_lines.append(f' "adducts": {params_dict.get("adducts", [])!r}, # Adduct specifications for feature detection and annotation')
457
457
  params_lines.append(f' "detector_type": {params_dict.get("detector_type", "unknown")!r}, # MS detector type ("orbitrap", "tof", "unknown")')
458
+ params_lines.append(f' "noise": {params_dict.get("noise", 50.0)}, # Noise threshold for feature detection')
459
+ params_lines.append(f' "chrom_fwhm": {params_dict.get("chrom_fwhm", 0.5)}, # Chromatographic peak full width at half maximum (seconds)')
460
+ params_lines.append(f' "chrom_peak_snr": {params_dict.get("chrom_peak_snr", 5.0)}, # Minimum signal-to-noise ratio for chromatographic peaks')
458
461
  params_lines.append('')
459
462
 
460
463
  # Alignment & Merging
@@ -643,6 +646,7 @@ class Wizard:
643
646
  ' # Step 4: Add sample5 files to study',
644
647
  ' print("\\nStep 4/7: Adding samples to study...")',
645
648
  ' study.add(str(Path(PARAMS[\'folder\']) / "*.sample5"))',
649
+ ' study.features_filter(study.features_select(chrom_coherence=0.1, chrom_prominence_scaled=1))',
646
650
  ' ',
647
651
  ' # Step 5: Core processing',
648
652
  ' print("\\nStep 5/7: Processing...")',
@@ -651,29 +655,14 @@ class Wizard:
651
655
  ' rt_tol=PARAMS[\'rt_tol\']',
652
656
  ' )',
653
657
  ' ',
654
- ' # Merge and create consensus features',
655
- ' # Use optimized method for large datasets (>500 samples)',
656
- ' num_samples = len(study.samples)',
657
- ' if num_samples > 500:',
658
- ' print(f" Large dataset detected ({num_samples} samples), using optimized qt_chunked + hierarchical method")',
659
- ' study.merge(',
660
- ' method="qt_chunked",',
661
- ' dechunking="hierarchical",',
662
- ' min_samples=PARAMS[\'min_samples_per_feature\'],',
663
- ' threads=PARAMS[\'num_cores\'],',
664
- ' rt_tol=PARAMS[\'rt_tol\'],',
665
- ' mz_tol=PARAMS[\'mz_tol\']',
666
- ' )',
667
- ' else:',
668
- ' print(f" Using standard merge method for {num_samples} samples")',
669
- ' study.merge(',
670
- ' min_samples=PARAMS[\'min_samples_per_feature\'],',
671
- ' threads=PARAMS[\'num_cores\'],',
672
- ' rt_tol=PARAMS[\'rt_tol\'],',
673
- ' mz_tol=PARAMS[\'mz_tol\']',
674
- ' )',
658
+ ' study.merge(',
659
+ ' method="qt",',
660
+ ' min_samples=PARAMS[\'min_samples_per_feature\'],',
661
+ ' threads=PARAMS[\'num_cores\'],',
662
+ ' rt_tol=PARAMS[\'rt_tol\'],'
663
+ ' )',
675
664
  ' study.find_iso()',
676
- ' study.fill(min_samples_rel=0.0)',
665
+ ' study.fill()',
677
666
  ' study.integrate()',
678
667
  ' ',
679
668
  ' # Step 6/7: Saving results',
@@ -689,8 +678,8 @@ class Wizard:
689
678
  ' study.plot_consensus_2d(filename="consensus.png")',
690
679
  ' study.plot_alignment(filename="alignment.html")',
691
680
  ' study.plot_alignment(filename="alignment.png")',
692
- ' study.plot_pca(filename="pca.html")',
693
- ' study.plot_pca(filename="pca.png")',
681
+ ' study.plot_samples_pca(filename="pca.html")',
682
+ ' study.plot_samples_pca(filename="pca.png")',
694
683
  ' study.plot_bpc(filename="bpc.html")',
695
684
  ' study.plot_bpc(filename="bpc.png")',
696
685
  ' study.plot_rt_correction(filename="rt_correction.html")',
masster-0.5.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.4
3
+ Version: 0.5.5
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
masster-0.5.5.dist-info/RECORD CHANGED
@@ -29,7 +29,7 @@ masster/sample/processing.py,sha256=CjaLCElDKECeCvYWqzT5EH_-rPQ0Y4A30zKjZfqmS5s,
29
29
  masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
30
30
  masster/sample/sample.py,sha256=O2PY7DnRx7VkASBU4qKQcTqND1aRfd0OCjKK9Rjn5uw,20574
31
31
  masster/sample/sample5_schema.json,sha256=H5e2T6rHIDzul2kp_yP-ILUUWUpW08wP2pEQjMR0nSk,3977
32
- masster/sample/save.py,sha256=2yQtcQcRJjgAKPImTydj7LpyyMop_Q9JKRlNEK4yU6k,36339
32
+ masster/sample/save.py,sha256=q1DjzVCB2FsTi9Sk-szd2-Nr7kPbqqFhJpjAWNfSCno,36536
33
33
  masster/sample/sciex.py,sha256=vnbxsq_qnAQVuzcpziP1o3IC4kM5amGBcPmC2TAuDLw,46319
34
34
  masster/sample/defaults/__init__.py,sha256=A09AOP44cxD_oYohyt7XFUho0zndRcrzVD4DUaGnKH4,447
35
35
  masster/sample/defaults/find_adducts_def.py,sha256=Bu2KiBJRxD0SAnOPNMm_Nk-6fx6QYoRXjFNGzz-0_o0,13570
@@ -41,13 +41,13 @@ masster/study/__init__.py,sha256=55axdFuqRX4aXtJ8ocnhcLB32fNtmmJpCi58moO0r4g,237
41
41
  masster/study/analysis.py,sha256=L-wXBnGZCLB5UUDrjIdOiMG9zdej3Tw_SftcEmmTukM,84264
42
42
  masster/study/export.py,sha256=joFK9jip2UM4lVAvhkdKVeUdNdM4D8uP2WE49IaVJgw,60172
43
43
  masster/study/h5.py,sha256=84plxM7gYFdn_mNbcg8XxE_NRZmiIBqs_XhfHMiXshk,95364
44
- masster/study/helpers.py,sha256=s5jLUmxDAs_Qn6dVwpkwlwuwliMDEBjmeikS6OrxdSE,183137
44
+ masster/study/helpers.py,sha256=ZhvLetoEROHMSrMKZo8jXQMJhTochITgAG_vwjs_8Mg,183129
45
45
  masster/study/id.py,sha256=r_vZQYNxqNXf_pjgk_CLkl1doLnLa956mTuVmlHN52o,80075
46
46
  masster/study/load.py,sha256=7d11294YYEGrSKox3cwvetv2vqcstYT1SnyAhHH5V_Q,107706
47
47
  masster/study/merge.py,sha256=D9xNRlEaMPTPZQAZhiBBSzQ-27lD60fCDmKb0cYST-M,149764
48
48
  masster/study/parameters.py,sha256=bTvmcwX9INxzcrEAmTiFH8qeWVhwkvMTZjuP394pz5o,3279
49
- masster/study/plot.py,sha256=wg2X3P-0J6mW0N0dJS0dX6KLGtt9mXj6w1j3E9nvlvo,103086
50
- masster/study/processing.py,sha256=O6X7wgeq0kXSyMO12g23cqB8cYO60gLRMxuJC2uhSMY,58644
49
+ masster/study/plot.py,sha256=LEIzoYiUyq1aswh-sw8S-ESvN2DaQKN5l22yLW8gZe8,107647
50
+ masster/study/processing.py,sha256=n5208v-JQGq3bBP-ncgl2__hHWSQQYHx2fl4Mm0THdI,58538
51
51
  masster/study/save.py,sha256=47AP518epJJ9TjaGGyrLKsMsyjIk8_J4ka7bmsnRtFQ,9268
52
52
  masster/study/study.py,sha256=TnZkTLB8Z5R-AVqoHfUNvmkTthfUI4OPmBo_LYR_e8g,38654
53
53
  masster/study/study5_schema.json,sha256=0IZxM9VVI0TUlx74BPzJDT44kySi6NZZ6iLR0j8bU_s,7736
@@ -65,9 +65,9 @@ masster/study/defaults/study_def.py,sha256=h8dYbi9xv0sesCSQik49Z53IkskMmNtW6ixl7
65
65
  masster/wizard/README.md,sha256=mL1A3YWJZOefpJ6D0-HqGLkVRmUlOpwyVFdvJBeeoZM,14149
66
66
  masster/wizard/__init__.py,sha256=a2hcZnHASjfuw1lqZhZnvTR58rc33rRnoGAY_JfvGhI,683
67
67
  masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
68
- masster/wizard/wizard.py,sha256=6VqeOyKJ-9n0376CVbNuQo4vKLFjE0Sl2KexWZclQew,38580
69
- masster-0.5.4.dist-info/METADATA,sha256=wjgydggoAm3JBUOi0PrJ1oPUgUTvB3qKZ1MorC8NBOE,45113
70
- masster-0.5.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
71
- masster-0.5.4.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
72
- masster-0.5.4.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
73
- masster-0.5.4.dist-info/RECORD,,
68
+ masster/wizard/wizard.py,sha256=UobIGFZtp1s_9WJlpl6DQ2-pp7flPQ6dlYZJqYE92OM,38131
69
+ masster-0.5.5.dist-info/METADATA,sha256=ALpQYEYxgqYZ0XGZjcdXvgkEE_AKC0-KqepR1fwQ3hc,45113
70
+ masster-0.5.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
71
+ masster-0.5.5.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
72
+ masster-0.5.5.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
73
+ masster-0.5.5.dist-info/RECORD,,