masster 0.4.20__tar.gz → 0.4.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (96) hide show
  1. {masster-0.4.20 → masster-0.4.22}/PKG-INFO +1 -1
  2. {masster-0.4.20 → masster-0.4.22}/pyproject.toml +1 -1
  3. {masster-0.4.20 → masster-0.4.22}/src/masster/__init__.py +6 -0
  4. {masster-0.4.20 → masster-0.4.22}/src/masster/_version.py +1 -1
  5. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/h5.py +58 -1
  6. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/load.py +7 -1
  7. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/plot.py +56 -65
  8. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/processing.py +158 -0
  9. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/sample.py +2 -0
  10. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/sample5_schema.json +3 -0
  11. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/save.py +135 -59
  12. {masster-0.4.20 → masster-0.4.22}/src/masster/spectrum.py +58 -9
  13. {masster-0.4.20 → masster-0.4.22}/src/masster/study/export.py +240 -154
  14. {masster-0.4.20 → masster-0.4.22}/src/masster/study/h5.py +65 -1
  15. {masster-0.4.20 → masster-0.4.22}/src/masster/study/helpers.py +3 -3
  16. {masster-0.4.20 → masster-0.4.22}/src/masster/study/load.py +39 -3
  17. {masster-0.4.20 → masster-0.4.22}/src/masster/study/merge.py +25 -10
  18. {masster-0.4.20 → masster-0.4.22}/src/masster/study/plot.py +162 -192
  19. {masster-0.4.20 → masster-0.4.22}/src/masster/study/processing.py +362 -12
  20. {masster-0.4.20 → masster-0.4.22}/src/masster/study/save.py +48 -5
  21. {masster-0.4.20 → masster-0.4.22}/src/masster/study/study.py +16 -3
  22. {masster-0.4.20 → masster-0.4.22}/src/masster/study/study5_schema.json +3 -0
  23. {masster-0.4.20 → masster-0.4.22}/src/masster/wizard/__init__.py +5 -2
  24. masster-0.4.22/src/masster/wizard/wizard.py +911 -0
  25. {masster-0.4.20 → masster-0.4.22}/uv.lock +1 -1
  26. masster-0.4.20/src/masster/wizard/test_structure.py +0 -49
  27. masster-0.4.20/src/masster/wizard/test_wizard.py +0 -285
  28. masster-0.4.20/src/masster/wizard/wizard.py +0 -2347
  29. {masster-0.4.20 → masster-0.4.22}/.github/workflows/publish.yml +0 -0
  30. {masster-0.4.20 → masster-0.4.22}/.github/workflows/security.yml +0 -0
  31. {masster-0.4.20 → masster-0.4.22}/.github/workflows/test.yml +0 -0
  32. {masster-0.4.20 → masster-0.4.22}/.gitignore +0 -0
  33. {masster-0.4.20 → masster-0.4.22}/.pre-commit-config.yaml +0 -0
  34. {masster-0.4.20 → masster-0.4.22}/LICENSE +0 -0
  35. {masster-0.4.20 → masster-0.4.22}/Makefile +0 -0
  36. {masster-0.4.20 → masster-0.4.22}/README.md +0 -0
  37. {masster-0.4.20 → masster-0.4.22}/TESTING.md +0 -0
  38. {masster-0.4.20 → masster-0.4.22}/demo/example_batch_process.py +0 -0
  39. {masster-0.4.20 → masster-0.4.22}/demo/example_sample_process.py +0 -0
  40. {masster-0.4.20 → masster-0.4.22}/src/masster/chromatogram.py +0 -0
  41. {masster-0.4.20 → masster-0.4.22}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  42. {masster-0.4.20 → masster-0.4.22}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  43. {masster-0.4.20 → masster-0.4.22}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  44. {masster-0.4.20 → masster-0.4.22}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  45. {masster-0.4.20 → masster-0.4.22}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  46. {masster-0.4.20 → masster-0.4.22}/src/masster/data/libs/ccm.csv +0 -0
  47. {masster-0.4.20 → masster-0.4.22}/src/masster/data/libs/urine.csv +0 -0
  48. {masster-0.4.20 → masster-0.4.22}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  49. {masster-0.4.20 → masster-0.4.22}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  50. {masster-0.4.20 → masster-0.4.22}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  51. {masster-0.4.20 → masster-0.4.22}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  52. {masster-0.4.20 → masster-0.4.22}/src/masster/lib/__init__.py +0 -0
  53. {masster-0.4.20 → masster-0.4.22}/src/masster/lib/lib.py +0 -0
  54. {masster-0.4.20 → masster-0.4.22}/src/masster/logger.py +0 -0
  55. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/__init__.py +0 -0
  56. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/adducts.py +0 -0
  57. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/defaults/__init__.py +0 -0
  58. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  59. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/defaults/find_features_def.py +0 -0
  60. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  61. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  62. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/defaults/sample_def.py +0 -0
  63. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/helpers.py +0 -0
  64. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/lib.py +0 -0
  65. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/parameters.py +0 -0
  66. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/quant.py +0 -0
  67. {masster-0.4.20 → masster-0.4.22}/src/masster/sample/sciex.py +0 -0
  68. {masster-0.4.20 → masster-0.4.22}/src/masster/study/__init__.py +0 -0
  69. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/__init__.py +0 -0
  70. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/align_def.py +0 -0
  71. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/export_def.py +0 -0
  72. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/fill_chrom_def.py +0 -0
  73. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/fill_def.py +0 -0
  74. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/find_consensus_def.py +0 -0
  75. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/find_ms2_def.py +0 -0
  76. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/identify_def.py +0 -0
  77. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  78. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/integrate_def.py +0 -0
  79. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/merge_def.py +0 -0
  80. {masster-0.4.20 → masster-0.4.22}/src/masster/study/defaults/study_def.py +0 -0
  81. {masster-0.4.20 → masster-0.4.22}/src/masster/study/id.py +0 -0
  82. {masster-0.4.20 → masster-0.4.22}/src/masster/study/parameters.py +0 -0
  83. {masster-0.4.20 → masster-0.4.22}/src/masster/wizard/README.md +0 -0
  84. {masster-0.4.20 → masster-0.4.22}/src/masster/wizard/example.py +0 -0
  85. {masster-0.4.20 → masster-0.4.22}/tests/conftest.py +0 -0
  86. {masster-0.4.20 → masster-0.4.22}/tests/test_chromatogram.py +0 -0
  87. {masster-0.4.20 → masster-0.4.22}/tests/test_defaults.py +0 -0
  88. {masster-0.4.20 → masster-0.4.22}/tests/test_imports.py +0 -0
  89. {masster-0.4.20 → masster-0.4.22}/tests/test_integration.py +0 -0
  90. {masster-0.4.20 → masster-0.4.22}/tests/test_logger.py +0 -0
  91. {masster-0.4.20 → masster-0.4.22}/tests/test_parameters.py +0 -0
  92. {masster-0.4.20 → masster-0.4.22}/tests/test_sample.py +0 -0
  93. {masster-0.4.20 → masster-0.4.22}/tests/test_spectrum.py +0 -0
  94. {masster-0.4.20 → masster-0.4.22}/tests/test_study.py +0 -0
  95. {masster-0.4.20 → masster-0.4.22}/tests/test_version.py +0 -0
  96. {masster-0.4.20 → masster-0.4.22}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.4.20
3
+ Version: 0.4.22
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.4.20"
4
+ version = "0.4.22"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -8,6 +8,12 @@ mass spectrometry workflows.
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ import warnings
12
+
13
+ # Suppress pyOpenMS environment variable warnings globally
14
+ warnings.filterwarnings("ignore", message=".*OPENMS_DATA_PATH.*", category=UserWarning)
15
+ warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", category=UserWarning)
16
+
11
17
  from masster._version import __version__
12
18
 
13
19
  # from masster._version import get_version
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.4.20"
4
+ __version__ = "0.4.22"
5
5
 
6
6
 
7
7
  def get_version():
@@ -235,6 +235,22 @@ def _save_sample5(
235
235
  data=serialized_data,
236
236
  compression="gzip",
237
237
  )
238
+ elif col == "ms1_spec":
239
+ # this column contains either None or numpy arrays with isotope pattern data
240
+ # serialize numpy arrays to JSON strings for storage
241
+ data = features[col]
242
+ data_as_json_strings = []
243
+ for i in range(len(data)):
244
+ if data[i] is not None:
245
+ # Convert numpy array to list and then to JSON
246
+ data_as_json_strings.append(json.dumps(data[i].tolist()))
247
+ else:
248
+ data_as_json_strings.append("None")
249
+ features_group.create_dataset(
250
+ col,
251
+ data=data_as_json_strings,
252
+ compression="gzip",
253
+ )
238
254
 
239
255
  else:
240
256
  self.logger.warning(
@@ -630,6 +646,25 @@ def _load_sample5(self, filename: str, map: bool = False):
630
646
  )
631
647
  reconstructed_data.append(spectrum_list)
632
648
 
649
+ data[col] = reconstructed_data
650
+ case "ms1_spec":
651
+ data_col = features_group[col][:]
652
+ # Convert JSON strings back to numpy arrays
653
+ reconstructed_data = []
654
+ for item in data_col:
655
+ if isinstance(item, bytes):
656
+ item = item.decode("utf-8")
657
+
658
+ if item == "None" or item == "":
659
+ reconstructed_data.append(None)
660
+ else:
661
+ try:
662
+ # Parse JSON string to get list and convert to numpy array
663
+ array_data = json.loads(item)
664
+ reconstructed_data.append(np.array(array_data, dtype=np.float64))
665
+ except (json.JSONDecodeError, ValueError, TypeError):
666
+ reconstructed_data.append(None)
667
+
633
668
  data[col] = reconstructed_data
634
669
  case _:
635
670
  self.logger.debug(f"Unexpected Object column '{col}'")
@@ -1371,6 +1406,25 @@ def _load_sample5_study(self, filename: str, map: bool = False):
1371
1406
  ):
1372
1407
  reconstructed_data.append(None)
1373
1408
 
1409
+ data[col] = reconstructed_data
1410
+ case "ms1_spec":
1411
+ data_col = features_group[col][:]
1412
+ # Convert JSON strings back to numpy arrays
1413
+ reconstructed_data = []
1414
+ for item in data_col:
1415
+ if isinstance(item, bytes):
1416
+ item = item.decode("utf-8")
1417
+
1418
+ if item == "None" or item == "":
1419
+ reconstructed_data.append(None)
1420
+ else:
1421
+ try:
1422
+ # Parse JSON string to get list and convert to numpy array
1423
+ array_data = json.loads(item)
1424
+ reconstructed_data.append(np.array(array_data, dtype=np.float64))
1425
+ except (json.JSONDecodeError, ValueError, TypeError):
1426
+ reconstructed_data.append(None)
1427
+
1374
1428
  data[col] = reconstructed_data
1375
1429
  case _:
1376
1430
  # Handle other Object columns as raw data
@@ -1407,6 +1461,9 @@ def _load_sample5_study(self, filename: str, map: bool = False):
1407
1461
  # Add Object columns one by one
1408
1462
  for col, values in object_columns.items():
1409
1463
  if not self.features_df.is_empty():
1464
+ # Fix for missing columns: if values is None, create list of None with correct length
1465
+ if values is None:
1466
+ values = [None] * len(self.features_df)
1410
1467
  self.features_df = self.features_df.with_columns(
1411
1468
  pl.Series(col, values, dtype=pl.Object).alias(col),
1412
1469
  )
@@ -2027,7 +2084,7 @@ def load_dataframe_from_h5_group(
2027
2084
  for col in schema_columns:
2028
2085
  if col not in group:
2029
2086
  if logger:
2030
- logger.warning(f"Column '{col}' not found in {df_name}.")
2087
+ logger.info(f"Column '{col}' not found in {df_name}.")
2031
2088
  data[col] = None
2032
2089
  missing_columns.append(col)
2033
2090
  continue
@@ -48,9 +48,14 @@ from tqdm import tqdm
48
48
  from masster.chromatogram import Chromatogram
49
49
  from masster.spectrum import Spectrum
50
50
 
51
+ # Suppress pyOpenMS warnings globally
52
+ warnings.filterwarnings("ignore", message=".*OPENMS_DATA_PATH.*", category=UserWarning)
53
+ warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", category=UserWarning)
54
+
51
55
  # Import pyopenms with suppressed warnings
52
56
  with warnings.catch_warnings():
53
- warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning)
57
+ warnings.filterwarnings("ignore", message=".*OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning)
58
+ warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", category=UserWarning)
54
59
  import pyopenms as oms
55
60
 
56
61
 
@@ -633,6 +638,7 @@ def _load_wiff(
633
638
  mz=peaks.mz.values,
634
639
  inty=peaks.intensity.values,
635
640
  ms_level=ms_level,
641
+ centroided=False, # WIFF files always contain profile data
636
642
  )
637
643
  bl = spect.baseline()
638
644
  spect = spect.denoise(threshold=bl)
@@ -387,18 +387,19 @@ def plot_2d(
387
387
  show_only_features_with_ms2=False,
388
388
  show_isotopes=False,
389
389
  show_ms2=False,
390
+ show_in_browser=False,
390
391
  title=None,
391
392
  cmap=None,
392
393
  marker="circle",
393
394
  markersize=10,
394
- size="dynamic",
395
+ size="static",
395
396
  raster_dynamic=True,
396
397
  raster_max_px=8,
397
398
  raster_threshold=0.8,
398
399
  height=600,
399
400
  width=800,
400
401
  mz_range=None,
401
- rt_range=None,
402
+ rt_range=None
402
403
  ):
403
404
  """
404
405
  Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
@@ -634,8 +635,10 @@ def plot_2d(
634
635
  ("m/z", "@mz{0.0000}"),
635
636
  ("feature_uid", "@feature_uid"),
636
637
  ("inty", "@inty"),
637
- ("quality", "@quality"),
638
- ("rt_delta", "@rt_delta"),
638
+ ("iso", "@iso"),
639
+ ("adduct", "@adduct"),
640
+ ("chrom_coherence", "@chrom_coherence"),
641
+ ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
639
642
  ],
640
643
  )
641
644
  feature_points_1 = hv.Points(
@@ -644,8 +647,8 @@ def plot_2d(
644
647
  vdims=[
645
648
  "feature_uid",
646
649
  "inty",
647
- "quality",
648
- "rt_delta",
650
+ "iso",
651
+ "adduct",
649
652
  "ms2_scans",
650
653
  "chrom_coherence",
651
654
  "chrom_prominence_scaled",
@@ -666,8 +669,10 @@ def plot_2d(
666
669
  ("m/z", "@mz{0.0000}"),
667
670
  ("feature_uid", "@feature_uid"),
668
671
  ("inty", "@inty"),
669
- ("quality", "@quality"),
670
- ("rt_delta", "@rt_delta"),
672
+ ("iso", "@iso"),
673
+ ("adduct", "@adduct"),
674
+ ("chrom_coherence", "@chrom_coherence"),
675
+ ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
671
676
  ],
672
677
  )
673
678
  feature_points_2 = hv.Points(
@@ -676,8 +681,8 @@ def plot_2d(
676
681
  vdims=[
677
682
  "feature_uid",
678
683
  "inty",
679
- "quality",
680
- "rt_delta",
684
+ "iso",
685
+ "adduct",
681
686
  "chrom_coherence",
682
687
  "chrom_prominence_scaled",
683
688
  ],
@@ -702,10 +707,11 @@ def plot_2d(
702
707
  ("m/z", "@mz{0.0000}"),
703
708
  ("feature_uid", "@feature_uid"),
704
709
  ("inty", "@inty"),
705
- ("quality", "@quality"),
706
- ("rt_delta", "@rt_delta"),
707
710
  ("iso", "@iso"),
708
711
  ("iso_of", "@iso_of"),
712
+ ("adduct", "@adduct"),
713
+ ("chrom_coherence", "@chrom_coherence"),
714
+ ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
709
715
  ],
710
716
  )
711
717
  feature_points_iso = hv.Points(
@@ -714,10 +720,9 @@ def plot_2d(
714
720
  vdims=[
715
721
  "feature_uid",
716
722
  "inty",
717
- "quality",
718
- "rt_delta",
719
723
  "iso",
720
724
  "iso_of",
725
+ "adduct",
721
726
  "chrom_coherence",
722
727
  "chrom_prominence_scaled",
723
728
  ],
@@ -918,21 +923,24 @@ def plot_2d(
918
923
  else:
919
924
  # For slider plots, save the current state
920
925
  hv.save(create_feature_overlay(markersize), filename, fmt="png")
921
- return None
922
926
  else:
923
- # For notebook display, return the interactive layout
924
- return _display_plot(layout, layout)
927
+ # Use show() for display in notebook
928
+ layout.show()
925
929
  else:
926
930
  # Create a panel layout without slider
927
931
  layout = panel.Column(overlay)
928
932
 
933
+ # Handle display logic based on show_in_browser and raster_dynamic
929
934
  if filename is not None:
930
935
  # Use consistent save/display behavior
931
936
  self._handle_sample_plot_output(layout, filename, "panel")
932
- return None
933
937
  else:
934
- # Check if we're in a notebook environment and display appropriately
935
- return _display_plot(overlay, layout)
938
+ # Show in browser if both show_in_browser and raster_dynamic are True
939
+ if show_in_browser and raster_dynamic:
940
+ layout.show()
941
+ else:
942
+ # Return to notebook for inline display
943
+ return layout
936
944
 
937
945
 
938
946
  def plot_2d_oracle(
@@ -1952,11 +1960,10 @@ def plot_feature_stats(
1952
1960
  filename=None,
1953
1961
  ):
1954
1962
  """
1955
- Generates overlaid distribution plots for selected feature metrics.
1963
+ Generates vertically stacked density plots for selected feature metrics.
1956
1964
  The distributions are created separately for features with and without MS2 data.
1957
- Metrics include intensity, quality, retention time, m/z (and m/z delta), number of MS2 peaks,
1958
- summed MS2 intensities, and the MS2-to-MS1 ratio. The plots help to visualize the distribution
1959
- differences between features that are linked to MS2 spectra and those that are not.
1965
+ Metrics include mz, rt, log10(inty), chrom_coherence, chrom_prominence, and chrom_prominence_scaled.
1966
+ The plots help to visualize the distribution differences between features that are linked to MS2 spectra and those that are not.
1960
1967
 
1961
1968
  Parameters:
1962
1969
  filename (str, optional): The output filename. If the filename ends with ".html",
@@ -1972,54 +1979,28 @@ def plot_feature_stats(
1972
1979
  # Convert to pandas for operations that require pandas functionality
1973
1980
  if hasattr(feats, "to_pandas"):
1974
1981
  feats = feats.to_pandas()
1975
- # Compute m/z delta for each feature
1976
- feats["mz_delta"] = feats["mz_end"] - feats["mz_start"]
1977
- # Add a column with the number of peaks in the MS2 spectrum
1978
- feats["MS2peaks"] = feats["ms2_specs"].apply(
1979
- lambda x: len(x[0]) if x is not None else 0,
1980
- )
1981
- # Add a column with the sum of intensities in the MS2 spectrum
1982
- feats["MS2int"] = feats["ms2_specs"].apply(
1983
- lambda x: sum(x[0].inty) if x is not None else 0,
1984
- )
1985
1982
 
1986
- # Calculate the ratio of MS2 to MS1 intensities
1987
- feats["MS2toMS1"] = feats["MS2int"] / feats["inty"]
1988
- # Apply log10 transformation to intensity, quality, and MS2int columns (handling non-positive values)
1983
+ # Apply log10 transformation to intensity (handling non-positive values)
1989
1984
  feats["inty"] = np.where(feats["inty"] <= 0, np.nan, np.log10(feats["inty"]))
1990
- # COMMENT: AR was bugging
1991
- # feats["chrom_heights"] = np.where(
1992
- # feats["chrom_heights"] <= 0, np.nan, np.log10(feats["chrom_heights"])
1993
- # )
1994
-
1995
- feats["quality"] = np.where(
1996
- feats["quality"] <= 0,
1997
- np.nan,
1998
- np.log10(feats["quality"]),
1999
- )
2000
- feats["MS2int"] = np.where(feats["MS2int"] <= 0, np.nan, np.log10(feats["MS2int"]))
1985
+
1986
+ # Apply log10 transformation to quality (handling non-positive values)
1987
+ feats["quality"] = np.where(feats["quality"] <= 0, np.nan, np.log10(feats["quality"]))
2001
1988
 
2002
1989
  # Separate features based on presence of MS2 data
2003
1990
  feats_with_MS2 = feats[feats["ms2_scans"].notnull()]
2004
1991
  feats_without_MS2 = feats[feats["ms2_scans"].isnull()]
2005
1992
 
2006
- # Define the metrics to plot
1993
+ # Define the specific metrics to plot
2007
1994
  cols_to_plot = [
2008
1995
  "mz",
2009
- "mz_delta",
2010
- "inty",
2011
- "quality",
2012
- "rt",
1996
+ "rt",
1997
+ "inty", # Already log10 transformed above
2013
1998
  "rt_delta",
1999
+ "quality", # Already log10 transformed above
2014
2000
  "chrom_coherence",
2015
2001
  "chrom_prominence",
2016
2002
  "chrom_prominence_scaled",
2017
- # COMMENT: AR was bugging
2018
- # "chrom_heights",
2019
- # "chrom_heights_scaled",
2020
- "MS2peaks",
2021
- "MS2int",
2022
- "MS2toMS1",
2003
+ "chrom_height_scaled",
2023
2004
  ]
2024
2005
 
2025
2006
  # Ensure an index column is available for plotting
@@ -2032,29 +2013,39 @@ def plot_feature_stats(
2032
2013
  data_with = feats_with_MS2[col].dropna().values
2033
2014
  data_without = feats_without_MS2[col].dropna().values
2034
2015
 
2035
- # Create distribution elements for features with and without MS2
2016
+ # Create distribution elements - Green for WITH MS2, Red for WITHOUT MS2
2036
2017
  dist_with = hv.Distribution(data_with, label="With MS2").opts(
2037
- color="red",
2018
+ color="green",
2038
2019
  alpha=0.6,
2039
2020
  )
2040
2021
  dist_without = hv.Distribution(data_without, label="Without MS2").opts(
2041
- color="blue",
2022
+ color="red",
2042
2023
  alpha=0.6,
2043
2024
  )
2044
2025
 
2045
2026
  # Overlay the distributions with a legend and hover tool enabled
2027
+ title = col
2028
+ if col == "inty":
2029
+ title = "log10(inty)"
2030
+ elif col == "quality":
2031
+ title = "log10(quality)"
2032
+
2046
2033
  overlay = (dist_with * dist_without).opts(
2047
- title=col,
2034
+ title=title,
2048
2035
  show_legend=True,
2049
2036
  tools=["hover"],
2050
2037
  )
2051
2038
  density_plots.append(overlay)
2052
2039
 
2053
- # Arrange the plots in a layout with three columns
2040
+ # Arrange the plots in a grid layout (3 columns for 7 plots)
2054
2041
  layout = hv.Layout(density_plots).cols(3).opts(shared_axes=False)
2055
2042
 
2056
2043
  # Use consistent save/display behavior
2057
- self._handle_sample_plot_output(layout, filename, "holoviews")
2044
+ if filename is not None:
2045
+ self._handle_sample_plot_output(layout, filename, "holoviews")
2046
+ else:
2047
+ # Return the layout directly for notebook display
2048
+ return layout
2058
2049
 
2059
2050
 
2060
2051
  def plot_tic(
@@ -1273,3 +1273,161 @@ def find_ms2(self, **kwargs):
1273
1273
  self.logger.debug(
1274
1274
  "Parameters stored to find_ms2",
1275
1275
  )
1276
+
1277
+
1278
def find_iso(self, rt_tolerance: float = 0.1, mz_tolerance: float = 0.01, **kwargs):
    """Extract isotopic distributions from MS1 data and store them in features_df.

    For every feature that has no ``ms1_spec`` entry yet, the MS1 points whose
    ``rt`` lies within ``rt_tolerance`` of the feature are searched for the
    monoisotopic peak and for peaks at a fixed list of m/z shifts above it.
    The result is written to the ``ms1_spec`` column of ``self.features_df``
    (dtype ``pl.Object``), which is created if it does not exist.

    Args:
        rt_tolerance (float): RT tolerance in minutes for matching MS1 data.
            It is multiplied by 60 before being compared with ``ms1_df["rt"]``,
            i.e. ``rt`` values are treated as seconds. Default 0.1.
        mz_tolerance (float): Half-width, in m/z units, of the window used to
            pick the base peak and each isotope peak. Default 0.01
            (10 ppm at 1000 Da).
        **kwargs: Additional parameters. They are not used by the extraction
            itself; they are only recorded in the history entry.

    Returns:
        None. ``self.features_df`` is updated in place and the parameters are
        recorded through ``self.store_history``.

    Notes:
        - Each stored array holds [mz, intensity] rows: the first row is the
          feature m/z with the maximum intensity found in its window, the
          following rows are the most intense point of each isotope window.
        - An isotope peak is kept only if it reaches at least 1% of the base
          peak intensity; a pattern is stored only if it has at least one
          isotope peak in addition to the base peak.
        - Features that already have a non-null ``ms1_spec`` are left untouched.
    """
    if self.features_df is None or self.features_df.is_empty():
        self.logger.warning("No features found. Run find_features() first.")
        return

    if self.ms1_df is None or self.ms1_df.is_empty():
        self.logger.warning("No MS1 data found.")
        return

    # Restrict the work to features that do not carry a pattern yet.
    if "ms1_spec" in self.features_df.columns:
        features_without_spec = self.features_df.filter(pl.col("ms1_spec").is_null())
        if features_without_spec.is_empty():
            self.logger.info("All features already have isotopic distributions.")
            return
        self.logger.info(f"Processing {len(features_without_spec)} features without isotopic distributions.")
    else:
        # Create the column up front so it exists even if extraction finds nothing.
        self.features_df = self.features_df.with_columns(
            pl.lit(None, dtype=pl.Object).alias("ms1_spec"),
        )
        features_without_spec = self.features_df
        self.logger.info(f"Processing {len(features_without_spec)} features for isotopic distributions.")

    # m/z offsets probed above the monoisotopic peak (same list as study.find_iso).
    isotope_shifts = np.array([
        0.33,
        0.50,
        0.66,
        1.00335,
        1.50502,
        2.00670,
        3.01005,
        4.01340,
        5.01675,
        6.02010,
        7.02345,
    ])

    # Convert rt_tolerance from minutes to seconds
    rt_tolerance_s = rt_tolerance * 60

    ms1_specs = []
    feature_uids = []

    progress = tqdm(
        features_without_spec.rows(named=True),
        desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Extracting isotope patterns",
    )
    for row in progress:
        feature_rt = row["rt"]

        # MS1 points within the RT window of this feature.
        ms1_in_range = self.ms1_df.filter(
            (pl.col("rt") >= (feature_rt - rt_tolerance_s))
            & (pl.col("rt") <= (feature_rt + rt_tolerance_s)),
        )

        if ms1_in_range.is_empty():
            ms1_spec = None
        else:
            ms1_spec = _find_iso_pattern(
                ms1_in_range,
                row["mz"],
                isotope_shifts,
                mz_tolerance,
            )

        ms1_specs.append(ms1_spec)
        feature_uids.append(row["feature_uid"])

    # Write the new patterns back by rebuilding the Object column directly.
    # This avoids joining / when-then on pl.Object columns and cannot duplicate
    # rows if a feature_uid occurs more than once.
    found = {
        uid: spec
        for uid, spec in zip(feature_uids, ms1_specs)
        if spec is not None and uid is not None
    }
    current_specs = self.features_df["ms1_spec"].to_list()
    all_uids = self.features_df["feature_uid"].to_list()
    merged_specs = [
        found.get(uid, existing) for uid, existing in zip(all_uids, current_specs)
    ]
    self.features_df = self.features_df.with_columns(
        pl.Series("ms1_spec", merged_specs, dtype=pl.Object),
    )

    # Log results
    non_null_count = sum(spec is not None for spec in ms1_specs)
    self.logger.info(f"Extracted isotopic distributions for {non_null_count}/{len(ms1_specs)} features.")

    # Store parameters in history
    params_dict = {"rt_tolerance": rt_tolerance, "mz_tolerance": mz_tolerance}
    params_dict.update(kwargs)
    self.store_history(["find_iso"], params_dict)


def _find_iso_mz_window(ms1_points, center_mz, mz_tolerance):
    """Return the rows of ms1_points whose mz lies within center_mz +/- mz_tolerance."""
    return ms1_points.filter(
        (pl.col("mz") >= (center_mz - mz_tolerance))
        & (pl.col("mz") <= (center_mz + mz_tolerance)),
    )


def _find_iso_pattern(ms1_points, feature_mz, isotope_shifts, mz_tolerance):
    """Build the [mz, intensity] isotope pattern for one feature, or return None."""
    min_rel_intensity = 0.01  # isotope peaks must reach 1% of the base peak

    base_peaks = _find_iso_mz_window(ms1_points, feature_mz, mz_tolerance)
    if base_peaks.is_empty():
        return None
    base_intensity = base_peaks["inty"].max()
    # Without a positive base intensity no isotope peak can qualify, so the
    # isotope windows do not need to be scanned at all.
    if base_intensity is None or not (base_intensity > 0):
        return None

    pattern = [[feature_mz, base_intensity]]
    for shift in isotope_shifts:
        candidates = _find_iso_mz_window(ms1_points, feature_mz + shift, mz_tolerance)
        if candidates.is_empty():
            continue
        max_intensity = candidates["inty"].max()
        if max_intensity is not None and max_intensity >= min_rel_intensity * base_intensity:
            # Report the m/z of the most intense point in the window.
            top = candidates.filter(pl.col("inty") == max_intensity).row(0, named=True)
            pattern.append([top["mz"], max_intensity])

    # Need at least 2 points (monoisotopic + 1 isotope)
    if len(pattern) < 2:
        return None
    return np.array(pattern, dtype=np.float64)
@@ -97,6 +97,7 @@ from masster.sample.processing import _get_ztscan_stats
97
97
  from masster.sample.processing import _spec_to_mat
98
98
  from masster.sample.processing import analyze_dda
99
99
  from masster.sample.processing import find_features
100
+ from masster.sample.processing import find_iso
100
101
  from masster.sample.processing import find_ms2
101
102
  from masster.sample.processing import get_spectrum
102
103
  from masster.sample.parameters import store_history
@@ -218,6 +219,7 @@ class Sample:
218
219
  save = save
219
220
  find_features = find_features
220
221
  find_adducts = find_adducts
222
+ find_iso = find_iso
221
223
  find_ms2 = find_ms2
222
224
  get_spectrum = get_spectrum
223
225
  filter = features_filter
@@ -90,6 +90,9 @@
90
90
  },
91
91
  "ms2_specs": {
92
92
  "dtype": "pl.Object"
93
+ },
94
+ "ms1_spec": {
95
+ "dtype": "pl.Object"
93
96
  }
94
97
  }
95
98
  },