masster 0.5.16__tar.gz → 0.5.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (98)
  1. {masster-0.5.16 → masster-0.5.18}/PKG-INFO +3 -4
  2. {masster-0.5.16 → masster-0.5.18}/README.md +2 -1
  3. {masster-0.5.16 → masster-0.5.18}/pyproject.toml +1 -3
  4. {masster-0.5.16 → masster-0.5.18}/src/masster/_version.py +1 -1
  5. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/adducts.py +12 -0
  6. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/sample_def.py +30 -6
  7. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/h5.py +52 -6
  8. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/lib.py +9 -3
  9. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/load.py +47 -120
  10. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/processing.py +1 -1
  11. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/sample.py +5 -3
  12. masster-0.5.18/src/masster/sample/sciex.py +638 -0
  13. masster-0.5.18/src/masster/sample/thermo.py +801 -0
  14. {masster-0.5.16 → masster-0.5.18}/src/masster/study/id.py +3 -1
  15. {masster-0.5.16 → masster-0.5.18}/src/masster/study/load.py +15 -792
  16. {masster-0.5.16 → masster-0.5.18}/src/masster/study/study.py +1 -0
  17. {masster-0.5.16 → masster-0.5.18}/src/masster/wizard/wizard.py +178 -225
  18. {masster-0.5.16 → masster-0.5.18}/uv.lock +1 -179
  19. masster-0.5.16/src/masster/sample/sciex.py +0 -1224
  20. {masster-0.5.16 → masster-0.5.18}/.github/workflows/publish.yml +0 -0
  21. {masster-0.5.16 → masster-0.5.18}/.github/workflows/security.yml +0 -0
  22. {masster-0.5.16 → masster-0.5.18}/.github/workflows/test.yml +0 -0
  23. {masster-0.5.16 → masster-0.5.18}/.gitignore +0 -0
  24. {masster-0.5.16 → masster-0.5.18}/.pre-commit-config.yaml +0 -0
  25. {masster-0.5.16 → masster-0.5.18}/LICENSE +0 -0
  26. {masster-0.5.16 → masster-0.5.18}/Makefile +0 -0
  27. {masster-0.5.16 → masster-0.5.18}/TESTING.md +0 -0
  28. {masster-0.5.16 → masster-0.5.18}/demo/example_batch_process.py +0 -0
  29. {masster-0.5.16 → masster-0.5.18}/demo/example_sample_process.py +0 -0
  30. {masster-0.5.16 → masster-0.5.18}/src/masster/__init__.py +0 -0
  31. {masster-0.5.16 → masster-0.5.18}/src/masster/chromatogram.py +0 -0
  32. {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  33. {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  34. {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  35. {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  36. {masster-0.5.16 → masster-0.5.18}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  37. {masster-0.5.16 → masster-0.5.18}/src/masster/data/libs/aa.csv +0 -0
  38. {masster-0.5.16 → masster-0.5.18}/src/masster/data/libs/ccm.csv +0 -0
  39. {masster-0.5.16 → masster-0.5.18}/src/masster/data/libs/hilic.csv +0 -0
  40. {masster-0.5.16 → masster-0.5.18}/src/masster/data/libs/urine.csv +0 -0
  41. {masster-0.5.16 → masster-0.5.18}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  42. {masster-0.5.16 → masster-0.5.18}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  43. {masster-0.5.16 → masster-0.5.18}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  44. {masster-0.5.16 → masster-0.5.18}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  45. {masster-0.5.16 → masster-0.5.18}/src/masster/lib/__init__.py +0 -0
  46. {masster-0.5.16 → masster-0.5.18}/src/masster/lib/lib.py +0 -0
  47. {masster-0.5.16 → masster-0.5.18}/src/masster/logger.py +0 -0
  48. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/__init__.py +0 -0
  49. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/__init__.py +0 -0
  50. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  51. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/find_features_def.py +0 -0
  52. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  53. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  54. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/helpers.py +0 -0
  55. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/parameters.py +0 -0
  56. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/plot.py +0 -0
  57. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/quant.py +0 -0
  58. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/sample5_schema.json +0 -0
  59. {masster-0.5.16 → masster-0.5.18}/src/masster/sample/save.py +0 -0
  60. {masster-0.5.16 → masster-0.5.18}/src/masster/spectrum.py +0 -0
  61. {masster-0.5.16 → masster-0.5.18}/src/masster/study/__init__.py +0 -0
  62. {masster-0.5.16 → masster-0.5.18}/src/masster/study/analysis.py +0 -0
  63. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/__init__.py +0 -0
  64. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/align_def.py +0 -0
  65. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/export_def.py +0 -0
  66. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/fill_def.py +0 -0
  67. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/find_consensus_def.py +0 -0
  68. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/find_ms2_def.py +0 -0
  69. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/identify_def.py +0 -0
  70. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  71. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/integrate_def.py +0 -0
  72. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/merge_def.py +0 -0
  73. {masster-0.5.16 → masster-0.5.18}/src/masster/study/defaults/study_def.py +0 -0
  74. {masster-0.5.16 → masster-0.5.18}/src/masster/study/export.py +0 -0
  75. {masster-0.5.16 → masster-0.5.18}/src/masster/study/h5.py +0 -0
  76. {masster-0.5.16 → masster-0.5.18}/src/masster/study/helpers.py +0 -0
  77. {masster-0.5.16 → masster-0.5.18}/src/masster/study/importers.py +0 -0
  78. {masster-0.5.16 → masster-0.5.18}/src/masster/study/merge.py +0 -0
  79. {masster-0.5.16 → masster-0.5.18}/src/masster/study/parameters.py +0 -0
  80. {masster-0.5.16 → masster-0.5.18}/src/masster/study/plot.py +0 -0
  81. {masster-0.5.16 → masster-0.5.18}/src/masster/study/processing.py +0 -0
  82. {masster-0.5.16 → masster-0.5.18}/src/masster/study/save.py +0 -0
  83. {masster-0.5.16 → masster-0.5.18}/src/masster/study/study5_schema.json +0 -0
  84. {masster-0.5.16 → masster-0.5.18}/src/masster/wizard/README.md +0 -0
  85. {masster-0.5.16 → masster-0.5.18}/src/masster/wizard/__init__.py +0 -0
  86. {masster-0.5.16 → masster-0.5.18}/src/masster/wizard/example.py +0 -0
  87. {masster-0.5.16 → masster-0.5.18}/tests/conftest.py +0 -0
  88. {masster-0.5.16 → masster-0.5.18}/tests/test_chromatogram.py +0 -0
  89. {masster-0.5.16 → masster-0.5.18}/tests/test_defaults.py +0 -0
  90. {masster-0.5.16 → masster-0.5.18}/tests/test_imports.py +0 -0
  91. {masster-0.5.16 → masster-0.5.18}/tests/test_integration.py +0 -0
  92. {masster-0.5.16 → masster-0.5.18}/tests/test_logger.py +0 -0
  93. {masster-0.5.16 → masster-0.5.18}/tests/test_parameters.py +0 -0
  94. {masster-0.5.16 → masster-0.5.18}/tests/test_sample.py +0 -0
  95. {masster-0.5.16 → masster-0.5.18}/tests/test_spectrum.py +0 -0
  96. {masster-0.5.16 → masster-0.5.18}/tests/test_study.py +0 -0
  97. {masster-0.5.16 → masster-0.5.18}/tests/test_version.py +0 -0
  98. {masster-0.5.16 → masster-0.5.18}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.16
3
+ Version: 0.5.18
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -681,7 +681,6 @@ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
681
681
  Classifier: Topic :: Scientific/Engineering :: Chemistry
682
682
  Requires-Python: >=3.11
683
683
  Requires-Dist: alpharaw>=0.4.8
684
- Requires-Dist: altair>=5.5.0
685
684
  Requires-Dist: bokeh>=3.7.3
686
685
  Requires-Dist: cmap>=0.6.2
687
686
  Requires-Dist: datashader>=0.18.1
@@ -698,7 +697,6 @@ Requires-Dist: pandas>=2.2.0
698
697
  Requires-Dist: panel>=1.7.0
699
698
  Requires-Dist: polars>=1.0.0
700
699
  Requires-Dist: pyopenms>=3.3.0
701
- Requires-Dist: pyteomics>=4.7.0
702
700
  Requires-Dist: pythonnet>=3.0.0
703
701
  Requires-Dist: scikit-learn>=1.7.1
704
702
  Requires-Dist: scipy>=1.12.0
@@ -749,10 +747,11 @@ pip install masster
749
747
 
750
748
  ```python
751
749
  import masster
752
- masster.wizard.execute(
750
+ wiz = masster.wizard.create_scripts(
753
751
  source=r'..\..\folder_with_raw_data',
754
752
  folder=r'..\..folder_to_store_results'
755
753
  )
754
+ wiz.run()
756
755
  ```
757
756
 
758
757
  This will run a wizard that should perform all key steps and save the results to the `folder`.
@@ -19,10 +19,11 @@ pip install masster
19
19
 
20
20
  ```python
21
21
  import masster
22
- masster.wizard.execute(
22
+ wiz = masster.wizard.create_scripts(
23
23
  source=r'..\..\folder_with_raw_data',
24
24
  folder=r'..\..folder_to_store_results'
25
25
  )
26
+ wiz.run()
26
27
  ```
27
28
 
28
29
  This will run a wizard that should perform all key steps and save the results to the `folder`.
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.5.16"
4
+ version = "0.5.18"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -27,7 +27,6 @@ urls.repository = "https://github.com/zamboni-lab/masster"
27
27
  urls.documentation = "https://github.com/zamboni-lab/masster#readme"
28
28
  dependencies = [
29
29
  "alpharaw>=0.4.8",
30
- "altair>=5.5.0",
31
30
  "bokeh>=3.7.3",
32
31
  "datashader>=0.18.1",
33
32
  "holoviews>=1.21.0",
@@ -40,7 +39,6 @@ dependencies = [
40
39
  "panel>=1.7.0",
41
40
  "polars>=1.0.0",
42
41
  "pyopenms>=3.3.0",
43
- "pyteomics>=4.7.0",
44
42
  "pythonnet>=3.0.0",
45
43
  "scipy>=1.12.0",
46
44
  "tqdm>=4.65.0",
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.5.16"
4
+ __version__ = "0.5.18"
5
5
 
6
6
 
7
7
  def get_version():
@@ -403,6 +403,7 @@ def find_adducts(self, **kwargs):
403
403
 
404
404
  Main parameters (from ``find_adducts_defaults``):
405
405
  - adducts (list[str] | str | None): List of potential adduct strings or ionization mode ('pos'/'neg').
406
+ If None, automatically uses sample.polarity to select appropriate default adducts.
406
407
  - charge_min (int): Minimum allowed charge state (default: -4).
407
408
  - charge_max (int): Maximum allowed charge state (default: 4).
408
409
  - retention_max_diff (float): Maximum RT difference in seconds (default: 1.0).
@@ -431,6 +432,17 @@ def find_adducts(self, **kwargs):
431
432
  else:
432
433
  self.logger.warning(f"Unknown parameter {key} ignored")
433
434
 
435
+ # Auto-set adducts based on sample polarity if not explicitly provided
436
+ if params.adducts is None and hasattr(self, 'polarity') and self.polarity is not None:
437
+ if self.polarity.lower() in ['positive', 'pos']:
438
+ params.set('adducts', 'positive', validate=True)
439
+ self.logger.debug(f"Auto-set adducts to 'positive' based on sample polarity: {self.polarity}")
440
+ elif self.polarity.lower() in ['negative', 'neg']:
441
+ params.set('adducts', 'negative', validate=True)
442
+ self.logger.debug(f"Auto-set adducts to 'negative' based on sample polarity: {self.polarity}")
443
+ else:
444
+ self.logger.debug(f"Unknown sample polarity '{self.polarity}', using default adducts")
445
+
434
446
  # Check if features_df exists and has data
435
447
  if not hasattr(self, "features_df") or len(self.features_df) == 0:
436
448
  self.logger.warning(
@@ -32,30 +32,42 @@ class sample_defaults:
32
32
  centroid_prominence (int): Prominence parameter for centroiding. Default is -1.
33
33
  max_points_per_spectrum (int): Maximum number of points per spectrum. Default is 50000.
34
34
  dia_window (Optional[float]): DIA window size. Default is None.
35
+ type (str): Acquisition type/mode. Options are 'dda', 'swath', 'ztscan', 'fia'. Default is 'dda'.
36
+ polarity (Optional[str]): Ionization polarity. Options are None, 'positive', 'negative'. Default is None.
35
37
  """
36
38
 
37
39
  filename: Optional[str] = None
38
- ondisk: bool = False
39
40
  label: str | None = None
40
41
  log_level: str = "INFO"
41
42
  log_label: Optional[str] = ""
42
43
  log_sink: str = "sys.stdout"
43
- chrom_fwhm: float = 1.0
44
+ ondisk: bool = False
45
+
46
+ # file and data handling settings
47
+ type: str = "dda"
48
+ polarity: str | None = None
49
+
50
+ # chromatographic settings
51
+ #chrom_fwhm: float = 1.0
52
+ eic_mz_tol: float = 0.01
53
+ eic_rt_tol: float = 10.0
54
+
55
+ # mz tolerances
44
56
  mz_tol_ms1_da: float = 0.002
45
57
  mz_tol_ms2_da: float = 0.005
46
58
  mz_tol_ms1_ppm: float = 5.0
47
59
  mz_tol_ms2_ppm: float = 10.0
60
+
61
+ # centroiding settings
48
62
  centroid_algo: str = "lmp"
49
63
  centroid_min_points_ms1: int = 5
50
64
  centroid_min_points_ms2: int = 4
51
65
  centroid_smooth: int = 5
52
66
  centroid_refine: bool = True
53
67
  centroid_prominence: int = -1
68
+
69
+ # data retrieval settings
54
70
  max_points_per_spectrum: int = 50000
55
- dia_window: float | None = None
56
-
57
- eic_mz_tol: float = 0.01
58
- eic_rt_tol: float = 10.0
59
71
 
60
72
  _param_metadata: dict[str, dict[str, Any]] = field(
61
73
  default_factory=lambda: {
@@ -178,6 +190,18 @@ class sample_defaults:
178
190
  "min_value": 0.2,
179
191
  "max_value": 60.0,
180
192
  },
193
+ "type": {
194
+ "dtype": str,
195
+ "description": "Acquisition type/mode",
196
+ "default": "dda",
197
+ "allowed_values": ["dda", "swath", "ztscan", "fia"],
198
+ },
199
+ "polarity": {
200
+ "dtype": "Optional[str]",
201
+ "description": "Ionization polarity",
202
+ "default": None,
203
+ "allowed_values": ["positive", "negative"],
204
+ },
181
205
  },
182
206
  repr=False,
183
207
  )
@@ -94,7 +94,7 @@ def _save_sample5(
94
94
  metadata_group.attrs["file_source"] = str(self.file_source)
95
95
  else:
96
96
  metadata_group.attrs["file_source"] = ""
97
- if self.file_type is not None:
97
+ if hasattr(self, 'file_type') and self.file_type is not None:
98
98
  metadata_group.attrs["file_type"] = str(self.file_type)
99
99
  else:
100
100
  metadata_group.attrs["file_type"] = ""
@@ -287,11 +287,41 @@ def _save_sample5(
287
287
  compression="gzip",
288
288
  )
289
289
 
290
- # Store parameters as JSON
291
- if self.parameters is not None:
292
- # Convert parameters dict to JSON string
293
- params_json = json.dumps(self.parameters, default=str)
294
- metadata_group.attrs["parameters"] = params_json
290
+ # Store parameters/history as JSON
291
+ # Always ensure we sync instance attributes to parameters before saving
292
+ if hasattr(self, 'parameters') and self.parameters is not None:
293
+ if hasattr(self, 'polarity') and self.polarity is not None:
294
+ self.parameters.polarity = self.polarity
295
+ if hasattr(self, 'type') and self.type is not None:
296
+ self.parameters.type = self.type
297
+
298
+ # Prepare save data
299
+ save_data = {}
300
+
301
+ # Add parameters as a dictionary
302
+ if hasattr(self, 'parameters') and self.parameters is not None:
303
+ save_data["sample"] = self.parameters.to_dict()
304
+
305
+ # Add history data (but ensure it's JSON serializable)
306
+ if hasattr(self, 'history') and self.history is not None:
307
+ # Convert any non-JSON-serializable objects to strings/dicts
308
+ serializable_history = {}
309
+ for key, value in self.history.items():
310
+ if key == "sample":
311
+ # Use our properly serialized parameters
312
+ continue # Skip, we'll add it from parameters above
313
+ try:
314
+ # Test if value is JSON serializable
315
+ json.dumps(value)
316
+ serializable_history[key] = value
317
+ except (TypeError, ValueError):
318
+ # Convert to string if not serializable
319
+ serializable_history[key] = str(value)
320
+ save_data.update(serializable_history)
321
+
322
+ # Save as JSON
323
+ params_json = json.dumps(save_data, indent=2)
324
+ metadata_group.attrs["parameters"] = params_json
295
325
 
296
326
  # Store lib and lib_match - removed (no longer saving lib data)
297
327
 
@@ -1072,6 +1102,14 @@ def _load_sample5(self, filename: str, map: bool = False):
1072
1102
  # set self.label to basename without extension
1073
1103
  if self.label is None or self.label == "":
1074
1104
  self.label = os.path.splitext(os.path.basename(filename))[0]
1105
+
1106
+ # Sync instance attributes from loaded parameters
1107
+ if hasattr(self, 'parameters') and self.parameters is not None:
1108
+ if hasattr(self.parameters, 'polarity') and self.parameters.polarity is not None:
1109
+ self.polarity = self.parameters.polarity
1110
+ if hasattr(self.parameters, 'type') and self.parameters.type is not None:
1111
+ self.type = self.parameters.type
1112
+
1075
1113
  self.logger.info(f"Sample loaded from {filename}")
1076
1114
 
1077
1115
 
@@ -1810,6 +1848,14 @@ def _load_sample5_study(self, filename: str, map: bool = False):
1810
1848
  # set self.label to basename without extension
1811
1849
  if self.label is None or self.label == "":
1812
1850
  self.label = os.path.splitext(os.path.basename(filename))[0]
1851
+
1852
+ # Sync instance attributes from loaded parameters
1853
+ if hasattr(self, 'parameters') and self.parameters is not None:
1854
+ if hasattr(self.parameters, 'polarity') and self.parameters.polarity is not None:
1855
+ self.polarity = self.parameters.polarity
1856
+ if hasattr(self.parameters, 'type') and self.parameters.type is not None:
1857
+ self.type = self.parameters.type
1858
+
1813
1859
  self.logger.info(
1814
1860
  f"Sample loaded successfully from {filename} (optimized for study)",
1815
1861
  )
@@ -71,7 +71,7 @@ def load_lib(self, *args, **kwargs):
71
71
  lib_load(self, *args, **kwargs)
72
72
 
73
73
 
74
- def lib_load(self, csvfile=None, polarity="positive"):
74
+ def lib_load(self, csvfile=None, polarity=None):
75
75
  delta_m = {
76
76
  "[M+H]+": 1.007276,
77
77
  "[M+Na]+": 22.989218,
@@ -97,10 +97,11 @@ def lib_load(self, csvfile=None, polarity="positive"):
97
97
  """
98
98
  Load target compounds from a CSV file.
99
99
  This method reads a CSV file containing target compounds and their properties, such as m/z, retention time (RT),
100
- and adducts. It filters the targets based on the specified adducts and returns a DataFrame of the targets.
100
+ and adducts. It filters the targets based on the specified polarity and returns a DataFrame of the targets.
101
101
  Parameters:
102
102
  csvfile (str): The path to the CSV file containing target compounds.
103
- adducts (list, optional): A list of adducts to filter the targets. Default is ['[M+H]+', '[M+Na]+', '[M+K]+'].
103
+ polarity (str, optional): Ion polarity to filter adducts ('positive' or 'negative').
104
+ If None, uses the sample's polarity property. Default is None.
104
105
  Returns:
105
106
  pd.DataFrame: A DataFrame containing the filtered target compounds with columns 'mz', 'rt', 'adduct'.
106
107
  """
@@ -220,6 +221,11 @@ def lib_load(self, csvfile=None, polarity="positive"):
220
221
  self.lib = self.lib.where(pd.notnull(self.lib), None)
221
222
  # find all elements == nan and replace them with None
222
223
  self.lib = self.lib.replace({np.nan: None})
224
+
225
+ # Use sample.polarity if polarity parameter is None
226
+ if polarity is None:
227
+ polarity = getattr(self, 'polarity', 'positive')
228
+
223
229
  if polarity is not None:
224
230
  if polarity.lower() == "positive":
225
231
  self.lib = self.lib[self.lib["z"] > 0]
@@ -73,7 +73,7 @@ def load(
73
73
  filename (str): The path to the file to load. The file must exist and have one of the following extensions:
74
74
  .mzML, .wiff, or .raw.
75
75
  ondisk (bool, optional): Indicates whether the file should be treated as on disk. Defaults to False.
76
- type (str, optional): Specifies the type of file. If provided and set to 'ztscan' (case-insensitive), the file_type
76
+ type (str, optional): Specifies the type of file. If provided and set to 'ztscan' (case-insensitive), the type
77
77
  attribute will be adjusted accordingly. Defaults to None.
78
78
  label (Any, optional): An optional label to associate with the loaded file. Defaults to None.
79
79
  Raises:
@@ -84,7 +84,7 @@ def load(
84
84
  - ".mzml": Calls _load_mzML(filename)
85
85
  - ".wiff": Calls _load_wiff(filename)
86
86
  - ".raw": Calls _load_raw(filename)
87
- After loading, the file_type attribute is set to 'dda', unless the optional 'type' parameter is provided as 'ztscan',
87
+ After loading, the type attribute is set to 'dda', unless the optional 'type' parameter is provided as 'ztscan',
88
88
  in which case it is updated to 'ztscan'. The label attribute is updated if a label is provided.
89
89
  """
90
90
 
@@ -109,9 +109,9 @@ def load(
109
109
  else:
110
110
  raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
111
111
 
112
- self.file_type = "dda"
112
+ self.type = "dda"
113
113
  if type is not None and type.lower() in ["ztscan"]:
114
- self.file_type = "ztscan"
114
+ self.type = "ztscan"
115
115
 
116
116
  if label is not None:
117
117
  self.label = label
@@ -167,9 +167,9 @@ def load_noms1(
167
167
  else:
168
168
  raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
169
169
 
170
- self.file_type = "dda"
170
+ self.type = "dda"
171
171
  if type is not None and type.lower() in ["ztscan"]:
172
- self.file_type = "ztscan"
172
+ self.type = "ztscan"
173
173
 
174
174
  if label is not None:
175
175
  self.label = label
@@ -255,6 +255,7 @@ def _load_mzML(
255
255
  )
256
256
 
257
257
  tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
258
+ polarity = None
258
259
  # iterate over all spectra
259
260
  for i, s in tqdm(
260
261
  enumerate(omsexp.getSpectra()), # type: ignore[union-attr]
@@ -262,23 +263,36 @@ def _load_mzML(
262
263
  desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Scans",
263
264
  disable=tdqm_disable,
264
265
  ):
266
+ # try to get polarity
267
+ if polarity is None:
268
+ try:
269
+ pol = s.getInstrumentSettings().getPolarity()
270
+ if pol == 1:
271
+ polarity = "positive"
272
+ elif pol == 2:
273
+ polarity = "negative"
274
+ except Exception:
275
+ pass
265
276
  # create a dict
266
277
  if s.getMSLevel() == 1:
267
278
  cycle += 1
268
279
  prec_mz = None
269
280
  precursorIsolationWindowLowerMZ = None
270
281
  precursorIsolationWindowUpperMZ = None
271
- prec_intyensity = None
282
+ prec_inty = None
272
283
  energy = None
273
284
  else:
274
- prec_mz = s.getPrecursors()[0].getMZ()
285
+ prec_mz = s.getPrecursors()
286
+ if len(prec_mz) == 0:
287
+ continue
288
+ prec_mz = prec_mz[0].getMZ()
275
289
  precursorIsolationWindowLowerMZ = s.getPrecursors()[
276
290
  0
277
291
  ].getIsolationWindowLowerOffset()
278
292
  precursorIsolationWindowUpperMZ = s.getPrecursors()[
279
293
  0
280
294
  ].getIsolationWindowUpperOffset()
281
- prec_intyensity = s.getPrecursors()[0].getIntensity()
295
+ prec_inty = s.getPrecursors()[0].getIntensity()
282
296
  # Try to get collision energy from meta values first, fallback to getActivationEnergy()
283
297
  try:
284
298
  energy = s.getPrecursors()[0].getMetaValue("collision energy")
@@ -321,7 +335,7 @@ def _load_mzML(
321
335
  "prec_mz": prec_mz,
322
336
  "prec_mz_min": precursorIsolationWindowLowerMZ,
323
337
  "prec_mz_max": precursorIsolationWindowUpperMZ,
324
- "prec_inty": prec_intyensity,
338
+ "prec_inty": prec_inty,
325
339
  "energy": energy,
326
340
  "feature_uid": -1,
327
341
  }
@@ -367,10 +381,11 @@ def _load_mzML(
367
381
  },
368
382
  infer_schema_length=None,
369
383
  )
384
+ self.polarity = polarity
370
385
  self.file_interface = "oms"
371
386
  self.ms1_df = ms1_df
372
387
  self.label = os.path.basename(filename)
373
- if self.file_type != "ztscan":
388
+ if self.type != "ztscan":
374
389
  self.analyze_dda()
375
390
 
376
391
 
@@ -401,7 +416,8 @@ def _load_raw(
401
416
  - Updates instance attributes including self.file_path, self.file_obj, self.file_interface, and self.label.
402
417
  - Initiates further analysis by invoking analyze_dda().
403
418
  """
404
- from alpharaw.thermo import ThermoRawData
419
+ #from alpharaw.thermo import ThermoRawData
420
+ from masster.sample.thermo import ThermoRawData
405
421
 
406
422
  if not filename:
407
423
  raise ValueError("Filename must be provided.")
@@ -464,6 +480,13 @@ def _load_raw(
464
480
  prec_intyensity = None
465
481
  energy = s["nce"]
466
482
 
483
+ # try to get polarity
484
+ if self.polarity is None:
485
+ if s['polarity'] == 'positive':
486
+ self.polarity = 'positive'
487
+ elif s['polarity'] == 'negative':
488
+ self.polarity = 'negative'
489
+
467
490
  peak_start_idx = s["peak_start_idx"]
468
491
  peak_stop_idx = s["peak_stop_idx"]
469
492
  peaks = raw_data.peak_df.loc[peak_start_idx : peak_stop_idx - 1]
@@ -564,16 +587,10 @@ def _load_wiff(
564
587
  self,
565
588
  filename=None,
566
589
  ):
567
- try:
568
- # Use masster's own implementation first
569
- from masster.sample.sciex import SciexWiffData as MassterSciexWiffData
590
+ # Use masster's own implementation first
591
+ from masster.sample.sciex import SciexWiffData as MassterSciexWiffData
570
592
 
571
- SciexWiffDataClass = MassterSciexWiffData
572
- except ImportError:
573
- # Fallback to alpharaw if masster implementation fails
574
- from alpharaw.sciex import SciexWiffData as AlpharawSciexWiffData
575
-
576
- SciexWiffDataClass = AlpharawSciexWiffData
593
+ SciexWiffDataClass = MassterSciexWiffData
577
594
 
578
595
  if not filename:
579
596
  raise ValueError("Filename must be provided.")
@@ -610,7 +627,7 @@ def _load_wiff(
610
627
  "mz": pl.Float64,
611
628
  "inty": pl.Float64,
612
629
  }
613
-
630
+ polarity = None
614
631
  # iterate over rows of specs
615
632
  tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
616
633
  for i, s in tqdm(
@@ -620,6 +637,13 @@ def _load_wiff(
620
637
  disable=tdqm_disable,
621
638
  ):
622
639
  ms_level = s["ms_level"]
640
+ # try to get polarity
641
+ if polarity is None:
642
+ if s['polarity'] == 'positive':
643
+ polarity = 'positive'
644
+ elif s['polarity'] == 'negative':
645
+ polarity = 'negative'
646
+
623
647
  if ms_level == 1:
624
648
  cycle += 1
625
649
  prec_mz = None
@@ -723,7 +747,7 @@ def _load_wiff(
723
747
  self.file_interface = "alpharaw"
724
748
  self.label = os.path.basename(filename)
725
749
  self.ms1_df = pl.DataFrame(ms1_df_records, schema=schema)
726
- if self.file_type != "ztscan":
750
+ if self.type != "ztscan":
727
751
  self.analyze_dda()
728
752
 
729
753
 
@@ -750,103 +774,6 @@ def _load_featureXML(
750
774
  fm = oms.FeatureMap()
751
775
  fh.load(filename, fm)
752
776
  self._oms_features_map = fm
753
- """if self.features_df is None:
754
- df = self._oms_features_map.get_df(export_peptide_identifications=False)
755
- df = self._clean_features_df(df)
756
-
757
- # desotope features
758
- df = self._features_deisotope(df, mz_tol=0.02, rt_tol=0.5)
759
-
760
- # update eic
761
- df["chrom"] = None
762
- mz_tol = 0.01
763
- rt_tol = 10
764
- # iterate over all rows in df
765
- for i, row in df.iterrows():
766
- # select data in ms1_df with mz in range [mz_start - mz_tol, mz_end + mz_tol] and rt in range [rt_start - rt_tol, rt_end + rt_tol]
767
- d = self.ms1_df.filter(
768
- (pl.col("rt") >= row["rt_start"] - rt_tol)
769
- & (pl.col("rt") <= row["rt_end"] + rt_tol)
770
- & (pl.col("mz") >= row["mz"] - mz_tol)
771
- & (pl.col("mz") <= row["mz"] + mz_tol)
772
- )
773
- # for all unique rt values, find the maximum inty
774
- eic_rt = d.group_by("rt").agg(pl.col("inty").max())
775
- if len(eic_rt) < 4:
776
- continue
777
- eic = Chromatogram(
778
- eic_rt["rt"].to_numpy(),
779
- eic_rt["inty"].to_numpy(),
780
- label=f"EIC mz={row['mz']:.4f}",
781
- feature_start=row["rt_start"],
782
- feature_end=row["rt_end"],
783
- feature_apex=row["rt"],
784
- ).find_peaks()
785
- # set eic in df
786
- df.at[i, "chrom"] = eic
787
- if len(eic.peak_widths) > 0:
788
- df.at[i, "chrom_coherence"] = round(eic.feature_coherence, 3)
789
- df.at[i, "chrom_prominence"] = round(
790
- eic.peak_prominences[0], 3
791
- ) # eic.peak_prominences[0]
792
- df.at[i, "chrom_prominence_scaled"] = round(
793
- eic.peak_prominences[0] / (np.mean(eic.inty) + 1e-10), 3
794
- )
795
- df.at[i, "chrom_height_scaled"] = round(
796
- eic.peak_heights[0] / (np.mean(eic.inty) + 1e-10), 3
797
- )
798
-
799
- self.features_df = df"""
800
-
801
-
802
- '''
803
- def _load_mzpkl(
804
- self,
805
- filename="sample.mzpkl",
806
- ondisk=False,
807
- ):
808
- """
809
- Load the mzpkl data file, initialize the experiment attributes, and set up the file object.
810
- Parameters:
811
- filename (str, optional): The path to the .mzpkl file to be loaded. Defaults to "data.mzpkl".
812
- ondisk (bool, optional): A flag indicating whether the data should be loaded for on-disk usage.
813
- If True, self.ondisk is set to True and an OnDiscMSExperiment is used.
814
- Otherwise, an MSExperiment is used.
815
- Side Effects:
816
- - Decompresses and unpickles the specified file.
817
- - Sets attributes on self for each key in the loaded data dictionary, except for keys named 'format'.
818
- - Renames the attribute 'spectra_df' to 'scans_df' if present.
819
- - Initializes self.file_obj as either an OnDiscMSExperiment or MSExperiment based on the ondisk flag.
820
- - Checks for an associated featureXML file (with the same base name as the input file) and loads it if found.
821
- """
822
-
823
- if ondisk is True:
824
- self.ondisk = True
825
-
826
- with bz2.BZ2File(filename, "rb") as f:
827
- data = pickle.load(f)
828
-
829
- for k, v in data.items():
830
- if k in ["format"]:
831
- continue
832
- if k == "spectra_df":
833
- k = "scans_df"
834
- setattr(self, k, v)
835
-
836
- # Import and call internal sanitize function
837
- from masster.study.load import _sanitize
838
- _sanitize(self) if self.ondisk:
839
- self.file_obj = oms.OnDiscMSExperiment()
840
- else:
841
- self.file_obj = oms.MSExperiment()
842
-
843
- # check if *.featureXML exists
844
- featureXML = filename.replace(".mzpkl", ".featureXML")
845
- if os.path.exists(featureXML):
846
- self._load_featureXML(featureXML)
847
-
848
- '''
849
-
850
777
 
851
778
  def _wiff_to_dict(
852
779
  filename=None,
@@ -248,7 +248,7 @@ def get_spectrum(self, scan, **kwargs):
248
248
  spect = spect.keep_top(max_peaks)
249
249
 
250
250
  if dia_stats:
251
- if self.file_type in ["ztscan", "dia"]:
251
+ if self.type in ["ztscan", "dia", "swath"]:
252
252
  spect = self._get_ztscan_stats(
253
253
  spec=spect,
254
254
  scan_uid=scan_uid,
@@ -240,12 +240,13 @@ class Sample:
240
240
  # Initialize label from parameters
241
241
  self.label = params.label
242
242
 
243
+ self.type = params.type # dda, dia, ztscan
244
+ self.polarity = params.polarity # Initialize from parameters, may be overridden during raw file loading
245
+
243
246
  # this is the path to the original file. It's never sample5
244
247
  self.file_source = None
245
248
  # this is the path to the object that was loaded. It could be sample5
246
249
  self.file_path = None
247
- # Type of the file (e.g., mzML, RAW, WIFF, mzpkl)
248
- self.file_type = None
249
250
  # Interface to handle the file operations (e.g., oms, alpharaw)
250
251
  self.file_interface = None
251
252
  # The file object once loaded, can be oms.MzMLFile or alpharaw.AlphaRawFile
@@ -327,7 +328,6 @@ class Sample:
327
328
  _save_sample5 = _save_sample5
328
329
  _load_sample5 = _load_sample5
329
330
 
330
-
331
331
  # Removed internal-only load methods: _load_featureXML, _load_ms2data, _load_mzML, _load_raw, _load_wiff
332
332
  chrom_extract = chrom_extract
333
333
  _index_file = _index_file # Renamed from index_file to be internal-only
@@ -503,6 +503,8 @@ class Sample:
503
503
  str = f"File: {os.path.basename(self.file_path)}\n"
504
504
  str += f"Path: {os.path.dirname(self.file_path)}\n"
505
505
  str += f"Source: {self.file_source}\n"
506
+ str += f"Type: {self.type}\n"
507
+ str += f"Polarity: {self.polarity}\n"
506
508
  str += f"MS1 scans: {len(self.scans_df.filter(pl.col('ms_level') == 1))}\n"
507
509
  str += f"MS2 scans: {len(self.scans_df.filter(pl.col('ms_level') == 2))}\n"
508
510
  if self.features_df is not None: