masster 0.5.18__tar.gz → 0.5.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (97)
  1. {masster-0.5.18 → masster-0.5.19}/PKG-INFO +1 -1
  2. {masster-0.5.18 → masster-0.5.19}/pyproject.toml +1 -1
  3. {masster-0.5.18 → masster-0.5.19}/src/masster/_version.py +1 -1
  4. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/defaults/find_ms2_def.py +5 -5
  5. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/h5.py +8 -8
  6. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/processing.py +2 -2
  7. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/sciex.py +2 -2
  8. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/thermo.py +1 -1
  9. {masster-0.5.18 → masster-0.5.19}/src/masster/wizard/README.md +15 -15
  10. {masster-0.5.18 → masster-0.5.19}/src/masster/wizard/wizard.py +82 -28
  11. {masster-0.5.18 → masster-0.5.19}/uv.lock +1 -1
  12. {masster-0.5.18 → masster-0.5.19}/.github/workflows/publish.yml +0 -0
  13. {masster-0.5.18 → masster-0.5.19}/.github/workflows/security.yml +0 -0
  14. {masster-0.5.18 → masster-0.5.19}/.github/workflows/test.yml +0 -0
  15. {masster-0.5.18 → masster-0.5.19}/.gitignore +0 -0
  16. {masster-0.5.18 → masster-0.5.19}/.pre-commit-config.yaml +0 -0
  17. {masster-0.5.18 → masster-0.5.19}/LICENSE +0 -0
  18. {masster-0.5.18 → masster-0.5.19}/Makefile +0 -0
  19. {masster-0.5.18 → masster-0.5.19}/README.md +0 -0
  20. {masster-0.5.18 → masster-0.5.19}/TESTING.md +0 -0
  21. {masster-0.5.18 → masster-0.5.19}/demo/example_batch_process.py +0 -0
  22. {masster-0.5.18 → masster-0.5.19}/demo/example_sample_process.py +0 -0
  23. {masster-0.5.18 → masster-0.5.19}/src/masster/__init__.py +0 -0
  24. {masster-0.5.18 → masster-0.5.19}/src/masster/chromatogram.py +0 -0
  25. {masster-0.5.18 → masster-0.5.19}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  26. {masster-0.5.18 → masster-0.5.19}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  27. {masster-0.5.18 → masster-0.5.19}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  28. {masster-0.5.18 → masster-0.5.19}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  29. {masster-0.5.18 → masster-0.5.19}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  30. {masster-0.5.18 → masster-0.5.19}/src/masster/data/libs/aa.csv +0 -0
  31. {masster-0.5.18 → masster-0.5.19}/src/masster/data/libs/ccm.csv +0 -0
  32. {masster-0.5.18 → masster-0.5.19}/src/masster/data/libs/hilic.csv +0 -0
  33. {masster-0.5.18 → masster-0.5.19}/src/masster/data/libs/urine.csv +0 -0
  34. {masster-0.5.18 → masster-0.5.19}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  35. {masster-0.5.18 → masster-0.5.19}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  36. {masster-0.5.18 → masster-0.5.19}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  37. {masster-0.5.18 → masster-0.5.19}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  38. {masster-0.5.18 → masster-0.5.19}/src/masster/lib/__init__.py +0 -0
  39. {masster-0.5.18 → masster-0.5.19}/src/masster/lib/lib.py +0 -0
  40. {masster-0.5.18 → masster-0.5.19}/src/masster/logger.py +0 -0
  41. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/__init__.py +0 -0
  42. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/adducts.py +0 -0
  43. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/defaults/__init__.py +0 -0
  44. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  45. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/defaults/find_features_def.py +0 -0
  46. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  47. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/defaults/sample_def.py +0 -0
  48. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/helpers.py +0 -0
  49. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/lib.py +0 -0
  50. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/load.py +0 -0
  51. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/parameters.py +0 -0
  52. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/plot.py +0 -0
  53. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/quant.py +0 -0
  54. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/sample.py +0 -0
  55. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/sample5_schema.json +0 -0
  56. {masster-0.5.18 → masster-0.5.19}/src/masster/sample/save.py +0 -0
  57. {masster-0.5.18 → masster-0.5.19}/src/masster/spectrum.py +0 -0
  58. {masster-0.5.18 → masster-0.5.19}/src/masster/study/__init__.py +0 -0
  59. {masster-0.5.18 → masster-0.5.19}/src/masster/study/analysis.py +0 -0
  60. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/__init__.py +0 -0
  61. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/align_def.py +0 -0
  62. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/export_def.py +0 -0
  63. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/fill_def.py +0 -0
  64. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/find_consensus_def.py +0 -0
  65. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/find_ms2_def.py +0 -0
  66. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/identify_def.py +0 -0
  67. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  68. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/integrate_def.py +0 -0
  69. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/merge_def.py +0 -0
  70. {masster-0.5.18 → masster-0.5.19}/src/masster/study/defaults/study_def.py +0 -0
  71. {masster-0.5.18 → masster-0.5.19}/src/masster/study/export.py +0 -0
  72. {masster-0.5.18 → masster-0.5.19}/src/masster/study/h5.py +0 -0
  73. {masster-0.5.18 → masster-0.5.19}/src/masster/study/helpers.py +0 -0
  74. {masster-0.5.18 → masster-0.5.19}/src/masster/study/id.py +0 -0
  75. {masster-0.5.18 → masster-0.5.19}/src/masster/study/importers.py +0 -0
  76. {masster-0.5.18 → masster-0.5.19}/src/masster/study/load.py +0 -0
  77. {masster-0.5.18 → masster-0.5.19}/src/masster/study/merge.py +0 -0
  78. {masster-0.5.18 → masster-0.5.19}/src/masster/study/parameters.py +0 -0
  79. {masster-0.5.18 → masster-0.5.19}/src/masster/study/plot.py +0 -0
  80. {masster-0.5.18 → masster-0.5.19}/src/masster/study/processing.py +0 -0
  81. {masster-0.5.18 → masster-0.5.19}/src/masster/study/save.py +0 -0
  82. {masster-0.5.18 → masster-0.5.19}/src/masster/study/study.py +0 -0
  83. {masster-0.5.18 → masster-0.5.19}/src/masster/study/study5_schema.json +0 -0
  84. {masster-0.5.18 → masster-0.5.19}/src/masster/wizard/__init__.py +0 -0
  85. {masster-0.5.18 → masster-0.5.19}/src/masster/wizard/example.py +0 -0
  86. {masster-0.5.18 → masster-0.5.19}/tests/conftest.py +0 -0
  87. {masster-0.5.18 → masster-0.5.19}/tests/test_chromatogram.py +0 -0
  88. {masster-0.5.18 → masster-0.5.19}/tests/test_defaults.py +0 -0
  89. {masster-0.5.18 → masster-0.5.19}/tests/test_imports.py +0 -0
  90. {masster-0.5.18 → masster-0.5.19}/tests/test_integration.py +0 -0
  91. {masster-0.5.18 → masster-0.5.19}/tests/test_logger.py +0 -0
  92. {masster-0.5.18 → masster-0.5.19}/tests/test_parameters.py +0 -0
  93. {masster-0.5.18 → masster-0.5.19}/tests/test_sample.py +0 -0
  94. {masster-0.5.18 → masster-0.5.19}/tests/test_spectrum.py +0 -0
  95. {masster-0.5.18 → masster-0.5.19}/tests/test_study.py +0 -0
  96. {masster-0.5.18 → masster-0.5.19}/tests/test_version.py +0 -0
  97. {masster-0.5.18 → masster-0.5.19}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.18
3
+ Version: 0.5.19
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.5.18"
4
+ version = "0.5.19"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.5.18"
4
+ __version__ = "0.5.19"
5
5
 
6
6
 
7
7
  def get_version():
@@ -42,7 +42,7 @@ class find_ms2_defaults:
42
42
  - get_description(param_name): Get parameter description
43
43
  - get_info(param_name): Get full parameter metadata
44
44
  - list_parameters(): Get list of all parameter names
45
- - get_mz_tolerance(file_type): Get appropriate m/z tolerance based on file type
45
+ - get_mz_tolerance(type): Get appropriate m/z tolerance based on type
46
46
  """
47
47
 
48
48
  # Core MS2 linking parameters
@@ -270,16 +270,16 @@ class find_ms2_defaults:
270
270
 
271
271
  return len(invalid_params) == 0, invalid_params
272
272
 
273
- def get_mz_tolerance(self, file_type=None):
273
+ def get_mz_tolerance(self, type=None):
274
274
  """
275
- Get the appropriate m/z tolerance based on file type.
275
+ Get the appropriate m/z tolerance based on type.
276
276
 
277
277
  Args:
278
- file_type (str, optional): File type ('ztscan', 'dia', or other)
278
+ type (str, optional): Acquisition type ('ztscan', 'dia', or other)
279
279
 
280
280
  Returns:
281
281
  float: Appropriate m/z tolerance value
282
282
  """
283
- if file_type is not None and file_type.lower() in ["ztscan", "dia"]:
283
+ if type is not None and type.lower() in ["ztscan", "dia"]:
284
284
  return self.get("mz_tol_ztscan")
285
285
  return self.get("mz_tol")
@@ -1,4 +1,4 @@
1
- import json
1
+ import json
2
2
  import os
3
3
 
4
4
  import h5py
@@ -94,8 +94,8 @@ def _save_sample5(
94
94
  metadata_group.attrs["file_source"] = str(self.file_source)
95
95
  else:
96
96
  metadata_group.attrs["file_source"] = ""
97
- if hasattr(self, 'file_type') and self.file_type is not None:
98
- metadata_group.attrs["file_type"] = str(self.file_type)
97
+ if hasattr(self, 'type') and self.type is not None:
98
+ metadata_group.attrs["file_type"] = str(self.type)
99
99
  else:
100
100
  metadata_group.attrs["file_type"] = ""
101
101
  if self.label is not None:
@@ -393,7 +393,7 @@ def _load_sample5(self, filename: str, map: bool = False):
393
393
  else:
394
394
  self.file_source = self.file_path
395
395
 
396
- self.file_type = decode_metadata_attr(
396
+ self.type = decode_metadata_attr(
397
397
  metadata_group.attrs.get("file_type", ""),
398
398
  )
399
399
  self.label = decode_metadata_attr(metadata_group.attrs.get("label", ""))
@@ -1160,7 +1160,7 @@ def _load_sample5_study(self, filename: str, map: bool = False):
1160
1160
  else:
1161
1161
  self.file_source = self.file_path
1162
1162
 
1163
- self.file_type = decode_metadata_attr(
1163
+ self.type = decode_metadata_attr(
1164
1164
  metadata_group.attrs.get("file_type", ""),
1165
1165
  )
1166
1166
  self.label = decode_metadata_attr(metadata_group.attrs.get("label", ""))
@@ -2302,7 +2302,7 @@ def create_h5_metadata_group(
2302
2302
  f: h5py.File,
2303
2303
  file_path: Optional[str],
2304
2304
  file_source: Optional[str],
2305
- file_type: Optional[str],
2305
+ type: Optional[str],
2306
2306
  label: Optional[str],
2307
2307
  ) -> None:
2308
2308
  """
@@ -2312,7 +2312,7 @@ def create_h5_metadata_group(
2312
2312
  f: The HDF5 file object
2313
2313
  file_path: Source file path
2314
2314
  file_source: Original source file path
2315
- file_type: Source file type
2315
+ type: Source file type
2316
2316
  label: Sample label
2317
2317
  """
2318
2318
  metadata_group = f.create_group("metadata")
@@ -2321,5 +2321,5 @@ def create_h5_metadata_group(
2321
2321
  metadata_group.attrs["file_source"] = (
2322
2322
  str(file_source) if file_source is not None else ""
2323
2323
  )
2324
- metadata_group.attrs["file_type"] = str(file_type) if file_type is not None else ""
2324
+ metadata_group.attrs["file_type"] = str(type) if type is not None else ""
2325
2325
  metadata_group.attrs["label"] = str(label) if label is not None else ""
@@ -1028,7 +1028,7 @@ def find_ms2(self, **kwargs):
1028
1028
 
1029
1029
  - mz_tol (float):
1030
1030
  Precursor m/z tolerance used for matching. The effective tolerance may be
1031
- adjusted by file type (the defaults class provides ``get_mz_tolerance(file_type)``).
1031
+ adjusted by type (the defaults class provides ``get_mz_tolerance(type)``).
1032
1032
  Default: 0.5 (ztscan/DIA defaults may be larger).
1033
1033
 
1034
1034
  - centroid (bool):
@@ -1077,7 +1077,7 @@ def find_ms2(self, **kwargs):
1077
1077
 
1078
1078
  # Extract parameter values
1079
1079
  features = params.get("features")
1080
- mz_tol = params.get_mz_tolerance(self.file_type)
1080
+ mz_tol = params.get_mz_tolerance(self.type)
1081
1081
  centroid = params.get("centroid")
1082
1082
  deisotope = params.get("deisotope")
1083
1083
  dia_stats = params.get("dia_stats")
@@ -379,7 +379,7 @@ class SciexWiffData:
379
379
  self._raw_file_path = ""
380
380
  self.centroided = centroided
381
381
  self.creation_time = ""
382
- self.file_type = "sciex"
382
+ self.type = "sciex"
383
383
  self.instrument = "sciex"
384
384
 
385
385
  if self.centroided:
@@ -616,7 +616,7 @@ if __name__ == "__main__":
616
616
  print(f" - Number of spectra: {len(wiff_data.spectrum_df)}")
617
617
  print(f" - Number of peaks: {len(wiff_data.peak_df)}")
618
618
  print(f" - Creation time: {wiff_data.creation_time}")
619
- print(f" - File type: {wiff_data.file_type}")
619
+ print(f" - File type: {wiff_data.type}")
620
620
  print(f" - Instrument: {wiff_data.instrument}")
621
621
 
622
622
  # Test getting peaks from first spectrum
@@ -524,7 +524,7 @@ class ThermoRawData:
524
524
  # File and instrument information
525
525
  self._raw_file_path = ""
526
526
  self.creation_time = ""
527
- self.file_type = "thermo"
527
+ self.type = "thermo"
528
528
  self.instrument = "thermo"
529
529
 
530
530
  # Processing parameters
@@ -12,7 +12,7 @@ from masster import Wizard
12
12
  # Create wizard with minimal configuration
13
13
  wizard = Wizard(
14
14
  data_source="./raw_data", # Directory with raw files
15
- study_folder="./processed", # Output directory
15
+ study_folder="./processed", # Output directory
16
16
  polarity="positive", # or "negative"
17
17
  num_cores=4 # CPU cores to use
18
18
  )
@@ -35,22 +35,22 @@ params = wizard_def(
35
35
  study_folder="./processed_advanced",
36
36
  polarity="negative",
37
37
  num_cores=8,
38
-
38
+
39
39
  # File discovery
40
40
  file_extensions=[".wiff", ".raw", ".mzML"],
41
41
  search_subfolders=True,
42
42
  skip_patterns=["blank", "QC", "test"],
43
-
43
+
44
44
  # Processing parameters
45
45
  adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
46
46
  chrom_fwhm=0.15,
47
47
  noise_threshold=5e4,
48
-
48
+
49
49
  # Study assembly
50
50
  rt_tolerance=1.0,
51
51
  mz_tolerance=0.008,
52
52
  min_samples_for_merge=30,
53
-
53
+
54
54
  # Output options
55
55
  export_formats=["csv", "xlsx", "mgf", "parquet"],
56
56
  generate_plots=True,
@@ -73,7 +73,7 @@ wizard.run_full_pipeline()
73
73
 
74
74
  ### 💾 Intelligent Resume
75
75
  - **Checkpoint System**: Automatically saves progress at key points
76
- - **File Tracking**: Remembers which files have been processed successfully
76
+ - **File Tracking**: Remembers which files have been processed successfully
77
77
  - **Smart Recovery**: Resumes from last successful step after interruption
78
78
  - **Validation**: Verifies existing outputs before skipping
79
79
 
@@ -112,7 +112,7 @@ wizard.run_full_pipeline()
112
112
 
113
113
  ### 4. Feature Alignment
114
114
  - **RT Alignment**: Corrects retention time shifts between samples
115
- - **Mass Alignment**: Accounts for mass calibration differences
115
+ - **Mass Alignment**: Accounts for mass calibration differences
116
116
  - **Algorithm Selection**: Supports KD-tree, QT-clustering, and chunked methods
117
117
  - **Validation**: Reports alignment statistics and quality metrics
118
118
 
@@ -232,7 +232,7 @@ Returns detailed status dictionary with current step, processed files, timing, a
232
232
  - Ensure sufficient disk space in output directory
233
233
  - Close any applications that might lock files
234
234
 
235
- **Processing Failures**
235
+ **Processing Failures**
236
236
  - Check individual file integrity
237
237
  - Review `skip_patterns` to exclude problematic files
238
238
  - Examine detailed logs in `wizard.log` and `processing.log`
@@ -249,7 +249,7 @@ The Wizard includes built-in validation at each step:
249
249
 
250
250
  - **File Validation**: Checks file accessibility and format compatibility
251
251
  - **Processing Validation**: Verifies sample5 outputs can be loaded
252
- - **Study Validation**: Ensures study assembly completed successfully
252
+ - **Study Validation**: Ensures study assembly completed successfully
253
253
  - **Alignment Validation**: Reports alignment statistics and warnings
254
254
  - **Export Validation**: Confirms all requested outputs were created
255
255
 
@@ -257,7 +257,7 @@ The Wizard includes built-in validation at each step:
257
257
 
258
258
  ### System Requirements
259
259
  - **Minimum**: 4 CPU cores, 8 GB RAM
260
- - **Recommended**: 8+ CPU cores, 16+ GB RAM
260
+ - **Recommended**: 8+ CPU cores, 16+ GB RAM
261
261
  - **Large Studies**: 16+ CPU cores, 32+ GB RAM
262
262
  - **Storage**: SSD recommended, ~2-3x raw data size free space
263
263
 
@@ -265,7 +265,7 @@ The Wizard includes built-in validation at each step:
265
265
 
266
266
  **For Small Studies (< 50 samples)**
267
267
  - Use `num_cores = 4-6`
268
- - Set `batch_size = 4-8`
268
+ - Set `batch_size = 4-8`
269
269
  - Use `merge_method = "kd"`
270
270
  - Enable all export formats
271
271
 
@@ -297,7 +297,7 @@ if not wizard.study_folder_path.glob("*.sample5"):
297
297
 
298
298
  # Continue with study-level processing
299
299
  wizard.assemble_study()
300
- wizard.align_and_merge()
300
+ wizard.align_and_merge()
301
301
  wizard.export_results()
302
302
  ```
303
303
 
@@ -312,7 +312,7 @@ studies = [
312
312
  for study_config in studies:
313
313
  wizard = Wizard(**study_config, num_cores=8)
314
314
  success = wizard.run_full_pipeline()
315
-
315
+
316
316
  if success:
317
317
  print(f"✅ {study_config['output']} completed")
318
318
  else:
@@ -338,7 +338,7 @@ if hasattr(wizard.study, 'features_filter'):
338
338
  )
339
339
  wizard.study.features_filter(selection)
340
340
 
341
- # Continue with standard pipeline
341
+ # Continue with standard pipeline
342
342
  wizard.align_and_merge()
343
343
  wizard.generate_plots()
344
344
  ```
@@ -370,4 +370,4 @@ The Wizard generates several types of output files:
370
370
  - `sample_name.mgf` - Individual sample MS2 spectra
371
371
  - `sample_name_2d.html` - Individual sample 2D plot
372
372
 
373
- The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.
373
+ The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.
@@ -66,7 +66,7 @@ class wizard_def:
66
66
  # Core Configuration
67
67
  source (str): Path to directory containing raw data files
68
68
  folder (str): Output directory for processed study
69
- polarity (str): Ion polarity mode ("positive" or "negative")
69
+ polarity (Optional[str]): Ion polarity mode ("positive", "negative", or None for auto-detection)
70
70
  num_cores (int): Number of CPU cores to use for parallel processing
71
71
 
72
72
  # File Discovery
@@ -98,7 +98,7 @@ class wizard_def:
98
98
  # === Core Configuration ===
99
99
  source: str = ""
100
100
  folder: str = ""
101
- polarity: str = "positive"
101
+ polarity: Optional[str] = None
102
102
  num_cores: int = 4
103
103
 
104
104
  # === File Discovery ===
@@ -198,12 +198,12 @@ class wizard_def:
198
198
  """Set polarity-specific defaults after initialization."""
199
199
  # Set default adducts based on polarity if not provided
200
200
  if not self.adducts:
201
- if self.polarity.lower() in ["positive", "pos"]:
201
+ if self.polarity and self.polarity.lower() in ["positive", "pos"]:
202
202
  self.adducts = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]
203
- elif self.polarity.lower() in ["negative", "neg"]:
203
+ elif self.polarity and self.polarity.lower() in ["negative", "neg"]:
204
204
  self.adducts = ["H-1:-:1.0", "CH2O2:0:0.5"]
205
205
  else:
206
- # Default to positive
206
+ # Default to positive if polarity is None or unknown
207
207
  self.adducts = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]
208
208
 
209
209
  # Validate num_cores
@@ -245,7 +245,7 @@ class Wizard:
245
245
  self,
246
246
  source: str = "",
247
247
  folder: str = "",
248
- polarity: str = "positive",
248
+ polarity: Optional[str] = None,
249
249
  adducts: Optional[List[str]] = None,
250
250
  num_cores: int = 6,
251
251
  **kwargs
@@ -256,7 +256,7 @@ class Wizard:
256
256
  Parameters:
257
257
  source: Directory containing raw data files
258
258
  folder: Output directory for processed study
259
- polarity: Ion polarity mode ("positive" or "negative")
259
+ polarity: Ion polarity mode ("positive", "negative", or None for auto-detection)
260
260
  adducts: List of adduct specifications (auto-set if None)
261
261
  num_cores: Number of CPU cores (0 = auto-detect 75% of available)
262
262
  **kwargs: Additional parameters (see wizard_def for full list)
@@ -298,8 +298,8 @@ class Wizard:
298
298
  self.folder_path = Path(self.params.folder)
299
299
  self.folder_path.mkdir(parents=True, exist_ok=True)
300
300
 
301
- # Auto-infer polarity from the first file if not explicitly set by user
302
- if polarity == "positive" and "polarity" not in kwargs:
301
+ # Auto-infer polarity from the first file if polarity is None
302
+ if self.params.polarity is None:
303
303
  inferred_polarity = self._infer_polarity_from_first_file()
304
304
  if inferred_polarity:
305
305
  self.params.polarity = inferred_polarity
@@ -311,7 +311,7 @@ class Wizard:
311
311
  Infer polarity from the first available raw data file.
312
312
 
313
313
  Returns:
314
- Inferred polarity string ("positive" or "negative") or None if detection fails
314
+ Inferred polarity string ("positive" or "negative") or "positive" as fallback
315
315
  """
316
316
  try:
317
317
  # Find first file
@@ -324,31 +324,85 @@ class Wizard:
324
324
  else:
325
325
  return 'positive'
326
326
 
327
- # Only implement for .wiff files initially (most common format)
327
+ # Handle different file formats
328
328
  if first_file.suffix.lower() == '.wiff':
329
- from masster.sample.load import _wiff_to_dict
330
-
331
- # Extract metadata from first file
332
- metadata_df = _wiff_to_dict(str(first_file))
333
-
334
- if not metadata_df.empty and 'polarity' in metadata_df.columns:
335
- # Get polarity from first experiment
336
- first_polarity = metadata_df['polarity'].iloc[0]
337
-
338
- # Convert numeric polarity codes to string
339
- if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
340
- return "positive"
341
- elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
342
- return "negative"
329
+ return self._infer_polarity_from_wiff(str(first_file))
330
+ elif first_file.suffix.lower() == '.raw':
331
+ return self._infer_polarity_from_raw(str(first_file))
332
+ elif first_file.suffix.lower() == '.mzml':
333
+ return self._infer_polarity_from_mzml(str(first_file))
343
334
 
344
335
  except Exception:
345
336
  # Silently fall back to default if inference fails
346
337
  pass
347
338
 
348
339
  return 'positive'
340
+
341
+ def _infer_polarity_from_wiff(self, filename: str) -> str:
342
+ """Infer polarity from WIFF file."""
343
+ try:
344
+ from masster.sample.load import _wiff_to_dict
345
+
346
+ # Extract metadata from first file
347
+ metadata_df = _wiff_to_dict(filename)
348
+
349
+ if not metadata_df.empty and 'polarity' in metadata_df.columns:
350
+ # Get polarity from first experiment
351
+ first_polarity = metadata_df['polarity'].iloc[0]
352
+
353
+ # Convert numeric polarity codes to string
354
+ if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
355
+ return "positive"
356
+ elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
357
+ return "negative"
358
+ except Exception:
359
+ pass
360
+ return 'positive'
361
+
362
+ def _infer_polarity_from_raw(self, filename: str) -> str:
363
+ """Infer polarity from Thermo RAW file."""
364
+ try:
365
+ from masster.sample.thermo import ThermoRawFileReader
366
+
367
+ with ThermoRawFileReader(filename) as raw_reader:
368
+ # Get polarity from first scan
369
+ first_scan = 1
370
+ polarity = raw_reader.get_polarity_from_scan_event(first_scan)
371
+ if polarity in ['positive', 'negative']:
372
+ return polarity
373
+ except Exception:
374
+ pass
375
+ return 'positive'
376
+
377
+ def _infer_polarity_from_mzml(self, filename: str) -> str:
378
+ """Infer polarity from mzML file."""
379
+ try:
380
+ # Import pyopenms with warnings suppression
381
+ import warnings
382
+ with warnings.catch_warnings():
383
+ warnings.filterwarnings("ignore", message=".*OPENMS_DATA_PATH.*", category=UserWarning)
384
+ import pyopenms as oms
385
+
386
+ # Load the first few spectra to check polarity
387
+ omsexp = oms.MSExperiment()
388
+ oms.MzMLFile().load(filename, omsexp)
389
+
390
+ if omsexp.getNrSpectra() > 0:
391
+ first_spectrum = omsexp.getSpectra()[0]
392
+ try:
393
+ pol = first_spectrum.getInstrumentSettings().getPolarity()
394
+ if pol == 1:
395
+ return "positive"
396
+ elif pol == 2:
397
+ return "negative"
398
+ except Exception:
399
+ pass
400
+ except Exception:
401
+ pass
402
+ return 'positive'
349
403
 
350
404
  @property
351
- def polarity(self) -> str:
405
+ def polarity(self) -> Optional[str]:
352
406
  """Get the ion polarity mode."""
353
407
  return self.params.polarity
354
408
 
@@ -1323,7 +1377,7 @@ class Wizard:
1323
1377
  def create_scripts(
1324
1378
  source: str = "",
1325
1379
  folder: str = "",
1326
- polarity: str = "positive",
1380
+ polarity: Optional[str] = None,
1327
1381
  adducts: Optional[List[str]] = None,
1328
1382
  num_cores: int = 0,
1329
1383
  **kwargs
@@ -1337,7 +1391,7 @@ def create_scripts(
1337
1391
  Parameters:
1338
1392
  source: Directory containing raw data files
1339
1393
  folder: Output directory for processed study
1340
- polarity: Ion polarity mode ("positive" or "negative")
1394
+ polarity: Ion polarity mode ("positive", "negative", or None for auto-detection)
1341
1395
  adducts: List of adduct specifications (auto-set if None)
1342
1396
  num_cores: Number of CPU cores (0 = auto-detect)
1343
1397
  **kwargs: Additional parameters
@@ -1368,7 +1368,7 @@ wheels = [
1368
1368
 
1369
1369
  [[package]]
1370
1370
  name = "masster"
1371
- version = "0.5.18"
1371
+ version = "0.5.19"
1372
1372
  source = { editable = "." }
1373
1373
  dependencies = [
1374
1374
  { name = "alpharaw" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes