PyPI - masster - Versions diffs - 0.5.18__tar.gz → 0.5.19__tar.gz - Mend

masster 0.5.18.tar.gz → 0.5.19.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (97)

{masster-0.5.18 → masster-0.5.19}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.5.18
+Version: 0.5.19
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster

{masster-0.5.18 → masster-0.5.19}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "masster"
-version = "0.5.18"
+version = "0.5.19"
 description = "Mass spectrometry data analysis package"
 authors = [
     { name = "Zamboni Lab" }

{masster-0.5.18 → masster-0.5.19}/src/masster/_version.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
-__version__ = "0.5.18"
+__version__ = "0.5.19"
 def get_version():

{masster-0.5.18 → masster-0.5.19}/src/masster/sample/defaults/find_ms2_def.py RENAMED Viewed

@@ -42,7 +42,7 @@ class find_ms2_defaults:
         - get_description(param_name): Get parameter description
         - get_info(param_name): Get full parameter metadata
         - list_parameters(): Get list of all parameter names
-        - get_mz_tolerance(file_type): Get appropriate m/z tolerance based on file type
+        - get_mz_tolerance(type): Get appropriate m/z tolerance based on type
     """
     # Core MS2 linking parameters
@@ -270,16 +270,16 @@ class find_ms2_defaults:
         return len(invalid_params) == 0, invalid_params
-    def get_mz_tolerance(self, file_type=None):
+    def get_mz_tolerance(self, type=None):
         """
-        Get the appropriate m/z tolerance based on file type.
+        Get the appropriate m/z tolerance based on type.
         Args:
-            file_type (str, optional): File type ('ztscan', 'dia', or other)
+            type (str, optional): Acquisition type ('ztscan', 'dia', or other)
         Returns:
             float: Appropriate m/z tolerance value
         """
-        if file_type is not None and file_type.lower() in ["ztscan", "dia"]:
+        if type is not None and type.lower() in ["ztscan", "dia"]:
             return self.get("mz_tol_ztscan")
         return self.get("mz_tol")

{masster-0.5.18 → masster-0.5.19}/src/masster/sample/h5.py RENAMED Viewed

@@ -1,4 +1,4 @@
-import json
+import json
 import os
 import h5py
@@ -94,8 +94,8 @@ def _save_sample5(
             metadata_group.attrs["file_source"] = str(self.file_source)
         else:
             metadata_group.attrs["file_source"] = ""
-        if hasattr(self, 'file_type') and self.file_type is not None:
-            metadata_group.attrs["file_type"] = str(self.file_type)
+        if hasattr(self, 'type') and self.type is not None:
+            metadata_group.attrs["file_type"] = str(self.type)
         else:
             metadata_group.attrs["file_type"] = ""
         if self.label is not None:
@@ -393,7 +393,7 @@ def _load_sample5(self, filename: str, map: bool = False):
             else:
                 self.file_source = self.file_path
-            self.file_type = decode_metadata_attr(
+            self.type = decode_metadata_attr(
                 metadata_group.attrs.get("file_type", ""),
             )
             self.label = decode_metadata_attr(metadata_group.attrs.get("label", ""))
@@ -1160,7 +1160,7 @@ def _load_sample5_study(self, filename: str, map: bool = False):
             else:
                 self.file_source = self.file_path
-            self.file_type = decode_metadata_attr(
+            self.type = decode_metadata_attr(
                 metadata_group.attrs.get("file_type", ""),
             )
             self.label = decode_metadata_attr(metadata_group.attrs.get("label", ""))
@@ -2302,7 +2302,7 @@ def create_h5_metadata_group(
     f: h5py.File,
     file_path: Optional[str],
     file_source: Optional[str],
-    file_type: Optional[str],
+    type: Optional[str],
     label: Optional[str],
 ) -> None:
     """
@@ -2312,7 +2312,7 @@ def create_h5_metadata_group(
         f: The HDF5 file object
         file_path: Source file path
         file_source: Original source file path
-        file_type: Source file type
+        type: Source file type
         label: Sample label
     """
     metadata_group = f.create_group("metadata")
@@ -2321,5 +2321,5 @@ def create_h5_metadata_group(
     metadata_group.attrs["file_source"] = (
         str(file_source) if file_source is not None else ""
     )
-    metadata_group.attrs["file_type"] = str(file_type) if file_type is not None else ""
+    metadata_group.attrs["file_type"] = str(type) if type is not None else ""
     metadata_group.attrs["label"] = str(label) if label is not None else ""

{masster-0.5.18 → masster-0.5.19}/src/masster/sample/processing.py RENAMED Viewed

@@ -1028,7 +1028,7 @@ def find_ms2(self, **kwargs):
     - mz_tol (float):
         Precursor m/z tolerance used for matching. The effective tolerance may be
-        adjusted by file type (the defaults class provides ``get_mz_tolerance(file_type)``).
+        adjusted by type (the defaults class provides ``get_mz_tolerance(type)``).
         Default: 0.5 (ztscan/DIA defaults may be larger).
     - centroid (bool):
@@ -1077,7 +1077,7 @@ def find_ms2(self, **kwargs):
     # Extract parameter values
     features = params.get("features")
-    mz_tol = params.get_mz_tolerance(self.file_type)
+    mz_tol = params.get_mz_tolerance(self.type)
     centroid = params.get("centroid")
     deisotope = params.get("deisotope")
     dia_stats = params.get("dia_stats")

{masster-0.5.18 → masster-0.5.19}/src/masster/sample/sciex.py RENAMED Viewed

@@ -379,7 +379,7 @@ class SciexWiffData:
         self._raw_file_path = ""
         self.centroided = centroided
         self.creation_time = ""
-        self.file_type = "sciex"
+        self.type = "sciex"
         self.instrument = "sciex"
         if self.centroided:
@@ -616,7 +616,7 @@ if __name__ == "__main__":
             print(f"  - Number of spectra: {len(wiff_data.spectrum_df)}")
             print(f"  - Number of peaks: {len(wiff_data.peak_df)}")
             print(f"  - Creation time: {wiff_data.creation_time}")
-            print(f"  - File type: {wiff_data.file_type}")
+            print(f"  - File type: {wiff_data.type}")
             print(f"  - Instrument: {wiff_data.instrument}")
             # Test getting peaks from first spectrum

{masster-0.5.18 → masster-0.5.19}/src/masster/sample/thermo.py RENAMED Viewed

@@ -524,7 +524,7 @@ class ThermoRawData:
         # File and instrument information
         self._raw_file_path = ""
         self.creation_time = ""
-        self.file_type = "thermo"
+        self.type = "thermo"
         self.instrument = "thermo"
         # Processing parameters

{masster-0.5.18 → masster-0.5.19}/src/masster/wizard/README.md RENAMED Viewed

@@ -12,7 +12,7 @@ from masster import Wizard
 # Create wizard with minimal configuration
 wizard = Wizard(
     data_source="./raw_data",      # Directory with raw files
-    study_folder="./processed",    # Output directory
+    study_folder="./processed",    # Output directory
     polarity="positive",           # or "negative"
     num_cores=4                    # CPU cores to use
 )
@@ -35,22 +35,22 @@ params = wizard_def(
     study_folder="./processed_advanced",
     polarity="negative",
     num_cores=8,
     # File discovery
     file_extensions=[".wiff", ".raw", ".mzML"],
     search_subfolders=True,
     skip_patterns=["blank", "QC", "test"],
     # Processing parameters
     adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
     chrom_fwhm=0.15,
     noise_threshold=5e4,
     # Study assembly
     rt_tolerance=1.0,
     mz_tolerance=0.008,
     min_samples_for_merge=30,
     # Output options
     export_formats=["csv", "xlsx", "mgf", "parquet"],
     generate_plots=True,
@@ -73,7 +73,7 @@ wizard.run_full_pipeline()
 ### 💾 Intelligent Resume
 - **Checkpoint System**: Automatically saves progress at key points
-- **File Tracking**: Remembers which files have been processed successfully
+- **File Tracking**: Remembers which files have been processed successfully
 - **Smart Recovery**: Resumes from last successful step after interruption
 - **Validation**: Verifies existing outputs before skipping
@@ -112,7 +112,7 @@ wizard.run_full_pipeline()
 ### 4. Feature Alignment
 - **RT Alignment**: Corrects retention time shifts between samples
-- **Mass Alignment**: Accounts for mass calibration differences
+- **Mass Alignment**: Accounts for mass calibration differences
 - **Algorithm Selection**: Supports KD-tree, QT-clustering, and chunked methods
 - **Validation**: Reports alignment statistics and quality metrics
@@ -232,7 +232,7 @@ Returns detailed status dictionary with current step, processed files, timing, a
 - Ensure sufficient disk space in output directory
 - Close any applications that might lock files
-**Processing Failures**
+**Processing Failures**
 - Check individual file integrity
 - Review `skip_patterns` to exclude problematic files
 - Examine detailed logs in `wizard.log` and `processing.log`
@@ -249,7 +249,7 @@ The Wizard includes built-in validation at each step:
 - **File Validation**: Checks file accessibility and format compatibility
 - **Processing Validation**: Verifies sample5 outputs can be loaded
-- **Study Validation**: Ensures study assembly completed successfully
+- **Study Validation**: Ensures study assembly completed successfully
 - **Alignment Validation**: Reports alignment statistics and warnings
 - **Export Validation**: Confirms all requested outputs were created
@@ -257,7 +257,7 @@ The Wizard includes built-in validation at each step:
 ### System Requirements
 - **Minimum**: 4 CPU cores, 8 GB RAM
-- **Recommended**: 8+ CPU cores, 16+ GB RAM
+- **Recommended**: 8+ CPU cores, 16+ GB RAM
 - **Large Studies**: 16+ CPU cores, 32+ GB RAM
 - **Storage**: SSD recommended, ~2-3x raw data size free space
@@ -265,7 +265,7 @@ The Wizard includes built-in validation at each step:
 **For Small Studies (< 50 samples)**
 - Use `num_cores = 4-6`
-- Set `batch_size = 4-8`
+- Set `batch_size = 4-8`
 - Use `merge_method = "kd"`
 - Enable all export formats
@@ -297,7 +297,7 @@ if not wizard.study_folder_path.glob("*.sample5"):
 # Continue with study-level processing
 wizard.assemble_study()
-wizard.align_and_merge()
+wizard.align_and_merge()
 wizard.export_results()
 ```
@@ -312,7 +312,7 @@ studies = [
 for study_config in studies:
     wizard = Wizard(**study_config, num_cores=8)
     success = wizard.run_full_pipeline()
     if success:
         print(f"✅ {study_config['output']} completed")
     else:
@@ -338,7 +338,7 @@ if hasattr(wizard.study, 'features_filter'):
     )
     wizard.study.features_filter(selection)
-# Continue with standard pipeline
+# Continue with standard pipeline
 wizard.align_and_merge()
 wizard.generate_plots()
 ```
@@ -370,4 +370,4 @@ The Wizard generates several types of output files:
 - `sample_name.mgf` - Individual sample MS2 spectra
 - `sample_name_2d.html` - Individual sample 2D plot
-The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.
+The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.

{masster-0.5.18 → masster-0.5.19}/src/masster/wizard/wizard.py RENAMED Viewed

@@ -66,7 +66,7 @@ class wizard_def:
         # Core Configuration
         source (str): Path to directory containing raw data files
         folder (str): Output directory for processed study
-        polarity (str): Ion polarity mode ("positive" or "negative")
+        polarity (Optional[str]): Ion polarity mode ("positive", "negative", or None for auto-detection)
         num_cores (int): Number of CPU cores to use for parallel processing
         # File Discovery
@@ -98,7 +98,7 @@ class wizard_def:
     # === Core Configuration ===
     source: str = ""
     folder: str = ""
-    polarity: str = "positive"
+    polarity: Optional[str] = None
     num_cores: int = 4
     # === File Discovery ===
@@ -198,12 +198,12 @@ class wizard_def:
         """Set polarity-specific defaults after initialization."""
         # Set default adducts based on polarity if not provided
         if not self.adducts:
-            if self.polarity.lower() in ["positive", "pos"]:
+            if self.polarity and self.polarity.lower() in ["positive", "pos"]:
                 self.adducts = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]
-            elif self.polarity.lower() in ["negative", "neg"]:
+            elif self.polarity and self.polarity.lower() in ["negative", "neg"]:
                 self.adducts = ["H-1:-:1.0", "CH2O2:0:0.5"]
             else:
-                # Default to positive
+                # Default to positive if polarity is None or unknown
                 self.adducts = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]
         # Validate num_cores
@@ -245,7 +245,7 @@ class Wizard:
         self,
         source: str = "",
         folder: str = "",
-        polarity: str = "positive",
+        polarity: Optional[str] = None,
         adducts: Optional[List[str]] = None,
         num_cores: int = 6,
         **kwargs
@@ -256,7 +256,7 @@ class Wizard:
         Parameters:
             source: Directory containing raw data files
             folder: Output directory for processed study
-            polarity: Ion polarity mode ("positive" or "negative")
+            polarity: Ion polarity mode ("positive", "negative", or None for auto-detection)
             adducts: List of adduct specifications (auto-set if None)
             num_cores: Number of CPU cores (0 = auto-detect 75% of available)
             **kwargs: Additional parameters (see wizard_def for full list)
@@ -298,8 +298,8 @@ class Wizard:
         self.folder_path = Path(self.params.folder)
         self.folder_path.mkdir(parents=True, exist_ok=True)
-        # Auto-infer polarity from the first file if not explicitly set by user
-        if polarity == "positive" and "polarity" not in kwargs:
+        # Auto-infer polarity from the first file if polarity is None
+        if self.params.polarity is None:
             inferred_polarity = self._infer_polarity_from_first_file()
             if inferred_polarity:
                 self.params.polarity = inferred_polarity
@@ -311,7 +311,7 @@ class Wizard:
         Infer polarity from the first available raw data file.
         Returns:
-            Inferred polarity string ("positive" or "negative") or None if detection fails
+            Inferred polarity string ("positive" or "negative") or "positive" as fallback
         """
         try:
             # Find first file
@@ -324,31 +324,85 @@ class Wizard:
             else:
                 return 'positive'
-            # Only implement for .wiff files initially (most common format)
+            # Handle different file formats
             if first_file.suffix.lower() == '.wiff':
-                from masster.sample.load import _wiff_to_dict
-                # Extract metadata from first file
-                metadata_df = _wiff_to_dict(str(first_file))
-                if not metadata_df.empty and 'polarity' in metadata_df.columns:
-                    # Get polarity from first experiment
-                    first_polarity = metadata_df['polarity'].iloc[0]
-                    # Convert numeric polarity codes to string
-                    if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
-                        return "positive"
-                    elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
-                        return "negative"
+                return self._infer_polarity_from_wiff(str(first_file))
+            elif first_file.suffix.lower() == '.raw':
+                return self._infer_polarity_from_raw(str(first_file))
+            elif first_file.suffix.lower() == '.mzml':
+                return self._infer_polarity_from_mzml(str(first_file))
         except Exception:
             # Silently fall back to default if inference fails
             pass
         return 'positive'
+    def _infer_polarity_from_wiff(self, filename: str) -> str:
+        """Infer polarity from WIFF file."""
+        try:
+            from masster.sample.load import _wiff_to_dict
+            # Extract metadata from first file
+            metadata_df = _wiff_to_dict(filename)
+            if not metadata_df.empty and 'polarity' in metadata_df.columns:
+                # Get polarity from first experiment
+                first_polarity = metadata_df['polarity'].iloc[0]
+                # Convert numeric polarity codes to string
+                if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
+                    return "positive"
+                elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
+                    return "negative"
+        except Exception:
+            pass
+        return 'positive'
+    def _infer_polarity_from_raw(self, filename: str) -> str:
+        """Infer polarity from Thermo RAW file."""
+        try:
+            from masster.sample.thermo import ThermoRawFileReader
+            with ThermoRawFileReader(filename) as raw_reader:
+                # Get polarity from first scan
+                first_scan = 1
+                polarity = raw_reader.get_polarity_from_scan_event(first_scan)
+                if polarity in ['positive', 'negative']:
+                    return polarity
+        except Exception:
+            pass
+        return 'positive'
+    def _infer_polarity_from_mzml(self, filename: str) -> str:
+        """Infer polarity from mzML file."""
+        try:
+            # Import pyopenms with warnings suppression
+            import warnings
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", message=".*OPENMS_DATA_PATH.*", category=UserWarning)
+                import pyopenms as oms
+            # Load the first few spectra to check polarity
+            omsexp = oms.MSExperiment()
+            oms.MzMLFile().load(filename, omsexp)
+            if omsexp.getNrSpectra() > 0:
+                first_spectrum = omsexp.getSpectra()[0]
+                try:
+                    pol = first_spectrum.getInstrumentSettings().getPolarity()
+                    if pol == 1:
+                        return "positive"
+                    elif pol == 2:
+                        return "negative"
+                except Exception:
+                    pass
+        except Exception:
+            pass
+        return 'positive'
     @property
-    def polarity(self) -> str:
+    def polarity(self) -> Optional[str]:
         """Get the ion polarity mode."""
         return self.params.polarity
@@ -1323,7 +1377,7 @@ class Wizard:
 def create_scripts(
     source: str = "",
     folder: str = "",
-    polarity: str = "positive",
+    polarity: Optional[str] = None,
     adducts: Optional[List[str]] = None,
     num_cores: int = 0,
     **kwargs
@@ -1337,7 +1391,7 @@ def create_scripts(
     Parameters:
         source: Directory containing raw data files
         folder: Output directory for processed study
-        polarity: Ion polarity mode ("positive" or "negative")
+        polarity: Ion polarity mode ("positive", "negative", or None for auto-detection)
         adducts: List of adduct specifications (auto-set if None)
         num_cores: Number of CPU cores (0 = auto-detect)
         **kwargs: Additional parameters

{masster-0.5.18 → masster-0.5.19}/uv.lock RENAMED Viewed

@@ -1368,7 +1368,7 @@ wheels = [
 [[package]]
 name = "masster"
-version = "0.5.18"
+version = "0.5.19"
 source = { editable = "." }
 dependencies = [
     { name = "alpharaw" },