masster 0.4.18__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of masster might be problematic.

masster/wizard/wizard.py CHANGED
@@ -44,7 +44,9 @@ wizard.export_results()
  from __future__ import annotations

  import os
+ import sys
  import time
+ import importlib
  import multiprocessing
  from pathlib import Path
  from typing import Optional, Any, Dict, List
@@ -124,17 +126,18 @@ class wizard_def:
      checkpoint_interval: int = 10 # Save progress every N files

      # === Study Assembly ===
-     min_samples_for_merge: int = 50
+     min_samples_for_merge: int = 2
      rt_tolerance: float = 1.5
-     mz_tolerance: float = 0.01
+     mz_max_diff: float = 0.01
      alignment_algorithm: str = "kd"
      merge_method: str = "chunked"

      # === Feature Detection ===
-     chrom_fwhm: float = 0.2
-     noise_threshold: float = 1e5
+     chrom_fwhm: float = 0.5
+     noise_threshold: float = 200.0
      chrom_peak_snr: float = 5.0
      tol_ppm: float = 10.0
+     detector_type: str = "unknown" # Detected detector type ("orbitrap", "quadrupole", "unknown")

      # === Output & Export ===
      generate_plots: bool = True
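
Note that mz_tolerance was renamed to mz_max_diff in this release, so constructor calls using the old field name will break. A minimal usage sketch of the new defaults, assuming wizard_def is a plain dataclass whose fields can be passed as keywords (paths are hypothetical):

# Hedged sketch, not documented API: construct wizard_def with the 0.4.20 field names.
from masster.wizard.wizard import wizard_def

params = wizard_def(
    data_source=r"D:\Data\raw_files",   # hypothetical input folder
    study_folder=r"D:\Data\output",     # hypothetical output folder
    min_samples_for_merge=2,            # new default (was 50)
    mz_max_diff=0.01,                   # renamed from mz_tolerance
    chrom_fwhm=0.5,                     # new default (was 0.2)
    noise_threshold=200.0,              # new default (was 1e5)
)
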
@@ -232,10 +235,16 @@ class Wizard:
      of mass spectrometry files from raw data to final study results, including:

      1. Raw data discovery and batch conversion to sample5 format
-     2. Study assembly with feature alignment and merging
-     3. Automated plot generation and result export
-     4. Intelligent resume capability for interrupted processes
-     5. Adaptive optimization based on study size and system resources
+     2. Automatic detector type detection and parameter optimization
+     3. Study assembly with feature alignment and merging
+     4. Automated plot generation and result export
+     5. Intelligent resume capability for interrupted processes
+     6. Adaptive optimization based on study size and system resources
+
+     The wizard automatically detects the type of MS detector using simplified rules:
+     - .raw files: Assume Orbitrap (noise threshold = 1e5)
+     - .wiff files: Assume Quadrupole (noise threshold = 200)
+     - .mzML files: Check metadata for Orbitrap detection

      The wizard handles the complete workflow with minimal user intervention
      while providing comprehensive logging and progress tracking.
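
The detection rules from the docstring, restated as an illustrative lookup table (not part of the package API):

# Illustrative only: the simplified detector rules described above.
DETECTOR_RULES = {
    ".raw":  ("orbitrap", 1e5),      # Thermo -> assume Orbitrap, high noise floor
    ".wiff": ("quadrupole", 200.0),  # SCIEX -> assume Quadrupole, low noise floor
    ".mzml": ("unknown", 200.0),     # inspect mzML metadata; 1e5 only if Orbitrap found
}
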
@@ -262,6 +271,10 @@ class Wizard:
              **kwargs: Additional parameters (see wizard_def for full list)
          """

+         # Auto-detect optimal number of cores (75% of total)
+         if num_cores <= 0:
+             num_cores = max(1, int(multiprocessing.cpu_count() * 0.75))
+
          # Create parameters instance
          if "params" in kwargs and isinstance(kwargs["params"], wizard_def):
              self.params = kwargs.pop("params")
@@ -293,6 +306,14 @@ class Wizard:
          self.study_folder_path = Path(self.params.study_folder)
          self.study_folder_path.mkdir(parents=True, exist_ok=True)

+         # Auto-infer polarity from the first file if not explicitly set by user
+         if polarity == "positive" and "polarity" not in kwargs:
+             inferred_polarity = self._infer_polarity_from_first_file()
+             if inferred_polarity:
+                 self.params.polarity = inferred_polarity
+                 # Update adducts based on inferred polarity
+                 self.params.__post_init__()
+
          # Setup logging
          self._setup_logging()

@@ -315,6 +336,47 @@ class Wizard:
          if self.params.resume_enabled:
              self._load_checkpoint()

+     def _infer_polarity_from_first_file(self) -> str:
+         """
+         Infer polarity from the first available raw data file.
+
+         Returns:
+             Inferred polarity string ("positive" or "negative") or None if detection fails
+         """
+         try:
+             # Find first file
+             for extension in ['.wiff', '.raw', '.mzML', '.d']:
+                 pattern = f"**/*{extension}" if True else f"*{extension}" # search_subfolders=True
+                 files = list(self.data_source_path.rglob(pattern))
+                 if files:
+                     first_file = files[0]
+                     break
+             else:
+                 return None
+
+             # Only implement for .wiff files initially (most common format)
+             if first_file.suffix.lower() == '.wiff':
+                 from masster.sample.load import _wiff_to_dict
+
+                 # Extract metadata from first file
+                 metadata_df = _wiff_to_dict(str(first_file))
+
+                 if not metadata_df.empty and 'polarity' in metadata_df.columns:
+                     # Get polarity from first experiment
+                     first_polarity = metadata_df['polarity'].iloc[0]
+
+                     # Convert numeric polarity codes to string
+                     if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
+                         return "positive"
+                     elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
+                         return "negative"
+
+         except Exception:
+             # Silently fall back to default if inference fails
+             pass
+
+         return None
+
      @property
      def polarity(self) -> str:
          """Get the polarity setting."""
@@ -324,6 +386,93 @@ class Wizard:
      def adducts(self) -> List[str]:
          """Get the adducts list."""
          return self.params.adducts
+
+     def _reload(self):
+         """
+         Reloads all masster modules to pick up any changes to their source code,
+         and updates the instance's class reference to the newly reloaded class version.
+         This ensures that the instance uses the latest implementation without restarting the interpreter.
+         """
+         # Reset logger configuration flags to allow proper reconfiguration after reload
+         try:
+             import masster.logger as logger_module
+
+             if hasattr(logger_module, "_WIZARD_LOGGER_CONFIGURED"):
+                 logger_module._WIZARD_LOGGER_CONFIGURED = False
+         except Exception:
+             pass
+
+         # Get the base module name (masster)
+         base_modname = self.__class__.__module__.split(".")[0]
+         current_module = self.__class__.__module__
+
+         # Dynamically find all wizard submodules
+         wizard_modules = []
+         wizard_module_prefix = f"{base_modname}.wizard."
+
+         # Get all currently loaded modules that are part of the wizard package
+         for module_name in sys.modules:
+             if (
+                 module_name.startswith(wizard_module_prefix)
+                 and module_name != current_module
+             ):
+                 wizard_modules.append(module_name)
+
+         # Add core masster modules
+         core_modules = [
+             f"{base_modname}._version",
+             f"{base_modname}.chromatogram",
+             f"{base_modname}.spectrum",
+             f"{base_modname}.logger",
+         ]
+
+         # Add sample submodules
+         sample_modules = []
+         sample_module_prefix = f"{base_modname}.sample."
+         for module_name in sys.modules:
+             if (
+                 module_name.startswith(sample_module_prefix)
+                 and module_name != current_module
+             ):
+                 sample_modules.append(module_name)
+
+         # Add study submodules
+         study_modules = []
+         study_module_prefix = f"{base_modname}.study."
+         for module_name in sys.modules:
+             if (
+                 module_name.startswith(study_module_prefix)
+                 and module_name != current_module
+             ):
+                 study_modules.append(module_name)
+
+         all_modules_to_reload = (
+             core_modules + wizard_modules + sample_modules + study_modules
+         )
+
+         # Reload all discovered modules
+         for full_module_name in all_modules_to_reload:
+             try:
+                 if full_module_name in sys.modules:
+                     mod = sys.modules[full_module_name]
+                     importlib.reload(mod)
+                     self.logger.debug(f"Reloaded module: {full_module_name}")
+             except Exception as e:
+                 self.logger.warning(f"Failed to reload module {full_module_name}: {e}")
+
+         # Finally, reload the current module (wizard.py)
+         try:
+             mod = __import__(current_module, fromlist=[current_module.split(".")[0]])
+             importlib.reload(mod)
+
+             # Get the updated class reference from the reloaded module
+             new = getattr(mod, self.__class__.__name__)
+             # Update the class reference of the instance
+             self.__class__ = new
+
+             self.logger.debug("Module reload completed")
+         except Exception as e:
+             self.logger.error(f"Failed to reload current module {current_module}: {e}")

      def _setup_logging(self):
          """Setup comprehensive logging system."""
@@ -526,6 +675,15 @@ class Wizard:
              chrom_fwhm=2.0 # Wider peaks
          )

+         # STEP 3.5: Validate feature detection results
+         if not hasattr(sample, 'features_df') or sample.features_df is None or len(sample.features_df) == 0:
+             self.logger.warning(f"No features detected in {file_path.name} - skipping additional processing")
+             # Still save the sample5 file for record keeping
+             sample.save(filename=str(output_file))
+             return output_file.stem
+
+         self.logger.info(f"Detected {len(sample.features_df)} features in {file_path.name}")
+
          # STEP 4: Adduct detection
          sample.find_adducts(adducts=self.adducts)

@@ -535,17 +693,11 @@ class Wizard:
          # STEP 6: Save processed data
          sample.save(filename=str(output_file))

-         # STEP 7: Generate additional outputs if requested
-         if "csv" in self.params.export_formats:
-             csv_file = output_file.with_suffix('.features.csv')
-             sample.export_features(filename=str(csv_file))
-
-         if "mgf" in self.params.export_formats:
-             mgf_file = output_file.with_suffix('.mgf')
-             sample.export_mgf(filename=str(mgf_file), use_cache=False)
+         # STEP 7: Generate additional outputs (only for samples with features)
+         # Skip CSV export and individual MGF export as requested

          if self.params.generate_plots:
-             plot_file = output_file.with_suffix('_2d.html')
+             plot_file = output_file.parent / (output_file.stem + "_2d.html")
              sample.plot_2d(filename=str(plot_file), markersize=4)

          # Memory cleanup
@@ -592,6 +744,10 @@ class Wizard:
              self.logger.warning("No files found for conversion")
              return False

+         # Detect detector type and adjust parameters before processing
+         detector_type = self._detect_detector_type()
+         self._adjust_parameters_for_detector(detector_type)
+
          # Filter out already processed files if resuming
          if self.params.resume_enabled and self.processed_files:
              remaining_files = []
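
The same detect-then-adjust pair is also invoked again in assemble_study (see the later hunk). A condensed sketch of the flow these two calls implement (illustrative, not package API; `wizard` stands for a configured Wizard instance):

# Illustrative only: how 0.4.20 wires detector detection into the pipeline.
detector_type = wizard._detect_detector_type()          # "orbitrap" | "quadrupole" | "unknown"
wizard._adjust_parameters_for_detector(detector_type)   # rewrites params.noise_threshold to match
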
@@ -705,6 +861,125 @@ class Wizard:
          self._save_checkpoint()
          return successful_count > 0

+     def _detect_detector_type(self) -> str:
+         """
+         Detect the type of MS detector from the first available file.
+
+         Simplified detection rules:
+         - .raw files: Assume Orbitrap (Thermo instruments)
+         - .wiff files: Assume Quadrupole (SCIEX instruments)
+         - .mzML files: Check metadata for Orbitrap detection
+
+         Returns:
+             String indicating detector type ("orbitrap", "quadrupole", "unknown")
+         """
+         try:
+             # Find first raw file to analyze
+             for extension in ['.raw', '.wiff', '.mzML', '.d']:
+                 if self.params.search_subfolders:
+                     pattern = f"**/*{extension}"
+                     files = list(self.data_source_path.rglob(pattern))
+                 else:
+                     pattern = f"*{extension}"
+                     files = list(self.data_source_path.glob(pattern))
+                 if files:
+                     first_file = files[0]
+                     break
+             else:
+                 self.logger.warning("No raw files found for detector detection")
+                 return "unknown"
+
+             self.logger.info(f"Detecting detector type from: {first_file.name}")
+
+             # Simplified detection rules
+             if first_file.suffix.lower() == '.raw':
+                 # RAW files are Thermo -> assume Orbitrap
+                 detector_type = "orbitrap"
+                 self.logger.info("Detected .raw file -> Thermo Orbitrap detector")
+                 return detector_type
+
+             elif first_file.suffix.lower() in ['.wiff', '.wiff2']:
+                 # WIFF files are SCIEX -> assume Quadrupole
+                 detector_type = "quadrupole"
+                 self.logger.info("Detected .wiff file -> SCIEX Quadrupole detector")
+                 return detector_type
+
+             elif first_file.suffix.lower() == '.mzml':
+                 # For mzML files, check metadata for Orbitrap detection
+                 try:
+                     import warnings
+                     with warnings.catch_warnings():
+                         warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning)
+                         import pyopenms as oms
+
+                     exp = oms.MSExperiment()
+                     oms.MzMLFile().load(str(first_file), exp)
+
+                     # Check instrument metadata for Orbitrap keywords
+                     instrument_info = []
+                     if hasattr(exp, 'getExperimentalSettings'):
+                         settings = exp.getExperimentalSettings()
+                         if hasattr(settings, 'getInstrument'):
+                             instrument = settings.getInstrument()
+                             if hasattr(instrument, 'getName'):
+                                 name = instrument.getName().decode() if hasattr(instrument.getName(), 'decode') else str(instrument.getName())
+                                 instrument_info.append(name.lower())
+                             if hasattr(instrument, 'getModel'):
+                                 model = instrument.getModel().decode() if hasattr(instrument.getModel(), 'decode') else str(instrument.getModel())
+                                 instrument_info.append(model.lower())
+
+                     # Check for Orbitrap keywords in instrument info
+                     orbitrap_keywords = ['orbitrap', 'exactive', 'q-exactive', 'exploris', 'fusion', 'lumos', 'velos', 'elite']
+                     instrument_text = ' '.join(instrument_info)
+
+                     if any(keyword in instrument_text for keyword in orbitrap_keywords):
+                         detector_type = "orbitrap"
+                         self.logger.info(f"Detected mzML with Orbitrap instrument: {instrument_text}")
+                     else:
+                         detector_type = "unknown"
+                         self.logger.info(f"Detected mzML with unknown instrument: {instrument_text}")
+
+                     return detector_type
+
+                 except Exception as e:
+                     self.logger.warning(f"Failed to analyze mzML file for detector type: {e}")
+                     return "unknown"
+
+         except Exception as e:
+             self.logger.warning(f"Detector type detection failed: {e}")
+
+         return "unknown"
+
+     def _adjust_parameters_for_detector(self, detector_type: str):
+         """
+         Adjust processing parameters based on detected detector type.
+
+         Simplified rules:
+         - "orbitrap": Use 1e5 noise threshold (high background noise)
+         - "quadrupole": Use 200 noise threshold (default, lower noise)
+         - "unknown": Use 200 noise threshold (default)
+
+         Parameters:
+             detector_type: Type of detector detected ("orbitrap", "quadrupole", "unknown")
+         """
+         original_noise = self.params.noise_threshold
+         self.params.detector_type = detector_type # Store the detected type
+
+         if detector_type == "orbitrap":
+             # Orbitraps have much higher background noise, use 1e5 threshold
+             self.params.noise_threshold = 1e5
+             self._log_progress(f"Detector: Orbitrap detected - adjusted noise threshold: {original_noise} -> {self.params.noise_threshold}")
+
+         elif detector_type == "quadrupole":
+             # Quadrupole instruments have lower noise, use default threshold
+             self.params.noise_threshold = 200.0
+             self._log_progress(f"Detector: Quadrupole detected - noise threshold: {self.params.noise_threshold}")
+
+         else:
+             # Unknown detector type, keep default
+             self.params.noise_threshold = 200.0
+             self._log_progress(f"Detector: Unknown type detected - using default noise threshold: {self.params.noise_threshold}")
+
      def assemble_study(self) -> bool:
          """
          Assemble processed sample5 files into a study.
@@ -725,6 +1000,10 @@ class Wizard:
          self._log_progress(f"Assembling study from {len(sample5_files)} sample5 files")

          try:
+             # Detect detector type and adjust parameters if needed
+             detector_type = self._detect_detector_type()
+             self._adjust_parameters_for_detector(detector_type)
+
              # Create study with optimized settings
              import masster
              study_params = study_defaults(
@@ -779,25 +1058,39 @@ class Wizard:
              return False

          try:
+             # Determine optimal algorithms based on study size
+             num_samples = len(self.study.samples_df)
+
+             if num_samples < 500:
+                 # For smaller studies: use qt for both alignment and merge
+                 alignment_algorithm = "qt"
+                 merge_method = "qt"
+                 self.logger.info(f"Small study ({num_samples} samples) - using qt algorithms")
+             else:
+                 # For larger studies: use kd for alignment and qt-chunked for merge
+                 alignment_algorithm = "kd"
+                 merge_method = "qt-chunked"
+                 self.logger.info(f"Large study ({num_samples} samples) - using kd alignment and qt-chunked merge")
+
              # Align features across samples
              align_params = align_defaults(
                  rt_tol=self.params.rt_tolerance,
-                 mz_tol=self.params.mz_tolerance,
-                 algorithm=self.params.alignment_algorithm
+                 mz_max_diff=self.params.mz_max_diff,
+                 algorithm=alignment_algorithm
              )
-
-             self.logger.info(f"Aligning features with RT tolerance {self.params.rt_tolerance}s, m/z tolerance {self.params.mz_tolerance} Da")
+
+             self.logger.info(f"Aligning features with RT tolerance {self.params.rt_tolerance}s, m/z max diff {self.params.mz_max_diff} Da, algorithm: {alignment_algorithm}")
              self.study.align(params=align_params)

              # Merge aligned features
              merge_params = merge_defaults(
-                 method=self.params.merge_method,
+                 method=merge_method,
                  rt_tol=self.params.rt_tolerance,
-                 mz_tol=self.params.mz_tolerance,
+                 mz_tol=self.params.mz_max_diff,
                  min_samples=self.params.min_samples_for_merge
              )

-             self.logger.info(f"Merging features using {self.params.merge_method} method")
+             self.logger.info(f"Merging features using {merge_method} method")
              self.study.merge(params=merge_params)

              # Log results
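
The new size-based algorithm selection, restated as a standalone sketch (the helper name is illustrative, not package API); note that it overrides the alignment_algorithm and merge_method stored in wizard_def:

# Illustrative only: 0.4.20 picks alignment/merge algorithms from study size.
def choose_algorithms(num_samples: int) -> tuple:
    if num_samples < 500:
        return "qt", "qt"          # small study: QT clustering for alignment and merge
    return "kd", "qt-chunked"      # large study: KD-tree alignment, chunked QT merge
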
@@ -920,12 +1213,47 @@ class Wizard:
              self._log_progress(f"Completed {exports_completed} exports")
              self._save_checkpoint()
+
+             # Always perform additional export methods as requested
+             self._export_additional_formats()
+
              return True

          except Exception as e:
              self.logger.error(f"Failed to export results: {e}")
              return False

+     def _export_additional_formats(self):
+         """Export additional formats: xlsx, parquet, save, and mgf."""
+         self.logger.info("=== Exporting Additional Formats ===")
+
+         try:
+             # Force export xlsx (study results in Excel format)
+             xlsx_file = self.study_folder_path / "study_results.xlsx"
+             if hasattr(self.study, 'export_xlsx'):
+                 self.study.export_xlsx(filename=str(xlsx_file))
+                 self.logger.info(f"Exported Excel: {xlsx_file}")
+
+             # Force export parquet (efficient binary format)
+             parquet_file = self.study_folder_path / "study_data.parquet"
+             if hasattr(self.study, 'export_parquet'):
+                 self.study.export_parquet(filename=str(parquet_file))
+                 self.logger.info(f"Exported Parquet: {parquet_file}")
+
+             # Force save the study in study5 format
+             study_file = self.study_folder_path / "final_study.study5"
+             self.study.save(filename=str(study_file))
+             self.logger.info(f"Saved study: {study_file}")
+
+             # Force export MGF for MS2 spectra
+             mgf_file = self.study_folder_path / "consensus_ms2.mgf"
+             if hasattr(self.study, 'export_mgf'):
+                 self.study.export_mgf(filename=str(mgf_file))
+                 self.logger.info(f"Exported MGF: {mgf_file}")
+
+         except Exception as e:
+             self.logger.warning(f"Some additional exports failed: {e}")
+
      def save_study(self) -> bool:
          """
          Save the final study in optimized format.
@@ -977,7 +1305,7 @@ class Wizard:
              f.write(f"Successful Files: {len(self.processed_files)}\n")
              f.write(f"Failed Files: {len(self.failed_files)}\n")
              f.write(f"RT Tolerance: {self.params.rt_tolerance}s\n")
-             f.write(f"m/z Tolerance: {self.params.mz_tolerance} Da\n")
+             f.write(f"m/z Max Diff: {self.params.mz_max_diff} Da\n")
              f.write(f"Merge Method: {self.params.merge_method}\n")
              f.write(f"Processing Time: {self._get_total_processing_time()}\n")

@@ -1144,6 +1472,753 @@ class Wizard:
              }
          }

+     def execute(self) -> bool:
+         """
+         Execute the complete automated processing pipeline.
+
+         This is a convenience method that runs the full pipeline with the wizard's
+         current configuration. It performs standalone analysis of the samples/studies
+         as proposed by the Wizard.
+
+         Returns:
+             True if execution completed successfully, False otherwise
+         """
+         self._log_progress("Executing Wizard automated processing...")
+         return self.run_full_pipeline()
+
+     def export_script(self, filename: str) -> bool:
+         """
+         Generate a Python script that replicates the wizard's processing steps.
+
+         Creates a standalone Python script that can be executed independently
+         to perform the same analysis as the wizard with the current configuration.
+         The script will be saved in the study folder.
+
+         This is useful for:
+         - Creating reproducible analysis scripts
+         - Customizing processing steps
+         - Running analysis in different environments
+         - Batch processing automation
+
+         Parameters:
+             filename: Filename for the script (should end with .py). Script will be saved in the study folder.
+
+         Returns:
+             True if script was generated successfully, False otherwise
+         """
+         self._log_progress("Generating analysis script...")
+
+         try:
+             # Ensure the filename is just a filename, not a full path
+             script_filename = Path(filename).name
+             if not script_filename.endswith('.py'):
+                 script_filename = script_filename.replace(Path(script_filename).suffix, '') + '.py'
+
+             # Place the script in the study folder
+             script_path = self.study_folder_path / script_filename
+
+             # Generate the script content
+             script_content = self._generate_script_content()
+
+             # Write the script
+             with open(script_path, 'w', encoding='utf-8') as f:
+                 f.write(script_content)
+
+             self._log_progress(f"Analysis script saved: {os.path.abspath(script_path)}")
+             self.logger.info(f"Generated standalone analysis script: {os.path.abspath(script_path)}")
+
+             return True
+
+         except Exception as e:
+             self.logger.error(f"Failed to generate script: {e}")
+             return False
+
+     def to_script(self, filename: str) -> bool:
+         """
+         [DEPRECATED] Use export_script() instead.
+
+         Backward compatibility alias for export_script().
+         """
+         return self.export_script(filename)
+
+     def _generate_script_content(self) -> str:
+         """
+         Generate the content for the standalone analysis script.
+
+         Returns:
+             Complete Python script content as string
+         """
+         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+         # Create script header
+         script_lines = [
+             '#!/usr/bin/env python3',
+             '"""',
+             'Standalone Mass Spectrometry Analysis Script',
+             f'Generated by masster.Wizard on {timestamp}',
+             '',
+             'This script replicates the automated processing pipeline configured',
+             'in the Wizard with the following settings:',
+             f'- Data Source: {self.params.data_source.replace(chr(92), chr(92)*2)}',
+             f'- Study Folder: {self.params.study_folder.replace(chr(92), chr(92)*2)}',
+             f'- Polarity: {self.params.polarity}',
+             f'- Number of Cores: {self.params.num_cores}',
+             f'- Adducts: {", ".join(self.params.adducts)}',
+             f'- RT Tolerance: {self.params.rt_tolerance}s',
+             f'- m/z Max Diff: {self.params.mz_max_diff} Da',
+             f'- Merge Method: {self.params.merge_method}',
+             '"""',
+             '',
+             'import os',
+             'import sys',
+             'import time',
+             'import multiprocessing',
+             'from pathlib import Path',
+             'from typing import List, Optional',
+             'import concurrent.futures',
+             'from datetime import datetime',
+             '',
+             '# Add error handling for masster import',
+             '# First, try to add the masster directory to the Python path',
+             'try:',
+             '    # Try to find masster by looking for it in common development locations',
+             '    possible_paths = [',
+             '        Path(__file__).parent.parent, # Script is in masster subfolder',
+             '        Path(__file__).parent.parent.parent, # Script is in study folder',
+             '        Path(os.getcwd()), # Current working directory',
+             '        Path(os.getcwd()).parent, # Parent of current directory',
+             '        Path(r"D:\\SW\\massistant"), # Specific development path',
+             '        Path.home() / "massistant", # Home directory',
+             '        Path.home() / "SW" / "massistant", # Common dev location',
+             '    ]',
+             '    ',
+             '    masster_found = False',
+             '    for possible_path in possible_paths:',
+             '        masster_dir = possible_path / "masster"',
+             '        if masster_dir.exists() and (masster_dir / "__init__.py").exists():',
+             '            if str(possible_path) not in sys.path:',
+             '                sys.path.insert(0, str(possible_path))',
+             '            masster_found = True',
+             '            print(f"Found masster at: {possible_path}")',
+             '            break',
+             '    ',
+             '    if not masster_found:',
+             '        # Try adding current directory to path as fallback',
+             '        current_dir = Path(os.getcwd())',
+             '        if str(current_dir) not in sys.path:',
+             '            sys.path.insert(0, str(current_dir))',
+             '    ',
+             '    import masster',
+             'except ImportError as e:',
+             '    print(f"Error: masster library not found. {e}")',
+             '    print("Please ensure masster is installed or run this script from the masster directory.")',
+             '    print("You can install masster with: pip install -e .")',
+             '    sys.exit(1)',
+             '',
+             '',
+             'def infer_polarity_from_first_file():',
+             '    """Infer polarity from the first available raw data file."""',
+             '    try:',
+             '        data_source_path = Path(DATA_SOURCE)',
+             '        # Find first file',
+             '        for extension in [\'.wiff\', \'.raw\', \'.mzML\', \'.d\']:',
+             '            pattern = f"**/*{extension}"',
+             '            files = list(data_source_path.rglob(pattern))',
+             '            if files:',
+             '                first_file = files[0]',
+             '                break',
+             '        else:',
+             '            return None',
+             '        ',
+             '        # Only implement for .wiff files initially',
+             '        if first_file.suffix.lower() == \'.wiff\':',
+             '            from masster.sample.load import _wiff_to_dict',
+             '            ',
+             '            # Extract metadata from first file',
+             '            metadata_df = _wiff_to_dict(str(first_file))',
+             '            ',
+             '            if not metadata_df.empty and \'polarity\' in metadata_df.columns:',
+             '                # Get polarity from first experiment',
+             '                first_polarity = metadata_df[\'polarity\'].iloc[0]',
+             '                ',
+             '                # Convert numeric polarity codes to string',
+             '                if first_polarity == 1 or str(first_polarity).lower() in [\'positive\', \'pos\', \'+\']:',
+             '                    return "positive"',
+             '                elif first_polarity == -1 or str(first_polarity).lower() in [\'negative\', \'neg\', \'-\']:',
+             '                    return "negative"',
+             '    except Exception:',
+             '        pass',
+             '    return None',
+             '',
+             '',
+             '# Configuration Parameters',
+             f'DATA_SOURCE = r"{self.params.data_source}"',
+             f'STUDY_FOLDER = r"{self.params.study_folder}"',
+             '',
+             '# Auto-infer polarity from first file, fall back to default',
+             'detected_polarity = infer_polarity_from_first_file()',
+             f'POLARITY = detected_polarity or "{self.params.polarity}"',
+             'NUM_CORES = max(1, int(multiprocessing.cpu_count() * 0.75)) # Auto-detect 75% of cores',
+             '',
+             '# Set adducts based on detected polarity',
+             'if POLARITY.lower() in ["positive", "pos"]:',
+             '    ADDUCTS = ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"]',
+             'elif POLARITY.lower() in ["negative", "neg"]:',
+             '    ADDUCTS = ["H-1:-:1.0", "CH2O2:0:0.5"]',
+             'else:',
+             f'    ADDUCTS = {self.params.adducts!r} # Fall back to original',
+             f'RT_TOLERANCE = {self.params.rt_tolerance}',
+             f'MZ_TOLERANCE = {self.params.mz_max_diff}',
+             f'MERGE_METHOD = "{self.params.merge_method}"',
+             f'BATCH_SIZE = {self.params.batch_size}',
+             f'CHROM_FWHM = {self.params.chrom_fwhm}',
+             f'NOISE_THRESHOLD = {self.params.noise_threshold}',
+             f'CHROM_PEAK_SNR = {self.params.chrom_peak_snr}',
+             f'TOL_PPM = {self.params.tol_ppm}',
+             f'MIN_SAMPLES_FOR_MERGE = {self.params.min_samples_for_merge}',
+             '',
+             '# File discovery settings',
+             "FILE_EXTENSIONS = ['.wiff', '.raw', '.mzML']",
+             f'SEARCH_SUBFOLDERS = {self.params.search_subfolders}',
+             "SKIP_PATTERNS = []",
+             f'MAX_FILE_SIZE_GB = {self.params.max_file_size_gb}',
+             '',
+             '# Output settings',
+             f'GENERATE_PLOTS = {self.params.generate_plots}',
+             f'EXPORT_FORMATS = {self.params.export_formats!r}',
+             f'COMPRESS_OUTPUT = {self.params.compress_output}',
+             f'CLEANUP_TEMP_FILES = {self.params.cleanup_temp_files}',
+             '',
+             '',
+             'def log_progress(message: str):',
+             '    """Log progress message with timestamp."""',
+             '    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")',
+             '    print(f"[{timestamp}] {message}")',
+             '',
+             '',
+             'def discover_files() -> List[Path]:',
+             '    """Discover raw data files in the source directory."""',
+             '    log_progress("Discovering raw data files...")',
+             '    data_source_path = Path(DATA_SOURCE)',
+             '    found_files = []',
+             '    ',
+             '    for extension in FILE_EXTENSIONS:',
+             '        if SEARCH_SUBFOLDERS:',
+             '            pattern = f"**/*{extension}"',
+             '            files = list(data_source_path.rglob(pattern))',
+             '        else:',
+             '            pattern = f"*{extension}"',
+             '            files = list(data_source_path.glob(pattern))',
+             '        ',
+             '        # Filter out files matching skip patterns',
+             '        filtered_files = []',
+             '        for file_path in files:',
+             '            skip_file = False',
+             '            for pattern in SKIP_PATTERNS:',
+             '                if pattern.lower() in file_path.name.lower():',
+             '                    skip_file = True',
+             '                    print(f"Skipping file (matches pattern \'{pattern}\'): {file_path.name}")',
+             '                    break',
+             '            ',
+             '            if not skip_file:',
+             '                # Check file size',
+             '                try:',
+             '                    file_size_gb = file_path.stat().st_size / (1024**3)',
+             '                    if file_size_gb > MAX_FILE_SIZE_GB:',
+             '                        print(f"Large file ({file_size_gb:.1f}GB): {file_path.name}")',
+             '                    filtered_files.append(file_path)',
+             '                except Exception as e:',
+             '                    print(f"Could not check file size for {file_path}: {e}")',
+             '                    filtered_files.append(file_path)',
+             '        ',
+             '        found_files.extend(filtered_files)',
+             '        log_progress(f"Found {len(filtered_files)} {extension} files")',
+             '    ',
+             '    # Remove duplicates and sort',
+             '    found_files = sorted(list(set(found_files)))',
+             '    log_progress(f"Total files discovered: {len(found_files)}")',
+             '    return found_files',
+             '',
+             '',
+             'def process_single_file(file_path: Path) -> Optional[str]:',
+             '    """Process a single file to sample5 format."""',
+             '    import gc',
+             '    study_folder_path = Path(STUDY_FOLDER)',
+             '    ',
+             '    # Generate output filename',
+             '    file_out = file_path.stem + ".sample5"',
+             '    output_file = study_folder_path / file_out',
+             '    ',
+             '    # Check if file already exists',
+             '    if output_file.exists():',
+             '        try:',
+             '            # Try to load existing file to verify it\'s valid',
+             '            sample = masster.Sample(log_level="ERROR")',
+             '            sample.load(str(output_file))',
+             '            print(f"Skipping {file_path.name} (already processed)")',
+             '            return output_file.stem',
+             '        except Exception:',
+             '            # If loading fails, file needs to be reprocessed',
+             '            pass',
+             '    ',
+             '    print(f"Processing {file_path.name}")',
+             '    ',
+             '    try:',
+             '        # Initialize sample',
+             '        sample = masster.Sample(',
+             '            log_label=file_path.name,',
+             '            log_level="ERROR" # Reduce logging overhead',
+             '        )',
+             '        ',
+             '        # STEP 1: Load raw data',
+             '        sample.load(str(file_path))',
+             '        ',
+             '        # STEP 2: Feature detection - First pass (strict parameters)',
+             '        sample.find_features(',
+             '            chrom_fwhm=CHROM_FWHM,',
+             '            noise=NOISE_THRESHOLD,',
+             '            tol_ppm=TOL_PPM,',
+             '            chrom_peak_snr=CHROM_PEAK_SNR,',
+             '            min_trace_length_multiplier=0.5,',
+             '            chrom_fwhm_min=CHROM_FWHM',
+             '        )',
+             '        ',
+             '        # STEP 3: Feature detection - Second pass (relaxed parameters)',
+             '        sample.find_features(',
+             '            chrom_peak_snr=CHROM_PEAK_SNR,',
+             '            noise=NOISE_THRESHOLD / 10, # Lower noise threshold',
+             '            chrom_fwhm=2.0 # Wider peaks',
+             '        )',
+             '        ',
+             '        # STEP 3.5: Validate feature detection results',
+             '        if not hasattr(sample, "features_df") or sample.features_df is None or len(sample.features_df) == 0:',
+             '            print(f"WARNING: No features detected in {file_path.name} - skipping additional processing")',
+             '            # Still save the sample5 file for record keeping',
+             '            sample.save(filename=str(output_file))',
+             '            return output_file.stem',
+             '        ',
+             '        print(f"Detected {len(sample.features_df)} features in {file_path.name}")',
+             '        ',
+             '        # STEP 4: Adduct detection',
+             '        sample.find_adducts(adducts=ADDUCTS)',
+             '        ',
+             '        # STEP 5: MS2 spectrum identification',
+             '        sample.find_ms2()',
+             '        ',
+             '        # STEP 6: Save processed data',
+             '        sample.save(filename=str(output_file))',
+             '        ',
+             '        # STEP 7: Generate additional outputs (only for samples with features)',
+             '        # Skip CSV export and individual MGF export as requested',
+             '        ',
+             '        if GENERATE_PLOTS:',
+             '            plot_file = output_file.parent / (output_file.stem + "_2d.html")',
+             '            sample.plot_2d(filename=str(plot_file), markersize=4)',
+             '        ',
+             '        # Memory cleanup',
+             '        result = output_file.stem',
+             '        del sample',
+             '        gc.collect()',
+             '        return result',
+             '    ',
+             '    except Exception as e:',
+             '        print(f"Error processing {file_path.name}: {e}")',
+             '        gc.collect()',
+             '        return None',
+             '',
+             '',
+             'def convert_to_sample5(file_list: List[Path]) -> bool:',
+             '    """Convert raw data files to sample5 format in parallel."""',
+             '    log_progress("=== Starting Sample5 Conversion ===")',
+             '    log_progress(f"Converting {len(file_list)} files to sample5 format")',
+             '    ',
+             '    conversion_start = time.time()',
+             '    successful_count = 0',
+             '    failed_count = 0',
+             '    ',
+             '    with concurrent.futures.ProcessPoolExecutor(max_workers=NUM_CORES) as executor:',
+             '        futures = [executor.submit(process_single_file, file_path) for file_path in file_list]',
+             '        ',
+             '        for i, future in enumerate(concurrent.futures.as_completed(futures)):',
+             '            result = future.result()',
+             '            if result:',
+             '                successful_count += 1',
+             '            else:',
+             '                failed_count += 1',
+             '            ',
+             '            # Progress update',
+             '            if (successful_count + failed_count) % 10 == 0:',
+             '                progress = (successful_count + failed_count) / len(file_list) * 100',
+             '                log_progress(f"Progress: {progress:.1f}% ({successful_count} successful, {failed_count} failed)")',
+             '    ',
+             '    conversion_time = time.time() - conversion_start',
+             '    log_progress("=== Sample5 Conversion Complete ===")',
+             '    log_progress(f"Successful: {successful_count}")',
+             '    log_progress(f"Failed: {failed_count}")',
+             '    log_progress(f"Total time: {conversion_time:.1f} seconds")',
+             '    ',
+             '    return successful_count > 0',
+             '',
+             '',
+             'def assemble_study() -> masster.Study:',
+             '    """Assemble processed sample5 files into a study."""',
+             '    log_progress("=== Starting Study Assembly ===")',
+             '    study_folder_path = Path(STUDY_FOLDER)',
+             '    ',
+             '    # Find all sample5 files',
+             '    sample5_files = list(study_folder_path.glob("*.sample5"))',
+             '    if not sample5_files:',
+             '        raise RuntimeError("No sample5 files found for study assembly")',
+             '    ',
+             '    log_progress(f"Assembling study from {len(sample5_files)} sample5 files")',
+             '    ',
+             '    # Create study with optimized settings',
+             '    from masster.study.defaults.study_def import study_defaults',
+             '    study_params = study_defaults(',
+             '        folder=str(study_folder_path),',
+             '        polarity=POLARITY,',
+             '        log_level="INFO",',
+             f'        log_label="Study-{self.params.polarity}",',
+             '        adducts=ADDUCTS',
+             '    )',
+             '    ',
+             '    study = masster.Study(params=study_params)',
+             '    ',
+             '    # Add all sample5 files',
+             '    sample5_pattern = str(study_folder_path / "*.sample5")',
+             '    study.add(sample5_pattern)',
+             '    log_progress(f"Added {len(study.samples_df)} samples to study")',
+             '    ',
+             '    # Filter features based on quality criteria',
+             '    if hasattr(study, "features_filter"):',
+             '        initial_features = len(study.features_df) if hasattr(study, "features_df") else 0',
+             '        feature_selection = study.features_select(',
+             '            chrom_coherence=0.3,',
+             '            chrom_prominence_scaled=1',
+             '        )',
+             '        study.features_filter(feature_selection)',
+             '        final_features = len(study.features_df) if hasattr(study, "features_df") else 0',
+             '        log_progress(f"Feature filtering: {initial_features} -> {final_features} features")',
+             '    ',
+             '    return study',
+             '',
+             '',
+             'def align_and_merge(study: masster.Study) -> masster.Study:',
+             '    """Perform feature alignment and merging."""',
+             '    log_progress("=== Starting Feature Alignment and Merging ===")',
+             '    ',
+             '    # Import alignment and merge defaults',
+             '    from masster.study.defaults.align_def import align_defaults',
+             '    from masster.study.defaults.merge_def import merge_defaults',
+             '    ',
+             '    # Determine optimal algorithms based on study size',
+             '    num_samples = len(study.samples_df)',
+             '    ',
+             '    if num_samples < 500:',
+             '        # For smaller studies: use qt for both alignment and merge',
+             '        alignment_algorithm = "qt"',
+             '        merge_method = "qt"',
+             '        log_progress(f"Small study ({num_samples} samples) - using qt algorithms")',
+             '    else:',
+             '        # For larger studies: use kd for alignment and qt-chunked for merge',
+             '        alignment_algorithm = "kd"',
+             '        merge_method = "qt-chunked"',
+             '        log_progress(f"Large study ({num_samples} samples) - using kd alignment and qt-chunked merge")',
+             '    ',
+             '    # Align features across samples',
+             '    align_params = align_defaults(',
+             '        rt_tol=RT_TOLERANCE,',
+             '        mz_max_diff=MZ_TOLERANCE,',
+             '        algorithm=alignment_algorithm',
+             '    )',
+             '    ',
+             '    log_progress(f"Aligning features with RT tolerance {RT_TOLERANCE}s, m/z tolerance {MZ_TOLERANCE} Da, algorithm: {alignment_algorithm}")',
+             '    study.align(params=align_params)',
+             '    ',
+             '    # Merge aligned features',
+             '    merge_params = merge_defaults(',
+             '        method=merge_method,',
+             '        rt_tol=RT_TOLERANCE,',
+             '        mz_tol=MZ_TOLERANCE,',
+             '        min_samples=MIN_SAMPLES_FOR_MERGE',
+             '    )',
+             '    ',
+             '    log_progress(f"Merging features using {merge_method} method")',
+             '    study.merge(params=merge_params)',
+             '    ',
+             '    # Log results',
+             '    num_consensus = len(study.consensus_df) if hasattr(study, "consensus_df") else 0',
+             '    log_progress(f"Generated {num_consensus} consensus features")',
+             '    ',
+             '    # Get study info',
+             '    if hasattr(study, "info"):',
+             '        study.info()',
+             '    ',
+             '    return study',
+             '',
+             '',
+             'def generate_plots(study: masster.Study) -> bool:',
+             '    """Generate visualization plots for the study."""',
+             '    if not GENERATE_PLOTS:',
+             '        log_progress("Plot generation disabled, skipping...")',
+             '        return True',
+             '    ',
+             '    log_progress("=== Generating Visualization Plots ===")',
+             '    study_folder_path = Path(STUDY_FOLDER)',
+             '    plots_generated = 0',
+             '    ',
+             '    try:',
+             '        # Alignment plot',
+             '        if hasattr(study, "plot_alignment"):',
+             '            alignment_plot = study_folder_path / "alignment_plot.html"',
+             '            study.plot_alignment(filename=str(alignment_plot))',
+             '            plots_generated += 1',
+             '            log_progress(f"Generated alignment plot: {alignment_plot}")',
+             '        ',
+             '        # Consensus 2D plot',
+             '        if hasattr(study, "plot_consensus_2d"):',
+             '            consensus_2d_plot = study_folder_path / "consensus_2d.html"',
+             '            study.plot_consensus_2d(filename=str(consensus_2d_plot))',
+             '            plots_generated += 1',
+             '            log_progress(f"Generated consensus 2D plot: {consensus_2d_plot}")',
+             '        ',
+             '        # PCA plot',
+             '        if hasattr(study, "plot_pca"):',
+             '            pca_plot = study_folder_path / "pca_plot.html"',
+             '            study.plot_pca(filename=str(pca_plot))',
+             '            plots_generated += 1',
+             '            log_progress(f"Generated PCA plot: {pca_plot}")',
+             '        ',
+             '        # Consensus statistics',
+             '        if hasattr(study, "plot_consensus_stats"):',
+             '            stats_plot = study_folder_path / "consensus_stats.html"',
+             '            study.plot_consensus_stats(filename=str(stats_plot))',
+             '            plots_generated += 1',
+             '            log_progress(f"Generated statistics plot: {stats_plot}")',
+             '        ',
+             '        log_progress(f"Generated {plots_generated} visualization plots")',
+             '        return True',
+             '    ',
+             '    except Exception as e:',
+             '        print(f"Failed to generate plots: {e}")',
+             '        return False',
+             '',
+             '',
+             'def export_results(study: masster.Study) -> bool:',
+             '    """Export study results in requested formats."""',
+             '    log_progress("=== Exporting Study Results ===")',
+             '    study_folder_path = Path(STUDY_FOLDER)',
+             '    exports_completed = 0',
+             '    ',
+             '    try:',
+             '        # Skip CSV export as requested',
+             '        ',
+             '        # Export as Excel',
+             '        if "xlsx" in EXPORT_FORMATS and hasattr(study, "export_xlsx"):',
+             '            xlsx_file = study_folder_path / "study_results.xlsx"',
+             '            study.export_xlsx(filename=str(xlsx_file))',
+             '            exports_completed += 1',
+             '            log_progress(f"Exported Excel: {xlsx_file}")',
+             '        ',
+             '        # Export MGF for MS2 spectra',
+             '        if "mgf" in EXPORT_FORMATS and hasattr(study, "export_mgf"):',
+             '            mgf_file = study_folder_path / "consensus_ms2.mgf"',
+             '            study.export_mgf(filename=str(mgf_file))',
+             '            exports_completed += 1',
+             '            log_progress(f"Exported MGF: {mgf_file}")',
+             '        ',
+             '        # Export as Parquet for efficient storage',
+             '        if "parquet" in EXPORT_FORMATS and hasattr(study, "export_parquet"):',
+             '            parquet_file = study_folder_path / "study_data.parquet"',
+             '            study.export_parquet(filename=str(parquet_file))',
+             '            exports_completed += 1',
+             '            log_progress(f"Exported Parquet: {parquet_file}")',
+             '        ',
+             '        log_progress(f"Completed {exports_completed} exports")',
+             '        ',
+             '        # Always perform additional exports as requested',
+             '        log_progress("=== Exporting Additional Formats ===")',
+             '        ',
+             '        try:',
+             '            # Force export xlsx (study results in Excel format)',
+             '            xlsx_file = study_folder_path / "study_results.xlsx"',
+             '            if hasattr(study, "export_xlsx"):',
+             '                study.export_xlsx(filename=str(xlsx_file))',
+             '                log_progress(f"Exported Excel: {xlsx_file}")',
+             '            ',
+             '            # Force export parquet (efficient binary format)',
+             '            parquet_file = study_folder_path / "study_data.parquet"',
+             '            if hasattr(study, "export_parquet"):',
+             '                study.export_parquet(filename=str(parquet_file))',
+             '                log_progress(f"Exported Parquet: {parquet_file}")',
+             '            ',
+             '            # Force save the study in study5 format',
+             '            study_file = study_folder_path / "final_study.study5"',
+             '            study.save(filename=str(study_file))',
+             '            log_progress(f"Saved study: {study_file}")',
+             '            ',
+             '            # Force export MGF for MS2 spectra',
+             '            mgf_file = study_folder_path / "consensus_ms2.mgf"',
+             '            if hasattr(study, "export_mgf"):',
+             '                study.export_mgf(filename=str(mgf_file))',
+             '                log_progress(f"Exported MGF: {mgf_file}")',
+             '        ',
+             '        except Exception as e:',
+             '            print(f"Some additional exports failed: {e}")',
+             '        ',
+             '        return True',
+             '    ',
+             '    except Exception as e:',
+             '        print(f"Failed to export results: {e}")',
+             '        return False',
+             '',
+             '',
+             'def save_study(study: masster.Study) -> bool:',
+             '    """Save the final study in optimized format."""',
+             '    log_progress("=== Saving Final Study ===")',
+             '    study_folder_path = Path(STUDY_FOLDER)',
+             '    ',
+             '    try:',
+             '        study_file = study_folder_path / "final_study.study5"',
+             '        ',
+             '        # Determine optimal save format based on study size',
+             '        num_samples = len(study.samples_df)',
+             '        num_features = len(study.consensus_df) if hasattr(study, "consensus_df") else 0',
+             '        ',
+             '        if num_samples > 50 or num_features > 10000:',
+             '            log_progress(f"Large study detected ({num_samples} samples, {num_features} features) - using compressed format")',
+             '            compress_output = True',
+             '        else:',
+             '            log_progress(f"Small study ({num_samples} samples, {num_features} features) - using standard format")',
+             '            compress_output = False',
+             '        ',
+             '        # Save study',
+             '        if compress_output and hasattr(study, "save_compressed"):',
+             '            study.save_compressed(filename=str(study_file))',
+             '            log_progress(f"Saved compressed study: {study_file}")',
+             '        else:',
+             '            study.save(filename=str(study_file))',
+             '            log_progress(f"Saved study: {study_file}")',
+             '        ',
+             '        # Save metadata summary',
+             '        metadata_file = study_folder_path / "study_metadata.txt"',
+             '        with open(metadata_file, "w") as f:',
+             '            f.write("Study Processing Summary\\n")',
+             '            f.write("========================\\n")',
+             '            f.write(f"Processing Date: {datetime.now().strftime(\'%Y-%m-%d %H:%M:%S\')}\\n")',
+             '            f.write(f"Polarity: {POLARITY}\\n")',
+             '            f.write(f"Adducts: {\', \'.join(ADDUCTS)}\\n")',
+             '            f.write(f"Number of Samples: {num_samples}\\n")',
+             '            f.write(f"Number of Consensus Features: {num_features}\\n")',
+             '            f.write(f"RT Tolerance: {RT_TOLERANCE}s\\n")',
+             '            f.write(f"m/z Tolerance: {MZ_TOLERANCE} Da\\n")',
+             '            f.write(f"Merge Method: {MERGE_METHOD}\\n")',
+             '        ',
+             '        log_progress(f"Saved study metadata: {metadata_file}")',
+             '        return True',
+             '    ',
+             '    except Exception as e:',
+             '        print(f"Failed to save study: {e}")',
+             '        return False',
+             '',
+             '',
+             'def cleanup_temp_files() -> bool:',
+             '    """Clean up temporary files if requested."""',
+             '    if not CLEANUP_TEMP_FILES:',
+             '        return True',
+             '    ',
+             '    log_progress("=== Cleaning Up Temporary Files ===")',
+             '    study_folder_path = Path(STUDY_FOLDER)',
+             '    ',
+             '    try:',
+             '        cleaned_count = 0',
+             '        ',
+             '        # Remove individual sample plots if study plots were generated',
+             '        if GENERATE_PLOTS:',
+             '            temp_plots = list(study_folder_path.glob("*_2d.html"))',
+             '            for plot_file in temp_plots:',
+             '                if plot_file.name not in ["alignment_plot.html", "consensus_2d.html", "pca_plot.html"]:',
+             '                    plot_file.unlink()',
+             '                    cleaned_count += 1',
+             '        ',
+             '        log_progress(f"Cleaned up {cleaned_count} temporary files")',
+             '        return True',
+             '    ',
+             '    except Exception as e:',
+             '        print(f"Failed to cleanup temp files: {e}")',
+             '        return False',
+             '',
+             '',
+             'def main():',
+             '    """Main execution function."""',
+             '    print("=" * 70)',
+             '    print("AUTOMATED MASS SPECTROMETRY ANALYSIS SCRIPT")',
+             f'    print("Generated by masster.Wizard on {timestamp}")',
+             '    print("=" * 70)',
+             '    ',
+             '    start_time = time.time()',
+             '    ',
+             '    try:',
+             '        # Ensure output directory exists',
+             '        Path(STUDY_FOLDER).mkdir(parents=True, exist_ok=True)',
+             '        ',
+             '        # Step 1: Discover files',
+             '        file_list = discover_files()',
+             '        if not file_list:',
+             '            print("No files found for processing")',
+             '            return False',
+             '        ',
+             '        # Step 2: Convert to sample5',
+             '        if not convert_to_sample5(file_list):',
+             '            print("Sample5 conversion failed")',
+             '            return False',
+             '        ',
+             '        # Step 3: Assemble study',
+             '        study = assemble_study()',
+             '        ',
+             '        # Step 4: Align and merge',
+             '        study = align_and_merge(study)',
+             '        ',
+             '        # Step 5: Generate plots',
+             '        generate_plots(study)',
+             '        ',
+             '        # Step 6: Export results',
+             '        export_results(study)',
+             '        ',
+             '        # Step 7: Save study',
+             '        save_study(study)',
+             '        ',
+             '        # Step 8: Cleanup',
+             '        cleanup_temp_files()',
+             '        ',
+             '        # Final summary',
+             '        total_time = time.time() - start_time',
+             '        print("=" * 70)',
+             '        print("ANALYSIS COMPLETED SUCCESSFULLY")',
+             '        print(f"Total processing time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")',
+             '        if hasattr(study, "consensus_df"):',
+             '            print(f"Consensus features generated: {len(study.consensus_df)}")',
+             '        print("=" * 70)',
+             '        ',
+             '        return True',
+             '    ',
+             '    except KeyboardInterrupt:',
+             '        print("\\nAnalysis interrupted by user")',
+             '        return False',
+             '    except Exception as e:',
+             '        print(f"Analysis failed with error: {e}")',
+             '        import traceback',
+             '        traceback.print_exc()',
+             '        return False',
+             '',
+             '',
+             'if __name__ == "__main__":',
+             '    success = main()',
+             '    sys.exit(0 if success else 1)',
+         ]
+
+         return '\n'.join(script_lines)
+
      def info(self):
          """Print comprehensive wizard status information."""
          status = self.get_status()
@@ -1157,6 +2232,8 @@ class Wizard:
          print(f"Polarity: {status['parameters']['polarity']}")
          print(f"CPU Cores: {status['parameters']['num_cores']}")
          print(f"Adducts: {', '.join(status['parameters']['adducts'])}")
+         print(f"Detector Type: {self.params.detector_type}")
+         print(f"Noise Threshold: {self.params.noise_threshold}")
          print(f"Processing Time: {status['processing_time']}")
          print(f"Files Processed: {status['processed_files']}")
          print(f"Files Failed: {status['failed_files']}")
@@ -1171,5 +2248,100 @@ class Wizard:
          print("=" * 50)


- # Export the main classes
- __all__ = ["Wizard", "wizard_def"]
+ def create_script(
+     source: str,
+     study_folder: str,
+     filename: str,
+     polarity: str = "positive",
+     adducts: Optional[List[str]] = None,
+     params: Optional[wizard_def] = None,
+     num_cores: int = 0,
+     **kwargs
+ ) -> bool:
+     """
+     Create a standalone analysis script without initializing a Wizard instance.
+
+     This function generates a Python script that replicates automated processing
+     steps with the specified configuration. The script can be executed independently
+     to perform the same analysis.
+
+     Parameters:
+         source: Directory containing raw data files
+         study_folder: Output directory for processed study
+         filename: Filename for the generated script (should end with .py)
+         polarity: Ion polarity mode ("positive" or "negative")
+         adducts: List of adduct specifications (auto-set if None)
+         params: Custom wizard_def parameters (optional)
+         num_cores: Number of CPU cores (0 = auto-detect)
+         **kwargs: Additional parameters to override defaults
+
+     Returns:
+         True if script was generated successfully, False otherwise
+
+     Example:
+         >>> from masster.wizard import create_script
+         >>> create_script(
+         ...     source=r'D:\\Data\\raw_files',
+         ...     study_folder=r'D:\\Data\\output',
+         ...     filename='run_masster.py',
+         ...     polarity='positive'
+         ... )
+     """
+
+     try:
+         # Create parameters
+         if params is not None:
+             # Use provided params as base
+             wizard_params = params
+             # Update with provided values
+             wizard_params.data_source = source
+             wizard_params.study_folder = study_folder
+             if polarity != "positive": # Only override if explicitly different
+                 wizard_params.polarity = polarity
+             if num_cores > 0:
+                 wizard_params.num_cores = num_cores
+             if adducts is not None:
+                 wizard_params.adducts = adducts
+         else:
+             # Create new params with provided values
+             wizard_params = wizard_def(
+                 data_source=source,
+                 study_folder=study_folder,
+                 polarity=polarity,
+                 num_cores=max(1, int(multiprocessing.cpu_count() * 0.75)) if num_cores <= 0 else num_cores
+             )
+
+             if adducts is not None:
+                 wizard_params.adducts = adducts
+
+         # Apply any additional kwargs
+         for key, value in kwargs.items():
+             if hasattr(wizard_params, key):
+                 setattr(wizard_params, key, value)
+
+         # Ensure study folder exists
+         study_path = Path(study_folder)
+         study_path.mkdir(parents=True, exist_ok=True)
+
+         # Create a temporary Wizard instance to generate the script
+         temp_wizard = Wizard(params=wizard_params)
+
+         # Generate the script using the existing method
+         success = temp_wizard.export_script(filename)
+
+         if success:
+             script_path = study_path / Path(filename).name
+             print(f"Analysis script created: {script_path.absolute()}")
+             print(f"Run with: python \"{script_path}\"")
+
+         return success
+
+     except Exception as e:
+         print(f"Failed to create script: {e}")
+         import traceback
+         traceback.print_exc()
+         return False
+
+
+ # Export the main classes and functions
+ __all__ = ["Wizard", "wizard_def", "create_script"]