masster 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of masster might be problematic.
- masster/__init__.py +6 -1
- masster/_version.py +1 -1
- masster/logger.py +42 -0
- masster/sample/h5.py +58 -1
- masster/sample/load.py +12 -5
- masster/sample/plot.py +56 -65
- masster/sample/processing.py +158 -0
- masster/sample/sample.py +2 -9
- masster/sample/sample5_schema.json +3 -0
- masster/sample/save.py +137 -59
- masster/spectrum.py +58 -9
- masster/study/export.py +238 -152
- masster/study/h5.py +65 -1
- masster/study/helpers.py +55 -14
- masster/study/merge.py +910 -67
- masster/study/plot.py +50 -7
- masster/study/processing.py +257 -1
- masster/study/save.py +48 -5
- masster/study/study.py +34 -3
- masster/study/study5_schema.json +3 -0
- masster/wizard/__init__.py +8 -2
- masster/wizard/wizard.py +612 -876
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/METADATA +1 -1
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/RECORD +27 -30
- masster/wizard/test_structure.py +0 -49
- masster/wizard/test_wizard.py +0 -285
- masster/wizard.py +0 -1175
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/WHEEL +0 -0
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/entry_points.txt +0 -0
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/licenses/LICENSE +0 -0
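
The bulk of the change is a rewrite of masster/wizard/wizard.py: the old multi-step pipeline methods (convert_to_sample5, assemble_study, align_and_merge, generate_plots, export_results) are replaced by two entry points, create_script() and execute(), which generate and optionally run a standalone analysis script. As a rough usage sketch assembled from the docstrings visible in the diff below (paths are placeholders, keyword names as documented there):

    # Sketch based on the 0.4.21 wizard docstrings shown below; paths are placeholders.
    from masster.wizard import execute

    # Generates run_masster.py in the output folder and runs it with the current interpreter.
    execute(
        source=r"D:\Data\raw_files",   # directory containing .wiff/.raw/.mzML files
        folder=r"D:\Data\output",      # output directory for the processed study
        polarity="positive",
    )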
masster/wizard/wizard.py
CHANGED
@@ -23,28 +23,22 @@ from masster import Wizard, wizard_def
 
     # Create wizard with default parameters
     wizard = Wizard(
-
-
+        source="./raw_data",
+        folder="./processed_study",
         polarity="positive",
         num_cores=4
     )
 
-    # Run complete processing pipeline
-    wizard.run_full_pipeline()
-
-    # Or run individual steps
-    wizard.convert_to_sample5()
-    wizard.assemble_study()
-    wizard.align_and_merge()
-    wizard.generate_plots()
-    wizard.export_results()
     ```
 """
 
 from __future__ import annotations
 
 import os
+import sys
 import time
+import importlib
+import glob
 import multiprocessing
 from pathlib import Path
 from typing import Optional, Any, Dict, List
@@ -57,6 +51,7 @@ from masster.logger import MassterLogger
 from masster.study.defaults.study_def import study_defaults
 from masster.study.defaults.align_def import align_defaults
 from masster.study.defaults.merge_def import merge_defaults
+from masster._version import __version__ as version
 
 
 @dataclass
@@ -69,8 +64,8 @@ class wizard_def:
 
     Attributes:
         # Core Configuration
-
-
+        source (str): Path to directory containing raw data files
+        folder (str): Output directory for processed study
         polarity (str): Ion polarity mode ("positive" or "negative")
         num_cores (int): Number of CPU cores to use for parallel processing
 
@@ -101,15 +96,15 @@ class wizard_def:
     """
 
     # === Core Configuration ===
-
-
+    source: str = ""
+    folder: str = ""
     polarity: str = "positive"
     num_cores: int = 4
 
     # === File Discovery ===
-    file_extensions: List[str] = field(default_factory=lambda: [".wiff", ".raw", ".mzML"
+    file_extensions: List[str] = field(default_factory=lambda: [".wiff", ".raw", ".mzML"])
     search_subfolders: bool = True
-    skip_patterns: List[str] = field(default_factory=lambda: ["blank", "
+    skip_patterns: List[str] = field(default_factory=lambda: ["blank", "test"])
 
     # === Processing Parameters ===
     adducts: List[str] = field(default_factory=list)  # Will be set based on polarity
@@ -124,17 +119,18 @@ class wizard_def:
     checkpoint_interval: int = 10  # Save progress every N files
 
     # === Study Assembly ===
-    min_samples_for_merge: int =
+    min_samples_for_merge: int = 2
     rt_tolerance: float = 1.5
-
+    mz_max_diff: float = 0.01
    alignment_algorithm: str = "kd"
-    merge_method: str = "
+    merge_method: str = "qt"
 
     # === Feature Detection ===
-    chrom_fwhm: float = 0.
-    noise_threshold: float =
+    chrom_fwhm: float = 0.5
+    noise_threshold: float = 200.0
     chrom_peak_snr: float = 5.0
     tol_ppm: float = 10.0
+    detector_type: str = "unknown"  # Detected detector type ("orbitrap", "quadrupole", "unknown")
 
     # === Output & Export ===
     generate_plots: bool = True
@@ -154,15 +150,15 @@ class wizard_def:
     optimize_memory: bool = True
     cleanup_temp_files: bool = True
     validate_outputs: bool = True
-
+
     _param_metadata: dict[str, dict[str, Any]] = field(
         default_factory=lambda: {
-            "
+            "source": {
                 "dtype": str,
                 "description": "Path to directory containing raw data files",
                 "required": True,
             },
-            "
+            "folder": {
                 "dtype": str,
                 "description": "Output directory for processed study",
                 "required": True,
@@ -218,62 +214,63 @@ class wizard_def:
             self.num_cores = max_cores
 
         # Ensure paths are absolute
-        if self.
-            self.
-        if self.
-            self.
+        if self.source:
+            self.source = os.path.abspath(self.source)
+        if self.folder:
+            self.folder = os.path.abspath(self.folder)
 
 
 class Wizard:
     """
-
-
-    The Wizard class provides end-to-end automation for processing collections
-    of mass spectrometry files from raw data to final study results, including:
+    Simplified Wizard for automated mass spectrometry data processing.
 
-
-
-
-    4. Intelligent resume capability for interrupted processes
-    5. Adaptive optimization based on study size and system resources
+    The Wizard provides a clean interface for creating and executing analysis scripts
+    that process raw MS data through the complete pipeline: file discovery, feature
+    detection, sample processing, study assembly, alignment, merging, and export.
 
-
-
+    This simplified version focuses on two core functions:
+    - create_script(): Generate standalone analysis scripts
+    - execute(): Create and run analysis scripts
     """
 
     def __init__(
         self,
-
-
+        source: str = "",
+        folder: str = "",
         polarity: str = "positive",
         adducts: Optional[List[str]] = None,
-        num_cores: int =
+        num_cores: int = 0,
         **kwargs
     ):
         """
-        Initialize the Wizard
+        Initialize the Wizard with analysis parameters.
 
         Parameters:
-
-
+            source: Directory containing raw data files
+            folder: Output directory for processed study
             polarity: Ion polarity mode ("positive" or "negative")
             adducts: List of adduct specifications (auto-set if None)
-            num_cores: Number of CPU cores
+            num_cores: Number of CPU cores (0 = auto-detect 75% of available)
             **kwargs: Additional parameters (see wizard_def for full list)
         """
 
+        # Auto-detect optimal number of cores if not specified
+        if num_cores <= 0:
+            num_cores = max(1, int(multiprocessing.cpu_count() * 0.75))
+
         # Create parameters instance
         if "params" in kwargs and isinstance(kwargs["params"], wizard_def):
             self.params = kwargs.pop("params")
         else:
-            # Create default parameters
+            # Create default parameters
             self.params = wizard_def(
-
-
+                source=source,
+                folder=folder,
                 polarity=polarity,
                 num_cores=num_cores
             )
 
+        # Set adducts if provided
         if adducts is not None:
             self.params.adducts = adducts
 
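
The rewritten constructor shown above auto-detects the worker count when num_cores is left at 0; the computation in the added lines is simply a floor of roughly 75% of the available CPUs, never less than one:

    import multiprocessing

    # Same expression as in the added lines above.
    num_cores = max(1, int(multiprocessing.cpu_count() * 0.75))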
@@ -283,893 +280,632 @@ class Wizard:
|
|
|
283
280
|
setattr(self.params, key, value)
|
|
284
281
|
|
|
285
282
|
# Validate required parameters
|
|
286
|
-
if not self.params.
|
|
287
|
-
raise ValueError("
|
|
288
|
-
if not self.params.
|
|
289
|
-
raise ValueError("
|
|
290
|
-
|
|
291
|
-
# Create
|
|
292
|
-
self.
|
|
293
|
-
self.
|
|
294
|
-
self.
|
|
295
|
-
|
|
296
|
-
#
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
# Create checkpoint file path
|
|
307
|
-
self.checkpoint_file = self.study_folder_path / "wizard_checkpoint.json"
|
|
308
|
-
|
|
309
|
-
self.logger.info(f"Wizard initialized for {self.polarity} mode")
|
|
310
|
-
self.logger.info(f"Data source: {self.data_source_path}")
|
|
311
|
-
self.logger.info(f"Study folder: {self.study_folder_path}")
|
|
312
|
-
self.logger.info(f"Using {self.params.num_cores} CPU cores")
|
|
313
|
-
|
|
314
|
-
# Load checkpoint if resuming
|
|
315
|
-
if self.params.resume_enabled:
|
|
316
|
-
self._load_checkpoint()
|
|
317
|
-
|
|
318
|
-
@property
|
|
319
|
-
def polarity(self) -> str:
|
|
320
|
-
"""Get the polarity setting."""
|
|
321
|
-
return self.params.polarity
|
|
322
|
-
|
|
323
|
-
@property
|
|
324
|
-
def adducts(self) -> List[str]:
|
|
325
|
-
"""Get the adducts list."""
|
|
326
|
-
return self.params.adducts
|
|
327
|
-
|
|
328
|
-
def _setup_logging(self):
|
|
329
|
-
"""Setup comprehensive logging system."""
|
|
330
|
-
# Create logger
|
|
331
|
-
log_label = f"Wizard-{self.polarity}"
|
|
332
|
-
|
|
333
|
-
if self.params.log_to_file:
|
|
334
|
-
log_file = self.study_folder_path / "wizard.log"
|
|
335
|
-
sink = str(log_file)
|
|
336
|
-
else:
|
|
337
|
-
sink = "sys.stdout"
|
|
338
|
-
|
|
339
|
-
self.logger = MassterLogger(
|
|
340
|
-
instance_type="wizard",
|
|
341
|
-
level=self.params.log_level.upper(),
|
|
342
|
-
label=log_label,
|
|
343
|
-
sink=sink,
|
|
344
|
-
)
|
|
345
|
-
|
|
346
|
-
# Also create a simple file logger for critical info
|
|
347
|
-
self.log_file = self.study_folder_path / "processing.log"
|
|
348
|
-
|
|
349
|
-
def _log_progress(self, message: str, level: str = "INFO"):
|
|
350
|
-
"""Log progress message with timestamp."""
|
|
351
|
-
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
352
|
-
full_message = f"[{timestamp}] {message}"
|
|
353
|
-
|
|
354
|
-
# Log to masster logger
|
|
355
|
-
getattr(self.logger, level.lower())(message)
|
|
356
|
-
|
|
357
|
-
# Also write to simple log file
|
|
358
|
-
with open(self.log_file, "a", encoding="utf-8") as f:
|
|
359
|
-
f.write(f"{full_message}\n")
|
|
360
|
-
|
|
361
|
-
if self.params.verbose_progress and level in ["INFO", "WARNING", "ERROR"]:
|
|
362
|
-
print(full_message)
|
|
363
|
-
|
|
364
|
-
def _save_checkpoint(self):
|
|
365
|
-
"""Save processing checkpoint for resume capability."""
|
|
366
|
-
if not self.params.resume_enabled:
|
|
367
|
-
return
|
|
368
|
-
|
|
369
|
-
import json
|
|
370
|
-
checkpoint_data = {
|
|
371
|
-
"timestamp": datetime.now().isoformat(),
|
|
372
|
-
"current_step": self.current_step,
|
|
373
|
-
"processed_files": self.processed_files,
|
|
374
|
-
"failed_files": self.failed_files,
|
|
375
|
-
"params": {
|
|
376
|
-
"data_source": self.params.data_source,
|
|
377
|
-
"study_folder": self.params.study_folder,
|
|
378
|
-
"polarity": self.params.polarity,
|
|
379
|
-
"adducts": self.params.adducts,
|
|
380
|
-
"num_cores": self.params.num_cores,
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
try:
|
|
385
|
-
with open(self.checkpoint_file, "w") as f:
|
|
386
|
-
json.dump(checkpoint_data, f, indent=2)
|
|
387
|
-
self.logger.debug(f"Checkpoint saved: {len(self.processed_files)} files processed")
|
|
388
|
-
except Exception as e:
|
|
389
|
-
self.logger.warning(f"Failed to save checkpoint: {e}")
|
|
390
|
-
|
|
391
|
-
def _load_checkpoint(self):
|
|
392
|
-
"""Load processing checkpoint for resume capability."""
|
|
393
|
-
if not self.checkpoint_file.exists():
|
|
394
|
-
return
|
|
395
|
-
|
|
396
|
-
import json
|
|
397
|
-
try:
|
|
398
|
-
with open(self.checkpoint_file, "r") as f:
|
|
399
|
-
checkpoint_data = json.load(f)
|
|
400
|
-
|
|
401
|
-
self.processed_files = checkpoint_data.get("processed_files", [])
|
|
402
|
-
self.failed_files = checkpoint_data.get("failed_files", [])
|
|
403
|
-
self.current_step = checkpoint_data.get("current_step", "initialized")
|
|
404
|
-
|
|
405
|
-
self.logger.info(f"Resuming from checkpoint: {len(self.processed_files)} files already processed")
|
|
406
|
-
self.logger.info(f"Previous step: {self.current_step}")
|
|
407
|
-
|
|
408
|
-
except Exception as e:
|
|
409
|
-
self.logger.warning(f"Failed to load checkpoint: {e}")
|
|
410
|
-
self.processed_files = []
|
|
411
|
-
self.failed_files = []
|
|
412
|
-
|
|
413
|
-
def discover_files(self) -> List[Path]:
|
|
283
|
+
if not self.params.source:
|
|
284
|
+
raise ValueError("source is required")
|
|
285
|
+
if not self.params.folder:
|
|
286
|
+
raise ValueError("folder is required")
|
|
287
|
+
|
|
288
|
+
# Create and validate paths
|
|
289
|
+
self.source_path = Path(self.params.source)
|
|
290
|
+
self.folder_path = Path(self.params.folder)
|
|
291
|
+
self.folder_path.mkdir(parents=True, exist_ok=True)
|
|
292
|
+
|
|
293
|
+
# Auto-infer polarity from the first file if not explicitly set by user
|
|
294
|
+
if polarity == "positive" and "polarity" not in kwargs:
|
|
295
|
+
inferred_polarity = self._infer_polarity_from_first_file()
|
|
296
|
+
if inferred_polarity:
|
|
297
|
+
self.params.polarity = inferred_polarity
|
|
298
|
+
# Update adducts based on inferred polarity
|
|
299
|
+
self.params.__post_init__()
|
|
300
|
+
|
|
301
|
+
def _infer_polarity_from_first_file(self) -> str:
|
|
414
302
|
"""
|
|
415
|
-
|
|
303
|
+
Infer polarity from the first available raw data file.
|
|
416
304
|
|
|
417
305
|
Returns:
|
|
418
|
-
|
|
306
|
+
Inferred polarity string ("positive" or "negative") or None if detection fails
|
|
419
307
|
"""
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
files = list(self.data_source_path.rglob(pattern))
|
|
308
|
+
try:
|
|
309
|
+
# Find first file
|
|
310
|
+
for extension in ['.wiff', '.raw', '.mzML', '.d']:
|
|
311
|
+
pattern = f"**/*{extension}" if True else f"*{extension}" # search_subfolders=True
|
|
312
|
+
files = list(self.source_path.rglob(pattern))
|
|
313
|
+
if files:
|
|
314
|
+
first_file = files[0]
|
|
315
|
+
break
|
|
429
316
|
else:
|
|
430
|
-
|
|
431
|
-
files = list(self.data_source_path.glob(pattern))
|
|
317
|
+
return None
|
|
432
318
|
|
|
433
|
-
#
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
skip_file = True
|
|
440
|
-
self.logger.debug(f"Skipping file (matches pattern '{pattern}'): {file_path.name}")
|
|
441
|
-
break
|
|
319
|
+
# Only implement for .wiff files initially (most common format)
|
|
320
|
+
if first_file.suffix.lower() == '.wiff':
|
|
321
|
+
from masster.sample.load import _wiff_to_dict
|
|
322
|
+
|
|
323
|
+
# Extract metadata from first file
|
|
324
|
+
metadata_df = _wiff_to_dict(str(first_file))
|
|
442
325
|
|
|
443
|
-
if not
|
|
444
|
-
#
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
326
|
+
if not metadata_df.empty and 'polarity' in metadata_df.columns:
|
|
327
|
+
# Get polarity from first experiment
|
|
328
|
+
first_polarity = metadata_df['polarity'].iloc[0]
|
|
329
|
+
|
|
330
|
+
# Convert numeric polarity codes to string
|
|
331
|
+
if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
|
|
332
|
+
return "positive"
|
|
333
|
+
elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
|
|
334
|
+
return "negative"
|
|
335
|
+
|
|
336
|
+
except Exception:
|
|
337
|
+
# Silently fall back to default if inference fails
|
|
338
|
+
pass
|
|
453
339
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
340
|
+
return None
|
|
341
|
+
|
|
342
|
+
@property
|
|
343
|
+
def polarity(self) -> str:
|
|
344
|
+
"""Get the ion polarity mode."""
|
|
345
|
+
return self.params.polarity
|
|
346
|
+
|
|
347
|
+
@property
|
|
348
|
+
def adducts(self) -> List[str]:
|
|
349
|
+
"""Get the adduct specifications."""
|
|
350
|
+
return self.params.adducts
|
|
351
|
+
|
|
352
|
+
def create_script(self, filename: str = "run_masster.py") -> bool:
|
|
465
353
|
"""
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
This method replicates the core processing from parallel_sample_processing.py
|
|
469
|
-
but with wizard-specific configuration and error handling.
|
|
354
|
+
Generate a standalone Python script for the analysis pipeline.
|
|
470
355
|
|
|
471
356
|
Parameters:
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
357
|
+
filename: Name for the generated script file
|
|
358
|
+
|
|
475
359
|
Returns:
|
|
476
|
-
|
|
360
|
+
True if script was generated successfully, False otherwise
|
|
477
361
|
"""
|
|
478
|
-
import gc
|
|
479
|
-
|
|
480
|
-
# Generate output filename
|
|
481
|
-
file_out = file_path.stem + '.sample5'
|
|
482
|
-
output_file = self.study_folder_path / file_out
|
|
483
|
-
|
|
484
|
-
# Initialize masster Sample with delayed import
|
|
485
|
-
import masster
|
|
486
|
-
sample = masster.Sample(
|
|
487
|
-
log_label=file_path.name,
|
|
488
|
-
log_level='ERROR' # Reduce logging overhead in parallel processing
|
|
489
|
-
)
|
|
490
|
-
|
|
491
|
-
# Check if file should be skipped
|
|
492
|
-
skip = False
|
|
493
|
-
if not reset and not self.params.force_reprocess and output_file.exists():
|
|
494
|
-
try:
|
|
495
|
-
# Attempt to load existing processed file to verify it's valid
|
|
496
|
-
sample.load(str(output_file))
|
|
497
|
-
skip = True
|
|
498
|
-
except Exception:
|
|
499
|
-
# If loading fails, file needs to be reprocessed
|
|
500
|
-
skip = False
|
|
501
|
-
|
|
502
|
-
if skip:
|
|
503
|
-
self.logger.debug(f"Skipping {file_path.name} (already processed)")
|
|
504
|
-
return output_file.stem
|
|
505
|
-
|
|
506
|
-
self.logger.info(f"Processing {file_path.name}")
|
|
507
|
-
|
|
508
362
|
try:
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
# STEP 2: Feature detection - First pass (strict parameters)
|
|
513
|
-
sample.find_features(
|
|
514
|
-
chrom_fwhm=self.params.chrom_fwhm,
|
|
515
|
-
noise=self.params.noise_threshold,
|
|
516
|
-
tol_ppm=self.params.tol_ppm,
|
|
517
|
-
chrom_peak_snr=self.params.chrom_peak_snr,
|
|
518
|
-
min_trace_length_multiplier=0.5,
|
|
519
|
-
chrom_fwhm_min=self.params.chrom_fwhm
|
|
520
|
-
)
|
|
521
|
-
|
|
522
|
-
# STEP 3: Feature detection - Second pass (relaxed parameters)
|
|
523
|
-
sample.find_features(
|
|
524
|
-
chrom_peak_snr=self.params.chrom_peak_snr,
|
|
525
|
-
noise=self.params.noise_threshold / 10, # Lower noise threshold
|
|
526
|
-
chrom_fwhm=2.0 # Wider peaks
|
|
527
|
-
)
|
|
528
|
-
|
|
529
|
-
# STEP 4: Adduct detection
|
|
530
|
-
sample.find_adducts(adducts=self.adducts)
|
|
531
|
-
|
|
532
|
-
# STEP 5: MS2 spectrum identification
|
|
533
|
-
sample.find_ms2()
|
|
534
|
-
|
|
535
|
-
# STEP 6: Save processed data
|
|
536
|
-
sample.save(filename=str(output_file))
|
|
537
|
-
|
|
538
|
-
# STEP 7: Generate additional outputs if requested
|
|
539
|
-
if "csv" in self.params.export_formats:
|
|
540
|
-
csv_file = output_file.with_suffix('.features.csv')
|
|
541
|
-
sample.export_features(filename=str(csv_file))
|
|
542
|
-
|
|
543
|
-
if "mgf" in self.params.export_formats:
|
|
544
|
-
mgf_file = output_file.with_suffix('.mgf')
|
|
545
|
-
sample.export_mgf(filename=str(mgf_file), use_cache=False)
|
|
363
|
+
script_path = self.folder_path / filename
|
|
364
|
+
script_content = self._generate_script_content()
|
|
546
365
|
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
sample.plot_2d(filename=str(plot_file), markersize=4)
|
|
366
|
+
with open(script_path, 'w', encoding='utf-8') as f:
|
|
367
|
+
f.write(script_content)
|
|
550
368
|
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
del sample
|
|
554
|
-
gc.collect()
|
|
555
|
-
|
|
556
|
-
return result
|
|
369
|
+
print(f"Analysis script created: {script_path}")
|
|
370
|
+
return True
|
|
557
371
|
|
|
558
372
|
except Exception as e:
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
def _process_batch(self, file_batch: List[Path]) -> List[str]:
|
|
565
|
-
"""Process a batch of files in a single worker."""
|
|
566
|
-
results = []
|
|
567
|
-
for file_path in file_batch:
|
|
568
|
-
result = self._process_single_file(file_path)
|
|
569
|
-
if result:
|
|
570
|
-
results.append(result)
|
|
571
|
-
else:
|
|
572
|
-
results.append(None)
|
|
573
|
-
return results
|
|
574
|
-
|
|
575
|
-
def convert_to_sample5(self, file_list: Optional[List[Path]] = None) -> bool:
|
|
373
|
+
print(f"Failed to create script: {e}")
|
|
374
|
+
return False
|
|
375
|
+
|
|
376
|
+
def execute(self, filename: str = "run_masster.py") -> bool:
|
|
576
377
|
"""
|
|
577
|
-
|
|
378
|
+
Create and execute a standalone analysis script.
|
|
578
379
|
|
|
579
380
|
Parameters:
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
Returns:
|
|
583
|
-
True if conversion completed successfully
|
|
584
|
-
"""
|
|
585
|
-
self._log_progress("=== Starting Sample5 Conversion ===")
|
|
586
|
-
self.current_step = "converting_to_sample5"
|
|
587
|
-
|
|
588
|
-
if file_list is None:
|
|
589
|
-
file_list = self.discover_files()
|
|
590
|
-
|
|
591
|
-
if not file_list:
|
|
592
|
-
self.logger.warning("No files found for conversion")
|
|
593
|
-
return False
|
|
594
|
-
|
|
595
|
-
# Filter out already processed files if resuming
|
|
596
|
-
if self.params.resume_enabled and self.processed_files:
|
|
597
|
-
remaining_files = []
|
|
598
|
-
for file_path in file_list:
|
|
599
|
-
if str(file_path) not in self.processed_files:
|
|
600
|
-
remaining_files.append(file_path)
|
|
601
|
-
file_list = remaining_files
|
|
381
|
+
filename: Name for the generated script file
|
|
602
382
|
|
|
603
|
-
if not file_list:
|
|
604
|
-
self._log_progress("All files already processed")
|
|
605
|
-
return True
|
|
606
|
-
|
|
607
|
-
self._log_progress(f"Converting {len(file_list)} files to sample5 format")
|
|
608
|
-
|
|
609
|
-
conversion_start = time.time()
|
|
610
|
-
successful_count = 0
|
|
611
|
-
failed_count = 0
|
|
612
|
-
|
|
613
|
-
if self.params.use_process_pool:
|
|
614
|
-
# ProcessPoolExecutor approach - better for CPU-intensive work
|
|
615
|
-
if len(file_list) <= self.params.batch_size:
|
|
616
|
-
# Few files: process individually
|
|
617
|
-
self.logger.info(f"Processing {len(file_list)} files individually with {self.params.num_cores} workers")
|
|
618
|
-
|
|
619
|
-
with concurrent.futures.ProcessPoolExecutor(max_workers=self.params.num_cores) as executor:
|
|
620
|
-
futures = [
|
|
621
|
-
executor.submit(self._process_single_file, file_path)
|
|
622
|
-
for file_path in file_list
|
|
623
|
-
]
|
|
624
|
-
|
|
625
|
-
for i, future in enumerate(concurrent.futures.as_completed(futures)):
|
|
626
|
-
result = future.result()
|
|
627
|
-
if result:
|
|
628
|
-
successful_count += 1
|
|
629
|
-
self.processed_files.append(str(file_list[i]))
|
|
630
|
-
else:
|
|
631
|
-
failed_count += 1
|
|
632
|
-
self.failed_files.append(str(file_list[i]))
|
|
633
|
-
|
|
634
|
-
# Progress update and checkpoint
|
|
635
|
-
if (successful_count + failed_count) % self.params.checkpoint_interval == 0:
|
|
636
|
-
progress = (successful_count + failed_count) / len(file_list) * 100
|
|
637
|
-
self._log_progress(f"Progress: {progress:.1f}% ({successful_count} successful, {failed_count} failed)")
|
|
638
|
-
self._save_checkpoint()
|
|
639
|
-
|
|
640
|
-
else:
|
|
641
|
-
# Many files: process in batches
|
|
642
|
-
batches = [
|
|
643
|
-
file_list[i:i + self.params.batch_size]
|
|
644
|
-
for i in range(0, len(file_list), self.params.batch_size)
|
|
645
|
-
]
|
|
646
|
-
|
|
647
|
-
self.logger.info(f"Processing {len(file_list)} files in {len(batches)} batches")
|
|
648
|
-
|
|
649
|
-
with concurrent.futures.ProcessPoolExecutor(max_workers=self.params.num_cores) as executor:
|
|
650
|
-
futures = [executor.submit(self._process_batch, batch) for batch in batches]
|
|
651
|
-
|
|
652
|
-
for batch_idx, future in enumerate(concurrent.futures.as_completed(futures)):
|
|
653
|
-
batch_results = future.result()
|
|
654
|
-
batch = batches[batch_idx]
|
|
655
|
-
|
|
656
|
-
for i, result in enumerate(batch_results):
|
|
657
|
-
if result:
|
|
658
|
-
successful_count += 1
|
|
659
|
-
self.processed_files.append(str(batch[i]))
|
|
660
|
-
else:
|
|
661
|
-
failed_count += 1
|
|
662
|
-
self.failed_files.append(str(batch[i]))
|
|
663
|
-
|
|
664
|
-
# Progress update
|
|
665
|
-
progress = (successful_count + failed_count) / len(file_list) * 100
|
|
666
|
-
self._log_progress(f"Batch {batch_idx + 1}/{len(batches)} complete. Progress: {progress:.1f}%")
|
|
667
|
-
self._save_checkpoint()
|
|
668
|
-
|
|
669
|
-
else:
|
|
670
|
-
# ThreadPoolExecutor approach
|
|
671
|
-
self.logger.info(f"Processing {len(file_list)} files with {self.params.num_cores} threads")
|
|
672
|
-
|
|
673
|
-
with concurrent.futures.ThreadPoolExecutor(max_workers=self.params.num_cores) as executor:
|
|
674
|
-
futures = [
|
|
675
|
-
executor.submit(self._process_single_file, file_path)
|
|
676
|
-
for file_path in file_list
|
|
677
|
-
]
|
|
678
|
-
|
|
679
|
-
for i, future in enumerate(concurrent.futures.as_completed(futures)):
|
|
680
|
-
result = future.result()
|
|
681
|
-
if result:
|
|
682
|
-
successful_count += 1
|
|
683
|
-
self.processed_files.append(str(file_list[i]))
|
|
684
|
-
else:
|
|
685
|
-
failed_count += 1
|
|
686
|
-
self.failed_files.append(str(file_list[i]))
|
|
687
|
-
|
|
688
|
-
if (successful_count + failed_count) % self.params.checkpoint_interval == 0:
|
|
689
|
-
progress = (successful_count + failed_count) / len(file_list) * 100
|
|
690
|
-
self._log_progress(f"Progress: {progress:.1f}%")
|
|
691
|
-
self._save_checkpoint()
|
|
692
|
-
|
|
693
|
-
conversion_time = time.time() - conversion_start
|
|
694
|
-
|
|
695
|
-
self._log_progress("=== Sample5 Conversion Complete ===")
|
|
696
|
-
self._log_progress(f"Successful: {successful_count}")
|
|
697
|
-
self._log_progress(f"Failed: {failed_count}")
|
|
698
|
-
self._log_progress(f"Total time: {conversion_time:.1f} seconds")
|
|
699
|
-
|
|
700
|
-
if failed_count > 0:
|
|
701
|
-
self.logger.warning(f"{failed_count} files failed to process")
|
|
702
|
-
for failed_file in self.failed_files[-failed_count:]:
|
|
703
|
-
self.logger.warning(f"Failed: {failed_file}")
|
|
704
|
-
|
|
705
|
-
self._save_checkpoint()
|
|
706
|
-
return successful_count > 0
|
|
707
|
-
|
|
708
|
-
def assemble_study(self) -> bool:
|
|
709
|
-
"""
|
|
710
|
-
Assemble processed sample5 files into a study.
|
|
711
|
-
|
|
712
383
|
Returns:
|
|
713
|
-
True if
|
|
384
|
+
True if execution completed successfully, False otherwise
|
|
714
385
|
"""
|
|
715
|
-
|
|
716
|
-
self.
|
|
717
|
-
|
|
718
|
-
# Find all sample5 files
|
|
719
|
-
sample5_files = list(self.study_folder_path.glob("*.sample5"))
|
|
720
|
-
|
|
721
|
-
if not sample5_files:
|
|
722
|
-
self.logger.error("No sample5 files found for study assembly")
|
|
386
|
+
# First create the script
|
|
387
|
+
if not self.create_script(filename):
|
|
723
388
|
return False
|
|
724
|
-
|
|
725
|
-
|
|
389
|
+
|
|
390
|
+
# Then execute it
|
|
391
|
+
script_path = self.folder_path / filename
|
|
726
392
|
|
|
727
393
|
try:
|
|
728
|
-
|
|
729
|
-
import masster
|
|
730
|
-
study_params = study_defaults(
|
|
731
|
-
folder=str(self.study_folder_path),
|
|
732
|
-
polarity=self.polarity,
|
|
733
|
-
log_level="INFO",
|
|
734
|
-
log_label=f"Study-{self.polarity}",
|
|
735
|
-
adducts=self.adducts
|
|
736
|
-
)
|
|
737
|
-
|
|
738
|
-
self.study = masster.Study(params=study_params)
|
|
394
|
+
print("Executing...")
|
|
739
395
|
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
396
|
+
import subprocess
|
|
397
|
+
result = subprocess.run([
|
|
398
|
+
sys.executable, str(script_path)
|
|
399
|
+
], cwd=str(self.folder_path), encoding='utf-8', errors='replace')
|
|
743
400
|
|
|
744
|
-
|
|
401
|
+
success = result.returncode == 0
|
|
745
402
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
chrom_coherence=0.3,
|
|
753
|
-
chrom_prominence_scaled=1
|
|
754
|
-
)
|
|
755
|
-
self.study.features_filter(feature_selection)
|
|
403
|
+
if success:
|
|
404
|
+
print("=" * 70)
|
|
405
|
+
print("Script execution completed successfully")
|
|
406
|
+
else:
|
|
407
|
+
print("=" * 70)
|
|
408
|
+
print(f"Script execution failed with return code: {result.returncode}")
|
|
756
409
|
|
|
757
|
-
|
|
758
|
-
self._log_progress(f"Feature filtering: {initial_features} -> {final_features} features")
|
|
759
|
-
|
|
760
|
-
self._save_checkpoint()
|
|
761
|
-
return True
|
|
410
|
+
return success
|
|
762
411
|
|
|
763
412
|
except Exception as e:
|
|
764
|
-
|
|
413
|
+
print(f"Error during script execution: {e}")
|
|
765
414
|
return False
|
|
766
|
-
|
|
767
|
-
def
|
|
768
|
-
"""
|
|
769
|
-
|
|
415
|
+
|
|
416
|
+
def _generate_script_content(self) -> str:
|
|
417
|
+
"""Generate the complete analysis script content."""
|
|
418
|
+
|
|
419
|
+
# Convert Path objects to strings for JSON serialization
|
|
420
|
+
params_dict = {}
|
|
421
|
+
for key, value in self.params.__dict__.items():
|
|
422
|
+
if key == '_param_metadata': # Skip metadata in generated script
|
|
423
|
+
continue
|
|
424
|
+
if isinstance(value, Path):
|
|
425
|
+
params_dict[key] = str(value)
|
|
426
|
+
else:
|
|
427
|
+
params_dict[key] = value
|
|
428
|
+
|
|
429
|
+
# Obtain list of files in source with extension wiff, .raw, .mzML
|
|
430
|
+
raw_files = []
|
|
431
|
+
for ext in params_dict.get('file_extensions', []):
|
|
432
|
+
raw_files.extend(glob.glob(f"{params_dict.get('source', '')}/**/*{ext}", recursive=True))
|
|
433
|
+
|
|
434
|
+
# Create readable PARAMS dict with comments
|
|
435
|
+
params_lines = []
|
|
436
|
+
params_lines.append('# Analysis parameters')
|
|
437
|
+
params_lines.append('PARAMS = {')
|
|
770
438
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
"""
|
|
774
|
-
|
|
775
|
-
|
|
439
|
+
# Core Configuration
|
|
440
|
+
params_lines.append(' # === Core Configuration ===')
|
|
441
|
+
params_lines.append(f' "source": {params_dict.get("source", "")!r}, # Directory containing raw data files')
|
|
442
|
+
params_lines.append(f' "folder": {params_dict.get("folder", "")!r}, # Output directory for processed study')
|
|
443
|
+
params_lines.append(f' "polarity": {params_dict.get("polarity", "positive")!r}, # Ion polarity mode ("positive" or "negative")')
|
|
444
|
+
params_lines.append(f' "num_cores": {params_dict.get("num_cores", 4)}, # Number of CPU cores for parallel processing')
|
|
445
|
+
params_lines.append('')
|
|
776
446
|
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
447
|
+
# File Discovery
|
|
448
|
+
params_lines.append(' # === File Discovery ===')
|
|
449
|
+
params_lines.append(f' "file_extensions": {params_dict.get("file_extensions", [".wiff", ".raw", ".mzML"])!r}, # File extensions to search for')
|
|
450
|
+
params_lines.append(f' "search_subfolders": {params_dict.get("search_subfolders", True)}, # Whether to search subdirectories recursively')
|
|
451
|
+
params_lines.append(f' "skip_patterns": {params_dict.get("skip_patterns", ["blank", "condition"])!r}, # Filename patterns to skip')
|
|
452
|
+
params_lines.append('')
|
|
780
453
|
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
454
|
+
# Processing Parameters
|
|
455
|
+
params_lines.append(' # === Processing Parameters ===')
|
|
456
|
+
params_lines.append(f' "adducts": {params_dict.get("adducts", [])!r}, # Adduct specifications for feature detection and annotation')
|
|
457
|
+
params_lines.append(f' "detector_type": {params_dict.get("detector_type", "unknown")!r}, # MS detector type ("orbitrap", "tof", "unknown")')
|
|
458
|
+
params_lines.append('')
|
|
459
|
+
|
|
460
|
+
# Alignment & Merging
|
|
461
|
+
params_lines.append(' # === Alignment & Merging ===')
|
|
462
|
+
params_lines.append(f' "rt_tol": {params_dict.get("rt_tol", 2.0)}, # Retention time tolerance for alignment (seconds)')
|
|
463
|
+
params_lines.append(f' "mz_tol": {params_dict.get("mz_tol", 0.01)}, # Mass-to-charge ratio tolerance for alignment (Da)')
|
|
464
|
+
params_lines.append(f' "alignment_method": {params_dict.get("alignment_method", "kd")!r}, # Algorithm for sample alignment')
|
|
465
|
+
params_lines.append(f' "min_samples_per_feature": {params_dict.get("min_samples_per_feature", 1)}, # Minimum samples required per consensus feature')
|
|
466
|
+
params_lines.append(f' "merge_method": {params_dict.get("merge_method", "qt")!r}, # Method for merging consensus features')
|
|
467
|
+
params_lines.append('')
|
|
468
|
+
|
|
469
|
+
# Sample Processing
|
|
470
|
+
params_lines.append(' # === Sample Processing (used in add_samples_from_folder) ===')
|
|
471
|
+
params_lines.append(f' "batch_size": {params_dict.get("batch_size", 8)}, # Number of files to process per batch')
|
|
472
|
+
params_lines.append(f' "memory_limit_gb": {params_dict.get("memory_limit_gb", 16.0)}, # Memory limit for processing (GB)')
|
|
473
|
+
params_lines.append('')
|
|
474
|
+
|
|
475
|
+
# Script Options
|
|
476
|
+
params_lines.append(' # === Script Options ===')
|
|
477
|
+
params_lines.append(f' "resume_enabled": {params_dict.get("resume_enabled", True)}, # Enable automatic resume capability')
|
|
478
|
+
params_lines.append(f' "force_reprocess": {params_dict.get("force_reprocess", False)}, # Force reprocessing of existing files')
|
|
479
|
+
params_lines.append(f' "cleanup_temp_files": {params_dict.get("cleanup_temp_files", True)}, # Clean up temporary files after processing')
|
|
480
|
+
|
|
481
|
+
params_lines.append('}')
|
|
482
|
+
|
|
483
|
+
# Create script lines
|
|
484
|
+
script_lines = [
|
|
485
|
+
'#!/usr/bin/env python3',
|
|
486
|
+
'"""',
|
|
487
|
+
'Automated Mass Spectrometry Data Analysis Pipeline',
|
|
488
|
+
f'Generated by masster wizard v{version}',
|
|
489
|
+
'"""',
|
|
490
|
+
'',
|
|
491
|
+
'import sys',
|
|
492
|
+
'import time',
|
|
493
|
+
'from pathlib import Path',
|
|
494
|
+
'',
|
|
495
|
+
'# Import masster modules',
|
|
496
|
+
'from masster.study import Study',
|
|
497
|
+
'from masster import __version__',
|
|
498
|
+
'',
|
|
499
|
+
]
|
|
500
|
+
|
|
501
|
+
# Add the formatted PARAMS
|
|
502
|
+
script_lines.extend(params_lines)
|
|
503
|
+
|
|
504
|
+
# Add the main function and pipeline
|
|
505
|
+
script_lines.extend([
|
|
506
|
+
'',
|
|
507
|
+
'',
|
|
508
|
+
'def discover_raw_files(source_folder, file_extensions, search_subfolders=True):',
|
|
509
|
+
' """Discover raw data files in the source folder."""',
|
|
510
|
+
' source_path = Path(source_folder)',
|
|
511
|
+
' raw_files = []',
|
|
512
|
+
' ',
|
|
513
|
+
' for ext in file_extensions:',
|
|
514
|
+
' if search_subfolders:',
|
|
515
|
+
' pattern = f"**/*{ext}"',
|
|
516
|
+
' files = list(source_path.rglob(pattern))',
|
|
517
|
+
' else:',
|
|
518
|
+
' pattern = f"*{ext}"',
|
|
519
|
+
' files = list(source_path.glob(pattern))',
|
|
520
|
+
' raw_files.extend(files)',
|
|
521
|
+
' ',
|
|
522
|
+
' return raw_files',
|
|
523
|
+
'',
|
|
524
|
+
'',
|
|
525
|
+
'def process_single_file(args):',
|
|
526
|
+
' """Process a single raw file to sample5 format - module level for multiprocessing."""',
|
|
527
|
+
' raw_file, output_folder = args',
|
|
528
|
+
' from masster.sample import Sample',
|
|
529
|
+
' ',
|
|
530
|
+
' try:',
|
|
531
|
+
' # Create sample5 filename',
|
|
532
|
+
' sample_name = raw_file.stem',
|
|
533
|
+
' sample5_path = Path(output_folder) / f"{sample_name}.sample5"',
|
|
534
|
+
' ',
|
|
535
|
+
' # Skip if sample5 already exists',
|
|
536
|
+
' if sample5_path.exists():',
|
|
537
|
+
' print(f" Skipping {raw_file.name} (sample5 already exists)")',
|
|
538
|
+
' return str(sample5_path)',
|
|
539
|
+
' ',
|
|
540
|
+
' print(f" Converting {raw_file.name}...")',
|
|
541
|
+
' ',
|
|
542
|
+
' # Load and process raw file with full pipeline',
|
|
543
|
+
' sample = Sample(log_label=sample_name)',
|
|
544
|
+
' sample.load(filename=str(raw_file))',
|
|
545
|
+
' sample.find_features(',
|
|
546
|
+
' noise=PARAMS[\'noise_threshold\'],',
|
|
547
|
+
' chrom_fwhm=PARAMS[\'smoothing_width\'],',
|
|
548
|
+
' chrom_peak_snr=PARAMS[\'peak_threshold\']',
|
|
549
|
+
' )',
|
|
550
|
+
' sample.find_adducts(adducts=PARAMS[\'adducts\'])',
|
|
551
|
+
' sample.find_ms2()',
|
|
552
|
+
' # sample.find_iso()',
|
|
553
|
+
' # sample.export_mgf()',
|
|
554
|
+
' # sample.export_mztab()',
|
|
555
|
+
' # sample.plot_2d(filename="{sample_name}.html")',
|
|
556
|
+
' sample.save(str(sample5_path))',
|
|
557
|
+
' ',
|
|
558
|
+
' # print(f" Completed {raw_file.name} -> {sample5_path.name}")',
|
|
559
|
+
' return str(sample5_path)',
|
|
560
|
+
' ',
|
|
561
|
+
' except Exception as e:',
|
|
562
|
+
' print(f" ERROR processing {raw_file.name}: {e}")',
|
|
563
|
+
' return None',
|
|
564
|
+
'',
|
|
565
|
+
'',
|
|
566
|
+
'def convert_raw_to_sample5(raw_files, output_folder, polarity, num_cores):',
|
|
567
|
+
' """Convert raw data files to sample5 format."""',
|
|
568
|
+
' import concurrent.futures',
|
|
569
|
+
' import os',
|
|
570
|
+
' ',
|
|
571
|
+
' # Create output directory',
|
|
572
|
+
' os.makedirs(output_folder, exist_ok=True)',
|
|
573
|
+
' ',
|
|
574
|
+
' # Prepare arguments for multiprocessing',
|
|
575
|
+
' file_args = [(raw_file, output_folder) for raw_file in raw_files]',
|
|
576
|
+
' ',
|
|
577
|
+
' # Process files in parallel',
|
|
578
|
+
' sample5_files = []',
|
|
579
|
+
' with concurrent.futures.ProcessPoolExecutor(max_workers=num_cores) as executor:',
|
|
580
|
+
' futures = [executor.submit(process_single_file, args) for args in file_args]',
|
|
581
|
+
' ',
|
|
582
|
+
' for future in concurrent.futures.as_completed(futures):',
|
|
583
|
+
' result = future.result()',
|
|
584
|
+
' if result:',
|
|
585
|
+
' sample5_files.append(result)',
|
|
586
|
+
' ',
|
|
587
|
+
' return sample5_files',
|
|
588
|
+
'',
|
|
589
|
+
'',
|
|
590
|
+
'def main():',
|
|
591
|
+
' """Main analysis pipeline."""',
|
|
592
|
+
' try:',
|
|
593
|
+
' print("=" * 70)',
|
|
594
|
+
f' print("masster {version} - Automated MS Data Analysis")',
|
|
595
|
+
' print("=" * 70)',
|
|
596
|
+
' print(f"Source: {PARAMS[\'source\']}")',
|
|
597
|
+
' print(f"Output: {PARAMS[\'folder\']}")',
|
|
598
|
+
' print(f"Polarity: {PARAMS[\'polarity\']}")',
|
|
599
|
+
' print(f"CPU Cores: {PARAMS[\'num_cores\']}")',
|
|
600
|
+
' print("=" * 70)',
|
|
601
|
+
' ',
|
|
602
|
+
' start_time = time.time()',
|
|
603
|
+
' ',
|
|
604
|
+
' # Step 1: Discover raw data files',
|
|
605
|
+
' print("\\nStep 1/7: Discovering raw data files...")',
|
|
606
|
+
' raw_files = discover_raw_files(',
|
|
607
|
+
' PARAMS[\'source\'],',
|
|
608
|
+
' PARAMS[\'file_extensions\'],',
|
|
609
|
+
' PARAMS[\'search_subfolders\']',
|
|
610
|
+
' )',
|
|
611
|
+
' ',
|
|
612
|
+
' if not raw_files:',
|
|
613
|
+
' print("No raw data files found!")',
|
|
614
|
+
' return False',
|
|
615
|
+
' ',
|
|
616
|
+
' print(f"Found {len(raw_files)} raw data files")',
|
|
617
|
+
' for f in raw_files[:5]: # Show first 5 files',
|
|
618
|
+
' print(f" {f.name}")',
|
|
619
|
+
' if len(raw_files) > 5:',
|
|
620
|
+
' print(f" ... and {len(raw_files) - 5} more")',
|
|
621
|
+
' ',
|
|
622
|
+
' # Step 2: Process raw files',
|
|
623
|
+
' print("\\nStep 2/7: Processing raw files...")',
|
|
624
|
+
' sample5_files = convert_raw_to_sample5(',
|
|
625
|
+
' raw_files,',
|
|
626
|
+
' PARAMS[\'folder\'],',
|
|
627
|
+
' PARAMS[\'polarity\'],',
|
|
628
|
+
' PARAMS[\'num_cores\']',
|
|
629
|
+
' )',
|
|
630
|
+
' ',
|
|
631
|
+
' if not sample5_files:',
|
|
632
|
+
' print("No sample5 files were created!")',
|
|
633
|
+
' return False',
|
|
634
|
+
' ',
|
|
635
|
+
' print(f"Successfully processed {len(sample5_files)} files to sample5")',
|
|
636
|
+
' ',
|
|
637
|
+
' # Step 3: Create and configure study',
|
|
638
|
+
' print("\\nStep 3/7: Initializing study...")',
|
|
639
|
+
' study = Study(folder=PARAMS[\'folder\'])',
|
|
640
|
+
' study.polarity = PARAMS[\'polarity\']',
|
|
641
|
+
' study.adducts = PARAMS[\'adducts\']',
|
|
642
|
+
' ',
|
|
643
|
+
' # Step 4: Add sample5 files to study',
|
|
644
|
+
' print("\\nStep 4/7: Adding samples to study...")',
|
|
645
|
+
' study.add(str(Path(PARAMS[\'folder\']) / "*.sample5"))',
|
|
646
|
+
' ',
|
|
647
|
+
' # Step 5: Core processing',
|
|
648
|
+
' print("\\nStep 5/7: Processing...")',
|
|
649
|
+
' study.align(',
|
|
650
|
+
' algorithm=PARAMS[\'alignment_method\'],',
|
|
651
|
+
' rt_tol=PARAMS[\'rt_tol\']',
|
|
652
|
+
' )',
|
|
653
|
+
' ',
|
|
654
|
+
' # Merge and create consensus features',
|
|
655
|
+
' study.merge(',
|
|
656
|
+
' min_samples=PARAMS[\'min_samples_per_feature\'],',
|
|
657
|
+
' threads=PARAMS[\'num_cores\'],',
|
|
658
|
+
' rt_tol=PARAMS[\'rt_tol\'],',
|
|
659
|
+
' mz_tol=PARAMS[\'mz_tol\']',
|
|
660
|
+
' )',
|
|
661
|
+
' study.find_iso()',
|
|
662
|
+
' study.fill(min_samples_rel=0.0)',
|
|
663
|
+
' study.integrate()',
|
|
664
|
+
' ',
|
|
665
|
+
' # Step 6/7: Saving results',
|
|
666
|
+
' print("\\nStep 6/7: Saving results...")',
|
|
667
|
+
' study.save()',
|
|
668
|
+
' study.export_xlsx()',
|
|
669
|
+
' study.export_mgf()',
|
|
670
|
+
' study.export_mztab()',
|
|
671
|
+
' ',
|
|
672
|
+
' # Step 7: Plots',
|
|
673
|
+
' print("\\nStep 7/7: Exporting plots...")',
|
|
674
|
+
' study.plot_consensus_2d(filename="consensus.html")',
|
|
675
|
+
' study.plot_consensus_2d(filename="consensus.png")',
|
|
676
|
+
' study.plot_alignment(filename="alignment.html")',
|
|
677
|
+
' study.plot_alignment(filename="alignment.png")',
|
|
678
|
+
' study.plot_pca(filename="pca.html")',
|
|
679
|
+
' study.plot_pca(filename="pca.png")',
|
|
680
|
+
' study.plot_bpc(filename="bpc.html")',
|
|
681
|
+
' study.plot_bpc(filename="bpc.png")',
|
|
682
|
+
' study.plot_rt_correction(filename="rt_correction.html")',
|
|
683
|
+
' study.plot_rt_correction(filename="rt_correction.png")',
|
|
684
|
+
|
|
685
|
+
' ',
|
|
686
|
+
' # Print summary',
|
|
687
|
+
' study.info()',
|
|
688
|
+
' total_time = time.time() - start_time',
|
|
689
|
+
' print("\\n" + "=" * 70)',
|
|
690
|
+
' print("ANALYSIS COMPLETE")',
|
|
691
|
+
' print("=" * 70)',
|
|
692
|
+
' print(f"Total processing time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")',
|
|
693
|
+
' print(f"Raw files processed: {len(raw_files)}")',
|
|
694
|
+
' print(f"Sample5 files created: {len(sample5_files)}")',
|
|
695
|
+
' if hasattr(study, "consensus_df"):',
|
|
696
|
+
' print(f"Consensus features generated: {len(study.consensus_df)}")',
|
|
697
|
+
' print("=" * 70)',
|
|
698
|
+
' ',
|
|
699
|
+
' return True',
|
|
700
|
+
' ',
|
|
701
|
+
' except KeyboardInterrupt:',
|
|
702
|
+
' print("\\nAnalysis interrupted by user")',
|
|
703
|
+
' return False',
|
|
704
|
+
' except Exception as e:',
|
|
705
|
+
' print(f"Analysis failed with error: {e}")',
|
|
706
|
+
' import traceback',
|
|
707
|
+
' traceback.print_exc()',
|
|
708
|
+
' return False',
|
|
709
|
+
'',
|
|
710
|
+
'',
|
|
711
|
+
'if __name__ == "__main__":',
|
|
712
|
+
' success = main()',
|
|
713
|
+
' sys.exit(0 if success else 1)',
|
|
714
|
+
])
|
|
715
|
+
|
|
716
|
+
return '\n'.join(script_lines)
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
def create_script(
|
|
720
|
+
source: str,
|
|
721
|
+
folder: str,
|
|
722
|
+
filename: str = 'run_masster.py',
|
|
723
|
+
polarity: str = "positive",
|
|
724
|
+
adducts: Optional[List[str]] = None,
|
|
725
|
+
params: Optional[wizard_def] = None,
|
|
726
|
+
num_cores: int = 0,
|
|
727
|
+
**kwargs
|
|
728
|
+
) -> bool:
|
|
729
|
+
"""
|
|
730
|
+
Create a standalone analysis script without initializing a Wizard instance.
|
|
731
|
+
|
|
732
|
+
This function generates a Python script that replicates automated processing
|
|
733
|
+
steps with the specified configuration. The script can be executed independently
|
|
734
|
+
to perform the same analysis.
|
|
735
|
+
|
|
736
|
+
Parameters:
|
|
737
|
+
source: Directory containing raw data files
|
|
738
|
+
folder: Output directory for processed study
|
|
739
|
+
filename: Filename for the generated script (should end with .py)
|
|
740
|
+
polarity: Ion polarity mode ("positive" or "negative")
|
|
741
|
+
adducts: List of adduct specifications (auto-set if None)
|
|
742
|
+
params: Custom wizard_def parameters (optional)
|
|
743
|
+
num_cores: Number of CPU cores (0 = auto-detect)
|
|
744
|
+
**kwargs: Additional parameters to override defaults
|
|
745
|
+
|
|
746
|
+
Returns:
|
|
747
|
+
True if script was generated successfully, False otherwise
|
|
748
|
+
|
|
749
|
+
Example:
|
|
750
|
+
>>> from masster.wizard import create_script
|
|
751
|
+
>>> create_script(
|
|
752
|
+
... source=r'D:\\Data\\raw_files',
|
|
753
|
+
... folder=r'D:\\Data\\output',
|
|
754
|
+
... filename='run_masster.py',
|
|
755
|
+
... polarity='positive'
|
|
756
|
+
... )
|
|
757
|
+
"""
|
|
758
|
+
|
|
759
|
+
try:
|
|
760
|
+
# Create parameters
|
|
761
|
+
if params is not None:
|
|
762
|
+
# Use provided params as base
|
|
763
|
+
wizard_params = params
|
|
764
|
+
# Update with provided values
|
|
765
|
+
wizard_params.source = source
|
|
766
|
+
wizard_params.folder = folder
|
|
767
|
+
if polarity != "positive": # Only override if explicitly different
|
|
768
|
+
wizard_params.polarity = polarity
|
|
769
|
+
if num_cores > 0:
|
|
770
|
+
wizard_params.num_cores = num_cores
|
|
771
|
+
if adducts is not None:
|
|
772
|
+
wizard_params.adducts = adducts
|
|
773
|
+
else:
|
|
774
|
+
# Create new params with provided values
|
|
775
|
+
wizard_params = wizard_def(
|
|
776
|
+
source=source,
|
|
777
|
+
folder=folder,
|
|
778
|
+
polarity=polarity,
|
|
779
|
+
num_cores=max(1, int(multiprocessing.cpu_count() * 0.75)) if num_cores <= 0 else num_cores
|
|
798
780
|
)
|
|
799
781
|
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
# Log results
|
|
804
|
-
num_consensus = len(self.study.consensus_df) if hasattr(self.study, 'consensus_df') else 0
|
|
805
|
-
self._log_progress(f"Generated {num_consensus} consensus features")
|
|
806
|
-
|
|
807
|
-
# Get study info
|
|
808
|
-
if hasattr(self.study, 'info'):
|
|
809
|
-
self.study.info()
|
|
810
|
-
|
|
811
|
-
self._save_checkpoint()
|
|
812
|
-
return True
|
|
813
|
-
|
|
814
|
-
except Exception as e:
|
|
815
|
-
self.logger.error(f"Failed to align and merge: {e}")
|
|
816
|
-
return False
|
|
817
|
-
|
|
818
|
-
def generate_plots(self) -> bool:
|
|
819
|
-
"""
|
|
820
|
-
Generate visualization plots for the study.
|
|
821
|
-
|
|
822
|
-
Returns:
|
|
823
|
-
True if plot generation was successful
|
|
824
|
-
"""
|
|
825
|
-
if not self.params.generate_plots:
|
|
826
|
-
self._log_progress("Plot generation disabled, skipping...")
|
|
827
|
-
return True
|
|
782
|
+
if adducts is not None:
|
|
783
|
+
wizard_params.adducts = adducts
|
|
828
784
|
|
|
829
|
-
|
|
830
|
-
|
|
785
|
+
# Apply any additional kwargs
|
|
786
|
+
for key, value in kwargs.items():
|
|
787
|
+
if hasattr(wizard_params, key):
|
|
788
|
+
setattr(wizard_params, key, value)
|
|
831
789
|
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
790
|
+
# Ensure study folder exists
|
|
791
|
+
study_path = Path(folder)
|
|
792
|
+
study_path.mkdir(parents=True, exist_ok=True)
|
|
835
793
|
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
# Alignment plot
|
|
840
|
-
if hasattr(self.study, 'plot_alignment'):
|
|
841
|
-
alignment_plot = self.study_folder_path / "alignment_plot.html"
|
|
842
|
-
self.study.plot_alignment(filename=str(alignment_plot))
|
|
843
|
-
plots_generated += 1
|
|
844
|
-
self.logger.info(f"Generated alignment plot: {alignment_plot}")
|
|
845
|
-
|
|
846
|
-
# Consensus 2D plot
|
|
847
|
-
if hasattr(self.study, 'plot_consensus_2d'):
|
|
848
|
-
consensus_2d_plot = self.study_folder_path / "consensus_2d.html"
|
|
849
|
-
self.study.plot_consensus_2d(filename=str(consensus_2d_plot))
|
|
850
|
-
plots_generated += 1
|
|
851
|
-
self.logger.info(f"Generated consensus 2D plot: {consensus_2d_plot}")
|
|
852
|
-
|
|
853
|
-
# PCA plot
|
|
854
|
-
if hasattr(self.study, 'plot_pca'):
|
|
855
|
-
pca_plot = self.study_folder_path / "pca_plot.html"
|
|
856
|
-
self.study.plot_pca(filename=str(pca_plot))
|
|
857
|
-
plots_generated += 1
|
|
858
|
-
self.logger.info(f"Generated PCA plot: {pca_plot}")
|
|
859
|
-
|
|
860
|
-
# Consensus statistics
|
|
861
|
-
if hasattr(self.study, 'plot_consensus_stats'):
|
|
862
|
-
stats_plot = self.study_folder_path / "consensus_stats.html"
|
|
863
|
-
self.study.plot_consensus_stats(filename=str(stats_plot))
|
|
864
|
-
plots_generated += 1
|
|
865
|
-
self.logger.info(f"Generated statistics plot: {stats_plot}")
|
|
866
|
-
|
|
867
|
-
self._log_progress(f"Generated {plots_generated} visualization plots")
|
|
868
|
-
self._save_checkpoint()
|
|
869
|
-
return True
|
|
870
|
-
|
|
871
|
-
except Exception as e:
|
|
872
|
-
self.logger.error(f"Failed to generate plots: {e}")
|
|
873
|
-
return False
|
|
874
|
-
|
|
875
|
-
def export_results(self) -> bool:
|
|
876
|
-
"""
|
|
877
|
-
Export study results in requested formats.
|
|
794
|
+
# Create a temporary Wizard instance to generate the script
|
|
795
|
+
temp_wizard = Wizard(params=wizard_params)
|
|
878
796
|
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
"""
|
|
882
|
-
self._log_progress("=== Exporting Study Results ===")
|
|
883
|
-
self.current_step = "exporting_results"
|
|
797
|
+
# Generate the script using the instance method
|
|
798
|
+
success = temp_wizard.create_script(filename)
|
|
884
799
|
|
|
885
|
-
|
|
886
|
-
self.logger.error("Study not available. Complete previous steps first.")
|
|
887
|
-
return False
|
|
800
|
+
return success
|
|
888
801
|
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
802
|
+
except Exception as e:
|
|
803
|
+
print(f"Failed to create script: {e}")
|
|
804
|
+
import traceback
|
|
805
|
+
traceback.print_exc()
|
|
806
|
+
return False
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
def execute(
|
|
810
|
+
source: str,
|
|
811
|
+
folder: str,
|
|
812
|
+
filename: str = 'run_masster.py',
|
|
813
|
+
polarity: str = "positive",
|
|
814
|
+
adducts: Optional[List[str]] = None,
|
|
815
|
+
params: Optional[wizard_def] = None,
|
|
816
|
+
num_cores: int = 0,
|
|
817
|
+
**kwargs
|
|
818
|
+
) -> bool:
|
|
819
|
+
"""
|
|
820
|
+
Create and execute a standalone analysis script for automated MS data processing.
|
|
821
|
+
|
|
822
|
+
This function generates a Python script with the same parameters as create_script(),
|
|
823
|
+
but immediately executes it after creation. Combines script generation and execution
|
|
824
|
+
in a single step.
|
|
825
|
+
|
|
826
|
+
Parameters:
|
|
827
|
+
source: Directory containing raw data files
|
|
828
|
+
folder: Output directory for processed study
|
|
829
|
+
filename: Filename for the generated script (should end with .py)
|
|
830
|
+
polarity: Ion polarity mode ("positive" or "negative")
|
|
831
|
+
adducts: List of adduct specifications (auto-set if None)
|
|
832
|
+
params: Custom wizard_def parameters (optional)
|
|
833
|
+
num_cores: Number of CPU cores (0 = auto-detect)
|
|
834
|
+
**kwargs: Additional parameters to override defaults
|
|
835
|
+
|
|
836
|
+
Returns:
|
|
837
|
+
True if script was created and executed successfully, False otherwise
|
|
838
|
+
|
|
839
|
+
Example:
|
|
840
|
+
>>> from masster.wizard import execute
|
|
841
|
+
>>> execute(
|
|
842
|
+
... source=r'D:\\Data\\raw_files',
|
|
843
|
+
... folder=r'D:\\Data\\output',
|
|
844
|
+
... polarity='positive'
|
|
845
|
+
... )
|
|
846
|
+
"""
|
|
928
847
|
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
return False
|
|
848
|
+
try:
|
|
849
|
+
# First, create the script using create_script()
|
|
850
|
+
script_created = create_script(
|
|
851
|
+
source=source,
|
|
852
|
+
folder=folder,
|
|
853
|
+
filename=filename,
|
|
854
|
+
polarity=polarity,
|
|
855
|
+
adducts=adducts,
|
|
856
|
+
params=params,
|
|
857
|
+
num_cores=num_cores,
|
|
858
|
+
**kwargs
|
|
859
|
+
)
|
|
 
-
-
-
-            # Determine optimal save format based on study size
-            num_samples = len(self.study.samples_df)
-            num_features = len(self.study.consensus_df) if hasattr(self.study, 'consensus_df') else 0
-
-            if self.params.adaptive_compression:
-                # Use compressed format for large studies
-                if num_samples > 50 or num_features > 10000:
-                    self.logger.info(f"Large study detected ({num_samples} samples, {num_features} features) - using compressed format")
-                    self.params.compress_output = True
-                else:
-                    self.logger.info(f"Small study ({num_samples} samples, {num_features} features) - using standard format")
-                    self.params.compress_output = False
-
-            # Save study
-            if self.params.compress_output and hasattr(self.study, 'save_compressed'):
-                self.study.save_compressed(filename=str(study_file))
-                self.logger.info(f"Saved compressed study: {study_file}")
-            else:
-                self.study.save(filename=str(study_file))
-                self.logger.info(f"Saved study: {study_file}")
-
-            # Save metadata summary
-            metadata_file = self.study_folder_path / "study_metadata.txt"
-            with open(metadata_file, "w") as f:
-                f.write("Study Processing Summary\n")
-                f.write("========================\n")
-                f.write(f"Processing Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
-                f.write(f"Polarity: {self.polarity}\n")
-                f.write(f"Adducts: {', '.join(self.adducts)}\n")
-                f.write(f"Number of Samples: {num_samples}\n")
-                f.write(f"Number of Consensus Features: {num_features}\n")
-                f.write(f"Successful Files: {len(self.processed_files)}\n")
-                f.write(f"Failed Files: {len(self.failed_files)}\n")
-                f.write(f"RT Tolerance: {self.params.rt_tolerance}s\n")
-                f.write(f"m/z Tolerance: {self.params.mz_tolerance} Da\n")
-                f.write(f"Merge Method: {self.params.merge_method}\n")
-                f.write(f"Processing Time: {self._get_total_processing_time()}\n")
-
-            self._log_progress(f"Saved study metadata: {metadata_file}")
-            self._save_checkpoint()
-            return True
-
-        except Exception as e:
-            self.logger.error(f"Failed to save study: {e}")
+        if not script_created:
+            print("Failed to create analysis script")
             return False
-
-    def cleanup_temp_files(self) -> bool:
-        """
-        Clean up temporary files if requested.
-
-        Returns:
-            True if cleanup was successful
-        """
-        if not self.params.cleanup_temp_files:
-            return True
 
-
+        # Get the full path to the created script
+        study_path = Path(folder)
+        script_path = study_path / Path(filename).name
 
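Note how the script location is derived above: only the basename of `filename` is kept, so the script is always looked up inside the output folder regardless of any directory component the caller passed. A small standalone sketch of that resolution, with example values only:

```python
from pathlib import Path

folder = "processed_study"              # example output folder
filename = "scripts/masster_wizard.py"  # caller-supplied name, possibly with a directory part

# Only the basename survives; the script is expected inside the output folder.
script_path = Path(folder) / Path(filename).name
print(script_path)  # processed_study/masster_wizard.py (separator depends on OS)
```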
-
-
-
-            # Remove individual sample plots if study plots were generated
-            if self.params.generate_plots:
-                temp_plots = list(self.study_folder_path.glob("*_2d.html"))
-                for plot_file in temp_plots:
-                    if plot_file.name not in ["alignment_plot.html", "consensus_2d.html", "pca_plot.html"]:
-                        plot_file.unlink()
-                        cleaned_count += 1
-
-            # Remove checkpoint file
-            if self.checkpoint_file.exists():
-                self.checkpoint_file.unlink()
-                cleaned_count += 1
-
-            self._log_progress(f"Cleaned up {cleaned_count} temporary files")
-            return True
-
-        except Exception as e:
-            self.logger.error(f"Failed to cleanup temp files: {e}")
+        if not script_path.exists():
+            print(f"Script file not found: {script_path}")
             return False
-
-
-        """
-        Run the complete automated processing pipeline.
-
-        This method executes all processing steps in sequence:
-        1. Convert raw files to sample5 format
-        2. Assemble study from sample5 files
-        3. Align and merge features
-        4. Generate visualization plots
-        5. Export results in requested formats
-        6. Save final study
-        7. Clean up temporary files
-
-        Returns:
-            True if the entire pipeline completed successfully
-        """
-        self._log_progress("=" * 60)
-        self._log_progress("STARTING AUTOMATED STUDY PROCESSING PIPELINE")
-        self._log_progress("=" * 60)
+
+        print(f"Executing...")
+        #print("=" * 70)
 
-
-
+        # Execute the script using subprocess with real-time output
+        import subprocess
 
+        # Run the script with Python, letting it inherit our stdout/stderr
         try:
-            #
-
-
-
-
-            # Step 2: Assemble study
-            if not self.assemble_study():
-                self.logger.error("Study assembly failed")
-                return False
-
-            # Step 3: Align and merge
-            if not self.align_and_merge():
-                self.logger.error("Feature alignment and merging failed")
-                return False
-
-            # Step 4: Generate plots
-            if not self.generate_plots():
-                self.logger.warning("Plot generation failed, continuing...")
-                pipeline_success = False
-
-            # Step 5: Export results
-            if not self.export_results():
-                self.logger.warning("Result export failed, continuing...")
-                pipeline_success = False
+            # Use subprocess.run for direct output inheritance - no capturing/re-printing
+            result = subprocess.run([
+                sys.executable, str(script_path)
+            ], cwd=str(study_path))
 
-
-            if not self.save_study():
-                self.logger.error("Study saving failed")
-                return False
+            return_code = result.returncode
 
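The generated script is run in a child Python interpreter with `subprocess.run`, without capturing stdout/stderr, so the pipeline's progress output streams straight to the caller's console and success is judged purely by the exit code. A minimal, self-contained sketch of the same pattern; the function name and signature here are illustrative, not part of masster's API:

```python
import subprocess
import sys
from pathlib import Path

def run_generated_script(script_path: Path, workdir: Path) -> bool:
    """Run a Python script in a child process that inherits this process's
    stdout/stderr, and report success based on its exit code."""
    result = subprocess.run(
        [sys.executable, str(script_path)],  # same interpreter that is running this code
        cwd=str(workdir),                    # relative paths inside the script resolve in the study folder
    )
    return result.returncode == 0
```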
-            # Step 7: Cleanup
-            if not self.cleanup_temp_files():
-                self.logger.warning("Cleanup failed, continuing...")
-
-            # Final summary
-            total_time = time.time() - self.start_time
-            self._log_progress("=" * 60)
-            self._log_progress("PIPELINE COMPLETED SUCCESSFULLY")
-            self._log_progress(f"Total processing time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")
-            self._log_progress(f"Files processed: {len(self.processed_files)}")
-            self._log_progress(f"Files failed: {len(self.failed_files)}")
-            if hasattr(self.study, 'consensus_df'):
-                self._log_progress(f"Consensus features: {len(self.study.consensus_df)}")
-            self._log_progress("=" * 60)
-
-            return pipeline_success
-
-        except KeyboardInterrupt:
-            self.logger.info("Pipeline interrupted by user")
-            self._save_checkpoint()
-            return False
         except Exception as e:
-
-            self._save_checkpoint()
+            print(f"Error during script execution: {e}")
             return False
-
-    def _get_total_processing_time(self) -> str:
-        """Get formatted total processing time."""
-        if self.start_time is None:
-            return "Unknown"
 
-
-        hours = int(total_seconds // 3600)
-        minutes = int((total_seconds % 3600) // 60)
-        seconds = int(total_seconds % 60)
+        success = return_code == 0
 
-        if
-
-
-            return f"{minutes}m {seconds}s"
+        if success:
+            print("=" * 70)
+            print("Script execution completed successfully")
         else:
-
-
-
-
-        Get current processing status.
-
-        Returns:
-            Dictionary with current status information
-        """
-        return {
-            "current_step": self.current_step,
-            "processed_files": len(self.processed_files),
-            "failed_files": len(self.failed_files),
-            "study_loaded": self.study is not None,
-            "start_time": self.start_time.isoformat() if self.start_time else None,
-            "processing_time": self._get_total_processing_time(),
-            "parameters": {
-                "data_source": self.params.data_source,
-                "study_folder": self.params.study_folder,
-                "polarity": self.params.polarity,
-                "num_cores": self.params.num_cores,
-                "adducts": self.params.adducts,
-            }
-        }
-
-    def info(self):
-        """Print comprehensive wizard status information."""
-        status = self.get_status()
-
-        print("\n" + "=" * 50)
-        print("WIZARD STATUS")
-        print("=" * 50)
-        print(f"Current Step: {status['current_step']}")
-        print(f"Data Source: {self.params.data_source}")
-        print(f"Study Folder: {self.params.study_folder}")
-        print(f"Polarity: {status['parameters']['polarity']}")
-        print(f"CPU Cores: {status['parameters']['num_cores']}")
-        print(f"Adducts: {', '.join(status['parameters']['adducts'])}")
-        print(f"Processing Time: {status['processing_time']}")
-        print(f"Files Processed: {status['processed_files']}")
-        print(f"Files Failed: {status['failed_files']}")
-        print(f"Study Loaded: {status['study_loaded']}")
-
-        if self.study is not None and hasattr(self.study, 'samples_df'):
-            print(f"Samples in Study: {len(self.study.samples_df)}")
-
-        if self.study is not None and hasattr(self.study, 'consensus_df'):
-            print(f"Consensus Features: {len(self.study.consensus_df)}")
+            print("=" * 70)
+            print(f"Script execution failed with return code: {return_code}")
+
+        return success
 
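Because `execute()` signals failure of the generated script through its boolean return value rather than by raising, callers driving it from their own scripts should check the result explicitly. A usage sketch with placeholder paths:

```python
from masster.wizard import execute

ok = execute(
    source=r"D:\Data\raw_files",  # placeholder paths
    folder=r"D:\Data\output",
    polarity="positive",
)
if not ok:
    raise SystemExit("masster wizard pipeline failed; see the output above for details")
```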
-
|
|
903
|
+
except Exception as e:
|
|
904
|
+
print(f"Failed to execute script: {e}")
|
|
905
|
+
import traceback
|
|
906
|
+
traceback.print_exc()
|
|
907
|
+
return False
|
|
1172
908
|
|
|
1173
909
|
|
|
1174
|
-
# Export the main classes
|
|
1175
|
-
__all__ = ["Wizard", "wizard_def"]
|
|
910
|
+
# Export the main classes and functions
|
|
911
|
+
__all__ = ["Wizard", "wizard_def", "create_script", "execute"]
|
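With the expanded `__all__`, the script-based entry points sit alongside the class API. Assuming the package `__init__` re-exports them, as the docstring example `from masster.wizard import execute` implies, both styles can be imported from the same place:

```python
from masster.wizard import Wizard, wizard_def, create_script, execute
```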