masster 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of masster might be problematic.
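The headline change in 0.5.17 is a reworked Wizard API: the old analyze() entry point is gone, replaced by create_scripts(), test_only(), test_and_run(), and run(). A minimal usage sketch based on the recommended workflow in the new class docstring (the source/folder paths are placeholders, and the import assumes Wizard is re-exported from masster.wizard as in the package's own examples):

    from masster.wizard import Wizard

    # Point source/folder at your own data; these names are illustrative.
    wizard = Wizard(source="raw_data", folder="output")

    wizard.create_scripts()  # generate 1_masster_workflow.py and the marimo notebook
    wizard.test_only()       # validate parameters against the first raw file only
    wizard.run()             # process the full batch once the test looks good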

masster/wizard/wizard.py CHANGED
@@ -228,9 +228,17 @@ class Wizard:
  that process raw MS data through the complete pipeline: file discovery, feature
  detection, sample processing, study assembly, alignment, merging, and export.

- This simplified version focuses on three core functions:
- - create_scripts(): Generate workflow and interactive analysis scripts
- - analyze(): Create and run analysis scripts with interactive notebook
+ Core functions:
+ - create_scripts(): Generate standalone analysis scripts
+ - test_only(): Process only one file for parameter validation
+ - test_and_run(): Test with single file, then run full batch if successful
+ - run(): Execute full batch processing on all files
+
+ Recommended workflow:
+ 1. wizard = Wizard(source="raw_data", folder="output")
+ 2. wizard.create_scripts() # Generate analysis scripts
+ 3. wizard.test_only() # Validate with single file
+ 4. wizard.run() # Process all files
  """

  def __init__(
@@ -239,7 +247,7 @@ class Wizard:
  folder: str = "",
  polarity: str = "positive",
  adducts: Optional[List[str]] = None,
- num_cores: int = 0,
+ num_cores: int = 6,
  **kwargs
  ):
  """
@@ -290,14 +298,6 @@ class Wizard:
  self.folder_path = Path(self.params.folder)
  self.folder_path.mkdir(parents=True, exist_ok=True)

- # Initialize logger
- self.logger = MassterLogger(
- instance_type="wizard",
- level="INFO",
- label="Wizard",
- sink=None
- )
-
  # Auto-infer polarity from the first file if not explicitly set by user
  if polarity == "positive" and "polarity" not in kwargs:
  inferred_polarity = self._infer_polarity_from_first_file()
@@ -306,164 +306,6 @@ class Wizard:
  # Update adducts based on inferred polarity
  self.params.__post_init__()

- def _analyze_source_files(self) -> Dict[str, Any]:
- """
- Analyze source files to extract metadata: number of files, file type, polarity, and acquisition length.
-
- Returns:
- Dictionary containing:
- - number_of_files: Total count of data files found
- - file_types: List of file extensions found
- - polarity: Detected polarity ("positive" or "negative")
- - length_minutes: Acquisition length in minutes
- - first_file: Path to first file analyzed
- """
- result = {
- 'number_of_files': 0,
- 'file_types': [],
- 'polarity': 'positive',
- 'length_minutes': 0.0,
- 'first_file': None
- }
-
- try:
- # Find all data files
- all_files = []
- file_types_found = set()
-
- for extension in self.params.file_extensions:
- if self.params.search_subfolders:
- pattern = f"**/*{extension}"
- files = list(self.source_path.rglob(pattern))
- else:
- pattern = f"*{extension}"
- files = list(self.source_path.glob(pattern))
-
- if files:
- all_files.extend(files)
- file_types_found.add(extension)
-
- result['number_of_files'] = len(all_files)
- result['file_types'] = list(file_types_found)
-
- if not all_files:
- return result
-
- # Analyze first file for polarity and acquisition length
- first_file = all_files[0]
- result['first_file'] = str(first_file)
-
- # Extract metadata based on file type
- if first_file.suffix.lower() == '.wiff':
- metadata = self._analyze_wiff_file(first_file)
- elif first_file.suffix.lower() == '.mzml':
- metadata = self._analyze_mzml_file(first_file)
- elif first_file.suffix.lower() == '.raw':
- metadata = self._analyze_raw_file(first_file)
- else:
- metadata = {'polarity': 'positive', 'length_minutes': 0.0}
-
- result['polarity'] = metadata.get('polarity', 'positive')
- result['length_minutes'] = metadata.get('length_minutes', 0.0)
-
- except Exception as e:
- self.logger.warning(f"Failed to analyze source files: {e}")
-
- return result
-
- def _analyze_wiff_file(self, file_path: Path) -> Dict[str, Any]:
- """Analyze WIFF file to extract polarity and acquisition length."""
- try:
- from masster.sample.load import _wiff_to_dict
-
- # Extract metadata from WIFF file
- metadata_df = _wiff_to_dict(str(file_path))
-
- result = {'polarity': 'positive', 'length_minutes': 0.0}
-
- if not metadata_df.empty:
- # Get polarity from first experiment
- if 'polarity' in metadata_df.columns:
- first_polarity = metadata_df['polarity'].iloc[0]
-
- # Convert numeric polarity codes to string
- if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
- result['polarity'] = "positive"
- elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
- result['polarity'] = "negative"
-
- # Estimate acquisition length by loading the file briefly
- # For a rough estimate, we'll load just the scan info
- from masster.sample import Sample
- sample = Sample()
- sample.logger_update(level="ERROR") # Suppress logs
- sample.load(str(file_path))
-
- if hasattr(sample, 'scans_df') and sample.scans_df is not None:
- if not sample.scans_df.is_empty():
- rt_values = sample.scans_df.select('rt').to_numpy().flatten()
- if len(rt_values) > 0:
- # RT is in seconds, convert to minutes
- result['length_minutes'] = float(rt_values.max()) / 60.0
-
- return result
-
- except Exception as e:
- self.logger.debug(f"Failed to analyze WIFF file {file_path}: {e}")
- return {'polarity': 'positive', 'length_minutes': 0.0}
-
- def _analyze_mzml_file(self, file_path: Path) -> Dict[str, Any]:
- """Analyze mzML file to extract polarity and acquisition length."""
- try:
- from masster.sample import Sample
-
- sample = Sample()
- sample.logger_update(level="ERROR") # Suppress logs
- sample.load(str(file_path))
-
- result = {'polarity': 'positive', 'length_minutes': 0.0}
-
- if hasattr(sample, 'scans_df') and sample.scans_df is not None:
- if not sample.scans_df.is_empty():
- rt_values = sample.scans_df.select('rt').to_numpy().flatten()
- if len(rt_values) > 0:
- # RT is in seconds, convert to minutes
- result['length_minutes'] = float(rt_values.max()) / 60.0
-
- # For mzML, polarity detection would require more detailed parsing
- # For now, use default
- return result
-
- except Exception as e:
- self.logger.debug(f"Failed to analyze mzML file {file_path}: {e}")
- return {'polarity': 'positive', 'length_minutes': 0.0}
-
- def _analyze_raw_file(self, file_path: Path) -> Dict[str, Any]:
- """Analyze RAW file to extract polarity and acquisition length."""
- try:
- from masster.sample import Sample
-
- sample = Sample()
- sample.logger_update(level="ERROR") # Suppress logs
- sample.load(str(file_path))
-
- result = {'polarity': 'positive', 'length_minutes': 0.0}
-
- if hasattr(sample, 'scans_df') and sample.scans_df is not None:
- if not sample.scans_df.is_empty():
- rt_values = sample.scans_df.select('rt').to_numpy().flatten()
- if len(rt_values) > 0:
- # RT is in seconds, convert to minutes
- result['length_minutes'] = float(rt_values.max()) / 60.0
-
- # For RAW files, polarity detection would require more detailed parsing
- # For now, use default
- return result
-
- except Exception as e:
- self.logger.debug(f"Failed to analyze RAW file {file_path}: {e}")
- return {'polarity': 'positive', 'length_minutes': 0.0}
-
  def _infer_polarity_from_first_file(self) -> str:
  """
  Infer polarity from the first available raw data file.
@@ -538,10 +380,8 @@ class Wizard:
  source_info = self._analyze_source_files()

  # Update wizard parameters based on detected metadata
- if source_info['polarity'] != 'positive': # Only update if different from default
+ if source_info.get('polarity') and source_info['polarity'] != 'positive':
  self.params.polarity = source_info['polarity']
- # Update adducts based on detected polarity
- self.params.__post_init__()

  files_created = []

@@ -549,6 +389,9 @@ class Wizard:
  workflow_script_path = self.folder_path / "1_masster_workflow.py"
  workflow_content = self._generate_workflow_script_content(source_info)

+ # Apply test mode modifications
+ workflow_content = self._add_test_mode_support(workflow_content)
+
  with open(workflow_script_path, 'w', encoding='utf-8') as f:
  f.write(workflow_content)
  files_created.append(str(workflow_script_path))
@@ -573,7 +416,6 @@ class Wizard:
  }

  except Exception as e:
- self.logger.error(f"Failed to create scripts: {e}")
  return {
  "status": "error",
  "message": f"Failed to create scripts: {e}",
@@ -582,120 +424,109 @@ class Wizard:
  "source_info": {}
  }

- def _generate_workflow_script_content(self, source_info: Dict[str, Any]) -> str:
- """Generate the content for 1_masster_workflow.py script."""
+ def _analyze_source_files(self) -> Dict[str, Any]:
+ """Analyze source files to extract metadata."""
+ result = {
+ "number_of_files": 0,
+ "file_types": [],
+ "polarity": "positive",
+ "length_minutes": 0.0,
+ "first_file": None
+ }

- # Convert Path objects to strings for JSON serialization
- params_dict = {}
- for key, value in self.params.__dict__.items():
- if key == '_param_metadata': # Skip metadata in generated script
- continue
- if isinstance(value, Path):
- params_dict[key] = str(value)
- else:
- params_dict[key] = value
+ try:
+ # Find raw data files
+ extensions = [".wiff", ".raw", ".mzML"]
+ raw_files = []
+
+ for ext in extensions:
+ pattern = f"**/*{ext}"
+ files = list(self.source_path.rglob(pattern))
+ if files:
+ raw_files.extend(files)
+ if ext not in result["file_types"]:
+ result["file_types"].append(ext)
+
+ result["number_of_files"] = len(raw_files)
+
+ if raw_files:
+ result["first_file"] = str(raw_files[0])
+ # Simple heuristic: assume 30 minutes per file if we can't determine
+ result["length_minutes"] = 30.0
+
+ except Exception as e:
+ print(f"Warning: Could not analyze source files: {e}")
+
+ return result

- # Create readable PARAMS dict with comments including discovered info
- params_lines = []
- params_lines.append('# Analysis parameters (auto-detected from source files)')
- params_lines.append('PARAMS = {')
-
- # File Discovery Summary
- params_lines.append(' # === Source File Analysis ===')
- params_lines.append(f' "number_of_files": {source_info.get("number_of_files", 0)}, # Total raw data files found')
- params_lines.append(f' "file_types": {source_info.get("file_types", [])!r}, # Detected file extensions')
- params_lines.append(f' "length_minutes": {source_info.get("length_minutes", 0.0):.1f}, # Estimated acquisition length per file (minutes)')
- if source_info.get('first_file'):
- params_lines.append(f' "first_file": {source_info["first_file"]!r}, # First file analyzed for metadata')
- params_lines.append('')
-
- # Core Configuration
- params_lines.append(' # === Core Configuration ===')
- params_lines.append(f' "source": {params_dict.get("source", "")!r}, # Directory containing raw data files')
- params_lines.append(f' "folder": {params_dict.get("folder", "")!r}, # Output directory for processed study')
- params_lines.append(f' "polarity": {params_dict.get("polarity", "positive")!r}, # Ion polarity mode (auto-detected)')
- params_lines.append(f' "num_cores": {params_dict.get("num_cores", 4)}, # Number of CPU cores for parallel processing')
- params_lines.append('')
-
- # File Discovery
- params_lines.append(' # === File Discovery ===')
- params_lines.append(f' "file_extensions": {params_dict.get("file_extensions", [".wiff", ".raw", ".mzML"])!r}, # File extensions to search for')
- params_lines.append(f' "search_subfolders": {params_dict.get("search_subfolders", True)}, # Whether to search subdirectories recursively')
- params_lines.append(f' "skip_patterns": {params_dict.get("skip_patterns", ["blank", "condition"])!r}, # Filename patterns to skip')
- params_lines.append('')
-
- # Processing Parameters - Critical values to review
- params_lines.append(' # === Processing Parameters (REVIEW THESE VALUES) ===')
- params_lines.append(f' "adducts": {params_dict.get("adducts", [])!r}, # Adduct specifications for feature detection and annotation')
- params_lines.append(f' "detector_type": {params_dict.get("detector_type", "unknown")!r}, # MS detector type ("orbitrap", "tof", "unknown")')
- params_lines.append(f' "noise": {params_dict.get("noise", 50.0)}, # REVIEW: Noise threshold for feature detection. Set to 1e5 for Orbitraps')
- params_lines.append(f' "chrom_fwhm": {params_dict.get("chrom_fwhm", 0.5)}, # REVIEW: Chromatographic peak FWHM (seconds)')
- params_lines.append(f' "chrom_peak_snr": {params_dict.get("chrom_peak_snr", 5.0)}, # Minimum signal-to-noise ratio for chromatographic peaks')
- params_lines.append('')
-
- # Other parameters...
- params_lines.append(' # === Alignment & Merging ===')
- params_lines.append(f' "rt_tol": {params_dict.get("rt_tol", 5.0)}, # Retention time tolerance for alignment (seconds)')
- params_lines.append(f' "mz_tol": {params_dict.get("mz_tol", 0.01)}, # Mass-to-charge ratio tolerance for alignment (Da)')
- params_lines.append(f' "alignment_method": {params_dict.get("alignment_method", "kd")!r}, # Algorithm for sample alignment')
- params_lines.append(f' "min_samples_per_feature": {params_dict.get("min_samples_per_feature", 1)}, # Minimum samples required per consensus feature')
- params_lines.append(f' "merge_method": {params_dict.get("merge_method", "qt")!r}, # Method for merging consensus features')
- params_lines.append('')
-
- # Other params
- params_lines.append(' # === Sample Processing ===')
- params_lines.append(f' "batch_size": {params_dict.get("batch_size", 8)}, # Number of files to process per batch')
- params_lines.append(f' "memory_limit_gb": {params_dict.get("memory_limit_gb", 16.0)}, # Memory limit for processing (GB)')
- params_lines.append('')
-
- params_lines.append(' # === Script Options ===')
- params_lines.append(f' "resume_enabled": {params_dict.get("resume_enabled", True)}, # Enable automatic resume capability')
- params_lines.append(f' "force_reprocess": {params_dict.get("force_reprocess", False)}, # Force reprocessing of existing files')
- params_lines.append(f' "cleanup_temp_files": {params_dict.get("cleanup_temp_files", True)}, # Clean up temporary files after processing')
-
- params_lines.append('}')
+ def _generate_workflow_script_content(self, source_info: Dict[str, Any]) -> str:
+ """Generate the content for 1_masster_workflow.py script."""

- # Create script lines
  script_lines = [
  '#!/usr/bin/env python3',
  '"""',
- 'MASSter Workflow Script - Sample Processing',
- f'Generated by masster wizard v{version}',
- '',
- 'Source Analysis:',
- f' - Files found: {source_info.get("number_of_files", 0)}',
- f' - File types: {", ".join(source_info.get("file_types", []))}',
- f' - Polarity detected: {source_info.get("polarity", "unknown")}',
- f' - Acquisition length: ~{source_info.get("length_minutes", 0.0):.1f} minutes per file',
- '',
- 'This script processes raw MS data files into sample5 format.',
- 'Review the NOISE and CHROM_FWHM parameters below before running.',
+ 'Automated Mass Spectrometry Data Analysis Pipeline',
+ 'Generated by masster wizard',
  '"""',
  '',
+ 'import os',
  'import sys',
  'import time',
  'from pathlib import Path',
- 'import concurrent.futures',
- 'import os',
  '',
  '# Import masster modules',
- 'from masster.sample import Sample',
+ 'from masster.study import Study',
  'from masster import __version__',
  '',
- ]
-
- # Add the formatted PARAMS
- script_lines.extend(params_lines)
-
- # Add the functions
- script_lines.extend([
+ '# Test mode configuration',
+ 'TEST_MODE = os.environ.get("MASSTER_TEST_MODE", "0") == "1"',
+ 'TEST_ONLY = os.environ.get("MASSTER_TEST_ONLY", "0") == "1" # Only run test, don\'t continue to full batch',
+ '',
+ '# Analysis parameters',
+ 'PARAMS = {',
+ ' # === Core Configuration ===',
+ f' "source": {str(self.source_path)!r}, # Directory containing raw data files',
+ f' "folder": {str(self.folder_path)!r}, # Output directory for processed study',
+ f' "polarity": {self.params.polarity!r}, # Ion polarity mode ("positive" or "negative")',
+ f' "num_cores": {self.params.num_cores}, # Number of CPU cores for parallel processing',
+ '',
+ ' # === Test Mode ===',
+ ' "test_mode": TEST_MODE, # Process only first file for testing',
+ ' "test_only": TEST_ONLY, # Stop after test, don\'t run full batch',
+ '',
+ ' # === File Discovery ===',
+ f' "file_extensions": {self.params.file_extensions!r}, # File extensions to search for',
+ f' "search_subfolders": {self.params.search_subfolders}, # Whether to search subdirectories recursively',
+ f' "skip_patterns": {self.params.skip_patterns!r}, # Filename patterns to skip',
+ '',
+ ' # === Processing Parameters ===',
+ f' "adducts": {self.params.adducts!r}, # Adduct specifications for feature detection and annotation',
+ f' "noise": {self.params.noise}, # Noise threshold for feature detection',
+ f' "chrom_fwhm": {self.params.chrom_fwhm}, # Chromatographic peak full width at half maximum (seconds)',
+ f' "chrom_peak_snr": {self.params.chrom_peak_snr}, # Minimum signal-to-noise ratio for chromatographic peaks',
+ '',
+ ' # === Alignment & Merging ===',
+ f' "rt_tol": {self.params.rt_tolerance}, # Retention time tolerance for alignment (seconds)',
+ f' "mz_tol": {self.params.mz_max_diff}, # Mass-to-charge ratio tolerance for alignment (Da)',
+ f' "alignment_method": {self.params.alignment_algorithm!r}, # Algorithm for sample alignment',
+ f' "min_samples_per_feature": {self.params.min_samples_for_merge}, # Minimum samples required per consensus feature',
+ f' "merge_method": {self.params.merge_method!r}, # Method for merging consensus features',
+ '',
+ ' # === Sample Processing (used in add_samples_from_folder) ===',
+ f' "batch_size": {self.params.batch_size}, # Number of files to process per batch',
+ f' "memory_limit_gb": {self.params.memory_limit_gb}, # Memory limit for processing (GB)',
+ '',
+ ' # === Script Options ===',
+ f' "resume_enabled": {self.params.resume_enabled}, # Enable automatic resume capability',
+ f' "force_reprocess": {self.params.force_reprocess}, # Force reprocessing of existing files',
+ f' "cleanup_temp_files": {self.params.cleanup_temp_files}, # Clean up temporary files after processing',
+ '}',
  '',
  '',
- 'def discover_raw_files(source_folder, file_extensions, search_subfolders=True, skip_patterns=None):',
+ 'def discover_raw_files(source_folder, file_extensions, search_subfolders=True):',
  ' """Discover raw data files in the source folder."""',
  ' source_path = Path(source_folder)',
  ' raw_files = []',
- ' skip_patterns = skip_patterns or []',
  ' ',
  ' for ext in file_extensions:',
  ' if search_subfolders:',
@@ -704,191 +535,189 @@ class Wizard:
  ' else:',
  ' pattern = f"*{ext}"',
  ' files = list(source_path.glob(pattern))',
- ' ',
- ' # Filter out files matching skip patterns',
- ' for file in files:',
- ' skip_file = False',
- ' for skip_pattern in skip_patterns:',
- ' if skip_pattern.lower() in file.name.lower():',
- ' skip_file = True',
- ' break',
- ' if not skip_file:',
- ' raw_files.append(file)',
+ ' raw_files.extend(files)',
  ' ',
  ' return raw_files',
  '',
  '',
  'def process_single_file(args):',
- ' """Process a single raw file to sample5 format - designed for multiprocessing."""',
- ' raw_file, output_folder, params = args',
+ ' """Process a single raw file to sample5 format - module level for multiprocessing."""',
+ ' raw_file, output_folder = args',
+ ' from masster.sample import Sample',
  ' ',
  ' try:',
  ' # Create sample5 filename',
  ' sample_name = raw_file.stem',
  ' sample5_path = Path(output_folder) / f"{sample_name}.sample5"',
  ' ',
- ' # Skip if sample5 already exists and resume is enabled',
- ' if sample5_path.exists() and params["resume_enabled"]:',
- ' print(f"Skipping {raw_file.name} (sample5 already exists)")',
- ' return {"status": "skipped", "file": str(sample5_path), "message": "Already exists"}',
+ ' # Skip if sample5 already exists',
+ ' if sample5_path.exists() and not PARAMS["force_reprocess"]:',
+ ' print(f" Skipping {raw_file.name} (sample5 already exists)")',
+ ' return str(sample5_path)',
  ' ',
- ' print(f" 🔄 Processing {raw_file.name}...")',
- ' start_time = time.time()',
+ ' print(f" Converting {raw_file.name}...")',
  ' ',
  ' # Load and process raw file with full pipeline',
  ' sample = Sample(log_label=sample_name)',
  ' sample.load(filename=str(raw_file))',
  ' sample.find_features(',
- ' noise=params["noise"],',
- ' chrom_fwhm=params["chrom_fwhm"],',
- ' chrom_peak_snr=params["chrom_peak_snr"]',
+ ' noise=PARAMS["noise"],',
+ ' chrom_fwhm=PARAMS["chrom_fwhm"],',
+ ' chrom_peak_snr=PARAMS["chrom_peak_snr"]',
  ' )',
- ' # sample.find_adducts(adducts=params["adducts"])',
  ' sample.find_ms2()',
- ' # sample.find_iso() # Optional - can be uncommented if needed',
+ ' sample.find_iso()',
+ ' # sample.export_mgf()',
+ ' # sample.plot_2d(filename=f"{sample5_path.replace(".sample5", ".html")}")',
  ' sample.save(str(sample5_path))',
  ' ',
- ' elapsed = time.time() - start_time',
- ' print(f" ✅ Completed {raw_file.name} -> {sample5_path.name} ({elapsed:.1f}s)")',
- ' ',
- ' return {"status": "success", "file": str(sample5_path), "elapsed": elapsed}',
+ ' # print(f" Completed {raw_file.name} -> {sample5_path.name}")',
+ ' return str(sample5_path)',
  ' ',
  ' except Exception as e:',
- ' print(f"ERROR processing {raw_file.name}: {e}")',
- ' return {"status": "error", "file": str(raw_file), "error": str(e)}',
+ ' print(f" ERROR processing {raw_file.name}: {e}")',
+ ' return None',
  '',
  '',
- 'def convert_raw_to_sample5_parallel(raw_files, output_folder, params):',
- ' """Convert raw data files to sample5 format with parallel processing and progress tracking."""',
+ 'def convert_raw_to_sample5(raw_files, output_folder, polarity, num_cores):',
+ ' """Convert raw data files to sample5 format."""',
  ' import concurrent.futures',
  ' import os',
  ' ',
  ' # Create output directory',
  ' os.makedirs(output_folder, exist_ok=True)',
  ' ',
- ' print(f"\\n🚀 Processing {len(raw_files)} files using {params[\'num_cores\']} CPU cores...")',
- ' print("=" * 70)',
- ' ',
  ' # Prepare arguments for multiprocessing',
- ' file_args = [(raw_file, output_folder, params) for raw_file in raw_files]',
- ' ',
- ' # Process files in parallel with progress tracking',
- ' results = []',
- ' successful = 0',
- ' skipped = 0',
- ' failed = 0',
- ' total_elapsed = 0',
+ ' file_args = [(raw_file, output_folder) for raw_file in raw_files]',
  ' ',
- ' with concurrent.futures.ProcessPoolExecutor(max_workers=params["num_cores"]) as executor:',
- ' # Submit all jobs',
- ' future_to_file = {executor.submit(process_single_file, args): args[0] for args in file_args}',
+ ' # Process files in parallel',
+ ' sample5_files = []',
+ ' with concurrent.futures.ProcessPoolExecutor(max_workers=num_cores) as executor:',
+ ' futures = [executor.submit(process_single_file, args) for args in file_args]',
  ' ',
- ' # Collect results as they complete',
- ' for i, future in enumerate(concurrent.futures.as_completed(future_to_file), 1):',
+ ' for future in concurrent.futures.as_completed(futures):',
  ' result = future.result()',
- ' results.append(result)',
- ' ',
- ' if result["status"] == "success":',
- ' successful += 1',
- ' total_elapsed += result.get("elapsed", 0)',
- ' elif result["status"] == "skipped":',
- ' skipped += 1',
- ' else:',
- ' failed += 1',
- ' ',
- ' # Progress update',
- ' print(f"\\r Progress: {i}/{len(raw_files)} files completed ({successful} success, {skipped} skipped, {failed} failed)", end="", flush=True)',
- ' ',
- ' print() # New line after progress',
- ' print("=" * 70)',
- ' ',
- ' # Summary',
- ' if successful > 0:',
- ' avg_time = total_elapsed / successful',
- ' print(f"✅ Successfully processed {successful} files (avg: {avg_time:.1f}s per file)")',
- ' if skipped > 0:',
- ' print(f"⏩ Skipped {skipped} files (already exist)")',
- ' if failed > 0:',
- ' print(f"❌ Failed to process {failed} files")',
- ' for result in results:',
- ' if result["status"] == "error":',
- ' print(f" - {Path(result[\'file\']).name}: {result[\'error\']}")',
+ ' if result:',
+ ' sample5_files.append(result)',
  ' ',
- ' # Return list of successful sample5 files',
- ' sample5_files = [result["file"] for result in results if result["status"] in ["success", "skipped"]]',
  ' return sample5_files',
  '',
  '',
  'def main():',
- ' """Main sample processing workflow."""',
+ ' """Main analysis pipeline."""',
  ' try:',
  ' print("=" * 70)',
- f' print("MASSter {version} - Sample Processing Workflow")',
+ f' print("masster {version} - Automated MS Data Analysis")',
  ' print("=" * 70)',
  ' print(f"Source: {PARAMS[\'source\']}")',
  ' print(f"Output: {PARAMS[\'folder\']}")',
- ' print(f"Polarity: {PARAMS[\'polarity\']} (detected)")',
+ ' print(f"Polarity: {PARAMS[\'polarity\']}")',
  ' print(f"CPU Cores: {PARAMS[\'num_cores\']}")',
  ' print("=" * 70)',
- ' print("\\n⚙️ IMPORTANT: Review these parameters before processing:")',
- ' print(f" NOISE threshold: {PARAMS[\'noise\']} (adjust based on your instrument)")',
- ' print(f" CHROM_FWHM: {PARAMS[\'chrom_fwhm\']}s (adjust based on your chromatography)")',
- ' print(" You can edit these values in the PARAMS section above.")',
- ' print("=" * 70)',
  ' ',
  ' start_time = time.time()',
  ' ',
  ' # Step 1: Discover raw data files',
- ' print("\\n📁 Step 1/2: Discovering raw data files...")',
+ ' print("\\nStep 1/7: Discovering raw data files...")',
  ' raw_files = discover_raw_files(',
  ' PARAMS[\'source\'],',
  ' PARAMS[\'file_extensions\'],',
- ' PARAMS[\'search_subfolders\'],',
- ' PARAMS[\'skip_patterns\']',
+ ' PARAMS[\'search_subfolders\']',
  ' )',
  ' ',
  ' if not raw_files:',
- ' print("No raw data files found!")',
+ ' print("No raw data files found!")',
  ' return False',
  ' ',
  ' print(f"Found {len(raw_files)} raw data files")',
- ' for i, f in enumerate(raw_files[:5]): # Show first 5 files',
- ' print(f" {i+1}. {f.name}")',
+ ' for f in raw_files[:5]: # Show first 5 files',
+ ' print(f" {f.name}")',
  ' if len(raw_files) > 5:',
- ' print(f" ... and {len(raw_files) - 5} more files")',
+ ' print(f" ... and {len(raw_files) - 5} more")',
  ' ',
- ' # Step 2: Process raw files to sample5',
- ' print("\\n🔄 Step 2/2: Processing raw files to sample5 format...")',
- ' sample5_files = convert_raw_to_sample5_parallel(',
+ ' # Step 2: Process raw files',
+ ' print("\\nStep 2/7: Processing raw files...")',
+ ' sample5_files = convert_raw_to_sample5(',
  ' raw_files,',
  ' PARAMS[\'folder\'],',
- ' PARAMS',
+ ' PARAMS[\'polarity\'],',
+ ' PARAMS[\'num_cores\']',
  ' )',
  ' ',
  ' if not sample5_files:',
- ' print("No sample5 files were created!")',
+ ' print("No sample5 files were created!")',
  ' return False',
  ' ',
- ' # Summary',
+ ' print(f"Successfully processed {len(sample5_files)} files to sample5")',
+ ' ',
+ ' # Step 3: Create and configure study',
+ ' print("\\nStep 3/7: Initializing study...")',
+ ' study = Study(folder=PARAMS[\'folder\'])',
+ ' study.polarity = PARAMS[\'polarity\']',
+ ' study.adducts = PARAMS[\'adducts\']',
+ ' ',
+ ' # Step 4: Add sample5 files to study',
+ ' print("\\nStep 4/7: Adding samples to study...")',
+ ' study.add(str(Path(PARAMS[\'folder\']) / "*.sample5"))',
+ ' study.features_filter(study.features_select(chrom_coherence=0.1, chrom_prominence_scaled=1))',
+ ' ',
+ ' # Step 5: Core processing',
+ ' print("\\nStep 5/7: Processing...")',
+ ' study.align(',
+ ' algorithm=PARAMS[\'alignment_method\'],',
+ ' rt_tol=PARAMS[\'rt_tol\']',
+ ' )',
+ ' ',
+ ' study.merge(',
+ ' method="qt",',
+ ' min_samples=PARAMS[\'min_samples_per_feature\'],',
+ ' threads=PARAMS[\'num_cores\'],',
+ ' rt_tol=PARAMS[\'rt_tol\']',
+ ' )',
+ ' study.find_iso()',
+ ' study.fill()',
+ ' study.integrate()',
+ ' ',
+ ' # Step 6/7: Saving results',
+ ' print("\\nStep 6/7: Saving results...")',
+ ' study.save()',
+ ' study.export_xlsx()',
+ ' study.export_mgf()',
+ ' study.export_mztab()',
+ ' ',
+ ' # Step 7: Plots',
+ ' print("\\nStep 7/7: Exporting plots...")',
+ ' study.plot_consensus_2d(filename="consensus.html")',
+ ' study.plot_consensus_2d(filename="consensus.png")',
+ ' study.plot_alignment(filename="alignment.html")',
+ ' study.plot_alignment(filename="alignment.png")',
+ ' study.plot_samples_pca(filename="pca.html")',
+ ' study.plot_samples_pca(filename="pca.png")',
+ ' study.plot_bpc(filename="bpc.html")',
+ ' study.plot_bpc(filename="bpc.png")',
+ ' study.plot_rt_correction(filename="rt_correction.html")',
+ ' study.plot_rt_correction(filename="rt_correction.png")',
+ ' ',
+ ' # Print summary',
+ ' study.info()',
  ' total_time = time.time() - start_time',
  ' print("\\n" + "=" * 70)',
- ' print("🎉 SAMPLE PROCESSING COMPLETE")',
+ ' print("ANALYSIS COMPLETE")',
  ' print("=" * 70)',
- ' print(f"Processing time: {total_time/60:.1f} minutes")',
- ' print(f"Raw files found: {len(raw_files)}")',
+ ' print(f"Total processing time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")',
+ ' print(f"Raw files processed: {len(raw_files)}")',
  ' print(f"Sample5 files created: {len(sample5_files)}")',
- ' print("\\nNext steps:")',
- ' print("1. Run the interactive analysis: uv run marimo edit 2_interactive_analysis.py")',
- ' print("2. Or use the sample5 files in your own analysis scripts")',
+ ' if hasattr(study, "consensus_df"):',
+ ' print(f"Consensus features generated: {len(study.consensus_df)}")',
  ' print("=" * 70)',
  ' ',
  ' return True',
  ' ',
  ' except KeyboardInterrupt:',
- ' print("\\n❌ Processing interrupted by user")',
+ ' print("\\nAnalysis interrupted by user")',
  ' return False',
  ' except Exception as e:',
- ' print(f" Processing failed with error: {e}")',
+ ' print(f"Analysis failed with error: {e}")',
  ' import traceback',
  ' traceback.print_exc()',
  ' return False',
@@ -897,115 +726,292 @@ class Wizard:
  'if __name__ == "__main__":',
  ' success = main()',
  ' sys.exit(0 if success else 1)',
- ])
+ ]
+
+ return '\n'.join(script_lines)
+
+ def _generate_interactive_notebook_content(self, source_info: Dict[str, Any]) -> str:
+ """Generate the content for 2_interactive_analysis.py marimo notebook."""
+
+ notebook_lines = [
+ 'import marimo',
+ '',
+ '__generated_with = "0.9.14"',
+ 'app = marimo.App(width="medium")',
+ '',
+ '@app.cell',
+ 'def __():',
+ ' import marimo as mo',
+ ' return (mo,)',
+ '',
+ '@app.cell',
+ 'def __(mo):',
+ ' mo.md(r"""',
+ ' # MASSter Interactive Analysis',
+ ' ',
+ f' **Source:** {source_info.get("number_of_files", 0)} files detected',
+ f' **Polarity:** {source_info.get("polarity", "unknown")}',
+ ' ',
+ ' This notebook provides interactive exploration of your processed study.',
+ ' Make sure you have run `python 1_masster_workflow.py` first.',
+ ' """)',
+ ' return ()',
+ '',
+ '@app.cell',
+ 'def __():',
+ ' import masster',
+ ' return (masster,)',
+ '',
+ '@app.cell',
+ 'def __(masster):',
+ ' study = masster.Study(folder=".")',
+ ' return (study,)',
+ '',
+ '@app.cell',
+ 'def __(study):',
+ ' study.info()',
+ ' return ()',
+ '',
+ 'if __name__ == "__main__":',
+ ' app.run()',
+ ]
+
+ return '\n'.join(notebook_lines)
+
+ def _generate_instructions(self, source_info: Dict[str, Any], files_created: List[str]) -> List[str]:
+ """Generate usage instructions for the created scripts."""
+ instructions = [f"Source analysis: {source_info.get('number_of_files', 0)} files found",
+ f"Polarity detected: {source_info.get('polarity', 'unknown')}",
+ "Files created:"]
+ for file_path in files_created:
+ instructions.append(f" ✅ {str(Path(file_path).resolve())}")
+
+ # Find the workflow script name from created files
+ workflow_script_name = "1_masster_workflow.py"
+ for file_path in files_created:
+ if Path(file_path).name == "1_masster_workflow.py":
+ workflow_script_name = Path(file_path).name
+ break
+
+ instructions.extend([
+ "",
+ "Next steps:",
+ f"1. REVIEW PARAMETERS in {workflow_script_name}:",
+ f" In particular, verify the NOISE, CHROM_FWHM, and MIN_SAMPLES_FOR_MERGE",
+ "",
+ "2. TEST SINGLE FILE (RECOMMENDED):",
+ f" wizard.test_only() # Validate parameters with first file only",
+ "",
+ "3. EXECUTE FULL BATCH:",
+ f" wizard.run() # Process all files",
+ f" # OR: wizard.test_and_run() # Test first, then run all",
+ f" # OR: uv run python {workflow_script_name}",
+ "",
+ "4. INTERACTIVE ANALYSIS:",
+ f" uv run marimo edit {Path('2_interactive_analysis.py').name}",
+ ""]
+ )
+
+ return instructions
+
+ def _add_test_mode_support(self, workflow_content: str) -> str:
+ """Add test mode functionality to the generated workflow script."""
+ lines = workflow_content.split('\n')
+
+ # Insert test mode code after print statements in main function
+ for i, line in enumerate(lines):
+ # Add test mode print after the masster version line
+ if 'print("masster' in line and 'Automated MS Data Analysis")' in line:
+ lines.insert(i + 1, ' if TEST_MODE:')
+ lines.insert(i + 2, ' print("🧪 TEST MODE: Processing single file only")')
+ break
+
+ # Add mode info after num_cores print
+ for i, line in enumerate(lines):
+ if 'print(f"CPU Cores: {PARAMS[\'num_cores\']}")' in line:
+ lines.insert(i + 1, ' if TEST_MODE:')
+ lines.insert(i + 2, ' print(f"Mode: {\'Test Only\' if TEST_ONLY else \'Test + Full Batch\'}")')
+ break
+
+ # Add file limitation logic after file listing
+ for i, line in enumerate(lines):
+ if 'print(f" ... and {len(raw_files) - 5} more")' in line:
+ lines.insert(i + 1, ' ')
+ lines.insert(i + 2, ' # Limit to first file in test mode')
+ lines.insert(i + 3, ' if TEST_MODE:')
+ lines.insert(i + 4, ' raw_files = raw_files[:1]')
+ lines.insert(i + 5, ' print(f"\\n🧪 TEST MODE: Processing only first file: {raw_files[0].name}")')
+ break
+
+ # Modify num_cores for test mode
+ for i, line in enumerate(lines):
+ if 'PARAMS[\'num_cores\']' in line and 'convert_raw_to_sample5(' in lines[i-2:i+3]:
+ lines[i] = line.replace('PARAMS[\'num_cores\']', 'PARAMS[\'num_cores\'] if not TEST_MODE else 1 # Use single core for test')
+ break
+
+ # Add test-only exit logic after successful processing
+ for i, line in enumerate(lines):
+ if 'print(f"Successfully processed {len(sample5_files)} files to sample5")' in line:
+ lines.insert(i + 1, ' ')
+ lines.insert(i + 2, ' # Stop here if test-only mode')
+ lines.insert(i + 3, ' if TEST_ONLY:')
+ lines.insert(i + 4, ' print("\\n🧪 TEST ONLY mode: Stopping after successful single file processing")')
+ lines.insert(i + 5, ' print(f"Test file created: {sample5_files[0]}")')
+ lines.insert(i + 6, ' print("\\nTo run full batch, use: wizard.run()")')
+ lines.insert(i + 7, ' total_time = time.time() - start_time')
+ lines.insert(i + 8, ' print(f"\\nTest processing time: {total_time:.1f} seconds")')
+ lines.insert(i + 9, ' return True')
+ break
+
+ return '\n'.join(lines)
+
+ def test_and_run(self) -> Dict[str, Any]:
+ """
+ Test the sample processing workflow with a single file, then run full batch.
+
+ This method runs the 1_masster_workflow.py script in test mode to process
+ the first raw file for validation, then automatically continues with the
+ full batch if the test succeeds. The script must already exist - call
+ create_scripts() first if needed.
+
+ Returns:
+ Dictionary containing:
+ - status: "success" or "error"
+ - message: Status message
+ - instructions: List of next steps
+ """
+ return self._execute_workflow(test_mode=True)
+
+ def test_only(self) -> Dict[str, Any]:
+ """
+ Test the sample processing workflow with a single file only.

- return '\n'.join(script_lines)
+ This method runs the 1_masster_workflow.py script in test-only mode to process
+ only the first raw file and then stops (does not continue to full study processing).
+ The script must already exist - call create_scripts() first if needed.
+
+ Returns:
+ Dictionary containing:
+ - status: "success" or "error"
+ - message: Status message
+ - instructions: List of next steps
+ - test_file: Path to the processed test file (if successful)
+ """
+ return self._execute_workflow(test_mode=True, test_only=True)

- def analyze(self) -> Dict[str, Any]:
+ def run(self) -> Dict[str, Any]:
  """
- Execute the complete analysis workflow.
+ Run the sample processing workflow.

- This method:
- 1. Checks if 1_masster_workflow.py exists and runs it
- 2. If not, creates scripts first then runs the workflow
- 3. Provides clear feedback about next steps
+ This method runs the 1_masster_workflow.py script to process raw files.
+ The script must already exist - call create_scripts() first if needed.

  Returns:
- Dictionary containing execution results and instructions
+ Dictionary containing:
+ - status: "success" or "error"
+ - message: Status message
+ - instructions: List of next steps
+ """
+ return self._execute_workflow(test_mode=False)
+
+ def _execute_workflow(self, test_mode: bool = False, test_only: bool = False) -> Dict[str, Any]:
  """
- workflow_script = self.folder_path / "1_masster_workflow.py"
+ Execute the workflow script in either test or full mode.

+ Args:
+ test_mode: If True, run in test mode (single file), otherwise full batch
+ test_only: If True, stop after single file test (only used with test_mode=True)
+ """
  try:
+ workflow_script_path = self.folder_path / "1_masster_workflow.py"
+
  # Check if workflow script exists
- if workflow_script.exists():
- print("📋 Found existing workflow script, executing...")
- return self._execute_workflow_script(workflow_script)
+ if not workflow_script_path.exists():
+ return {
+ "status": "error",
+ "message": "Workflow script not found. Please run create_scripts() first.",
+ "instructions": [
+ "❌ Missing 1_masster_workflow.py",
+ "Run: wizard.create_scripts()",
+ "Then: wizard.run()"
+ ]
+ }
+
+ # Setup execution mode
+ if test_only:
+ mode_label = "test-only"
+ elif test_mode:
+ mode_label = "test"
  else:
- print("📝 Creating analysis scripts...")
- # Create scripts first
- result = self.create_scripts()
-
- if result["status"] != "success":
- return result
-
- # Print instructions
- print("\n" + "="*70)
- for instruction in result["instructions"]:
- print(instruction)
- print("="*70)
+ mode_label = "full batch"

- # Ask user if they want to proceed with execution
- print("\n🤔 Would you like to proceed with sample processing now?")
- print(" This will execute 1_masster_workflow.py")
- response = input(" Proceed? [y/N]: ").strip().lower()
+ env = None
+ if test_mode:
+ import os
+ env = os.environ.copy()
+ env['MASSTER_TEST_MODE'] = '1'
+ if test_only:
+ env['MASSTER_TEST_ONLY'] = '1'

- if response in ['y', 'yes']:
- return self._execute_workflow_script(workflow_script)
- else:
- print(" Processing paused. Run the scripts manually when ready.")
- return {
- "status": "scripts_created",
- "message": "Scripts created successfully, execution deferred",
- "instructions": result["instructions"],
- "files_created": result["files_created"]
- }
-
- except Exception as e:
- self.logger.error(f"Analysis failed: {e}")
- return {
- "status": "error",
- "message": f"Analysis failed: {e}",
- "instructions": [],
- "files_created": []
- }
-
- def _execute_workflow_script(self, script_path: Path) -> Dict[str, Any]:
- """Execute the workflow script and return results."""
- try:
- print(f"🚀 Executing {script_path.name}...")
+ # Execute the workflow script
+ print(f"🚀 Executing {mode_label} processing workflow...")
+ print(f"📄 Running: {workflow_script_path.name}")
+ print("=" * 60)

  import subprocess
  result = subprocess.run([
- sys.executable, str(script_path)
- ], cwd=str(self.folder_path), capture_output=False, text=True)
+ sys.executable, str(workflow_script_path)
+ ], cwd=str(self.folder_path), env=env)

  success = result.returncode == 0

  if success:
- print("="*70)
- print("✅ Workflow execution completed successfully!")
- print("="*70)
- print("Next step: Run interactive analysis")
- print(" uv run marimo edit 2_interactive_analysis.py")
- print("="*70)
+ print("=" * 60)
+ if test_only:
+ print("✅ Test-only processing completed successfully!")
+ print("📋 Single file validated - ready for full batch")
+ print(" wizard.run()")
+ elif test_mode:
+ print("✅ Test processing completed successfully!")
+ print("📋 Next step: Run full batch")
+ print(" wizard.run()")
+ else:
+ print("✅ Sample processing completed successfully!")
+ print("📋 Next step: Run interactive analysis")
+ print(" uv run marimo edit 2_interactive_analysis.py")
+ print("=" * 60)
+
+ next_step = ("Next: wizard.run()" if test_mode else
+ "Next: uv run marimo edit 2_interactive_analysis.py")

  return {
  "status": "success",
- "message": "Workflow completed successfully",
+ "message": f"{mode_label.capitalize()} processing completed successfully",
  "instructions": [
- "✅ Sample processing completed",
- "Next: uv run marimo edit 2_interactive_analysis.py"
- ],
- "files_created": []
+ f"✅ {mode_label.capitalize()} processing completed",
+ next_step
+ ]
  }
  else:
  return {
  "status": "error",
- "message": f"Workflow execution failed with code {result.returncode}",
+ "message": f"Workflow execution failed with return code {result.returncode}",
  "instructions": [
  "❌ Check the error messages above",
  "Review parameters in 1_masster_workflow.py",
- "Try running: python 1_masster_workflow.py"
- ],
- "files_created": []
+ f"Try running manually: python {workflow_script_path.name}"
+ ]
  }

  except Exception as e:
  return {
  "status": "error",
  "message": f"Failed to execute workflow: {e}",
- "instructions": [],
- "files_created": []
+ "instructions": [
+ "❌ Execution failed",
+ "Check that source files exist and are accessible",
+ "Verify folder permissions"
+ ]
  }

  def _generate_script_content(self) -> str:
@@ -1313,413 +1319,74 @@ class Wizard:

  return '\n'.join(script_lines)

- def _generate_notebook_content(self) -> str:
- """Generate the content for a marimo interactive notebook."""
-
- notebook_lines = [
- 'import marimo',
- '',
- '__generated_with = "0.9.14"',
- 'app = marimo.App(width="medium")',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' import marimo as mo',
- ' return (mo,)',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' # MASSter Interactive Analysis',
- ' ',
- ' This notebook provides interactive exploration of your mass spectrometry study results.',
- ' The study has been processed and is ready for analysis.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' # Import masster',
- ' import masster',
- ' return (masster,)',
- '',
- '',
- '@app.cell',
- 'def __(masster):',
- ' # Load the processed study',
- ' study = masster.Study(folder=".")',
- ' study.load()',
- ' return (study,)',
- '',
- '',
- '@app.cell',
- 'def __(mo, study):',
- ' # Display study information',
- ' mo.md(f"""',
- ' ## Study Overview',
- ' ',
- ' **Samples:** {len(study.samples) if hasattr(study, "samples") else "Not loaded"}',
- ' ',
- ' **Features:** {len(study.consensus_df) if hasattr(study, "consensus_df") else "Not available"}',
- ' ',
- ' **Polarity:** {study.polarity if hasattr(study, "polarity") else "Unknown"}',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Print detailed study info',
- ' study.info()',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Quick Visualizations',
- ' ',
- ' Use the cells below to create interactive plots of your data.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Generate consensus 2D plot',
- ' if hasattr(study, "consensus_df") and len(study.consensus_df) > 0:',
- ' study.plot_consensus_2d(filename="consensus_interactive.html")',
- ' print("Consensus 2D plot saved as: consensus_interactive.html")',
- ' else:',
- ' print("No consensus features available for plotting")',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Generate PCA plot',
- ' if hasattr(study, "samples") and len(study.samples) > 1:',
- ' study.plot_samples_pca(filename="pca_interactive.html")',
- ' print("PCA plot saved as: pca_interactive.html")',
- ' else:',
- ' print("Not enough samples for PCA analysis")',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Data Export',
- ' ',
- ' Export your processed data in various formats.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Export options',
- ' if hasattr(study, "consensus_df"):',
- ' # Export to Excel',
- ' study.export_xlsx(filename="study_results.xlsx")',
- ' print("✓ Results exported to: study_results.xlsx")',
- ' ',
- ' # Export to MGF',
- ' study.export_mgf(filename="study_spectra.mgf")',
- ' print("✓ Spectra exported to: study_spectra.mgf")',
- ' else:',
- ' print("No data available for export")',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Custom Analysis',
- ' ',
- ' Add your own analysis code in the cells below.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Access consensus features dataframe',
- ' if hasattr(study, "consensus_df"):',
- ' df = study.consensus_df',
- ' print(f"Consensus features shape: {df.shape}")',
- ' print("\\nFirst 5 features:")',
- ' print(df.head())',
- ' return (df,) if "df" in locals() else ()',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' # Your custom analysis here',
- ' pass',
- '',
- '',
- 'if __name__ == "__main__":',
- ' app.run()',
- ]
-
- return '\n'.join(notebook_lines)
-
-
- def create_analysis(
- source: str,
- folder: str,
- filename: str = 'run_masster.py',
- polarity: str = "positive",
- adducts: Optional[List[str]] = None,
- params: Optional[wizard_def] = None,
- num_cores: int = 0,
- **kwargs
- ) -> bool:
- """
- Create standalone analysis scripts without initializing a Wizard instance.
-
- This function generates analysis scripts with the specified configuration.
-
- Parameters:
- source: Directory containing raw data files
- folder: Output directory for processed study
- filename: Filename for the generated script (deprecated, will create standard files)
- polarity: Ion polarity mode ("positive" or "negative")
- adducts: List of adduct specifications (auto-set if None)
- params: Custom wizard_def parameters (optional)
- num_cores: Number of CPU cores (0 = auto-detect)
- **kwargs: Additional parameters to override defaults
-
- Returns:
- True if scripts were generated successfully, False otherwise
-
- Example:
- >>> from masster.wizard import create_analysis
- >>> create_analysis(
- ... source=r'D:\\Data\\raw_files',
- ... folder=r'D:\\Data\\output',
- ... polarity='positive'
- ... )
- """
-
- try:
- # Create parameters
- if params is not None:
- # Use provided params as base
- wizard_params = params
- # Update with provided values
- wizard_params.source = source
- wizard_params.folder = folder
- if polarity != "positive": # Only override if explicitly different
- wizard_params.polarity = polarity
- if num_cores > 0:
- wizard_params.num_cores = num_cores
- if adducts is not None:
- wizard_params.adducts = adducts
- else:
- # Create new params with provided values
- wizard_params = wizard_def(
- source=source,
- folder=folder,
- polarity=polarity,
- num_cores=max(1, int(multiprocessing.cpu_count() * 0.75)) if num_cores <= 0 else num_cores
- )
-
- if adducts is not None:
- wizard_params.adducts = adducts
-
- # Apply any additional kwargs
- for key, value in kwargs.items():
- if hasattr(wizard_params, key):
- setattr(wizard_params, key, value)
-
- # Ensure study folder exists
- study_path = Path(folder)
- study_path.mkdir(parents=True, exist_ok=True)
-
- # Create a temporary Wizard instance to generate the scripts
- temp_wizard = Wizard(params=wizard_params)
-
- # Generate the scripts using the new method
- result = temp_wizard.create_scripts()
-
- if result["status"] == "success":
- print("Scripts created successfully!")
- for instruction in result["instructions"]:
- print(instruction)
-
- return result["status"] == "success"
-
- except Exception as e:
- print(f"Failed to create scripts: {e}")
- import traceback
- traceback.print_exc()
- return False
-

- def analyze(
- source: str,
- folder: str,
- filename: str = 'run_masster.py',
+ def create_scripts(
+ source: str = "",
+ folder: str = "",
  polarity: str = "positive",
  adducts: Optional[List[str]] = None,
- params: Optional[wizard_def] = None,
  num_cores: int = 0,
  **kwargs
- ) -> bool:
+ ) -> Dict[str, Any]:
  """
- Create and execute a standalone analysis script for automated MS data processing.
+ Create analysis scripts without explicitly instantiating a Wizard.

- This function generates a Python script with the same parameters as create_analysis(),
- but immediately executes it after creation. Combines script generation and execution
- in a single step.
+ This is a convenience function that creates a Wizard instance internally
+ and calls its create_scripts() method.

  Parameters:
  source: Directory containing raw data files
- folder: Output directory for processed study
- filename: Filename for the generated script (should end with .py)
+ folder: Output directory for processed study
  polarity: Ion polarity mode ("positive" or "negative")
  adducts: List of adduct specifications (auto-set if None)
- params: Custom wizard_def parameters (optional)
  num_cores: Number of CPU cores (0 = auto-detect)
- **kwargs: Additional parameters to override defaults
+ **kwargs: Additional parameters

  Returns:
- True if script was created and executed successfully, False otherwise
+ Dictionary containing:
+ - status: "success" or "error"
+ - message: Status message
+ - instructions: List of next steps
+ - files_created: List of created file paths
+ - source_info: Metadata about source files

  Example:
- >>> from masster.wizard import analyze
- >>> analyze(
+ >>> import masster.wizard
+ >>> result = masster.wizard.create_scripts(
  ... source=r'D:\\Data\\raw_files',
  ... folder=r'D:\\Data\\output',
- ... polarity='positive'
+ ... polarity='negative'
  ... )
+ >>> print("Status:", result["status"])
  """

  try:
- # Create parameters (same logic as create_analysis)
- if params is not None:
- wizard_params = params
- wizard_params.source = source
- wizard_params.folder = folder
- if polarity != "positive":
- wizard_params.polarity = polarity
- if num_cores > 0:
- wizard_params.num_cores = num_cores
- if adducts is not None:
- wizard_params.adducts = adducts
- else:
- wizard_params = wizard_def(
- source=source,
- folder=folder,
- polarity=polarity,
- num_cores=max(1, int(multiprocessing.cpu_count() * 0.75)) if num_cores <= 0 else num_cores
- )
-
- if adducts is not None:
- wizard_params.adducts = adducts
-
- # Apply any additional kwargs
- for key, value in kwargs.items():
- if hasattr(wizard_params, key):
- setattr(wizard_params, key, value)
+ # Auto-detect optimal number of cores if not specified
+ if num_cores <= 0:
+ num_cores = max(1, int(multiprocessing.cpu_count() * 0.75))

- # Create Wizard instance and run analysis
- wizard = Wizard(params=wizard_params)
- result = wizard.analyze()
+ # Create Wizard instance
+ wizard = Wizard(
+ source=source,
+ folder=folder,
+ polarity=polarity,
+ adducts=adducts,
+ num_cores=num_cores,
+ **kwargs
+ )

- # Return success status
- return result.get("status") in ["success", "scripts_created"]
+ # Call the instance method
+ return wizard.create_scripts()

  except Exception as e:
- print(f"Failed to execute script: {e}")
- import traceback
- traceback.print_exc()
- return False
-
-
- def _generate_interactive_notebook_content(self, source_info: Dict[str, Any]) -> str:
- """Generate the content for 2_interactive_analysis.py marimo notebook."""
-
- notebook_lines = [
- 'import marimo',
- '',
- '__generated_with = "0.9.14"',
- 'app = marimo.App(width="medium")',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' import marimo as mo',
- ' return (mo,)',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' # MASSter Interactive Analysis',
- ' ',
- f' **Source:** {source_info.get("number_of_files", 0)} files ({", ".join(source_info.get("file_types", []))}) detected',
- f' **Polarity:** {source_info.get("polarity", "unknown")} (auto-detected)',
- f' **Acquisition length:** ~{source_info.get("length_minutes", 0.0):.1f} minutes per file',
- ' ',
- ' This notebook provides interactive exploration of your processed mass spectrometry study.',
- ' Make sure you have run `python 1_masster_workflow.py` first to create the sample5 files.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' # Import masster',
- ' import masster',
- ' return (masster,)',
- '',
- '',
- '@app.cell',
- 'def __(masster):',
- ' # Load the study from sample5 files',
- ' study = masster.Study(folder=".")',
- ' return (study,)',
- '',
- '',
- '@app.cell',
- 'def __(mo, study):',
- ' # Display study information',
- ' study.info()',
- ' return ()',
- '',
- '',
- 'if __name__ == "__main__":',
- ' app.run()',
- ]
-
- return '\n'.join(notebook_lines)
-
- def _generate_instructions(self, source_info: Dict[str, Any], files_created: List[str]) -> List[str]:
- """Generate usage instructions for the created scripts."""
- instructions = [
- "🎯 NEXT STEPS:",
- "",
- f"Source analysis completed: {source_info.get('number_of_files', 0)} files found",
- f"Polarity detected: {source_info.get('polarity', 'unknown')}",
- f"Estimated processing time: {source_info.get('number_of_files', 0) * source_info.get('length_minutes', 0.0) * 0.1:.1f} minutes",
- "",
- "1. REVIEW PARAMETERS:",
- " Edit 1_masster_workflow.py and verify these key settings:",
- " - NOISE threshold (adjust based on your instrument sensitivity)",
- " - CHROM_FWHM (adjust based on your chromatography peak width)",
- "",
- "2. EXECUTE SAMPLE PROCESSING:",
- " python 1_masster_workflow.py",
- " (This will process all raw files to sample5 format)",
- "",
- "3. INTERACTIVE ANALYSIS:",
- " uv run marimo edit 2_interactive_analysis.py",
- " (This opens an interactive notebook for data exploration)",
- "",
- "FILES CREATED:"
- ]
-
- for file_path in files_created:
- instructions.append(f" ✅ {Path(file_path).name}")
-
- return instructions
+ return {
+ "status": "error",
+ "message": f"Failed to create scripts: {e}",
+ "instructions": [],
+ "files_created": [],
+ "source_info": {}
+ }


  # Export the main classes and functions
- __all__ = ["Wizard", "wizard_def", "create_analysis", "analyze"]
+ __all__ = ["Wizard", "wizard_def", "create_scripts"]
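
For non-interactive use, the module-level create_analysis()/analyze() helpers are likewise replaced by a single create_scripts() function, and the generated 1_masster_workflow.py reads two environment variables to enable test mode. A sketch of driving both from your own code, under the assumption that create_scripts is importable from masster.wizard as the new __all__ suggests (the Windows paths are placeholders copied from the docstring example):

    import os
    import subprocess
    import sys

    from masster.wizard import create_scripts  # assumed re-export per __all__

    # Generate the workflow script and notebook without keeping a Wizard around.
    result = create_scripts(
        source=r"D:\Data\raw_files",   # placeholder: your raw data directory
        folder=r"D:\Data\output",      # placeholder: your output directory
        polarity="negative",
        num_cores=0,                   # 0 = auto-detect (75% of available cores)
    )
    print("Status:", result["status"])

    # The generated script honors MASSTER_TEST_MODE and MASSTER_TEST_ONLY,
    # so a single-file dry run can also be launched directly:
    env = os.environ.copy()
    env["MASSTER_TEST_MODE"] = "1"  # process only the first raw file
    env["MASSTER_TEST_ONLY"] = "1"  # stop after the test instead of the full batch
    subprocess.run([sys.executable, "1_masster_workflow.py"],
                   cwd=r"D:\Data\output", env=env, check=True)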