PyPI - masster - Versions diffs - 0.4.13__py3-none-any.whl → 0.4.16__py3-none-any.whl - Mend

masster 0.4.13__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (25) hide show

masster/__init__.py +2 -0
masster/_version.py +1 -1
masster/sample/sample.py +41 -0
masster/study/__init__.py +1 -0
masster/study/defaults/find_consensus_def.py +1 -1
masster/study/defaults/merge_def.py +69 -25
masster/study/h5.py +65 -106
masster/study/id.py +1 -1
masster/study/load.py +11 -6
masster/study/merge.py +1607 -0
masster/study/processing.py +0 -874
masster/study/save.py +1 -1
masster/study/study.py +79 -21
masster/wizard/README.md +373 -0
masster/wizard/__init__.py +11 -0
masster/wizard/example.py +223 -0
masster/wizard/test_structure.py +49 -0
masster/wizard/test_wizard.py +285 -0
masster/wizard/wizard.py +1175 -0
masster/wizard.py +1175 -0
{masster-0.4.13.dist-info → masster-0.4.16.dist-info}/METADATA +1 -1
{masster-0.4.13.dist-info → masster-0.4.16.dist-info}/RECORD +25 -17
{masster-0.4.13.dist-info → masster-0.4.16.dist-info}/WHEEL +0 -0
{masster-0.4.13.dist-info → masster-0.4.16.dist-info}/entry_points.txt +0 -0
{masster-0.4.13.dist-info → masster-0.4.16.dist-info}/licenses/LICENSE +0 -0

masster/wizard/example.py ADDED Viewed

@@ -0,0 +1,223 @@
+#!/usr/bin/env python3
+"""
+Example script demonstrating the Wizard class for automated study processing.
+This script shows how to use the Wizard class to automatically process
+mass spectrometry data from raw files to final study results.
+"""
+from pathlib import Path
+from masster import Wizard, wizard_def
+def main():
+    """Main example function."""
+    # =================================================================
+    # EXAMPLE 1: Basic Usage with Minimal Configuration
+    # =================================================================
+    print("=== Example 1: Basic Wizard Usage ===\n")
+    # Set up paths (adjust these for your data)
+    data_source = r"D:\Data\raw_files"  # Directory with .wiff, .raw, .mzML files
+    study_folder = r"D:\Data\processed_study"  # Output directory
+    # Create wizard with basic settings
+    wizard = Wizard(
+        data_source=data_source,
+        study_folder=study_folder,
+        polarity="positive",  # or "negative"
+        num_cores=4
+    )
+    # Run the complete pipeline
+    success = wizard.run_full_pipeline()
+    if success:
+        print("✅ Processing completed successfully!")
+        wizard.info()  # Print status summary
+    else:
+        print("❌ Processing failed. Check logs for details.")
+    print("\n" + "="*60 + "\n")
+    # =================================================================
+    # EXAMPLE 2: Advanced Configuration with Custom Parameters
+    # =================================================================
+    print("=== Example 2: Advanced Wizard Configuration ===\n")
+    # Create custom parameters
+    params = wizard_def(
+        # Core settings
+        data_source=data_source,
+        study_folder=study_folder + "_advanced",
+        polarity="negative",
+        num_cores=8,
+        # File discovery settings
+        file_extensions=[".wiff", ".raw", ".mzML"],
+        search_subfolders=True,
+        skip_patterns=["blank", "QC", "test", "solvent"],
+        # Processing parameters
+        adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
+        batch_size=4,  # Process 4 files at once
+        memory_limit_gb=32.0,
+        # Feature detection parameters
+        chrom_fwhm=0.15,  # Narrower peaks for UHPLC
+        noise_threshold=5e4,  # Lower noise threshold
+        chrom_peak_snr=7.0,  # Higher S/N requirement
+        tol_ppm=8.0,  # Tighter mass tolerance
+        # Study assembly parameters
+        rt_tolerance=1.0,  # Tighter RT tolerance
+        mz_tolerance=0.008,  # Tighter m/z tolerance
+        min_samples_for_merge=30,  # Require feature in at least 30 samples
+        merge_method="chunked",  # Memory-efficient merging
+        # Output options
+        generate_plots=True,
+        generate_interactive=True,
+        export_formats=["csv", "xlsx", "mgf", "parquet"],
+        compress_output=True,
+        adaptive_compression=True,
+        # Advanced options
+        resume_enabled=True,  # Can resume if interrupted
+        force_reprocess=False,  # Skip already processed files
+        backup_enabled=True,
+        cleanup_temp_files=True,
+        log_level="INFO",
+        verbose_progress=True,
+    )
+    # Create wizard with custom parameters
+    wizard_advanced = Wizard(params=params)
+    # You can also run individual steps for more control
+    print("Running step-by-step processing...")
+    # Step 1: Discover files
+    files = wizard_advanced.discover_files()
+    print(f"Found {len(files)} files for processing")
+    # Step 2: Convert to sample5 (can be resumed if interrupted)
+    if wizard_advanced.convert_to_sample5():
+        print("✅ Sample5 conversion completed")
+        # Step 3: Assemble study
+        if wizard_advanced.assemble_study():
+            print("✅ Study assembly completed")
+            # Step 4: Align and merge
+            if wizard_advanced.align_and_merge():
+                print("✅ Alignment and merging completed")
+                # Step 5: Generate plots
+                if wizard_advanced.generate_plots():
+                    print("✅ Plot generation completed")
+                # Step 6: Export results
+                if wizard_advanced.export_results():
+                    print("✅ Results exported")
+                # Step 7: Save final study
+                if wizard_advanced.save_study():
+                    print("✅ Study saved")
+                    # Optional cleanup
+                    wizard_advanced.cleanup_temp_files()
+                    print("✅ Cleanup completed")
+    # Print final status
+    wizard_advanced.info()
+    print("\n" + "="*60 + "\n")
+    # =================================================================
+    # EXAMPLE 3: Resume Interrupted Processing
+    # =================================================================
+    print("=== Example 3: Resume Capability ===\n")
+    # If processing was interrupted, you can resume by creating a new wizard
+    # with the same parameters. It will automatically detect and skip
+    # already processed files.
+    resume_wizard = Wizard(
+        data_source=data_source,
+        study_folder=study_folder + "_resume",
+        polarity="positive",
+        num_cores=4,
+        resume_enabled=True  # This is the default
+    )
+    # The wizard will automatically load checkpoint and continue from where it left off
+    print("Status after loading checkpoint:")
+    resume_wizard.info()
+    print("\n" + "="*60 + "\n")
+    # =================================================================
+    # EXAMPLE 4: Monitoring and Status
+    # =================================================================
+    print("=== Example 4: Status Monitoring ===\n")
+    # You can check wizard status at any time
+    status = wizard.get_status()
+    print("Wizard Status:")
+    for key, value in status.items():
+        print(f"  {key}: {value}")
+    # The wizard maintains comprehensive logs
+    log_file = Path(study_folder) / "wizard.log"
+    if log_file.exists():
+        print(f"\nDetailed logs available at: {log_file}")
+    processing_log = Path(study_folder) / "processing.log"
+    if processing_log.exists():
+        print(f"Processing summary at: {processing_log}")
+def example_batch_different_polarities():
+    """Example of processing positive and negative mode data separately."""
+    print("=== Processing Both Polarities ===\n")
+    base_data_source = r"D:\Data\raw_files"
+    base_output = r"D:\Data\processed_studies"
+    # Process positive mode
+    pos_wizard = Wizard(
+        data_source=base_data_source + r"\positive",
+        study_folder=base_output + r"\positive_study",
+        polarity="positive",
+        adducts=["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
+        num_cores=6
+    )
+    print("Processing positive mode data...")
+    pos_success = pos_wizard.run_full_pipeline()
+    # Process negative mode
+    neg_wizard = Wizard(
+        data_source=base_data_source + r"\negative",
+        study_folder=base_output + r"\negative_study",
+        polarity="negative",
+        adducts=["H-1:-:0.95", "Cl:-:0.05"],
+        num_cores=6
+    )
+    print("Processing negative mode data...")
+    neg_success = neg_wizard.run_full_pipeline()
+    print("\nResults:")
+    print(f"Positive mode: {'✅ Success' if pos_success else '❌ Failed'}")
+    print(f"Negative mode: {'✅ Success' if neg_success else '❌ Failed'}")
+if __name__ == "__main__":
+    # Script entry point: run the four walkthrough examples defined in main()
+    main()
+    # Optional: uncomment the next line to also process each polarity as its own study
+    # example_batch_different_polarities()

masster/wizard/test_structure.py ADDED Viewed

@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+"""
+Simple test to verify the wizard module structure works correctly.
+"""
+def test_wizard_module_import():
+    """Test that the wizard module can be imported."""
+    try:
+        # Test direct wizard module import
+        import sys
+        from pathlib import Path
+        # Add the masster directory to path
+        masster_path = Path(__file__).parent.parent
+        sys.path.insert(0, str(masster_path))
+        # Import wizard directly from its module
+        from wizard import Wizard, wizard_def
+        print("✅ Successfully imported Wizard from wizard module")
+        print(f"✅ wizard_def class available: {wizard_def}")
+        print(f"✅ Wizard class available: {Wizard}")
+        # Test creating wizard_def instance
+        defaults = wizard_def(
+            data_source="/test/data",
+            study_folder="/test/output",
+            polarity="positive"
+        )
+        print(f"✅ Created wizard_def instance with polarity: {defaults.polarity}")
+        print(f"✅ Default adducts: {defaults.adducts}")
+        return True
+    except Exception as e:
+        print(f"❌ Import failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+if __name__ == "__main__":
+    success = test_wizard_module_import()
+    print("\n" + "="*50)
+    if success:
+        print("🎉 WIZARD MODULE STRUCTURE TEST PASSED!")
+    else:
+        print("❌ WIZARD MODULE STRUCTURE TEST FAILED!")
+    print("="*50)

masster/wizard/test_wizard.py ADDED Viewed

@@ -0,0 +1,285 @@
+#!/usr/bin/env python3
+"""
+Test script for the Wizard class.
+This script tests the basic functionality of the Wizard class without
+requiring actual raw data files.
+"""
+import tempfile
+from pathlib import Path
+import sys
+# Add masster to path if needed
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from masster import Wizard, wizard_def
+def test_wizard_initialization():
+    """Test wizard initialization and parameter handling."""
+    print("Testing Wizard initialization...")
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        data_source = temp_path / "data"
+        study_folder = temp_path / "study"
+        # Create directories
+        data_source.mkdir()
+        # Test basic initialization
+        wizard = Wizard(
+            data_source=str(data_source),
+            study_folder=str(study_folder),
+            polarity="positive",
+            num_cores=2
+        )
+        assert wizard.polarity == "positive"
+        assert wizard.params.num_cores == 2
+        assert len(wizard.adducts) > 0  # Should have default adducts
+        assert study_folder.exists()  # Should create output directory
+        print("✅ Basic initialization works")
+        # Test parameter validation
+        try:
+            Wizard(
+                data_source="",  # Empty data source should fail
+                study_folder=str(study_folder)
+            )
+            assert False, "Should have failed with empty data_source"
+        except ValueError:
+            print("✅ Parameter validation works")
+        # Test custom parameters
+        custom_params = wizard_def(
+            data_source=str(data_source),
+            study_folder=str(study_folder / "custom"),
+            polarity="negative",
+            num_cores=4,
+            adducts=["H-1:-:1.0", "Cl:-:0.1"],
+            batch_size=2,
+            generate_plots=False
+        )
+        custom_wizard = Wizard(params=custom_params)
+        assert custom_wizard.polarity == "negative"
+        assert custom_wizard.params.batch_size == 2
+        assert not custom_wizard.params.generate_plots
+        print("✅ Custom parameters work")
+def test_file_discovery():
+    """Test file discovery functionality."""
+    print("\nTesting file discovery...")
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        data_source = temp_path / "data"
+        study_folder = temp_path / "study"
+        # Create test directory structure
+        data_source.mkdir()
+        (data_source / "subdir").mkdir()
+        # Create mock files
+        test_files = [
+            "sample1.wiff",
+            "sample2.raw",
+            "sample3.mzML",
+            "blank.wiff",  # Should be skipped
+            "QC_test.raw",  # Should be skipped
+            "subdir/sample4.wiff",
+        ]
+        for filename in test_files:
+            file_path = data_source / filename
+            file_path.parent.mkdir(parents=True, exist_ok=True)
+            file_path.write_text("mock file content")
+        # Create wizard
+        wizard = Wizard(
+            data_source=str(data_source),
+            study_folder=str(study_folder),
+            polarity="positive"
+        )
+        # Test file discovery
+        found_files = wizard.discover_files()
+        found_names = [f.name for f in found_files]
+        # Should find sample files but skip blanks and QC
+        assert "sample1.wiff" in found_names
+        assert "sample2.raw" in found_names
+        assert "sample3.mzML" in found_names
+        assert "sample4.wiff" in found_names  # From subdirectory
+        assert "blank.wiff" not in found_names  # Should be skipped
+        assert "QC_test.raw" not in found_names  # Should be skipped
+        print(f"✅ Found {len(found_files)} files, correctly filtered")
+        # Test without subdirectory search
+        wizard.params.search_subfolders = False
+        found_files_no_sub = wizard.discover_files()
+        found_names_no_sub = [f.name for f in found_files_no_sub]
+        assert "sample4.wiff" not in found_names_no_sub  # Should not find in subdir
+        assert len(found_files_no_sub) < len(found_files)
+        print("✅ Subdirectory search control works")
+def test_wizard_status():
+    """Test status monitoring and checkpointing."""
+    print("\nTesting status monitoring...")
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        data_source = temp_path / "data"
+        study_folder = temp_path / "study"
+        data_source.mkdir()
+        wizard = Wizard(
+            data_source=str(data_source),
+            study_folder=str(study_folder),
+            polarity="positive"
+        )
+        # Test initial status
+        status = wizard.get_status()
+        assert status["current_step"] == "initialized"
+        assert status["processed_files"] == 0
+        assert not status["study_loaded"]
+        print("✅ Initial status correct")
+        # Test status update
+        wizard.current_step = "converting_to_sample5"
+        wizard.processed_files = ["file1.wiff", "file2.raw"]
+        status = wizard.get_status()
+        assert status["current_step"] == "converting_to_sample5"
+        assert status["processed_files"] == 2
+        print("✅ Status updates work")
+        # Test checkpoint save/load
+        wizard._save_checkpoint()
+        checkpoint_file = wizard.checkpoint_file
+        assert checkpoint_file.exists()
+        print("✅ Checkpoint saving works")
+        # Create new wizard and test checkpoint loading
+        new_wizard = Wizard(
+            data_source=str(data_source),
+            study_folder=str(study_folder),
+            polarity="positive",
+            resume_enabled=True
+        )
+        # Should load from checkpoint
+        assert len(new_wizard.processed_files) == 2
+        assert new_wizard.current_step == "converting_to_sample5"
+        print("✅ Checkpoint loading works")
+def test_defaults_and_validation():
+    """Test default parameter classes and validation."""
+    print("\nTesting parameter defaults and validation...")
+    # Test wizard_def defaults
+    defaults = wizard_def()
+    # Should set polarity-specific adducts
+    assert len(defaults.adducts) > 0
+    # Test polarity switching
+    neg_defaults = wizard_def(polarity="negative")
+    pos_defaults = wizard_def(polarity="positive")
+    # Should have different adducts
+    assert neg_defaults.adducts != pos_defaults.adducts
+    print("✅ Polarity-specific defaults work")
+    # Test parameter validation
+    defaults = wizard_def(
+        data_source="/test/path",
+        study_folder="/test/output",
+        num_cores=999  # Should be capped to available cores
+    )
+    import multiprocessing
+    max_cores = multiprocessing.cpu_count()
+    assert defaults.num_cores <= max_cores
+    print("✅ Parameter validation works")
+def test_logging_setup():
+    """Test logging configuration."""
+    print("\nTesting logging setup...")
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        data_source = temp_path / "data"
+        study_folder = temp_path / "study"
+        data_source.mkdir()
+        wizard = Wizard(
+            data_source=str(data_source),
+            study_folder=str(study_folder),
+            polarity="positive",
+            log_to_file=True,
+            log_level="DEBUG"
+        )
+        # Test logging
+        wizard._log_progress("Test message")
+        # Check log files exist
+        assert wizard.log_file.exists()
+        # Check log content
+        log_content = wizard.log_file.read_text()
+        assert "Test message" in log_content
+        print("✅ Logging setup works")
+def main():
+    """Run all tests."""
+    print("=" * 50)
+    print("WIZARD CLASS TESTS")
+    print("=" * 50)
+    try:
+        test_wizard_initialization()
+        test_file_discovery()
+        test_wizard_status()
+        test_defaults_and_validation()
+        test_logging_setup()
+        print("\n" + "=" * 50)
+        print("🎉 ALL TESTS PASSED!")
+        print("=" * 50)
+    except Exception as e:
+        print(f"\n❌ TEST FAILED: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+    return True
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)

masster 0.4.13__py3-none-any.whl → 0.4.16__py3-none-any.whl

Potentially problematic release.

masster 0.4.13__py3-none-any.whl → 0.4.16__py3-none-any.whl