masster 0.4.13__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Example script demonstrating the Wizard class for automated study processing.
4
+
5
+ This script shows how to use the Wizard class to automatically process
6
+ mass spectrometry data from raw files to final study results.
7
+ """
8
+
9
+ from pathlib import Path
10
+ from masster import Wizard, wizard_def
11
+
12
def main():
    """Walk through four usage examples of the Wizard class.

    The examples run in order: a one-call pipeline run, a step-by-step run
    driven by a ``wizard_def`` parameter object, construction of a wizard
    with ``resume_enabled=True``, and a status/log-file report for the
    first wizard. All paths are hard-coded placeholders that must be
    adjusted to real data before running.
    """
    divider = "\n" + "=" * 60 + "\n"

    # -----------------------------------------------------------------
    # EXAMPLE 1: Basic Usage with Minimal Configuration
    # -----------------------------------------------------------------
    print("=== Example 1: Basic Wizard Usage ===\n")

    # Placeholder locations (adjust these for your data).
    raw_dir = r"D:\Data\raw_files"  # Directory with .wiff, .raw, .mzML files
    out_dir = r"D:\Data\processed_study"  # Output directory

    basic_wizard = Wizard(
        data_source=raw_dir,
        study_folder=out_dir,
        polarity="positive",  # or "negative"
        num_cores=4,
    )

    # One call drives the complete pipeline; the result is used as a flag.
    pipeline_ok = basic_wizard.run_full_pipeline()
    if not pipeline_ok:
        print("❌ Processing failed. Check logs for details.")
    else:
        print("✅ Processing completed successfully!")
        basic_wizard.info()  # Print status summary

    print(divider)

    # -----------------------------------------------------------------
    # EXAMPLE 2: Advanced Configuration with Custom Parameters
    # -----------------------------------------------------------------
    print("=== Example 2: Advanced Wizard Configuration ===\n")

    # Keyword arguments are grouped by topic and merged in the call below.
    core_settings = {
        "data_source": raw_dir,
        "study_folder": out_dir + "_advanced",
        "polarity": "negative",
        "num_cores": 8,
    }
    discovery_settings = {
        "file_extensions": [".wiff", ".raw", ".mzML"],
        "search_subfolders": True,
        "skip_patterns": ["blank", "QC", "test", "solvent"],
    }
    processing_settings = {
        "adducts": ["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
        "batch_size": 4,  # Process 4 files at once
        "memory_limit_gb": 32.0,
    }
    detection_settings = {
        "chrom_fwhm": 0.15,  # Narrower peaks for UHPLC
        "noise_threshold": 5e4,  # Lower noise threshold
        "chrom_peak_snr": 7.0,  # Higher S/N requirement
        "tol_ppm": 8.0,  # Tighter mass tolerance
    }
    assembly_settings = {
        "rt_tolerance": 1.0,  # Tighter RT tolerance
        "mz_tolerance": 0.008,  # Tighter m/z tolerance
        "min_samples_for_merge": 30,  # Require feature in at least 30 samples
        "merge_method": "chunked",  # Memory-efficient merging
    }
    output_settings = {
        "generate_plots": True,
        "generate_interactive": True,
        "export_formats": ["csv", "xlsx", "mgf", "parquet"],
        "compress_output": True,
        "adaptive_compression": True,
    }
    advanced_settings = {
        "resume_enabled": True,  # Can resume if interrupted
        "force_reprocess": False,  # Skip already processed files
        "backup_enabled": True,
        "cleanup_temp_files": True,
        "log_level": "INFO",
        "verbose_progress": True,
    }
    custom_params = wizard_def(
        **core_settings,
        **discovery_settings,
        **processing_settings,
        **detection_settings,
        **assembly_settings,
        **output_settings,
        **advanced_settings,
    )

    staged_wizard = Wizard(params=custom_params)

    # Individual steps can be invoked one by one for more control.
    print("Running step-by-step processing...")

    # Step 1: Discover files
    discovered = staged_wizard.discover_files()
    print(f"Found {len(discovered)} files for processing")

    # Steps 2-7: each method is looked up and called in order; its message
    # is printed only when the call returns a truthy value. A falsy result
    # does not stop the following steps.
    staged_steps = (
        ("convert_to_sample5", "✅ Sample5 conversion completed"),
        ("assemble_study", "✅ Study assembly completed"),
        ("align_and_merge", "✅ Alignment and merging completed"),
        ("generate_plots", "✅ Plot generation completed"),
        ("export_results", "✅ Results exported"),
        ("save_study", "✅ Study saved"),
    )
    for method_name, done_message in staged_steps:
        if getattr(staged_wizard, method_name)():
            print(done_message)

    # Optional cleanup
    staged_wizard.cleanup_temp_files()
    print("✅ Cleanup completed")

    # Final status summary
    staged_wizard.info()

    print(divider)

    # -----------------------------------------------------------------
    # EXAMPLE 3: Resume Interrupted Processing
    # -----------------------------------------------------------------
    print("=== Example 3: Resume Capability ===\n")

    # A new wizard built with the same parameters is the documented way to
    # continue an interrupted run; the checkpoint handling itself lives in
    # the Wizard class and is not visible in this script.
    resumed_wizard = Wizard(
        data_source=raw_dir,
        study_folder=out_dir + "_resume",
        polarity="positive",
        num_cores=4,
        resume_enabled=True,  # This is the default
    )

    print("Status after loading checkpoint:")
    resumed_wizard.info()

    print(divider)

    # -----------------------------------------------------------------
    # EXAMPLE 4: Monitoring and Status
    # -----------------------------------------------------------------
    print("=== Example 4: Status Monitoring ===\n")

    # Status of the wizard created in Example 1.
    status_report = basic_wizard.get_status()
    print("Wizard Status:")
    for field, current in status_report.items():
        print(f" {field}: {current}")

    # Mention the log files only when they actually exist on disk.
    output_root = Path(out_dir)
    log_file = output_root / "wizard.log"
    if log_file.exists():
        print(f"\nDetailed logs available at: {log_file}")

    processing_log = output_root / "processing.log"
    if processing_log.exists():
        print(f"Processing summary at: {processing_log}")
180
+
181
def example_batch_different_polarities():
    """Run the full pipeline once for positive and once for negative mode.

    Two wizards are built from Windows-style placeholder paths, each with
    its own adduct list, and a one-line verdict per polarity is printed
    once both runs have returned.
    """
    print("=== Processing Both Polarities ===\n")

    raw_root = r"D:\Data\raw_files"
    output_root = r"D:\Data\processed_studies"

    def verdict(ok):
        # Map a truthy/falsy pipeline result to its report label.
        if ok:
            return '✅ Success'
        return '❌ Failed'

    # Positive mode
    positive_wizard = Wizard(
        data_source=raw_root + r"\positive",
        study_folder=output_root + r"\positive_study",
        polarity="positive",
        adducts=["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
        num_cores=6,
    )
    print("Processing positive mode data...")
    positive_ok = positive_wizard.run_full_pipeline()

    # Negative mode
    negative_wizard = Wizard(
        data_source=raw_root + r"\negative",
        study_folder=output_root + r"\negative_study",
        polarity="negative",
        adducts=["H-1:-:0.95", "Cl:-:0.05"],
        num_cores=6,
    )
    print("Processing negative mode data...")
    negative_ok = negative_wizard.run_full_pipeline()

    print("\nResults:")
    print(f"Positive mode: {verdict(positive_ok)}")
    print(f"Negative mode: {verdict(negative_ok)}")
216
+
217
+
218
if __name__ == "__main__":
    # Script entry point: run the examples defined in main() above.
    main()

    # Uncomment to also run the separate positive/negative polarity example.
    # example_batch_different_polarities()
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple test to verify the wizard module structure works correctly.
4
+ """
5
+
6
def test_wizard_module_import():
    """Check that ``Wizard`` and ``wizard_def`` import from the wizard module.

    The directory two levels above this file is pushed onto ``sys.path`` so
    that ``wizard`` resolves as a top-level module, then a ``wizard_def``
    instance is built and two of its attributes are printed.

    Returns:
        True when every step succeeds; False when any exception is raised
        (the traceback is printed rather than propagated).
    """
    try:
        # Imports are kept inside the try so that any failure is reported
        # through the same except branch.
        import sys
        from pathlib import Path

        # Make the parent-of-parent directory importable.
        package_root = Path(__file__).parent.parent
        sys.path.insert(0, str(package_root))

        # Import straight from the module rather than the package.
        from wizard import Wizard, wizard_def

        print("✅ Successfully imported Wizard from wizard module")
        print(f"✅ wizard_def class available: {wizard_def}")
        print(f"✅ Wizard class available: {Wizard}")

        # Build a parameter object to prove the class is usable.
        sample_params = wizard_def(
            data_source="/test/data",
            study_folder="/test/output",
            polarity="positive",
        )

        print(f"✅ Created wizard_def instance with polarity: {sample_params.polarity}")
        print(f"✅ Default adducts: {sample_params.adducts}")
    except Exception as exc:
        print(f"❌ Import failed: {exc}")
        import traceback

        traceback.print_exc()
        return False
    else:
        return True
41
+
42
if __name__ == "__main__":
    # Run the single check and frame its verdict between two 50-char rules.
    success = test_wizard_module_import()
    rule = "=" * 50
    print("\n" + rule)
    verdict = (
        "🎉 WIZARD MODULE STRUCTURE TEST PASSED!"
        if success
        else "❌ WIZARD MODULE STRUCTURE TEST FAILED!"
    )
    print(verdict)
    print(rule)
@@ -0,0 +1,285 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script for the Wizard class.
4
+
5
+ This script tests the basic functionality of the Wizard class without
6
+ requiring actual raw data files.
7
+ """
8
+
9
+ import tempfile
10
+ from pathlib import Path
11
+ import sys
12
+
13
+ # Add masster to path if needed
14
+ sys.path.insert(0, str(Path(__file__).parent.parent))
15
+
16
+ from masster import Wizard, wizard_def
17
+
18
+
19
def test_wizard_initialization():
    """Test wizard initialization and parameter handling.

    Three things are checked inside a temporary directory:

    1. A wizard built from keyword arguments exposes the requested polarity
       and core count, has a non-empty adduct list, and creates the study
       folder.
    2. An empty ``data_source`` is rejected with ``ValueError``.
    3. A wizard built from a ``wizard_def`` object reflects the custom
       polarity, batch size and plot setting.

    Raises:
        AssertionError: if any expectation is not met.
    """
    print("Testing Wizard initialization...")

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        data_source = temp_path / "data"
        study_folder = temp_path / "study"

        # Only the input directory is created up front; the wizard is
        # expected to create the output directory (asserted below).
        data_source.mkdir()

        # Test basic initialization
        wizard = Wizard(
            data_source=str(data_source),
            study_folder=str(study_folder),
            polarity="positive",
            num_cores=2,
        )

        assert wizard.polarity == "positive"
        assert wizard.params.num_cores == 2
        assert len(wizard.adducts) > 0  # Should have default adducts
        assert study_folder.exists()  # Should create output directory

        print("✅ Basic initialization works")

        # Test parameter validation. The failure is raised explicitly
        # instead of via `assert False`, which is removed when Python runs
        # with -O and would let a missing validation pass unnoticed.
        try:
            Wizard(
                data_source="",  # Empty data source should fail
                study_folder=str(study_folder),
            )
        except ValueError:
            print("✅ Parameter validation works")
        else:
            raise AssertionError("Should have failed with empty data_source")

        # Test custom parameters
        custom_params = wizard_def(
            data_source=str(data_source),
            study_folder=str(study_folder / "custom"),
            polarity="negative",
            num_cores=4,
            adducts=["H-1:-:1.0", "Cl:-:0.1"],
            batch_size=2,
            generate_plots=False,
        )

        custom_wizard = Wizard(params=custom_params)
        assert custom_wizard.polarity == "negative"
        assert custom_wizard.params.batch_size == 2
        assert not custom_wizard.params.generate_plots

        print("✅ Custom parameters work")
73
+
74
+
75
def test_file_discovery():
    """Exercise ``Wizard.discover_files`` against a mock directory tree.

    Six placeholder files are written, one of them in a subdirectory. The
    test asserts that the four ``sample*`` files are returned, that
    ``blank.wiff`` and ``QC_test.raw`` are not, and that switching
    ``params.search_subfolders`` off drops the subdirectory file.
    """
    print("\nTesting file discovery...")

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        raw_dir = root / "data"
        out_dir = root / "study"

        # Directory layout: data/ and data/subdir/
        raw_dir.mkdir()
        (raw_dir / "subdir").mkdir()

        # Placeholder files; contents are irrelevant to discovery.
        wanted = ("sample1.wiff", "sample2.raw", "sample3.mzML")
        unwanted = (
            "blank.wiff",  # Should be skipped
            "QC_test.raw",  # Should be skipped
        )
        nested = ("subdir/sample4.wiff",)
        for relative_name in (*wanted, *unwanted, *nested):
            target = raw_dir / relative_name
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text("mock file content")

        wizard = Wizard(
            data_source=str(raw_dir),
            study_folder=str(out_dir),
            polarity="positive",
        )

        # Recursive discovery
        recursive_hits = wizard.discover_files()
        recursive_names = [hit.name for hit in recursive_hits]

        # Sample files are expected, blanks and QC are not.
        for expected in wanted:
            assert expected in recursive_names
        assert "sample4.wiff" in recursive_names  # From subdirectory
        for rejected in unwanted:
            assert rejected not in recursive_names  # Should be skipped

        print(f"✅ Found {len(recursive_hits)} files, correctly filtered")

        # Same wizard, subdirectory search switched off.
        wizard.params.search_subfolders = False
        flat_hits = wizard.discover_files()
        flat_names = [hit.name for hit in flat_hits]

        assert "sample4.wiff" not in flat_names  # Should not find in subdir
        assert len(flat_hits) < len(recursive_hits)

        print("✅ Subdirectory search control works")
133
+
134
+
135
def test_wizard_status():
    """Verify status reporting and the checkpoint save/load round trip.

    The test reads ``get_status()`` on a fresh wizard, mutates
    ``current_step`` and ``processed_files`` and reads it again, writes a
    checkpoint through ``_save_checkpoint()``, and finally asserts that a
    second wizard on the same study folder exposes the saved state.
    """
    print("\nTesting status monitoring...")

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        raw_dir = root / "data"
        out_dir = root / "study"
        raw_dir.mkdir()

        # Arguments shared by both wizards in this test.
        shared_args = {
            "data_source": str(raw_dir),
            "study_folder": str(out_dir),
            "polarity": "positive",
        }
        wizard = Wizard(**shared_args)

        # Status straight after construction.
        initial = wizard.get_status()
        assert initial["current_step"] == "initialized"
        assert initial["processed_files"] == 0
        assert not initial["study_loaded"]

        print("✅ Initial status correct")

        # Status after the attributes are changed by hand; the status
        # entry for processed files is compared against the list length.
        wizard.current_step = "converting_to_sample5"
        wizard.processed_files = ["file1.wiff", "file2.raw"]

        updated = wizard.get_status()
        assert updated["current_step"] == "converting_to_sample5"
        assert updated["processed_files"] == 2

        print("✅ Status updates work")

        # Persist the state and confirm the checkpoint file appears.
        wizard._save_checkpoint()
        saved_checkpoint = wizard.checkpoint_file
        assert saved_checkpoint.exists()

        print("✅ Checkpoint saving works")

        # A second wizard on the same folder should pick the state up.
        new_wizard = Wizard(**shared_args, resume_enabled=True)

        assert len(new_wizard.processed_files) == 2
        assert new_wizard.current_step == "converting_to_sample5"

        print("✅ Checkpoint loading works")
190
+
191
+
192
def test_defaults_and_validation():
    """Check ``wizard_def`` defaults and its ``num_cores`` limit.

    Asserts that a default instance has at least one adduct, that negative
    and positive polarity yield different adduct lists, and that a
    requested ``num_cores`` of 999 ends up no larger than the machine's
    CPU count.
    """
    print("\nTesting parameter defaults and validation...")

    # A bare instance must already carry adducts.
    bare_defaults = wizard_def()
    assert len(bare_defaults.adducts) > 0

    # One instance per polarity, negative first and then positive.
    per_polarity = {
        mode: wizard_def(polarity=mode) for mode in ("negative", "positive")
    }

    # The adduct lists must differ between the two polarities.
    assert per_polarity["negative"].adducts != per_polarity["positive"].adducts

    print("✅ Polarity-specific defaults work")

    # An oversized core request should not survive construction.
    oversized = wizard_def(
        data_source="/test/path",
        study_folder="/test/output",
        num_cores=999,  # Should be capped to available cores
    )

    from multiprocessing import cpu_count

    available = cpu_count()
    assert oversized.num_cores <= available

    print("✅ Parameter validation works")
223
+
224
+
225
def test_logging_setup():
    """Confirm that a progress message ends up in the wizard's log file.

    A wizard is created with ``log_to_file=True`` and ``log_level="DEBUG"``,
    one message is sent through ``_log_progress``, and the file at
    ``wizard.log_file`` is checked for existence and for the message text.
    """
    print("\nTesting logging setup...")

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        raw_dir = root / "data"
        out_dir = root / "study"
        raw_dir.mkdir()

        wizard = Wizard(
            data_source=str(raw_dir),
            study_folder=str(out_dir),
            polarity="positive",
            log_to_file=True,
            log_level="DEBUG",
        )

        # Emit one message through the wizard's own progress logger.
        wizard._log_progress("Test message")

        # The log file must exist ...
        assert wizard.log_file.exists()

        # ... and must contain the message that was just logged.
        logged_text = wizard.log_file.read_text()
        assert "Test message" in logged_text

        print("✅ Logging setup works")
255
+
256
+
257
def main():
    """Run every test function in this file and report the outcome.

    Returns:
        True when all tests finish without raising; False as soon as one
        raises, after printing the error and its traceback.
    """
    rule = "=" * 50
    print(rule)
    print("WIZARD CLASS TESTS")
    print(rule)

    try:
        # Executed in this fixed order; the first exception aborts the run.
        for run_test in (
            test_wizard_initialization,
            test_file_discovery,
            test_wizard_status,
            test_defaults_and_validation,
            test_logging_setup,
        ):
            run_test()

        print("\n" + rule)
        print("🎉 ALL TESTS PASSED!")
        print(rule)
    except Exception as exc:
        print(f"\n❌ TEST FAILED: {exc}")
        import traceback

        traceback.print_exc()
        return False

    return True
281
+
282
+
283
if __name__ == "__main__":
    # Translate the boolean test result into a process exit status:
    # 0 when every test passed, 1 otherwise.
    success = main()
    exit_status = 0 if success else 1
    sys.exit(exit_status)