masster 0.4.13__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/save.py CHANGED
@@ -53,7 +53,7 @@ def save(self, filename=None, add_timestamp=True, compress=False):
53
53
  if hasattr(self, "samples_df") and not self.samples_df.is_empty()
54
54
  else 0
55
55
  )
56
- self.logger.info(
56
+ self.logger.debug(
57
57
  f"Saving study with {sample_count} samples and {feature_count} features to {filename}",
58
58
  )
59
59
 
masster/study/study.py CHANGED
@@ -116,7 +116,11 @@ from masster.study.plot import plot_tic
116
116
  from masster.study.plot import plot_eic
117
117
  from masster.study.plot import plot_rt_correction
118
118
  from masster.study.processing import align
119
- from masster.study.processing import merge
119
+ from masster.study.merge import merge
120
+ from masster.study.merge import _reset_consensus_data
121
+ from masster.study.merge import _extract_consensus_features
122
+ from masster.study.merge import _perform_adduct_grouping
123
+ from masster.study.merge import _finalize_merge
120
124
  from masster.study.processing import integrate
121
125
  from masster.study.processing import find_ms2
122
126
  from masster.study.parameters import store_history
@@ -146,7 +150,6 @@ from masster.study.defaults.align_def import align_defaults
146
150
  from masster.study.defaults.export_def import export_mgf_defaults
147
151
  from masster.study.defaults.fill_chrom_def import fill_chrom_defaults
148
152
  from masster.study.defaults.fill_def import fill_defaults
149
- from masster.study.defaults.find_consensus_def import find_consensus_defaults
150
153
  from masster.study.defaults.find_ms2_def import find_ms2_defaults
151
154
  from masster.study.defaults.integrate_chrom_def import integrate_chrom_defaults
152
155
  from masster.study.defaults.integrate_def import integrate_defaults
@@ -385,7 +388,6 @@ class Study:
385
388
  fill_chrom = fill # Backward compatibility alias
386
389
  fill_single = fill_single
387
390
  fill_chrom_single = fill_single # Backward compatibility alias
388
-
389
391
  # === Data Retrieval and Access ===
390
392
  get_consensus = get_consensus
391
393
  get_chrom = get_chrom
@@ -490,13 +492,18 @@ class Study:
490
492
  _format_adduct_name = _format_adduct_name
491
493
  _parse_element_counts = _parse_element_counts
492
494
 
495
+ # === Merge Helper Methods ===
496
+ _reset_consensus_data = _reset_consensus_data
497
+ _extract_consensus_features = _extract_consensus_features
498
+ _perform_adduct_grouping = _perform_adduct_grouping
499
+ _finalize_merge = _finalize_merge
500
+
493
501
  # === Default Parameters ===
494
502
  study_defaults = study_defaults
495
503
  align_defaults = align_defaults
496
504
  export_mgf_defaults = export_mgf_defaults
497
505
  fill_chrom_defaults = fill_chrom_defaults
498
506
  fill_defaults = fill_defaults
499
- find_consensus_defaults = find_consensus_defaults
500
507
  find_ms2_defaults = find_ms2_defaults
501
508
  integrate_chrom_defaults = integrate_chrom_defaults
502
509
  integrate_defaults = integrate_defaults
@@ -587,6 +594,51 @@ class Study:
587
594
  except Exception as e:
588
595
  self.logger.error(f"Failed to reload current module {current_module}: {e}")
589
596
 
597
+ def __dir__(self):
598
+ """
599
+ Custom __dir__ implementation to hide internal methods starting with '_'
600
+ and backward compatibility aliases from tab completion and dir() calls,
601
+ while keeping them accessible to class methods.
602
+
603
+ Returns:
604
+ list: List of public attribute and method names (excluding internal and deprecated methods)
605
+ """
606
+ # Define backward compatibility aliases to hide
607
+ backward_compatibility_aliases = {
608
+ 'add_folder', # alias for add
609
+ 'find_consensus', # alias for merge
610
+ 'integrate_chrom', # alias for integrate
611
+ 'fill_chrom', # alias for fill
612
+ 'fill_chrom_single', # alias for fill_single
613
+ 'filter_consensus', # alias for consensus_filter
614
+ 'select_consensus', # alias for consensus_select
615
+ 'filter_features', # alias for features_filter
616
+ 'select_features', # alias for features_select
617
+ 'consensus_find', # alias for merge
618
+ }
619
+
620
+ # Get all attributes from the class
621
+ all_attrs = set()
622
+
623
+ # Add attributes from the class and all its bases
624
+ for cls in self.__class__.__mro__:
625
+ all_attrs.update(cls.__dict__.keys())
626
+
627
+ # Add instance attributes
628
+ all_attrs.update(self.__dict__.keys())
629
+
630
+ # Filter out attributes starting with '_' (but keep special methods like __init__, __str__, etc.)
631
+ # Also filter out backward compatibility aliases
632
+ public_attrs = [
633
+ attr for attr in all_attrs
634
+ if not attr.startswith('_') or attr.startswith('__') and attr.endswith('__')
635
+ ]
636
+
637
+ # Remove backward compatibility aliases from the public attributes
638
+ public_attrs = [attr for attr in public_attrs if attr not in backward_compatibility_aliases]
639
+
640
+ return sorted(public_attrs)
641
+
590
642
  def __str__(self):
591
643
  """
592
644
  Returns a string representation of the study.
@@ -632,11 +684,13 @@ class Study:
632
684
  samples count, and various statistics. Shows warning symbols for values
633
685
  that are out of normal range.
634
686
  """
635
- # Cache DataFrame lengths and existence checks
636
- consensus_df_len = (
637
- len(self.consensus_df) if not self.consensus_df.is_empty() else 0
638
- )
639
- samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
687
+ if self.consensus_df is None or self.consensus_df.is_empty():
688
+ self.consensus_df = pl.DataFrame()
689
+ consensus_df_len = 0
690
+ else:
691
+ consensus_df_len = len(self.consensus_df)
692
+
693
+ samples_df_len = len(self.samples_df) if (self.samples_df is not None and not self.samples_df.is_empty()) else 0
640
694
 
641
695
  # Calculate consensus statistics only if consensus_df exists and has data
642
696
  if consensus_df_len > 0:
@@ -658,7 +712,8 @@ class Study:
658
712
  max_samples = 0
659
713
 
660
714
  # Count only features where 'filled' == False
661
- if not self.features_df.is_empty() and "filled" in self.features_df.columns:
715
+ if (self.features_df is not None and not self.features_df.is_empty() and
716
+ "filled" in self.features_df.columns):
662
717
  unfilled_features_count = self.features_df.filter(
663
718
  ~self.features_df["filled"],
664
719
  ).height
@@ -666,7 +721,8 @@ class Study:
666
721
  unfilled_features_count = 0
667
722
 
668
723
  # Calculate features in consensus vs not in consensus (only for unfilled features)
669
- if not self.features_df.is_empty() and not self.consensus_mapping_df.is_empty():
724
+ if (self.features_df is not None and not self.features_df.is_empty() and
725
+ self.consensus_mapping_df is not None and not self.consensus_mapping_df.is_empty()):
670
726
  # Get unfilled features only
671
727
  unfilled_features = (
672
728
  self.features_df.filter(~self.features_df["filled"])
@@ -720,6 +776,7 @@ class Study:
720
776
  if (
721
777
  consensus_df_len > 0
722
778
  and samples_df_len > 0
779
+ and self.features_df is not None
723
780
  and not self.features_df.is_empty()
724
781
  ):
725
782
  # Ensure matching data types for join keys
@@ -745,7 +802,8 @@ class Study:
745
802
  )
746
803
 
747
804
  # Use more efficient counting - count non-null chroms only for features in consensus mapping
748
- if not self.consensus_mapping_df.is_empty():
805
+ if (self.consensus_mapping_df is not None and
806
+ not self.consensus_mapping_df.is_empty()):
749
807
  non_null_chroms = (
750
808
  self.features_df.join(
751
809
  self.consensus_mapping_df.select("feature_uid"),
@@ -767,16 +825,16 @@ class Study:
767
825
  chrom_completeness = 0
768
826
 
769
827
  # Calculate consensus features with MS2 (count unique consensus_uids with MS2)
770
- if not self.consensus_ms2.is_empty():
828
+ if self.consensus_ms2 is not None and not self.consensus_ms2.is_empty():
771
829
  consensus_with_ms2_count = self.consensus_ms2["consensus_uid"].n_unique()
772
830
  else:
773
831
  consensus_with_ms2_count = 0
774
832
 
775
- if not self.consensus_df.is_empty():
833
+ if self.consensus_df is not None and not self.consensus_df.is_empty():
776
834
  # Compute RT spread using only consensus rows with number_samples >= half the number of samples
777
835
  threshold = (
778
836
  self.consensus_df.select(pl.col("number_samples").max()).item() / 2
779
- if not self.samples_df.is_empty()
837
+ if (self.samples_df is not None and not self.samples_df.is_empty())
780
838
  else 0
781
839
  )
782
840
  filtered = self.consensus_df.filter(pl.col("number_samples") >= threshold)
@@ -803,16 +861,16 @@ class Study:
803
861
 
804
862
  # Total MS2 spectra count
805
863
  total_ms2_count = (
806
- len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
864
+ len(self.consensus_ms2) if (self.consensus_ms2 is not None and not self.consensus_ms2.is_empty()) else 0
807
865
  )
808
866
 
809
867
  # Estimate memory usage
810
868
  memory_usage = (
811
- self.samples_df.estimated_size()
812
- + self.features_df.estimated_size()
813
- + self.consensus_df.estimated_size()
814
- + self.consensus_ms2.estimated_size()
815
- + self.consensus_mapping_df.estimated_size()
869
+ (self.samples_df.estimated_size() if self.samples_df is not None else 0)
870
+ + (self.features_df.estimated_size() if self.features_df is not None else 0)
871
+ + (self.consensus_df.estimated_size() if self.consensus_df is not None else 0)
872
+ + (self.consensus_ms2.estimated_size() if self.consensus_ms2 is not None else 0)
873
+ + (self.consensus_mapping_df.estimated_size() if self.consensus_mapping_df is not None else 0)
816
874
  )
817
875
 
818
876
  # Add warning symbols for out-of-range values
@@ -0,0 +1,373 @@
1
+ # Wizard Class Documentation
2
+
3
+ The `Wizard` class provides comprehensive automation for mass spectrometry data processing, from raw files to final study results. It handles the complete workflow with minimal user intervention while providing intelligent resume capabilities, parallel processing optimization, and adaptive output formats.
4
+
5
+ ## Quick Start
6
+
7
+ ### Basic Usage
8
+
9
+ ```python
10
+ from masster import Wizard
11
+
12
+ # Create wizard with minimal configuration
13
+ wizard = Wizard(
14
+ data_source="./raw_data", # Directory with raw files
15
+ study_folder="./processed", # Output directory
16
+ polarity="positive", # or "negative"
17
+ num_cores=4 # CPU cores to use
18
+ )
19
+
20
+ # Run complete pipeline
21
+ success = wizard.run_full_pipeline()
22
+
23
+ if success:
24
+ wizard.info() # Print summary
25
+ ```
26
+
27
+ ### Advanced Configuration
28
+
29
+ ```python
30
+ from masster import Wizard, wizard_def
31
+
32
+ # Create custom parameters
33
+ params = wizard_def(
34
+ data_source="./raw_data",
35
+ study_folder="./processed_advanced",
36
+ polarity="negative",
37
+ num_cores=8,
38
+
39
+ # File discovery
40
+ file_extensions=[".wiff", ".raw", ".mzML"],
41
+ search_subfolders=True,
42
+ skip_patterns=["blank", "QC", "test"],
43
+
44
+ # Processing parameters
45
+ adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
46
+ chrom_fwhm=0.15,
47
+ noise_threshold=5e4,
48
+
49
+ # Study assembly
50
+ rt_tolerance=1.0,
51
+ mz_tolerance=0.008,
52
+ min_samples_for_merge=30,
53
+
54
+ # Output options
55
+ export_formats=["csv", "xlsx", "mgf", "parquet"],
56
+ generate_plots=True,
57
+ compress_output=True,
58
+ )
59
+
60
+ wizard = Wizard(params=params)
61
+ wizard.run_full_pipeline()
62
+ ```
63
+
64
+ ## Key Features
65
+
66
+ ### 🔄 Automated Pipeline
67
+ - **Raw Data Discovery**: Automatically finds and validates raw MS files
68
+ - **Batch Conversion**: Parallel conversion to sample5 format with optimized parameters
69
+ - **Study Assembly**: Creates study from processed samples with quality filtering
70
+ - **Feature Alignment**: Cross-sample alignment using configurable algorithms
71
+ - **Consensus Generation**: Merges aligned features with statistical validation
72
+ - **Results Export**: Multiple output formats for downstream analysis
73
+
74
+ ### 💾 Intelligent Resume
75
+ - **Checkpoint System**: Automatically saves progress at key points
76
+ - **File Tracking**: Remembers which files have been processed successfully
77
+ - **Smart Recovery**: Resumes from last successful step after interruption
78
+ - **Validation**: Verifies existing outputs before skipping
79
+
80
+ ### ⚡ Performance Optimization
81
+ - **Parallel Processing**: Utilizes multiple CPU cores efficiently
82
+ - **Memory Management**: Adaptive batch sizing based on available memory
83
+ - **Process Isolation**: Prevents memory leaks in long-running jobs
84
+ - **Adaptive Compression**: Optimizes output format based on study size
85
+
86
+ ### 📊 Comprehensive Logging
87
+ - **Progress Tracking**: Real-time status updates with time estimates
88
+ - **Detailed Logs**: Complete processing history saved to files
89
+ - **Error Reporting**: Clear error messages with recovery suggestions
90
+ - **Performance Metrics**: Processing times and resource usage statistics
91
+
92
+ ## Pipeline Steps
93
+
94
+ ### 1. File Discovery
95
+ - Searches for raw MS files (`.wiff`, `.raw`, `.mzML`, `.d`)
96
+ - Applies skip patterns to exclude unwanted files
97
+ - Validates file integrity and accessibility
98
+ - Reports file sizes and estimates processing time
99
+
100
+ ### 2. Sample5 Conversion
101
+ - **Feature Detection**: Two-pass algorithm with configurable parameters
102
+ - **Adduct Detection**: Automated adduct grouping based on polarity
103
+ - **MS2 Linking**: Associates fragmentation spectra with features
104
+ - **Quality Control**: Validates outputs and reports statistics
105
+ - **Parallel Processing**: Utilizes multiple CPU cores with batch optimization
106
+
107
+ ### 3. Study Assembly
108
+ - **Sample Loading**: Imports all processed sample5 files
109
+ - **Quality Filtering**: Removes low-quality features based on coherence/prominence
110
+ - **Metadata Organization**: Organizes sample information and experimental design
111
+ - **Memory Optimization**: Efficient data structures for large studies
112
+
113
+ ### 4. Feature Alignment
114
+ - **RT Alignment**: Corrects retention time shifts between samples
115
+ - **Mass Alignment**: Accounts for mass calibration differences
116
+ - **Algorithm Selection**: Supports KD-tree, QT-clustering, and chunked methods
117
+ - **Validation**: Reports alignment statistics and quality metrics
118
+
119
+ ### 5. Consensus Generation
120
+ - **Feature Merging**: Groups aligned features into consensus features
121
+ - **Statistical Validation**: Applies minimum sample requirements
122
+ - **Gap Filling**: Extracts chromatograms for missing values
123
+ - **MS2 Integration**: Links consensus features to MS2 spectra
124
+
125
+ ### 6. Visualization & Export
126
+ - **Interactive Plots**: 2D feature maps, PCA plots, alignment visualizations
127
+ - **Multiple Formats**: CSV, Excel, MGF, Parquet exports
128
+ - **Study Archival**: Compressed study5 format for long-term storage
129
+ - **Metadata Export**: Complete processing parameters and statistics
130
+
131
+ ## Configuration Options
132
+
133
+ ### Core Parameters
134
+
135
+ | Parameter | Type | Default | Description |
136
+ |-----------|------|---------|-------------|
137
+ | `data_source` | str | **required** | Directory containing raw data files |
138
+ | `study_folder` | str | **required** | Output directory for processed study |
139
+ | `polarity` | str | `"positive"` | Ion polarity mode (`"positive"` or `"negative"`) |
140
+ | `num_cores` | int | `4` | Number of CPU cores for parallel processing |
141
+ | `adducts` | List[str] | auto-set | Adduct specifications (set based on polarity) |
142
+
143
+ ### File Discovery
144
+
145
+ | Parameter | Type | Default | Description |
146
+ |-----------|------|---------|-------------|
147
+ | `file_extensions` | List[str] | `[".wiff", ".raw", ".mzML", ".d"]` | File types to search for |
148
+ | `search_subfolders` | bool | `True` | Search subdirectories recursively |
149
+ | `skip_patterns` | List[str] | `["blank", "QC", "test"]` | Filename patterns to skip |
150
+ | `max_file_size_gb` | float | `4.0` | File size threshold (GB) above which a warning is issued |
151
+
152
+ ### Processing Parameters
153
+
154
+ | Parameter | Type | Default | Description |
155
+ |-----------|------|---------|-------------|
156
+ | `batch_size` | int | `8` | Files processed per batch |
157
+ | `memory_limit_gb` | float | `16.0` | Memory usage limit |
158
+ | `chrom_fwhm` | float | `0.2` | Expected chromatographic peak width (s) |
159
+ | `noise_threshold` | float | `1e5` | Intensity threshold for peak detection |
160
+ | `chrom_peak_snr` | float | `5.0` | Signal-to-noise ratio requirement |
161
+ | `tol_ppm` | float | `10.0` | Mass tolerance (ppm) |
162
+
163
+ ### Study Assembly
164
+
165
+ | Parameter | Type | Default | Description |
166
+ |-----------|------|---------|-------------|
167
+ | `rt_tolerance` | float | `1.5` | RT tolerance for alignment (seconds) |
168
+ | `mz_tolerance` | float | `0.01` | m/z tolerance for alignment (Da) |
169
+ | `alignment_algorithm` | str | `"kd"` | Alignment algorithm (`"kd"`, `"qt"`, `"chunked"`) |
170
+ | `merge_method` | str | `"chunked"` | Merge algorithm for consensus generation |
171
+ | `min_samples_for_merge` | int | `50` | Minimum samples required for consensus |
172
+
173
+ ### Output & Logging
174
+
175
+ | Parameter | Type | Default | Description |
176
+ |-----------|------|---------|-------------|
177
+ | `generate_plots` | bool | `True` | Generate visualization plots |
178
+ | `export_formats` | List[str] | `["csv", "mgf", "xlsx"]` | Output formats to generate |
179
+ | `compress_output` | bool | `True` | Compress final study file |
180
+ | `adaptive_compression` | bool | `True` | Adapt compression based on study size |
181
+ | `log_level` | str | `"INFO"` | Logging detail level |
182
+ | `log_to_file` | bool | `True` | Save logs to file |
183
+
184
+ ### Resume & Recovery
185
+
186
+ | Parameter | Type | Default | Description |
187
+ |-----------|------|---------|-------------|
188
+ | `resume_enabled` | bool | `True` | Enable automatic resume capability |
189
+ | `force_reprocess` | bool | `False` | Force reprocessing of existing files |
190
+ | `backup_enabled` | bool | `True` | Create backups of intermediate results |
191
+ | `checkpoint_interval` | int | `10` | Save progress every N files |
192
+ | `cleanup_temp_files` | bool | `True` | Remove temporary files after completion |
193
+
194
+ ## Methods
195
+
196
+ ### Pipeline Control
197
+
198
+ #### `run_full_pipeline() -> bool`
199
+ Executes the complete processing pipeline in sequence. Returns `True` if successful.
200
+
201
+ #### Individual Steps
202
+ - `discover_files() -> List[Path]` - Find raw data files
203
+ - `convert_to_sample5(file_list=None) -> bool` - Convert to sample5 format
204
+ - `assemble_study() -> bool` - Create study from sample5 files
205
+ - `align_and_merge() -> bool` - Perform feature alignment and merging
206
+ - `generate_plots() -> bool` - Create visualization plots
207
+ - `export_results() -> bool` - Export in requested formats
208
+ - `save_study() -> bool` - Save final study file
209
+ - `cleanup_temp_files() -> bool` - Remove temporary files
210
+
211
+ ### Status & Information
212
+
213
+ #### `info()`
214
+ Prints comprehensive wizard status including progress, timings, and results.
215
+
216
+ #### `get_status() -> Dict[str, Any]`
217
+ Returns detailed status dictionary with current step, processed files, timing, and parameters.
218
+
219
+ ## Error Handling & Recovery
220
+
221
+ ### Common Issues and Solutions
222
+
223
+ **Memory Errors**
224
+ - Reduce `batch_size` parameter
225
+ - Increase `memory_limit_gb` if available
226
+ - Use `merge_method="chunked"` for large studies
227
+ - Enable `cleanup_temp_files=True`
228
+
229
+ **File Access Errors**
230
+ - Check file permissions on source and destination folders
231
+ - Verify network connectivity for remote file systems
232
+ - Ensure sufficient disk space in output directory
233
+ - Close any applications that might lock files
234
+
235
+ **Processing Failures**
236
+ - Check individual file integrity
237
+ - Review `skip_patterns` to exclude problematic files
238
+ - Examine detailed logs in `wizard.log` and `processing.log`
239
+ - Try processing failed files individually for debugging
240
+
241
+ **Resume Issues**
242
+ - Delete `wizard_checkpoint.json` to force fresh start
243
+ - Verify output directory permissions
244
+ - Check for corrupted intermediate files
245
+
246
+ ### Validation and Quality Control
247
+
248
+ The Wizard includes built-in validation at each step:
249
+
250
+ - **File Validation**: Checks file accessibility and format compatibility
251
+ - **Processing Validation**: Verifies sample5 outputs can be loaded
252
+ - **Study Validation**: Ensures study assembly completed successfully
253
+ - **Alignment Validation**: Reports alignment statistics and warnings
254
+ - **Export Validation**: Confirms all requested outputs were created
255
+
256
+ ## Performance Guidelines
257
+
258
+ ### System Requirements
259
+ - **Minimum**: 4 CPU cores, 8 GB RAM
260
+ - **Recommended**: 8+ CPU cores, 16+ GB RAM
261
+ - **Large Studies**: 16+ CPU cores, 32+ GB RAM
262
+ - **Storage**: SSD recommended, ~2-3x raw data size free space
263
+
264
+ ### Optimization Tips
265
+
266
+ **For Small Studies (< 50 samples)**
267
+ - Use `num_cores = 4-6`
268
+ - Set `batch_size = 4-8`
269
+ - Use `merge_method = "kd"`
270
+ - Enable all export formats
271
+
272
+ **For Large Studies (100+ samples)**
273
+ - Use `num_cores = 8-16`
274
+ - Set `batch_size = 16-32`
275
+ - Use `merge_method = "chunked"`
276
+ - Enable `adaptive_compression = True`
277
+ - Consider processing in polarity-specific batches
278
+
279
+ **For Very Large Studies (500+ samples)**
280
+ - Process positive/negative modes separately
281
+ - Use `memory_limit_gb = 64+`
282
+ - Set `checkpoint_interval = 50`
283
+ - Enable `cleanup_temp_files = True`
284
+ - Consider cluster/cloud processing
285
+
286
+ ## Integration Examples
287
+
288
+ ### With Existing Workflows
289
+
290
+ ```python
291
+ # Integration with custom preprocessing
292
+ wizard = Wizard(data_source="./preprocessed", ...)
293
+
294
+ # Skip conversion if already done
295
+ if not any(wizard.study_folder_path.glob("*.sample5")):
296
+ wizard.convert_to_sample5()
297
+
298
+ # Continue with study-level processing
299
+ wizard.assemble_study()
300
+ wizard.align_and_merge()
301
+ wizard.export_results()
302
+ ```
303
+
304
+ ### Batch Processing Multiple Studies
305
+
306
+ ```python
307
+ studies = [
308
+ {"data_source": "./batch1", "study_folder": "./results/batch1", "polarity": "positive"},
309
+ {"data_source": "./batch2", "study_folder": "./results/batch2", "polarity": "negative"},
310
+ ]
311
+
312
+ for study_config in studies:
313
+ wizard = Wizard(**study_config, num_cores=8)
314
+ success = wizard.run_full_pipeline()
315
+
316
+ if success:
317
+ print(f"✅ {study_config['study_folder']} completed")
318
+ else:
319
+ print(f"❌ {study_config['study_folder']} failed")
320
+ ```
321
+
322
+ ### Custom Processing Steps
323
+
324
+ ```python
325
+ wizard = Wizard(...)
326
+
327
+ # Standard conversion
328
+ wizard.convert_to_sample5()
329
+
330
+ # Custom study assembly with specific parameters
331
+ wizard.assemble_study()
332
+
333
+ # Custom filtering before alignment
334
+ if hasattr(wizard.study, 'features_filter'):
335
+ selection = wizard.study.features_select(
336
+ chrom_coherence=0.5, # Higher quality threshold
337
+ chrom_prominence_scaled=2.0
338
+ )
339
+ wizard.study.features_filter(selection)
340
+
341
+ # Continue with standard pipeline
342
+ wizard.align_and_merge()
343
+ wizard.generate_plots()
344
+ ```
345
+
346
+ ## Output Files
347
+
348
+ The Wizard generates several types of output files:
349
+
350
+ ### Primary Results
351
+ - `final_study.study5` - Complete study in masster native format
352
+ - `consensus_features.csv` - Feature table with RT, m/z, intensity data
353
+ - `study_results.xlsx` - Multi-sheet Excel workbook with results and metadata
354
+ - `consensus_ms2.mgf` - MS2 spectra for database searching
355
+
356
+ ### Visualizations
357
+ - `alignment_plot.html` - Interactive alignment visualization
358
+ - `consensus_2d.html` - 2D feature map of consensus features
359
+ - `pca_plot.html` - Principal component analysis plot
360
+ - `consensus_stats.html` - Study statistics and quality metrics
361
+
362
+ ### Processing Logs
363
+ - `wizard.log` - Detailed processing log with debug information
364
+ - `processing.log` - Simple progress log with timestamps
365
+ - `study_metadata.txt` - Study summary with parameters and statistics
366
+
367
+ ### Individual Sample Outputs (if enabled)
368
+ - `sample_name.sample5` - Processed sample in masster format
369
+ - `sample_name.features.csv` - Individual sample feature table
370
+ - `sample_name.mgf` - Individual sample MS2 spectra
371
+ - `sample_name_2d.html` - Individual sample 2D plot
372
+
373
+ The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.
@@ -0,0 +1,11 @@
1
+ """
2
+ Wizard module for automated processing of mass spectrometry studies.
3
+
4
+ This module provides the Wizard class for fully automated processing of MS data
5
+ from raw files to final study results, including batch conversion, assembly,
6
+ alignment, merging, plotting, and export.
7
+ """
8
+
9
+ from .wizard import Wizard, wizard_def
10
+
11
+ __all__ = ["Wizard", "wizard_def"]