masster 0.5.20__py3-none-any.whl → 0.5.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic; see the release details below.

masster/_version.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.5.19"
4
+ __version__ = "0.5.22"
5
5
 
6
6
 
7
7
  def get_version():
masster/wizard/wizard.py CHANGED
@@ -818,7 +818,8 @@ class Wizard:
818
818
  '',
819
819
  '@app.cell',
820
820
  'def __(masster):',
821
- ' study = masster.Study(folder=".")',
821
+ f' study = masster.Study(folder={str(self.folder_path)!r})',
822
+ ' study.load()',
822
823
  ' return (study,)',
823
824
  '',
824
825
  '@app.cell',
@@ -838,7 +839,7 @@ class Wizard:
838
839
  f"Polarity detected: {source_info.get('polarity', 'unknown')}",
839
840
  "Files created:"]
840
841
  for file_path in files_created:
841
- instructions.append(f" {str(Path(file_path).resolve())}")
842
+ instructions.append(f" [OK] {str(Path(file_path).resolve())}")
842
843
 
843
844
  # Find the workflow script name from created files
844
845
  workflow_script_name = "1_masster_workflow.py"
@@ -877,7 +878,7 @@ class Wizard:
877
878
  # Add test mode print after the masster version line
878
879
  if 'print("masster' in line and 'Automated MS Data Analysis")' in line:
879
880
  lines.insert(i + 1, ' if TEST_MODE:')
880
- lines.insert(i + 2, ' print("🧪 TEST MODE: Processing single file only")')
881
+ lines.insert(i + 2, ' print("[TEST] TEST MODE: Processing single file only")')
881
882
  break
882
883
 
883
884
  # Add mode info after num_cores print
@@ -894,7 +895,7 @@ class Wizard:
894
895
  lines.insert(i + 2, ' # Limit to first file in test mode')
895
896
  lines.insert(i + 3, ' if TEST_MODE:')
896
897
  lines.insert(i + 4, ' raw_files = raw_files[:1]')
897
- lines.insert(i + 5, ' print(f"\\n🧪 TEST MODE: Processing only first file: {raw_files[0].name}")')
898
+ lines.insert(i + 5, ' print(f"\\n[TEST] TEST MODE: Processing only first file: {raw_files[0].name}")')
898
899
  break
899
900
 
900
901
  # Modify num_cores for test mode
@@ -909,7 +910,7 @@ class Wizard:
909
910
  lines.insert(i + 1, ' ')
910
911
  lines.insert(i + 2, ' # Stop here if test-only mode')
911
912
  lines.insert(i + 3, ' if TEST_ONLY:')
912
- lines.insert(i + 4, ' print("\\n🧪 TEST ONLY mode: Stopping after successful single file processing")')
913
+ lines.insert(i + 4, ' print("\\n[TEST] TEST ONLY mode: Stopping after successful single file processing")')
913
914
  lines.insert(i + 5, ' print(f"Test file created: {sample5_files[0]}")')
914
915
  lines.insert(i + 6, ' print("\\nTo run full batch, use: wizard.run()")')
915
916
  lines.insert(i + 7, ' total_time = time.time() - start_time')
@@ -985,7 +986,7 @@ class Wizard:
985
986
  "status": "error",
986
987
  "message": "Workflow script not found. Please run create_scripts() first.",
987
988
  "instructions": [
988
- " Missing 1_masster_workflow.py",
989
+ "[ERROR] Missing 1_masster_workflow.py",
989
990
  "Run: wizard.create_scripts()",
990
991
  "Then: wizard.run()"
991
992
  ]
@@ -1008,8 +1009,8 @@ class Wizard:
1008
1009
  env['MASSTER_TEST_ONLY'] = '1'
1009
1010
 
1010
1011
  # Execute the workflow script
1011
- print(f"🚀 Executing {mode_label} processing workflow...")
1012
- print(f"📄 Running: {workflow_script_path.name}")
1012
+ print(f">> Executing {mode_label} processing workflow...")
1013
+ print(f"[SCRIPT] Running: {workflow_script_path.name}")
1013
1014
  print("=" * 60)
1014
1015
 
1015
1016
  import subprocess
@@ -1022,16 +1023,16 @@ class Wizard:
1022
1023
  if success:
1023
1024
  print("=" * 60)
1024
1025
  if test_only:
1025
- print(" Test-only processing completed successfully!")
1026
- print("📋 Single file validated - ready for full batch")
1026
+ print("[OK] Test-only processing completed successfully!")
1027
+ print("[INFO] Single file validated - ready for full batch")
1027
1028
  print(" wizard.run()")
1028
1029
  elif test_mode:
1029
- print(" Test processing completed successfully!")
1030
- print("📋 Next step: Run full batch")
1030
+ print("[OK] Test processing completed successfully!")
1031
+ print("[INFO] Next step: Run full batch")
1031
1032
  print(" wizard.run()")
1032
1033
  else:
1033
- print(" Sample processing completed successfully!")
1034
- print("📋 Next step: Run interactive analysis")
1034
+ print("[OK] Sample processing completed successfully!")
1035
+ print("[INFO] Next step: Run interactive analysis")
1035
1036
  print(" uv run marimo edit 2_interactive_analysis.py")
1036
1037
  print("=" * 60)
1037
1038
 
@@ -1042,7 +1043,7 @@ class Wizard:
1042
1043
  "status": "success",
1043
1044
  "message": f"{mode_label.capitalize()} processing completed successfully",
1044
1045
  "instructions": [
1045
- f" {mode_label.capitalize()} processing completed",
1046
+ f"[OK] {mode_label.capitalize()} processing completed",
1046
1047
  next_step
1047
1048
  ]
1048
1049
  }
@@ -1051,7 +1052,7 @@ class Wizard:
1051
1052
  "status": "error",
1052
1053
  "message": f"Workflow execution failed with return code {result.returncode}",
1053
1054
  "instructions": [
1054
- " Check the error messages above",
1055
+ "[ERROR] Check the error messages above",
1055
1056
  "Review parameters in 1_masster_workflow.py",
1056
1057
  f"Try running manually: python {workflow_script_path.name}"
1057
1058
  ]
@@ -1062,7 +1063,7 @@ class Wizard:
1062
1063
  "status": "error",
1063
1064
  "message": f"Failed to execute workflow: {e}",
1064
1065
  "instructions": [
1065
- " Execution failed",
1066
+ "[ERROR] Execution failed",
1066
1067
  "Check that source files exist and are accessible",
1067
1068
  "Verify folder permissions"
1068
1069
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.20
3
+ Version: 0.5.22
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,5 +1,5 @@
1
1
  masster/__init__.py,sha256=B7zftzdElF2Wb5B7KvkD6TONnMIY-Jxeen3s49dgmzs,1029
2
- masster/_version.py,sha256=uCkE1NJ7J1aQrPg6o1mVRwVi9N10aB8nbGRTr0cwkNY,257
2
+ masster/_version.py,sha256=4HOjpCuCuNY78DZ2P_GWL7HWMcoGK_P8IwMz9LKoTq0,257
3
3
  masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
4
4
  masster/logger.py,sha256=XT2gUcUIct8LWzTp9n484g5MaB89toT76CGA41oBvfA,18375
5
5
  masster/spectrum.py,sha256=TWIgDcl0lveG40cLVZTWGp8-FxMolu-P8EjZyRBtXL4,49850
@@ -59,12 +59,10 @@ masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVq
59
59
  masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
60
60
  masster/study/defaults/merge_def.py,sha256=krR099IkENLlJVxpSjdje3E6h-_qtlc3Ep6Hpy6inrU,12978
61
61
  masster/study/defaults/study_def.py,sha256=h8dYbi9xv0sesCSQik49Z53IkskMmNtW6ixl7it5pL0,16033
62
- masster/wizard/README.md,sha256=RX3uxT1qD5i9iDSznZUbnukixonqr96JlUE4TwssAgY,14111
63
62
  masster/wizard/__init__.py,sha256=L9G_datyGSFJjrBVklEVpZVLGXzUhDiWobtiygBH8vQ,669
64
- masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
65
- masster/wizard/wizard.py,sha256=ckOz-8zrk8i7tDjqkk-shYFA2Ni9fV4nejocEjTX78M,65265
66
- masster-0.5.20.dist-info/METADATA,sha256=FJGXFasiyqxkR1R34mXnMANYqQ_ArHLktC3DqLRvg1I,45153
67
- masster-0.5.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
68
- masster-0.5.20.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
69
- masster-0.5.20.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
70
- masster-0.5.20.dist-info/RECORD,,
63
+ masster/wizard/wizard.py,sha256=aTNVhYoMK5B1uR2zrdbwEyZX-Zd86e4sUGhkQ2bgiDo,65352
64
+ masster-0.5.22.dist-info/METADATA,sha256=tLbIWPgMewKVqyz7MSbwLZZY5JOPUaEfYMx-q5w4rkk,45153
65
+ masster-0.5.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
66
+ masster-0.5.22.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
67
+ masster-0.5.22.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
68
+ masster-0.5.22.dist-info/RECORD,,
masster/wizard/README.md DELETED
@@ -1,373 +0,0 @@
1
- # Wizard Class Documentation
2
-
3
- The `Wizard` class provides comprehensive automation for mass spectrometry data processing, from raw files to final study results. It handles the complete workflow with minimal user intervention while providing intelligent resume capabilities, parallel processing optimization, and adaptive output formats.
4
-
5
- ## Quick Start
6
-
7
- ### Basic Usage
8
-
9
- ```python
10
- from masster import Wizard
11
-
12
- # Create wizard with minimal configuration
13
- wizard = Wizard(
14
- data_source="./raw_data", # Directory with raw files
15
- study_folder="./processed", # Output directory
16
- polarity="positive", # or "negative"
17
- num_cores=4 # CPU cores to use
18
- )
19
-
20
- # Run complete pipeline
21
- success = wizard.run_full_pipeline()
22
-
23
- if success:
24
- wizard.info() # Print summary
25
- ```
26
-
27
- ### Advanced Configuration
28
-
29
- ```python
30
- from masster import Wizard, wizard_def
31
-
32
- # Create custom parameters
33
- params = wizard_def(
34
- data_source="./raw_data",
35
- study_folder="./processed_advanced",
36
- polarity="negative",
37
- num_cores=8,
38
-
39
- # File discovery
40
- file_extensions=[".wiff", ".raw", ".mzML"],
41
- search_subfolders=True,
42
- skip_patterns=["blank", "QC", "test"],
43
-
44
- # Processing parameters
45
- adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
46
- chrom_fwhm=0.15,
47
- noise_threshold=5e4,
48
-
49
- # Study assembly
50
- rt_tolerance=1.0,
51
- mz_tolerance=0.008,
52
- min_samples_for_merge=30,
53
-
54
- # Output options
55
- export_formats=["csv", "xlsx", "mgf", "parquet"],
56
- generate_plots=True,
57
- compress_output=True,
58
- )
59
-
60
- wizard = Wizard(params=params)
61
- wizard.run_full_pipeline()
62
- ```
63
-
64
- ## Key Features
65
-
66
- ### 🔄 Automated Pipeline
67
- - **Raw Data Discovery**: Automatically finds and validates raw MS files
68
- - **Batch Conversion**: Parallel conversion to sample5 format with optimized parameters
69
- - **Study Assembly**: Creates study from processed samples with quality filtering
70
- - **Feature Alignment**: Cross-sample alignment using configurable algorithms
71
- - **Consensus Generation**: Merges aligned features with statistical validation
72
- - **Results Export**: Multiple output formats for downstream analysis
73
-
74
- ### 💾 Intelligent Resume
75
- - **Checkpoint System**: Automatically saves progress at key points
76
- - **File Tracking**: Remembers which files have been processed successfully
77
- - **Smart Recovery**: Resumes from last successful step after interruption
78
- - **Validation**: Verifies existing outputs before skipping
79
-
80
- ### ⚡ Performance Optimization
81
- - **Parallel Processing**: Utilizes multiple CPU cores efficiently
82
- - **Memory Management**: Adaptive batch sizing based on available memory
83
- - **Process Isolation**: Prevents memory leaks in long-running jobs
84
- - **Adaptive Compression**: Optimizes output format based on study size
85
-
86
- ### 📊 Comprehensive Logging
87
- - **Progress Tracking**: Real-time status updates with time estimates
88
- - **Detailed Logs**: Complete processing history saved to files
89
- - **Error Reporting**: Clear error messages with recovery suggestions
90
- - **Performance Metrics**: Processing times and resource usage statistics
91
-
92
- ## Pipeline Steps
93
-
94
- ### 1. File Discovery
95
- - Searches for raw MS files (`.wiff`, `.raw`, `.mzML`, `.d`)
96
- - Applies skip patterns to exclude unwanted files
97
- - Validates file integrity and accessibility
98
- - Reports file sizes and estimates processing time
99
-
100
- ### 2. Sample5 Conversion
101
- - **Feature Detection**: Two-pass algorithm with configurable parameters
102
- - **Adduct Detection**: Automated adduct grouping based on polarity
103
- - **MS2 Linking**: Associates fragmentation spectra with features
104
- - **Quality Control**: Validates outputs and reports statistics
105
- - **Parallel Processing**: Utilizes multiple CPU cores with batch optimization
106
-
107
- ### 3. Study Assembly
108
- - **Sample Loading**: Imports all processed sample5 files
109
- - **Quality Filtering**: Removes low-quality features based on coherence/prominence
110
- - **Metadata Organization**: Organizes sample information and experimental design
111
- - **Memory Optimization**: Efficient data structures for large studies
112
-
113
- ### 4. Feature Alignment
114
- - **RT Alignment**: Corrects retention time shifts between samples
115
- - **Mass Alignment**: Accounts for mass calibration differences
116
- - **Algorithm Selection**: Supports KD-tree, QT-clustering, and chunked methods
117
- - **Validation**: Reports alignment statistics and quality metrics
118
-
119
- ### 5. Consensus Generation
120
- - **Feature Merging**: Groups aligned features into consensus features
121
- - **Statistical Validation**: Applies minimum sample requirements
122
- - **Gap Filling**: Extracts chromatograms for missing values
123
- - **MS2 Integration**: Links consensus features to MS2 spectra
124
-
125
- ### 6. Visualization & Export
126
- - **Interactive Plots**: 2D feature maps, PCA plots, alignment visualizations
127
- - **Multiple Formats**: CSV, Excel, MGF, Parquet exports
128
- - **Study Archival**: Compressed study5 format for long-term storage
129
- - **Metadata Export**: Complete processing parameters and statistics
130
-
131
- ## Configuration Options
132
-
133
- ### Core Parameters
134
-
135
- | Parameter | Type | Default | Description |
136
- |-----------|------|---------|-------------|
137
- | `data_source` | str | **required** | Directory containing raw data files |
138
- | `study_folder` | str | **required** | Output directory for processed study |
139
- | `polarity` | str | `"positive"` | Ion polarity mode (`"positive"` or `"negative"`) |
140
- | `num_cores` | int | `4` | Number of CPU cores for parallel processing |
141
- | `adducts` | List[str] | auto-set | Adduct specifications (set based on polarity) |
142
-
143
- ### File Discovery
144
-
145
- | Parameter | Type | Default | Description |
146
- |-----------|------|---------|-------------|
147
- | `file_extensions` | List[str] | `[".wiff", ".raw", ".mzML", ".d"]` | File types to search for |
148
- | `search_subfolders` | bool | `True` | Search subdirectories recursively |
149
- | `skip_patterns` | List[str] | `["blank", "QC", "test"]` | Filename patterns to skip |
150
- | `max_file_size_gb` | float | `4.0` | Maximum file size warning threshold |
151
-
152
- ### Processing Parameters
153
-
154
- | Parameter | Type | Default | Description |
155
- |-----------|------|---------|-------------|
156
- | `batch_size` | int | `8` | Files processed per batch |
157
- | `memory_limit_gb` | float | `16.0` | Memory usage limit |
158
- | `chrom_fwhm` | float | `0.2` | Expected chromatographic peak width (s) |
159
- | `noise_threshold` | float | `1e5` | Intensity threshold for peak detection |
160
- | `chrom_peak_snr` | float | `5.0` | Signal-to-noise ratio requirement |
161
- | `tol_ppm` | float | `10.0` | Mass tolerance (ppm) |
162
-
163
- ### Study Assembly
164
-
165
- | Parameter | Type | Default | Description |
166
- |-----------|------|---------|-------------|
167
- | `rt_tolerance` | float | `1.5` | RT tolerance for alignment (seconds) |
168
- | `mz_tolerance` | float | `0.01` | m/z tolerance for alignment (Da) |
169
- | `alignment_algorithm` | str | `"kd"` | Alignment algorithm (`"kd"`, `"qt"`, `"chunked"`) |
170
- | `merge_method` | str | `"chunked"` | Merge algorithm for consensus generation |
171
- | `min_samples_for_merge` | int | `50` | Minimum samples required for consensus |
172
-
173
- ### Output & Logging
174
-
175
- | Parameter | Type | Default | Description |
176
- |-----------|------|---------|-------------|
177
- | `generate_plots` | bool | `True` | Generate visualization plots |
178
- | `export_formats` | List[str] | `["csv", "mgf", "xlsx"]` | Output formats to generate |
179
- | `compress_output` | bool | `True` | Compress final study file |
180
- | `adaptive_compression` | bool | `True` | Adapt compression based on study size |
181
- | `log_level` | str | `"INFO"` | Logging detail level |
182
- | `log_to_file` | bool | `True` | Save logs to file |
183
-
184
- ### Resume & Recovery
185
-
186
- | Parameter | Type | Default | Description |
187
- |-----------|------|---------|-------------|
188
- | `resume_enabled` | bool | `True` | Enable automatic resume capability |
189
- | `force_reprocess` | bool | `False` | Force reprocessing of existing files |
190
- | `backup_enabled` | bool | `True` | Create backups of intermediate results |
191
- | `checkpoint_interval` | int | `10` | Save progress every N files |
192
- | `cleanup_temp_files` | bool | `True` | Remove temporary files after completion |
193
-
194
- ## Methods
195
-
196
- ### Pipeline Control
197
-
198
- #### `run_full_pipeline() -> bool`
199
- Executes the complete processing pipeline in sequence. Returns `True` if successful.
200
-
201
- #### Individual Steps
202
- - `discover_files() -> List[Path]` - Find raw data files
203
- - `convert_to_sample5(file_list=None) -> bool` - Convert to sample5 format
204
- - `assemble_study() -> bool` - Create study from sample5 files
205
- - `align_and_merge() -> bool` - Perform feature alignment and merging
206
- - `generate_plots() -> bool` - Create visualization plots
207
- - `export_results() -> bool` - Export in requested formats
208
- - `save_study() -> bool` - Save final study file
209
- - `cleanup_temp_files() -> bool` - Remove temporary files
210
-
211
- ### Status & Information
212
-
213
- #### `info()`
214
- Prints comprehensive wizard status including progress, timings, and results.
215
-
216
- #### `get_status() -> Dict[str, Any]`
217
- Returns detailed status dictionary with current step, processed files, timing, and parameters.
218
-
219
- ## Error Handling & Recovery
220
-
221
- ### Common Issues and Solutions
222
-
223
- **Memory Errors**
224
- - Reduce `batch_size` parameter
225
- - Increase `memory_limit_gb` if available
226
- - Use `merge_method="chunked"` for large studies
227
- - Enable `cleanup_temp_files=True`
228
-
229
- **File Access Errors**
230
- - Check file permissions on source and destination folders
231
- - Verify network connectivity for remote file systems
232
- - Ensure sufficient disk space in output directory
233
- - Close any applications that might lock files
234
-
235
- **Processing Failures**
236
- - Check individual file integrity
237
- - Review `skip_patterns` to exclude problematic files
238
- - Examine detailed logs in `wizard.log` and `processing.log`
239
- - Try processing failed files individually for debugging
240
-
241
- **Resume Issues**
242
- - Delete `wizard_checkpoint.json` to force fresh start
243
- - Verify output directory permissions
244
- - Check for corrupted intermediate files
245
-
246
- ### Validation and Quality Control
247
-
248
- The Wizard includes built-in validation at each step:
249
-
250
- - **File Validation**: Checks file accessibility and format compatibility
251
- - **Processing Validation**: Verifies sample5 outputs can be loaded
252
- - **Study Validation**: Ensures study assembly completed successfully
253
- - **Alignment Validation**: Reports alignment statistics and warnings
254
- - **Export Validation**: Confirms all requested outputs were created
255
-
256
- ## Performance Guidelines
257
-
258
- ### System Requirements
259
- - **Minimum**: 4 CPU cores, 8 GB RAM
260
- - **Recommended**: 8+ CPU cores, 16+ GB RAM
261
- - **Large Studies**: 16+ CPU cores, 32+ GB RAM
262
- - **Storage**: SSD recommended, ~2-3x raw data size free space
263
-
264
- ### Optimization Tips
265
-
266
- **For Small Studies (< 50 samples)**
267
- - Use `num_cores = 4-6`
268
- - Set `batch_size = 4-8`
269
- - Use `merge_method = "kd"`
270
- - Enable all export formats
271
-
272
- **For Large Studies (100+ samples)**
273
- - Use `num_cores = 8-16`
274
- - Set `batch_size = 16-32`
275
- - Use `merge_method = "chunked"`
276
- - Enable `adaptive_compression = True`
277
- - Consider processing in polarity-specific batches
278
-
279
- **For Very Large Studies (500+ samples)**
280
- - Process positive/negative modes separately
281
- - Use `memory_limit_gb = 64+`
282
- - Set `checkpoint_interval = 50`
283
- - Enable `cleanup_temp_files = True`
284
- - Consider cluster/cloud processing
285
-
286
- ## Integration Examples
287
-
288
- ### With Existing Workflows
289
-
290
- ```python
291
- # Integration with custom preprocessing
292
- wizard = Wizard(data_source="./preprocessed", ...)
293
-
294
- # Skip conversion if already done
295
- if not wizard.study_folder_path.glob("*.sample5"):
296
- wizard.convert_to_sample5()
297
-
298
- # Continue with study-level processing
299
- wizard.assemble_study()
300
- wizard.align_and_merge()
301
- wizard.export_results()
302
- ```
303
-
304
- ### Batch Processing Multiple Studies
305
-
306
- ```python
307
- studies = [
308
- {"source": "./batch1", "output": "./results/batch1", "polarity": "pos"},
309
- {"source": "./batch2", "output": "./results/batch2", "polarity": "neg"},
310
- ]
311
-
312
- for study_config in studies:
313
- wizard = Wizard(**study_config, num_cores=8)
314
- success = wizard.run_full_pipeline()
315
-
316
- if success:
317
- print(f"✅ {study_config['output']} completed")
318
- else:
319
- print(f"❌ {study_config['output']} failed")
320
- ```
321
-
322
- ### Custom Processing Steps
323
-
324
- ```python
325
- wizard = Wizard(...)
326
-
327
- # Standard conversion
328
- wizard.convert_to_sample5()
329
-
330
- # Custom study assembly with specific parameters
331
- wizard.assemble_study()
332
-
333
- # Custom filtering before alignment
334
- if hasattr(wizard.study, 'features_filter'):
335
- selection = wizard.study.features_select(
336
- chrom_coherence=0.5, # Higher quality threshold
337
- chrom_prominence_scaled=2.0
338
- )
339
- wizard.study.features_filter(selection)
340
-
341
- # Continue with standard pipeline
342
- wizard.align_and_merge()
343
- wizard.generate_plots()
344
- ```
345
-
346
- ## Output Files
347
-
348
- The Wizard generates several types of output files:
349
-
350
- ### Primary Results
351
- - `final_study.study5` - Complete study in masster native format
352
- - `consensus_features.csv` - Feature table with RT, m/z, intensity data
353
- - `study_results.xlsx` - Multi-sheet Excel workbook with results and metadata
354
- - `consensus_ms2.mgf` - MS2 spectra for database searching
355
-
356
- ### Visualizations
357
- - `alignment_plot.html` - Interactive alignment visualization
358
- - `consensus_2d.html` - 2D feature map of consensus features
359
- - `pca_plot.html` - Principal component analysis plot
360
- - `consensus_stats.html` - Study statistics and quality metrics
361
-
362
- ### Processing Logs
363
- - `wizard.log` - Detailed processing log with debug information
364
- - `processing.log` - Simple progress log with timestamps
365
- - `study_metadata.txt` - Study summary with parameters and statistics
366
-
367
- ### Individual Sample Outputs (if enabled)
368
- - `sample_name.sample5` - Processed sample in masster format
369
- - `sample_name.features.csv` - Individual sample feature table
370
- - `sample_name.mgf` - Individual sample MS2 spectra
371
- - `sample_name_2d.html` - Individual sample 2D plot
372
-
373
- The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.
masster/wizard/example.py DELETED
@@ -1,223 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Example script demonstrating the Wizard class for automated study processing.
4
-
5
- This script shows how to use the Wizard class to automatically process
6
- mass spectrometry data from raw files to final study results.
7
- """
8
-
9
- from pathlib import Path
10
- from masster import Wizard, wizard_def
11
-
12
- def main():
13
- """Main example function."""
14
-
15
- # =================================================================
16
- # EXAMPLE 1: Basic Usage with Minimal Configuration
17
- # =================================================================
18
- print("=== Example 1: Basic Wizard Usage ===\n")
19
-
20
- # Set up paths (adjust these for your data)
21
- data_source = r"D:\Data\raw_files" # Directory with .wiff, .raw, .mzML files
22
- study_folder = r"D:\Data\processed_study" # Output directory
23
-
24
- # Create wizard with basic settings
25
- wizard = Wizard(
26
- data_source=data_source,
27
- study_folder=study_folder,
28
- polarity="positive", # or "negative"
29
- num_cores=4
30
- )
31
-
32
- # Run the complete pipeline
33
- success = wizard.run_full_pipeline()
34
-
35
- if success:
36
- print("✅ Processing completed successfully!")
37
- wizard.info() # Print status summary
38
- else:
39
- print("❌ Processing failed. Check logs for details.")
40
-
41
- print("\n" + "="*60 + "\n")
42
-
43
- # =================================================================
44
- # EXAMPLE 2: Advanced Configuration with Custom Parameters
45
- # =================================================================
46
- print("=== Example 2: Advanced Wizard Configuration ===\n")
47
-
48
- # Create custom parameters
49
- params = wizard_def(
50
- # Core settings
51
- data_source=data_source,
52
- study_folder=study_folder + "_advanced",
53
- polarity="negative",
54
- num_cores=8,
55
-
56
- # File discovery settings
57
- file_extensions=[".wiff", ".raw", ".mzML"],
58
- search_subfolders=True,
59
- skip_patterns=["blank", "QC", "test", "solvent"],
60
-
61
- # Processing parameters
62
- adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
63
- batch_size=4, # Process 4 files at once
64
- memory_limit_gb=32.0,
65
-
66
- # Feature detection parameters
67
- chrom_fwhm=0.15, # Narrower peaks for UHPLC
68
- noise_threshold=5e4, # Lower noise threshold
69
- chrom_peak_snr=7.0, # Higher S/N requirement
70
- tol_ppm=8.0, # Tighter mass tolerance
71
-
72
- # Study assembly parameters
73
- rt_tolerance=1.0, # Tighter RT tolerance
74
- mz_tolerance=0.008, # Tighter m/z tolerance
75
- min_samples_for_merge=30, # Require feature in at least 30 samples
76
- merge_method="chunked", # Memory-efficient merging
77
-
78
- # Output options
79
- generate_plots=True,
80
- generate_interactive=True,
81
- export_formats=["csv", "xlsx", "mgf", "parquet"],
82
- compress_output=True,
83
- adaptive_compression=True,
84
-
85
- # Advanced options
86
- resume_enabled=True, # Can resume if interrupted
87
- force_reprocess=False, # Skip already processed files
88
- backup_enabled=True,
89
- cleanup_temp_files=True,
90
- log_level="INFO",
91
- verbose_progress=True,
92
- )
93
-
94
- # Create wizard with custom parameters
95
- wizard_advanced = Wizard(params=params)
96
-
97
- # You can also run individual steps for more control
98
- print("Running step-by-step processing...")
99
-
100
- # Step 1: Discover files
101
- files = wizard_advanced.discover_files()
102
- print(f"Found {len(files)} files for processing")
103
-
104
- # Step 2: Convert to sample5 (can be resumed if interrupted)
105
- if wizard_advanced.convert_to_sample5():
106
- print("✅ Sample5 conversion completed")
107
-
108
- # Step 3: Assemble study
109
- if wizard_advanced.assemble_study():
110
- print("✅ Study assembly completed")
111
-
112
- # Step 4: Align and merge
113
- if wizard_advanced.align_and_merge():
114
- print("✅ Alignment and merging completed")
115
-
116
- # Step 5: Generate plots
117
- if wizard_advanced.generate_plots():
118
- print("✅ Plot generation completed")
119
-
120
- # Step 6: Export results
121
- if wizard_advanced.export_results():
122
- print("✅ Results exported")
123
-
124
- # Step 7: Save final study
125
- if wizard_advanced.save_study():
126
- print("✅ Study saved")
127
-
128
- # Optional cleanup
129
- wizard_advanced.cleanup_temp_files()
130
- print("✅ Cleanup completed")
131
-
132
- # Print final status
133
- wizard_advanced.info()
134
-
135
- print("\n" + "="*60 + "\n")
136
-
137
- # =================================================================
138
- # EXAMPLE 3: Resume Interrupted Processing
139
- # =================================================================
140
- print("=== Example 3: Resume Capability ===\n")
141
-
142
- # If processing was interrupted, you can resume by creating a new wizard
143
- # with the same parameters. It will automatically detect and skip
144
- # already processed files.
145
-
146
- resume_wizard = Wizard(
147
- data_source=data_source,
148
- study_folder=study_folder + "_resume",
149
- polarity="positive",
150
- num_cores=4,
151
- resume_enabled=True # This is the default
152
- )
153
-
154
- # The wizard will automatically load checkpoint and continue from where it left off
155
- print("Status after loading checkpoint:")
156
- resume_wizard.info()
157
-
158
- print("\n" + "="*60 + "\n")
159
-
160
- # =================================================================
161
- # EXAMPLE 4: Monitoring and Status
162
- # =================================================================
163
- print("=== Example 4: Status Monitoring ===\n")
164
-
165
- # You can check wizard status at any time
166
- status = wizard.get_status()
167
- print("Wizard Status:")
168
- for key, value in status.items():
169
- print(f" {key}: {value}")
170
-
171
- # The wizard maintains comprehensive logs
172
- log_file = Path(study_folder) / "wizard.log"
173
- if log_file.exists():
174
- print(f"\nDetailed logs available at: {log_file}")
175
-
176
- processing_log = Path(study_folder) / "processing.log"
177
- if processing_log.exists():
178
- print(f"Processing summary at: {processing_log}")
179
-
180
-
181
- def example_batch_different_polarities():
182
- """Example of processing positive and negative mode data separately."""
183
-
184
- print("=== Processing Both Polarities ===\n")
185
-
186
- base_data_source = r"D:\Data\raw_files"
187
- base_output = r"D:\Data\processed_studies"
188
-
189
- # Process positive mode
190
- pos_wizard = Wizard(
191
- data_source=base_data_source + r"\positive",
192
- study_folder=base_output + r"\positive_study",
193
- polarity="positive",
194
- adducts=["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
195
- num_cores=6
196
- )
197
-
198
- print("Processing positive mode data...")
199
- pos_success = pos_wizard.run_full_pipeline()
200
-
201
- # Process negative mode
202
- neg_wizard = Wizard(
203
- data_source=base_data_source + r"\negative",
204
- study_folder=base_output + r"\negative_study",
205
- polarity="negative",
206
- adducts=["H-1:-:0.95", "Cl:-:0.05"],
207
- num_cores=6
208
- )
209
-
210
- print("Processing negative mode data...")
211
- neg_success = neg_wizard.run_full_pipeline()
212
-
213
- print("\nResults:")
214
- print(f"Positive mode: {'✅ Success' if pos_success else '❌ Failed'}")
215
- print(f"Negative mode: {'✅ Success' if neg_success else '❌ Failed'}")
216
-
217
-
218
- if __name__ == "__main__":
219
- # Run basic examples
220
- main()
221
-
222
- # Uncomment to run polarity-specific processing
223
- # example_batch_different_polarities()