masster 0.5.20-py3-none-any.whl → 0.5.22-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/_version.py +1 -1
- masster/wizard/wizard.py +18 -17
- {masster-0.5.20.dist-info → masster-0.5.22.dist-info}/METADATA +1 -1
- {masster-0.5.20.dist-info → masster-0.5.22.dist-info}/RECORD +7 -9
- masster/wizard/README.md +0 -373
- masster/wizard/example.py +0 -223
- {masster-0.5.20.dist-info → masster-0.5.22.dist-info}/WHEEL +0 -0
- {masster-0.5.20.dist-info → masster-0.5.22.dist-info}/entry_points.txt +0 -0
- {masster-0.5.20.dist-info → masster-0.5.22.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/wizard/wizard.py
CHANGED
@@ -818,7 +818,8 @@ class Wizard:
 '',
 '@app.cell',
 'def __(masster):',
-' study = masster.Study(folder=
+f' study = masster.Study(folder={str(self.folder_path)!r})',
+' study.load()',
 ' return (study,)',
 '',
 '@app.cell',
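The first hunk changes the study-loading cell that the wizard writes into the generated marimo notebook: the folder is now interpolated with `!r`, and a `study.load()` call is added. A minimal sketch of what the emitted line looks like, assuming a hypothetical Windows study folder (the `!r` conversion repr-quotes the path so backslashes survive in the generated source):

```python
# Sketch only: reproduces the f-string from the new line in the diff.
# folder_path is a hypothetical value; wizard.py uses self.folder_path.
from pathlib import Path

folder_path = Path(r"D:\Data\processed_study")
emitted = f"    study = masster.Study(folder={str(folder_path)!r})"
print(emitted)
# ->     study = masster.Study(folder='D:\\Data\\processed_study')
```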
@@ -838,7 +839,7 @@ class Wizard:
 f"Polarity detected: {source_info.get('polarity', 'unknown')}",
 "Files created:"]
 for file_path in files_created:
-instructions.append(f"
+instructions.append(f" [OK] {str(Path(file_path).resolve())}")

 # Find the workflow script name from created files
 workflow_script_name = "1_masster_workflow.py"
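The second hunk makes the "Files created:" instructions list report absolute paths with an `[OK]` tag. A small illustration of the appended string, using a made-up relative file path:

```python
# Illustration of the new instructions line; file_path is a hypothetical example.
from pathlib import Path

file_path = "1_masster_workflow.py"
print(f" [OK] {str(Path(file_path).resolve())}")
# e.g.  [OK] /home/user/study/1_masster_workflow.py
```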
@@ -877,7 +878,7 @@ class Wizard:
 # Add test mode print after the masster version line
 if 'print("masster' in line and 'Automated MS Data Analysis")' in line:
 lines.insert(i + 1, ' if TEST_MODE:')
-lines.insert(i + 2, ' print("
+lines.insert(i + 2, ' print("[TEST] TEST MODE: Processing single file only")')
 break

 # Add mode info after num_cores print
@@ -894,7 +895,7 @@ class Wizard:
 lines.insert(i + 2, ' # Limit to first file in test mode')
 lines.insert(i + 3, ' if TEST_MODE:')
 lines.insert(i + 4, ' raw_files = raw_files[:1]')
-lines.insert(i + 5, ' print(f"\\n
+lines.insert(i + 5, ' print(f"\\n[TEST] TEST MODE: Processing only first file: {raw_files[0].name}")')
 break

 # Modify num_cores for test mode
@@ -909,7 +910,7 @@ class Wizard:
 lines.insert(i + 1, ' ')
 lines.insert(i + 2, ' # Stop here if test-only mode')
 lines.insert(i + 3, ' if TEST_ONLY:')
-lines.insert(i + 4, ' print("\\n
+lines.insert(i + 4, ' print("\\n[TEST] TEST ONLY mode: Stopping after successful single file processing")')
 lines.insert(i + 5, ' print(f"Test file created: {sample5_files[0]}")')
 lines.insert(i + 6, ' print("\\nTo run full batch, use: wizard.run()")')
 lines.insert(i + 7, ' total_time = time.time() - start_time')
@@ -985,7 +986,7 @@ class Wizard:
 "status": "error",
 "message": "Workflow script not found. Please run create_scripts() first.",
 "instructions": [
-"
+"[ERROR] Missing 1_masster_workflow.py",
 "Run: wizard.create_scripts()",
 "Then: wizard.run()"
 ]
@@ -1008,8 +1009,8 @@ class Wizard:
 env['MASSTER_TEST_ONLY'] = '1'

 # Execute the workflow script
-print(f"
-print(f"
+print(f">> Executing {mode_label} processing workflow...")
+print(f"[SCRIPT] Running: {workflow_script_path.name}")
 print("=" * 60)

 import subprocess
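This hunk sits where the wizard launches the generated workflow script after exporting the `MASSTER_TEST_ONLY` flag; only the two banner `print` calls change. The actual invocation is not shown in the hunk, so the following is a hedged sketch of the general pattern (script name, working directory, and the exact `subprocess` call are assumptions):

```python
# Hedged sketch of running a generated workflow script in a child process
# with test-mode flags passed through the environment. Not the literal
# wizard.py code; paths are illustrative.
import os
import subprocess
import sys

env = os.environ.copy()
env["MASSTER_TEST_ONLY"] = "1"

print(">> Executing test-only processing workflow...")
print("[SCRIPT] Running: 1_masster_workflow.py")
result = subprocess.run([sys.executable, "1_masster_workflow.py"], env=env)
success = result.returncode == 0
```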
@@ -1022,16 +1023,16 @@ class Wizard:
 if success:
 print("=" * 60)
 if test_only:
-print("
-print("
+print("[OK] Test-only processing completed successfully!")
+print("[INFO] Single file validated - ready for full batch")
 print(" wizard.run()")
 elif test_mode:
-print("
-print("
+print("[OK] Test processing completed successfully!")
+print("[INFO] Next step: Run full batch")
 print(" wizard.run()")
 else:
-print("
-print("
+print("[OK] Sample processing completed successfully!")
+print("[INFO] Next step: Run interactive analysis")
 print(" uv run marimo edit 2_interactive_analysis.py")
 print("=" * 60)

@@ -1042,7 +1043,7 @@ class Wizard:
 "status": "success",
 "message": f"{mode_label.capitalize()} processing completed successfully",
 "instructions": [
-f"
+f"[OK] {mode_label.capitalize()} processing completed",
 next_step
 ]
 }
@@ -1051,7 +1052,7 @@ class Wizard:
 "status": "error",
 "message": f"Workflow execution failed with return code {result.returncode}",
 "instructions": [
-"
+"[ERROR] Check the error messages above",
 "Review parameters in 1_masster_workflow.py",
 f"Try running manually: python {workflow_script_path.name}"
 ]
@@ -1062,7 +1063,7 @@ class Wizard:
 "status": "error",
 "message": f"Failed to execute workflow: {e}",
 "instructions": [
-"
+"[ERROR] Execution failed",
 "Check that source files exist and are accessible",
 "Verify folder permissions"
 ]
{masster-0.5.20.dist-info → masster-0.5.22.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 masster/__init__.py,sha256=B7zftzdElF2Wb5B7KvkD6TONnMIY-Jxeen3s49dgmzs,1029
-masster/_version.py,sha256=
+masster/_version.py,sha256=4HOjpCuCuNY78DZ2P_GWL7HWMcoGK_P8IwMz9LKoTq0,257
 masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
 masster/logger.py,sha256=XT2gUcUIct8LWzTp9n484g5MaB89toT76CGA41oBvfA,18375
 masster/spectrum.py,sha256=TWIgDcl0lveG40cLVZTWGp8-FxMolu-P8EjZyRBtXL4,49850
@@ -59,12 +59,10 @@ masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVq
 masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
 masster/study/defaults/merge_def.py,sha256=krR099IkENLlJVxpSjdje3E6h-_qtlc3Ep6Hpy6inrU,12978
 masster/study/defaults/study_def.py,sha256=h8dYbi9xv0sesCSQik49Z53IkskMmNtW6ixl7it5pL0,16033
-masster/wizard/README.md,sha256=RX3uxT1qD5i9iDSznZUbnukixonqr96JlUE4TwssAgY,14111
 masster/wizard/__init__.py,sha256=L9G_datyGSFJjrBVklEVpZVLGXzUhDiWobtiygBH8vQ,669
-masster/wizard/
-masster/
-masster-0.5.
-masster-0.5.
-masster-0.5.
-masster-0.5.
-masster-0.5.20.dist-info/RECORD,,
+masster/wizard/wizard.py,sha256=aTNVhYoMK5B1uR2zrdbwEyZX-Zd86e4sUGhkQ2bgiDo,65352
+masster-0.5.22.dist-info/METADATA,sha256=tLbIWPgMewKVqyz7MSbwLZZY5JOPUaEfYMx-q5w4rkk,45153
+masster-0.5.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+masster-0.5.22.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
+masster-0.5.22.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
+masster-0.5.22.dist-info/RECORD,,
masster/wizard/README.md
DELETED
@@ -1,373 +0,0 @@
# Wizard Class Documentation

The `Wizard` class provides comprehensive automation for mass spectrometry data processing, from raw files to final study results. It handles the complete workflow with minimal user intervention while providing intelligent resume capabilities, parallel processing optimization, and adaptive output formats.

## Quick Start

### Basic Usage

```python
from masster import Wizard

# Create wizard with minimal configuration
wizard = Wizard(
    data_source="./raw_data",    # Directory with raw files
    study_folder="./processed",  # Output directory
    polarity="positive",         # or "negative"
    num_cores=4                  # CPU cores to use
)

# Run complete pipeline
success = wizard.run_full_pipeline()

if success:
    wizard.info()  # Print summary
```

### Advanced Configuration

```python
from masster import Wizard, wizard_def

# Create custom parameters
params = wizard_def(
    data_source="./raw_data",
    study_folder="./processed_advanced",
    polarity="negative",
    num_cores=8,

    # File discovery
    file_extensions=[".wiff", ".raw", ".mzML"],
    search_subfolders=True,
    skip_patterns=["blank", "QC", "test"],

    # Processing parameters
    adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
    chrom_fwhm=0.15,
    noise_threshold=5e4,

    # Study assembly
    rt_tolerance=1.0,
    mz_tolerance=0.008,
    min_samples_for_merge=30,

    # Output options
    export_formats=["csv", "xlsx", "mgf", "parquet"],
    generate_plots=True,
    compress_output=True,
)

wizard = Wizard(params=params)
wizard.run_full_pipeline()
```

## Key Features

### 🔄 Automated Pipeline
- **Raw Data Discovery**: Automatically finds and validates raw MS files
- **Batch Conversion**: Parallel conversion to sample5 format with optimized parameters
- **Study Assembly**: Creates study from processed samples with quality filtering
- **Feature Alignment**: Cross-sample alignment using configurable algorithms
- **Consensus Generation**: Merges aligned features with statistical validation
- **Results Export**: Multiple output formats for downstream analysis

### 💾 Intelligent Resume
- **Checkpoint System**: Automatically saves progress at key points
- **File Tracking**: Remembers which files have been processed successfully
- **Smart Recovery**: Resumes from last successful step after interruption
- **Validation**: Verifies existing outputs before skipping

A minimal resume sketch appears at the end of this section.

### ⚡ Performance Optimization
- **Parallel Processing**: Utilizes multiple CPU cores efficiently
- **Memory Management**: Adaptive batch sizing based on available memory
- **Process Isolation**: Prevents memory leaks in long-running jobs
- **Adaptive Compression**: Optimizes output format based on study size

### 📊 Comprehensive Logging
- **Progress Tracking**: Real-time status updates with time estimates
- **Detailed Logs**: Complete processing history saved to files
- **Error Reporting**: Clear error messages with recovery suggestions
- **Performance Metrics**: Processing times and resource usage statistics

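The resume behaviour described under Intelligent Resume amounts to re-creating the wizard with the same parameters. A minimal sketch, assuming the same folders as the Quick Start example:

```python
# Minimal resume sketch (assumed paths). With resume_enabled=True (the
# default), the wizard reloads its checkpoint and skips completed work.
from masster import Wizard

wizard = Wizard(
    data_source="./raw_data",
    study_folder="./processed",
    polarity="positive",
    num_cores=4,
    resume_enabled=True,
)
wizard.info()               # report which steps the checkpoint marks as done
wizard.run_full_pipeline()  # continue from the last successful step
```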
## Pipeline Steps

### 1. File Discovery
- Searches for raw MS files (`.wiff`, `.raw`, `.mzML`, `.d`)
- Applies skip patterns to exclude unwanted files
- Validates file integrity and accessibility
- Reports file sizes and estimates processing time

### 2. Sample5 Conversion
- **Feature Detection**: Two-pass algorithm with configurable parameters
- **Adduct Detection**: Automated adduct grouping based on polarity
- **MS2 Linking**: Associates fragmentation spectra with features
- **Quality Control**: Validates outputs and reports statistics
- **Parallel Processing**: Utilizes multiple CPU cores with batch optimization

### 3. Study Assembly
- **Sample Loading**: Imports all processed sample5 files
- **Quality Filtering**: Removes low-quality features based on coherence/prominence
- **Metadata Organization**: Organizes sample information and experimental design
- **Memory Optimization**: Efficient data structures for large studies

### 4. Feature Alignment
- **RT Alignment**: Corrects retention time shifts between samples
- **Mass Alignment**: Accounts for mass calibration differences
- **Algorithm Selection**: Supports KD-tree, QT-clustering, and chunked methods
- **Validation**: Reports alignment statistics and quality metrics

### 5. Consensus Generation
- **Feature Merging**: Groups aligned features into consensus features
- **Statistical Validation**: Applies minimum sample requirements
- **Gap Filling**: Extracts chromatograms for missing values
- **MS2 Integration**: Links consensus features to MS2 spectra

### 6. Visualization & Export
- **Interactive Plots**: 2D feature maps, PCA plots, alignment visualizations
- **Multiple Formats**: CSV, Excel, MGF, Parquet exports
- **Study Archival**: Compressed study5 format for long-term storage
- **Metadata Export**: Complete processing parameters and statistics

## Configuration Options

### Core Parameters

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `data_source` | str | **required** | Directory containing raw data files |
| `study_folder` | str | **required** | Output directory for processed study |
| `polarity` | str | `"positive"` | Ion polarity mode (`"positive"` or `"negative"`) |
| `num_cores` | int | `4` | Number of CPU cores for parallel processing |
| `adducts` | List[str] | auto-set | Adduct specifications (set based on polarity) |

### File Discovery

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `file_extensions` | List[str] | `[".wiff", ".raw", ".mzML", ".d"]` | File types to search for |
| `search_subfolders` | bool | `True` | Search subdirectories recursively |
| `skip_patterns` | List[str] | `["blank", "QC", "test"]` | Filename patterns to skip |
| `max_file_size_gb` | float | `4.0` | Maximum file size warning threshold |

### Processing Parameters

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `batch_size` | int | `8` | Files processed per batch |
| `memory_limit_gb` | float | `16.0` | Memory usage limit |
| `chrom_fwhm` | float | `0.2` | Expected chromatographic peak width (s) |
| `noise_threshold` | float | `1e5` | Intensity threshold for peak detection |
| `chrom_peak_snr` | float | `5.0` | Signal-to-noise ratio requirement |
| `tol_ppm` | float | `10.0` | Mass tolerance (ppm) |

### Study Assembly

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `rt_tolerance` | float | `1.5` | RT tolerance for alignment (seconds) |
| `mz_tolerance` | float | `0.01` | m/z tolerance for alignment (Da) |
| `alignment_algorithm` | str | `"kd"` | Alignment algorithm (`"kd"`, `"qt"`, `"chunked"`) |
| `merge_method` | str | `"chunked"` | Merge algorithm for consensus generation |
| `min_samples_for_merge` | int | `50` | Minimum samples required for consensus |

### Output & Logging

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `generate_plots` | bool | `True` | Generate visualization plots |
| `export_formats` | List[str] | `["csv", "mgf", "xlsx"]` | Output formats to generate |
| `compress_output` | bool | `True` | Compress final study file |
| `adaptive_compression` | bool | `True` | Adapt compression based on study size |
| `log_level` | str | `"INFO"` | Logging detail level |
| `log_to_file` | bool | `True` | Save logs to file |

### Resume & Recovery

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `resume_enabled` | bool | `True` | Enable automatic resume capability |
| `force_reprocess` | bool | `False` | Force reprocessing of existing files |
| `backup_enabled` | bool | `True` | Create backups of intermediate results |
| `checkpoint_interval` | int | `10` | Save progress every N files |
| `cleanup_temp_files` | bool | `True` | Remove temporary files after completion |

## Methods

### Pipeline Control

#### `run_full_pipeline() -> bool`
Executes the complete processing pipeline in sequence. Returns `True` if successful.

#### Individual Steps
- `discover_files() -> List[Path]` - Find raw data files
- `convert_to_sample5(file_list=None) -> bool` - Convert to sample5 format
- `assemble_study() -> bool` - Create study from sample5 files
- `align_and_merge() -> bool` - Perform feature alignment and merging
- `generate_plots() -> bool` - Create visualization plots
- `export_results() -> bool` - Export in requested formats
- `save_study() -> bool` - Save final study file
- `cleanup_temp_files() -> bool` - Remove temporary files

### Status & Information

#### `info()`
Prints comprehensive wizard status including progress, timings, and results.

#### `get_status() -> Dict[str, Any]`
Returns detailed status dictionary with current step, processed files, timing, and parameters.

## Error Handling & Recovery

### Common Issues and Solutions

**Memory Errors**
- Reduce `batch_size` parameter
- Increase `memory_limit_gb` if available
- Use `merge_method="chunked"` for large studies
- Enable `cleanup_temp_files=True`

**File Access Errors**
- Check file permissions on source and destination folders
- Verify network connectivity for remote file systems
- Ensure sufficient disk space in output directory
- Close any applications that might lock files

**Processing Failures**
- Check individual file integrity
- Review `skip_patterns` to exclude problematic files
- Examine detailed logs in `wizard.log` and `processing.log`
- Try processing failed files individually for debugging

**Resume Issues**
- Delete `wizard_checkpoint.json` to force fresh start
- Verify output directory permissions
- Check for corrupted intermediate files

A snippet that clears the checkpoint appears at the end of this section.

### Validation and Quality Control

The Wizard includes built-in validation at each step:

- **File Validation**: Checks file accessibility and format compatibility
- **Processing Validation**: Verifies sample5 outputs can be loaded
- **Study Validation**: Ensures study assembly completed successfully
- **Alignment Validation**: Reports alignment statistics and warnings
- **Export Validation**: Confirms all requested outputs were created

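For the Resume Issues case above, forcing a fresh start is a one-liner; a sketch, assuming the checkpoint file lives in the study folder (its exact location is an assumption):

```python
# Remove the checkpoint so the next run reprocesses everything.
# "./processed" is a placeholder study folder.
from pathlib import Path

checkpoint = Path("./processed") / "wizard_checkpoint.json"
if checkpoint.exists():
    checkpoint.unlink()
    print("Checkpoint removed - next run starts fresh")
```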
## Performance Guidelines

### System Requirements
- **Minimum**: 4 CPU cores, 8 GB RAM
- **Recommended**: 8+ CPU cores, 16+ GB RAM
- **Large Studies**: 16+ CPU cores, 32+ GB RAM
- **Storage**: SSD recommended, ~2-3x raw data size free space

### Optimization Tips

A sketch mapping the large-study tips onto `wizard_def` appears at the end of this section.

**For Small Studies (< 50 samples)**
- Use `num_cores = 4-6`
- Set `batch_size = 4-8`
- Use `merge_method = "kd"`
- Enable all export formats

**For Large Studies (100+ samples)**
- Use `num_cores = 8-16`
- Set `batch_size = 16-32`
- Use `merge_method = "chunked"`
- Enable `adaptive_compression = True`
- Consider processing in polarity-specific batches

**For Very Large Studies (500+ samples)**
- Process positive/negative modes separately
- Use `memory_limit_gb = 64+`
- Set `checkpoint_interval = 50`
- Enable `cleanup_temp_files = True`
- Consider cluster/cloud processing

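The large-study tips above map directly onto `wizard_def` parameters; a sketch with assumed paths and the values suggested in the lists:

```python
# Example settings for a 100+ sample study; folder paths are placeholders.
from masster import Wizard, wizard_def

params = wizard_def(
    data_source="./raw_data",
    study_folder="./processed_large",
    polarity="positive",
    num_cores=16,
    batch_size=32,
    merge_method="chunked",
    adaptive_compression=True,
    checkpoint_interval=50,
    cleanup_temp_files=True,
)
Wizard(params=params).run_full_pipeline()
```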
## Integration Examples

### With Existing Workflows

```python
# Integration with custom preprocessing
wizard = Wizard(data_source="./preprocessed", ...)

# Skip conversion if already done
if not wizard.study_folder_path.glob("*.sample5"):
    wizard.convert_to_sample5()

# Continue with study-level processing
wizard.assemble_study()
wizard.align_and_merge()
wizard.export_results()
```

### Batch Processing Multiple Studies

```python
studies = [
    {"source": "./batch1", "output": "./results/batch1", "polarity": "pos"},
    {"source": "./batch2", "output": "./results/batch2", "polarity": "neg"},
]

for study_config in studies:
    wizard = Wizard(**study_config, num_cores=8)
    success = wizard.run_full_pipeline()

    if success:
        print(f"✅ {study_config['output']} completed")
    else:
        print(f"❌ {study_config['output']} failed")
```

### Custom Processing Steps

```python
wizard = Wizard(...)

# Standard conversion
wizard.convert_to_sample5()

# Custom study assembly with specific parameters
wizard.assemble_study()

# Custom filtering before alignment
if hasattr(wizard.study, 'features_filter'):
    selection = wizard.study.features_select(
        chrom_coherence=0.5,  # Higher quality threshold
        chrom_prominence_scaled=2.0
    )
    wizard.study.features_filter(selection)

# Continue with standard pipeline
wizard.align_and_merge()
wizard.generate_plots()
```

## Output Files

The Wizard generates several types of output files:

### Primary Results
- `final_study.study5` - Complete study in masster native format
- `consensus_features.csv` - Feature table with RT, m/z, intensity data
- `study_results.xlsx` - Multi-sheet Excel workbook with results and metadata
- `consensus_ms2.mgf` - MS2 spectra for database searching

### Visualizations
- `alignment_plot.html` - Interactive alignment visualization
- `consensus_2d.html` - 2D feature map of consensus features
- `pca_plot.html` - Principal component analysis plot
- `consensus_stats.html` - Study statistics and quality metrics

### Processing Logs
- `wizard.log` - Detailed processing log with debug information
- `processing.log` - Simple progress log with timestamps
- `study_metadata.txt` - Study summary with parameters and statistics

### Individual Sample Outputs (if enabled)
- `sample_name.sample5` - Processed sample in masster format
- `sample_name.features.csv` - Individual sample feature table
- `sample_name.mgf` - Individual sample MS2 spectra
- `sample_name_2d.html` - Individual sample 2D plot

The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.
masster/wizard/example.py
DELETED
@@ -1,223 +0,0 @@
#!/usr/bin/env python3
"""
Example script demonstrating the Wizard class for automated study processing.

This script shows how to use the Wizard class to automatically process
mass spectrometry data from raw files to final study results.
"""

from pathlib import Path
from masster import Wizard, wizard_def

def main():
    """Main example function."""

    # =================================================================
    # EXAMPLE 1: Basic Usage with Minimal Configuration
    # =================================================================
    print("=== Example 1: Basic Wizard Usage ===\n")

    # Set up paths (adjust these for your data)
    data_source = r"D:\Data\raw_files"         # Directory with .wiff, .raw, .mzML files
    study_folder = r"D:\Data\processed_study"  # Output directory

    # Create wizard with basic settings
    wizard = Wizard(
        data_source=data_source,
        study_folder=study_folder,
        polarity="positive",  # or "negative"
        num_cores=4
    )

    # Run the complete pipeline
    success = wizard.run_full_pipeline()

    if success:
        print("✅ Processing completed successfully!")
        wizard.info()  # Print status summary
    else:
        print("❌ Processing failed. Check logs for details.")

    print("\n" + "="*60 + "\n")

    # =================================================================
    # EXAMPLE 2: Advanced Configuration with Custom Parameters
    # =================================================================
    print("=== Example 2: Advanced Wizard Configuration ===\n")

    # Create custom parameters
    params = wizard_def(
        # Core settings
        data_source=data_source,
        study_folder=study_folder + "_advanced",
        polarity="negative",
        num_cores=8,

        # File discovery settings
        file_extensions=[".wiff", ".raw", ".mzML"],
        search_subfolders=True,
        skip_patterns=["blank", "QC", "test", "solvent"],

        # Processing parameters
        adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
        batch_size=4,          # Process 4 files at once
        memory_limit_gb=32.0,

        # Feature detection parameters
        chrom_fwhm=0.15,       # Narrower peaks for UHPLC
        noise_threshold=5e4,   # Lower noise threshold
        chrom_peak_snr=7.0,    # Higher S/N requirement
        tol_ppm=8.0,           # Tighter mass tolerance

        # Study assembly parameters
        rt_tolerance=1.0,           # Tighter RT tolerance
        mz_tolerance=0.008,         # Tighter m/z tolerance
        min_samples_for_merge=30,   # Require feature in at least 30 samples
        merge_method="chunked",     # Memory-efficient merging

        # Output options
        generate_plots=True,
        generate_interactive=True,
        export_formats=["csv", "xlsx", "mgf", "parquet"],
        compress_output=True,
        adaptive_compression=True,

        # Advanced options
        resume_enabled=True,     # Can resume if interrupted
        force_reprocess=False,   # Skip already processed files
        backup_enabled=True,
        cleanup_temp_files=True,
        log_level="INFO",
        verbose_progress=True,
    )

    # Create wizard with custom parameters
    wizard_advanced = Wizard(params=params)

    # You can also run individual steps for more control
    print("Running step-by-step processing...")

    # Step 1: Discover files
    files = wizard_advanced.discover_files()
    print(f"Found {len(files)} files for processing")

    # Step 2: Convert to sample5 (can be resumed if interrupted)
    if wizard_advanced.convert_to_sample5():
        print("✅ Sample5 conversion completed")

    # Step 3: Assemble study
    if wizard_advanced.assemble_study():
        print("✅ Study assembly completed")

    # Step 4: Align and merge
    if wizard_advanced.align_and_merge():
        print("✅ Alignment and merging completed")

    # Step 5: Generate plots
    if wizard_advanced.generate_plots():
        print("✅ Plot generation completed")

    # Step 6: Export results
    if wizard_advanced.export_results():
        print("✅ Results exported")

    # Step 7: Save final study
    if wizard_advanced.save_study():
        print("✅ Study saved")

    # Optional cleanup
    wizard_advanced.cleanup_temp_files()
    print("✅ Cleanup completed")

    # Print final status
    wizard_advanced.info()

    print("\n" + "="*60 + "\n")

    # =================================================================
    # EXAMPLE 3: Resume Interrupted Processing
    # =================================================================
    print("=== Example 3: Resume Capability ===\n")

    # If processing was interrupted, you can resume by creating a new wizard
    # with the same parameters. It will automatically detect and skip
    # already processed files.

    resume_wizard = Wizard(
        data_source=data_source,
        study_folder=study_folder + "_resume",
        polarity="positive",
        num_cores=4,
        resume_enabled=True  # This is the default
    )

    # The wizard will automatically load checkpoint and continue from where it left off
    print("Status after loading checkpoint:")
    resume_wizard.info()

    print("\n" + "="*60 + "\n")

    # =================================================================
    # EXAMPLE 4: Monitoring and Status
    # =================================================================
    print("=== Example 4: Status Monitoring ===\n")

    # You can check wizard status at any time
    status = wizard.get_status()
    print("Wizard Status:")
    for key, value in status.items():
        print(f" {key}: {value}")

    # The wizard maintains comprehensive logs
    log_file = Path(study_folder) / "wizard.log"
    if log_file.exists():
        print(f"\nDetailed logs available at: {log_file}")

    processing_log = Path(study_folder) / "processing.log"
    if processing_log.exists():
        print(f"Processing summary at: {processing_log}")


def example_batch_different_polarities():
    """Example of processing positive and negative mode data separately."""

    print("=== Processing Both Polarities ===\n")

    base_data_source = r"D:\Data\raw_files"
    base_output = r"D:\Data\processed_studies"

    # Process positive mode
    pos_wizard = Wizard(
        data_source=base_data_source + r"\positive",
        study_folder=base_output + r"\positive_study",
        polarity="positive",
        adducts=["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
        num_cores=6
    )

    print("Processing positive mode data...")
    pos_success = pos_wizard.run_full_pipeline()

    # Process negative mode
    neg_wizard = Wizard(
        data_source=base_data_source + r"\negative",
        study_folder=base_output + r"\negative_study",
        polarity="negative",
        adducts=["H-1:-:0.95", "Cl:-:0.05"],
        num_cores=6
    )

    print("Processing negative mode data...")
    neg_success = neg_wizard.run_full_pipeline()

    print("\nResults:")
    print(f"Positive mode: {'✅ Success' if pos_success else '❌ Failed'}")
    print(f"Negative mode: {'✅ Success' if neg_success else '❌ Failed'}")


if __name__ == "__main__":
    # Run basic examples
    main()

    # Uncomment to run polarity-specific processing
    # example_batch_different_polarities()

File without changes
File without changes
File without changes