masster 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +1 -1
- masster/_version.py +1 -1
- masster/study/plot.py +3 -0
- masster/wizard/__init__.py +2 -2
- masster/wizard/wizard.py +492 -825
- {masster-0.5.15.dist-info → masster-0.5.17.dist-info}/METADATA +1 -3
- {masster-0.5.15.dist-info → masster-0.5.17.dist-info}/RECORD +10 -10
- {masster-0.5.15.dist-info → masster-0.5.17.dist-info}/WHEEL +0 -0
- {masster-0.5.15.dist-info → masster-0.5.17.dist-info}/entry_points.txt +0 -0
- {masster-0.5.15.dist-info → masster-0.5.17.dist-info}/licenses/LICENSE +0 -0
masster/wizard/wizard.py
CHANGED
|
@@ -228,9 +228,17 @@ class Wizard:
|
|
|
228
228
|
that process raw MS data through the complete pipeline: file discovery, feature
|
|
229
229
|
detection, sample processing, study assembly, alignment, merging, and export.
|
|
230
230
|
|
|
231
|
-
|
|
232
|
-
- create_scripts(): Generate
|
|
233
|
-
-
|
|
231
|
+
Core functions:
|
|
232
|
+
- create_scripts(): Generate standalone analysis scripts
|
|
233
|
+
- test_only(): Process only one file for parameter validation
|
|
234
|
+
- test_and_run(): Test with single file, then run full batch if successful
|
|
235
|
+
- run(): Execute full batch processing on all files
|
|
236
|
+
|
|
237
|
+
Recommended workflow:
|
|
238
|
+
1. wizard = Wizard(source="raw_data", folder="output")
|
|
239
|
+
2. wizard.create_scripts() # Generate analysis scripts
|
|
240
|
+
3. wizard.test_only() # Validate with single file
|
|
241
|
+
4. wizard.run() # Process all files
|
|
234
242
|
"""
|
|
235
243
|
|
|
236
244
|
def __init__(
|
|
@@ -239,7 +247,7 @@ class Wizard:
|
|
|
239
247
|
folder: str = "",
|
|
240
248
|
polarity: str = "positive",
|
|
241
249
|
adducts: Optional[List[str]] = None,
|
|
242
|
-
num_cores: int =
|
|
250
|
+
num_cores: int = 6,
|
|
243
251
|
**kwargs
|
|
244
252
|
):
|
|
245
253
|
"""
|
|
@@ -290,14 +298,6 @@ class Wizard:
|
|
|
290
298
|
self.folder_path = Path(self.params.folder)
|
|
291
299
|
self.folder_path.mkdir(parents=True, exist_ok=True)
|
|
292
300
|
|
|
293
|
-
# Initialize logger
|
|
294
|
-
self.logger = MassterLogger(
|
|
295
|
-
instance_type="wizard",
|
|
296
|
-
level="INFO",
|
|
297
|
-
label="Wizard",
|
|
298
|
-
sink=None
|
|
299
|
-
)
|
|
300
|
-
|
|
301
301
|
# Auto-infer polarity from the first file if not explicitly set by user
|
|
302
302
|
if polarity == "positive" and "polarity" not in kwargs:
|
|
303
303
|
inferred_polarity = self._infer_polarity_from_first_file()
|
|
@@ -306,164 +306,6 @@ class Wizard:
|
|
|
306
306
|
# Update adducts based on inferred polarity
|
|
307
307
|
self.params.__post_init__()
|
|
308
308
|
|
|
309
|
-
def _analyze_source_files(self) -> Dict[str, Any]:
|
|
310
|
-
"""
|
|
311
|
-
Analyze source files to extract metadata: number of files, file type, polarity, and acquisition length.
|
|
312
|
-
|
|
313
|
-
Returns:
|
|
314
|
-
Dictionary containing:
|
|
315
|
-
- number_of_files: Total count of data files found
|
|
316
|
-
- file_types: List of file extensions found
|
|
317
|
-
- polarity: Detected polarity ("positive" or "negative")
|
|
318
|
-
- length_minutes: Acquisition length in minutes
|
|
319
|
-
- first_file: Path to first file analyzed
|
|
320
|
-
"""
|
|
321
|
-
result = {
|
|
322
|
-
'number_of_files': 0,
|
|
323
|
-
'file_types': [],
|
|
324
|
-
'polarity': 'positive',
|
|
325
|
-
'length_minutes': 0.0,
|
|
326
|
-
'first_file': None
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
try:
|
|
330
|
-
# Find all data files
|
|
331
|
-
all_files = []
|
|
332
|
-
file_types_found = set()
|
|
333
|
-
|
|
334
|
-
for extension in self.params.file_extensions:
|
|
335
|
-
if self.params.search_subfolders:
|
|
336
|
-
pattern = f"**/*{extension}"
|
|
337
|
-
files = list(self.source_path.rglob(pattern))
|
|
338
|
-
else:
|
|
339
|
-
pattern = f"*{extension}"
|
|
340
|
-
files = list(self.source_path.glob(pattern))
|
|
341
|
-
|
|
342
|
-
if files:
|
|
343
|
-
all_files.extend(files)
|
|
344
|
-
file_types_found.add(extension)
|
|
345
|
-
|
|
346
|
-
result['number_of_files'] = len(all_files)
|
|
347
|
-
result['file_types'] = list(file_types_found)
|
|
348
|
-
|
|
349
|
-
if not all_files:
|
|
350
|
-
return result
|
|
351
|
-
|
|
352
|
-
# Analyze first file for polarity and acquisition length
|
|
353
|
-
first_file = all_files[0]
|
|
354
|
-
result['first_file'] = str(first_file)
|
|
355
|
-
|
|
356
|
-
# Extract metadata based on file type
|
|
357
|
-
if first_file.suffix.lower() == '.wiff':
|
|
358
|
-
metadata = self._analyze_wiff_file(first_file)
|
|
359
|
-
elif first_file.suffix.lower() == '.mzml':
|
|
360
|
-
metadata = self._analyze_mzml_file(first_file)
|
|
361
|
-
elif first_file.suffix.lower() == '.raw':
|
|
362
|
-
metadata = self._analyze_raw_file(first_file)
|
|
363
|
-
else:
|
|
364
|
-
metadata = {'polarity': 'positive', 'length_minutes': 0.0}
|
|
365
|
-
|
|
366
|
-
result['polarity'] = metadata.get('polarity', 'positive')
|
|
367
|
-
result['length_minutes'] = metadata.get('length_minutes', 0.0)
|
|
368
|
-
|
|
369
|
-
except Exception as e:
|
|
370
|
-
self.logger.warning(f"Failed to analyze source files: {e}")
|
|
371
|
-
|
|
372
|
-
return result
|
|
373
|
-
|
|
374
|
-
def _analyze_wiff_file(self, file_path: Path) -> Dict[str, Any]:
|
|
375
|
-
"""Analyze WIFF file to extract polarity and acquisition length."""
|
|
376
|
-
try:
|
|
377
|
-
from masster.sample.load import _wiff_to_dict
|
|
378
|
-
|
|
379
|
-
# Extract metadata from WIFF file
|
|
380
|
-
metadata_df = _wiff_to_dict(str(file_path))
|
|
381
|
-
|
|
382
|
-
result = {'polarity': 'positive', 'length_minutes': 0.0}
|
|
383
|
-
|
|
384
|
-
if not metadata_df.empty:
|
|
385
|
-
# Get polarity from first experiment
|
|
386
|
-
if 'polarity' in metadata_df.columns:
|
|
387
|
-
first_polarity = metadata_df['polarity'].iloc[0]
|
|
388
|
-
|
|
389
|
-
# Convert numeric polarity codes to string
|
|
390
|
-
if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
|
|
391
|
-
result['polarity'] = "positive"
|
|
392
|
-
elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
|
|
393
|
-
result['polarity'] = "negative"
|
|
394
|
-
|
|
395
|
-
# Estimate acquisition length by loading the file briefly
|
|
396
|
-
# For a rough estimate, we'll load just the scan info
|
|
397
|
-
from masster.sample import Sample
|
|
398
|
-
sample = Sample()
|
|
399
|
-
sample.logger_update(level="ERROR") # Suppress logs
|
|
400
|
-
sample.load(str(file_path))
|
|
401
|
-
|
|
402
|
-
if hasattr(sample, 'scans_df') and sample.scans_df is not None:
|
|
403
|
-
if not sample.scans_df.is_empty():
|
|
404
|
-
rt_values = sample.scans_df.select('rt').to_numpy().flatten()
|
|
405
|
-
if len(rt_values) > 0:
|
|
406
|
-
# RT is in seconds, convert to minutes
|
|
407
|
-
result['length_minutes'] = float(rt_values.max()) / 60.0
|
|
408
|
-
|
|
409
|
-
return result
|
|
410
|
-
|
|
411
|
-
except Exception as e:
|
|
412
|
-
self.logger.debug(f"Failed to analyze WIFF file {file_path}: {e}")
|
|
413
|
-
return {'polarity': 'positive', 'length_minutes': 0.0}
|
|
414
|
-
|
|
415
|
-
def _analyze_mzml_file(self, file_path: Path) -> Dict[str, Any]:
|
|
416
|
-
"""Analyze mzML file to extract polarity and acquisition length."""
|
|
417
|
-
try:
|
|
418
|
-
from masster.sample import Sample
|
|
419
|
-
|
|
420
|
-
sample = Sample()
|
|
421
|
-
sample.logger_update(level="ERROR") # Suppress logs
|
|
422
|
-
sample.load(str(file_path))
|
|
423
|
-
|
|
424
|
-
result = {'polarity': 'positive', 'length_minutes': 0.0}
|
|
425
|
-
|
|
426
|
-
if hasattr(sample, 'scans_df') and sample.scans_df is not None:
|
|
427
|
-
if not sample.scans_df.is_empty():
|
|
428
|
-
rt_values = sample.scans_df.select('rt').to_numpy().flatten()
|
|
429
|
-
if len(rt_values) > 0:
|
|
430
|
-
# RT is in seconds, convert to minutes
|
|
431
|
-
result['length_minutes'] = float(rt_values.max()) / 60.0
|
|
432
|
-
|
|
433
|
-
# For mzML, polarity detection would require more detailed parsing
|
|
434
|
-
# For now, use default
|
|
435
|
-
return result
|
|
436
|
-
|
|
437
|
-
except Exception as e:
|
|
438
|
-
self.logger.debug(f"Failed to analyze mzML file {file_path}: {e}")
|
|
439
|
-
return {'polarity': 'positive', 'length_minutes': 0.0}
|
|
440
|
-
|
|
441
|
-
def _analyze_raw_file(self, file_path: Path) -> Dict[str, Any]:
|
|
442
|
-
"""Analyze RAW file to extract polarity and acquisition length."""
|
|
443
|
-
try:
|
|
444
|
-
from masster.sample import Sample
|
|
445
|
-
|
|
446
|
-
sample = Sample()
|
|
447
|
-
sample.logger_update(level="ERROR") # Suppress logs
|
|
448
|
-
sample.load(str(file_path))
|
|
449
|
-
|
|
450
|
-
result = {'polarity': 'positive', 'length_minutes': 0.0}
|
|
451
|
-
|
|
452
|
-
if hasattr(sample, 'scans_df') and sample.scans_df is not None:
|
|
453
|
-
if not sample.scans_df.is_empty():
|
|
454
|
-
rt_values = sample.scans_df.select('rt').to_numpy().flatten()
|
|
455
|
-
if len(rt_values) > 0:
|
|
456
|
-
# RT is in seconds, convert to minutes
|
|
457
|
-
result['length_minutes'] = float(rt_values.max()) / 60.0
|
|
458
|
-
|
|
459
|
-
# For RAW files, polarity detection would require more detailed parsing
|
|
460
|
-
# For now, use default
|
|
461
|
-
return result
|
|
462
|
-
|
|
463
|
-
except Exception as e:
|
|
464
|
-
self.logger.debug(f"Failed to analyze RAW file {file_path}: {e}")
|
|
465
|
-
return {'polarity': 'positive', 'length_minutes': 0.0}
|
|
466
|
-
|
|
467
309
|
def _infer_polarity_from_first_file(self) -> str:
|
|
468
310
|
"""
|
|
469
311
|
Infer polarity from the first available raw data file.
|
|
@@ -538,10 +380,8 @@ class Wizard:
|
|
|
538
380
|
source_info = self._analyze_source_files()
|
|
539
381
|
|
|
540
382
|
# Update wizard parameters based on detected metadata
|
|
541
|
-
if source_info['polarity'] != 'positive':
|
|
383
|
+
if source_info.get('polarity') and source_info['polarity'] != 'positive':
|
|
542
384
|
self.params.polarity = source_info['polarity']
|
|
543
|
-
# Update adducts based on detected polarity
|
|
544
|
-
self.params.__post_init__()
|
|
545
385
|
|
|
546
386
|
files_created = []
|
|
547
387
|
|
|
@@ -549,6 +389,9 @@ class Wizard:
|
|
|
549
389
|
workflow_script_path = self.folder_path / "1_masster_workflow.py"
|
|
550
390
|
workflow_content = self._generate_workflow_script_content(source_info)
|
|
551
391
|
|
|
392
|
+
# Apply test mode modifications
|
|
393
|
+
workflow_content = self._add_test_mode_support(workflow_content)
|
|
394
|
+
|
|
552
395
|
with open(workflow_script_path, 'w', encoding='utf-8') as f:
|
|
553
396
|
f.write(workflow_content)
|
|
554
397
|
files_created.append(str(workflow_script_path))
|
|
@@ -573,7 +416,6 @@ class Wizard:
|
|
|
573
416
|
}
|
|
574
417
|
|
|
575
418
|
except Exception as e:
|
|
576
|
-
self.logger.error(f"Failed to create scripts: {e}")
|
|
577
419
|
return {
|
|
578
420
|
"status": "error",
|
|
579
421
|
"message": f"Failed to create scripts: {e}",
|
|
@@ -582,120 +424,109 @@ class Wizard:
|
|
|
582
424
|
"source_info": {}
|
|
583
425
|
}
|
|
584
426
|
|
|
585
|
-
def
|
|
586
|
-
"""
|
|
427
|
+
def _analyze_source_files(self) -> Dict[str, Any]:
|
|
428
|
+
"""Analyze source files to extract metadata."""
|
|
429
|
+
result = {
|
|
430
|
+
"number_of_files": 0,
|
|
431
|
+
"file_types": [],
|
|
432
|
+
"polarity": "positive",
|
|
433
|
+
"length_minutes": 0.0,
|
|
434
|
+
"first_file": None
|
|
435
|
+
}
|
|
587
436
|
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
437
|
+
try:
|
|
438
|
+
# Find raw data files
|
|
439
|
+
extensions = [".wiff", ".raw", ".mzML"]
|
|
440
|
+
raw_files = []
|
|
441
|
+
|
|
442
|
+
for ext in extensions:
|
|
443
|
+
pattern = f"**/*{ext}"
|
|
444
|
+
files = list(self.source_path.rglob(pattern))
|
|
445
|
+
if files:
|
|
446
|
+
raw_files.extend(files)
|
|
447
|
+
if ext not in result["file_types"]:
|
|
448
|
+
result["file_types"].append(ext)
|
|
449
|
+
|
|
450
|
+
result["number_of_files"] = len(raw_files)
|
|
451
|
+
|
|
452
|
+
if raw_files:
|
|
453
|
+
result["first_file"] = str(raw_files[0])
|
|
454
|
+
# Simple heuristic: assume 30 minutes per file if we can't determine
|
|
455
|
+
result["length_minutes"] = 30.0
|
|
456
|
+
|
|
457
|
+
except Exception as e:
|
|
458
|
+
print(f"Warning: Could not analyze source files: {e}")
|
|
459
|
+
|
|
460
|
+
return result
|
|
597
461
|
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
params_lines.append('# Analysis parameters (auto-detected from source files)')
|
|
601
|
-
params_lines.append('PARAMS = {')
|
|
602
|
-
|
|
603
|
-
# File Discovery Summary
|
|
604
|
-
params_lines.append(' # === Source File Analysis ===')
|
|
605
|
-
params_lines.append(f' "number_of_files": {source_info.get("number_of_files", 0)}, # Total raw data files found')
|
|
606
|
-
params_lines.append(f' "file_types": {source_info.get("file_types", [])!r}, # Detected file extensions')
|
|
607
|
-
params_lines.append(f' "length_minutes": {source_info.get("length_minutes", 0.0):.1f}, # Estimated acquisition length per file (minutes)')
|
|
608
|
-
if source_info.get('first_file'):
|
|
609
|
-
params_lines.append(f' "first_file": {source_info["first_file"]!r}, # First file analyzed for metadata')
|
|
610
|
-
params_lines.append('')
|
|
611
|
-
|
|
612
|
-
# Core Configuration
|
|
613
|
-
params_lines.append(' # === Core Configuration ===')
|
|
614
|
-
params_lines.append(f' "source": {params_dict.get("source", "")!r}, # Directory containing raw data files')
|
|
615
|
-
params_lines.append(f' "folder": {params_dict.get("folder", "")!r}, # Output directory for processed study')
|
|
616
|
-
params_lines.append(f' "polarity": {params_dict.get("polarity", "positive")!r}, # Ion polarity mode (auto-detected)')
|
|
617
|
-
params_lines.append(f' "num_cores": {params_dict.get("num_cores", 4)}, # Number of CPU cores for parallel processing')
|
|
618
|
-
params_lines.append('')
|
|
619
|
-
|
|
620
|
-
# File Discovery
|
|
621
|
-
params_lines.append(' # === File Discovery ===')
|
|
622
|
-
params_lines.append(f' "file_extensions": {params_dict.get("file_extensions", [".wiff", ".raw", ".mzML"])!r}, # File extensions to search for')
|
|
623
|
-
params_lines.append(f' "search_subfolders": {params_dict.get("search_subfolders", True)}, # Whether to search subdirectories recursively')
|
|
624
|
-
params_lines.append(f' "skip_patterns": {params_dict.get("skip_patterns", ["blank", "condition"])!r}, # Filename patterns to skip')
|
|
625
|
-
params_lines.append('')
|
|
626
|
-
|
|
627
|
-
# Processing Parameters - Critical values to review
|
|
628
|
-
params_lines.append(' # === Processing Parameters (REVIEW THESE VALUES) ===')
|
|
629
|
-
params_lines.append(f' "adducts": {params_dict.get("adducts", [])!r}, # Adduct specifications for feature detection and annotation')
|
|
630
|
-
params_lines.append(f' "detector_type": {params_dict.get("detector_type", "unknown")!r}, # MS detector type ("orbitrap", "tof", "unknown")')
|
|
631
|
-
params_lines.append(f' "noise": {params_dict.get("noise", 50.0)}, # REVIEW: Noise threshold for feature detection. Set to 1e5 for Orbitraps')
|
|
632
|
-
params_lines.append(f' "chrom_fwhm": {params_dict.get("chrom_fwhm", 0.5)}, # REVIEW: Chromatographic peak FWHM (seconds)')
|
|
633
|
-
params_lines.append(f' "chrom_peak_snr": {params_dict.get("chrom_peak_snr", 5.0)}, # Minimum signal-to-noise ratio for chromatographic peaks')
|
|
634
|
-
params_lines.append('')
|
|
635
|
-
|
|
636
|
-
# Other parameters...
|
|
637
|
-
params_lines.append(' # === Alignment & Merging ===')
|
|
638
|
-
params_lines.append(f' "rt_tol": {params_dict.get("rt_tol", 5.0)}, # Retention time tolerance for alignment (seconds)')
|
|
639
|
-
params_lines.append(f' "mz_tol": {params_dict.get("mz_tol", 0.01)}, # Mass-to-charge ratio tolerance for alignment (Da)')
|
|
640
|
-
params_lines.append(f' "alignment_method": {params_dict.get("alignment_method", "kd")!r}, # Algorithm for sample alignment')
|
|
641
|
-
params_lines.append(f' "min_samples_per_feature": {params_dict.get("min_samples_per_feature", 1)}, # Minimum samples required per consensus feature')
|
|
642
|
-
params_lines.append(f' "merge_method": {params_dict.get("merge_method", "qt")!r}, # Method for merging consensus features')
|
|
643
|
-
params_lines.append('')
|
|
644
|
-
|
|
645
|
-
# Other params
|
|
646
|
-
params_lines.append(' # === Sample Processing ===')
|
|
647
|
-
params_lines.append(f' "batch_size": {params_dict.get("batch_size", 8)}, # Number of files to process per batch')
|
|
648
|
-
params_lines.append(f' "memory_limit_gb": {params_dict.get("memory_limit_gb", 16.0)}, # Memory limit for processing (GB)')
|
|
649
|
-
params_lines.append('')
|
|
650
|
-
|
|
651
|
-
params_lines.append(' # === Script Options ===')
|
|
652
|
-
params_lines.append(f' "resume_enabled": {params_dict.get("resume_enabled", True)}, # Enable automatic resume capability')
|
|
653
|
-
params_lines.append(f' "force_reprocess": {params_dict.get("force_reprocess", False)}, # Force reprocessing of existing files')
|
|
654
|
-
params_lines.append(f' "cleanup_temp_files": {params_dict.get("cleanup_temp_files", True)}, # Clean up temporary files after processing')
|
|
655
|
-
|
|
656
|
-
params_lines.append('}')
|
|
462
|
+
def _generate_workflow_script_content(self, source_info: Dict[str, Any]) -> str:
|
|
463
|
+
"""Generate the content for 1_masster_workflow.py script."""
|
|
657
464
|
|
|
658
|
-
# Create script lines
|
|
659
465
|
script_lines = [
|
|
660
466
|
'#!/usr/bin/env python3',
|
|
661
467
|
'"""',
|
|
662
|
-
'
|
|
663
|
-
|
|
664
|
-
'',
|
|
665
|
-
'Source Analysis:',
|
|
666
|
-
f' - Files found: {source_info.get("number_of_files", 0)}',
|
|
667
|
-
f' - File types: {", ".join(source_info.get("file_types", []))}',
|
|
668
|
-
f' - Polarity detected: {source_info.get("polarity", "unknown")}',
|
|
669
|
-
f' - Acquisition length: ~{source_info.get("length_minutes", 0.0):.1f} minutes per file',
|
|
670
|
-
'',
|
|
671
|
-
'This script processes raw MS data files into sample5 format.',
|
|
672
|
-
'Review the NOISE and CHROM_FWHM parameters below before running.',
|
|
468
|
+
'Automated Mass Spectrometry Data Analysis Pipeline',
|
|
469
|
+
'Generated by masster wizard',
|
|
673
470
|
'"""',
|
|
674
471
|
'',
|
|
472
|
+
'import os',
|
|
675
473
|
'import sys',
|
|
676
474
|
'import time',
|
|
677
475
|
'from pathlib import Path',
|
|
678
|
-
'import concurrent.futures',
|
|
679
|
-
'import os',
|
|
680
476
|
'',
|
|
681
477
|
'# Import masster modules',
|
|
682
|
-
'from masster.
|
|
478
|
+
'from masster.study import Study',
|
|
683
479
|
'from masster import __version__',
|
|
684
480
|
'',
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
481
|
+
'# Test mode configuration',
|
|
482
|
+
'TEST_MODE = os.environ.get("MASSTER_TEST_MODE", "0") == "1"',
|
|
483
|
+
'TEST_ONLY = os.environ.get("MASSTER_TEST_ONLY", "0") == "1" # Only run test, don\'t continue to full batch',
|
|
484
|
+
'',
|
|
485
|
+
'# Analysis parameters',
|
|
486
|
+
'PARAMS = {',
|
|
487
|
+
' # === Core Configuration ===',
|
|
488
|
+
f' "source": {str(self.source_path)!r}, # Directory containing raw data files',
|
|
489
|
+
f' "folder": {str(self.folder_path)!r}, # Output directory for processed study',
|
|
490
|
+
f' "polarity": {self.params.polarity!r}, # Ion polarity mode ("positive" or "negative")',
|
|
491
|
+
f' "num_cores": {self.params.num_cores}, # Number of CPU cores for parallel processing',
|
|
492
|
+
'',
|
|
493
|
+
' # === Test Mode ===',
|
|
494
|
+
' "test_mode": TEST_MODE, # Process only first file for testing',
|
|
495
|
+
' "test_only": TEST_ONLY, # Stop after test, don\'t run full batch',
|
|
496
|
+
'',
|
|
497
|
+
' # === File Discovery ===',
|
|
498
|
+
f' "file_extensions": {self.params.file_extensions!r}, # File extensions to search for',
|
|
499
|
+
f' "search_subfolders": {self.params.search_subfolders}, # Whether to search subdirectories recursively',
|
|
500
|
+
f' "skip_patterns": {self.params.skip_patterns!r}, # Filename patterns to skip',
|
|
501
|
+
'',
|
|
502
|
+
' # === Processing Parameters ===',
|
|
503
|
+
f' "adducts": {self.params.adducts!r}, # Adduct specifications for feature detection and annotation',
|
|
504
|
+
f' "noise": {self.params.noise}, # Noise threshold for feature detection',
|
|
505
|
+
f' "chrom_fwhm": {self.params.chrom_fwhm}, # Chromatographic peak full width at half maximum (seconds)',
|
|
506
|
+
f' "chrom_peak_snr": {self.params.chrom_peak_snr}, # Minimum signal-to-noise ratio for chromatographic peaks',
|
|
507
|
+
'',
|
|
508
|
+
' # === Alignment & Merging ===',
|
|
509
|
+
f' "rt_tol": {self.params.rt_tolerance}, # Retention time tolerance for alignment (seconds)',
|
|
510
|
+
f' "mz_tol": {self.params.mz_max_diff}, # Mass-to-charge ratio tolerance for alignment (Da)',
|
|
511
|
+
f' "alignment_method": {self.params.alignment_algorithm!r}, # Algorithm for sample alignment',
|
|
512
|
+
f' "min_samples_per_feature": {self.params.min_samples_for_merge}, # Minimum samples required per consensus feature',
|
|
513
|
+
f' "merge_method": {self.params.merge_method!r}, # Method for merging consensus features',
|
|
514
|
+
'',
|
|
515
|
+
' # === Sample Processing (used in add_samples_from_folder) ===',
|
|
516
|
+
f' "batch_size": {self.params.batch_size}, # Number of files to process per batch',
|
|
517
|
+
f' "memory_limit_gb": {self.params.memory_limit_gb}, # Memory limit for processing (GB)',
|
|
518
|
+
'',
|
|
519
|
+
' # === Script Options ===',
|
|
520
|
+
f' "resume_enabled": {self.params.resume_enabled}, # Enable automatic resume capability',
|
|
521
|
+
f' "force_reprocess": {self.params.force_reprocess}, # Force reprocessing of existing files',
|
|
522
|
+
f' "cleanup_temp_files": {self.params.cleanup_temp_files}, # Clean up temporary files after processing',
|
|
523
|
+
'}',
|
|
692
524
|
'',
|
|
693
525
|
'',
|
|
694
|
-
'def discover_raw_files(source_folder, file_extensions, search_subfolders=True
|
|
526
|
+
'def discover_raw_files(source_folder, file_extensions, search_subfolders=True):',
|
|
695
527
|
' """Discover raw data files in the source folder."""',
|
|
696
528
|
' source_path = Path(source_folder)',
|
|
697
529
|
' raw_files = []',
|
|
698
|
-
' skip_patterns = skip_patterns or []',
|
|
699
530
|
' ',
|
|
700
531
|
' for ext in file_extensions:',
|
|
701
532
|
' if search_subfolders:',
|
|
@@ -704,191 +535,189 @@ class Wizard:
|
|
|
704
535
|
' else:',
|
|
705
536
|
' pattern = f"*{ext}"',
|
|
706
537
|
' files = list(source_path.glob(pattern))',
|
|
707
|
-
' ',
|
|
708
|
-
' # Filter out files matching skip patterns',
|
|
709
|
-
' for file in files:',
|
|
710
|
-
' skip_file = False',
|
|
711
|
-
' for skip_pattern in skip_patterns:',
|
|
712
|
-
' if skip_pattern.lower() in file.name.lower():',
|
|
713
|
-
' skip_file = True',
|
|
714
|
-
' break',
|
|
715
|
-
' if not skip_file:',
|
|
716
|
-
' raw_files.append(file)',
|
|
538
|
+
' raw_files.extend(files)',
|
|
717
539
|
' ',
|
|
718
540
|
' return raw_files',
|
|
719
541
|
'',
|
|
720
542
|
'',
|
|
721
543
|
'def process_single_file(args):',
|
|
722
|
-
' """Process a single raw file to sample5 format -
|
|
723
|
-
' raw_file, output_folder
|
|
544
|
+
' """Process a single raw file to sample5 format - module level for multiprocessing."""',
|
|
545
|
+
' raw_file, output_folder = args',
|
|
546
|
+
' from masster.sample import Sample',
|
|
724
547
|
' ',
|
|
725
548
|
' try:',
|
|
726
549
|
' # Create sample5 filename',
|
|
727
550
|
' sample_name = raw_file.stem',
|
|
728
551
|
' sample5_path = Path(output_folder) / f"{sample_name}.sample5"',
|
|
729
552
|
' ',
|
|
730
|
-
' # Skip if sample5 already exists
|
|
731
|
-
' if sample5_path.exists() and
|
|
732
|
-
' print(f"
|
|
733
|
-
' return
|
|
553
|
+
' # Skip if sample5 already exists',
|
|
554
|
+
' if sample5_path.exists() and not PARAMS["force_reprocess"]:',
|
|
555
|
+
' print(f" Skipping {raw_file.name} (sample5 already exists)")',
|
|
556
|
+
' return str(sample5_path)',
|
|
734
557
|
' ',
|
|
735
|
-
' print(f"
|
|
736
|
-
' start_time = time.time()',
|
|
558
|
+
' print(f" Converting {raw_file.name}...")',
|
|
737
559
|
' ',
|
|
738
560
|
' # Load and process raw file with full pipeline',
|
|
739
561
|
' sample = Sample(log_label=sample_name)',
|
|
740
562
|
' sample.load(filename=str(raw_file))',
|
|
741
563
|
' sample.find_features(',
|
|
742
|
-
' noise=
|
|
743
|
-
' chrom_fwhm=
|
|
744
|
-
' chrom_peak_snr=
|
|
564
|
+
' noise=PARAMS["noise"],',
|
|
565
|
+
' chrom_fwhm=PARAMS["chrom_fwhm"],',
|
|
566
|
+
' chrom_peak_snr=PARAMS["chrom_peak_snr"]',
|
|
745
567
|
' )',
|
|
746
|
-
' # sample.find_adducts(adducts=params["adducts"])',
|
|
747
568
|
' sample.find_ms2()',
|
|
748
|
-
'
|
|
569
|
+
' sample.find_iso()',
|
|
570
|
+
' # sample.export_mgf()',
|
|
571
|
+
' # sample.plot_2d(filename=f"{sample5_path.replace(".sample5", ".html")}")',
|
|
749
572
|
' sample.save(str(sample5_path))',
|
|
750
573
|
' ',
|
|
751
|
-
'
|
|
752
|
-
'
|
|
753
|
-
' ',
|
|
754
|
-
' return {"status": "success", "file": str(sample5_path), "elapsed": elapsed}',
|
|
574
|
+
' # print(f" Completed {raw_file.name} -> {sample5_path.name}")',
|
|
575
|
+
' return str(sample5_path)',
|
|
755
576
|
' ',
|
|
756
577
|
' except Exception as e:',
|
|
757
|
-
' print(f"
|
|
758
|
-
' return
|
|
578
|
+
' print(f" ERROR processing {raw_file.name}: {e}")',
|
|
579
|
+
' return None',
|
|
759
580
|
'',
|
|
760
581
|
'',
|
|
761
|
-
'def
|
|
762
|
-
' """Convert raw data files to sample5 format
|
|
582
|
+
'def convert_raw_to_sample5(raw_files, output_folder, polarity, num_cores):',
|
|
583
|
+
' """Convert raw data files to sample5 format."""',
|
|
763
584
|
' import concurrent.futures',
|
|
764
585
|
' import os',
|
|
765
586
|
' ',
|
|
766
587
|
' # Create output directory',
|
|
767
588
|
' os.makedirs(output_folder, exist_ok=True)',
|
|
768
589
|
' ',
|
|
769
|
-
' print(f"\\n🚀 Processing {len(raw_files)} files using {params[\'num_cores\']} CPU cores...")',
|
|
770
|
-
' print("=" * 70)',
|
|
771
|
-
' ',
|
|
772
590
|
' # Prepare arguments for multiprocessing',
|
|
773
|
-
' file_args = [(raw_file, output_folder
|
|
774
|
-
' ',
|
|
775
|
-
' # Process files in parallel with progress tracking',
|
|
776
|
-
' results = []',
|
|
777
|
-
' successful = 0',
|
|
778
|
-
' skipped = 0',
|
|
779
|
-
' failed = 0',
|
|
780
|
-
' total_elapsed = 0',
|
|
591
|
+
' file_args = [(raw_file, output_folder) for raw_file in raw_files]',
|
|
781
592
|
' ',
|
|
782
|
-
'
|
|
783
|
-
'
|
|
784
|
-
'
|
|
593
|
+
' # Process files in parallel',
|
|
594
|
+
' sample5_files = []',
|
|
595
|
+
' with concurrent.futures.ProcessPoolExecutor(max_workers=num_cores) as executor:',
|
|
596
|
+
' futures = [executor.submit(process_single_file, args) for args in file_args]',
|
|
785
597
|
' ',
|
|
786
|
-
'
|
|
787
|
-
' for i, future in enumerate(concurrent.futures.as_completed(future_to_file), 1):',
|
|
598
|
+
' for future in concurrent.futures.as_completed(futures):',
|
|
788
599
|
' result = future.result()',
|
|
789
|
-
'
|
|
790
|
-
'
|
|
791
|
-
' if result["status"] == "success":',
|
|
792
|
-
' successful += 1',
|
|
793
|
-
' total_elapsed += result.get("elapsed", 0)',
|
|
794
|
-
' elif result["status"] == "skipped":',
|
|
795
|
-
' skipped += 1',
|
|
796
|
-
' else:',
|
|
797
|
-
' failed += 1',
|
|
798
|
-
' ',
|
|
799
|
-
' # Progress update',
|
|
800
|
-
' print(f"\\r Progress: {i}/{len(raw_files)} files completed ({successful} success, {skipped} skipped, {failed} failed)", end="", flush=True)',
|
|
801
|
-
' ',
|
|
802
|
-
' print() # New line after progress',
|
|
803
|
-
' print("=" * 70)',
|
|
804
|
-
' ',
|
|
805
|
-
' # Summary',
|
|
806
|
-
' if successful > 0:',
|
|
807
|
-
' avg_time = total_elapsed / successful',
|
|
808
|
-
' print(f"✅ Successfully processed {successful} files (avg: {avg_time:.1f}s per file)")',
|
|
809
|
-
' if skipped > 0:',
|
|
810
|
-
' print(f"⏩ Skipped {skipped} files (already exist)")',
|
|
811
|
-
' if failed > 0:',
|
|
812
|
-
' print(f"❌ Failed to process {failed} files")',
|
|
813
|
-
' for result in results:',
|
|
814
|
-
' if result["status"] == "error":',
|
|
815
|
-
' print(f" - {Path(result[\'file\']).name}: {result[\'error\']}")',
|
|
600
|
+
' if result:',
|
|
601
|
+
' sample5_files.append(result)',
|
|
816
602
|
' ',
|
|
817
|
-
' # Return list of successful sample5 files',
|
|
818
|
-
' sample5_files = [result["file"] for result in results if result["status"] in ["success", "skipped"]]',
|
|
819
603
|
' return sample5_files',
|
|
820
604
|
'',
|
|
821
605
|
'',
|
|
822
606
|
'def main():',
|
|
823
|
-
' """Main
|
|
607
|
+
' """Main analysis pipeline."""',
|
|
824
608
|
' try:',
|
|
825
609
|
' print("=" * 70)',
|
|
826
|
-
f' print("
|
|
610
|
+
f' print("masster {version} - Automated MS Data Analysis")',
|
|
827
611
|
' print("=" * 70)',
|
|
828
612
|
' print(f"Source: {PARAMS[\'source\']}")',
|
|
829
613
|
' print(f"Output: {PARAMS[\'folder\']}")',
|
|
830
|
-
' print(f"Polarity: {PARAMS[\'polarity\']}
|
|
614
|
+
' print(f"Polarity: {PARAMS[\'polarity\']}")',
|
|
831
615
|
' print(f"CPU Cores: {PARAMS[\'num_cores\']}")',
|
|
832
616
|
' print("=" * 70)',
|
|
833
|
-
' print("\\n⚙️ IMPORTANT: Review these parameters before processing:")',
|
|
834
|
-
' print(f" NOISE threshold: {PARAMS[\'noise\']} (adjust based on your instrument)")',
|
|
835
|
-
' print(f" CHROM_FWHM: {PARAMS[\'chrom_fwhm\']}s (adjust based on your chromatography)")',
|
|
836
|
-
' print(" You can edit these values in the PARAMS section above.")',
|
|
837
|
-
' print("=" * 70)',
|
|
838
617
|
' ',
|
|
839
618
|
' start_time = time.time()',
|
|
840
619
|
' ',
|
|
841
620
|
' # Step 1: Discover raw data files',
|
|
842
|
-
' print("\\
|
|
621
|
+
' print("\\nStep 1/7: Discovering raw data files...")',
|
|
843
622
|
' raw_files = discover_raw_files(',
|
|
844
623
|
' PARAMS[\'source\'],',
|
|
845
624
|
' PARAMS[\'file_extensions\'],',
|
|
846
|
-
' PARAMS[\'search_subfolders\']
|
|
847
|
-
' PARAMS[\'skip_patterns\']',
|
|
625
|
+
' PARAMS[\'search_subfolders\']',
|
|
848
626
|
' )',
|
|
849
627
|
' ',
|
|
850
628
|
' if not raw_files:',
|
|
851
|
-
' print("
|
|
629
|
+
' print("No raw data files found!")',
|
|
852
630
|
' return False',
|
|
853
631
|
' ',
|
|
854
632
|
' print(f"Found {len(raw_files)} raw data files")',
|
|
855
|
-
' for
|
|
856
|
-
' print(f" {
|
|
633
|
+
' for f in raw_files[:5]: # Show first 5 files',
|
|
634
|
+
' print(f" {f.name}")',
|
|
857
635
|
' if len(raw_files) > 5:',
|
|
858
|
-
' print(f" ... and {len(raw_files) - 5} more
|
|
636
|
+
' print(f" ... and {len(raw_files) - 5} more")',
|
|
859
637
|
' ',
|
|
860
|
-
' # Step 2: Process raw files
|
|
861
|
-
' print("\\
|
|
862
|
-
' sample5_files =
|
|
638
|
+
' # Step 2: Process raw files',
|
|
639
|
+
' print("\\nStep 2/7: Processing raw files...")',
|
|
640
|
+
' sample5_files = convert_raw_to_sample5(',
|
|
863
641
|
' raw_files,',
|
|
864
642
|
' PARAMS[\'folder\'],',
|
|
865
|
-
' PARAMS',
|
|
643
|
+
' PARAMS[\'polarity\'],',
|
|
644
|
+
' PARAMS[\'num_cores\']',
|
|
866
645
|
' )',
|
|
867
646
|
' ',
|
|
868
647
|
' if not sample5_files:',
|
|
869
|
-
' print("
|
|
648
|
+
' print("No sample5 files were created!")',
|
|
870
649
|
' return False',
|
|
871
650
|
' ',
|
|
872
|
-
'
|
|
651
|
+
' print(f"Successfully processed {len(sample5_files)} files to sample5")',
|
|
652
|
+
' ',
|
|
653
|
+
' # Step 3: Create and configure study',
|
|
654
|
+
' print("\\nStep 3/7: Initializing study...")',
|
|
655
|
+
' study = Study(folder=PARAMS[\'folder\'])',
|
|
656
|
+
' study.polarity = PARAMS[\'polarity\']',
|
|
657
|
+
' study.adducts = PARAMS[\'adducts\']',
|
|
658
|
+
' ',
|
|
659
|
+
' # Step 4: Add sample5 files to study',
|
|
660
|
+
' print("\\nStep 4/7: Adding samples to study...")',
|
|
661
|
+
' study.add(str(Path(PARAMS[\'folder\']) / "*.sample5"))',
|
|
662
|
+
' study.features_filter(study.features_select(chrom_coherence=0.1, chrom_prominence_scaled=1))',
|
|
663
|
+
' ',
|
|
664
|
+
' # Step 5: Core processing',
|
|
665
|
+
' print("\\nStep 5/7: Processing...")',
|
|
666
|
+
' study.align(',
|
|
667
|
+
' algorithm=PARAMS[\'alignment_method\'],',
|
|
668
|
+
' rt_tol=PARAMS[\'rt_tol\']',
|
|
669
|
+
' )',
|
|
670
|
+
' ',
|
|
671
|
+
' study.merge(',
|
|
672
|
+
' method="qt",',
|
|
673
|
+
' min_samples=PARAMS[\'min_samples_per_feature\'],',
|
|
674
|
+
' threads=PARAMS[\'num_cores\'],',
|
|
675
|
+
' rt_tol=PARAMS[\'rt_tol\']',
|
|
676
|
+
' )',
|
|
677
|
+
' study.find_iso()',
|
|
678
|
+
' study.fill()',
|
|
679
|
+
' study.integrate()',
|
|
680
|
+
' ',
|
|
681
|
+
' # Step 6/7: Saving results',
|
|
682
|
+
' print("\\nStep 6/7: Saving results...")',
|
|
683
|
+
' study.save()',
|
|
684
|
+
' study.export_xlsx()',
|
|
685
|
+
' study.export_mgf()',
|
|
686
|
+
' study.export_mztab()',
|
|
687
|
+
' ',
|
|
688
|
+
' # Step 7: Plots',
|
|
689
|
+
' print("\\nStep 7/7: Exporting plots...")',
|
|
690
|
+
' study.plot_consensus_2d(filename="consensus.html")',
|
|
691
|
+
' study.plot_consensus_2d(filename="consensus.png")',
|
|
692
|
+
' study.plot_alignment(filename="alignment.html")',
|
|
693
|
+
' study.plot_alignment(filename="alignment.png")',
|
|
694
|
+
' study.plot_samples_pca(filename="pca.html")',
|
|
695
|
+
' study.plot_samples_pca(filename="pca.png")',
|
|
696
|
+
' study.plot_bpc(filename="bpc.html")',
|
|
697
|
+
' study.plot_bpc(filename="bpc.png")',
|
|
698
|
+
' study.plot_rt_correction(filename="rt_correction.html")',
|
|
699
|
+
' study.plot_rt_correction(filename="rt_correction.png")',
|
|
700
|
+
' ',
|
|
701
|
+
' # Print summary',
|
|
702
|
+
' study.info()',
|
|
873
703
|
' total_time = time.time() - start_time',
|
|
874
704
|
' print("\\n" + "=" * 70)',
|
|
875
|
-
' print("
|
|
705
|
+
' print("ANALYSIS COMPLETE")',
|
|
876
706
|
' print("=" * 70)',
|
|
877
|
-
' print(f"
|
|
878
|
-
' print(f"Raw files
|
|
707
|
+
' print(f"Total processing time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")',
|
|
708
|
+
' print(f"Raw files processed: {len(raw_files)}")',
|
|
879
709
|
' print(f"Sample5 files created: {len(sample5_files)}")',
|
|
880
|
-
'
|
|
881
|
-
'
|
|
882
|
-
' print("2. Or use the sample5 files in your own analysis scripts")',
|
|
710
|
+
' if hasattr(study, "consensus_df"):',
|
|
711
|
+
' print(f"Consensus features generated: {len(study.consensus_df)}")',
|
|
883
712
|
' print("=" * 70)',
|
|
884
713
|
' ',
|
|
885
714
|
' return True',
|
|
886
715
|
' ',
|
|
887
716
|
' except KeyboardInterrupt:',
|
|
888
|
-
' print("\\
|
|
717
|
+
' print("\\nAnalysis interrupted by user")',
|
|
889
718
|
' return False',
|
|
890
719
|
' except Exception as e:',
|
|
891
|
-
' print(f"
|
|
720
|
+
' print(f"Analysis failed with error: {e}")',
|
|
892
721
|
' import traceback',
|
|
893
722
|
' traceback.print_exc()',
|
|
894
723
|
' return False',
|
|
@@ -897,115 +726,292 @@ class Wizard:
|
|
|
897
726
|
'if __name__ == "__main__":',
|
|
898
727
|
' success = main()',
|
|
899
728
|
' sys.exit(0 if success else 1)',
|
|
900
|
-
]
|
|
729
|
+
]
|
|
730
|
+
|
|
731
|
+
return '\n'.join(script_lines)
|
|
732
|
+
|
|
733
|
+
def _generate_interactive_notebook_content(self, source_info: Dict[str, Any]) -> str:
|
|
734
|
+
"""Generate the content for 2_interactive_analysis.py marimo notebook."""
|
|
735
|
+
|
|
736
|
+
notebook_lines = [
|
|
737
|
+
'import marimo',
|
|
738
|
+
'',
|
|
739
|
+
'__generated_with = "0.9.14"',
|
|
740
|
+
'app = marimo.App(width="medium")',
|
|
741
|
+
'',
|
|
742
|
+
'@app.cell',
|
|
743
|
+
'def __():',
|
|
744
|
+
' import marimo as mo',
|
|
745
|
+
' return (mo,)',
|
|
746
|
+
'',
|
|
747
|
+
'@app.cell',
|
|
748
|
+
'def __(mo):',
|
|
749
|
+
' mo.md(r"""',
|
|
750
|
+
' # MASSter Interactive Analysis',
|
|
751
|
+
' ',
|
|
752
|
+
f' **Source:** {source_info.get("number_of_files", 0)} files detected',
|
|
753
|
+
f' **Polarity:** {source_info.get("polarity", "unknown")}',
|
|
754
|
+
' ',
|
|
755
|
+
' This notebook provides interactive exploration of your processed study.',
|
|
756
|
+
' Make sure you have run `python 1_masster_workflow.py` first.',
|
|
757
|
+
' """)',
|
|
758
|
+
' return ()',
|
|
759
|
+
'',
|
|
760
|
+
'@app.cell',
|
|
761
|
+
'def __():',
|
|
762
|
+
' import masster',
|
|
763
|
+
' return (masster,)',
|
|
764
|
+
'',
|
|
765
|
+
'@app.cell',
|
|
766
|
+
'def __(masster):',
|
|
767
|
+
' study = masster.Study(folder=".")',
|
|
768
|
+
' return (study,)',
|
|
769
|
+
'',
|
|
770
|
+
'@app.cell',
|
|
771
|
+
'def __(study):',
|
|
772
|
+
' study.info()',
|
|
773
|
+
' return ()',
|
|
774
|
+
'',
|
|
775
|
+
'if __name__ == "__main__":',
|
|
776
|
+
' app.run()',
|
|
777
|
+
]
|
|
778
|
+
|
|
779
|
+
return '\n'.join(notebook_lines)
|
|
780
|
+
|
|
781
|
+
def _generate_instructions(self, source_info: Dict[str, Any], files_created: List[str]) -> List[str]:
|
|
782
|
+
"""Generate usage instructions for the created scripts."""
|
|
783
|
+
instructions = [f"Source analysis: {source_info.get('number_of_files', 0)} files found",
|
|
784
|
+
f"Polarity detected: {source_info.get('polarity', 'unknown')}",
|
|
785
|
+
"Files created:"]
|
|
786
|
+
for file_path in files_created:
|
|
787
|
+
instructions.append(f" ✅ {str(Path(file_path).resolve())}")
|
|
788
|
+
|
|
789
|
+
# Find the workflow script name from created files
|
|
790
|
+
workflow_script_name = "1_masster_workflow.py"
|
|
791
|
+
for file_path in files_created:
|
|
792
|
+
if Path(file_path).name == "1_masster_workflow.py":
|
|
793
|
+
workflow_script_name = Path(file_path).name
|
|
794
|
+
break
|
|
795
|
+
|
|
796
|
+
instructions.extend([
|
|
797
|
+
"",
|
|
798
|
+
"Next steps:",
|
|
799
|
+
f"1. REVIEW PARAMETERS in {workflow_script_name}:",
|
|
800
|
+
f" In particular, verify the NOISE, CHROM_FWHM, and MIN_SAMPLES_FOR_MERGE",
|
|
801
|
+
"",
|
|
802
|
+
"2. TEST SINGLE FILE (RECOMMENDED):",
|
|
803
|
+
f" wizard.test_only() # Validate parameters with first file only",
|
|
804
|
+
"",
|
|
805
|
+
"3. EXECUTE FULL BATCH:",
|
|
806
|
+
f" wizard.run() # Process all files",
|
|
807
|
+
f" # OR: wizard.test_and_run() # Test first, then run all",
|
|
808
|
+
f" # OR: uv run python {workflow_script_name}",
|
|
809
|
+
"",
|
|
810
|
+
"4. INTERACTIVE ANALYSIS:",
|
|
811
|
+
f" uv run marimo edit {Path('2_interactive_analysis.py').name}",
|
|
812
|
+
""]
|
|
813
|
+
)
|
|
814
|
+
|
|
815
|
+
return instructions
|
|
816
|
+
|
|
817
|
+
def _add_test_mode_support(self, workflow_content: str) -> str:
|
|
818
|
+
"""Add test mode functionality to the generated workflow script."""
|
|
819
|
+
lines = workflow_content.split('\n')
|
|
820
|
+
|
|
821
|
+
# Insert test mode code after print statements in main function
|
|
822
|
+
for i, line in enumerate(lines):
|
|
823
|
+
# Add test mode print after the masster version line
|
|
824
|
+
if 'print("masster' in line and 'Automated MS Data Analysis")' in line:
|
|
825
|
+
lines.insert(i + 1, ' if TEST_MODE:')
|
|
826
|
+
lines.insert(i + 2, ' print("🧪 TEST MODE: Processing single file only")')
|
|
827
|
+
break
|
|
828
|
+
|
|
829
|
+
# Add mode info after num_cores print
|
|
830
|
+
for i, line in enumerate(lines):
|
|
831
|
+
if 'print(f"CPU Cores: {PARAMS[\'num_cores\']}")' in line:
|
|
832
|
+
lines.insert(i + 1, ' if TEST_MODE:')
|
|
833
|
+
lines.insert(i + 2, ' print(f"Mode: {\'Test Only\' if TEST_ONLY else \'Test + Full Batch\'}")')
|
|
834
|
+
break
|
|
835
|
+
|
|
836
|
+
# Add file limitation logic after file listing
|
|
837
|
+
for i, line in enumerate(lines):
|
|
838
|
+
if 'print(f" ... and {len(raw_files) - 5} more")' in line:
|
|
839
|
+
lines.insert(i + 1, ' ')
|
|
840
|
+
lines.insert(i + 2, ' # Limit to first file in test mode')
|
|
841
|
+
lines.insert(i + 3, ' if TEST_MODE:')
|
|
842
|
+
lines.insert(i + 4, ' raw_files = raw_files[:1]')
|
|
843
|
+
lines.insert(i + 5, ' print(f"\\n🧪 TEST MODE: Processing only first file: {raw_files[0].name}")')
|
|
844
|
+
break
|
|
845
|
+
|
|
846
|
+
# Modify num_cores for test mode
|
|
847
|
+
for i, line in enumerate(lines):
|
|
848
|
+
if 'PARAMS[\'num_cores\']' in line and 'convert_raw_to_sample5(' in lines[i-2:i+3]:
|
|
849
|
+
lines[i] = line.replace('PARAMS[\'num_cores\']', 'PARAMS[\'num_cores\'] if not TEST_MODE else 1 # Use single core for test')
|
|
850
|
+
break
|
|
851
|
+
|
|
852
|
+
# Add test-only exit logic after successful processing
|
|
853
|
+
for i, line in enumerate(lines):
|
|
854
|
+
if 'print(f"Successfully processed {len(sample5_files)} files to sample5")' in line:
|
|
855
|
+
lines.insert(i + 1, ' ')
|
|
856
|
+
lines.insert(i + 2, ' # Stop here if test-only mode')
|
|
857
|
+
lines.insert(i + 3, ' if TEST_ONLY:')
|
|
858
|
+
lines.insert(i + 4, ' print("\\n🧪 TEST ONLY mode: Stopping after successful single file processing")')
|
|
859
|
+
lines.insert(i + 5, ' print(f"Test file created: {sample5_files[0]}")')
|
|
860
|
+
lines.insert(i + 6, ' print("\\nTo run full batch, use: wizard.run()")')
|
|
861
|
+
lines.insert(i + 7, ' total_time = time.time() - start_time')
|
|
862
|
+
lines.insert(i + 8, ' print(f"\\nTest processing time: {total_time:.1f} seconds")')
|
|
863
|
+
lines.insert(i + 9, ' return True')
|
|
864
|
+
break
|
|
865
|
+
|
|
866
|
+
return '\n'.join(lines)
|
|
867
|
+
|
|
868
|
+
def test_and_run(self) -> Dict[str, Any]:
|
|
869
|
+
"""
|
|
870
|
+
Test the sample processing workflow with a single file, then run full batch.
|
|
871
|
+
|
|
872
|
+
This method runs the 1_masster_workflow.py script in test mode to process
|
|
873
|
+
the first raw file for validation, then automatically continues with the
|
|
874
|
+
full batch if the test succeeds. The script must already exist - call
|
|
875
|
+
create_scripts() first if needed.
|
|
876
|
+
|
|
877
|
+
Returns:
|
|
878
|
+
Dictionary containing:
|
|
879
|
+
- status: "success" or "error"
|
|
880
|
+
- message: Status message
|
|
881
|
+
- instructions: List of next steps
|
|
882
|
+
"""
|
|
883
|
+
return self._execute_workflow(test_mode=True)
|
|
884
|
+
|
|
885
|
+
def test_only(self) -> Dict[str, Any]:
|
|
886
|
+
"""
|
|
887
|
+
Test the sample processing workflow with a single file only.
|
|
901
888
|
|
|
902
|
-
|
|
889
|
+
This method runs the 1_masster_workflow.py script in test-only mode to process
|
|
890
|
+
only the first raw file and then stops (does not continue to full study processing).
|
|
891
|
+
The script must already exist - call create_scripts() first if needed.
|
|
892
|
+
|
|
893
|
+
Returns:
|
|
894
|
+
Dictionary containing:
|
|
895
|
+
- status: "success" or "error"
|
|
896
|
+
- message: Status message
|
|
897
|
+
- instructions: List of next steps
|
|
898
|
+
- test_file: Path to the processed test file (if successful)
|
|
899
|
+
"""
|
|
900
|
+
return self._execute_workflow(test_mode=True, test_only=True)
|
|
903
901
|
|
|
904
|
-
def
|
|
902
|
+
def run(self) -> Dict[str, Any]:
    """
    Process all raw files by executing the generated workflow script.

    Launches 1_masster_workflow.py in full-batch mode (no test flags).
    The script must already exist - call create_scripts() first if needed.

    Returns:
        The dictionary produced by the workflow execution:
        - status: "success" or "error"
        - message: Status message
        - instructions: List of next steps
    """
    outcome = self._execute_workflow(test_mode=False)
    return outcome
|
|
916
|
+
|
|
917
|
+
def _execute_workflow(self, test_mode: bool = False, test_only: bool = False) -> Dict[str, Any]:
|
|
915
918
|
"""
|
|
916
|
-
|
|
919
|
+
Execute the workflow script in either test or full mode.
|
|
917
920
|
|
|
921
|
+
Args:
|
|
922
|
+
test_mode: If True, run in test mode (single file), otherwise full batch
|
|
923
|
+
test_only: If True, stop after single file test (only used with test_mode=True)
|
|
924
|
+
"""
|
|
918
925
|
try:
|
|
926
|
+
workflow_script_path = self.folder_path / "1_masster_workflow.py"
|
|
927
|
+
|
|
919
928
|
# Check if workflow script exists
|
|
920
|
-
if
|
|
921
|
-
|
|
922
|
-
|
|
929
|
+
if not workflow_script_path.exists():
|
|
930
|
+
return {
|
|
931
|
+
"status": "error",
|
|
932
|
+
"message": "Workflow script not found. Please run create_scripts() first.",
|
|
933
|
+
"instructions": [
|
|
934
|
+
"❌ Missing 1_masster_workflow.py",
|
|
935
|
+
"Run: wizard.create_scripts()",
|
|
936
|
+
"Then: wizard.run()"
|
|
937
|
+
]
|
|
938
|
+
}
|
|
939
|
+
|
|
940
|
+
# Setup execution mode
|
|
941
|
+
if test_only:
|
|
942
|
+
mode_label = "test-only"
|
|
943
|
+
elif test_mode:
|
|
944
|
+
mode_label = "test"
|
|
923
945
|
else:
|
|
924
|
-
|
|
925
|
-
# Create scripts first
|
|
926
|
-
result = self.create_scripts()
|
|
927
|
-
|
|
928
|
-
if result["status"] != "success":
|
|
929
|
-
return result
|
|
930
|
-
|
|
931
|
-
# Print instructions
|
|
932
|
-
print("\n" + "="*70)
|
|
933
|
-
for instruction in result["instructions"]:
|
|
934
|
-
print(instruction)
|
|
935
|
-
print("="*70)
|
|
946
|
+
mode_label = "full batch"
|
|
936
947
|
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
948
|
+
env = None
|
|
949
|
+
if test_mode:
|
|
950
|
+
import os
|
|
951
|
+
env = os.environ.copy()
|
|
952
|
+
env['MASSTER_TEST_MODE'] = '1'
|
|
953
|
+
if test_only:
|
|
954
|
+
env['MASSTER_TEST_ONLY'] = '1'
|
|
941
955
|
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
return {
|
|
947
|
-
"status": "scripts_created",
|
|
948
|
-
"message": "Scripts created successfully, execution deferred",
|
|
949
|
-
"instructions": result["instructions"],
|
|
950
|
-
"files_created": result["files_created"]
|
|
951
|
-
}
|
|
952
|
-
|
|
953
|
-
except Exception as e:
|
|
954
|
-
self.logger.error(f"Analysis failed: {e}")
|
|
955
|
-
return {
|
|
956
|
-
"status": "error",
|
|
957
|
-
"message": f"Analysis failed: {e}",
|
|
958
|
-
"instructions": [],
|
|
959
|
-
"files_created": []
|
|
960
|
-
}
|
|
961
|
-
|
|
962
|
-
def _execute_workflow_script(self, script_path: Path) -> Dict[str, Any]:
|
|
963
|
-
"""Execute the workflow script and return results."""
|
|
964
|
-
try:
|
|
965
|
-
print(f"🚀 Executing {script_path.name}...")
|
|
956
|
+
# Execute the workflow script
|
|
957
|
+
print(f"🚀 Executing {mode_label} processing workflow...")
|
|
958
|
+
print(f"📄 Running: {workflow_script_path.name}")
|
|
959
|
+
print("=" * 60)
|
|
966
960
|
|
|
967
961
|
import subprocess
|
|
968
962
|
result = subprocess.run([
|
|
969
|
-
sys.executable, str(
|
|
970
|
-
], cwd=str(self.folder_path),
|
|
963
|
+
sys.executable, str(workflow_script_path)
|
|
964
|
+
], cwd=str(self.folder_path), env=env)
|
|
971
965
|
|
|
972
966
|
success = result.returncode == 0
|
|
973
967
|
|
|
974
968
|
if success:
|
|
975
|
-
print("="*
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
969
|
+
print("=" * 60)
|
|
970
|
+
if test_only:
|
|
971
|
+
print("✅ Test-only processing completed successfully!")
|
|
972
|
+
print("📋 Single file validated - ready for full batch")
|
|
973
|
+
print(" wizard.run()")
|
|
974
|
+
elif test_mode:
|
|
975
|
+
print("✅ Test processing completed successfully!")
|
|
976
|
+
print("📋 Next step: Run full batch")
|
|
977
|
+
print(" wizard.run()")
|
|
978
|
+
else:
|
|
979
|
+
print("✅ Sample processing completed successfully!")
|
|
980
|
+
print("📋 Next step: Run interactive analysis")
|
|
981
|
+
print(" uv run marimo edit 2_interactive_analysis.py")
|
|
982
|
+
print("=" * 60)
|
|
983
|
+
|
|
984
|
+
next_step = ("Next: wizard.run()" if test_mode else
|
|
985
|
+
"Next: uv run marimo edit 2_interactive_analysis.py")
|
|
981
986
|
|
|
982
987
|
return {
|
|
983
988
|
"status": "success",
|
|
984
|
-
"message": "
|
|
989
|
+
"message": f"{mode_label.capitalize()} processing completed successfully",
|
|
985
990
|
"instructions": [
|
|
986
|
-
"✅
|
|
987
|
-
|
|
988
|
-
]
|
|
989
|
-
"files_created": []
|
|
991
|
+
f"✅ {mode_label.capitalize()} processing completed",
|
|
992
|
+
next_step
|
|
993
|
+
]
|
|
990
994
|
}
|
|
991
995
|
else:
|
|
992
996
|
return {
|
|
993
997
|
"status": "error",
|
|
994
|
-
"message": f"Workflow execution failed with code {result.returncode}",
|
|
998
|
+
"message": f"Workflow execution failed with return code {result.returncode}",
|
|
995
999
|
"instructions": [
|
|
996
1000
|
"❌ Check the error messages above",
|
|
997
1001
|
"Review parameters in 1_masster_workflow.py",
|
|
998
|
-
"Try running: python
|
|
999
|
-
]
|
|
1000
|
-
"files_created": []
|
|
1002
|
+
f"Try running manually: python {workflow_script_path.name}"
|
|
1003
|
+
]
|
|
1001
1004
|
}
|
|
1002
1005
|
|
|
1003
1006
|
except Exception as e:
|
|
1004
1007
|
return {
|
|
1005
1008
|
"status": "error",
|
|
1006
1009
|
"message": f"Failed to execute workflow: {e}",
|
|
1007
|
-
"instructions": [
|
|
1008
|
-
|
|
1010
|
+
"instructions": [
|
|
1011
|
+
"❌ Execution failed",
|
|
1012
|
+
"Check that source files exist and are accessible",
|
|
1013
|
+
"Verify folder permissions"
|
|
1014
|
+
]
|
|
1009
1015
|
}
|
|
1010
1016
|
|
|
1011
1017
|
def _generate_script_content(self) -> str:
|
|
@@ -1313,413 +1319,74 @@ class Wizard:
|
|
|
1313
1319
|
|
|
1314
1320
|
return '\n'.join(script_lines)
|
|
1315
1321
|
|
|
1316
|
-
def _generate_notebook_content(self) -> str:
|
|
1317
|
-
"""Generate the content for a marimo interactive notebook."""
|
|
1318
|
-
|
|
1319
|
-
notebook_lines = [
|
|
1320
|
-
'import marimo',
|
|
1321
|
-
'',
|
|
1322
|
-
'__generated_with = "0.9.14"',
|
|
1323
|
-
'app = marimo.App(width="medium")',
|
|
1324
|
-
'',
|
|
1325
|
-
'',
|
|
1326
|
-
'@app.cell',
|
|
1327
|
-
'def __():',
|
|
1328
|
-
' import marimo as mo',
|
|
1329
|
-
' return (mo,)',
|
|
1330
|
-
'',
|
|
1331
|
-
'',
|
|
1332
|
-
'@app.cell',
|
|
1333
|
-
'def __(mo):',
|
|
1334
|
-
' mo.md(r"""',
|
|
1335
|
-
' # MASSter Interactive Analysis',
|
|
1336
|
-
' ',
|
|
1337
|
-
' This notebook provides interactive exploration of your mass spectrometry study results.',
|
|
1338
|
-
' The study has been processed and is ready for analysis.',
|
|
1339
|
-
' """)',
|
|
1340
|
-
'',
|
|
1341
|
-
'',
|
|
1342
|
-
'@app.cell',
|
|
1343
|
-
'def __():',
|
|
1344
|
-
' # Import masster',
|
|
1345
|
-
' import masster',
|
|
1346
|
-
' return (masster,)',
|
|
1347
|
-
'',
|
|
1348
|
-
'',
|
|
1349
|
-
'@app.cell',
|
|
1350
|
-
'def __(masster):',
|
|
1351
|
-
' # Load the processed study',
|
|
1352
|
-
' study = masster.Study(folder=".")',
|
|
1353
|
-
' study.load()',
|
|
1354
|
-
' return (study,)',
|
|
1355
|
-
'',
|
|
1356
|
-
'',
|
|
1357
|
-
'@app.cell',
|
|
1358
|
-
'def __(mo, study):',
|
|
1359
|
-
' # Display study information',
|
|
1360
|
-
' mo.md(f"""',
|
|
1361
|
-
' ## Study Overview',
|
|
1362
|
-
' ',
|
|
1363
|
-
' **Samples:** {len(study.samples) if hasattr(study, "samples") else "Not loaded"}',
|
|
1364
|
-
' ',
|
|
1365
|
-
' **Features:** {len(study.consensus_df) if hasattr(study, "consensus_df") else "Not available"}',
|
|
1366
|
-
' ',
|
|
1367
|
-
' **Polarity:** {study.polarity if hasattr(study, "polarity") else "Unknown"}',
|
|
1368
|
-
' """)',
|
|
1369
|
-
'',
|
|
1370
|
-
'',
|
|
1371
|
-
'@app.cell',
|
|
1372
|
-
'def __(study):',
|
|
1373
|
-
' # Print detailed study info',
|
|
1374
|
-
' study.info()',
|
|
1375
|
-
'',
|
|
1376
|
-
'',
|
|
1377
|
-
'@app.cell',
|
|
1378
|
-
'def __(mo):',
|
|
1379
|
-
' mo.md(r"""',
|
|
1380
|
-
' ## Quick Visualizations',
|
|
1381
|
-
' ',
|
|
1382
|
-
' Use the cells below to create interactive plots of your data.',
|
|
1383
|
-
' """)',
|
|
1384
|
-
'',
|
|
1385
|
-
'',
|
|
1386
|
-
'@app.cell',
|
|
1387
|
-
'def __(study):',
|
|
1388
|
-
' # Generate consensus 2D plot',
|
|
1389
|
-
' if hasattr(study, "consensus_df") and len(study.consensus_df) > 0:',
|
|
1390
|
-
' study.plot_consensus_2d(filename="consensus_interactive.html")',
|
|
1391
|
-
' print("Consensus 2D plot saved as: consensus_interactive.html")',
|
|
1392
|
-
' else:',
|
|
1393
|
-
' print("No consensus features available for plotting")',
|
|
1394
|
-
'',
|
|
1395
|
-
'',
|
|
1396
|
-
'@app.cell',
|
|
1397
|
-
'def __(study):',
|
|
1398
|
-
' # Generate PCA plot',
|
|
1399
|
-
' if hasattr(study, "samples") and len(study.samples) > 1:',
|
|
1400
|
-
' study.plot_samples_pca(filename="pca_interactive.html")',
|
|
1401
|
-
' print("PCA plot saved as: pca_interactive.html")',
|
|
1402
|
-
' else:',
|
|
1403
|
-
' print("Not enough samples for PCA analysis")',
|
|
1404
|
-
'',
|
|
1405
|
-
'',
|
|
1406
|
-
'@app.cell',
|
|
1407
|
-
'def __(mo):',
|
|
1408
|
-
' mo.md(r"""',
|
|
1409
|
-
' ## Data Export',
|
|
1410
|
-
' ',
|
|
1411
|
-
' Export your processed data in various formats.',
|
|
1412
|
-
' """)',
|
|
1413
|
-
'',
|
|
1414
|
-
'',
|
|
1415
|
-
'@app.cell',
|
|
1416
|
-
'def __(study):',
|
|
1417
|
-
' # Export options',
|
|
1418
|
-
' if hasattr(study, "consensus_df"):',
|
|
1419
|
-
' # Export to Excel',
|
|
1420
|
-
' study.export_xlsx(filename="study_results.xlsx")',
|
|
1421
|
-
' print("✓ Results exported to: study_results.xlsx")',
|
|
1422
|
-
' ',
|
|
1423
|
-
' # Export to MGF',
|
|
1424
|
-
' study.export_mgf(filename="study_spectra.mgf")',
|
|
1425
|
-
' print("✓ Spectra exported to: study_spectra.mgf")',
|
|
1426
|
-
' else:',
|
|
1427
|
-
' print("No data available for export")',
|
|
1428
|
-
'',
|
|
1429
|
-
'',
|
|
1430
|
-
'@app.cell',
|
|
1431
|
-
'def __(mo):',
|
|
1432
|
-
' mo.md(r"""',
|
|
1433
|
-
' ## Custom Analysis',
|
|
1434
|
-
' ',
|
|
1435
|
-
' Add your own analysis code in the cells below.',
|
|
1436
|
-
' """)',
|
|
1437
|
-
'',
|
|
1438
|
-
'',
|
|
1439
|
-
'@app.cell',
|
|
1440
|
-
'def __(study):',
|
|
1441
|
-
' # Access consensus features dataframe',
|
|
1442
|
-
' if hasattr(study, "consensus_df"):',
|
|
1443
|
-
' df = study.consensus_df',
|
|
1444
|
-
' print(f"Consensus features shape: {df.shape}")',
|
|
1445
|
-
' print("\\nFirst 5 features:")',
|
|
1446
|
-
' print(df.head())',
|
|
1447
|
-
' return (df,) if "df" in locals() else ()',
|
|
1448
|
-
'',
|
|
1449
|
-
'',
|
|
1450
|
-
'@app.cell',
|
|
1451
|
-
'def __():',
|
|
1452
|
-
' # Your custom analysis here',
|
|
1453
|
-
' pass',
|
|
1454
|
-
'',
|
|
1455
|
-
'',
|
|
1456
|
-
'if __name__ == "__main__":',
|
|
1457
|
-
' app.run()',
|
|
1458
|
-
]
|
|
1459
|
-
|
|
1460
|
-
return '\n'.join(notebook_lines)
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
def create_analysis(
|
|
1464
|
-
source: str,
|
|
1465
|
-
folder: str,
|
|
1466
|
-
filename: str = 'run_masster.py',
|
|
1467
|
-
polarity: str = "positive",
|
|
1468
|
-
adducts: Optional[List[str]] = None,
|
|
1469
|
-
params: Optional[wizard_def] = None,
|
|
1470
|
-
num_cores: int = 0,
|
|
1471
|
-
**kwargs
|
|
1472
|
-
) -> bool:
|
|
1473
|
-
"""
|
|
1474
|
-
Create standalone analysis scripts without initializing a Wizard instance.
|
|
1475
|
-
|
|
1476
|
-
This function generates analysis scripts with the specified configuration.
|
|
1477
|
-
|
|
1478
|
-
Parameters:
|
|
1479
|
-
source: Directory containing raw data files
|
|
1480
|
-
folder: Output directory for processed study
|
|
1481
|
-
filename: Filename for the generated script (deprecated, will create standard files)
|
|
1482
|
-
polarity: Ion polarity mode ("positive" or "negative")
|
|
1483
|
-
adducts: List of adduct specifications (auto-set if None)
|
|
1484
|
-
params: Custom wizard_def parameters (optional)
|
|
1485
|
-
num_cores: Number of CPU cores (0 = auto-detect)
|
|
1486
|
-
**kwargs: Additional parameters to override defaults
|
|
1487
|
-
|
|
1488
|
-
Returns:
|
|
1489
|
-
True if scripts were generated successfully, False otherwise
|
|
1490
|
-
|
|
1491
|
-
Example:
|
|
1492
|
-
>>> from masster.wizard import create_analysis
|
|
1493
|
-
>>> create_analysis(
|
|
1494
|
-
... source=r'D:\\Data\\raw_files',
|
|
1495
|
-
... folder=r'D:\\Data\\output',
|
|
1496
|
-
... polarity='positive'
|
|
1497
|
-
... )
|
|
1498
|
-
"""
|
|
1499
|
-
|
|
1500
|
-
try:
|
|
1501
|
-
# Create parameters
|
|
1502
|
-
if params is not None:
|
|
1503
|
-
# Use provided params as base
|
|
1504
|
-
wizard_params = params
|
|
1505
|
-
# Update with provided values
|
|
1506
|
-
wizard_params.source = source
|
|
1507
|
-
wizard_params.folder = folder
|
|
1508
|
-
if polarity != "positive": # Only override if explicitly different
|
|
1509
|
-
wizard_params.polarity = polarity
|
|
1510
|
-
if num_cores > 0:
|
|
1511
|
-
wizard_params.num_cores = num_cores
|
|
1512
|
-
if adducts is not None:
|
|
1513
|
-
wizard_params.adducts = adducts
|
|
1514
|
-
else:
|
|
1515
|
-
# Create new params with provided values
|
|
1516
|
-
wizard_params = wizard_def(
|
|
1517
|
-
source=source,
|
|
1518
|
-
folder=folder,
|
|
1519
|
-
polarity=polarity,
|
|
1520
|
-
num_cores=max(1, int(multiprocessing.cpu_count() * 0.75)) if num_cores <= 0 else num_cores
|
|
1521
|
-
)
|
|
1522
|
-
|
|
1523
|
-
if adducts is not None:
|
|
1524
|
-
wizard_params.adducts = adducts
|
|
1525
|
-
|
|
1526
|
-
# Apply any additional kwargs
|
|
1527
|
-
for key, value in kwargs.items():
|
|
1528
|
-
if hasattr(wizard_params, key):
|
|
1529
|
-
setattr(wizard_params, key, value)
|
|
1530
|
-
|
|
1531
|
-
# Ensure study folder exists
|
|
1532
|
-
study_path = Path(folder)
|
|
1533
|
-
study_path.mkdir(parents=True, exist_ok=True)
|
|
1534
|
-
|
|
1535
|
-
# Create a temporary Wizard instance to generate the scripts
|
|
1536
|
-
temp_wizard = Wizard(params=wizard_params)
|
|
1537
|
-
|
|
1538
|
-
# Generate the scripts using the new method
|
|
1539
|
-
result = temp_wizard.create_scripts()
|
|
1540
|
-
|
|
1541
|
-
if result["status"] == "success":
|
|
1542
|
-
print("Scripts created successfully!")
|
|
1543
|
-
for instruction in result["instructions"]:
|
|
1544
|
-
print(instruction)
|
|
1545
|
-
|
|
1546
|
-
return result["status"] == "success"
|
|
1547
|
-
|
|
1548
|
-
except Exception as e:
|
|
1549
|
-
print(f"Failed to create scripts: {e}")
|
|
1550
|
-
import traceback
|
|
1551
|
-
traceback.print_exc()
|
|
1552
|
-
return False
|
|
1553
|
-
|
|
1554
1322
|
|
|
1555
|
-
def
|
|
1556
|
-
source: str,
|
|
1557
|
-
folder: str,
|
|
1558
|
-
filename: str = 'run_masster.py',
|
|
1323
|
+
def create_scripts(
    source: str = "",
    folder: str = "",
    polarity: str = "positive",
    adducts: Optional[List[str]] = None,
    num_cores: int = 0,
    **kwargs
) -> Dict[str, Any]:
    """
    Generate the analysis scripts in one call, without handling a Wizard object.

    A Wizard is built from the given arguments and its create_scripts()
    method is invoked; whatever that method returns is passed through.
    Any exception raised while building the Wizard or creating the scripts
    is converted into an "error" result instead of propagating.

    Parameters:
        source: Directory containing raw data files
        folder: Output directory for processed study
        polarity: Ion polarity mode ("positive" or "negative")
        adducts: List of adduct specifications (auto-set if None)
        num_cores: Number of CPU cores (0 or less = 75% of the available
            CPUs, but at least one)
        **kwargs: Additional parameters, forwarded to Wizard unchanged

    Returns:
        Dictionary containing:
        - status: "success" or "error"
        - message: Status message
        - instructions: List of next steps
        - files_created: List of created file paths
        - source_info: Metadata about source files

    Example:
        >>> import masster.wizard
        >>> result = masster.wizard.create_scripts(
        ...     source=r'D:\\Data\\raw_files',
        ...     folder=r'D:\\Data\\output',
        ...     polarity='negative'
        ... )
        >>> print("Status:", result["status"])
    """
    try:
        # A non-positive request means "choose for me".
        worker_count = (
            num_cores
            if num_cores > 0
            else max(1, int(multiprocessing.cpu_count() * 0.75))
        )

        wizard_options = dict(
            source=source,
            folder=folder,
            polarity=polarity,
            adducts=adducts,
            num_cores=worker_count,
        )
        return Wizard(**wizard_options, **kwargs).create_scripts()

    except Exception as e:
        failure: Dict[str, Any] = {
            "status": "error",
            "message": f"Failed to create scripts: {e}",
            "instructions": [],
            "files_created": [],
            "source_info": {},
        }
        return failure
|
|
1722
1389
|
|
|
1723
1390
|
|
|
1724
1391
|
# Export the main classes and functions
|
|
1725
|
-
__all__ = ["Wizard", "wizard_def", "
|
|
1392
|
+
__all__ = ["Wizard", "wizard_def", "create_scripts"]
|