masster 0.5.15__py3-none-any.whl → 0.5.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +1 -1
- masster/_version.py +1 -1
- masster/study/plot.py +3 -0
- masster/wizard/__init__.py +2 -2
- masster/wizard/wizard.py +430 -716
- {masster-0.5.15.dist-info → masster-0.5.16.dist-info}/METADATA +1 -1
- {masster-0.5.15.dist-info → masster-0.5.16.dist-info}/RECORD +10 -10
- {masster-0.5.15.dist-info → masster-0.5.16.dist-info}/WHEEL +0 -0
- {masster-0.5.15.dist-info → masster-0.5.16.dist-info}/entry_points.txt +0 -0
- {masster-0.5.15.dist-info → masster-0.5.16.dist-info}/licenses/LICENSE +0 -0
masster/wizard/wizard.py
CHANGED
|
@@ -228,9 +228,9 @@ class Wizard:
|
|
|
228
228
|
that process raw MS data through the complete pipeline: file discovery, feature
|
|
229
229
|
detection, sample processing, study assembly, alignment, merging, and export.
|
|
230
230
|
|
|
231
|
-
This simplified version focuses on
|
|
232
|
-
- create_scripts(): Generate
|
|
233
|
-
-
|
|
231
|
+
This simplified version focuses on two core functions:
|
|
232
|
+
- create_scripts(): Generate standalone analysis scripts
|
|
233
|
+
- execute(): Create and run analysis scripts
|
|
234
234
|
"""
|
|
235
235
|
|
|
236
236
|
def __init__(
|
|
@@ -239,7 +239,7 @@ class Wizard:
|
|
|
239
239
|
folder: str = "",
|
|
240
240
|
polarity: str = "positive",
|
|
241
241
|
adducts: Optional[List[str]] = None,
|
|
242
|
-
num_cores: int =
|
|
242
|
+
num_cores: int = 6,
|
|
243
243
|
**kwargs
|
|
244
244
|
):
|
|
245
245
|
"""
|
|
@@ -290,14 +290,6 @@ class Wizard:
|
|
|
290
290
|
self.folder_path = Path(self.params.folder)
|
|
291
291
|
self.folder_path.mkdir(parents=True, exist_ok=True)
|
|
292
292
|
|
|
293
|
-
# Initialize logger
|
|
294
|
-
self.logger = MassterLogger(
|
|
295
|
-
instance_type="wizard",
|
|
296
|
-
level="INFO",
|
|
297
|
-
label="Wizard",
|
|
298
|
-
sink=None
|
|
299
|
-
)
|
|
300
|
-
|
|
301
293
|
# Auto-infer polarity from the first file if not explicitly set by user
|
|
302
294
|
if polarity == "positive" and "polarity" not in kwargs:
|
|
303
295
|
inferred_polarity = self._infer_polarity_from_first_file()
|
|
@@ -306,164 +298,6 @@ class Wizard:
|
|
|
306
298
|
# Update adducts based on inferred polarity
|
|
307
299
|
self.params.__post_init__()
|
|
308
300
|
|
|
309
|
-
def _analyze_source_files(self) -> Dict[str, Any]:
|
|
310
|
-
"""
|
|
311
|
-
Analyze source files to extract metadata: number of files, file type, polarity, and acquisition length.
|
|
312
|
-
|
|
313
|
-
Returns:
|
|
314
|
-
Dictionary containing:
|
|
315
|
-
- number_of_files: Total count of data files found
|
|
316
|
-
- file_types: List of file extensions found
|
|
317
|
-
- polarity: Detected polarity ("positive" or "negative")
|
|
318
|
-
- length_minutes: Acquisition length in minutes
|
|
319
|
-
- first_file: Path to first file analyzed
|
|
320
|
-
"""
|
|
321
|
-
result = {
|
|
322
|
-
'number_of_files': 0,
|
|
323
|
-
'file_types': [],
|
|
324
|
-
'polarity': 'positive',
|
|
325
|
-
'length_minutes': 0.0,
|
|
326
|
-
'first_file': None
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
try:
|
|
330
|
-
# Find all data files
|
|
331
|
-
all_files = []
|
|
332
|
-
file_types_found = set()
|
|
333
|
-
|
|
334
|
-
for extension in self.params.file_extensions:
|
|
335
|
-
if self.params.search_subfolders:
|
|
336
|
-
pattern = f"**/*{extension}"
|
|
337
|
-
files = list(self.source_path.rglob(pattern))
|
|
338
|
-
else:
|
|
339
|
-
pattern = f"*{extension}"
|
|
340
|
-
files = list(self.source_path.glob(pattern))
|
|
341
|
-
|
|
342
|
-
if files:
|
|
343
|
-
all_files.extend(files)
|
|
344
|
-
file_types_found.add(extension)
|
|
345
|
-
|
|
346
|
-
result['number_of_files'] = len(all_files)
|
|
347
|
-
result['file_types'] = list(file_types_found)
|
|
348
|
-
|
|
349
|
-
if not all_files:
|
|
350
|
-
return result
|
|
351
|
-
|
|
352
|
-
# Analyze first file for polarity and acquisition length
|
|
353
|
-
first_file = all_files[0]
|
|
354
|
-
result['first_file'] = str(first_file)
|
|
355
|
-
|
|
356
|
-
# Extract metadata based on file type
|
|
357
|
-
if first_file.suffix.lower() == '.wiff':
|
|
358
|
-
metadata = self._analyze_wiff_file(first_file)
|
|
359
|
-
elif first_file.suffix.lower() == '.mzml':
|
|
360
|
-
metadata = self._analyze_mzml_file(first_file)
|
|
361
|
-
elif first_file.suffix.lower() == '.raw':
|
|
362
|
-
metadata = self._analyze_raw_file(first_file)
|
|
363
|
-
else:
|
|
364
|
-
metadata = {'polarity': 'positive', 'length_minutes': 0.0}
|
|
365
|
-
|
|
366
|
-
result['polarity'] = metadata.get('polarity', 'positive')
|
|
367
|
-
result['length_minutes'] = metadata.get('length_minutes', 0.0)
|
|
368
|
-
|
|
369
|
-
except Exception as e:
|
|
370
|
-
self.logger.warning(f"Failed to analyze source files: {e}")
|
|
371
|
-
|
|
372
|
-
return result
|
|
373
|
-
|
|
374
|
-
def _analyze_wiff_file(self, file_path: Path) -> Dict[str, Any]:
|
|
375
|
-
"""Analyze WIFF file to extract polarity and acquisition length."""
|
|
376
|
-
try:
|
|
377
|
-
from masster.sample.load import _wiff_to_dict
|
|
378
|
-
|
|
379
|
-
# Extract metadata from WIFF file
|
|
380
|
-
metadata_df = _wiff_to_dict(str(file_path))
|
|
381
|
-
|
|
382
|
-
result = {'polarity': 'positive', 'length_minutes': 0.0}
|
|
383
|
-
|
|
384
|
-
if not metadata_df.empty:
|
|
385
|
-
# Get polarity from first experiment
|
|
386
|
-
if 'polarity' in metadata_df.columns:
|
|
387
|
-
first_polarity = metadata_df['polarity'].iloc[0]
|
|
388
|
-
|
|
389
|
-
# Convert numeric polarity codes to string
|
|
390
|
-
if first_polarity == 1 or str(first_polarity).lower() in ['positive', 'pos', '+']:
|
|
391
|
-
result['polarity'] = "positive"
|
|
392
|
-
elif first_polarity == -1 or str(first_polarity).lower() in ['negative', 'neg', '-']:
|
|
393
|
-
result['polarity'] = "negative"
|
|
394
|
-
|
|
395
|
-
# Estimate acquisition length by loading the file briefly
|
|
396
|
-
# For a rough estimate, we'll load just the scan info
|
|
397
|
-
from masster.sample import Sample
|
|
398
|
-
sample = Sample()
|
|
399
|
-
sample.logger_update(level="ERROR") # Suppress logs
|
|
400
|
-
sample.load(str(file_path))
|
|
401
|
-
|
|
402
|
-
if hasattr(sample, 'scans_df') and sample.scans_df is not None:
|
|
403
|
-
if not sample.scans_df.is_empty():
|
|
404
|
-
rt_values = sample.scans_df.select('rt').to_numpy().flatten()
|
|
405
|
-
if len(rt_values) > 0:
|
|
406
|
-
# RT is in seconds, convert to minutes
|
|
407
|
-
result['length_minutes'] = float(rt_values.max()) / 60.0
|
|
408
|
-
|
|
409
|
-
return result
|
|
410
|
-
|
|
411
|
-
except Exception as e:
|
|
412
|
-
self.logger.debug(f"Failed to analyze WIFF file {file_path}: {e}")
|
|
413
|
-
return {'polarity': 'positive', 'length_minutes': 0.0}
|
|
414
|
-
|
|
415
|
-
def _analyze_mzml_file(self, file_path: Path) -> Dict[str, Any]:
|
|
416
|
-
"""Analyze mzML file to extract polarity and acquisition length."""
|
|
417
|
-
try:
|
|
418
|
-
from masster.sample import Sample
|
|
419
|
-
|
|
420
|
-
sample = Sample()
|
|
421
|
-
sample.logger_update(level="ERROR") # Suppress logs
|
|
422
|
-
sample.load(str(file_path))
|
|
423
|
-
|
|
424
|
-
result = {'polarity': 'positive', 'length_minutes': 0.0}
|
|
425
|
-
|
|
426
|
-
if hasattr(sample, 'scans_df') and sample.scans_df is not None:
|
|
427
|
-
if not sample.scans_df.is_empty():
|
|
428
|
-
rt_values = sample.scans_df.select('rt').to_numpy().flatten()
|
|
429
|
-
if len(rt_values) > 0:
|
|
430
|
-
# RT is in seconds, convert to minutes
|
|
431
|
-
result['length_minutes'] = float(rt_values.max()) / 60.0
|
|
432
|
-
|
|
433
|
-
# For mzML, polarity detection would require more detailed parsing
|
|
434
|
-
# For now, use default
|
|
435
|
-
return result
|
|
436
|
-
|
|
437
|
-
except Exception as e:
|
|
438
|
-
self.logger.debug(f"Failed to analyze mzML file {file_path}: {e}")
|
|
439
|
-
return {'polarity': 'positive', 'length_minutes': 0.0}
|
|
440
|
-
|
|
441
|
-
def _analyze_raw_file(self, file_path: Path) -> Dict[str, Any]:
|
|
442
|
-
"""Analyze RAW file to extract polarity and acquisition length."""
|
|
443
|
-
try:
|
|
444
|
-
from masster.sample import Sample
|
|
445
|
-
|
|
446
|
-
sample = Sample()
|
|
447
|
-
sample.logger_update(level="ERROR") # Suppress logs
|
|
448
|
-
sample.load(str(file_path))
|
|
449
|
-
|
|
450
|
-
result = {'polarity': 'positive', 'length_minutes': 0.0}
|
|
451
|
-
|
|
452
|
-
if hasattr(sample, 'scans_df') and sample.scans_df is not None:
|
|
453
|
-
if not sample.scans_df.is_empty():
|
|
454
|
-
rt_values = sample.scans_df.select('rt').to_numpy().flatten()
|
|
455
|
-
if len(rt_values) > 0:
|
|
456
|
-
# RT is in seconds, convert to minutes
|
|
457
|
-
result['length_minutes'] = float(rt_values.max()) / 60.0
|
|
458
|
-
|
|
459
|
-
# For RAW files, polarity detection would require more detailed parsing
|
|
460
|
-
# For now, use default
|
|
461
|
-
return result
|
|
462
|
-
|
|
463
|
-
except Exception as e:
|
|
464
|
-
self.logger.debug(f"Failed to analyze RAW file {file_path}: {e}")
|
|
465
|
-
return {'polarity': 'positive', 'length_minutes': 0.0}
|
|
466
|
-
|
|
467
301
|
def _infer_polarity_from_first_file(self) -> str:
|
|
468
302
|
"""
|
|
469
303
|
Infer polarity from the first available raw data file.
|
|
@@ -538,10 +372,8 @@ class Wizard:
|
|
|
538
372
|
source_info = self._analyze_source_files()
|
|
539
373
|
|
|
540
374
|
# Update wizard parameters based on detected metadata
|
|
541
|
-
if source_info['polarity'] != 'positive':
|
|
375
|
+
if source_info.get('polarity') and source_info['polarity'] != 'positive':
|
|
542
376
|
self.params.polarity = source_info['polarity']
|
|
543
|
-
# Update adducts based on detected polarity
|
|
544
|
-
self.params.__post_init__()
|
|
545
377
|
|
|
546
378
|
files_created = []
|
|
547
379
|
|
|
@@ -573,7 +405,6 @@ class Wizard:
|
|
|
573
405
|
}
|
|
574
406
|
|
|
575
407
|
except Exception as e:
|
|
576
|
-
self.logger.error(f"Failed to create scripts: {e}")
|
|
577
408
|
return {
|
|
578
409
|
"status": "error",
|
|
579
410
|
"message": f"Failed to create scripts: {e}",
|
|
@@ -582,120 +413,100 @@ class Wizard:
|
|
|
582
413
|
"source_info": {}
|
|
583
414
|
}
|
|
584
415
|
|
|
585
|
-
def
|
|
586
|
-
"""
|
|
416
|
+
def _analyze_source_files(self) -> Dict[str, Any]:
|
|
417
|
+
"""Analyze source files to extract metadata."""
|
|
418
|
+
result = {
|
|
419
|
+
"number_of_files": 0,
|
|
420
|
+
"file_types": [],
|
|
421
|
+
"polarity": "positive",
|
|
422
|
+
"length_minutes": 0.0,
|
|
423
|
+
"first_file": None
|
|
424
|
+
}
|
|
587
425
|
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
426
|
+
try:
|
|
427
|
+
# Find raw data files
|
|
428
|
+
extensions = [".wiff", ".raw", ".mzML"]
|
|
429
|
+
raw_files = []
|
|
430
|
+
|
|
431
|
+
for ext in extensions:
|
|
432
|
+
pattern = f"**/*{ext}"
|
|
433
|
+
files = list(self.source_path.rglob(pattern))
|
|
434
|
+
if files:
|
|
435
|
+
raw_files.extend(files)
|
|
436
|
+
if ext not in result["file_types"]:
|
|
437
|
+
result["file_types"].append(ext)
|
|
438
|
+
|
|
439
|
+
result["number_of_files"] = len(raw_files)
|
|
440
|
+
|
|
441
|
+
if raw_files:
|
|
442
|
+
result["first_file"] = str(raw_files[0])
|
|
443
|
+
# Simple heuristic: assume 30 minutes per file if we can't determine
|
|
444
|
+
result["length_minutes"] = 30.0
|
|
445
|
+
|
|
446
|
+
except Exception as e:
|
|
447
|
+
print(f"Warning: Could not analyze source files: {e}")
|
|
448
|
+
|
|
449
|
+
return result
|
|
597
450
|
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
params_lines.append('# Analysis parameters (auto-detected from source files)')
|
|
601
|
-
params_lines.append('PARAMS = {')
|
|
602
|
-
|
|
603
|
-
# File Discovery Summary
|
|
604
|
-
params_lines.append(' # === Source File Analysis ===')
|
|
605
|
-
params_lines.append(f' "number_of_files": {source_info.get("number_of_files", 0)}, # Total raw data files found')
|
|
606
|
-
params_lines.append(f' "file_types": {source_info.get("file_types", [])!r}, # Detected file extensions')
|
|
607
|
-
params_lines.append(f' "length_minutes": {source_info.get("length_minutes", 0.0):.1f}, # Estimated acquisition length per file (minutes)')
|
|
608
|
-
if source_info.get('first_file'):
|
|
609
|
-
params_lines.append(f' "first_file": {source_info["first_file"]!r}, # First file analyzed for metadata')
|
|
610
|
-
params_lines.append('')
|
|
611
|
-
|
|
612
|
-
# Core Configuration
|
|
613
|
-
params_lines.append(' # === Core Configuration ===')
|
|
614
|
-
params_lines.append(f' "source": {params_dict.get("source", "")!r}, # Directory containing raw data files')
|
|
615
|
-
params_lines.append(f' "folder": {params_dict.get("folder", "")!r}, # Output directory for processed study')
|
|
616
|
-
params_lines.append(f' "polarity": {params_dict.get("polarity", "positive")!r}, # Ion polarity mode (auto-detected)')
|
|
617
|
-
params_lines.append(f' "num_cores": {params_dict.get("num_cores", 4)}, # Number of CPU cores for parallel processing')
|
|
618
|
-
params_lines.append('')
|
|
619
|
-
|
|
620
|
-
# File Discovery
|
|
621
|
-
params_lines.append(' # === File Discovery ===')
|
|
622
|
-
params_lines.append(f' "file_extensions": {params_dict.get("file_extensions", [".wiff", ".raw", ".mzML"])!r}, # File extensions to search for')
|
|
623
|
-
params_lines.append(f' "search_subfolders": {params_dict.get("search_subfolders", True)}, # Whether to search subdirectories recursively')
|
|
624
|
-
params_lines.append(f' "skip_patterns": {params_dict.get("skip_patterns", ["blank", "condition"])!r}, # Filename patterns to skip')
|
|
625
|
-
params_lines.append('')
|
|
626
|
-
|
|
627
|
-
# Processing Parameters - Critical values to review
|
|
628
|
-
params_lines.append(' # === Processing Parameters (REVIEW THESE VALUES) ===')
|
|
629
|
-
params_lines.append(f' "adducts": {params_dict.get("adducts", [])!r}, # Adduct specifications for feature detection and annotation')
|
|
630
|
-
params_lines.append(f' "detector_type": {params_dict.get("detector_type", "unknown")!r}, # MS detector type ("orbitrap", "tof", "unknown")')
|
|
631
|
-
params_lines.append(f' "noise": {params_dict.get("noise", 50.0)}, # REVIEW: Noise threshold for feature detection. Set to 1e5 for Orbitraps')
|
|
632
|
-
params_lines.append(f' "chrom_fwhm": {params_dict.get("chrom_fwhm", 0.5)}, # REVIEW: Chromatographic peak FWHM (seconds)')
|
|
633
|
-
params_lines.append(f' "chrom_peak_snr": {params_dict.get("chrom_peak_snr", 5.0)}, # Minimum signal-to-noise ratio for chromatographic peaks')
|
|
634
|
-
params_lines.append('')
|
|
635
|
-
|
|
636
|
-
# Other parameters...
|
|
637
|
-
params_lines.append(' # === Alignment & Merging ===')
|
|
638
|
-
params_lines.append(f' "rt_tol": {params_dict.get("rt_tol", 5.0)}, # Retention time tolerance for alignment (seconds)')
|
|
639
|
-
params_lines.append(f' "mz_tol": {params_dict.get("mz_tol", 0.01)}, # Mass-to-charge ratio tolerance for alignment (Da)')
|
|
640
|
-
params_lines.append(f' "alignment_method": {params_dict.get("alignment_method", "kd")!r}, # Algorithm for sample alignment')
|
|
641
|
-
params_lines.append(f' "min_samples_per_feature": {params_dict.get("min_samples_per_feature", 1)}, # Minimum samples required per consensus feature')
|
|
642
|
-
params_lines.append(f' "merge_method": {params_dict.get("merge_method", "qt")!r}, # Method for merging consensus features')
|
|
643
|
-
params_lines.append('')
|
|
644
|
-
|
|
645
|
-
# Other params
|
|
646
|
-
params_lines.append(' # === Sample Processing ===')
|
|
647
|
-
params_lines.append(f' "batch_size": {params_dict.get("batch_size", 8)}, # Number of files to process per batch')
|
|
648
|
-
params_lines.append(f' "memory_limit_gb": {params_dict.get("memory_limit_gb", 16.0)}, # Memory limit for processing (GB)')
|
|
649
|
-
params_lines.append('')
|
|
650
|
-
|
|
651
|
-
params_lines.append(' # === Script Options ===')
|
|
652
|
-
params_lines.append(f' "resume_enabled": {params_dict.get("resume_enabled", True)}, # Enable automatic resume capability')
|
|
653
|
-
params_lines.append(f' "force_reprocess": {params_dict.get("force_reprocess", False)}, # Force reprocessing of existing files')
|
|
654
|
-
params_lines.append(f' "cleanup_temp_files": {params_dict.get("cleanup_temp_files", True)}, # Clean up temporary files after processing')
|
|
655
|
-
|
|
656
|
-
params_lines.append('}')
|
|
451
|
+
def _generate_workflow_script_content(self, source_info: Dict[str, Any]) -> str:
|
|
452
|
+
"""Generate the content for 1_masster_workflow.py script."""
|
|
657
453
|
|
|
658
|
-
# Create script lines
|
|
659
454
|
script_lines = [
|
|
660
455
|
'#!/usr/bin/env python3',
|
|
661
456
|
'"""',
|
|
662
|
-
'
|
|
663
|
-
|
|
664
|
-
'',
|
|
665
|
-
'Source Analysis:',
|
|
666
|
-
f' - Files found: {source_info.get("number_of_files", 0)}',
|
|
667
|
-
f' - File types: {", ".join(source_info.get("file_types", []))}',
|
|
668
|
-
f' - Polarity detected: {source_info.get("polarity", "unknown")}',
|
|
669
|
-
f' - Acquisition length: ~{source_info.get("length_minutes", 0.0):.1f} minutes per file',
|
|
670
|
-
'',
|
|
671
|
-
'This script processes raw MS data files into sample5 format.',
|
|
672
|
-
'Review the NOISE and CHROM_FWHM parameters below before running.',
|
|
457
|
+
'Automated Mass Spectrometry Data Analysis Pipeline',
|
|
458
|
+
'Generated by masster wizard',
|
|
673
459
|
'"""',
|
|
674
460
|
'',
|
|
675
461
|
'import sys',
|
|
676
462
|
'import time',
|
|
677
463
|
'from pathlib import Path',
|
|
678
|
-
'import concurrent.futures',
|
|
679
|
-
'import os',
|
|
680
464
|
'',
|
|
681
465
|
'# Import masster modules',
|
|
682
|
-
'from masster.
|
|
466
|
+
'from masster.study import Study',
|
|
683
467
|
'from masster import __version__',
|
|
684
468
|
'',
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
469
|
+
'# Analysis parameters',
|
|
470
|
+
'PARAMS = {',
|
|
471
|
+
' # === Core Configuration ===',
|
|
472
|
+
f' "source": {str(self.source_path)!r}, # Directory containing raw data files',
|
|
473
|
+
f' "folder": {str(self.folder_path)!r}, # Output directory for processed study',
|
|
474
|
+
f' "polarity": {self.params.polarity!r}, # Ion polarity mode ("positive" or "negative")',
|
|
475
|
+
f' "num_cores": {self.params.num_cores}, # Number of CPU cores for parallel processing',
|
|
476
|
+
'',
|
|
477
|
+
' # === File Discovery ===',
|
|
478
|
+
f' "file_extensions": {self.params.file_extensions!r}, # File extensions to search for',
|
|
479
|
+
f' "search_subfolders": {self.params.search_subfolders}, # Whether to search subdirectories recursively',
|
|
480
|
+
f' "skip_patterns": {self.params.skip_patterns!r}, # Filename patterns to skip',
|
|
481
|
+
'',
|
|
482
|
+
' # === Processing Parameters ===',
|
|
483
|
+
f' "adducts": {self.params.adducts!r}, # Adduct specifications for feature detection and annotation',
|
|
484
|
+
f' "noise": {self.params.noise}, # Noise threshold for feature detection',
|
|
485
|
+
f' "chrom_fwhm": {self.params.chrom_fwhm}, # Chromatographic peak full width at half maximum (seconds)',
|
|
486
|
+
f' "chrom_peak_snr": {self.params.chrom_peak_snr}, # Minimum signal-to-noise ratio for chromatographic peaks',
|
|
487
|
+
'',
|
|
488
|
+
' # === Alignment & Merging ===',
|
|
489
|
+
f' "rt_tol": {self.params.rt_tolerance}, # Retention time tolerance for alignment (seconds)',
|
|
490
|
+
f' "mz_tol": {self.params.mz_max_diff}, # Mass-to-charge ratio tolerance for alignment (Da)',
|
|
491
|
+
f' "alignment_method": {self.params.alignment_algorithm!r}, # Algorithm for sample alignment',
|
|
492
|
+
f' "min_samples_per_feature": {self.params.min_samples_for_merge}, # Minimum samples required per consensus feature',
|
|
493
|
+
f' "merge_method": {self.params.merge_method!r}, # Method for merging consensus features',
|
|
494
|
+
'',
|
|
495
|
+
' # === Sample Processing (used in add_samples_from_folder) ===',
|
|
496
|
+
f' "batch_size": {self.params.batch_size}, # Number of files to process per batch',
|
|
497
|
+
f' "memory_limit_gb": {self.params.memory_limit_gb}, # Memory limit for processing (GB)',
|
|
498
|
+
'',
|
|
499
|
+
' # === Script Options ===',
|
|
500
|
+
f' "resume_enabled": {self.params.resume_enabled}, # Enable automatic resume capability',
|
|
501
|
+
f' "force_reprocess": {self.params.force_reprocess}, # Force reprocessing of existing files',
|
|
502
|
+
f' "cleanup_temp_files": {self.params.cleanup_temp_files}, # Clean up temporary files after processing',
|
|
503
|
+
'}',
|
|
692
504
|
'',
|
|
693
505
|
'',
|
|
694
|
-
'def discover_raw_files(source_folder, file_extensions, search_subfolders=True
|
|
506
|
+
'def discover_raw_files(source_folder, file_extensions, search_subfolders=True):',
|
|
695
507
|
' """Discover raw data files in the source folder."""',
|
|
696
508
|
' source_path = Path(source_folder)',
|
|
697
509
|
' raw_files = []',
|
|
698
|
-
' skip_patterns = skip_patterns or []',
|
|
699
510
|
' ',
|
|
700
511
|
' for ext in file_extensions:',
|
|
701
512
|
' if search_subfolders:',
|
|
@@ -704,191 +515,189 @@ class Wizard:
|
|
|
704
515
|
' else:',
|
|
705
516
|
' pattern = f"*{ext}"',
|
|
706
517
|
' files = list(source_path.glob(pattern))',
|
|
707
|
-
' ',
|
|
708
|
-
' # Filter out files matching skip patterns',
|
|
709
|
-
' for file in files:',
|
|
710
|
-
' skip_file = False',
|
|
711
|
-
' for skip_pattern in skip_patterns:',
|
|
712
|
-
' if skip_pattern.lower() in file.name.lower():',
|
|
713
|
-
' skip_file = True',
|
|
714
|
-
' break',
|
|
715
|
-
' if not skip_file:',
|
|
716
|
-
' raw_files.append(file)',
|
|
518
|
+
' raw_files.extend(files)',
|
|
717
519
|
' ',
|
|
718
520
|
' return raw_files',
|
|
719
521
|
'',
|
|
720
522
|
'',
|
|
721
523
|
'def process_single_file(args):',
|
|
722
|
-
' """Process a single raw file to sample5 format -
|
|
723
|
-
' raw_file, output_folder
|
|
524
|
+
' """Process a single raw file to sample5 format - module level for multiprocessing."""',
|
|
525
|
+
' raw_file, output_folder = args',
|
|
526
|
+
' from masster.sample import Sample',
|
|
724
527
|
' ',
|
|
725
528
|
' try:',
|
|
726
529
|
' # Create sample5 filename',
|
|
727
530
|
' sample_name = raw_file.stem',
|
|
728
531
|
' sample5_path = Path(output_folder) / f"{sample_name}.sample5"',
|
|
729
532
|
' ',
|
|
730
|
-
' # Skip if sample5 already exists
|
|
731
|
-
' if sample5_path.exists() and
|
|
732
|
-
' print(f"
|
|
733
|
-
' return
|
|
533
|
+
' # Skip if sample5 already exists',
|
|
534
|
+
' if sample5_path.exists() and not PARAMS["force_reprocess"]:',
|
|
535
|
+
' print(f" Skipping {raw_file.name} (sample5 already exists)")',
|
|
536
|
+
' return str(sample5_path)',
|
|
734
537
|
' ',
|
|
735
|
-
' print(f"
|
|
736
|
-
' start_time = time.time()',
|
|
538
|
+
' print(f" Converting {raw_file.name}...")',
|
|
737
539
|
' ',
|
|
738
540
|
' # Load and process raw file with full pipeline',
|
|
739
541
|
' sample = Sample(log_label=sample_name)',
|
|
740
542
|
' sample.load(filename=str(raw_file))',
|
|
741
543
|
' sample.find_features(',
|
|
742
|
-
' noise=
|
|
743
|
-
' chrom_fwhm=
|
|
744
|
-
' chrom_peak_snr=
|
|
544
|
+
' noise=PARAMS["noise"],',
|
|
545
|
+
' chrom_fwhm=PARAMS["chrom_fwhm"],',
|
|
546
|
+
' chrom_peak_snr=PARAMS["chrom_peak_snr"]',
|
|
745
547
|
' )',
|
|
746
|
-
' # sample.find_adducts(adducts=params["adducts"])',
|
|
747
548
|
' sample.find_ms2()',
|
|
748
|
-
'
|
|
549
|
+
' sample.find_iso()',
|
|
550
|
+
' # sample.export_mgf()',
|
|
551
|
+
' # sample.plot_2d(filename=f"{sample5_path.replace(".sample5", ".html")}")',
|
|
749
552
|
' sample.save(str(sample5_path))',
|
|
750
553
|
' ',
|
|
751
|
-
'
|
|
752
|
-
'
|
|
753
|
-
' ',
|
|
754
|
-
' return {"status": "success", "file": str(sample5_path), "elapsed": elapsed}',
|
|
554
|
+
' # print(f" Completed {raw_file.name} -> {sample5_path.name}")',
|
|
555
|
+
' return str(sample5_path)',
|
|
755
556
|
' ',
|
|
756
557
|
' except Exception as e:',
|
|
757
|
-
' print(f"
|
|
758
|
-
' return
|
|
558
|
+
' print(f" ERROR processing {raw_file.name}: {e}")',
|
|
559
|
+
' return None',
|
|
759
560
|
'',
|
|
760
561
|
'',
|
|
761
|
-
'def
|
|
762
|
-
' """Convert raw data files to sample5 format
|
|
562
|
+
'def convert_raw_to_sample5(raw_files, output_folder, polarity, num_cores):',
|
|
563
|
+
' """Convert raw data files to sample5 format."""',
|
|
763
564
|
' import concurrent.futures',
|
|
764
565
|
' import os',
|
|
765
566
|
' ',
|
|
766
567
|
' # Create output directory',
|
|
767
568
|
' os.makedirs(output_folder, exist_ok=True)',
|
|
768
569
|
' ',
|
|
769
|
-
' print(f"\\n🚀 Processing {len(raw_files)} files using {params[\'num_cores\']} CPU cores...")',
|
|
770
|
-
' print("=" * 70)',
|
|
771
|
-
' ',
|
|
772
570
|
' # Prepare arguments for multiprocessing',
|
|
773
|
-
' file_args = [(raw_file, output_folder
|
|
774
|
-
' ',
|
|
775
|
-
' # Process files in parallel with progress tracking',
|
|
776
|
-
' results = []',
|
|
777
|
-
' successful = 0',
|
|
778
|
-
' skipped = 0',
|
|
779
|
-
' failed = 0',
|
|
780
|
-
' total_elapsed = 0',
|
|
571
|
+
' file_args = [(raw_file, output_folder) for raw_file in raw_files]',
|
|
781
572
|
' ',
|
|
782
|
-
'
|
|
783
|
-
'
|
|
784
|
-
'
|
|
573
|
+
' # Process files in parallel',
|
|
574
|
+
' sample5_files = []',
|
|
575
|
+
' with concurrent.futures.ProcessPoolExecutor(max_workers=num_cores) as executor:',
|
|
576
|
+
' futures = [executor.submit(process_single_file, args) for args in file_args]',
|
|
785
577
|
' ',
|
|
786
|
-
'
|
|
787
|
-
' for i, future in enumerate(concurrent.futures.as_completed(future_to_file), 1):',
|
|
578
|
+
' for future in concurrent.futures.as_completed(futures):',
|
|
788
579
|
' result = future.result()',
|
|
789
|
-
'
|
|
790
|
-
'
|
|
791
|
-
' if result["status"] == "success":',
|
|
792
|
-
' successful += 1',
|
|
793
|
-
' total_elapsed += result.get("elapsed", 0)',
|
|
794
|
-
' elif result["status"] == "skipped":',
|
|
795
|
-
' skipped += 1',
|
|
796
|
-
' else:',
|
|
797
|
-
' failed += 1',
|
|
798
|
-
' ',
|
|
799
|
-
' # Progress update',
|
|
800
|
-
' print(f"\\r Progress: {i}/{len(raw_files)} files completed ({successful} success, {skipped} skipped, {failed} failed)", end="", flush=True)',
|
|
801
|
-
' ',
|
|
802
|
-
' print() # New line after progress',
|
|
803
|
-
' print("=" * 70)',
|
|
804
|
-
' ',
|
|
805
|
-
' # Summary',
|
|
806
|
-
' if successful > 0:',
|
|
807
|
-
' avg_time = total_elapsed / successful',
|
|
808
|
-
' print(f"✅ Successfully processed {successful} files (avg: {avg_time:.1f}s per file)")',
|
|
809
|
-
' if skipped > 0:',
|
|
810
|
-
' print(f"⏩ Skipped {skipped} files (already exist)")',
|
|
811
|
-
' if failed > 0:',
|
|
812
|
-
' print(f"❌ Failed to process {failed} files")',
|
|
813
|
-
' for result in results:',
|
|
814
|
-
' if result["status"] == "error":',
|
|
815
|
-
' print(f" - {Path(result[\'file\']).name}: {result[\'error\']}")',
|
|
580
|
+
' if result:',
|
|
581
|
+
' sample5_files.append(result)',
|
|
816
582
|
' ',
|
|
817
|
-
' # Return list of successful sample5 files',
|
|
818
|
-
' sample5_files = [result["file"] for result in results if result["status"] in ["success", "skipped"]]',
|
|
819
583
|
' return sample5_files',
|
|
820
584
|
'',
|
|
821
585
|
'',
|
|
822
586
|
'def main():',
|
|
823
|
-
' """Main
|
|
587
|
+
' """Main analysis pipeline."""',
|
|
824
588
|
' try:',
|
|
825
589
|
' print("=" * 70)',
|
|
826
|
-
f' print("
|
|
590
|
+
f' print("masster {version} - Automated MS Data Analysis")',
|
|
827
591
|
' print("=" * 70)',
|
|
828
592
|
' print(f"Source: {PARAMS[\'source\']}")',
|
|
829
593
|
' print(f"Output: {PARAMS[\'folder\']}")',
|
|
830
|
-
' print(f"Polarity: {PARAMS[\'polarity\']}
|
|
594
|
+
' print(f"Polarity: {PARAMS[\'polarity\']}")',
|
|
831
595
|
' print(f"CPU Cores: {PARAMS[\'num_cores\']}")',
|
|
832
596
|
' print("=" * 70)',
|
|
833
|
-
' print("\\n⚙️ IMPORTANT: Review these parameters before processing:")',
|
|
834
|
-
' print(f" NOISE threshold: {PARAMS[\'noise\']} (adjust based on your instrument)")',
|
|
835
|
-
' print(f" CHROM_FWHM: {PARAMS[\'chrom_fwhm\']}s (adjust based on your chromatography)")',
|
|
836
|
-
' print(" You can edit these values in the PARAMS section above.")',
|
|
837
|
-
' print("=" * 70)',
|
|
838
597
|
' ',
|
|
839
598
|
' start_time = time.time()',
|
|
840
599
|
' ',
|
|
841
600
|
' # Step 1: Discover raw data files',
|
|
842
|
-
' print("\\
|
|
601
|
+
' print("\\nStep 1/7: Discovering raw data files...")',
|
|
843
602
|
' raw_files = discover_raw_files(',
|
|
844
603
|
' PARAMS[\'source\'],',
|
|
845
604
|
' PARAMS[\'file_extensions\'],',
|
|
846
|
-
' PARAMS[\'search_subfolders\']
|
|
847
|
-
' PARAMS[\'skip_patterns\']',
|
|
605
|
+
' PARAMS[\'search_subfolders\']',
|
|
848
606
|
' )',
|
|
849
607
|
' ',
|
|
850
608
|
' if not raw_files:',
|
|
851
|
-
' print("
|
|
609
|
+
' print("No raw data files found!")',
|
|
852
610
|
' return False',
|
|
853
611
|
' ',
|
|
854
612
|
' print(f"Found {len(raw_files)} raw data files")',
|
|
855
|
-
' for
|
|
856
|
-
' print(f" {
|
|
613
|
+
' for f in raw_files[:5]: # Show first 5 files',
|
|
614
|
+
' print(f" {f.name}")',
|
|
857
615
|
' if len(raw_files) > 5:',
|
|
858
|
-
' print(f" ... and {len(raw_files) - 5} more
|
|
616
|
+
' print(f" ... and {len(raw_files) - 5} more")',
|
|
859
617
|
' ',
|
|
860
|
-
' # Step 2: Process raw files
|
|
861
|
-
' print("\\
|
|
862
|
-
' sample5_files =
|
|
618
|
+
' # Step 2: Process raw files',
|
|
619
|
+
' print("\\nStep 2/7: Processing raw files...")',
|
|
620
|
+
' sample5_files = convert_raw_to_sample5(',
|
|
863
621
|
' raw_files,',
|
|
864
622
|
' PARAMS[\'folder\'],',
|
|
865
|
-
' PARAMS',
|
|
623
|
+
' PARAMS[\'polarity\'],',
|
|
624
|
+
' PARAMS[\'num_cores\']',
|
|
866
625
|
' )',
|
|
867
626
|
' ',
|
|
868
627
|
' if not sample5_files:',
|
|
869
|
-
' print("
|
|
628
|
+
' print("No sample5 files were created!")',
|
|
870
629
|
' return False',
|
|
871
630
|
' ',
|
|
872
|
-
'
|
|
631
|
+
' print(f"Successfully processed {len(sample5_files)} files to sample5")',
|
|
632
|
+
' ',
|
|
633
|
+
' # Step 3: Create and configure study',
|
|
634
|
+
' print("\\nStep 3/7: Initializing study...")',
|
|
635
|
+
' study = Study(folder=PARAMS[\'folder\'])',
|
|
636
|
+
' study.polarity = PARAMS[\'polarity\']',
|
|
637
|
+
' study.adducts = PARAMS[\'adducts\']',
|
|
638
|
+
' ',
|
|
639
|
+
' # Step 4: Add sample5 files to study',
|
|
640
|
+
' print("\\nStep 4/7: Adding samples to study...")',
|
|
641
|
+
' study.add(str(Path(PARAMS[\'folder\']) / "*.sample5"))',
|
|
642
|
+
' study.features_filter(study.features_select(chrom_coherence=0.1, chrom_prominence_scaled=1))',
|
|
643
|
+
' ',
|
|
644
|
+
' # Step 5: Core processing',
|
|
645
|
+
' print("\\nStep 5/7: Processing...")',
|
|
646
|
+
' study.align(',
|
|
647
|
+
' algorithm=PARAMS[\'alignment_method\'],',
|
|
648
|
+
' rt_tol=PARAMS[\'rt_tol\']',
|
|
649
|
+
' )',
|
|
650
|
+
' ',
|
|
651
|
+
' study.merge(',
|
|
652
|
+
' method="qt",',
|
|
653
|
+
' min_samples=PARAMS[\'min_samples_per_feature\'],',
|
|
654
|
+
' threads=PARAMS[\'num_cores\'],',
|
|
655
|
+
' rt_tol=PARAMS[\'rt_tol\']',
|
|
656
|
+
' )',
|
|
657
|
+
' study.find_iso()',
|
|
658
|
+
' study.fill()',
|
|
659
|
+
' study.integrate()',
|
|
660
|
+
' ',
|
|
661
|
+
' # Step 6/7: Saving results',
|
|
662
|
+
' print("\\nStep 6/7: Saving results...")',
|
|
663
|
+
' study.save()',
|
|
664
|
+
' study.export_xlsx()',
|
|
665
|
+
' study.export_mgf()',
|
|
666
|
+
' study.export_mztab()',
|
|
667
|
+
' ',
|
|
668
|
+
' # Step 7: Plots',
|
|
669
|
+
' print("\\nStep 7/7: Exporting plots...")',
|
|
670
|
+
' study.plot_consensus_2d(filename="consensus.html")',
|
|
671
|
+
' study.plot_consensus_2d(filename="consensus.png")',
|
|
672
|
+
' study.plot_alignment(filename="alignment.html")',
|
|
673
|
+
' study.plot_alignment(filename="alignment.png")',
|
|
674
|
+
' study.plot_samples_pca(filename="pca.html")',
|
|
675
|
+
' study.plot_samples_pca(filename="pca.png")',
|
|
676
|
+
' study.plot_bpc(filename="bpc.html")',
|
|
677
|
+
' study.plot_bpc(filename="bpc.png")',
|
|
678
|
+
' study.plot_rt_correction(filename="rt_correction.html")',
|
|
679
|
+
' study.plot_rt_correction(filename="rt_correction.png")',
|
|
680
|
+
' ',
|
|
681
|
+
' # Print summary',
|
|
682
|
+
' study.info()',
|
|
873
683
|
' total_time = time.time() - start_time',
|
|
874
684
|
' print("\\n" + "=" * 70)',
|
|
875
|
-
' print("
|
|
685
|
+
' print("ANALYSIS COMPLETE")',
|
|
876
686
|
' print("=" * 70)',
|
|
877
|
-
' print(f"
|
|
878
|
-
' print(f"Raw files
|
|
687
|
+
' print(f"Total processing time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")',
|
|
688
|
+
' print(f"Raw files processed: {len(raw_files)}")',
|
|
879
689
|
' print(f"Sample5 files created: {len(sample5_files)}")',
|
|
880
|
-
'
|
|
881
|
-
'
|
|
882
|
-
' print("2. Or use the sample5 files in your own analysis scripts")',
|
|
690
|
+
' if hasattr(study, "consensus_df"):',
|
|
691
|
+
' print(f"Consensus features generated: {len(study.consensus_df)}")',
|
|
883
692
|
' print("=" * 70)',
|
|
884
693
|
' ',
|
|
885
694
|
' return True',
|
|
886
695
|
' ',
|
|
887
696
|
' except KeyboardInterrupt:',
|
|
888
|
-
' print("\\
|
|
697
|
+
' print("\\nAnalysis interrupted by user")',
|
|
889
698
|
' return False',
|
|
890
699
|
' except Exception as e:',
|
|
891
|
-
' print(f"
|
|
700
|
+
' print(f"Analysis failed with error: {e}")',
|
|
892
701
|
' import traceback',
|
|
893
702
|
' traceback.print_exc()',
|
|
894
703
|
' return False',
|
|
@@ -897,91 +706,139 @@ class Wizard:
|
|
|
897
706
|
'if __name__ == "__main__":',
|
|
898
707
|
' success = main()',
|
|
899
708
|
' sys.exit(0 if success else 1)',
|
|
900
|
-
]
|
|
709
|
+
]
|
|
710
|
+
|
|
711
|
+
return '\n'.join(script_lines)
|
|
712
|
+
|
|
713
|
+
def _generate_interactive_notebook_content(self, source_info: Dict[str, Any]) -> str:
|
|
714
|
+
"""Generate the content for 2_interactive_analysis.py marimo notebook."""
|
|
715
|
+
|
|
716
|
+
notebook_lines = [
|
|
717
|
+
'import marimo',
|
|
718
|
+
'',
|
|
719
|
+
'__generated_with = "0.9.14"',
|
|
720
|
+
'app = marimo.App(width="medium")',
|
|
721
|
+
'',
|
|
722
|
+
'@app.cell',
|
|
723
|
+
'def __():',
|
|
724
|
+
' import marimo as mo',
|
|
725
|
+
' return (mo,)',
|
|
726
|
+
'',
|
|
727
|
+
'@app.cell',
|
|
728
|
+
'def __(mo):',
|
|
729
|
+
' mo.md(r"""',
|
|
730
|
+
' # MASSter Interactive Analysis',
|
|
731
|
+
' ',
|
|
732
|
+
f' **Source:** {source_info.get("number_of_files", 0)} files detected',
|
|
733
|
+
f' **Polarity:** {source_info.get("polarity", "unknown")}',
|
|
734
|
+
' ',
|
|
735
|
+
' This notebook provides interactive exploration of your processed study.',
|
|
736
|
+
' Make sure you have run `python 1_masster_workflow.py` first.',
|
|
737
|
+
' """)',
|
|
738
|
+
' return ()',
|
|
739
|
+
'',
|
|
740
|
+
'@app.cell',
|
|
741
|
+
'def __():',
|
|
742
|
+
' import masster',
|
|
743
|
+
' return (masster,)',
|
|
744
|
+
'',
|
|
745
|
+
'@app.cell',
|
|
746
|
+
'def __(masster):',
|
|
747
|
+
' study = masster.Study(folder=".")',
|
|
748
|
+
' return (study,)',
|
|
749
|
+
'',
|
|
750
|
+
'@app.cell',
|
|
751
|
+
'def __(study):',
|
|
752
|
+
' study.info()',
|
|
753
|
+
' return ()',
|
|
754
|
+
'',
|
|
755
|
+
'if __name__ == "__main__":',
|
|
756
|
+
' app.run()',
|
|
757
|
+
]
|
|
758
|
+
|
|
759
|
+
return '\n'.join(notebook_lines)
|
|
760
|
+
|
|
761
|
+
def _generate_instructions(self, source_info: Dict[str, Any], files_created: List[str]) -> List[str]:
|
|
762
|
+
"""Generate usage instructions for the created scripts."""
|
|
763
|
+
instructions = [f"Source analysis: {source_info.get('number_of_files', 0)} files found",
|
|
764
|
+
f"Polarity detected: {source_info.get('polarity', 'unknown')}",
|
|
765
|
+
"Files created:"]
|
|
766
|
+
for file_path in files_created:
|
|
767
|
+
instructions.append(f" ✅ {str(Path(file_path).resolve())}")
|
|
901
768
|
|
|
902
|
-
|
|
769
|
+
# Find the workflow script name from created files
|
|
770
|
+
workflow_script_name = "1_masster_workflow.py"
|
|
771
|
+
for file_path in files_created:
|
|
772
|
+
if Path(file_path).name == "1_masster_workflow.py":
|
|
773
|
+
workflow_script_name = Path(file_path).name
|
|
774
|
+
break
|
|
775
|
+
|
|
776
|
+
instructions.extend([
|
|
777
|
+
"",
|
|
778
|
+
"Next steps:",
|
|
779
|
+
f"1. REVIEW PARAMETERS in {workflow_script_name}:",
|
|
780
|
+
f" In particular, verify the NOISE, CHROM_FWHM, and MIN_SAMPLES_FOR_MERGE",
|
|
781
|
+
"",
|
|
782
|
+
"2. EXECUTE SAMPLE PROCESSING:",
|
|
783
|
+
f" uv run python {workflow_script_name}",
|
|
784
|
+
"",
|
|
785
|
+
"3. INTERACTIVE ANALYSIS:",
|
|
786
|
+
f" uv run marimo edit {Path('2_interactive_analysis.py').name}",
|
|
787
|
+
""]
|
|
788
|
+
)
|
|
789
|
+
|
|
790
|
+
return instructions
|
|
903
791
|
|
|
904
|
-
def
|
|
792
|
+
def execute(self) -> Dict[str, Any]:
|
|
905
793
|
"""
|
|
906
|
-
Execute the
|
|
794
|
+
Execute the sample processing workflow.
|
|
907
795
|
|
|
908
796
|
This method:
|
|
909
|
-
1.
|
|
910
|
-
2.
|
|
911
|
-
3. Provides clear feedback about next steps
|
|
797
|
+
1. Creates scripts if they don't exist (calls create_scripts())
|
|
798
|
+
2. Runs the 1_masster_workflow.py script to process raw files
|
|
912
799
|
|
|
913
800
|
Returns:
|
|
914
|
-
Dictionary containing
|
|
801
|
+
Dictionary containing:
|
|
802
|
+
- status: "success", "error", or "scripts_created"
|
|
803
|
+
- message: Status message
|
|
804
|
+
- instructions: List of next steps
|
|
805
|
+
- files_created: List of created file paths (if scripts were created)
|
|
915
806
|
"""
|
|
916
|
-
workflow_script = self.folder_path / "1_masster_workflow.py"
|
|
917
|
-
|
|
918
807
|
try:
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
print("📝 Creating analysis scripts...")
|
|
925
|
-
# Create scripts first
|
|
808
|
+
workflow_script_path = self.folder_path / "1_masster_workflow.py"
|
|
809
|
+
|
|
810
|
+
# Check if workflow script exists, create it if not
|
|
811
|
+
if not workflow_script_path.exists():
|
|
812
|
+
print("📝 Workflow script not found, creating scripts first...")
|
|
926
813
|
result = self.create_scripts()
|
|
927
|
-
|
|
928
814
|
if result["status"] != "success":
|
|
929
815
|
return result
|
|
930
|
-
|
|
931
|
-
# Print instructions
|
|
932
|
-
print("\n" + "="*70)
|
|
933
|
-
for instruction in result["instructions"]:
|
|
934
|
-
print(instruction)
|
|
935
|
-
print("="*70)
|
|
936
816
|
|
|
937
|
-
|
|
938
|
-
print("
|
|
939
|
-
print(" This will execute 1_masster_workflow.py")
|
|
940
|
-
response = input(" Proceed? [y/N]: ").strip().lower()
|
|
817
|
+
print("✅ Scripts created successfully")
|
|
818
|
+
print(f"📁 Output folder: {self.folder_path}")
|
|
941
819
|
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
return {
|
|
947
|
-
"status": "scripts_created",
|
|
948
|
-
"message": "Scripts created successfully, execution deferred",
|
|
949
|
-
"instructions": result["instructions"],
|
|
950
|
-
"files_created": result["files_created"]
|
|
951
|
-
}
|
|
952
|
-
|
|
953
|
-
except Exception as e:
|
|
954
|
-
self.logger.error(f"Analysis failed: {e}")
|
|
955
|
-
return {
|
|
956
|
-
"status": "error",
|
|
957
|
-
"message": f"Analysis failed: {e}",
|
|
958
|
-
"instructions": [],
|
|
959
|
-
"files_created": []
|
|
960
|
-
}
|
|
961
|
-
|
|
962
|
-
def _execute_workflow_script(self, script_path: Path) -> Dict[str, Any]:
|
|
963
|
-
"""Execute the workflow script and return results."""
|
|
964
|
-
try:
|
|
965
|
-
print(f"🚀 Executing {script_path.name}...")
|
|
820
|
+
# Execute the workflow script
|
|
821
|
+
print(f"🚀 Executing sample processing workflow...")
|
|
822
|
+
print(f"📄 Running: {workflow_script_path.name}")
|
|
823
|
+
print("=" * 60)
|
|
966
824
|
|
|
967
825
|
import subprocess
|
|
968
826
|
result = subprocess.run([
|
|
969
|
-
sys.executable, str(
|
|
970
|
-
], cwd=str(self.folder_path)
|
|
827
|
+
sys.executable, str(workflow_script_path)
|
|
828
|
+
], cwd=str(self.folder_path))
|
|
971
829
|
|
|
972
830
|
success = result.returncode == 0
|
|
973
831
|
|
|
974
832
|
if success:
|
|
975
|
-
print("="*
|
|
976
|
-
print("✅
|
|
977
|
-
print("
|
|
978
|
-
print("
|
|
979
|
-
print("
|
|
980
|
-
print("="*70)
|
|
833
|
+
print("=" * 60)
|
|
834
|
+
print("✅ Sample processing completed successfully!")
|
|
835
|
+
print("📋 Next step: Run interactive analysis")
|
|
836
|
+
print(" uv run marimo edit 2_interactive_analysis.py")
|
|
837
|
+
print("=" * 60)
|
|
981
838
|
|
|
982
839
|
return {
|
|
983
840
|
"status": "success",
|
|
984
|
-
"message": "
|
|
841
|
+
"message": "Sample processing completed successfully",
|
|
985
842
|
"instructions": [
|
|
986
843
|
"✅ Sample processing completed",
|
|
987
844
|
"Next: uv run marimo edit 2_interactive_analysis.py"
|
|
@@ -991,11 +848,11 @@ class Wizard:
|
|
|
991
848
|
else:
|
|
992
849
|
return {
|
|
993
850
|
"status": "error",
|
|
994
|
-
"message": f"Workflow execution failed with code {result.returncode}",
|
|
851
|
+
"message": f"Workflow execution failed with return code {result.returncode}",
|
|
995
852
|
"instructions": [
|
|
996
853
|
"❌ Check the error messages above",
|
|
997
854
|
"Review parameters in 1_masster_workflow.py",
|
|
998
|
-
"Try running: python
|
|
855
|
+
f"Try running manually: python {workflow_script_path.name}"
|
|
999
856
|
],
|
|
1000
857
|
"files_created": []
|
|
1001
858
|
}
|
|
@@ -1004,7 +861,11 @@ class Wizard:
|
|
|
1004
861
|
return {
|
|
1005
862
|
"status": "error",
|
|
1006
863
|
"message": f"Failed to execute workflow: {e}",
|
|
1007
|
-
"instructions": [
|
|
864
|
+
"instructions": [
|
|
865
|
+
"❌ Execution failed",
|
|
866
|
+
"Check that source files exist and are accessible",
|
|
867
|
+
"Verify folder permissions"
|
|
868
|
+
],
|
|
1008
869
|
"files_created": []
|
|
1009
870
|
}
|
|
1010
871
|
|
|
@@ -1313,154 +1174,8 @@ class Wizard:
|
|
|
1313
1174
|
|
|
1314
1175
|
return '\n'.join(script_lines)
|
|
1315
1176
|
|
|
1316
|
-
def _generate_notebook_content(self) -> str:
|
|
1317
|
-
"""Generate the content for a marimo interactive notebook."""
|
|
1318
|
-
|
|
1319
|
-
notebook_lines = [
|
|
1320
|
-
'import marimo',
|
|
1321
|
-
'',
|
|
1322
|
-
'__generated_with = "0.9.14"',
|
|
1323
|
-
'app = marimo.App(width="medium")',
|
|
1324
|
-
'',
|
|
1325
|
-
'',
|
|
1326
|
-
'@app.cell',
|
|
1327
|
-
'def __():',
|
|
1328
|
-
' import marimo as mo',
|
|
1329
|
-
' return (mo,)',
|
|
1330
|
-
'',
|
|
1331
|
-
'',
|
|
1332
|
-
'@app.cell',
|
|
1333
|
-
'def __(mo):',
|
|
1334
|
-
' mo.md(r"""',
|
|
1335
|
-
' # MASSter Interactive Analysis',
|
|
1336
|
-
' ',
|
|
1337
|
-
' This notebook provides interactive exploration of your mass spectrometry study results.',
|
|
1338
|
-
' The study has been processed and is ready for analysis.',
|
|
1339
|
-
' """)',
|
|
1340
|
-
'',
|
|
1341
|
-
'',
|
|
1342
|
-
'@app.cell',
|
|
1343
|
-
'def __():',
|
|
1344
|
-
' # Import masster',
|
|
1345
|
-
' import masster',
|
|
1346
|
-
' return (masster,)',
|
|
1347
|
-
'',
|
|
1348
|
-
'',
|
|
1349
|
-
'@app.cell',
|
|
1350
|
-
'def __(masster):',
|
|
1351
|
-
' # Load the processed study',
|
|
1352
|
-
' study = masster.Study(folder=".")',
|
|
1353
|
-
' study.load()',
|
|
1354
|
-
' return (study,)',
|
|
1355
|
-
'',
|
|
1356
|
-
'',
|
|
1357
|
-
'@app.cell',
|
|
1358
|
-
'def __(mo, study):',
|
|
1359
|
-
' # Display study information',
|
|
1360
|
-
' mo.md(f"""',
|
|
1361
|
-
' ## Study Overview',
|
|
1362
|
-
' ',
|
|
1363
|
-
' **Samples:** {len(study.samples) if hasattr(study, "samples") else "Not loaded"}',
|
|
1364
|
-
' ',
|
|
1365
|
-
' **Features:** {len(study.consensus_df) if hasattr(study, "consensus_df") else "Not available"}',
|
|
1366
|
-
' ',
|
|
1367
|
-
' **Polarity:** {study.polarity if hasattr(study, "polarity") else "Unknown"}',
|
|
1368
|
-
' """)',
|
|
1369
|
-
'',
|
|
1370
|
-
'',
|
|
1371
|
-
'@app.cell',
|
|
1372
|
-
'def __(study):',
|
|
1373
|
-
' # Print detailed study info',
|
|
1374
|
-
' study.info()',
|
|
1375
|
-
'',
|
|
1376
|
-
'',
|
|
1377
|
-
'@app.cell',
|
|
1378
|
-
'def __(mo):',
|
|
1379
|
-
' mo.md(r"""',
|
|
1380
|
-
' ## Quick Visualizations',
|
|
1381
|
-
' ',
|
|
1382
|
-
' Use the cells below to create interactive plots of your data.',
|
|
1383
|
-
' """)',
|
|
1384
|
-
'',
|
|
1385
|
-
'',
|
|
1386
|
-
'@app.cell',
|
|
1387
|
-
'def __(study):',
|
|
1388
|
-
' # Generate consensus 2D plot',
|
|
1389
|
-
' if hasattr(study, "consensus_df") and len(study.consensus_df) > 0:',
|
|
1390
|
-
' study.plot_consensus_2d(filename="consensus_interactive.html")',
|
|
1391
|
-
' print("Consensus 2D plot saved as: consensus_interactive.html")',
|
|
1392
|
-
' else:',
|
|
1393
|
-
' print("No consensus features available for plotting")',
|
|
1394
|
-
'',
|
|
1395
|
-
'',
|
|
1396
|
-
'@app.cell',
|
|
1397
|
-
'def __(study):',
|
|
1398
|
-
' # Generate PCA plot',
|
|
1399
|
-
' if hasattr(study, "samples") and len(study.samples) > 1:',
|
|
1400
|
-
' study.plot_samples_pca(filename="pca_interactive.html")',
|
|
1401
|
-
' print("PCA plot saved as: pca_interactive.html")',
|
|
1402
|
-
' else:',
|
|
1403
|
-
' print("Not enough samples for PCA analysis")',
|
|
1404
|
-
'',
|
|
1405
|
-
'',
|
|
1406
|
-
'@app.cell',
|
|
1407
|
-
'def __(mo):',
|
|
1408
|
-
' mo.md(r"""',
|
|
1409
|
-
' ## Data Export',
|
|
1410
|
-
' ',
|
|
1411
|
-
' Export your processed data in various formats.',
|
|
1412
|
-
' """)',
|
|
1413
|
-
'',
|
|
1414
|
-
'',
|
|
1415
|
-
'@app.cell',
|
|
1416
|
-
'def __(study):',
|
|
1417
|
-
' # Export options',
|
|
1418
|
-
' if hasattr(study, "consensus_df"):',
|
|
1419
|
-
' # Export to Excel',
|
|
1420
|
-
' study.export_xlsx(filename="study_results.xlsx")',
|
|
1421
|
-
' print("✓ Results exported to: study_results.xlsx")',
|
|
1422
|
-
' ',
|
|
1423
|
-
' # Export to MGF',
|
|
1424
|
-
' study.export_mgf(filename="study_spectra.mgf")',
|
|
1425
|
-
' print("✓ Spectra exported to: study_spectra.mgf")',
|
|
1426
|
-
' else:',
|
|
1427
|
-
' print("No data available for export")',
|
|
1428
|
-
'',
|
|
1429
|
-
'',
|
|
1430
|
-
'@app.cell',
|
|
1431
|
-
'def __(mo):',
|
|
1432
|
-
' mo.md(r"""',
|
|
1433
|
-
' ## Custom Analysis',
|
|
1434
|
-
' ',
|
|
1435
|
-
' Add your own analysis code in the cells below.',
|
|
1436
|
-
' """)',
|
|
1437
|
-
'',
|
|
1438
|
-
'',
|
|
1439
|
-
'@app.cell',
|
|
1440
|
-
'def __(study):',
|
|
1441
|
-
' # Access consensus features dataframe',
|
|
1442
|
-
' if hasattr(study, "consensus_df"):',
|
|
1443
|
-
' df = study.consensus_df',
|
|
1444
|
-
' print(f"Consensus features shape: {df.shape}")',
|
|
1445
|
-
' print("\\nFirst 5 features:")',
|
|
1446
|
-
' print(df.head())',
|
|
1447
|
-
' return (df,) if "df" in locals() else ()',
|
|
1448
|
-
'',
|
|
1449
|
-
'',
|
|
1450
|
-
'@app.cell',
|
|
1451
|
-
'def __():',
|
|
1452
|
-
' # Your custom analysis here',
|
|
1453
|
-
' pass',
|
|
1454
|
-
'',
|
|
1455
|
-
'',
|
|
1456
|
-
'if __name__ == "__main__":',
|
|
1457
|
-
' app.run()',
|
|
1458
|
-
]
|
|
1459
|
-
|
|
1460
|
-
return '\n'.join(notebook_lines)
|
|
1461
|
-
|
|
1462
1177
|
|
|
1463
|
-
def
|
|
1178
|
+
def create_script(
|
|
1464
1179
|
source: str,
|
|
1465
1180
|
folder: str,
|
|
1466
1181
|
filename: str = 'run_masster.py',
|
|
@@ -1471,14 +1186,16 @@ def create_analysis(
|
|
|
1471
1186
|
**kwargs
|
|
1472
1187
|
) -> bool:
|
|
1473
1188
|
"""
|
|
1474
|
-
Create standalone analysis
|
|
1189
|
+
Create a standalone analysis script without initializing a Wizard instance.
|
|
1475
1190
|
|
|
1476
|
-
This function generates
|
|
1191
|
+
This function generates a Python script that replicates automated processing
|
|
1192
|
+
steps with the specified configuration. The script can be executed independently
|
|
1193
|
+
to perform the same analysis.
|
|
1477
1194
|
|
|
1478
1195
|
Parameters:
|
|
1479
1196
|
source: Directory containing raw data files
|
|
1480
1197
|
folder: Output directory for processed study
|
|
1481
|
-
filename: Filename for the generated script (
|
|
1198
|
+
filename: Filename for the generated script (should end with .py)
|
|
1482
1199
|
polarity: Ion polarity mode ("positive" or "negative")
|
|
1483
1200
|
adducts: List of adduct specifications (auto-set if None)
|
|
1484
1201
|
params: Custom wizard_def parameters (optional)
|
|
@@ -1486,13 +1203,14 @@ def create_analysis(
|
|
|
1486
1203
|
**kwargs: Additional parameters to override defaults
|
|
1487
1204
|
|
|
1488
1205
|
Returns:
|
|
1489
|
-
True if
|
|
1206
|
+
True if script was generated successfully, False otherwise
|
|
1490
1207
|
|
|
1491
1208
|
Example:
|
|
1492
|
-
>>> from masster.wizard import
|
|
1493
|
-
>>>
|
|
1209
|
+
>>> from masster.wizard import create_script
|
|
1210
|
+
>>> create_script(
|
|
1494
1211
|
... source=r'D:\\Data\\raw_files',
|
|
1495
1212
|
... folder=r'D:\\Data\\output',
|
|
1213
|
+
... filename='run_masster.py',
|
|
1496
1214
|
... polarity='positive'
|
|
1497
1215
|
... )
|
|
1498
1216
|
"""
|
|
@@ -1532,27 +1250,23 @@ def create_analysis(
|
|
|
1532
1250
|
study_path = Path(folder)
|
|
1533
1251
|
study_path.mkdir(parents=True, exist_ok=True)
|
|
1534
1252
|
|
|
1535
|
-
# Create a temporary Wizard instance to generate the
|
|
1253
|
+
# Create a temporary Wizard instance to generate the script
|
|
1536
1254
|
temp_wizard = Wizard(params=wizard_params)
|
|
1537
1255
|
|
|
1538
|
-
# Generate the scripts using the
|
|
1256
|
+
# Generate the scripts using the instance method
|
|
1539
1257
|
result = temp_wizard.create_scripts()
|
|
1258
|
+
success = result.get("status") == "success"
|
|
1540
1259
|
|
|
1541
|
-
|
|
1542
|
-
print("Scripts created successfully!")
|
|
1543
|
-
for instruction in result["instructions"]:
|
|
1544
|
-
print(instruction)
|
|
1545
|
-
|
|
1546
|
-
return result["status"] == "success"
|
|
1260
|
+
return success
|
|
1547
1261
|
|
|
1548
1262
|
except Exception as e:
|
|
1549
|
-
print(f"Failed to create
|
|
1263
|
+
print(f"Failed to create script: {e}")
|
|
1550
1264
|
import traceback
|
|
1551
1265
|
traceback.print_exc()
|
|
1552
1266
|
return False
|
|
1553
1267
|
|
|
1554
1268
|
|
|
1555
|
-
def
|
|
1269
|
+
def execute(
|
|
1556
1270
|
source: str,
|
|
1557
1271
|
folder: str,
|
|
1558
1272
|
filename: str = 'run_masster.py',
|
|
@@ -1565,7 +1279,7 @@ def analyze(
|
|
|
1565
1279
|
"""
|
|
1566
1280
|
Create and execute a standalone analysis script for automated MS data processing.
|
|
1567
1281
|
|
|
1568
|
-
This function generates a Python script with the same parameters as
|
|
1282
|
+
This function generates a Python script with the same parameters as create_script(),
|
|
1569
1283
|
but immediately executes it after creation. Combines script generation and execution
|
|
1570
1284
|
in a single step.
|
|
1571
1285
|
|
|
@@ -1583,8 +1297,8 @@ def analyze(
|
|
|
1583
1297
|
True if script was created and executed successfully, False otherwise
|
|
1584
1298
|
|
|
1585
1299
|
Example:
|
|
1586
|
-
>>> from masster.wizard import
|
|
1587
|
-
>>>
|
|
1300
|
+
>>> from masster.wizard import execute
|
|
1301
|
+
>>> execute(
|
|
1588
1302
|
... source=r'D:\\Data\\raw_files',
|
|
1589
1303
|
... folder=r'D:\\Data\\output',
|
|
1590
1304
|
... polarity='positive'
|
|
@@ -1592,39 +1306,59 @@ def analyze(
|
|
|
1592
1306
|
"""
|
|
1593
1307
|
|
|
1594
1308
|
try:
|
|
1595
|
-
#
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
polarity=polarity,
|
|
1611
|
-
num_cores=max(1, int(multiprocessing.cpu_count() * 0.75)) if num_cores <= 0 else num_cores
|
|
1612
|
-
)
|
|
1309
|
+
# First, create the script using create_script()
|
|
1310
|
+
script_created = create_script(
|
|
1311
|
+
source=source,
|
|
1312
|
+
folder=folder,
|
|
1313
|
+
filename=filename,
|
|
1314
|
+
polarity=polarity,
|
|
1315
|
+
adducts=adducts,
|
|
1316
|
+
params=params,
|
|
1317
|
+
num_cores=num_cores,
|
|
1318
|
+
**kwargs
|
|
1319
|
+
)
|
|
1320
|
+
|
|
1321
|
+
if not script_created:
|
|
1322
|
+
print("Failed to create analysis script")
|
|
1323
|
+
return False
|
|
1613
1324
|
|
|
1614
|
-
|
|
1615
|
-
|
|
1325
|
+
# Get the full path to the created script
|
|
1326
|
+
study_path = Path(folder)
|
|
1327
|
+
script_path = study_path / Path(filename).name
|
|
1328
|
+
|
|
1329
|
+
if not script_path.exists():
|
|
1330
|
+
print(f"Script file not found: {script_path}")
|
|
1331
|
+
return False
|
|
1616
1332
|
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1333
|
+
print(f"Executing...")
|
|
1334
|
+
#print("=" * 70)
|
|
1335
|
+
|
|
1336
|
+
# Execute the script using subprocess with real-time output
|
|
1337
|
+
import subprocess
|
|
1338
|
+
|
|
1339
|
+
# Run the script with Python, letting it inherit our stdout/stderr
|
|
1340
|
+
try:
|
|
1341
|
+
# Use subprocess.run for direct output inheritance - no capturing/re-printing
|
|
1342
|
+
result = subprocess.run([
|
|
1343
|
+
sys.executable, str(script_path)
|
|
1344
|
+
], cwd=str(study_path))
|
|
1345
|
+
|
|
1346
|
+
return_code = result.returncode
|
|
1347
|
+
|
|
1348
|
+
except Exception as e:
|
|
1349
|
+
print(f"Error during script execution: {e}")
|
|
1350
|
+
return False
|
|
1621
1351
|
|
|
1622
|
-
|
|
1623
|
-
wizard = Wizard(params=wizard_params)
|
|
1624
|
-
result = wizard.analyze()
|
|
1352
|
+
success = return_code == 0
|
|
1625
1353
|
|
|
1626
|
-
|
|
1627
|
-
|
|
1354
|
+
if success:
|
|
1355
|
+
print("=" * 70)
|
|
1356
|
+
print("Script execution completed successfully")
|
|
1357
|
+
else:
|
|
1358
|
+
print("=" * 70)
|
|
1359
|
+
print(f"Script execution failed with return code: {return_code}")
|
|
1360
|
+
|
|
1361
|
+
return success
|
|
1628
1362
|
|
|
1629
1363
|
except Exception as e:
|
|
1630
1364
|
print(f"Failed to execute script: {e}")
|
|
@@ -1633,93 +1367,73 @@ def analyze(
|
|
|
1633
1367
|
return False
|
|
1634
1368
|
|
|
1635
1369
|
|
|
1636
|
-
|
|
1637
|
-
|
|
1370
|
+
def create_scripts(
|
|
1371
|
+
source: str = "",
|
|
1372
|
+
folder: str = "",
|
|
1373
|
+
polarity: str = "positive",
|
|
1374
|
+
adducts: Optional[List[str]] = None,
|
|
1375
|
+
num_cores: int = 0,
|
|
1376
|
+
**kwargs
|
|
1377
|
+
) -> Dict[str, Any]:
|
|
1378
|
+
"""
|
|
1379
|
+
Create analysis scripts without explicitly instantiating a Wizard.
|
|
1380
|
+
|
|
1381
|
+
This is a convenience function that creates a Wizard instance internally
|
|
1382
|
+
and calls its create_scripts() method.
|
|
1383
|
+
|
|
1384
|
+
Parameters:
|
|
1385
|
+
source: Directory containing raw data files
|
|
1386
|
+
folder: Output directory for processed study
|
|
1387
|
+
polarity: Ion polarity mode ("positive" or "negative")
|
|
1388
|
+
adducts: List of adduct specifications (auto-set if None)
|
|
1389
|
+
num_cores: Number of CPU cores (0 = auto-detect)
|
|
1390
|
+
**kwargs: Additional parameters
|
|
1638
1391
|
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
'@app.cell',
|
|
1647
|
-
'def __():',
|
|
1648
|
-
' import marimo as mo',
|
|
1649
|
-
' return (mo,)',
|
|
1650
|
-
'',
|
|
1651
|
-
'',
|
|
1652
|
-
'@app.cell',
|
|
1653
|
-
'def __(mo):',
|
|
1654
|
-
' mo.md(r"""',
|
|
1655
|
-
' # MASSter Interactive Analysis',
|
|
1656
|
-
' ',
|
|
1657
|
-
f' **Source:** {source_info.get("number_of_files", 0)} files ({", ".join(source_info.get("file_types", []))}) detected',
|
|
1658
|
-
f' **Polarity:** {source_info.get("polarity", "unknown")} (auto-detected)',
|
|
1659
|
-
f' **Acquisition length:** ~{source_info.get("length_minutes", 0.0):.1f} minutes per file',
|
|
1660
|
-
' ',
|
|
1661
|
-
' This notebook provides interactive exploration of your processed mass spectrometry study.',
|
|
1662
|
-
' Make sure you have run `python 1_masster_workflow.py` first to create the sample5 files.',
|
|
1663
|
-
' """)',
|
|
1664
|
-
'',
|
|
1665
|
-
'',
|
|
1666
|
-
'@app.cell',
|
|
1667
|
-
'def __():',
|
|
1668
|
-
' # Import masster',
|
|
1669
|
-
' import masster',
|
|
1670
|
-
' return (masster,)',
|
|
1671
|
-
'',
|
|
1672
|
-
'',
|
|
1673
|
-
'@app.cell',
|
|
1674
|
-
'def __(masster):',
|
|
1675
|
-
' # Load the study from sample5 files',
|
|
1676
|
-
' study = masster.Study(folder=".")',
|
|
1677
|
-
' return (study,)',
|
|
1678
|
-
'',
|
|
1679
|
-
'',
|
|
1680
|
-
'@app.cell',
|
|
1681
|
-
'def __(mo, study):',
|
|
1682
|
-
' # Display study information',
|
|
1683
|
-
' study.info()',
|
|
1684
|
-
' return ()',
|
|
1685
|
-
'',
|
|
1686
|
-
'',
|
|
1687
|
-
'if __name__ == "__main__":',
|
|
1688
|
-
' app.run()',
|
|
1689
|
-
]
|
|
1392
|
+
Returns:
|
|
1393
|
+
Dictionary containing:
|
|
1394
|
+
- status: "success" or "error"
|
|
1395
|
+
- message: Status message
|
|
1396
|
+
- instructions: List of next steps
|
|
1397
|
+
- files_created: List of created file paths
|
|
1398
|
+
- source_info: Metadata about source files
|
|
1690
1399
|
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
" - CHROM_FWHM (adjust based on your chromatography peak width)",
|
|
1706
|
-
"",
|
|
1707
|
-
"2. EXECUTE SAMPLE PROCESSING:",
|
|
1708
|
-
" python 1_masster_workflow.py",
|
|
1709
|
-
" (This will process all raw files to sample5 format)",
|
|
1710
|
-
"",
|
|
1711
|
-
"3. INTERACTIVE ANALYSIS:",
|
|
1712
|
-
" uv run marimo edit 2_interactive_analysis.py",
|
|
1713
|
-
" (This opens an interactive notebook for data exploration)",
|
|
1714
|
-
"",
|
|
1715
|
-
"FILES CREATED:"
|
|
1716
|
-
]
|
|
1400
|
+
Example:
|
|
1401
|
+
>>> import masster.wizard
|
|
1402
|
+
>>> result = masster.wizard.create_scripts(
|
|
1403
|
+
... source=r'D:\\Data\\raw_files',
|
|
1404
|
+
... folder=r'D:\\Data\\output',
|
|
1405
|
+
... polarity='negative'
|
|
1406
|
+
... )
|
|
1407
|
+
>>> print("Status:", result["status"])
|
|
1408
|
+
"""
|
|
1409
|
+
|
|
1410
|
+
try:
|
|
1411
|
+
# Auto-detect optimal number of cores if not specified
|
|
1412
|
+
if num_cores <= 0:
|
|
1413
|
+
num_cores = max(1, int(multiprocessing.cpu_count() * 0.75))
|
|
1717
1414
|
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1415
|
+
# Create Wizard instance
|
|
1416
|
+
wizard = Wizard(
|
|
1417
|
+
source=source,
|
|
1418
|
+
folder=folder,
|
|
1419
|
+
polarity=polarity,
|
|
1420
|
+
adducts=adducts,
|
|
1421
|
+
num_cores=num_cores,
|
|
1422
|
+
**kwargs
|
|
1423
|
+
)
|
|
1424
|
+
|
|
1425
|
+
# Call the instance method
|
|
1426
|
+
return wizard.create_scripts()
|
|
1427
|
+
|
|
1428
|
+
except Exception as e:
|
|
1429
|
+
return {
|
|
1430
|
+
"status": "error",
|
|
1431
|
+
"message": f"Failed to create scripts: {e}",
|
|
1432
|
+
"instructions": [],
|
|
1433
|
+
"files_created": [],
|
|
1434
|
+
"source_info": {}
|
|
1435
|
+
}
|
|
1722
1436
|
|
|
1723
1437
|
|
|
1724
1438
|
# Export the main classes and functions
|
|
1725
|
-
__all__ = ["Wizard", "wizard_def", "
|
|
1439
|
+
__all__ = ["Wizard", "wizard_def", "create_scripts"]
|