masster-0.5.14-py3-none-any.whl → masster-0.5.16-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/__init__.py +1 -1
- masster/_version.py +1 -1
- masster/sample/adducts.py +8 -5
- masster/sample/processing.py +6 -0
- masster/study/id.py +4 -3
- masster/study/plot.py +3 -0
- masster/wizard/__init__.py +2 -2
- masster/wizard/wizard.py +544 -437
- {masster-0.5.14.dist-info → masster-0.5.16.dist-info}/METADATA +1 -1
- {masster-0.5.14.dist-info → masster-0.5.16.dist-info}/RECORD +13 -13
- {masster-0.5.14.dist-info → masster-0.5.16.dist-info}/WHEEL +0 -0
- {masster-0.5.14.dist-info → masster-0.5.16.dist-info}/entry_points.txt +0 -0
- {masster-0.5.14.dist-info → masster-0.5.16.dist-info}/licenses/LICENSE +0 -0
masster/wizard/wizard.py
CHANGED
@@ -228,10 +228,9 @@ class Wizard:
 that process raw MS data through the complete pipeline: file discovery, feature
 detection, sample processing, study assembly, alignment, merging, and export.

- This simplified version focuses on
- -
- -
- - analyze(): Create and run analysis scripts with interactive notebook
+ This simplified version focuses on two core functions:
+ - create_scripts(): Generate standalone analysis scripts
+ - execute(): Create and run analysis scripts
 """

 def __init__(
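For reference, a minimal usage sketch of the two entry points named in the rewritten docstring (illustration only, not part of the wheel diff; the constructor keywords source, folder, polarity and num_cores are taken from the hunks below, and the paths are placeholders):

    from masster.wizard import Wizard

    # Placeholder paths; point these at your own raw data and output folders.
    wizard = Wizard(source=r"D:\Data\raw_files", folder=r"D:\Data\output",
                    polarity="positive", num_cores=6)
    result = wizard.create_scripts()  # writes 1_masster_workflow.py and 2_interactive_analysis.py
    print(result["status"], result["files_created"])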
@@ -240,7 +239,7 @@ class Wizard:
 folder: str = "",
 polarity: str = "positive",
 adducts: Optional[List[str]] = None,
- num_cores: int =
+ num_cores: int = 6,
 **kwargs
 ):
 """
@@ -350,101 +349,525 @@ class Wizard:
 """Get the adduct specifications."""
 return self.params.adducts

- def
+ def create_scripts(self) -> Dict[str, Any]:
 """
- Generate
+ Generate analysis scripts based on source file analysis.
+
+ This method:
+ 1. Analyzes the source files to extract metadata
+ 2. Creates 1_masster_workflow.py with sample processing logic
+ 3. Creates 2_interactive_analysis.py marimo notebook for study exploration
+ 4. Returns instructions for next steps

- Parameters:
- filename: Name for the generated script file
-
 Returns:
-
+ Dictionary containing:
+ - status: "success" or "error"
+ - message: Status message
+ - instructions: List of next steps
+ - files_created: List of created file paths
+ - source_info: Metadata about source files
 """
 try:
-
-
+ # Step 1: Analyze source files to extract metadata
+ source_info = self._analyze_source_files()

-
-
+ # Update wizard parameters based on detected metadata
+ if source_info.get('polarity') and source_info['polarity'] != 'positive':
+ self.params.polarity = source_info['polarity']

-
- return True
+ files_created = []

-
-
-
-
- def create_notebook(self, filename: str = "interactive.py") -> bool:
- """
- Generate a marimo notebook for interactive analysis of the study.
-
- Parameters:
- filename: Name for the generated notebook file
+ # Step 2: Create 1_masster_workflow.py
+ workflow_script_path = self.folder_path / "1_masster_workflow.py"
+ workflow_content = self._generate_workflow_script_content(source_info)

-
-
-
-
-
-
+ with open(workflow_script_path, 'w', encoding='utf-8') as f:
+ f.write(workflow_content)
+ files_created.append(str(workflow_script_path))
+
+ # Step 3: Create 2_interactive_analysis.py marimo notebook
+ notebook_path = self.folder_path / "2_interactive_analysis.py"
+ notebook_content = self._generate_interactive_notebook_content(source_info)

 with open(notebook_path, 'w', encoding='utf-8') as f:
 f.write(notebook_content)
+ files_created.append(str(notebook_path))
+
+ # Step 4: Generate instructions
+ instructions = self._generate_instructions(source_info, files_created)

-
-
+ return {
+ "status": "success",
+ "message": f"Successfully created {len(files_created)} script files",
+ "instructions": instructions,
+ "files_created": files_created,
+ "source_info": source_info
+ }

 except Exception as e:
-
-
+ return {
+ "status": "error",
+ "message": f"Failed to create scripts: {e}",
+ "instructions": [],
+ "files_created": [],
+ "source_info": {}
+ }

- def
- """
-
+ def _analyze_source_files(self) -> Dict[str, Any]:
+ """Analyze source files to extract metadata."""
+ result = {
+ "number_of_files": 0,
+ "file_types": [],
+ "polarity": "positive",
+ "length_minutes": 0.0,
+ "first_file": None
+ }

-
-
+ try:
+ # Find raw data files
+ extensions = [".wiff", ".raw", ".mzML"]
+ raw_files = []

-
-
-
-
-
-
+ for ext in extensions:
+ pattern = f"**/*{ext}"
+ files = list(self.source_path.rglob(pattern))
+ if files:
+ raw_files.extend(files)
+ if ext not in result["file_types"]:
+ result["file_types"].append(ext)
+
+ result["number_of_files"] = len(raw_files)

-
-
-
+ if raw_files:
+ result["first_file"] = str(raw_files[0])
+ # Simple heuristic: assume 30 minutes per file if we can't determine
+ result["length_minutes"] = 30.0
+
+ except Exception as e:
+ print(f"Warning: Could not analyze source files: {e}")

-
-
+ return result
+
+ def _generate_workflow_script_content(self, source_info: Dict[str, Any]) -> str:
+ """Generate the content for 1_masster_workflow.py script."""

+ script_lines = [
+ '#!/usr/bin/env python3',
+ '"""',
+ 'Automated Mass Spectrometry Data Analysis Pipeline',
+ 'Generated by masster wizard',
+ '"""',
+ '',
+ 'import sys',
+ 'import time',
+ 'from pathlib import Path',
+ '',
+ '# Import masster modules',
+ 'from masster.study import Study',
+ 'from masster import __version__',
+ '',
+ '# Analysis parameters',
+ 'PARAMS = {',
+ ' # === Core Configuration ===',
+ f' "source": {str(self.source_path)!r}, # Directory containing raw data files',
+ f' "folder": {str(self.folder_path)!r}, # Output directory for processed study',
+ f' "polarity": {self.params.polarity!r}, # Ion polarity mode ("positive" or "negative")',
+ f' "num_cores": {self.params.num_cores}, # Number of CPU cores for parallel processing',
+ '',
+ ' # === File Discovery ===',
+ f' "file_extensions": {self.params.file_extensions!r}, # File extensions to search for',
+ f' "search_subfolders": {self.params.search_subfolders}, # Whether to search subdirectories recursively',
+ f' "skip_patterns": {self.params.skip_patterns!r}, # Filename patterns to skip',
+ '',
+ ' # === Processing Parameters ===',
+ f' "adducts": {self.params.adducts!r}, # Adduct specifications for feature detection and annotation',
+ f' "noise": {self.params.noise}, # Noise threshold for feature detection',
+ f' "chrom_fwhm": {self.params.chrom_fwhm}, # Chromatographic peak full width at half maximum (seconds)',
+ f' "chrom_peak_snr": {self.params.chrom_peak_snr}, # Minimum signal-to-noise ratio for chromatographic peaks',
+ '',
+ ' # === Alignment & Merging ===',
+ f' "rt_tol": {self.params.rt_tolerance}, # Retention time tolerance for alignment (seconds)',
+ f' "mz_tol": {self.params.mz_max_diff}, # Mass-to-charge ratio tolerance for alignment (Da)',
+ f' "alignment_method": {self.params.alignment_algorithm!r}, # Algorithm for sample alignment',
+ f' "min_samples_per_feature": {self.params.min_samples_for_merge}, # Minimum samples required per consensus feature',
+ f' "merge_method": {self.params.merge_method!r}, # Method for merging consensus features',
+ '',
+ ' # === Sample Processing (used in add_samples_from_folder) ===',
+ f' "batch_size": {self.params.batch_size}, # Number of files to process per batch',
+ f' "memory_limit_gb": {self.params.memory_limit_gb}, # Memory limit for processing (GB)',
+ '',
+ ' # === Script Options ===',
+ f' "resume_enabled": {self.params.resume_enabled}, # Enable automatic resume capability',
+ f' "force_reprocess": {self.params.force_reprocess}, # Force reprocessing of existing files',
+ f' "cleanup_temp_files": {self.params.cleanup_temp_files}, # Clean up temporary files after processing',
+ '}',
+ '',
+ '',
+ 'def discover_raw_files(source_folder, file_extensions, search_subfolders=True):',
+ ' """Discover raw data files in the source folder."""',
+ ' source_path = Path(source_folder)',
+ ' raw_files = []',
+ ' ',
+ ' for ext in file_extensions:',
+ ' if search_subfolders:',
+ ' pattern = f"**/*{ext}"',
+ ' files = list(source_path.rglob(pattern))',
+ ' else:',
+ ' pattern = f"*{ext}"',
+ ' files = list(source_path.glob(pattern))',
+ ' raw_files.extend(files)',
+ ' ',
+ ' return raw_files',
+ '',
+ '',
+ 'def process_single_file(args):',
+ ' """Process a single raw file to sample5 format - module level for multiprocessing."""',
+ ' raw_file, output_folder = args',
+ ' from masster.sample import Sample',
+ ' ',
+ ' try:',
+ ' # Create sample5 filename',
+ ' sample_name = raw_file.stem',
+ ' sample5_path = Path(output_folder) / f"{sample_name}.sample5"',
+ ' ',
+ ' # Skip if sample5 already exists',
+ ' if sample5_path.exists() and not PARAMS["force_reprocess"]:',
+ ' print(f" Skipping {raw_file.name} (sample5 already exists)")',
+ ' return str(sample5_path)',
+ ' ',
+ ' print(f" Converting {raw_file.name}...")',
+ ' ',
+ ' # Load and process raw file with full pipeline',
+ ' sample = Sample(log_label=sample_name)',
+ ' sample.load(filename=str(raw_file))',
+ ' sample.find_features(',
+ ' noise=PARAMS["noise"],',
+ ' chrom_fwhm=PARAMS["chrom_fwhm"],',
+ ' chrom_peak_snr=PARAMS["chrom_peak_snr"]',
+ ' )',
+ ' sample.find_ms2()',
+ ' sample.find_iso()',
+ ' # sample.export_mgf()',
+ ' # sample.plot_2d(filename=f"{sample5_path.replace(".sample5", ".html")}")',
+ ' sample.save(str(sample5_path))',
+ ' ',
+ ' # print(f" Completed {raw_file.name} -> {sample5_path.name}")',
+ ' return str(sample5_path)',
+ ' ',
+ ' except Exception as e:',
+ ' print(f" ERROR processing {raw_file.name}: {e}")',
+ ' return None',
+ '',
+ '',
+ 'def convert_raw_to_sample5(raw_files, output_folder, polarity, num_cores):',
+ ' """Convert raw data files to sample5 format."""',
+ ' import concurrent.futures',
+ ' import os',
+ ' ',
+ ' # Create output directory',
+ ' os.makedirs(output_folder, exist_ok=True)',
+ ' ',
+ ' # Prepare arguments for multiprocessing',
+ ' file_args = [(raw_file, output_folder) for raw_file in raw_files]',
+ ' ',
+ ' # Process files in parallel',
+ ' sample5_files = []',
+ ' with concurrent.futures.ProcessPoolExecutor(max_workers=num_cores) as executor:',
+ ' futures = [executor.submit(process_single_file, args) for args in file_args]',
+ ' ',
+ ' for future in concurrent.futures.as_completed(futures):',
+ ' result = future.result()',
+ ' if result:',
+ ' sample5_files.append(result)',
+ ' ',
+ ' return sample5_files',
+ '',
+ '',
+ 'def main():',
+ ' """Main analysis pipeline."""',
+ ' try:',
+ ' print("=" * 70)',
+ f' print("masster {version} - Automated MS Data Analysis")',
+ ' print("=" * 70)',
+ ' print(f"Source: {PARAMS[\'source\']}")',
+ ' print(f"Output: {PARAMS[\'folder\']}")',
+ ' print(f"Polarity: {PARAMS[\'polarity\']}")',
+ ' print(f"CPU Cores: {PARAMS[\'num_cores\']}")',
+ ' print("=" * 70)',
+ ' ',
+ ' start_time = time.time()',
+ ' ',
+ ' # Step 1: Discover raw data files',
+ ' print("\\nStep 1/7: Discovering raw data files...")',
+ ' raw_files = discover_raw_files(',
+ ' PARAMS[\'source\'],',
+ ' PARAMS[\'file_extensions\'],',
+ ' PARAMS[\'search_subfolders\']',
+ ' )',
+ ' ',
+ ' if not raw_files:',
+ ' print("No raw data files found!")',
+ ' return False',
+ ' ',
+ ' print(f"Found {len(raw_files)} raw data files")',
+ ' for f in raw_files[:5]: # Show first 5 files',
+ ' print(f" {f.name}")',
+ ' if len(raw_files) > 5:',
+ ' print(f" ... and {len(raw_files) - 5} more")',
+ ' ',
+ ' # Step 2: Process raw files',
+ ' print("\\nStep 2/7: Processing raw files...")',
+ ' sample5_files = convert_raw_to_sample5(',
+ ' raw_files,',
+ ' PARAMS[\'folder\'],',
+ ' PARAMS[\'polarity\'],',
+ ' PARAMS[\'num_cores\']',
+ ' )',
+ ' ',
+ ' if not sample5_files:',
+ ' print("No sample5 files were created!")',
+ ' return False',
+ ' ',
+ ' print(f"Successfully processed {len(sample5_files)} files to sample5")',
+ ' ',
+ ' # Step 3: Create and configure study',
+ ' print("\\nStep 3/7: Initializing study...")',
+ ' study = Study(folder=PARAMS[\'folder\'])',
+ ' study.polarity = PARAMS[\'polarity\']',
+ ' study.adducts = PARAMS[\'adducts\']',
+ ' ',
+ ' # Step 4: Add sample5 files to study',
+ ' print("\\nStep 4/7: Adding samples to study...")',
+ ' study.add(str(Path(PARAMS[\'folder\']) / "*.sample5"))',
+ ' study.features_filter(study.features_select(chrom_coherence=0.1, chrom_prominence_scaled=1))',
+ ' ',
+ ' # Step 5: Core processing',
+ ' print("\\nStep 5/7: Processing...")',
+ ' study.align(',
+ ' algorithm=PARAMS[\'alignment_method\'],',
+ ' rt_tol=PARAMS[\'rt_tol\']',
+ ' )',
+ ' ',
+ ' study.merge(',
+ ' method="qt",',
+ ' min_samples=PARAMS[\'min_samples_per_feature\'],',
+ ' threads=PARAMS[\'num_cores\'],',
+ ' rt_tol=PARAMS[\'rt_tol\']',
+ ' )',
+ ' study.find_iso()',
+ ' study.fill()',
+ ' study.integrate()',
+ ' ',
+ ' # Step 6/7: Saving results',
+ ' print("\\nStep 6/7: Saving results...")',
+ ' study.save()',
+ ' study.export_xlsx()',
+ ' study.export_mgf()',
+ ' study.export_mztab()',
+ ' ',
+ ' # Step 7: Plots',
+ ' print("\\nStep 7/7: Exporting plots...")',
+ ' study.plot_consensus_2d(filename="consensus.html")',
+ ' study.plot_consensus_2d(filename="consensus.png")',
+ ' study.plot_alignment(filename="alignment.html")',
+ ' study.plot_alignment(filename="alignment.png")',
+ ' study.plot_samples_pca(filename="pca.html")',
+ ' study.plot_samples_pca(filename="pca.png")',
+ ' study.plot_bpc(filename="bpc.html")',
+ ' study.plot_bpc(filename="bpc.png")',
+ ' study.plot_rt_correction(filename="rt_correction.html")',
+ ' study.plot_rt_correction(filename="rt_correction.png")',
+ ' ',
+ ' # Print summary',
+ ' study.info()',
+ ' total_time = time.time() - start_time',
+ ' print("\\n" + "=" * 70)',
+ ' print("ANALYSIS COMPLETE")',
+ ' print("=" * 70)',
+ ' print(f"Total processing time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")',
+ ' print(f"Raw files processed: {len(raw_files)}")',
+ ' print(f"Sample5 files created: {len(sample5_files)}")',
+ ' if hasattr(study, "consensus_df"):',
+ ' print(f"Consensus features generated: {len(study.consensus_df)}")',
+ ' print("=" * 70)',
+ ' ',
+ ' return True',
+ ' ',
+ ' except KeyboardInterrupt:',
+ ' print("\\nAnalysis interrupted by user")',
+ ' return False',
+ ' except Exception as e:',
+ ' print(f"Analysis failed with error: {e}")',
+ ' import traceback',
+ ' traceback.print_exc()',
+ ' return False',
+ '',
+ '',
+ 'if __name__ == "__main__":',
+ ' success = main()',
+ ' sys.exit(0 if success else 1)',
+ ]
+
+ return '\n'.join(script_lines)
+
+ def _generate_interactive_notebook_content(self, source_info: Dict[str, Any]) -> str:
+ """Generate the content for 2_interactive_analysis.py marimo notebook."""
+
+ notebook_lines = [
+ 'import marimo',
+ '',
+ '__generated_with = "0.9.14"',
+ 'app = marimo.App(width="medium")',
+ '',
+ '@app.cell',
+ 'def __():',
+ ' import marimo as mo',
+ ' return (mo,)',
+ '',
+ '@app.cell',
+ 'def __(mo):',
+ ' mo.md(r"""',
+ ' # MASSter Interactive Analysis',
+ ' ',
+ f' **Source:** {source_info.get("number_of_files", 0)} files detected',
+ f' **Polarity:** {source_info.get("polarity", "unknown")}',
+ ' ',
+ ' This notebook provides interactive exploration of your processed study.',
+ ' Make sure you have run `python 1_masster_workflow.py` first.',
+ ' """)',
+ ' return ()',
+ '',
+ '@app.cell',
+ 'def __():',
+ ' import masster',
+ ' return (masster,)',
+ '',
+ '@app.cell',
+ 'def __(masster):',
+ ' study = masster.Study(folder=".")',
+ ' return (study,)',
+ '',
+ '@app.cell',
+ 'def __(study):',
+ ' study.info()',
+ ' return ()',
+ '',
+ 'if __name__ == "__main__":',
+ ' app.run()',
+ ]
+
+ return '\n'.join(notebook_lines)
+
+ def _generate_instructions(self, source_info: Dict[str, Any], files_created: List[str]) -> List[str]:
+ """Generate usage instructions for the created scripts."""
+ instructions = [f"Source analysis: {source_info.get('number_of_files', 0)} files found",
+ f"Polarity detected: {source_info.get('polarity', 'unknown')}",
+ "Files created:"]
+ for file_path in files_created:
+ instructions.append(f" ✅ {str(Path(file_path).resolve())}")
+
+ # Find the workflow script name from created files
+ workflow_script_name = "1_masster_workflow.py"
+ for file_path in files_created:
+ if Path(file_path).name == "1_masster_workflow.py":
+ workflow_script_name = Path(file_path).name
+ break
+
+ instructions.extend([
+ "",
+ "Next steps:",
+ f"1. REVIEW PARAMETERS in {workflow_script_name}:",
+ f" In particular, verify the NOISE, CHROM_FWHM, and MIN_SAMPLES_FOR_MERGE",
+ "",
+ "2. EXECUTE SAMPLE PROCESSING:",
+ f" uv run python {workflow_script_name}",
+ "",
+ "3. INTERACTIVE ANALYSIS:",
+ f" uv run marimo edit {Path('2_interactive_analysis.py').name}",
+ ""]
+ )
+
+ return instructions
+
+ def execute(self) -> Dict[str, Any]:
+ """
+ Execute the sample processing workflow.
+
+ This method:
+ 1. Creates scripts if they don't exist (calls create_scripts())
+ 2. Runs the 1_masster_workflow.py script to process raw files
+
+ Returns:
+ Dictionary containing:
+ - status: "success", "error", or "scripts_created"
+ - message: Status message
+ - instructions: List of next steps
+ - files_created: List of created file paths (if scripts were created)
+ """
 try:
-
+ workflow_script_path = self.folder_path / "1_masster_workflow.py"
+
+ # Check if workflow script exists, create it if not
+ if not workflow_script_path.exists():
+ print("📝 Workflow script not found, creating scripts first...")
+ result = self.create_scripts()
+ if result["status"] != "success":
+ return result
+
+ print("✅ Scripts created successfully")
+ print(f"📁 Output folder: {self.folder_path}")
+
+ # Execute the workflow script
+ print(f"🚀 Executing sample processing workflow...")
+ print(f"📄 Running: {workflow_script_path.name}")
+ print("=" * 60)

 import subprocess
 result = subprocess.run([
- sys.executable, str(
- ], cwd=str(self.folder_path)
+ sys.executable, str(workflow_script_path)
+ ], cwd=str(self.folder_path))

 success = result.returncode == 0

 if success:
- print("=" *
- print("
- print("
- print("
- print(
-
+ print("=" * 60)
+ print("✅ Sample processing completed successfully!")
+ print("📋 Next step: Run interactive analysis")
+ print(" uv run marimo edit 2_interactive_analysis.py")
+ print("=" * 60)
+
+ return {
+ "status": "success",
+ "message": "Sample processing completed successfully",
+ "instructions": [
+ "✅ Sample processing completed",
+ "Next: uv run marimo edit 2_interactive_analysis.py"
+ ],
+ "files_created": []
+ }
 else:
-
-
+ return {
+ "status": "error",
+ "message": f"Workflow execution failed with return code {result.returncode}",
+ "instructions": [
+ "❌ Check the error messages above",
+ "Review parameters in 1_masster_workflow.py",
+ f"Try running manually: python {workflow_script_path.name}"
+ ],
+ "files_created": []
+ }

- return success
-
 except Exception as e:
-
-
+ return {
+ "status": "error",
+ "message": f"Failed to execute workflow: {e}",
+ "instructions": [
+ "❌ Execution failed",
+ "Check that source files exist and are accessible",
+ "Verify folder permissions"
+ ],
+ "files_created": []
+ }

 def _generate_script_content(self) -> str:
 """Generate the complete analysis script content."""
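Per the hunk above, both Wizard.create_scripts() and Wizard.execute() now return a status dictionary rather than a bare boolean. A small caller-side sketch of how the result might be handled (illustration only, not part of the diff; dictionary keys as listed in the new docstrings):

    result = wizard.execute()  # runs 1_masster_workflow.py, creating the scripts first if needed
    if result["status"] != "success":
        # instructions carries human-readable next steps and error hints
        for line in result["instructions"]:
            print(line)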
@@ -751,154 +1174,8 @@ class Wizard:

 return '\n'.join(script_lines)

- def _generate_notebook_content(self) -> str:
- """Generate the content for a marimo interactive notebook."""
-
- notebook_lines = [
- 'import marimo',
- '',
- '__generated_with = "0.9.14"',
- 'app = marimo.App(width="medium")',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' import marimo as mo',
- ' return (mo,)',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' # MASSter Interactive Analysis',
- ' ',
- ' This notebook provides interactive exploration of your mass spectrometry study results.',
- ' The study has been processed and is ready for analysis.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' # Import masster',
- ' import masster',
- ' return (masster,)',
- '',
- '',
- '@app.cell',
- 'def __(masster):',
- ' # Load the processed study',
- f' study = masster.Study(folder=".")',
- ' study.load()',
- ' return (study,)',
- '',
- '',
- '@app.cell',
- 'def __(mo, study):',
- ' # Display study information',
- ' mo.md(f"""',
- ' ## Study Overview',
- ' ',
- ' **Samples:** {len(study.samples) if hasattr(study, "samples") else "Not loaded"}',
- ' ',
- ' **Features:** {len(study.consensus_df) if hasattr(study, "consensus_df") else "Not available"}',
- ' ',
- ' **Polarity:** {study.polarity if hasattr(study, "polarity") else "Unknown"}',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Print detailed study info',
- ' study.info()',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Quick Visualizations',
- ' ',
- ' Use the cells below to create interactive plots of your data.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Generate consensus 2D plot',
- ' if hasattr(study, "consensus_df") and len(study.consensus_df) > 0:',
- ' study.plot_consensus_2d(filename="consensus_interactive.html")',
- ' print("Consensus 2D plot saved as: consensus_interactive.html")',
- ' else:',
- ' print("No consensus features available for plotting")',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Generate PCA plot',
- ' if hasattr(study, "samples") and len(study.samples) > 1:',
- ' study.plot_samples_pca(filename="pca_interactive.html")',
- ' print("PCA plot saved as: pca_interactive.html")',
- ' else:',
- ' print("Not enough samples for PCA analysis")',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Data Export',
- ' ',
- ' Export your processed data in various formats.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Export options',
- ' if hasattr(study, "consensus_df"):',
- ' # Export to Excel',
- ' study.export_xlsx(filename="study_results.xlsx")',
- ' print("✓ Results exported to: study_results.xlsx")',
- ' ',
- ' # Export to MGF',
- ' study.export_mgf(filename="study_spectra.mgf")',
- ' print("✓ Spectra exported to: study_spectra.mgf")',
- ' else:',
- ' print("No data available for export")',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Custom Analysis',
- ' ',
- ' Add your own analysis code in the cells below.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Access consensus features dataframe',
- ' if hasattr(study, "consensus_df"):',
- ' df = study.consensus_df',
- ' print(f"Consensus features shape: {df.shape}")',
- ' print("\\nFirst 5 features:")',
- ' print(df.head())',
- ' return (df,) if "df" in locals() else ()',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' # Your custom analysis here',
- ' pass',
- '',
- '',
- 'if __name__ == "__main__":',
- ' app.run()',
- ]
-
- return '\n'.join(notebook_lines)
-

- def
+ def create_script(
 source: str,
 folder: str,
 filename: str = 'run_masster.py',
@@ -929,8 +1206,8 @@ def create_analysis(
 True if script was generated successfully, False otherwise

 Example:
- >>> from masster.wizard import
- >>>
+ >>> from masster.wizard import create_script
+ >>> create_script(
 ... source=r'D:\\Data\\raw_files',
 ... folder=r'D:\\Data\\output',
 ... filename='run_masster.py',
@@ -976,8 +1253,9 @@ def create_analysis(
 # Create a temporary Wizard instance to generate the script
 temp_wizard = Wizard(params=wizard_params)

- # Generate the
-
+ # Generate the scripts using the instance method
+ result = temp_wizard.create_scripts()
+ success = result.get("status") == "success"

 return success

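The hunk above shows how the module-level helper keeps its original boolean contract by reducing the new status dictionary to a flag. A caller-side sketch of that unchanged contract (illustration only; paths are placeholders):

    from masster.wizard import create_script

    ok = create_script(source=r"D:\Data\raw_files", folder=r"D:\Data\output")
    if not ok:
        raise SystemExit("masster wizard: script generation failed")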
@@ -988,7 +1266,7 @@ def create_analysis(
 return False


- def
+ def execute(
 source: str,
 folder: str,
 filename: str = 'run_masster.py',
@@ -1001,7 +1279,7 @@ def analyze(
 """
 Create and execute a standalone analysis script for automated MS data processing.

- This function generates a Python script with the same parameters as
+ This function generates a Python script with the same parameters as create_script(),
 but immediately executes it after creation. Combines script generation and execution
 in a single step.

@@ -1019,8 +1297,8 @@ def analyze(
 True if script was created and executed successfully, False otherwise

 Example:
- >>> from masster.wizard import
- >>>
+ >>> from masster.wizard import execute
+ >>> execute(
 ... source=r'D:\\Data\\raw_files',
 ... folder=r'D:\\Data\\output',
 ... polarity='positive'
@@ -1028,8 +1306,8 @@ def analyze(
 """

 try:
- # First, create the script using
- script_created =
+ # First, create the script using create_script()
+ script_created = create_script(
 source=source,
 folder=folder,
 filename=filename,
@@ -1089,244 +1367,73 @@ def analyze(
 return False


- def
- source: str,
- folder: str,
- filename: str = 'interactive.py',
+ def create_scripts(
+ source: str = "",
+ folder: str = "",
 polarity: str = "positive",
 adducts: Optional[List[str]] = None,
- params: Optional[wizard_def] = None,
 num_cores: int = 0,
 **kwargs
- ) ->
+ ) -> Dict[str, Any]:
 """
- Create
+ Create analysis scripts without explicitly instantiating a Wizard.

- This
-
+ This is a convenience function that creates a Wizard instance internally
+ and calls its create_scripts() method.

 Parameters:
 source: Directory containing raw data files
- folder: Output directory for processed study
- filename: Filename for the generated notebook (should end with .py)
+ folder: Output directory for processed study
 polarity: Ion polarity mode ("positive" or "negative")
 adducts: List of adduct specifications (auto-set if None)
- params: Custom wizard_def parameters (optional)
 num_cores: Number of CPU cores (0 = auto-detect)
- **kwargs: Additional parameters
+ **kwargs: Additional parameters

 Returns:
-
+ Dictionary containing:
+ - status: "success" or "error"
+ - message: Status message
+ - instructions: List of next steps
+ - files_created: List of created file paths
+ - source_info: Metadata about source files

 Example:
- >>>
- >>>
+ >>> import masster.wizard
+ >>> result = masster.wizard.create_scripts(
 ... source=r'D:\\Data\\raw_files',
 ... folder=r'D:\\Data\\output',
- ...
- ... polarity='positive'
+ ... polarity='negative'
 ... )
+ >>> print("Status:", result["status"])
 """

 try:
- #
- if
-
- wizard_params = params
- # Update with provided values
- wizard_params.source = source
- wizard_params.folder = folder
- if polarity != "positive": # Only override if explicitly different
- wizard_params.polarity = polarity
- if num_cores > 0:
- wizard_params.num_cores = num_cores
- if adducts is not None:
- wizard_params.adducts = adducts
- else:
- # Create new params with provided values
- wizard_params = wizard_def(
- source=source,
- folder=folder,
- polarity=polarity,
- num_cores=max(1, int(multiprocessing.cpu_count() * 0.75)) if num_cores <= 0 else num_cores
- )
-
- if adducts is not None:
- wizard_params.adducts = adducts
-
- # Apply any additional kwargs
- for key, value in kwargs.items():
- if hasattr(wizard_params, key):
- setattr(wizard_params, key, value)
-
- # Ensure study folder exists
- study_path = Path(folder)
- study_path.mkdir(parents=True, exist_ok=True)
-
- # Generate notebook content
- notebook_content = _generate_notebook_content(wizard_params)
+ # Auto-detect optimal number of cores if not specified
+ if num_cores <= 0:
+ num_cores = max(1, int(multiprocessing.cpu_count() * 0.75))

- #
-
-
-
+ # Create Wizard instance
+ wizard = Wizard(
+ source=source,
+ folder=folder,
+ polarity=polarity,
+ adducts=adducts,
+ num_cores=num_cores,
+ **kwargs
+ )

-
- return
+ # Call the instance method
+ return wizard.create_scripts()

 except Exception as e:
-
-
-
-
-
-
-
- """Generate the content for a marimo interactive notebook."""
-
- notebook_lines = [
- 'import marimo',
- '',
- '__generated_with = "0.9.14"',
- 'app = marimo.App(width="medium")',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' import marimo as mo',
- ' return (mo,)',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' # MASSter Interactive Analysis',
- ' ',
- ' This notebook provides interactive exploration of your mass spectrometry study results.',
- ' The study has been processed and is ready for analysis.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' # Import masster',
- ' import masster',
- ' return (masster,)',
- '',
- '',
- '@app.cell',
- 'def __(masster):',
- ' # Load the processed study',
- f' study = masster.Study(folder=".")',
- ' study.load()',
- ' return (study,)',
- '',
- '',
- '@app.cell',
- 'def __(mo, study):',
- ' # Display study information',
- ' mo.md(f"""',
- ' ## Study Overview',
- ' ',
- ' **Samples:** {len(study.samples) if hasattr(study, "samples") else "Not loaded"}',
- ' ',
- ' **Features:** {len(study.consensus_df) if hasattr(study, "consensus_df") else "Not available"}',
- ' ',
- ' **Polarity:** {study.polarity if hasattr(study, "polarity") else "Unknown"}',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Print detailed study info',
- ' study.info()',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Quick Visualizations',
- ' ',
- ' Use the cells below to create interactive plots of your data.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Generate consensus 2D plot',
- ' if hasattr(study, "consensus_df") and len(study.consensus_df) > 0:',
- ' study.plot_consensus_2d(filename="consensus_interactive.html")',
- ' print("Consensus 2D plot saved as: consensus_interactive.html")',
- ' else:',
- ' print("No consensus features available for plotting")',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Generate PCA plot',
- ' if hasattr(study, "samples") and len(study.samples) > 1:',
- ' study.plot_samples_pca(filename="pca_interactive.html")',
- ' print("PCA plot saved as: pca_interactive.html")',
- ' else:',
- ' print("Not enough samples for PCA analysis")',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Data Export',
- ' ',
- ' Export your processed data in various formats.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Export options',
- ' if hasattr(study, "consensus_df"):',
- ' # Export to Excel',
- ' study.export_xlsx(filename="study_results.xlsx")',
- ' print("✓ Results exported to: study_results.xlsx")',
- ' ',
- ' # Export to MGF',
- ' study.export_mgf(filename="study_spectra.mgf")',
- ' print("✓ Spectra exported to: study_spectra.mgf")',
- ' else:',
- ' print("No data available for export")',
- '',
- '',
- '@app.cell',
- 'def __(mo):',
- ' mo.md(r"""',
- ' ## Custom Analysis',
- ' ',
- ' Add your own analysis code in the cells below.',
- ' """)',
- '',
- '',
- '@app.cell',
- 'def __(study):',
- ' # Access consensus features dataframe',
- ' if hasattr(study, "consensus_df"):',
- ' df = study.consensus_df',
- ' print(f"Consensus features shape: {df.shape}")',
- ' print("\\nFirst 5 features:")',
- ' print(df.head())',
- ' return (df,) if "df" in locals() else ()',
- '',
- '',
- '@app.cell',
- 'def __():',
- ' # Your custom analysis here',
- ' pass',
- '',
- '',
- 'if __name__ == "__main__":',
- ' app.run()',
- ]
-
- return '\n'.join(notebook_lines)
+ return {
+ "status": "error",
+ "message": f"Failed to create scripts: {e}",
+ "instructions": [],
+ "files_created": [],
+ "source_info": {}
+ }


 # Export the main classes and functions
- __all__ = ["Wizard", "wizard_def", "
+ __all__ = ["Wizard", "wizard_def", "create_scripts"]
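With the updated __all__, the public surface of masster.wizard is Wizard, wizard_def, and the module-level create_scripts(). A final sketch tying the pieces together (illustration only, not part of the diff; argument values are placeholders, signature as shown in the last hunk):

    import masster.wizard

    result = masster.wizard.create_scripts(
        source=r"D:\Data\raw_files",   # directory containing raw data files
        folder=r"D:\Data\output",      # output directory for the processed study
        polarity="negative",           # num_cores=0 auto-detects ~75% of available CPU cores
    )
    print("Status:", result["status"])
    for path in result["files_created"]:
        print("created:", path)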