masster 0.5.14-py3-none-any.whl → 0.5.16-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

This version of masster has been flagged as a potentially problematic release.

masster/wizard/wizard.py CHANGED
@@ -228,10 +228,9 @@ class Wizard:
228
228
  that process raw MS data through the complete pipeline: file discovery, feature
229
229
  detection, sample processing, study assembly, alignment, merging, and export.
230
230
 
231
- This simplified version focuses on three core functions:
232
- - create_analysis(): Generate standalone analysis scripts
233
- - create_notebook(): Generate marimo interactive notebooks
234
- - analyze(): Create and run analysis scripts with interactive notebook
231
+ This simplified version focuses on two core functions:
232
+ - create_scripts(): Generate standalone analysis scripts
233
+ - execute(): Create and run analysis scripts
235
234
  """
236
235
 
237
236
  def __init__(
@@ -240,7 +239,7 @@ class Wizard:
240
239
  folder: str = "",
241
240
  polarity: str = "positive",
242
241
  adducts: Optional[List[str]] = None,
243
- num_cores: int = 0,
242
+ num_cores: int = 6,
244
243
  **kwargs
245
244
  ):
246
245
  """
@@ -350,101 +349,525 @@ class Wizard:
350
349
  """Get the adduct specifications."""
351
350
  return self.params.adducts
352
351
 
353
- def create_analysis(self, filename: str = "run_masster.py") -> bool:
352
+ def create_scripts(self) -> Dict[str, Any]:
354
353
  """
355
- Generate a standalone Python script for the analysis pipeline.
354
+ Generate analysis scripts based on source file analysis.
355
+
356
+ This method:
357
+ 1. Analyzes the source files to extract metadata
358
+ 2. Creates 1_masster_workflow.py with sample processing logic
359
+ 3. Creates 2_interactive_analysis.py marimo notebook for study exploration
360
+ 4. Returns instructions for next steps
356
361
 
357
- Parameters:
358
- filename: Name for the generated script file
359
-
360
362
  Returns:
361
- True if script was generated successfully, False otherwise
363
+ Dictionary containing:
364
+ - status: "success" or "error"
365
+ - message: Status message
366
+ - instructions: List of next steps
367
+ - files_created: List of created file paths
368
+ - source_info: Metadata about source files
362
369
  """
363
370
  try:
364
- script_path = self.folder_path / filename
365
- script_content = self._generate_script_content()
371
+ # Step 1: Analyze source files to extract metadata
372
+ source_info = self._analyze_source_files()
366
373
 
367
- with open(script_path, 'w', encoding='utf-8') as f:
368
- f.write(script_content)
374
+ # Update wizard parameters based on detected metadata
375
+ if source_info.get('polarity') and source_info['polarity'] != 'positive':
376
+ self.params.polarity = source_info['polarity']
369
377
 
370
- print(f"Analysis script created: {script_path}")
371
- return True
378
+ files_created = []
372
379
 
373
- except Exception as e:
374
- print(f"Failed to create script: {e}")
375
- return False
376
-
377
- def create_notebook(self, filename: str = "interactive.py") -> bool:
378
- """
379
- Generate a marimo notebook for interactive analysis of the study.
380
-
381
- Parameters:
382
- filename: Name for the generated notebook file
380
+ # Step 2: Create 1_masster_workflow.py
381
+ workflow_script_path = self.folder_path / "1_masster_workflow.py"
382
+ workflow_content = self._generate_workflow_script_content(source_info)
383
383
 
384
- Returns:
385
- True if notebook was generated successfully, False otherwise
386
- """
387
- try:
388
- notebook_path = self.folder_path / filename
389
- notebook_content = self._generate_notebook_content()
384
+ with open(workflow_script_path, 'w', encoding='utf-8') as f:
385
+ f.write(workflow_content)
386
+ files_created.append(str(workflow_script_path))
387
+
388
+ # Step 3: Create 2_interactive_analysis.py marimo notebook
389
+ notebook_path = self.folder_path / "2_interactive_analysis.py"
390
+ notebook_content = self._generate_interactive_notebook_content(source_info)
390
391
 
391
392
  with open(notebook_path, 'w', encoding='utf-8') as f:
392
393
  f.write(notebook_content)
394
+ files_created.append(str(notebook_path))
395
+
396
+ # Step 4: Generate instructions
397
+ instructions = self._generate_instructions(source_info, files_created)
393
398
 
394
- print(f"Interactive notebook created: {notebook_path}")
395
- return True
399
+ return {
400
+ "status": "success",
401
+ "message": f"Successfully created {len(files_created)} script files",
402
+ "instructions": instructions,
403
+ "files_created": files_created,
404
+ "source_info": source_info
405
+ }
396
406
 
397
407
  except Exception as e:
398
- print(f"Failed to create notebook: {e}")
399
- return False
408
+ return {
409
+ "status": "error",
410
+ "message": f"Failed to create scripts: {e}",
411
+ "instructions": [],
412
+ "files_created": [],
413
+ "source_info": {}
414
+ }
400
415
 
401
- def analyze(self, filename: str = "run_masster.py") -> bool:
402
- """
403
- Create and execute a standalone analysis script.
416
+ def _analyze_source_files(self) -> Dict[str, Any]:
417
+ """Analyze source files to extract metadata."""
418
+ result = {
419
+ "number_of_files": 0,
420
+ "file_types": [],
421
+ "polarity": "positive",
422
+ "length_minutes": 0.0,
423
+ "first_file": None
424
+ }
404
425
 
405
- Parameters:
406
- filename: Name for the generated script file
426
+ try:
427
+ # Find raw data files
428
+ extensions = [".wiff", ".raw", ".mzML"]
429
+ raw_files = []
407
430
 
408
- Returns:
409
- True if execution completed successfully, False otherwise
410
- """
411
- # First create the analysis script
412
- if not self.create_analysis(filename):
413
- return False
431
+ for ext in extensions:
432
+ pattern = f"**/*{ext}"
433
+ files = list(self.source_path.rglob(pattern))
434
+ if files:
435
+ raw_files.extend(files)
436
+ if ext not in result["file_types"]:
437
+ result["file_types"].append(ext)
438
+
439
+ result["number_of_files"] = len(raw_files)
414
440
 
415
- # Create interactive notebook
416
- if not self.create_notebook("interactive.py"):
417
- print("Warning: Failed to create interactive notebook")
441
+ if raw_files:
442
+ result["first_file"] = str(raw_files[0])
443
+ # Simple heuristic: assume 30 minutes per file if we can't determine
444
+ result["length_minutes"] = 30.0
445
+
446
+ except Exception as e:
447
+ print(f"Warning: Could not analyze source files: {e}")
418
448
 
419
- # Then execute the analysis script
420
- script_path = self.folder_path / filename
449
+ return result
450
+
451
+ def _generate_workflow_script_content(self, source_info: Dict[str, Any]) -> str:
452
+ """Generate the content for 1_masster_workflow.py script."""
421
453
 
454
+ script_lines = [
455
+ '#!/usr/bin/env python3',
456
+ '"""',
457
+ 'Automated Mass Spectrometry Data Analysis Pipeline',
458
+ 'Generated by masster wizard',
459
+ '"""',
460
+ '',
461
+ 'import sys',
462
+ 'import time',
463
+ 'from pathlib import Path',
464
+ '',
465
+ '# Import masster modules',
466
+ 'from masster.study import Study',
467
+ 'from masster import __version__',
468
+ '',
469
+ '# Analysis parameters',
470
+ 'PARAMS = {',
471
+ ' # === Core Configuration ===',
472
+ f' "source": {str(self.source_path)!r}, # Directory containing raw data files',
473
+ f' "folder": {str(self.folder_path)!r}, # Output directory for processed study',
474
+ f' "polarity": {self.params.polarity!r}, # Ion polarity mode ("positive" or "negative")',
475
+ f' "num_cores": {self.params.num_cores}, # Number of CPU cores for parallel processing',
476
+ '',
477
+ ' # === File Discovery ===',
478
+ f' "file_extensions": {self.params.file_extensions!r}, # File extensions to search for',
479
+ f' "search_subfolders": {self.params.search_subfolders}, # Whether to search subdirectories recursively',
480
+ f' "skip_patterns": {self.params.skip_patterns!r}, # Filename patterns to skip',
481
+ '',
482
+ ' # === Processing Parameters ===',
483
+ f' "adducts": {self.params.adducts!r}, # Adduct specifications for feature detection and annotation',
484
+ f' "noise": {self.params.noise}, # Noise threshold for feature detection',
485
+ f' "chrom_fwhm": {self.params.chrom_fwhm}, # Chromatographic peak full width at half maximum (seconds)',
486
+ f' "chrom_peak_snr": {self.params.chrom_peak_snr}, # Minimum signal-to-noise ratio for chromatographic peaks',
487
+ '',
488
+ ' # === Alignment & Merging ===',
489
+ f' "rt_tol": {self.params.rt_tolerance}, # Retention time tolerance for alignment (seconds)',
490
+ f' "mz_tol": {self.params.mz_max_diff}, # Mass-to-charge ratio tolerance for alignment (Da)',
491
+ f' "alignment_method": {self.params.alignment_algorithm!r}, # Algorithm for sample alignment',
492
+ f' "min_samples_per_feature": {self.params.min_samples_for_merge}, # Minimum samples required per consensus feature',
493
+ f' "merge_method": {self.params.merge_method!r}, # Method for merging consensus features',
494
+ '',
495
+ ' # === Sample Processing (used in add_samples_from_folder) ===',
496
+ f' "batch_size": {self.params.batch_size}, # Number of files to process per batch',
497
+ f' "memory_limit_gb": {self.params.memory_limit_gb}, # Memory limit for processing (GB)',
498
+ '',
499
+ ' # === Script Options ===',
500
+ f' "resume_enabled": {self.params.resume_enabled}, # Enable automatic resume capability',
501
+ f' "force_reprocess": {self.params.force_reprocess}, # Force reprocessing of existing files',
502
+ f' "cleanup_temp_files": {self.params.cleanup_temp_files}, # Clean up temporary files after processing',
503
+ '}',
504
+ '',
505
+ '',
506
+ 'def discover_raw_files(source_folder, file_extensions, search_subfolders=True):',
507
+ ' """Discover raw data files in the source folder."""',
508
+ ' source_path = Path(source_folder)',
509
+ ' raw_files = []',
510
+ ' ',
511
+ ' for ext in file_extensions:',
512
+ ' if search_subfolders:',
513
+ ' pattern = f"**/*{ext}"',
514
+ ' files = list(source_path.rglob(pattern))',
515
+ ' else:',
516
+ ' pattern = f"*{ext}"',
517
+ ' files = list(source_path.glob(pattern))',
518
+ ' raw_files.extend(files)',
519
+ ' ',
520
+ ' return raw_files',
521
+ '',
522
+ '',
523
+ 'def process_single_file(args):',
524
+ ' """Process a single raw file to sample5 format - module level for multiprocessing."""',
525
+ ' raw_file, output_folder = args',
526
+ ' from masster.sample import Sample',
527
+ ' ',
528
+ ' try:',
529
+ ' # Create sample5 filename',
530
+ ' sample_name = raw_file.stem',
531
+ ' sample5_path = Path(output_folder) / f"{sample_name}.sample5"',
532
+ ' ',
533
+ ' # Skip if sample5 already exists',
534
+ ' if sample5_path.exists() and not PARAMS["force_reprocess"]:',
535
+ ' print(f" Skipping {raw_file.name} (sample5 already exists)")',
536
+ ' return str(sample5_path)',
537
+ ' ',
538
+ ' print(f" Converting {raw_file.name}...")',
539
+ ' ',
540
+ ' # Load and process raw file with full pipeline',
541
+ ' sample = Sample(log_label=sample_name)',
542
+ ' sample.load(filename=str(raw_file))',
543
+ ' sample.find_features(',
544
+ ' noise=PARAMS["noise"],',
545
+ ' chrom_fwhm=PARAMS["chrom_fwhm"],',
546
+ ' chrom_peak_snr=PARAMS["chrom_peak_snr"]',
547
+ ' )',
548
+ ' sample.find_ms2()',
549
+ ' sample.find_iso()',
550
+ ' # sample.export_mgf()',
551
+ ' # sample.plot_2d(filename=str(sample5_path).replace(".sample5", ".html"))',
552
+ ' sample.save(str(sample5_path))',
553
+ ' ',
554
+ ' # print(f" Completed {raw_file.name} -> {sample5_path.name}")',
555
+ ' return str(sample5_path)',
556
+ ' ',
557
+ ' except Exception as e:',
558
+ ' print(f" ERROR processing {raw_file.name}: {e}")',
559
+ ' return None',
560
+ '',
561
+ '',
562
+ 'def convert_raw_to_sample5(raw_files, output_folder, polarity, num_cores):',
563
+ ' """Convert raw data files to sample5 format."""',
564
+ ' import concurrent.futures',
565
+ ' import os',
566
+ ' ',
567
+ ' # Create output directory',
568
+ ' os.makedirs(output_folder, exist_ok=True)',
569
+ ' ',
570
+ ' # Prepare arguments for multiprocessing',
571
+ ' file_args = [(raw_file, output_folder) for raw_file in raw_files]',
572
+ ' ',
573
+ ' # Process files in parallel',
574
+ ' sample5_files = []',
575
+ ' with concurrent.futures.ProcessPoolExecutor(max_workers=num_cores) as executor:',
576
+ ' futures = [executor.submit(process_single_file, args) for args in file_args]',
577
+ ' ',
578
+ ' for future in concurrent.futures.as_completed(futures):',
579
+ ' result = future.result()',
580
+ ' if result:',
581
+ ' sample5_files.append(result)',
582
+ ' ',
583
+ ' return sample5_files',
584
+ '',
585
+ '',
586
+ 'def main():',
587
+ ' """Main analysis pipeline."""',
588
+ ' try:',
589
+ ' print("=" * 70)',
590
+ f' print("masster {version} - Automated MS Data Analysis")',
591
+ ' print("=" * 70)',
592
+ ' print(f"Source: {PARAMS[\'source\']}")',
593
+ ' print(f"Output: {PARAMS[\'folder\']}")',
594
+ ' print(f"Polarity: {PARAMS[\'polarity\']}")',
595
+ ' print(f"CPU Cores: {PARAMS[\'num_cores\']}")',
596
+ ' print("=" * 70)',
597
+ ' ',
598
+ ' start_time = time.time()',
599
+ ' ',
600
+ ' # Step 1: Discover raw data files',
601
+ ' print("\\nStep 1/7: Discovering raw data files...")',
602
+ ' raw_files = discover_raw_files(',
603
+ ' PARAMS[\'source\'],',
604
+ ' PARAMS[\'file_extensions\'],',
605
+ ' PARAMS[\'search_subfolders\']',
606
+ ' )',
607
+ ' ',
608
+ ' if not raw_files:',
609
+ ' print("No raw data files found!")',
610
+ ' return False',
611
+ ' ',
612
+ ' print(f"Found {len(raw_files)} raw data files")',
613
+ ' for f in raw_files[:5]: # Show first 5 files',
614
+ ' print(f" {f.name}")',
615
+ ' if len(raw_files) > 5:',
616
+ ' print(f" ... and {len(raw_files) - 5} more")',
617
+ ' ',
618
+ ' # Step 2: Process raw files',
619
+ ' print("\\nStep 2/7: Processing raw files...")',
620
+ ' sample5_files = convert_raw_to_sample5(',
621
+ ' raw_files,',
622
+ ' PARAMS[\'folder\'],',
623
+ ' PARAMS[\'polarity\'],',
624
+ ' PARAMS[\'num_cores\']',
625
+ ' )',
626
+ ' ',
627
+ ' if not sample5_files:',
628
+ ' print("No sample5 files were created!")',
629
+ ' return False',
630
+ ' ',
631
+ ' print(f"Successfully processed {len(sample5_files)} files to sample5")',
632
+ ' ',
633
+ ' # Step 3: Create and configure study',
634
+ ' print("\\nStep 3/7: Initializing study...")',
635
+ ' study = Study(folder=PARAMS[\'folder\'])',
636
+ ' study.polarity = PARAMS[\'polarity\']',
637
+ ' study.adducts = PARAMS[\'adducts\']',
638
+ ' ',
639
+ ' # Step 4: Add sample5 files to study',
640
+ ' print("\\nStep 4/7: Adding samples to study...")',
641
+ ' study.add(str(Path(PARAMS[\'folder\']) / "*.sample5"))',
642
+ ' study.features_filter(study.features_select(chrom_coherence=0.1, chrom_prominence_scaled=1))',
643
+ ' ',
644
+ ' # Step 5: Core processing',
645
+ ' print("\\nStep 5/7: Processing...")',
646
+ ' study.align(',
647
+ ' algorithm=PARAMS[\'alignment_method\'],',
648
+ ' rt_tol=PARAMS[\'rt_tol\']',
649
+ ' )',
650
+ ' ',
651
+ ' study.merge(',
652
+ ' method="qt",',
653
+ ' min_samples=PARAMS[\'min_samples_per_feature\'],',
654
+ ' threads=PARAMS[\'num_cores\'],',
655
+ ' rt_tol=PARAMS[\'rt_tol\']',
656
+ ' )',
657
+ ' study.find_iso()',
658
+ ' study.fill()',
659
+ ' study.integrate()',
660
+ ' ',
661
+ ' # Step 6/7: Saving results',
662
+ ' print("\\nStep 6/7: Saving results...")',
663
+ ' study.save()',
664
+ ' study.export_xlsx()',
665
+ ' study.export_mgf()',
666
+ ' study.export_mztab()',
667
+ ' ',
668
+ ' # Step 7: Plots',
669
+ ' print("\\nStep 7/7: Exporting plots...")',
670
+ ' study.plot_consensus_2d(filename="consensus.html")',
671
+ ' study.plot_consensus_2d(filename="consensus.png")',
672
+ ' study.plot_alignment(filename="alignment.html")',
673
+ ' study.plot_alignment(filename="alignment.png")',
674
+ ' study.plot_samples_pca(filename="pca.html")',
675
+ ' study.plot_samples_pca(filename="pca.png")',
676
+ ' study.plot_bpc(filename="bpc.html")',
677
+ ' study.plot_bpc(filename="bpc.png")',
678
+ ' study.plot_rt_correction(filename="rt_correction.html")',
679
+ ' study.plot_rt_correction(filename="rt_correction.png")',
680
+ ' ',
681
+ ' # Print summary',
682
+ ' study.info()',
683
+ ' total_time = time.time() - start_time',
684
+ ' print("\\n" + "=" * 70)',
685
+ ' print("ANALYSIS COMPLETE")',
686
+ ' print("=" * 70)',
687
+ ' print(f"Total processing time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")',
688
+ ' print(f"Raw files processed: {len(raw_files)}")',
689
+ ' print(f"Sample5 files created: {len(sample5_files)}")',
690
+ ' if hasattr(study, "consensus_df"):',
691
+ ' print(f"Consensus features generated: {len(study.consensus_df)}")',
692
+ ' print("=" * 70)',
693
+ ' ',
694
+ ' return True',
695
+ ' ',
696
+ ' except KeyboardInterrupt:',
697
+ ' print("\\nAnalysis interrupted by user")',
698
+ ' return False',
699
+ ' except Exception as e:',
700
+ ' print(f"Analysis failed with error: {e}")',
701
+ ' import traceback',
702
+ ' traceback.print_exc()',
703
+ ' return False',
704
+ '',
705
+ '',
706
+ 'if __name__ == "__main__":',
707
+ ' success = main()',
708
+ ' sys.exit(0 if success else 1)',
709
+ ]
710
+
711
+ return '\n'.join(script_lines)
712
+
713
+ def _generate_interactive_notebook_content(self, source_info: Dict[str, Any]) -> str:
714
+ """Generate the content for 2_interactive_analysis.py marimo notebook."""
715
+
716
+ notebook_lines = [
717
+ 'import marimo',
718
+ '',
719
+ '__generated_with = "0.9.14"',
720
+ 'app = marimo.App(width="medium")',
721
+ '',
722
+ '@app.cell',
723
+ 'def __():',
724
+ ' import marimo as mo',
725
+ ' return (mo,)',
726
+ '',
727
+ '@app.cell',
728
+ 'def __(mo):',
729
+ ' mo.md(r"""',
730
+ ' # MASSter Interactive Analysis',
731
+ ' ',
732
+ f' **Source:** {source_info.get("number_of_files", 0)} files detected',
733
+ f' **Polarity:** {source_info.get("polarity", "unknown")}',
734
+ ' ',
735
+ ' This notebook provides interactive exploration of your processed study.',
736
+ ' Make sure you have run `python 1_masster_workflow.py` first.',
737
+ ' """)',
738
+ ' return ()',
739
+ '',
740
+ '@app.cell',
741
+ 'def __():',
742
+ ' import masster',
743
+ ' return (masster,)',
744
+ '',
745
+ '@app.cell',
746
+ 'def __(masster):',
747
+ ' study = masster.Study(folder=".")',
748
+ ' return (study,)',
749
+ '',
750
+ '@app.cell',
751
+ 'def __(study):',
752
+ ' study.info()',
753
+ ' return ()',
754
+ '',
755
+ 'if __name__ == "__main__":',
756
+ ' app.run()',
757
+ ]
758
+
759
+ return '\n'.join(notebook_lines)
760
+
761
+ def _generate_instructions(self, source_info: Dict[str, Any], files_created: List[str]) -> List[str]:
762
+ """Generate usage instructions for the created scripts."""
763
+ instructions = [f"Source analysis: {source_info.get('number_of_files', 0)} files found",
764
+ f"Polarity detected: {source_info.get('polarity', 'unknown')}",
765
+ "Files created:"]
766
+ for file_path in files_created:
767
+ instructions.append(f" ✅ {str(Path(file_path).resolve())}")
768
+
769
+ # Find the workflow script name from created files
770
+ workflow_script_name = "1_masster_workflow.py"
771
+ for file_path in files_created:
772
+ if Path(file_path).name == "1_masster_workflow.py":
773
+ workflow_script_name = Path(file_path).name
774
+ break
775
+
776
+ instructions.extend([
777
+ "",
778
+ "Next steps:",
779
+ f"1. REVIEW PARAMETERS in {workflow_script_name}:",
780
+ f" In particular, verify the NOISE, CHROM_FWHM, and MIN_SAMPLES_FOR_MERGE",
781
+ "",
782
+ "2. EXECUTE SAMPLE PROCESSING:",
783
+ f" uv run python {workflow_script_name}",
784
+ "",
785
+ "3. INTERACTIVE ANALYSIS:",
786
+ f" uv run marimo edit {Path('2_interactive_analysis.py').name}",
787
+ ""]
788
+ )
789
+
790
+ return instructions
791
+
792
+ def execute(self) -> Dict[str, Any]:
793
+ """
794
+ Execute the sample processing workflow.
795
+
796
+ This method:
797
+ 1. Creates scripts if they don't exist (calls create_scripts())
798
+ 2. Runs the 1_masster_workflow.py script to process raw files
799
+
800
+ Returns:
801
+ Dictionary containing:
802
+ - status: "success", "error", or "scripts_created"
803
+ - message: Status message
804
+ - instructions: List of next steps
805
+ - files_created: List of created file paths (if scripts were created)
806
+ """
422
807
  try:
423
- print("Executing...")
808
+ workflow_script_path = self.folder_path / "1_masster_workflow.py"
809
+
810
+ # Check if workflow script exists, create it if not
811
+ if not workflow_script_path.exists():
812
+ print("📝 Workflow script not found, creating scripts first...")
813
+ result = self.create_scripts()
814
+ if result["status"] != "success":
815
+ return result
816
+
817
+ print("✅ Scripts created successfully")
818
+ print(f"📁 Output folder: {self.folder_path}")
819
+
820
+ # Execute the workflow script
821
+ print(f"🚀 Executing sample processing workflow...")
822
+ print(f"📄 Running: {workflow_script_path.name}")
823
+ print("=" * 60)
424
824
 
425
825
  import subprocess
426
826
  result = subprocess.run([
427
- sys.executable, str(script_path)
428
- ], cwd=str(self.folder_path), encoding='utf-8', errors='replace')
827
+ sys.executable, str(workflow_script_path)
828
+ ], cwd=str(self.folder_path))
429
829
 
430
830
  success = result.returncode == 0
431
831
 
432
832
  if success:
433
- print("=" * 70)
434
- print("Script execution completed successfully")
435
- print("=" * 70)
436
- print("For interactive analysis, run:")
437
- print(f" uv run marimo edit {self.folder_path / 'interactive.py'}")
438
- print("=" * 70)
833
+ print("=" * 60)
834
+ print(" Sample processing completed successfully!")
835
+ print("📋 Next step: Run interactive analysis")
836
+ print(" uv run marimo edit 2_interactive_analysis.py")
837
+ print("=" * 60)
838
+
839
+ return {
840
+ "status": "success",
841
+ "message": "Sample processing completed successfully",
842
+ "instructions": [
843
+ "✅ Sample processing completed",
844
+ "Next: uv run marimo edit 2_interactive_analysis.py"
845
+ ],
846
+ "files_created": []
847
+ }
439
848
  else:
440
- print("=" * 70)
441
- print(f"Script execution failed with return code: {result.returncode}")
849
+ return {
850
+ "status": "error",
851
+ "message": f"Workflow execution failed with return code {result.returncode}",
852
+ "instructions": [
853
+ "❌ Check the error messages above",
854
+ "Review parameters in 1_masster_workflow.py",
855
+ f"Try running manually: python {workflow_script_path.name}"
856
+ ],
857
+ "files_created": []
858
+ }
442
859
 
443
- return success
444
-
445
860
  except Exception as e:
446
- print(f"Error during script execution: {e}")
447
- return False
861
+ return {
862
+ "status": "error",
863
+ "message": f"Failed to execute workflow: {e}",
864
+ "instructions": [
865
+ "❌ Execution failed",
866
+ "Check that source files exist and are accessible",
867
+ "Verify folder permissions"
868
+ ],
869
+ "files_created": []
870
+ }
448
871
 
449
872
  def _generate_script_content(self) -> str:
450
873
  """Generate the complete analysis script content."""
@@ -751,154 +1174,8 @@ class Wizard:
751
1174
 
752
1175
  return '\n'.join(script_lines)
753
1176
 
754
- def _generate_notebook_content(self) -> str:
755
- """Generate the content for a marimo interactive notebook."""
756
-
757
- notebook_lines = [
758
- 'import marimo',
759
- '',
760
- '__generated_with = "0.9.14"',
761
- 'app = marimo.App(width="medium")',
762
- '',
763
- '',
764
- '@app.cell',
765
- 'def __():',
766
- ' import marimo as mo',
767
- ' return (mo,)',
768
- '',
769
- '',
770
- '@app.cell',
771
- 'def __(mo):',
772
- ' mo.md(r"""',
773
- ' # MASSter Interactive Analysis',
774
- ' ',
775
- ' This notebook provides interactive exploration of your mass spectrometry study results.',
776
- ' The study has been processed and is ready for analysis.',
777
- ' """)',
778
- '',
779
- '',
780
- '@app.cell',
781
- 'def __():',
782
- ' # Import masster',
783
- ' import masster',
784
- ' return (masster,)',
785
- '',
786
- '',
787
- '@app.cell',
788
- 'def __(masster):',
789
- ' # Load the processed study',
790
- f' study = masster.Study(folder=".")',
791
- ' study.load()',
792
- ' return (study,)',
793
- '',
794
- '',
795
- '@app.cell',
796
- 'def __(mo, study):',
797
- ' # Display study information',
798
- ' mo.md(f"""',
799
- ' ## Study Overview',
800
- ' ',
801
- ' **Samples:** {len(study.samples) if hasattr(study, "samples") else "Not loaded"}',
802
- ' ',
803
- ' **Features:** {len(study.consensus_df) if hasattr(study, "consensus_df") else "Not available"}',
804
- ' ',
805
- ' **Polarity:** {study.polarity if hasattr(study, "polarity") else "Unknown"}',
806
- ' """)',
807
- '',
808
- '',
809
- '@app.cell',
810
- 'def __(study):',
811
- ' # Print detailed study info',
812
- ' study.info()',
813
- '',
814
- '',
815
- '@app.cell',
816
- 'def __(mo):',
817
- ' mo.md(r"""',
818
- ' ## Quick Visualizations',
819
- ' ',
820
- ' Use the cells below to create interactive plots of your data.',
821
- ' """)',
822
- '',
823
- '',
824
- '@app.cell',
825
- 'def __(study):',
826
- ' # Generate consensus 2D plot',
827
- ' if hasattr(study, "consensus_df") and len(study.consensus_df) > 0:',
828
- ' study.plot_consensus_2d(filename="consensus_interactive.html")',
829
- ' print("Consensus 2D plot saved as: consensus_interactive.html")',
830
- ' else:',
831
- ' print("No consensus features available for plotting")',
832
- '',
833
- '',
834
- '@app.cell',
835
- 'def __(study):',
836
- ' # Generate PCA plot',
837
- ' if hasattr(study, "samples") and len(study.samples) > 1:',
838
- ' study.plot_samples_pca(filename="pca_interactive.html")',
839
- ' print("PCA plot saved as: pca_interactive.html")',
840
- ' else:',
841
- ' print("Not enough samples for PCA analysis")',
842
- '',
843
- '',
844
- '@app.cell',
845
- 'def __(mo):',
846
- ' mo.md(r"""',
847
- ' ## Data Export',
848
- ' ',
849
- ' Export your processed data in various formats.',
850
- ' """)',
851
- '',
852
- '',
853
- '@app.cell',
854
- 'def __(study):',
855
- ' # Export options',
856
- ' if hasattr(study, "consensus_df"):',
857
- ' # Export to Excel',
858
- ' study.export_xlsx(filename="study_results.xlsx")',
859
- ' print("✓ Results exported to: study_results.xlsx")',
860
- ' ',
861
- ' # Export to MGF',
862
- ' study.export_mgf(filename="study_spectra.mgf")',
863
- ' print("✓ Spectra exported to: study_spectra.mgf")',
864
- ' else:',
865
- ' print("No data available for export")',
866
- '',
867
- '',
868
- '@app.cell',
869
- 'def __(mo):',
870
- ' mo.md(r"""',
871
- ' ## Custom Analysis',
872
- ' ',
873
- ' Add your own analysis code in the cells below.',
874
- ' """)',
875
- '',
876
- '',
877
- '@app.cell',
878
- 'def __(study):',
879
- ' # Access consensus features dataframe',
880
- ' if hasattr(study, "consensus_df"):',
881
- ' df = study.consensus_df',
882
- ' print(f"Consensus features shape: {df.shape}")',
883
- ' print("\\nFirst 5 features:")',
884
- ' print(df.head())',
885
- ' return (df,) if "df" in locals() else ()',
886
- '',
887
- '',
888
- '@app.cell',
889
- 'def __():',
890
- ' # Your custom analysis here',
891
- ' pass',
892
- '',
893
- '',
894
- 'if __name__ == "__main__":',
895
- ' app.run()',
896
- ]
897
-
898
- return '\n'.join(notebook_lines)
899
-
900
1177
 
901
- def create_analysis(
1178
+ def create_script(
902
1179
  source: str,
903
1180
  folder: str,
904
1181
  filename: str = 'run_masster.py',
@@ -929,8 +1206,8 @@ def create_analysis(
929
1206
  True if script was generated successfully, False otherwise
930
1207
 
931
1208
  Example:
932
- >>> from masster.wizard import create_analysis
933
- >>> create_analysis(
1209
+ >>> from masster.wizard import create_script
1210
+ >>> create_script(
934
1211
  ... source=r'D:\\Data\\raw_files',
935
1212
  ... folder=r'D:\\Data\\output',
936
1213
  ... filename='run_masster.py',
@@ -976,8 +1253,9 @@ def create_analysis(
976
1253
  # Create a temporary Wizard instance to generate the script
977
1254
  temp_wizard = Wizard(params=wizard_params)
978
1255
 
979
- # Generate the script using the instance method
980
- success = temp_wizard.create_analysis(filename)
1256
+ # Generate the scripts using the instance method
1257
+ result = temp_wizard.create_scripts()
1258
+ success = result.get("status") == "success"
981
1259
 
982
1260
  return success
983
1261
 
@@ -988,7 +1266,7 @@ def create_analysis(
988
1266
  return False
989
1267
 
990
1268
 
991
- def analyze(
1269
+ def execute(
992
1270
  source: str,
993
1271
  folder: str,
994
1272
  filename: str = 'run_masster.py',
@@ -1001,7 +1279,7 @@ def analyze(
1001
1279
  """
1002
1280
  Create and execute a standalone analysis script for automated MS data processing.
1003
1281
 
1004
- This function generates a Python script with the same parameters as create_analysis(),
1282
+ This function generates a Python script with the same parameters as create_script(),
1005
1283
  but immediately executes it after creation. Combines script generation and execution
1006
1284
  in a single step.
1007
1285
 
@@ -1019,8 +1297,8 @@ def analyze(
1019
1297
  True if script was created and executed successfully, False otherwise
1020
1298
 
1021
1299
  Example:
1022
- >>> from masster.wizard import analyze
1023
- >>> analyze(
1300
+ >>> from masster.wizard import execute
1301
+ >>> execute(
1024
1302
  ... source=r'D:\\Data\\raw_files',
1025
1303
  ... folder=r'D:\\Data\\output',
1026
1304
  ... polarity='positive'
@@ -1028,8 +1306,8 @@ def analyze(
1028
1306
  """
1029
1307
 
1030
1308
  try:
1031
- # First, create the script using create_analysis()
1032
- script_created = create_analysis(
1309
+ # First, create the script using create_script()
1310
+ script_created = create_script(
1033
1311
  source=source,
1034
1312
  folder=folder,
1035
1313
  filename=filename,
@@ -1089,244 +1367,73 @@ def analyze(
1089
1367
  return False
1090
1368
 
1091
1369
 
1092
- def create_notebook(
1093
- source: str,
1094
- folder: str,
1095
- filename: str = 'interactive.py',
1370
+ def create_scripts(
1371
+ source: str = "",
1372
+ folder: str = "",
1096
1373
  polarity: str = "positive",
1097
1374
  adducts: Optional[List[str]] = None,
1098
- params: Optional[wizard_def] = None,
1099
1375
  num_cores: int = 0,
1100
1376
  **kwargs
1101
- ) -> bool:
1377
+ ) -> Dict[str, Any]:
1102
1378
  """
1103
- Create a marimo interactive notebook for analysis without initializing a Wizard instance.
1379
+ Create analysis scripts without explicitly instantiating a Wizard.
1104
1380
 
1105
- This function generates a marimo notebook file that provides interactive exploration
1106
- of mass spectrometry study results with the specified configuration.
1381
+ This is a convenience function that creates a Wizard instance internally
1382
+ and calls its create_scripts() method.
1107
1383
 
1108
1384
  Parameters:
1109
1385
  source: Directory containing raw data files
1110
- folder: Output directory for processed study
1111
- filename: Filename for the generated notebook (should end with .py)
1386
+ folder: Output directory for processed study
1112
1387
  polarity: Ion polarity mode ("positive" or "negative")
1113
1388
  adducts: List of adduct specifications (auto-set if None)
1114
- params: Custom wizard_def parameters (optional)
1115
1389
  num_cores: Number of CPU cores (0 = auto-detect)
1116
- **kwargs: Additional parameters to override defaults
1390
+ **kwargs: Additional parameters
1117
1391
 
1118
1392
  Returns:
1119
- True if notebook was generated successfully, False otherwise
1393
+ Dictionary containing:
1394
+ - status: "success" or "error"
1395
+ - message: Status message
1396
+ - instructions: List of next steps
1397
+ - files_created: List of created file paths
1398
+ - source_info: Metadata about source files
1120
1399
 
1121
1400
  Example:
1122
- >>> from masster.wizard import create_notebook
1123
- >>> create_notebook(
1401
+ >>> import masster.wizard
1402
+ >>> result = masster.wizard.create_scripts(
1124
1403
  ... source=r'D:\\Data\\raw_files',
1125
1404
  ... folder=r'D:\\Data\\output',
1126
- ... filename='interactive.py',
1127
- ... polarity='positive'
1405
+ ... polarity='negative'
1128
1406
  ... )
1407
+ >>> print("Status:", result["status"])
1129
1408
  """
1130
1409
 
1131
1410
  try:
1132
- # Create parameters
1133
- if params is not None:
1134
- # Use provided params as base
1135
- wizard_params = params
1136
- # Update with provided values
1137
- wizard_params.source = source
1138
- wizard_params.folder = folder
1139
- if polarity != "positive": # Only override if explicitly different
1140
- wizard_params.polarity = polarity
1141
- if num_cores > 0:
1142
- wizard_params.num_cores = num_cores
1143
- if adducts is not None:
1144
- wizard_params.adducts = adducts
1145
- else:
1146
- # Create new params with provided values
1147
- wizard_params = wizard_def(
1148
- source=source,
1149
- folder=folder,
1150
- polarity=polarity,
1151
- num_cores=max(1, int(multiprocessing.cpu_count() * 0.75)) if num_cores <= 0 else num_cores
1152
- )
1153
-
1154
- if adducts is not None:
1155
- wizard_params.adducts = adducts
1156
-
1157
- # Apply any additional kwargs
1158
- for key, value in kwargs.items():
1159
- if hasattr(wizard_params, key):
1160
- setattr(wizard_params, key, value)
1161
-
1162
- # Ensure study folder exists
1163
- study_path = Path(folder)
1164
- study_path.mkdir(parents=True, exist_ok=True)
1165
-
1166
- # Generate notebook content
1167
- notebook_content = _generate_notebook_content(wizard_params)
1411
+ # Auto-detect optimal number of cores if not specified
1412
+ if num_cores <= 0:
1413
+ num_cores = max(1, int(multiprocessing.cpu_count() * 0.75))
1168
1414
 
1169
- # Write notebook file
1170
- notebook_path = study_path / filename
1171
- with open(notebook_path, 'w', encoding='utf-8') as f:
1172
- f.write(notebook_content)
1415
+ # Create Wizard instance
1416
+ wizard = Wizard(
1417
+ source=source,
1418
+ folder=folder,
1419
+ polarity=polarity,
1420
+ adducts=adducts,
1421
+ num_cores=num_cores,
1422
+ **kwargs
1423
+ )
1173
1424
 
1174
- print(f"Interactive notebook created: {notebook_path}")
1175
- return True
1425
+ # Call the instance method
1426
+ return wizard.create_scripts()
1176
1427
 
1177
1428
  except Exception as e:
1178
- print(f"Failed to create notebook: {e}")
1179
- import traceback
1180
- traceback.print_exc()
1181
- return False
1182
-
1183
-
1184
- def _generate_notebook_content(params: wizard_def) -> str:
1185
- """Generate the content for a marimo interactive notebook."""
1186
-
1187
- notebook_lines = [
1188
- 'import marimo',
1189
- '',
1190
- '__generated_with = "0.9.14"',
1191
- 'app = marimo.App(width="medium")',
1192
- '',
1193
- '',
1194
- '@app.cell',
1195
- 'def __():',
1196
- ' import marimo as mo',
1197
- ' return (mo,)',
1198
- '',
1199
- '',
1200
- '@app.cell',
1201
- 'def __(mo):',
1202
- ' mo.md(r"""',
1203
- ' # MASSter Interactive Analysis',
1204
- ' ',
1205
- ' This notebook provides interactive exploration of your mass spectrometry study results.',
1206
- ' The study has been processed and is ready for analysis.',
1207
- ' """)',
1208
- '',
1209
- '',
1210
- '@app.cell',
1211
- 'def __():',
1212
- ' # Import masster',
1213
- ' import masster',
1214
- ' return (masster,)',
1215
- '',
1216
- '',
1217
- '@app.cell',
1218
- 'def __(masster):',
1219
- ' # Load the processed study',
1220
- f' study = masster.Study(folder=".")',
1221
- ' study.load()',
1222
- ' return (study,)',
1223
- '',
1224
- '',
1225
- '@app.cell',
1226
- 'def __(mo, study):',
1227
- ' # Display study information',
1228
- ' mo.md(f"""',
1229
- ' ## Study Overview',
1230
- ' ',
1231
- ' **Samples:** {len(study.samples) if hasattr(study, "samples") else "Not loaded"}',
1232
- ' ',
1233
- ' **Features:** {len(study.consensus_df) if hasattr(study, "consensus_df") else "Not available"}',
1234
- ' ',
1235
- ' **Polarity:** {study.polarity if hasattr(study, "polarity") else "Unknown"}',
1236
- ' """)',
1237
- '',
1238
- '',
1239
- '@app.cell',
1240
- 'def __(study):',
1241
- ' # Print detailed study info',
1242
- ' study.info()',
1243
- '',
1244
- '',
1245
- '@app.cell',
1246
- 'def __(mo):',
1247
- ' mo.md(r"""',
1248
- ' ## Quick Visualizations',
1249
- ' ',
1250
- ' Use the cells below to create interactive plots of your data.',
1251
- ' """)',
1252
- '',
1253
- '',
1254
- '@app.cell',
1255
- 'def __(study):',
1256
- ' # Generate consensus 2D plot',
1257
- ' if hasattr(study, "consensus_df") and len(study.consensus_df) > 0:',
1258
- ' study.plot_consensus_2d(filename="consensus_interactive.html")',
1259
- ' print("Consensus 2D plot saved as: consensus_interactive.html")',
1260
- ' else:',
1261
- ' print("No consensus features available for plotting")',
1262
- '',
1263
- '',
1264
- '@app.cell',
1265
- 'def __(study):',
1266
- ' # Generate PCA plot',
1267
- ' if hasattr(study, "samples") and len(study.samples) > 1:',
1268
- ' study.plot_samples_pca(filename="pca_interactive.html")',
1269
- ' print("PCA plot saved as: pca_interactive.html")',
1270
- ' else:',
1271
- ' print("Not enough samples for PCA analysis")',
1272
- '',
1273
- '',
1274
- '@app.cell',
1275
- 'def __(mo):',
1276
- ' mo.md(r"""',
1277
- ' ## Data Export',
1278
- ' ',
1279
- ' Export your processed data in various formats.',
1280
- ' """)',
1281
- '',
1282
- '',
1283
- '@app.cell',
1284
- 'def __(study):',
1285
- ' # Export options',
1286
- ' if hasattr(study, "consensus_df"):',
1287
- ' # Export to Excel',
1288
- ' study.export_xlsx(filename="study_results.xlsx")',
1289
- ' print("✓ Results exported to: study_results.xlsx")',
1290
- ' ',
1291
- ' # Export to MGF',
1292
- ' study.export_mgf(filename="study_spectra.mgf")',
1293
- ' print("✓ Spectra exported to: study_spectra.mgf")',
1294
- ' else:',
1295
- ' print("No data available for export")',
1296
- '',
1297
- '',
1298
- '@app.cell',
1299
- 'def __(mo):',
1300
- ' mo.md(r"""',
1301
- ' ## Custom Analysis',
1302
- ' ',
1303
- ' Add your own analysis code in the cells below.',
1304
- ' """)',
1305
- '',
1306
- '',
1307
- '@app.cell',
1308
- 'def __(study):',
1309
- ' # Access consensus features dataframe',
1310
- ' if hasattr(study, "consensus_df"):',
1311
- ' df = study.consensus_df',
1312
- ' print(f"Consensus features shape: {df.shape}")',
1313
- ' print("\\nFirst 5 features:")',
1314
- ' print(df.head())',
1315
- ' return (df,) if "df" in locals() else ()',
1316
- '',
1317
- '',
1318
- '@app.cell',
1319
- 'def __():',
1320
- ' # Your custom analysis here',
1321
- ' pass',
1322
- '',
1323
- '',
1324
- 'if __name__ == "__main__":',
1325
- ' app.run()',
1326
- ]
1327
-
1328
- return '\n'.join(notebook_lines)
1429
+ return {
1430
+ "status": "error",
1431
+ "message": f"Failed to create scripts: {e}",
1432
+ "instructions": [],
1433
+ "files_created": [],
1434
+ "source_info": {}
1435
+ }
1329
1436
 
1330
1437
 
1331
1438
  # Export the main classes and functions
1332
- __all__ = ["Wizard", "wizard_def", "create_analysis", "create_notebook", "analyze"]
1439
+ __all__ = ["Wizard", "wizard_def", "create_scripts"]