masster 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/study/h5.py +317 -137
- masster/study/load.py +23 -134
- masster/study/save.py +0 -6
- masster/study/study.py +26 -3
- {masster-0.2.1.dist-info → masster-0.2.3.dist-info}/METADATA +8 -151
- {masster-0.2.1.dist-info → masster-0.2.3.dist-info}/RECORD +10 -10
- {masster-0.2.1.dist-info → masster-0.2.3.dist-info}/WHEEL +0 -0
- {masster-0.2.1.dist-info → masster-0.2.3.dist-info}/entry_points.txt +0 -0
- {masster-0.2.1.dist-info → masster-0.2.3.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/study/h5.py
CHANGED
|
@@ -2,25 +2,7 @@
|
|
|
2
2
|
_study_h5.py
|
|
3
3
|
|
|
4
4
|
This module provides HDF5-based save/load functionality for the Study class.
|
|
5
|
-
It handles
|
|
6
|
-
# elif col == "spectrum":
|
|
7
|
-
# Handle single Spectrum objects
|
|
8
|
-
data_as_str = []
|
|
9
|
-
for item in data:
|
|
10
|
-
if item is not None:
|
|
11
|
-
data_as_str.append(item.to_json())
|
|
12
|
-
else:
|
|
13
|
-
data_as_str.append("None")
|
|
14
|
-
group.create_dataset(col, data=data_as_str, **optimal_compression)hromatogram objects
|
|
15
|
-
data_as_str = []
|
|
16
|
-
for item in data:
|
|
17
|
-
if item is not None:
|
|
18
|
-
data_as_str.append(item.to_json())
|
|
19
|
-
else:
|
|
20
|
-
data_as_str.append("None")
|
|
21
|
-
group.create_dataset(col, data=data_as_str, **optimal_compression) else:
|
|
22
|
-
data_as_str.append("null")
|
|
23
|
-
group.create_dataset(col, data=data_as_str, **optimal_compression)n and deserialization of Polars DataFrames with complex objects
|
|
5
|
+
It handles serialization and deserialization of Polars DataFrames with complex objects
|
|
24
6
|
like Chromatogram and Spectrum instances.
|
|
25
7
|
|
|
26
8
|
Key Features:
|
|
@@ -449,7 +431,7 @@ def _save_study5(self, filename=None):
|
|
|
449
431
|
if not filename.endswith(".study5"):
|
|
450
432
|
filename += ".study5"
|
|
451
433
|
|
|
452
|
-
self.logger.
|
|
434
|
+
self.logger.info(f"Saving study to {filename}")
|
|
453
435
|
|
|
454
436
|
# delete existing file if it exists
|
|
455
437
|
if os.path.exists(filename):
|
|
@@ -529,7 +511,7 @@ def _save_study5(self, filename=None):
|
|
|
529
511
|
data = consensus_ms2[col] if dtype == "object" else consensus_ms2[col].to_list()
|
|
530
512
|
_save_dataframe_column(consensus_ms2_group, col, data, dtype, self.logger)
|
|
531
513
|
|
|
532
|
-
self.logger.
|
|
514
|
+
self.logger.debug(f"Save completed for {filename}")
|
|
533
515
|
|
|
534
516
|
|
|
535
517
|
def _load_study5(self, filename=None):
|
|
@@ -551,6 +533,11 @@ def _load_study5(self, filename=None):
|
|
|
551
533
|
- Properly handles MS2 scan lists and spectrum lists
|
|
552
534
|
- Restores parameters dictionary from JSON serialization
|
|
553
535
|
"""
|
|
536
|
+
from datetime import datetime
|
|
537
|
+
from tqdm import tqdm
|
|
538
|
+
|
|
539
|
+
self.logger.info(f"Loading study from {filename}")
|
|
540
|
+
|
|
554
541
|
# Handle default filename
|
|
555
542
|
if filename is None:
|
|
556
543
|
if self.default_folder is not None:
|
|
@@ -573,134 +560,327 @@ def _load_study5(self, filename=None):
|
|
|
573
560
|
if not schema:
|
|
574
561
|
self.logger.warning(f"Schema file {schema_path} not found. Using default types.")
|
|
575
562
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
if isinstance(parameters_data, bytes):
|
|
589
|
-
parameters_data = parameters_data.decode("utf-8")
|
|
563
|
+
# Define loading steps for progress tracking
|
|
564
|
+
loading_steps = [
|
|
565
|
+
"metadata",
|
|
566
|
+
"samples_df",
|
|
567
|
+
"features_df",
|
|
568
|
+
"consensus_df",
|
|
569
|
+
"consensus_mapping_df",
|
|
570
|
+
"consensus_ms2"
|
|
571
|
+
]
|
|
572
|
+
|
|
573
|
+
# Check if progress bar should be disabled based on log level
|
|
574
|
+
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
590
575
|
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
576
|
+
with h5py.File(filename, "r") as f:
|
|
577
|
+
# Use progress bar to show loading progress
|
|
578
|
+
with tqdm(
|
|
579
|
+
total=len(loading_steps),
|
|
580
|
+
desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading study",
|
|
581
|
+
disable=tdqm_disable,
|
|
582
|
+
) as pbar:
|
|
583
|
+
|
|
584
|
+
# Load metadata
|
|
585
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading metadata")
|
|
586
|
+
if "metadata" in f:
|
|
587
|
+
metadata = f["metadata"]
|
|
588
|
+
self.default_folder = _decode_bytes_attr(metadata.attrs.get("default_folder", ""))
|
|
589
|
+
if hasattr(self, "label"):
|
|
590
|
+
self.label = _decode_bytes_attr(metadata.attrs.get("label", ""))
|
|
591
|
+
|
|
592
|
+
# Load parameters from JSON
|
|
593
|
+
if "parameters" in metadata:
|
|
594
|
+
try:
|
|
595
|
+
parameters_data = metadata["parameters"][()]
|
|
596
|
+
if isinstance(parameters_data, bytes):
|
|
597
|
+
parameters_data = parameters_data.decode("utf-8")
|
|
598
|
+
|
|
599
|
+
if parameters_data and parameters_data != "":
|
|
600
|
+
self.history = json.loads(parameters_data)
|
|
601
|
+
else:
|
|
602
|
+
self.history = {}
|
|
603
|
+
except (json.JSONDecodeError, ValueError, TypeError) as e:
|
|
604
|
+
self.logger.warning(f"Failed to deserialize parameters: {e}")
|
|
594
605
|
self.history = {}
|
|
595
|
-
|
|
596
|
-
self.logger.warning(f"Failed to deserialize parameters: {e}")
|
|
606
|
+
else:
|
|
597
607
|
self.history = {}
|
|
598
|
-
else:
|
|
599
|
-
self.history = {}
|
|
600
608
|
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
609
|
+
# Reconstruct self.parameters from loaded history
|
|
610
|
+
from masster.study.defaults.study_def import study_defaults
|
|
611
|
+
|
|
612
|
+
# Always create a fresh study_defaults object to ensure we have all defaults
|
|
613
|
+
self.parameters = study_defaults()
|
|
614
|
+
|
|
615
|
+
# Update parameters from loaded history if available
|
|
616
|
+
if self.history and "study" in self.history:
|
|
617
|
+
study_params = self.history["study"]
|
|
618
|
+
if isinstance(study_params, dict):
|
|
619
|
+
failed_params = self.parameters.set_from_dict(study_params, validate=False)
|
|
620
|
+
if failed_params:
|
|
621
|
+
self.logger.debug(f"Could not set study parameters: {failed_params}")
|
|
622
|
+
else:
|
|
623
|
+
self.logger.debug("Successfully updated parameters from loaded history")
|
|
614
624
|
else:
|
|
615
|
-
self.logger.debug("
|
|
625
|
+
self.logger.debug("Study parameters in history are not a valid dictionary")
|
|
616
626
|
else:
|
|
617
|
-
self.logger.debug("
|
|
627
|
+
self.logger.debug("No study parameters found in history, using defaults")
|
|
628
|
+
|
|
629
|
+
# Synchronize instance attributes with parameters (similar to __init__)
|
|
630
|
+
# Note: default_folder and label are already loaded from metadata attributes above
|
|
631
|
+
# but we ensure they match the parameters for consistency
|
|
632
|
+
if hasattr(self.parameters, 'default_folder') and self.parameters.default_folder is not None:
|
|
633
|
+
self.default_folder = self.parameters.default_folder
|
|
634
|
+
if hasattr(self.parameters, 'label') and self.parameters.label is not None:
|
|
635
|
+
self.label = self.parameters.label
|
|
636
|
+
if hasattr(self.parameters, 'log_level'):
|
|
637
|
+
self.log_level = self.parameters.log_level
|
|
638
|
+
if hasattr(self.parameters, 'log_label'):
|
|
639
|
+
self.log_label = self.parameters.log_label if self.parameters.log_label is not None else ""
|
|
640
|
+
if hasattr(self.parameters, 'log_sink'):
|
|
641
|
+
self.log_sink = self.parameters.log_sink
|
|
642
|
+
pbar.update(1)
|
|
643
|
+
|
|
644
|
+
# Load samples_df
|
|
645
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading samples")
|
|
646
|
+
if "samples" in f and len(f["samples"].keys()) > 0:
|
|
647
|
+
self.samples_df = _load_dataframe_from_group(f["samples"], schema, "samples_df", self.logger)
|
|
618
648
|
else:
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
649
|
+
# Initialize empty samples_df with the correct schema if no data exists
|
|
650
|
+
self.logger.debug("No samples data found in study5 file. Initializing empty samples_df.")
|
|
651
|
+
self.samples_df = pl.DataFrame(
|
|
652
|
+
{
|
|
653
|
+
"sample_uid": [],
|
|
654
|
+
"sample_name": [],
|
|
655
|
+
"sample_path": [],
|
|
656
|
+
"sample_type": [],
|
|
657
|
+
"size": [],
|
|
658
|
+
"map_id": [],
|
|
659
|
+
},
|
|
660
|
+
schema={
|
|
661
|
+
"sample_uid": pl.Int64,
|
|
662
|
+
"sample_name": pl.Utf8,
|
|
663
|
+
"sample_path": pl.Utf8,
|
|
664
|
+
"sample_type": pl.Utf8,
|
|
665
|
+
"size": pl.Int64,
|
|
666
|
+
"map_id": pl.Utf8,
|
|
667
|
+
},
|
|
668
|
+
)
|
|
669
|
+
pbar.update(1)
|
|
670
|
+
|
|
671
|
+
# Load features_df
|
|
672
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading features")
|
|
673
|
+
if "features" in f and len(f["features"].keys()) > 0:
|
|
674
|
+
object_columns = ["chrom", "ms2_scans", "ms2_specs"]
|
|
675
|
+
self.features_df = _load_dataframe_from_group(f["features"], schema, "features_df", self.logger, object_columns)
|
|
676
|
+
else:
|
|
677
|
+
self.features_df = None
|
|
678
|
+
pbar.update(1)
|
|
679
|
+
|
|
680
|
+
# Load consensus_df
|
|
681
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus")
|
|
682
|
+
if "consensus" in f and len(f["consensus"].keys()) > 0:
|
|
683
|
+
self.consensus_df = _load_dataframe_from_group(f["consensus"], schema, "consensus_df", self.logger)
|
|
684
|
+
else:
|
|
685
|
+
self.consensus_df = None
|
|
686
|
+
pbar.update(1)
|
|
687
|
+
|
|
688
|
+
# Load consensus_mapping_df
|
|
689
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus mapping")
|
|
690
|
+
if "consensus_mapping" in f and len(f["consensus_mapping"].keys()) > 0:
|
|
691
|
+
self.consensus_mapping_df = _load_dataframe_from_group(f["consensus_mapping"], schema, "consensus_mapping_df", self.logger)
|
|
692
|
+
else:
|
|
693
|
+
self.consensus_mapping_df = None
|
|
694
|
+
pbar.update(1)
|
|
695
|
+
|
|
696
|
+
# Load consensus_ms2
|
|
697
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus MS2")
|
|
698
|
+
if "consensus_ms2" in f and len(f["consensus_ms2"].keys()) > 0:
|
|
699
|
+
object_columns = ["spec"]
|
|
700
|
+
self.consensus_ms2 = _load_dataframe_from_group(f["consensus_ms2"], schema, "consensus_ms2", self.logger, object_columns)
|
|
701
|
+
else:
|
|
702
|
+
self.consensus_ms2 = None
|
|
703
|
+
pbar.update(1)
|
|
704
|
+
|
|
705
|
+
self.logger.info(f"Study loaded from {filename}")
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
def _load_h5(self, filename=None):
|
|
709
|
+
"""
|
|
710
|
+
Load Study instance data from a legacy .h5 HDF5 file with progress tracking.
|
|
711
|
+
|
|
712
|
+
This is a legacy method for loading older HDF5 format files. For new files,
|
|
713
|
+
use _load_study5() which has improved schema handling and performance.
|
|
714
|
+
|
|
715
|
+
Args:
|
|
716
|
+
filename (str, optional): Path to the .h5 HDF5 file to load. If None, uses default.
|
|
717
|
+
|
|
718
|
+
Returns:
|
|
719
|
+
None (modifies self in place)
|
|
720
|
+
|
|
721
|
+
Notes:
|
|
722
|
+
- Legacy format loader with basic DataFrame reconstruction
|
|
723
|
+
- Includes progress bar for loading steps
|
|
724
|
+
- For new projects, prefer _load_study5() method
|
|
725
|
+
"""
|
|
726
|
+
from datetime import datetime
|
|
727
|
+
from tqdm import tqdm
|
|
728
|
+
|
|
729
|
+
# Handle default filename
|
|
730
|
+
if filename is None:
|
|
731
|
+
if self.default_folder is not None:
|
|
732
|
+
filename = os.path.join(self.default_folder, "study.h5")
|
|
638
733
|
else:
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
734
|
+
self.logger.error("Either filename or default_folder must be provided")
|
|
735
|
+
return
|
|
736
|
+
|
|
737
|
+
# Add .h5 extension if not provided
|
|
738
|
+
if not filename.endswith(".h5"):
|
|
739
|
+
filename += ".h5"
|
|
740
|
+
|
|
741
|
+
if not os.path.exists(filename):
|
|
742
|
+
self.logger.error(f"File {filename} does not exist")
|
|
743
|
+
return
|
|
744
|
+
|
|
745
|
+
# Define loading steps for progress tracking
|
|
746
|
+
loading_steps = [
|
|
747
|
+
"metadata",
|
|
748
|
+
"samples_df",
|
|
749
|
+
"features_df",
|
|
750
|
+
"consensus_df",
|
|
751
|
+
"consensus_mapping_df"
|
|
752
|
+
]
|
|
753
|
+
|
|
754
|
+
# Check if progress bar should be disabled based on log level
|
|
755
|
+
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
756
|
+
|
|
757
|
+
with h5py.File(filename, "r") as f:
|
|
758
|
+
# Use progress bar to show loading progress
|
|
759
|
+
with tqdm(
|
|
760
|
+
total=len(loading_steps),
|
|
761
|
+
desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading legacy study",
|
|
762
|
+
disable=tdqm_disable,
|
|
763
|
+
) as pbar:
|
|
764
|
+
|
|
765
|
+
# Load metadata
|
|
766
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading metadata")
|
|
767
|
+
if "metadata" in f:
|
|
768
|
+
metadata = f["metadata"]
|
|
769
|
+
self.default_folder = _decode_bytes_attr(metadata.attrs.get("default_folder", ""))
|
|
770
|
+
if hasattr(self, "label"):
|
|
771
|
+
self.label = _decode_bytes_attr(metadata.attrs.get("label", ""))
|
|
772
|
+
|
|
773
|
+
# Load parameters from JSON if available
|
|
774
|
+
if "parameters" in metadata:
|
|
775
|
+
try:
|
|
776
|
+
parameters_data = metadata["parameters"][()]
|
|
777
|
+
if isinstance(parameters_data, bytes):
|
|
778
|
+
parameters_data = parameters_data.decode("utf-8")
|
|
779
|
+
|
|
780
|
+
if parameters_data and parameters_data != "":
|
|
781
|
+
self.history = json.loads(parameters_data)
|
|
782
|
+
else:
|
|
783
|
+
self.history = {}
|
|
784
|
+
except (json.JSONDecodeError, ValueError, TypeError) as e:
|
|
785
|
+
self.logger.warning(f"Failed to deserialize parameters: {e}")
|
|
786
|
+
self.history = {}
|
|
787
|
+
else:
|
|
788
|
+
self.history = {}
|
|
789
|
+
pbar.update(1)
|
|
790
|
+
|
|
791
|
+
# Load samples_df (legacy format)
|
|
792
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading samples")
|
|
793
|
+
if "samples" in f and len(f["samples"].keys()) > 0:
|
|
794
|
+
samples_data = {}
|
|
795
|
+
for col in f["samples"].keys():
|
|
796
|
+
column_data = f["samples"][col][:]
|
|
797
|
+
# Handle byte strings
|
|
798
|
+
if len(column_data) > 0 and isinstance(column_data[0], bytes):
|
|
799
|
+
column_data = [item.decode("utf-8") if isinstance(item, bytes) else item for item in column_data]
|
|
800
|
+
samples_data[col] = column_data
|
|
801
|
+
|
|
802
|
+
if samples_data:
|
|
803
|
+
self.samples_df = pl.DataFrame(samples_data)
|
|
804
|
+
else:
|
|
805
|
+
# Initialize empty samples_df
|
|
806
|
+
self.samples_df = pl.DataFrame({
|
|
807
|
+
"sample_uid": [],
|
|
808
|
+
"sample_name": [],
|
|
809
|
+
"sample_path": [],
|
|
810
|
+
"sample_type": [],
|
|
811
|
+
"size": [],
|
|
812
|
+
"map_id": [],
|
|
813
|
+
})
|
|
814
|
+
else:
|
|
815
|
+
self.samples_df = pl.DataFrame({
|
|
663
816
|
"sample_uid": [],
|
|
664
817
|
"sample_name": [],
|
|
665
818
|
"sample_path": [],
|
|
666
819
|
"sample_type": [],
|
|
667
820
|
"size": [],
|
|
668
821
|
"map_id": [],
|
|
669
|
-
}
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
822
|
+
})
|
|
823
|
+
pbar.update(1)
|
|
824
|
+
|
|
825
|
+
# Load features_df (legacy format)
|
|
826
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading features")
|
|
827
|
+
if "features" in f and len(f["features"].keys()) > 0:
|
|
828
|
+
features_data = {}
|
|
829
|
+
for col in f["features"].keys():
|
|
830
|
+
column_data = f["features"][col][:]
|
|
831
|
+
# Handle special object columns
|
|
832
|
+
if col in ["chrom", "ms2_specs"]:
|
|
833
|
+
reconstructed_data = _reconstruct_object_column(column_data, col)
|
|
834
|
+
features_data[col] = reconstructed_data
|
|
835
|
+
else:
|
|
836
|
+
# Handle byte strings
|
|
837
|
+
if len(column_data) > 0 and isinstance(column_data[0], bytes):
|
|
838
|
+
column_data = [item.decode("utf-8") if isinstance(item, bytes) else item for item in column_data]
|
|
839
|
+
features_data[col] = column_data
|
|
840
|
+
|
|
841
|
+
if features_data:
|
|
842
|
+
# Create DataFrame with Object columns handled properly
|
|
843
|
+
object_columns = ["chrom", "ms2_specs"]
|
|
844
|
+
self.features_df = _create_dataframe_with_objects(features_data, object_columns)
|
|
845
|
+
else:
|
|
846
|
+
self.features_df = None
|
|
847
|
+
else:
|
|
848
|
+
self.features_df = None
|
|
849
|
+
pbar.update(1)
|
|
850
|
+
|
|
851
|
+
# Load consensus_df (legacy format)
|
|
852
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus")
|
|
853
|
+
if "consensus" in f and len(f["consensus"].keys()) > 0:
|
|
854
|
+
consensus_data = {}
|
|
855
|
+
for col in f["consensus"].keys():
|
|
856
|
+
column_data = f["consensus"][col][:]
|
|
857
|
+
# Handle byte strings
|
|
858
|
+
if len(column_data) > 0 and isinstance(column_data[0], bytes):
|
|
859
|
+
column_data = [item.decode("utf-8") if isinstance(item, bytes) else item for item in column_data]
|
|
860
|
+
consensus_data[col] = column_data
|
|
861
|
+
|
|
862
|
+
if consensus_data:
|
|
863
|
+
self.consensus_df = pl.DataFrame(consensus_data)
|
|
864
|
+
else:
|
|
865
|
+
self.consensus_df = None
|
|
866
|
+
else:
|
|
867
|
+
self.consensus_df = None
|
|
868
|
+
pbar.update(1)
|
|
869
|
+
|
|
870
|
+
# Load consensus_mapping_df (legacy format)
|
|
871
|
+
pbar.set_description(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus mapping")
|
|
872
|
+
if "consensus_mapping" in f and len(f["consensus_mapping"].keys()) > 0:
|
|
873
|
+
mapping_data = {}
|
|
874
|
+
for col in f["consensus_mapping"].keys():
|
|
875
|
+
column_data = f["consensus_mapping"][col][:]
|
|
876
|
+
mapping_data[col] = column_data
|
|
877
|
+
|
|
878
|
+
if mapping_data:
|
|
879
|
+
self.consensus_mapping_df = pl.DataFrame(mapping_data)
|
|
880
|
+
else:
|
|
881
|
+
self.consensus_mapping_df = None
|
|
882
|
+
else:
|
|
883
|
+
self.consensus_mapping_df = None
|
|
884
|
+
pbar.update(1)
|
|
705
885
|
|
|
706
|
-
self.logger.info(f"
|
|
886
|
+
self.logger.info(f"Legacy study loaded from {filename}")
|
masster/study/load.py
CHANGED
|
@@ -256,7 +256,8 @@ def load(self, filename=None):
|
|
|
256
256
|
else:
|
|
257
257
|
self.logger.error("Either filename or default_folder must be provided")
|
|
258
258
|
return
|
|
259
|
-
|
|
259
|
+
|
|
260
|
+
self.logger.info(f"Loading study from {filename}")
|
|
260
261
|
self._load_study5(filename)
|
|
261
262
|
# After loading the study, check if consensus XML exists and load it
|
|
262
263
|
consensus_xml_path = filename.replace(".study5", ".consensusXML")
|
|
@@ -267,13 +268,13 @@ def load(self, filename=None):
|
|
|
267
268
|
self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
|
|
268
269
|
|
|
269
270
|
|
|
270
|
-
def
|
|
271
|
+
def fill_chrom_single(
|
|
271
272
|
self,
|
|
272
|
-
uids=
|
|
273
|
-
mz_tol=
|
|
274
|
-
rt_tol=
|
|
275
|
-
min_samples_rel=
|
|
276
|
-
min_samples_abs=
|
|
273
|
+
uids=None,
|
|
274
|
+
mz_tol: float = 0.010,
|
|
275
|
+
rt_tol: float = 10.0,
|
|
276
|
+
min_samples_rel: float = 0.0,
|
|
277
|
+
min_samples_abs: int = 2,
|
|
277
278
|
):
|
|
278
279
|
"""Fill missing chromatograms by extracting from raw data.
|
|
279
280
|
|
|
@@ -281,10 +282,10 @@ def fill_chrom(
|
|
|
281
282
|
|
|
282
283
|
Args:
|
|
283
284
|
uids: Consensus UIDs to process (default: all)
|
|
284
|
-
mz_tol: m/z tolerance for extraction
|
|
285
|
-
rt_tol: RT tolerance for extraction
|
|
286
|
-
min_samples_rel: Relative minimum sample threshold
|
|
287
|
-
min_samples_abs: Absolute minimum sample threshold
|
|
285
|
+
mz_tol: m/z tolerance for extraction (default: 0.010 Da)
|
|
286
|
+
rt_tol: RT tolerance for extraction (default: 10.0 seconds)
|
|
287
|
+
min_samples_rel: Relative minimum sample threshold (default: 0.0)
|
|
288
|
+
min_samples_abs: Absolute minimum sample threshold (default: 2)
|
|
288
289
|
"""
|
|
289
290
|
uids = self._get_consensus_uids(uids)
|
|
290
291
|
|
|
@@ -685,28 +686,28 @@ def _process_sample_for_parallel_fill(
|
|
|
685
686
|
return new_features, new_mapping, counter
|
|
686
687
|
|
|
687
688
|
|
|
688
|
-
def
|
|
689
|
+
def fill_chrom(
|
|
689
690
|
self,
|
|
690
|
-
uids=
|
|
691
|
-
mz_tol=
|
|
692
|
-
rt_tol=
|
|
693
|
-
min_samples_rel=
|
|
694
|
-
min_samples_abs=
|
|
691
|
+
uids=None,
|
|
692
|
+
mz_tol: float = 0.010,
|
|
693
|
+
rt_tol: float = 10.0,
|
|
694
|
+
min_samples_rel: float = 0.0,
|
|
695
|
+
min_samples_abs: int = 2,
|
|
695
696
|
num_workers=4,
|
|
696
697
|
):
|
|
697
698
|
"""Fill missing chromatograms by extracting from raw data using parallel processing.
|
|
698
699
|
|
|
699
700
|
Args:
|
|
700
701
|
uids: Consensus UIDs to process (default: all)
|
|
701
|
-
mz_tol: m/z tolerance for extraction
|
|
702
|
-
rt_tol: RT tolerance for extraction
|
|
703
|
-
min_samples_rel: Relative minimum sample threshold
|
|
704
|
-
min_samples_abs: Absolute minimum sample threshold
|
|
702
|
+
mz_tol: m/z tolerance for extraction (default: 0.010 Da)
|
|
703
|
+
rt_tol: RT tolerance for extraction (default: 10.0 seconds)
|
|
704
|
+
min_samples_rel: Relative minimum sample threshold (default: 0.0)
|
|
705
|
+
min_samples_abs: Absolute minimum sample threshold (default: 2)
|
|
705
706
|
num_workers: Number of parallel workers (default: 4)
|
|
706
707
|
"""
|
|
707
708
|
uids = self._get_consensus_uids(uids)
|
|
708
709
|
|
|
709
|
-
self.logger.info("Gap filling...")
|
|
710
|
+
self.logger.info(f"Gap filling with {num_workers} workers...")
|
|
710
711
|
self.logger.debug(
|
|
711
712
|
f"Parameters: mz_tol={mz_tol}, rt_tol={rt_tol}, min_samples_rel={min_samples_rel}, min_samples_abs={min_samples_abs}, num_workers={num_workers}",
|
|
712
713
|
)
|
|
@@ -1075,115 +1076,3 @@ def _load_consensusXML(self, filename="alignment.consensusXML"):
|
|
|
1075
1076
|
fh.load(filename, self.consensus_map)
|
|
1076
1077
|
self.logger.debug(f"Loaded consensus map from {filename}.")
|
|
1077
1078
|
|
|
1078
|
-
|
|
1079
|
-
"""def find_features(
|
|
1080
|
-
self,
|
|
1081
|
-
reset=None,
|
|
1082
|
-
chrom_peak_snr=None,
|
|
1083
|
-
noise=None,
|
|
1084
|
-
chrom_fwhm=None,
|
|
1085
|
-
chrom_coherence=None,
|
|
1086
|
-
prominence_scaled=None,
|
|
1087
|
-
link_ms2=None,
|
|
1088
|
-
save_mgf=None,
|
|
1089
|
-
save_stats=None,
|
|
1090
|
-
):
|
|
1091
|
-
self.logger.debug("Finding features for all samples in the study.")
|
|
1092
|
-
# Initialize default parameters inside the function
|
|
1093
|
-
if reset is None:
|
|
1094
|
-
reset = False
|
|
1095
|
-
if chrom_peak_snr is None:
|
|
1096
|
-
chrom_peak_snr = 10.0
|
|
1097
|
-
if noise is None:
|
|
1098
|
-
noise = 200
|
|
1099
|
-
|
|
1100
|
-
# Create parameter object and update with provided values
|
|
1101
|
-
params = fill_chrom_defaults()
|
|
1102
|
-
|
|
1103
|
-
# Set explicit parameters
|
|
1104
|
-
params.set('uids', uids, validate=True)
|
|
1105
|
-
params.set('mz_tol', mz_tol, validate=True)
|
|
1106
|
-
params.set('rt_tol', rt_tol, validate=True)
|
|
1107
|
-
params.set('min_samples_rel', min_samples_rel, validate=True)
|
|
1108
|
-
params.set('min_samples_abs', min_samples_abs, validate=True)
|
|
1109
|
-
|
|
1110
|
-
# Store parameters in the Study object
|
|
1111
|
-
self.store_history(["fill_chrom"], params.to_dict())
|
|
1112
|
-
self.logger.debug("Parameters stored to fill_chrom")
|
|
1113
|
-
|
|
1114
|
-
if chrom_fwhm is None:
|
|
1115
|
-
chrom_fwhm = 1.0
|
|
1116
|
-
if chrom_coherence is None:
|
|
1117
|
-
chrom_coherence = 0.3
|
|
1118
|
-
if prominence_scaled is None:
|
|
1119
|
-
prominence_scaled = 1.0
|
|
1120
|
-
if link_ms2 is None:
|
|
1121
|
-
link_ms2 = True
|
|
1122
|
-
if save_mgf is None:
|
|
1123
|
-
save_mgf = False
|
|
1124
|
-
if save_stats is None:
|
|
1125
|
-
save_stats = False
|
|
1126
|
-
|
|
1127
|
-
# iterate over all samples in samples_df - using Polars iteration
|
|
1128
|
-
for index, row_dict in enumerate(self.samples_df.iter_rows(named=True)):
|
|
1129
|
-
# check if features_maps is None
|
|
1130
|
-
if self.features_maps[index] is not None and not reset:
|
|
1131
|
-
# skip this sample
|
|
1132
|
-
continue
|
|
1133
|
-
if self.features_maps[index] is not None and not reset:
|
|
1134
|
-
# skip this sample
|
|
1135
|
-
continue
|
|
1136
|
-
# load the sample
|
|
1137
|
-
ddaobj = Sample(row_dict["sample_path"])
|
|
1138
|
-
# find features
|
|
1139
|
-
ddaobj.find_features(
|
|
1140
|
-
chrom_peak_snr=chrom_peak_snr,
|
|
1141
|
-
noise=noise,
|
|
1142
|
-
chrom_fwhm=chrom_fwhm,
|
|
1143
|
-
)
|
|
1144
|
-
ddaobj.filter_features(
|
|
1145
|
-
prominence_scaled=prominence_scaled,
|
|
1146
|
-
coherence=chrom_coherence,
|
|
1147
|
-
)
|
|
1148
|
-
# link MS2
|
|
1149
|
-
if link_ms2:
|
|
1150
|
-
ddaobj.find_ms2()
|
|
1151
|
-
|
|
1152
|
-
# add to features_maps at the index of the sample
|
|
1153
|
-
self.features_maps[index] = ddaobj.features
|
|
1154
|
-
# add to features_df
|
|
1155
|
-
f_df = ddaobj.features_df.clone()
|
|
1156
|
-
# add column 'feature_uid' with the uid as uint64
|
|
1157
|
-
|
|
1158
|
-
f_df = f_df.with_columns(pl.lit(row_dict["sample_uid"]).alias("sample_uid"))
|
|
1159
|
-
# move sample_uid to the first column
|
|
1160
|
-
other_cols = [col for col in f_df.columns if col != "sample_uid"]
|
|
1161
|
-
f_df = f_df.select(["sample_uid"] + other_cols)
|
|
1162
|
-
|
|
1163
|
-
offset = (
|
|
1164
|
-
self.features_df.get_column("feature_uid").max() + 1
|
|
1165
|
-
if not self.features_df.is_empty()
|
|
1166
|
-
else 1
|
|
1167
|
-
)
|
|
1168
|
-
f_df = f_df.with_columns(
|
|
1169
|
-
pl.int_range(offset, offset + len(f_df)).alias("feature_uid"),
|
|
1170
|
-
)
|
|
1171
|
-
# remove all rows with sample_uid=row_dict['sample_uid']
|
|
1172
|
-
self.features_df = self.features_df.filter(
|
|
1173
|
-
pl.col("sample_uid") != row_dict["sample_uid"],
|
|
1174
|
-
)
|
|
1175
|
-
self.features_df = pl.concat([self.features_df, f_df])
|
|
1176
|
-
|
|
1177
|
-
if self.default_folder is not None:
|
|
1178
|
-
bname = os.path.join(self.default_folder, row_dict["sample_name"])
|
|
1179
|
-
ddaobj.save(filename=bname + ".mzpkl")
|
|
1180
|
-
ddaobj.save_features(filename=bname + ".featureXML")
|
|
1181
|
-
else:
|
|
1182
|
-
bname = row_dict["sample_path"].replace(".mzpkl", "").replace(".wiff", "")
|
|
1183
|
-
ddaobj.save(filename=bname + ".mzpkl")
|
|
1184
|
-
ddaobj.save_features(filename=bname + ".featureXML")
|
|
1185
|
-
if save_stats:
|
|
1186
|
-
ddaobj.save_stats(filename=bname + "_stats.csv")
|
|
1187
|
-
if save_mgf:
|
|
1188
|
-
ddaobj.save_mgf(filename=bname + ".mgf", include_all_ms1=True)
|
|
1189
|
-
"""
|
masster/study/save.py
CHANGED
|
@@ -122,12 +122,6 @@ def _save_consensusXML(self, filename:str):
|
|
|
122
122
|
return
|
|
123
123
|
|
|
124
124
|
fh = oms.ConsensusXMLFile()
|
|
125
|
-
# check if filename includes any path
|
|
126
|
-
if not os.path.isabs(filename):
|
|
127
|
-
if self.default_folder is not None:
|
|
128
|
-
filename = os.path.join(self.default_folder, filename)
|
|
129
|
-
else:
|
|
130
|
-
filename = os.path.join(os.getcwd(), filename)
|
|
131
125
|
fh.store(filename, self.consensus_map)
|
|
132
126
|
self.logger.info(f"Saved consensus map to {filename}")
|
|
133
127
|
|
masster/study/study.py
CHANGED
|
@@ -71,8 +71,8 @@ from masster.study.helpers import set_default_folder
|
|
|
71
71
|
from masster.study.load import add_folder
|
|
72
72
|
from masster.study.load import add_sample
|
|
73
73
|
from masster.study.load import (
|
|
74
|
+
fill_chrom_single,
|
|
74
75
|
fill_chrom,
|
|
75
|
-
fill_chrom_parallel,
|
|
76
76
|
_process_sample_for_parallel_fill,
|
|
77
77
|
)
|
|
78
78
|
from masster.study.load import _get_missing_consensus_sample_combinations
|
|
@@ -147,6 +147,7 @@ class Study:
|
|
|
147
147
|
|
|
148
148
|
def __init__(
|
|
149
149
|
self,
|
|
150
|
+
filename=None,
|
|
150
151
|
**kwargs,
|
|
151
152
|
):
|
|
152
153
|
"""
|
|
@@ -156,6 +157,10 @@ class Study:
|
|
|
156
157
|
data storage, and processing parameters used for study-level analysis.
|
|
157
158
|
|
|
158
159
|
Parameters:
|
|
160
|
+
filename (str, optional): Path to a .study5 file to load automatically.
|
|
161
|
+
If provided, the default_folder will be set to the
|
|
162
|
+
directory containing this file, and the study will
|
|
163
|
+
be loaded automatically.
|
|
159
164
|
**kwargs: Keyword arguments for setting study parameters. Can include:
|
|
160
165
|
- A study_defaults instance to set all parameters at once (pass as params=study_defaults(...))
|
|
161
166
|
- Individual parameter names and values (see study_defaults for available parameters)
|
|
@@ -172,6 +177,20 @@ class Study:
|
|
|
172
177
|
"""
|
|
173
178
|
# Initialize default parameters
|
|
174
179
|
|
|
180
|
+
# Handle filename parameter for automatic loading
|
|
181
|
+
auto_load_filename = None
|
|
182
|
+
if filename is not None:
|
|
183
|
+
if not filename.endswith('.study5'):
|
|
184
|
+
raise ValueError("filename must be a .study5 file")
|
|
185
|
+
if not os.path.exists(filename):
|
|
186
|
+
raise FileNotFoundError(f"Study file not found: {filename}")
|
|
187
|
+
|
|
188
|
+
# Set default_folder to the directory containing the file if not already specified
|
|
189
|
+
if 'default_folder' not in kwargs:
|
|
190
|
+
kwargs['default_folder'] = os.path.dirname(os.path.abspath(filename))
|
|
191
|
+
|
|
192
|
+
auto_load_filename = filename
|
|
193
|
+
|
|
175
194
|
# Check if a study_defaults instance was passed
|
|
176
195
|
if "params" in kwargs and isinstance(kwargs["params"], study_defaults):
|
|
177
196
|
params = kwargs.pop("params")
|
|
@@ -234,6 +253,10 @@ class Study:
|
|
|
234
253
|
sink=self.log_sink
|
|
235
254
|
)
|
|
236
255
|
|
|
256
|
+
# Auto-load study file if filename was provided
|
|
257
|
+
if auto_load_filename is not None:
|
|
258
|
+
self.load(filename=auto_load_filename)
|
|
259
|
+
|
|
237
260
|
|
|
238
261
|
|
|
239
262
|
# Attach module functions as class methods
|
|
@@ -242,7 +265,7 @@ class Study:
|
|
|
242
265
|
save_consensus = save_consensus
|
|
243
266
|
save_samples = save_samples
|
|
244
267
|
align = align
|
|
245
|
-
|
|
268
|
+
fill_chrom_single = fill_chrom_single
|
|
246
269
|
find_consensus = find_consensus
|
|
247
270
|
find_ms2 = find_ms2
|
|
248
271
|
integrate_chrom = integrate_chrom
|
|
@@ -276,7 +299,7 @@ class Study:
|
|
|
276
299
|
get_gaps_stats = get_gaps_stats
|
|
277
300
|
get_orphans = get_orphans
|
|
278
301
|
set_default_folder = set_default_folder
|
|
279
|
-
|
|
302
|
+
fill_chrom = fill_chrom
|
|
280
303
|
_process_sample_for_parallel_fill = _process_sample_for_parallel_fill
|
|
281
304
|
_get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
|
|
282
305
|
_load_consensusXML = _load_consensusXML
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: masster
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Mass spectrometry data analysis package
|
|
5
5
|
Project-URL: homepage, https://github.com/zamboni-lab/masster
|
|
6
6
|
Project-URL: repository, https://github.com/zamboni-lab/masster
|
|
@@ -730,18 +730,11 @@ Description-Content-Type: text/markdown
|
|
|
730
730
|
|
|
731
731
|
# MASSter
|
|
732
732
|
|
|
733
|
-
**MASSter** is a comprehensive Python package for mass spectrometry data analysis, designed for metabolomics and LC-MS data processing. It provides tools for feature detection, alignment, consensus building, and interactive visualization of mass spectrometry datasets.
|
|
733
|
+
**MASSter** is a comprehensive Python package for mass spectrometry data analysis, designed for metabolomics and LC-MS data processing. It provides tools for feature detection, alignment, consensus building, and interactive visualization of mass spectrometry datasets. It is designed primarily for DDA data and also contains hidden (undocumented) functionality for DIA and ZTScan DIA data.
|
|
734
734
|
|
|
735
735
|
Most core processing functions are derived from OpenMS. We use the same nomenclature and refer to their documentation for an explanation of the parameters. To a large extent, however, you should be able to use the defaults (=no parameters) when calling processing steps.
|
|
736
736
|
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
- **Mass spectrometry data processing**: Support for multiple file formats (.wiff, .mzML, .raw, .mzpkl)
|
|
740
|
-
- **Feature detection and alignment**: Automated chromatographic peak detection and retention time alignment
|
|
741
|
-
- **Consensus feature building**: Identification of features across multiple samples
|
|
742
|
-
- **Interactive visualizations**: 2D plots, chromatograms, and statistical dashboards
|
|
743
|
-
- **Batch processing**: Process entire studies with multiple samples
|
|
744
|
-
- **Export capabilities**: MGF export for spectral library searches
|
|
737
|
+
This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab. Novel functionalities will be added based on need and requests.
|
|
745
738
|
|
|
746
739
|
## Installation
|
|
747
740
|
|
|
@@ -749,9 +742,7 @@ Most core processing functions are derived from OpenMS. We use the same nomencla
|
|
|
749
742
|
pip install masster
|
|
750
743
|
```
|
|
751
744
|
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
### Basic Workflow
|
|
745
|
+
### Basic Workflow for analyzing an LC-MS study with two or more samples
|
|
755
746
|
|
|
756
747
|
```python
|
|
757
748
|
import masster
|
|
@@ -769,146 +760,19 @@ study.align(rt_max_diff=2.0)
|
|
|
769
760
|
study.find_consensus(min_samples=3)
|
|
770
761
|
|
|
771
762
|
# Retrieve missing data for quantification
|
|
772
|
-
study.
|
|
763
|
+
study.fill_chrom(abs_)
|
|
773
764
|
|
|
774
765
|
# Integrate according to consensus metadata
|
|
775
766
|
study.integrate_chrom()
|
|
776
767
|
|
|
777
|
-
# link MS2 across the whole study
|
|
768
|
+
# link MS2 across the whole study and export them
|
|
778
769
|
study.find_ms2()
|
|
779
|
-
|
|
780
|
-
# Export MGF file
|
|
781
770
|
study.export_mgf()
|
|
782
771
|
|
|
783
|
-
# Save the study
|
|
772
|
+
# Save the study to .study5
|
|
784
773
|
study.save()
|
|
785
774
|
```
|
|
786
775
|
|
|
787
|
-
### Single Sample Processing
|
|
788
|
-
|
|
789
|
-
```python
|
|
790
|
-
from masster.sample import Sample
|
|
791
|
-
|
|
792
|
-
# Load a single sample (mzML, RAW, WIFF)
|
|
793
|
-
sample = Sample("path/to/your/file.mzML")
|
|
794
|
-
|
|
795
|
-
# Detect features
|
|
796
|
-
sample.find_features(chrom_peak_snr=10, noise=500, chrom_fwhm=1.0)
|
|
797
|
-
|
|
798
|
-
# Detect adducts
|
|
799
|
-
sample.find_adducts()
|
|
800
|
-
|
|
801
|
-
# Find MS2 spectra
|
|
802
|
-
sample.find_ms2()
|
|
803
|
-
|
|
804
|
-
# Save results
|
|
805
|
-
sample.save()
|
|
806
|
-
```
|
|
807
|
-
|
|
808
|
-
## Visualization Examples
|
|
809
|
-
|
|
810
|
-
Masster provides extensive plotting capabilities for data exploration and quality control:
|
|
811
|
-
|
|
812
|
-
### 2D Data Visualization
|
|
813
|
-
|
|
814
|
-
```python
|
|
815
|
-
# Plot 2D overview of MS data with detected features
|
|
816
|
-
sample.plot_2d(
|
|
817
|
-
filename="overview_2d.html",
|
|
818
|
-
show_features=True,
|
|
819
|
-
show_ms2=True,
|
|
820
|
-
title="MS Data Overview"
|
|
821
|
-
)
|
|
822
|
-
|
|
823
|
-
# Plot with feature filtering
|
|
824
|
-
sample.plot_2d(
|
|
825
|
-
filename="features_ms2_only.html",
|
|
826
|
-
show_only_features_with_ms2=True,
|
|
827
|
-
markersize=8
|
|
828
|
-
)
|
|
829
|
-
```
|
|
830
|
-
|
|
831
|
-
### Study-Level Plots
|
|
832
|
-
|
|
833
|
-
```python
|
|
834
|
-
# Plot features from multiple samples
|
|
835
|
-
study.plot_samples_2d(
|
|
836
|
-
samples=None, # Use all samples
|
|
837
|
-
filename="multi_sample_overview.html",
|
|
838
|
-
markersize=3,
|
|
839
|
-
alpha_max=0.8
|
|
840
|
-
)
|
|
841
|
-
|
|
842
|
-
# Plot consensus features
|
|
843
|
-
study.plot_consensus_2d(
|
|
844
|
-
filename="consensus_features.html",
|
|
845
|
-
colorby="number_samples",
|
|
846
|
-
sizeby="inty_mean"
|
|
847
|
-
)
|
|
848
|
-
|
|
849
|
-
# Plot chromatograms for specific features
|
|
850
|
-
study.plot_chrom(
|
|
851
|
-
uids=[1, 2, 3], # Feature UIDs
|
|
852
|
-
filename="chromatograms.html",
|
|
853
|
-
aligned=True
|
|
854
|
-
)
|
|
855
|
-
```
|
|
856
|
-
|
|
857
|
-
### Quality Control Plots
|
|
858
|
-
|
|
859
|
-
```python
|
|
860
|
-
# Plot DDA acquisition statistics
|
|
861
|
-
sample.plot_dda_stats(filename="dda_stats.html")
|
|
862
|
-
|
|
863
|
-
# Plot feature statistics
|
|
864
|
-
sample.plot_feature_stats(filename="feature_stats.html")
|
|
865
|
-
|
|
866
|
-
# Plot total ion chromatogram
|
|
867
|
-
sample.plot_tic(filename="tic.html")
|
|
868
|
-
```
|
|
869
|
-
|
|
870
|
-
### Advanced Plotting Options
|
|
871
|
-
|
|
872
|
-
```python
|
|
873
|
-
# Plot with Oracle annotation data
|
|
874
|
-
sample.plot_2d_oracle(
|
|
875
|
-
oracle_folder="path/to/oracle/results",
|
|
876
|
-
colorby="hg", # Color by chemical class
|
|
877
|
-
filename="annotated_features.html"
|
|
878
|
-
)
|
|
879
|
-
|
|
880
|
-
# Plot MS2 cycle view
|
|
881
|
-
sample.plot_ms2_cycle(
|
|
882
|
-
cycle=100,
|
|
883
|
-
filename="ms2_cycle.html",
|
|
884
|
-
centroid=True
|
|
885
|
-
)
|
|
886
|
-
|
|
887
|
-
# Plot extracted ion chromatogram
|
|
888
|
-
sample.plot_eic(
|
|
889
|
-
feature_uid=123,
|
|
890
|
-
rt_tol=10,
|
|
891
|
-
mz_tol=0.005,
|
|
892
|
-
filename="eic.html"
|
|
893
|
-
)
|
|
894
|
-
```
|
|
895
|
-
|
|
896
|
-
## File Format Support
|
|
897
|
-
|
|
898
|
-
- **Input formats**: .wiff, .mzML, .raw files
|
|
899
|
-
- **Intermediate formats**: .sample5 and .study5 (HDF5) for fast loading
|
|
900
|
-
- **Export formats**: .mgf, .csv
|
|
901
|
-
- **Visualization**: .html (interactive), .png, .svg
|
|
902
|
-
|
|
903
|
-
## Advanced Features
|
|
904
|
-
|
|
905
|
-
### Batch Processing
|
|
906
|
-
Use the command-line interface for processing multiple files:
|
|
907
|
-
|
|
908
|
-
```bash
|
|
909
|
-
python -m masster.demo.example_batch_process input_directory --recursive --dest output_directory
|
|
910
|
-
```
|
|
911
|
-
|
|
912
776
|
## Requirements
|
|
913
777
|
|
|
914
778
|
- Python ≥ 3.11
|
|
@@ -919,13 +783,6 @@ python -m masster.demo.example_batch_process input_directory --recursive --dest
|
|
|
919
783
|
|
|
920
784
|
GNU Affero General Public License v3
|
|
921
785
|
|
|
922
|
-
## Contributing
|
|
923
|
-
|
|
924
|
-
Contributions are welcome! Please see our contributing guidelines and code of conduct.
|
|
925
|
-
|
|
926
786
|
## Citation
|
|
927
787
|
|
|
928
|
-
If you use Masster in your research, please cite
|
|
929
|
-
```
|
|
930
|
-
[Citation details to be added]
|
|
931
|
-
```
|
|
788
|
+
If you use Masster in your research, please cite this repository.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
masster/__init__.py,sha256=xeh-hwR_2umE0CpRXn8t22wbkt4IT-FBEzeJknL8J6c,670
|
|
2
|
-
masster/_version.py,sha256
|
|
2
|
+
masster/_version.py,sha256=-QmvlpTZa_4FtjijQydS9z8bCyNLc0Gv3QiTHg5Ncro,239
|
|
3
3
|
masster/chromatogram.py,sha256=f25rMrNvCQN0A93wp9QPdG3H4FiOlYPbRY3H4yd7Q5Y,18910
|
|
4
4
|
masster/logger.py,sha256=9uzuVEPwQkVlnsqT_eVvh33FZY_FIm3Wn2TaJcGhZP8,10674
|
|
5
5
|
masster/spectrum.py,sha256=XiClDcN1uiG-_2TIr7Bqp7x8gWvHPbC5oh3zUu3fr6Y,46789
|
|
@@ -26,14 +26,14 @@ masster/sample/defaults/get_spectrum_def.py,sha256=hy3t3zbIVvKRQmVQl8xAXrmQ4LSDb
|
|
|
26
26
|
masster/sample/defaults/sample_def.py,sha256=WHjw-jsYinPKCC02J2Fn5SGB2OW12ntEQn-sHmqESqs,13758
|
|
27
27
|
masster/study/__init__.py,sha256=bTbxmTgBAL_1iB73JE8fKdo9wik9m4dcmMppElU0V18,157
|
|
28
28
|
masster/study/export.py,sha256=xmT2WhAuSGGcqHw8Wa44r6g5ud1mzzywOc3TnNqNh8E,12624
|
|
29
|
-
masster/study/h5.py,sha256=
|
|
29
|
+
masster/study/h5.py,sha256=BPpcEV_fZ3dJCEkzEga_V1zUkKQEj_kxAeMSF56sSts,39260
|
|
30
30
|
masster/study/helpers.py,sha256=ePh5hPgSAgfu7-crsm4th0QYGeQbHk9kNj7OyHMclpQ,15860
|
|
31
|
-
masster/study/load.py,sha256=
|
|
31
|
+
masster/study/load.py,sha256=rTmm5E-UsTg0SJqwa4i4II5ca82m8OEn05yWW2G_YPc,38718
|
|
32
32
|
masster/study/parameters.py,sha256=iKCIf7_bivi0Jkz4hreKmCyusXpQX5IIuuhnmS52-Q4,3177
|
|
33
33
|
masster/study/plot.py,sha256=nY6zWKUOhlyDHra4BI0c8dx7PX5fHFW8v2Ma9YpscvU,21437
|
|
34
34
|
masster/study/processing.py,sha256=PjfpsVASaR0uSE4vqKzBppq4jM3HexzbGw_bn5kDwdA,42552
|
|
35
|
-
masster/study/save.py,sha256=
|
|
36
|
-
masster/study/study.py,sha256=
|
|
35
|
+
masster/study/save.py,sha256=_DmnAwhlZQRNeVDLNER63pXVhinV-poKMvJlIz6Bt-Y,4791
|
|
36
|
+
masster/study/study.py,sha256=gXc1j4wljbw-Zx-JPsyYO86EoXPaR0N7D2GepJZOPhA,21530
|
|
37
37
|
masster/study/study5_schema.json,sha256=7LfsgI-dZGpoaPiAy0kh6gDJL4yKuA7-7PHbo9j4A6E,4630
|
|
38
38
|
masster/study/defaults/__init__.py,sha256=wkul1Qq83nPHI5XebWvu3yKjp5tF8OdZDJJho8r2_qA,569
|
|
39
39
|
masster/study/defaults/align_def.py,sha256=8Itwit6gaqVhF9A3w9V-uqgKlcQE6uCXyC3ul_gPWFo,8872
|
|
@@ -43,8 +43,8 @@ masster/study/defaults/find_consensus_def.py,sha256=artvErq4w07SfHB0WHi68ZjxGg0X
|
|
|
43
43
|
masster/study/defaults/find_ms2_def.py,sha256=k-GmnCKgQuVO6M-EAjzGOqgdFrqZviRaNAdiFmwVujY,4907
|
|
44
44
|
masster/study/defaults/integrate_chrom_def.py,sha256=FY9QdJpdWe18sYucrwNKoZYY0eoOo0a_hcdkZHm_W00,7107
|
|
45
45
|
masster/study/defaults/study_def.py,sha256=SzUzd2YTGDGCHNMR-Dw57j5PprEnPhpITonv7wx6HQA,9035
|
|
46
|
-
masster-0.2.
|
|
47
|
-
masster-0.2.
|
|
48
|
-
masster-0.2.
|
|
49
|
-
masster-0.2.
|
|
50
|
-
masster-0.2.
|
|
46
|
+
masster-0.2.3.dist-info/METADATA,sha256=hYc0JozT_r5KPMj4znX9ee0omRbd1p8sK9SU9OaIEm8,44324
|
|
47
|
+
masster-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
48
|
+
masster-0.2.3.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
49
|
+
masster-0.2.3.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
50
|
+
masster-0.2.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|