PyPI - masster - Versions diffs - 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl - Mend

masster 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (33) hide show

masster/docs/SCX_API_Documentation.md +0 -0
masster/docs/SCX_DLL_Analysis.md +0 -0
masster/logger.py +92 -78
masster/sample/defaults/find_features_def.py +16 -6
masster/sample/defaults/sample_def.py +1 -1
masster/sample/h5.py +2 -2
masster/sample/helpers.py +137 -136
masster/sample/load.py +13 -9
masster/sample/plot.py +156 -131
masster/sample/processing.py +18 -12
masster/sample/sample.py +4 -4
masster/sample/sample5_schema.json +62 -62
masster/sample/save.py +16 -13
masster/sample/sciex.py +187 -176
masster/study/defaults/align_def.py +224 -6
masster/study/defaults/fill_chrom_def.py +1 -5
masster/study/defaults/integrate_chrom_def.py +1 -5
masster/study/defaults/study_def.py +2 -2
masster/study/export.py +144 -131
masster/study/h5.py +193 -133
masster/study/helpers.py +293 -245
masster/study/helpers_optimized.py +99 -57
masster/study/load.py +51 -25
masster/study/plot.py +453 -17
masster/study/processing.py +159 -76
masster/study/save.py +7 -7
masster/study/study.py +97 -88
masster/study/study5_schema.json +82 -82
{masster-0.3.10.dist-info → masster-0.3.11.dist-info}/METADATA +1 -1
{masster-0.3.10.dist-info → masster-0.3.11.dist-info}/RECORD +33 -31
{masster-0.3.10.dist-info → masster-0.3.11.dist-info}/WHEEL +0 -0
{masster-0.3.10.dist-info → masster-0.3.11.dist-info}/entry_points.txt +0 -0
{masster-0.3.10.dist-info → masster-0.3.11.dist-info}/licenses/LICENSE +0 -0

masster/study/study.py CHANGED Viewed

@@ -95,9 +95,10 @@ from masster.study.plot import plot_alignment
 from masster.study.plot import plot_alignment_bokeh
 from masster.study.plot import plot_chrom
 from masster.study.plot import plot_consensus_2d
+from masster.study.plot import plot_consensus_stats
+from masster.study.plot import plot_pca
 from masster.study.plot import plot_samples_2d
 from masster.study.processing import align
-from masster.study.processing import filter_consensus
 from masster.study.processing import merge
 from masster.study.processing import integrate
 from masster.study.processing import find_ms2
@@ -170,7 +171,7 @@ class Study:
         - `ddafile`: For individual sample processing before study-level analysis.
         - `StudyParameters`: For configuring study-specific parameters.
     """
     # Defaults class attributes
     study_defaults = study_defaults
     sample_defaults = sample_defaults
@@ -219,15 +220,15 @@ class Study:
         # Handle filename parameter for automatic loading
         auto_load_filename = None
         if filename is not None:
-            if not filename.endswith('.study5'):
+            if not filename.endswith(".study5"):
                 raise ValueError("filename must be a .study5 file")
             if not os.path.exists(filename):
                 raise FileNotFoundError(f"Study file not found: {filename}")
             # Set folder to the directory containing the file if not already specified
-            if 'folder' not in kwargs:
-                kwargs['folder'] = os.path.dirname(os.path.abspath(filename))
+            if "folder" not in kwargs:
+                kwargs["folder"] = os.path.dirname(os.path.abspath(filename))
             auto_load_filename = filename
         # Check if a study_defaults instance was passed
@@ -257,7 +258,7 @@ class Study:
         self.log_level = params.log_level.upper() if params.log_level else "INFO"
         self.log_label = params.log_label + " | " if params.log_label else ""
         self.log_sink = params.log_sink
         if self.folder is not None and not os.path.exists(self.folder):
             # create the folder if it does not exist
             os.makedirs(self.folder)
@@ -308,42 +309,42 @@ class Study:
         if auto_load_filename is not None:
             self.load(filename=auto_load_filename)
-    # Attach module functions as class methods
+    # Attach module functions as class methods
     load = load
-    save = save
-    save_consensus = save_consensus
-    save_samples = save_samples
-    align = align
-    fill_single = fill_single
+    save = save
+    save_consensus = save_consensus
+    save_samples = save_samples
+    align = align
+    fill_single = fill_single
     fill_chrom_single = fill_single  # Backward compatibility alias
-    merge = merge
+    merge = merge
     find_consensus = merge  # Backward compatibility alias
-    find_ms2 = find_ms2
+    find_ms2 = find_ms2
     integrate = integrate
     integrate_chrom = integrate  # Backward compatibility alias
-    store_history = store_history
-    get_parameters = get_parameters
-    update_parameters = update_parameters
-    get_parameters_property = get_parameters_property
-    set_parameters_property = set_parameters_property
-    plot_alignment = plot_alignment
-    plot_alignment_bokeh = plot_alignment_bokeh
-    plot_chrom = plot_chrom
-    plot_consensus_2d = plot_consensus_2d
-    plot_samples_2d = plot_samples_2d
-    get_consensus = get_consensus
-    get_chrom = get_chrom
-    get_consensus_matches = get_consensus_matches
-    compress = compress
+    store_history = store_history
+    get_parameters = get_parameters
+    update_parameters = update_parameters
+    get_parameters_property = get_parameters_property
+    set_parameters_property = set_parameters_property
+    plot_alignment = plot_alignment
+    plot_alignment_bokeh = plot_alignment_bokeh
+    plot_chrom = plot_chrom
+    plot_consensus_2d = plot_consensus_2d
+    plot_consensus_stats = plot_consensus_stats
+    plot_pca = plot_pca
+    plot_samples_2d = plot_samples_2d
+    get_consensus = get_consensus
+    get_chrom = get_chrom
+    get_consensus_matches = get_consensus_matches
+    compress = compress
     compress_features = compress_features
     compress_ms2 = compress_ms2
     compress_chrom = compress_chrom
-    restore_features = restore_features
+    restore_features = restore_features
     restore_chrom = restore_chrom
-    fill_reset = fill_reset
-    align_reset = align_reset
+    fill_reset = fill_reset
+    align_reset = align_reset
     set_source = set_source
     features_select = features_select
     features_filter = features_filter
@@ -351,37 +352,37 @@ class Study:
     consensus_select = consensus_select
     consensus_filter = consensus_filter
     consensus_delete = consensus_delete
-    filter_consensus = consensus_filter
+    filter_consensus = consensus_filter
     select_consensus = consensus_select
     filter_features = features_filter
     select_features = features_select
     consensus_find = merge
-    filter_features = features_filter
+    filter_features = features_filter
     # Additional method assignments for all imported functions
-    add_folder = add # backward compatibility alias
-    add = add
+    add_folder = add  # backward compatibility alias
+    add = add
     add_sample = add_sample
-    _load_study5 = _load_study5
-    _save_study5 = _save_study5
-    _save_study5_compressed = _save_study5_compressed
-    _get_consensus_uids = _get_consensus_uids
-    _get_feature_uids = _get_feature_uids
-    _get_sample_uids = _get_sample_uids
-    get_consensus_matrix = get_consensus_matrix
-    get_gaps_matrix = get_gaps_matrix
-    get_gaps_stats = get_gaps_stats
+    _load_study5 = _load_study5
+    _save_study5 = _save_study5
+    _save_study5_compressed = _save_study5_compressed
+    _get_consensus_uids = _get_consensus_uids
+    _get_feature_uids = _get_feature_uids
+    _get_sample_uids = _get_sample_uids
+    get_consensus_matrix = get_consensus_matrix
+    get_gaps_matrix = get_gaps_matrix
+    get_gaps_stats = get_gaps_stats
     get_orphans = get_orphans
-    set_folder = set_folder
-    fill = fill
+    set_folder = set_folder
+    fill = fill
     fill_chrom = fill  # Backward compatibility alias
-    _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
-    _get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
-    _load_consensusXML = _load_consensusXML
-    load_features = load_features
-    sanitize = sanitize
-    _save_consensusXML = _save_consensusXML
-    export_mgf = export_mgf
+    _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
+    _get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
+    _load_consensusXML = _load_consensusXML
+    load_features = load_features
+    sanitize = sanitize
+    _save_consensusXML = _save_consensusXML
+    export_mgf = export_mgf
     export_mztab = export_mztab
     _get_mgf_df = _get_mgf_df  # New function for MGF data extraction
@@ -404,13 +405,13 @@ class Study:
         This ensures that the instance uses the latest implementation without restarting the interpreter.
         """
         # Reset logger configuration flags to allow proper reconfiguration after reload
-        '''        try:
+        """        try:
             import masster.sample.logger as logger_module
             if hasattr(logger_module, "_STUDY_LOGGER_CONFIGURED"):
                 logger_module._STUDY_LOGGER_CONFIGURED = False
         except Exception:
-            pass'''
+            pass"""
         # Get the base module name (masster)
         base_modname = self.__class__.__module__.split(".")[0]
@@ -508,7 +509,7 @@ class Study:
         # Cache DataFrame lengths and existence checks
         consensus_df_len = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
         samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
         # Calculate consensus statistics only if consensus_df exists and has data
         if consensus_df_len > 0:
             # Execute the aggregation once
@@ -525,40 +526,44 @@ class Study:
             min_samples = 0
             mean_samples = 0
             max_samples = 0
         # Count only features where 'filled' == False
-        if not self.features_df.is_empty() and 'filled' in self.features_df.columns:
-            unfilled_features_count = self.features_df.filter(~self.features_df['filled']).height
+        if not self.features_df.is_empty() and "filled" in self.features_df.columns:
+            unfilled_features_count = self.features_df.filter(~self.features_df["filled"]).height
         else:
             unfilled_features_count = 0
         # Calculate features in consensus vs not in consensus (only for unfilled features)
         if not self.features_df.is_empty() and not self.consensus_mapping_df.is_empty():
             # Get unfilled features only
-            unfilled_features = self.features_df.filter(~self.features_df['filled']) if 'filled' in self.features_df.columns else self.features_df
+            unfilled_features = (
+                self.features_df.filter(~self.features_df["filled"])
+                if "filled" in self.features_df.columns
+                else self.features_df
+            )
             # Ensure the column and list have matching data types
-            consensus_feature_uids = self.consensus_mapping_df['feature_uid'].to_list()
+            consensus_feature_uids = self.consensus_mapping_df["feature_uid"].to_list()
             # Check if we need to cast either side to match types
-            unfilled_dtype = unfilled_features['feature_uid'].dtype
-            consensus_dtype = self.consensus_mapping_df['feature_uid'].dtype
+            unfilled_dtype = unfilled_features["feature_uid"].dtype
+            consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
             if unfilled_dtype != consensus_dtype:
                 # Cast both to Int64 if possible, otherwise keep as string
                 try:
-                    unfilled_features = unfilled_features.with_columns(pl.col('feature_uid').cast(pl.Int64))
+                    unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Int64))
                     consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
                 except Exception:
                     # If casting fails, ensure both are strings
-                    unfilled_features = unfilled_features.with_columns(pl.col('feature_uid').cast(pl.Utf8))
+                    unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Utf8))
                     consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
             # Count unfilled features that are in consensus
             in_consensus_count = unfilled_features.filter(
-                pl.col('feature_uid').is_in(consensus_feature_uids)
+                pl.col("feature_uid").is_in(consensus_feature_uids),
             ).height
             # Calculate ratios that sum to 100%
             total_unfilled = unfilled_features.height
             ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
@@ -572,16 +577,20 @@ class Study:
             # Ensure matching data types for join keys
             features_dtype = self.features_df["feature_uid"].dtype
             consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
             if features_dtype != consensus_dtype:
                 # Try to cast both to Int64, fallback to string if needed
                 try:
                     self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
-                    self.consensus_mapping_df = self.consensus_mapping_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
+                    self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
+                        pl.col("feature_uid").cast(pl.Int64)
+                    )
                 except Exception:
                     # If casting to Int64 fails, cast both to string
                     self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
-                    self.consensus_mapping_df = self.consensus_mapping_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
+                    self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
+                        pl.col("feature_uid").cast(pl.Utf8)
+                    )
             # Use more efficient counting - count non-null chroms only for features in consensus mapping
             if not self.consensus_mapping_df.is_empty():
@@ -599,21 +608,21 @@ class Study:
             else:
                 non_null_chroms = 0
             total_possible = samples_df_len * consensus_df_len
-            chrom_completeness = (
-                non_null_chroms / total_possible if total_possible > 0 else 0
-            )
+            chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
         else:
             chrom_completeness = 0
         # Calculate consensus features with MS2 (count unique consensus_uids with MS2)
         if not self.consensus_ms2.is_empty():
             consensus_with_ms2_count = self.consensus_ms2["consensus_uid"].n_unique()
         else:
             consensus_with_ms2_count = 0
         # Calculate percentage of consensus features with MS2
-        consensus_with_ms2_percentage = (consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
+        consensus_with_ms2_percentage = (
+            (consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
+        )
         # Total MS2 spectra count
         total_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
@@ -632,15 +641,15 @@ class Study:
             f"Samples:                {samples_df_len}\n"
             f"Features:               {unfilled_features_count}\n"
             f"- in consensus:         {ratio_in_consensus_to_total:.0f}%\n"
-            f"- not in consensus:     {ratio_not_in_consensus_to_total:.0f}%\n"
+            f"- not in consensus:     {ratio_not_in_consensus_to_total:.0f}%\n"
             f"Consensus:              {consensus_df_len}\n"
             f"- Min samples count:    {min_samples:.0f}\n"
             f"- Mean samples count:   {mean_samples:.0f}\n"
-            f"- Max samples count:    {max_samples:.0f}\n"
+            f"- Max samples count:    {max_samples:.0f}\n"
             f"- with MS2:             {consensus_with_ms2_percentage:.0f}%\n"
             f"- total MS2:            {total_ms2_count}\n"
-            f"Chrom completeness:     {chrom_completeness*100:.0f}%\n"
-            f"Memory usage:           {memory_usage / (1024 ** 2):.2f} MB\n"
+            f"Chrom completeness:     {chrom_completeness * 100:.0f}%\n"
+            f"Memory usage:           {memory_usage / (1024**2):.2f} MB\n"
         )
         print(summary)

masster/study/study5_schema.json CHANGED Viewed

@@ -1,43 +1,43 @@
 {
   "consensus_df": {
     "columns": {
-      "consensus_uid": {
-        "dtype": "pl.Int64"
-      },
-      "consensus_id": {
-        "dtype": "pl.Utf8"
+      "adducts": {
+        "dtype": "pl.Object"
       },
-      "quality": {
+      "bl": {
         "dtype": "pl.Float64"
       },
-      "number_samples": {
-        "dtype": "pl.Int64"
-      },
-      "rt": {
+      "charge_mean": {
         "dtype": "pl.Float64"
       },
-      "mz": {
+      "chrom_coherence_mean": {
         "dtype": "pl.Float64"
       },
-      "rt_min": {
+      "chrom_height_scaled_mean": {
         "dtype": "pl.Float64"
       },
-      "rt_max": {
+      "chrom_prominence_mean": {
         "dtype": "pl.Float64"
       },
-      "rt_mean": {
+      "chrom_prominence_scaled_mean": {
         "dtype": "pl.Float64"
       },
-      "rt_start_mean": {
+      "consensus_id": {
+        "dtype": "pl.Utf8"
+      },
+      "consensus_uid": {
+        "dtype": "pl.Int64"
+      },
+      "inty_mean": {
         "dtype": "pl.Float64"
       },
-      "rt_end_mean": {
+      "iso_mean": {
         "dtype": "pl.Float64"
       },
-      "rt_delta_mean": {
+      "mz": {
         "dtype": "pl.Float64"
       },
-      "mz_min": {
+      "mz_end_mean": {
         "dtype": "pl.Float64"
       },
       "mz_max": {
@@ -46,41 +46,41 @@
       "mz_mean": {
         "dtype": "pl.Float64"
       },
-      "mz_start_mean": {
+      "mz_min": {
         "dtype": "pl.Float64"
       },
-      "mz_end_mean": {
+      "mz_start_mean": {
         "dtype": "pl.Float64"
       },
-      "inty_mean": {
-        "dtype": "pl.Float64"
+      "number_ms2": {
+        "dtype": "pl.Int64"
       },
-      "bl": {
-        "dtype": "pl.Float64"
+      "number_samples": {
+        "dtype": "pl.Int64"
       },
-      "chrom_coherence_mean": {
+      "quality": {
         "dtype": "pl.Float64"
       },
-      "chrom_prominence_mean": {
+      "rt": {
         "dtype": "pl.Float64"
       },
-      "chrom_prominence_scaled_mean": {
+      "rt_delta_mean": {
         "dtype": "pl.Float64"
       },
-      "chrom_height_scaled_mean": {
+      "rt_end_mean": {
         "dtype": "pl.Float64"
       },
-      "iso_mean": {
+      "rt_max": {
         "dtype": "pl.Float64"
       },
-      "charge_mean": {
+      "rt_mean": {
         "dtype": "pl.Float64"
       },
-      "number_ms2": {
-        "dtype": "pl.Int64"
+      "rt_min": {
+        "dtype": "pl.Float64"
       },
-      "adducts": {
-        "dtype": "pl.Object"
+      "rt_start_mean": {
+        "dtype": "pl.Float64"
       }
     }
   },
@@ -133,106 +133,112 @@
   },
   "features_df": {
     "columns": {
-      "feature_uid": {
+      "adduct": {
+        "dtype": "pl.Utf8"
+      },
+      "adduct_group": {
         "dtype": "pl.Int64"
       },
-      "sample_uid": {
+      "adduct_mass": {
+        "dtype": "pl.Float64"
+      },
+      "charge": {
         "dtype": "pl.Int32"
       },
-      "feature_id": {
-        "dtype": "pl.Utf8"
+      "chrom": {
+        "dtype": "pl.Object"
       },
-      "mz": {
+      "chrom_area": {
         "dtype": "pl.Float64"
       },
-      "rt": {
+      "chrom_coherence": {
         "dtype": "pl.Float64"
       },
-      "rt_original": {
+      "chrom_height_scaled": {
         "dtype": "pl.Float64"
       },
-      "rt_start": {
+      "chrom_prominence": {
         "dtype": "pl.Float64"
       },
-      "rt_end": {
+      "chrom_prominence_scaled": {
         "dtype": "pl.Float64"
       },
-      "rt_delta": {
-        "dtype": "pl.Float64"
+      "feature_id": {
+        "dtype": "pl.Utf8"
       },
-      "mz_start": {
-        "dtype": "pl.Float64"
+      "feature_uid": {
+        "dtype": "pl.Int64"
       },
-      "mz_end": {
-        "dtype": "pl.Float64"
+      "filled": {
+        "dtype": "pl.Boolean"
       },
       "inty": {
         "dtype": "pl.Float64"
       },
-      "quality": {
-        "dtype": "pl.Float64"
-      },
-      "charge": {
-        "dtype": "pl.Int32"
-      },
       "iso": {
         "dtype": "pl.Int64"
       },
       "iso_of": {
         "dtype": "pl.Int64"
       },
-      "adduct_group": {
-        "dtype": "pl.Int64"
+      "ms2_scans": {
+        "dtype": "pl.Object"
       },
-      "adduct": {
-        "dtype": "pl.Utf8"
+      "ms2_specs": {
+        "dtype": "pl.Object"
       },
-      "adduct_mass": {
+      "mz": {
         "dtype": "pl.Float64"
       },
-      "filled": {
-        "dtype": "pl.Boolean"
+      "mz_end": {
+        "dtype": "pl.Float64"
       },
-      "chrom_area": {
+      "mz_start": {
         "dtype": "pl.Float64"
       },
-      "chrom": {
-        "dtype": "pl.Object"
+      "quality": {
+        "dtype": "pl.Float64"
       },
-      "chrom_coherence": {
+      "rt": {
         "dtype": "pl.Float64"
       },
-      "chrom_prominence": {
+      "rt_delta": {
         "dtype": "pl.Float64"
       },
-      "chrom_prominence_scaled": {
+      "rt_end": {
         "dtype": "pl.Float64"
       },
-      "chrom_height_scaled": {
+      "rt_original": {
         "dtype": "pl.Float64"
       },
-      "ms2_scans": {
-        "dtype": "pl.Object"
+      "rt_start": {
+        "dtype": "pl.Float64"
       },
-      "ms2_specs": {
-        "dtype": "pl.Object"
+      "sample_uid": {
+        "dtype": "pl.Int32"
       }
     }
   },
   "samples_df": {
     "columns": {
+      "file_source": {
+        "dtype": "pl.Utf8"
+      },
       "map_id": {
         "dtype": "pl.Utf8"
       },
+      "ms1": {
+        "dtype": "pl.Int64"
+      },
+      "ms2": {
+        "dtype": "pl.Int64"
+      },
       "sample_name": {
         "dtype": "pl.Utf8"
       },
       "sample_path": {
         "dtype": "pl.Utf8"
       },
-      "file_source": {
-        "dtype": "pl.Utf8"
-      },
       "sample_type": {
         "dtype": "pl.Utf8"
       },
@@ -241,12 +247,6 @@
       },
       "size": {
         "dtype": "pl.Int64"
-      },
-      "ms1": {
-        "dtype": "pl.Int64"
-      },
-      "ms2": {
-        "dtype": "pl.Int64"
       }
     }
   }

{masster-0.3.10.dist-info → masster-0.3.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.3.10
+Version: 0.3.11
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster

masster 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl

Potentially problematic release.

masster 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl