PyPI - masster - Versions diffs - 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl - Mend

masster 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (34)

masster/chromatogram.py +2 -2
masster/data/libs/urine.csv +3 -3
masster/logger.py +8 -8
masster/sample/adducts.py +337 -263
masster/sample/defaults/find_adducts_def.py +21 -8
masster/sample/h5.py +557 -278
masster/sample/helpers.py +131 -75
masster/sample/lib.py +2 -2
masster/sample/load.py +25 -11
masster/sample/plot.py +5 -5
masster/sample/processing.py +115 -85
masster/sample/sample.py +28 -15
masster/sample/sample5_schema.json +44 -44
masster/sample/save.py +34 -11
masster/spectrum.py +2 -2
masster/study/defaults/align_def.py +5 -1
masster/study/defaults/identify_def.py +3 -1
masster/study/defaults/study_def.py +58 -25
masster/study/export.py +354 -204
masster/study/h5.py +557 -155
masster/study/helpers.py +487 -194
masster/study/id.py +536 -347
masster/study/load.py +228 -138
masster/study/plot.py +68 -68
masster/study/processing.py +455 -253
masster/study/save.py +14 -4
masster/study/study.py +122 -40
masster/study/study5_schema.json +149 -149
{masster-0.4.4.dist-info → masster-0.4.6.dist-info}/METADATA +5 -3
{masster-0.4.4.dist-info → masster-0.4.6.dist-info}/RECORD +34 -34
{masster-0.4.4.dist-info → masster-0.4.6.dist-info}/WHEEL +0 -0
{masster-0.4.4.dist-info → masster-0.4.6.dist-info}/entry_points.txt +0 -0
{masster-0.4.4.dist-info → masster-0.4.6.dist-info}/licenses/LICENSE +0 -0
{masster-0.4.4.dist-info → masster-0.4.6.dist-info}/top_level.txt +0 -0

masster/study/save.py CHANGED Viewed

@@ -48,8 +48,14 @@ def save(self, filename=None, add_timestamp=True, compress=False):
     # Log file size information for performance monitoring
     if hasattr(self, "features_df") and not self.features_df.is_empty():
         feature_count = len(self.features_df)
-        sample_count = len(self.samples_df) if hasattr(self, "samples_df") and not self.samples_df.is_empty() else 0
-        self.logger.info(f"Saving study with {sample_count} samples and {feature_count} features to {filename}")
+        sample_count = (
+            len(self.samples_df)
+            if hasattr(self, "samples_df") and not self.samples_df.is_empty()
+            else 0
+        )
+        self.logger.info(
+            f"Saving study with {sample_count} samples and {feature_count} features to {filename}",
+        )
     # Use compressed mode for large datasets
     if compress:
@@ -121,7 +127,9 @@ def save_samples(self, samples=None):
         if sample_path.endswith(".sample5"):
             # If sample_path is a .sample5 file, save featureXML in the same directory
             featurexml_filename = sample_path.replace(".sample5", ".featureXML")
-            self.logger.debug(f"Saving featureXML alongside .sample5 file: {featurexml_filename}")
+            self.logger.debug(
+                f"Saving featureXML alongside .sample5 file: {featurexml_filename}",
+            )
         else:
             # Fallback to study folder or current directory (original behavior)
             if self.folder is not None:
@@ -134,7 +142,9 @@ def save_samples(self, samples=None):
                     os.getcwd(),
                     sample_name + ".featureXML",
                 )
-            self.logger.debug(f"Saving featureXML to default location: {featurexml_filename}")
+            self.logger.debug(
+                f"Saving featureXML to default location: {featurexml_filename}",
+            )
         fh = oms.FeatureXMLFile()
         if sample_index is not None and sample_index < len(self.features_maps):

masster/study/study.py CHANGED Viewed

@@ -125,11 +125,22 @@ from masster.study.parameters import update_parameters
 from masster.study.parameters import get_parameters_property
 from masster.study.parameters import set_parameters_property
 from masster.study.save import save, save_consensus, _save_consensusXML, save_samples
-from masster.study.export import export_mgf, export_mztab, export_xlsx, export_parquet, _get_mgf_df
+from masster.study.export import (
+    export_mgf,
+    export_mztab,
+    export_xlsx,
+    export_parquet,
+    _get_mgf_df,
+)
 from masster.study.id import lib_load, identify, get_id, id_reset, lib_reset
-from masster.study.id import _get_adducts, _calculate_formula_mass_shift, _format_adduct_name, _parse_element_counts
-from masster.logger import MassterLogger
+from masster.study.id import (
+    _get_adducts,
+    _calculate_formula_mass_shift,
+    _format_adduct_name,
+    _parse_element_counts,
+)
+from masster.logger import MasterLogger
 from masster.study.defaults.study_def import study_defaults
 from masster.study.defaults.align_def import align_defaults
 from masster.study.defaults.export_def import export_mgf_defaults
@@ -177,8 +188,8 @@ class Study:
         - `export_consensus()`: Export consensus features for downstream analysis.
     Example Usage:
-        >>> from masster import study
-        >>> study_obj = study(folder="./data")
+        >>> from masster import Study
+        >>> study_obj = Study(folder="./data")
         >>> study_obj.load_folder("./mzml_files")
         >>> study_obj.process_all()
         >>> study_obj.align()
@@ -272,7 +283,11 @@ class Study:
         # Set instance attributes (ensure proper string values for logger)
         self.folder = params.folder
         self.label = params.label
-        self.polarity = params.polarity if params.polarity in ["positive", "negative", "pos", "neg"] else "positive"
+        self.polarity = (
+            params.polarity
+            if params.polarity in ["positive", "negative", "pos", "neg"]
+            else "positive"
+        )
         self.log_level = params.log_level.upper() if params.log_level else "INFO"
         self.log_label = params.log_label + " | " if params.log_label else ""
         self.log_sink = params.log_sink
@@ -327,7 +342,7 @@ class Study:
         self.id_df = pl.DataFrame()
         # Initialize independent logger
-        self.logger = MassterLogger(
+        self.logger = MasterLogger(
             instance_type="study",
             level=self.log_level.upper(),
             label=self.log_label,
@@ -427,7 +442,9 @@ class Study:
     fill = fill
     fill_chrom = fill  # Backward compatibility alias
     _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
-    _get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
+    _get_missing_consensus_sample_combinations = (
+        _get_missing_consensus_sample_combinations
+    )
     _load_consensusXML = _load_consensusXML
     load_features = load_features
     sanitize = sanitize
@@ -485,7 +502,10 @@ class Study:
         # Get all currently loaded modules that are part of the study package
         for module_name in sys.modules:
-            if module_name.startswith(study_module_prefix) and module_name != current_module:
+            if (
+                module_name.startswith(study_module_prefix)
+                and module_name != current_module
+            ):
                 study_modules.append(module_name)
         # Add core masster modules
@@ -500,7 +520,10 @@ class Study:
         sample_modules = []
         sample_module_prefix = f"{base_modname}.sample."
         for module_name in sys.modules:
-            if module_name.startswith(sample_module_prefix) and module_name != current_module:
+            if (
+                module_name.startswith(sample_module_prefix)
+                and module_name != current_module
+            ):
                 sample_modules.append(module_name)
         all_modules_to_reload = core_modules + sample_modules + study_modules
@@ -538,7 +561,12 @@ class Study:
         """
         return ""
-    def logger_update(self, level: str | None = None, label: str | None = None, sink: str | None = None):
+    def logger_update(
+        self,
+        level: str | None = None,
+        label: str | None = None,
+        sink: str | None = None,
+    ):
         """Update the logging configuration for this Study instance.
         Args:
@@ -570,17 +598,21 @@ class Study:
         that are out of normal range.
         """
         # Cache DataFrame lengths and existence checks
-        consensus_df_len = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
+        consensus_df_len = (
+            len(self.consensus_df) if not self.consensus_df.is_empty() else 0
+        )
         samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
         # Calculate consensus statistics only if consensus_df exists and has data
         if consensus_df_len > 0:
             # Execute the aggregation once
-            stats_result = self.consensus_df.select([
-                pl.col("number_samples").min().alias("min_samples"),
-                pl.col("number_samples").mean().alias("mean_samples"),
-                pl.col("number_samples").max().alias("max_samples"),
-            ]).row(0)
+            stats_result = self.consensus_df.select(
+                [
+                    pl.col("number_samples").min().alias("min_samples"),
+                    pl.col("number_samples").mean().alias("mean_samples"),
+                    pl.col("number_samples").max().alias("max_samples"),
+                ],
+            ).row(0)
             min_samples = stats_result[0] if stats_result[0] is not None else 0
             mean_samples = stats_result[1] if stats_result[1] is not None else 0
@@ -592,7 +624,9 @@ class Study:
         # Count only features where 'filled' == False
         if not self.features_df.is_empty() and "filled" in self.features_df.columns:
-            unfilled_features_count = self.features_df.filter(~self.features_df["filled"]).height
+            unfilled_features_count = self.features_df.filter(
+                ~self.features_df["filled"],
+            ).height
         else:
             unfilled_features_count = 0
@@ -615,12 +649,20 @@ class Study:
             if unfilled_dtype != consensus_dtype:
                 # Cast both to Int64 if possible, otherwise keep as string
                 try:
-                    unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Int64))
-                    consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
+                    unfilled_features = unfilled_features.with_columns(
+                        pl.col("feature_uid").cast(pl.Int64),
+                    )
+                    consensus_feature_uids = [
+                        int(uid) for uid in consensus_feature_uids
+                    ]
                 except Exception:
                     # If casting fails, ensure both are strings
-                    unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Utf8))
-                    consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
+                    unfilled_features = unfilled_features.with_columns(
+                        pl.col("feature_uid").cast(pl.Utf8),
+                    )
+                    consensus_feature_uids = [
+                        str(uid) for uid in consensus_feature_uids
+                    ]
             # Count unfilled features that are in consensus
             in_consensus_count = unfilled_features.filter(
@@ -629,14 +671,22 @@ class Study:
             # Calculate ratios that sum to 100%
             total_unfilled = unfilled_features.height
-            ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
-            ratio_not_in_consensus_to_total = 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
+            ratio_in_consensus_to_total = (
+                (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
+            )
+            ratio_not_in_consensus_to_total = (
+                100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
+            )
         else:
             ratio_in_consensus_to_total = 0
             ratio_not_in_consensus_to_total = 0
         # Optimize chrom completeness calculation
-        if consensus_df_len > 0 and samples_df_len > 0 and not self.features_df.is_empty():
+        if (
+            consensus_df_len > 0
+            and samples_df_len > 0
+            and not self.features_df.is_empty()
+        ):
             # Ensure matching data types for join keys
             features_dtype = self.features_df["feature_uid"].dtype
             consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
@@ -644,13 +694,17 @@ class Study:
             if features_dtype != consensus_dtype:
                 # Try to cast both to Int64, fallback to string if needed
                 try:
-                    self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
+                    self.features_df = self.features_df.with_columns(
+                        pl.col("feature_uid").cast(pl.Int64),
+                    )
                     self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
                         pl.col("feature_uid").cast(pl.Int64),
                     )
                 except Exception:
                     # If casting to Int64 fails, cast both to string
-                    self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
+                    self.features_df = self.features_df.with_columns(
+                        pl.col("feature_uid").cast(pl.Utf8),
+                    )
                     self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
                         pl.col("feature_uid").cast(pl.Utf8),
                     )
@@ -671,7 +725,9 @@ class Study:
             else:
                 non_null_chroms = 0
             total_possible = samples_df_len * consensus_df_len
-            chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
+            chrom_completeness = (
+                non_null_chroms / total_possible if total_possible > 0 else 0
+            )
         else:
             chrom_completeness = 0
@@ -683,23 +739,37 @@ class Study:
         if not self.consensus_df.is_empty():
             # Compute RT spread using only consensus rows with number_samples >= half the number of samples
-            threshold = self.consensus_df.select(pl.col("number_samples").max()).item() / 2 if not self.samples_df.is_empty() else 0
+            threshold = (
+                self.consensus_df.select(pl.col("number_samples").max()).item() / 2
+                if not self.samples_df.is_empty()
+                else 0
+            )
             filtered = self.consensus_df.filter(pl.col("number_samples") >= threshold)
             if filtered.is_empty():
                 rt_spread = -1.0
             else:
-                rt_spread_row = filtered.select((pl.col("rt_max") - pl.col("rt_min")).mean()).row(0)
-                rt_spread = float(rt_spread_row[0]) if rt_spread_row and rt_spread_row[0] is not None else 0.0
+                rt_spread_row = filtered.select(
+                    (pl.col("rt_max") - pl.col("rt_min")).mean(),
+                ).row(0)
+                rt_spread = (
+                    float(rt_spread_row[0])
+                    if rt_spread_row and rt_spread_row[0] is not None
+                    else 0.0
+                )
         else:
             rt_spread = -1.0
         # Calculate percentage of consensus features with MS2
         consensus_with_ms2_percentage = (
-            (consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
+            (consensus_with_ms2_count / consensus_df_len * 100)
+            if consensus_df_len > 0
+            else 0
         )
         # Total MS2 spectra count
-        total_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
+        total_ms2_count = (
+            len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
+        )
         # Estimate memory usage
         memory_usage = (
@@ -712,15 +782,27 @@ class Study:
         # Add warning symbols for out-of-range values
         consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
         rt_spread_text = "N/A" if rt_spread < 0 else f"{rt_spread:.3f}s"
-        rt_spread_warning = f" {_WARNING_SYMBOL}" if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1) else ""
+        rt_spread_warning = (
+            f" {_WARNING_SYMBOL}"
+            if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1)
+            else ""
+        )
         chrom_completeness_pct = chrom_completeness * 100
-        chrom_warning = f" {_WARNING_SYMBOL}" if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0 else ""
+        chrom_warning = (
+            f" {_WARNING_SYMBOL}"
+            if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0
+            else ""
+        )
         max_samples_warning = ""
-        if isinstance(max_samples, (int, float)) and samples_df_len > 0 and max_samples > 0:
+        if (
+            isinstance(max_samples, (int, float))
+            and samples_df_len > 0
+            and max_samples > 0
+        ):
             if max_samples < samples_df_len / 3.0:
                 max_samples_warning = f" {_WARNING_SYMBOL}"
             elif max_samples < samples_df_len * 0.8:

masster 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl

Potentially problematic release.

masster 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl