PyPI - masster - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

masster-0.4.0-py3-none-any.whl → masster-0.4.1-py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (54)

masster/__init__.py +8 -8
masster/_version.py +1 -1
masster/chromatogram.py +3 -9
masster/data/libs/README.md +1 -1
masster/data/libs/ccm.csv +120 -120
masster/data/libs/ccm.py +116 -62
masster/data/libs/central_carbon_README.md +1 -1
masster/data/libs/urine.py +161 -65
masster/data/libs/urine_metabolites.csv +4693 -4693
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +2 -2
masster/logger.py +43 -78
masster/sample/__init__.py +1 -1
masster/sample/adducts.py +264 -338
masster/sample/defaults/find_adducts_def.py +8 -21
masster/sample/defaults/find_features_def.py +1 -6
masster/sample/defaults/get_spectrum_def.py +1 -5
masster/sample/defaults/sample_def.py +1 -5
masster/sample/h5.py +282 -561
masster/sample/helpers.py +75 -131
masster/sample/lib.py +17 -42
masster/sample/load.py +17 -31
masster/sample/parameters.py +2 -6
masster/sample/plot.py +27 -88
masster/sample/processing.py +87 -117
masster/sample/quant.py +51 -57
masster/sample/sample.py +90 -103
masster/sample/sample5_schema.json +44 -44
masster/sample/save.py +12 -35
masster/sample/sciex.py +19 -66
masster/spectrum.py +20 -58
masster/study/__init__.py +1 -1
masster/study/defaults/align_def.py +1 -5
masster/study/defaults/fill_chrom_def.py +1 -5
masster/study/defaults/fill_def.py +1 -5
masster/study/defaults/integrate_chrom_def.py +1 -5
masster/study/defaults/integrate_def.py +1 -5
masster/study/defaults/study_def.py +25 -58
masster/study/export.py +207 -233
masster/study/h5.py +136 -470
masster/study/helpers.py +202 -495
masster/study/helpers_optimized.py +13 -40
masster/study/id.py +110 -213
masster/study/load.py +143 -230
masster/study/plot.py +257 -518
masster/study/processing.py +257 -469
masster/study/save.py +5 -15
masster/study/study.py +276 -379
masster/study/study5_schema.json +96 -96
{masster-0.4.0.dist-info → masster-0.4.1.dist-info}/METADATA +1 -1
masster-0.4.1.dist-info/RECORD +67 -0
masster-0.4.0.dist-info/RECORD +0 -67
{masster-0.4.0.dist-info → masster-0.4.1.dist-info}/WHEEL +0 -0
{masster-0.4.0.dist-info → masster-0.4.1.dist-info}/entry_points.txt +0 -0
{masster-0.4.0.dist-info → masster-0.4.1.dist-info}/licenses/LICENSE +0 -0

masster/study/study.py CHANGED

@@ -52,104 +52,104 @@ import sys
 import polars as pl
 # Study-specific imports
-from master.study.h5 import _load_study5
-from master.study.h5 import _save_study5
-from master.study.h5 import _save_study5_compressed
-from master.study.helpers import _get_consensus_uids
-from master.study.helpers import _get_feature_uids
-from master.study.helpers import _get_sample_uids
-from master.study.helpers import _ensure_features_df_schema_order
-from master.study.helpers import compress
-from master.study.helpers import compress_features
-from master.study.helpers import compress_ms2
-from master.study.helpers import compress_chrom
-from master.study.helpers import restore_features
-from master.study.helpers import restore_chrom
-from master.study.helpers import restore_ms2
-from master.study.helpers import decompress
-from master.study.helpers import fill_reset
-from master.study.helpers import get_chrom
-from master.study.helpers import get_sample
-from master.study.helpers import get_consensus
-from master.study.helpers import get_consensus_matches
-from master.study.helpers import get_consensus_matrix
-from master.study.helpers import get_orphans
-from master.study.helpers import get_gaps_matrix
-from master.study.helpers import get_gaps_stats
-from master.study.helpers import align_reset
-from master.study.helpers import set_folder
-from master.study.helpers import set_source
-from master.study.helpers import sample_color
-from master.study.helpers import sample_color_reset
-from master.study.helpers import sample_name_replace
-from master.study.helpers import sample_name_reset
-from master.study.helpers import samples_select
-from master.study.helpers import samples_delete
-from master.study.helpers import features_select
-from master.study.helpers import features_filter
-from master.study.helpers import features_delete
-from master.study.helpers import consensus_select
-from master.study.helpers import consensus_filter
-from master.study.helpers import consensus_delete
-from master.study.load import add
-from master.study.load import add_sample
-from master.study.load import _add_samples_batch
-from master.study.load import _add_sample_optimized
-from master.study.load import _add_sample_standard
-from master.study.load import _sample_color_reset_optimized
-from master.study.load import fill_single
-from master.study.load import fill
-from master.study.load import _process_sample_for_parallel_fill
-from master.study.load import _get_missing_consensus_sample_combinations
-from master.study.load import load
-from master.study.load import _load_consensusXML
-from master.study.load import load_features
-from master.study.load import sanitize
-from master.study.plot import plot_alignment
-from master.study.plot import plot_consensus_2d
-from master.study.plot import plot_samples_2d
-from master.study.plot import plot_consensus_stats
-from master.study.plot import plot_chrom
-from master.study.plot import plot_pca
-from master.study.plot import plot_bpc
-from master.study.plot import plot_tic
-from master.study.plot import plot_eic
-from master.study.plot import plot_rt_correction
-from master.study.processing import align
-from master.study.processing import merge
-from master.study.processing import integrate
-from master.study.processing import find_ms2
-from master.study.parameters import store_history
-from master.study.parameters import get_parameters
-from master.study.parameters import update_parameters
-from master.study.parameters import get_parameters_property
-from master.study.parameters import set_parameters_property
-from master.study.save import save
-from master.study.save import save_consensus
-from master.study.save import _save_consensusXML
-from master.study.save import save_samples
-from master.study.export import export_mgf
-from master.study.export import export_mztab
-from master.study.export import _get_mgf_df
-from master.study.id import lib_load, identify, get_id
-from master.logger import MasterLogger
-from master.study.defaults.study_def import study_defaults
-from master.study.defaults.align_def import align_defaults
-from master.study.defaults.export_def import export_mgf_defaults
-from master.study.defaults.fill_chrom_def import fill_chrom_defaults
-from master.study.defaults.fill_def import fill_defaults
-from master.study.defaults.find_consensus_def import find_consensus_defaults
-from master.study.defaults.find_ms2_def import find_ms2_defaults
-from master.study.defaults.integrate_chrom_def import integrate_chrom_defaults
-from master.study.defaults.integrate_def import integrate_defaults
-from master.study.defaults.merge_def import merge_defaults
+from masster.study.h5 import _load_study5
+from masster.study.h5 import _save_study5
+from masster.study.h5 import _save_study5_compressed
+from masster.study.helpers import _get_consensus_uids
+from masster.study.helpers import _get_feature_uids
+from masster.study.helpers import _get_sample_uids
+from masster.study.helpers import _ensure_features_df_schema_order
+from masster.study.helpers import compress
+from masster.study.helpers import compress_features
+from masster.study.helpers import compress_ms2
+from masster.study.helpers import compress_chrom
+from masster.study.helpers import restore_features
+from masster.study.helpers import restore_chrom
+from masster.study.helpers import restore_ms2
+from masster.study.helpers import decompress
+from masster.study.helpers import fill_reset
+from masster.study.helpers import get_chrom
+from masster.study.helpers import get_sample
+from masster.study.helpers import get_consensus
+from masster.study.helpers import get_consensus_matches
+from masster.study.helpers import get_consensus_matrix
+from masster.study.helpers import get_orphans
+from masster.study.helpers import get_gaps_matrix
+from masster.study.helpers import get_gaps_stats
+from masster.study.helpers import align_reset
+from masster.study.helpers import set_folder
+from masster.study.helpers import set_source
+from masster.study.helpers import sample_color
+from masster.study.helpers import sample_color_reset
+from masster.study.helpers import sample_name_replace
+from masster.study.helpers import sample_name_reset
+from masster.study.helpers import samples_select
+from masster.study.helpers import samples_delete
+from masster.study.helpers import features_select
+from masster.study.helpers import features_filter
+from masster.study.helpers import features_delete
+from masster.study.helpers import consensus_select
+from masster.study.helpers import consensus_filter
+from masster.study.helpers import consensus_delete
+from masster.study.load import add
+from masster.study.load import add_sample
+from masster.study.load import _add_samples_batch
+from masster.study.load import _add_sample_optimized
+from masster.study.load import _add_sample_standard
+from masster.study.load import _sample_color_reset_optimized
+from masster.study.load import fill_single
+from masster.study.load import fill
+from masster.study.load import _process_sample_for_parallel_fill
+from masster.study.load import _get_missing_consensus_sample_combinations
+from masster.study.load import load
+from masster.study.load import _load_consensusXML
+from masster.study.load import load_features
+from masster.study.load import sanitize
+from masster.study.plot import plot_alignment
+from masster.study.plot import plot_consensus_2d
+from masster.study.plot import plot_samples_2d
+from masster.study.plot import plot_consensus_stats
+from masster.study.plot import plot_chrom
+from masster.study.plot import plot_pca
+from masster.study.plot import plot_bpc
+from masster.study.plot import plot_tic
+from masster.study.plot import plot_eic
+from masster.study.plot import plot_rt_correction
+from masster.study.processing import align
+from masster.study.processing import merge
+from masster.study.processing import integrate
+from masster.study.processing import find_ms2
+from masster.study.parameters import store_history
+from masster.study.parameters import get_parameters
+from masster.study.parameters import update_parameters
+from masster.study.parameters import get_parameters_property
+from masster.study.parameters import set_parameters_property
+from masster.study.save import save
+from masster.study.save import save_consensus
+from masster.study.save import _save_consensusXML
+from masster.study.save import save_samples
+from masster.study.export import export_mgf
+from masster.study.export import export_mztab
+from masster.study.export import _get_mgf_df
+from masster.study.id import lib_load, identify, get_id
+from masster.logger import MassterLogger
+from masster.study.defaults.study_def import study_defaults
+from masster.study.defaults.align_def import align_defaults
+from masster.study.defaults.export_def import export_mgf_defaults
+from masster.study.defaults.fill_chrom_def import fill_chrom_defaults
+from masster.study.defaults.fill_def import fill_defaults
+from masster.study.defaults.find_consensus_def import find_consensus_defaults
+from masster.study.defaults.find_ms2_def import find_ms2_defaults
+from masster.study.defaults.integrate_chrom_def import integrate_chrom_defaults
+from masster.study.defaults.integrate_def import integrate_defaults
+from masster.study.defaults.merge_def import merge_defaults
 # Import sample defaults
-from master.sample.defaults.sample_def import sample_defaults
-from master.sample.defaults.find_features_def import find_features_defaults
-from master.sample.defaults.find_adducts_def import find_adducts_defaults
-from master.sample.defaults.get_spectrum_def import get_spectrum_defaults
+from masster.sample.defaults.sample_def import sample_defaults
+from masster.sample.defaults.find_features_def import find_features_defaults
+from masster.sample.defaults.find_adducts_def import find_adducts_defaults
+from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
 # Warning symbols for info display
 _WARNING_SYMBOL = "⚠️"  # Yellow warning triangle
@@ -181,7 +181,7 @@ class Study:
         - `export_consensus()`: Export consensus features for downstream analysis.
     Example Usage:
-        >>> from master import study
+        >>> from masster import study
         >>> study_obj = study(folder="./data")
         >>> study_obj.load_folder("./mzml_files")
         >>> study_obj.process_all()
@@ -276,11 +276,7 @@ class Study:
         # Set instance attributes (ensure proper string values for logger)
         self.folder = params.folder
         self.label = params.label
-        self.polarity = (
-            params.polarity
-            if params.polarity in ["positive", "negative", "pos", "neg"]
-            else "positive"
-        )
+        self.polarity = params.polarity if params.polarity in ["positive", "negative", "pos", "neg"] else "positive"
         self.log_level = params.log_level.upper() if params.log_level else "INFO"
         self.log_label = params.log_label + " | " if params.log_label else ""
         self.log_sink = params.log_sink
@@ -335,7 +331,7 @@ class Study:
         self.id_df = pl.DataFrame()
         # Initialize independent logger
-        self.logger = MasterLogger(
+        self.logger = MassterLogger(
             instance_type="study",
             level=self.log_level.upper(),
             label=self.log_label,
@@ -436,9 +432,7 @@ class Study:
     fill = fill
     fill_chrom = fill  # Backward compatibility alias
     _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
-    _get_missing_consensus_sample_combinations = (
-        _get_missing_consensus_sample_combinations
-    )
+    _get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
     _load_consensusXML = _load_consensusXML
     load_features = load_features
     sanitize = sanitize
@@ -465,20 +459,20 @@ class Study:
     def _reload(self):
         """
-        Reloads all master modules to pick up any changes to their source code,
+        Reloads all masster modules to pick up any changes to their source code,
         and updates the instance's class reference to the newly reloaded class version.
         This ensures that the instance uses the latest implementation without restarting the interpreter.
         """
         # Reset logger configuration flags to allow proper reconfiguration after reload
         """        try:
-            import master.sample.logger as logger_module
+            import masster.sample.logger as logger_module
             if hasattr(logger_module, "_STUDY_LOGGER_CONFIGURED"):
                 logger_module._STUDY_LOGGER_CONFIGURED = False
         except Exception:
             pass"""
-        # Get the base module name (master)
+        # Get the base module name (masster)
         base_modname = self.__class__.__module__.split(".")[0]
         current_module = self.__class__.__module__
@@ -488,13 +482,10 @@ class Study:
         # Get all currently loaded modules that are part of the study package
         for module_name in sys.modules:
-            if (
-                module_name.startswith(study_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(study_module_prefix) and module_name != current_module:
                 study_modules.append(module_name)
-        # Add core master modules
+        # Add core masster modules
         core_modules = [
             f"{base_modname}._version",
             f"{base_modname}.chromatogram",
@@ -506,10 +497,7 @@ class Study:
         sample_modules = []
         sample_module_prefix = f"{base_modname}.sample."
         for module_name in sys.modules:
-            if (
-                module_name.startswith(sample_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(sample_module_prefix) and module_name != current_module:
                 sample_modules.append(module_name)
         all_modules_to_reload = core_modules + sample_modules + study_modules
@@ -541,11 +529,11 @@ class Study:
     def _get_adducts(self, adducts_list: list = None, **kwargs):
         """
         Generate comprehensive adduct specifications for study-level adduct filtering.
         This method creates a DataFrame of adduct combinations that will be used to filter
         and score adducts at the study level. Similar to sample._get_adducts() but uses
         study-level parameters and constraints.
         Parameters
         ----------
         adducts_list : List[str], optional
@@ -554,10 +542,10 @@ class Study:
         **kwargs : dict
             Override parameters, including:
             - charge_min: Minimum charge to consider (default 1)
-            - charge_max: Maximum charge to consider (default 3)
+            - charge_max: Maximum charge to consider (default 3)
             - max_combinations: Maximum number of adduct components to combine (default 3)
             - min_probability: Minimum probability threshold (default from study parameters)
         Returns
         -------
         pl.DataFrame
@@ -569,304 +557,272 @@ class Study:
             - complexity: Number of adduct components (1-3)
         """
         # Import required modules
+        from collections import Counter
+        from itertools import combinations
+        import numpy as np
         # Use provided adducts list or get from study parameters
         if adducts_list is None:
-            adducts_list = (
-                self.parameters.adducts
-                if hasattr(self.parameters, "adducts") and self.parameters.adducts
-                else []
-            )
+            adducts_list = self.parameters.adducts if hasattr(self.parameters, 'adducts') and self.parameters.adducts else []
         # Get parameters with study-specific defaults
-        charge_min = kwargs.get("charge_min", -3)  # Allow negative charges
-        charge_max = kwargs.get("charge_max", 3)  # Study uses up to charge ±3
-        max_combinations = kwargs.get("max_combinations", 3)  # Up to 3 combinations
-        min_probability = kwargs.get(
-            "min_probability",
-            getattr(self.parameters, "adduct_min_probability", 0.04),
-        )
+        charge_min = kwargs.get('charge_min', -3)  # Allow negative charges
+        charge_max = kwargs.get('charge_max', 3)   # Study uses up to charge ±3
+        max_combinations = kwargs.get('max_combinations', 3)  # Up to 3 combinations
+        min_probability = kwargs.get('min_probability', getattr(self.parameters, 'adduct_min_probability', 0.04))
         # Parse base adduct specifications
         base_specs = []
         for adduct_str in adducts_list:
-            if not isinstance(adduct_str, str) or ":" not in adduct_str:
+            if not isinstance(adduct_str, str) or ':' not in adduct_str:
                 continue
             try:
-                parts = adduct_str.split(":")
+                parts = adduct_str.split(':')
                 if len(parts) != 3:
                     continue
                 formula_part = parts[0]
-                charge = int(parts[1])
+                charge = int(parts[1])
                 probability = float(parts[2])
                 # Calculate mass shift from formula
                 mass_shift = self._calculate_formula_mass_shift(formula_part)
-                base_specs.append(
-                    {
-                        "formula": formula_part,
-                        "charge": charge,
-                        "mass_shift": mass_shift,
-                        "probability": probability,
-                        "raw_string": adduct_str,
-                    },
-                )
+                base_specs.append({
+                    'formula': formula_part,
+                    'charge': charge,
+                    'mass_shift': mass_shift,
+                    'probability': probability,
+                    'raw_string': adduct_str
+                })
             except (ValueError, IndexError):
                 continue
         if not base_specs:
             # Return empty DataFrame with correct schema
-            return pl.DataFrame(
-                {
-                    "name": [],
-                    "charge": [],
-                    "mass_shift": [],
-                    "probability": [],
-                    "complexity": [],
-                },
-            )
+            return pl.DataFrame({
+                'name': [],
+                'charge': [],
+                'mass_shift': [],
+                'probability': [],
+                'complexity': []
+            })
         # Generate all valid combinations
         combinations_list = []
         # Separate specs by charge type
-        positive_specs = [spec for spec in base_specs if spec["charge"] > 0]
-        negative_specs = [spec for spec in base_specs if spec["charge"] < 0]
-        neutral_specs = [spec for spec in base_specs if spec["charge"] == 0]
+        positive_specs = [spec for spec in base_specs if spec['charge'] > 0]
+        negative_specs = [spec for spec in base_specs if spec['charge'] < 0]
+        neutral_specs = [spec for spec in base_specs if spec['charge'] == 0]
         # 1. Single adducts (filter out neutral adducts with charge == 0)
         for spec in base_specs:
-            if charge_min <= spec["charge"] <= charge_max and spec["charge"] != 0:
+            if charge_min <= spec['charge'] <= charge_max and spec['charge'] != 0:
                 formatted_name = self._format_adduct_name([spec])
-                combinations_list.append(
-                    {
-                        "components": [spec],
-                        "formatted_name": formatted_name,
-                        "total_mass_shift": spec["mass_shift"],
-                        "total_charge": spec["charge"],
-                        "combined_probability": spec["probability"],
-                        "complexity": 1,
-                    },
-                )
+                combinations_list.append({
+                    'components': [spec],
+                    'formatted_name': formatted_name,
+                    'total_mass_shift': spec['mass_shift'],
+                    'total_charge': spec['charge'],
+                    'combined_probability': spec['probability'],
+                    'complexity': 1
+                })
         # 2. Generate multiply charged versions (2H+, 3H+, etc.) - already excludes charge==0
         for spec in positive_specs + negative_specs:
-            base_charge = spec["charge"]
-            for multiplier in range(
-                2,
-                min(max_combinations + 1, 4),
-            ):  # Up to 3x multiplier
+            base_charge = spec['charge']
+            for multiplier in range(2, min(max_combinations + 1, 4)):  # Up to 3x multiplier
                 total_charge = base_charge * multiplier
                 if charge_min <= total_charge <= charge_max and total_charge != 0:
                     components = [spec] * multiplier
                     formatted_name = self._format_adduct_name(components)
-                    combinations_list.append(
-                        {
-                            "components": components,
-                            "formatted_name": formatted_name,
-                            "total_mass_shift": spec["mass_shift"] * multiplier,
-                            "total_charge": total_charge,
-                            "combined_probability": spec["probability"] ** multiplier,
-                            "complexity": multiplier,
-                        },
-                    )
+                    combinations_list.append({
+                        'components': components,
+                        'formatted_name': formatted_name,
+                        'total_mass_shift': spec['mass_shift'] * multiplier,
+                        'total_charge': total_charge,
+                        'combined_probability': spec['probability'] ** multiplier,
+                        'complexity': multiplier
+                    })
         # 3. Mixed combinations (2-component) - limited for study level, filter out charge==0
         if max_combinations >= 2:
             # Positive + Neutral (1 neutral loss only) - but exclude if total charge == 0
             for pos_spec in positive_specs[:2]:  # Limit to first 2 positive specs
                 for neut_spec in neutral_specs[:1]:  # Only 1 neutral loss
-                    total_charge = pos_spec["charge"] + neut_spec["charge"]
+                    total_charge = pos_spec['charge'] + neut_spec['charge']
                     if charge_min <= total_charge <= charge_max and total_charge != 0:
                         components = [pos_spec, neut_spec]
                         formatted_name = self._format_adduct_name(components)
-                        combinations_list.append(
-                            {
-                                "components": components,
-                                "formatted_name": formatted_name,
-                                "total_mass_shift": pos_spec["mass_shift"]
-                                + neut_spec["mass_shift"],
-                                "total_charge": total_charge,
-                                "combined_probability": pos_spec["probability"]
-                                * neut_spec["probability"],
-                                "complexity": 2,
-                            },
-                        )
+                        combinations_list.append({
+                            'components': components,
+                            'formatted_name': formatted_name,
+                            'total_mass_shift': pos_spec['mass_shift'] + neut_spec['mass_shift'],
+                            'total_charge': total_charge,
+                            'combined_probability': pos_spec['probability'] * neut_spec['probability'],
+                            'complexity': 2
+                        })
         # Convert to polars DataFrame
         if combinations_list:
-            combinations_list.sort(
-                key=lambda x: (-x["combined_probability"], x["complexity"]),
-            )
-            adducts_df = pl.DataFrame(
-                [
-                    {
-                        "name": combo["formatted_name"],
-                        "charge": combo["total_charge"],
-                        "mass_shift": combo["total_mass_shift"],
-                        "probability": combo["combined_probability"],
-                        "complexity": combo["complexity"],
-                    }
-                    for combo in combinations_list
-                ],
-            )
+            combinations_list.sort(key=lambda x: (-x['combined_probability'], x['complexity']))
+            adducts_df = pl.DataFrame([
+                {
+                    'name': combo['formatted_name'],
+                    'charge': combo['total_charge'],
+                    'mass_shift': combo['total_mass_shift'],
+                    'probability': combo['combined_probability'],
+                    'complexity': combo['complexity']
+                }
+                for combo in combinations_list
+            ])
             # Filter by minimum probability threshold
             if min_probability > 0.0:
                 adducts_before_filter = len(adducts_df)
                 adducts_df = adducts_df.filter(pl.col("probability") >= min_probability)
                 adducts_after_filter = len(adducts_df)
-                self.logger.debug(
-                    f"Study adducts: generated {adducts_before_filter}, filtered to {adducts_after_filter} (min_prob={min_probability})",
-                )
+                self.logger.debug(f"Study adducts: generated {adducts_before_filter}, filtered to {adducts_after_filter} (min_prob={min_probability})")
         else:
             # Return empty DataFrame with correct schema
-            adducts_df = pl.DataFrame(
-                {
-                    "name": [],
-                    "charge": [],
-                    "mass_shift": [],
-                    "probability": [],
-                    "complexity": [],
-                },
-            )
+            adducts_df = pl.DataFrame({
+                'name': [],
+                'charge': [],
+                'mass_shift': [],
+                'probability': [],
+                'complexity': []
+            })
         return adducts_df
     def _calculate_formula_mass_shift(self, formula: str) -> float:
         """Calculate mass shift from formula string like "+H", "-H2O", "+Na-H", etc."""
         # Standard atomic masses
         atomic_masses = {
-            "H": 1.007825,
-            "C": 12.0,
-            "N": 14.003074,
-            "O": 15.994915,
-            "Na": 22.989769,
-            "K": 38.963707,
-            "Li": 7.016003,
-            "Ca": 39.962591,
-            "Mg": 23.985042,
-            "Fe": 55.934938,
-            "Cl": 34.968853,
-            "Br": 78.918336,
-            "I": 126.904473,
-            "P": 30.973762,
-            "S": 31.972071,
+            'H': 1.007825,
+            'C': 12.0,
+            'N': 14.003074,
+            'O': 15.994915,
+            'Na': 22.989769,
+            'K': 38.963707,
+            'Li': 7.016003,
+            'Ca': 39.962591,
+            'Mg': 23.985042,
+            'Fe': 55.934938,
+            'Cl': 34.968853,
+            'Br': 78.918336,
+            'I': 126.904473,
+            'P': 30.973762,
+            'S': 31.972071
         }
         total_mass = 0.0
         # Parse formula by splitting on + and - while preserving the operators
         parts = []
         current_part = ""
         current_sign = 1
         for char in formula:
-            if char == "+":
+            if char == '+':
                 if current_part:
                     parts.append((current_sign, current_part))
                 current_part = ""
                 current_sign = 1
-            elif char == "-":
+            elif char == '-':
                 if current_part:
                     parts.append((current_sign, current_part))
                 current_part = ""
                 current_sign = -1
             else:
                 current_part += char
         if current_part:
             parts.append((current_sign, current_part))
         # Process each part
         for sign, part in parts:
             if not part:
                 continue
             # Parse element and count (e.g., "H2O" -> H:2, O:1)
             elements = self._parse_element_counts(part)
             for element, count in elements.items():
                 if element in atomic_masses:
                     total_mass += sign * atomic_masses[element] * count
         return total_mass
     def _parse_element_counts(self, formula_part: str) -> dict[str, int]:
         """Parse element counts from a formula part like 'H2O' -> {'H': 2, 'O': 1}"""
         elements = {}
         i = 0
         while i < len(formula_part):
             # Get element (uppercase letter, possibly followed by lowercase)
             element = formula_part[i]
             i += 1
             while i < len(formula_part) and formula_part[i].islower():
                 element += formula_part[i]
                 i += 1
             # Get count (digits following element)
             count_str = ""
             while i < len(formula_part) and formula_part[i].isdigit():
                 count_str += formula_part[i]
                 i += 1
             count = int(count_str) if count_str else 1
             elements[element] = elements.get(element, 0) + count
         return elements
     def _format_adduct_name(self, components: list[dict]) -> str:
         """Format adduct name from components like [M+H]1+ or [M+2H]2+"""
         if not components:
             return "[M]"
         # Count occurrences of each formula
         from collections import Counter
-        formula_counts = Counter(comp["formula"] for comp in components)
-        total_charge = sum(comp["charge"] for comp in components)
+        formula_counts = Counter(comp['formula'] for comp in components)
+        total_charge = sum(comp['charge'] for comp in components)
         # Build formula part with proper multipliers
         formula_parts = []
-        for formula, count in sorted(
-            formula_counts.items(),
-        ):  # Sort for consistent ordering
+        for formula, count in sorted(formula_counts.items()):  # Sort for consistent ordering
             if count == 1:
                 formula_parts.append(formula)
             else:
                 # For multiple occurrences, use count prefix (e.g., 2H, 3Na)
                 # Handle special case where formula might already start with + or -
-                if formula.startswith(("+", "-")):
+                if formula.startswith(('+', '-')):
                     sign = formula[0]
                     base_formula = formula[1:]
                     formula_parts.append(f"{sign}{count}{base_formula}")
                 else:
                     formula_parts.append(f"{count}{formula}")
         # Combine formula parts
         formula = "".join(formula_parts)
         # Format charge
         if total_charge == 0:
             charge_str = ""
         elif abs(total_charge) == 1:
             charge_str = "1+" if total_charge > 0 else "1-"
         else:
-            charge_str = (
-                f"{abs(total_charge)}+" if total_charge > 0 else f"{abs(total_charge)}-"
-            )
+            charge_str = f"{abs(total_charge)}+" if total_charge > 0 else f"{abs(total_charge)}-"
         return f"[M{formula}]{charge_str}"
     def __str__(self):
@@ -878,12 +834,7 @@ class Study:
         """
         return ""
-    def logger_update(
-        self,
-        level: str | None = None,
-        label: str | None = None,
-        sink: str | None = None,
-    ):
+    def logger_update(self, level: str | None = None, label: str | None = None, sink: str | None = None):
         """Update the logging configuration for this Study instance.
         Args:
@@ -915,21 +866,17 @@ class Study:
         that are out of normal range.
         """
         # Cache DataFrame lengths and existence checks
-        consensus_df_len = (
-            len(self.consensus_df) if not self.consensus_df.is_empty() else 0
-        )
+        consensus_df_len = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
         samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
         # Calculate consensus statistics only if consensus_df exists and has data
         if consensus_df_len > 0:
             # Execute the aggregation once
-            stats_result = self.consensus_df.select(
-                [
-                    pl.col("number_samples").min().alias("min_samples"),
-                    pl.col("number_samples").mean().alias("mean_samples"),
-                    pl.col("number_samples").max().alias("max_samples"),
-                ],
-            ).row(0)
+            stats_result = self.consensus_df.select([
+                pl.col("number_samples").min().alias("min_samples"),
+                pl.col("number_samples").mean().alias("mean_samples"),
+                pl.col("number_samples").max().alias("max_samples"),
+            ]).row(0)
             min_samples = stats_result[0] if stats_result[0] is not None else 0
             mean_samples = stats_result[1] if stats_result[1] is not None else 0
@@ -941,9 +888,7 @@ class Study:
         # Count only features where 'filled' == False
         if not self.features_df.is_empty() and "filled" in self.features_df.columns:
-            unfilled_features_count = self.features_df.filter(
-                ~self.features_df["filled"],
-            ).height
+            unfilled_features_count = self.features_df.filter(~self.features_df["filled"]).height
         else:
             unfilled_features_count = 0
@@ -966,20 +911,12 @@ class Study:
             if unfilled_dtype != consensus_dtype:
                 # Cast both to Int64 if possible, otherwise keep as string
                 try:
-                    unfilled_features = unfilled_features.with_columns(
-                        pl.col("feature_uid").cast(pl.Int64),
-                    )
-                    consensus_feature_uids = [
-                        int(uid) for uid in consensus_feature_uids
-                    ]
+                    unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Int64))
+                    consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
                 except Exception:
                     # If casting fails, ensure both are strings
-                    unfilled_features = unfilled_features.with_columns(
-                        pl.col("feature_uid").cast(pl.Utf8),
-                    )
-                    consensus_feature_uids = [
-                        str(uid) for uid in consensus_feature_uids
-                    ]
+                    unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Utf8))
+                    consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
             # Count unfilled features that are in consensus
             in_consensus_count = unfilled_features.filter(
@@ -988,22 +925,14 @@ class Study:
             # Calculate ratios that sum to 100%
             total_unfilled = unfilled_features.height
-            ratio_in_consensus_to_total = (
-                (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
-            )
-            ratio_not_in_consensus_to_total = (
-                100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
-            )
+            ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
+            ratio_not_in_consensus_to_total = 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
         else:
             ratio_in_consensus_to_total = 0
             ratio_not_in_consensus_to_total = 0
         # Optimize chrom completeness calculation
-        if (
-            consensus_df_len > 0
-            and samples_df_len > 0
-            and not self.features_df.is_empty()
-        ):
+        if consensus_df_len > 0 and samples_df_len > 0 and not self.features_df.is_empty():
             # Ensure matching data types for join keys
             features_dtype = self.features_df["feature_uid"].dtype
             consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
@@ -1011,17 +940,13 @@ class Study:
             if features_dtype != consensus_dtype:
                 # Try to cast both to Int64, fallback to string if needed
                 try:
-                    self.features_df = self.features_df.with_columns(
-                        pl.col("feature_uid").cast(pl.Int64),
-                    )
+                    self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
                     self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
                         pl.col("feature_uid").cast(pl.Int64),
                     )
                 except Exception:
                     # If casting to Int64 fails, cast both to string
-                    self.features_df = self.features_df.with_columns(
-                        pl.col("feature_uid").cast(pl.Utf8),
-                    )
+                    self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
                     self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
                         pl.col("feature_uid").cast(pl.Utf8),
                     )
@@ -1042,9 +967,7 @@ class Study:
             else:
                 non_null_chroms = 0
             total_possible = samples_df_len * consensus_df_len
-            chrom_completeness = (
-                non_null_chroms / total_possible if total_possible > 0 else 0
-            )
+            chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
         else:
             chrom_completeness = 0
@@ -1056,37 +979,23 @@ class Study:
         if not self.consensus_df.is_empty():
             # Compute RT spread using only consensus rows with number_samples >= half the number of samples
-            threshold = (
-                self.consensus_df.select(pl.col("number_samples").max()).item() / 2
-                if not self.samples_df.is_empty()
-                else 0
-            )
+            threshold = self.consensus_df.select(pl.col("number_samples").max()).item() / 2 if not self.samples_df.is_empty() else 0
             filtered = self.consensus_df.filter(pl.col("number_samples") >= threshold)
             if filtered.is_empty():
                 rt_spread = -1.0
             else:
-                rt_spread_row = filtered.select(
-                    (pl.col("rt_max") - pl.col("rt_min")).mean(),
-                ).row(0)
-                rt_spread = (
-                    float(rt_spread_row[0])
-                    if rt_spread_row and rt_spread_row[0] is not None
-                    else 0.0
-                )
+                rt_spread_row = filtered.select((pl.col("rt_max") - pl.col("rt_min")).mean()).row(0)
+                rt_spread = float(rt_spread_row[0]) if rt_spread_row and rt_spread_row[0] is not None else 0.0
         else:
             rt_spread = -1.0
         # Calculate percentage of consensus features with MS2
         consensus_with_ms2_percentage = (
-            (consensus_with_ms2_count / consensus_df_len * 100)
-            if consensus_df_len > 0
-            else 0
+            (consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
         )
         # Total MS2 spectra count
-        total_ms2_count = (
-            len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
-        )
+        total_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
         # Estimate memory usage
         memory_usage = (
@@ -1099,27 +1008,15 @@ class Study:
         # Add warning symbols for out-of-range values
         consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
         rt_spread_text = "N/A" if rt_spread < 0 else f"{rt_spread:.3f}s"
-        rt_spread_warning = (
-            f" {_WARNING_SYMBOL}"
-            if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1)
-            else ""
-        )
+        rt_spread_warning = f" {_WARNING_SYMBOL}" if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1) else ""
         chrom_completeness_pct = chrom_completeness * 100
-        chrom_warning = (
-            f" {_WARNING_SYMBOL}"
-            if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0
-            else ""
-        )
+        chrom_warning = f" {_WARNING_SYMBOL}" if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0 else ""
         max_samples_warning = ""
-        if (
-            isinstance(max_samples, (int, float))
-            and samples_df_len > 0
-            and max_samples > 0
-        ):
+        if isinstance(max_samples, (int, float)) and samples_df_len > 0 and max_samples > 0:
             if max_samples < samples_df_len / 3.0:
                 max_samples_warning = f" {_WARNING_SYMBOL}"
             elif max_samples < samples_df_len * 0.8:

masster 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

masster 0.4.0py3-none-any.whl → 0.4.1py3-none-any.whl