PyPI - masster - Versions diffs - 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl - Mend

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (37) hide show

masster/_version.py +1 -1
masster/logger.py +35 -19
masster/sample/adducts.py +15 -29
masster/sample/defaults/find_adducts_def.py +1 -3
masster/sample/defaults/sample_def.py +4 -4
masster/sample/h5.py +203 -361
masster/sample/helpers.py +14 -30
masster/sample/lib.py +3 -3
masster/sample/load.py +21 -29
masster/sample/plot.py +222 -132
masster/sample/processing.py +42 -55
masster/sample/sample.py +37 -46
masster/sample/save.py +37 -61
masster/sample/sciex.py +13 -11
masster/sample/thermo.py +69 -74
masster/spectrum.py +15 -15
masster/study/analysis.py +650 -586
masster/study/defaults/identify_def.py +1 -3
masster/study/defaults/merge_def.py +6 -7
masster/study/defaults/study_def.py +1 -5
masster/study/export.py +35 -96
masster/study/h5.py +134 -211
masster/study/helpers.py +385 -459
masster/study/id.py +239 -290
masster/study/importers.py +84 -93
masster/study/load.py +159 -178
masster/study/merge.py +1112 -1098
masster/study/plot.py +195 -149
masster/study/processing.py +144 -191
masster/study/save.py +14 -13
masster/study/study.py +89 -130
masster/wizard/wizard.py +764 -714
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0

masster/study/save.py CHANGED Viewed

@@ -48,11 +48,7 @@ def save(self, filename=None, add_timestamp=True, compress=False):
     # Log file size information for performance monitoring
     if hasattr(self, "features_df") and not self.features_df.is_empty():
         feature_count = len(self.features_df)
-        sample_count = (
-            len(self.samples_df)
-            if hasattr(self, "samples_df") and not self.samples_df.is_empty()
-            else 0
-        )
+        sample_count = len(self.samples_df) if hasattr(self, "samples_df") and not self.samples_df.is_empty() else 0
         self.logger.debug(
             f"Saving study with {sample_count} samples and {feature_count} features to {filename}",
         )
@@ -60,14 +56,17 @@ def save(self, filename=None, add_timestamp=True, compress=False):
     # Use compressed mode for large datasets
     if compress:
         from masster.study.h5 import _save_study5_compressed
         _save_study5_compressed(self, filename)
     else:
         from masster.study.h5 import _save_study5
         _save_study5(self, filename)
     if self.consensus_map is not None:
         # save the features as a separate file
         from masster.study.save import _save_consensusXML
         _save_consensusXML(self, filename=filename.replace(".study5", ".consensusXML"))
     self.filename = filename
@@ -160,14 +159,15 @@ def _save_consensusXML(self, filename: str):
     if self.consensus_df is None or self.consensus_df.is_empty():
         self.logger.error("No consensus features found.")
         return
     # Build consensus map from consensus_df with proper consensus_id values
     import pyopenms as oms
     consensus_map = oms.ConsensusMap()
     # Set up file descriptions for all samples
     file_descriptions = consensus_map.getColumnHeaders()
-    if hasattr(self, 'samples_df') and not self.samples_df.is_empty():
+    if hasattr(self, "samples_df") and not self.samples_df.is_empty():
         for i, sample_row in enumerate(self.samples_df.iter_rows(named=True)):
             file_description = file_descriptions.get(i, oms.ColumnHeader())
             file_description.filename = sample_row.get("sample_name", f"sample_{i}")
@@ -175,17 +175,17 @@ def _save_consensusXML(self, filename: str):
             file_description.unique_id = i + 1
             file_descriptions[i] = file_description
         consensus_map.setColumnHeaders(file_descriptions)
     # Add consensus features to the map (simplified version without individual features)
     for consensus_row in self.consensus_df.iter_rows(named=True):
         consensus_feature = oms.ConsensusFeature()
         # Set basic properties
         consensus_feature.setRT(float(consensus_row.get("rt", 0.0)))
         consensus_feature.setMZ(float(consensus_row.get("mz", 0.0)))
         consensus_feature.setIntensity(float(consensus_row.get("inty_mean", 0.0)))
         consensus_feature.setQuality(float(consensus_row.get("quality", 1.0)))
         # Set the unique consensus_id as the unique ID
         consensus_id_str = consensus_row.get("consensus_id", "")
         if consensus_id_str and len(consensus_id_str) == 16:
@@ -199,9 +199,9 @@ def _save_consensusXML(self, filename: str):
         else:
             # Fallback to consensus_uid
             consensus_feature.setUniqueId(consensus_row.get("consensus_uid", 0))
         consensus_map.push_back(consensus_feature)
     # Save the consensus map
     fh = oms.ConsensusXMLFile()
     fh.store(filename, consensus_map)
@@ -215,4 +215,5 @@ def save_consensus(self, **kwargs):
         self.logger.error("No consensus map found.")
         return
     from masster.study.save import _save_consensusXML
     _save_consensusXML(self, **kwargs)

masster/study/study.py CHANGED Viewed

@@ -80,7 +80,8 @@ from masster.study.load import add
 from masster.study.load import add_sample
 from masster.study.load import fill
 from masster.study.load import load
-#from masster.study.load import _load_features
+# from masster.study.load import _load_features
 from masster.study.h5 import _load_ms1
 from masster.study.h5 import _load_study5
 from masster.study.h5 import _save_study5
@@ -215,13 +216,13 @@ class Study:
         """
         # ===== PARAMETER INITIALIZATION =====
         auto_load_filename = self._init_parameters(filename, kwargs)
         # ===== DATA STRUCTURES INITIALIZATION =====
         self._init_data_structures()
         # ===== LOGGER INITIALIZATION =====
         self._init_logger()
         # ===== AUTO-LOAD FILE IF PROVIDED =====
         if auto_load_filename is not None:
             self.load(filename=auto_load_filename)
@@ -266,11 +267,7 @@ class Study:
         # Set instance attributes (ensure proper string values for logger)
         self.folder = params.folder
         self.label = params.label
-        self.polarity = (
-            params.polarity
-            if params.polarity in ["positive", "negative", "pos", "neg"]
-            else "positive"
-        )
+        self.polarity = params.polarity if params.polarity in ["positive", "negative", "pos", "neg"] else "positive"
         self.log_level = params.log_level.upper() if params.log_level else "INFO"
         self.log_label = params.log_label + " | " if params.log_label else ""
         self.log_sink = params.log_sink
@@ -278,7 +275,7 @@ class Study:
         # Create folder if it doesn't exist
         if self.folder is not None and not os.path.exists(self.folder):
             os.makedirs(self.folder)
         return auto_load_filename
     def _init_data_structures(self):
@@ -316,11 +313,11 @@ class Study:
                 "num_ms2": pl.Int64,
             },
         )
         # Feature-related data structures
         self.features_maps = []
         self.features_df = pl.DataFrame()
         # Consensus-related data structures
         self.consensus_ms2 = pl.DataFrame()
         self.consensus_df = pl.DataFrame()
@@ -330,7 +327,7 @@ class Study:
         # Library and identification data structures
         self.lib_df = pl.DataFrame()  # populated by lib_load
-        self.id_df = pl.DataFrame()   # populated by identify
+        self.id_df = pl.DataFrame()  # populated by identify
     def _init_logger(self):
         """Initialize the logger for this Study instance."""
@@ -352,24 +349,24 @@ class Study:
     _load_ms1 = _load_ms1
     _load_study5 = _load_study5
     _save_study5 = _save_study5
     # === Sample Management ===
     add = add
     add_sample = add_sample
     # === Core Processing Operations ===
     align = align
-    merge = merge
+    merge = merge
     find_ms2 = find_ms2
     find_iso = find_iso
     reset_iso = reset_iso
     iso_reset = reset_iso
     integrate = integrate
     fill = fill
-    #_estimate_rt_original_for_filled_feature = _estimate_rt_original_for_filled_feature
+    # _estimate_rt_original_for_filled_feature = _estimate_rt_original_for_filled_feature
     # === Data Retrieval and Access ===
     get_consensus = get_consensus
     get_chrom = get_chrom
@@ -382,11 +379,11 @@ class Study:
     get_sample_stats = get_sample_stats
     get_consensus_stats = get_consensus_stats
     _get_adducts = _get_adducts
     # === Data Selection and Filtering ===
     samples_select = samples_select
     samples_delete = samples_delete
     features_select = features_select
     features_filter = features_filter
     features_delete = features_delete
@@ -397,22 +394,22 @@ class Study:
     # === Sample Metadata and Styling ===
     set_samples_source = set_samples_source
     set_samples_color = set_samples_color
     samples_name_replace = sample_name_replace
     samples_name_reset = sample_name_reset
     # Backward compatibility aliases for renamed methods
     set_folder = set_study_folder
-    set_source = set_samples_source
-    #sample_color = set_samples_color
-    #get_sample = get_samples
-    #load_features = _load_features
+    set_source = set_samples_source
+    # sample_color = set_samples_color
+    # get_sample = get_samples
+    # load_features = _load_features
     store_history = update_history
     # === Data Compression and Storage ===
     compress = compress
     decompress = decompress
     # === Reset Operations ===
     consensus_reset = consensus_reset
     fill_reset = fill_reset
@@ -435,27 +432,29 @@ class Study:
     # === Analysis Operations ===
     analyze_umap = analyze_umap
     # === Export Operations ===
     export_mgf = export_mgf
     export_mztab = export_mztab
     export_xlsx = export_xlsx
     export_parquet = export_parquet
     # === Identification and Library Matching ===
     lib_load = lib_load
     def lib_to_consensus(self, **kwargs):
         """Create consensus features from library entries."""
         from masster.study.id import lib_to_consensus as _lib_to_consensus
         return _lib_to_consensus(self, **kwargs)
     identify = identify
     get_id = get_id
     id_reset = id_reset
     reset_id = id_reset
     lib_reset = lib_reset
     reset_lib = lib_reset
     # === Oracle Import Operations ===
     import_oracle = import_oracle
@@ -465,12 +464,12 @@ class Study:
     update_parameters = update_parameters
     get_parameters_property = get_parameters_property
     set_parameters_property = set_parameters_property
     # === Private/Internal Methods ===
     _get_consensus_uids = _get_consensus_uids
     _get_features_uids = _get_features_uids
     _get_samples_uids = _get_samples_uids
     # === Default Parameters ===
     study_defaults = study_defaults
     align_defaults = align_defaults
@@ -506,10 +505,7 @@ class Study:
         # Get all currently loaded modules that are part of the study package
         for module_name in sys.modules:
-            if (
-                module_name.startswith(study_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(study_module_prefix) and module_name != current_module:
                 study_modules.append(module_name)
         # Add core masster modules
@@ -524,20 +520,14 @@ class Study:
         sample_modules = []
         sample_module_prefix = f"{base_modname}.sample."
         for module_name in sys.modules:
-            if (
-                module_name.startswith(sample_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(sample_module_prefix) and module_name != current_module:
                 sample_modules.append(module_name)
         # Add lib submodules
         lib_modules = []
         lib_module_prefix = f"{base_modname}.lib."
         for module_name in sys.modules:
-            if (
-                module_name.startswith(lib_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(lib_module_prefix) and module_name != current_module:
                 lib_modules.append(module_name)
         all_modules_to_reload = core_modules + sample_modules + study_modules + lib_modules
@@ -565,73 +555,66 @@ class Study:
             self.logger.debug("Module reload completed")
         except Exception as e:
             self.logger.error(f"Failed to reload current module {current_module}: {e}")
     def __dir__(self):
         """
-        Custom __dir__ implementation to hide internal methods starting with '_'
-        and backward compatibility aliases from tab completion and dir() calls,
+        Custom __dir__ implementation to hide internal methods starting with '_'
+        and backward compatibility aliases from tab completion and dir() calls,
         while keeping them accessible to class methods.
         Returns:
             list: List of public attribute and method names (excluding internal and deprecated methods)
         """
         # Define backward compatibility aliases to hide
         backward_compatibility_aliases = {
-            'add_folder',           # alias for add
-            'find_consensus',       # alias for merge
-            'integrate_chrom',      # alias for integrate
-            'fill_chrom',           # alias for fill
-            'filter_consensus',     # alias for consensus_filter
-            'select_consensus',     # alias for consensus_select
-            'filter_features',      # alias for features_filter
-            'select_features',      # alias for features_select
-            'consensus_find',       # alias for merge
+            "add_folder",  # alias for add
+            "find_consensus",  # alias for merge
+            "integrate_chrom",  # alias for integrate
+            "fill_chrom",  # alias for fill
+            "filter_consensus",  # alias for consensus_filter
+            "select_consensus",  # alias for consensus_select
+            "filter_features",  # alias for features_filter
+            "select_features",  # alias for features_select
+            "consensus_find",  # alias for merge
             # Backward compatibility for renamed methods
-            'set_folder',           # alias for set_study_folder
-            'set_source',           # alias for set_samples_source
-            'sample_color',         # alias for set_samples_color
-            'get_sample',           # alias for get_samples
-            'load_features',        # alias for _load_features
-            'store_history',        # alias for update_history
-            'sample_color_reset',   # alias for set_samples_color(by=None)
-            'reset_sample_color',   # alias for sample_color_reset
+            "set_folder",  # alias for set_study_folder
+            "set_source",  # alias for set_samples_source
+            "sample_color",  # alias for set_samples_color
+            "get_sample",  # alias for get_samples
+            "load_features",  # alias for _load_features
+            "store_history",  # alias for update_history
+            "sample_color_reset",  # alias for set_samples_color(by=None)
+            "reset_sample_color",  # alias for sample_color_reset
         }
         # Get all attributes from the class
         all_attrs: set[str] = set()
         # Add attributes from the class and all its bases
         for cls in self.__class__.__mro__:
             all_attrs.update(cls.__dict__.keys())
         # Add instance attributes
         all_attrs.update(self.__dict__.keys())
         # Filter out attributes starting with '_' (but keep special methods like __init__, __str__, etc.)
         # Also filter out backward compatibility aliases
         public_attrs = [
-            attr for attr in all_attrs
-            if not attr.startswith('_') or attr.startswith('__') and attr.endswith('__')
+            attr for attr in all_attrs if not attr.startswith("_") or attr.startswith("__") and attr.endswith("__")
         ]
         # Remove backward compatibility aliases from the public attributes
         public_attrs = [attr for attr in public_attrs if attr not in backward_compatibility_aliases]
         return sorted(public_attrs)
     def __str__(self):
         """
         Return a short summary string with number of samples and consensus features.
         """
-        samples = (
-            len(self.samples_df)
-            if (self.samples_df is not None and not self.samples_df.is_empty())
-            else 0
-        )
+        samples = len(self.samples_df) if (self.samples_df is not None and not self.samples_df.is_empty()) else 0
         consensus = (
-            len(self.consensus_df)
-            if (self.consensus_df is not None and not self.consensus_df.is_empty())
-            else 0
+            len(self.consensus_df) if (self.consensus_df is not None and not self.consensus_df.is_empty()) else 0
         )
         return f"{samples} samples, {consensus} consensus"
@@ -699,8 +682,7 @@ class Study:
             max_samples = 0
         # Count only features where 'filled' == False
-        if (self.features_df is not None and not self.features_df.is_empty() and
-            "filled" in self.features_df.columns):
+        if self.features_df is not None and not self.features_df.is_empty() and "filled" in self.features_df.columns:
             unfilled_features_count = self.features_df.filter(
                 ~self.features_df["filled"],
             ).height
@@ -708,8 +690,12 @@ class Study:
             unfilled_features_count = 0
         # Calculate features in consensus vs not in consensus (only for unfilled features)
-        if (self.features_df is not None and not self.features_df.is_empty() and
-            self.consensus_mapping_df is not None and not self.consensus_mapping_df.is_empty()):
+        if (
+            self.features_df is not None
+            and not self.features_df.is_empty()
+            and self.consensus_mapping_df is not None
+            and not self.consensus_mapping_df.is_empty()
+        ):
             # Get unfilled features only
             unfilled_features = (
                 self.features_df.filter(~self.features_df["filled"])
@@ -730,17 +716,13 @@ class Study:
                     unfilled_features = unfilled_features.with_columns(
                         pl.col("feature_uid").cast(pl.Int64),
                     )
-                    consensus_feature_uids = [
-                        int(uid) for uid in consensus_feature_uids
-                    ]
+                    consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
                 except Exception:
                     # If casting fails, ensure both are strings
                     unfilled_features = unfilled_features.with_columns(
                         pl.col("feature_uid").cast(pl.Utf8),
                     )
-                    consensus_feature_uids = [
-                        str(uid) for uid in consensus_feature_uids
-                    ]
+                    consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
             # Count unfilled features that are in consensus
             in_consensus_count = unfilled_features.filter(
@@ -749,12 +731,8 @@ class Study:
             # Calculate ratios that sum to 100%
             total_unfilled = unfilled_features.height
-            ratio_in_consensus_to_total = (
-                (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
-            )
-            ratio_not_in_consensus_to_total = (
-                100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
-            )
+            ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
+            ratio_not_in_consensus_to_total = 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
         else:
             ratio_in_consensus_to_total = 0
             ratio_not_in_consensus_to_total = 0
@@ -789,8 +767,7 @@ class Study:
                     )
             # Use more efficient counting - count non-null chroms only for features in consensus mapping
-            if (self.consensus_mapping_df is not None and
-                not self.consensus_mapping_df.is_empty()):
+            if self.consensus_mapping_df is not None and not self.consensus_mapping_df.is_empty():
                 non_null_chroms = (
                     self.features_df.join(
                         self.consensus_mapping_df.select("feature_uid"),
@@ -805,9 +782,7 @@ class Study:
             else:
                 non_null_chroms = 0
             total_possible = samples_df_len * consensus_df_len
-            chrom_completeness = (
-                non_null_chroms / total_possible if total_possible > 0 else 0
-            )
+            chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
         else:
             chrom_completeness = 0
@@ -831,19 +806,13 @@ class Study:
                 rt_spread_row = filtered.select(
                     (pl.col("rt_max") - pl.col("rt_min")).mean(),
                 ).row(0)
-                rt_spread = (
-                    float(rt_spread_row[0])
-                    if rt_spread_row and rt_spread_row[0] is not None
-                    else 0.0
-                )
+                rt_spread = float(rt_spread_row[0]) if rt_spread_row and rt_spread_row[0] is not None else 0.0
         else:
             rt_spread = -1.0
         # Calculate percentage of consensus features with MS2
         consensus_with_ms2_percentage = (
-            (consensus_with_ms2_count / consensus_df_len * 100)
-            if consensus_df_len > 0
-            else 0
+            (consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
         )
         # Total MS2 spectra count
@@ -865,6 +834,7 @@ class Study:
         if consensus_df_len > 0:
             try:
                 from masster.study.merge import _count_tight_clusters
                 tight_clusters_count = _count_tight_clusters(self, mz_tol=0.04, rt_tol=0.3)
             except Exception:
                 # If tight clusters calculation fails, just use 0
@@ -874,25 +844,13 @@ class Study:
         consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
         rt_spread_text = "N/A" if rt_spread < 0 else f"{rt_spread:.3f}s"
-        rt_spread_warning = (
-            f" {_WARNING_SYMBOL}"
-            if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1)
-            else ""
-        )
+        rt_spread_warning = f" {_WARNING_SYMBOL}" if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1) else ""
         chrom_completeness_pct = chrom_completeness * 100
-        chrom_warning = (
-            f" {_WARNING_SYMBOL}"
-            if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0
-            else ""
-        )
+        chrom_warning = f" {_WARNING_SYMBOL}" if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0 else ""
         max_samples_warning = ""
-        if (
-            isinstance(max_samples, (int, float))
-            and samples_df_len > 0
-            and max_samples > 0
-        ):
+        if isinstance(max_samples, (int, float)) and samples_df_len > 0 and max_samples > 0:
             if max_samples < samples_df_len / 3.0:
                 max_samples_warning = f" {_WARNING_SYMBOL}"
             elif max_samples < samples_df_len * 0.8:
@@ -923,5 +881,6 @@ class Study:
         print(summary)
-if __name__ == "__main__":
+if __name__ == "__main__":
     pass

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

Potentially problematic release.

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl