masster 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of masster might be problematic.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +90 -94
- masster/sample/defaults/sample_def.py +15 -0
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +137 -136
- masster/sample/lib.py +11 -11
- masster/sample/load.py +13 -9
- masster/sample/plot.py +167 -60
- masster/sample/processing.py +150 -153
- masster/sample/sample.py +4 -4
- masster/sample/sample5_schema.json +62 -62
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +224 -6
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +293 -245
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +51 -25
- masster/study/plot.py +453 -17
- masster/study/processing.py +197 -123
- masster/study/save.py +7 -7
- masster/study/study.py +97 -88
- masster/study/study5_schema.json +82 -82
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/METADATA +1 -1
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/RECORD +34 -32
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/WHEEL +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/entry_points.txt +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/licenses/LICENSE +0 -0
masster/sample/helpers.py
CHANGED
```diff
@@ -9,81 +9,81 @@ import polars as pl
 def _estimate_memory_usage(self):
     """
     Estimate the memory usage of all dataframes in the Sample object.
-
+
     Returns:
         dict: A dictionary containing memory usage estimates for each dataframe
             and the total memory usage in bytes and MB.
     """
     memory_usage = {}
     total_bytes = 0
-
+
     # Check features_df
     if self.features_df is not None and len(self.features_df) > 0:
         features_bytes = self.features_df.estimated_size()
-        memory_usage['features_df'] = {
-            'rows': len(self.features_df),
-            'columns': len(self.features_df.columns),
-            'bytes': features_bytes,
-            'mb': features_bytes / (1024 * 1024),
+        memory_usage["features_df"] = {
+            "rows": len(self.features_df),
+            "columns": len(self.features_df.columns),
+            "bytes": features_bytes,
+            "mb": features_bytes / (1024 * 1024),
         }
         total_bytes += features_bytes
     else:
-        memory_usage['features_df'] = {'rows': 0, 'columns': 0, 'bytes': 0, 'mb': 0}
-
+        memory_usage["features_df"] = {"rows": 0, "columns": 0, "bytes": 0, "mb": 0}
+
     # Check scans_df
     if self.scans_df is not None and len(self.scans_df) > 0:
         scans_bytes = self.scans_df.estimated_size()
-        memory_usage['scans_df'] = {
-            'rows': len(self.scans_df),
-            'columns': len(self.scans_df.columns),
-            'bytes': scans_bytes,
-            'mb': scans_bytes / (1024 * 1024),
+        memory_usage["scans_df"] = {
+            "rows": len(self.scans_df),
+            "columns": len(self.scans_df.columns),
+            "bytes": scans_bytes,
+            "mb": scans_bytes / (1024 * 1024),
         }
         total_bytes += scans_bytes
     else:
-        memory_usage['scans_df'] = {'rows': 0, 'columns': 0, 'bytes': 0, 'mb': 0}
-
+        memory_usage["scans_df"] = {"rows": 0, "columns": 0, "bytes": 0, "mb": 0}
+
     # Check ms1_df
     if self.ms1_df is not None and len(self.ms1_df) > 0:
         ms1_bytes = self.ms1_df.estimated_size()
-        memory_usage['ms1_df'] = {
-            'rows': len(self.ms1_df),
-            'columns': len(self.ms1_df.columns),
-            'bytes': ms1_bytes,
-            'mb': ms1_bytes / (1024 * 1024),
+        memory_usage["ms1_df"] = {
+            "rows": len(self.ms1_df),
+            "columns": len(self.ms1_df.columns),
+            "bytes": ms1_bytes,
+            "mb": ms1_bytes / (1024 * 1024),
         }
         total_bytes += ms1_bytes
     else:
-        memory_usage['ms1_df'] = {'rows': 0, 'columns': 0, 'bytes': 0, 'mb': 0}
-
+        memory_usage["ms1_df"] = {"rows": 0, "columns": 0, "bytes": 0, "mb": 0}
+
     # Check chrom_df
     if self.chrom_df is not None and len(self.chrom_df) > 0:
         chrom_bytes = self.chrom_df.estimated_size()
-        memory_usage['chrom_df'] = {
-            'rows': len(self.chrom_df),
-            'columns': len(self.chrom_df.columns),
-            'bytes': chrom_bytes,
-            'mb': chrom_bytes / (1024 * 1024),
+        memory_usage["chrom_df"] = {
+            "rows": len(self.chrom_df),
+            "columns": len(self.chrom_df.columns),
+            "bytes": chrom_bytes,
+            "mb": chrom_bytes / (1024 * 1024),
         }
         total_bytes += chrom_bytes
     else:
-        memory_usage['chrom_df'] = {'rows': 0, 'columns': 0, 'bytes': 0, 'mb': 0}
-
+        memory_usage["chrom_df"] = {"rows": 0, "columns": 0, "bytes": 0, "mb": 0}
+
     # Add total memory usage
-    memory_usage['total'] = {
-        'bytes': total_bytes,
-        'mb': total_bytes / (1024 * 1024),
-        'gb': total_bytes / (1024 * 1024 * 1024),
+    memory_usage["total"] = {
+        "bytes": total_bytes,
+        "mb": total_bytes / (1024 * 1024),
+        "gb": total_bytes / (1024 * 1024 * 1024),
     }
-
+
     # Log the memory usage summary
-    if hasattr(self, 'logger'):
+    if hasattr(self, "logger"):
         self.logger.debug(f"Total DataFrame memory usage: {memory_usage['total']['mb']:.2f} MB")
         for df_name, stats in memory_usage.items():
-            if df_name != 'total' and stats['bytes'] > 0:
+            if df_name != "total" and stats["bytes"] > 0:
                 self.logger.debug(f"{df_name}: {stats['rows']} rows, {stats['mb']:.2f} MB")
-
-    return memory_usage['total']['mb']
+
+    return memory_usage["total"]["mb"]
 
 
 def get_dda_stats(self):
```
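The hunk above appears to be a formatting-only pass (single to double quotes, stripped trailing whitespace), but the underlying pattern is worth noting: polars' `DataFrame.estimated_size()` returns an estimated in-memory size in bytes, and the function simply sums it across the Sample's frames. A minimal standalone sketch of that accounting pattern, with illustrative names rather than masster's API:

```python
import polars as pl


def estimate_df_memory(dfs: dict) -> dict:
    """Sum polars' estimated_size() over a mapping of name -> DataFrame (or None)."""
    usage, total = {}, 0
    for name, df in dfs.items():
        if df is not None and len(df) > 0:
            n = df.estimated_size()  # estimated size in bytes
            usage[name] = {"rows": len(df), "bytes": n, "mb": n / (1024 * 1024)}
            total += n
        else:
            usage[name] = {"rows": 0, "bytes": 0, "mb": 0}
    usage["total"] = {"bytes": total, "mb": total / (1024 * 1024)}
    return usage


print(estimate_df_memory({"features_df": pl.DataFrame({"mz": [100.1, 200.2]}), "scans_df": None}))
```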
```diff
@@ -121,7 +121,7 @@ def _get_scan_uids(self, scans=None, verbose=True):
 def _get_feature_uids(self, features=None, verbose=True):
     """
     Get feature UIDs from various input types.
-
+
     Parameters:
         features: Can be one of the following:
             - None: Returns all feature UIDs from self.features_df
@@ -129,7 +129,7 @@ def _get_feature_uids(self, features=None, verbose=True):
             - polars.DataFrame: Extracts unique values from 'feature_uid' or 'feature_id' column
             - pandas.DataFrame: Extracts unique values from 'feature_uid' or 'feature_id' column
         verbose (bool): Whether to log errors for invalid inputs
-
+
     Returns:
         list: List of feature UIDs
     """
@@ -146,7 +146,7 @@ def _get_feature_uids(self, features=None, verbose=True):
             if verbose:
                 self.logger.warning("No features_df available to validate feature UIDs.")
             return []
-
+
         valid_feature_uids = self.features_df.get_column("feature_uid").to_list()
         feature_uids = [f for f in features if f in valid_feature_uids]
         if verbose and not feature_uids:
@@ -155,50 +155,53 @@ def _get_feature_uids(self, features=None, verbose=True):
     # Handle polars and pandas DataFrames
     try:
         # Check if it's a polars DataFrame
-        if hasattr(features, 'columns') and hasattr(features, 'get_column'):
+        if hasattr(features, "columns") and hasattr(features, "get_column"):
             # Polars DataFrame
             feature_column = None
-            if 'feature_uid' in features.columns:
-                feature_column = 'feature_uid'
-            elif 'feature_id' in features.columns:
-                feature_column = 'feature_id'
-
+            if "feature_uid" in features.columns:
+                feature_column = "feature_uid"
+            elif "feature_id" in features.columns:
+                feature_column = "feature_id"
+
             if feature_column is None:
                 if verbose:
                     self.logger.error("No 'feature_uid' or 'feature_id' column found in polars DataFrame.")
                 return []
-
+
             # Get unique values from the column
             feature_uids = features.get_column(feature_column).unique().to_list()
-
+
         # Check if it's a pandas DataFrame
-        elif hasattr(features, 'columns') and hasattr(features, 'iloc'):
+        elif hasattr(features, "columns") and hasattr(features, "iloc"):
             # Pandas DataFrame
             import pandas as pd
+
             if not isinstance(features, pd.DataFrame):
                 if verbose:
-                    self.logger.error("Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.")
+                    self.logger.error(
+                        "Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame."
+                    )
                 return []
-
+
             feature_column = None
-            if 'feature_uid' in features.columns:
-                feature_column = 'feature_uid'
-            elif 'feature_id' in features.columns:
-                feature_column = 'feature_id'
-
+            if "feature_uid" in features.columns:
+                feature_column = "feature_uid"
+            elif "feature_id" in features.columns:
+                feature_column = "feature_id"
+
             if feature_column is None:
                 if verbose:
                     self.logger.error("No 'feature_uid' or 'feature_id' column found in pandas DataFrame.")
                 return []
-
+
             # Get unique values from the column
             feature_uids = features[feature_column].unique().tolist()
-
+
         else:
             if verbose:
                 self.logger.error("Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.")
             return []
-
+
     except Exception as e:
         if verbose:
             self.logger.error(f"Error processing DataFrame input: {e}")
```
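`_get_feature_uids` distinguishes polars from pandas by duck typing (`get_column` vs `iloc`) rather than importing both libraries up front. A self-contained sketch of that dispatch, using a hypothetical `feature_uid` column:

```python
import polars as pl


def extract_uids(obj, column: str = "feature_uid") -> list:
    """Return unique IDs from either a polars or a pandas DataFrame (sketch)."""
    if hasattr(obj, "get_column"):  # polars DataFrames expose get_column()
        return obj.get_column(column).unique().to_list()
    if hasattr(obj, "iloc"):  # pandas DataFrames expose iloc
        return obj[column].unique().tolist()
    raise TypeError("Expected a polars or pandas DataFrame.")


print(extract_uids(pl.DataFrame({"feature_uid": [1, 1, 2]})))  # e.g. [1, 2]
```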
```diff
@@ -301,7 +304,7 @@ def select(
 ):
     """
     Select features based on specified criteria and return the filtered DataFrame.
-
+
     Parameters:
         mz: m/z range filter (tuple for range, single value for minimum)
         rt: retention time range filter (tuple for range, single value for minimum)
@@ -315,7 +318,7 @@ def select(
         height_scaled: scaled height filter (tuple for range, single value for minimum)
         prominence: prominence filter (tuple for range, single value for minimum)
         height: height filter (tuple for range, single value for minimum)
-
+
     Returns:
         polars.DataFrame: Filtered features DataFrame
     """
```
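The docstring's filter convention (a tuple means a range, a single value means a minimum) maps naturally onto polars expressions. A hedged sketch of how such a criterion could be turned into a filter, assuming a closed range for tuples; `range_expr` is illustrative, not masster's API:

```python
import polars as pl


def range_expr(col: str, bounds):
    """Tuple -> closed range, scalar -> minimum (assumed semantics from the docstring)."""
    if isinstance(bounds, tuple):
        lo, hi = bounds
        return pl.col(col).is_between(lo, hi)
    return pl.col(col) >= bounds


feats = pl.DataFrame({"mz": [100.0, 250.0, 400.0], "rt": [10.0, 20.0, 30.0]})
print(feats.filter(range_expr("mz", (200.0, 500.0)) & range_expr("rt", 15.0)))
```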
```diff
@@ -491,24 +494,22 @@ def select(
     return feats
 
 
-
-
 def _features_sync(self):
     """
-    Synchronizes the OpenMS FeatureMap and features_df by removing features that exist in one 
+    Synchronizes the OpenMS FeatureMap and features_df by removing features that exist in one
     but not the other, using feature_id for mapping between them.
-
+
     This function ensures that:
     - Features in the FeatureMap that don't have corresponding entries in features_df are removed
     - Features in features_df that don't have corresponding entries in the FeatureMap are removed
-
+
     Returns:
         None
-
+
     Side Effects:
         Updates self.features (OpenMS FeatureMap) by creating a new FeatureMap with synchronized features
         Updates self.features_df by filtering to only include features present in the FeatureMap
-
+
     Note:
         Uses feature_id as the mapping key. feature_id contains OpenMS unique IDs that correspond
         to the unique IDs of features in the FeatureMap.
@@ -516,34 +517,34 @@ def _features_sync(self):
     if self.features_df is None or self.features is None:
         self.logger.warning("Cannot sync: features_df or FeatureMap is None.")
         return
-
+
     try:
         # Import pyopenms
         import pyopenms as oms
-
+
         # Get feature_ids from features_df
         df_feature_ids = set(self.features_df.get_column("feature_id").to_list())
-
+
         # Get feature unique IDs from FeatureMap
         feature_map_ids = set()
         for i in range(self.features.size()):
             feature = self.features[i]
             unique_id = str(feature.getUniqueId())  # Convert to string to match DataFrame
             feature_map_ids.add(unique_id)
-
+
         # Find features that exist in both
         common_feature_ids = df_feature_ids & feature_map_ids
-
+
         # Safety check: log error and exit if no features are matching
         if not common_feature_ids:
             self.logger.error(
                 f"No matching features found between FeatureMap and features_df. "
                 f"FeatureMap has {len(feature_map_ids)} features, "
                 f"features_df has {len(df_feature_ids)} features. "
-                f"Cannot synchronize - this indicates a data inconsistency. Exiting without changes."
+                f"Cannot synchronize - this indicates a data inconsistency. Exiting without changes.",
             )
             return
-
+
         # Create new synchronized FeatureMap with only common features
         synced_feature_map = oms.FeatureMap()
         for i in range(self.features.size()):
@@ -551,19 +552,19 @@ def _features_sync(self):
             unique_id = str(feature.getUniqueId())
             if unique_id in common_feature_ids:
                 synced_feature_map.push_back(feature)
-
+
         # Filter features_df to only include features that exist in FeatureMap
         synced_features_df = self.features_df.filter(
-            pl.col("feature_id").is_in(list(common_feature_ids))
+            pl.col("feature_id").is_in(list(common_feature_ids)),
        )
-
+
         # Update the objects
         original_map_size = self.features.size()
         original_df_size = len(self.features_df)
-
+
         self.features = synced_feature_map
         self.features_df = synced_features_df
-
+
         # Log the synchronization results
         map_removed = original_map_size - self.features.size()
         df_removed = original_df_size - len(self.features_df)
```
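The sync logic reduces to a set intersection over stringified OpenMS unique IDs, applied to both containers. The core membership test in isolation (polars side only; the FeatureMap filtering in the hunk above uses the same test):

```python
import polars as pl


def sync_df_to_map(features_df: pl.DataFrame, feature_map_ids: set) -> pl.DataFrame:
    """Keep only rows whose feature_id also occurs in the FeatureMap (sketch)."""
    df_ids = set(features_df.get_column("feature_id").to_list())
    common = df_ids & feature_map_ids  # the intersection drives both sides of the sync
    return features_df.filter(pl.col("feature_id").is_in(list(common)))


df = pl.DataFrame({"feature_id": ["101", "102", "103"]})
print(sync_df_to_map(df, {"102", "103", "104"}))  # rows "102" and "103" survive
```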
```diff
@@ -573,36 +574,36 @@ def _features_sync(self):
             self.logger.info(
                 f"Features synchronized. FeatureMap: {original_map_size} -> {self.features.size()} "
                 f"({map_removed} removed), DataFrame: {original_df_size} -> {len(self.features_df)} "
-                f"({df_removed} removed)"
+                f"({df_removed} removed)",
             )
         else:
             self.logger.debug(
                 f"Features synchronized. FeatureMap: {original_map_size} -> {self.features.size()} "
                 f"({map_removed} removed), DataFrame: {original_df_size} -> {len(self.features_df)} "
-                f"({df_removed} removed)"
+                f"({df_removed} removed)",
             )
-
+
     except ImportError:
         self.logger.warning("PyOpenMS not available, cannot sync FeatureMap")
     except Exception as e:
         self.logger.error(f"Error during feature synchronization: {e}")
 
 
-def features_delete(self, features: list|None=None):
+def features_delete(self, features: list | None = None):
     """
     Delete features from both self.features_df and self.features based on a list of feature UIDs.
-
+
     Parameters:
         features (list, optional): List of feature UIDs to delete. If None, all features will be deleted.
-
+
     Returns:
         None
-
+
     Side Effects:
         Updates self.features_df by removing specified features.
         Updates self.features (OpenMS FeatureMap) by creating a new FeatureMap with only the remaining features.
         Updates self.scans_df by removing feature_uid associations for deleted features.
-
+
     Note:
         The function preserves all OpenMS FeatureMap information by creating a new FeatureMap
         containing only the features that should remain after deletion.
```
```diff
@@ -610,33 +611,33 @@ def features_delete(self, features: list|None=None):
     if self.features_df is None:
         self.logger.warning("No features found.")
         return
-
+
     # Get the feature UIDs to delete
     feature_uids_to_delete = self._get_feature_uids(features=features, verbose=True)
-
+
     if not feature_uids_to_delete:
         self.logger.warning("No valid feature UIDs provided for deletion.")
         return
-
+
     original_count = len(self.features_df)
-
+
     # Update features_df by filtering out the features to delete
     self.features_df = self.features_df.filter(
-        ~pl.col("feature_uid").is_in(feature_uids_to_delete)
+        ~pl.col("feature_uid").is_in(feature_uids_to_delete),
     )
-
+
     # Update the OpenMS FeatureMap by creating a new one with only features to keep
     if self.features is not None:
         try:
             # Import pyopenms
             import pyopenms as oms
-
+
             # Create new FeatureMap with only features to keep
             filtered_map = oms.FeatureMap()
-
+
             # Get the feature UIDs that should remain after deletion
             remaining_feature_uids = self.features_df.get_column("feature_uid").to_list()
-
+
             # Iterate through existing features and keep only those not in deletion list
             for i in range(self.features.size()):
                 feature = self.features[i]
@@ -644,25 +645,25 @@ def features_delete(self, features: list|None=None):
                 # we can check if the current index is in the remaining UIDs
                 if i in remaining_feature_uids:
                     filtered_map.push_back(feature)
-
+
             # Replace the original FeatureMap with the filtered one
             self.features = filtered_map
             self.logger.debug(f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.")
-
+
         except ImportError:
             self.logger.warning("PyOpenMS not available, only updating features_df")
         except Exception as e:
             self.logger.warning(f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.")
-
+
     # Update scans_df to remove feature_uid associations for deleted features
-    if hasattr(self, 'scans_df') and self.scans_df is not None:
+    if hasattr(self, "scans_df") and self.scans_df is not None:
         self.scans_df = self.scans_df.with_columns(
             pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
             .then(None)
             .otherwise(pl.col("feature_uid"))
-            .alias("feature_uid")
+            .alias("feature_uid"),
         )
-
+
     deleted_count = original_count - len(self.features_df)
     self.logger.info(f"Deleted {deleted_count} features. Remaining features: {len(self.features_df)}")
 
```
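Rather than dropping rows, the scans_df update above nulls out dangling `feature_uid` references with a `when/then/otherwise` expression, so the scans themselves are preserved. The same pattern in isolation:

```python
import polars as pl

scans = pl.DataFrame({"scan_uid": [1, 2, 3], "feature_uid": [10, 11, 12]})
deleted_uids = [11]

# Null out feature_uid where it points at a deleted feature; keep it otherwise.
scans = scans.with_columns(
    pl.when(pl.col("feature_uid").is_in(deleted_uids))
    .then(None)
    .otherwise(pl.col("feature_uid"))
    .alias("feature_uid"),
)
print(scans)  # scan 2 keeps its row but loses its feature_uid
```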
```diff
@@ -702,21 +703,21 @@ def _delete_ms2(self):
 def features_filter(self, features):
     """
     Keep only the specified features and delete all others. This is the opposite of features_delete().
-
+
     Parameters:
         features: Can be one of the following:
             - list: List of feature UIDs to keep
             - polars.DataFrame: DataFrame with 'feature_uid' or 'feature_id' column - extracts unique values to keep
             - pandas.DataFrame: DataFrame with 'feature_uid' or 'feature_id' column - extracts unique values to keep
-
+
     Returns:
         None
-
+
     Side Effects:
         Updates self.features_df by keeping only the specified features.
         Updates self.features (OpenMS FeatureMap) by creating a new FeatureMap with only the specified features.
         Updates self.scans_df by removing feature_uid associations for deleted features.
-
+
     Note:
         The function preserves all OpenMS FeatureMap information by creating a new FeatureMap
         containing only the features that should be kept.
@@ -724,38 +725,38 @@ def features_filter(self, features):
     if self.features_df is None:
         self.logger.warning("No features found.")
         return
-
+
     if features is None:
         self.logger.warning("No features specified to keep. Use features_delete() to delete all features.")
         return
-
+
     # Get the feature UIDs to keep
     feature_uids_to_keep = self._get_feature_uids(features=features, verbose=True)
-
+
     if not feature_uids_to_keep:
         self.logger.warning("No valid feature UIDs provided to keep.")
         return
-
+
     original_count = len(self.features_df)
-
+
     # Update features_df by keeping only the specified features
     self.features_df = self.features_df.filter(
-        pl.col("feature_uid").is_in(feature_uids_to_keep)
+        pl.col("feature_uid").is_in(feature_uids_to_keep),
     )
-
+
     # Calculate which features were deleted (all except the ones to keep)
     all_feature_uids = set(range(original_count))  # Assuming sequential UIDs
     feature_uids_to_delete = list(all_feature_uids - set(feature_uids_to_keep))
-
+
     # Update the OpenMS FeatureMap by creating a new one with only features to keep
     if self.features is not None:
         try:
             # Import pyopenms
             import pyopenms as oms
-
+
             # Create new FeatureMap with only features to keep
             filtered_map = oms.FeatureMap()
-
+
             # Iterate through existing features and keep only those in the keep list
             for i in range(self.features.size()):
                 feature = self.features[i]
```
```diff
@@ -763,25 +764,25 @@ def features_filter(self, features):
                 # we can check if the current index is in the keep UIDs
                 if i in feature_uids_to_keep:
                     filtered_map.push_back(feature)
-
+
             # Replace the original FeatureMap with the filtered one
             self.features = filtered_map
             self.logger.debug(f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.")
-
+
         except ImportError:
             self.logger.warning("PyOpenMS not available, only updating features_df")
         except Exception as e:
             self.logger.warning(f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.")
-
+
     # Update scans_df to remove feature_uid associations for deleted features
-    if hasattr(self, 'scans_df') and self.scans_df is not None and feature_uids_to_delete:
+    if hasattr(self, "scans_df") and self.scans_df is not None and feature_uids_to_delete:
         self.scans_df = self.scans_df.with_columns(
             pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
             .then(None)
             .otherwise(pl.col("feature_uid"))
-            .alias("feature_uid")
+            .alias("feature_uid"),
         )
-
+
     kept_count = len(self.features_df)
     deleted_count = original_count - kept_count
     self.logger.info(f"Kept {kept_count} features, deleted {deleted_count} features. Remaining features: {kept_count}")
```
```diff
@@ -789,27 +790,27 @@ def features_filter(self, features):
 
 def set_source(self, filename):
     """
-    Reassign file_source. If filename contains only a path, keep the current basename 
-    and build an absolute path. Check that the new file exists before overwriting 
+    Reassign file_source. If filename contains only a path, keep the current basename
+    and build an absolute path. Check that the new file exists before overwriting
     the old file_source.
-
+
     Parameters:
         filename (str): New file path or directory path
-
+
     Returns:
         None
     """
     import os
-
+
     # Store the old file_source for logging
-    old_file_source = getattr(self, 'file_source', None)
-
+    old_file_source = getattr(self, "file_source", None)
+
     # Check if filename is just a directory path
     if os.path.isdir(filename):
         if old_file_source is None:
             self.logger.error("Cannot build path: no current file_source available")
             return
-
+
         # Get the basename from current file_source
         current_basename = os.path.basename(old_file_source)
         # Build new absolute path
@@ -817,15 +818,15 @@ def set_source(self, filename):
     else:
         # filename is a full path, make it absolute
         new_file_path = os.path.abspath(filename)
-
+
     # Check if the new file exists
     if not os.path.exists(new_file_path):
         self.logger.error(f"File does not exist: {new_file_path}")
         return
-
+
     # Update file_source
     self.file_source = new_file_path
-
+
     # Log the change
     if old_file_source is not None:
         self.logger.info(f"Updated file_source from {old_file_source} to {self.file_source}")
```