PyPI - masster - Versions diffs - 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl - Mend

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (37) hide show

masster/_version.py +1 -1
masster/logger.py +35 -19
masster/sample/adducts.py +15 -29
masster/sample/defaults/find_adducts_def.py +1 -3
masster/sample/defaults/sample_def.py +4 -4
masster/sample/h5.py +203 -361
masster/sample/helpers.py +14 -30
masster/sample/lib.py +3 -3
masster/sample/load.py +21 -29
masster/sample/plot.py +222 -132
masster/sample/processing.py +42 -55
masster/sample/sample.py +37 -46
masster/sample/save.py +37 -61
masster/sample/sciex.py +13 -11
masster/sample/thermo.py +69 -74
masster/spectrum.py +15 -15
masster/study/analysis.py +650 -586
masster/study/defaults/identify_def.py +1 -3
masster/study/defaults/merge_def.py +6 -7
masster/study/defaults/study_def.py +1 -5
masster/study/export.py +35 -96
masster/study/h5.py +134 -211
masster/study/helpers.py +385 -459
masster/study/id.py +239 -290
masster/study/importers.py +84 -93
masster/study/load.py +159 -178
masster/study/merge.py +1112 -1098
masster/study/plot.py +195 -149
masster/study/processing.py +144 -191
masster/study/save.py +14 -13
masster/study/study.py +89 -130
masster/wizard/wizard.py +764 -714
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0

masster/sample/helpers.py CHANGED Viewed

@@ -113,9 +113,7 @@ def _get_scan_uids(self, scans=None, verbose=True):
         scans_uids = self.scans_df.get_column("scan_uid").to_list()
     elif isinstance(scans, list):
         # if scans is a list, ensure all elements are valid scan_uids
-        scans_uids = [
-            s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()
-        ]
+        scans_uids = [s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()]
         if verbose and not scans_uids:
             self.logger.error("No valid scan_uids provided.")
@@ -340,9 +338,7 @@ def get_eic(self, mz, mz_tol=None):
             return None
         # Aggregate intensities per retention time. Use sum in case multiple points per rt.
-        chrom = (
-            matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
-        )
+        chrom = matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
         # Attach to Sample
         self.chrom_df = chrom
@@ -401,16 +397,14 @@ def features_select(
         # self.logger.info("No features found. R")
         return
     feats = self.features_df.clone()
     # Filter by feature UIDs if provided
     if uid is not None:
         if isinstance(uid, tuple) and len(uid) == 2:
             # Handle tuple as range of feature UIDs
             min_uid, max_uid = uid
             feats_len_before_filter = len(feats)
-            feats = feats.filter(
-                (pl.col("feature_uid") >= min_uid) & (pl.col("feature_uid") <= max_uid)
-            )
+            feats = feats.filter((pl.col("feature_uid") >= min_uid) & (pl.col("feature_uid") <= max_uid))
             self.logger.debug(
                 f"Selected features by UID range ({min_uid}-{max_uid}). Features removed: {feats_len_before_filter - len(feats)}",
             )
@@ -420,13 +414,13 @@ def features_select(
             if not feature_uids_to_keep:
                 self.logger.warning("No valid feature UIDs provided.")
                 return feats.limit(0)  # Return empty DataFrame with same structure
             feats_len_before_filter = len(feats)
             feats = feats.filter(pl.col("feature_uid").is_in(feature_uids_to_keep))
             self.logger.debug(
                 f"Selected features by UIDs. Features removed: {feats_len_before_filter - len(feats)}",
             )
     if coherence is not None:
         has_coherence = "chrom_coherence" in self.features_df.columns
         if not has_coherence:
@@ -437,8 +431,7 @@ def features_select(
             if isinstance(coherence, tuple) and len(coherence) == 2:
                 min_coherence, max_coherence = coherence
                 feats = feats.filter(
-                    (pl.col("chrom_coherence") >= min_coherence)
-                    & (pl.col("chrom_coherence") <= max_coherence),
+                    (pl.col("chrom_coherence") >= min_coherence) & (pl.col("chrom_coherence") <= max_coherence),
                 )
             else:
                 feats = feats.filter(pl.col("chrom_coherence") >= coherence)
@@ -489,8 +482,7 @@ def features_select(
         if isinstance(rt_delta, tuple) and len(rt_delta) == 2:
             min_rt_delta, max_rt_delta = rt_delta
             feats = feats.filter(
-                (pl.col("rt_delta") >= min_rt_delta)
-                & (pl.col("rt_delta") <= max_rt_delta),
+                (pl.col("rt_delta") >= min_rt_delta) & (pl.col("rt_delta") <= max_rt_delta),
             )
         else:
             feats = feats.filter(pl.col("rt_delta") >= rt_delta)
@@ -567,8 +559,7 @@ def features_select(
         if isinstance(prominence, tuple) and len(prominence) == 2:
             min_prominence, max_prominence = prominence
             feats = feats.filter(
-                (pl.col("chrom_prominence") >= min_prominence)
-                & (pl.col("chrom_prominence") <= max_prominence),
+                (pl.col("chrom_prominence") >= min_prominence) & (pl.col("chrom_prominence") <= max_prominence),
             )
         else:
             feats = feats.filter(pl.col("chrom_prominence") >= prominence)
@@ -579,9 +570,7 @@ def features_select(
     if height is not None:
         feats_len_before_filter = len(feats)
         # Check if chrom_height column exists, if not use chrom_height_scaled
-        height_col = (
-            "chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
-        )
+        height_col = "chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
         if isinstance(height, tuple) and len(height) == 2:
             min_height, max_height = height
             feats = feats.filter(
@@ -617,6 +606,7 @@ def features_select(
         self.logger.info(f"Selected features. Features remaining: {len(feats)}")
     return feats
 '''
 def _features_sync(self):
     """
@@ -725,6 +715,7 @@ def _features_sync(self):
         self.logger.error(f"Error during feature synchronization: {e}")
 '''
 def features_delete(self, features: list | None = None):
     """
     Delete features from both self.features_df and self._oms_features_map based on a list of feature UIDs.
@@ -841,10 +832,7 @@ def _delete_ms2(self):
     # Update scans_df to remove feature_uid association for linked MS2 spectra
     self.scans_df = self.scans_df.with_columns(
-        pl.when(pl.col("ms_level") == 2)
-        .then(None)
-        .otherwise(pl.col("feature_uid"))
-        .alias("feature_uid"),
+        pl.when(pl.col("ms_level") == 2).then(None).otherwise(pl.col("feature_uid")).alias("feature_uid"),
     )
     self.logger.info("MS2 spectra unlinked from features.")
@@ -930,11 +918,7 @@ def features_filter(self, features):
             )
     # Update scans_df to remove feature_uid associations for deleted features
-    if (
-        hasattr(self, "scans_df")
-        and self.scans_df is not None
-        and feature_uids_to_delete
-    ):
+    if hasattr(self, "scans_df") and self.scans_df is not None and feature_uids_to_delete:
         self.scans_df = self.scans_df.with_columns(
             pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
             .then(None)

masster/sample/lib.py CHANGED Viewed

@@ -221,11 +221,11 @@ def lib_load(self, csvfile=None, polarity=None):
     self.lib = self.lib.where(pd.notnull(self.lib), None)
     # find all elements == nan and replace them with None
     self.lib = self.lib.replace({np.nan: None})
     # Use sample.polarity if polarity parameter is None
     if polarity is None:
-        polarity = getattr(self, 'polarity', 'positive')
+        polarity = getattr(self, "polarity", "positive")
     if polarity is not None:
         if polarity.lower() == "positive":
             self.lib = self.lib[self.lib["z"] > 0]

masster/sample/load.py CHANGED Viewed

@@ -13,11 +13,6 @@ Key Features:
 - **Error Handling**: Comprehensive error reporting for file loading issues.
 - **Raw Data Processing**: Handle centroided and profile data with signal smoothing.
-Dependencies:
-- `pyopenms`: For standard mass spectrometry file format support.
-- `polars` and `pandas`: For efficient data handling and manipulation.
-- `numpy`: For numerical array operations.
 Functions:
 - `load()`: Main file loading function with format detection.
 - `_load_mzML()`: Specialized mzML file loader.
@@ -55,7 +50,9 @@ warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", categor
 # Import pyopenms with suppressed warnings
 with warnings.catch_warnings():
-    warnings.filterwarnings("ignore", message=".*OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning)
+    warnings.filterwarnings(
+        "ignore", message=".*OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning
+    )
     warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", category=UserWarning)
     import pyopenms as oms
@@ -163,6 +160,7 @@ def load_noms1(
         _load_raw(self, filename)
     elif filename.lower().endswith(".sample5"):
         from masster.sample.h5 import _load_sample5_study
         _load_sample5_study(self, filename)  # Use optimized version for study loading
     else:
         raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
@@ -286,12 +284,8 @@ def _load_mzML(
             if len(prec_mz) == 0:
                 continue
             prec_mz = prec_mz[0].getMZ()
-            precursorIsolationWindowLowerMZ = s.getPrecursors()[
-                0
-            ].getIsolationWindowLowerOffset()
-            precursorIsolationWindowUpperMZ = s.getPrecursors()[
-                0
-            ].getIsolationWindowUpperOffset()
+            precursorIsolationWindowLowerMZ = s.getPrecursors()[0].getIsolationWindowLowerOffset()
+            precursorIsolationWindowUpperMZ = s.getPrecursors()[0].getIsolationWindowUpperOffset()
             prec_inty = s.getPrecursors()[0].getIntensity()
             # Try to get collision energy from meta values first, fallback to getActivationEnergy()
             try:
@@ -416,7 +410,7 @@ def _load_raw(
         - Updates instance attributes including self.file_path, self.file_obj, self.file_interface, and self.label.
         - Initiates further analysis by invoking analyze_dda().
     """
-    #from alpharaw.thermo import ThermoRawData
+    # from alpharaw.thermo import ThermoRawData
     from masster.sample.thermo import ThermoRawData
     if not filename:
@@ -482,11 +476,11 @@ def _load_raw(
         # try to get polarity
         if self.polarity is None:
-            if s['polarity'] == 'positive':
-                self.polarity = 'positive'
-            elif s['polarity'] == 'negative':
-                self.polarity = 'negative'
+            if s["polarity"] == "positive":
+                self.polarity = "positive"
+            elif s["polarity"] == "negative":
+                self.polarity = "negative"
         peak_start_idx = s["peak_start_idx"]
         peak_stop_idx = s["peak_stop_idx"]
         peaks = raw_data.peak_df.loc[peak_start_idx : peak_stop_idx - 1]
@@ -639,11 +633,11 @@ def _load_wiff(
         ms_level = s["ms_level"]
         # try to get polarity
         if polarity is None:
-            if s['polarity'] == 'positive':
-                polarity = 'positive'
-            elif s['polarity'] == 'negative':
-                polarity = 'negative'
+            if s["polarity"] == "positive":
+                polarity = "positive"
+            elif s["polarity"] == "negative":
+                polarity = "negative"
         if ms_level == 1:
             cycle += 1
             prec_mz = None
@@ -745,6 +739,7 @@ def _load_wiff(
     self.file_source = filename
     self.file_obj = raw_data
     self.file_interface = "alpharaw"
+    self.polarity = polarity
     self.label = os.path.basename(filename)
     self.ms1_df = pl.DataFrame(ms1_df_records, schema=schema)
     if self.type != "ztscan":
@@ -775,6 +770,7 @@ def _load_featureXML(
     fh.load(filename, fm)
     self._oms_features_map = fm
 def _wiff_to_dict(
     filename=None,
 ):
@@ -1161,9 +1157,7 @@ def chrom_extract(
             scan_uid = trace["scan_uid"]
             # find all ms1 data with scan_uid and mz between q1-mz_tol and q1+mz_tol
             d = self.ms1_df.filter(
-                (pl.col("scan_uid").is_in(scan_uid))
-                & (pl.col("mz") >= q1 - mz_tol)
-                & (pl.col("mz") <= q1 + mz_tol),
+                (pl.col("scan_uid").is_in(scan_uid)) & (pl.col("mz") >= q1 - mz_tol) & (pl.col("mz") <= q1 + mz_tol),
             )
             # for all unique rt values, find the maximum inty
             eic_rt = d.group_by("rt").agg(pl.col("inty").max())
@@ -1182,9 +1176,7 @@ def chrom_extract(
             scan_uid = trace["scan_uid"]
             # find all ms2 data with scan_uid and mz between q3-mz_tol and q3+mz_tol
             d = self.ms2data.filter(
-                (pl.col("scan_uid").is_in(scan_uid))
-                & (pl.col("mz") >= q3 - mz_tol)
-                & (pl.col("mz") <= q3 + mz_tol),
+                (pl.col("scan_uid").is_in(scan_uid)) & (pl.col("mz") >= q3 - mz_tol) & (pl.col("mz") <= q3 + mz_tol),
             )
             # for all unique rt values, find the maximum inty
             eic_rt = d.group_by("rt").agg(pl.col("inty").max())

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

Potentially problematic release.

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl