masster 0.5.22-py3-none-any.whl → 0.5.24-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/_version.py +1 -1
- masster/logger.py +35 -19
- masster/sample/adducts.py +15 -29
- masster/sample/defaults/find_adducts_def.py +1 -3
- masster/sample/defaults/sample_def.py +4 -4
- masster/sample/h5.py +203 -361
- masster/sample/helpers.py +14 -30
- masster/sample/lib.py +3 -3
- masster/sample/load.py +21 -29
- masster/sample/plot.py +222 -132
- masster/sample/processing.py +42 -55
- masster/sample/sample.py +37 -46
- masster/sample/save.py +37 -61
- masster/sample/sciex.py +13 -11
- masster/sample/thermo.py +69 -74
- masster/spectrum.py +15 -15
- masster/study/analysis.py +650 -586
- masster/study/defaults/identify_def.py +1 -3
- masster/study/defaults/merge_def.py +6 -7
- masster/study/defaults/study_def.py +1 -5
- masster/study/export.py +35 -96
- masster/study/h5.py +134 -211
- masster/study/helpers.py +385 -459
- masster/study/id.py +239 -290
- masster/study/importers.py +84 -93
- masster/study/load.py +159 -178
- masster/study/merge.py +1112 -1098
- masster/study/plot.py +195 -149
- masster/study/processing.py +144 -191
- masster/study/save.py +14 -13
- masster/study/study.py +89 -130
- masster/wizard/wizard.py +764 -714
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0
masster/sample/processing.py
CHANGED

@@ -253,8 +253,7 @@ def get_spectrum(self, scan, **kwargs):
         spec=spect,
         scan_uid=scan_uid,
         feature_uid=scan_info["feature_uid"][0]
-        if "feature_uid" in scan_info
-        and scan_info["feature_uid"][0] is not None
+        if "feature_uid" in scan_info and scan_info["feature_uid"][0] is not None
         else feature_uid,
         q1_step=2,
         deisotope=deisotope,

@@ -447,9 +446,7 @@ def _spec_to_mat(
         closest_index = np.argmin(np.abs(ar2 - val1))
         closest_indices.append((i, closest_index))
     # filter out pairs that are not within the specified tolerance
-    closest_indices = [
-        (i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol
-    ]
+    closest_indices = [(i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol]
     # remove duplicates from the list of indices
     closest_indices = list(set(closest_indices))
     # sort the list of indices by the first element (i) in ascending order
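The _spec_to_mat hunk above only reflows the tolerance filter; the surrounding logic is a small nearest-neighbor peak matcher. A self-contained sketch of that pairing step, built from the lines visible in this diff (the function name and example arrays are illustrative):

```python
import numpy as np

def match_peaks(ar1: np.ndarray, ar2: np.ndarray, tol: float = 0.01) -> list[tuple[int, int]]:
    """Pair each value in ar1 with its closest value in ar2, keeping
    only pairs that agree within an absolute tolerance."""
    closest_indices = []
    for i, val1 in enumerate(ar1):
        closest_index = np.argmin(np.abs(ar2 - val1))
        closest_indices.append((i, int(closest_index)))
    # filter out pairs that are not within the specified tolerance
    closest_indices = [(i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol]
    # remove duplicates, then sort by the first index, as the diffed code does
    return sorted(set(closest_indices))

print(match_peaks(np.array([100.001, 150.5, 200.2]), np.array([100.0, 200.25])))
# -> [(0, 0)]: only 100.001 <-> 100.0 falls within the tolerance
```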
@@ -621,8 +618,7 @@ def find_features(self, **kwargs):
     mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
     mtd_par.setValue(
         "min_trace_length",
-        float(params.get("min_trace_length_multiplier"))
-        * float(params.get("chrom_fwhm_min")),
+        float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
     )
     mtd_par.setValue(
         "trace_termination_outliers",

@@ -801,7 +797,7 @@ def find_features(self, **kwargs):
     )

     self.features_df = df
-    #self._features_sync()
+    # self._features_sync()
     self.logger.success(f"Feature detection completed. Total features: {len(df)}")

     # store params

@@ -1134,9 +1130,7 @@ def find_ms2(self, **kwargs):
     feature_rt_start = features_subset.select("rt_start").to_numpy().flatten()
     feature_rt_end = features_subset.select("rt_end").to_numpy().flatten()
     feature_uids = features_subset.select("feature_uid").to_numpy().flatten()
-    feature_indices = (
-        features_subset.with_row_index().select("index").to_numpy().flatten()
-    )
+    feature_indices = features_subset.with_row_index().select("index").to_numpy().flatten()

     # Pre-compute RT radius for all features
     rt_radius = np.minimum(feature_rt - feature_rt_start, feature_rt_end - feature_rt)

@@ -1283,16 +1277,16 @@ def find_ms2(self, **kwargs):

 def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
     """Extract isotopic distributions from MS1 data and add to features_df.
-
+
     This method processes each feature to find isotopic distributions from MS1 data,
     similar to the study.find_iso() method but for individual samples. The method
     adds a new 'ms1_spec' column to features_df containing numpy arrays with
     isotopic distribution data.
-
+
     Args:
         rt_tolerance (float): RT tolerance in minutes for matching MS1 scans. Default 0.1.
         **kwargs: Additional parameters
-
+
     Notes:
         - Adds a new 'ms1_spec' column to features_df containing numpy arrays
        - Each array contains [mz, intensity] pairs for the isotopic distribution

@@ -1302,11 +1296,11 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
     if self.features_df is None or self.features_df.is_empty():
         self.logger.warning("No features found. Run find_features() first.")
         return
-
+
     if self.ms1_df is None or self.ms1_df.is_empty():
         self.logger.warning("No MS1 data found.")
         return
-
+
     # Check if ms1_spec column already exists
     if "ms1_spec" in self.features_df.columns:
         features_without_spec = self.features_df.filter(pl.col("ms1_spec").is_null())

@@ -1316,9 +1310,7 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
         self.logger.info(f"Processing {len(features_without_spec)} features without isotopic distributions.")
     else:
         # Add the ms1_spec column with None values
-        self.features_df = self.features_df.with_columns(
-            pl.lit(None, dtype=pl.Object).alias("ms1_spec")
-        )
+        self.features_df = self.features_df.with_columns(pl.lit(None, dtype=pl.Object).alias("ms1_spec"))
         features_without_spec = self.features_df
         self.logger.info(f"Processing {len(features_without_spec)} features for isotopic distributions.")

@@ -1336,60 +1328,59 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
         6.02010,
         7.02345,
     ])
-
+
     # Convert rt_tolerance from minutes to seconds
     rt_tolerance_s = rt_tolerance * 60
-
+
     # Process each feature
     ms1_specs = []
    feature_indices = []
-
-    for i, row in enumerate(
-        tqdm(features_without_spec.rows(named=True),
-             desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Extracting isotope patterns")
-    ):
+
+    for i, row in enumerate(
+        tqdm(
+            features_without_spec.rows(named=True),
+            desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Extracting isotope patterns",
+        )
+    ):
         feature_rt = row["rt"]
         feature_mz = row["mz"]
-
+
         # Find MS1 scans within RT tolerance
-        rt_mask = (
-            (self.ms1_df["rt"] >= (feature_rt - rt_tolerance_s)) &
-            (self.ms1_df["rt"] <= (feature_rt + rt_tolerance_s))
+        rt_mask = (self.ms1_df["rt"] >= (feature_rt - rt_tolerance_s)) & (
+            self.ms1_df["rt"] <= (feature_rt + rt_tolerance_s)
         )
         ms1_in_range = self.ms1_df.filter(rt_mask)
-
+
         if ms1_in_range.is_empty():
             ms1_specs.append(None)
             feature_indices.append(row["feature_uid"])
             continue
-
+
         # Extract isotopic pattern
         isotope_pattern = []
-
+
         # Start with the monoisotopic peak (M+0)
         base_intensity = 0
         mz_tolerance = 0.01  # 10 ppm at 1000 Da
-
+
         # Find the base peak intensity
-        base_mask = (
-            (ms1_in_range["mz"] >= (feature_mz - mz_tolerance)) &
-            (ms1_in_range["mz"] <= (feature_mz + mz_tolerance))
+        base_mask = (ms1_in_range["mz"] >= (feature_mz - mz_tolerance)) & (
+            ms1_in_range["mz"] <= (feature_mz + mz_tolerance)
        )
        base_peaks = ms1_in_range.filter(base_mask)
-
+
        if not base_peaks.is_empty():
            base_intensity = base_peaks["inty"].max()
            isotope_pattern.append([feature_mz, base_intensity])
-
+
        # Look for isotope peaks
        for shift in isotope_shifts:
            isotope_mz = feature_mz + shift
-            isotope_mask = (
-                (ms1_in_range["mz"] >= (isotope_mz - mz_tolerance)) &
-                (ms1_in_range["mz"] <= (isotope_mz + mz_tolerance))
+            isotope_mask = (ms1_in_range["mz"] >= (isotope_mz - mz_tolerance)) & (
+                ms1_in_range["mz"] <= (isotope_mz + mz_tolerance)
            )
            isotope_peaks = ms1_in_range.filter(isotope_mask)
-
+
            if not isotope_peaks.is_empty():
                max_intensity = isotope_peaks["inty"].max()
                # Only keep isotope peaks that are at least 1% of base peak

@@ -1397,29 +1388,25 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
                    # Get the mz of the most intense peak
                    max_peak = isotope_peaks.filter(pl.col("inty") == max_intensity).row(0, named=True)
                    isotope_pattern.append([max_peak["mz"], max_intensity])
-
+
        # Convert to numpy array or None if empty
        if len(isotope_pattern) > 1:  # Need at least 2 points (monoisotopic + 1 isotope)
            ms1_spec = np.array(isotope_pattern, dtype=np.float64)
        else:
            ms1_spec = None
-
+
        ms1_specs.append(ms1_spec)
        feature_indices.append(row["feature_uid"])
-
+
    # Update the features_df with the isotopic spectra
    update_df = pl.DataFrame({
        "feature_uid": feature_indices,
-        "ms1_spec_new": pl.Series("ms1_spec_new", ms1_specs, dtype=pl.Object)
+        "ms1_spec_new": pl.Series("ms1_spec_new", ms1_specs, dtype=pl.Object),
    })
-
+
    # Join and update
    self.features_df = (
-        self.features_df.join(
-            update_df,
-            on="feature_uid",
-            how="left"
-        )
+        self.features_df.join(update_df, on="feature_uid", how="left")
        .with_columns([
            pl.when(pl.col("ms1_spec_new").is_not_null())
            .then(pl.col("ms1_spec_new"))

@@ -1428,11 +1415,11 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
        ])
        .drop("ms1_spec_new")
    )
-
+
    # Log results
    non_null_count = len([spec for spec in ms1_specs if spec is not None])
    self.logger.success(f"Extracted isotopic distributions for {non_null_count}/{len(ms1_specs)} features.")
-
+
    # Store parameters in history
    params_dict = {"rt_tolerance": rt_tolerance}
    params_dict.update(kwargs)
masster/sample/sample.py
CHANGED

@@ -1,8 +1,8 @@
 """
 sample.py - Mass Spectrometry Sample Analysis Module

-This module provides comprehensive tools for processing and analyzing Data-Dependent Acquisition (DDA)
-mass spectrometry data. It defines the `Sample` class, which offers methods to load, process, analyze,
+This module provides comprehensive tools for processing and analyzing Data-Dependent Acquisition (DDA)
+mass spectrometry data. It defines the `Sample` class, which offers methods to load, process, analyze,
 and visualize mass spectrometry data from various file formats.

 Supported File Formats:

@@ -31,7 +31,7 @@ Core Dependencies:
 - `h5py`: HDF5 file format support for Sample5 files

 Classes:
-    Sample: Main class for handling DDA mass spectrometry data, providing methods for
+    Sample: Main class for handling DDA mass spectrometry data, providing methods for
     data import, processing, analysis, and visualization.

 Typical Workflow:

@@ -43,43 +43,43 @@ Typical Workflow:

 Example Usage:
     Basic analysis workflow:
-
+
     ```python
     from masster.sample import Sample
-
+
     # Load a mass spectrometry file
     sample = Sample(filename="experiment.mzML")
-
+
     # Detect features
     sample.find_features()
-
+
     # Find MS2 spectra for features
     sample.find_ms2()
-
+
     # Generate 2D visualization
     sample.plot_2d()
-
+
     # Export results
     sample.export_features("features.xlsx")
     ```
-
+
     Advanced usage with custom parameters:
-
+
     ```python
     from masster.sample import Sample
     from masster.sample.defaults import sample_defaults, find_features_defaults
-
+
     # Create custom parameters
     params = sample_defaults(log_level="DEBUG", label="My Experiment")
     ff_params = find_features_defaults(noise_threshold_int=1000)
-
+
     # Initialize with custom parameters
     sample = Sample(params=params)
     sample.load("data.raw")
-
+
     # Feature detection with custom parameters
     sample.find_features(params=ff_params)
-
+
     # Generate comprehensive statistics
     stats = sample.get_dda_stats()
     sample.plot_dda_stats()

@@ -275,7 +275,7 @@ class Sample:
     save = save
     find_features = find_features
     find_adducts = find_adducts
-    _get_adducts= _get_adducts
+    _get_adducts = _get_adducts
     find_iso = find_iso
     find_ms2 = find_ms2
     get_spectrum = get_spectrum

@@ -348,45 +348,44 @@ class Sample:

     def __dir__(self):
         """
-        Custom __dir__ implementation to hide internal methods starting with '_'
-        and backward compatibility aliases from tab completion and dir() calls,
+        Custom __dir__ implementation to hide internal methods starting with '_'
+        and backward compatibility aliases from tab completion and dir() calls,
         while keeping them accessible to class methods.
-
+
         Returns:
             list: List of public attribute and method names (excluding internal and deprecated methods)
         """
         # Define backward compatibility aliases to hide
         backward_compatibility_aliases = {
-            'load_study',  # deprecated alias for _load_ms1
-            'filter_features',  # alias for filter (deprecated naming)
-            'select_features',  # alias for select (deprecated naming)
-            'features_filter',  # confusing duplicate of filter
-            'features_select',  # confusing duplicate of select
-            'merge_defaults',  # alias for find_features_defaults (confusing)
-            'plot_feature_stats',  # backward compatibility for plot_features_stats
-            'store_history',  # deprecated alias for update_history
+            "load_study",  # deprecated alias for _load_ms1
+            "filter_features",  # alias for filter (deprecated naming)
+            "select_features",  # alias for select (deprecated naming)
+            "features_filter",  # confusing duplicate of filter
+            "features_select",  # confusing duplicate of select
+            "merge_defaults",  # alias for find_features_defaults (confusing)
+            "plot_feature_stats",  # backward compatibility for plot_features_stats
+            "store_history",  # deprecated alias for update_history
         }
-
+
         # Get all attributes from the class
         all_attrs = set()
-
+
         # Add attributes from the class and all its bases
         for cls in self.__class__.__mro__:
             all_attrs.update(cls.__dict__.keys())
-
+
         # Add instance attributes
         all_attrs.update(self.__dict__.keys())
-
+
         # Filter out attributes starting with '_' (but keep special methods like __init__, __str__, etc.)
         # Also filter out backward compatibility aliases
         public_attrs = [
-            attr for attr in all_attrs
-            if not attr.startswith('_') or attr.startswith('__') and attr.endswith('__')
+            attr for attr in all_attrs if not attr.startswith("_") or attr.startswith("__") and attr.endswith("__")
         ]
-
+
         # Remove backward compatibility aliases from the public attributes
         public_attrs = [attr for attr in public_attrs if attr not in backward_compatibility_aliases]
-
+
         return sorted(public_attrs)

     def logger_update(
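The __dir__ hunk collapses the attribute filter into one comprehension without changing behavior: walk the MRO, gather attribute names, keep dunders, and drop both _private names and a fixed set of deprecated aliases. A minimal standalone illustration of the same pattern (class and alias names are made up):

```python
class Demo:
    """Hides deprecated aliases and _internal names from dir()/tab completion."""

    _backward_compatibility_aliases = {"old_name"}  # hypothetical alias set

    def old_name(self):  # deprecated alias, still callable
        return self.new_name()

    def new_name(self):
        return 42

    def __dir__(self):
        attrs = set()
        for cls in self.__class__.__mro__:  # the class plus all its bases
            attrs.update(cls.__dict__.keys())
        attrs.update(self.__dict__.keys())  # instance attributes
        # keep dunders, drop _private names, then drop the deprecated aliases
        public = [a for a in attrs if not a.startswith("_") or a.startswith("__") and a.endswith("__")]
        return sorted(a for a in public if a not in self._backward_compatibility_aliases)


d = Demo()
assert "new_name" in dir(d) and "old_name" not in dir(d)
assert d.old_name() == 42  # the alias still works, it is just not advertised
```

Since dir() and IPython tab completion both consult __dir__, the aliases vanish from completion while remaining callable for old code.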
@@ -442,10 +441,7 @@ class Sample:

         # Get all currently loaded modules that are part of the sample package
         for module_name in sys.modules:
-            if (
-                module_name.startswith(sample_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(sample_module_prefix) and module_name != current_module:
                 sample_modules.append(module_name)

         # Add core masster modules

@@ -461,15 +457,10 @@ class Sample:
         study_modules = []
         study_module_prefix = f"{base_modname}.study."
         for module_name in sys.modules:
-            if (
-                module_name.startswith(study_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(study_module_prefix) and module_name != current_module:
                 study_modules.append(module_name)

-        all_modules_to_reload = (
-            core_modules + sample_modules + study_modules
-        )
+        all_modules_to_reload = core_modules + sample_modules + study_modules

         # Reload all discovered modules
         for full_module_name in all_modules_to_reload:
masster/sample/save.py
CHANGED

@@ -105,7 +105,8 @@ def save(self, filename=None):
     self._save_sample5(filename=filename)
     self.file_path = filename

-
+
+"""
 def _save_featureXML(self, filename="features.featureXML"):
     if self._oms_features_map is None:
         self.logger.warning("No features found.")

@@ -114,7 +115,9 @@ def _save_featureXML(self, filename="features.featureXML"):
     fh.store(filename, self._oms_features_map)
     self.logger.debug(f"Features Map saved to {filename}")

-
+"""
+
+
 def export_features(self, filename="features.csv"):
     """
     Export the features DataFrame to a CSV or Excel file.

@@ -140,11 +143,7 @@ def export_features(self, filename="features.csv"):
         (pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
     )
     clean_df = self.features_df.select(
-        [
-            col
-            for col in self.features_df.columns
-            if self.features_df[col].dtype not in (pl.List, pl.Object)
-        ],
+        [col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)],
     )
     if filename.lower().endswith((".xls", ".xlsx")):
         clean_df.to_pandas().to_excel(filename, index=False)

@@ -231,7 +230,7 @@ def export_mgf(
     if rt_end is not None:
         features = features.filter(pl.col("rt") <= rt_end)
     # Note: We no longer filter out features without MS2 data here since we want to export
-    # MS1 spectra for ALL features with isotope data. The MS2 filtering is done in the
+    # MS1 spectra for ALL features with isotope data. The MS2 filtering is done in the
     # second pass where we specifically check for ms2_scans.

     # Convert to list of dictionaries for faster iteration

@@ -269,26 +268,26 @@ def export_mgf(
     def write_ion(f, title, fuid, fid, mz, rt, charge, spect):
         if spect is None:
             return "none"
-
+
         # For MSLEVEL=2 ions, don't write empty spectra
         ms_level = spect.ms_level if spect.ms_level is not None else 1
         if ms_level > 1 and (len(spect.mz) == 0 or len(spect.inty) == 0):
             return "empty_ms2"
-
+
         # Create dynamic title based on MS level
         if ms_level == 1:
             # MS1: uid, rt, mz
             dynamic_title = f"uid:{fuid}, rt:{rt:.2f}, mz:{mz:.4f}"
         else:
             # MS2: uid, rt, mz, energy
-            energy = spect.energy if hasattr(spect, 'energy') else 0
+            energy = spect.energy if hasattr(spect, "energy") else 0
             dynamic_title = f"uid:{fuid}, rt:{rt:.2f}, mz:{mz:.4f}, energy:{energy}"
-
+
         f.write(f"BEGIN IONS\nTITLE={dynamic_title}\n")
         f.write(f"FEATURE_UID={fuid}\n")
         f.write(f"FEATURE_ID={fid}\n")
         f.write(f"CHARGE={charge}\nPEPMASS={mz}\nRTINSECONDS={rt}\n")
-
+
         if spect.ms_level is None:
             f.write("MSLEVEL=1\n")
             # Add PRECURSORINTENSITY for MS1 spectra

@@ -301,15 +300,12 @@ def export_mgf(
         if spect.ms_level == 1 and len(spect.inty) > 0:
             precursor_intensity = max(spect.inty)
             f.write(f"PRECURSORINTENSITY={precursor_intensity:.0f}\n")
-
+
         if spect.ms_level is not None:
             if spect.ms_level > 1 and hasattr(spect, "energy"):
                 f.write(f"ENERGY={spect.energy}\n")
         # Use list comprehension for better performance
-        peak_lines = [
-            f"{mz_val:.5f} {inty_val:.0f}\n"
-            for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)
-        ]
+        peak_lines = [f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)]
         f.writelines(peak_lines)
         f.write("END IONS\n\n")
         return "written"
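write_ion() above serializes one feature per MGF ion block. A trimmed sketch of just the output format it produces (field set simplified; values are hypothetical):

```python
def write_mgf_ion(f, title, mz, rt, charge, peaks):
    """Write one BEGIN IONS ... END IONS block in the style of write_ion()."""
    f.write(f"BEGIN IONS\nTITLE={title}\n")
    f.write(f"CHARGE={charge}\nPEPMASS={mz}\nRTINSECONDS={rt}\n")
    # one "m/z intensity" line per peak, matching the diff's number formatting
    f.writelines(f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in peaks)
    f.write("END IONS\n\n")


with open("example.mgf", "w", encoding="utf-8") as f:
    write_mgf_ion(
        f, "uid:42, rt:123.45, mz:301.1234",
        mz=301.1234, rt=123.45, charge=-1,
        peaks=[(301.1234, 12000), (302.1268, 1300)],
    )
```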
@@ -322,8 +318,7 @@ def export_mgf(

     # count how many features have charge < 0
     if (
-        self.features_df.filter(pl.col("charge") < 0).shape[0]
-        - self.features_df.filter(pl.col("charge") > 0).shape[0]
+        self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
         > 0
     ):
         preferred_charge = -1

@@ -342,7 +337,7 @@ def export_mgf(
     filename = os.path.abspath(filename)
     with open(filename, "w", encoding="utf-8") as f:
         tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
-
+
         # First pass: Export MS1 spectra for ALL features with ms1_spec data
         for row in tqdm(
             features_list,

@@ -362,19 +357,15 @@ def export_mgf(
             if "ms1_spec" in row and row["ms1_spec"] is not None:
                 # Create spectrum from ms1_spec isotope pattern data
                 from masster.spectrum import Spectrum
-
+
                 iso_data = row["ms1_spec"]
                 if len(iso_data) >= 2:  # Ensure we have mz and intensity arrays
                     ms1_mz = iso_data[0]
                     ms1_inty = iso_data[1]
-
+
                     # Create a Spectrum object from the isotope data
-                    spect = Spectrum(
-                        mz=np.array(ms1_mz),
-                        inty=np.array(ms1_inty),
-                        ms_level=1
-                    )
-
+                    spect = Spectrum(mz=np.array(ms1_mz), inty=np.array(ms1_inty), ms_level=1)
+
                     charge = preferred_charge
                     if row["charge"] is not None and row["charge"] != 0:
                         charge = row["charge"]

@@ -395,7 +386,7 @@ def export_mgf(
             else:
                 # No MS1 spectrum exported for features without ms1_spec data
                 ms1_fallback_count += 1
-
+
         # Second pass: Export MS2 spectra for features with MS2 data
         for row in tqdm(
             features_list,

@@ -453,9 +444,7 @@ def export_mgf(
                         q1_max=q1_ratio_max,
                     )
                     # Get the corresponding scan_uid from the list
-                    current_scan_uid = (
-                        scan_uids[i] if i < len(scan_uids) else "unknown"
-                    )
+                    current_scan_uid = scan_uids[i] if i < len(scan_uids) else "unknown"
                     result = write_ion(
                         f,
                         f"uid:{feature_uid}",

@@ -580,18 +569,14 @@ def export_mgf(
                 spect = spect.centroid(
                     tolerance=self.parameters["mz_tol_ms1_da"],
                     ppm=self.parameters["mz_tol_ms1_ppm"],
-                    min_points=self.parameters[
-                        "centroid_min_points_ms1"
-                    ],
+                    min_points=self.parameters["centroid_min_points_ms1"],
                     algo=centroid_algo,
                 )
             elif spect.ms_level == 2:
                 spect = spect.centroid(
                     tolerance=self.parameters["mz_tol_ms2_da"],
                     ppm=self.parameters["mz_tol_ms2_ppm"],
-                    min_points=self.parameters[
-                        "centroid_min_points_ms2"
-                    ],
+                    min_points=self.parameters["centroid_min_points_ms2"],
                     algo=centroid_algo,
                 )
             if deisotope:

@@ -654,7 +639,7 @@ def export_mgf(
     self.logger.info(f"Skipped {empty_ms2_count} empty MS2 spectra")
     if ms1_fallback_count > 0:
         self.logger.info(f"Skipped MS1 export for {ms1_fallback_count} features without isotope patterns")
-
+
     # Handle None values in logging
     inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
     q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"

@@ -695,9 +680,7 @@ def export_dda_stats(self, filename="stats.csv"):
     ms2_count = len(self.scans_df.filter(pl.col("ms_level") == 2))
     features_count = len(self.features_df) if self.features_df is not None else 0
     features_with_ms2 = (
-        self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
-        if self.features_df is not None
-        else 0
+        self.features_df.filter(pl.col("ms2_scans").is_not_null()).height if self.features_df is not None else 0
     )

     # Initialize a dictionary to hold statistics

@@ -712,9 +695,7 @@ def export_dda_stats(self, filename="stats.csv"):
     if "time_cycle" in self.scans_df.columns:
         ms1_df = self.scans_df.filter(pl.col("ms_level") == 1)
         avg_cycle_time = ms1_df["time_cycle"].mean()
-        stats["Average_cycle_time"] = (
-            avg_cycle_time if avg_cycle_time is not None else ""
-        )
+        stats["Average_cycle_time"] = avg_cycle_time if avg_cycle_time is not None else ""
     else:
         stats["Average_cycle_time"] = 0

@@ -851,32 +832,27 @@ def export_xlsx(self, filename="features.xlsx"):
        return

    # Validate filename extension
-    if not filename.lower().endswith(('.xlsx', '.xls')):
+    if not filename.lower().endswith((".xlsx", ".xls")):
        raise ValueError("Filename must end with '.xlsx' or '.xls' for Excel export")
-
+
    filename = os.path.abspath(filename)
-
+
    # Clone the DataFrame to avoid modifying the original
    clean_df = self.features_df.clone()
-
+
    # Add a column has_ms2=True if column ms2_scans is not None
    if "ms2_scans" in clean_df.columns:
-        clean_df = clean_df.with_columns(
-            (pl.col("ms2_scans").is_not_null()).alias("has_ms2")
-        )
-
+        clean_df = clean_df.with_columns((pl.col("ms2_scans").is_not_null()).alias("has_ms2"))
+
    # Filter out columns with List or Object data types that can't be exported to Excel
-    exportable_columns = [
-        col for col in clean_df.columns
-        if clean_df[col].dtype not in (pl.List, pl.Object)
-    ]
-
+    exportable_columns = [col for col in clean_df.columns if clean_df[col].dtype not in (pl.List, pl.Object)]
+
    clean_df = clean_df.select(exportable_columns)
-
+
    # Convert to pandas and export to Excel
    pandas_df = clean_df.to_pandas()
    pandas_df.to_excel(filename, index=False)
-
+
    self.logger.success(f"Features exported to {filename} (Excel format)")
    self.logger.debug(f"Exported {len(clean_df)} features with {len(exportable_columns)} columns")
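export_xlsx() (like export_features() earlier in this file) drops polars List and Object columns before converting to pandas, because nested values do not survive the Excel round trip. A minimal sketch of that guard on a hypothetical frame:

```python
import polars as pl

df = pl.DataFrame({
    "mz": [100.0, 200.0],
    "rt": [12.3, 45.6],
    "ms2_scans": [[1, 2], None],  # a List column that Excel export cannot take
})

# Keep only scalar-typed columns, using the same dtype test as the diff
exportable = [col for col in df.columns if df[col].dtype not in (pl.List, pl.Object)]
clean = df.select(exportable)  # -> columns: mz, rt
# clean.to_pandas().to_excel("features.xlsx", index=False)  # needs openpyxl installed
```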