PyPI - masster - Versions diffs - 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl - Mend

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (37)

masster/_version.py +1 -1
masster/logger.py +35 -19
masster/sample/adducts.py +15 -29
masster/sample/defaults/find_adducts_def.py +1 -3
masster/sample/defaults/sample_def.py +4 -4
masster/sample/h5.py +203 -361
masster/sample/helpers.py +14 -30
masster/sample/lib.py +3 -3
masster/sample/load.py +21 -29
masster/sample/plot.py +222 -132
masster/sample/processing.py +42 -55
masster/sample/sample.py +37 -46
masster/sample/save.py +37 -61
masster/sample/sciex.py +13 -11
masster/sample/thermo.py +69 -74
masster/spectrum.py +15 -15
masster/study/analysis.py +650 -586
masster/study/defaults/identify_def.py +1 -3
masster/study/defaults/merge_def.py +6 -7
masster/study/defaults/study_def.py +1 -5
masster/study/export.py +35 -96
masster/study/h5.py +134 -211
masster/study/helpers.py +385 -459
masster/study/id.py +239 -290
masster/study/importers.py +84 -93
masster/study/load.py +159 -178
masster/study/merge.py +1112 -1098
masster/study/plot.py +195 -149
masster/study/processing.py +144 -191
masster/study/save.py +14 -13
masster/study/study.py +89 -130
masster/wizard/wizard.py +764 -714
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0

masster/sample/sciex.py CHANGED Viewed

@@ -31,16 +31,16 @@ def naive_centroid(
 ) -> tuple[np.ndarray, np.ndarray]:
     """
     Simplified naive centroiding implementation.
     Parameters
     ----------
     peak_mzs : np.ndarray
         Array of m/z values
-    peak_intensities : np.ndarray
+    peak_intensities : np.ndarray
         Array of intensity values
     centroiding_ppm : float, default 20.0
         PPM tolerance for combining peaks
     Returns
     -------
     tuple[np.ndarray, np.ndarray]
@@ -150,20 +150,20 @@ except Exception as e:
 def dot_net_array_to_np_array(src) -> np.ndarray:
     """
     Convert .NET array to NumPy array.
     Parameters
     ----------
     src : .NET array or None
         Source .NET array to convert
     Returns
     -------
     np.ndarray
         Converted NumPy array
     Notes
     -----
-    Based on approach from:
+    Based on approach from:
     https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
     """
     if src is None:
@@ -204,7 +204,7 @@ class SciexWiffFileReader:
         """Close the file and clean up resources."""
         self._wiffDataProvider.Close()
-    def __enter__(self) -> 'SciexWiffFileReader':
+    def __enter__(self) -> "SciexWiffFileReader":
         """Context manager entry."""
         return self
@@ -272,11 +272,11 @@ class SciexWiffFileReader:
                 if ms_level > 1 and not details.IsSwath and mass_spectrum.NumDataPoints <= 0 and ignore_empty_scans:
                     continue
                 if exp.Details.Polarity == exp.Details.Polarity.Positive:
-                    pol = 'positive'
+                    pol = "positive"
                 elif exp.Details.Polarity == exp.Details.Polarity.Negative:
-                    pol = 'negative'
+                    pol = "negative"
                 else:
-                    pol = ''
+                    pol = ""
                 polarity_list.append(pol)
                 mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
@@ -553,6 +553,7 @@ def load_wiff_file(filename: str, **kwargs) -> SciexWiffData:
     wiff_data.import_raw(filename)
     return wiff_data
 def get_sample_names(filename: str) -> list[str]:
     """
     Get the sample names from a WIFF file.
@@ -570,6 +571,7 @@ def get_sample_names(filename: str) -> list[str]:
     with SciexWiffFileReader(filename) as reader:
         return list(reader.sample_names)
 # Example usage and testing
 if __name__ == "__main__":
     print("Standalone Sciex WIFF reader implementation")

masster/sample/thermo.py CHANGED Viewed

@@ -30,8 +30,8 @@ Example:
     >>> mz, intensity = raw_data.get_peaks(0)  # Get first spectrum peaks
 Note:
-    The .NET imports (System, ThermoFisher) will only work when pythonnet
-    is properly installed and configured. Without these dependencies, the
+    The .NET imports (System, ThermoFisher) will only work when pythonnet
+    is properly installed and configured. Without these dependencies, the
     module will still import but Thermo RAW file reading will be disabled.
 """
@@ -54,23 +54,23 @@ def naive_centroid(
 ) -> tuple[np.ndarray, np.ndarray]:
     """
     Simplified naive centroiding implementation.
     Combines nearby peaks within a PPM tolerance using intensity-weighted averaging.
     Parameters
     ----------
     peak_mzs : np.ndarray
         Array of m/z values (must be sorted)
-    peak_intensities : np.ndarray
+    peak_intensities : np.ndarray
         Array of intensity values corresponding to peak_mzs
     centroiding_ppm : float, default 20.0
         PPM tolerance for combining peaks
     Returns
     -------
     tuple[np.ndarray, np.ndarray]
         Centroided m/z and intensity arrays
     Notes
     -----
     This is a simple implementation that assumes input peaks are sorted by m/z.
@@ -78,7 +78,7 @@ def naive_centroid(
     """
     if len(peak_mzs) == 0:
         return np.array([]), np.array([])
     if len(peak_mzs) != len(peak_intensities):
         raise ValueError("peak_mzs and peak_intensities must have the same length")
@@ -89,7 +89,7 @@ def naive_centroid(
     while i < len(peak_mzs):
         current_mz = peak_mzs[i]
         current_intensity = peak_intensities[i]
         # Calculate tolerance for current m/z
         tolerance = current_mz * centroiding_ppm * 1e-6
@@ -144,6 +144,7 @@ try:
         # Try alternative locations
         try:
             import alpharaw
             alpharaw_dir = os.path.dirname(alpharaw.__file__)
             ext_dir = os.path.join(alpharaw_dir, "ext")
         except ImportError:
@@ -156,9 +157,7 @@ try:
     clr.AddReference(
         os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.Data.dll"),
     )
-    clr.AddReference(
-        os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.RawFileReader.dll")
-    )
+    clr.AddReference(os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.RawFileReader.dll"))
     import ThermoFisher  # noqa: F401
@@ -189,43 +188,43 @@ except Exception as e:
 def dot_net_array_to_np_array(src) -> np.ndarray:
     """
     Convert .NET array to NumPy array with efficient memory handling.
     This function performs a zero-copy conversion from .NET arrays to NumPy arrays
     by directly accessing the underlying memory buffer. This is much faster than
     iterating through elements.
     Parameters
     ----------
     src : .NET array or None
         Source .NET array to convert (typically double[])
     Returns
     -------
     np.ndarray
         Converted NumPy array with dtype float64. Returns empty array if src is None.
     Notes
     -----
-    Based on the approach from:
+    Based on the approach from:
     https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
     The function uses GCHandle.Alloc to pin the .NET array in memory, allowing
     direct access to its underlying buffer via ctypes. The buffer is then
     wrapped as a NumPy array and copied to ensure memory safety.
     """
     if src is None:
         return np.array([], dtype=np.float64)
     # Pin the .NET array in memory to prevent garbage collection
     src_hndl = GCHandle.Alloc(src, GCHandleType.Pinned)
     try:
         # Get pointer to the pinned memory
         src_ptr = src_hndl.AddrOfPinnedObject().ToInt64()
         # Create ctypes buffer pointing to the same memory
         buf_type = ctypes.c_double * len(src)
         cbuf = buf_type.from_address(src_ptr)
         # Convert to NumPy array and make a copy for safety
         dest = np.frombuffer(cbuf, dtype="float64").copy()  # type: ignore[call-overload]
     finally:
@@ -247,7 +246,7 @@ class ThermoRawFileReader:
                 "Install pythonnet (pip install pythonnet) and ensure Thermo Fisher DLLs "
                 "are available in alpharaw's ext/thermo_fisher directory."
             )
         if not os.path.exists(filename):
             raise FileNotFoundError(f"RAW file not found: {filename}")
@@ -255,7 +254,7 @@ class ThermoRawFileReader:
             self._raw_file = RawFileReaderAdapter.FileFactory(filename)
         except Exception as e:
             raise ValueError(f"Failed to create RAW file reader for '{filename}': {e}") from e
         if not self._raw_file.IsOpen:
             raise ValueError(f"Could not open RAW file: {filename}")
@@ -271,10 +270,10 @@ class ThermoRawFileReader:
     def close(self) -> None:
         """Close the file and clean up resources."""
-        if hasattr(self, '_raw_file') and self._raw_file is not None:
+        if hasattr(self, "_raw_file") and self._raw_file is not None:
             self._raw_file.Dispose()
-    def __enter__(self) -> 'ThermoRawFileReader':
+    def __enter__(self) -> "ThermoRawFileReader":
         """Context manager entry."""
         return self
@@ -285,12 +284,12 @@ class ThermoRawFileReader:
     def get_polarity_from_scan_event(self, scan_number: int) -> str:
         """
         Extract polarity information from scan event.
         Parameters
         ----------
         scan_number : int
             Scan number to extract polarity from
         Returns
         -------
         str
@@ -299,28 +298,28 @@ class ThermoRawFileReader:
         try:
             scan_event = self._raw_file.GetScanEventForScanNumber(scan_number)
             if scan_event is None:
-                return ''
+                return ""
             # Try the direct Polarity property first (most reliable)
-            if hasattr(scan_event, 'Polarity'):
+            if hasattr(scan_event, "Polarity"):
                 polarity_str = str(scan_event.Polarity).lower()
-                if 'positive' in polarity_str:
-                    return 'positive'
-                elif 'negative' in polarity_str:
-                    return 'negative'
+                if "positive" in polarity_str:
+                    return "positive"
+                elif "negative" in polarity_str:
+                    return "negative"
             # Fallback: parse the scan filter string
             filter_string = str(scan_event.ToString()).lower()
-            if '+' in filter_string or 'positive' in filter_string:
-                return 'positive'
-            elif '-' in filter_string or 'negative' in filter_string:
-                return 'negative'
+            if "+" in filter_string or "positive" in filter_string:
+                return "positive"
+            elif "-" in filter_string or "negative" in filter_string:
+                return "negative"
         except Exception:
             # Log the exception if needed, but don't raise
             pass
-        return ''  # Unknown polarity
+        return ""  # Unknown polarity
     def _extract_precursor_info(self, scan_event, ms_level: int) -> tuple[float, int, float, float, float]:
         """Extract precursor information from scan event for MS2+ scans."""
@@ -333,17 +332,19 @@ class ThermoRawFileReader:
             precursor_mz = -1.0
         try:
-            precursor_charge = int(scan_event.GetChargeState(0)) if hasattr(scan_event, 'GetChargeState') else 0
+            precursor_charge = int(scan_event.GetChargeState(0)) if hasattr(scan_event, "GetChargeState") else 0
         except Exception:
             precursor_charge = 0
         try:
-            collision_energy = float(scan_event.GetEnergy(0)) if hasattr(scan_event, 'GetEnergy') else 0.0
+            collision_energy = float(scan_event.GetEnergy(0)) if hasattr(scan_event, "GetEnergy") else 0.0
         except Exception:
             collision_energy = 0.0
         try:
-            isolation_window = float(scan_event.GetIsolationWidth(0)) if hasattr(scan_event, 'GetIsolationWidth') else 3.0
+            isolation_window = (
+                float(scan_event.GetIsolationWidth(0)) if hasattr(scan_event, "GetIsolationWidth") else 3.0
+            )
         except Exception:
             isolation_window = 3.0
@@ -353,11 +354,7 @@ class ThermoRawFileReader:
         return precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper
     def _process_scan_data(
-        self,
-        scan_data,
-        centroid: bool,
-        centroid_ppm: float,
-        keep_k_peaks: int
+        self, scan_data, centroid: bool, centroid_ppm: float, keep_k_peaks: int
     ) -> tuple[np.ndarray, np.ndarray]:
         """Process scan data to extract and optionally centroid peaks."""
         if scan_data.Positions is not None and scan_data.Intensities is not None:
@@ -434,30 +431,29 @@ class ThermoRawFileReader:
                 continue
             scan_event = self._raw_file.GetScanEventForScanNumber(scan_num)
             # Extract basic scan information
             rt = scan_stats.StartTime  # in minutes
             ms_level = int(scan_event.MSOrder) if scan_event else 1
             polarity = self.get_polarity_from_scan_event(scan_num)
             # Process peak data
-            mz_array, int_array = self._process_scan_data(
-                scan_data, centroid, centroid_ppm, keep_k_peaks
-            )
+            mz_array, int_array = self._process_scan_data(scan_data, centroid, centroid_ppm, keep_k_peaks)
             # Store scan data
             peak_mz_arrays.append(mz_array)
             peak_intensity_arrays.append(int_array)
             peak_indices_list.append(len(mz_array))
             rt_list.append(rt)
             ms_level_list.append(ms_level)
             polarity_list.append(polarity)
             # Extract precursor information
-            precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper = \
+            precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper = (
                 self._extract_precursor_info(scan_event, ms_level)
+            )
             precursor_mz_list.append(precursor_mz)
             precursor_charge_list.append(precursor_charge)
             ce_list.append(collision_energy)
@@ -510,7 +506,7 @@ class ThermoRawData:
     def __init__(self, centroided: bool = True) -> None:
         """
         Initialize ThermoRawData reader.
         Parameters
         ----------
         centroided : bool, optional
@@ -520,13 +516,13 @@ class ThermoRawData:
         # Initialize dataframes
         self.spectrum_df: pd.DataFrame = pd.DataFrame()
         self.peak_df: pd.DataFrame = pd.DataFrame()
         # File and instrument information
         self._raw_file_path = ""
         self.creation_time = ""
         self.type = "thermo"
         self.instrument = "thermo"
         # Processing parameters
         self.centroided = centroided
         self.centroid_ppm = 20.0
@@ -537,8 +533,7 @@ class ThermoRawData:
         if self.centroided:
             self.centroided = False
             warnings.warn(
-                "Centroiding for Thermo data is not well implemented yet. "
-                "Data will be processed in profile mode.",
+                "Centroiding for Thermo data is not well implemented yet. Data will be processed in profile mode.",
                 UserWarning,
                 stacklevel=2,
             )
@@ -587,14 +582,14 @@ class ThermoRawData:
                 ignore_empty_scans=self.ignore_empty_scans,
                 keep_k_peaks=self.keep_k_peaks_per_spec,
             )
             # Try to get file creation time
             try:
                 creation_info = raw_reader._raw_file.GetCreationDate()
                 self.creation_time = creation_info.ToString("O") if creation_info else ""
             except Exception:
                 self.creation_time = ""
         return data_dict
     def _set_dataframes(self, raw_data: dict[str, Any]) -> None:
@@ -607,16 +602,16 @@ class ThermoRawData:
             Dictionary containing the raw spectral data with keys like 'rt', 'peak_mz', etc.
         """
         num_spectra = len(raw_data["rt"])
         # Create spectrum dataframe
         self.create_spectrum_df(num_spectra)
         # Create peak dataframe with indexed arrays
         self.set_peak_df_by_indexed_array(
             raw_data["peak_mz"],
             raw_data["peak_intensity"],
             raw_data["peak_indices"][:-1],  # start indices
-            raw_data["peak_indices"][1:],   # end indices
+            raw_data["peak_indices"][1:],  # end indices
         )
         # Add spectrum-level data to spectrum dataframe
@@ -741,17 +736,17 @@ def get_file_info(filename: str) -> dict[str, Any]:
 def main() -> None:
     """
     Main function for testing and demonstrating the module functionality.
     This function provides usage examples and tests basic module functionality
     when the script is run directly.
     """
     print("Standalone Thermo RAW Reader")
     print("=" * 40)
     # Display usage example
     print("\nUsage Example:")
     print("-" * 20)
-    example_code = '''
+    example_code = """
 from thermo import ThermoRawData, load_raw_file
 # Method 1: Create reader instance
@@ -771,18 +766,18 @@ mz, intensity = raw_data.get_peaks(0)
 # Check available polarities
 polarities = raw_data.spectrum_df['polarity'].unique()
 print(f"Polarities: {polarities}")
-'''
+"""
     print(example_code)
     # Test module functionality
     print("\nModule Status:")
     print("-" * 20)
     try:
         # Test class instantiation
         test_data = ThermoRawData()
         print("✓ ThermoRawData instantiated successfully")
         # Check .NET support
         if HAS_DOTNET:
             print("✓ .NET support available")
@@ -792,10 +787,10 @@ print(f"Polarities: {polarities}")
             print("⚠ .NET support not available")
             print("  • Install pythonnet to enable RAW file reading")
             print("  • Ensure Thermo Fisher DLLs are in alpharaw ext directory")
     except Exception as e:
         print(f"✗ Error during module testing: {e}")
 if __name__ == "__main__":
-    main()
+    main()

masster/spectrum.py CHANGED Viewed

@@ -199,61 +199,61 @@ class Spectrum:
     def check_if_centroided(self) -> bool:
         """
         Fast determination if spectrum data is centroided or profile.
         Uses optimized statistical approaches with early exits for speed:
         1. Fast median difference check (most decisive)
-        2. Small gap ratio (profile characteristic)
+        2. Small gap ratio (profile characteristic)
         3. Density check (fallback)
         Returns:
             bool: True if centroided, False if profile
         """
         if self.mz.size < 5:
             return True  # Too few points to determine, assume centroided
         # Fast path: check if mz is already sorted to avoid sorting cost
         if np.all(self.mz[:-1] <= self.mz[1:]):
             sorted_mz = self.mz
         else:
             sorted_mz = np.sort(self.mz)
         # Calculate differences efficiently
         mz_diffs = np.diff(sorted_mz)
         # Remove zeros efficiently (keep positive differences)
         mz_diffs = mz_diffs[mz_diffs > 0]
         if mz_diffs.size == 0:
             return True  # All identical m/z values
         # Fast approach 1: Median difference (most decisive, compute once)
         median_diff = np.median(mz_diffs)
         # Early exits for clear cases (>90% of cases)
         if median_diff > 0.02:
             return True  # Clearly centroided
         elif median_diff < 0.005:
             return False  # Clearly profile
         # Fast approach 2: Small gap ratio (for borderline cases)
         # Use vectorized comparison instead of creating new array
         small_gap_count = np.sum(mz_diffs < 0.005)
         small_gap_ratio = small_gap_count / mz_diffs.size
         if small_gap_ratio > 0.7:
             return False  # High ratio of small gaps = profile
         elif small_gap_ratio < 0.1:
-            return True   # Low ratio of small gaps = centroided
+            return True  # Low ratio of small gaps = centroided
         # Fast approach 3: Density check (final fallback)
-        mz_range = sorted_mz[-1] - sorted_mz[0]
+        mz_range = sorted_mz[-1] - sorted_mz[0]
         if mz_range > 0:
             density = sorted_mz.size / mz_range
             if density > 100:  # High density = profile
                 return False
             elif density < 10:  # Low density = centroided
                 return True
         # Final fallback: median threshold
         return median_diff > 0.01

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

Potentially problematic release.

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl