PyPI - masster - Versions diffs - 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl - Mend

masster 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (33) hide show

masster/docs/SCX_API_Documentation.md +0 -0
masster/docs/SCX_DLL_Analysis.md +0 -0
masster/logger.py +92 -78
masster/sample/defaults/find_features_def.py +16 -6
masster/sample/defaults/sample_def.py +1 -1
masster/sample/h5.py +2 -2
masster/sample/helpers.py +190 -140
masster/sample/load.py +13 -9
masster/sample/plot.py +256 -147
masster/sample/processing.py +18 -12
masster/sample/sample.py +10 -4
masster/sample/sample5_schema.json +38 -29
masster/sample/save.py +16 -13
masster/sample/sciex.py +187 -176
masster/study/defaults/align_def.py +231 -13
masster/study/defaults/fill_chrom_def.py +1 -5
masster/study/defaults/integrate_chrom_def.py +1 -5
masster/study/defaults/study_def.py +2 -2
masster/study/export.py +144 -131
masster/study/h5.py +193 -133
masster/study/helpers.py +757 -246
masster/study/helpers_optimized.py +99 -57
masster/study/load.py +57 -25
masster/study/plot.py +1244 -129
masster/study/processing.py +194 -86
masster/study/save.py +7 -7
masster/study/study.py +154 -89
masster/study/study5_schema.json +15 -15
{masster-0.3.10.dist-info → masster-0.3.12.dist-info}/METADATA +1 -1
{masster-0.3.10.dist-info → masster-0.3.12.dist-info}/RECORD +33 -31
{masster-0.3.10.dist-info → masster-0.3.12.dist-info}/WHEEL +0 -0
{masster-0.3.10.dist-info → masster-0.3.12.dist-info}/entry_points.txt +0 -0
{masster-0.3.10.dist-info → masster-0.3.12.dist-info}/licenses/LICENSE +0 -0

masster/sample/processing.py CHANGED Viewed

@@ -519,6 +519,10 @@ def find_features(self, **kwargs):
         low-quality peaks), lower values make it more permissive. Typical tuning range: ~3 (relaxed) to >10
         (stringent). Default: 10.0.
+    - isotope_filtering_model (str):
+        Isotope filtering model ('metabolites (2% RMS)', 'metabolites (5% RMS)', 'peptides', 'none').
+        Default: 'metabolites (5% RMS)'.
     Tuning recommendation: first set ``chrom_fwhm`` to match your LC peak shape, then set ``noise`` to a baseline
     intensity filter for your data, and finally adjust ``chrom_peak_snr`` to reach the desired balance between
     sensitivity and specificity.
@@ -556,24 +560,25 @@ def find_features(self, **kwargs):
                 self.logger.warning(f"Unknown parameter {key} ignored")
     # Set global parameters
-    if hasattr(params, 'threads') and params.threads is not None:
+    if hasattr(params, "threads") and params.threads is not None:
         try:
             # Try setting via OpenMP environment variable first (newer approach)
             import os
-            os.environ['OMP_NUM_THREADS'] = str(params.threads)
+            os.environ["OMP_NUM_THREADS"] = str(params.threads)
             self.logger.debug(f"Set thread count to {params.threads} via OMP_NUM_THREADS")
         except Exception:
             self.logger.warning(f"Could not set thread count to {params.threads} - using default")
     # Set debug mode if enabled
-    if hasattr(params, 'debug') and params.debug:
+    if hasattr(params, "debug") and params.debug:
         self.logger.debug("Debug mode enabled")
-    elif hasattr(params, 'no_progress') and params.no_progress:
+    elif hasattr(params, "no_progress") and params.no_progress:
         self.logger.debug("No progress mode enabled")
     self.logger.info("Starting feature detection...")
     self.logger.debug(
-        f"Parameters: chrom_fwhm={params.get('chrom_fwhm')}, noise={params.get('noise')}, tol_ppm={params.get('tol_ppm')}",
+        f"Parameters: chrom_fwhm={params.get('chrom_fwhm')}, noise={params.get('noise')}, tol_ppm={params.get('tol_ppm')}, isotope_filtering_model={params.get('isotope_filtering_model')}",
     )
     exp = oms.MSExperiment()
@@ -602,7 +607,8 @@ def find_features(self, **kwargs):
     # Apply MTD parameters
     mtd_par.setValue("mass_error_ppm", float(params.get("tol_ppm")))
     mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
-    mtd_par.setValue("min_trace_length",
+    mtd_par.setValue(
+        "min_trace_length",
         float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
     )
     mtd_par.setValue(
@@ -610,7 +616,7 @@ def find_features(self, **kwargs):
         int(params.get("trace_termination_outliers")),
     )
     mtd_par.setValue("chrom_peak_snr", float(params.get("chrom_peak_snr")))
     # Additional MTD parameters
     mtd_par.setValue("min_sample_rate", float(params.get("min_sample_rate")))
     mtd_par.setValue("min_trace_length", float(params.get("min_trace_length")))
@@ -636,10 +642,10 @@ def find_features(self, **kwargs):
         epd_par.setValue("masstrace_snr_filtering", "true")
     if params.get("mz_scoring_13C"):
         epd_par.setValue("mz_scoring_13C", "true")
     # Additional EPD parameters
     epd_par.setValue("enabled", "true" if params.get("enabled") else "false")
     epd.setParameters(epd_par)
     epd.detectPeaks(mass_traces, mass_traces_deconvol)
@@ -675,7 +681,7 @@ def find_features(self, **kwargs):
     ffm_par.setValue("local_mz_range", float(params.get("local_mz_range")))
     ffm_par.setValue("charge_lower_bound", int(params.get("charge_lower_bound")))
     ffm_par.setValue("charge_upper_bound", int(params.get("charge_upper_bound")))
+    ffm_par.setValue("isotope_filtering_model", params.get("isotope_filtering_model"))
     ffm.setParameters(ffm_par)

masster/sample/sample.py CHANGED Viewed

@@ -62,6 +62,7 @@ from masster.sample.helpers import select_closest_scan
 from masster.sample.helpers import get_dda_stats
 from masster.sample.helpers import get_feature
 from masster.sample.helpers import get_scan
+from masster.sample.helpers import get_eic
 from masster.sample.helpers import set_source
 from masster.sample.load import _load_featureXML
 from masster.sample.load import _load_ms2data
@@ -80,6 +81,8 @@ from masster.sample.plot import plot_feature_stats
 from masster.sample.plot import plot_ms2_cycle
 from masster.sample.plot import plot_ms2_eic
 from masster.sample.plot import plot_ms2_q1
+from masster.sample.plot import plot_bpc
+from masster.sample.plot import plot_tic
 from masster.sample.processing import _clean_features_df
 from masster.sample.processing import _features_deisotope
 from masster.sample.processing import _get_ztscan_stats
@@ -229,6 +232,9 @@ class Sample:
     plot_ms2_cycle = plot_ms2_cycle
     plot_ms2_eic = plot_ms2_eic
     plot_ms2_q1 = plot_ms2_q1
+    plot_bpc = plot_bpc
+    plot_tic = plot_tic
+    get_eic = get_eic
     get_feature = get_feature
     get_scan = get_scan
     get_dda_stats = get_dda_stats
@@ -333,15 +339,15 @@ class Sample:
             if module_name.startswith(study_module_prefix) and module_name != current_module:
                 study_modules.append(module_name)
-        ''' # Add parameters submodules
+        """ # Add parameters submodules
         parameters_modules = []
         parameters_module_prefix = f"{base_modname}.parameters."
         for module_name in sys.modules:
             if module_name.startswith(parameters_module_prefix) and module_name != current_module:
                 parameters_modules.append(module_name)
-        '''
-        all_modules_to_reload = core_modules + sample_modules + study_modules #+ parameters_modules
+        """
+        all_modules_to_reload = core_modules + sample_modules + study_modules  # + parameters_modules
         # Reload all discovered modules
         for full_module_name in all_modules_to_reload:

masster/sample/sample5_schema.json CHANGED Viewed

@@ -7,6 +7,9 @@
       "feature_id": {
         "dtype": "pl.Utf8"
       },
+      "sample_uid": {
+        "dtype": "pl.Int32"
+      },
       "mz": {
         "dtype": "pl.Float64"
       },
@@ -46,18 +49,24 @@
       "iso_of": {
         "dtype": "pl.Int64"
       },
-      "adduct_group": {
-        "dtype": "pl.Int64"
-      },
       "adduct": {
         "dtype": "pl.Utf8"
       },
       "adduct_mass": {
         "dtype": "pl.Float64"
       },
+      "adduct_group": {
+        "dtype": "pl.Int64"
+      },
       "chrom": {
         "dtype": "pl.Object"
       },
+      "filled": {
+        "dtype": "pl.Boolean"
+      },
+      "chrom_area": {
+        "dtype": "pl.Float64"
+      },
       "chrom_coherence": {
         "dtype": "pl.Float64"
       },
@@ -100,64 +109,64 @@
   },
   "scans_df": {
     "columns": {
-      "scan_uid": {
-        "dtype": "pl.Int64"
+      "bl": {
+        "dtype": "pl.Float64"
+      },
+      "comment": {
+        "dtype": "pl.Utf8"
       },
       "cycle": {
         "dtype": "pl.Int64"
       },
-      "ms_level": {
+      "energy": {
+        "dtype": "pl.Float64"
+      },
+      "feature_uid": {
         "dtype": "pl.Int64"
       },
-      "rt": {
-        "dtype": "pl.Float64"
+      "id": {
+        "dtype": "pl.Utf8"
       },
-      "inty_tot": {
+      "inty_max": {
         "dtype": "pl.Float64"
       },
       "inty_min": {
         "dtype": "pl.Float64"
       },
-      "inty_max": {
+      "inty_tot": {
         "dtype": "pl.Float64"
       },
-      "bl": {
-        "dtype": "pl.Float64"
+      "ms2_n": {
+        "dtype": "pl.Int64"
       },
-      "mz_min": {
-        "dtype": "pl.Float64"
+      "ms_level": {
+        "dtype": "pl.Int64"
       },
       "mz_max": {
         "dtype": "pl.Float64"
       },
-      "comment": {
-        "dtype": "pl.Utf8"
+      "mz_min": {
+        "dtype": "pl.Float64"
       },
       "name": {
         "dtype": "pl.Utf8"
       },
-      "id": {
-        "dtype": "pl.Utf8"
-      },
-      "prec_mz": {
+      "prec_inty": {
         "dtype": "pl.Float64"
       },
-      "prec_mz_min": {
+      "prec_mz": {
         "dtype": "pl.Float64"
       },
       "prec_mz_max": {
         "dtype": "pl.Float64"
       },
-      "prec_inty": {
+      "prec_mz_min": {
         "dtype": "pl.Float64"
       },
-      "energy": {
+      "rt": {
         "dtype": "pl.Float64"
       },
-      "feature_uid": {
-        "dtype": "pl.Int64"
-      },
-      "ms2_n": {
+      "scan_uid": {
         "dtype": "pl.Int64"
       },
       "time_cycle": {
@@ -169,10 +178,10 @@
       "time_ms1_to_ms2": {
         "dtype": "pl.Float64"
       },
-      "time_ms2_to_ms2": {
+      "time_ms2_to_ms1": {
         "dtype": "pl.Float64"
       },
-      "time_ms2_to_ms1": {
+      "time_ms2_to_ms2": {
         "dtype": "pl.Float64"
       }
     }

masster/sample/save.py CHANGED Viewed

@@ -134,10 +134,10 @@ def export_features(self, filename="features.csv"):
     # clone df
     clean_df = self.features_df.clone()
     filename = os.path.abspath(filename)
-    # add a column has_ms2=True if colum ms2_scans is not None
+    # add a column has_ms2=True if column ms2_scans is not None
     if "ms2_scans" in clean_df.columns:
         clean_df = clean_df.with_columns(
-            (pl.col("ms2_scans").is_not_null()).alias("has_ms2")
+            (pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
         )
     clean_df = self.features_df.select([
         col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
@@ -215,7 +215,7 @@ def export_mgf(
             return
         else:
             self.features_df = self.features.get_df()
     # Apply filtering at DataFrame level for better performance
     features = self.features_df
     if mz_start is not None:
@@ -228,7 +228,7 @@ def export_mgf(
         features = features.filter(pl.col("rt") <= rt_end)
     if not include_all_ms1:
         features = features.filter(pl.col("ms2_scans").is_not_null())
     # Convert to list of dictionaries for faster iteration
     features_list = features.to_dicts()
@@ -286,7 +286,10 @@ def export_mgf(
             centroid_algo = "cr"
     # count how many features have charge < 0
-    if self.features_df.filter(pl.col("charge") < 0).shape[0]- self.features_df.filter(pl.col("charge") > 0).shape[0] > 0:
+    if (
+        self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
+        > 0
+    ):
         preferred_charge = -1
     else:
         preferred_charge = 1
@@ -312,7 +315,7 @@ def export_mgf(
             rt = row["rt"]
             rt_str = f"{rt:.2f}"
             mz_str = f"{mz:.4f}"
             # Filtering is now done at DataFrame level, so we can skip these checks
             if row["ms2_scans"] is None and not include_all_ms1:
                 skip = skip + 1
@@ -338,7 +341,7 @@ def export_mgf(
             charge = preferred_charge
             if row["charge"] is not None and row["charge"] != 0:
-                    charge = row["charge"]
+                charge = row["charge"]
             write_ion(
                 f,
@@ -397,7 +400,7 @@ def export_mgf(
                                 )
                                 c += 1
                         continue  # Skip the rest of the processing for this feature
             # If we reach here, either use_cache=False or no cached spectra were available
             if split_energy:
                 # get energy of all scans with scan_uid in ms2_scans by fetching them
@@ -408,20 +411,20 @@ def export_mgf(
                     for scan_uid in ms2_scan_uids:
                         spec = self.get_spectrum(scan_uid)
                         if spec is not None:
-                            spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, 'energy') else 0))
+                            spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, "energy") else 0))
                     # Group by energy
                     energy_groups: dict[float, list[int]] = {}
                     for scan_uid, energy in spectra_with_energy:
                         if energy not in energy_groups:
                             energy_groups[energy] = []
                         energy_groups[energy].append(scan_uid)
                     for energy, scan_uids_for_energy in energy_groups.items():
                         if selection == "best":
                             # Keep only the first scan for this energy
                             scan_uids_for_energy = [scan_uids_for_energy[0]]
                         for scan_uid in scan_uids_for_energy:
                             spect = self.get_spectrum(
                                 scan_uid,
@@ -556,7 +559,7 @@ def export_mgf(
     inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
     q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
     eic_corr_min_str = f"{eic_corr_min:.3f}" if eic_corr_min is not None else "None"
     self.logger.debug(
         f"MGF created with int>{inty_min_str}, q1_ratio>{q1_ratio_min_str}, eic_corr>{eic_corr_min_str}",
     )

masster 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

Potentially problematic release.

masster 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl