PyPI - masster - Versions diffs - 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl - Mend

masster 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (39) hide show

masster/__init__.py +8 -8
masster/chromatogram.py +1 -1
masster/data/libs/urine.csv +3 -3
masster/logger.py +11 -11
masster/sample/__init__.py +1 -1
masster/sample/adducts.py +338 -264
masster/sample/defaults/find_adducts_def.py +21 -8
masster/sample/h5.py +561 -282
masster/sample/helpers.py +131 -75
masster/sample/lib.py +4 -4
masster/sample/load.py +31 -17
masster/sample/parameters.py +1 -1
masster/sample/plot.py +7 -7
masster/sample/processing.py +117 -87
masster/sample/sample.py +103 -90
masster/sample/sample5_schema.json +44 -44
masster/sample/save.py +35 -12
masster/spectrum.py +1 -1
masster/study/__init__.py +1 -1
masster/study/defaults/align_def.py +5 -1
masster/study/defaults/identify_def.py +3 -1
masster/study/defaults/study_def.py +58 -25
masster/study/export.py +360 -210
masster/study/h5.py +560 -158
masster/study/helpers.py +496 -203
masster/study/helpers_optimized.py +1 -1
masster/study/id.py +538 -349
masster/study/load.py +233 -143
masster/study/plot.py +71 -71
masster/study/processing.py +456 -254
masster/study/save.py +15 -5
masster/study/study.py +213 -131
masster/study/study5_schema.json +149 -149
{masster-0.4.4.dist-info → masster-0.4.5.dist-info}/METADATA +3 -1
{masster-0.4.4.dist-info → masster-0.4.5.dist-info}/RECORD +39 -39
{masster-0.4.4.dist-info → masster-0.4.5.dist-info}/WHEEL +0 -0
{masster-0.4.4.dist-info → masster-0.4.5.dist-info}/entry_points.txt +0 -0
{masster-0.4.4.dist-info → masster-0.4.5.dist-info}/licenses/LICENSE +0 -0
{masster-0.4.4.dist-info → masster-0.4.5.dist-info}/top_level.txt +0 -0

masster/sample/plot.py CHANGED Viewed

@@ -144,7 +144,7 @@ def _display_plot(plot_object, layout=None):
 def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
     """
     Helper function to handle consistent save/display behavior for sample plots.
     Parameters:
         plot_obj: The plot object (bokeh figure, holoviews layout, or panel object)
         filename: Optional filename to save the plot
@@ -155,10 +155,10 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
         import os
         if hasattr(self, 'folder') and self.folder and not os.path.isabs(filename):
             filename = os.path.join(self.folder, filename)
         # Convert to absolute path for logging
         abs_filename = os.path.abspath(filename)
         if filename.endswith(".html"):
             if plot_type == "panel":
                 plot_obj.save(filename, embed=True)  # type: ignore[attr-defined]
@@ -375,7 +375,7 @@ def plot_chrom(
     layout = layout.cols(1)
     layout = panel.Column(layout)
     # Use consistent save/display behavior
     self._handle_sample_plot_output(layout, filename, "panel")
@@ -927,7 +927,7 @@ def plot_2d(
         layout = panel.Column(overlay)
     if filename is not None:
-        # Use consistent save/display behavior
+        # Use consistent save/display behavior
         self._handle_sample_plot_output(layout, filename, "panel")
         return None
     else:
@@ -2073,7 +2073,7 @@ def plot_tic(
         return
     # Import helper locally to avoid circular imports
-    from masster.study.helpers import get_tic
+    from masster.study.helpers import get_tic
     # Delegate TIC computation to study helper which handles ms1_df and scans_df fallbacks
     try:
@@ -2128,7 +2128,7 @@ def plot_bpc(
         return
     # Import helper locally to avoid circular imports
-    from masster.study.helpers import get_bpc
+    from masster.study.helpers import get_bpc
     # Delegate BPC computation to study helper
     try:

masster/sample/processing.py CHANGED Viewed

@@ -8,13 +8,14 @@ import pyopenms as oms
 from tqdm import tqdm
-from masster.spectrum import Spectrum
+from masster.spectrum import Spectrum
 from .defaults.find_features_def import find_features_defaults
 from .defaults.find_ms2_def import find_ms2_defaults
 from .defaults.get_spectrum_def import get_spectrum_defaults
-from masster.chromatogram import Chromatogram
+from masster.chromatogram import Chromatogram
 def get_spectrum(self, scan, **kwargs):
     """Retrieve a single spectrum and optionally post-process it.
@@ -252,7 +253,8 @@ def get_spectrum(self, scan, **kwargs):
                 spec=spect,
                 scan_uid=scan_uid,
                 feature_uid=scan_info["feature_uid"][0]
-                if "feature_uid" in scan_info and scan_info["feature_uid"][0] is not None
+                if "feature_uid" in scan_info
+                and scan_info["feature_uid"][0] is not None
                 else feature_uid,
                 q1_step=2,
                 deisotope=deisotope,
@@ -445,7 +447,9 @@ def _spec_to_mat(
             closest_index = np.argmin(np.abs(ar2 - val1))
             closest_indices.append((i, closest_index))
         # filter out pairs that are not within the specified tolerance
-        closest_indices = [(i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol]
+        closest_indices = [
+            (i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol
+        ]
         # remove duplicates from the list of indices
         closest_indices = list(set(closest_indices))
         # sort the list of indices by the first element (i) in ascending order
@@ -564,9 +568,13 @@ def find_features(self, **kwargs):
             import os
             os.environ["OMP_NUM_THREADS"] = str(params.threads)
-            self.logger.debug(f"Set thread count to {params.threads} via OMP_NUM_THREADS")
+            self.logger.debug(
+                f"Set thread count to {params.threads} via OMP_NUM_THREADS",
+            )
         except Exception:
-            self.logger.warning(f"Could not set thread count to {params.threads} - using default")
+            self.logger.warning(
+                f"Could not set thread count to {params.threads} - using default",
+            )
     # Set debug mode if enabled
     if hasattr(params, "debug") and params.debug:
@@ -607,7 +615,8 @@ def find_features(self, **kwargs):
     mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
     mtd_par.setValue(
         "min_trace_length",
-        float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
+        float(params.get("min_trace_length_multiplier"))
+        * float(params.get("chrom_fwhm_min")),
     )
     mtd_par.setValue(
         "trace_termination_outliers",
@@ -618,8 +627,14 @@ def find_features(self, **kwargs):
     # Additional MTD parameters
     mtd_par.setValue("min_sample_rate", float(params.get("min_sample_rate")))
     mtd_par.setValue("min_trace_length", float(params.get("min_trace_length")))
-    mtd_par.setValue("trace_termination_criterion", params.get("trace_termination_criterion"))
-    mtd_par.setValue("reestimate_mt_sd", "true" if params.get("reestimate_mt_sd") else "false")
+    mtd_par.setValue(
+        "trace_termination_criterion",
+        params.get("trace_termination_criterion"),
+    )
+    mtd_par.setValue(
+        "reestimate_mt_sd",
+        "true" if params.get("reestimate_mt_sd") else "false",
+    )
     mtd_par.setValue("quant_method", params.get("quant_method"))
     mtd.setParameters(mtd_par)  # set the new parameters
@@ -688,7 +703,7 @@ def find_features(self, **kwargs):
     df = feature_map.get_df(export_peptide_identifications=False)  # type: ignore[attr-defined]
     # Sets the file path to the primary MS run (usually the mzML file)
     feature_map.setPrimaryMSRunPath([self.file_path.encode()])
     # Store feature map in both attributes for compatibility
     self.features = feature_map
     self._oms_features_map = feature_map
@@ -769,13 +784,15 @@ def find_features(self, **kwargs):
             height_scaleds.append(None)
     # Add the computed columns to the dataframe
-    df = df.with_columns([
-        pl.Series("chrom", chroms, dtype=pl.Object),
-        pl.Series("chrom_coherence", coherences, dtype=pl.Float64),
-        pl.Series("chrom_prominence", prominences, dtype=pl.Float64),
-        pl.Series("chrom_prominence_scaled", prominence_scaleds, dtype=pl.Float64),
-        pl.Series("chrom_height_scaled", height_scaleds, dtype=pl.Float64),
-    ])
+    df = df.with_columns(
+        [
+            pl.Series("chrom", chroms, dtype=pl.Object),
+            pl.Series("chrom_coherence", coherences, dtype=pl.Float64),
+            pl.Series("chrom_prominence", prominences, dtype=pl.Float64),
+            pl.Series("chrom_prominence_scaled", prominence_scaleds, dtype=pl.Float64),
+            pl.Series("chrom_height_scaled", height_scaleds, dtype=pl.Float64),
+        ],
+    )
     self.features_df = df
     self._features_sync()
@@ -796,10 +813,10 @@ def find_features(self, **kwargs):
 def _clean_features_df(self, df):
     """Clean and standardize features DataFrame."""
     # Convert pandas DataFrame to polars if needed
-    if hasattr(df, 'index'):  # pandas DataFrame
+    if hasattr(df, "index"):  # pandas DataFrame
         df = df.copy()
         df["feature_id"] = df.index
     if hasattr(df, "columns") and not isinstance(df, pl.DataFrame):
         df_pl = pl.from_pandas(df)
     else:
@@ -809,35 +826,37 @@ def _clean_features_df(self, df):
     df2 = df_pl.filter(pl.col("quality") != 0)
     # Create new dataframe with required columns and transformations
-    df_result = df2.select([
-        pl.int_range(pl.len()).alias("feature_uid"),
-        pl.col("feature_id").cast(pl.String).alias("feature_id"),
-        pl.col("mz").round(5),
-        pl.col("RT").round(3).alias("rt"),
-        pl.col("RT").round(3).alias("rt_original"),
-        pl.col("RTstart").round(3).alias("rt_start"),
-        pl.col("RTend").round(3).alias("rt_end"),
-        (pl.col("RTend") - pl.col("RTstart")).round(3).alias("rt_delta"),
-        pl.col("MZstart").round(5).alias("mz_start"),
-        pl.col("MZend").round(5).alias("mz_end"),
-        pl.col("intensity").alias("inty"),
-        pl.col("quality"),
-        pl.col("charge"),
-        pl.lit(0).alias("iso"),
-        pl.lit(None, dtype=pl.Int64).alias("iso_of"),
-        pl.lit(None, dtype=pl.Utf8).alias("adduct"),
-        pl.lit(None, dtype=pl.Float64).alias("adduct_charge"),
-        pl.lit(None, dtype=pl.Float64).alias("adduct_mass_shift"),
-        pl.lit(None, dtype=pl.Float64).alias("adduct_mass_neutral"),
-        pl.lit(None, dtype=pl.Int64).alias("adduct_group"),
-        pl.lit(None, dtype=pl.Object).alias("chrom"),
-        pl.lit(None, dtype=pl.Float64).alias("chrom_coherence"),
-        pl.lit(None, dtype=pl.Float64).alias("chrom_prominence"),
-        pl.lit(None, dtype=pl.Float64).alias("chrom_prominence_scaled"),
-        pl.lit(None, dtype=pl.Float64).alias("chrom_height_scaled"),
-        pl.lit(None, dtype=pl.Object).alias("ms2_scans"),
-        pl.lit(None, dtype=pl.Object).alias("ms2_specs"),
-    ])
+    df_result = df2.select(
+        [
+            pl.int_range(pl.len()).alias("feature_uid"),
+            pl.col("feature_id").cast(pl.String).alias("feature_id"),
+            pl.col("mz").round(5),
+            pl.col("RT").round(3).alias("rt"),
+            pl.col("RT").round(3).alias("rt_original"),
+            pl.col("RTstart").round(3).alias("rt_start"),
+            pl.col("RTend").round(3).alias("rt_end"),
+            (pl.col("RTend") - pl.col("RTstart")).round(3).alias("rt_delta"),
+            pl.col("MZstart").round(5).alias("mz_start"),
+            pl.col("MZend").round(5).alias("mz_end"),
+            pl.col("intensity").alias("inty"),
+            pl.col("quality"),
+            pl.col("charge"),
+            pl.lit(0).alias("iso"),
+            pl.lit(None, dtype=pl.Int64).alias("iso_of"),
+            pl.lit(None, dtype=pl.Utf8).alias("adduct"),
+            pl.lit(None, dtype=pl.Float64).alias("adduct_charge"),
+            pl.lit(None, dtype=pl.Float64).alias("adduct_mass_shift"),
+            pl.lit(None, dtype=pl.Float64).alias("adduct_mass_neutral"),
+            pl.lit(None, dtype=pl.Int64).alias("adduct_group"),
+            pl.lit(None, dtype=pl.Object).alias("chrom"),
+            pl.lit(None, dtype=pl.Float64).alias("chrom_coherence"),
+            pl.lit(None, dtype=pl.Float64).alias("chrom_prominence"),
+            pl.lit(None, dtype=pl.Float64).alias("chrom_prominence_scaled"),
+            pl.lit(None, dtype=pl.Float64).alias("chrom_height_scaled"),
+            pl.lit(None, dtype=pl.Object).alias("ms2_scans"),
+            pl.lit(None, dtype=pl.Object).alias("ms2_specs"),
+        ],
+    )
     return df_result
@@ -859,10 +878,12 @@ def _features_deisotope(
         df = pl.from_pandas(df)
     # Initialize new columns
-    df = df.with_columns([
-        pl.lit(0).alias("iso"),
-        pl.col("feature_uid").alias("iso_of"),
-    ])
+    df = df.with_columns(
+        [
+            pl.lit(0).alias("iso"),
+            pl.col("feature_uid").alias("iso_of"),
+        ],
+    )
     # Sort by 'mz'
     df = df.sort("mz")
@@ -889,13 +910,13 @@ def _features_deisotope(
         for isotope_offset in [1, 2, 3]:
             offset_mz = isotope_offset * mz_diff
             tolerance_factor = 1.0 if isotope_offset == 1 else 1.5
             t_lower = base_mz + offset_mz - tolerance_factor * mz_tol
             t_upper = base_mz + offset_mz + tolerance_factor * mz_tol
             li = np.searchsorted(mz_arr, t_lower, side="left")
             ri = np.searchsorted(mz_arr, t_upper, side="right")
             if li < ri:
                 cand_idx = np.arange(li, ri)
                 mask = (
@@ -904,22 +925,23 @@ def _features_deisotope(
                     & (intensity_arr[cand_idx] < 2 * base_int)
                 )
                 valid_cand = cand_idx[mask]
                 for cand in valid_cand:
                     if cand != i and iso_of_arr[cand] == feature_uid_arr[cand]:
                         iso_arr[cand] = iso_arr[i] + isotope_offset
                         iso_of_arr[cand] = base_feature_uid
     # Update the dataframe with isotope assignments
-    df = df.with_columns([
-        pl.Series("iso", iso_arr),
-        pl.Series("iso_of", iso_of_arr),
-    ])
+    df = df.with_columns(
+        [
+            pl.Series("iso", iso_arr),
+            pl.Series("iso_of", iso_of_arr),
+        ],
+    )
     return df
 def analyze_dda(self):
     # Preallocate variables
     cycle_records = []
@@ -1106,7 +1128,9 @@ def find_ms2(self, **kwargs):
     feature_rt_start = features_subset.select("rt_start").to_numpy().flatten()
     feature_rt_end = features_subset.select("rt_end").to_numpy().flatten()
     feature_uids = features_subset.select("feature_uid").to_numpy().flatten()
-    feature_indices = features_subset.with_row_index().select("index").to_numpy().flatten()
+    feature_indices = (
+        features_subset.with_row_index().select("index").to_numpy().flatten()
+    )
     # Pre-compute RT radius for all features
     rt_radius = np.minimum(feature_rt - feature_rt_start, feature_rt_end - feature_rt)
@@ -1159,15 +1183,17 @@ def find_ms2(self, **kwargs):
         scan_uids = ms2_index_arr[final_indices].tolist()
         scan_uid_lists.append(scan_uids)
-        spec_lists.append([
-            self.get_spectrum(
-                scan_uids[0],
-                centroid=centroid,
-                deisotope=deisotope,
-                dia_stats=dia_stats,
-                feature_uid=feature_uid,
-            ),
-        ])
+        spec_lists.append(
+            [
+                self.get_spectrum(
+                    scan_uids[0],
+                    centroid=centroid,
+                    deisotope=deisotope,
+                    dia_stats=dia_stats,
+                    feature_uid=feature_uid,
+                ),
+            ],
+        )
         # Collect updates for batch processing
         updated_feature_uids.extend([feature_uid] * len(final_indices))
@@ -1181,11 +1207,13 @@ def find_ms2(self, **kwargs):
         features_df = pl.from_pandas(features_df)
     # Update the features_df
-    update_df = pl.DataFrame({
-        "temp_idx": feature_indices,
-        "ms2_scans": pl.Series("ms2_scans", scan_uid_lists, dtype=pl.Object),
-        "ms2_specs": pl.Series("ms2_specs", spec_lists, dtype=pl.Object),
-    })
+    update_df = pl.DataFrame(
+        {
+            "temp_idx": feature_indices,
+            "ms2_scans": pl.Series("ms2_scans", scan_uid_lists, dtype=pl.Object),
+            "ms2_specs": pl.Series("ms2_specs", spec_lists, dtype=pl.Object),
+        },
+    )
     # Join and update
     features_df = (
@@ -1196,16 +1224,18 @@ def find_ms2(self, **kwargs):
             how="left",
             suffix="_new",
         )
-        .with_columns([
-            pl.when(pl.col("ms2_scans_new").is_not_null())
-            .then(pl.col("ms2_scans_new"))
-            .otherwise(pl.col("ms2_scans"))
-            .alias("ms2_scans"),
-            pl.when(pl.col("ms2_specs_new").is_not_null())
-            .then(pl.col("ms2_specs_new"))
-            .otherwise(pl.col("ms2_specs"))
-            .alias("ms2_specs"),
-        ])
+        .with_columns(
+            [
+                pl.when(pl.col("ms2_scans_new").is_not_null())
+                .then(pl.col("ms2_scans_new"))
+                .otherwise(pl.col("ms2_scans"))
+                .alias("ms2_scans"),
+                pl.when(pl.col("ms2_specs_new").is_not_null())
+                .then(pl.col("ms2_specs_new"))
+                .otherwise(pl.col("ms2_specs"))
+                .alias("ms2_specs"),
+            ],
+        )
         .drop(["temp_idx", "ms2_scans_new", "ms2_specs_new"])
     )
@@ -1242,4 +1272,4 @@ def find_ms2(self, **kwargs):
     self.store_history(["find_ms2"], params.to_dict())
     self.logger.debug(
         "Parameters stored to find_ms2",
-    )
+    )

masster 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

Potentially problematic release.

masster 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl