PyPI - masster - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl - Mend

masster 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (20)

masster/_version.py +1 -1
masster/sample/load.py +5 -4
masster/study/defaults/align_def.py +0 -204
masster/study/defaults/fill_def.py +9 -1
masster/study/defaults/merge_def.py +20 -69
masster/study/export.py +25 -5
masster/study/h5.py +160 -42
masster/study/helpers.py +430 -53
masster/study/load.py +986 -158
masster/study/merge.py +683 -1076
masster/study/plot.py +43 -38
masster/study/processing.py +337 -280
masster/study/study.py +58 -135
masster/wizard/wizard.py +20 -6
{masster-0.5.1.dist-info → masster-0.5.3.dist-info}/METADATA +1 -1
{masster-0.5.1.dist-info → masster-0.5.3.dist-info}/RECORD +19 -20
masster/study/defaults/fill_chrom_def.py +0 -260
{masster-0.5.1.dist-info → masster-0.5.3.dist-info}/WHEEL +0 -0
{masster-0.5.1.dist-info → masster-0.5.3.dist-info}/entry_points.txt +0 -0
{masster-0.5.1.dist-info → masster-0.5.3.dist-info}/licenses/LICENSE +0 -0

masster/study/plot.py CHANGED Viewed

@@ -1385,6 +1385,7 @@ def plot_rt_correction(
     """
     Plot RT correction per sample: (rt - rt_original) vs rt overlaid for selected samples.
+    Only features with filled==False are used for the RT correction plot.
     This uses the same color mapping as `plot_bpc` so curves for the same samples match.
     """
     from bokeh.plotting import figure, show, output_file
@@ -1447,29 +1448,35 @@ def plot_rt_correction(
         if sample_feats.is_empty():
             continue
-        # Convert to pandas for easy numeric handling
-        try:
-            df = sample_feats.to_pandas()
-        except Exception:
-            continue
+        # Filter to only use features with filled==False
+        if "filled" in sample_feats.columns:
+            sample_feats = sample_feats.filter(~pl.col("filled"))
+            if sample_feats.is_empty():
+                continue
-        # Need both rt and rt_original
-        if "rt" not in df.columns or "rt_original" not in df.columns:
+        # Stay in Polars - much faster than pandas conversion!
+        if "rt" not in sample_feats.columns or "rt_original" not in sample_feats.columns:
             continue
-        # Drop NA and ensure numeric arrays
-        df = df.dropna(subset=["rt", "rt_original"]).copy()
-        if df.empty:
-            continue
+        # Filter nulls and add delta column in Polars
+        sample_feats = (
+            sample_feats
+            .filter(
+                pl.col("rt").is_not_null() &
+                pl.col("rt_original").is_not_null()
+            )
+            .with_columns([
+                (pl.col("rt") - pl.col("rt_original")).alias("delta")
+            ])
+            .sort("rt")
+        )
-        rt = _np.asarray(df["rt"], dtype=float)
-        rt_orig = _np.asarray(df["rt_original"], dtype=float)
-        delta = rt - rt_orig
+        if sample_feats.is_empty():
+            continue
-        # sort by rt
-        idx = _np.argsort(rt)
-        rt = rt[idx]
-        delta = delta[idx]
+        # Extract arrays directly from Polars
+        rt = sample_feats["rt"].to_numpy()
+        delta = sample_feats["delta"].to_numpy()
         sample_name = str(uid)
         if samples_info is not None:
@@ -1759,21 +1766,26 @@ def plot_consensus_stats(
     import polars as pl
     import numpy as np
-    # Check if consensus_df exists and has data
-    if self.consensus_df is None or self.consensus_df.is_empty():
-        self.logger.error("No consensus data available. Run merge/find_consensus first.")
+    # Get the consensus statistics data using the new helper method
+    data_df = self.get_consensus_stats()
+    if data_df is None or data_df.is_empty():
+        self.logger.error("No consensus statistics data available.")
         return
-    # Get all columns and their data types - work with original dataframe
-    data_df = self.consensus_df.clone()
+    # Remove consensus_uid column for plotting (keep only numeric columns)
+    if "consensus_uid" in data_df.columns:
+        data_df_clean = data_df.drop("consensus_uid")
+    else:
+        data_df_clean = data_df
-    # Define specific columns to plot in the exact order requested
+    # Define specific columns to plot in the exact order requested (excluding consensus_uid)
     desired_columns = [
         "rt",
         "rt_delta_mean",
         "mz",
-        "mz_range",  # mz_max-mz_min (will be calculated)
-        "log10_inty_mean",  # log10(inty_mean) (will be calculated)
+        "mz_range",  # mz_max-mz_min
+        "log10_inty_mean",  # log10(inty_mean)
         "number_samples",
         "number_ms2",
         "charge_mean",
@@ -1783,20 +1795,13 @@ def plot_consensus_stats(
         "chrom_prominence_scaled_mean"
     ]
-    # Calculate derived columns if they don't exist
-    if "mz_range" not in data_df.columns and "mz_max" in data_df.columns and "mz_min" in data_df.columns:
-        data_df = data_df.with_columns((pl.col("mz_max") - pl.col("mz_min")).alias("mz_range"))
-    if "log10_inty_mean" not in data_df.columns and "inty_mean" in data_df.columns:
-        data_df = data_df.with_columns(pl.col("inty_mean").log10().alias("log10_inty_mean"))
     # Filter to only include columns that exist in the dataframe, preserving order
-    numeric_columns = [col for col in desired_columns if col in data_df.columns]
+    numeric_columns = [col for col in desired_columns if col in data_df_clean.columns]
     # Check if the numeric columns are actually numeric
     final_numeric_columns = []
     for col in numeric_columns:
-        dtype = data_df[col].dtype
+        dtype = data_df_clean[col].dtype
         if dtype in [pl.Int8, pl.Int16, pl.Int32, pl.Int64,
                     pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
                     pl.Float32, pl.Float64]:
@@ -1805,13 +1810,13 @@ def plot_consensus_stats(
     numeric_columns = final_numeric_columns
     if len(numeric_columns) == 0:
-        self.logger.error(f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df.columns)}")
+        self.logger.error(f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df_clean.columns)}")
         return
     self.logger.debug(f"Creating distribution plots for {len(numeric_columns)} specific consensus columns: {numeric_columns}")
-    # Work directly with Polars - no conversion to pandas needed
-    data_df_clean = data_df.select(numeric_columns)
+    # Select only the numeric columns for plotting
+    data_df_clean = data_df_clean.select(numeric_columns)
     # Check if all numeric columns are empty
     all_columns_empty = True

masster 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

Potentially problematic release.

masster 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl