masster-0.5.10-py3-none-any.whl → masster-0.5.12-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.


masster/_version.py CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 
-__version__ = "0.5.10"
+__version__ = "0.5.12"
 
 
 def get_version():

masster/sample/plot.py CHANGED
@@ -1041,7 +1041,9 @@ def plot_2d(
     height=600,
     width=750,
     mz_range=None,
-    rt_range=None
+    rt_range=None,
+    legend=None,
+    colorby=None
 ):
     """
     Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
@@ -1087,6 +1089,13 @@ def plot_2d(
         Maximum pixel size for dynamic rasterization when using dynspread.
     raster_threshold (float, default 0.8):
         Threshold used for the dynspread process in dynamic rasterization.
+    legend (str, optional):
+        Legend position for categorical feature coloring ("top_right", "bottom_left", etc.) or None.
+        Only applies when colorby is not None and contains categorical data.
+    colorby (str, optional):
+        Feature property to use for coloring. If None (default), uses current green/red scheme
+        for features with/without MS2 data. If specified and contains categorical data, applies
+        categorical coloring with legend support (similar to plot_2d_oracle).
     Behavior:
     - Checks for a loaded mzML file by verifying that self.file_obj is not None.
     - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
@@ -1203,6 +1212,12 @@ def plot_2d(
 
     color_1 = "forestgreen"
     color_2 = "darkorange"
+
+    # Handle colorby parameter for feature coloring
+    use_categorical_coloring = False
+    feature_colors = {}
+    categorical_groups = []
+
     if filename is not None:
         dyn = False
         if not filename.endswith(".html"):
@@ -1270,74 +1285,203 @@ def plot_2d(
         feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
         # keep only iso==0, i.e. the main
         feats = feats[feats["iso"] == 0]
-        # find features with ms2_scans not None and iso==0
-        features_df = feats[feats["ms2_scans"].notnull()]
-        # Create feature points with proper sizing method
-        feature_hover_1 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-                ("iso", "@iso"),
-                ("adduct", "@adduct"),
-                ("chrom_coherence", "@chrom_coherence"),
-                ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-            ],
-        )
-        feature_points_1 = hv.Points(
-            features_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "feature_uid",
-                "inty",
-                "iso",
-                "adduct",
-                "ms2_scans",
-                "chrom_coherence",
-                "chrom_prominence_scaled",
-            ],
-            label="Features with MS2 data",
-        ).options(
-            color=color_1,
-            marker=marker_type,
-            size=size_1,
-            tools=[feature_hover_1],
-            hooks=hooks,
-        )
-        # find features without MS2 data
-        features_df = feats[feats["ms2_scans"].isnull()]
-        feature_hover_2 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-                ("iso", "@iso"),
-                ("adduct", "@adduct"),
-                ("chrom_coherence", "@chrom_coherence"),
-                ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-            ],
-        )
-        feature_points_2 = hv.Points(
-            features_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "feature_uid",
-                "inty",
-                "iso",
-                "adduct",
-                "chrom_coherence",
-                "chrom_prominence_scaled",
-            ],
-            label="Features without MS2 data",
-        ).options(
-            color="red",
-            marker=marker_type,
-            size=size_2,
-            tools=[feature_hover_2],
-            hooks=hooks,
-        )
+
+        # Handle colorby parameter
+        if colorby is not None and colorby in feats.columns:
+            # Check if colorby data is categorical (string-like)
+            colorby_values = feats[colorby].dropna()
+            is_categorical = (
+                feats[colorby].dtype in ["object", "string", "category"] or
+                (len(colorby_values) > 0 and isinstance(colorby_values.iloc[0], str))
+            )
+
+            if is_categorical:
+                use_categorical_coloring = True
+                # Get unique categories, sorted
+                categorical_groups = sorted(feats[colorby].dropna().unique())
+
+                # Set up colors for categorical data using matplotlib colormap
+                from matplotlib.colors import to_hex
+                try:
+                    from matplotlib.cm import get_cmap
+                    colormap_func = get_cmap(cmap if cmap != 'iridescent' else 'tab20')
+                    feature_colors = {}
+                    for i, group in enumerate(categorical_groups):
+                        if len(categorical_groups) <= 20:
+                            # Use qualitative colors for small number of categories
+                            color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
+                        else:
+                            # Use continuous colormap for many categories
+                            color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
+                        feature_colors[group] = to_hex(color_val)
+                except Exception as e:
+                    self.logger.warning(f"Could not set up categorical coloring: {e}, using default colors")
+                    use_categorical_coloring = False
+
+        if use_categorical_coloring and colorby is not None:
+            # Create separate feature points for each category
+            for i, group in enumerate(categorical_groups):
+                group_features = feats[feats[colorby] == group]
+                if len(group_features) == 0:
+                    continue
+
+                # Split by MS2 status
+                group_with_ms2 = group_features[group_features["ms2_scans"].notnull()]
+                group_without_ms2 = group_features[group_features["ms2_scans"].isnull()]
+
+                group_color = feature_colors.get(group, color_1)
+
+                if len(group_with_ms2) > 0:
+                    feature_hover = HoverTool(
+                        tooltips=[
+                            ("rt", "@rt"),
+                            ("m/z", "@mz{0.0000}"),
+                            ("feature_uid", "@feature_uid"),
+                            ("inty", "@inty"),
+                            ("iso", "@iso"),
+                            ("adduct", "@adduct"),
+                            ("chrom_coherence", "@chrom_coherence"),
+                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                            (colorby, f"@{colorby}"),
+                        ],
+                    )
+                    group_points_ms2 = hv.Points(
+                        group_with_ms2,
+                        kdims=["rt", "mz"],
+                        vdims=[
+                            "feature_uid",
+                            "inty",
+                            "iso",
+                            "adduct",
+                            "ms2_scans",
+                            "chrom_coherence",
+                            "chrom_prominence_scaled",
+                            colorby,
+                        ],
+                        label=f"{group} (MS2)",
+                    ).options(
+                        color=group_color,
+                        marker=marker_type,
+                        size=size_1,
+                        tools=[feature_hover],
+                        hooks=hooks,
+                    )
+                    if feature_points_1 is None:
+                        feature_points_1 = group_points_ms2
+                    else:
+                        feature_points_1 = feature_points_1 * group_points_ms2
+
+                if len(group_without_ms2) > 0:
+                    feature_hover = HoverTool(
+                        tooltips=[
+                            ("rt", "@rt"),
+                            ("m/z", "@mz{0.0000}"),
+                            ("feature_uid", "@feature_uid"),
+                            ("inty", "@inty"),
+                            ("iso", "@iso"),
+                            ("adduct", "@adduct"),
+                            ("chrom_coherence", "@chrom_coherence"),
+                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                            (colorby, f"@{colorby}"),
+                        ],
+                    )
+                    group_points_no_ms2 = hv.Points(
+                        group_without_ms2,
+                        kdims=["rt", "mz"],
+                        vdims=[
+                            "feature_uid",
+                            "inty",
+                            "iso",
+                            "adduct",
+                            "chrom_coherence",
+                            "chrom_prominence_scaled",
+                            colorby,
+                        ],
+                        label=f"{group} (no MS2)",
+                    ).options(
+                        color=group_color,
+                        marker=marker_type,
+                        size=size_2,
+                        tools=[feature_hover],
+                        hooks=hooks,
+                    )
+                    if feature_points_2 is None:
+                        feature_points_2 = group_points_no_ms2
+                    else:
+                        feature_points_2 = feature_points_2 * group_points_no_ms2
+        else:
+            # Use original green/red coloring scheme for MS2 presence
+            # find features with ms2_scans not None and iso==0
+            features_df = feats[feats["ms2_scans"].notnull()]
+            # Create feature points with proper sizing method
+            feature_hover_1 = HoverTool(
+                tooltips=[
+                    ("rt", "@rt"),
+                    ("m/z", "@mz{0.0000}"),
+                    ("feature_uid", "@feature_uid"),
+                    ("inty", "@inty"),
+                    ("iso", "@iso"),
+                    ("adduct", "@adduct"),
+                    ("chrom_coherence", "@chrom_coherence"),
+                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                ],
+            )
+            if len(features_df) > 0:
+                feature_points_1 = hv.Points(
+                    features_df,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "feature_uid",
+                        "inty",
+                        "iso",
+                        "adduct",
+                        "ms2_scans",
+                        "chrom_coherence",
+                        "chrom_prominence_scaled",
+                    ],
+                    label="Features with MS2 data",
+                ).options(
+                    color=color_1,
+                    marker=marker_type,
+                    size=size_1,
+                    tools=[feature_hover_1],
+                    hooks=hooks,
+                )
+
+            # find features without MS2 data
+            features_df = feats[feats["ms2_scans"].isnull()]
+            feature_hover_2 = HoverTool(
+                tooltips=[
+                    ("rt", "@rt"),
+                    ("m/z", "@mz{0.0000}"),
+                    ("feature_uid", "@feature_uid"),
+                    ("inty", "@inty"),
+                    ("iso", "@iso"),
+                    ("adduct", "@adduct"),
+                    ("chrom_coherence", "@chrom_coherence"),
+                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                ],
+            )
+            if len(features_df) > 0:
+                feature_points_2 = hv.Points(
+                    features_df,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "feature_uid",
+                        "inty",
+                        "iso",
+                        "adduct",
+                        "chrom_coherence",
+                        "chrom_prominence_scaled",
+                    ],
+                    label="Features without MS2 data",
+                ).options(
+                    color="red",
+                    marker=marker_type,
+                    size=size_2,
+                    tools=[feature_hover_2],
+                    hooks=hooks,
+                )
 
         if show_isotopes:
             # Use proper Polars filter syntax to avoid boolean indexing issues
@@ -1451,6 +1595,31 @@ def plot_2d(
         if title is not None:
             overlay = overlay.opts(title=title)
 
+        # Handle legend positioning for categorical coloring
+        if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
+            # Map legend position parameter to HoloViews legend position
+            legend_position_map = {
+                "top_right": "top_right",
+                "top_left": "top_left",
+                "bottom_right": "bottom_right",
+                "bottom_left": "bottom_left",
+                "right": "right",
+                "left": "left",
+                "top": "top",
+                "bottom": "bottom"
+            }
+
+            hv_legend_pos = legend_position_map.get(legend, "bottom_right")
+
+            # Apply legend configuration to the overlay
+            overlay = overlay.opts(
+                legend_position=hv_legend_pos,
+                legend_opts={'title': '', 'padding': 2, 'spacing': 2}
+            )
+        elif legend is None and use_categorical_coloring:
+            # Explicitly hide legend when legend=None but categorical coloring is used
+            overlay = overlay.opts(show_legend=False)
+
         # Handle slider functionality
         if use_slider_sizing:
             # For slider functionality, we need to work with the feature points directly
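
The plot.py changes add two optional keywords to plot_2d. A minimal usage sketch follows; only the colorby and legend parameters are taken from the diff above, while the Sample entry point and the loading/feature-detection calls are assumptions for illustration and may differ from masster's actual API:

# Hypothetical usage of the plot_2d parameters added in 0.5.12.
from masster import Sample          # assumed import path

sample = Sample("example.mzML")     # assumed constructor/loader
sample.find_features()              # assumed feature-detection step

# Unchanged default: green/red coloring by MS2 presence.
sample.plot_2d(filename="map.html")

# New: categorical coloring by a string-typed feature column, with a legend.
sample.plot_2d(
    colorby="adduct",               # categorical column in the features table
    legend="bottom_right",          # any key of legend_position_map, or None to hide
    filename="map_by_adduct.html",
)
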
masster/study/export.py CHANGED
@@ -496,7 +496,7 @@ def export_mgf(self, **kwargs):
                 # Write END IONS
                 f.write("END IONS\n\n")
 
-        self.logger.info(f"Exported {len(mgf_data)} spectra to {filename}")
+        self.logger.success(f"Exported {len(mgf_data)} spectra to {filename}")
 
 
 def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs) -> None:
@@ -1183,7 +1183,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
             for line in mgf_lines:
                 f.write(line + "\n")
 
-    self.logger.info(f"Exported mzTab-M to {filename}")
+    self.logger.success(f"Exported mzTab-M to {filename}")
 
 
 def export_xlsx(self, filename: str | None = None) -> None:
@@ -1311,7 +1311,7 @@ def export_xlsx(self, filename: str | None = None) -> None:
                     f"Written worksheet '{sheet_name}' with shape {data.shape}",
                 )
 
-        self.logger.info(f"Study exported to {filename}")
+        self.logger.success(f"Study exported to {filename}")
 
     except Exception as e:
         self.logger.error(f"Error writing Excel file: {e}")
@@ -1424,8 +1424,6 @@ def export_parquet(self, filename: str | None = None) -> None:
 
     # Report results
     if exported_files:
-        self.logger.info(f"Study exported to {len(exported_files)} Parquet files:")
-        for file_path in exported_files:
-            self.logger.info(f"  - {file_path}")
+        self.logger.success(f"Study exported to {len(exported_files)} Parquet files.")
     else:
        self.logger.error("No Parquet files were created - no data available to export")
masster/study/h5.py CHANGED
@@ -834,6 +834,19 @@ def _create_dataframe_with_objects(data: dict, object_columns: list) -> pl.DataF
     object_data = {k: v for k, v in data.items() if k in object_columns}
     regular_data = {k: v for k, v in data.items() if k not in object_columns}
 
+    # Final check: ensure no numpy object arrays in regular_data
+    problematic_cols = []
+    for k, v in regular_data.items():
+        if hasattr(v, 'dtype') and str(v.dtype) == 'object':
+            problematic_cols.append(k)
+
+    if problematic_cols:
+        # Move these to object_data
+        for col in problematic_cols:
+            object_data[col] = _reconstruct_object_column(regular_data[col], col)
+            del regular_data[col]
+            object_columns.append(col)
+
     # Determine expected length from regular data or first object column
     expected_length = None
     if regular_data:
@@ -861,8 +874,47 @@ def _create_dataframe_with_objects(data: dict, object_columns: list) -> pl.DataF
 
     # Create DataFrame with regular columns first
     if regular_data:
-        df = pl.DataFrame(regular_data)
-        # print(f"DEBUG: Created DataFrame with regular columns, shape: {df.shape}")
+        # Final safety check: convert any remaining numpy object arrays to Python lists
+        # and handle numpy scalars within lists
+        safe_regular_data = {}
+        import numpy as np
+
+        def convert_numpy_scalars(value):
+            """Convert numpy scalars to Python native types recursively."""
+            if isinstance(value, np.generic):
+                return value.item()  # Convert numpy scalar to Python scalar
+            elif isinstance(value, list):
+                return [convert_numpy_scalars(item) for item in value]
+            else:
+                return value
+
+        for k, v in regular_data.items():
+            if hasattr(v, 'dtype') and str(v.dtype) == 'object':
+                # Convert numpy object array to Python list
+                safe_regular_data[k] = [convert_numpy_scalars(item) for item in (v.tolist() if hasattr(v, 'tolist') else list(v))]
+            elif isinstance(v, list):
+                # Handle lists that might contain numpy scalars
+                safe_regular_data[k] = [convert_numpy_scalars(item) for item in v]
+            else:
+                safe_regular_data[k] = convert_numpy_scalars(v)
+
+        # Create DataFrame with proper error handling
+        try:
+            df = pl.DataFrame(safe_regular_data)
+        except Exception as e:
+            # If direct creation fails, try creating column by column to identify and handle problematic columns
+            df = pl.DataFrame()
+            for k, v in safe_regular_data.items():
+                try:
+                    df = df.with_columns([pl.Series(k, v)])
+                except Exception:
+                    # Skip problematic columns or convert them to string as a fallback
+                    try:
+                        df = df.with_columns([pl.Series(k, [str(item) for item in v])])
+                    except Exception:
+                        # Last resort: skip the column entirely
+                        continue
+
     # Add Object columns one by one
     for col, values in object_data.items():
         # print(f"DEBUG: Adding object column '{col}', type: {type(values)}, length: {len(values) if values is not None else 'None'}")
@@ -1185,9 +1237,29 @@ def _load_dataframe_from_group(
             logger.debug(
                 f"Object column '{col}': length={len(data[col]) if data[col] is not None else 'None'}",
             )
+
+        # Debug: check for problematic data types in all columns before DataFrame creation
+        for col, values in data.items():
+            if hasattr(values, 'dtype') and str(values.dtype) == 'object':
+                logger.warning(f"Column '{col}' has numpy object dtype but is not in object_columns: {object_columns}")
+                if col not in object_columns:
+                    object_columns.append(col)
+
        df = _create_dataframe_with_objects(data, object_columns)
    else:
-        df = pl.DataFrame(data)
+        # Debug: check for problematic data types when no object columns are expected
+        for col, values in data.items():
+            if hasattr(values, 'dtype') and str(values.dtype) == 'object':
+                logger.warning(f"Column '{col}' has numpy object dtype but no object_columns specified!")
+                # Treat as object column
+                if object_columns is None:
+                    object_columns = []
+                object_columns.append(col)
+
+        if object_columns:
+            df = _create_dataframe_with_objects(data, object_columns)
+        else:
+            df = pl.DataFrame(data)
 
     # Clean null values and apply schema
     df = _clean_string_nulls(df)
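
The h5.py changes all guard against one failure mode: columns read back from HDF5 as numpy object arrays (or lists of numpy scalars), which Polars cannot ingest directly. A self-contained sketch of the problem and the conversion strategy used above; the column data here is invented for illustration:

# Why the guards exist: Polars typically rejects mixed-type numpy object
# arrays, and the fix is to unwrap numpy scalars into Python natives,
# falling back to strings as the diff does.
import numpy as np
import polars as pl

def convert_numpy_scalars(value):
    """Convert numpy scalars to Python native types recursively."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, list):
        return [convert_numpy_scalars(item) for item in value]
    return value

col = np.array([np.int64(1), "a", None], dtype=object)  # HDF5 round-trip artifact

try:
    df = pl.DataFrame({"col": col})  # usually fails: mixed-type object dtype
except Exception:
    safe = [convert_numpy_scalars(item) for item in col.tolist()]
    df = pl.DataFrame({"col": [str(item) for item in safe]})  # string fallback

print(df)
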
masster/study/helpers.py CHANGED
@@ -2663,7 +2663,7 @@ def features_filter(
     removed_count = initial_count - final_count
 
     self.logger.info(
-        f"Filtered features: kept {final_count:,}, removed {removed_count:,}"
+        f"Filtered features. Kept: {final_count:,}. Removed: {removed_count:,}."
     )
 
 
masster/study/merge.py CHANGED
@@ -427,9 +427,13 @@ def merge(study, **kwargs) -> None:
     # Feature maps will be generated on-demand within each merge method
 
     study.logger.info(
-        f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
-    )
-
+        f"Merging samples using {params.method}, min_samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
+    )
+    if "chunked" in params.method:
+        study.logger.info(
+            f"threads={params.threads}, chunk_size={params.chunk_size}, dechunking='{params.dechunking}'"
+        )
+
     # Initialize
     study.consensus_df = pl.DataFrame()
     study.consensus_ms2 = pl.DataFrame()
@@ -751,7 +755,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
 
     else:
         # Parallel processing
-        study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
+        #study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
 
         # Prepare chunk data for parallel processing using features_df slices
         chunk_data_list = []
@@ -812,7 +816,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
                 serialized_chunk_results.append((chunk_start_idx, consensus_features))
                 completed_chunks += 1
                 n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-                study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+                study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
             except Exception as exc:
                 # Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
                 if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
@@ -846,7 +850,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
                 serialized_chunk_results.append((chunk_start_idx, consensus_features))
                 completed_chunks += 1
                 n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-                study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+                study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
             except Exception as exc:
                 study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
                 raise exc
@@ -926,7 +930,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
 
     else:
         # Parallel processing
-        study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
+        #study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
 
         # Prepare chunk data for parallel processing using features_df slices
         chunk_data_list = []
@@ -987,7 +991,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
                 serialized_chunk_results.append((chunk_start_idx, consensus_features))
                 completed_chunks += 1
                 n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-                study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+                study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
             except Exception as exc:
                 # Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
                 if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
@@ -1021,7 +1025,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
                 serialized_chunk_results.append((chunk_start_idx, consensus_features))
                 completed_chunks += 1
                 n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-                study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+                study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
             except Exception as exc:
                 study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
                 raise exc
@@ -2251,15 +2255,13 @@ def _perform_adduct_grouping(study, rt_tol, mz_tol):
             {
                 "consensus_uid": row["consensus_uid"],
                 "rt": row["rt"],
-                "mz": row["mz"],  # Add missing mz field
+                "mz": row["mz"],
                 "adduct_mass_neutral_top": row.get("adduct_mass_neutral_top"),
                 "adduct_top": row.get("adduct_top"),
                 "inty_mean": row.get("inty_mean", 0),
             },
         )
 
-    # Use optimized adduct grouping
-    study.logger.info(f"About to call adduct grouping for {len(consensus_data)} consensus features")
     adduct_group_list, adduct_of_list = __merge_adduct_grouping(
         study, consensus_data, rt_tol/3, mz_tol
     )
@@ -2714,8 +2716,6 @@ def __identify_adduct_by_mass_shift(study, rt_tol, cached_adducts_df=None):
         study.logger.debug("No consensus features for adduct identification by mass shift")
         return
 
-    study.logger.info(f"Identifying coeluting adducts by mass shifts in {len(study.consensus_df)} consensus features...")
-
     # Get adducts DataFrame if not provided
     if cached_adducts_df is None or cached_adducts_df.is_empty():
         try:
@@ -3021,8 +3021,7 @@ def __identify_adduct_by_mass_shift(study, rt_tol, cached_adducts_df=None):
             pl.Series("adduct_mass_neutral_top", new_adduct_mass_neutral_top),
             pl.Series("adduct_mass_shift_top", new_adduct_mass_shift_top)
         ])
-
-        study.logger.info(f"Updated adduct assignments for {updated_count} consensus features based on mass shifts")
+        study.logger.success(f"Adduct information updated for {updated_count} consensus features.")
     else:
         study.logger.debug("No consensus features updated based on mass shift analysis")
 
@@ -3391,7 +3390,7 @@ def __merge_adduct_grouping(study, consensus_data, rt_tol, mz_tol):
             adduct_of_list = [0] * len(consensus_data)
             return adduct_group_list, adduct_of_list
 
-        study.logger.info(f"Built local intensity matrix: {len(intensity_matrix_pd)} features x {len(intensity_matrix_pd.columns)} samples")
+        study.logger.debug(f"Built local intensity matrix: {len(intensity_matrix_pd)} features x {len(intensity_matrix_pd.columns)} samples")
 
     except Exception as e:
         study.logger.warning(f"Could not build local intensity matrix: {e}. Creating single-feature groups.")
@@ -3401,7 +3400,7 @@ def __merge_adduct_grouping(study, consensus_data, rt_tol, mz_tol):
 
     # Step 2: Get adduct pairs with likelihood information and build hash map for fast lookup
     adduct_pairs_with_likelihood = _get_adduct_deltas_with_likelihood(study)
-    study.logger.info(f"Using {len(adduct_pairs_with_likelihood)} adduct pairs with likelihood scoring")
+    study.logger.debug(f"Using {len(adduct_pairs_with_likelihood)} adduct pairs with likelihood scoring")
 
     # Build hash map for O(1) mass shift lookup
     mass_shift_map = {}  # rounded_delta -> [(likelihood, adduct1, adduct2), ...]
@@ -86,8 +86,6 @@ def align(self, **kwargs):
         self.logger.error(f"Unknown alignment algorithm '{algorithm}'")
         return
 
-    self.logger.success("Alignment completed.")
-
     # Reset consensus data structures after alignment since RT changes invalidate consensus
     consensus_reset_count = 0
     if not self.consensus_df.is_empty():
@@ -681,16 +679,15 @@ def _align_pose_clustering(study_obj, params):
     params_oms.setValue("pairfinder:distance_RT:exponent", 2.0)
 
     aligner = oms.MapAlignmentAlgorithmPoseClustering()
-    study_obj.logger.info("Starting alignment with PoseClustering")
+    study_obj.logger.info(
+        f"Align RTs with Pose clustering: rt_tol={params.get('rt_tol')}",
+    )
 
     # Set ref_index to feature map index with largest number of features
     ref_index = [
         i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])
     ][-1]
-    study_obj.logger.debug(
-        f"Reference map is {study_obj.samples_df.row(ref_index, named=True)['sample_name']}",
-    )
-
+
     aligner.setParameters(params_oms)
     aligner.setReference(fmaps[ref_index])
     study_obj.logger.debug(f"Parameters for alignment: {params}")
@@ -836,6 +833,12 @@ def _align_pose_clustering(study_obj, params):
     # Clean up temporary feature maps to release memory
     del fmaps
     study_obj.logger.debug("Temporary feature maps deleted to release memory")
+
+    # Resolve reference sample UID from the reference index
+    ref_sample_uid = sample_uid_lookup.get(ref_index)
+    study_obj.logger.success(
+        f"Alignment completed. Reference sample UID {ref_sample_uid}.",
+    )
 
 
 def _align_kd_algorithm(study_obj, params):
@@ -879,7 +882,7 @@ def _align_kd_algorithm(study_obj, params):
        _raw_mp = None
    max_points = int(_raw_mp) if _raw_mp is not None else 1000
    study_obj.logger.info(
-        f"KD align: rt_tol={params.get('rt_tol')}, max_points={max_points}",
+        f"Align RTs with KD-Tree: rt_tol={params.get('rt_tol')}, max_points={max_points}",
    )
 
    # Work directly with features_df instead of feature maps
@@ -1092,7 +1095,7 @@ def _align_kd_algorithm(study_obj, params):
     )
 
     study_obj.logger.success(
-        f"Alignment completed. Reference sample UID {ref_sample_uid} (index {ref_index}).",
+        f"Alignment completed. Reference sample UID {ref_sample_uid}.",
    )
 
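The chunked-merge logging changes sit inside a parallel loop that already handles BrokenProcessPool, which a ProcessPoolExecutor raises when a worker process dies (a known pain point on Windows). A hedged sketch of that general pattern, with illustrative names rather than masster's actual internals:

# Sketch: run chunks in a process pool, fall back to serial work if the
# pool breaks. process_chunk stands in for the per-chunk consensus step.
from concurrent.futures import ProcessPoolExecutor, as_completed
from concurrent.futures.process import BrokenProcessPool

def process_chunk(chunk):
    # placeholder for the real per-chunk computation
    return sum(chunk)

def merge_chunks(chunks, threads=4):
    results = []
    try:
        with ProcessPoolExecutor(max_workers=threads) as pool:
            futures = {pool.submit(process_chunk, c): i for i, c in enumerate(chunks)}
            for fut in as_completed(futures):
                results.append((futures[fut], fut.result()))
    except BrokenProcessPool:
        # pool died mid-run: redo everything serially, discarding partial results
        results = [(i, process_chunk(c)) for i, c in enumerate(chunks)]
    return [r for _, r in sorted(results)]

if __name__ == "__main__":
    print(merge_chunks([[1, 2], [3, 4], [5, 6]], threads=2))
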
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.5.10
+Version: 0.5.12
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,5 +1,5 @@
 masster/__init__.py,sha256=ueZ224WPNRRjQEYTaQUol818nwQgJwB93HbEfmtPRmg,1041
-masster/_version.py,sha256=ykzsX9zBanG6fFefFoIDPED_UySJmLofW0r3TROBhFY,257
+masster/_version.py,sha256=NpsdpZszb6deAwFjiRivIjfQHvakNcnvE_OoEMce2HQ,257
 masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
 masster/logger.py,sha256=XT2gUcUIct8LWzTp9n484g5MaB89toT76CGA41oBvfA,18375
 masster/spectrum.py,sha256=TWIgDcl0lveG40cLVZTWGp8-FxMolu-P8EjZyRBtXL4,49850
@@ -25,7 +25,7 @@ masster/sample/helpers.py,sha256=Mt9LX-Dy1Xro1a_Sy6nxQzCkP_-q7nK4xVnNm44v7UA,438
 masster/sample/lib.py,sha256=E-j9c3Wd8f9a-H8xj7CAOwlA8KcyXPoFyYm3c8r7LtI,33755
 masster/sample/load.py,sha256=swjRBCoFGni9iPztHIKPVB5ru_xDMVryB_inPXdujTw,51819
 masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
-masster/sample/plot.py,sha256=Cf_kuUiZnVHSlZfJQbV8Wtmdw1PPG5D3g1UbLobaXMs,96483
+masster/sample/plot.py,sha256=5qn2Cpl363f3hW1ZeI4BZV9_36VLx39PKQMrebJhfp4,104864
 masster/sample/processing.py,sha256=qk-6_v424nwfaoVmdbHj-_lJiW7OkWS7SuQzQWNAFGI,55919
 masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
 masster/sample/sample.py,sha256=pw4fIE5gecdupZOOWFUiRCs0x-3qa3Nv7V_UdJ-CAsc,22202
@@ -40,15 +40,15 @@ masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2At
 masster/sample/defaults/sample_def.py,sha256=keoXyMyrm_iLgbYqfIbqCpJ3XHBVlNwCNmb5iMQL0iY,14579
 masster/study/__init__.py,sha256=55axdFuqRX4aXtJ8ocnhcLB32fNtmmJpCi58moO0r4g,237
 masster/study/analysis.py,sha256=L-wXBnGZCLB5UUDrjIdOiMG9zdej3Tw_SftcEmmTukM,84264
-masster/study/export.py,sha256=joFK9jip2UM4lVAvhkdKVeUdNdM4D8uP2WE49IaVJgw,60172
-masster/study/h5.py,sha256=KpvV6-0RGIAjYBNa7AodbLmlGtoDUvbeC_jB2IZdYvA,96118
-masster/study/helpers.py,sha256=QwPyGTuRKZoimK_y1kX4Ag_0rJNB1MYoP0Q2mXEVshs,191930
+masster/study/export.py,sha256=c1HJdLAM6Ply0n8f0DjMk4mXd9lOYePr60UJTBksUho,60092
+masster/study/h5.py,sha256=bznE9kKEfLNo0QtbyC6a6snfnR3Zjkx5BcjBNbRVlJ8,99579
+masster/study/helpers.py,sha256=fBZ6hDa_C8muqS4XWkE6KXtNQ-yEX4bkxnu34y1SZ5c,191933
 masster/study/id.py,sha256=heKU309cUsNeFxbWYvqxVIAJLrR1H0YqMgLanLx9Do4,80091
 masster/study/load.py,sha256=BMjoUDkXNI6iU2tRE2eBRzxMrvW0gRyLepqYOWaMPXU,101192
-masster/study/merge.py,sha256=aEZjNhrsQZxkRhyyuOUjlIN_tdA6y2VX2BAkvfPd_Sc,169300
+masster/study/merge.py,sha256=eV7iaeChBFglVBXqxgAl4P207gSYeuG2WU2rPVw1_34,169178
 masster/study/parameters.py,sha256=bTvmcwX9INxzcrEAmTiFH8qeWVhwkvMTZjuP394pz5o,3279
 masster/study/plot.py,sha256=ftQAVgEYkZuKAVIlbTR5bUypF8DpMOxSXwOyYz_BsOQ,110610
-masster/study/processing.py,sha256=n-JbH1ZHtSE1xlyi69ZrcHMsxw7dAyodC5hnaNld2to,58537
+masster/study/processing.py,sha256=5b8K4tP-Xu1-mhdf0om-m-g65Z9Uz3Dp4UBhuMLh0yU,58627
 masster/study/save.py,sha256=47AP518epJJ9TjaGGyrLKsMsyjIk8_J4ka7bmsnRtFQ,9268
 masster/study/study.py,sha256=gudugPJk3LOtZh-YsszSRCBDrBG78cexoG0CSM86EPs,38701
 masster/study/study5_schema.json,sha256=lTFePwY8bQngyBnNCP60-UP9tnZLGhFo3YtJgwHTWdo,7797
@@ -67,8 +67,8 @@ masster/wizard/README.md,sha256=mL1A3YWJZOefpJ6D0-HqGLkVRmUlOpwyVFdvJBeeoZM,1414
 masster/wizard/__init__.py,sha256=a2hcZnHASjfuw1lqZhZnvTR58rc33rRnoGAY_JfvGhI,683
 masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
 masster/wizard/wizard.py,sha256=UobIGFZtp1s_9WJlpl6DQ2-pp7flPQ6dlYZJqYE92OM,38131
-masster-0.5.10.dist-info/METADATA,sha256=wPI5dLDPHYjlcafoYNdUWlnUDc-bS-HjBruaVnVDxpA,45191
-masster-0.5.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-masster-0.5.10.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
-masster-0.5.10.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
-masster-0.5.10.dist-info/RECORD,,
+masster-0.5.12.dist-info/METADATA,sha256=Zsqci475Yv3lnTJOCRHtaCeII1RDGeO9qSyYMX2OgFA,45191
+masster-0.5.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+masster-0.5.12.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
+masster-0.5.12.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
+masster-0.5.12.dist-info/RECORD,,