PyPI - masster - Versions diffs - 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (27)

masster/_version.py +1 -1
masster/data/libs/aa.csv +22 -0
masster/lib/lib.py +6 -0
masster/sample/adducts.py +1 -1
masster/sample/load.py +10 -9
masster/sample/plot.py +1 -1
masster/sample/processing.py +4 -4
masster/sample/sample.py +29 -32
masster/study/analysis.py +1762 -0
masster/study/defaults/fill_def.py +1 -1
masster/study/export.py +5 -3
masster/study/h5.py +3 -0
masster/study/helpers.py +153 -80
masster/study/id.py +545 -4
masster/study/load.py +33 -59
masster/study/merge.py +413 -315
masster/study/parameters.py +3 -3
masster/study/plot.py +398 -43
masster/study/processing.py +6 -14
masster/study/save.py +8 -4
masster/study/study.py +179 -139
masster/study/study5_schema.json +9 -0
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/METADATA +54 -14
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/RECORD +27 -25
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/WHEEL +0 -0
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/entry_points.txt +0 -0
{masster-0.4.22.dist-info → masster-0.5.1.dist-info}/licenses/LICENSE +0 -0

masster/study/parameters.py CHANGED Viewed

@@ -8,7 +8,7 @@ similar to the sample parameters module but for study-level operations.
 from __future__ import annotations
-def store_history(self, keys, value):
+def update_history(self, keys, value):
     """
     Store parameters in a nested dictionary structure.
@@ -74,10 +74,10 @@ def update_parameters(self, **kwargs):
         # Check if it's a parameter defaults instance
         if hasattr(value, "to_dict") and callable(getattr(value, "to_dict")):
             # Store the parameter object
-            self.store_history([key], value.to_dict())
+            self.update_history([key], value.to_dict())
         else:
             # Store individual parameter
-            self.store_history([key], value)
+            self.update_history([key], value)
 def get_parameters_property(self):

masster/study/plot.py CHANGED Viewed

@@ -308,7 +308,7 @@ def plot_alignment(
         self.logger.info("Showing current RT values for both plots. Run align() first to see alignment comparison.")
     # Get sample_uids to filter by if specified
-    sample_uids = self._get_sample_uids(samples) if samples is not None else None
+    sample_uids = self._get_samples_uids(samples) if samples is not None else None
     # Start with full features_df
     features_df = self.features_df
@@ -836,7 +836,7 @@ def plot_samples_2d(
     from bokeh.io.export import export_png
     from bokeh.models import ColumnDataSource, HoverTool
-    sample_uids = self._get_sample_uids(samples)
+    sample_uids = self._get_samples_uids(samples)
     if not sample_uids:
         self.logger.error("No valid sample_uids provided.")
@@ -1053,7 +1053,7 @@ def plot_bpc(
     from bokeh.io.export import export_png
     from masster.study.helpers import get_bpc
-    sample_uids = self._get_sample_uids(samples)
+    sample_uids = self._get_samples_uids(samples)
     if not sample_uids:
         self.logger.error("No valid sample_uids provided for BPC plotting.")
         return
@@ -1238,7 +1238,7 @@ def plot_eic(
         self.logger.error("mz must be provided for EIC plotting")
         return
-    sample_uids = self._get_sample_uids(samples)
+    sample_uids = self._get_samples_uids(samples)
     if not sample_uids:
         self.logger.error("No valid sample_uids provided for EIC plotting.")
         return
@@ -1400,7 +1400,7 @@ def plot_rt_correction(
         self.logger.error("Column 'rt_original' not found in features_df. Alignment/backup RTs missing.")
         return
-    sample_uids = self._get_sample_uids(samples)
+    sample_uids = self._get_samples_uids(samples)
     if not sample_uids:
         self.logger.error("No valid sample_uids provided for RT correction plotting.")
         return
@@ -1537,7 +1537,7 @@ def plot_chrom(
     height=300,
 ):
     cons_uids = self._get_consensus_uids(uids)
-    sample_uids = self._get_sample_uids(samples)
+    sample_uids = self._get_samples_uids(samples)
     chroms = self.get_chrom(uids=cons_uids, samples=sample_uids)
@@ -1723,18 +1723,32 @@ def plot_chrom(
 def plot_consensus_stats(
     self,
     filename=None,
-    width=1200,
+    width=840,  # Reduced from 1200 (30% smaller)
     height=None,
     alpha=0.6,
     bins=30,
     n_cols=4,
 ):
     """
-    Plot histograms/distributions for all numeric columns in consensus_df.
+    Plot histograms/distributions for specific consensus statistics in the requested order.
+    Shows the following properties in order:
+    1. rt: Retention time
+    2. rt_delta_mean: Mean retention time delta
+    3. mz: Mass-to-charge ratio
+    4. mz_range: Mass range (mz_max - mz_min)
+    5. log10_inty_mean: Log10 of mean intensity
+    6. number_samples: Number of samples
+    7. number_ms2: Number of MS2 spectra
+    8. charge_mean: Mean charge
+    9. quality: Feature quality
+    10. chrom_coherence_mean: Mean chromatographic coherence
+    11. chrom_height_scaled_mean: Mean scaled chromatographic height
+    12. chrom_prominence_scaled_mean: Mean scaled chromatographic prominence
     Parameters:
         filename (str, optional): Output filename for saving the plot
-        width (int): Overall width of the plot (default: 1200)
+        width (int): Overall width of the plot (default: 840)
         height (int, optional): Overall height of the plot (auto-calculated if None)
         alpha (float): Histogram transparency (default: 0.6)
         bins (int): Number of histogram bins (default: 30)
@@ -1753,24 +1767,48 @@ def plot_consensus_stats(
     # Get all columns and their data types - work with original dataframe
     data_df = self.consensus_df.clone()
-    # Identify numeric columns (excluding ID columns that are typically strings)
-    id_columns = ["consensus_uid", "consensus_id", "uid", "id"]
-    numeric_columns = []
+    # Define specific columns to plot in the exact order requested
+    desired_columns = [
+        "rt",
+        "rt_delta_mean",
+        "mz",
+        "mz_range",  # mz_max-mz_min (will be calculated)
+        "log10_inty_mean",  # log10(inty_mean) (will be calculated)
+        "number_samples",
+        "number_ms2",
+        "charge_mean",
+        "quality",
+        "chrom_coherence_mean",
+        "chrom_height_scaled_mean",
+        "chrom_prominence_scaled_mean"
+    ]
+    # Calculate derived columns if they don't exist
+    if "mz_range" not in data_df.columns and "mz_max" in data_df.columns and "mz_min" in data_df.columns:
+        data_df = data_df.with_columns((pl.col("mz_max") - pl.col("mz_min")).alias("mz_range"))
+    if "log10_inty_mean" not in data_df.columns and "inty_mean" in data_df.columns:
+        data_df = data_df.with_columns(pl.col("inty_mean").log10().alias("log10_inty_mean"))
-    for col in data_df.columns:
-        if col not in id_columns:
-            dtype = data_df[col].dtype
-            # Check if column is numeric (int, float, or can be converted to numeric)
-            if dtype in [pl.Int8, pl.Int16, pl.Int32, pl.Int64,
-                        pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
-                        pl.Float32, pl.Float64]:
-                numeric_columns.append(col)
+    # Filter to only include columns that exist in the dataframe, preserving order
+    numeric_columns = [col for col in desired_columns if col in data_df.columns]
+    # Check if the numeric columns are actually numeric
+    final_numeric_columns = []
+    for col in numeric_columns:
+        dtype = data_df[col].dtype
+        if dtype in [pl.Int8, pl.Int16, pl.Int32, pl.Int64,
+                    pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
+                    pl.Float32, pl.Float64]:
+            final_numeric_columns.append(col)
+    numeric_columns = final_numeric_columns
     if len(numeric_columns) == 0:
-        self.logger.error("No numeric columns found in consensus_df for plotting distributions.")
+        self.logger.error(f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df.columns)}")
         return
-    self.logger.debug(f"Creating distribution plots for {len(numeric_columns)} numeric columns: {numeric_columns}")
+    self.logger.debug(f"Creating distribution plots for {len(numeric_columns)} specific consensus columns: {numeric_columns}")
     # Work directly with Polars - no conversion to pandas needed
     data_df_clean = data_df.select(numeric_columns)
@@ -1798,25 +1836,29 @@ def plot_consensus_stats(
     # Auto-calculate height if not provided
     if height is None:
-        plot_height = 300
-        height = plot_height * n_rows + 100  # Add some padding
+        plot_height = 210  # Reduced from 300 (30% smaller)
+        height = plot_height * n_rows + 56  # Reduced from 80 (30% smaller)
     else:
-        plot_height = (height - 100) // n_rows  # Subtract padding and divide
+        plot_height = (height - 56) // n_rows  # Reduced padding (30% smaller)
-    plot_width = (width - 100) // n_cols  # Subtract padding and divide
+    plot_width = (width - 56) // n_cols  # Reduced padding (30% smaller)
     # Create plots grid
     plots = []
     current_row = []
     for i, col in enumerate(numeric_columns):
+        # Check if this column should use log scale for y-axis
+        y_axis_type = "log" if col in ["number_samples", "number_ms2"] else "linear"
         # Create histogram for this column
         p = figure(
             width=plot_width,
             height=plot_height,
             title=col,
             toolbar_location="above",
-            tools="pan,wheel_zoom,box_zoom,reset,save"
+            tools="pan,wheel_zoom,box_zoom,reset,save",
+            y_axis_type=y_axis_type
         )
         # Set white background
@@ -1840,10 +1882,19 @@ def plot_consensus_stats(
         values_array = valid_values.to_numpy()
         hist, edges = np.histogram(values_array, bins=bins)
+        # Handle log y-axis: replace zero counts with small positive values
+        if y_axis_type == "log":
+            # Replace zero counts with a small value (1e-1) to make them visible on log scale
+            hist_log_safe = np.where(hist == 0, 0.1, hist)
+            bottom_val = 0.1  # Use small positive value for bottom on log scale
+        else:
+            hist_log_safe = hist
+            bottom_val = 0
         # Create histogram bars
         p.quad(
-            top=hist,
-            bottom=0,
+            top=hist_log_safe,
+            bottom=bottom_val,
             left=edges[:-1],
             right=edges[1:],
             fill_color="steelblue",
@@ -1852,11 +1903,16 @@ def plot_consensus_stats(
         )
         # Style the plot
-        p.title.text_font_size = "12pt"
-        p.xaxis.axis_label = col
-        p.yaxis.axis_label = "Count"
-        p.grid.visible = True
-        p.grid.grid_line_color = "#E0E0E0"
+        p.title.text_font_size = "10pt"  # Reduced from 12pt
+        p.xaxis.axis_label = ""  # Remove x-axis title
+        p.grid.grid_line_alpha = 0.3  # Show y-axis grid with transparency
+        p.grid.grid_line_color = "gray"
+        p.grid.grid_line_dash = [6, 4]  # Dashed grid lines
+        p.xgrid.visible = False  # Hide x-axis grid
+        p.outline_line_color = None  # Remove gray border around plot area
+        # Remove y-axis label but keep y-axis visible
+        p.yaxis.axis_label = ""
         current_row.append(p)
@@ -1868,14 +1924,12 @@ def plot_consensus_stats(
             plots.append(current_row)
             current_row = []
-    # Create grid layout
-    grid = gridplot(plots)
+    # Create grid layout with white background
+    grid = gridplot(plots, toolbar_location="above", merge_tools=True)
-    # Set overall background to white
-    if hasattr(grid, "background_fill_color"):
-        grid.background_fill_color = "white"
-    if hasattr(grid, "border_fill_color"):
-        grid.border_fill_color = "white"
+    # The background should be white by default in Bokeh
+    # Individual plots already have white backgrounds set above
     # Apply consistent save/display behavior
     if filename is not None:
@@ -1895,7 +1949,7 @@ def plot_consensus_stats(
     return grid
-def plot_pca(
+def plot_samples_pca(
     self,
     filename=None,
     width=500,
@@ -2035,6 +2089,7 @@ def plot_pca(
         tools="pan,wheel_zoom,box_zoom,reset,save",
     )
+    p.grid.visible = False
     p.xaxis.axis_label = f"PC1 ({explained_var[0]:.1%} variance)"
     p.yaxis.axis_label = f"PC2 ({explained_var[1]:.1%} variance)"
@@ -2159,6 +2214,293 @@ def plot_pca(
     return p
+def plot_samples_umap(
+    self,
+    filename=None,
+    width=500,
+    height=450,
+    alpha=0.8,
+    markersize=6,
+    n_components=2,
+    colorby=None,
+    title="UMAP of Consensus Matrix",
+    n_neighbors=15,
+    min_dist=0.1,
+    metric="euclidean",
+    random_state=42,
+):
+    """
+    Plot UMAP (Uniform Manifold Approximation and Projection) of the consensus matrix using Bokeh.
+    Parameters:
+        filename (str, optional): Output filename for saving the plot
+        width (int): Plot width (default: 500)
+        height (int): Plot height (default: 450)
+        alpha (float): Point transparency (default: 0.8)
+        markersize (int): Size of points (default: 6)
+        n_components (int): Number of UMAP components to compute (default: 2)
+        colorby (str, optional): Column from samples_df to color points by
+        title (str): Plot title (default: "UMAP of Consensus Matrix")
+        n_neighbors (int): Number of neighbors for UMAP (default: 15)
+        min_dist (float): Minimum distance for UMAP (default: 0.1)
+        metric (str): Distance metric for UMAP (default: "euclidean")
+        random_state (int or None): Random state for reproducibility (default: 42).
+            - Use an integer (e.g., 42) for reproducible results (slower, single-threaded)
+            - Use None for faster computation with multiple cores (non-reproducible)
+    Note:
+        Setting random_state forces single-threaded computation but ensures reproducible results.
+        Set random_state=None to enable parallel processing for faster computation.
+    """
+    try:
+        import umap
+    except ImportError:
+        self.logger.error("UMAP not available. Please install umap-learn: pip install umap-learn")
+        return
+    from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
+    from bokeh.plotting import figure
+    from bokeh.palettes import Category20, viridis
+    from bokeh.transform import factor_cmap
+    from sklearn.preprocessing import StandardScaler
+    import pandas as pd
+    import numpy as np
+    # Check if consensus matrix and samples_df exist
+    try:
+        consensus_matrix = self.get_consensus_matrix()
+        samples_df = self.samples_df
+    except Exception as e:
+        self.logger.error(f"Error getting consensus matrix or samples_df: {e}")
+        return
+    if consensus_matrix is None or consensus_matrix.shape[0] == 0:
+        self.logger.error("No consensus matrix available. Run merge/find_consensus first.")
+        return
+    if samples_df is None or samples_df.is_empty():
+        self.logger.error("No samples dataframe available.")
+        return
+    self.logger.debug(f"Performing UMAP on consensus matrix with shape: {consensus_matrix.shape}")
+    # Extract only the sample columns (exclude consensus_uid column)
+    sample_cols = [col for col in consensus_matrix.columns if col != "consensus_uid"]
+    # Convert consensus matrix to numpy, excluding the consensus_uid column
+    if hasattr(consensus_matrix, "select"):
+        # Polars DataFrame
+        matrix_data = consensus_matrix.select(sample_cols).to_numpy()
+    else:
+        # Pandas DataFrame or other - drop consensus_uid column
+        matrix_sample_data = consensus_matrix.drop(columns=["consensus_uid"], errors="ignore")
+        if hasattr(matrix_sample_data, "values"):
+            matrix_data = matrix_sample_data.values
+        elif hasattr(matrix_sample_data, "to_numpy"):
+            matrix_data = matrix_sample_data.to_numpy()
+        else:
+            matrix_data = np.array(matrix_sample_data)
+    # Transpose matrix so samples are rows and features are columns
+    matrix_data = matrix_data.T
+    # Handle missing values by replacing with 0
+    matrix_data = np.nan_to_num(matrix_data, nan=0.0, posinf=0.0, neginf=0.0)
+    # Standardize the data
+    scaler = StandardScaler()
+    matrix_scaled = scaler.fit_transform(matrix_data)
+    # Perform UMAP
+    reducer = umap.UMAP(
+        n_components=n_components,
+        n_neighbors=n_neighbors,
+        min_dist=min_dist,
+        metric=metric,
+        random_state=random_state,
+        n_jobs=1
+    )
+    umap_result = reducer.fit_transform(matrix_scaled)
+    self.logger.debug(f"UMAP completed with shape: {umap_result.shape}")
+    # Convert samples_df to pandas for easier manipulation
+    samples_pd = samples_df.to_pandas()
+    # Create dataframe with UMAP results and sample information
+    umap_df = pd.DataFrame({
+        "UMAP1": umap_result[:, 0],
+        "UMAP2": umap_result[:, 1] if n_components > 1 else np.zeros(len(umap_result)),
+    })
+    # Add sample information to UMAP dataframe
+    if len(samples_pd) == len(umap_df):
+        for col in samples_pd.columns:
+            umap_df[col] = samples_pd[col].values
+    else:
+        self.logger.warning(
+            f"Sample count mismatch: samples_df has {len(samples_pd)} rows, "
+            f"but consensus matrix has {len(umap_df)} samples",
+        )
+    # Prepare color mapping
+    color_column = None
+    color_mapper = None
+    if colorby and colorby in umap_df.columns:
+        color_column = colorby
+        unique_values = umap_df[colorby].unique()
+        # Handle categorical vs numeric coloring
+        if umap_df[colorby].dtype in ["object", "string", "category"]:
+            # Categorical coloring
+            if len(unique_values) <= 20:
+                palette = Category20[min(20, max(3, len(unique_values)))]
+            else:
+                palette = viridis(min(256, len(unique_values)))
+            color_mapper = factor_cmap(colorby, palette, unique_values)
+        else:
+            # Numeric coloring
+            palette = viridis(256)
+            color_mapper = LinearColorMapper(
+                palette=palette,
+                low=umap_df[colorby].min(),
+                high=umap_df[colorby].max(),
+            )
+    # Create Bokeh plot
+    p = figure(
+        width=width,
+        height=height,
+        title=f"{title}",
+        tools="pan,wheel_zoom,box_zoom,reset,save",
+    )
+    p.grid.visible = False
+    p.xaxis.axis_label = "UMAP1"
+    p.yaxis.axis_label = "UMAP2"
+    # Create data source
+    source = ColumnDataSource(umap_df)
+    # Create scatter plot
+    if color_mapper:
+        if isinstance(color_mapper, LinearColorMapper):
+            scatter = p.scatter(
+                "UMAP1",
+                "UMAP2",
+                size=markersize,
+                alpha=alpha,
+                color={"field": colorby, "transform": color_mapper},
+                source=source,
+            )
+            # Add colorbar for numeric coloring
+            color_bar = ColorBar(color_mapper=color_mapper, width=8, location=(0, 0))
+            p.add_layout(color_bar, "right")
+        else:
+            scatter = p.scatter(
+                "UMAP1",
+                "UMAP2",
+                size=markersize,
+                alpha=alpha,
+                color=color_mapper,
+                source=source,
+                legend_field=colorby,
+            )
+    else:
+        # If no color_by provided, use sample_color column from samples_df
+        if "sample_uid" in umap_df.columns or "sample_name" in umap_df.columns:
+            # Choose the identifier to map colors by
+            id_col = "sample_uid" if "sample_uid" in umap_df.columns else "sample_name"
+            # Get colors from samples_df based on the identifier
+            if id_col == "sample_uid":
+                sample_colors = (
+                    self.samples_df.filter(pl.col("sample_uid").is_in(umap_df[id_col].unique()))
+                    .select(["sample_uid", "sample_color"])
+                    .to_dict(as_series=False)
+                )
+                color_map = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
+            else:  # sample_name
+                sample_colors = (
+                    self.samples_df.filter(pl.col("sample_name").is_in(umap_df[id_col].unique()))
+                    .select(["sample_name", "sample_color"])
+                    .to_dict(as_series=False)
+                )
+                color_map = dict(zip(sample_colors["sample_name"], sample_colors["sample_color"]))
+            # Map colors into dataframe
+            umap_df["color"] = [color_map.get(x, "#1f77b4") for x in umap_df[id_col]]  # fallback to blue
+            # Update the ColumnDataSource with new color column
+            source = ColumnDataSource(umap_df)
+            scatter = p.scatter(
+                "UMAP1",
+                "UMAP2",
+                size=markersize,
+                alpha=alpha,
+                color="color",
+                source=source,
+            )
+        else:
+            scatter = p.scatter(
+                "UMAP1",
+                "UMAP2",
+                size=markersize,
+                alpha=alpha,
+                color="blue",
+                source=source,
+            )
+    # Create comprehensive hover tooltips with all sample information
+    tooltip_list = []
+    # Columns to exclude from tooltips (file paths and internal/plot fields)
+    excluded_cols = {"file_source", "file_path", "sample_path", "map_id", "UMAP1", "UMAP2", "ms1", "ms2", "size"}
+    # Add all sample dataframe columns to tooltips, skipping excluded ones
+    for col in samples_pd.columns:
+        if col in excluded_cols:
+            continue
+        if col in umap_df.columns:
+            if col == "sample_color":
+                # Display sample_color as a colored swatch
+                tooltip_list.append(("color", "$color[swatch]:sample_color"))
+            elif umap_df[col].dtype in ["float64", "float32"]:
+                tooltip_list.append((col, f"@{col}{{0.00}}"))
+            else:
+                tooltip_list.append((col, f"@{col}"))
+    hover = HoverTool(
+        tooltips=tooltip_list,
+        renderers=[scatter],
+    )
+    p.add_tools(hover)
+    # Add legend if using categorical coloring
+    if color_mapper and not isinstance(color_mapper, LinearColorMapper) and colorby:
+        # Only set legend properties if legends exist (avoid Bokeh warning when none created)
+        if getattr(p, "legend", None) and len(p.legend) > 0:
+            p.legend.location = "top_left"
+            p.legend.click_policy = "hide"
+    # Apply consistent save/display behavior
+    if filename is not None:
+        # Convert relative paths to absolute paths using study folder as base
+        import os
+        if not os.path.isabs(filename):
+            filename = os.path.join(self.folder, filename)
+        # Convert to absolute path for logging
+        abs_filename = os.path.abspath(filename)
+        # Use isolated file saving
+        _isolated_save_plot(p, filename, abs_filename, self.logger, "UMAP Plot")
+    else:
+        # Show in notebook when no filename provided
+        _isolated_show_notebook(p)
+    return p
 def plot_tic(
     self,
     samples=100,
@@ -2179,7 +2521,7 @@ def plot_tic(
     from bokeh.io.export import export_png
     from masster.study.helpers import get_tic
-    sample_uids = self._get_sample_uids(samples)
+    sample_uids = self._get_samples_uids(samples)
     if not sample_uids:
         self.logger.error("No valid sample_uids provided for TIC plotting.")
         return
@@ -2312,3 +2654,16 @@ def plot_tic(
         _isolated_show_notebook(p)
     return p
+def plot_pca(self, *args, **kwargs):
+    """Deprecated: Use plot_samples_pca instead."""
+    import warnings
+    warnings.warn("plot_pca is deprecated, use plot_samples_pca instead", DeprecationWarning, stacklevel=2)
+    return self.plot_samples_pca(*args, **kwargs)
+def plot_umap(self, *args, **kwargs):
+    """Deprecated: Use plot_samples_umap instead."""
+    import warnings
+    warnings.warn("plot_umap is deprecated, use plot_samples_umap instead", DeprecationWarning, stacklevel=2)
+    return self.plot_samples_umap(*args, **kwargs)

masster/study/processing.py CHANGED Viewed

@@ -62,16 +62,8 @@ def _generate_feature_maps_on_demand_for_align(study):
                 if feature_row["inty"] is None:
                     study.logger.warning("Skipping feature due to missing inty")
                     continue
-                # Handle missing feature_id by generating a new one
-                if feature_row["feature_id"] is None:
-                    # Use a simple incremental ID for alignment purposes
-                    feature_id = len(temp_feature_maps) * 100000 + feature_map.size() + 1
-                    study.logger.debug(f"Generated new feature_id {feature_id} for feature with missing ID in sample {sample_name}")
-                else:
-                    feature_id = int(feature_row["feature_id"])
-                feature.setUniqueId(feature_id)
+                feature.setUniqueId(int(feature_row["feature_id"]))
                 feature.setMZ(float(feature_row["mz"]))
                 feature.setRT(float(feature_row["rt"]))
                 feature.setIntensity(float(feature_row["inty"]))
@@ -166,7 +158,7 @@ def align(self, **kwargs):
     # end of parameter initialization
     # Store parameters in the Study object
-    self.store_history(["align"], params.to_dict())
+    self.update_history(["align"], params.to_dict())
     self.logger.debug("Parameters stored to align")
     # Generate temporary feature maps on-demand from features_df instead of using cached data
@@ -370,7 +362,7 @@ def find_ms2(self, **kwargs):
     # end of parameter initialization
     # Store parameters in the Study object
-    self.store_history(["find_ms2"], params.to_dict())
+    self.update_history(["find_ms2"], params.to_dict())
     self.logger.debug("Parameters stored to find_ms2")
     data = []
@@ -551,7 +543,7 @@ def _integrate_chrom_impl(self, **kwargs):
     # end of parameter initialization
     # Store parameters in the Study object
-    self.store_history(["integrate_chrom"], params.to_dict())
+    self.update_history(["integrate_chrom"], params.to_dict())
     self.logger.debug("Parameters stored to integrate_chrom")
     # Get parameter values for use in the method
@@ -769,7 +761,7 @@ def integrate(self, **kwargs):
     # end of parameter initialization
     # Store parameters in the Study object
-    self.store_history(["integrate"], params.to_dict())
+    self.update_history(["integrate"], params.to_dict())
     self.logger.debug("Parameters stored to integrate")
     # Call the original integrate_chrom function with extracted parameters

masster/study/save.py CHANGED Viewed

@@ -59,13 +59,16 @@ def save(self, filename=None, add_timestamp=True, compress=False):
     # Use compressed mode for large datasets
     if compress:
-        self._save_study5_compressed(filename)
+        from masster.study.h5 import _save_study5_compressed
+        _save_study5_compressed(self, filename)
     else:
-        self._save_study5(filename)
+        from masster.study.h5 import _save_study5
+        _save_study5(self, filename)
     if self.consensus_map is not None:
         # save the features as a separate file
-        self._save_consensusXML(filename=filename.replace(".study5", ".consensusXML"))
+        from masster.study.save import _save_consensusXML
+        _save_consensusXML(self, filename=filename.replace(".study5", ".consensusXML"))
     self.filename = filename
@@ -211,4 +214,5 @@ def save_consensus(self, **kwargs):
     if self.consensus_map is None:
         self.logger.error("No consensus map found.")
         return
-    self._save_consensusXML(**kwargs)
+    from masster.study.save import _save_consensusXML
+    _save_consensusXML(self, **kwargs)

masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl