masster 0.3.18-py3-none-any.whl → 0.3.20-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster has been flagged as potentially problematic.
- masster/__init__.py +2 -0
- masster/_version.py +1 -1
- masster/data/libs/README.md +17 -0
- masster/data/libs/ccm.py +533 -0
- masster/data/libs/central_carbon_README.md +17 -0
- masster/data/libs/central_carbon_metabolites.csv +120 -0
- masster/data/libs/urine.py +333 -0
- masster/data/libs/urine_metabolites.csv +51 -0
- masster/sample/h5.py +1 -1
- masster/sample/helpers.py +3 -7
- masster/sample/lib.py +32 -25
- masster/sample/load.py +9 -3
- masster/sample/plot.py +113 -27
- masster/study/export.py +27 -10
- masster/study/h5.py +58 -40
- masster/study/helpers.py +450 -196
- masster/study/helpers_optimized.py +5 -5
- masster/study/load.py +144 -118
- masster/study/plot.py +691 -277
- masster/study/processing.py +9 -5
- masster/study/study.py +6 -6
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/METADATA +1 -1
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/RECORD +31 -25
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/WHEEL +0 -0
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/entry_points.txt +0 -0
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/licenses/LICENSE +0 -0
masster/sample/plot.py
CHANGED
@@ -87,9 +87,10 @@ def _is_notebook_environment():
     # Check if marimo is in modules
     if "marimo" in sys.modules:
         return True
-
+
     # Check for marimo in the call stack or environment
     import inspect
+
     frame = inspect.currentframe()
     try:
         while frame:
@@ -140,6 +141,110 @@ def _display_plot(plot_object, layout=None):
     return None


+def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
+    """
+    Helper function to handle consistent save/display behavior for sample plots.
+
+    Parameters:
+        plot_obj: The plot object (bokeh figure, holoviews layout, or panel object)
+        filename: Optional filename to save the plot
+        plot_type: Type of plot object ("bokeh", "panel", "holoviews")
+    """
+    if filename is not None:
+        # Convert relative paths to absolute paths using sample folder as base
+        import os
+        if hasattr(self, 'folder') and self.folder and not os.path.isabs(filename):
+            filename = os.path.join(self.folder, filename)
+
+        # Convert to absolute path for logging
+        abs_filename = os.path.abspath(filename)
+
+        if filename.endswith(".html"):
+            if plot_type == "panel":
+                plot_obj.save(filename, embed=True)  # type: ignore[attr-defined]
+            elif plot_type == "holoviews":
+                import panel
+                panel.panel(plot_obj).save(filename, embed=True)  # type: ignore[attr-defined]
+            elif plot_type == "bokeh":
+                from bokeh.plotting import output_file
+                from bokeh.io import save
+                output_file(filename)
+                save(plot_obj)
+            self.logger.info(f"Plot saved to: {abs_filename}")
+        elif filename.endswith(".png"):
+            try:
+                if plot_type == "bokeh":
+                    from bokeh.io.export import export_png
+                    export_png(plot_obj, filename=filename)
+                elif plot_type in ["panel", "holoviews"]:
+                    import holoviews as hv
+                    hv.save(plot_obj, filename, fmt="png")
+                self.logger.info(f"Plot saved to: {abs_filename}")
+            except Exception:
+                # Fall back to HTML if PNG export not available
+                html_filename = filename.replace('.png', '.html')
+                abs_html_filename = os.path.abspath(html_filename)
+                if plot_type == "panel":
+                    plot_obj.save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "holoviews":
+                    import panel
+                    panel.panel(plot_obj).save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "bokeh":
+                    from bokeh.plotting import output_file
+                    from bokeh.io import save
+                    output_file(html_filename)
+                    save(plot_obj)
+                self.logger.warning(f"PNG export not available, saved as HTML instead: {abs_html_filename}")
+        elif filename.endswith(".pdf"):
+            # Try to save as PDF, fall back to HTML if not available
+            try:
+                if plot_type == "bokeh":
+                    from bokeh.io.export import export_pdf
+                    export_pdf(plot_obj, filename=filename)
+                elif plot_type in ["panel", "holoviews"]:
+                    import holoviews as hv
+                    hv.save(plot_obj, filename, fmt="pdf")
+                self.logger.info(f"Plot saved to: {abs_filename}")
+            except ImportError:
+                # Fall back to HTML if PDF export not available
+                html_filename = filename.replace('.pdf', '.html')
+                abs_html_filename = os.path.abspath(html_filename)
+                if plot_type == "panel":
+                    plot_obj.save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "holoviews":
+                    import panel
+                    panel.panel(plot_obj).save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "bokeh":
+                    from bokeh.plotting import output_file
+                    from bokeh.io import save
+                    output_file(html_filename)
+                    save(plot_obj)
+                self.logger.warning(f"PDF export not available, saved as HTML instead: {abs_html_filename}")
+        else:
+            # Default to HTML for unknown extensions
+            if plot_type == "panel":
+                plot_obj.save(filename, embed=True)  # type: ignore[attr-defined]
+            elif plot_type == "holoviews":
+                import panel
+                panel.panel(plot_obj).save(filename, embed=True)  # type: ignore[attr-defined]
+            elif plot_type == "bokeh":
+                from bokeh.plotting import output_file
+                from bokeh.io import save
+                output_file(filename)
+                save(plot_obj)
+            self.logger.info(f"Plot saved to: {abs_filename}")
+    else:
+        # Show in notebook when no filename provided
+        if plot_type == "panel":
+            plot_obj.show()  # type: ignore[attr-defined]
+        elif plot_type == "holoviews":
+            import panel
+            return panel.panel(plot_obj)
+        elif plot_type == "bokeh":
+            from bokeh.plotting import show
+            show(plot_obj)
+
+
 def plot_chrom(
     self,
     feature_uid=None,
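Note: the remaining plot.py hunks below rewire plot_chrom, plot_2d, and plot_feature_stats to delegate to this helper. A minimal usage sketch, assuming the helper ends up bound as a method on the Sample class (as its self parameter suggests); the `sample` variable, `layout` object, and file names here are hypothetical:

# layout stands for any panel.Column that a plot method builds.
sample._handle_sample_plot_output(layout, "chrom.html", "panel")  # relative path resolves against sample.folder
sample._handle_sample_plot_output(layout, "chrom.png", "panel")   # PNG export needs extra dependencies (e.g. selenium for the bokeh backend); on failure it saves chrom.html and logs a warning
sample._handle_sample_plot_output(layout, None, "panel")          # no filename: display the plot instead of saving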
@@ -270,16 +375,9 @@ def plot_chrom(

     layout = layout.cols(1)
     layout = panel.Column(layout)
-
-
-
-            layout.save(filename, embed=True)
-        else:
-            # save the panel layout as a png
-            hv.save(layout, filename, fmt="png")
-    else:
-        # Check if we're in a notebook environment and display appropriately
-        return _display_plot(layout.object, layout)
+
+    # Use consistent save/display behavior
+    self._handle_sample_plot_output(layout, filename, "panel")


 def plot_2d(
@@ -829,12 +927,8 @@ def plot_2d(
         layout = panel.Column(overlay)

         if filename is not None:
-            #
-
-            layout.save(filename, embed=True)
-        else:
-            # save the panel layout as a png
-            hv.save(overlay, filename, fmt="png")
+            # Use consistent save/display behavior
+            self._handle_sample_plot_output(layout, filename, "panel")
             return None
         else:
             # Check if we're in a notebook environment and display appropriately
@@ -1959,16 +2053,8 @@ def plot_feature_stats(
     # Arrange the plots in a layout with three columns
     layout = hv.Layout(density_plots).cols(3).opts(shared_axes=False)

-        #
-
-        if filename.endswith(".html"):
-            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
-        else:
-            hv.save(layout, filename, fmt="png")
-    else:
-        # Check if we're in a notebook environment and display appropriately
-        layout_obj = panel.panel(layout)
-        return _display_plot(layout, layout_obj)
+    # Use consistent save/display behavior
+    self._handle_sample_plot_output(layout, filename, "holoviews")


 def plot_tic(
masster/study/export.py
CHANGED
@@ -445,7 +445,7 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
     mtd_lines.append("MTD\tsmall_molecule-quantification_unit\t[MS, MS:1001844, MS1 feature area, ]")
     mtd_lines.append("MTD\tsmall_molecule_feature-quantification_unit\t[MS, MS:1001844, MS1 feature area, ]")
     mtd_lines.append(
-        "MTD\tsmall_molecule-identification_reliability\t[MS, MS:1002955, hr-ms compound identification confidence level, ]"
+        "MTD\tsmall_molecule-identification_reliability\t[MS, MS:1002955, hr-ms compound identification confidence level, ]",
     )
     mtd_lines.append("MTD\tid_confidence_measure[1]\t[MS, MS:1002888, small molecule confidence measure, ]")
     mtd_lines.append("")
@@ -499,8 +499,16 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
     # Use the matrix as-is since it already has the correct sample columns
     # The matrix columns are sample names, which is what we want for the assay columns

-    # round to int
-    abundance_matrix
+    # round to int - handle both Polars and Pandas DataFrames
+    if hasattr(abundance_matrix, 'with_columns'):
+        # Polars DataFrame
+        numeric_cols = [col for col in abundance_matrix.columns if abundance_matrix[col].dtype.is_numeric()]
+        abundance_matrix = abundance_matrix.with_columns([
+            abundance_matrix[col].round(0) for col in numeric_cols
+        ])
+    else:
+        # Pandas DataFrame
+        abundance_matrix = abundance_matrix.round(0)

     # Use actual number of samples from the abundance matrix
     n_assays = len(abundance_matrix.columns)
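Note: the rounding branch above distinguishes the two frame libraries by duck typing, since only Polars DataFrames expose with_columns. A self-contained sketch of the same logic, assuming polars and pandas are both installed; the toy frames are hypothetical:

import pandas as pd
import polars as pl

def round_numeric(matrix):
    if hasattr(matrix, "with_columns"):  # Polars: round each numeric column explicitly
        numeric_cols = [c for c in matrix.columns if matrix[c].dtype.is_numeric()]
        return matrix.with_columns([matrix[c].round(0) for c in numeric_cols])
    return matrix.round(0)  # Pandas: round(0) leaves non-numeric columns untouched

print(round_numeric(pl.DataFrame({"uid": ["a", "b"], "area": [1.6, 2.4]})))
print(round_numeric(pd.DataFrame({"uid": ["a", "b"], "area": [1.6, 2.4]})))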
@@ -570,9 +578,14 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
         ]
         # Add abundance values for each assay
         consensus_uid = row["consensus_uid"]
-        if consensus_uid in abundance_matrix
-
-
+        # Check if consensus_uid exists in the abundance_matrix (Polars)
+        filtered_matrix = abundance_matrix.filter(pl.col("consensus_uid") == consensus_uid)
+        if filtered_matrix.height > 0:
+            # Get the first (and should be only) matching row
+            abundance_row = filtered_matrix.row(0, named=True)
+            # Extract values excluding the consensus_uid column
+            abundance_values = [abundance_row[col] for col in abundance_matrix.columns if col != "consensus_uid"]
+            sml_row += [str(val) if val is not None else "null" for val in abundance_values]
         else:
             sml_row += ["null"] * n_assays
         sml_row += ["null", "null"]
@@ -615,11 +628,15 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
             str(row.get("retention_time_in_seconds_start", "null")),
             str(row.get("retention_time_in_seconds_end", "null")),
         ]
-        # Add abundance values for each assay - same as SML
+        # Add abundance values for each assay - same as SML (Polars)
         consensus_uid = row["consensus_uid"]
-
-
-
+        filtered_matrix = abundance_matrix.filter(pl.col("consensus_uid") == consensus_uid)
+        if filtered_matrix.height > 0:
+            # Get the first (and should be only) matching row
+            abundance_row = filtered_matrix.row(0, named=True)
+            # Extract values excluding the consensus_uid column
+            abundance_values = [abundance_row[col] for col in abundance_matrix.columns if col != "consensus_uid"]
+            smf_row += [str(val) if val is not None else "null" for val in abundance_values]
         else:
             smf_row += ["null"] * n_assays
         smf_lines.append("\t".join(smf_row))
masster/study/h5.py
CHANGED
@@ -94,7 +94,7 @@ def _save_dataframe_optimized(df, group, schema, df_name, logger, chunk_size=100
             numeric_cols.append(col)

     logger.debug(
-        f"Saving {df_name}: {total_rows} rows, {len(numeric_cols)} numeric, {len(string_cols)} string, {len(object_cols)} object columns"
+        f"Saving {df_name}: {total_rows} rows, {len(numeric_cols)} numeric, {len(string_cols)} string, {len(object_cols)} object columns",
     )

     # Process numeric columns in batch (most efficient)
@@ -277,7 +277,7 @@ def _save_object_columns_optimized(group, df, object_cols, logger, chunk_size):
                 results[chunk_start] = chunk_result
             except Exception as e:
                 logger.warning(
-                    f"Failed to serialize chunk starting at {chunk_start} for column '{col}': {e}"
+                    f"Failed to serialize chunk starting at {chunk_start} for column '{col}': {e}",
                 )
                 # Fallback to simple string conversion for this chunk
                 chunk = data_list[chunk_start : chunk_start + chunk_size]
@@ -435,7 +435,7 @@ def _save_dataframe_column_legacy(group, col: str, data, dtype: str, logger, com
            group.create_dataset(col, data=data_as_str, compression=compression)
        else:
            logger.warning(
-                f"Unexpectedly, column '{col}' has dtype '{dtype}'. Implement serialization for this column."
+                f"Unexpectedly, column '{col}' has dtype '{dtype}'. Implement serialization for this column.",
            )
    elif dtype == "string":
        # Handle string columns
@@ -698,17 +698,17 @@ def _load_dataframe_from_group(group, schema: dict, df_name: str, logger, object
     # Get available columns from HDF5 file
     hdf5_columns = list(group.keys())
     logger.debug(f"HDF5 columns available: {hdf5_columns}")
-
+
     # Handle column name migrations for backward compatibility first
     if df_name == "samples_df":
         # Migrate old column names to new names
         column_migrations = {
             "size": "num_features",
-            "file_source": "sample_source",
+            "file_source": "sample_source",
             "ms1": "num_ms1",
-            "ms2": "num_ms2"
+            "ms2": "num_ms2",
         }
-
+
         # Create a mapping of what's actually available after migrations
         effective_columns = hdf5_columns.copy()
         for old_name, new_name in column_migrations.items():
@@ -720,14 +720,14 @@ def _load_dataframe_from_group(group, schema: dict, df_name: str, logger, object
     # First pass: load all existing columns (including migrated ones)
     for col in schema_columns or []:
         source_col = col
-
+
         # Check if we need to load from a migrated column name
         if df_name == "samples_df":
             column_migrations = {
                 "size": "num_features",
-                "file_source": "sample_source",
+                "file_source": "sample_source",
                 "ms1": "num_ms1",
-                "ms2": "num_ms2"
+                "ms2": "num_ms2",
             }
             # Reverse lookup - find old name for new name
             reverse_migrations = {v: k for k, v in column_migrations.items()}
@@ -736,7 +736,7 @@ def _load_dataframe_from_group(group, schema: dict, df_name: str, logger, object
             if old_name in group:
                 source_col = old_name
                 logger.info(f"Loading '{col}' from old column name '{old_name}'")
-
+
         if source_col not in group:
             missing_columns.append(col)
             continue
@@ -829,12 +829,12 @@ def _load_dataframe_from_group(group, schema: dict, df_name: str, logger, object
     if df_name == "samples_df":
         column_migrations = {
             "size": "num_features",
-            "file_source": "sample_source",
+            "file_source": "sample_source",
             "ms1": "num_ms1",
-            "ms2": "num_ms2"
+            "ms2": "num_ms2",
         }
         migrated_old_names = set(column_migrations.keys())
-
+
     extra_columns = [col for col in hdf5_columns if col not in (schema_columns or []) and col not in migrated_old_names]

     for col in extra_columns:
@@ -974,7 +974,7 @@ def _save_study5_compressed(self, filename=None):

             pbar.update(1)
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {getattr(self, 'log_label', '')}Saving dataframes"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {getattr(self, 'log_label', '')}Saving dataframes",
             )

             # Store samples_df - use optimized batch processing
@@ -987,7 +987,7 @@ def _save_study5_compressed(self, filename=None):
             # Store features_df - use fast method that skips chrom and ms2_specs columns
             if self.features_df is not None and not self.features_df.is_empty():
                 self.logger.debug(
-                    f"Fast saving features_df with {len(self.features_df)} rows (skipping chrom and ms2_specs)"
+                    f"Fast saving features_df with {len(self.features_df)} rows (skipping chrom and ms2_specs)",
                 )
                 _save_dataframe_optimized_fast(self.features_df, features_group, schema, "features_df", self.logger)
                 pbar.update(1)
@@ -1066,7 +1066,7 @@ def _save_dataframe_optimized_fast(df, group, schema, df_name, logger, chunk_siz
             numeric_cols.append(col)

     logger.debug(
-        f"Saving {df_name}: {total_rows} rows, {len(numeric_cols)} numeric, {len(string_cols)} string, {len(object_cols)} object columns"
+        f"Saving {df_name}: {total_rows} rows, {len(numeric_cols)} numeric, {len(string_cols)} string, {len(object_cols)} object columns",
     )

     # Process numeric columns in batch (most efficient)
@@ -1184,7 +1184,7 @@ def _save_study5(self, filename=None):

             pbar.update(1)
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {getattr(self, 'log_label', '')}Saving dataframes"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {getattr(self, 'log_label', '')}Saving dataframes",
             )

             # Store samples_df - use optimized batch processing
@@ -1309,7 +1309,7 @@ def _load_study5(self, filename=None):
         ) as pbar:
             # Load metadata
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading metadata"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading metadata",
             )
             if "metadata" in f:
                 metadata = f["metadata"]
@@ -1371,7 +1371,7 @@ def _load_study5(self, filename=None):

             # Load samples_df
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading samples"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading samples",
             )
             if "samples" in f and len(f["samples"].keys()) > 0:
                 self.samples_df = _load_dataframe_from_group(f["samples"], schema, "samples_df", self.logger)
@@ -1411,7 +1411,7 @@ def _load_study5(self, filename=None):
             pbar.update(1)
             # Load samples_df
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading samples"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading samples",
             )
             if "samples" in f and len(f["samples"].keys()) > 0:
                 self.samples_df = _load_dataframe_from_group(f["samples"], schema, "samples_df", self.logger)
@@ -1452,12 +1452,16 @@ def _load_study5(self, filename=None):

             # Load features_df
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading features"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading features",
             )
             if "features" in f and len(f["features"].keys()) > 0:
                 object_columns = ["chrom", "ms2_scans", "ms2_specs"]
                 self.features_df = _load_dataframe_from_group(
-                    f["features"],
+                    f["features"],
+                    schema,
+                    "features_df",
+                    self.logger,
+                    object_columns,
                 )
             else:
                 self.features_df = None
@@ -1465,7 +1469,7 @@ def _load_study5(self, filename=None):

             # Load consensus_df
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus",
             )
             if "consensus" in f and len(f["consensus"].keys()) > 0:
                 # Only include adducts in object_columns if it actually exists in the file
@@ -1474,7 +1478,11 @@ def _load_study5(self, filename=None):
                     object_columns.append("adducts")

                 self.consensus_df = _load_dataframe_from_group(
-                    f["consensus"],
+                    f["consensus"],
+                    schema,
+                    "consensus_df",
+                    self.logger,
+                    object_columns,
                 )

                 # Backward compatibility: If adducts column doesn't exist, initialize with empty lists
@@ -1507,22 +1515,28 @@ def _load_study5(self, filename=None):

             # Load consensus_mapping_df
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus mapping"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus mapping",
             )
             if "consensus_mapping" in f and len(f["consensus_mapping"].keys()) > 0:
                 self.consensus_mapping_df = _load_dataframe_from_group(
-                    f["consensus_mapping"],
+                    f["consensus_mapping"],
+                    schema,
+                    "consensus_mapping_df",
+                    self.logger,
                 )
             else:
                 self.consensus_mapping_df = None
             pbar.update(1)
             # Load consensus_mapping_df
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus mapping"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus mapping",
             )
             if "consensus_mapping" in f and len(f["consensus_mapping"].keys()) > 0:
                 self.consensus_mapping_df = _load_dataframe_from_group(
-                    f["consensus_mapping"],
+                    f["consensus_mapping"],
+                    schema,
+                    "consensus_mapping_df",
+                    self.logger,
                 )
             else:
                 self.consensus_mapping_df = None
@@ -1530,34 +1544,38 @@ def _load_study5(self, filename=None):

             # Load consensus_ms2
             pbar.set_description(
-                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus MS2"
+                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Loading consensus MS2",
             )
             if "consensus_ms2" in f and len(f["consensus_ms2"].keys()) > 0:
                 object_columns = ["spec"]
                 self.consensus_ms2 = _load_dataframe_from_group(
-                    f["consensus_ms2"],
+                    f["consensus_ms2"],
+                    schema,
+                    "consensus_ms2",
+                    self.logger,
+                    object_columns,
                 )
             else:
                 self.consensus_ms2 = None
             pbar.update(1)

             # Check and migrate old string-based map_id to integer indices
-            if
-                not self.samples_df.is_empty() and
-                self.samples_df['map_id'].dtype == pl.Utf8):
+            if self.samples_df is not None and not self.samples_df.is_empty() and self.samples_df["map_id"].dtype == pl.Utf8:
                 self.logger.info("Detected old string-based map_id format, migrating to integer indices")
-
+
                 # Convert string-based map_id to integer indices
                 sample_count = len(self.samples_df)
                 new_map_ids = list(range(sample_count))
-
+
                 self.samples_df = self.samples_df.with_columns(
-                    pl.lit(new_map_ids).alias("map_id")
+                    pl.lit(new_map_ids).alias("map_id"),
                 )
-
+
                 # Ensure the column is Int64 type
                 self.samples_df = self.samples_df.cast({"map_id": pl.Int64})
-
-                self.logger.info(
+
+                self.logger.info(
+                    f"Successfully migrated {sample_count} samples to indexed map_id format (0 to {sample_count - 1})",
+                )

             self.logger.debug("Study loaded")
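Note: beyond the trailing commas, the final hunk rewrites the map_id migration condition into a single line that also guards against samples_df being None. A sketch of the migration on a hypothetical samples_df; pl.Series with an explicit dtype stands in for the hunk's pl.lit(...).alias(...) plus cast, which amounts to the same result:

import polars as pl

samples_df = pl.DataFrame({
    "sample_name": ["s1", "s2", "s3"],
    "map_id": ["map_a", "map_b", "map_c"],  # old string-based format
})

# Replace string map_ids with positional integer indices, as the loader now does.
if samples_df is not None and not samples_df.is_empty() and samples_df["map_id"].dtype == pl.Utf8:
    samples_df = samples_df.with_columns(
        pl.Series("map_id", range(len(samples_df)), dtype=pl.Int64),
    )

print(samples_df["map_id"].to_list())  # [0, 1, 2]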