PyPI - masster - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

masster 0.2.0.tar.gz → 0.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (71) [hide / show]

{masster-0.2.0 → masster-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.2.0
+Version: 0.2.1
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster

{masster-0.2.0 → masster-0.2.1}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "masster"
-version = "0.2.0"
+version = "0.2.1"
 description = "Mass spectrometry data analysis package"
 authors = [
     { name = "Zamboni Lab" }

{masster-0.2.0 → masster-0.2.1}/src/masster/_version.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 def get_version():

{masster-0.2.0 → masster-0.2.1}/src/masster/sample/h5.py RENAMED Viewed

@@ -309,7 +309,7 @@ def _load_sample5(self, filename: str, map: bool = True):
             missing_columns = []
             for col in schema.get("scans_df", {}).get("columns", []):
                 if col not in scans_group:
-                    self.logger.warning(f"Column '{col}' not found in sample5/scans.")
+                    self.logger.debug(f"Column '{col}' not found in sample5/scans.")
                     data[col] = None
                     missing_columns.append(col)
                     continue
@@ -444,6 +444,14 @@ def _load_sample5(self, filename: str, map: bool = True):
                             f"Column '{col}' in scans_df not found in schema, keeping original type.",
                         )
+            # Ensure column order matches schema order
+            if "scans_df" in schema and "columns" in schema["scans_df"]:
+                schema_column_order = list(schema["scans_df"]["columns"].keys())
+                # Only reorder columns that exist in both schema and DataFrame
+                existing_columns = [col for col in schema_column_order if col in self.scans_df.columns]
+                if existing_columns:
+                    self.scans_df = self.scans_df.select(existing_columns)
             else:
                 self.scans_df = None
         else:
@@ -457,7 +465,7 @@ def _load_sample5(self, filename: str, map: bool = True):
             missing_columns = []
             for col in schema.get("features_df", {}).get("columns", []):
                 if col not in features_group:
-                    self.logger.warning(
+                    self.logger.debug(
                         f"Column '{col}' not found in sample5/features.",
                     )
                     data[col] = None
@@ -804,6 +812,14 @@ def _load_sample5(self, filename: str, map: bool = True):
                             pl.col(col).fill_nan(None).alias(col),
                         ])
+                # Ensure column order matches schema order
+                if "features_df" in schema and "columns" in schema["features_df"]:
+                    schema_column_order = list(schema["features_df"]["columns"].keys())
+                    # Only reorder columns that exist in both schema and DataFrame
+                    existing_columns = [col for col in schema_column_order if col in self.features_df.columns]
+                    if existing_columns:
+                        self.features_df = self.features_df.select(existing_columns)
             else:
                 self.features_df = None
         else:

{masster-0.2.0 → masster-0.2.1}/src/masster/sample/sample5_schema.json RENAMED Viewed

@@ -1,80 +1,80 @@
 {
   "features_df": {
     "columns": {
-      "adduct": {
-        "dtype": "pl.Utf8"
-      },
-      "adduct_group": {
+      "feature_uid": {
         "dtype": "pl.Int64"
       },
-      "adduct_mass": {
-        "dtype": "pl.Float64"
+      "feature_id": {
+        "dtype": "pl.Utf8"
       },
-      "charge": {
-        "dtype": "pl.Int32"
+      "mz": {
+        "dtype": "pl.Float64"
       },
-      "chrom": {
-        "dtype": "pl.Object"
+      "rt": {
+        "dtype": "pl.Float64"
       },
-      "chrom_coherence": {
+      "rt_original": {
         "dtype": "pl.Float64"
       },
-      "chrom_height_scaled": {
+      "rt_start": {
         "dtype": "pl.Float64"
       },
-      "chrom_prominence": {
+      "rt_end": {
         "dtype": "pl.Float64"
       },
-      "chrom_prominence_scaled": {
+      "rt_delta": {
         "dtype": "pl.Float64"
       },
-      "feature_id": {
-        "dtype": "pl.Utf8"
+      "mz_start": {
+        "dtype": "pl.Float64"
       },
-      "feature_uid": {
-        "dtype": "pl.Int64"
+      "mz_end": {
+        "dtype": "pl.Float64"
       },
       "inty": {
         "dtype": "pl.Float64"
       },
+      "quality": {
+        "dtype": "pl.Float64"
+      },
+      "charge": {
+        "dtype": "pl.Int32"
+      },
       "iso": {
         "dtype": "pl.Int64"
       },
       "iso_of": {
         "dtype": "pl.Int64"
       },
-      "ms2_scans": {
-        "dtype": "pl.Object"
-      },
-      "ms2_specs": {
-        "dtype": "pl.Object"
+      "adduct_group": {
+        "dtype": "pl.Int64"
       },
-      "mz": {
-        "dtype": "pl.Float64"
+      "adduct": {
+        "dtype": "pl.Utf8"
       },
-      "mz_end": {
+      "adduct_mass": {
         "dtype": "pl.Float64"
       },
-      "mz_start": {
-        "dtype": "pl.Float64"
+      "chrom": {
+        "dtype": "pl.Object"
       },
-      "quality": {
+      "chrom_coherence": {
         "dtype": "pl.Float64"
       },
-      "rt": {
+      "chrom_prominence": {
         "dtype": "pl.Float64"
       },
-      "rt_delta": {
+      "chrom_prominence_scaled": {
         "dtype": "pl.Float64"
       },
-      "rt_end": {
+      "chrom_height_scaled": {
         "dtype": "pl.Float64"
       },
-      "rt_original": {
-        "dtype": "pl.Float64"
+      "ms2_scans": {
+        "dtype": "pl.Object"
       },
-      "rt_start": {
-        "dtype": "pl.Float64"
+      "ms2_specs": {
+        "dtype": "pl.Object"
       }
     }
   },
@@ -100,62 +100,80 @@
   },
   "scans_df": {
     "columns": {
-      "bl": {
-        "dtype": "pl.Float64"
-      },
-      "comment": {
-        "dtype": "pl.Utf8"
+      "scan_uid": {
+        "dtype": "pl.Int64"
       },
       "cycle": {
         "dtype": "pl.Int64"
       },
-      "energy": {
-        "dtype": "pl.Float64"
-      },
-      "feature_uid": {
+      "ms_level": {
         "dtype": "pl.Int64"
       },
-      "id": {
-        "dtype": "pl.Utf8"
+      "rt": {
+        "dtype": "pl.Float64"
       },
-      "inty_max": {
+      "inty_tot": {
         "dtype": "pl.Float64"
       },
       "inty_min": {
         "dtype": "pl.Float64"
       },
-      "inty_tot": {
+      "inty_max": {
         "dtype": "pl.Float64"
       },
-      "ms_level": {
-        "dtype": "pl.Int64"
-      },
-      "mz_max": {
+      "bl": {
         "dtype": "pl.Float64"
       },
       "mz_min": {
         "dtype": "pl.Float64"
       },
+      "mz_max": {
+        "dtype": "pl.Float64"
+      },
+      "comment": {
+        "dtype": "pl.Utf8"
+      },
       "name": {
         "dtype": "pl.Utf8"
       },
-      "prec_inty": {
-        "dtype": "pl.Float64"
+      "id": {
+        "dtype": "pl.Utf8"
       },
       "prec_mz": {
         "dtype": "pl.Float64"
       },
+      "prec_mz_min": {
+        "dtype": "pl.Float64"
+      },
       "prec_mz_max": {
         "dtype": "pl.Float64"
       },
-      "prec_mz_min": {
+      "prec_inty": {
         "dtype": "pl.Float64"
       },
-      "rt": {
+      "energy": {
         "dtype": "pl.Float64"
       },
-      "scan_uid": {
+      "feature_uid": {
         "dtype": "pl.Int64"
+      },
+      "ms2_n": {
+        "dtype": "pl.Int64"
+      },
+      "time_cycle": {
+        "dtype": "pl.Float64"
+      },
+      "time_ms1_to_ms1": {
+        "dtype": "pl.Float64"
+      },
+      "time_ms1_to_ms2": {
+        "dtype": "pl.Float64"
+      },
+      "time_ms2_to_ms2": {
+        "dtype": "pl.Float64"
+      },
+      "time_ms2_to_ms1": {
+        "dtype": "pl.Float64"
       }
     }
   },

{masster-0.2.0 → masster-0.2.1}/src/masster/study/h5.py RENAMED Viewed

@@ -530,7 +530,6 @@ def _save_study5(self, filename=None):
                 _save_dataframe_column(consensus_ms2_group, col, data, dtype, self.logger)
     self.logger.info(f"Study saved to {filename}")
-    self.logger.info(f"Study saved to {filename}")
 def _load_study5(self, filename=None):

{masster-0.2.0 → masster-0.2.1}/src/masster/study/helpers.py RENAMED Viewed

@@ -114,45 +114,6 @@ def get_chrom(self, uids=None, samples=None):
     # Return as Polars DataFrame (can handle complex objects like Chromatogram)
     return df2_pivoted
-'''
-def migrate_adduct_columns(self):
-    """
-    Migrate adduct_right and adduct_mass_right columns to adduct and adduct_mass.
-    This fixes an issue where join operations created _right suffixed columns.
-    """
-    if self.features_df.is_empty():
-        return
-    # Check if we have the _right suffixed columns
-    has_adduct_right = "adduct_right" in self.features_df.columns
-    has_adduct_mass_right = "adduct_mass_right" in self.features_df.columns
-    has_adduct = "adduct" in self.features_df.columns
-    has_adduct_mass = "adduct_mass" in self.features_df.columns
-    if has_adduct_right or has_adduct_mass_right:
-        self.logger.info("Migrating adduct column names...")
-        # Start with all columns except those we're replacing/dropping
-        columns_to_keep = [
-            col
-            for col in self.features_df.columns
-            if col not in ["adduct_right", "adduct_mass_right", "adduct", "adduct_mass"]
-        ]
-        # Add the migrated columns
-        if has_adduct_right:
-            columns_to_keep.append(pl.col("adduct_right").alias("adduct"))
-        if has_adduct_mass_right:
-            columns_to_keep.append(pl.col("adduct_mass_right").alias("adduct_mass"))
-        # Apply the migration
-        self.features_df = self.features_df.select(columns_to_keep)
-        self.logger.success("Adduct column migration completed.")
-    else:
-        self.logger.info("No adduct column migration needed.")
-'''
 def set_default_folder(self, folder):
     """
     Set the default folder for saving and loading files.
@@ -448,6 +409,12 @@ def _get_sample_uids(self, samples=None, seed=42):
         sample_uids = list(set(sample_uids))
         return sample_uids
+def get_orphans(self):
+    """
+    Get all features that are not in the consensus mapping.
+    """
+    not_in_consensus = self.features_df.filter(~self.features_df['feature_uid'].is_in(self.consensus_mapping_df['feature_uid'].to_list()))
+    return not_in_consensus
 def compress(self):
     """

{masster-0.2.0 → masster-0.2.1}/src/masster/study/study.py RENAMED Viewed

@@ -63,6 +63,7 @@ from masster.study.helpers import get_chrom
 from masster.study.helpers import get_consensus
 from masster.study.helpers import get_consensus_matches
 from masster.study.helpers import get_consensus_matrix
+from masster.study.helpers import get_orphans
 from masster.study.helpers import get_gaps_matrix
 from masster.study.helpers import get_gaps_stats
 from masster.study.helpers import align_reset
@@ -273,6 +274,7 @@ class Study:
     get_consensus_matrix = get_consensus_matrix
     get_gaps_matrix = get_gaps_matrix
     get_gaps_stats = get_gaps_stats
+    get_orphans = get_orphans
     set_default_folder = set_default_folder
     fill_chrom_parallel = fill_chrom_parallel
     _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
@@ -423,6 +425,12 @@ class Study:
             mean_samples = 0
             max_samples = 0
+        # Count only features where 'filled' == False
+        if not self.features_df.is_empty() and 'filled' in self.features_df.columns:
+            unfilled_features_count = self.features_df.filter(~self.features_df['filled']).height
+        else:
+            unfilled_features_count = 0
         # Optimize chrom completeness calculation
         if consensus_df_len > 0 and samples_df_len > 0 and not self.features_df.is_empty():
@@ -441,8 +449,17 @@ class Study:
             chrom_completeness = (
                 non_null_chroms / total_possible if total_possible > 0 else 0
             )
+            not_in_consensus = len(self.features_df.filter(~self.features_df['feature_uid'].is_in(self.consensus_mapping_df['feature_uid'].to_list())))
+            ratio_not_in_consensus_to_total = not_in_consensus / unfilled_features_count if unfilled_features_count > 0 else 0
+            ratio_in_consensus_to_total = (unfilled_features_count- not_in_consensus) / len(self.features_df) if len(self.features_df) > 0 else 0
         else:
             chrom_completeness = 0
+            not_in_consensus = 0
+            ratio_not_in_consensus_to_total = 0
+            ratio_in_consensus_to_total = 0
         # calculate for how many consensus features there is at least one MS2 spectrum linked
         consensus_with_ms2 = self.consensus_ms2.select(
@@ -458,17 +475,18 @@ class Study:
             self.consensus_mapping_df.estimated_size()
         )
-        # Build summary string efficiently
         summary = (
             f"Default folder:         {self.default_folder}\n"
-            f"Consensus features:     {consensus_df_len}\n"
             f"Samples:                {samples_df_len}\n"
-            f"Min samples/consensus:  {min_samples:.0f}\n"
-            f"Mean samples/consensus: {mean_samples:.0f}\n"
-            f"Max samples/consensus:  {max_samples:.0f}\n"
-            f"Chrom completeness:     {chrom_completeness:.2f}\n"
-            f"Linked MS2 spectra:     {consensus_ms2_len}\n"
-            f"Consensus with MS2:     {consensus_with_ms2}\n"
+            f"Features:               {unfilled_features_count}\n"
+            f"- in consensus:         {ratio_in_consensus_to_total*100:.0f}%\n"
+            f"- non in consensus:     {ratio_not_in_consensus_to_total*100:.0f}%\n"
+            f"Consensus:              {consensus_df_len}\n"
+            f"- Min samples count:    {min_samples:.0f}\n"
+            f"- Mean samples count:   {mean_samples:.0f}\n"
+            f"- Max samples count:    {max_samples:.0f}\n"
+            f"- with MS2:             {consensus_with_ms2}\n"
+            f"Chrom completeness:     {chrom_completeness*100:.0f}%\n"
             f"Memory usage:           {memory_usage / (1024 ** 2):.2f} MB\n"
         )

{masster-0.2.0 → masster-0.2.1}/uv.lock RENAMED Viewed

@@ -1585,7 +1585,7 @@ wheels = [
 [[package]]
 name = "masster"
-version = "0.2.0"
+version = "0.2.1"
 source = { editable = "." }
 dependencies = [
     { name = "alphabase" },