PyPI - masster - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

masster 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (12)

masster/_version.py +1 -1
masster/sample/h5.py +18 -2
masster/sample/sample5_schema.json +76 -58
masster/study/h5.py +317 -138
masster/study/helpers.py +6 -39
masster/study/load.py +23 -134
masster/study/study.py +29 -11
{masster-0.2.0.dist-info → masster-0.2.2.dist-info}/METADATA +31 -55
{masster-0.2.0.dist-info → masster-0.2.2.dist-info}/RECORD +12 -12
{masster-0.2.0.dist-info → masster-0.2.2.dist-info}/WHEEL +0 -0
{masster-0.2.0.dist-info → masster-0.2.2.dist-info}/entry_points.txt +0 -0
{masster-0.2.0.dist-info → masster-0.2.2.dist-info}/licenses/LICENSE +0 -0

masster/_version.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
-__version__ = "0.2.0"
+__version__ = "0.2.2"
 def get_version():

masster/sample/h5.py CHANGED Viewed

@@ -309,7 +309,7 @@ def _load_sample5(self, filename: str, map: bool = True):
             missing_columns = []
             for col in schema.get("scans_df", {}).get("columns", []):
                 if col not in scans_group:
-                    self.logger.warning(f"Column '{col}' not found in sample5/scans.")
+                    self.logger.debug(f"Column '{col}' not found in sample5/scans.")
                     data[col] = None
                     missing_columns.append(col)
                     continue
@@ -444,6 +444,14 @@ def _load_sample5(self, filename: str, map: bool = True):
                             f"Column '{col}' in scans_df not found in schema, keeping original type.",
                         )
+            # Ensure column order matches schema order
+            if "scans_df" in schema and "columns" in schema["scans_df"]:
+                schema_column_order = list(schema["scans_df"]["columns"].keys())
+                # Only reorder columns that exist in both schema and DataFrame
+                existing_columns = [col for col in schema_column_order if col in self.scans_df.columns]
+                if existing_columns:
+                    self.scans_df = self.scans_df.select(existing_columns)
             else:
                 self.scans_df = None
         else:
@@ -457,7 +465,7 @@ def _load_sample5(self, filename: str, map: bool = True):
             missing_columns = []
             for col in schema.get("features_df", {}).get("columns", []):
                 if col not in features_group:
-                    self.logger.warning(
+                    self.logger.debug(
                         f"Column '{col}' not found in sample5/features.",
                     )
                     data[col] = None
@@ -804,6 +812,14 @@ def _load_sample5(self, filename: str, map: bool = True):
                             pl.col(col).fill_nan(None).alias(col),
                         ])
+                # Ensure column order matches schema order
+                if "features_df" in schema and "columns" in schema["features_df"]:
+                    schema_column_order = list(schema["features_df"]["columns"].keys())
+                    # Only reorder columns that exist in both schema and DataFrame
+                    existing_columns = [col for col in schema_column_order if col in self.features_df.columns]
+                    if existing_columns:
+                        self.features_df = self.features_df.select(existing_columns)
             else:
                 self.features_df = None
         else:

masster/sample/sample5_schema.json CHANGED Viewed

@@ -1,80 +1,80 @@
 {
   "features_df": {
     "columns": {
-      "adduct": {
-        "dtype": "pl.Utf8"
-      },
-      "adduct_group": {
+      "feature_uid": {
         "dtype": "pl.Int64"
       },
-      "adduct_mass": {
-        "dtype": "pl.Float64"
+      "feature_id": {
+        "dtype": "pl.Utf8"
       },
-      "charge": {
-        "dtype": "pl.Int32"
+      "mz": {
+        "dtype": "pl.Float64"
       },
-      "chrom": {
-        "dtype": "pl.Object"
+      "rt": {
+        "dtype": "pl.Float64"
       },
-      "chrom_coherence": {
+      "rt_original": {
         "dtype": "pl.Float64"
       },
-      "chrom_height_scaled": {
+      "rt_start": {
         "dtype": "pl.Float64"
       },
-      "chrom_prominence": {
+      "rt_end": {
         "dtype": "pl.Float64"
       },
-      "chrom_prominence_scaled": {
+      "rt_delta": {
         "dtype": "pl.Float64"
       },
-      "feature_id": {
-        "dtype": "pl.Utf8"
+      "mz_start": {
+        "dtype": "pl.Float64"
       },
-      "feature_uid": {
-        "dtype": "pl.Int64"
+      "mz_end": {
+        "dtype": "pl.Float64"
       },
       "inty": {
         "dtype": "pl.Float64"
       },
+      "quality": {
+        "dtype": "pl.Float64"
+      },
+      "charge": {
+        "dtype": "pl.Int32"
+      },
       "iso": {
         "dtype": "pl.Int64"
       },
       "iso_of": {
         "dtype": "pl.Int64"
       },
-      "ms2_scans": {
-        "dtype": "pl.Object"
-      },
-      "ms2_specs": {
-        "dtype": "pl.Object"
+      "adduct_group": {
+        "dtype": "pl.Int64"
       },
-      "mz": {
-        "dtype": "pl.Float64"
+      "adduct": {
+        "dtype": "pl.Utf8"
       },
-      "mz_end": {
+      "adduct_mass": {
         "dtype": "pl.Float64"
       },
-      "mz_start": {
-        "dtype": "pl.Float64"
+      "chrom": {
+        "dtype": "pl.Object"
       },
-      "quality": {
+      "chrom_coherence": {
         "dtype": "pl.Float64"
       },
-      "rt": {
+      "chrom_prominence": {
         "dtype": "pl.Float64"
       },
-      "rt_delta": {
+      "chrom_prominence_scaled": {
         "dtype": "pl.Float64"
       },
-      "rt_end": {
+      "chrom_height_scaled": {
         "dtype": "pl.Float64"
       },
-      "rt_original": {
-        "dtype": "pl.Float64"
+      "ms2_scans": {
+        "dtype": "pl.Object"
       },
-      "rt_start": {
-        "dtype": "pl.Float64"
+      "ms2_specs": {
+        "dtype": "pl.Object"
       }
     }
   },
@@ -100,62 +100,80 @@
   },
   "scans_df": {
     "columns": {
-      "bl": {
-        "dtype": "pl.Float64"
-      },
-      "comment": {
-        "dtype": "pl.Utf8"
+      "scan_uid": {
+        "dtype": "pl.Int64"
       },
       "cycle": {
         "dtype": "pl.Int64"
       },
-      "energy": {
-        "dtype": "pl.Float64"
-      },
-      "feature_uid": {
+      "ms_level": {
         "dtype": "pl.Int64"
       },
-      "id": {
-        "dtype": "pl.Utf8"
+      "rt": {
+        "dtype": "pl.Float64"
       },
-      "inty_max": {
+      "inty_tot": {
         "dtype": "pl.Float64"
       },
       "inty_min": {
         "dtype": "pl.Float64"
       },
-      "inty_tot": {
+      "inty_max": {
         "dtype": "pl.Float64"
       },
-      "ms_level": {
-        "dtype": "pl.Int64"
-      },
-      "mz_max": {
+      "bl": {
         "dtype": "pl.Float64"
       },
       "mz_min": {
         "dtype": "pl.Float64"
       },
+      "mz_max": {
+        "dtype": "pl.Float64"
+      },
+      "comment": {
+        "dtype": "pl.Utf8"
+      },
       "name": {
         "dtype": "pl.Utf8"
       },
-      "prec_inty": {
-        "dtype": "pl.Float64"
+      "id": {
+        "dtype": "pl.Utf8"
       },
       "prec_mz": {
         "dtype": "pl.Float64"
       },
+      "prec_mz_min": {
+        "dtype": "pl.Float64"
+      },
       "prec_mz_max": {
         "dtype": "pl.Float64"
       },
-      "prec_mz_min": {
+      "prec_inty": {
         "dtype": "pl.Float64"
       },
-      "rt": {
+      "energy": {
         "dtype": "pl.Float64"
       },
-      "scan_uid": {
+      "feature_uid": {
         "dtype": "pl.Int64"
+      },
+      "ms2_n": {
+        "dtype": "pl.Int64"
+      },
+      "time_cycle": {
+        "dtype": "pl.Float64"
+      },
+      "time_ms1_to_ms1": {
+        "dtype": "pl.Float64"
+      },
+      "time_ms1_to_ms2": {
+        "dtype": "pl.Float64"
+      },
+      "time_ms2_to_ms2": {
+        "dtype": "pl.Float64"
+      },
+      "time_ms2_to_ms1": {
+        "dtype": "pl.Float64"
       }
     }
   },

masster 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

Potentially problematic release.

masster 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl