PyPI - masster - Versions diffs - 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl - Mend

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (37)

masster/_version.py +1 -1
masster/logger.py +35 -19
masster/sample/adducts.py +15 -29
masster/sample/defaults/find_adducts_def.py +1 -3
masster/sample/defaults/sample_def.py +4 -4
masster/sample/h5.py +203 -361
masster/sample/helpers.py +14 -30
masster/sample/lib.py +3 -3
masster/sample/load.py +21 -29
masster/sample/plot.py +222 -132
masster/sample/processing.py +42 -55
masster/sample/sample.py +37 -46
masster/sample/save.py +37 -61
masster/sample/sciex.py +13 -11
masster/sample/thermo.py +69 -74
masster/spectrum.py +15 -15
masster/study/analysis.py +650 -586
masster/study/defaults/identify_def.py +1 -3
masster/study/defaults/merge_def.py +6 -7
masster/study/defaults/study_def.py +1 -5
masster/study/export.py +35 -96
masster/study/h5.py +134 -211
masster/study/helpers.py +385 -459
masster/study/id.py +239 -290
masster/study/importers.py +84 -93
masster/study/load.py +159 -178
masster/study/merge.py +1112 -1098
masster/study/plot.py +195 -149
masster/study/processing.py +144 -191
masster/study/save.py +14 -13
masster/study/study.py +89 -130
masster/wizard/wizard.py +764 -714
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
{masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0

masster/study/defaults/identify_def.py CHANGED Viewed

@@ -158,9 +158,7 @@ class identify_defaults:
             if not isinstance(value, list):
                 return False
             # For heteroatoms, ensure all elements are strings
-            if param_name == "heteroatoms" and not all(
-                isinstance(item, str) for item in value
-            ):
+            if param_name == "heteroatoms" and not all(isinstance(item, str) for item in value):
                 return False
         # Range validation for numeric types

masster/study/defaults/merge_def.py CHANGED Viewed

@@ -36,10 +36,10 @@ class merge_defaults:
     max_nr_conflicts: int = 0
     link_ms2: bool = True
     extract_ms1: bool = True
     # Cross-chunk merging parameters
     dechunking: str = "hierarchical"
     # Parallel processing parameters
     threads: Optional[int] = None
@@ -49,8 +49,7 @@ class merge_defaults:
                 "dtype": str,
                 "description": "Merge method (algorithm) to use",
                 "default": "kd",
-                "allowed_values": ["kd", "qt",
-                                 "kd_chunked", "kd-chunked", "qt_chunked", "qt-chunked"],
+                "allowed_values": ["kd", "qt", "kd_chunked", "kd-chunked", "qt_chunked", "qt-chunked"],
             },
             "min_samples": {
                 "dtype": int,
@@ -198,14 +197,14 @@ class merge_defaults:
                 elif dtype is str and isinstance(value, str):
                     valid_type = True
                     break
             if not valid_type:
                 return False
             # For None values, skip further validation
             if value is None:
                 return True
             # Use the first non-None type for range validation
             expected_dtype = next((dt for dt in expected_dtype if dt is not type(None)), expected_dtype[0])

masster/study/defaults/study_def.py CHANGED Viewed

@@ -345,11 +345,7 @@ class study_defaults:
             expected_dtype = self._param_metadata[param_name]["dtype"]
             # Handle optional types
-            if (
-                isinstance(expected_dtype, str)
-                and expected_dtype.startswith("Optional")
-                and value is not None
-            ):
+            if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
                 if "int" in expected_dtype and not isinstance(value, int):
                     try:
                         value = int(value)

masster/study/export.py CHANGED Viewed

@@ -78,7 +78,7 @@ def _get_mgf_df(self, **kwargs):
     if self.consensus_df is None:
         self.logger.error("No consensus map found. Please run merge() first.")
         return None
     # MS2 data is optional - we can generate MS1 data without it
     ms2_available = self.consensus_ms2 is not None and not self.consensus_ms2.is_empty()
     if not ms2_available:
@@ -112,11 +112,7 @@ def _get_mgf_df(self, **kwargs):
             mask = mask & (spec.inty >= inty_min)
         for attr in spec.__dict__:
             arr = getattr(spec, attr)
-            if (
-                isinstance(arr, list | np.ndarray)
-                and hasattr(arr, "__len__")
-                and len(arr) == length
-            ):
+            if isinstance(arr, list | np.ndarray) and hasattr(arr, "__len__") and len(arr) == length:
                 setattr(spec, attr, np.array(arr)[mask])
         return spec
@@ -132,12 +128,8 @@ def _get_mgf_df(self, **kwargs):
             return None
         # Prepare spectrum data
-        spectrum_mz = (
-            spect.mz.tolist() if hasattr(spect.mz, "tolist") else list(spect.mz)
-        )
-        spectrum_inty = (
-            spect.inty.tolist() if hasattr(spect.inty, "tolist") else list(spect.inty)
-        )
+        spectrum_mz = spect.mz.tolist() if hasattr(spect.mz, "tolist") else list(spect.mz)
+        spectrum_inty = spect.inty.tolist() if hasattr(spect.inty, "tolist") else list(spect.inty)
         # Determine MS level
         ms_level = spect.ms_level if spect.ms_level is not None else 1
@@ -181,7 +173,7 @@ def _get_mgf_df(self, **kwargs):
         consensus_mz = row["mz"]
         consensus_rt = row["rt"]
         consensus_inty_mean = row.get("inty_mean", 0)
         if mz_start is not None and consensus_mz < mz_start:
             continue
         if mz_end is not None and consensus_mz > mz_end:
@@ -190,10 +182,10 @@ def _get_mgf_df(self, **kwargs):
             continue
         if rt_end is not None and consensus_rt > rt_end:
             continue
         # Create MS1 spectrum using isotope data
         iso_data = row.get("iso", None)
         if iso_data is not None and len(iso_data) > 0:
             # Use isotope data for spectrum
             spectrum_mz = [float(peak[0]) for peak in iso_data]
@@ -202,10 +194,12 @@ def _get_mgf_df(self, **kwargs):
             # Use consensus mz and inty_mean as single peak
             spectrum_mz = [float(consensus_mz)]
             spectrum_inty = [float(consensus_inty_mean)]
         # Apply intensity minimum filter if specified
         if inty_min is not None and inty_min > 0:
-            filtered_pairs = [(mz, inty) for mz, inty in zip(spectrum_mz, spectrum_inty, strict=False) if inty >= inty_min]
+            filtered_pairs = [
+                (mz, inty) for mz, inty in zip(spectrum_mz, spectrum_inty, strict=False) if inty >= inty_min
+            ]
             if filtered_pairs:
                 spectrum_mz, spectrum_inty = zip(*filtered_pairs, strict=False)
                 spectrum_mz = list(spectrum_mz)
@@ -213,9 +207,9 @@ def _get_mgf_df(self, **kwargs):
             else:
                 # If all peaks are below threshold, skip this feature
                 continue
         mgf_counter += 1
         # Create MS1 spectrum object to use with create_ion_dict
         class SimpleSpectrum:
             def __init__(self, mz_list, inty_list):
@@ -223,9 +217,9 @@ def _get_mgf_df(self, **kwargs):
                 self.inty = np.array(inty_list)
                 self.ms_level = 1
                 self.energy = None
         ms1_spectrum = SimpleSpectrum(spectrum_mz, spectrum_inty)
         # Use create_ion_dict to ensure consistent schema
         ion_dict = create_ion_dict(
             f"uid:{consensus_uid}, rt:{consensus_rt:.2f}, mz:{consensus_mz:.4f}, MS1",
@@ -237,7 +231,7 @@ def _get_mgf_df(self, **kwargs):
             ms1_spectrum,
             mgf_counter,
         )
         if ion_dict is not None:
             ion_data.append(ion_dict)
@@ -350,11 +344,7 @@ def _get_mgf_df(self, **kwargs):
                 elif selection == "all":
                     if merge:
-                        specs = [
-                            row_e["spec"]
-                            for row_e in cons_ms2.iter_rows(named=True)
-                            if row_e["spec"] is not None
-                        ]
+                        specs = [row_e["spec"] for row_e in cons_ms2.iter_rows(named=True) if row_e["spec"] is not None]
                         if not specs:
                             continue
                         spect = combine_peaks(specs)
@@ -443,6 +433,7 @@ def export_mgf(self, **kwargs):
     """
     # Get mgf data as DataFrame
     from masster.study.export import _get_mgf_df
     mgf_data = _get_mgf_df(self, **kwargs)
     if mgf_data is None or len(mgf_data) == 0:
@@ -543,11 +534,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
                 .sort("consensus_uid")
             )
             # Keep raw id_data for backward compatibility (if needed elsewhere)
-            id_data = (
-                self.id_df
-                if hasattr(self, "id_df") and self.id_df is not None
-                else None
-            )
+            id_data = self.id_df if hasattr(self, "id_df") and self.id_df is not None else None
         else:
             self.logger.info("No identification data available for mzTab export")
     except Exception as e:
@@ -561,6 +548,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
     mgf_mapping: dict[str, list[int]] = {}
     if include_mgf:
         from masster.study.export import _get_mgf_df
         mgf_data = _get_mgf_df(self, **kwargs)
         # Create mapping from feature_uid to MGF indexes
         if mgf_data is not None and len(mgf_data) > 0:
@@ -616,12 +604,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
     mtd_lines.append("")
     # Database information - updated based on identification data
-    if (
-        full_id_data is not None
-        and hasattr(self, "lib_df")
-        and self.lib_df is not None
-        and not self.lib_df.is_empty()
-    ):
+    if full_id_data is not None and hasattr(self, "lib_df") and self.lib_df is not None and not self.lib_df.is_empty():
         mtd_lines.append('MTD\tdatabase[1]\t[, , "compound library", ]')
         mtd_lines.append("MTD\tdatabase[1]-prefix\tcmpd")
         mtd_lines.append("MTD\tdatabase[1]-version\tUnknown")
@@ -688,11 +671,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
     # round to int - handle both Polars and Pandas DataFrames
     if hasattr(abundance_matrix, "with_columns"):
         # Polars DataFrame
-        numeric_cols = [
-            col
-            for col in abundance_matrix.columns
-            if abundance_matrix[col].dtype.is_numeric()
-        ]
+        numeric_cols = [col for col in abundance_matrix.columns if abundance_matrix[col].dtype.is_numeric()]
         abundance_matrix = abundance_matrix.with_columns(
             [abundance_matrix[col].round(0) for col in numeric_cols],
         )
@@ -738,9 +717,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
         best_id_confidence_measure = "null"
         best_id_confidence_value = "null"
         reliability = "4"  # Default: unknown compound
-        theoretical_neutral_mass = (
-            "null"  # Only set when we have database identification
-        )
+        theoretical_neutral_mass = "null"  # Only set when we have database identification
         if id_info:
             # Use cmpd_uid as database identifier with prefix
@@ -817,27 +794,15 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
             # Get the first (and should be only) matching row
             abundance_row = filtered_matrix.row(0, named=True)
             # Extract values excluding the consensus_uid column
-            abundance_values = [
-                abundance_row[col]
-                for col in abundance_matrix.columns
-                if col != "consensus_uid"
-            ]
-            sml_row += [
-                safe_str(val) if val is not None else "null" for val in abundance_values
-            ]
+            abundance_values = [abundance_row[col] for col in abundance_matrix.columns if col != "consensus_uid"]
+            sml_row += [safe_str(val) if val is not None else "null" for val in abundance_values]
             # Calculate study variable statistics
             non_null_values = [val for val in abundance_values if val is not None]
             if non_null_values:
                 abundance_study_variable = sum(non_null_values) / len(non_null_values)
                 abundance_variation_study_variable = (
-                    (
-                        sum(
-                            (x - abundance_study_variable) ** 2 for x in non_null_values
-                        )
-                        / len(non_null_values)
-                    )
-                    ** 0.5
+                    (sum((x - abundance_study_variable) ** 2 for x in non_null_values) / len(non_null_values)) ** 0.5
                     if len(non_null_values) > 1
                     else 0
                 )
@@ -896,9 +861,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
                 some_ids = []
                 for i, some_row in enumerate(some_matches.iter_rows(named=True)):
                     # Create a unique SOME ID based on consensus_uid and position
-                    some_id_base = (
-                        consensus_uid * 1000
-                    )  # Ensure uniqueness across consensus features
+                    some_id_base = consensus_uid * 1000  # Ensure uniqueness across consensus features
                     some_id = some_id_base + i + 1
                     some_ids.append(str(some_id))
@@ -950,14 +913,8 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
             # Get the first (and should be only) matching row
             abundance_row = filtered_matrix.row(0, named=True)
             # Extract values excluding the consensus_uid column
-            abundance_values = [
-                abundance_row[col]
-                for col in abundance_matrix.columns
-                if col != "consensus_uid"
-            ]
-            abundance_strings = [
-                safe_str(val) if val is not None else "null" for val in abundance_values
-            ]
+            abundance_values = [abundance_row[col] for col in abundance_matrix.columns if col != "consensus_uid"]
+            abundance_strings = [safe_str(val) if val is not None else "null" for val in abundance_values]
             smf_row += abundance_strings
             # Calculate study variable statistics (same as in SML section)
@@ -965,13 +922,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
             if non_null_values:
                 abundance_study_variable = sum(non_null_values) / len(non_null_values)
                 abundance_variation_study_variable = (
-                    (
-                        sum(
-                            (x - abundance_study_variable) ** 2 for x in non_null_values
-                        )
-                        / len(non_null_values)
-                    )
-                    ** 0.5
+                    (sum((x - abundance_study_variable) ** 2 for x in non_null_values) / len(non_null_values)) ** 0.5
                     if len(non_null_values) > 1
                     else 0
                 )
@@ -1023,9 +974,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
         some_lines.append("\t".join(some_header))
         # Create SOME entries for all identification results using enriched data
-        for consensus_uid in (
-            self.consensus_df.select("consensus_uid").to_series().unique()
-        ):
+        for consensus_uid in self.consensus_df.select("consensus_uid").to_series().unique():
             # Get consensus feature data for this consensus_uid
             consensus_feature_data = self.consensus_df.filter(
                 pl.col("consensus_uid") == consensus_uid,
@@ -1081,9 +1030,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
                     # Theoretical mass-to-charge from lib_df
                     theoretical_mz = "null"
-                    if (
-                        some_row.get("mz") is not None
-                    ):  # This comes from lib_df via get_id() join
+                    if some_row.get("mz") is not None:  # This comes from lib_df via get_id() join
                         theoretical_mz = safe_str(some_row["mz"])
                     some_line = [
@@ -1148,23 +1095,15 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
             spec_len = row["spec_len"] if row["spec_len"] is not None else 0
             # Format spectrum data as pipe-separated strings
-            spec_mz_str = (
-                "|".join([f"{mz:.4f}" for mz in spectrum_mz]) if spectrum_mz else ""
-            )
-            spec_int_str = (
-                "|".join([f"{int(inty)}" for inty in spectrum_inty])
-                if spectrum_inty
-                else ""
-            )
+            spec_mz_str = "|".join([f"{mz:.4f}" for mz in spectrum_mz]) if spectrum_mz else ""
+            spec_int_str = "|".join([f"{int(inty)}" for inty in spectrum_inty]) if spectrum_inty else ""
             mgf_row = [
                 "COM",
                 "MGF",
                 str(row["mgf_index"]) if row["mgf_index"] is not None else "null",
                 str(row["feature_id"]) if row["feature_id"] is not None else "null",
-                f"{row['rtinseconds']:.2f}"
-                if row["rtinseconds"] is not None
-                else "null",
+                f"{row['rtinseconds']:.2f}" if row["rtinseconds"] is not None else "null",
                 f"{row['pepmass']:.4f}" if row["pepmass"] is not None else "null",
                 "null",  # prec_int - not available in current data
                 str(row["energy"]) if row["energy"] is not None else "null",

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

Potentially problematic release.

masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl