PyPI - masster - Versions diffs - 0.5.28__py3-none-any.whl → 0.6.1__py3-none-any.whl - Mend

masster 0.5.28__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (31) hide show

masster/_version.py +1 -1
masster/data/libs/aa_nort.json +240 -0
masster/data/libs/ccm_nort.json +1319 -0
masster/lib/lib.py +1 -1
masster/logger.py +0 -6
masster/sample/adducts.py +1 -1
masster/sample/defaults/find_adducts_def.py +1 -1
masster/sample/h5.py +152 -2
masster/sample/helpers.py +91 -5
masster/sample/id.py +1160 -0
masster/sample/importers.py +316 -0
masster/sample/plot.py +175 -71
masster/sample/sample.py +18 -3
masster/sample/sample5_schema.json +99 -1
masster/study/defaults/study_def.py +8 -12
masster/study/export.py +62 -62
masster/study/id.py +59 -12
masster/study/load.py +0 -11
masster/study/merge.py +153 -0
masster/study/plot.py +197 -0
masster/study/study.py +3 -1
masster/study/study5_schema.json +15 -0
masster/wizard/wizard.py +11 -12
{masster-0.5.28.dist-info → masster-0.6.1.dist-info}/METADATA +17 -18
{masster-0.5.28.dist-info → masster-0.6.1.dist-info}/RECORD +28 -27
masster/data/libs/aa.csv +0 -22
masster/data/libs/ccm.csv +0 -120
masster/data/libs/urine.csv +0 -4693
{masster-0.5.28.dist-info → masster-0.6.1.dist-info}/WHEEL +0 -0
{masster-0.5.28.dist-info → masster-0.6.1.dist-info}/entry_points.txt +0 -0
{masster-0.5.28.dist-info → masster-0.6.1.dist-info}/licenses/LICENSE +0 -0

masster/sample/sample.py CHANGED Viewed

@@ -129,6 +129,12 @@ from masster.sample.helpers import get_eic
 from masster.sample.helpers import set_source
 from masster.sample.helpers import _recreate_feature_map
 from masster.sample.helpers import _get_feature_map
+from masster.sample.id import lib_load
+from masster.sample.id import identify
+from masster.sample.id import get_id
+from masster.sample.id import id_reset
+from masster.sample.id import lib_reset
+from masster.sample.importers import import_oracle
 from masster.sample.load import chrom_extract
 from masster.sample.load import _index_file
 from masster.sample.load import load
@@ -259,9 +265,10 @@ class Sample:
         # the polars data frame with MS1 level data
         self.ms1_df = pl.DataFrame()
-        # lightweight lib data for matching, targeted analyses, etc. > superseded by study methods
-        self.lib = None
-        self.lib_match = None
+        # identification DataFrames (lib_df and id_df)
+        self.lib_df = None  # library DataFrame (from masster.lib or CSV/JSON)
+        self.id_df = None   # identification results DataFrame
+        self._lib = None    # reference to Lib object if loaded
         self.chrom_df = None
         if params.filename is not None:
@@ -292,6 +299,14 @@ class Sample:
     update_parameters = update_parameters
     get_parameters_property = get_parameters_property
     set_parameters_property = set_parameters_property
+    # Identification methods from id.py
+    lib_load = lib_load
+    identify = identify
+    get_id = get_id
+    id_reset = id_reset
+    lib_reset = lib_reset
+    # Importers from importers.py
+    import_oracle = import_oracle
     export_features = export_features
     export_xlsx = export_xlsx
     export_mgf = export_mgf

masster/sample/sample5_schema.json CHANGED Viewed

@@ -93,10 +93,108 @@
       },
       "ms1_spec": {
         "dtype": "pl.Object"
+      },
+      "id_top_name": {
+        "dtype": "pl.Utf8"
+      },
+      "id_top_class": {
+        "dtype": "pl.Utf8"
+      },
+      "id_top_adduct": {
+        "dtype": "pl.Utf8"
+      },
+      "id_top_score": {
+        "dtype": "pl.Float64"
+      },
+      "id_source": {
+        "dtype": "pl.Utf8"
+      }
+    }
+  },
+  "lib_df": {
+    "columns": {
+      "lib_uid": {
+        "dtype": "pl.Int64"
+      },
+      "cmpd_uid": {
+        "dtype": "pl.Int64"
+      },
+      "name": {
+        "dtype": "pl.Utf8"
+      },
+      "shortname": {
+        "dtype": "pl.Utf8"
+      },
+      "class": {
+        "dtype": "pl.Utf8"
+      },
+      "formula": {
+        "dtype": "pl.Utf8"
+      },
+      "iso": {
+        "dtype": "pl.Int64"
+      },
+      "smiles": {
+        "dtype": "pl.Utf8"
+      },
+      "inchi": {
+        "dtype": "pl.Utf8"
+      },
+      "inchikey": {
+        "dtype": "pl.Utf8"
+      },
+      "adduct": {
+        "dtype": "pl.Utf8"
+      },
+      "z": {
+        "dtype": "pl.Int64"
+      },
+      "m": {
+        "dtype": "pl.Float64"
+      },
+      "mz": {
+        "dtype": "pl.Float64"
+      },
+      "rt": {
+        "dtype": "pl.Float64"
+      },
+      "quant_group": {
+        "dtype": "pl.Int64"
+      },
+      "probability": {
+        "dtype": "pl.Float64"
+      },
+      "source_id": {
+        "dtype": "pl.Utf8"
+      }
+    }
+  },
+  "id_df": {
+    "columns": {
+      "feature_uid": {
+        "dtype": "pl.Int64"
+      },
+      "lib_uid": {
+        "dtype": "pl.Int64"
+      },
+      "mz_delta": {
+        "dtype": "pl.Float64"
+      },
+      "rt_delta": {
+        "dtype": "pl.Float64"
+      },
+      "matcher": {
+        "dtype": "pl.Utf8"
+      },
+      "score": {
+        "dtype": "pl.Float64"
+      },
+      "iso": {
+        "dtype": "pl.Int64"
       }
     }
   },
-  "generated_date": "2025-08-03",
+  "generated_date": "2025-10-30",
   "ms1_df": {
     "columns": {
       "cycle": {

masster/study/defaults/study_def.py CHANGED Viewed

@@ -96,19 +96,15 @@ class study_defaults:
             "adducts": {
                 "dtype": "list[str]",
                 "description": "List of adduct specifications in OpenMS format (element:charge:probability). Charged adduct probabilities must sum to 1.0.",
-                "default": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
+                "default": ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05"],
                 "examples": {
-                    "positive": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
-                    "negative": [
-                        "H-1:-:0.95",
-                        "Cl:-:0.05",
-                        "CH2O2:0:0.2",
-                        "H-2-O:0:0.2",
-                    ],
+                    "positive": ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"],
+                    "negative": ["-H:-1:0.95", "+Cl:-1:0.05", "+CH2O2:0:0.2", "-H2O:0:0.2"],
                 },
                 "validation_rules": [
-                    "Format: element:charge:probability",
-                    "Charge must be +, -, or 0 (neutral)",
+                    "Format: formula:charge:probability (e.g., '+H:1:0.65', '-H:-1:0.95', '-H2O:0:0.15')",
+                    "Formula must start with + or - to indicate gain/loss (e.g., '+H', '-H', '+Na', '-H2O')",
+                    "Charge must be an integer (positive, negative, or 0 for neutral)",
                     "Probability must be between 0.0 and 1.0",
                     "Sum of all charged adduct probabilities must equal 1.0",
                 ],
@@ -128,7 +124,7 @@ class study_defaults:
         """Set polarity-specific defaults for adducts if not explicitly provided."""
         # If adducts is None, set based on polarity
         if self.adducts is None:
-            if self.polarity.lower() in ["positive", "pos"]:
+            if self.polarity.lower() in ["positive", "pos", "+"]:
                 self.adducts = [
                     "+H:1:0.65",
                     "+Na:1:0.15",
@@ -136,7 +132,7 @@ class study_defaults:
                     "+K:1:0.05",
                     "-H2O:0:0.15",
                 ]
-            elif self.polarity.lower() in ["negative", "neg"]:
+            elif self.polarity.lower() in ["negative", "neg", "-"]:
                 self.adducts = [
                     "-H:-1:0.9",
                     "+Cl:-1:0.1",

masster/study/export.py CHANGED Viewed

@@ -524,7 +524,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
         # Import here to avoid circular imports
         from masster.study.id import get_id
-        # Get full enriched identification data for SOME section
+        # Get full enriched identification data for SME section
         full_id_data = get_id(self)
         if full_id_data is not None and not full_id_data.is_empty():
             # Get top scoring identification for each consensus_uid for SML section
@@ -828,8 +828,8 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
     smf_header = [
         "SFH",
         "SMF_ID",
-        "SOME_ID_REFS",
-        "SOME_ID_REF_ambiguity_code",
+        "SME_ID_REFS",
+        "SME_ID_REF_ambiguity_code",
         "adduct_ion",
         "isotopomer",
         "exp_mass_to_charge",
@@ -847,40 +847,40 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
     # SMF table uses the same consensus features as SML, just different metadata
     for idx, row in enumerate(self.consensus_df.iter_rows(named=True), 1):
-        # References to SOME entries - each SMF can reference multiple SOME entries for the same consensus_uid
-        some_refs = "null"
-        some_ambiguity = "null"
+        # References to SME entries - each SMF can reference multiple SME entries for the same consensus_uid
+        SME_refs = "null"
+        SME_ambiguity = "null"
         consensus_uid = row["consensus_uid"]
         if full_id_data is not None:
-            # Find all SOME entries for this consensus_uid
-            some_matches = full_id_data.filter(pl.col("consensus_uid") == consensus_uid)
-            if some_matches.height > 0:
-                # Generate SOME IDs - we'll create a mapping in the SOME section
+            # Find all SME entries for this consensus_uid
+            SME_matches = full_id_data.filter(pl.col("consensus_uid") == consensus_uid)
+            if SME_matches.height > 0:
+                # Generate SME IDs - we'll create a mapping in the SME section
                 # For now, use a simple approach based on consensus_uid and lib_uid
-                some_ids = []
-                for i, some_row in enumerate(some_matches.iter_rows(named=True)):
-                    # Create a unique SOME ID based on consensus_uid and position
-                    some_id_base = consensus_uid * 1000  # Ensure uniqueness across consensus features
-                    some_id = some_id_base + i + 1
-                    some_ids.append(str(some_id))
-                if some_ids:
-                    some_refs = "|".join(some_ids)
+                SME_ids = []
+                for i, SME_row in enumerate(SME_matches.iter_rows(named=True)):
+                    # Create a unique SME ID based on consensus_uid and position
+                    SME_id_base = consensus_uid * 1000  # Ensure uniqueness across consensus features
+                    SME_id = SME_id_base + i + 1
+                    SME_ids.append(str(SME_id))
+                if SME_ids:
+                    SME_refs = "|".join(SME_ids)
                     # Set ambiguity code: 1=ambiguous identification, 2=multiple evidence same molecule, 3=both
-                    if len(some_ids) > 1:
+                    if len(SME_ids) > 1:
                         # Check if all identifications point to the same compound
                         unique_cmpds = {
                             match["cmpd_uid"]
-                            for match in some_matches.iter_rows(named=True)
+                            for match in SME_matches.iter_rows(named=True)
                             if match.get("cmpd_uid") is not None
                         }
                         if len(unique_cmpds) > 1:
-                            some_ambiguity = "1"  # Ambiguous identification
+                            SME_ambiguity = "1"  # Ambiguous identification
                         else:
-                            some_ambiguity = "2"  # Multiple evidence for same molecule
+                            SME_ambiguity = "2"  # Multiple evidence for same molecule
                     else:
-                        some_ambiguity = "null"
+                        SME_ambiguity = "null"
         # Format isotopomer according to mzTab-M specification
         iso_value = row.get("iso_mean", 0)
@@ -892,8 +892,8 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
         smf_row = [
             "SMF",
             str(idx),
-            some_refs,
-            some_ambiguity,
+            SME_refs,
+            SME_ambiguity,
             adduct_list[idx - 1],  # adduct_ion
             isotopomer,  # isotopomer formatted according to mzTab-M specification
             safe_str(row.get("mz", "null")),  # exp_mass_to_charge
@@ -943,16 +943,16 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
         for line in smf_lines:
             f.write(line + "\n")
-    # --- SOME (Small Molecule Evidence) table ---
+    # --- SME (Small Molecule Evidence) table ---
     if full_id_data is not None and not full_id_data.is_empty():
-        some_lines = []
+        SME_lines = []
         # Add comment about spectra_ref being dummy placeholders
-        some_lines.append(
+        SME_lines.append(
             "COM\tThe spectra_ref are dummy placeholders, as the annotation was based on aggregated data",
         )
-        some_header = [
-            "SHE",
-            "SOME_ID",
+        SME_header = [
+            "SEH",
+            "SME_ID",
             "evidence_input_id",
             "database_identifier",
             "chemical_formula",
@@ -971,9 +971,9 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
             "id_confidence_measure[1]",
             "rank",
         ]
-        some_lines.append("\t".join(some_header))
+        SME_lines.append("\t".join(SME_header))
-        # Create SOME entries for all identification results using enriched data
+        # Create SME entries for all identification results using enriched data
         for consensus_uid in self.consensus_df.select("consensus_uid").to_series().unique():
             # Get consensus feature data for this consensus_uid
             consensus_feature_data = self.consensus_df.filter(
@@ -984,16 +984,16 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
             consensus_row = consensus_feature_data.row(0, named=True)
             # Get all identification results for this consensus feature from enriched data
-            some_matches = full_id_data.filter(pl.col("consensus_uid") == consensus_uid)
+            SME_matches = full_id_data.filter(pl.col("consensus_uid") == consensus_uid)
-            if some_matches.height > 0:
+            if SME_matches.height > 0:
                 # Sort by score descending to maintain rank order
-                some_matches = some_matches.sort("score", descending=True)
+                SME_matches = SME_matches.sort("score", descending=True)
-                for i, some_row in enumerate(some_matches.iter_rows(named=True)):
-                    # Generate unique SOME_ID
-                    some_id_base = consensus_uid * 1000
-                    some_id = some_id_base + i + 1
+                for i, SME_row in enumerate(SME_matches.iter_rows(named=True)):
+                    # Generate unique SME_ID
+                    SME_id_base = consensus_uid * 1000
+                    SME_id = SME_id_base + i + 1
                     # Create evidence input ID using consensus_uid:mz:rt format
                     consensus_mz = consensus_row.get("mz", 0)
@@ -1002,15 +1002,15 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
                     # Database identifier - use db_id if available, otherwise fallback to cmpd_uid
                     db_id = "null"
-                    if some_row.get("db_id") is not None and some_row["db_id"] != "":
-                        db_id = safe_str(some_row["db_id"])
-                    elif some_row.get("cmpd_uid") is not None:
-                        db_id = f"cmpd:{some_row['cmpd_uid']}"
+                    if SME_row.get("db_id") is not None and SME_row["db_id"] != "":
+                        db_id = safe_str(SME_row["db_id"])
+                    elif SME_row.get("cmpd_uid") is not None:
+                        db_id = f"cmpd:{SME_row['cmpd_uid']}"
                     # Get adduct information
                     adduct_ion = "null"
-                    if some_row.get("adduct") is not None and some_row["adduct"] != "":
-                        adduct_ion = safe_str(some_row["adduct"])
+                    if SME_row.get("adduct") is not None and SME_row["adduct"] != "":
+                        adduct_ion = safe_str(SME_row["adduct"])
                         # Replace ? with H for better mzTab compatibility
                         adduct_ion = adduct_ion.replace("?", "H")
@@ -1019,8 +1019,8 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
                     # Identification method
                     id_method = "[MS, MS:1002888, small molecule confidence measure, ]"
-                    if some_row.get("matcher") is not None:
-                        id_method = f"[MS, MS:1002888, {some_row['matcher']}, ]"
+                    if SME_row.get("matcher") is not None:
+                        id_method = f"[MS, MS:1002888, {SME_row['matcher']}, ]"
                     # MS level - assume MS1 for now
                     ms_level = "[MS, MS:1000511, ms level, 1]"
@@ -1030,18 +1030,18 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
                     # Theoretical mass-to-charge from lib_df
                     theoretical_mz = "null"
-                    if some_row.get("mz") is not None:  # This comes from lib_df via get_id() join
-                        theoretical_mz = safe_str(some_row["mz"])
+                    if SME_row.get("mz") is not None:  # This comes from lib_df via get_id() join
+                        theoretical_mz = safe_str(SME_row["mz"])
-                    some_line = [
-                        "SOME",
-                        str(some_id),
+                    SME_line = [
+                        "SME",
+                        str(SME_id),
                         evidence_id,
                         db_id,
-                        safe_str(some_row.get("formula", "null")),
-                        safe_str(some_row.get("smiles", "null")),
-                        safe_str(some_row.get("inchi", "null")),
-                        safe_str(some_row.get("name", "null")),
+                        safe_str(SME_row.get("formula", "null")),
+                        safe_str(SME_row.get("smiles", "null")),
+                        safe_str(SME_row.get("inchi", "null")),
+                        safe_str(SME_row.get("name", "null")),
                         "null",  # uri - not available in current data
                         "null",  # derivatized_form
                         adduct_ion,
@@ -1053,15 +1053,15 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
                         spectra_ref,
                         id_method,
                         ms_level,
-                        safe_str(some_row.get("score", "null")),
+                        safe_str(SME_row.get("score", "null")),
                         str(i + 1),  # rank within this consensus feature
                     ]
-                    some_lines.append("\t".join(some_line))
+                    SME_lines.append("\t".join(SME_line))
-        # Write SOME table
+        # Write SME table
         with open(filename, "a", encoding="utf-8") as f:
             f.write("\n")
-            for line in some_lines:
+            for line in SME_lines:
                 f.write(line + "\n")
     # --- MGF table ---

masster/study/id.py CHANGED Viewed

@@ -24,7 +24,8 @@ def lib_load(
         lib_source: either a CSV/JSON file path (str) or a Lib instance
         polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV/JSON path.
                  If None, uses study.polarity automatically.
-        adducts: specific adducts to generate - used when lib_source is a CSV/JSON path
+        adducts: specific adducts to generate - used when lib_source is a CSV/JSON path.
+                 If None, uses study.parameters.adducts if available.
         iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)
     Side effects:
@@ -51,6 +52,18 @@ def lib_load(
         else:
             polarity = "positive"  # Default fallback
         study.logger.debug(f"Using study polarity: {polarity}")
+    # Use study.parameters.adducts if adducts not explicitly provided
+    # If study.parameters.adducts is also None, lib will use its default adducts for the polarity
+    if adducts is None:
+        if hasattr(study, "parameters") and hasattr(study.parameters, "adducts"):
+            adducts = study.parameters.adducts
+            if adducts:
+                study.logger.debug(f"Using study.parameters.adducts: {adducts}")
+            else:
+                study.logger.debug(f"study.parameters.adducts is None, lib will use default adducts for {polarity} mode")
+        else:
+            study.logger.debug(f"study.parameters.adducts not found, lib will use default adducts for {polarity} mode")
     # Handle string input (CSV or JSON file path)
     if isinstance(lib_source, str):
@@ -403,42 +416,64 @@ def _find_matches_vectorized(lib_df, cons_mz, cons_rt, mz_tol, rt_tol, logger, c
     """
     Find library matches using optimized vectorized operations.
-    FIXED VERSION: Prevents incorrect matching of same compound to different m/z values.
+    Automatically skips RT filtering if library has no RT data for the matched entries.
     """
     # Filter by m/z tolerance using vectorized operations
     matches = lib_df.filter((pl.col("mz") >= cons_mz - mz_tol) & (pl.col("mz") <= cons_mz + mz_tol))
     initial_match_count = len(matches)
-    # Apply RT filter if available - STRICT VERSION (no fallback)
+    # Apply RT filter if requested AND if data is available
+    # Strategy: Handle mixed RT/no-RT entries properly by treating them separately
     if rt_tol is not None and cons_rt is not None and not matches.is_empty():
-        # First, check if any m/z matches have RT data
+        # Separate entries with and without RT data
         rt_candidates = matches.filter(pl.col("rt").is_not_null())
+        no_rt_entries = matches.filter(pl.col("rt").is_null())
         if not rt_candidates.is_empty():
             # Apply RT filtering to candidates with RT data
             rt_matches = rt_candidates.filter((pl.col("rt") >= cons_rt - rt_tol) & (pl.col("rt") <= cons_rt + rt_tol))
-            if not rt_matches.is_empty():
+            # Combine RT-filtered matches with entries that have no RT data
+            # Rationale: Entries without RT can't be filtered by RT, so include them
+            if not rt_matches.is_empty() and not no_rt_entries.is_empty():
+                # Both RT matches and no-RT entries exist
+                matches = pl.concat([rt_matches, no_rt_entries])
+                if logger:
+                    logger.debug(
+                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT, "
+                        f"{len(rt_matches)} passed RT filter, {len(no_rt_entries)} with no RT → {len(matches)} total matches"
+                    )
+            elif not rt_matches.is_empty():
+                # Only RT matches, no entries without RT
                 matches = rt_matches
                 if logger:
                     logger.debug(
-                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT, {len(matches)} after RT filter"
+                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT, "
+                        f"{len(matches)} passed RT filter"
+                    )
+            elif not no_rt_entries.is_empty():
+                # No RT matches passed filter, but there are entries without RT
+                matches = no_rt_entries
+                if logger:
+                    logger.debug(
+                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT but none passed RT filter, "
+                        f"using {len(matches)} entries with no RT data"
                     )
             else:
-                # NO FALLBACK - if RT filtering finds no matches, return empty
-                matches = rt_matches  # This is empty
+                # No RT matches and no entries without RT - return empty
+                matches = pl.DataFrame()
                 if logger:
                     logger.debug(
                         f"Consensus {cons_uid}: RT filtering eliminated all {len(rt_candidates)} candidates (rt_tol={rt_tol}s) - no matches returned"
                     )
         else:
-            # No RT data in library matches - return empty if strict RT filtering requested
+            # All m/z matches have no RT data - keep all m/z matches
             if logger:
                 logger.debug(
-                    f"Consensus {cons_uid}: {initial_match_count} m/z matches but none have library RT data - no matches returned due to RT filtering"
+                    f"Consensus {cons_uid}: {initial_match_count} m/z matches, all have no RT data - using m/z matches only"
                 )
-            matches = pl.DataFrame()  # Return empty DataFrame
+            # matches already contains the m/z-filtered results (which are all no_rt_entries)
     # FIX 1: Add stricter m/z validation - prioritize more accurate matches
     if not matches.is_empty():
@@ -884,6 +919,18 @@ def identify(study, features=None, params=None, **kwargs):
     effective_mz_tol = getattr(params, "mz_tol", 0.01)
     effective_rt_tol = getattr(params, "rt_tol", 2.0)
+    # Check if library has RT data - if not, disable RT filtering
+    if effective_rt_tol is not None and hasattr(study, "lib_df") and study.lib_df is not None:
+        if "rt" in study.lib_df.columns:
+            # Check if library has any non-null RT values
+            rt_count = study.lib_df.filter(pl.col("rt").is_not_null()).shape[0]
+            if rt_count == 0:
+                if logger:
+                    logger.info(
+                        f"Library has no retention time data - disabling RT filtering (was rt_tol={effective_rt_tol})"
+                    )
+                effective_rt_tol = None
     if logger:
         logger.debug(
             f"Starting identification with mz_tolerance={effective_mz_tol}, rt_tolerance={effective_rt_tol}",
@@ -1483,7 +1530,7 @@ def _get_adducts(study, adducts_list: list | None = None, **kwargs):
             if charge_min <= abs(total_charge) <= charge_max and total_charge != 0:
                 components = [spec] * multiplier
                 formatted_name = _format_adduct_name(components)
-                probability_multiplied = float(spec["probability"]) ** multiplier
+                probability_multiplied = (float(spec["probability"]) ** multiplier) / 2.0
                 combinations_list.append(
                     {

masster/study/load.py CHANGED Viewed

@@ -191,17 +191,6 @@ def load(self, filename=None):
     _load_study5(self, filename)
-    # After loading the study, check if we have consensus features before loading consensus XML
-    # if (self.consensus_df is not None and not self.consensus_df.is_empty()):
-    #    consensus_xml_path = filename.replace(".study5", ".consensusXML")
-    #    if os.path.exists(consensus_xml_path):
-    #        self._load_consensusXML(filename=consensus_xml_path)
-    # self.logger.info(f"Automatically loaded consensus from {consensus_xml_path}")
-    #    else:
-    #        self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
-    # else:
-    #    self.logger.debug("No consensus features found, skipping consensusXML loading")
     self.filename = filename

masster 0.5.28__py3-none-any.whl → 0.6.1__py3-none-any.whl

Potentially problematic release.

masster 0.5.28__py3-none-any.whl → 0.6.1__py3-none-any.whl