PyPI - masster - Versions diffs - 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl - Mend

masster 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (13)

masster/_version.py +1 -1
masster/lib/lib.py +45 -3
masster/study/helpers.py +262 -310
masster/study/id.py +564 -324
masster/study/plot.py +38 -23
masster/study/processing.py +268 -178
masster/study/study.py +95 -60
masster/study/study5_schema.json +12 -0
{masster-0.4.11.dist-info → masster-0.4.13.dist-info}/METADATA +1 -1
{masster-0.4.11.dist-info → masster-0.4.13.dist-info}/RECORD +13 -13
{masster-0.4.11.dist-info → masster-0.4.13.dist-info}/WHEEL +0 -0
{masster-0.4.11.dist-info → masster-0.4.13.dist-info}/entry_points.txt +0 -0
{masster-0.4.11.dist-info → masster-0.4.13.dist-info}/licenses/LICENSE +0 -0

masster/_version.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
-__version__ = "0.4.11"
+__version__ = "0.4.13"
 def get_version():

masster/lib/lib.py CHANGED Viewed

@@ -142,11 +142,41 @@ class Lib:
         Returns:
             Accurate mass as float, or None if calculation fails
         """
+        # Skip obviously invalid formulas
+        if not formula or not isinstance(formula, str):
+            return None
+        # Clean up whitespace
+        formula = formula.strip()
+        # Skip formulas that are obviously invalid
+        invalid_patterns = [
+            # Contains parentheses with multipliers like (C12H19NO19S3)nH2O
+            lambda f: '(' in f and ')' in f and any(c.isalpha() and not c.isupper() for c in f.split(')')[1:]),
+            # Contains words instead of chemical symbols
+            lambda f: any(word in f.lower() for word in ['and', 'or', 'not', 'with', 'without']),
+            # Contains lowercase letters at the start (element symbols should be uppercase)
+            lambda f: f and f[0].islower(),
+            # Contains unusual characters that shouldn't be in formulas
+            lambda f: any(char in f for char in ['@', '#', '$', '%', '^', '&', '*', '=', '+', '?', '/', '\\', '|']),
+            # Empty or very short non-standard formulas
+            lambda f: len(f) < 2 and not f.isupper(),
+        ]
+        for pattern_check in invalid_patterns:
+            try:
+                if pattern_check(formula):
+                    warnings.warn(f"Skipping obviously invalid formula: '{formula}'")
+                    return None
+            except Exception:
+                # If pattern checking fails, continue to PyOpenMS parsing
+                pass
         try:
             empirical_formula = oms.EmpiricalFormula(formula)
             return empirical_formula.getMonoWeight()
         except Exception as e:
-            warnings.warn(f"Error calculating accurate mass for formula {formula}: {e}")
+            warnings.warn(f"Error calculating accurate mass for formula '{formula}': {e}")
             return None
     def _generate_adduct_variants(self,
@@ -272,8 +302,12 @@ class Lib:
         all_variants = []
         cmpd_id_counter = 1
         lib_id_counter = 1
+        total_compounds = 0
+        skipped_compounds = 0
         for row in df.iter_rows(named=True):
+            total_compounds += 1
             # Extract compound data
             # assign a compound-level uid so all adducts share the same cmpd_uid
             compound_level_uid = cmpd_id_counter
@@ -297,8 +331,12 @@ class Lib:
             )
             all_variants.extend(variants)
+            # Track if compound was skipped due to invalid formula
+            if len(variants) == 0:
+                skipped_compounds += 1
             # Handle RT2 column if present
-            if "rt2" in column_mapping:
+            if "rt2" in column_mapping and len(variants) > 0:  # Only if main variants were created
                 rt2_value = self._safe_float_conversion(row.get(column_mapping["rt2"], None))
                 if rt2_value is not None:
                     # Create additional variants with RT2
@@ -321,9 +359,13 @@ class Lib:
             else:
                 self.lib_df = new_lib_df
-            print(f"Successfully imported {len(all_variants)} library entries from {csvfile}")
+            #successful_compounds = total_compounds - skipped_compounds
+            print(f"Imported {len(all_variants)} library entries from {csvfile}")
+            #print(f"Processed {total_compounds} compounds: {successful_compounds} successful, {skipped_compounds} skipped due to invalid formulas")
         else:
             print(f"No valid compounds found in {csvfile}")
+            if skipped_compounds > 0:
+                print(f"All {total_compounds} compounds were skipped due to invalid formulas")
     def _map_csv_columns(self, columns: List[str]) -> Dict[str, str]:
         """

masster 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl

Potentially problematic release.

masster 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl