PyPI - masster - Versions diffs - 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl - Mend

masster 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (13) hide show

masster/lib/lib.py CHANGED Viewed

@@ -46,6 +46,7 @@ annotations = lib.annotate_features(sample.features_df)
 """
 import os
+import json
 from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING
 import warnings
@@ -685,6 +686,142 @@ class Lib:
             if skipped_compounds > 0:
                 print(f"All {total_compounds} compounds were skipped due to invalid formulas")
+    def import_json(self,
+                   jsonfile: str,
+                   polarity: Optional[str] = None,
+                   adducts: Optional[List[str]] = None,
+                   min_probability: float = 0.03) -> None:
+        """
+        Import compound library from a JSON file created by csv_to_json.py.
+        This method reads a JSON file with the structure created by csv_to_json.py
+        and generates adduct variants for each compound.
+        Args:
+            jsonfile: Path to the JSON file
+            polarity: Ionization polarity ("positive", "negative", or None for positive)
+            adducts: Specific adducts to generate. If None, generates defaults for the polarity
+            min_probability: Minimum probability threshold for adduct filtering
+        Expected JSON structure:
+            {
+                "version": "1.0",
+                "creation_date": "2025-10-07T09:17:06.142290",
+                "description": "Converted from CSV file...",
+                "source_file": "filename.csv",
+                "record_count": 123,
+                "data": [
+                    {
+                        "name": "compound name",
+                        "smiles": "SMILES string",
+                        "inchikey": "InChI key",
+                        "formula": "molecular formula",
+                        "db_id": "database ID",
+                        "db": "database name"
+                    },
+                    ...
+                ]
+            }
+        Raises:
+            FileNotFoundError: If JSON file doesn't exist
+            ValueError: If JSON structure is invalid or required data is missing
+        """
+        if not os.path.exists(jsonfile):
+            raise FileNotFoundError(f"JSON file not found: {jsonfile}")
+        # Read and parse JSON file
+        try:
+            with open(jsonfile, 'r', encoding='utf-8') as f:
+                json_data = json.load(f)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON file: {e}") from e
+        except Exception as e:
+            raise ValueError(f"Error reading JSON file: {e}") from e
+        # Validate JSON structure
+        if not isinstance(json_data, dict):
+            raise ValueError("JSON file must contain a dictionary at root level")
+        if "data" not in json_data:
+            raise ValueError("JSON file must contain a 'data' field with compound records")
+        data = json_data["data"]
+        if not isinstance(data, list):
+            raise ValueError("'data' field must be a list of compound records")
+        # Extract metadata for reporting
+        version = json_data.get("version", "unknown")
+        source_file = json_data.get("source_file", "unknown")
+        record_count = json_data.get("record_count", len(data))
+        print(f"Loading JSON library: version {version}, source: {source_file}, records: {record_count}")
+        # Process each compound
+        all_variants = []
+        cmpd_id_counter = 1
+        lib_id_counter = 1
+        total_compounds = 0
+        skipped_compounds = 0
+        for compound_record in data:
+            total_compounds += 1
+            # Validate required fields
+            if not isinstance(compound_record, dict):
+                skipped_compounds += 1
+                continue
+            formula = compound_record.get("formula", "")
+            if not formula or not isinstance(formula, str):
+                skipped_compounds += 1
+                continue
+            # Extract compound data, handling both CSV column names and JSON field names
+            compound_level_uid = cmpd_id_counter
+            cmpd_id_counter += 1
+            compound_data = {
+                "name": compound_record.get("name", compound_record.get("Name", "")),
+                "shortname": compound_record.get("shortname", ""),
+                "class": compound_record.get("class", ""),
+                "smiles": compound_record.get("smiles", compound_record.get("SMILES", "")),
+                "inchi": compound_record.get("inchi", compound_record.get("InChI", "")),
+                "inchikey": compound_record.get("inchikey", compound_record.get("InChIKey", "")),
+                "formula": formula,
+                "rt": self._safe_float_conversion(compound_record.get("rt", compound_record.get("RT", None))),
+                "db_id": compound_record.get("db_id", compound_record.get("database_id", None)),
+                "db": compound_record.get("db", compound_record.get("database", None)),
+                "cmpd_uid": compound_level_uid,
+            }
+            # Generate adduct variants
+            variants, lib_id_counter = self._generate_adduct_variants(
+                compound_data, adducts=adducts, polarity=polarity,
+                lib_id_counter=lib_id_counter, min_probability=min_probability
+            )
+            all_variants.extend(variants)
+            # Track if compound was skipped due to invalid formula
+            if len(variants) == 0:
+                skipped_compounds += 1
+        # Convert to DataFrame and store
+        if all_variants:
+            new_lib_df = pl.DataFrame(all_variants)
+            # Combine with existing data if any
+            if self.lib_df is not None and len(self.lib_df) > 0:
+                self.lib_df = pl.concat([self.lib_df, new_lib_df])
+            else:
+                self.lib_df = new_lib_df
+            print(f"Imported {len(all_variants)} library entries from {jsonfile}")
+        else:
+            print(f"No valid compounds found in {jsonfile}")
+            if skipped_compounds > 0:
+                print(f"All {total_compounds} compounds were skipped due to invalid formulas")
     def _map_csv_columns(self, columns: List[str]) -> Dict[str, str]:
         """
         Map CSV column names to standardized internal names (case-insensitive).

masster/study/id.py CHANGED Viewed

@@ -21,10 +21,10 @@ def lib_load(
     Args:
         study: Study instance
-        lib_source: either a CSV file path (str) or a Lib instance
-        polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV path.
+        lib_source: either a CSV/JSON file path (str) or a Lib instance
+        polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV/JSON path.
                  If None, uses study.polarity automatically.
-        adducts: specific adducts to generate - used when lib_source is a CSV path
+        adducts: specific adducts to generate - used when lib_source is a CSV/JSON path
         iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)
     Side effects:
@@ -38,7 +38,7 @@ def lib_load(
         Lib = None
     if lib_source is None:
-        raise ValueError("lib_source must be a CSV file path (str) or a Lib instance")
+        raise ValueError("lib_source must be a CSV/JSON file path (str) or a Lib instance")
     # Use study polarity if not explicitly provided
     if polarity is None:
@@ -52,15 +52,23 @@ def lib_load(
             polarity = "positive"  # Default fallback
         study.logger.debug(f"Using study polarity: {polarity}")
-    # Handle string input (CSV file path)
+    # Handle string input (CSV or JSON file path)
     if isinstance(lib_source, str):
         if Lib is None:
             raise ImportError(
-                "Could not import masster.lib.lib.Lib - required for CSV loading",
+                "Could not import masster.lib.lib.Lib - required for CSV/JSON loading",
             )
         lib_obj = Lib()
-        lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
+        # Determine file type by extension
+        if lib_source.lower().endswith('.json'):
+            lib_obj.import_json(lib_source, polarity=polarity, adducts=adducts)
+        elif lib_source.lower().endswith('.csv'):
+            lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
+        else:
+            # Default to CSV behavior for backward compatibility
+            lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
     # Handle Lib instance
     elif Lib is not None and isinstance(lib_source, Lib):
@@ -72,7 +80,7 @@ def lib_load(
     else:
         raise TypeError(
-            "lib_source must be a CSV file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
+            "lib_source must be a CSV/JSON file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
         )
     # Ensure lib_df is populated
@@ -101,7 +109,7 @@ def lib_load(
     # Store pointer and DataFrame on study
     study._lib = lib_obj
-    # Add source_id column with filename (without path) if loading from CSV
+    # Add source_id column with filename (without path) if loading from CSV/JSON
     if isinstance(lib_source, str):
         import os
         filename_only = os.path.basename(lib_source)

{masster-0.5.19.dist-info → masster-0.5.20.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.5.19
+Version: 0.5.20
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster

{masster-0.5.19.dist-info → masster-0.5.20.dist-info}/RECORD RENAMED Viewed

@@ -3,21 +3,15 @@ masster/_version.py,sha256=uCkE1NJ7J1aQrPg6o1mVRwVi9N10aB8nbGRTr0cwkNY,257
 masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
 masster/logger.py,sha256=XT2gUcUIct8LWzTp9n484g5MaB89toT76CGA41oBvfA,18375
 masster/spectrum.py,sha256=TWIgDcl0lveG40cLVZTWGp8-FxMolu-P8EjZyRBtXL4,49850
-masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5,sha256=LdJMF8uLoDm9ixZNHBoOzBH6hX7NGY7vTvqa2Pzetb8,6539174
-masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5,sha256=hWUfslGoOTiQw59jENSBXP4sa6DdkbOi40FJ68ep61Q,6956773
-masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5,sha256=dSd2cIgYYdRcNSzkhqlZCeWKi3x8Hhhcx8BFMuiVG4c,11382948
-masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5,sha256=wER8CHSBz54Yx1kwmU7ghPPWVwYvxv_lXGB8-8a1xpQ,9508434
-masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5,sha256=h2OOAWWTwKXzTNewhiYeL-cMYdp_JYLPya8Q9Nv9Lvw,12389587
 masster/data/libs/aa.csv,sha256=Sja1DyMsiaM2NfLcct4kAAcXYwPCukJJW8sDkup9w_c,1924
 masster/data/libs/ccm.csv,sha256=Q6nylV1152uTpX-ydqWeGrc6L9kgv45xN_fBZ4f7Tvo,12754
-masster/data/libs/hilic.csv,sha256=Ao2IN9t7GiFWEBJg21TmNJZjTbyHC3e0dJcfftAKsM4,671265
 masster/data/libs/urine.csv,sha256=iRrR4N8Wzb8KDhHJA4LqoQC35pp93FSaOKvXPrgFHis,653736
 masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data,sha256=01vC6m__Qqm2rLvlTMZoeKIKowFvovBTUnrNl8Uav3E,24576
 masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff,sha256=go5N9gAM1rn4PZAVaoCmdteY9f7YGEM9gyPdSmkQ8PE,1447936
 masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan,sha256=ahi1Y3UhAj9Bj4Q2MlbgPekNdkJvMOoMXVOoR6CeIxc,13881220
 masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2,sha256=TFB0HW4Agkig6yht7FtgjUdbXax8jjKaHpSZSvuU5vs,3252224
 masster/lib/__init__.py,sha256=TcePNx3SYZHz6763TL9Sg4gUNXaRWjlrOtyS6vsu-hg,178
-masster/lib/lib.py,sha256=cDV4bL2Ax0WWZReiTYze0W6XVMTJRC9DMlVYd6piRIs,38934
+masster/lib/lib.py,sha256=SGWuiCTHc65khmLndC2cFBCO1rk8-SS6BkG4C_nOf-o,44984
 masster/sample/__init__.py,sha256=HL0m1ept0PMAYUCQtDDnkdOS12IFl6oLAq4TZQz83uY,170
 masster/sample/adducts.py,sha256=SU6S3pyyLQUSg6yPcdj3p4MfwNDsp0_FYwYYb7F4li0,33798
 masster/sample/h5.py,sha256=0FE6eH9n8RaO59HjKnFo0kKmr8L44UOQIBqcpD3LW0s,117749
@@ -44,7 +38,7 @@ masster/study/analysis.py,sha256=L-wXBnGZCLB5UUDrjIdOiMG9zdej3Tw_SftcEmmTukM,842
 masster/study/export.py,sha256=c1HJdLAM6Ply0n8f0DjMk4mXd9lOYePr60UJTBksUho,60092
 masster/study/h5.py,sha256=bznE9kKEfLNo0QtbyC6a6snfnR3Zjkx5BcjBNbRVlJ8,99579
 masster/study/helpers.py,sha256=FdvQV-CgQyBhXMqk6_92aKSBsZhJBK4joLxEdKzmuhw,192011
-masster/study/id.py,sha256=GtOyu5vuK8utydpZn-zNjTbHZsEabqAUGrCwVbauzWA,90844
+masster/study/id.py,sha256=H6LhD0fbuxM4i0JIhHvWKLMa86fpXyEAHzbCCW5ffBA,91288
 masster/study/importers.py,sha256=1Oco0yMid_siMMZdK7rQlhS20XikgjBBNAirbTHx5O8,13958
 masster/study/load.py,sha256=EsKpxUuduu-w1TREfHTYxRdEncWTd15h2IwoB3D_nuc,72070
 masster/study/merge.py,sha256=SwrsbcoI6hIuepvTJEFNoXncwMx1XXr6FVOvkSyfIbs,169239
@@ -69,8 +63,8 @@ masster/wizard/README.md,sha256=RX3uxT1qD5i9iDSznZUbnukixonqr96JlUE4TwssAgY,1411
 masster/wizard/__init__.py,sha256=L9G_datyGSFJjrBVklEVpZVLGXzUhDiWobtiygBH8vQ,669
 masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
 masster/wizard/wizard.py,sha256=ckOz-8zrk8i7tDjqkk-shYFA2Ni9fV4nejocEjTX78M,65265
-masster-0.5.19.dist-info/METADATA,sha256=bpOG_-B3UKjR1JtWcnze5-g4uNyob2mXeNbHYVWf61M,45153
-masster-0.5.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-masster-0.5.19.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
-masster-0.5.19.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
-masster-0.5.19.dist-info/RECORD,,
+masster-0.5.20.dist-info/METADATA,sha256=FJGXFasiyqxkR1R34mXnMANYqQ_ArHLktC3DqLRvg1I,45153
+masster-0.5.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+masster-0.5.20.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
+masster-0.5.20.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
+masster-0.5.20.dist-info/RECORD,,

masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 DELETED Viewed

Binary file

masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 DELETED Viewed

Binary file

masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 DELETED Viewed

Binary file

masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 DELETED Viewed

Binary file

masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 DELETED Viewed

Binary file

masster 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl

Potentially problematic release.

masster 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl