PyPI - masster - Versions diffs - 0.5.19__tar.gz → 0.5.20__tar.gz - Mend

masster 0.5.19__tar.gz → 0.5.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (98) hide show

{masster-0.5.19 → masster-0.5.20}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.5.19
+Version: 0.5.20
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster

{masster-0.5.19 → masster-0.5.20}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "masster"
-version = "0.5.19"
+version = "0.5.20"
 description = "Mass spectrometry data analysis package"
 authors = [
     { name = "Zamboni Lab" }

{masster-0.5.19 → masster-0.5.20}/src/masster/lib/lib.py RENAMED Viewed

@@ -46,6 +46,7 @@ annotations = lib.annotate_features(sample.features_df)
 """
 import os
+import json
 from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING
 import warnings
@@ -685,6 +686,142 @@ class Lib:
             if skipped_compounds > 0:
                 print(f"All {total_compounds} compounds were skipped due to invalid formulas")
+    def import_json(self,
+                   jsonfile: str,
+                   polarity: Optional[str] = None,
+                   adducts: Optional[List[str]] = None,
+                   min_probability: float = 0.03) -> None:
+        """
+        Import compound library from a JSON file created by csv_to_json.py.
+        This method reads a JSON file with the structure created by csv_to_json.py
+        and generates adduct variants for each compound.
+        Args:
+            jsonfile: Path to the JSON file
+            polarity: Ionization polarity ("positive", "negative", or None for positive)
+            adducts: Specific adducts to generate. If None, generates defaults for the polarity
+            min_probability: Minimum probability threshold for adduct filtering
+        Expected JSON structure:
+            {
+                "version": "1.0",
+                "creation_date": "2025-10-07T09:17:06.142290",
+                "description": "Converted from CSV file...",
+                "source_file": "filename.csv",
+                "record_count": 123,
+                "data": [
+                    {
+                        "name": "compound name",
+                        "smiles": "SMILES string",
+                        "inchikey": "InChI key",
+                        "formula": "molecular formula",
+                        "db_id": "database ID",
+                        "db": "database name"
+                    },
+                    ...
+                ]
+            }
+        Raises:
+            FileNotFoundError: If JSON file doesn't exist
+            ValueError: If JSON structure is invalid or required data is missing
+        """
+        if not os.path.exists(jsonfile):
+            raise FileNotFoundError(f"JSON file not found: {jsonfile}")
+        # Read and parse JSON file
+        try:
+            with open(jsonfile, 'r', encoding='utf-8') as f:
+                json_data = json.load(f)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON file: {e}") from e
+        except Exception as e:
+            raise ValueError(f"Error reading JSON file: {e}") from e
+        # Validate JSON structure
+        if not isinstance(json_data, dict):
+            raise ValueError("JSON file must contain a dictionary at root level")
+        if "data" not in json_data:
+            raise ValueError("JSON file must contain a 'data' field with compound records")
+        data = json_data["data"]
+        if not isinstance(data, list):
+            raise ValueError("'data' field must be a list of compound records")
+        # Extract metadata for reporting
+        version = json_data.get("version", "unknown")
+        source_file = json_data.get("source_file", "unknown")
+        record_count = json_data.get("record_count", len(data))
+        print(f"Loading JSON library: version {version}, source: {source_file}, records: {record_count}")
+        # Process each compound
+        all_variants = []
+        cmpd_id_counter = 1
+        lib_id_counter = 1
+        total_compounds = 0
+        skipped_compounds = 0
+        for compound_record in data:
+            total_compounds += 1
+            # Validate required fields
+            if not isinstance(compound_record, dict):
+                skipped_compounds += 1
+                continue
+            formula = compound_record.get("formula", "")
+            if not formula or not isinstance(formula, str):
+                skipped_compounds += 1
+                continue
+            # Extract compound data, handling both CSV column names and JSON field names
+            compound_level_uid = cmpd_id_counter
+            cmpd_id_counter += 1
+            compound_data = {
+                "name": compound_record.get("name", compound_record.get("Name", "")),
+                "shortname": compound_record.get("shortname", ""),
+                "class": compound_record.get("class", ""),
+                "smiles": compound_record.get("smiles", compound_record.get("SMILES", "")),
+                "inchi": compound_record.get("inchi", compound_record.get("InChI", "")),
+                "inchikey": compound_record.get("inchikey", compound_record.get("InChIKey", "")),
+                "formula": formula,
+                "rt": self._safe_float_conversion(compound_record.get("rt", compound_record.get("RT", None))),
+                "db_id": compound_record.get("db_id", compound_record.get("database_id", None)),
+                "db": compound_record.get("db", compound_record.get("database", None)),
+                "cmpd_uid": compound_level_uid,
+            }
+            # Generate adduct variants
+            variants, lib_id_counter = self._generate_adduct_variants(
+                compound_data, adducts=adducts, polarity=polarity,
+                lib_id_counter=lib_id_counter, min_probability=min_probability
+            )
+            all_variants.extend(variants)
+            # Track if compound was skipped due to invalid formula
+            if len(variants) == 0:
+                skipped_compounds += 1
+        # Convert to DataFrame and store
+        if all_variants:
+            new_lib_df = pl.DataFrame(all_variants)
+            # Combine with existing data if any
+            if self.lib_df is not None and len(self.lib_df) > 0:
+                self.lib_df = pl.concat([self.lib_df, new_lib_df])
+            else:
+                self.lib_df = new_lib_df
+            print(f"Imported {len(all_variants)} library entries from {jsonfile}")
+        else:
+            print(f"No valid compounds found in {jsonfile}")
+            if skipped_compounds > 0:
+                print(f"All {total_compounds} compounds were skipped due to invalid formulas")
     def _map_csv_columns(self, columns: List[str]) -> Dict[str, str]:
         """
         Map CSV column names to standardized internal names (case-insensitive).

{masster-0.5.19 → masster-0.5.20}/src/masster/study/id.py RENAMED Viewed

@@ -21,10 +21,10 @@ def lib_load(
     Args:
         study: Study instance
-        lib_source: either a CSV file path (str) or a Lib instance
-        polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV path.
+        lib_source: either a CSV/JSON file path (str) or a Lib instance
+        polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV/JSON path.
                  If None, uses study.polarity automatically.
-        adducts: specific adducts to generate - used when lib_source is a CSV path
+        adducts: specific adducts to generate - used when lib_source is a CSV/JSON path
         iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)
     Side effects:
@@ -38,7 +38,7 @@ def lib_load(
         Lib = None
     if lib_source is None:
-        raise ValueError("lib_source must be a CSV file path (str) or a Lib instance")
+        raise ValueError("lib_source must be a CSV/JSON file path (str) or a Lib instance")
     # Use study polarity if not explicitly provided
     if polarity is None:
@@ -52,15 +52,23 @@ def lib_load(
             polarity = "positive"  # Default fallback
         study.logger.debug(f"Using study polarity: {polarity}")
-    # Handle string input (CSV file path)
+    # Handle string input (CSV or JSON file path)
     if isinstance(lib_source, str):
         if Lib is None:
             raise ImportError(
-                "Could not import masster.lib.lib.Lib - required for CSV loading",
+                "Could not import masster.lib.lib.Lib - required for CSV/JSON loading",
             )
         lib_obj = Lib()
-        lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
+        # Determine file type by extension
+        if lib_source.lower().endswith('.json'):
+            lib_obj.import_json(lib_source, polarity=polarity, adducts=adducts)
+        elif lib_source.lower().endswith('.csv'):
+            lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
+        else:
+            # Default to CSV behavior for backward compatibility
+            lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
     # Handle Lib instance
     elif Lib is not None and isinstance(lib_source, Lib):
@@ -72,7 +80,7 @@ def lib_load(
     else:
         raise TypeError(
-            "lib_source must be a CSV file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
+            "lib_source must be a CSV/JSON file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
         )
     # Ensure lib_df is populated
@@ -101,7 +109,7 @@ def lib_load(
     # Store pointer and DataFrame on study
     study._lib = lib_obj
-    # Add source_id column with filename (without path) if loading from CSV
+    # Add source_id column with filename (without path) if loading from CSV/JSON
     if isinstance(lib_source, str):
         import os
         filename_only = os.path.basename(lib_source)

masster 0.5.19__tar.gz → 0.5.20__tar.gz

Potentially problematic release.

masster 0.5.19__tar.gz → 0.5.20__tar.gz