PyPI - masster - Versions diffs - 0.5.19__tar.gz → 0.5.21__tar.gz - Mend

masster 0.5.19__tar.gz → 0.5.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (98) hide show

{masster-0.5.19 → masster-0.5.21}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.5.19
+Version: 0.5.21
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster

{masster-0.5.19 → masster-0.5.21}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "masster"
-version = "0.5.19"
+version = "0.5.21"
 description = "Mass spectrometry data analysis package"
 authors = [
     { name = "Zamboni Lab" }

{masster-0.5.19 → masster-0.5.21}/src/masster/_version.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
-__version__ = "0.5.19"
+__version__ = "0.5.21"
 def get_version():

{masster-0.5.19 → masster-0.5.21}/src/masster/lib/lib.py RENAMED Viewed

@@ -46,6 +46,7 @@ annotations = lib.annotate_features(sample.features_df)
 """
 import os
+import json
 from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING
 import warnings
@@ -685,6 +686,142 @@ class Lib:
             if skipped_compounds > 0:
                 print(f"All {total_compounds} compounds were skipped due to invalid formulas")
+    def import_json(self,
+                   jsonfile: str,
+                   polarity: Optional[str] = None,
+                   adducts: Optional[List[str]] = None,
+                   min_probability: float = 0.03) -> None:
+        """
+        Import compound library from a JSON file created by csv_to_json.py.
+        This method reads a JSON file with the structure created by csv_to_json.py
+        and generates adduct variants for each compound.
+        Args:
+            jsonfile: Path to the JSON file
+            polarity: Ionization polarity ("positive", "negative", or None for positive)
+            adducts: Specific adducts to generate. If None, generates defaults for the polarity
+            min_probability: Minimum probability threshold for adduct filtering
+        Expected JSON structure:
+            {
+                "version": "1.0",
+                "creation_date": "2025-10-07T09:17:06.142290",
+                "description": "Converted from CSV file...",
+                "source_file": "filename.csv",
+                "record_count": 123,
+                "data": [
+                    {
+                        "name": "compound name",
+                        "smiles": "SMILES string",
+                        "inchikey": "InChI key",
+                        "formula": "molecular formula",
+                        "db_id": "database ID",
+                        "db": "database name"
+                    },
+                    ...
+                ]
+            }
+        Raises:
+            FileNotFoundError: If JSON file doesn't exist
+            ValueError: If JSON structure is invalid or required data is missing
+        """
+        if not os.path.exists(jsonfile):
+            raise FileNotFoundError(f"JSON file not found: {jsonfile}")
+        # Read and parse JSON file
+        try:
+            with open(jsonfile, 'r', encoding='utf-8') as f:
+                json_data = json.load(f)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON file: {e}") from e
+        except Exception as e:
+            raise ValueError(f"Error reading JSON file: {e}") from e
+        # Validate JSON structure
+        if not isinstance(json_data, dict):
+            raise ValueError("JSON file must contain a dictionary at root level")
+        if "data" not in json_data:
+            raise ValueError("JSON file must contain a 'data' field with compound records")
+        data = json_data["data"]
+        if not isinstance(data, list):
+            raise ValueError("'data' field must be a list of compound records")
+        # Extract metadata for reporting
+        version = json_data.get("version", "unknown")
+        source_file = json_data.get("source_file", "unknown")
+        record_count = json_data.get("record_count", len(data))
+        print(f"Loading JSON library: version {version}, source: {source_file}, records: {record_count}")
+        # Process each compound
+        all_variants = []
+        cmpd_id_counter = 1
+        lib_id_counter = 1
+        total_compounds = 0
+        skipped_compounds = 0
+        for compound_record in data:
+            total_compounds += 1
+            # Validate required fields
+            if not isinstance(compound_record, dict):
+                skipped_compounds += 1
+                continue
+            formula = compound_record.get("formula", "")
+            if not formula or not isinstance(formula, str):
+                skipped_compounds += 1
+                continue
+            # Extract compound data, handling both CSV column names and JSON field names
+            compound_level_uid = cmpd_id_counter
+            cmpd_id_counter += 1
+            compound_data = {
+                "name": compound_record.get("name", compound_record.get("Name", "")),
+                "shortname": compound_record.get("shortname", ""),
+                "class": compound_record.get("class", ""),
+                "smiles": compound_record.get("smiles", compound_record.get("SMILES", "")),
+                "inchi": compound_record.get("inchi", compound_record.get("InChI", "")),
+                "inchikey": compound_record.get("inchikey", compound_record.get("InChIKey", "")),
+                "formula": formula,
+                "rt": self._safe_float_conversion(compound_record.get("rt", compound_record.get("RT", None))),
+                "db_id": compound_record.get("db_id", compound_record.get("database_id", None)),
+                "db": compound_record.get("db", compound_record.get("database", None)),
+                "cmpd_uid": compound_level_uid,
+            }
+            # Generate adduct variants
+            variants, lib_id_counter = self._generate_adduct_variants(
+                compound_data, adducts=adducts, polarity=polarity,
+                lib_id_counter=lib_id_counter, min_probability=min_probability
+            )
+            all_variants.extend(variants)
+            # Track if compound was skipped due to invalid formula
+            if len(variants) == 0:
+                skipped_compounds += 1
+        # Convert to DataFrame and store
+        if all_variants:
+            new_lib_df = pl.DataFrame(all_variants)
+            # Combine with existing data if any
+            if self.lib_df is not None and len(self.lib_df) > 0:
+                self.lib_df = pl.concat([self.lib_df, new_lib_df])
+            else:
+                self.lib_df = new_lib_df
+            print(f"Imported {len(all_variants)} library entries from {jsonfile}")
+        else:
+            print(f"No valid compounds found in {jsonfile}")
+            if skipped_compounds > 0:
+                print(f"All {total_compounds} compounds were skipped due to invalid formulas")
     def _map_csv_columns(self, columns: List[str]) -> Dict[str, str]:
         """
         Map CSV column names to standardized internal names (case-insensitive).

{masster-0.5.19 → masster-0.5.21}/src/masster/study/id.py RENAMED Viewed

@@ -21,10 +21,10 @@ def lib_load(
     Args:
         study: Study instance
-        lib_source: either a CSV file path (str) or a Lib instance
-        polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV path.
+        lib_source: either a CSV/JSON file path (str) or a Lib instance
+        polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV/JSON path.
                  If None, uses study.polarity automatically.
-        adducts: specific adducts to generate - used when lib_source is a CSV path
+        adducts: specific adducts to generate - used when lib_source is a CSV/JSON path
         iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)
     Side effects:
@@ -38,7 +38,7 @@ def lib_load(
         Lib = None
     if lib_source is None:
-        raise ValueError("lib_source must be a CSV file path (str) or a Lib instance")
+        raise ValueError("lib_source must be a CSV/JSON file path (str) or a Lib instance")
     # Use study polarity if not explicitly provided
     if polarity is None:
@@ -52,15 +52,23 @@ def lib_load(
             polarity = "positive"  # Default fallback
         study.logger.debug(f"Using study polarity: {polarity}")
-    # Handle string input (CSV file path)
+    # Handle string input (CSV or JSON file path)
     if isinstance(lib_source, str):
         if Lib is None:
             raise ImportError(
-                "Could not import masster.lib.lib.Lib - required for CSV loading",
+                "Could not import masster.lib.lib.Lib - required for CSV/JSON loading",
             )
         lib_obj = Lib()
-        lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
+        # Determine file type by extension
+        if lib_source.lower().endswith('.json'):
+            lib_obj.import_json(lib_source, polarity=polarity, adducts=adducts)
+        elif lib_source.lower().endswith('.csv'):
+            lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
+        else:
+            # Default to CSV behavior for backward compatibility
+            lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
     # Handle Lib instance
     elif Lib is not None and isinstance(lib_source, Lib):
@@ -72,7 +80,7 @@ def lib_load(
     else:
         raise TypeError(
-            "lib_source must be a CSV file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
+            "lib_source must be a CSV/JSON file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
         )
     # Ensure lib_df is populated
@@ -101,7 +109,7 @@ def lib_load(
     # Store pointer and DataFrame on study
     study._lib = lib_obj
-    # Add source_id column with filename (without path) if loading from CSV
+    # Add source_id column with filename (without path) if loading from CSV/JSON
     if isinstance(lib_source, str):
         import os
         filename_only = os.path.basename(lib_source)

{masster-0.5.19 → masster-0.5.21}/src/masster/wizard/wizard.py RENAMED Viewed

@@ -838,7 +838,7 @@ class Wizard:
             f"Polarity detected: {source_info.get('polarity', 'unknown')}",
             "Files created:"]
         for file_path in files_created:
-            instructions.append(f"  ✅ {str(Path(file_path).resolve())}")
+            instructions.append(f"  [OK] {str(Path(file_path).resolve())}")
         # Find the workflow script name from created files
         workflow_script_name = "1_masster_workflow.py"
@@ -877,7 +877,7 @@ class Wizard:
             # Add test mode print after the masster version line
             if 'print("masster' in line and 'Automated MS Data Analysis")' in line:
                 lines.insert(i + 1, '        if TEST_MODE:')
-                lines.insert(i + 2, '            print("🧪 TEST MODE: Processing single file only")')
+                lines.insert(i + 2, '            print("[TEST] TEST MODE: Processing single file only")')
                 break
         # Add mode info after num_cores print
@@ -894,7 +894,7 @@ class Wizard:
                 lines.insert(i + 2, '        # Limit to first file in test mode')
                 lines.insert(i + 3, '        if TEST_MODE:')
                 lines.insert(i + 4, '            raw_files = raw_files[:1]')
-                lines.insert(i + 5, '            print(f"\\n🧪 TEST MODE: Processing only first file: {raw_files[0].name}")')
+                lines.insert(i + 5, '            print(f"\\n[TEST] TEST MODE: Processing only first file: {raw_files[0].name}")')
                 break
         # Modify num_cores for test mode
@@ -909,7 +909,7 @@ class Wizard:
                 lines.insert(i + 1, '        ')
                 lines.insert(i + 2, '        # Stop here if test-only mode')
                 lines.insert(i + 3, '        if TEST_ONLY:')
-                lines.insert(i + 4, '            print("\\n🧪 TEST ONLY mode: Stopping after successful single file processing")')
+                lines.insert(i + 4, '            print("\\n[TEST] TEST ONLY mode: Stopping after successful single file processing")')
                 lines.insert(i + 5, '            print(f"Test file created: {sample5_files[0]}")')
                 lines.insert(i + 6, '            print("\\nTo run full batch, use: wizard.run()")')
                 lines.insert(i + 7, '            total_time = time.time() - start_time')
@@ -985,7 +985,7 @@ class Wizard:
                     "status": "error",
                     "message": "Workflow script not found. Please run create_scripts() first.",
                     "instructions": [
-                        "❌ Missing 1_masster_workflow.py",
+                        "[ERROR] Missing 1_masster_workflow.py",
                         "Run: wizard.create_scripts()",
                         "Then: wizard.run()"
                     ]
@@ -1008,8 +1008,8 @@ class Wizard:
                     env['MASSTER_TEST_ONLY'] = '1'
             # Execute the workflow script
-            print(f"🚀 Executing {mode_label} processing workflow...")
-            print(f"📄 Running: {workflow_script_path.name}")
+            print(f">> Executing {mode_label} processing workflow...")
+            print(f"[SCRIPT] Running: {workflow_script_path.name}")
             print("=" * 60)
             import subprocess
@@ -1022,16 +1022,16 @@ class Wizard:
             if success:
                 print("=" * 60)
                 if test_only:
-                    print("✅ Test-only processing completed successfully!")
-                    print("📋 Single file validated - ready for full batch")
+                    print("[OK] Test-only processing completed successfully!")
+                    print("[INFO] Single file validated - ready for full batch")
                     print("   wizard.run()")
                 elif test_mode:
-                    print("✅ Test processing completed successfully!")
-                    print("📋 Next step: Run full batch")
+                    print("[OK] Test processing completed successfully!")
+                    print("[INFO] Next step: Run full batch")
                     print("   wizard.run()")
                 else:
-                    print("✅ Sample processing completed successfully!")
-                    print("📋 Next step: Run interactive analysis")
+                    print("[OK] Sample processing completed successfully!")
+                    print("[INFO] Next step: Run interactive analysis")
                     print("   uv run marimo edit 2_interactive_analysis.py")
                 print("=" * 60)
@@ -1042,7 +1042,7 @@ class Wizard:
                     "status": "success",
                     "message": f"{mode_label.capitalize()} processing completed successfully",
                     "instructions": [
-                        f"✅ {mode_label.capitalize()} processing completed",
+                        f"[OK] {mode_label.capitalize()} processing completed",
                         next_step
                     ]
                 }
@@ -1051,7 +1051,7 @@ class Wizard:
                     "status": "error",
                     "message": f"Workflow execution failed with return code {result.returncode}",
                     "instructions": [
-                        "❌ Check the error messages above",
+                        "[ERROR] Check the error messages above",
                         "Review parameters in 1_masster_workflow.py",
                         f"Try running manually: python {workflow_script_path.name}"
                     ]
@@ -1062,7 +1062,7 @@ class Wizard:
                 "status": "error",
                 "message": f"Failed to execute workflow: {e}",
                 "instructions": [
-                    "❌ Execution failed",
+                    "[ERROR] Execution failed",
                     "Check that source files exist and are accessible",
                     "Verify folder permissions"
                 ]

masster 0.5.19__tar.gz → 0.5.21__tar.gz

Potentially problematic release.

masster 0.5.19__tar.gz → 0.5.21__tar.gz