masster 0.5.19__tar.gz → 0.5.20__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (98)
  1. {masster-0.5.19 → masster-0.5.20}/PKG-INFO +1 -1
  2. {masster-0.5.19 → masster-0.5.20}/pyproject.toml +1 -1
  3. {masster-0.5.19 → masster-0.5.20}/src/masster/lib/lib.py +137 -0
  4. {masster-0.5.19 → masster-0.5.20}/src/masster/study/id.py +17 -9
  5. masster-0.5.20/uv.lock +3664 -0
  6. masster-0.5.19/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  7. masster-0.5.19/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  8. masster-0.5.19/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  9. masster-0.5.19/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  10. masster-0.5.19/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  11. masster-0.5.19/src/masster/data/libs/hilic.csv +0 -4812
  12. masster-0.5.19/uv.lock +0 -3370
  13. {masster-0.5.19 → masster-0.5.20}/.github/workflows/publish.yml +0 -0
  14. {masster-0.5.19 → masster-0.5.20}/.github/workflows/security.yml +0 -0
  15. {masster-0.5.19 → masster-0.5.20}/.github/workflows/test.yml +0 -0
  16. {masster-0.5.19 → masster-0.5.20}/.gitignore +0 -0
  17. {masster-0.5.19 → masster-0.5.20}/.pre-commit-config.yaml +0 -0
  18. {masster-0.5.19 → masster-0.5.20}/LICENSE +0 -0
  19. {masster-0.5.19 → masster-0.5.20}/Makefile +0 -0
  20. {masster-0.5.19 → masster-0.5.20}/README.md +0 -0
  21. {masster-0.5.19 → masster-0.5.20}/TESTING.md +0 -0
  22. {masster-0.5.19 → masster-0.5.20}/demo/example_batch_process.py +0 -0
  23. {masster-0.5.19 → masster-0.5.20}/demo/example_sample_process.py +0 -0
  24. {masster-0.5.19 → masster-0.5.20}/src/masster/__init__.py +0 -0
  25. {masster-0.5.19 → masster-0.5.20}/src/masster/_version.py +0 -0
  26. {masster-0.5.19 → masster-0.5.20}/src/masster/chromatogram.py +0 -0
  27. {masster-0.5.19 → masster-0.5.20}/src/masster/data/libs/aa.csv +0 -0
  28. {masster-0.5.19 → masster-0.5.20}/src/masster/data/libs/ccm.csv +0 -0
  29. {masster-0.5.19 → masster-0.5.20}/src/masster/data/libs/urine.csv +0 -0
  30. {masster-0.5.19 → masster-0.5.20}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  31. {masster-0.5.19 → masster-0.5.20}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  32. {masster-0.5.19 → masster-0.5.20}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  33. {masster-0.5.19 → masster-0.5.20}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  34. {masster-0.5.19 → masster-0.5.20}/src/masster/lib/__init__.py +0 -0
  35. {masster-0.5.19 → masster-0.5.20}/src/masster/logger.py +0 -0
  36. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/__init__.py +0 -0
  37. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/adducts.py +0 -0
  38. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/defaults/__init__.py +0 -0
  39. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  40. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/defaults/find_features_def.py +0 -0
  41. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  42. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  43. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/defaults/sample_def.py +0 -0
  44. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/h5.py +0 -0
  45. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/helpers.py +0 -0
  46. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/lib.py +0 -0
  47. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/load.py +0 -0
  48. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/parameters.py +0 -0
  49. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/plot.py +0 -0
  50. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/processing.py +0 -0
  51. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/quant.py +0 -0
  52. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/sample.py +0 -0
  53. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/sample5_schema.json +0 -0
  54. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/save.py +0 -0
  55. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/sciex.py +0 -0
  56. {masster-0.5.19 → masster-0.5.20}/src/masster/sample/thermo.py +0 -0
  57. {masster-0.5.19 → masster-0.5.20}/src/masster/spectrum.py +0 -0
  58. {masster-0.5.19 → masster-0.5.20}/src/masster/study/__init__.py +0 -0
  59. {masster-0.5.19 → masster-0.5.20}/src/masster/study/analysis.py +0 -0
  60. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/__init__.py +0 -0
  61. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/align_def.py +0 -0
  62. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/export_def.py +0 -0
  63. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/fill_def.py +0 -0
  64. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/find_consensus_def.py +0 -0
  65. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/find_ms2_def.py +0 -0
  66. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/identify_def.py +0 -0
  67. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  68. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/integrate_def.py +0 -0
  69. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/merge_def.py +0 -0
  70. {masster-0.5.19 → masster-0.5.20}/src/masster/study/defaults/study_def.py +0 -0
  71. {masster-0.5.19 → masster-0.5.20}/src/masster/study/export.py +0 -0
  72. {masster-0.5.19 → masster-0.5.20}/src/masster/study/h5.py +0 -0
  73. {masster-0.5.19 → masster-0.5.20}/src/masster/study/helpers.py +0 -0
  74. {masster-0.5.19 → masster-0.5.20}/src/masster/study/importers.py +0 -0
  75. {masster-0.5.19 → masster-0.5.20}/src/masster/study/load.py +0 -0
  76. {masster-0.5.19 → masster-0.5.20}/src/masster/study/merge.py +0 -0
  77. {masster-0.5.19 → masster-0.5.20}/src/masster/study/parameters.py +0 -0
  78. {masster-0.5.19 → masster-0.5.20}/src/masster/study/plot.py +0 -0
  79. {masster-0.5.19 → masster-0.5.20}/src/masster/study/processing.py +0 -0
  80. {masster-0.5.19 → masster-0.5.20}/src/masster/study/save.py +0 -0
  81. {masster-0.5.19 → masster-0.5.20}/src/masster/study/study.py +0 -0
  82. {masster-0.5.19 → masster-0.5.20}/src/masster/study/study5_schema.json +0 -0
  83. {masster-0.5.19 → masster-0.5.20}/src/masster/wizard/README.md +0 -0
  84. {masster-0.5.19 → masster-0.5.20}/src/masster/wizard/__init__.py +0 -0
  85. {masster-0.5.19 → masster-0.5.20}/src/masster/wizard/example.py +0 -0
  86. {masster-0.5.19 → masster-0.5.20}/src/masster/wizard/wizard.py +0 -0
  87. {masster-0.5.19 → masster-0.5.20}/tests/conftest.py +0 -0
  88. {masster-0.5.19 → masster-0.5.20}/tests/test_chromatogram.py +0 -0
  89. {masster-0.5.19 → masster-0.5.20}/tests/test_defaults.py +0 -0
  90. {masster-0.5.19 → masster-0.5.20}/tests/test_imports.py +0 -0
  91. {masster-0.5.19 → masster-0.5.20}/tests/test_integration.py +0 -0
  92. {masster-0.5.19 → masster-0.5.20}/tests/test_logger.py +0 -0
  93. {masster-0.5.19 → masster-0.5.20}/tests/test_parameters.py +0 -0
  94. {masster-0.5.19 → masster-0.5.20}/tests/test_sample.py +0 -0
  95. {masster-0.5.19 → masster-0.5.20}/tests/test_spectrum.py +0 -0
  96. {masster-0.5.19 → masster-0.5.20}/tests/test_study.py +0 -0
  97. {masster-0.5.19 → masster-0.5.20}/tests/test_version.py +0 -0
  98. {masster-0.5.19 → masster-0.5.20}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.19
3
+ Version: 0.5.20
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.5.19"
4
+ version = "0.5.20"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -46,6 +46,7 @@ annotations = lib.annotate_features(sample.features_df)
46
46
  """
47
47
 
48
48
  import os
49
+ import json
49
50
  from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING
50
51
  import warnings
51
52
 
@@ -685,6 +686,142 @@ class Lib:
685
686
  if skipped_compounds > 0:
686
687
  print(f"All {total_compounds} compounds were skipped due to invalid formulas")
687
688
 
689
+ def import_json(self,
690
+ jsonfile: str,
691
+ polarity: Optional[str] = None,
692
+ adducts: Optional[List[str]] = None,
693
+ min_probability: float = 0.03) -> None:
694
+ """
695
+ Import compound library from a JSON file created by csv_to_json.py.
696
+
697
+ This method reads a JSON file with the structure created by csv_to_json.py
698
+ and generates adduct variants for each compound.
699
+
700
+ Args:
701
+ jsonfile: Path to the JSON file
702
+ polarity: Ionization polarity ("positive", "negative", or None for positive)
703
+ adducts: Specific adducts to generate. If None, generates defaults for the polarity
704
+ min_probability: Minimum probability threshold for adduct filtering
705
+
706
+ Expected JSON structure:
707
+ {
708
+ "version": "1.0",
709
+ "creation_date": "2025-10-07T09:17:06.142290",
710
+ "description": "Converted from CSV file...",
711
+ "source_file": "filename.csv",
712
+ "record_count": 123,
713
+ "data": [
714
+ {
715
+ "name": "compound name",
716
+ "smiles": "SMILES string",
717
+ "inchikey": "InChI key",
718
+ "formula": "molecular formula",
719
+ "db_id": "database ID",
720
+ "db": "database name"
721
+ },
722
+ ...
723
+ ]
724
+ }
725
+
726
+ Raises:
727
+ FileNotFoundError: If JSON file doesn't exist
728
+ ValueError: If JSON structure is invalid or required data is missing
729
+ """
730
+ if not os.path.exists(jsonfile):
731
+ raise FileNotFoundError(f"JSON file not found: {jsonfile}")
732
+
733
+ # Read and parse JSON file
734
+ try:
735
+ with open(jsonfile, 'r', encoding='utf-8') as f:
736
+ json_data = json.load(f)
737
+ except json.JSONDecodeError as e:
738
+ raise ValueError(f"Invalid JSON file: {e}") from e
739
+ except Exception as e:
740
+ raise ValueError(f"Error reading JSON file: {e}") from e
741
+
742
+ # Validate JSON structure
743
+ if not isinstance(json_data, dict):
744
+ raise ValueError("JSON file must contain a dictionary at root level")
745
+
746
+ if "data" not in json_data:
747
+ raise ValueError("JSON file must contain a 'data' field with compound records")
748
+
749
+ data = json_data["data"]
750
+ if not isinstance(data, list):
751
+ raise ValueError("'data' field must be a list of compound records")
752
+
753
+ # Extract metadata for reporting
754
+ version = json_data.get("version", "unknown")
755
+ source_file = json_data.get("source_file", "unknown")
756
+ record_count = json_data.get("record_count", len(data))
757
+
758
+ print(f"Loading JSON library: version {version}, source: {source_file}, records: {record_count}")
759
+
760
+ # Process each compound
761
+ all_variants = []
762
+ cmpd_id_counter = 1
763
+ lib_id_counter = 1
764
+ total_compounds = 0
765
+ skipped_compounds = 0
766
+
767
+ for compound_record in data:
768
+ total_compounds += 1
769
+
770
+ # Validate required fields
771
+ if not isinstance(compound_record, dict):
772
+ skipped_compounds += 1
773
+ continue
774
+
775
+ formula = compound_record.get("formula", "")
776
+ if not formula or not isinstance(formula, str):
777
+ skipped_compounds += 1
778
+ continue
779
+
780
+ # Extract compound data, handling both CSV column names and JSON field names
781
+ compound_level_uid = cmpd_id_counter
782
+ cmpd_id_counter += 1
783
+
784
+ compound_data = {
785
+ "name": compound_record.get("name", compound_record.get("Name", "")),
786
+ "shortname": compound_record.get("shortname", ""),
787
+ "class": compound_record.get("class", ""),
788
+ "smiles": compound_record.get("smiles", compound_record.get("SMILES", "")),
789
+ "inchi": compound_record.get("inchi", compound_record.get("InChI", "")),
790
+ "inchikey": compound_record.get("inchikey", compound_record.get("InChIKey", "")),
791
+ "formula": formula,
792
+ "rt": self._safe_float_conversion(compound_record.get("rt", compound_record.get("RT", None))),
793
+ "db_id": compound_record.get("db_id", compound_record.get("database_id", None)),
794
+ "db": compound_record.get("db", compound_record.get("database", None)),
795
+ "cmpd_uid": compound_level_uid,
796
+ }
797
+
798
+ # Generate adduct variants
799
+ variants, lib_id_counter = self._generate_adduct_variants(
800
+ compound_data, adducts=adducts, polarity=polarity,
801
+ lib_id_counter=lib_id_counter, min_probability=min_probability
802
+ )
803
+ all_variants.extend(variants)
804
+
805
+ # Track if compound was skipped due to invalid formula
806
+ if len(variants) == 0:
807
+ skipped_compounds += 1
808
+
809
+ # Convert to DataFrame and store
810
+ if all_variants:
811
+ new_lib_df = pl.DataFrame(all_variants)
812
+
813
+ # Combine with existing data if any
814
+ if self.lib_df is not None and len(self.lib_df) > 0:
815
+ self.lib_df = pl.concat([self.lib_df, new_lib_df])
816
+ else:
817
+ self.lib_df = new_lib_df
818
+
819
+ print(f"Imported {len(all_variants)} library entries from {jsonfile}")
820
+ else:
821
+ print(f"No valid compounds found in {jsonfile}")
822
+ if skipped_compounds > 0:
823
+ print(f"All {total_compounds} compounds were skipped due to invalid formulas")
824
+
688
825
  def _map_csv_columns(self, columns: List[str]) -> Dict[str, str]:
689
826
  """
690
827
  Map CSV column names to standardized internal names (case-insensitive).
@@ -21,10 +21,10 @@ def lib_load(
21
21
 
22
22
  Args:
23
23
  study: Study instance
24
- lib_source: either a CSV file path (str) or a Lib instance
25
- polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV path.
24
+ lib_source: either a CSV/JSON file path (str) or a Lib instance
25
+ polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV/JSON path.
26
26
  If None, uses study.polarity automatically.
27
- adducts: specific adducts to generate - used when lib_source is a CSV path
27
+ adducts: specific adducts to generate - used when lib_source is a CSV/JSON path
28
28
  iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)
29
29
 
30
30
  Side effects:
@@ -38,7 +38,7 @@ def lib_load(
38
38
  Lib = None
39
39
 
40
40
  if lib_source is None:
41
- raise ValueError("lib_source must be a CSV file path (str) or a Lib instance")
41
+ raise ValueError("lib_source must be a CSV/JSON file path (str) or a Lib instance")
42
42
 
43
43
  # Use study polarity if not explicitly provided
44
44
  if polarity is None:
@@ -52,15 +52,23 @@ def lib_load(
52
52
  polarity = "positive" # Default fallback
53
53
  study.logger.debug(f"Using study polarity: {polarity}")
54
54
 
55
- # Handle string input (CSV file path)
55
+ # Handle string input (CSV or JSON file path)
56
56
  if isinstance(lib_source, str):
57
57
  if Lib is None:
58
58
  raise ImportError(
59
- "Could not import masster.lib.lib.Lib - required for CSV loading",
59
+ "Could not import masster.lib.lib.Lib - required for CSV/JSON loading",
60
60
  )
61
61
 
62
62
  lib_obj = Lib()
63
- lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
63
+
64
+ # Determine file type by extension
65
+ if lib_source.lower().endswith('.json'):
66
+ lib_obj.import_json(lib_source, polarity=polarity, adducts=adducts)
67
+ elif lib_source.lower().endswith('.csv'):
68
+ lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
69
+ else:
70
+ # Default to CSV behavior for backward compatibility
71
+ lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
64
72
 
65
73
  # Handle Lib instance
66
74
  elif Lib is not None and isinstance(lib_source, Lib):
@@ -72,7 +80,7 @@ def lib_load(
72
80
 
73
81
  else:
74
82
  raise TypeError(
75
- "lib_source must be a CSV file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
83
+ "lib_source must be a CSV/JSON file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
76
84
  )
77
85
 
78
86
  # Ensure lib_df is populated
@@ -101,7 +109,7 @@ def lib_load(
101
109
  # Store pointer and DataFrame on study
102
110
  study._lib = lib_obj
103
111
 
104
- # Add source_id column with filename (without path) if loading from CSV
112
+ # Add source_id column with filename (without path) if loading from CSV/JSON
105
113
  if isinstance(lib_source, str):
106
114
  import os
107
115
  filename_only = os.path.basename(lib_source)