masster 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/lib/lib.py +137 -0
- masster/study/id.py +17 -9
- {masster-0.5.19.dist-info → masster-0.5.20.dist-info}/METADATA +1 -1
- {masster-0.5.19.dist-info → masster-0.5.20.dist-info}/RECORD +7 -13
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
- masster/data/libs/hilic.csv +0 -4812
- {masster-0.5.19.dist-info → masster-0.5.20.dist-info}/WHEEL +0 -0
- {masster-0.5.19.dist-info → masster-0.5.20.dist-info}/entry_points.txt +0 -0
- {masster-0.5.19.dist-info → masster-0.5.20.dist-info}/licenses/LICENSE +0 -0
masster/lib/lib.py
CHANGED
|
@@ -46,6 +46,7 @@ annotations = lib.annotate_features(sample.features_df)
|
|
|
46
46
|
"""
|
|
47
47
|
|
|
48
48
|
import os
|
|
49
|
+
import json
|
|
49
50
|
from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING
|
|
50
51
|
import warnings
|
|
51
52
|
|
|
@@ -685,6 +686,142 @@ class Lib:
|
|
|
685
686
|
if skipped_compounds > 0:
|
|
686
687
|
print(f"All {total_compounds} compounds were skipped due to invalid formulas")
|
|
687
688
|
|
|
689
|
+
def import_json(self,
                jsonfile: str,
                polarity: Optional[str] = None,
                adducts: Optional[List[str]] = None,
                min_probability: float = 0.03) -> None:
    """
    Import compound library from a JSON file created by csv_to_json.py.

    Reads the JSON document, validates its top-level structure, and
    generates adduct variants for every usable compound record. The
    resulting rows are stored in ``self.lib_df`` (appended to any rows
    already present).

    Args:
        jsonfile: Path to the JSON file
        polarity: Ionization polarity ("positive", "negative", or None).
            Forwarded unchanged to ``_generate_adduct_variants``.
        adducts: Specific adducts to generate. Forwarded unchanged to
            ``_generate_adduct_variants`` (None selects its defaults).
        min_probability: Minimum probability threshold for adduct filtering

    Expected JSON structure:
        {
            "version": "1.0",
            "creation_date": "2025-10-07T09:17:06.142290",
            "description": "Converted from CSV file...",
            "source_file": "filename.csv",
            "record_count": 123,
            "data": [
                {
                    "name": "compound name",
                    "smiles": "SMILES string",
                    "inchikey": "InChI key",
                    "formula": "molecular formula",
                    "db_id": "database ID",
                    "db": "database name"
                },
                ...
            ]
        }

    Records that are not objects, that lack a non-blank string
    ``formula``, or that yield no adduct variants are skipped; the
    number of skipped records is reported on stdout.

    Raises:
        FileNotFoundError: If JSON file doesn't exist
        ValueError: If the file cannot be read or parsed, or if the JSON
            structure is invalid or required data is missing
    """
    if not os.path.exists(jsonfile):
        raise FileNotFoundError(f"JSON file not found: {jsonfile}")

    # Read and parse JSON file. Every read/parse failure is surfaced as
    # ValueError (with the original exception chained) so callers only
    # need to handle one exception type for "unusable file".
    try:
        with open(jsonfile, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON file: {e}") from e
    except Exception as e:
        raise ValueError(f"Error reading JSON file: {e}") from e

    # Validate JSON structure
    if not isinstance(json_data, dict):
        raise ValueError("JSON file must contain a dictionary at root level")

    if "data" not in json_data:
        raise ValueError("JSON file must contain a 'data' field with compound records")

    data = json_data["data"]
    if not isinstance(data, list):
        raise ValueError("'data' field must be a list of compound records")

    # Extract metadata for reporting
    version = json_data.get("version", "unknown")
    source_file = json_data.get("source_file", "unknown")
    record_count = json_data.get("record_count", len(data))

    print(f"Loading JSON library: version {version}, source: {source_file}, records: {record_count}")

    # Process each compound
    # NOTE(review): both counters restart at 1 on every call, even when
    # rows are appended to an existing lib_df below - confirm whether IDs
    # are expected to stay unique across successive imports.
    all_variants = []
    cmpd_id_counter = 1
    lib_id_counter = 1
    total_compounds = 0
    skipped_compounds = 0

    for compound_record in data:
        total_compounds += 1

        # Validate required fields
        if not isinstance(compound_record, dict):
            skipped_compounds += 1
            continue

        formula = compound_record.get("formula", "")
        # A blank or whitespace-only formula is as unusable as a missing
        # one, so reject it here instead of handing it to adduct generation.
        if not isinstance(formula, str) or not formula.strip():
            skipped_compounds += 1
            continue
        formula = formula.strip()

        # Extract compound data, handling both CSV column names and JSON field names
        compound_level_uid = cmpd_id_counter
        cmpd_id_counter += 1

        compound_data = {
            "name": compound_record.get("name", compound_record.get("Name", "")),
            "shortname": compound_record.get("shortname", ""),
            "class": compound_record.get("class", ""),
            "smiles": compound_record.get("smiles", compound_record.get("SMILES", "")),
            "inchi": compound_record.get("inchi", compound_record.get("InChI", "")),
            "inchikey": compound_record.get("inchikey", compound_record.get("InChIKey", "")),
            "formula": formula,
            "rt": self._safe_float_conversion(compound_record.get("rt", compound_record.get("RT", None))),
            "db_id": compound_record.get("db_id", compound_record.get("database_id", None)),
            "db": compound_record.get("db", compound_record.get("database", None)),
            "cmpd_uid": compound_level_uid,
        }

        # Generate adduct variants
        variants, lib_id_counter = self._generate_adduct_variants(
            compound_data, adducts=adducts, polarity=polarity,
            lib_id_counter=lib_id_counter, min_probability=min_probability
        )
        all_variants.extend(variants)

        # Track if compound was skipped due to invalid formula
        if not variants:
            skipped_compounds += 1

    # Convert to DataFrame and store
    if all_variants:
        new_lib_df = pl.DataFrame(all_variants)

        # Combine with existing data if any
        if self.lib_df is not None and len(self.lib_df) > 0:
            self.lib_df = pl.concat([self.lib_df, new_lib_df])
        else:
            self.lib_df = new_lib_df

        print(f"Imported {len(all_variants)} library entries from {jsonfile}")
        # Make partial data loss visible instead of dropping records silently.
        if skipped_compounds > 0:
            print(
                f"Skipped {skipped_compounds} of {total_compounds} compounds "
                f"(missing or invalid formula, or no adduct variants generated)"
            )
    else:
        print(f"No valid compounds found in {jsonfile}")
        if skipped_compounds > 0:
            print(f"All {total_compounds} compounds were skipped due to invalid formulas")
|
|
824
|
+
|
|
688
825
|
def _map_csv_columns(self, columns: List[str]) -> Dict[str, str]:
|
|
689
826
|
"""
|
|
690
827
|
Map CSV column names to standardized internal names (case-insensitive).
|
masster/study/id.py
CHANGED
|
@@ -21,10 +21,10 @@ def lib_load(
|
|
|
21
21
|
|
|
22
22
|
Args:
|
|
23
23
|
study: Study instance
|
|
24
|
-
lib_source: either a CSV file path (str) or a Lib instance
|
|
25
|
-
polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV path.
|
|
24
|
+
lib_source: either a CSV/JSON file path (str) or a Lib instance
|
|
25
|
+
polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV/JSON path.
|
|
26
26
|
If None, uses study.polarity automatically.
|
|
27
|
-
adducts: specific adducts to generate - used when lib_source is a CSV path
|
|
27
|
+
adducts: specific adducts to generate - used when lib_source is a CSV/JSON path
|
|
28
28
|
iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)
|
|
29
29
|
|
|
30
30
|
Side effects:
|
|
@@ -38,7 +38,7 @@ def lib_load(
|
|
|
38
38
|
Lib = None
|
|
39
39
|
|
|
40
40
|
if lib_source is None:
|
|
41
|
-
raise ValueError("lib_source must be a CSV file path (str) or a Lib instance")
|
|
41
|
+
raise ValueError("lib_source must be a CSV/JSON file path (str) or a Lib instance")
|
|
42
42
|
|
|
43
43
|
# Use study polarity if not explicitly provided
|
|
44
44
|
if polarity is None:
|
|
@@ -52,15 +52,23 @@ def lib_load(
|
|
|
52
52
|
polarity = "positive" # Default fallback
|
|
53
53
|
study.logger.debug(f"Using study polarity: {polarity}")
|
|
54
54
|
|
|
55
|
-
# Handle string input (CSV file path)
|
|
55
|
+
# Handle string input (CSV or JSON file path)
|
|
56
56
|
if isinstance(lib_source, str):
|
|
57
57
|
if Lib is None:
|
|
58
58
|
raise ImportError(
|
|
59
|
-
"Could not import masster.lib.lib.Lib - required for CSV loading",
|
|
59
|
+
"Could not import masster.lib.lib.Lib - required for CSV/JSON loading",
|
|
60
60
|
)
|
|
61
61
|
|
|
62
62
|
lib_obj = Lib()
|
|
63
|
-
|
|
63
|
+
|
|
64
|
+
# Determine file type by extension
|
|
65
|
+
if lib_source.lower().endswith('.json'):
|
|
66
|
+
lib_obj.import_json(lib_source, polarity=polarity, adducts=adducts)
|
|
67
|
+
elif lib_source.lower().endswith('.csv'):
|
|
68
|
+
lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
|
|
69
|
+
else:
|
|
70
|
+
# Default to CSV behavior for backward compatibility
|
|
71
|
+
lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
|
|
64
72
|
|
|
65
73
|
# Handle Lib instance
|
|
66
74
|
elif Lib is not None and isinstance(lib_source, Lib):
|
|
@@ -72,7 +80,7 @@ def lib_load(
|
|
|
72
80
|
|
|
73
81
|
else:
|
|
74
82
|
raise TypeError(
|
|
75
|
-
"lib_source must be a CSV file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
|
|
83
|
+
"lib_source must be a CSV/JSON file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
|
|
76
84
|
)
|
|
77
85
|
|
|
78
86
|
# Ensure lib_df is populated
|
|
@@ -101,7 +109,7 @@ def lib_load(
|
|
|
101
109
|
# Store pointer and DataFrame on study
|
|
102
110
|
study._lib = lib_obj
|
|
103
111
|
|
|
104
|
-
# Add source_id column with filename (without path) if loading from CSV
|
|
112
|
+
# Add source_id column with filename (without path) if loading from CSV/JSON
|
|
105
113
|
if isinstance(lib_source, str):
|
|
106
114
|
import os
|
|
107
115
|
filename_only = os.path.basename(lib_source)
|
|
@@ -3,21 +3,15 @@ masster/_version.py,sha256=uCkE1NJ7J1aQrPg6o1mVRwVi9N10aB8nbGRTr0cwkNY,257
|
|
|
3
3
|
masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
|
|
4
4
|
masster/logger.py,sha256=XT2gUcUIct8LWzTp9n484g5MaB89toT76CGA41oBvfA,18375
|
|
5
5
|
masster/spectrum.py,sha256=TWIgDcl0lveG40cLVZTWGp8-FxMolu-P8EjZyRBtXL4,49850
|
|
6
|
-
masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5,sha256=LdJMF8uLoDm9ixZNHBoOzBH6hX7NGY7vTvqa2Pzetb8,6539174
|
|
7
|
-
masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5,sha256=hWUfslGoOTiQw59jENSBXP4sa6DdkbOi40FJ68ep61Q,6956773
|
|
8
|
-
masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5,sha256=dSd2cIgYYdRcNSzkhqlZCeWKi3x8Hhhcx8BFMuiVG4c,11382948
|
|
9
|
-
masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5,sha256=wER8CHSBz54Yx1kwmU7ghPPWVwYvxv_lXGB8-8a1xpQ,9508434
|
|
10
|
-
masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5,sha256=h2OOAWWTwKXzTNewhiYeL-cMYdp_JYLPya8Q9Nv9Lvw,12389587
|
|
11
6
|
masster/data/libs/aa.csv,sha256=Sja1DyMsiaM2NfLcct4kAAcXYwPCukJJW8sDkup9w_c,1924
|
|
12
7
|
masster/data/libs/ccm.csv,sha256=Q6nylV1152uTpX-ydqWeGrc6L9kgv45xN_fBZ4f7Tvo,12754
|
|
13
|
-
masster/data/libs/hilic.csv,sha256=Ao2IN9t7GiFWEBJg21TmNJZjTbyHC3e0dJcfftAKsM4,671265
|
|
14
8
|
masster/data/libs/urine.csv,sha256=iRrR4N8Wzb8KDhHJA4LqoQC35pp93FSaOKvXPrgFHis,653736
|
|
15
9
|
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data,sha256=01vC6m__Qqm2rLvlTMZoeKIKowFvovBTUnrNl8Uav3E,24576
|
|
16
10
|
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff,sha256=go5N9gAM1rn4PZAVaoCmdteY9f7YGEM9gyPdSmkQ8PE,1447936
|
|
17
11
|
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan,sha256=ahi1Y3UhAj9Bj4Q2MlbgPekNdkJvMOoMXVOoR6CeIxc,13881220
|
|
18
12
|
masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2,sha256=TFB0HW4Agkig6yht7FtgjUdbXax8jjKaHpSZSvuU5vs,3252224
|
|
19
13
|
masster/lib/__init__.py,sha256=TcePNx3SYZHz6763TL9Sg4gUNXaRWjlrOtyS6vsu-hg,178
|
|
20
|
-
masster/lib/lib.py,sha256=
|
|
14
|
+
masster/lib/lib.py,sha256=SGWuiCTHc65khmLndC2cFBCO1rk8-SS6BkG4C_nOf-o,44984
|
|
21
15
|
masster/sample/__init__.py,sha256=HL0m1ept0PMAYUCQtDDnkdOS12IFl6oLAq4TZQz83uY,170
|
|
22
16
|
masster/sample/adducts.py,sha256=SU6S3pyyLQUSg6yPcdj3p4MfwNDsp0_FYwYYb7F4li0,33798
|
|
23
17
|
masster/sample/h5.py,sha256=0FE6eH9n8RaO59HjKnFo0kKmr8L44UOQIBqcpD3LW0s,117749
|
|
@@ -44,7 +38,7 @@ masster/study/analysis.py,sha256=L-wXBnGZCLB5UUDrjIdOiMG9zdej3Tw_SftcEmmTukM,842
|
|
|
44
38
|
masster/study/export.py,sha256=c1HJdLAM6Ply0n8f0DjMk4mXd9lOYePr60UJTBksUho,60092
|
|
45
39
|
masster/study/h5.py,sha256=bznE9kKEfLNo0QtbyC6a6snfnR3Zjkx5BcjBNbRVlJ8,99579
|
|
46
40
|
masster/study/helpers.py,sha256=FdvQV-CgQyBhXMqk6_92aKSBsZhJBK4joLxEdKzmuhw,192011
|
|
47
|
-
masster/study/id.py,sha256=
|
|
41
|
+
masster/study/id.py,sha256=H6LhD0fbuxM4i0JIhHvWKLMa86fpXyEAHzbCCW5ffBA,91288
|
|
48
42
|
masster/study/importers.py,sha256=1Oco0yMid_siMMZdK7rQlhS20XikgjBBNAirbTHx5O8,13958
|
|
49
43
|
masster/study/load.py,sha256=EsKpxUuduu-w1TREfHTYxRdEncWTd15h2IwoB3D_nuc,72070
|
|
50
44
|
masster/study/merge.py,sha256=SwrsbcoI6hIuepvTJEFNoXncwMx1XXr6FVOvkSyfIbs,169239
|
|
@@ -69,8 +63,8 @@ masster/wizard/README.md,sha256=RX3uxT1qD5i9iDSznZUbnukixonqr96JlUE4TwssAgY,1411
|
|
|
69
63
|
masster/wizard/__init__.py,sha256=L9G_datyGSFJjrBVklEVpZVLGXzUhDiWobtiygBH8vQ,669
|
|
70
64
|
masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
|
|
71
65
|
masster/wizard/wizard.py,sha256=ckOz-8zrk8i7tDjqkk-shYFA2Ni9fV4nejocEjTX78M,65265
|
|
72
|
-
masster-0.5.
|
|
73
|
-
masster-0.5.
|
|
74
|
-
masster-0.5.
|
|
75
|
-
masster-0.5.
|
|
76
|
-
masster-0.5.
|
|
66
|
+
masster-0.5.20.dist-info/METADATA,sha256=FJGXFasiyqxkR1R34mXnMANYqQ_ArHLktC3DqLRvg1I,45153
|
|
67
|
+
masster-0.5.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
68
|
+
masster-0.5.20.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
69
|
+
masster-0.5.20.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
|
|
70
|
+
masster-0.5.20.dist-info/RECORD,,
|
|
Binary file
|
|
Binary file
|
masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5
DELETED
|
Binary file
|