masster 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +2 -0
- masster/_version.py +1 -1
- masster/data/libs/README.md +17 -0
- masster/data/libs/ccm.py +533 -0
- masster/data/libs/central_carbon_README.md +17 -0
- masster/data/libs/central_carbon_metabolites.csv +120 -0
- masster/data/libs/urine.py +333 -0
- masster/data/libs/urine_metabolites.csv +51 -0
- masster/sample/h5.py +1 -1
- masster/sample/helpers.py +3 -7
- masster/sample/lib.py +32 -25
- masster/sample/load.py +9 -3
- masster/sample/plot.py +113 -27
- masster/study/export.py +27 -10
- masster/study/h5.py +58 -40
- masster/study/helpers.py +450 -196
- masster/study/helpers_optimized.py +5 -5
- masster/study/load.py +144 -118
- masster/study/plot.py +691 -277
- masster/study/processing.py +9 -5
- masster/study/study.py +6 -6
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/METADATA +1 -1
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/RECORD +31 -25
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/WHEEL +0 -0
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/entry_points.txt +0 -0
- {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
Name,Formula,SMILES,InChIKey
|
|
2
|
+
Glucose,C6H12O6,OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O,WQZGKKKJIJFFOK-GASJEMHNSA-N
|
|
3
|
+
Glucose-6-phosphate,C6H13O9P,O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,VFRROHXSMXFLSN-SLPGGIOYSA-N
|
|
4
|
+
Fructose-6-phosphate,C6H13O9P,O=C(CO)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,GSXOAOHZAIYLCY-HSUXUTPPSA-N
|
|
5
|
+
"Fructose-1,6-bisphosphate",C6H13O12P,O=P(O)(O)OC[C@H]1O[C@](O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O,RNBGYGVWRKECFJ-ARQDHWQXSA-N
|
|
6
|
+
Glyceraldehyde-3-phosphate,C3H7O6P,O=C[C@H](O)COP(=O)(O)O,LXJXRIRHZLFYRP-VKHMYHEASA-N
|
|
7
|
+
Dihydroxyacetone phosphate,C3H7O6P,O=C(CO)COP(=O)(O)O,GNGACRATGGDKBX-UHFFFAOYSA-N
|
|
8
|
+
3-Phosphoglycerate,C3H7O7P,O=C(O)C(O)COP(=O)(O)O,OSJPPGNTCRNQQC-UHFFFAOYSA-N
|
|
9
|
+
2-Phosphoglycerate,C3H7O7P,O=C(O)C(CO)OP(=O)(O)O,GXIURPTVHJPJLF-UHFFFAOYSA-N
|
|
10
|
+
Phosphoenolpyruvate,C3H5O6P,C=C(OP(=O)(O)O)C(=O)O,DTBNBXWJWCWCIK-UHFFFAOYSA-N
|
|
11
|
+
Pyruvate,C3H6O,CC(=O)C(=O)O,LCTONWCANYUPML-UHFFFAOYSA-M
|
|
12
|
+
Lactate,C3H8O,CC(O)C(=O)O,JVTAAEKCZFNVCJ-UHFFFAOYSA-M
|
|
13
|
+
Acetyl-CoA,C23H38N7O17P3S,CC(=O)SCCN=C(O)CCN=C(O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,ZSLZBFCDCINBPY-ZSJPKINUSA-N
|
|
14
|
+
Citric acid,C6H8O7,O=C(O)CC(O)(CC(=O)O)C(=O)O,KRKNYBCHXYNGOX-UHFFFAOYSA-N
|
|
15
|
+
Isocitrate,C6H7O,O=C(O)CC(C(=O)O)C(O)C(=O)O,ODBLHEXUDAPZAU-UHFFFAOYSA-N
|
|
16
|
+
Alpha-ketoglutaric acid,C5H5O,O=C(O)CCC(=O)C(=O)O,KPGXRSRHYNQIFN-UHFFFAOYSA-N
|
|
17
|
+
Succinyl-CoA,C25H40N7O19P3S,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(O)=NCCC(O)=NCCSC(=O)CCC(=O)O,VNOYUJKHFWYWIR-ITIYDSSPSA-N
|
|
18
|
+
Succinic acid,C4H5O,O=C(O)CCC(=O)O,KDYFGRWQOYBRFD-UHFFFAOYSA-N
|
|
19
|
+
Fumaric acid,C4H3O,O=C(O)/C=C/C(=O)O,VZCYOOQTPOCHFL-OWOJBTEDSA-N
|
|
20
|
+
Malic acid,C4H5O,O=C(O)CC(O)C(=O)O,BJEPYKJPYRNKOW-UHFFFAOYSA-N
|
|
21
|
+
Oxaloacetic acid,C4H3O,O=C(O)CC(=O)C(=O)O,KHPXUQMNIQBQEV-UHFFFAOYSA-N
|
|
22
|
+
Ribose-5-phosphate,C5H11O8P,O=C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,PPQRONHOSHZGFQ-LMVFSUKVSA-N
|
|
23
|
+
Ribulose-5-phosphate,C5H11O8P,O=C(CO)[C@H](O)[C@H](O)COP(=O)(O)O,FNZLKVNUWIIPSJ-UHNVWZDZSA-N
|
|
24
|
+
Sedoheptulose-7-phosphate,C7H15O10P,O=C(CO)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,JDTUMPKOJBQPKX-GBNDHIKLSA-N
|
|
25
|
+
Erythrose-4-phosphate,C4H9O7P,O=C[C@H](O)[C@H](O)COP(=O)(O)O,NGHMDNPXVRFFGS-IUYQGCFVSA-N
|
|
26
|
+
"Sedoheptulose-1,7-bisphosphate",C7H15O13P,O=C(COP(=O)(O)O)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,OKHXOUGRECCASI-SHUUEZRQSA-N
|
|
27
|
+
Glycerol-3-phosphate,C3H9O6P,O=P(O)(O)OCC(O)CO,AWUCVROLDVIAJX-UHFFFAOYSA-N
|
|
28
|
+
Glycerate,C3H9O,O=C(O)C(O)CO,RBNPOMFGQQGHHO-UHFFFAOYSA-M
|
|
29
|
+
Pentose,C5H9O,OC1COC(O)C(O)C1O,SRBFZHDQGSBBOR-UHFFFAOYSA-N
|
|
30
|
+
Acetaldehyde,C2H4O,CC=O,IKHGUXGNUITLKF-UHFFFAOYSA-N
|
|
31
|
+
Acetic acid,C2H3O,CC(=O)O,QTBSBXVTEAMEQO-UHFFFAOYSA-N
|
|
32
|
+
Alanine,C3H7NO2,C[C@H](N)C(=O)O,QNAYBMKLOCPYGJ-REOHCLBHSA-N
|
|
33
|
+
Arginine,C6H13N4O,N=C(N)NCCC[C@H](N)C(=O)O,ODKSFYDXXFIFQN-BYPYZUCNSA-N
|
|
34
|
+
Asparagine,C4H7N2O,N=C(O)C[C@H](N)C(=O)O,DCXYFEDJOCDNAF-REOHCLBHSA-N
|
|
35
|
+
Aspartic acid,C4H6NO,N[C@@H](CC(=O)O)C(=O)O,CKLJMWTZIZZHCS-REOHCLBHSA-N
|
|
36
|
+
Cysteine,C3H7NO2S,N[C@@H](CS)C(=O)O,XUJNEKJLAYXESH-REOHCLBHSA-N
|
|
37
|
+
Glutamic acid,C5H8NO,N[C@@H](CCC(=O)O)C(=O)O,WHUUTDBJXJRKMK-VKHMYHEASA-N
|
|
38
|
+
Glutamine,C5H9N2O,N=C(O)CC[C@H](N)C(=O)O,ZDXPYRJPNDTMRX-VKHMYHEASA-N
|
|
39
|
+
Glycine,C2H4NO,NCC(=O)O,DHMQDGOQFOQNFH-UHFFFAOYSA-N
|
|
40
|
+
Histidine,C6H8N3O,N[C@@H](Cc1cnc[nH]1)C(=O)O,HNDVDQJCIGZPNO-YFKPBYRVSA-N
|
|
41
|
+
Isoleucine,C6H12NO,CC[C@H](C)[C@H](N)C(=O)O,AGPKZVBTJJNPAG-WHFBIAKZSA-N
|
|
42
|
+
Leucine,C6H12NO,CC(C)C[C@H](N)C(=O)O,ROHFNLRQFUQHCH-YFKPBYRVSA-N
|
|
43
|
+
Lysine,C6H13N2O,NCCCC[C@H](N)C(=O)O,KDXKERNSBIXSRK-YFKPBYRVSA-N
|
|
44
|
+
Methionine,C5H11NO2S,CSCC[C@H](N)C(=O)O,FFEARJCKVFRZRR-BYPYZUCNSA-N
|
|
45
|
+
Phenylalanine,C9H10NO,N[C@@H](Cc1ccccc1)C(=O)O,COLNVLDHVKWLRT-QMMMGPOBSA-N
|
|
46
|
+
Proline,C5H8NO,O=C(O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N
|
|
47
|
+
Serine,C3H6NO,N[C@@H](CO)C(=O)O,MTCFGRXMJLQNBG-REOHCLBHSA-N
|
|
48
|
+
Threonine,C4H8NO,C[C@@H](O)[C@H](N)C(=O)O,AYFVYJQAPQTCCC-GBXIJSLDSA-N
|
|
49
|
+
Tryptophan,C11H11N2O,N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O,QIVBCDIJIAJPQS-VIFPVBQESA-N
|
|
50
|
+
Tyrosine,C9H10NO,N[C@@H](Cc1ccc(O)cc1)C(=O)O,OUYCCCASQSFEME-QMMMGPOBSA-N
|
|
51
|
+
Valine,C5H10NO,CC(C)[C@H](N)C(=O)O,KZSNJWFQEVHDMF-BYPYZUCNSA-N
|
|
52
|
+
Ornithine,C5H11N2O,NCCC[C@H](N)C(=O)O,AHLPHDHHMVZTML-BYPYZUCNSA-N
|
|
53
|
+
Citrulline,C6H12N3O,N=C(O)NCCC[C@H](N)C(=O)O,RHGKLRLOHDJJDR-BYPYZUCNSA-N
|
|
54
|
+
Homocysteine,C4H9NO2S,N[C@@H](CCS)C(=O)O,FFFHZYDWPBMWHY-VKHMYHEASA-N
|
|
55
|
+
S-adenosylmethionine,C15H22N6O5S,C[S](CC[C@H](N)C(=O)O)C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,MEFKEPWMEQBLKI-AIRLBKTGSA-N
|
|
56
|
+
S-adenosylhomocysteine,C14H20N6O5S,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,ZJUKTBDSGOFHSH-WFMPWKQPSA-N
|
|
57
|
+
Formic acid,CHO,O=CO,BDAGIHXWWSANSR-UHFFFAOYSA-N
|
|
58
|
+
Propionic acid,C3H5O,CCC(=O)O,XBDQKXXYIPTUBI-UHFFFAOYSA-N
|
|
59
|
+
Butyric acid,C4H7O,CCCC(=O)O,FERIUCNNQQJTOY-UHFFFAOYSA-N
|
|
60
|
+
Malonic acid,C3H3O,O=C(O)CC(=O)O,OFOBLEOULBTSOW-UHFFFAOYSA-N
|
|
61
|
+
2-Hydroxyglutarate,C5H7O,O=C(O)CCC(O)C(=O)O,HWXBTNAVRSUOJR-UHFFFAOYSA-N
|
|
62
|
+
3-Hydroxybutyrate,C4H10O,CC(O)CC(=O)O,WHBMMWSBFZVSSR-UHFFFAOYSA-M
|
|
63
|
+
Acetoacetate,C4H8O,CC(=O)CC(=O)O,WDJHALXBUFZDSR-UHFFFAOYSA-M
|
|
64
|
+
Beta-hydroxybutyrate,C4H7O,CC(O)CC(=O)O,WHBMMWSBFZVSSR-UHFFFAOYSA-N
|
|
65
|
+
Pyruvic acid,C3H3O,CC(=O)C(=O)O,LCTONWCANYUPML-UHFFFAOYSA-N
|
|
66
|
+
Lactic acid,C3H5O,CC(O)C(=O)O,JVTAAEKCZFNVCJ-UHFFFAOYSA-N
|
|
67
|
+
Myristic acid,C14H27O,CCCCCCCCCCCCCC(=O)O,TUNFSRHWOTWDNC-UHFFFAOYSA-N
|
|
68
|
+
Palmitic acid,C16H31O,CCCCCCCCCCCCCCCC(=O)O,IPCSVZSSVZVIGE-UHFFFAOYSA-N
|
|
69
|
+
Stearic acid,C18H35O,CCCCCCCCCCCCCCCCCC(=O)O,QIQXTHQIDYTFRH-UHFFFAOYSA-N
|
|
70
|
+
Palmitoleic acid,C16H29O,CCCCCC/C=C\CCCCCCCC(=O)O,SECPZKHBENQXJG-FPLPWBNLSA-N
|
|
71
|
+
Oleic acid,C18H33O,CCCCCCCC/C=C\CCCCCCCC(=O)O,ZQPPMHVWECSIRJ-KTKRTIGZSA-N
|
|
72
|
+
Linoleic acid,C18H31O,CCCCC/C=C\C/C=C\CCCCCCCC(=O)O,OYHQOLUKZRVURQ-HZJYTTRNSA-N
|
|
73
|
+
Alpha-linolenic acid,C18H29O,CC/C=C\C/C=C\C/C=C\CCCCCCCC(=O)O,DTOSIQBPPRVQHS-PDBXOOCHSA-N
|
|
74
|
+
Arachidonic acid,C20H31O,CCCCC/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)O,YZXBAPSDXZZRGB-DOFZRALJSA-N
|
|
75
|
+
Adenine,C5H5N5,Nc1nc[nH]c2ncnc1-2,GFFGJBXGBJISGV-UHFFFAOYSA-N
|
|
76
|
+
Guanine,C5H5N5O,N=c1nc(O)c2nc[nH]c2[nH]1,UYTPUPDQBNUYGX-UHFFFAOYSA-N
|
|
77
|
+
Cytosine,C4H5N3O,N=c1ccnc(O)[nH]1,OPTASPLRGRRNAP-UHFFFAOYSA-N
|
|
78
|
+
Thymine,C5H5N2O,Cc1cnc(O)nc1O,RWQNBRDOKXIBIV-UHFFFAOYSA-N
|
|
79
|
+
Uracil,C4H3N2O,Oc1ccnc(O)n1,ISAKRJDGNUQOIC-UHFFFAOYSA-N
|
|
80
|
+
Adenosine,C10H12N5O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,OIRDTQYFTABQOQ-KQYNXXCUSA-N
|
|
81
|
+
Guanosine,C10H12N5O,N=c1nc(O)c2ncn([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O)c2[nH]1,NYHBQMYGNKIUIF-UUOKFMHZSA-N
|
|
82
|
+
Cytidine,C9H12N3O,N=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(O)n1,UHDGCWIWMRVCDJ-XVFCMESISA-N
|
|
83
|
+
Uridine,C9H11N2O,O=c1nc(O)ccn1[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,DRTQHJPVMGBUCF-XVFCMESISA-N
|
|
84
|
+
AMP,C10H14N5O7P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,UDMBCSSLTHHNCD-KQYNXXCUSA-N
|
|
85
|
+
ADP,C10H14N5O10P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,XTWYTFMLZFPYCI-KQYNXXCUSA-N
|
|
86
|
+
ATP,C10H16N5O13P3,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,ZKHQWZAMYRWXGA-KQYNXXCUSA-N
|
|
87
|
+
GMP,C10H14N5O8P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,RQFCJASXJCIDSX-UUOKFMHZSA-N
|
|
88
|
+
GDP,C10H14N5O11P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,QGWNDRXFNXRZMB-UUOKFMHZSA-N
|
|
89
|
+
GTP,C10H15N5O14P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,XKMLYUALXHKNFT-UUOKFMHZSA-N
|
|
90
|
+
CMP,C9H14N3O8P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,IERHLVCPSMICTF-XVFCMESISA-N
|
|
91
|
+
CDP,C9H14N3O11P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,ZWIADYZPOWUWEW-XVFCMESISA-N
|
|
92
|
+
CTP,C9H15N3O14P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,PCDQPRRSZKQHHS-XVFCMESISA-N
|
|
93
|
+
UMP,C9H13N2O9P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,DJJCXFVJDGTHFX-XVFCMESISA-N
|
|
94
|
+
UDP,C9H13N2O12P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,XCCTYIAWTASOJW-XVFCMESISA-N
|
|
95
|
+
UTP,C9H14N2O15P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,PGAVKCOVUIYSFO-XVFCMESISA-N
|
|
96
|
+
NAD+,C21H26N7O14P,N=C(O)C1CCCN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,BAWFJGJZGIEFAR-NNYOXOHSSA-N
|
|
97
|
+
NADH,C21H28N7O14P,N=C(O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,BOPGDPNILDQYTO-NNYOXOHSSA-N
|
|
98
|
+
NADP+,C21H27N7O17P,N=C(O)C1CCCN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,XJLXINKUBYWONI-NNYOXOHSSA-N
|
|
99
|
+
NADPH,C21H29N7O17P,N=C(O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,ACFIXJIJDZMPPO-NNYOXOHSSA-N
|
|
100
|
+
FAD,C27H32N9O15P,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C,VWWQXMAJTJZDQX-UYBVJOGSSA-N
|
|
101
|
+
FMN,C17H21N4O9P,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C,FVTCRASFADXXNN-SCRDCRAPSA-N
|
|
102
|
+
Coenzyme A,C21H36N7O16P3S,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(O)=NCCC(O)=NCCS,RGJOEKWQDUBAIZ-IBOSZNHHSA-N
|
|
103
|
+
Pantothenic acid,C9H16NO,CC(C)(CO)[C@@H](O)C(O)=NCCC(=O)O,GHOKWGTUZJEAQD-ZETCQYMHSA-N
|
|
104
|
+
Riboflavin,C17H19N4O,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C,AUNGANRZJHBGPY-SCRDCRAPSA-N
|
|
105
|
+
Niacin,C6H4NO,O=C(O)c1cccnc1,PVNIIMVLHYAWGP-UHFFFAOYSA-N
|
|
106
|
+
Fructose,C6H11O,OCC1(O)OC[C@@H](O)[C@@H](O)[C@@H]1O,LKDRXBCSQODPBY-VRPWFDPXSA-N
|
|
107
|
+
Mannose,C6H11O,OC[C@H]1OC(O)[C@@H](O)[C@@H](O)[C@@H]1O,WQZGKKKJIJFFOK-QTVWNMPRSA-N
|
|
108
|
+
Mannose-6-phosphate,C6H13O9P,O=C[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,VFRROHXSMXFLSN-KVTDHHQDSA-N
|
|
109
|
+
Ribose,C5H9O,OC1OC[C@@H](O)[C@@H](O)[C@H]1O,SRBFZHDQGSBBOR-SOOFDHNKSA-N
|
|
110
|
+
Glucosamine,C6H12NO,N[C@H]1C(O)O[C@H](CO)[C@@H](O)[C@@H]1O,MSWZFWKMSRAUBD-IVMDWMLBSA-N
|
|
111
|
+
N-acetylglucosamine,C8H14NO,CC(O)=N[C@@H](C=O)[C@@H](O)[C@H](O)[C@H](O)CO,MBLBDJOUHNCFQT-LXGUWJNJSA-N
|
|
112
|
+
Choline,C5H13NO,[H]OC([H])([H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],OEYIOHPDSNJKLS-UHFFFAOYSA-N
|
|
113
|
+
Betaine,C5H10NO,[H]OC(=O)C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],KWIUHFFTVRNATP-UHFFFAOYSA-N
|
|
114
|
+
Carnitine,C7H14NO,[H]OC(=O)C([H])([H])C([H])(O[H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],PHIQHXFUZVPYII-UHFFFAOYSA-N
|
|
115
|
+
Phosphocholine,C5H14NO4P,[H]OP(=O)(O[H])OC([H])([H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],YHHSONZFOIEMCP-UHFFFAOYSA-O
|
|
116
|
+
Glycerol,C3H7O,OCC(O)CO,PEDCQBHIVMGVHV-UHFFFAOYSA-N
|
|
117
|
+
Sorbitol,C6H13O,OC[C@@H](O)[C@@H](O)[C@H](O)[C@@H](O)CO,FBPFZTCFMRRESA-JGWLITMVSA-N
|
|
118
|
+
Inositol,C6H11O,OC1C(O)C(O)C(O)C(O)C1O,CDAISMWEOUEBRE-UHFFFAOYSA-N
|
|
119
|
+
Cholesterol,C27H46O,CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,HVYWMOMLDIMFJA-DPAQBDIFSA-N
|
|
120
|
+
Pantothenate,C9H21NO,CC(C)(CO)C(O)C(O)=NCCC(=O)O,GHOKWGTUZJEAQD-UHFFFAOYSA-M
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""Generate a CSV of human urine metabolites.
|
|
2
|
+
|
|
3
|
+
This improved script attempts to:
|
|
4
|
+
- Download or scrape a urine metabolite list from the UrineMetabolome downloads page.
|
|
5
|
+
- Fall back to HMDB scraping or a curated list if needed.
|
|
6
|
+
- Resolve formula/SMILES/InChIKey using PubChem with bounded parallelism.
|
|
7
|
+
- Use RDKit (if available) to convert InChI -> SMILES when PubChem does not provide SMILES.
|
|
8
|
+
|
|
9
|
+
The goal is robust coverage and faster lookups by parallelizing per-name queries
|
|
10
|
+
while avoiding aggressive parallelism that might overload PubChem.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import csv
|
|
16
|
+
import sys
|
|
17
|
+
import re
|
|
18
|
+
import time
|
|
19
|
+
import os
|
|
20
|
+
from urllib.parse import quote, urljoin
|
|
21
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
22
|
+
from typing import Iterable
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
import requests
|
|
26
|
+
from bs4 import BeautifulSoup
|
|
27
|
+
except Exception:
|
|
28
|
+
requests = None
|
|
29
|
+
BeautifulSoup = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
URINEMETABOLOME_DOWNLOADS = "https://www.urinemetabolome.ca/downloads"
|
|
33
|
+
HMDB_URINE_LIST_URL = "https://hmdb.ca/metabolites?utf8=%E2%9C%93&search=&biological_context=Urine"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def normalize_name(name: str) -> str:
    """Normalize a metabolite name before it is used as a PubChem query.

    Steps, in order:

    1. Drop free-standing parenthetical annotations, e.g.
       ``"Glucose (alpha)"`` -> ``"Glucose"``.  Only a parenthetical that is
       preceded by whitespace and followed by whitespace or end-of-string is
       removed, so parentheses that belong to the chemical name itself
       (``"(S)-3-Hydroxyisobutyric acid"``, ``"N-(2-hydroxyethyl)glycine"``)
       are kept instead of being mangled into ``"-3-Hydroxy..."``.
    2. Replace typographic arrows and dashes with ASCII equivalents.
    3. Collapse runs of whitespace and trim the ends.
    4. Expand a few nucleotide/cofactor abbreviations (matched
       case-insensitively) to their full names.

    Args:
        name: Raw metabolite name; may be empty or ``None``.

    Returns:
        The normalized name, or ``name`` itself when it is falsy.
    """
    if not name:
        return name

    # Remove only whitespace-delimited annotations; see docstring for why.
    s = re.sub(r"\s\([^()]*\)(?=\s|$)", "", name)
    s = s.replace("➔", "->").replace("–", "-").replace("—", "-")
    s = re.sub(r"\s+", " ", s).strip()

    # Keys are upper-case because the lookup is done on the upper-cased name.
    abbreviations = {
        "AMP": "Adenosine monophosphate",
        "ADP": "Adenosine diphosphate",
        "ATP": "Adenosine triphosphate",
        "GMP": "Guanosine monophosphate",
        "GDP": "Guanosine diphosphate",
        "GTP": "Guanosine triphosphate",
        "NAD+": "Nicotinamide adenine dinucleotide",
        "NADH": "Nicotinamide adenine dinucleotide (reduced)",
    }
    return abbreviations.get(s.upper(), s)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _parse_name_column(text: str, limit: int) -> list[str]:
    """Return up to *limit* unique names from delimited text that has a header row."""
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if not lines:
        return []

    # Same heuristic as before: comma if the header contains one, else tab.
    sep = "," if "," in lines[0] else "\t"
    # csv.reader (rather than str.split) keeps quoted names such as
    # "1,3-Diaminopropane" in one field. At most `limit` data rows are read.
    rows = csv.reader(lines[: limit + 1], delimiter=sep)
    header = [c.strip().lower() for c in next(rows, [])]

    # First header cell that looks like a name column; default to column 0.
    col_idx = next(
        (
            i
            for i, cell in enumerate(header)
            if any(key in cell for key in ("name", "metabolite", "compound"))
        ),
        0,
    )

    names: list[str] = []
    seen: set[str] = set()
    for row in rows:
        if len(row) <= col_idx:
            continue
        candidate = row[col_idx].strip()
        if candidate and candidate not in seen:
            seen.add(candidate)
            names.append(candidate)
            if len(names) >= limit:
                break
    return names


def fetch_urinemetabolome_names(limit: int = 2000) -> list[str]:
    """Scrape the UrineMetabolome downloads page for a downloadable metabolite list.

    Best-effort: follows links on the downloads page that end in ``.csv`` or
    ``.tsv`` and returns the names parsed from the first file that yields any.
    Excel links are skipped, because a binary workbook cannot be parsed as
    delimited text and decoding it would produce garbage "names" that suppress
    the fallbacks.

    Args:
        limit: Maximum number of names to return (and of data rows to read
            per file).

    Returns:
        A list of unique names in file order, or ``[]`` when the optional
        dependencies are missing, the page cannot be fetched, or no file
        yields a name. The caller should then fall back to HMDB or a curated
        list.
    """
    if requests is None or BeautifulSoup is None:
        return []

    try:
        r = requests.get(URINEMETABOLOME_DOWNLOADS, timeout=15)
        r.raise_for_status()
    except requests.RequestException:
        return []

    soup = BeautifulSoup(r.text, "html.parser")
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if not re.search(r"\.(?:csv|tsv)$", href, re.I):
            continue
        url = urljoin(URINEMETABOLOME_DOWNLOADS, href)
        try:
            rr = requests.get(url, timeout=20)
        except requests.RequestException:
            continue
        if rr.status_code != 200:
            continue
        text = rr.content.decode("utf-8", errors="ignore")
        try:
            names = _parse_name_column(text, limit)
        except csv.Error:
            # Malformed file: try the next link rather than aborting.
            continue
        if names:
            return names
    return []
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def fetch_hmdb_urine_names(limit: int = 500) -> list[str]:
    """Fallback scrape of metabolite names from the HMDB urine listing (best-effort).

    Collects the text of every anchor whose ``href`` contains
    ``/metabolites/HMDB``. Anchors whose text is only an accession ID
    (``HMDB`` followed by digits) are skipped: the listing links each entry by
    ID as well as by name, and IDs are not resolvable metabolite names.
    The limit is applied after de-duplication.

    Args:
        limit: Maximum number of unique names to return.

    Returns:
        Unique names in page order, or ``[]`` when the optional dependencies
        are missing or the page cannot be fetched.
    """
    if requests is None or BeautifulSoup is None:
        return []

    try:
        r = requests.get(HMDB_URINE_LIST_URL, timeout=20)
        r.raise_for_status()
    except requests.RequestException:
        return []

    soup = BeautifulSoup(r.text, "html.parser")
    # dict used as an insertion-ordered set
    names: dict[str, None] = {}
    for a in soup.find_all("a", href=True):
        if "/metabolites/HMDB" not in a["href"]:
            continue
        text = a.get_text(strip=True)
        if len(text) <= 1 or re.fullmatch(r"HMDB\d+", text):
            continue
        names.setdefault(text, None)
        if len(names) >= limit:
            break
    return list(names)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
_PUBCHEM_COMPOUND = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
_PUBCHEM_PROPERTY_SUFFIX = "property/MolecularFormula,CanonicalSMILES,InChI,InChIKey/JSON"


def _pubchem_get_json(url: str, timeout: int):
    """GET *url* and return the decoded JSON object, or None on any failure."""
    try:
        r = requests.get(url, timeout=timeout)
        if r.status_code != 200:
            return None
        payload = r.json()
    except (requests.RequestException, ValueError):
        return None
    return payload if isinstance(payload, dict) else None


def _pubchem_first_record(url: str, timeout: int) -> "dict | None":
    """Return the first entry of ``PropertyTable.Properties`` for *url*, if any."""
    payload = _pubchem_get_json(url, timeout)
    if not payload:
        return None
    table = payload.get("PropertyTable")
    props = table.get("Properties") if isinstance(table, dict) else None
    if isinstance(props, list) and props and isinstance(props[0], dict):
        return props[0]
    return None


def _record_smiles(record: dict) -> "str | None":
    """Pick the SMILES string out of a PubChem property record."""
    # NOTE(review): newer PUG REST responses reportedly return this field as
    # "ConnectivitySMILES" even when "CanonicalSMILES" is requested — confirm
    # against the current PubChem documentation.
    return record.get("CanonicalSMILES") or record.get("ConnectivitySMILES")


def _inchi_to_smiles(inchi: str) -> "str | None":
    """Convert an InChI to isomeric SMILES with RDKit; None if unavailable or failing."""
    try:
        from rdkit import Chem
    except ImportError:
        return None
    try:
        mol = Chem.MolFromInchi(inchi)
        if mol is None:
            return None
        try:
            Chem.SanitizeMol(mol)
        except Exception:
            # Best-effort: still try to emit SMILES for an unsanitized molecule.
            pass
        return Chem.MolToSmiles(mol, isomericSmiles=True)
    except Exception:
        # RDKit failures are treated as "no SMILES available".
        return None


def fetch_pubchem_name_once(name: str, timeout: int = 15):
    """Fetch properties for a single name from PubChem, trying InChI -> SMILES if needed.

    Lookup order for the SMILES string:

    1. the name-based property record,
    2. RDKit conversion of the returned InChI (when RDKit is importable),
    3. a property lookup by InChIKey,
    4. InChIKey -> first CID -> property lookup by CID.

    Args:
        name: Metabolite name; it is passed through ``normalize_name`` and
            URL-quoted before being placed in the request path.
        timeout: Per-request timeout in seconds.

    Returns:
        ``(formula, smiles, inchikey)``; individual items may be ``None``.
        ``(None, None, None)`` when ``requests`` is unavailable or the
        name-based lookup yields no record.
    """
    if requests is None:
        return (None, None, None)

    url_name = quote(normalize_name(name))
    record = _pubchem_first_record(
        f"{_PUBCHEM_COMPOUND}/name/{url_name}/{_PUBCHEM_PROPERTY_SUFFIX}", timeout
    )
    if record is None:
        return (None, None, None)

    formula = record.get("MolecularFormula")
    smiles = _record_smiles(record)
    inchi = record.get("InChI")
    inchikey = record.get("InChIKey")

    if not smiles and inchi:
        smiles = _inchi_to_smiles(inchi)

    if not smiles and inchikey:
        ik = quote(inchikey)

        # Fallback: property lookup keyed by InChIKey.
        by_key = _pubchem_first_record(
            f"{_PUBCHEM_COMPOUND}/inchikey/{ik}/{_PUBCHEM_PROPERTY_SUFFIX}", timeout
        )
        if by_key:
            smiles = _record_smiles(by_key) or smiles
            formula = formula or by_key.get("MolecularFormula")

        # Final fallback: InChIKey -> CIDs -> first CID -> property lookup.
        if not smiles:
            listing = _pubchem_get_json(f"{_PUBCHEM_COMPOUND}/inchikey/{ik}/cids/JSON", timeout)
            ident = listing.get("IdentifierList") if listing else None
            cids = ident.get("CID") if isinstance(ident, dict) else None
            if isinstance(cids, list) and cids:
                by_cid = _pubchem_first_record(
                    f"{_PUBCHEM_COMPOUND}/cid/{cids[0]}/{_PUBCHEM_PROPERTY_SUFFIX}", timeout
                )
                if by_cid:
                    smiles = _record_smiles(by_cid) or smiles
                    formula = formula or by_cid.get("MolecularFormula")

    return (formula, smiles, inchikey)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def fetch_pubchem_for_names(names: Iterable[str], workers: int = 8, delay: float = 0.05):
    """Resolve names via PubChem using a bounded thread pool.

    Each distinct name is looked up once with ``fetch_pubchem_name_once``.
    ``delay`` is applied between task *submissions*, so it spaces out when
    requests start; sleeping after completions (as before) did not throttle
    anything because every request had already been queued.

    Args:
        names: Names to resolve; duplicates are collapsed.
        workers: Maximum number of concurrent lookups.
        delay: Seconds to wait between successive submissions; values <= 0
            disable the pause.
            NOTE(review): PubChem's usage policy asks for a low request rate
            (reportedly <= 5 requests/second) — confirm whether the default
            is conservative enough.

    Returns:
        dict mapping name -> ``(formula, smiles, inchikey)``; a lookup that
        raises is recorded as ``(None, None, None)``.
    """
    unique_names = list(dict.fromkeys(names))
    results: dict = {}
    if not unique_names:
        return results

    with ThreadPoolExecutor(max_workers=workers) as ex:
        futures = {}
        for idx, n in enumerate(unique_names):
            if idx and delay > 0:
                time.sleep(delay)  # stagger request starts
            futures[ex.submit(fetch_pubchem_name_once, n)] = n

        for fut in as_completed(futures):
            n = futures[fut]
            try:
                results[n] = fut.result()
            except Exception:
                # One failed lookup must not abort the whole batch.
                results[n] = (None, None, None)
    return results
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def generate_csv(out_path: str = "urine_metabolites.csv", workers: int = 8):
    """Build the urine metabolite CSV and return the path it was written to.

    Name sources are tried in order: the UrineMetabolome downloads page, the
    HMDB urine listing, then a small curated list. Every name is resolved via
    PubChem and written as one row with the columns
    ``Name, Formula, SMILES, InChIKey`` (unresolved fields are left empty).

    Args:
        out_path: Output file. A bare filename (the default) is written under
            ``masster/data/libs`` relative to the current working directory,
            as before. A path that includes a directory is now honored rather
            than having its directory silently discarded.
        workers: Maximum number of concurrent PubChem lookups.

    Returns:
        The path of the written CSV file.
    """
    # Try UrineMetabolome downloads first, then HMDB.
    names = fetch_urinemetabolome_names() or fetch_hmdb_urine_names()
    if not names:
        print("Falling back to curated urine list")
        names = [
            "Creatinine",
            "Urea",
            "Hippuric acid",
            "Citrate",
            "Creatine",
            "Glycine",
            "Taurine",
            "Succinate",
            "Fumaric acid",
            "Malic acid",
            "Lactic acid",
            "Acetic acid",
            "Formic acid",
            "Alanine",
            "Betaine",
            "Choline",
            "Trimethylamine N-oxide",
            "Phenylacetylglutamine",
            "p-Cresol sulfate",
            "Indoxyl sulfate",
            "Uric acid",
            "Xanthine",
            "3-Hydroxybutyrate",
            "Acetoacetate",
            "N-Acetylneuraminic acid",
        ]

    print(f"Resolving {len(names)} names via PubChem (workers={workers})...")
    mapping = fetch_pubchem_for_names(names, workers=workers)

    rows = []
    for name in names:
        formula, smiles, inchikey = mapping.get(name) or (None, None, None)
        rows.append(
            {
                "Name": name,
                "Formula": formula or "",
                "SMILES": smiles or "",
                "InChIKey": inchikey or "",
            }
        )

    # Ensure output directory exists (package data dir unless a directory was given).
    out_dir = os.path.dirname(out_path) or os.path.join("masster", "data", "libs")
    os.makedirs(out_dir, exist_ok=True)
    out_path_full = os.path.join(out_dir, os.path.basename(out_path))

    fieldnames = ["Name", "Formula", "SMILES", "InChIKey"]
    with open(out_path_full, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f"Wrote {len(rows)} entries to {out_path_full}")
    return out_path_full
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def test_load_with_lib(csv_path: str):
    """Smoke-test that ``masster.lib.Lib`` can import the CSV at *csv_path*.

    Prints a diagnostic message for each outcome.

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        True when the library imports the file without raising; False when
        ``masster.lib.Lib`` cannot be imported or the load fails.
    """
    try:
        from masster.lib import Lib
    except Exception as e:
        print(f"Cannot import masster.lib.Lib: {e}")
        return False

    try:
        library = Lib()
        library.import_csv(csv_path, polarity=None)
        print(f"Lib loaded: {len(library)} entries")
    except Exception as e:
        print(f"Failed to load CSV with Lib.import_csv: {e}")
        return False
    else:
        return True
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
if __name__ == "__main__":
    # Generate the CSV, then check that the library loader accepts it.
    generated_csv = generate_csv()
    if not test_load_with_lib(generated_csv):
        print("Test failed; please inspect messages above.")
        sys.exit(2)
    print("Done.")
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
Name,Formula,SMILES,InChIKey
|
|
2
|
+
HMDB0000001,,,
|
|
3
|
+
1-Methylhistidine,C7H11N3O2,Cn1cnc(C[C@H](N)C(=O)O)c1,BRMWTNUJHUMWMS-LURJTMIESA-N
|
|
4
|
+
HMDB0000002,,,
|
|
5
|
+
"1,3-Diaminopropane",C3H10N2,NCCCN,XFNJVJPLKCPIBV-UHFFFAOYSA-N
|
|
6
|
+
HMDB0000005,,,
|
|
7
|
+
2-Ketobutyric acid,C4H6O3,CCC(=O)C(=O)O,TYEYBOSBBBHJIV-UHFFFAOYSA-N
|
|
8
|
+
HMDB0000008,,,
|
|
9
|
+
2-Hydroxybutyric acid,C4H8O3,CCC(O)C(=O)O,AFENDNXGAFYKQO-UHFFFAOYSA-N
|
|
10
|
+
HMDB0000010,,,
|
|
11
|
+
2-Methoxyestrone,,,
|
|
12
|
+
HMDB0000011,,,
|
|
13
|
+
3-Hydroxybutyric acid,,,
|
|
14
|
+
HMDB0000012,,,
|
|
15
|
+
Deoxyuridine,,,
|
|
16
|
+
HMDB0000014,,,
|
|
17
|
+
Deoxycytidine,,,
|
|
18
|
+
HMDB0000015,,,
|
|
19
|
+
Cortexolone,,,
|
|
20
|
+
HMDB0000016,,,
|
|
21
|
+
Deoxycorticosterone,,,
|
|
22
|
+
HMDB0000017,,,
|
|
23
|
+
4-Pyridoxic acid,C8H9NO4,Cc1ncc(CO)c(C(=O)O)c1O,HXACOUQIXZGNBF-UHFFFAOYSA-N
|
|
24
|
+
HMDB0000019,,,
|
|
25
|
+
alpha-Ketoisovaleric acid,C5H8O3,CC(C)C(=O)C(=O)O,QHKABHOOEWYVLI-UHFFFAOYSA-N
|
|
26
|
+
HMDB0000020,,,
|
|
27
|
+
p-Hydroxyphenylacetic acid,C8H8O3,O=C(O)Cc1ccc(O)cc1,XQXPVVBIMDBYFF-UHFFFAOYSA-N
|
|
28
|
+
HMDB0000021,,,
|
|
29
|
+
Iodotyrosine,C9H10INO3,N[C@@H](Cc1ccc(O)c(I)c1)C(=O)O,UQTZMGFTRHFAAM-ZETCQYMHSA-N
|
|
30
|
+
HMDB0000022,,,
|
|
31
|
+
3-Methoxytyramine,C9H13NO2,COc1cc(CCN)ccc1O,DIVQKHQLANKJQO-UHFFFAOYSA-N
|
|
32
|
+
HMDB0000023,,,
|
|
33
|
+
(S)-3-Hydroxyisobutyric acid,,,
|
|
34
|
+
HMDB0000024,,,
|
|
35
|
+
3-O-Sulfogalactosylceramide (d18:1/24:0),,,
|
|
36
|
+
HMDB0000026,,,
|
|
37
|
+
Ureidopropionic acid,,,
|
|
38
|
+
HMDB0000027,,,
|
|
39
|
+
Tetrahydrobiopterin,,,
|
|
40
|
+
HMDB0000030,,,
|
|
41
|
+
Biotin,C10H16N2O3S,O=C(O)CCCC[C@@H]1SC[C@@H]2N=C(O)N[C@@H]21,YBJHBAHKTGYVGT-ZKWXMUAHSA-N
|
|
42
|
+
HMDB0000031,,,
|
|
43
|
+
Androsterone,C19H30O2,C[C@]12CC[C@@H](O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)C(=O)CC[C@@H]12,QGXBDMJGAMFCBF-HLUDHZFRSA-N
|
|
44
|
+
HMDB0000032,,,
|
|
45
|
+
7-Dehydrocholesterol,C27H44O,CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2C3=CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,UCTLRSWJYQTBFZ-DDPQNLDTSA-N
|
|
46
|
+
HMDB0000033,,,
|
|
47
|
+
Carnosine,C9H14N4O3,NCCC(O)=N[C@@H](Cc1cnc[nH]1)C(=O)O,CQOVPNPJLQNMDC-ZETCQYMHSA-N
|
|
48
|
+
HMDB0000034,,,
|
|
49
|
+
Adenine,C5H5N5,Nc1nc[nH]c2ncnc1-2,GFFGJBXGBJISGV-UHFFFAOYSA-N
|
|
50
|
+
HMDB0000036,,,
|
|
51
|
+
Taurocholic acid,,,
|
masster/sample/h5.py
CHANGED
|
@@ -900,7 +900,7 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
900
900
|
def _load_sample5_study(self, filename: str, map: bool = True):
|
|
901
901
|
"""
|
|
902
902
|
Optimized variant of _load_sample5 for study loading that skips reading ms1_df.
|
|
903
|
-
|
|
903
|
+
|
|
904
904
|
This is used when adding samples to studies where ms1_df data is not needed,
|
|
905
905
|
improving loading throughput by skipping the potentially large ms1_df dataset.
|
|
906
906
|
|
masster/sample/helpers.py
CHANGED
|
@@ -176,7 +176,7 @@ def _get_feature_uids(self, features=None, verbose=True):
|
|
|
176
176
|
if not isinstance(features, pd.DataFrame):
|
|
177
177
|
if verbose:
|
|
178
178
|
self.logger.error(
|
|
179
|
-
"Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame."
|
|
179
|
+
"Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.",
|
|
180
180
|
)
|
|
181
181
|
return []
|
|
182
182
|
|
|
@@ -298,7 +298,7 @@ def get_eic(self, mz, mz_tol=None):
|
|
|
298
298
|
"""
|
|
299
299
|
# Use default mz_tol from sample parameters if not provided
|
|
300
300
|
if mz_tol is None:
|
|
301
|
-
if hasattr(self,
|
|
301
|
+
if hasattr(self, "parameters") and hasattr(self.parameters, "eic_mz_tol"):
|
|
302
302
|
mz_tol = self.parameters.eic_mz_tol
|
|
303
303
|
else:
|
|
304
304
|
mz_tol = 0.01 # fallback default
|
|
@@ -323,11 +323,7 @@ def get_eic(self, mz, mz_tol=None):
|
|
|
323
323
|
return None
|
|
324
324
|
|
|
325
325
|
# Aggregate intensities per retention time. Use sum in case multiple points per rt.
|
|
326
|
-
chrom = (
|
|
327
|
-
matches.group_by("rt")
|
|
328
|
-
.agg([pl.col("inty").sum().alias("inty")])
|
|
329
|
-
.sort("rt")
|
|
330
|
-
)
|
|
326
|
+
chrom = matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
|
|
331
327
|
|
|
332
328
|
# Attach to Sample
|
|
333
329
|
self.chrom_df = chrom
|
masster/sample/lib.py
CHANGED
|
@@ -1,28 +1,32 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
lib.py
|
|
3
3
|
|
|
4
|
-
This module provides
|
|
5
|
-
It contains core functionality for compound library management,
|
|
6
|
-
adduct handling, and various analytical operations
|
|
4
|
+
This module provides the Lib class and utility functions for mass spectrometry compound library
|
|
5
|
+
management and feature annotation. It contains core functionality for compound library management,
|
|
6
|
+
target identification, adduct handling, and various analytical operations.
|
|
7
7
|
|
|
8
8
|
Key Features:
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
11
|
-
- **
|
|
12
|
-
- **
|
|
13
|
-
- **
|
|
14
|
-
- **
|
|
9
|
+
- **Lib Class**: Main class for managing compound libraries and annotations
|
|
10
|
+
- **Compound Libraries**: Load and manage compound databases with metadata
|
|
11
|
+
- **Adduct Calculations**: Handle various ionization adducts and charge states
|
|
12
|
+
- **Mass Calculations**: Precise mass calculations with adduct corrections
|
|
13
|
+
- **Target Matching**: Match detected features against compound libraries
|
|
14
|
+
- **Polarity Handling**: Support for positive and negative ionization modes
|
|
15
|
+
- **Database Integration**: Interface with various compound database formats
|
|
15
16
|
|
|
16
17
|
Dependencies:
|
|
17
|
-
- `pyopenms`: For mass spectrometry algorithms and data structures
|
|
18
|
-
- `polars` and `pandas`: For efficient data manipulation and analysis
|
|
19
|
-
- `numpy`: For numerical computations and array operations
|
|
20
|
-
- `tqdm`: For progress tracking during batch operations
|
|
18
|
+
- `pyopenms`: For mass spectrometry algorithms and data structures
|
|
19
|
+
- `polars` and `pandas`: For efficient data manipulation and analysis
|
|
20
|
+
- `numpy`: For numerical computations and array operations
|
|
21
|
+
- `tqdm`: For progress tracking during batch operations
|
|
22
|
+
|
|
23
|
+
Classes:
|
|
24
|
+
- `Lib`: Main class for compound library management and annotation
|
|
21
25
|
|
|
22
26
|
Functions:
|
|
23
|
-
- `lib_load()`: Load compound libraries from CSV files
|
|
24
|
-
- `load_lib()`: Alias for lib_load function
|
|
25
|
-
- Various utility functions for mass calculations and library management
|
|
27
|
+
- `lib_load()`: Load compound libraries from CSV files (legacy)
|
|
28
|
+
- `load_lib()`: Alias for lib_load function (legacy)
|
|
29
|
+
- Various utility functions for mass calculations and library management
|
|
26
30
|
|
|
27
31
|
Supported Adducts:
|
|
28
32
|
- Positive mode: [M+H]+, [M+Na]+, [M+K]+, [M+NH4]+, [M-H2O+H]+
|
|
@@ -30,19 +34,22 @@ Supported Adducts:
|
|
|
30
34
|
|
|
31
35
|
Example Usage:
|
|
32
36
|
```python
|
|
33
|
-
from
|
|
37
|
+
from masster.sample.lib import Lib
|
|
38
|
+
|
|
39
|
+
# Create library instance
|
|
40
|
+
lib = Lib()
|
|
34
41
|
|
|
35
|
-
#
|
|
36
|
-
|
|
42
|
+
# Import compounds from CSV
|
|
43
|
+
lib.import_csv("compounds.csv", polarity="positive")
|
|
37
44
|
|
|
38
|
-
# Access
|
|
39
|
-
print(f"Loaded {len(
|
|
40
|
-
print(
|
|
45
|
+
# Access library data
|
|
46
|
+
print(f"Loaded {len(lib.lib_df)} compounds")
|
|
47
|
+
print(lib.lib_df.head())
|
|
41
48
|
```
|
|
42
49
|
|
|
43
50
|
See Also:
|
|
44
|
-
- `parameters._lib_parameters`: For library-specific parameter configuration
|
|
45
|
-
- `
|
|
51
|
+
- `parameters._lib_parameters`: For library-specific parameter configuration
|
|
52
|
+
- `sample.py`: For applying library matching to detected features
|
|
46
53
|
|
|
47
54
|
"""
|
|
48
55
|
|
masster/sample/load.py
CHANGED
|
@@ -119,7 +119,7 @@ def load_study(
|
|
|
119
119
|
):
|
|
120
120
|
"""
|
|
121
121
|
Optimized load method for study use that skips loading ms1_df for better performance.
|
|
122
|
-
|
|
122
|
+
|
|
123
123
|
This method is identical to load() but uses _load_sample5_study() for .sample5 files,
|
|
124
124
|
which skips reading the potentially large ms1_df dataset to improve throughput when
|
|
125
125
|
adding samples to studies.
|
|
@@ -250,7 +250,13 @@ def _load_mzML(
|
|
|
250
250
|
precursorIsolationWindowLowerMZ = s.getPrecursors()[0].getIsolationWindowLowerOffset()
|
|
251
251
|
precursorIsolationWindowUpperMZ = s.getPrecursors()[0].getIsolationWindowUpperOffset()
|
|
252
252
|
prec_intyensity = s.getPrecursors()[0].getIntensity()
|
|
253
|
-
energy
|
|
253
|
+
# Try to get collision energy from meta values first, fallback to getActivationEnergy()
|
|
254
|
+
try:
|
|
255
|
+
energy = s.getPrecursors()[0].getMetaValue('collision energy')
|
|
256
|
+
if energy is None or energy == 0.0:
|
|
257
|
+
energy = s.getPrecursors()[0].getActivationEnergy()
|
|
258
|
+
except Exception:
|
|
259
|
+
energy = s.getPrecursors()[0].getActivationEnergy()
|
|
254
260
|
|
|
255
261
|
peaks = s.get_peaks()
|
|
256
262
|
spect = Spectrum(mz=peaks[0], inty=peaks[1], ms_level=s.getMSLevel())
|
|
@@ -983,7 +989,7 @@ def index_file(self):
|
|
|
983
989
|
self.set_source(self.file_source.replace(".sample5", ".mzml"))
|
|
984
990
|
else:
|
|
985
991
|
raise FileNotFoundError(
|
|
986
|
-
f"File {self.file_source} not found. Did the path change? Consider running source()."
|
|
992
|
+
f"File {self.file_source} not found. Did the path change? Consider running source().",
|
|
987
993
|
)
|
|
988
994
|
self.index_file()
|
|
989
995
|
else:
|