masster 0.3.19__py3-none-any.whl → 0.3.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (24) hide show
  1. masster/__init__.py +2 -0
  2. masster/_version.py +1 -1
  3. masster/data/libs/README.md +17 -0
  4. masster/data/libs/ccm.py +533 -0
  5. masster/data/libs/central_carbon_README.md +17 -0
  6. masster/data/libs/central_carbon_metabolites.csv +120 -0
  7. masster/data/libs/urine.py +333 -0
  8. masster/data/libs/urine_metabolites.csv +51 -0
  9. masster/sample/lib.py +32 -25
  10. masster/sample/load.py +7 -1
  11. masster/sample/plot.py +111 -26
  12. masster/study/helpers.py +230 -6
  13. masster/study/plot.py +457 -182
  14. masster/study/study.py +4 -0
  15. {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/METADATA +1 -1
  16. {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/RECORD +24 -18
  17. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
  18. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  19. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  20. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  21. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  22. {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/WHEEL +0 -0
  23. {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/entry_points.txt +0 -0
  24. {masster-0.3.19.dist-info → masster-0.3.20.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,120 @@
1
+ Name,Formula,SMILES,InChIKey
2
+ Glucose,C6H11O,OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O,WQZGKKKJIJFFOK-GASJEMHNSA-N
3
+ Glucose-6-phosphate,C6H13O9P,O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,VFRROHXSMXFLSN-SLPGGIOYSA-N
4
+ Fructose-6-phosphate,C6H13O9P,O=C(CO)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,GSXOAOHZAIYLCY-HSUXUTPPSA-N
5
+ "Fructose-1,6-bisphosphate",C6H13O12P,O=P(O)(O)OC[C@H]1O[C@](O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O,RNBGYGVWRKECFJ-ARQDHWQXSA-N
6
+ Glyceraldehyde-3-phosphate,C3H7O6P,O=C[C@H](O)COP(=O)(O)O,LXJXRIRHZLFYRP-VKHMYHEASA-N
7
+ Dihydroxyacetone phosphate,C3H7O6P,O=C(CO)COP(=O)(O)O,GNGACRATGGDKBX-UHFFFAOYSA-N
8
+ 3-Phosphoglycerate,C3H7O7P,O=C(O)C(O)COP(=O)(O)O,OSJPPGNTCRNQQC-UHFFFAOYSA-N
9
+ 2-Phosphoglycerate,C3H7O7P,O=C(O)C(CO)OP(=O)(O)O,GXIURPTVHJPJLF-UHFFFAOYSA-N
10
+ Phosphoenolpyruvate,C3H5O6P,C=C(OP(=O)(O)O)C(=O)O,DTBNBXWJWCWCIK-UHFFFAOYSA-N
11
+ Pyruvate,C3H6O,CC(=O)C(=O)O,LCTONWCANYUPML-UHFFFAOYSA-M
12
+ Lactate,C3H8O,CC(O)C(=O)O,JVTAAEKCZFNVCJ-UHFFFAOYSA-M
13
+ Acetyl-CoA,C23H38N7O17P3S,CC(=O)SCCN=C(O)CCN=C(O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,ZSLZBFCDCINBPY-ZSJPKINUSA-N
14
+ Citric acid,C6H7O,O=C(O)CC(O)(CC(=O)O)C(=O)O,KRKNYBCHXYNGOX-UHFFFAOYSA-N
15
+ Isocitrate,C6H7O,O=C(O)CC(C(=O)O)C(O)C(=O)O,ODBLHEXUDAPZAU-UHFFFAOYSA-N
16
+ Alpha-ketoglutaric acid,C5H5O,O=C(O)CCC(=O)C(=O)O,KPGXRSRHYNQIFN-UHFFFAOYSA-N
17
+ Succinyl-CoA,C25H40N7O19P3S,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(O)=NCCC(O)=NCCSC(=O)CCC(=O)O,VNOYUJKHFWYWIR-ITIYDSSPSA-N
18
+ Succinic acid,C4H5O,O=C(O)CCC(=O)O,KDYFGRWQOYBRFD-UHFFFAOYSA-N
19
+ Fumaric acid,C4H3O,O=C(O)/C=C/C(=O)O,VZCYOOQTPOCHFL-OWOJBTEDSA-N
20
+ Malic acid,C4H5O,O=C(O)CC(O)C(=O)O,BJEPYKJPYRNKOW-UHFFFAOYSA-N
21
+ Oxaloacetic acid,C4H3O,O=C(O)CC(=O)C(=O)O,KHPXUQMNIQBQEV-UHFFFAOYSA-N
22
+ Ribose-5-phosphate,C5H11O8P,O=C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,PPQRONHOSHZGFQ-LMVFSUKVSA-N
23
+ Ribulose-5-phosphate,C5H11O8P,O=C(CO)[C@H](O)[C@H](O)COP(=O)(O)O,FNZLKVNUWIIPSJ-UHNVWZDZSA-N
24
+ Sedoheptulose-7-phosphate,C7H15O10P,O=C(CO)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,JDTUMPKOJBQPKX-GBNDHIKLSA-N
25
+ Erythrose-4-phosphate,C4H9O7P,O=C[C@H](O)[C@H](O)COP(=O)(O)O,NGHMDNPXVRFFGS-IUYQGCFVSA-N
26
+ "Sedoheptulose-1,7-bisphosphate",C7H15O13P,O=C(COP(=O)(O)O)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,OKHXOUGRECCASI-SHUUEZRQSA-N
27
+ Glycerol-3-phosphate,C3H9O6P,O=P(O)(O)OCC(O)CO,AWUCVROLDVIAJX-UHFFFAOYSA-N
28
+ Glycerate,C3H9O,O=C(O)C(O)CO,RBNPOMFGQQGHHO-UHFFFAOYSA-M
29
+ Pentose,C5H9O,OC1COC(O)C(O)C1O,SRBFZHDQGSBBOR-UHFFFAOYSA-N
30
+ Acetaldehyde,C2H4O,CC=O,IKHGUXGNUITLKF-UHFFFAOYSA-N
31
+ Acetic acid,C2H3O,CC(=O)O,QTBSBXVTEAMEQO-UHFFFAOYSA-N
32
+ Alanine,C3H6NO,C[C@H](N)C(=O)O,QNAYBMKLOCPYGJ-REOHCLBHSA-N
33
+ Arginine,C6H13N4O,N=C(N)NCCC[C@H](N)C(=O)O,ODKSFYDXXFIFQN-BYPYZUCNSA-N
34
+ Asparagine,C4H7N2O,N=C(O)C[C@H](N)C(=O)O,DCXYFEDJOCDNAF-REOHCLBHSA-N
35
+ Aspartic acid,C4H6NO,N[C@@H](CC(=O)O)C(=O)O,CKLJMWTZIZZHCS-REOHCLBHSA-N
36
+ Cysteine,C3H7NO2S,N[C@@H](CS)C(=O)O,XUJNEKJLAYXESH-REOHCLBHSA-N
37
+ Glutamic acid,C5H8NO,N[C@@H](CCC(=O)O)C(=O)O,WHUUTDBJXJRKMK-VKHMYHEASA-N
38
+ Glutamine,C5H9N2O,N=C(O)CC[C@H](N)C(=O)O,ZDXPYRJPNDTMRX-VKHMYHEASA-N
39
+ Glycine,C2H4NO,NCC(=O)O,DHMQDGOQFOQNFH-UHFFFAOYSA-N
40
+ Histidine,C6H8N3O,N[C@@H](Cc1cnc[nH]1)C(=O)O,HNDVDQJCIGZPNO-YFKPBYRVSA-N
41
+ Isoleucine,C6H12NO,CC[C@H](C)[C@H](N)C(=O)O,AGPKZVBTJJNPAG-WHFBIAKZSA-N
42
+ Leucine,C6H12NO,CC(C)C[C@H](N)C(=O)O,ROHFNLRQFUQHCH-YFKPBYRVSA-N
43
+ Lysine,C6H13N2O,NCCCC[C@H](N)C(=O)O,KDXKERNSBIXSRK-YFKPBYRVSA-N
44
+ Methionine,C5H11NO2S,CSCC[C@H](N)C(=O)O,FFEARJCKVFRZRR-BYPYZUCNSA-N
45
+ Phenylalanine,C9H10NO,N[C@@H](Cc1ccccc1)C(=O)O,COLNVLDHVKWLRT-QMMMGPOBSA-N
46
+ Proline,C5H8NO,O=C(O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N
47
+ Serine,C3H6NO,N[C@@H](CO)C(=O)O,MTCFGRXMJLQNBG-REOHCLBHSA-N
48
+ Threonine,C4H8NO,C[C@@H](O)[C@H](N)C(=O)O,AYFVYJQAPQTCCC-GBXIJSLDSA-N
49
+ Tryptophan,C11H11N2O,N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O,QIVBCDIJIAJPQS-VIFPVBQESA-N
50
+ Tyrosine,C9H10NO,N[C@@H](Cc1ccc(O)cc1)C(=O)O,OUYCCCASQSFEME-QMMMGPOBSA-N
51
+ Valine,C5H10NO,CC(C)[C@H](N)C(=O)O,KZSNJWFQEVHDMF-BYPYZUCNSA-N
52
+ Ornithine,C5H11N2O,NCCC[C@H](N)C(=O)O,AHLPHDHHMVZTML-BYPYZUCNSA-N
53
+ Citrulline,C6H12N3O,N=C(O)NCCC[C@H](N)C(=O)O,RHGKLRLOHDJJDR-BYPYZUCNSA-N
54
+ Homocysteine,C4H9NO2S,N[C@@H](CCS)C(=O)O,FFFHZYDWPBMWHY-VKHMYHEASA-N
55
+ S-adenosylmethionine,C15H22N6O5S,C[S](CC[C@H](N)C(=O)O)C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,MEFKEPWMEQBLKI-AIRLBKTGSA-N
56
+ S-adenosylhomocysteine,C14H20N6O5S,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,ZJUKTBDSGOFHSH-WFMPWKQPSA-N
57
+ Formic acid,CHO,O=CO,BDAGIHXWWSANSR-UHFFFAOYSA-N
58
+ Propionic acid,C3H5O,CCC(=O)O,XBDQKXXYIPTUBI-UHFFFAOYSA-N
59
+ Butyric acid,C4H7O,CCCC(=O)O,FERIUCNNQQJTOY-UHFFFAOYSA-N
60
+ Malonic acid,C3H3O,O=C(O)CC(=O)O,OFOBLEOULBTSOW-UHFFFAOYSA-N
61
+ 2-Hydroxyglutarate,C5H7O,O=C(O)CCC(O)C(=O)O,HWXBTNAVRSUOJR-UHFFFAOYSA-N
62
+ 3-Hydroxybutyrate,C4H10O,CC(O)CC(=O)O,WHBMMWSBFZVSSR-UHFFFAOYSA-M
63
+ Acetoacetate,C4H8O,CC(=O)CC(=O)O,WDJHALXBUFZDSR-UHFFFAOYSA-M
64
+ Beta-hydroxybutyrate,C4H7O,CC(O)CC(=O)O,WHBMMWSBFZVSSR-UHFFFAOYSA-N
65
+ Pyruvic acid,C3H3O,CC(=O)C(=O)O,LCTONWCANYUPML-UHFFFAOYSA-N
66
+ Lactic acid,C3H5O,CC(O)C(=O)O,JVTAAEKCZFNVCJ-UHFFFAOYSA-N
67
+ Myristic acid,C14H27O,CCCCCCCCCCCCCC(=O)O,TUNFSRHWOTWDNC-UHFFFAOYSA-N
68
+ Palmitic acid,C16H31O,CCCCCCCCCCCCCCCC(=O)O,IPCSVZSSVZVIGE-UHFFFAOYSA-N
69
+ Stearic acid,C18H35O,CCCCCCCCCCCCCCCCCC(=O)O,QIQXTHQIDYTFRH-UHFFFAOYSA-N
70
+ Palmitoleic acid,C16H29O,CCCCCC/C=C\CCCCCCCC(=O)O,SECPZKHBENQXJG-FPLPWBNLSA-N
71
+ Oleic acid,C18H33O,CCCCCCCC/C=C\CCCCCCCC(=O)O,ZQPPMHVWECSIRJ-KTKRTIGZSA-N
72
+ Linoleic acid,C18H31O,CCCCC/C=C\C/C=C\CCCCCCCC(=O)O,OYHQOLUKZRVURQ-HZJYTTRNSA-N
73
+ Alpha-linolenic acid,C18H29O,CC/C=C\C/C=C\C/C=C\CCCCCCCC(=O)O,DTOSIQBPPRVQHS-PDBXOOCHSA-N
74
+ Arachidonic acid,C20H31O,CCCCC/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)O,YZXBAPSDXZZRGB-DOFZRALJSA-N
75
+ Adenine,C5H4N,Nc1nc[nH]c2ncnc1-2,GFFGJBXGBJISGV-UHFFFAOYSA-N
76
+ Guanine,C5H5N5O,N=c1nc(O)c2nc[nH]c2[nH]1,UYTPUPDQBNUYGX-UHFFFAOYSA-N
77
+ Cytosine,C4H5N3O,N=c1ccnc(O)[nH]1,OPTASPLRGRRNAP-UHFFFAOYSA-N
78
+ Thymine,C5H5N2O,Cc1cnc(O)nc1O,RWQNBRDOKXIBIV-UHFFFAOYSA-N
79
+ Uracil,C4H3N2O,Oc1ccnc(O)n1,ISAKRJDGNUQOIC-UHFFFAOYSA-N
80
+ Adenosine,C10H12N5O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,OIRDTQYFTABQOQ-KQYNXXCUSA-N
81
+ Guanosine,C10H12N5O,N=c1nc(O)c2ncn([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O)c2[nH]1,NYHBQMYGNKIUIF-UUOKFMHZSA-N
82
+ Cytidine,C9H12N3O,N=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(O)n1,UHDGCWIWMRVCDJ-XVFCMESISA-N
83
+ Uridine,C9H11N2O,O=c1nc(O)ccn1[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,DRTQHJPVMGBUCF-XVFCMESISA-N
84
+ AMP,C10H14N5O7P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,UDMBCSSLTHHNCD-KQYNXXCUSA-N
85
+ ADP,C10H14N5O10P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,XTWYTFMLZFPYCI-KQYNXXCUSA-N
86
+ ATP,C10H15N5O13P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,ZKHQWZAMYRWXGA-KQYNXXCUSA-N
87
+ GMP,C10H14N5O8P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,RQFCJASXJCIDSX-UUOKFMHZSA-N
88
+ GDP,C10H14N5O11P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,QGWNDRXFNXRZMB-UUOKFMHZSA-N
89
+ GTP,C10H15N5O14P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,XKMLYUALXHKNFT-UUOKFMHZSA-N
90
+ CMP,C9H14N3O8P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,IERHLVCPSMICTF-XVFCMESISA-N
91
+ CDP,C9H14N3O11P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,ZWIADYZPOWUWEW-XVFCMESISA-N
92
+ CTP,C9H15N3O14P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,PCDQPRRSZKQHHS-XVFCMESISA-N
93
+ UMP,C9H13N2O9P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,DJJCXFVJDGTHFX-XVFCMESISA-N
94
+ UDP,C9H13N2O12P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,XCCTYIAWTASOJW-XVFCMESISA-N
95
+ UTP,C9H14N2O15P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,PGAVKCOVUIYSFO-XVFCMESISA-N
96
+ NAD+,C21H26N7O14P,N=C(O)C1CCCN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,BAWFJGJZGIEFAR-NNYOXOHSSA-N
97
+ NADH,C21H28N7O14P,N=C(O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,BOPGDPNILDQYTO-NNYOXOHSSA-N
98
+ NADP+,C21H27N7O17P,N=C(O)C1CCCN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,XJLXINKUBYWONI-NNYOXOHSSA-N
99
+ NADPH,C21H29N7O17P,N=C(O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,ACFIXJIJDZMPPO-NNYOXOHSSA-N
100
+ FAD,C27H32N9O15P,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C,VWWQXMAJTJZDQX-UYBVJOGSSA-N
101
+ FMN,C17H21N4O9P,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C,FVTCRASFADXXNN-SCRDCRAPSA-N
102
+ Coenzyme A,C21H36N7O16P3S,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(O)=NCCC(O)=NCCS,RGJOEKWQDUBAIZ-IBOSZNHHSA-N
103
+ Pantothenic acid,C9H16NO,CC(C)(CO)[C@@H](O)C(O)=NCCC(=O)O,GHOKWGTUZJEAQD-ZETCQYMHSA-N
104
+ Riboflavin,C17H19N4O,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C,AUNGANRZJHBGPY-SCRDCRAPSA-N
105
+ Niacin,C6H4NO,O=C(O)c1cccnc1,PVNIIMVLHYAWGP-UHFFFAOYSA-N
106
+ Fructose,C6H11O,OCC1(O)OC[C@@H](O)[C@@H](O)[C@@H]1O,LKDRXBCSQODPBY-VRPWFDPXSA-N
107
+ Mannose,C6H11O,OC[C@H]1OC(O)[C@@H](O)[C@@H](O)[C@@H]1O,WQZGKKKJIJFFOK-QTVWNMPRSA-N
108
+ Mannose-6-phosphate,C6H13O9P,O=C[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,VFRROHXSMXFLSN-KVTDHHQDSA-N
109
+ Ribose,C5H9O,OC1OC[C@@H](O)[C@@H](O)[C@H]1O,SRBFZHDQGSBBOR-SOOFDHNKSA-N
110
+ Glucosamine,C6H12NO,N[C@H]1C(O)O[C@H](CO)[C@@H](O)[C@@H]1O,MSWZFWKMSRAUBD-IVMDWMLBSA-N
111
+ N-acetylglucosamine,C8H14NO,CC(O)=N[C@@H](C=O)[C@@H](O)[C@H](O)[C@H](O)CO,MBLBDJOUHNCFQT-LXGUWJNJSA-N
112
+ Choline,C5H13NO,[H]OC([H])([H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],OEYIOHPDSNJKLS-UHFFFAOYSA-N
113
+ Betaine,C5H10NO,[H]OC(=O)C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],KWIUHFFTVRNATP-UHFFFAOYSA-N
114
+ Carnitine,C7H14NO,[H]OC(=O)C([H])([H])C([H])(O[H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],PHIQHXFUZVPYII-UHFFFAOYSA-N
115
+ Phosphocholine,C5H14NO4P,[H]OP(=O)(O[H])OC([H])([H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],YHHSONZFOIEMCP-UHFFFAOYSA-O
116
+ Glycerol,C3H7O,OCC(O)CO,PEDCQBHIVMGVHV-UHFFFAOYSA-N
117
+ Sorbitol,C6H13O,OC[C@@H](O)[C@@H](O)[C@H](O)[C@@H](O)CO,FBPFZTCFMRRESA-JGWLITMVSA-N
118
+ Inositol,C6H11O,OC1C(O)C(O)C(O)C(O)C1O,CDAISMWEOUEBRE-UHFFFAOYSA-N
119
+ Cholesterol,C27H46O,CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,HVYWMOMLDIMFJA-DPAQBDIFSA-N
120
+ Pantothenate,C9H21NO,CC(C)(CO)C(O)C(O)=NCCC(=O)O,GHOKWGTUZJEAQD-UHFFFAOYSA-M
@@ -0,0 +1,333 @@
1
+ """Generate a CSV of human urine metabolites.
2
+
3
+ This improved script attempts to:
4
+ - Download or scrape a urine metabolite list from the UrineMetabolome downloads page.
5
+ - Fall back to HMDB scraping or a curated list if needed.
6
+ - Resolve formula/SMILES/InChIKey using PubChem with bounded parallelism.
7
+ - Use RDKit (if available) to convert InChI -> SMILES when PubChem does not provide SMILES.
8
+
9
+ The goal is robust coverage and faster lookups by parallelizing per-name queries
10
+ while avoiding aggressive parallelism that might overload PubChem.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import csv
16
+ import sys
17
+ import re
18
+ import time
19
+ import os
20
+ from urllib.parse import quote, urljoin
21
+ from concurrent.futures import ThreadPoolExecutor, as_completed
22
+ from typing import Iterable
23
+
24
+ try:
25
+ import requests
26
+ from bs4 import BeautifulSoup
27
+ except Exception:
28
+ requests = None
29
+ BeautifulSoup = None
30
+
31
+
32
+ URINEMETABOLOME_DOWNLOADS = "https://www.urinemetabolome.ca/downloads"
33
+ HMDB_URINE_LIST_URL = "https://hmdb.ca/metabolites?utf8=%E2%9C%93&search=&biological_context=Urine"
34
+
35
+
36
def normalize_name(name: str) -> str:
    """Return a cleaned-up form of *name* suitable for a name lookup.

    Parenthesised fragments are removed, arrow/dash glyphs are replaced by
    ASCII equivalents, whitespace runs are collapsed to single spaces, and a
    handful of nucleotide/cofactor abbreviations (matched case-insensitively)
    are expanded to their full names. Falsy input is returned unchanged.
    """
    if not name:
        return name

    abbreviations = {
        "AMP": "Adenosine monophosphate",
        "ADP": "Adenosine diphosphate",
        "ATP": "Adenosine triphosphate",
        "GMP": "Guanosine monophosphate",
        "GDP": "Guanosine diphosphate",
        "GTP": "Guanosine triphosphate",
        "NAD+": "Nicotinamide adenine dinucleotide",
        "NADH": "Nicotinamide adenine dinucleotide (reduced)",
    }

    # Drop "(...)" qualifiers first so they cannot block an abbreviation match.
    cleaned = re.sub(r"\(.*?\)", "", name)
    for glyph, ascii_form in (("➔", "->"), ("–", "-"), ("—", "-")):
        cleaned = cleaned.replace(glyph, ascii_form)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()

    # Keys are upper-case, so the lookup is case-insensitive.
    return abbreviations.get(cleaned.upper(), cleaned)
57
+
58
+
59
def _names_from_delimited_text(text: str, limit: int) -> list[str]:
    """Extract up to *limit* unique names from CSV/TSV *text*."""
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if not lines:
        return []

    sep = "," if "," in lines[0] else "\t"
    # csv.reader honours quoting, so names such as "1,3-Diaminopropane"
    # survive instead of being cut at the embedded comma.
    rows = csv.reader(lines, delimiter=sep)
    header = [cell.strip().lower() for cell in next(rows)]

    # First header cell that looks like a name column; default to column 0.
    col_idx = next(
        (
            i
            for i, cell in enumerate(header)
            if any(key in cell for key in ("name", "metabolite", "compound"))
        ),
        0,
    )

    names: list[str] = []
    seen: set[str] = set()
    for row_no, row in enumerate(rows):
        if row_no >= limit:  # only the first `limit` data rows are considered
            break
        if len(row) <= col_idx:
            continue
        candidate = row[col_idx].strip()
        if candidate and candidate not in seen:
            seen.add(candidate)
            names.append(candidate)
            if len(names) >= limit:
                break
    return names


def fetch_urinemetabolome_names(limit: int = 2000) -> list[str]:
    """Scrape the UrineMetabolome downloads page for a metabolite name list.

    Best-effort: every link on the downloads page that ends in ``.csv`` or
    ``.tsv`` is downloaded in page order and parsed as delimited text. The
    first file that yields at least one name wins. Excel links are skipped
    because a binary workbook cannot be parsed as delimited text.

    Args:
        limit: Maximum number of data rows read and names returned.

    Returns:
        Unique names in file order, or an empty list when the optional
        dependencies are missing, nothing usable is found, or any error
        occurs (the caller is expected to fall back to another source).
    """
    if requests is None or BeautifulSoup is None:
        return []
    try:
        r = requests.get(URINEMETABOLOME_DOWNLOADS, timeout=15)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        for a in soup.find_all("a", href=True):
            href = a["href"]
            if not re.search(r"\.(?:csv|tsv)$", href, re.I):
                continue
            url = urljoin(URINEMETABOLOME_DOWNLOADS, href)
            try:
                rr = requests.get(url, timeout=20)
                if rr.status_code != 200:
                    continue
                text = rr.content.decode("utf-8", errors="ignore")
                names = _names_from_delimited_text(text, limit)
            except Exception:
                # Deliberate best-effort: a broken file must not stop the scan.
                continue
            if names:
                return names
        return []
    except Exception:
        # Deliberate best-effort: signal "nothing found" so the caller falls back.
        return []
113
+
114
+
115
def fetch_hmdb_urine_names(limit: int = 500) -> list[str]:
    """Fallback HMDB scrape (best-effort).

    Collects the text of every anchor whose ``href`` contains
    ``/metabolites/HMDB`` on the HMDB urine listing page.

    Anchors whose text is only an accession ID (e.g. ``HMDB0000001``) are
    skipped: they are identifiers rather than names and cannot be resolved by
    a name lookup. Duplicates are dropped before counting towards *limit*, so
    up to *limit* distinct names are returned.

    Args:
        limit: Maximum number of distinct names to return.

    Returns:
        Distinct names in page order, or an empty list when the optional
        dependencies are missing or any error occurs.
    """
    if requests is None or BeautifulSoup is None:
        return []
    try:
        r = requests.get(HMDB_URINE_LIST_URL, timeout=20)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        # A dict is used as an insertion-ordered set.
        names: dict[str, None] = {}
        for a in soup.find_all("a", href=True):
            if not re.search(r"/metabolites/HMDB", a["href"]):
                continue
            text = a.get_text(strip=True)
            if not text or len(text) <= 1:
                continue
            if re.fullmatch(r"HMDB\d+", text):
                continue  # bare accession ID, not a metabolite name
            names[text] = None
            if len(names) >= limit:
                break
        return list(names)
    except Exception:
        # Deliberate best-effort: the caller falls back to a curated list.
        return []
135
+
136
+
137
_PUBCHEM_COMPOUND_URL = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
_PUBCHEM_PROPERTIES = "MolecularFormula,CanonicalSMILES,InChI,InChIKey"


def _pubchem_first_record(namespace: str, identifier, timeout: int):
    """Return the first property record for *identifier*, or None if unavailable."""
    url = (
        f"{_PUBCHEM_COMPOUND_URL}/{namespace}/{identifier}/property/"
        + f"{_PUBCHEM_PROPERTIES}/JSON"
    )
    resp = requests.get(url, timeout=timeout)
    if resp.status_code != 200:
        return None
    payload = resp.json()
    if "PropertyTable" in payload and "Properties" in payload["PropertyTable"]:
        return payload["PropertyTable"]["Properties"][0]
    return None


def _pubchem_first_cid(inchikey_quoted: str, timeout: int):
    """Return the first CID listed for an (already URL-quoted) InChIKey, or None."""
    url = f"{_PUBCHEM_COMPOUND_URL}/inchikey/{inchikey_quoted}/cids/JSON"
    resp = requests.get(url, timeout=timeout)
    if resp.status_code != 200:
        return None
    payload = resp.json()
    if (
        "IdentifierList" in payload
        and "CID" in payload["IdentifierList"]
        and payload["IdentifierList"]["CID"]
    ):
        return payload["IdentifierList"]["CID"][0]
    return None


def _record_smiles(record):
    """Return the SMILES string carried by a PubChem property record, if any."""
    # NOTE(review): newer PUG REST responses appear to return the value
    # requested as CanonicalSMILES under the key "ConnectivitySMILES";
    # confirm against the current PUG REST documentation.
    return record.get("CanonicalSMILES") or record.get("ConnectivitySMILES")


def _inchi_to_smiles(inchi: str):
    """Convert *inchi* to an isomeric SMILES with RDKit; None if that is not possible."""
    try:
        from rdkit import Chem  # optional dependency, imported lazily

        mol = Chem.MolFromInchi(inchi)
        if mol is None:
            return None
        try:
            Chem.SanitizeMol(mol)
        except Exception:
            pass  # an unsanitised molecule can still be written out
        return Chem.MolToSmiles(mol, isomericSmiles=True)
    except Exception:
        return None


def fetch_pubchem_name_once(name: str, timeout: int = 15):
    """Fetch properties for a single name from PubChem.

    The name is normalised and looked up by name. When the answer carries no
    SMILES, three fallbacks are tried in order until one succeeds:

    1. convert the returned InChI with RDKit (if installed),
    2. look the compound up again by its InChIKey,
    3. resolve the InChIKey to a CID and look that up.

    Args:
        name: Compound name as found in the source list.
        timeout: Per-request timeout in seconds.

    Returns:
        ``(formula, smiles, inchikey)``; individual items may be None.
        ``(None, None, None)`` when ``requests`` is unavailable, the name
        lookup fails, or it returns no record.
    """
    if requests is None:
        return (None, None, None)
    url_name = quote(normalize_name(name))
    try:
        record = _pubchem_first_record("name", url_name, timeout)
        if record is None:
            return (None, None, None)
        formula = record.get("MolecularFormula")
        smiles = _record_smiles(record)
        inchi = record.get("InChI")
        inchikey = record.get("InChIKey")
    except Exception:
        # Best-effort lookup: network/JSON problems mean "not resolved".
        return (None, None, None)

    if not smiles and inchi:
        smiles = _inchi_to_smiles(inchi)

    # If still missing SMILES, and we have an InChIKey, try inchikey -> property
    if not smiles and inchikey:
        try:
            alt = _pubchem_first_record("inchikey", quote(inchikey), timeout)
            if alt is not None:
                smiles = _record_smiles(alt) or smiles
                formula = formula or alt.get("MolecularFormula")
        except Exception:
            pass  # fallback only; keep what the name lookup returned

    # Final fallback: inchikey -> cids -> cid -> property
    if not smiles and inchikey:
        try:
            cid = _pubchem_first_cid(quote(inchikey), timeout)
            if cid is not None:
                by_cid = _pubchem_first_record("cid", cid, timeout)
                if by_cid is not None:
                    smiles = _record_smiles(by_cid) or smiles
                    formula = formula or by_cid.get("MolecularFormula")
        except Exception:
            pass  # fallback only; keep what the name lookup returned

    return (formula, smiles, inchikey)
224
+
225
+
226
def fetch_pubchem_for_names(names: Iterable[str], workers: int = 8, delay: float = 0.05):
    """Resolve a list of names via PubChem in parallel.

    Each distinct name is looked up once. Submissions to the thread pool are
    spaced *delay* seconds apart so that the delay limits how fast requests
    are started; sleeping while collecting results would not slow the
    workers down.

    Args:
        names: Names to resolve; duplicates are resolved only once.
        workers: Maximum number of concurrent lookups.
        delay: Pause in seconds between two submissions (values <= 0 disable it).

    Returns:
        dict name -> (formula, smiles, inchikey); a lookup that raised maps
        to ``(None, None, None)``.
    """
    unique_names = list(dict.fromkeys(names))
    results: dict[str, tuple | None] = {}
    if not unique_names:
        return results

    with ThreadPoolExecutor(max_workers=workers) as ex:
        futures = {}
        for n in unique_names:
            futures[ex.submit(fetch_pubchem_name_once, n)] = n
            if delay > 0:
                time.sleep(delay)  # pace request starts, not result collection
        for fut in as_completed(futures):
            try:
                res = fut.result()
            except Exception:
                res = (None, None, None)
            results[futures[fut]] = res
    return results
244
+
245
+
246
def generate_csv(out_path: str = "urine_metabolites.csv", workers: int = 8):
    """Build the urine metabolite CSV and return the path it was written to.

    Names come from the UrineMetabolome downloads page, else from the HMDB
    scrape, else from a short built-in list. Each name is resolved through
    PubChem and written with the columns Name, Formula, SMILES and InChIKey
    (unresolved values become empty strings).

    Only the file-name part of *out_path* is used: the file is always written
    to ``masster/data/libs`` relative to the current working directory, and
    that directory is created when missing.
    """
    # Sources in order of preference; later ones only run if earlier ones are empty.
    names = fetch_urinemetabolome_names() or fetch_hmdb_urine_names()
    if not names:
        print("Falling back to curated urine list")
        names = [
            "Creatinine",
            "Urea",
            "Hippuric acid",
            "Citrate",
            "Creatine",
            "Glycine",
            "Taurine",
            "Succinate",
            "Fumaric acid",
            "Malic acid",
            "Lactic acid",
            "Acetic acid",
            "Formic acid",
            "Alanine",
            "Betaine",
            "Choline",
            "Trimethylamine N-oxide",
            "Phenylacetylglutamine",
            "p-Cresol sulfate",
            "Indoxyl sulfate",
            "Uric acid",
            "Xanthine",
            "3-Hydroxybutyrate",
            "Acetoacetate",
            "N-Acetylneuraminic acid",
        ]

    print(f"Resolving {len(names)} names via PubChem (workers={workers})...")
    resolved = fetch_pubchem_for_names(names, workers=workers)

    unresolved = (None, None, None)
    # The one-element inner loop unpacks each record into its three fields.
    rows = [
        {
            "Name": metabolite,
            "Formula": formula or "",
            "SMILES": smiles or "",
            "InChIKey": inchikey or "",
        }
        for metabolite in names
        for formula, smiles, inchikey in (resolved.get(metabolite, unresolved),)
    ]

    # Target is always the package's library folder, whatever directory out_path names.
    target_dir = os.path.join("masster", "data", "libs")
    os.makedirs(target_dir, exist_ok=True)
    out_path_full = os.path.join(target_dir, os.path.basename(out_path))

    with open(out_path_full, "w", newline="", encoding="utf-8") as handle:
        table = csv.DictWriter(handle, fieldnames=["Name", "Formula", "SMILES", "InChIKey"])
        table.writeheader()
        table.writerows(rows)

    print(f"Wrote {len(rows)} entries to {out_path_full}")
    return out_path_full
308
+
309
+
310
def test_load_with_lib(csv_path: str):
    """Check that *csv_path* can be loaded through ``masster.lib.Lib``.

    Prints a short status message and returns True when the import and the
    CSV load both succeed. Returns False when ``Lib`` cannot be imported or
    loading raises.
    """
    try:
        from masster.lib import Lib
    except Exception as import_error:
        print(f"Cannot import masster.lib.Lib: {import_error}")
        return False

    loaded = False
    try:
        library = Lib()
        library.import_csv(csv_path, polarity=None)
        print(f"Lib loaded: {len(library)} entries")
        loaded = True
    except Exception as load_error:
        print(f"Failed to load CSV with Lib.import_csv: {load_error}")
    return loaded
325
+
326
+
327
if __name__ == "__main__":
    # Generate the CSV, then verify it loads; exit status 2 signals a failed check.
    if not test_load_with_lib(generate_csv()):
        print("Test failed; please inspect messages above.")
        sys.exit(2)
    print("Done.")
@@ -0,0 +1,51 @@
1
+ Name,Formula,SMILES,InChIKey
2
+ HMDB0000001,,,
3
+ 1-Methylhistidine,C7H11N3O2,Cn1cnc(C[C@H](N)C(=O)O)c1,BRMWTNUJHUMWMS-LURJTMIESA-N
4
+ HMDB0000002,,,
5
+ "1,3-Diaminopropane",C3H10N2,NCCCN,XFNJVJPLKCPIBV-UHFFFAOYSA-N
6
+ HMDB0000005,,,
7
+ 2-Ketobutyric acid,C4H6O3,CCC(=O)C(=O)O,TYEYBOSBBBHJIV-UHFFFAOYSA-N
8
+ HMDB0000008,,,
9
+ 2-Hydroxybutyric acid,C4H8O3,CCC(O)C(=O)O,AFENDNXGAFYKQO-UHFFFAOYSA-N
10
+ HMDB0000010,,,
11
+ 2-Methoxyestrone,,,
12
+ HMDB0000011,,,
13
+ 3-Hydroxybutyric acid,,,
14
+ HMDB0000012,,,
15
+ Deoxyuridine,,,
16
+ HMDB0000014,,,
17
+ Deoxycytidine,,,
18
+ HMDB0000015,,,
19
+ Cortexolone,,,
20
+ HMDB0000016,,,
21
+ Deoxycorticosterone,,,
22
+ HMDB0000017,,,
23
+ 4-Pyridoxic acid,C8H9NO4,Cc1ncc(CO)c(C(=O)O)c1O,HXACOUQIXZGNBF-UHFFFAOYSA-N
24
+ HMDB0000019,,,
25
+ alpha-Ketoisovaleric acid,C5H8O3,CC(C)C(=O)C(=O)O,QHKABHOOEWYVLI-UHFFFAOYSA-N
26
+ HMDB0000020,,,
27
+ p-Hydroxyphenylacetic acid,C8H8O3,O=C(O)Cc1ccc(O)cc1,XQXPVVBIMDBYFF-UHFFFAOYSA-N
28
+ HMDB0000021,,,
29
+ Iodotyrosine,C9H10INO3,N[C@@H](Cc1ccc(O)c(I)c1)C(=O)O,UQTZMGFTRHFAAM-ZETCQYMHSA-N
30
+ HMDB0000022,,,
31
+ 3-Methoxytyramine,C9H13NO2,COc1cc(CCN)ccc1O,DIVQKHQLANKJQO-UHFFFAOYSA-N
32
+ HMDB0000023,,,
33
+ (S)-3-Hydroxyisobutyric acid,,,
34
+ HMDB0000024,,,
35
+ 3-O-Sulfogalactosylceramide (d18:1/24:0),,,
36
+ HMDB0000026,,,
37
+ Ureidopropionic acid,,,
38
+ HMDB0000027,,,
39
+ Tetrahydrobiopterin,,,
40
+ HMDB0000030,,,
41
+ Biotin,C10H16N2O3S,O=C(O)CCCC[C@@H]1SC[C@@H]2N=C(O)N[C@@H]21,YBJHBAHKTGYVGT-ZKWXMUAHSA-N
42
+ HMDB0000031,,,
43
+ Androsterone,C19H30O2,C[C@]12CC[C@@H](O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)C(=O)CC[C@@H]12,QGXBDMJGAMFCBF-HLUDHZFRSA-N
44
+ HMDB0000032,,,
45
+ 7-Dehydrocholesterol,C27H44O,CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2C3=CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,UCTLRSWJYQTBFZ-DDPQNLDTSA-N
46
+ HMDB0000033,,,
47
+ Carnosine,C9H14N4O3,NCCC(O)=N[C@@H](Cc1cnc[nH]1)C(=O)O,CQOVPNPJLQNMDC-ZETCQYMHSA-N
48
+ HMDB0000034,,,
49
+ Adenine,C5H5N5,Nc1nc[nH]c2ncnc1-2,GFFGJBXGBJISGV-UHFFFAOYSA-N
50
+ HMDB0000036,,,
51
+ Taurocholic acid,,,
masster/sample/lib.py CHANGED
@@ -1,28 +1,32 @@
1
1
  """
2
- _lib.py
2
+ lib.py
3
3
 
4
- This module provides utility functions and algorithms for mass spectrometry data processing.
5
- It contains core functionality for compound library management, target identification,
6
- adduct handling, and various analytical operations used throughout the masster package.
4
+ This module provides the Lib class and utility functions for mass spectrometry compound library
5
+ management and feature annotation. It contains core functionality for compound library management,
6
+ target identification, adduct handling, and various analytical operations.
7
7
 
8
8
  Key Features:
9
- - **Compound Libraries**: Load and manage compound databases with metadata.
10
- - **Adduct Calculations**: Handle various ionization adducts and charge states.
11
- - **Mass Calculations**: Precise mass calculations with adduct corrections.
12
- - **Target Matching**: Match detected features against compound libraries.
13
- - **Polarity Handling**: Support for positive and negative ionization modes.
14
- - **Database Integration**: Interface with various compound database formats.
9
+ - **Lib Class**: Main class for managing compound libraries and annotations
10
+ - **Compound Libraries**: Load and manage compound databases with metadata
11
+ - **Adduct Calculations**: Handle various ionization adducts and charge states
12
+ - **Mass Calculations**: Precise mass calculations with adduct corrections
13
+ - **Target Matching**: Match detected features against compound libraries
14
+ - **Polarity Handling**: Support for positive and negative ionization modes
15
+ - **Database Integration**: Interface with various compound database formats
15
16
 
16
17
  Dependencies:
17
- - `pyopenms`: For mass spectrometry algorithms and data structures.
18
- - `polars` and `pandas`: For efficient data manipulation and analysis.
19
- - `numpy`: For numerical computations and array operations.
20
- - `tqdm`: For progress tracking during batch operations.
18
+ - `pyopenms`: For mass spectrometry algorithms and data structures
19
+ - `polars` and `pandas`: For efficient data manipulation and analysis
20
+ - `numpy`: For numerical computations and array operations
21
+ - `tqdm`: For progress tracking during batch operations
22
+
23
+ Classes:
24
+ - `Lib`: Main class for compound library management and annotation
21
25
 
22
26
  Functions:
23
- - `lib_load()`: Load compound libraries from CSV files.
24
- - `load_lib()`: Alias for lib_load function.
25
- - Various utility functions for mass calculations and library management.
27
+ - `lib_load()`: Load compound libraries from CSV files (legacy)
28
+ - `load_lib()`: Alias for lib_load function (legacy)
29
+ - Various utility functions for mass calculations and library management
26
30
 
27
31
  Supported Adducts:
28
32
  - Positive mode: [M+H]+, [M+Na]+, [M+K]+, [M+NH4]+, [M-H2O+H]+
@@ -30,19 +34,22 @@ Supported Adducts:
30
34
 
31
35
  Example Usage:
32
36
  ```python
33
- from _lib import lib_load
37
+ from masster.sample.lib import Lib
38
+
39
+ # Create library instance
40
+ lib = Lib()
34
41
 
35
- # Load compound library
36
- lib_load(self, csvfile="compounds.csv", polarity="positive")
42
+ # Import compounds from CSV
43
+ lib.import_csv("compounds.csv", polarity="positive")
37
44
 
38
- # Access loaded library data
39
- print(f"Loaded {len(self.lib_df)} compounds")
40
- print(self.lib_df.head())
45
+ # Access library data
46
+ print(f"Loaded {len(lib.lib_df)} compounds")
47
+ print(lib.lib_df.head())
41
48
  ```
42
49
 
43
50
  See Also:
44
- - `parameters._lib_parameters`: For library-specific parameter configuration.
45
- - `single.py`: For applying library matching to detected features.
51
+ - `parameters._lib_parameters`: For library-specific parameter configuration
52
+ - `sample.py`: For applying library matching to detected features
46
53
 
47
54
  """
48
55
 
masster/sample/load.py CHANGED
@@ -250,7 +250,13 @@ def _load_mzML(
250
250
  precursorIsolationWindowLowerMZ = s.getPrecursors()[0].getIsolationWindowLowerOffset()
251
251
  precursorIsolationWindowUpperMZ = s.getPrecursors()[0].getIsolationWindowUpperOffset()
252
252
  prec_intyensity = s.getPrecursors()[0].getIntensity()
253
- energy = s.getPrecursors()[0].getActivationEnergy()
253
+ # Try to get collision energy from meta values first, fallback to getActivationEnergy()
254
+ try:
255
+ energy = s.getPrecursors()[0].getMetaValue('collision energy')
256
+ if energy is None or energy == 0.0:
257
+ energy = s.getPrecursors()[0].getActivationEnergy()
258
+ except Exception:
259
+ energy = s.getPrecursors()[0].getActivationEnergy()
254
260
 
255
261
  peaks = s.get_peaks()
256
262
  spect = Spectrum(mz=peaks[0], inty=peaks[1], ms_level=s.getMSLevel())