masster 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (31)
  1. masster/__init__.py +2 -0
  2. masster/_version.py +1 -1
  3. masster/data/libs/README.md +17 -0
  4. masster/data/libs/ccm.py +533 -0
  5. masster/data/libs/central_carbon_README.md +17 -0
  6. masster/data/libs/central_carbon_metabolites.csv +120 -0
  7. masster/data/libs/urine.py +333 -0
  8. masster/data/libs/urine_metabolites.csv +51 -0
  9. masster/sample/h5.py +1 -1
  10. masster/sample/helpers.py +3 -7
  11. masster/sample/lib.py +32 -25
  12. masster/sample/load.py +9 -3
  13. masster/sample/plot.py +113 -27
  14. masster/study/export.py +27 -10
  15. masster/study/h5.py +58 -40
  16. masster/study/helpers.py +450 -196
  17. masster/study/helpers_optimized.py +5 -5
  18. masster/study/load.py +144 -118
  19. masster/study/plot.py +691 -277
  20. masster/study/processing.py +9 -5
  21. masster/study/study.py +6 -6
  22. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/METADATA +1 -1
  23. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/RECORD +31 -25
  24. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
  25. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  26. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  27. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  28. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  29. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/WHEEL +0 -0
  30. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/entry_points.txt +0 -0
  31. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,120 @@
1
+ Name,Formula,SMILES,InChIKey
2
+ Glucose,C6H11O,OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O,WQZGKKKJIJFFOK-GASJEMHNSA-N
3
+ Glucose-6-phosphate,C6H13O9P,O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,VFRROHXSMXFLSN-SLPGGIOYSA-N
4
+ Fructose-6-phosphate,C6H13O9P,O=C(CO)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,GSXOAOHZAIYLCY-HSUXUTPPSA-N
5
+ "Fructose-1,6-bisphosphate",C6H13O12P,O=P(O)(O)OC[C@H]1O[C@](O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O,RNBGYGVWRKECFJ-ARQDHWQXSA-N
6
+ Glyceraldehyde-3-phosphate,C3H7O6P,O=C[C@H](O)COP(=O)(O)O,LXJXRIRHZLFYRP-VKHMYHEASA-N
7
+ Dihydroxyacetone phosphate,C3H7O6P,O=C(CO)COP(=O)(O)O,GNGACRATGGDKBX-UHFFFAOYSA-N
8
+ 3-Phosphoglycerate,C3H7O7P,O=C(O)C(O)COP(=O)(O)O,OSJPPGNTCRNQQC-UHFFFAOYSA-N
9
+ 2-Phosphoglycerate,C3H7O7P,O=C(O)C(CO)OP(=O)(O)O,GXIURPTVHJPJLF-UHFFFAOYSA-N
10
+ Phosphoenolpyruvate,C3H5O6P,C=C(OP(=O)(O)O)C(=O)O,DTBNBXWJWCWCIK-UHFFFAOYSA-N
11
+ Pyruvate,C3H6O,CC(=O)C(=O)O,LCTONWCANYUPML-UHFFFAOYSA-M
12
+ Lactate,C3H8O,CC(O)C(=O)O,JVTAAEKCZFNVCJ-UHFFFAOYSA-M
13
+ Acetyl-CoA,C23H38N7O17P3S,CC(=O)SCCN=C(O)CCN=C(O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,ZSLZBFCDCINBPY-ZSJPKINUSA-N
14
+ Citric acid,C6H7O,O=C(O)CC(O)(CC(=O)O)C(=O)O,KRKNYBCHXYNGOX-UHFFFAOYSA-N
15
+ Isocitrate,C6H7O,O=C(O)CC(C(=O)O)C(O)C(=O)O,ODBLHEXUDAPZAU-UHFFFAOYSA-N
16
+ Alpha-ketoglutaric acid,C5H5O,O=C(O)CCC(=O)C(=O)O,KPGXRSRHYNQIFN-UHFFFAOYSA-N
17
+ Succinyl-CoA,C25H40N7O19P3S,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(O)=NCCC(O)=NCCSC(=O)CCC(=O)O,VNOYUJKHFWYWIR-ITIYDSSPSA-N
18
+ Succinic acid,C4H5O,O=C(O)CCC(=O)O,KDYFGRWQOYBRFD-UHFFFAOYSA-N
19
+ Fumaric acid,C4H3O,O=C(O)/C=C/C(=O)O,VZCYOOQTPOCHFL-OWOJBTEDSA-N
20
+ Malic acid,C4H5O,O=C(O)CC(O)C(=O)O,BJEPYKJPYRNKOW-UHFFFAOYSA-N
21
+ Oxaloacetic acid,C4H3O,O=C(O)CC(=O)C(=O)O,KHPXUQMNIQBQEV-UHFFFAOYSA-N
22
+ Ribose-5-phosphate,C5H11O8P,O=C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,PPQRONHOSHZGFQ-LMVFSUKVSA-N
23
+ Ribulose-5-phosphate,C5H11O8P,O=C(CO)[C@H](O)[C@H](O)COP(=O)(O)O,FNZLKVNUWIIPSJ-UHNVWZDZSA-N
24
+ Sedoheptulose-7-phosphate,C7H15O10P,O=C(CO)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,JDTUMPKOJBQPKX-GBNDHIKLSA-N
25
+ Erythrose-4-phosphate,C4H9O7P,O=C[C@H](O)[C@H](O)COP(=O)(O)O,NGHMDNPXVRFFGS-IUYQGCFVSA-N
26
+ "Sedoheptulose-1,7-bisphosphate",C7H15O13P,O=C(COP(=O)(O)O)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,OKHXOUGRECCASI-SHUUEZRQSA-N
27
+ Glycerol-3-phosphate,C3H9O6P,O=P(O)(O)OCC(O)CO,AWUCVROLDVIAJX-UHFFFAOYSA-N
28
+ Glycerate,C3H9O,O=C(O)C(O)CO,RBNPOMFGQQGHHO-UHFFFAOYSA-M
29
+ Pentose,C5H9O,OC1COC(O)C(O)C1O,SRBFZHDQGSBBOR-UHFFFAOYSA-N
30
+ Acetaldehyde,C2H4O,CC=O,IKHGUXGNUITLKF-UHFFFAOYSA-N
31
+ Acetic acid,C2H3O,CC(=O)O,QTBSBXVTEAMEQO-UHFFFAOYSA-N
32
+ Alanine,C3H6NO,C[C@H](N)C(=O)O,QNAYBMKLOCPYGJ-REOHCLBHSA-N
33
+ Arginine,C6H13N4O,N=C(N)NCCC[C@H](N)C(=O)O,ODKSFYDXXFIFQN-BYPYZUCNSA-N
34
+ Asparagine,C4H7N2O,N=C(O)C[C@H](N)C(=O)O,DCXYFEDJOCDNAF-REOHCLBHSA-N
35
+ Aspartic acid,C4H6NO,N[C@@H](CC(=O)O)C(=O)O,CKLJMWTZIZZHCS-REOHCLBHSA-N
36
+ Cysteine,C3H7NO2S,N[C@@H](CS)C(=O)O,XUJNEKJLAYXESH-REOHCLBHSA-N
37
+ Glutamic acid,C5H8NO,N[C@@H](CCC(=O)O)C(=O)O,WHUUTDBJXJRKMK-VKHMYHEASA-N
38
+ Glutamine,C5H9N2O,N=C(O)CC[C@H](N)C(=O)O,ZDXPYRJPNDTMRX-VKHMYHEASA-N
39
+ Glycine,C2H4NO,NCC(=O)O,DHMQDGOQFOQNFH-UHFFFAOYSA-N
40
+ Histidine,C6H8N3O,N[C@@H](Cc1cnc[nH]1)C(=O)O,HNDVDQJCIGZPNO-YFKPBYRVSA-N
41
+ Isoleucine,C6H12NO,CC[C@H](C)[C@H](N)C(=O)O,AGPKZVBTJJNPAG-WHFBIAKZSA-N
42
+ Leucine,C6H12NO,CC(C)C[C@H](N)C(=O)O,ROHFNLRQFUQHCH-YFKPBYRVSA-N
43
+ Lysine,C6H13N2O,NCCCC[C@H](N)C(=O)O,KDXKERNSBIXSRK-YFKPBYRVSA-N
44
+ Methionine,C5H11NO2S,CSCC[C@H](N)C(=O)O,FFEARJCKVFRZRR-BYPYZUCNSA-N
45
+ Phenylalanine,C9H10NO,N[C@@H](Cc1ccccc1)C(=O)O,COLNVLDHVKWLRT-QMMMGPOBSA-N
46
+ Proline,C5H8NO,O=C(O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N
47
+ Serine,C3H6NO,N[C@@H](CO)C(=O)O,MTCFGRXMJLQNBG-REOHCLBHSA-N
48
+ Threonine,C4H8NO,C[C@@H](O)[C@H](N)C(=O)O,AYFVYJQAPQTCCC-GBXIJSLDSA-N
49
+ Tryptophan,C11H11N2O,N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O,QIVBCDIJIAJPQS-VIFPVBQESA-N
50
+ Tyrosine,C9H10NO,N[C@@H](Cc1ccc(O)cc1)C(=O)O,OUYCCCASQSFEME-QMMMGPOBSA-N
51
+ Valine,C5H10NO,CC(C)[C@H](N)C(=O)O,KZSNJWFQEVHDMF-BYPYZUCNSA-N
52
+ Ornithine,C5H11N2O,NCCC[C@H](N)C(=O)O,AHLPHDHHMVZTML-BYPYZUCNSA-N
53
+ Citrulline,C6H12N3O,N=C(O)NCCC[C@H](N)C(=O)O,RHGKLRLOHDJJDR-BYPYZUCNSA-N
54
+ Homocysteine,C4H9NO2S,N[C@@H](CCS)C(=O)O,FFFHZYDWPBMWHY-VKHMYHEASA-N
55
+ S-adenosylmethionine,C15H22N6O5S,C[S](CC[C@H](N)C(=O)O)C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,MEFKEPWMEQBLKI-AIRLBKTGSA-N
56
+ S-adenosylhomocysteine,C14H20N6O5S,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,ZJUKTBDSGOFHSH-WFMPWKQPSA-N
57
+ Formic acid,CHO,O=CO,BDAGIHXWWSANSR-UHFFFAOYSA-N
58
+ Propionic acid,C3H5O,CCC(=O)O,XBDQKXXYIPTUBI-UHFFFAOYSA-N
59
+ Butyric acid,C4H7O,CCCC(=O)O,FERIUCNNQQJTOY-UHFFFAOYSA-N
60
+ Malonic acid,C3H3O,O=C(O)CC(=O)O,OFOBLEOULBTSOW-UHFFFAOYSA-N
61
+ 2-Hydroxyglutarate,C5H7O,O=C(O)CCC(O)C(=O)O,HWXBTNAVRSUOJR-UHFFFAOYSA-N
62
+ 3-Hydroxybutyrate,C4H10O,CC(O)CC(=O)O,WHBMMWSBFZVSSR-UHFFFAOYSA-M
63
+ Acetoacetate,C4H8O,CC(=O)CC(=O)O,WDJHALXBUFZDSR-UHFFFAOYSA-M
64
+ Beta-hydroxybutyrate,C4H7O,CC(O)CC(=O)O,WHBMMWSBFZVSSR-UHFFFAOYSA-N
65
+ Pyruvic acid,C3H3O,CC(=O)C(=O)O,LCTONWCANYUPML-UHFFFAOYSA-N
66
+ Lactic acid,C3H5O,CC(O)C(=O)O,JVTAAEKCZFNVCJ-UHFFFAOYSA-N
67
+ Myristic acid,C14H27O,CCCCCCCCCCCCCC(=O)O,TUNFSRHWOTWDNC-UHFFFAOYSA-N
68
+ Palmitic acid,C16H31O,CCCCCCCCCCCCCCCC(=O)O,IPCSVZSSVZVIGE-UHFFFAOYSA-N
69
+ Stearic acid,C18H35O,CCCCCCCCCCCCCCCCCC(=O)O,QIQXTHQIDYTFRH-UHFFFAOYSA-N
70
+ Palmitoleic acid,C16H29O,CCCCCC/C=C\CCCCCCCC(=O)O,SECPZKHBENQXJG-FPLPWBNLSA-N
71
+ Oleic acid,C18H33O,CCCCCCCC/C=C\CCCCCCCC(=O)O,ZQPPMHVWECSIRJ-KTKRTIGZSA-N
72
+ Linoleic acid,C18H31O,CCCCC/C=C\C/C=C\CCCCCCCC(=O)O,OYHQOLUKZRVURQ-HZJYTTRNSA-N
73
+ Alpha-linolenic acid,C18H29O,CC/C=C\C/C=C\C/C=C\CCCCCCCC(=O)O,DTOSIQBPPRVQHS-PDBXOOCHSA-N
74
+ Arachidonic acid,C20H31O,CCCCC/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)O,YZXBAPSDXZZRGB-DOFZRALJSA-N
75
+ Adenine,C5H4N,Nc1nc[nH]c2ncnc1-2,GFFGJBXGBJISGV-UHFFFAOYSA-N
76
+ Guanine,C5H5N5O,N=c1nc(O)c2nc[nH]c2[nH]1,UYTPUPDQBNUYGX-UHFFFAOYSA-N
77
+ Cytosine,C4H5N3O,N=c1ccnc(O)[nH]1,OPTASPLRGRRNAP-UHFFFAOYSA-N
78
+ Thymine,C5H5N2O,Cc1cnc(O)nc1O,RWQNBRDOKXIBIV-UHFFFAOYSA-N
79
+ Uracil,C4H3N2O,Oc1ccnc(O)n1,ISAKRJDGNUQOIC-UHFFFAOYSA-N
80
+ Adenosine,C10H12N5O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,OIRDTQYFTABQOQ-KQYNXXCUSA-N
81
+ Guanosine,C10H12N5O,N=c1nc(O)c2ncn([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O)c2[nH]1,NYHBQMYGNKIUIF-UUOKFMHZSA-N
82
+ Cytidine,C9H12N3O,N=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(O)n1,UHDGCWIWMRVCDJ-XVFCMESISA-N
83
+ Uridine,C9H11N2O,O=c1nc(O)ccn1[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,DRTQHJPVMGBUCF-XVFCMESISA-N
84
+ AMP,C10H14N5O7P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,UDMBCSSLTHHNCD-KQYNXXCUSA-N
85
+ ADP,C10H14N5O10P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,XTWYTFMLZFPYCI-KQYNXXCUSA-N
86
+ ATP,C10H15N5O13P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,ZKHQWZAMYRWXGA-KQYNXXCUSA-N
87
+ GMP,C10H14N5O8P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,RQFCJASXJCIDSX-UUOKFMHZSA-N
88
+ GDP,C10H14N5O11P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,QGWNDRXFNXRZMB-UUOKFMHZSA-N
89
+ GTP,C10H15N5O14P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,XKMLYUALXHKNFT-UUOKFMHZSA-N
90
+ CMP,C9H14N3O8P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,IERHLVCPSMICTF-XVFCMESISA-N
91
+ CDP,C9H14N3O11P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,ZWIADYZPOWUWEW-XVFCMESISA-N
92
+ CTP,C9H15N3O14P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,PCDQPRRSZKQHHS-XVFCMESISA-N
93
+ UMP,C9H13N2O9P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,DJJCXFVJDGTHFX-XVFCMESISA-N
94
+ UDP,C9H13N2O12P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,XCCTYIAWTASOJW-XVFCMESISA-N
95
+ UTP,C9H14N2O15P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,PGAVKCOVUIYSFO-XVFCMESISA-N
96
+ NAD+,C21H26N7O14P,N=C(O)C1CCCN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,BAWFJGJZGIEFAR-NNYOXOHSSA-N
97
+ NADH,C21H28N7O14P,N=C(O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,BOPGDPNILDQYTO-NNYOXOHSSA-N
98
+ NADP+,C21H27N7O17P,N=C(O)C1CCCN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,XJLXINKUBYWONI-NNYOXOHSSA-N
99
+ NADPH,C21H29N7O17P,N=C(O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,ACFIXJIJDZMPPO-NNYOXOHSSA-N
100
+ FAD,C27H32N9O15P,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C,VWWQXMAJTJZDQX-UYBVJOGSSA-N
101
+ FMN,C17H21N4O9P,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C,FVTCRASFADXXNN-SCRDCRAPSA-N
102
+ Coenzyme A,C21H36N7O16P3S,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(O)=NCCC(O)=NCCS,RGJOEKWQDUBAIZ-IBOSZNHHSA-N
103
+ Pantothenic acid,C9H16NO,CC(C)(CO)[C@@H](O)C(O)=NCCC(=O)O,GHOKWGTUZJEAQD-ZETCQYMHSA-N
104
+ Riboflavin,C17H19N4O,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C,AUNGANRZJHBGPY-SCRDCRAPSA-N
105
+ Niacin,C6H4NO,O=C(O)c1cccnc1,PVNIIMVLHYAWGP-UHFFFAOYSA-N
106
+ Fructose,C6H11O,OCC1(O)OC[C@@H](O)[C@@H](O)[C@@H]1O,LKDRXBCSQODPBY-VRPWFDPXSA-N
107
+ Mannose,C6H11O,OC[C@H]1OC(O)[C@@H](O)[C@@H](O)[C@@H]1O,WQZGKKKJIJFFOK-QTVWNMPRSA-N
108
+ Mannose-6-phosphate,C6H13O9P,O=C[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,VFRROHXSMXFLSN-KVTDHHQDSA-N
109
+ Ribose,C5H9O,OC1OC[C@@H](O)[C@@H](O)[C@H]1O,SRBFZHDQGSBBOR-SOOFDHNKSA-N
110
+ Glucosamine,C6H12NO,N[C@H]1C(O)O[C@H](CO)[C@@H](O)[C@@H]1O,MSWZFWKMSRAUBD-IVMDWMLBSA-N
111
+ N-acetylglucosamine,C8H14NO,CC(O)=N[C@@H](C=O)[C@@H](O)[C@H](O)[C@H](O)CO,MBLBDJOUHNCFQT-LXGUWJNJSA-N
112
+ Choline,C5H13NO,[H]OC([H])([H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],OEYIOHPDSNJKLS-UHFFFAOYSA-N
113
+ Betaine,C5H10NO,[H]OC(=O)C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],KWIUHFFTVRNATP-UHFFFAOYSA-N
114
+ Carnitine,C7H14NO,[H]OC(=O)C([H])([H])C([H])(O[H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],PHIQHXFUZVPYII-UHFFFAOYSA-N
115
+ Phosphocholine,C5H14NO4P,[H]OP(=O)(O[H])OC([H])([H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],YHHSONZFOIEMCP-UHFFFAOYSA-O
116
+ Glycerol,C3H7O,OCC(O)CO,PEDCQBHIVMGVHV-UHFFFAOYSA-N
117
+ Sorbitol,C6H13O,OC[C@@H](O)[C@@H](O)[C@H](O)[C@@H](O)CO,FBPFZTCFMRRESA-JGWLITMVSA-N
118
+ Inositol,C6H11O,OC1C(O)C(O)C(O)C(O)C1O,CDAISMWEOUEBRE-UHFFFAOYSA-N
119
+ Cholesterol,C27H46O,CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,HVYWMOMLDIMFJA-DPAQBDIFSA-N
120
+ Pantothenate,C9H21NO,CC(C)(CO)C(O)C(O)=NCCC(=O)O,GHOKWGTUZJEAQD-UHFFFAOYSA-M
@@ -0,0 +1,333 @@
1
+ """Generate a CSV of human urine metabolites.
2
+
3
+ This improved script attempts to:
4
+ - Download or scrape a urine metabolite list from the UrineMetabolome downloads page.
5
+ - Fall back to HMDB scraping or a curated list if needed.
6
+ - Resolve formula/SMILES/InChIKey using PubChem with bounded parallelism.
7
+ - Use RDKit (if available) to convert InChI -> SMILES when PubChem does not provide SMILES.
8
+
9
+ The goal is robust coverage and faster lookups by parallelizing per-name queries
10
+ while avoiding aggressive parallelism that might overload PubChem.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import csv
16
+ import sys
17
+ import re
18
+ import time
19
+ import os
20
+ from urllib.parse import quote, urljoin
21
+ from concurrent.futures import ThreadPoolExecutor, as_completed
22
+ from typing import Iterable
23
+
24
+ try:
25
+ import requests
26
+ from bs4 import BeautifulSoup
27
+ except Exception:
28
+ requests = None
29
+ BeautifulSoup = None
30
+
31
+
32
+ URINEMETABOLOME_DOWNLOADS = "https://www.urinemetabolome.ca/downloads"
33
+ HMDB_URINE_LIST_URL = "https://hmdb.ca/metabolites?utf8=%E2%9C%93&search=&biological_context=Urine"
34
+
35
+
36
def normalize_name(name: str) -> str:
    """Normalise a metabolite name before it is used as a PubChem name query.

    Steps, in order:

    1. Drop parenthetical qualifiers (``"Citrate (urine)"`` -> ``"Citrate"``).
       A group that is immediately followed by a hyphen is kept, because it is
       part of the chemical name itself (``"(S)-3-Hydroxyisobutyric acid"``,
       ``"N-(2-hydroxyethyl)-glycine"``); stripping it would leave a dangling
       hyphen in the query.
    2. Replace the arrow and en/em dashes with ASCII equivalents.
    3. Collapse runs of whitespace and trim the ends.
    4. Expand a small set of nucleotide/cofactor abbreviations (matched
       case-insensitively) to their full names.

    Args:
        name: Raw metabolite name. Falsy values (``""``, ``None``) are returned
            unchanged.

    Returns:
        The normalised name, or the expanded full name for a known abbreviation.
    """
    if not name:
        return name

    # "(?!-)" keeps stereo/locant prefixes; "[^()]*" stops a match from running
    # across a kept group into a later one.
    cleaned = re.sub(r"\([^()]*\)(?!-)", "", name)
    cleaned = cleaned.replace("➔", "->").replace("–", "-").replace("—", "-")
    cleaned = re.sub(r"\s+", " ", cleaned).strip()

    abbreviations = {
        "AMP": "Adenosine monophosphate",
        "ADP": "Adenosine diphosphate",
        "ATP": "Adenosine triphosphate",
        "GMP": "Guanosine monophosphate",
        "GDP": "Guanosine diphosphate",
        "GTP": "Guanosine triphosphate",
        "NAD+": "Nicotinamide adenine dinucleotide",
        "NADH": "Nicotinamide adenine dinucleotide (reduced)",
    }
    return abbreviations.get(cleaned.upper(), cleaned)
57
+
58
+
59
def _names_from_delimited_text(text: str, limit: int) -> list[str]:
    """Return up to *limit* unique values from the name column of CSV/TSV text.

    The delimiter is a comma when the header line contains one, otherwise a tab.
    The name column is the first header cell containing "name", "metabolite" or
    "compound"; when none matches, the first column is used.
    """
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if not lines:
        return []

    sep = "," if "," in lines[0] else "\t"
    # csv.reader (rather than str.split) keeps quoted fields that contain the
    # delimiter intact, e.g. "1,3-Diaminopropane".
    rows = csv.reader(lines, delimiter=sep)
    header = [cell.strip().lower() for cell in next(rows, [])]

    col_idx = 0  # fallback: first column
    for i, cell in enumerate(header):
        if any(key in cell for key in ("name", "metabolite", "compound")):
            col_idx = i
            break

    names: list[str] = []
    seen: set[str] = set()
    for row_number, row in enumerate(rows):
        if row_number >= limit:
            break
        if len(row) <= col_idx:
            continue
        value = row[col_idx].strip()
        if value and value not in seen:
            seen.add(value)
            names.append(value)
    return names


def fetch_urinemetabolome_names(limit: int = 2000) -> list[str]:
    """Scrape the UrineMetabolome downloads page for a downloadable metabolite list.

    Best-effort: follows links on the downloads page that end in ``.csv`` or
    ``.tsv``, and returns the names parsed from the first file that yields any.
    Excel links (``.xls``/``.xlsx``) are skipped: they are binary and cannot be
    parsed as delimited text, so reading them would only produce garbage names
    and prevent the caller from falling back to a better source.

    Args:
        limit: Maximum number of data rows read, and therefore names returned.

    Returns:
        A list of unique names in file order. The list is empty when
        ``requests``/``BeautifulSoup`` are unavailable, when the page cannot be
        fetched, or when no linked file yields a name; callers should then fall
        back to HMDB or a curated list.
    """
    if requests is None or BeautifulSoup is None:
        return []
    try:
        page = requests.get(URINEMETABOLOME_DOWNLOADS, timeout=15)
        page.raise_for_status()
        soup = BeautifulSoup(page.text, "html.parser")
        for anchor in soup.find_all("a", href=True):
            href = anchor["href"]
            if not re.search(r"\.(?:csv|tsv)$", href, re.I):
                continue
            try:
                resp = requests.get(urljoin(URINEMETABOLOME_DOWNLOADS, href), timeout=20)
                if resp.status_code != 200:
                    continue
                text = resp.content.decode("utf-8", errors="ignore")
                names = _names_from_delimited_text(text, limit)
            except Exception:
                # A broken or unparseable file must not abort the scan of the
                # remaining links.
                continue
            if names:
                return names
        return []
    except Exception:
        return []
113
+
114
+
115
def fetch_hmdb_urine_names(limit: int = 500) -> list[str]:
    """Fallback HMDB scrape (best-effort).

    Collects the link text of every anchor on the HMDB urine listing whose
    ``href`` contains ``/metabolites/HMDB``. Anchors whose text is just the HMDB
    accession (``HMDB0000001``) are skipped, so accession IDs are not returned
    as if they were metabolite names.

    Args:
        limit: Maximum number of unique names to return.

    Returns:
        Unique names in page order. The list is empty when
        ``requests``/``BeautifulSoup`` are unavailable or the page cannot be
        fetched or parsed.
    """
    if requests is None or BeautifulSoup is None:
        return []
    try:
        resp = requests.get(HMDB_URINE_LIST_URL, timeout=20)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        # dict keys give ordered de-duplication while we count unique names.
        unique: dict[str, None] = {}
        for anchor in soup.find_all("a", href=True):
            if len(unique) >= limit:
                break
            if not re.search(r"/metabolites/HMDB", anchor["href"]):
                continue
            text = anchor.get_text(strip=True)
            if not text or len(text) <= 1:
                continue
            if re.fullmatch(r"HMDB\d+", text):
                # Accession link, not a metabolite name.
                continue
            unique[text] = None
        return list(unique)
    except Exception:
        return []
135
+
136
+
137
_PUBCHEM_COMPOUND_URL = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
_PUBCHEM_PROPERTY_LIST = "MolecularFormula,CanonicalSMILES,InChI,InChIKey"


def _pubchem_get_json(url: str, timeout: int):
    """Return the decoded JSON body of *url*, or None on a non-200 status or any error."""
    try:
        resp = requests.get(url, timeout=timeout)
        if resp.status_code != 200:
            return None
        return resp.json()
    except Exception:
        return None


def _pubchem_first_properties(namespace: str, identifier, timeout: int):
    """Return the first ``Properties`` record for ``namespace/identifier``, or None."""
    url = f"{_PUBCHEM_COMPOUND_URL}/{namespace}/{identifier}/property/{_PUBCHEM_PROPERTY_LIST}/JSON"
    payload = _pubchem_get_json(url, timeout)
    try:
        first = payload["PropertyTable"]["Properties"][0]
    except (TypeError, KeyError, IndexError):
        return None
    return first if isinstance(first, dict) else None


def _pubchem_first_cid(inchikey_quoted: str, timeout: int):
    """Return the first CID PubChem lists for an (already URL-quoted) InChIKey, or None."""
    payload = _pubchem_get_json(f"{_PUBCHEM_COMPOUND_URL}/inchikey/{inchikey_quoted}/cids/JSON", timeout)
    try:
        return payload["IdentifierList"]["CID"][0]
    except (TypeError, KeyError, IndexError):
        return None


def _smiles_from_inchi(inchi: str):
    """Convert an InChI to an isomeric SMILES with RDKit; None if RDKit is missing or fails."""
    try:
        from rdkit import Chem

        mol = Chem.MolFromInchi(inchi)
        if mol is None:
            return None
        try:
            Chem.SanitizeMol(mol)
        except Exception:
            # Sanitisation is best-effort; still try to emit a SMILES.
            pass
        return Chem.MolToSmiles(mol, isomericSmiles=True)
    except Exception:
        return None


def fetch_pubchem_name_once(name: str, timeout: int = 15):
    """Fetch formula, SMILES and InChIKey for a single name from PubChem.

    The name is normalised with ``normalize_name`` and looked up through the
    PUG REST ``compound/name`` endpoint. When the response has no SMILES, these
    fallbacks are tried in order until one yields a value:

    1. convert the returned InChI with RDKit (if importable);
    2. re-query the property table by InChIKey;
    3. resolve the InChIKey to a CID and query the property table by CID.

    Args:
        name: Metabolite name to resolve.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        ``(formula, smiles, inchikey)``; individual items may be ``None``.
        ``(None, None, None)`` when ``requests`` is unavailable, the name is
        empty after normalisation, or the name lookup fails.
    """
    if requests is None:
        return (None, None, None)

    normalized = normalize_name(name)
    if not normalized:
        return (None, None, None)
    # safe="" also percent-encodes "/", which quote() leaves untouched by
    # default; a raw slash in the name would split the REST path.
    # NOTE(review): PubChem may still require POST for such names - confirm.
    query = quote(normalized, safe="")

    props = _pubchem_first_properties("name", query, timeout)
    if props is None:
        return (None, None, None)

    formula = props.get("MolecularFormula")
    smiles = props.get("CanonicalSMILES")
    inchi = props.get("InChI")
    inchikey = props.get("InChIKey")

    if not smiles and inchi:
        smiles = _smiles_from_inchi(inchi) or smiles

    if not smiles and inchikey:
        by_key = _pubchem_first_properties("inchikey", quote(str(inchikey)), timeout)
        if by_key is not None:
            smiles = by_key.get("CanonicalSMILES") or smiles
            formula = formula or by_key.get("MolecularFormula")

    if not smiles and inchikey:
        cid = _pubchem_first_cid(quote(str(inchikey)), timeout)
        if cid is not None:
            by_cid = _pubchem_first_properties("cid", cid, timeout)
            if by_cid is not None:
                smiles = by_cid.get("CanonicalSMILES") or smiles
                formula = formula or by_cid.get("MolecularFormula")

    return (formula, smiles, inchikey)
224
+
225
+
226
def fetch_pubchem_for_names(names: Iterable[str], workers: int = 8, delay: float = 0.05):
    """Look up every name on PubChem using a bounded thread pool.

    Each name is submitted to ``fetch_pubchem_name_once`` on a pool of
    ``workers`` threads. Results are gathered as the lookups finish, and the
    collecting thread sleeps ``delay`` seconds after recording each one.

    Args:
        names: Names to resolve; the iterable is materialised once up front.
        workers: Maximum number of pool threads.
        delay: Seconds to sleep after each collected result.

    Returns:
        Dict mapping name -> ``(formula, smiles, inchikey)``. A lookup that
        raised is recorded as ``(None, None, None)``.
    """
    queries = list(names)
    resolved: dict[str, tuple | None] = {}
    with ThreadPoolExecutor(max_workers=workers) as pool:
        pending = {}
        for query in queries:
            pending[pool.submit(fetch_pubchem_name_once, query)] = query
        for finished in as_completed(pending):
            try:
                outcome = finished.result()
            except Exception:
                outcome = (None, None, None)
            resolved[pending[finished]] = outcome
            time.sleep(delay)  # polite small delay between completions
    return resolved
244
+
245
+
246
def generate_csv(out_path: str = "urine_metabolites.csv", workers: int = 8):
    """Build the urine metabolite CSV and return the path it was written to.

    Name sources are tried in order: the UrineMetabolome downloads page, the
    HMDB urine listing, then a built-in curated list. Every name is resolved
    through PubChem and written as one ``Name,Formula,SMILES,InChIKey`` row;
    unresolved fields are written as empty strings.

    Args:
        out_path: Output file. A bare file name (the default) is placed in
            ``masster/data/libs`` relative to the current working directory.
            A path with a directory component is written where the caller
            asked; previously that directory was silently discarded.
        workers: Thread count passed to ``fetch_pubchem_for_names``.

    Returns:
        The path of the written CSV file.
    """
    # Try UrineMetabolome downloads first, then HMDB, then the curated list.
    names = fetch_urinemetabolome_names()
    if not names:
        names = fetch_hmdb_urine_names()
    if not names:
        print("Falling back to curated urine list")
        names = [
            "Creatinine",
            "Urea",
            "Hippuric acid",
            "Citrate",
            "Creatine",
            "Glycine",
            "Taurine",
            "Succinate",
            "Fumaric acid",
            "Malic acid",
            "Lactic acid",
            "Acetic acid",
            "Formic acid",
            "Alanine",
            "Betaine",
            "Choline",
            "Trimethylamine N-oxide",
            "Phenylacetylglutamine",
            "p-Cresol sulfate",
            "Indoxyl sulfate",
            "Uric acid",
            "Xanthine",
            "3-Hydroxybutyrate",
            "Acetoacetate",
            "N-Acetylneuraminic acid",
        ]

    print(f"Resolving {len(names)} names via PubChem (workers={workers})...")
    mapping = fetch_pubchem_for_names(names, workers=workers)

    rows = []
    for name in names:
        # "or" also covers a None value stored for a name, not just a missing key.
        formula, smiles, inchikey = mapping.get(name) or (None, None, None)
        rows.append({
            "Name": name,
            "Formula": formula or "",
            "SMILES": smiles or "",
            "InChIKey": inchikey or "",
        })

    # Honour an explicit directory in out_path; default to the package data dir.
    out_dir = os.path.dirname(out_path) or os.path.join("masster", "data", "libs")
    os.makedirs(out_dir, exist_ok=True)
    out_path_full = os.path.join(out_dir, os.path.basename(out_path))

    fieldnames = ["Name", "Formula", "SMILES", "InChIKey"]
    with open(out_path_full, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f"Wrote {len(rows)} entries to {out_path_full}")
    return out_path_full
308
+
309
+
310
def test_load_with_lib(csv_path: str):
    """Smoke-test that the generated CSV can be loaded by masster's ``Lib`` class.

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        ``True`` when ``Lib().import_csv(csv_path, polarity=None)`` succeeds,
        ``False`` when ``Lib`` cannot be imported or the import raises. The
        reason is printed in both failure cases.
    """
    try:
        from masster.lib import Lib
    except Exception as e:
        # NOTE(review): the package's sample/lib.py docstring advertises
        # `from masster.sample.lib import Lib`; try that location before
        # giving up - confirm which import path is canonical.
        try:
            from masster.sample.lib import Lib
        except Exception:
            print(f"Cannot import masster.lib.Lib: {e}")
            return False

    try:
        lib = Lib()
        lib.import_csv(csv_path, polarity=None)
        print(f"Lib loaded: {len(lib)} entries")
        return True
    except Exception as e:
        print(f"Failed to load CSV with Lib.import_csv: {e}")
        return False
325
+
326
+
327
if __name__ == "__main__":
    # Script entry point: write the CSV, then check that masster can load it.
    # Exit status 2 signals that the load check failed.
    generated_csv = generate_csv()
    if not test_load_with_lib(generated_csv):
        print("Test failed; please inspect messages above.")
        sys.exit(2)
    print("Done.")
@@ -0,0 +1,51 @@
1
+ Name,Formula,SMILES,InChIKey
2
+ HMDB0000001,,,
3
+ 1-Methylhistidine,C7H11N3O2,Cn1cnc(C[C@H](N)C(=O)O)c1,BRMWTNUJHUMWMS-LURJTMIESA-N
4
+ HMDB0000002,,,
5
+ "1,3-Diaminopropane",C3H10N2,NCCCN,XFNJVJPLKCPIBV-UHFFFAOYSA-N
6
+ HMDB0000005,,,
7
+ 2-Ketobutyric acid,C4H6O3,CCC(=O)C(=O)O,TYEYBOSBBBHJIV-UHFFFAOYSA-N
8
+ HMDB0000008,,,
9
+ 2-Hydroxybutyric acid,C4H8O3,CCC(O)C(=O)O,AFENDNXGAFYKQO-UHFFFAOYSA-N
10
+ HMDB0000010,,,
11
+ 2-Methoxyestrone,,,
12
+ HMDB0000011,,,
13
+ 3-Hydroxybutyric acid,,,
14
+ HMDB0000012,,,
15
+ Deoxyuridine,,,
16
+ HMDB0000014,,,
17
+ Deoxycytidine,,,
18
+ HMDB0000015,,,
19
+ Cortexolone,,,
20
+ HMDB0000016,,,
21
+ Deoxycorticosterone,,,
22
+ HMDB0000017,,,
23
+ 4-Pyridoxic acid,C8H9NO4,Cc1ncc(CO)c(C(=O)O)c1O,HXACOUQIXZGNBF-UHFFFAOYSA-N
24
+ HMDB0000019,,,
25
+ alpha-Ketoisovaleric acid,C5H8O3,CC(C)C(=O)C(=O)O,QHKABHOOEWYVLI-UHFFFAOYSA-N
26
+ HMDB0000020,,,
27
+ p-Hydroxyphenylacetic acid,C8H8O3,O=C(O)Cc1ccc(O)cc1,XQXPVVBIMDBYFF-UHFFFAOYSA-N
28
+ HMDB0000021,,,
29
+ Iodotyrosine,C9H10INO3,N[C@@H](Cc1ccc(O)c(I)c1)C(=O)O,UQTZMGFTRHFAAM-ZETCQYMHSA-N
30
+ HMDB0000022,,,
31
+ 3-Methoxytyramine,C9H13NO2,COc1cc(CCN)ccc1O,DIVQKHQLANKJQO-UHFFFAOYSA-N
32
+ HMDB0000023,,,
33
+ (S)-3-Hydroxyisobutyric acid,,,
34
+ HMDB0000024,,,
35
+ 3-O-Sulfogalactosylceramide (d18:1/24:0),,,
36
+ HMDB0000026,,,
37
+ Ureidopropionic acid,,,
38
+ HMDB0000027,,,
39
+ Tetrahydrobiopterin,,,
40
+ HMDB0000030,,,
41
+ Biotin,C10H16N2O3S,O=C(O)CCCC[C@@H]1SC[C@@H]2N=C(O)N[C@@H]21,YBJHBAHKTGYVGT-ZKWXMUAHSA-N
42
+ HMDB0000031,,,
43
+ Androsterone,C19H30O2,C[C@]12CC[C@@H](O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)C(=O)CC[C@@H]12,QGXBDMJGAMFCBF-HLUDHZFRSA-N
44
+ HMDB0000032,,,
45
+ 7-Dehydrocholesterol,C27H44O,CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2C3=CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,UCTLRSWJYQTBFZ-DDPQNLDTSA-N
46
+ HMDB0000033,,,
47
+ Carnosine,C9H14N4O3,NCCC(O)=N[C@@H](Cc1cnc[nH]1)C(=O)O,CQOVPNPJLQNMDC-ZETCQYMHSA-N
48
+ HMDB0000034,,,
49
+ Adenine,C5H5N5,Nc1nc[nH]c2ncnc1-2,GFFGJBXGBJISGV-UHFFFAOYSA-N
50
+ HMDB0000036,,,
51
+ Taurocholic acid,,,
masster/sample/h5.py CHANGED
@@ -900,7 +900,7 @@ def _load_sample5(self, filename: str, map: bool = True):
900
900
  def _load_sample5_study(self, filename: str, map: bool = True):
901
901
  """
902
902
  Optimized variant of _load_sample5 for study loading that skips reading ms1_df.
903
-
903
+
904
904
  This is used when adding samples to studies where ms1_df data is not needed,
905
905
  improving loading throughput by skipping the potentially large ms1_df dataset.
906
906
 
masster/sample/helpers.py CHANGED
@@ -176,7 +176,7 @@ def _get_feature_uids(self, features=None, verbose=True):
176
176
  if not isinstance(features, pd.DataFrame):
177
177
  if verbose:
178
178
  self.logger.error(
179
- "Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame."
179
+ "Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.",
180
180
  )
181
181
  return []
182
182
 
@@ -298,7 +298,7 @@ def get_eic(self, mz, mz_tol=None):
298
298
  """
299
299
  # Use default mz_tol from sample parameters if not provided
300
300
  if mz_tol is None:
301
- if hasattr(self, 'parameters') and hasattr(self.parameters, 'eic_mz_tol'):
301
+ if hasattr(self, "parameters") and hasattr(self.parameters, "eic_mz_tol"):
302
302
  mz_tol = self.parameters.eic_mz_tol
303
303
  else:
304
304
  mz_tol = 0.01 # fallback default
@@ -323,11 +323,7 @@ def get_eic(self, mz, mz_tol=None):
323
323
  return None
324
324
 
325
325
  # Aggregate intensities per retention time. Use sum in case multiple points per rt.
326
- chrom = (
327
- matches.group_by("rt")
328
- .agg([pl.col("inty").sum().alias("inty")])
329
- .sort("rt")
330
- )
326
+ chrom = matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
331
327
 
332
328
  # Attach to Sample
333
329
  self.chrom_df = chrom
masster/sample/lib.py CHANGED
@@ -1,28 +1,32 @@
1
1
  """
2
- _lib.py
2
+ lib.py
3
3
 
4
- This module provides utility functions and algorithms for mass spectrometry data processing.
5
- It contains core functionality for compound library management, target identification,
6
- adduct handling, and various analytical operations used throughout the masster package.
4
+ This module provides the Lib class and utility functions for mass spectrometry compound library
5
+ management and feature annotation. It contains core functionality for compound library management,
6
+ target identification, adduct handling, and various analytical operations.
7
7
 
8
8
  Key Features:
9
- - **Compound Libraries**: Load and manage compound databases with metadata.
10
- - **Adduct Calculations**: Handle various ionization adducts and charge states.
11
- - **Mass Calculations**: Precise mass calculations with adduct corrections.
12
- - **Target Matching**: Match detected features against compound libraries.
13
- - **Polarity Handling**: Support for positive and negative ionization modes.
14
- - **Database Integration**: Interface with various compound database formats.
9
+ - **Lib Class**: Main class for managing compound libraries and annotations
10
+ - **Compound Libraries**: Load and manage compound databases with metadata
11
+ - **Adduct Calculations**: Handle various ionization adducts and charge states
12
+ - **Mass Calculations**: Precise mass calculations with adduct corrections
13
+ - **Target Matching**: Match detected features against compound libraries
14
+ - **Polarity Handling**: Support for positive and negative ionization modes
15
+ - **Database Integration**: Interface with various compound database formats
15
16
 
16
17
  Dependencies:
17
- - `pyopenms`: For mass spectrometry algorithms and data structures.
18
- - `polars` and `pandas`: For efficient data manipulation and analysis.
19
- - `numpy`: For numerical computations and array operations.
20
- - `tqdm`: For progress tracking during batch operations.
18
+ - `pyopenms`: For mass spectrometry algorithms and data structures
19
+ - `polars` and `pandas`: For efficient data manipulation and analysis
20
+ - `numpy`: For numerical computations and array operations
21
+ - `tqdm`: For progress tracking during batch operations
22
+
23
+ Classes:
24
+ - `Lib`: Main class for compound library management and annotation
21
25
 
22
26
  Functions:
23
- - `lib_load()`: Load compound libraries from CSV files.
24
- - `load_lib()`: Alias for lib_load function.
25
- - Various utility functions for mass calculations and library management.
27
+ - `lib_load()`: Load compound libraries from CSV files (legacy)
28
+ - `load_lib()`: Alias for lib_load function (legacy)
29
+ - Various utility functions for mass calculations and library management
26
30
 
27
31
  Supported Adducts:
28
32
  - Positive mode: [M+H]+, [M+Na]+, [M+K]+, [M+NH4]+, [M-H2O+H]+
@@ -30,19 +34,22 @@ Supported Adducts:
30
34
 
31
35
  Example Usage:
32
36
  ```python
33
- from _lib import lib_load
37
+ from masster.sample.lib import Lib
38
+
39
+ # Create library instance
40
+ lib = Lib()
34
41
 
35
- # Load compound library
36
- lib_load(self, csvfile="compounds.csv", polarity="positive")
42
+ # Import compounds from CSV
43
+ lib.import_csv("compounds.csv", polarity="positive")
37
44
 
38
- # Access loaded library data
39
- print(f"Loaded {len(self.lib_df)} compounds")
40
- print(self.lib_df.head())
45
+ # Access library data
46
+ print(f"Loaded {len(lib.lib_df)} compounds")
47
+ print(lib.lib_df.head())
41
48
  ```
42
49
 
43
50
  See Also:
44
- - `parameters._lib_parameters`: For library-specific parameter configuration.
45
- - `single.py`: For applying library matching to detected features.
51
+ - `parameters._lib_parameters`: For library-specific parameter configuration
52
+ - `sample.py`: For applying library matching to detected features
46
53
 
47
54
  """
48
55
 
masster/sample/load.py CHANGED
@@ -119,7 +119,7 @@ def load_study(
119
119
  ):
120
120
  """
121
121
  Optimized load method for study use that skips loading ms1_df for better performance.
122
-
122
+
123
123
  This method is identical to load() but uses _load_sample5_study() for .sample5 files,
124
124
  which skips reading the potentially large ms1_df dataset to improve throughput when
125
125
  adding samples to studies.
@@ -250,7 +250,13 @@ def _load_mzML(
250
250
  precursorIsolationWindowLowerMZ = s.getPrecursors()[0].getIsolationWindowLowerOffset()
251
251
  precursorIsolationWindowUpperMZ = s.getPrecursors()[0].getIsolationWindowUpperOffset()
252
252
  prec_intyensity = s.getPrecursors()[0].getIntensity()
253
- energy = s.getPrecursors()[0].getActivationEnergy()
253
+ # Try to get collision energy from meta values first, fallback to getActivationEnergy()
254
+ try:
255
+ energy = s.getPrecursors()[0].getMetaValue('collision energy')
256
+ if energy is None or energy == 0.0:
257
+ energy = s.getPrecursors()[0].getActivationEnergy()
258
+ except Exception:
259
+ energy = s.getPrecursors()[0].getActivationEnergy()
254
260
 
255
261
  peaks = s.get_peaks()
256
262
  spect = Spectrum(mz=peaks[0], inty=peaks[1], ms_level=s.getMSLevel())
@@ -983,7 +989,7 @@ def index_file(self):
983
989
  self.set_source(self.file_source.replace(".sample5", ".mzml"))
984
990
  else:
985
991
  raise FileNotFoundError(
986
- f"File {self.file_source} not found. Did the path change? Consider running source()."
992
+ f"File {self.file_source} not found. Did the path change? Consider running source().",
987
993
  )
988
994
  self.index_file()
989
995
  else: