PyPI - masster - Versions diffs - 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl - Mend

masster 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (31) hide show

masster/__init__.py +2 -0
masster/_version.py +1 -1
masster/data/libs/README.md +17 -0
masster/data/libs/ccm.py +533 -0
masster/data/libs/central_carbon_README.md +17 -0
masster/data/libs/central_carbon_metabolites.csv +120 -0
masster/data/libs/urine.py +333 -0
masster/data/libs/urine_metabolites.csv +51 -0
masster/sample/h5.py +1 -1
masster/sample/helpers.py +3 -7
masster/sample/lib.py +32 -25
masster/sample/load.py +9 -3
masster/sample/plot.py +113 -27
masster/study/export.py +27 -10
masster/study/h5.py +58 -40
masster/study/helpers.py +450 -196
masster/study/helpers_optimized.py +5 -5
masster/study/load.py +144 -118
masster/study/plot.py +691 -277
masster/study/processing.py +9 -5
masster/study/study.py +6 -6
{masster-0.3.18.dist-info → masster-0.3.20.dist-info}/METADATA +1 -1
{masster-0.3.18.dist-info → masster-0.3.20.dist-info}/RECORD +31 -25
/masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
/masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
/masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
/masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
/masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
{masster-0.3.18.dist-info → masster-0.3.20.dist-info}/WHEEL +0 -0
{masster-0.3.18.dist-info → masster-0.3.20.dist-info}/entry_points.txt +0 -0
{masster-0.3.18.dist-info → masster-0.3.20.dist-info}/licenses/LICENSE +0 -0

masster/data/libs/central_carbon_metabolites.csv ADDED Viewed

@@ -0,0 +1,120 @@
+Name,Formula,SMILES,InChIKey
+Glucose,C6H11O,OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O,WQZGKKKJIJFFOK-GASJEMHNSA-N
+Glucose-6-phosphate,C6H13O9P,O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,VFRROHXSMXFLSN-SLPGGIOYSA-N
+Fructose-6-phosphate,C6H13O9P,O=C(CO)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,GSXOAOHZAIYLCY-HSUXUTPPSA-N
+"Fructose-1,6-bisphosphate",C6H13O12P,O=P(O)(O)OC[C@H]1O[C@](O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O,RNBGYGVWRKECFJ-ARQDHWQXSA-N
+Glyceraldehyde-3-phosphate,C3H7O6P,O=C[C@H](O)COP(=O)(O)O,LXJXRIRHZLFYRP-VKHMYHEASA-N
+Dihydroxyacetone phosphate,C3H7O6P,O=C(CO)COP(=O)(O)O,GNGACRATGGDKBX-UHFFFAOYSA-N
+3-Phosphoglycerate,C3H7O7P,O=C(O)C(O)COP(=O)(O)O,OSJPPGNTCRNQQC-UHFFFAOYSA-N
+2-Phosphoglycerate,C3H7O7P,O=C(O)C(CO)OP(=O)(O)O,GXIURPTVHJPJLF-UHFFFAOYSA-N
+Phosphoenolpyruvate,C3H5O6P,C=C(OP(=O)(O)O)C(=O)O,DTBNBXWJWCWCIK-UHFFFAOYSA-N
+Pyruvate,C3H6O,CC(=O)C(=O)O,LCTONWCANYUPML-UHFFFAOYSA-M
+Lactate,C3H8O,CC(O)C(=O)O,JVTAAEKCZFNVCJ-UHFFFAOYSA-M
+Acetyl-CoA,C23H38N7O17P3S,CC(=O)SCCN=C(O)CCN=C(O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,ZSLZBFCDCINBPY-ZSJPKINUSA-N
+Citric acid,C6H7O,O=C(O)CC(O)(CC(=O)O)C(=O)O,KRKNYBCHXYNGOX-UHFFFAOYSA-N
+Isocitrate,C6H7O,O=C(O)CC(C(=O)O)C(O)C(=O)O,ODBLHEXUDAPZAU-UHFFFAOYSA-N
+Alpha-ketoglutaric acid,C5H5O,O=C(O)CCC(=O)C(=O)O,KPGXRSRHYNQIFN-UHFFFAOYSA-N
+Succinyl-CoA,C25H40N7O19P3S,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(O)=NCCC(O)=NCCSC(=O)CCC(=O)O,VNOYUJKHFWYWIR-ITIYDSSPSA-N
+Succinic acid,C4H5O,O=C(O)CCC(=O)O,KDYFGRWQOYBRFD-UHFFFAOYSA-N
+Fumaric acid,C4H3O,O=C(O)/C=C/C(=O)O,VZCYOOQTPOCHFL-OWOJBTEDSA-N
+Malic acid,C4H5O,O=C(O)CC(O)C(=O)O,BJEPYKJPYRNKOW-UHFFFAOYSA-N
+Oxaloacetic acid,C4H3O,O=C(O)CC(=O)C(=O)O,KHPXUQMNIQBQEV-UHFFFAOYSA-N
+Ribose-5-phosphate,C5H11O8P,O=C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,PPQRONHOSHZGFQ-LMVFSUKVSA-N
+Ribulose-5-phosphate,C5H11O8P,O=C(CO)[C@H](O)[C@H](O)COP(=O)(O)O,FNZLKVNUWIIPSJ-UHNVWZDZSA-N
+Sedoheptulose-7-phosphate,C7H15O10P,O=C(CO)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,JDTUMPKOJBQPKX-GBNDHIKLSA-N
+Erythrose-4-phosphate,C4H9O7P,O=C[C@H](O)[C@H](O)COP(=O)(O)O,NGHMDNPXVRFFGS-IUYQGCFVSA-N
+"Sedoheptulose-1,7-bisphosphate",C7H15O13P,O=C(COP(=O)(O)O)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,OKHXOUGRECCASI-SHUUEZRQSA-N
+Glycerol-3-phosphate,C3H9O6P,O=P(O)(O)OCC(O)CO,AWUCVROLDVIAJX-UHFFFAOYSA-N
+Glycerate,C3H9O,O=C(O)C(O)CO,RBNPOMFGQQGHHO-UHFFFAOYSA-M
+Pentose,C5H9O,OC1COC(O)C(O)C1O,SRBFZHDQGSBBOR-UHFFFAOYSA-N
+Acetaldehyde,C2H4O,CC=O,IKHGUXGNUITLKF-UHFFFAOYSA-N
+Acetic acid,C2H3O,CC(=O)O,QTBSBXVTEAMEQO-UHFFFAOYSA-N
+Alanine,C3H6NO,C[C@H](N)C(=O)O,QNAYBMKLOCPYGJ-REOHCLBHSA-N
+Arginine,C6H13N4O,N=C(N)NCCC[C@H](N)C(=O)O,ODKSFYDXXFIFQN-BYPYZUCNSA-N
+Asparagine,C4H7N2O,N=C(O)C[C@H](N)C(=O)O,DCXYFEDJOCDNAF-REOHCLBHSA-N
+Aspartic acid,C4H6NO,N[C@@H](CC(=O)O)C(=O)O,CKLJMWTZIZZHCS-REOHCLBHSA-N
+Cysteine,C3H7NO2S,N[C@@H](CS)C(=O)O,XUJNEKJLAYXESH-REOHCLBHSA-N
+Glutamic acid,C5H8NO,N[C@@H](CCC(=O)O)C(=O)O,WHUUTDBJXJRKMK-VKHMYHEASA-N
+Glutamine,C5H9N2O,N=C(O)CC[C@H](N)C(=O)O,ZDXPYRJPNDTMRX-VKHMYHEASA-N
+Glycine,C2H4NO,NCC(=O)O,DHMQDGOQFOQNFH-UHFFFAOYSA-N
+Histidine,C6H8N3O,N[C@@H](Cc1cnc[nH]1)C(=O)O,HNDVDQJCIGZPNO-YFKPBYRVSA-N
+Isoleucine,C6H12NO,CC[C@H](C)[C@H](N)C(=O)O,AGPKZVBTJJNPAG-WHFBIAKZSA-N
+Leucine,C6H12NO,CC(C)C[C@H](N)C(=O)O,ROHFNLRQFUQHCH-YFKPBYRVSA-N
+Lysine,C6H13N2O,NCCCC[C@H](N)C(=O)O,KDXKERNSBIXSRK-YFKPBYRVSA-N
+Methionine,C5H11NO2S,CSCC[C@H](N)C(=O)O,FFEARJCKVFRZRR-BYPYZUCNSA-N
+Phenylalanine,C9H10NO,N[C@@H](Cc1ccccc1)C(=O)O,COLNVLDHVKWLRT-QMMMGPOBSA-N
+Proline,C5H8NO,O=C(O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N
+Serine,C3H6NO,N[C@@H](CO)C(=O)O,MTCFGRXMJLQNBG-REOHCLBHSA-N
+Threonine,C4H8NO,C[C@@H](O)[C@H](N)C(=O)O,AYFVYJQAPQTCCC-GBXIJSLDSA-N
+Tryptophan,C11H11N2O,N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O,QIVBCDIJIAJPQS-VIFPVBQESA-N
+Tyrosine,C9H10NO,N[C@@H](Cc1ccc(O)cc1)C(=O)O,OUYCCCASQSFEME-QMMMGPOBSA-N
+Valine,C5H10NO,CC(C)[C@H](N)C(=O)O,KZSNJWFQEVHDMF-BYPYZUCNSA-N
+Ornithine,C5H11N2O,NCCC[C@H](N)C(=O)O,AHLPHDHHMVZTML-BYPYZUCNSA-N
+Citrulline,C6H12N3O,N=C(O)NCCC[C@H](N)C(=O)O,RHGKLRLOHDJJDR-BYPYZUCNSA-N
+Homocysteine,C4H9NO2S,N[C@@H](CCS)C(=O)O,FFFHZYDWPBMWHY-VKHMYHEASA-N
+S-adenosylmethionine,C15H22N6O5S,C[S](CC[C@H](N)C(=O)O)C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,MEFKEPWMEQBLKI-AIRLBKTGSA-N
+S-adenosylhomocysteine,C14H20N6O5S,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,ZJUKTBDSGOFHSH-WFMPWKQPSA-N
+Formic acid,CHO,O=CO,BDAGIHXWWSANSR-UHFFFAOYSA-N
+Propionic acid,C3H5O,CCC(=O)O,XBDQKXXYIPTUBI-UHFFFAOYSA-N
+Butyric acid,C4H7O,CCCC(=O)O,FERIUCNNQQJTOY-UHFFFAOYSA-N
+Malonic acid,C3H3O,O=C(O)CC(=O)O,OFOBLEOULBTSOW-UHFFFAOYSA-N
+2-Hydroxyglutarate,C5H7O,O=C(O)CCC(O)C(=O)O,HWXBTNAVRSUOJR-UHFFFAOYSA-N
+3-Hydroxybutyrate,C4H10O,CC(O)CC(=O)O,WHBMMWSBFZVSSR-UHFFFAOYSA-M
+Acetoacetate,C4H8O,CC(=O)CC(=O)O,WDJHALXBUFZDSR-UHFFFAOYSA-M
+Beta-hydroxybutyrate,C4H7O,CC(O)CC(=O)O,WHBMMWSBFZVSSR-UHFFFAOYSA-N
+Pyruvic acid,C3H3O,CC(=O)C(=O)O,LCTONWCANYUPML-UHFFFAOYSA-N
+Lactic acid,C3H5O,CC(O)C(=O)O,JVTAAEKCZFNVCJ-UHFFFAOYSA-N
+Myristic acid,C14H27O,CCCCCCCCCCCCCC(=O)O,TUNFSRHWOTWDNC-UHFFFAOYSA-N
+Palmitic acid,C16H31O,CCCCCCCCCCCCCCCC(=O)O,IPCSVZSSVZVIGE-UHFFFAOYSA-N
+Stearic acid,C18H35O,CCCCCCCCCCCCCCCCCC(=O)O,QIQXTHQIDYTFRH-UHFFFAOYSA-N
+Palmitoleic acid,C16H29O,CCCCCC/C=C\CCCCCCCC(=O)O,SECPZKHBENQXJG-FPLPWBNLSA-N
+Oleic acid,C18H33O,CCCCCCCC/C=C\CCCCCCCC(=O)O,ZQPPMHVWECSIRJ-KTKRTIGZSA-N
+Linoleic acid,C18H31O,CCCCC/C=C\C/C=C\CCCCCCCC(=O)O,OYHQOLUKZRVURQ-HZJYTTRNSA-N
+Alpha-linolenic acid,C18H29O,CC/C=C\C/C=C\C/C=C\CCCCCCCC(=O)O,DTOSIQBPPRVQHS-PDBXOOCHSA-N
+Arachidonic acid,C20H31O,CCCCC/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)O,YZXBAPSDXZZRGB-DOFZRALJSA-N
+Adenine,C5H4N,Nc1nc[nH]c2ncnc1-2,GFFGJBXGBJISGV-UHFFFAOYSA-N
+Guanine,C5H5N5O,N=c1nc(O)c2nc[nH]c2[nH]1,UYTPUPDQBNUYGX-UHFFFAOYSA-N
+Cytosine,C4H5N3O,N=c1ccnc(O)[nH]1,OPTASPLRGRRNAP-UHFFFAOYSA-N
+Thymine,C5H5N2O,Cc1cnc(O)nc1O,RWQNBRDOKXIBIV-UHFFFAOYSA-N
+Uracil,C4H3N2O,Oc1ccnc(O)n1,ISAKRJDGNUQOIC-UHFFFAOYSA-N
+Adenosine,C10H12N5O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,OIRDTQYFTABQOQ-KQYNXXCUSA-N
+Guanosine,C10H12N5O,N=c1nc(O)c2ncn([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O)c2[nH]1,NYHBQMYGNKIUIF-UUOKFMHZSA-N
+Cytidine,C9H12N3O,N=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(O)n1,UHDGCWIWMRVCDJ-XVFCMESISA-N
+Uridine,C9H11N2O,O=c1nc(O)ccn1[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,DRTQHJPVMGBUCF-XVFCMESISA-N
+AMP,C10H14N5O7P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,UDMBCSSLTHHNCD-KQYNXXCUSA-N
+ADP,C10H14N5O10P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,XTWYTFMLZFPYCI-KQYNXXCUSA-N
+ATP,C10H15N5O13P,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,ZKHQWZAMYRWXGA-KQYNXXCUSA-N
+GMP,C10H14N5O8P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,RQFCJASXJCIDSX-UUOKFMHZSA-N
+GDP,C10H14N5O11P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,QGWNDRXFNXRZMB-UUOKFMHZSA-N
+GTP,C10H15N5O14P,N=c1nc(O)c2ncn([C@@H]3O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]3O)c2[nH]1,XKMLYUALXHKNFT-UUOKFMHZSA-N
+CMP,C9H14N3O8P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,IERHLVCPSMICTF-XVFCMESISA-N
+CDP,C9H14N3O11P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,ZWIADYZPOWUWEW-XVFCMESISA-N
+CTP,C9H15N3O14P,N=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(O)n1,PCDQPRRSZKQHHS-XVFCMESISA-N
+UMP,C9H13N2O9P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,DJJCXFVJDGTHFX-XVFCMESISA-N
+UDP,C9H13N2O12P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,XCCTYIAWTASOJW-XVFCMESISA-N
+UTP,C9H14N2O15P,O=c1nc(O)ccn1[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,PGAVKCOVUIYSFO-XVFCMESISA-N
+NAD+,C21H26N7O14P,N=C(O)C1CCCN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,BAWFJGJZGIEFAR-NNYOXOHSSA-N
+NADH,C21H28N7O14P,N=C(O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,BOPGDPNILDQYTO-NNYOXOHSSA-N
+NADP+,C21H27N7O17P,N=C(O)C1CCCN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,XJLXINKUBYWONI-NNYOXOHSSA-N
+NADPH,C21H29N7O17P,N=C(O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,ACFIXJIJDZMPPO-NNYOXOHSSA-N
+FAD,C27H32N9O15P,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C,VWWQXMAJTJZDQX-UYBVJOGSSA-N
+FMN,C17H21N4O9P,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C,FVTCRASFADXXNN-SCRDCRAPSA-N
+Coenzyme A,C21H36N7O16P3S,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(O)=NCCC(O)=NCCS,RGJOEKWQDUBAIZ-IBOSZNHHSA-N
+Pantothenic acid,C9H16NO,CC(C)(CO)[C@@H](O)C(O)=NCCC(=O)O,GHOKWGTUZJEAQD-ZETCQYMHSA-N
+Riboflavin,C17H19N4O,Cc1cc2nc3c(O)nc(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C,AUNGANRZJHBGPY-SCRDCRAPSA-N
+Niacin,C6H4NO,O=C(O)c1cccnc1,PVNIIMVLHYAWGP-UHFFFAOYSA-N
+Fructose,C6H11O,OCC1(O)OC[C@@H](O)[C@@H](O)[C@@H]1O,LKDRXBCSQODPBY-VRPWFDPXSA-N
+Mannose,C6H11O,OC[C@H]1OC(O)[C@@H](O)[C@@H](O)[C@@H]1O,WQZGKKKJIJFFOK-QTVWNMPRSA-N
+Mannose-6-phosphate,C6H13O9P,O=C[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,VFRROHXSMXFLSN-KVTDHHQDSA-N
+Ribose,C5H9O,OC1OC[C@@H](O)[C@@H](O)[C@H]1O,SRBFZHDQGSBBOR-SOOFDHNKSA-N
+Glucosamine,C6H12NO,N[C@H]1C(O)O[C@H](CO)[C@@H](O)[C@@H]1O,MSWZFWKMSRAUBD-IVMDWMLBSA-N
+N-acetylglucosamine,C8H14NO,CC(O)=N[C@@H](C=O)[C@@H](O)[C@H](O)[C@H](O)CO,MBLBDJOUHNCFQT-LXGUWJNJSA-N
+Choline,C5H13NO,[H]OC([H])([H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],OEYIOHPDSNJKLS-UHFFFAOYSA-N
+Betaine,C5H10NO,[H]OC(=O)C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],KWIUHFFTVRNATP-UHFFFAOYSA-N
+Carnitine,C7H14NO,[H]OC(=O)C([H])([H])C([H])(O[H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],PHIQHXFUZVPYII-UHFFFAOYSA-N
+Phosphocholine,C5H14NO4P,[H]OP(=O)(O[H])OC([H])([H])C([H])([H])N(C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H],YHHSONZFOIEMCP-UHFFFAOYSA-O
+Glycerol,C3H7O,OCC(O)CO,PEDCQBHIVMGVHV-UHFFFAOYSA-N
+Sorbitol,C6H13O,OC[C@@H](O)[C@@H](O)[C@H](O)[C@@H](O)CO,FBPFZTCFMRRESA-JGWLITMVSA-N
+Inositol,C6H11O,OC1C(O)C(O)C(O)C(O)C1O,CDAISMWEOUEBRE-UHFFFAOYSA-N
+Cholesterol,C27H46O,CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,HVYWMOMLDIMFJA-DPAQBDIFSA-N
+Pantothenate,C9H21NO,CC(C)(CO)C(O)C(O)=NCCC(=O)O,GHOKWGTUZJEAQD-UHFFFAOYSA-M

masster/data/libs/urine.py ADDED Viewed

@@ -0,0 +1,333 @@
+"""Generate a CSV of human urine metabolites.
+This improved script attempts to:
+- Download or scrape a urine metabolite list from the UrineMetabolome downloads page.
+- Fall back to HMDB scraping or a curated list if needed.
+- Resolve formula/SMILES/InChIKey using PubChem with bounded parallelism.
+- Use RDKit (if available) to convert InChI -> SMILES when PubChem does not provide SMILES.
+The goal is robust coverage and faster lookups by parallelizing per-name queries
+while avoiding aggressive parallelism that might overload PubChem.
+"""
+from __future__ import annotations
+import csv
+import sys
+import re
+import time
+import os
+from urllib.parse import quote, urljoin
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Iterable
+try:
+    import requests
+    from bs4 import BeautifulSoup
+except Exception:
+    requests = None
+    BeautifulSoup = None
+URINEMETABOLOME_DOWNLOADS = "https://www.urinemetabolome.ca/downloads"
+HMDB_URINE_LIST_URL = "https://hmdb.ca/metabolites?utf8=%E2%9C%93&search=&biological_context=Urine"
+def normalize_name(name: str) -> str:
+    if not name:
+        return name
+    s = name
+    s = re.sub(r"\(.*?\)", "", s)
+    s = s.replace("➔", "->").replace("–", "-").replace("—", "-")
+    s = re.sub(r"\s+", " ", s).strip()
+    mapping = {
+        "AMP": "Adenosine monophosphate",
+        "ADP": "Adenosine diphosphate",
+        "ATP": "Adenosine triphosphate",
+        "GMP": "Guanosine monophosphate",
+        "GDP": "Guanosine diphosphate",
+        "GTP": "Guanosine triphosphate",
+        "NAD+": "Nicotinamide adenine dinucleotide",
+        "NADH": "Nicotinamide adenine dinucleotide (reduced)",
+    }
+    up = s.upper()
+    if up in mapping:
+        return mapping[up]
+    return s
+def fetch_urinemetabolome_names(limit: int = 2000) -> list[str]:
+    """Scrape the UrineMetabolome downloads page for any downloadable metabolite lists.
+    Best-effort: finds links on the downloads page that look like CSV/TSV/Excel and tries
+    to parse a simple name column. If anything fails, returns an empty list and the
+    caller should fall back to HMDB or a curated list.
+    """
+    if requests is None or BeautifulSoup is None:
+        return []
+    try:
+        r = requests.get(URINEMETABOLOME_DOWNLOADS, timeout=15)
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, "html.parser")
+        names = []
+        for a in soup.find_all("a", href=True):
+            href = a["href"]
+            if re.search(r"\.csv$|\.tsv$|\.xlsx?$", href, re.I):
+                url = urljoin(URINEMETABOLOME_DOWNLOADS, href)
+                # try to download and parse simple CSV/TSV
+                try:
+                    rr = requests.get(url, timeout=20)
+                    if rr.status_code != 200:
+                        continue
+                    text = rr.content.decode("utf-8", errors="ignore")
+                    # try CSV/TSV parse by splitting lines and looking for a header with 'name' or 'metabolite'
+                    lines = [l.strip() for l in text.splitlines() if l.strip()]
+                    if not lines:
+                        continue
+                    sep = "," if "," in lines[0] else "\t"
+                    header = [c.strip().lower() for c in lines[0].split(sep)]
+                    # find candidate column
+                    col_idx = None
+                    for i, c in enumerate(header):
+                        if any(k in c for k in ("name", "metabolite", "compound")):
+                            col_idx = i
+                            break
+                    if col_idx is None:
+                        # fallback: take first column
+                        col_idx = 0
+                    for l in lines[1:limit+1]:
+                        parts = [p.strip() for p in l.split(sep)]
+                        if len(parts) > col_idx:
+                            n = parts[col_idx]
+                            if n and n not in names:
+                                names.append(n)
+                        if len(names) >= limit:
+                            break
+                    if names:
+                        return names
+                except Exception:
+                    continue
+        return []
+    except Exception:
+        return []
+def fetch_hmdb_urine_names(limit: int = 500) -> list[str]:
+    """Fallback HMDB scrape (best-effort)."""
+    if requests is None or BeautifulSoup is None:
+        return []
+    try:
+        r = requests.get(HMDB_URINE_LIST_URL, timeout=20)
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, "html.parser")
+        names = []
+        for a in soup.find_all("a", href=True):
+            href = a["href"]
+            if re.search(r"/metabolites/HMDB", href):
+                text = a.get_text(strip=True)
+                if text and len(text) > 1:
+                    names.append(text)
+                    if len(names) >= limit:
+                        break
+        return list(dict.fromkeys(names))
+    except Exception:
+        return []
+def fetch_pubchem_name_once(name: str, timeout: int = 15):
+    """Fetch properties for a single name from PubChem and try inchI->SMILES if needed.
+    Returns (formula, smiles, inchikey) or (None, None, None) on failure.
+    """
+    if requests is None:
+        return (None, None, None)
+    q = normalize_name(name)
+    url_name = quote(q)
+    url = (
+        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{url_name}/property/"
+        + "MolecularFormula,CanonicalSMILES,InChI,InChIKey/JSON"
+    )
+    try:
+        r = requests.get(url, timeout=timeout)
+        if r.status_code != 200:
+            return (None, None, None)
+        j = r.json()
+        if "PropertyTable" in j and "Properties" in j["PropertyTable"]:
+            p = j["PropertyTable"]["Properties"][0]
+            formula = p.get("MolecularFormula")
+            smiles = p.get("CanonicalSMILES")
+            inchi = p.get("InChI")
+            inchikey = p.get("InChIKey")
+            if not smiles and inchi:
+                # try RDKit conversion
+                try:
+                    from rdkit import Chem
+                    m = Chem.MolFromInchi(inchi)
+                    if m is not None:
+                        try:
+                            Chem.SanitizeMol(m)
+                        except Exception:
+                            pass
+                        smiles = Chem.MolToSmiles(m, isomericSmiles=True)
+                except Exception:
+                    pass
+            # If still missing SMILES, and we have an InChIKey, try inchikey -> property
+            if not smiles and inchikey:
+                try:
+                    ik = quote(inchikey)
+                    url2 = (
+                        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/{ik}/property/"
+                        + "MolecularFormula,CanonicalSMILES,InChI,InChIKey/JSON"
+                    )
+                    r2 = requests.get(url2, timeout=timeout)
+                    if r2.status_code == 200:
+                        j2 = r2.json()
+                        if "PropertyTable" in j2 and "Properties" in j2["PropertyTable"]:
+                            p2 = j2["PropertyTable"]["Properties"][0]
+                            smiles = p2.get("CanonicalSMILES") or smiles
+                            formula = formula or p2.get("MolecularFormula")
+                except Exception:
+                    pass
+            # Final fallback: inchikey -> cids -> cid -> property
+            if not smiles and inchikey:
+                try:
+                    ik = quote(inchikey)
+                    urlc = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/{ik}/cids/JSON"
+                    rc = requests.get(urlc, timeout=timeout)
+                    if rc.status_code == 200:
+                        jc = rc.json()
+                        if "IdentifierList" in jc and "CID" in jc["IdentifierList"] and jc["IdentifierList"]["CID"]:
+                            cid = jc["IdentifierList"]["CID"][0]
+                            try:
+                                url3 = (
+                                    f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/property/"
+                                    + "MolecularFormula,CanonicalSMILES,InChI,InChIKey/JSON"
+                                )
+                                r3 = requests.get(url3, timeout=timeout)
+                                if r3.status_code == 200:
+                                    j3 = r3.json()
+                                    if "PropertyTable" in j3 and "Properties" in j3["PropertyTable"]:
+                                        p3 = j3["PropertyTable"]["Properties"][0]
+                                        smiles = p3.get("CanonicalSMILES") or smiles
+                                        formula = formula or p3.get("MolecularFormula")
+                            except Exception:
+                                pass
+                except Exception:
+                    pass
+            return (formula, smiles, inchikey)
+    except Exception:
+        return (None, None, None)
+    return (None, None, None)
+def fetch_pubchem_for_names(names: Iterable[str], workers: int = 8, delay: float = 0.05):
+    """Resolve a list of names via PubChem in parallel.
+    Returns dict name -> (formula, smiles, inchikey)
+    """
+    names = list(names)
+    results: dict[str, tuple | None] = {}
+    with ThreadPoolExecutor(max_workers=workers) as ex:
+        futures = {ex.submit(fetch_pubchem_name_once, n): n for n in names}
+        for fut in as_completed(futures):
+            n = futures[fut]
+            try:
+                res = fut.result()
+            except Exception:
+                res = (None, None, None)
+            results[n] = res
+            time.sleep(delay)  # polite small delay between completions
+    return results
+def generate_csv(out_path: str = "urine_metabolites.csv", workers: int = 8):
+    # Try UrineMetabolome downloads first
+    names = fetch_urinemetabolome_names()
+    if not names:
+        names = fetch_hmdb_urine_names()
+    if not names:
+        print("Falling back to curated urine list")
+        names = [
+            "Creatinine",
+            "Urea",
+            "Hippuric acid",
+            "Citrate",
+            "Creatine",
+            "Glycine",
+            "Taurine",
+            "Succinate",
+            "Fumaric acid",
+            "Malic acid",
+            "Lactic acid",
+            "Acetic acid",
+            "Formic acid",
+            "Alanine",
+            "Betaine",
+            "Choline",
+            "Trimethylamine N-oxide",
+            "Phenylacetylglutamine",
+            "p-Cresol sulfate",
+            "Indoxyl sulfate",
+            "Uric acid",
+            "Xanthine",
+            "3-Hydroxybutyrate",
+            "Acetoacetate",
+            "N-Acetylneuraminic acid",
+        ]
+    print(f"Resolving {len(names)} names via PubChem (workers={workers})...")
+    mapping = fetch_pubchem_for_names(names, workers=workers)
+    rows = []
+    for name in names:
+        formula, smiles, inchikey = mapping.get(name, (None, None, None))
+        rows.append({
+            "Name": name,
+            "Formula": formula or "",
+            "SMILES": smiles or "",
+            "InChIKey": inchikey or "",
+        })
+    # Ensure output directory exists
+    out_dir = os.path.join("masster", "data", "libs")
+    os.makedirs(out_dir, exist_ok=True)
+    out_path_full = os.path.join(out_dir, os.path.basename(out_path))
+    fieldnames = ["Name", "Formula", "SMILES", "InChIKey"]
+    with open(out_path_full, "w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        for r in rows:
+            writer.writerow(r)
+    print(f"Wrote {len(rows)} entries to {out_path_full}")
+    return out_path_full
+def test_load_with_lib(csv_path: str):
+    try:
+        from masster.lib import Lib
+    except Exception as e:
+        print(f"Cannot import masster.lib.Lib: {e}")
+        return False
+    try:
+        lib = Lib()
+        lib.import_csv(csv_path, polarity=None)
+        print(f"Lib loaded: {len(lib)} entries")
+        return True
+    except Exception as e:
+        print(f"Failed to load CSV with Lib.import_csv: {e}")
+        return False
+if __name__ == "__main__":
+    csv_file = generate_csv()
+    ok = test_load_with_lib(csv_file)
+    if not ok:
+        print("Test failed; please inspect messages above.")
+        sys.exit(2)
+    print("Done.")

masster/data/libs/urine_metabolites.csv ADDED Viewed

@@ -0,0 +1,51 @@
+Name,Formula,SMILES,InChIKey
+HMDB0000001,,,
+1-Methylhistidine,C7H11N3O2,Cn1cnc(C[C@H](N)C(=O)O)c1,BRMWTNUJHUMWMS-LURJTMIESA-N
+HMDB0000002,,,
+"1,3-Diaminopropane",C3H10N2,NCCCN,XFNJVJPLKCPIBV-UHFFFAOYSA-N
+HMDB0000005,,,
+2-Ketobutyric acid,C4H6O3,CCC(=O)C(=O)O,TYEYBOSBBBHJIV-UHFFFAOYSA-N
+HMDB0000008,,,
+2-Hydroxybutyric acid,C4H8O3,CCC(O)C(=O)O,AFENDNXGAFYKQO-UHFFFAOYSA-N
+HMDB0000010,,,
+2-Methoxyestrone,,,
+HMDB0000011,,,
+3-Hydroxybutyric acid,,,
+HMDB0000012,,,
+Deoxyuridine,,,
+HMDB0000014,,,
+Deoxycytidine,,,
+HMDB0000015,,,
+Cortexolone,,,
+HMDB0000016,,,
+Deoxycorticosterone,,,
+HMDB0000017,,,
+4-Pyridoxic acid,C8H9NO4,Cc1ncc(CO)c(C(=O)O)c1O,HXACOUQIXZGNBF-UHFFFAOYSA-N
+HMDB0000019,,,
+alpha-Ketoisovaleric acid,C5H8O3,CC(C)C(=O)C(=O)O,QHKABHOOEWYVLI-UHFFFAOYSA-N
+HMDB0000020,,,
+p-Hydroxyphenylacetic acid,C8H8O3,O=C(O)Cc1ccc(O)cc1,XQXPVVBIMDBYFF-UHFFFAOYSA-N
+HMDB0000021,,,
+Iodotyrosine,C9H10INO3,N[C@@H](Cc1ccc(O)c(I)c1)C(=O)O,UQTZMGFTRHFAAM-ZETCQYMHSA-N
+HMDB0000022,,,
+3-Methoxytyramine,C9H13NO2,COc1cc(CCN)ccc1O,DIVQKHQLANKJQO-UHFFFAOYSA-N
+HMDB0000023,,,
+(S)-3-Hydroxyisobutyric acid,,,
+HMDB0000024,,,
+3-O-Sulfogalactosylceramide (d18:1/24:0),,,
+HMDB0000026,,,
+Ureidopropionic acid,,,
+HMDB0000027,,,
+Tetrahydrobiopterin,,,
+HMDB0000030,,,
+Biotin,C10H16N2O3S,O=C(O)CCCC[C@@H]1SC[C@@H]2N=C(O)N[C@@H]21,YBJHBAHKTGYVGT-ZKWXMUAHSA-N
+HMDB0000031,,,
+Androsterone,C19H30O2,C[C@]12CC[C@@H](O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)C(=O)CC[C@@H]12,QGXBDMJGAMFCBF-HLUDHZFRSA-N
+HMDB0000032,,,
+7-Dehydrocholesterol,C27H44O,CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2C3=CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,UCTLRSWJYQTBFZ-DDPQNLDTSA-N
+HMDB0000033,,,
+Carnosine,C9H14N4O3,NCCC(O)=N[C@@H](Cc1cnc[nH]1)C(=O)O,CQOVPNPJLQNMDC-ZETCQYMHSA-N
+HMDB0000034,,,
+Adenine,C5H5N5,Nc1nc[nH]c2ncnc1-2,GFFGJBXGBJISGV-UHFFFAOYSA-N
+HMDB0000036,,,
+Taurocholic acid,,,

masster/sample/h5.py CHANGED Viewed

@@ -900,7 +900,7 @@ def _load_sample5(self, filename: str, map: bool = True):
 def _load_sample5_study(self, filename: str, map: bool = True):
     """
     Optimized variant of _load_sample5 for study loading that skips reading ms1_df.
     This is used when adding samples to studies where ms1_df data is not needed,
     improving loading throughput by skipping the potentially large ms1_df dataset.

masster/sample/helpers.py CHANGED Viewed

@@ -176,7 +176,7 @@ def _get_feature_uids(self, features=None, verbose=True):
                 if not isinstance(features, pd.DataFrame):
                     if verbose:
                         self.logger.error(
-                            "Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame."
+                            "Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.",
                         )
                     return []
@@ -298,7 +298,7 @@ def get_eic(self, mz, mz_tol=None):
     """
     # Use default mz_tol from sample parameters if not provided
     if mz_tol is None:
-        if hasattr(self, 'parameters') and hasattr(self.parameters, 'eic_mz_tol'):
+        if hasattr(self, "parameters") and hasattr(self.parameters, "eic_mz_tol"):
             mz_tol = self.parameters.eic_mz_tol
         else:
             mz_tol = 0.01  # fallback default
@@ -323,11 +323,7 @@ def get_eic(self, mz, mz_tol=None):
             return None
         # Aggregate intensities per retention time. Use sum in case multiple points per rt.
-        chrom = (
-            matches.group_by("rt")
-            .agg([pl.col("inty").sum().alias("inty")])
-            .sort("rt")
-        )
+        chrom = matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
         # Attach to Sample
         self.chrom_df = chrom

masster/sample/lib.py CHANGED Viewed

@@ -1,28 +1,32 @@
 """
-_lib.py
+lib.py
-This module provides utility functions and algorithms for mass spectrometry data processing.
-It contains core functionality for compound library management, target identification,
-adduct handling, and various analytical operations used throughout the masster package.
+This module provides the Lib class and utility functions for mass spectrometry compound library
+management and feature annotation. It contains core functionality for compound library management,
+target identification, adduct handling, and various analytical operations.
 Key Features:
-- **Compound Libraries**: Load and manage compound databases with metadata.
-- **Adduct Calculations**: Handle various ionization adducts and charge states.
-- **Mass Calculations**: Precise mass calculations with adduct corrections.
-- **Target Matching**: Match detected features against compound libraries.
-- **Polarity Handling**: Support for positive and negative ionization modes.
-- **Database Integration**: Interface with various compound database formats.
+- **Lib Class**: Main class for managing compound libraries and annotations
+- **Compound Libraries**: Load and manage compound databases with metadata
+- **Adduct Calculations**: Handle various ionization adducts and charge states
+- **Mass Calculations**: Precise mass calculations with adduct corrections
+- **Target Matching**: Match detected features against compound libraries
+- **Polarity Handling**: Support for positive and negative ionization modes
+- **Database Integration**: Interface with various compound database formats
 Dependencies:
-- `pyopenms`: For mass spectrometry algorithms and data structures.
-- `polars` and `pandas`: For efficient data manipulation and analysis.
-- `numpy`: For numerical computations and array operations.
-- `tqdm`: For progress tracking during batch operations.
+- `pyopenms`: For mass spectrometry algorithms and data structures
+- `polars` and `pandas`: For efficient data manipulation and analysis
+- `numpy`: For numerical computations and array operations
+- `tqdm`: For progress tracking during batch operations
+Classes:
+- `Lib`: Main class for compound library management and annotation
 Functions:
-- `lib_load()`: Load compound libraries from CSV files.
-- `load_lib()`: Alias for lib_load function.
-- Various utility functions for mass calculations and library management.
+- `lib_load()`: Load compound libraries from CSV files (legacy)
+- `load_lib()`: Alias for lib_load function (legacy)
+- Various utility functions for mass calculations and library management
 Supported Adducts:
 - Positive mode: [M+H]+, [M+Na]+, [M+K]+, [M+NH4]+, [M-H2O+H]+
@@ -30,19 +34,22 @@ Supported Adducts:
 Example Usage:
 ```python
-from _lib import lib_load
+from masster.sample.lib import Lib
+# Create library instance
+lib = Lib()
-# Load compound library
-lib_load(self, csvfile="compounds.csv", polarity="positive")
+# Import compounds from CSV
+lib.import_csv("compounds.csv", polarity="positive")
-# Access loaded library data
-print(f"Loaded {len(self.lib_df)} compounds")
-print(self.lib_df.head())
+# Access library data
+print(f"Loaded {len(lib.lib_df)} compounds")
+print(lib.lib_df.head())
 ```
 See Also:
-- `parameters._lib_parameters`: For library-specific parameter configuration.
-- `single.py`: For applying library matching to detected features.
+- `parameters._lib_parameters`: For library-specific parameter configuration
+- `sample.py`: For applying library matching to detected features
 """

masster/sample/load.py CHANGED Viewed

@@ -119,7 +119,7 @@ def load_study(
 ):
     """
     Optimized load method for study use that skips loading ms1_df for better performance.
     This method is identical to load() but uses _load_sample5_study() for .sample5 files,
     which skips reading the potentially large ms1_df dataset to improve throughput when
     adding samples to studies.
@@ -250,7 +250,13 @@ def _load_mzML(
             precursorIsolationWindowLowerMZ = s.getPrecursors()[0].getIsolationWindowLowerOffset()
             precursorIsolationWindowUpperMZ = s.getPrecursors()[0].getIsolationWindowUpperOffset()
             prec_intyensity = s.getPrecursors()[0].getIntensity()
-            energy = s.getPrecursors()[0].getActivationEnergy()
+            # Try to get collision energy from meta values first, fallback to getActivationEnergy()
+            try:
+                energy = s.getPrecursors()[0].getMetaValue('collision energy')
+                if energy is None or energy == 0.0:
+                    energy = s.getPrecursors()[0].getActivationEnergy()
+            except Exception:
+                energy = s.getPrecursors()[0].getActivationEnergy()
         peaks = s.get_peaks()
         spect = Spectrum(mz=peaks[0], inty=peaks[1], ms_level=s.getMSLevel())
@@ -983,7 +989,7 @@ def index_file(self):
             self.set_source(self.file_source.replace(".sample5", ".mzml"))
         else:
             raise FileNotFoundError(
-                f"File {self.file_source} not found. Did the path change? Consider running source()."
+                f"File {self.file_source} not found. Did the path change? Consider running source().",
             )
         self.index_file()
     else:

masster 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl

Potentially problematic release.

masster 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl