@datagrok/bio 2.12.21 → 2.12.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,51 +1,51 @@
1
- HELM,Activity
2
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$,2.1058521
3
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4416509
4
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0234375
5
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Dsu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0660219
6
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Tyr_tBu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.6578050
7
- PEPTIDE1{aHyp.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.9036875
8
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.A.D-aThr.Phe_4Me}$$$$,3.8863654
9
- PEPTIDE1{meI.hHis.Aca.N.T.Tyr_Me.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,4.6697459
10
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.A.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,2.7280300
11
- "PEPTIDE1{meI.Pip.dK.Thr_PO3H2.[L-hArg(Et,Et)].D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$",4.4462886
12
- PEPTIDE1{meI.hHis.D-Hyp.N.T.dK.Thr_PO3H2.Trp_Ome.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.3900189
13
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Phe_3Cl.meK.Phe_4Me}$$$$,3.6875632
14
- PEPTIDE1{Gly_allyl.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$,6.1076937
15
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Nle.D-aThr.Phe_4Me}$$$$,3.2512414
16
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.dF.Phe_4Me}$$$$,7.2294617
17
- PEPTIDE1{meI.hHis.Aca.N.T.D-Orn.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,0.6217819
18
- PEPTIDE1{meI.Pip.dK.Thr_PO3H2.D-Thz.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.D-Thz.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4476070
19
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Nle.aIle.Phe_4Me}$$$$,4.9557114
20
- PEPTIDE1{meY.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,1.3188239
21
- PEPTIDE1{meI.Aca.N.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.6897125
22
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.hHis.D-aThr.Phe_4Me}$$$$,4.0970631
23
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.3Pal.D-aThr.Phe_4Me}$$$$,2.7782860
24
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.D-aThr.Phe_4Me}$$$$,4.9825664
25
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Nle.Phe_4Me}$$$$,4.0829563
26
- PEPTIDE1{D-Nva.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.7203233
27
- PEPTIDE1{meI.Thr_PO3H2.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Cys_SEt.N.Phe_3Cl.aIle.Phe_4Me}$$$$,0.7954721
28
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.seC.Tyr_ab-dehydroMe.meN.E.N.dV.Phe_4Me}$$$$,5.0775967
29
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.3Pal.D-aThr.Phe_4Me}$$$$,4.1724143
30
- PEPTIDE1{D-Tic.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.dV.Phe_4Me}$$$$,3.1429222
31
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.dV.meF}$$$$,3.1327622
32
- PEPTIDE1{meI.Aca.N.T.Ser_PO3H2.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,3.8640671
33
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.H.P.Phe_4Me}$$$$,4.1827374
34
- PEPTIDE1{Phe_4Sdihydroorotamido.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.6165285
35
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Thz.Phe_4Me}$$$$,3.2189791
36
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.dV.E.N.H.D-aThr.Phe_4Me}$$$$,1.0362210
37
- PEPTIDE1{meI.Aca.N.T.D-1Nal.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,3.8830254
38
- PEPTIDE1{meI.Aca.N.T.meV.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,5.1701312
39
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Br.Phe_4Me}$$$$,3.1820068
40
- PEPTIDE1{meI.Aca.N.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,4.4652672
41
- PEPTIDE1{meI.Aca.Q.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,2.8669512
42
- PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.aMePhe.Phe_4Me}$$$$,3.2571971
43
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.V.Phe_4Me}$$$$,4.4447875
44
- PEPTIDE1{meI.Aca.Aca.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.Phe_4Me}$$$$,2.4899697
45
- PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cya.N.F.Phe_4Me}$$$$,0.3957288
46
- PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.F.aIle.Phe_4Me}$$$$,2.9058776
47
- PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.dV.Phe_4Me}$$$$,2.1254258
48
- PEPTIDE1{meI.Bux.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.Bmt.Phe_4Me}$$$$,1.7159123
49
- PEPTIDE1{D-Tyr_Et.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,1.5285099
50
- PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.dP.Tyr_ab-dehydroMe.dV.E.N.Bmt.Phe_4Me}$$$$,3.9470999
51
- PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.Bmt.Phe_4Me}$$$$,3.7495575
1
+ DBID,HELM,Activity,Cluster
2
+ DBID55,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$,2.1058521,1
3
+ DBID83,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4416509,3
4
+ DBID02,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0234375,1
5
+ DBID05,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Dsu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0660219,2
6
+ DBID34,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Tyr_tBu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.6578050,4
7
+ DBID02,PEPTIDE1{aHyp.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.9036875,3
8
+ DBID64,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.A.D-aThr.Phe_4Me}$$$$,3.8863654,2
9
+ DBID75,PEPTIDE1{meI.hHis.Aca.N.T.Tyr_Me.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,4.6697459,2
10
+ DBID74,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.A.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,2.7280300,4
11
+ DBID60,"PEPTIDE1{meI.Pip.dK.Thr_PO3H2.[L-hArg(Et,Et)].D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$",4.4462886,2
12
+ DBID96,PEPTIDE1{meI.hHis.D-Hyp.N.T.dK.Thr_PO3H2.Trp_Ome.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.3900189,4
13
+ DBID56,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Phe_3Cl.meK.Phe_4Me}$$$$,3.6875632,4
14
+ DBID65,PEPTIDE1{Gly_allyl.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$,6.1076937,1
15
+ DBID45,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Nle.D-aThr.Phe_4Me}$$$$,3.2512414,0
16
+ DBID13,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.dF.Phe_4Me}$$$$,7.2294617,0
17
+ DBID94,PEPTIDE1{meI.hHis.Aca.N.T.D-Orn.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,0.6217819,3
18
+ DBID79,PEPTIDE1{meI.Pip.dK.Thr_PO3H2.D-Thz.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.D-Thz.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4476070,1
19
+ DBID21,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Nle.aIle.Phe_4Me}$$$$,4.9557114,1
20
+ DBID10,PEPTIDE1{meY.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,1.3188239,1
21
+ DBID86,PEPTIDE1{meI.Aca.N.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.6897125,2
22
+ DBID52,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.hHis.D-aThr.Phe_4Me}$$$$,4.0970631,2
23
+ DBID27,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.3Pal.D-aThr.Phe_4Me}$$$$,2.7782860,3
24
+ DBID98,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.D-aThr.Phe_4Me}$$$$,4.9825664,2
25
+ DBID94,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Nle.Phe_4Me}$$$$,4.0829563,2
26
+ DBID86,PEPTIDE1{D-Nva.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.7203233,3
27
+ DBID48,PEPTIDE1{meI.Thr_PO3H2.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Cys_SEt.N.Phe_3Cl.aIle.Phe_4Me}$$$$,0.7954721,1
28
+ DBID22,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.seC.Tyr_ab-dehydroMe.meN.E.N.dV.Phe_4Me}$$$$,5.0775967,1
29
+ DBID54,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.3Pal.D-aThr.Phe_4Me}$$$$,4.1724143,3
30
+ DBID17,PEPTIDE1{D-Tic.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.dV.Phe_4Me}$$$$,3.1429222,3
31
+ DBID76,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.dV.meF}$$$$,3.1327622,0
32
+ DBID20,PEPTIDE1{meI.Aca.N.T.Ser_PO3H2.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,3.8640671,1
33
+ DBID01,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.H.P.Phe_4Me}$$$$,4.1827374,3
34
+ DBID93,PEPTIDE1{Phe_4Sdihydroorotamido.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.6165285,1
35
+ DBID89,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Thz.Phe_4Me}$$$$,3.2189791,0
36
+ DBID30,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.dV.E.N.H.D-aThr.Phe_4Me}$$$$,1.0362210,3
37
+ DBID07,PEPTIDE1{meI.Aca.N.T.D-1Nal.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,3.8830254,4
38
+ DBID51,PEPTIDE1{meI.Aca.N.T.meV.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,5.1701312,2
39
+ DBID95,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Br.Phe_4Me}$$$$,3.1820068,1
40
+ DBID47,PEPTIDE1{meI.Aca.N.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,4.4652672,1
41
+ DBID16,PEPTIDE1{meI.Aca.Q.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,2.8669512,4
42
+ DBID52,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.aMePhe.Phe_4Me}$$$$,3.2571971,1
43
+ DBID41,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.V.Phe_4Me}$$$$,4.4447875,0
44
+ DBID78,PEPTIDE1{meI.Aca.Aca.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.Phe_4Me}$$$$,2.4899697,4
45
+ DBID95,PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cya.N.F.Phe_4Me}$$$$,0.3957288,4
46
+ DBID92,PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.F.aIle.Phe_4Me}$$$$,2.9058776,3
47
+ DBID88,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.dV.Phe_4Me}$$$$,2.1254258,0
48
+ DBID10,PEPTIDE1{meI.Bux.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.Bmt.Phe_4Me}$$$$,1.7159123,0
49
+ DBID86,PEPTIDE1{D-Tyr_Et.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,1.5285099,0
50
+ DBID43,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.dP.Tyr_ab-dehydroMe.dV.E.N.Bmt.Phe_4Me}$$$$,3.9470999,3
51
+ DBID26,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.Bmt.Phe_4Me}$$$$,3.7495575,4
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Aleksandr Tanas",
6
6
  "email": "atanas@datagrok.ai"
7
7
  },
8
- "version": "2.12.21",
8
+ "version": "2.12.23",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.41.6",
37
+ "@datagrok-libraries/bio": "^5.41.8",
38
38
  "@datagrok-libraries/chem-meta": "^1.2.5",
39
39
  "@datagrok-libraries/math": "^1.1.5",
40
40
  "@datagrok-libraries/ml": "^6.6.5",
package/src/package.ts CHANGED
@@ -49,7 +49,8 @@ import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-c
49
49
  import {demoBio03UI} from './demo/bio03-atomic-level';
50
50
  import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
51
51
  import {checkInputColumnUI} from './utils/check-input-column';
52
- import {MsaWarning, multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
52
+ import {MsaWarning} from './utils/multiple-sequence-alignment';
53
+ import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
53
54
  import {WebLogoApp} from './apps/web-logo-app';
54
55
  import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
55
56
  import {splitToMonomersUI} from './utils/split-to-monomers';
@@ -560,7 +561,8 @@ export async function helmPreprocessingFunction(
560
561
  export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Column,
561
562
  methodName: DimReductionMethods, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames,
562
563
  plotEmbeddings: boolean, preprocessingFunction?: DG.Func, options?: (IUMAPOptions | ITSNEOptions) & Options,
563
- clusterEmbeddings?: boolean): Promise<DG.ScatterPlotViewer | undefined> {
564
+ clusterEmbeddings?: boolean
565
+ ): Promise<DG.ScatterPlotViewer | undefined> {
564
566
  if (!checkInputColumnUI(molecules, 'Sequence Space'))
565
567
  return;
566
568
  if (!preprocessingFunction)
@@ -600,12 +602,15 @@ export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonl
600
602
  export function multipleSequenceAlignmentDialog(): void {
601
603
  multipleSequenceAlignmentUI()
602
604
  .catch((err: any) => {
603
- const [errMsg, _errStack] = errInfo(err);
605
+ const [errMsg, errStack] = errInfo(err);
604
606
  if (err instanceof MsaWarning) {
607
+ grok.shell.warning((err as MsaWarning).element);
605
608
  _package.logger.warning(errMsg);
606
609
  return;
607
610
  }
608
- throw err;
611
+ grok.shell.error(errMsg);
612
+ _package.logger.error(errMsg, undefined, errStack);
613
+ // throw err; // This error throw is not handled
609
614
  });
610
615
  }
611
616
 
@@ -2,13 +2,11 @@ import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
 
4
4
  import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
5
+ import {NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
5
7
 
6
8
  import {ConverterFunc} from './types';
7
- import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
- import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
9
9
 
10
- // import {mmSemType} from '../const';
11
- // import {importFasta} from '../package';
12
10
 
13
11
  category('converters', () => {
14
12
  enum Samples {
@@ -116,6 +114,8 @@ RNA1{p.p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r
116
114
  RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p.p}$$$$`,
117
115
  };
118
116
 
117
+ const bioTagsSet = new Set<string>(Object.values(bioTAGS));
118
+
119
119
  /** Also detects semantic types
120
120
  * @param {string} key
121
121
  * @return {Promise<DG.DataFrame>}
@@ -152,7 +152,19 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
152
152
  const tgtCol: DG.Column = tgtDf.getCol('seq');
153
153
 
154
154
  expectArray(resCol.toList(), tgtCol.toList());
155
- const _sh: SeqHandler = SeqHandler.forColumn(resCol);
155
+ const srcSh: SeqHandler = SeqHandler.forColumn(srcCol);
156
+ const resSh: SeqHandler = SeqHandler.forColumn(resCol);
157
+ for (const [tagName, tgtTagValue] of Object.entries(tgtCol.tags)) {
158
+ if (
159
+ !bioTagsSet.has(tagName) ||
160
+ (srcSh.notation === NOTATION.HELM && [bioTAGS.alphabet, bioTAGS.alphabetIsMultichar].includes(tagName as bioTAGS)) ||
161
+ (resSh.notation === NOTATION.HELM && [bioTAGS.alphabet, bioTAGS.alphabetIsMultichar].includes(tagName as bioTAGS))
162
+ ) continue;
163
+
164
+ const resTagValue = resCol.getTag(tagName);
165
+ expect(resTagValue, tgtTagValue,
166
+ `Tag '${tagName}' expected value '${tgtTagValue}' is not equal to actual '${resTagValue}'.`);
167
+ }
156
168
  }
157
169
 
158
170
  // FASTA tests
@@ -48,13 +48,17 @@ category('renderers', () => {
48
48
  await _testAfterConvert();
49
49
  });
50
50
 
51
+ test('afterConvertToHelm', async () => {
52
+ await _testAfterConvertToHelm();
53
+ });
54
+
51
55
  test('selectRendererBySemType', async () => {
52
56
  await _selectRendererBySemType();
53
57
  });
54
58
 
55
59
  test('scatterPlotTooltip', async () => {
56
60
  await _testScatterPlotTooltip();
57
- }, {skipReason: 'GROK-15679'});
61
+ });
58
62
 
59
63
  async function _rendererMacromoleculeFasta() {
60
64
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
@@ -162,7 +166,7 @@ category('renderers', () => {
162
166
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA_PT.csv');
163
167
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
164
168
 
165
- const srcCol: DG.Column = df.col('sequence')!;
169
+ const srcCol: DG.Column = df.getCol('sequence')!;
166
170
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
167
171
  if (semType)
168
172
  srcCol.semType = semType;
@@ -182,6 +186,19 @@ category('renderers', () => {
182
186
  const _sh: SeqHandler = SeqHandler.forColumn(tgtCol);
183
187
  }
184
188
 
189
+ async function _testAfterConvertToHelm() {
190
+ const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/FASTA_PT.csv');
191
+ const view = grok.shell.addTableView(df);
192
+ await awaitGrid(view.grid);
193
+
194
+ const srcCol = df.getCol('sequence');
195
+ const sh = SeqHandler.forColumn(srcCol);
196
+ const tgtCol = sh.convert(NOTATION.HELM);
197
+ df.columns.add(tgtCol);
198
+ await awaitGrid(view.grid);
199
+ expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'helm');
200
+ }
201
+
185
202
  async function _selectRendererBySemType() {
186
203
  /* There are renderers for semType Macromolecule and MacromoleculeDifference.
187
204
  Misbehavior was by selecting Macromolecule renderers for MacromoleculeDifference semType column
@@ -100,6 +100,7 @@ category('toAtomicLevel', async () => {
100
100
  fastaDna = 'fastaDna',
101
101
  fastaRna = 'fastaRna',
102
102
  fastaPt = 'fastaPt',
103
+ fastaUn = 'fastaUn',
103
104
 
104
105
  separatorDna = 'separatorDna',
105
106
  separatorRna = 'separatorRna',
@@ -122,6 +123,10 @@ UUCAACUUCAAC`,
122
123
  FWPHEYFWPHEY
123
124
  YNRQWYVYNRQWYV
124
125
  MKPSEYVMKPSEYV`,
126
+ [csvTests.fastaUn]: `seq
127
+ [meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
128
+ [meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
129
+ [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`,
125
130
  [csvTests.separatorDna]: `seq
126
131
  A/C/G/T/C/A/C/G/T/C
127
132
  C/A/G/T/G/T/C/A/G/T/G/T
@@ -169,6 +174,10 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
169
174
  await _testToAtomicLevel(await readCsv(csvTests.fastaPt), 'seq', monomerLibHelper);
170
175
  });
171
176
 
177
+ test('fastaUn', async () => {
178
+ await _testToAtomicLevel(await readCsv(csvTests.fastaUn), 'seq', monomerLibHelper);
179
+ });
180
+
172
181
  test('separatorDna', async () => {
173
182
  await _testToAtomicLevel(await readCsv(csvTests.separatorDna), 'seq', monomerLibHelper);
174
183
  });
@@ -18,10 +18,10 @@ let convertDialogSubs: Subscription[] = [];
18
18
  * @param {DG.column} col Column with 'Macromolecule' semantic type
19
19
  */
20
20
  export function convert(col?: DG.Column): void {
21
- let tgtCol = col ?? grok.shell.t.columns.bySemType('Macromolecule')!;
22
- if (!tgtCol)
21
+ let srcCol = col ?? grok.shell.t.columns.bySemType('Macromolecule')!;
22
+ if (!srcCol)
23
23
  throw new Error('No column with Macromolecule semantic type found');
24
- let converterSh = SeqHandler.forColumn(tgtCol);
24
+ let converterSh = SeqHandler.forColumn(srcCol);
25
25
  let currentNotation: NOTATION = converterSh.notation;
26
26
  const dialogHeader = ui.divText(
27
27
  'Current notation: ' + currentNotation,
@@ -41,12 +41,12 @@ export function convert(col?: DG.Column): void {
41
41
  ];
42
42
  const toggleColumn = (newCol: DG.Column) => {
43
43
  if (newCol.semType !== DG.SEMTYPE.MACROMOLECULE) {
44
- targetColumnInput.value = tgtCol;
44
+ targetColumnInput.value = srcCol;
45
45
  return;
46
46
  }
47
47
 
48
- tgtCol = newCol;
49
- converterSh = SeqHandler.forColumn(tgtCol);
48
+ srcCol = newCol;
49
+ converterSh = SeqHandler.forColumn(srcCol);
50
50
  currentNotation = converterSh.notation;
51
51
  if (currentNotation === NOTATION.HELM)
52
52
  separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
@@ -63,7 +63,7 @@ export function convert(col?: DG.Column): void {
63
63
  ]));
64
64
  };
65
65
 
66
- const targetColumnInput = ui.columnInput('Column', grok.shell.t, tgtCol, toggleColumn);
66
+ const targetColumnInput = ui.columnInput('Column', grok.shell.t, srcCol, toggleColumn);
67
67
 
68
68
  const separatorArray = ['-', '.', '/'];
69
69
  let filteredNotations = notations.filter((e) => e !== currentNotation);
@@ -96,9 +96,9 @@ export function convert(col?: DG.Column): void {
96
96
  ]))
97
97
  .onOK(async () => {
98
98
  const targetNotation = targetNotationInput.value as NOTATION;
99
- const separator: string | undefined = separatorInput.value ?? undefined;
99
+ const separator: string | undefined = targetNotation === NOTATION.SEPARATOR ? separatorInput.value! : undefined;
100
100
 
101
- await convertDo(tgtCol, targetNotation, separator);
101
+ await convertDo(srcCol, targetNotation, separator);
102
102
  })
103
103
  .show({x: 350, y: 100});
104
104
 
@@ -34,5 +34,14 @@ export class MolfileAtomsV3K extends MolfileAtoms {
34
34
  }).replace(rGroupsRegex, '');
35
35
  });
36
36
  }
37
+
38
+ replaceRGroupSymbolByElement(atomIdx: number, newElementSymbol: string): void {
39
+ super.replaceRGroupSymbolByElement(atomIdx, newElementSymbol);
40
+ // rdkit can generate (out of thin air) masses for r groups, so we need to remove them as well.
41
+ //they are at the end of the line after coordinates and other data
42
+ const lineInfo = this.rawAtomLines[atomIdx].substring(3).split(' ');
43
+ if (lineInfo.length > 7)
44
+ this.rawAtomLines[atomIdx] = `M ${lineInfo.slice(0, 7).join(' ')}`;
45
+ }
37
46
  }
38
47
 
@@ -47,14 +47,17 @@ export class SimplePolymer {
47
47
  const monomerList: string[] = [];
48
48
  const monomerTypeList: HELM_MONOMER_TYPE[] = [];
49
49
  monomerGroups.forEach((monomerGroup) => {
50
- const splitted = monomerGroup.split(/\(|\)/)
51
- .map((el) => el.replace(/[\[\]]/g, ''));
52
- monomerList.push(...splitted);
50
+ // const splitted = monomerGroup.split(/\(|\)/).map((el) => el.replace(/[\[\]]/g, ''));
51
+ // monomerList.push(...splitted);
53
52
  // WARNING: only the groups of the form r(A)p, as in RNA, are supported
54
- const monomerTypes = splitted.map(
55
- (_, idx) => (idx % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
56
- );
57
- monomerTypeList.push(...monomerTypes);
53
+
54
+ monomerList.push(monomerGroup);
55
+ // const monomerTypes = splitted.map(
56
+ // (_, idx) => (idx % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
57
+ // );
58
+
59
+ // monomerTypeList.push(...monomerTypes);
60
+ monomerTypeList.push(HELM_MONOMER_TYPE.BACKBONE);
58
61
  });
59
62
  return {monomers: monomerList, monomerTypes: monomerTypeList};
60
63
  }
@@ -7,7 +7,7 @@ import {delay} from '@datagrok-libraries/utils/src/test';
7
7
  import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
8
  import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
9
9
 
10
- import {runKalign} from './multiple-sequence-alignment';
10
+ import {MsaWarning, runKalign} from './multiple-sequence-alignment';
11
11
  import {pepseaMethods, runPepsea} from './pepsea';
12
12
  import {checkInputColumnUI} from './check-input-column';
13
13
  import {multipleSequenceAlginmentUIOptions} from './types';
@@ -18,12 +18,6 @@ import {_package} from '../package';
18
18
 
19
19
  import '../../css/msa.css';
20
20
 
21
- export class MsaWarning extends Error {
22
- constructor(message: string, options?: ErrorOptions) {
23
- super(message, options);
24
- }
25
- }
26
-
27
21
  export async function multipleSequenceAlignmentUI(
28
22
  options: multipleSequenceAlginmentUIOptions = {},
29
23
  ): Promise<DG.Column> {
@@ -37,9 +31,9 @@ export async function multipleSequenceAlignmentUI(
37
31
  const table = options.col?.dataFrame ?? grok.shell.t;
38
32
  const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
39
33
  if (seqCol == null) {
40
- const errMsg: string = `Multiple sequence analysis requires a dataset with a macromolecule column.`;
34
+ const errMsg: string = `Multiple Sequence Alignment analysis requires a dataset with a macromolecule column.`;
41
35
  grok.shell.warning(errMsg);
42
- reject(new MsaWarning(errMsg));
36
+ reject(new MsaWarning(ui.divText(errMsg)));
43
37
  return; // Prevents creating the MSA dialog
44
38
  }
45
39
 
@@ -145,8 +139,6 @@ async function onDialogOk(
145
139
 
146
140
  resolve(msaCol);
147
141
  } catch (err: any) {
148
- const errMsg: string = err instanceof Error ? err.message : err.toString();
149
- grok.shell.error(errMsg);
150
142
  reject(err);
151
143
  } finally {
152
144
  pi.close();
@@ -1,6 +1,9 @@
1
- /* Do not change these import lines to match external modules in webpack configuration */
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
2
3
  import * as DG from 'datagrok-api/dg';
3
4
 
5
+ import wu from 'wu';
6
+
4
7
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
5
8
  import {ALIGNMENT, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
9
  //@ts-ignore: there are no types for this library
@@ -8,9 +11,17 @@ import Aioli from '@biowasm/aioli';
8
11
 
9
12
  import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
10
13
  import {kalignVersion} from './constants';
14
+
11
15
  const fastaInputFilename = 'input.fa';
12
16
  const fastaOutputFilename = 'result.fasta';
13
17
 
18
+ export class MsaWarning extends Error {
19
+ constructor(
20
+ public readonly element: HTMLElement, options?: ErrorOptions) {
21
+ super(element.innerText, options);
22
+ }
23
+ }
24
+
14
25
  /**
15
26
  * Converts array of sequences into simple fasta string.
16
27
  *
@@ -57,6 +68,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
57
68
  (fastaSequences[clusterCategoryIdx] ??= []).push(sequences[rowIdx]);
58
69
  (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIdx);
59
70
  }
71
+ checkForSingleSeqClusters(clusterIndexes, clustersColCategories);
60
72
 
61
73
  const CLI = await new Aioli([
62
74
  'base/1.0.0',
@@ -78,8 +90,10 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
78
90
  console.warn(output);
79
91
 
80
92
  const buf = await CLI.cat(fastaOutputFilename);
81
- if (!buf)
82
- throw new Error(`kalign output no result`);
93
+ if (!buf) {
94
+ const errStr = parseKalignError(output, 1);
95
+ throw new Error(errStr);
96
+ }
83
97
 
84
98
  const ffh = new FastaFileHandler(buf);
85
99
  const aligned = ffh.sequencesArray; // array of sequences extracted from FASTA
@@ -115,3 +129,36 @@ export async function testMSAEnoughMemory(col: DG.Column<string>): Promise<void>
115
129
  }
116
130
  }
117
131
  }
132
+
133
+ function parseKalignError(out: string, limit?: number): string {
134
+ const errLineList: string[] = [];
135
+ const errLineRe = /^.+ERROR : (.+)$/gm;
136
+ let ma: RegExpExecArray | null;
137
+ while ((ma = errLineRe.exec(out)) != null && (limit === undefined || errLineList.length < limit)) {
138
+ //
139
+ errLineList.push(ma[1]);
140
+ }
141
+ return errLineList.join('\n');
142
+ }
143
+
144
+ /** */
145
+ export function checkForSingleSeqClusters(clusterIndexes: number[][], clustersColCategories: string[]): void {
146
+ const singleSeqClusterIdxList = clusterIndexes
147
+ .map<[number[], number]>((idxs: number[], clusterI: number) => { return [idxs, clusterI]; })
148
+ .filter(([idxs, _clusterIdx]) => idxs.length == 1)
149
+ .map(([_idxs, clusterIdx]) => clusterIdx);
150
+ if (singleSeqClusterIdxList.length > 0) {
151
+ const errEl = ui.div([
152
+ ui.divText(`MSA analysis is not available on single sequence clusters ` +
153
+ `#${singleSeqClusterIdxList.length}:`),
154
+ ...wu(singleSeqClusterIdxList).take(3)
155
+ .map((clusterIdx) => {
156
+ let clusterName = clustersColCategories[clusterIdx];
157
+ if (clusterName.length > 25) clusterName = clusterName.slice(0, 25) + '...';
158
+ return ui.divText(`"${clusterName}"${clusterIdx < singleSeqClusterIdxList.length - 1 ? ', ' : '.'}`);
159
+ }).toArray(),
160
+ ...singleSeqClusterIdxList.length > 3 ? [ui.divText('...')] : []
161
+ ]);
162
+ throw new MsaWarning(errEl);
163
+ }
164
+ }
@@ -1,5 +1,6 @@
1
1
  /* Do not change these import lines to match external modules in webpack configuration */
2
2
  import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
3
4
  import * as DG from 'datagrok-api/dg';
4
5
 
5
6
  import {Subject} from 'rxjs';
@@ -8,6 +9,7 @@ import {testEvent} from '@datagrok-libraries/utils/src/test';
8
9
  import {NOTATION, TAGS as bioTAGS, ALIGNMENT, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
9
10
  import {ILogger} from '@datagrok-libraries/bio/src/utils/logger';
10
11
 
12
+ import {checkForSingleSeqClusters} from './multiple-sequence-alignment';
11
13
  import * as C from './constants';
12
14
 
13
15
  import {_package} from '../package';
@@ -54,20 +56,26 @@ export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
54
56
  if (clustersCol.type != DG.COLUMN_TYPE.STRING)
55
57
  clustersCol = clustersCol.convertTo(DG.TYPE.STRING);
56
58
 
57
- const clusters = clustersCol.categories;
58
- const bodies: PepseaBodyUnit[][] = new Array(clusters.length);
59
+ const clustersColCategories = clustersCol.categories;
60
+ const clustersColData = clustersCol.getRawData();
61
+ const bodies: PepseaBodyUnit[][] = new Array(clustersColCategories.length);
62
+ const clusterIndexes: number[][] = new Array(clustersColCategories.length);
59
63
 
60
64
  // Grouping data by clusters
61
65
  for (let rowIndex = 0; rowIndex < peptideCount; ++rowIndex) {
62
- const cluster = clustersCol.get(rowIndex) as string;
66
+ const clusterCategoryIdx = clustersColData[rowIndex];
67
+ const cluster = clustersColCategories[clusterCategoryIdx];
63
68
  if (cluster === '')
64
69
  continue;
65
70
 
66
- const clusterId = clusters.indexOf(cluster);
71
+ const clusterId = clustersColCategories.indexOf(cluster);
67
72
  const helmSeq = srcCol.get(rowIndex);
68
- if (helmSeq)
73
+ if (helmSeq) {
69
74
  (bodies[clusterId] ??= []).push({ID: rowIndex.toString(), HELM: helmSeq});
75
+ (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIndex);
76
+ }
70
77
  }
78
+ checkForSingleSeqClusters(clusterIndexes, clustersColCategories);
71
79
 
72
80
  const alignedSequences: string[] = new Array(peptideCount);
73
81
  for (const body of bodies) { // getting aligned sequences for each cluster
package/dist/79.js DELETED
@@ -1,2 +0,0 @@
1
- var bio;(()=>{"use strict";const t={V2K_RGP_SHIFT:8,V2K_RGP_LINE:"M RGP",V2K_A_LINE:"A ",V3K_COUNTS_SHIFT:14,V3K_IDX_SHIFT:7,V3K_HEADER_FIRST_LINE:"\nDatagrok macromolecule handler\n\n",V3K_HEADER_SECOND_LINE:" 0 0 0 0 0 0 999 V3000\n",V3K_BEGIN_CTAB_BLOCK:"M V30 BEGIN CTAB\n",V3K_END_CTAB_BLOCK:"M V30 END CTAB\n",V3K_BEGIN_COUNTS_LINE:"M V30 COUNTS ",V3K_COUNTS_LINE_ENDING:" 0 0 0\n",V3K_BEGIN_ATOM_BLOCK:"M V30 BEGIN ATOM\n",V3K_END_ATOM_BLOCK:"M V30 END ATOM\n",V3K_BEGIN_BOND_BLOCK:"M V30 BEGIN BOND\n",V3K_END_BOND_BLOCK:"M V30 END BOND\n",V3K_BOND_CONFIG:" CFG=",V3K_BEGIN_DATA_LINE:"M V30 ",V3K_END:"M END",PRECISION_FACTOR:1e4,DEOXYRIBOSE:"d",RIBOSE:"r",PHOSPHATE:"p",OXYGEN:"O",HYDROGEN:"H"};function n(n,e,a,_){if(0===n.length)return"";const N=s,{atomCount:E,bondCount:f}=N(n,e,a,_),r=new Array(E),c=new Array(f);let l,b=null,S=null;"PEPTIDE"===_?l=o:(l=i,b="DNA"===a?e.get(t.DEOXYRIBOSE):e.get(t.RIBOSE),S=e.get(t.PHOSPHATE));const d={i:0,nodeShift:0,bondShift:0,backbonePositionShift:new Array(2).fill(0),branchPositionShift:new Array(2).fill(0),backboneAttachNode:0,branchAttachNode:0,flipFactor:1},I={sugar:b,phosphate:S,seqLength:n.length,atomCount:E,bondCount:f},A=[];let O=0;for(d.i=0;d.i<I.seqLength;++d.i){const t=e.get(n[d.i]);l(t,r,c,d,I),t.stereoAtoms?.forEach((t=>A.push(t+O))),O+=t.atoms.x.length}!function(n,o,e,i){const a=e.nodeShift+1;n[i.atomCount]=t.V3K_BEGIN_DATA_LINE+a+" "+t.OXYGEN+" "+h(e.backbonePositionShift[0])+" "+e.flipFactor*h(e.backbonePositionShift[1])+" 0.000000 0\n";const s=e.backboneAttachNode,_=a;o[i.bondCount]=t.V3K_BEGIN_DATA_LINE+e.bondShift+" 1 "+s+" "+_+"\n"}(r,c,d,I);const m=t.V3K_BEGIN_COUNTS_LINE+E+" "+f+t.V3K_COUNTS_LINE_ENDING;let g="";return g+=t.V3K_HEADER_FIRST_LINE,g+=t.V3K_HEADER_SECOND_LINE,g+=t.V3K_BEGIN_CTAB_BLOCK,g+=m,g+=t.V3K_BEGIN_ATOM_BLOCK,g+=r.join(""),g+=t.V3K_END_ATOM_BLOCK,g+=t.V3K_BEGIN_BOND_BLOCK,g+=c.join(""),g+=t.V3K_END_BOND_BLOCK,A.length>0&&(g+=function(t){const n=[];let o=`M V30 MDLV30/STEABS ATOMS=(${t.length}`;for(let e=0;e<t.length;e++){const i=`${o} ${t[e]}`;i.length>76?(n.push(`${o} -\n`),o=`M V30 ${t[e]}`):o=i,e===t.length-1&&n.push(`${o})\n`)}return`M V30 BEGIN COLLECTION\n${n.join("")}M V30 END COLLECTION\n`}(A)),g+=t.V3K_END_CTAB_BLOCK,g+=t.V3K_END,g}function o(t,n,o,i){i.flipFactor=(-1)**(i.i%2),e(t,n,o,i)}function e(n,o,e,i){!function(n,o,e){for(let i=0;i<n.atoms.atomTypes.length;++i){const a=e.nodeShift+i+1;o[e.nodeShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.atoms.atomTypes[i]+" "+h(e.backbonePositionShift[0]+n.atoms.x[i])+" "+h(e.backbonePositionShift[1]+e.flipFactor*n.atoms.y[i])+" "+n.atoms.kwargs[i]}}(n,o,i),a(n,e,i),function(n,o,e){if(0!==e.backboneAttachNode){const i=e.bondShift,a=e.backboneAttachNode,s=n.meta.terminalNodes[0]+e.nodeShift;o[e.bondShift-1]=t.V3K_BEGIN_DATA_LINE+i+" 1 "+a+" "+s+"\n"}}(n,e,i),null!==n.meta.branchShift&&n.meta.terminalNodes.length>2&&function(t,n){n.branchAttachNode=n.nodeShift+t.meta.terminalNodes[2];for(let o=0;o<2;++o)n.branchPositionShift[o]=n.backbonePositionShift[o]+t.meta.branchShift[o]}(n,i),function(t,n){n.backboneAttachNode=n.nodeShift+t.meta.terminalNodes[1],n.bondShift+=t.bonds.atomPairs.length+1,n.nodeShift+=t.atoms.atomTypes.length,n.backbonePositionShift[0]+=t.meta.backboneShift[0],n.backbonePositionShift[1]+=n.flipFactor*t.meta.backboneShift[1]}(n,i)}function i(n,o,i,s,_){if(0===s.i)e(_.sugar,o,i,s);else for(const t of[_.phosphate,_.sugar])e(t,o,i,s);!function(n,o,e,i){(function(n,o,e){for(let i=0;i<n.atoms.atomTypes.length;++i){const a=e.nodeShift+i+1;o[e.nodeShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.atoms.atomTypes[i]+" "+h(e.branchPositionShift[0]+n.atoms.x[i])+" "+h(e.branchPositionShift[1]+e.flipFactor*n.atoms.y[i])+" "+n.atoms.kwargs[i]}})(n,o,i),a(n,e,i),function(n,o,e){const i=e.bondShift,a=e.branchAttachNode,s=n.meta.terminalNodes[0]+e.nodeShift;o[i-1]=t.V3K_BEGIN_DATA_LINE+i+" 1 "+a+" "+s+"\n"}(n,e,i);const s=i.bondShift,_=i.branchAttachNode,N=n.meta.terminalNodes[0]+i.nodeShift;e[s-1]=t.V3K_BEGIN_DATA_LINE+s+" 1 "+_+" "+N+"\n",i.bondShift+=n.bonds.atomPairs.length+1,i.nodeShift+=n.atoms.atomTypes.length}(n,o,i,s)}function a(n,o,e){for(let i=0;i<n.bonds.atomPairs.length;++i){const a=e.bondShift+i+1,s=n.bonds.atomPairs[i][0]+e.nodeShift,h=n.bonds.atomPairs[i][1]+e.nodeShift;let _="";if(n.bonds.bondConfiguration.has(i)){let t=n.bonds.bondConfiguration.get(i);e.flipFactor<0&&(t=1===t?3:1),_=" CFG="+t}const N=n.bonds.kwargs.has(i)?" "+n.bonds.kwargs.get(i):"";o[e.bondShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.bonds.bondTypes[i]+" "+s+" "+h+_+N+"\n"}}function s(n,o,e,i){let a=0,s=0;for(const t of n){if(""===t)continue;const n=o.get(t);a+=n.atoms.x.length,s+=n.bonds.bondTypes.length}if("PEPTIDE"===i)a+=1,s+=n.length;else{const i="DNA"===e?o.get(t.DEOXYRIBOSE):o.get(t.RIBOSE),h=o.get(t.PHOSPHATE);a+=(n.length-1)*h.atoms.x.length,a+=n.length*i.atoms.x.length,a+=1,s+=(n.length-1)*h.bonds.bondTypes.length,s+=n.length*i.bonds.bondTypes.length,s-=1,s+=3*n.length}return{atomCount:a,bondCount:s}}function h(n){return Math.round(t.PRECISION_FACTOR*n)/t.PRECISION_FACTOR}new RegExp("[rd]\\((\\w)\\)p?","g"),onmessage=t=>{const{monomerSequencesArray:o,monomersDict:e,alphabet:i,polymerType:a,start:s,end:h}=t.data,_=new Array(h-s),N=new Array(0);for(let t=s;t<h;++t)try{const h=o[t];_[t-s]=n(h,e,i,a)}catch(n){const o=`Cannot get molfile of row #${t}: ${n instanceof Error?n.message:n.toString()}.`;N.push(o)}postMessage({molfileList:_,molfileWarningList:N})},bio={}})();
2
- //# sourceMappingURL=79.js.map