@datagrok/bio 2.12.21 → 2.12.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/286.js +2 -0
- package/dist/286.js.map +1 -0
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/files/samples/FASTA.csv +65 -65
- package/files/samples/HELM_50.csv +51 -51
- package/package.json +2 -2
- package/src/package.ts +9 -4
- package/src/tests/converters-test.ts +17 -5
- package/src/tests/renderers-test.ts +19 -2
- package/src/tests/to-atomic-level-tests.ts +9 -0
- package/src/utils/convert.ts +9 -9
- package/src/utils/helm-to-molfile/converter/mol-atoms-v3k.ts +9 -0
- package/src/utils/helm-to-molfile/converter/simple-polymer.ts +10 -7
- package/src/utils/multiple-sequence-alignment-ui.ts +3 -11
- package/src/utils/multiple-sequence-alignment.ts +50 -3
- package/src/utils/pepsea.ts +13 -5
- package/dist/79.js +0 -2
- package/dist/79.js.map +0 -1
|
package/files/samples/HELM_50.csv
CHANGED
|
@@ -1,51 +1,51 @@
|
|
|
1
|
-
HELM,Activity
|
|
2
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$,2.1058521
|
|
3
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4416509
|
|
4
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0234375
|
|
5
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Dsu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0660219
|
|
6
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Tyr_tBu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.6578050
|
|
7
|
-
PEPTIDE1{aHyp.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.9036875
|
|
8
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.A.D-aThr.Phe_4Me}$$$$,3.8863654
|
|
9
|
-
PEPTIDE1{meI.hHis.Aca.N.T.Tyr_Me.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,4.6697459
|
|
10
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.A.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,2.7280300
|
|
11
|
-
"PEPTIDE1{meI.Pip.dK.Thr_PO3H2.[L-hArg(Et,Et)].D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$",4.4462886
|
|
12
|
-
PEPTIDE1{meI.hHis.D-Hyp.N.T.dK.Thr_PO3H2.Trp_Ome.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.3900189
|
|
13
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Phe_3Cl.meK.Phe_4Me}$$$$,3.6875632
|
|
14
|
-
PEPTIDE1{Gly_allyl.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$,6.1076937
|
|
15
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Nle.D-aThr.Phe_4Me}$$$$,3.2512414
|
|
16
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.dF.Phe_4Me}$$$$,7.2294617
|
|
17
|
-
PEPTIDE1{meI.hHis.Aca.N.T.D-Orn.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,0.6217819
|
|
18
|
-
PEPTIDE1{meI.Pip.dK.Thr_PO3H2.D-Thz.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.D-Thz.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4476070
|
|
19
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Nle.aIle.Phe_4Me}$$$$,4.9557114
|
|
20
|
-
PEPTIDE1{meY.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,1.3188239
|
|
21
|
-
PEPTIDE1{meI.Aca.N.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.6897125
|
|
22
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.hHis.D-aThr.Phe_4Me}$$$$,4.0970631
|
|
23
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.3Pal.D-aThr.Phe_4Me}$$$$,2.7782860
|
|
24
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.D-aThr.Phe_4Me}$$$$,4.9825664
|
|
25
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Nle.Phe_4Me}$$$$,4.0829563
|
|
26
|
-
PEPTIDE1{D-Nva.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.7203233
|
|
27
|
-
PEPTIDE1{meI.Thr_PO3H2.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Cys_SEt.N.Phe_3Cl.aIle.Phe_4Me}$$$$,0.7954721
|
|
28
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.seC.Tyr_ab-dehydroMe.meN.E.N.dV.Phe_4Me}$$$$,5.0775967
|
|
29
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.3Pal.D-aThr.Phe_4Me}$$$$,4.1724143
|
|
30
|
-
PEPTIDE1{D-Tic.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.dV.Phe_4Me}$$$$,3.1429222
|
|
31
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.dV.meF}$$$$,3.1327622
|
|
32
|
-
PEPTIDE1{meI.Aca.N.T.Ser_PO3H2.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,3.8640671
|
|
33
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.H.P.Phe_4Me}$$$$,4.1827374
|
|
34
|
-
PEPTIDE1{Phe_4Sdihydroorotamido.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.6165285
|
|
35
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Thz.Phe_4Me}$$$$,3.2189791
|
|
36
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.dV.E.N.H.D-aThr.Phe_4Me}$$$$,1.0362210
|
|
37
|
-
PEPTIDE1{meI.Aca.N.T.D-1Nal.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,3.8830254
|
|
38
|
-
PEPTIDE1{meI.Aca.N.T.meV.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,5.1701312
|
|
39
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Br.Phe_4Me}$$$$,3.1820068
|
|
40
|
-
PEPTIDE1{meI.Aca.N.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,4.4652672
|
|
41
|
-
PEPTIDE1{meI.Aca.Q.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,2.8669512
|
|
42
|
-
PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.aMePhe.Phe_4Me}$$$$,3.2571971
|
|
43
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.V.Phe_4Me}$$$$,4.4447875
|
|
44
|
-
PEPTIDE1{meI.Aca.Aca.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.Phe_4Me}$$$$,2.4899697
|
|
45
|
-
PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cya.N.F.Phe_4Me}$$$$,0.3957288
|
|
46
|
-
PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.F.aIle.Phe_4Me}$$$$,2.9058776
|
|
47
|
-
PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.dV.Phe_4Me}$$$$,2.1254258
|
|
48
|
-
PEPTIDE1{meI.Bux.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.Bmt.Phe_4Me}$$$$,1.7159123
|
|
49
|
-
PEPTIDE1{D-Tyr_Et.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,1.5285099
|
|
50
|
-
PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.dP.Tyr_ab-dehydroMe.dV.E.N.Bmt.Phe_4Me}$$$$,3.9470999
|
|
51
|
-
PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.Bmt.Phe_4Me}$$$$,3.7495575
|
|
1
|
+
DBID,HELM,Activity,Cluster
|
|
2
|
+
DBID55,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$,2.1058521,1
|
|
3
|
+
DBID83,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4416509,3
|
|
4
|
+
DBID02,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0234375,1
|
|
5
|
+
DBID05,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Dsu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0660219,2
|
|
6
|
+
DBID34,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Tyr_tBu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.6578050,4
|
|
7
|
+
DBID02,PEPTIDE1{aHyp.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.9036875,3
|
|
8
|
+
DBID64,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.A.D-aThr.Phe_4Me}$$$$,3.8863654,2
|
|
9
|
+
DBID75,PEPTIDE1{meI.hHis.Aca.N.T.Tyr_Me.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,4.6697459,2
|
|
10
|
+
DBID74,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.A.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,2.7280300,4
|
|
11
|
+
DBID60,"PEPTIDE1{meI.Pip.dK.Thr_PO3H2.[L-hArg(Et,Et)].D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$",4.4462886,2
|
|
12
|
+
DBID96,PEPTIDE1{meI.hHis.D-Hyp.N.T.dK.Thr_PO3H2.Trp_Ome.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.3900189,4
|
|
13
|
+
DBID56,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Phe_3Cl.meK.Phe_4Me}$$$$,3.6875632,4
|
|
14
|
+
DBID65,PEPTIDE1{Gly_allyl.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$,6.1076937,1
|
|
15
|
+
DBID45,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Nle.D-aThr.Phe_4Me}$$$$,3.2512414,0
|
|
16
|
+
DBID13,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.dF.Phe_4Me}$$$$,7.2294617,0
|
|
17
|
+
DBID94,PEPTIDE1{meI.hHis.Aca.N.T.D-Orn.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,0.6217819,3
|
|
18
|
+
DBID79,PEPTIDE1{meI.Pip.dK.Thr_PO3H2.D-Thz.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.D-Thz.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4476070,1
|
|
19
|
+
DBID21,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Nle.aIle.Phe_4Me}$$$$,4.9557114,1
|
|
20
|
+
DBID10,PEPTIDE1{meY.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,1.3188239,1
|
|
21
|
+
DBID86,PEPTIDE1{meI.Aca.N.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.6897125,2
|
|
22
|
+
DBID52,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.hHis.D-aThr.Phe_4Me}$$$$,4.0970631,2
|
|
23
|
+
DBID27,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.3Pal.D-aThr.Phe_4Me}$$$$,2.7782860,3
|
|
24
|
+
DBID98,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.D-aThr.Phe_4Me}$$$$,4.9825664,2
|
|
25
|
+
DBID94,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Nle.Phe_4Me}$$$$,4.0829563,2
|
|
26
|
+
DBID86,PEPTIDE1{D-Nva.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.7203233,3
|
|
27
|
+
DBID48,PEPTIDE1{meI.Thr_PO3H2.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Cys_SEt.N.Phe_3Cl.aIle.Phe_4Me}$$$$,0.7954721,1
|
|
28
|
+
DBID22,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.seC.Tyr_ab-dehydroMe.meN.E.N.dV.Phe_4Me}$$$$,5.0775967,1
|
|
29
|
+
DBID54,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.3Pal.D-aThr.Phe_4Me}$$$$,4.1724143,3
|
|
30
|
+
DBID17,PEPTIDE1{D-Tic.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.dV.Phe_4Me}$$$$,3.1429222,3
|
|
31
|
+
DBID76,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.dV.meF}$$$$,3.1327622,0
|
|
32
|
+
DBID20,PEPTIDE1{meI.Aca.N.T.Ser_PO3H2.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,3.8640671,1
|
|
33
|
+
DBID01,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.H.P.Phe_4Me}$$$$,4.1827374,3
|
|
34
|
+
DBID93,PEPTIDE1{Phe_4Sdihydroorotamido.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.6165285,1
|
|
35
|
+
DBID89,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Thz.Phe_4Me}$$$$,3.2189791,0
|
|
36
|
+
DBID30,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.dV.E.N.H.D-aThr.Phe_4Me}$$$$,1.0362210,3
|
|
37
|
+
DBID07,PEPTIDE1{meI.Aca.N.T.D-1Nal.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,3.8830254,4
|
|
38
|
+
DBID51,PEPTIDE1{meI.Aca.N.T.meV.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,5.1701312,2
|
|
39
|
+
DBID95,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Br.Phe_4Me}$$$$,3.1820068,1
|
|
40
|
+
DBID47,PEPTIDE1{meI.Aca.N.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,4.4652672,1
|
|
41
|
+
DBID16,PEPTIDE1{meI.Aca.Q.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,2.8669512,4
|
|
42
|
+
DBID52,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.aMePhe.Phe_4Me}$$$$,3.2571971,1
|
|
43
|
+
DBID41,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.V.Phe_4Me}$$$$,4.4447875,0
|
|
44
|
+
DBID78,PEPTIDE1{meI.Aca.Aca.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.Phe_4Me}$$$$,2.4899697,4
|
|
45
|
+
DBID95,PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cya.N.F.Phe_4Me}$$$$,0.3957288,4
|
|
46
|
+
DBID92,PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.F.aIle.Phe_4Me}$$$$,2.9058776,3
|
|
47
|
+
DBID88,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.dV.Phe_4Me}$$$$,2.1254258,0
|
|
48
|
+
DBID10,PEPTIDE1{meI.Bux.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.Bmt.Phe_4Me}$$$$,1.7159123,0
|
|
49
|
+
DBID86,PEPTIDE1{D-Tyr_Et.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,1.5285099,0
|
|
50
|
+
DBID43,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.dP.Tyr_ab-dehydroMe.dV.E.N.Bmt.Phe_4Me}$$$$,3.9470999,3
|
|
51
|
+
DBID26,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.Bmt.Phe_4Me}$$$$,3.7495575,4
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Aleksandr Tanas",
|
|
6
6
|
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.12.21",
|
|
8
|
+
"version": "2.12.23",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.41.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.41.8",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.2.5",
|
|
39
39
|
"@datagrok-libraries/math": "^1.1.5",
|
|
40
40
|
"@datagrok-libraries/ml": "^6.6.5",
|
package/src/package.ts
CHANGED
|
@@ -49,7 +49,8 @@ import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-c
|
|
|
49
49
|
import {demoBio03UI} from './demo/bio03-atomic-level';
|
|
50
50
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
51
51
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
52
|
-
import {MsaWarning, multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
52
|
+
import {MsaWarning} from './utils/multiple-sequence-alignment';
|
|
53
|
+
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
53
54
|
import {WebLogoApp} from './apps/web-logo-app';
|
|
54
55
|
import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
|
|
55
56
|
import {splitToMonomersUI} from './utils/split-to-monomers';
|
|
@@ -560,7 +561,8 @@ export async function helmPreprocessingFunction(
|
|
|
560
561
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Column,
|
|
561
562
|
methodName: DimReductionMethods, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames,
|
|
562
563
|
plotEmbeddings: boolean, preprocessingFunction?: DG.Func, options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
563
|
-
clusterEmbeddings?: boolean
|
|
564
|
+
clusterEmbeddings?: boolean
|
|
565
|
+
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
564
566
|
if (!checkInputColumnUI(molecules, 'Sequence Space'))
|
|
565
567
|
return;
|
|
566
568
|
if (!preprocessingFunction)
|
|
@@ -600,12 +602,15 @@ export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonl
|
|
|
600
602
|
export function multipleSequenceAlignmentDialog(): void {
|
|
601
603
|
multipleSequenceAlignmentUI()
|
|
602
604
|
.catch((err: any) => {
|
|
603
|
-
const [errMsg,
|
|
605
|
+
const [errMsg, errStack] = errInfo(err);
|
|
604
606
|
if (err instanceof MsaWarning) {
|
|
607
|
+
grok.shell.warning((err as MsaWarning).element);
|
|
605
608
|
_package.logger.warning(errMsg);
|
|
606
609
|
return;
|
|
607
610
|
}
|
|
608
|
-
|
|
611
|
+
grok.shell.error(errMsg);
|
|
612
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
613
|
+
// throw err; // This error throw is not handled
|
|
609
614
|
});
|
|
610
615
|
}
|
|
611
616
|
|
|
package/src/tests/converters-test.ts
CHANGED
|
@@ -2,13 +2,11 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
4
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
5
7
|
|
|
6
8
|
import {ConverterFunc} from './types';
|
|
7
|
-
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
9
|
|
|
10
|
-
// import {mmSemType} from '../const';
|
|
11
|
-
// import {importFasta} from '../package';
|
|
12
10
|
|
|
13
11
|
category('converters', () => {
|
|
14
12
|
enum Samples {
|
|
@@ -116,6 +114,8 @@ RNA1{p.p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r
|
|
|
116
114
|
RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p.p}$$$$`,
|
|
117
115
|
};
|
|
118
116
|
|
|
117
|
+
const bioTagsSet = new Set<string>(Object.values(bioTAGS));
|
|
118
|
+
|
|
119
119
|
/** Also detects semantic types
|
|
120
120
|
* @param {string} key
|
|
121
121
|
* @return {Promise<DG.DataFrame>}
|
|
@@ -152,7 +152,19 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
152
152
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
153
153
|
|
|
154
154
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
155
|
-
const
|
|
155
|
+
const srcSh: SeqHandler = SeqHandler.forColumn(srcCol);
|
|
156
|
+
const resSh: SeqHandler = SeqHandler.forColumn(resCol);
|
|
157
|
+
for (const [tagName, tgtTagValue] of Object.entries(tgtCol.tags)) {
|
|
158
|
+
if (
|
|
159
|
+
!bioTagsSet.has(tagName) ||
|
|
160
|
+
(srcSh.notation === NOTATION.HELM && [bioTAGS.alphabet, bioTAGS.alphabetIsMultichar].includes(tagName as bioTAGS)) ||
|
|
161
|
+
(resSh.notation === NOTATION.HELM && [bioTAGS.alphabet, bioTAGS.alphabetIsMultichar].includes(tagName as bioTAGS))
|
|
162
|
+
) continue;
|
|
163
|
+
|
|
164
|
+
const resTagValue = resCol.getTag(tagName);
|
|
165
|
+
expect(resTagValue, tgtTagValue,
|
|
166
|
+
`Tag '${tagName}' expected value '${tgtTagValue}' is not equal to actual '${resTagValue}'.`);
|
|
167
|
+
}
|
|
156
168
|
}
|
|
157
169
|
|
|
158
170
|
// FASTA tests
|
|
package/src/tests/renderers-test.ts
CHANGED
|
@@ -48,13 +48,17 @@ category('renderers', () => {
|
|
|
48
48
|
await _testAfterConvert();
|
|
49
49
|
});
|
|
50
50
|
|
|
51
|
+
test('afterConvertToHelm', async () => {
|
|
52
|
+
await _testAfterConvertToHelm();
|
|
53
|
+
});
|
|
54
|
+
|
|
51
55
|
test('selectRendererBySemType', async () => {
|
|
52
56
|
await _selectRendererBySemType();
|
|
53
57
|
});
|
|
54
58
|
|
|
55
59
|
test('scatterPlotTooltip', async () => {
|
|
56
60
|
await _testScatterPlotTooltip();
|
|
57
|
-
}
|
|
61
|
+
});
|
|
58
62
|
|
|
59
63
|
async function _rendererMacromoleculeFasta() {
|
|
60
64
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
|
|
@@ -162,7 +166,7 @@ category('renderers', () => {
|
|
|
162
166
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA_PT.csv');
|
|
163
167
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
164
168
|
|
|
165
|
-
const srcCol: DG.Column = df.
|
|
169
|
+
const srcCol: DG.Column = df.getCol('sequence')!;
|
|
166
170
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
|
|
167
171
|
if (semType)
|
|
168
172
|
srcCol.semType = semType;
|
|
@@ -182,6 +186,19 @@ category('renderers', () => {
|
|
|
182
186
|
const _sh: SeqHandler = SeqHandler.forColumn(tgtCol);
|
|
183
187
|
}
|
|
184
188
|
|
|
189
|
+
async function _testAfterConvertToHelm() {
|
|
190
|
+
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/FASTA_PT.csv');
|
|
191
|
+
const view = grok.shell.addTableView(df);
|
|
192
|
+
await awaitGrid(view.grid);
|
|
193
|
+
|
|
194
|
+
const srcCol = df.getCol('sequence');
|
|
195
|
+
const sh = SeqHandler.forColumn(srcCol);
|
|
196
|
+
const tgtCol = sh.convert(NOTATION.HELM);
|
|
197
|
+
df.columns.add(tgtCol);
|
|
198
|
+
await awaitGrid(view.grid);
|
|
199
|
+
expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'helm');
|
|
200
|
+
}
|
|
201
|
+
|
|
185
202
|
async function _selectRendererBySemType() {
|
|
186
203
|
/* There are renderers for semType Macromolecule and MacromoleculeDifference.
|
|
187
204
|
Misbehavior was by selecting Macromolecule renderers for MacromoleculeDifference semType column
|
|
package/src/tests/to-atomic-level-tests.ts
CHANGED
|
@@ -100,6 +100,7 @@ category('toAtomicLevel', async () => {
|
|
|
100
100
|
fastaDna = 'fastaDna',
|
|
101
101
|
fastaRna = 'fastaRna',
|
|
102
102
|
fastaPt = 'fastaPt',
|
|
103
|
+
fastaUn = 'fastaUn',
|
|
103
104
|
|
|
104
105
|
separatorDna = 'separatorDna',
|
|
105
106
|
separatorRna = 'separatorRna',
|
|
@@ -122,6 +123,10 @@ UUCAACUUCAAC`,
|
|
|
122
123
|
FWPHEYFWPHEY
|
|
123
124
|
YNRQWYVYNRQWYV
|
|
124
125
|
MKPSEYVMKPSEYV`,
|
|
126
|
+
[csvTests.fastaUn]: `seq
|
|
127
|
+
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
128
|
+
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
129
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`,
|
|
125
130
|
[csvTests.separatorDna]: `seq
|
|
126
131
|
A/C/G/T/C/A/C/G/T/C
|
|
127
132
|
C/A/G/T/G/T/C/A/G/T/G/T
|
|
@@ -169,6 +174,10 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
|
|
|
169
174
|
await _testToAtomicLevel(await readCsv(csvTests.fastaPt), 'seq', monomerLibHelper);
|
|
170
175
|
});
|
|
171
176
|
|
|
177
|
+
test('fastaUn', async () => {
|
|
178
|
+
await _testToAtomicLevel(await readCsv(csvTests.fastaUn), 'seq', monomerLibHelper);
|
|
179
|
+
});
|
|
180
|
+
|
|
172
181
|
test('separatorDna', async () => {
|
|
173
182
|
await _testToAtomicLevel(await readCsv(csvTests.separatorDna), 'seq', monomerLibHelper);
|
|
174
183
|
});
|
package/src/utils/convert.ts
CHANGED
|
@@ -18,10 +18,10 @@ let convertDialogSubs: Subscription[] = [];
|
|
|
18
18
|
* @param {DG.column} col Column with 'Macromolecule' semantic type
|
|
19
19
|
*/
|
|
20
20
|
export function convert(col?: DG.Column): void {
|
|
21
|
-
let
|
|
22
|
-
if (!
|
|
21
|
+
let srcCol = col ?? grok.shell.t.columns.bySemType('Macromolecule')!;
|
|
22
|
+
if (!srcCol)
|
|
23
23
|
throw new Error('No column with Macromolecule semantic type found');
|
|
24
|
-
let converterSh = SeqHandler.forColumn(
|
|
24
|
+
let converterSh = SeqHandler.forColumn(srcCol);
|
|
25
25
|
let currentNotation: NOTATION = converterSh.notation;
|
|
26
26
|
const dialogHeader = ui.divText(
|
|
27
27
|
'Current notation: ' + currentNotation,
|
|
@@ -41,12 +41,12 @@ export function convert(col?: DG.Column): void {
|
|
|
41
41
|
];
|
|
42
42
|
const toggleColumn = (newCol: DG.Column) => {
|
|
43
43
|
if (newCol.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
44
|
-
targetColumnInput.value =
|
|
44
|
+
targetColumnInput.value = srcCol;
|
|
45
45
|
return;
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
-
|
|
49
|
-
converterSh = SeqHandler.forColumn(
|
|
48
|
+
srcCol = newCol;
|
|
49
|
+
converterSh = SeqHandler.forColumn(srcCol);
|
|
50
50
|
currentNotation = converterSh.notation;
|
|
51
51
|
if (currentNotation === NOTATION.HELM)
|
|
52
52
|
separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
|
|
@@ -63,7 +63,7 @@ export function convert(col?: DG.Column): void {
|
|
|
63
63
|
]));
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
-
const targetColumnInput = ui.columnInput('Column', grok.shell.t,
|
|
66
|
+
const targetColumnInput = ui.columnInput('Column', grok.shell.t, srcCol, toggleColumn);
|
|
67
67
|
|
|
68
68
|
const separatorArray = ['-', '.', '/'];
|
|
69
69
|
let filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
@@ -96,9 +96,9 @@ export function convert(col?: DG.Column): void {
|
|
|
96
96
|
]))
|
|
97
97
|
.onOK(async () => {
|
|
98
98
|
const targetNotation = targetNotationInput.value as NOTATION;
|
|
99
|
-
const separator: string | undefined = separatorInput.value
|
|
99
|
+
const separator: string | undefined = targetNotation === NOTATION.SEPARATOR ? separatorInput.value! : undefined;
|
|
100
100
|
|
|
101
|
-
await convertDo(
|
|
101
|
+
await convertDo(srcCol, targetNotation, separator);
|
|
102
102
|
})
|
|
103
103
|
.show({x: 350, y: 100});
|
|
104
104
|
|
|
package/src/utils/helm-to-molfile/converter/mol-atoms-v3k.ts
CHANGED
|
@@ -34,5 +34,14 @@ export class MolfileAtomsV3K extends MolfileAtoms {
|
|
|
34
34
|
}).replace(rGroupsRegex, '');
|
|
35
35
|
});
|
|
36
36
|
}
|
|
37
|
+
|
|
38
|
+
replaceRGroupSymbolByElement(atomIdx: number, newElementSymbol: string): void {
|
|
39
|
+
super.replaceRGroupSymbolByElement(atomIdx, newElementSymbol);
|
|
40
|
+
// rdkit can generate (out of thin air) masses for r groups, so we need to remove them as well.
|
|
41
|
+
//they are at the end of the line after coordinates and other data
|
|
42
|
+
const lineInfo = this.rawAtomLines[atomIdx].substring(3).split(' ');
|
|
43
|
+
if (lineInfo.length > 7)
|
|
44
|
+
this.rawAtomLines[atomIdx] = `M ${lineInfo.slice(0, 7).join(' ')}`;
|
|
45
|
+
}
|
|
37
46
|
}
|
|
38
47
|
|
|
package/src/utils/helm-to-molfile/converter/simple-polymer.ts
CHANGED
|
@@ -47,14 +47,17 @@ export class SimplePolymer {
|
|
|
47
47
|
const monomerList: string[] = [];
|
|
48
48
|
const monomerTypeList: HELM_MONOMER_TYPE[] = [];
|
|
49
49
|
monomerGroups.forEach((monomerGroup) => {
|
|
50
|
-
const splitted = monomerGroup.split(/\(|\)/)
|
|
51
|
-
|
|
52
|
-
monomerList.push(...splitted);
|
|
50
|
+
// const splitted = monomerGroup.split(/\(|\)/).map((el) => el.replace(/[\[\]]/g, ''));
|
|
51
|
+
// monomerList.push(...splitted);
|
|
53
52
|
// WARNING: only the groups of the form r(A)p, as in RNA, are supported
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
53
|
+
|
|
54
|
+
monomerList.push(monomerGroup);
|
|
55
|
+
// const monomerTypes = splitted.map(
|
|
56
|
+
// (_, idx) => (idx % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
|
|
57
|
+
// );
|
|
58
|
+
|
|
59
|
+
// monomerTypeList.push(...monomerTypes);
|
|
60
|
+
monomerTypeList.push(HELM_MONOMER_TYPE.BACKBONE);
|
|
58
61
|
});
|
|
59
62
|
return {monomers: monomerList, monomerTypes: monomerTypeList};
|
|
60
63
|
}
|
|
package/src/utils/multiple-sequence-alignment-ui.ts
CHANGED
|
@@ -7,7 +7,7 @@ import {delay} from '@datagrok-libraries/utils/src/test';
|
|
|
7
7
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
9
|
|
|
10
|
-
import {runKalign} from './multiple-sequence-alignment';
|
|
10
|
+
import {MsaWarning, runKalign} from './multiple-sequence-alignment';
|
|
11
11
|
import {pepseaMethods, runPepsea} from './pepsea';
|
|
12
12
|
import {checkInputColumnUI} from './check-input-column';
|
|
13
13
|
import {multipleSequenceAlginmentUIOptions} from './types';
|
|
@@ -18,12 +18,6 @@ import {_package} from '../package';
|
|
|
18
18
|
|
|
19
19
|
import '../../css/msa.css';
|
|
20
20
|
|
|
21
|
-
export class MsaWarning extends Error {
|
|
22
|
-
constructor(message: string, options?: ErrorOptions) {
|
|
23
|
-
super(message, options);
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
|
|
27
21
|
export async function multipleSequenceAlignmentUI(
|
|
28
22
|
options: multipleSequenceAlginmentUIOptions = {},
|
|
29
23
|
): Promise<DG.Column> {
|
|
@@ -37,9 +31,9 @@ export async function multipleSequenceAlignmentUI(
|
|
|
37
31
|
const table = options.col?.dataFrame ?? grok.shell.t;
|
|
38
32
|
const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
39
33
|
if (seqCol == null) {
|
|
40
|
-
const errMsg: string = `Multiple
|
|
34
|
+
const errMsg: string = `Multiple Sequence Alignment analysis requires a dataset with a macromolecule column.`;
|
|
41
35
|
grok.shell.warning(errMsg);
|
|
42
|
-
reject(new MsaWarning(errMsg));
|
|
36
|
+
reject(new MsaWarning(ui.divText(errMsg)));
|
|
43
37
|
return; // Prevents creating the MSA dialog
|
|
44
38
|
}
|
|
45
39
|
|
|
@@ -145,8 +139,6 @@ async function onDialogOk(
|
|
|
145
139
|
|
|
146
140
|
resolve(msaCol);
|
|
147
141
|
} catch (err: any) {
|
|
148
|
-
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
149
|
-
grok.shell.error(errMsg);
|
|
150
142
|
reject(err);
|
|
151
143
|
} finally {
|
|
152
144
|
pi.close();
|
|
package/src/utils/multiple-sequence-alignment.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
3
|
import * as DG from 'datagrok-api/dg';
|
|
3
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
4
7
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
5
8
|
import {ALIGNMENT, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
9
|
//@ts-ignore: there are no types for this library
|
|
@@ -8,9 +11,17 @@ import Aioli from '@biowasm/aioli';
|
|
|
8
11
|
|
|
9
12
|
import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
|
|
10
13
|
import {kalignVersion} from './constants';
|
|
14
|
+
|
|
11
15
|
const fastaInputFilename = 'input.fa';
|
|
12
16
|
const fastaOutputFilename = 'result.fasta';
|
|
13
17
|
|
|
18
|
+
export class MsaWarning extends Error {
|
|
19
|
+
constructor(
|
|
20
|
+
public readonly element: HTMLElement, options?: ErrorOptions) {
|
|
21
|
+
super(element.innerText, options);
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
14
25
|
/**
|
|
15
26
|
* Converts array of sequences into simple fasta string.
|
|
16
27
|
*
|
|
@@ -57,6 +68,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
57
68
|
(fastaSequences[clusterCategoryIdx] ??= []).push(sequences[rowIdx]);
|
|
58
69
|
(clusterIndexes[clusterCategoryIdx] ??= []).push(rowIdx);
|
|
59
70
|
}
|
|
71
|
+
checkForSingleSeqClusters(clusterIndexes, clustersColCategories);
|
|
60
72
|
|
|
61
73
|
const CLI = await new Aioli([
|
|
62
74
|
'base/1.0.0',
|
|
@@ -78,8 +90,10 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
78
90
|
console.warn(output);
|
|
79
91
|
|
|
80
92
|
const buf = await CLI.cat(fastaOutputFilename);
|
|
81
|
-
if (!buf)
|
|
82
|
-
|
|
93
|
+
if (!buf) {
|
|
94
|
+
const errStr = parseKalignError(output, 1);
|
|
95
|
+
throw new Error(errStr);
|
|
96
|
+
}
|
|
83
97
|
|
|
84
98
|
const ffh = new FastaFileHandler(buf);
|
|
85
99
|
const aligned = ffh.sequencesArray; // array of sequences extracted from FASTA
|
|
@@ -115,3 +129,36 @@ export async function testMSAEnoughMemory(col: DG.Column<string>): Promise<void>
|
|
|
115
129
|
}
|
|
116
130
|
}
|
|
117
131
|
}
|
|
132
|
+
|
|
133
|
+
function parseKalignError(out: string, limit?: number): string {
|
|
134
|
+
const errLineList: string[] = [];
|
|
135
|
+
const errLineRe = /^.+ERROR : (.+)$/gm;
|
|
136
|
+
let ma: RegExpExecArray | null;
|
|
137
|
+
while ((ma = errLineRe.exec(out)) != null && (limit === undefined || errLineList.length < limit)) {
|
|
138
|
+
//
|
|
139
|
+
errLineList.push(ma[1]);
|
|
140
|
+
}
|
|
141
|
+
return errLineList.join('\n');
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
 * Ensures that no cluster consists of a single sequence.
 *
 * @param clusterIndexes Row indexes grouped by cluster; the position in this array is used as
 *   the index into `clustersColCategories`. Unassigned positions of a sparse array are ignored.
 * @param clustersColCategories Cluster names.
 * @throws {MsaWarning} When at least one cluster holds exactly one row. The attached element
 *   reports the number of such clusters and names up to three of them.
 */
export function checkForSingleSeqClusters(clusterIndexes: number[][], clustersColCategories: string[]): void {
  const maxShownCount = 3;
  const maxNameLength = 25;

  const singleSeqClusterIdxList: number[] = [];
  // forEach does not visit unassigned slots, so clusters without rows are skipped
  clusterIndexes.forEach((idxs, clusterIdx) => {
    if (idxs.length === 1) singleSeqClusterIdxList.push(clusterIdx);
  });
  if (singleSeqClusterIdxList.length === 0) return;

  const lastPos = singleSeqClusterIdxList.length - 1;
  const nameEls = singleSeqClusterIdxList.slice(0, maxShownCount).map((clusterIdx, pos) => {
    let clusterName = clustersColCategories[clusterIdx];
    if (clusterName.length > maxNameLength) clusterName = clusterName.slice(0, maxNameLength) + '...';
    // The separator depends on the position within the reported list,
    // not on the cluster's category index.
    return ui.divText(`"${clusterName}"${pos < lastPos ? ', ' : '.'}`);
  });

  const errEl = ui.div([
    ui.divText(`MSA analysis is not available on single sequence clusters ` +
      `#${singleSeqClusterIdxList.length}:`),
    ...nameEls,
    // Marks that more clusters are affected than are named above
    ...(singleSeqClusterIdxList.length > maxShownCount ? [ui.divText('...')] : []),
  ]);
  throw new MsaWarning(errEl);
}
|
package/src/utils/pepsea.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
4
|
import * as DG from 'datagrok-api/dg';
|
|
4
5
|
|
|
5
6
|
import {Subject} from 'rxjs';
|
|
@@ -8,6 +9,7 @@ import {testEvent} from '@datagrok-libraries/utils/src/test';
|
|
|
8
9
|
import {NOTATION, TAGS as bioTAGS, ALIGNMENT, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
10
|
import {ILogger} from '@datagrok-libraries/bio/src/utils/logger';
|
|
10
11
|
|
|
12
|
+
import {checkForSingleSeqClusters} from './multiple-sequence-alignment';
|
|
11
13
|
import * as C from './constants';
|
|
12
14
|
|
|
13
15
|
import {_package} from '../package';
|
|
@@ -54,20 +56,26 @@ export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
|
|
|
54
56
|
if (clustersCol.type != DG.COLUMN_TYPE.STRING)
|
|
55
57
|
clustersCol = clustersCol.convertTo(DG.TYPE.STRING);
|
|
56
58
|
|
|
57
|
-
const
|
|
58
|
-
const
|
|
59
|
+
const clustersColCategories = clustersCol.categories;
|
|
60
|
+
const clustersColData = clustersCol.getRawData();
|
|
61
|
+
const bodies: PepseaBodyUnit[][] = new Array(clustersColCategories.length);
|
|
62
|
+
const clusterIndexes: number[][] = new Array(clustersColCategories.length);
|
|
59
63
|
|
|
60
64
|
// Grouping data by clusters
|
|
61
65
|
for (let rowIndex = 0; rowIndex < peptideCount; ++rowIndex) {
|
|
62
|
-
const
|
|
66
|
+
const clusterCategoryIdx = clustersColData[rowIndex];
|
|
67
|
+
const cluster = clustersColCategories[clusterCategoryIdx];
|
|
63
68
|
if (cluster === '')
|
|
64
69
|
continue;
|
|
65
70
|
|
|
66
|
-
const clusterId =
|
|
71
|
+
const clusterId = clustersColCategories.indexOf(cluster);
|
|
67
72
|
const helmSeq = srcCol.get(rowIndex);
|
|
68
|
-
if (helmSeq)
|
|
73
|
+
if (helmSeq) {
|
|
69
74
|
(bodies[clusterId] ??= []).push({ID: rowIndex.toString(), HELM: helmSeq});
|
|
75
|
+
(clusterIndexes[clusterCategoryIdx] ??= []).push(rowIndex);
|
|
76
|
+
}
|
|
70
77
|
}
|
|
78
|
+
checkForSingleSeqClusters(clusterIndexes, clustersColCategories);
|
|
71
79
|
|
|
72
80
|
const alignedSequences: string[] = new Array(peptideCount);
|
|
73
81
|
for (const body of bodies) { // getting aligned sequences for each cluster
|
package/dist/79.js
DELETED
|
@@ -1,2 +0,0 @@
|
|
|
1
|
-
var bio;(()=>{"use strict";const t={V2K_RGP_SHIFT:8,V2K_RGP_LINE:"M RGP",V2K_A_LINE:"A ",V3K_COUNTS_SHIFT:14,V3K_IDX_SHIFT:7,V3K_HEADER_FIRST_LINE:"\nDatagrok macromolecule handler\n\n",V3K_HEADER_SECOND_LINE:" 0 0 0 0 0 0 999 V3000\n",V3K_BEGIN_CTAB_BLOCK:"M V30 BEGIN CTAB\n",V3K_END_CTAB_BLOCK:"M V30 END CTAB\n",V3K_BEGIN_COUNTS_LINE:"M V30 COUNTS ",V3K_COUNTS_LINE_ENDING:" 0 0 0\n",V3K_BEGIN_ATOM_BLOCK:"M V30 BEGIN ATOM\n",V3K_END_ATOM_BLOCK:"M V30 END ATOM\n",V3K_BEGIN_BOND_BLOCK:"M V30 BEGIN BOND\n",V3K_END_BOND_BLOCK:"M V30 END BOND\n",V3K_BOND_CONFIG:" CFG=",V3K_BEGIN_DATA_LINE:"M V30 ",V3K_END:"M END",PRECISION_FACTOR:1e4,DEOXYRIBOSE:"d",RIBOSE:"r",PHOSPHATE:"p",OXYGEN:"O",HYDROGEN:"H"};function n(n,e,a,_){if(0===n.length)return"";const N=s,{atomCount:E,bondCount:f}=N(n,e,a,_),r=new Array(E),c=new Array(f);let l,b=null,S=null;"PEPTIDE"===_?l=o:(l=i,b="DNA"===a?e.get(t.DEOXYRIBOSE):e.get(t.RIBOSE),S=e.get(t.PHOSPHATE));const d={i:0,nodeShift:0,bondShift:0,backbonePositionShift:new Array(2).fill(0),branchPositionShift:new Array(2).fill(0),backboneAttachNode:0,branchAttachNode:0,flipFactor:1},I={sugar:b,phosphate:S,seqLength:n.length,atomCount:E,bondCount:f},A=[];let O=0;for(d.i=0;d.i<I.seqLength;++d.i){const t=e.get(n[d.i]);l(t,r,c,d,I),t.stereoAtoms?.forEach((t=>A.push(t+O))),O+=t.atoms.x.length}!function(n,o,e,i){const a=e.nodeShift+1;n[i.atomCount]=t.V3K_BEGIN_DATA_LINE+a+" "+t.OXYGEN+" "+h(e.backbonePositionShift[0])+" "+e.flipFactor*h(e.backbonePositionShift[1])+" 0.000000 0\n";const s=e.backboneAttachNode,_=a;o[i.bondCount]=t.V3K_BEGIN_DATA_LINE+e.bondShift+" 1 "+s+" "+_+"\n"}(r,c,d,I);const m=t.V3K_BEGIN_COUNTS_LINE+E+" "+f+t.V3K_COUNTS_LINE_ENDING;let g="";return g+=t.V3K_HEADER_FIRST_LINE,g+=t.V3K_HEADER_SECOND_LINE,g+=t.V3K_BEGIN_CTAB_BLOCK,g+=m,g+=t.V3K_BEGIN_ATOM_BLOCK,g+=r.join(""),g+=t.V3K_END_ATOM_BLOCK,g+=t.V3K_BEGIN_BOND_BLOCK,g+=c.join(""),g+=t.V3K_END_BOND_BLOCK,A.length>0&&(g+=function(t){const n=[];let o=`M V30 MDLV30/STEABS 
ATOMS=(${t.length}`;for(let e=0;e<t.length;e++){const i=`${o} ${t[e]}`;i.length>76?(n.push(`${o} -\n`),o=`M V30 ${t[e]}`):o=i,e===t.length-1&&n.push(`${o})\n`)}return`M V30 BEGIN COLLECTION\n${n.join("")}M V30 END COLLECTION\n`}(A)),g+=t.V3K_END_CTAB_BLOCK,g+=t.V3K_END,g}function o(t,n,o,i){i.flipFactor=(-1)**(i.i%2),e(t,n,o,i)}function e(n,o,e,i){!function(n,o,e){for(let i=0;i<n.atoms.atomTypes.length;++i){const a=e.nodeShift+i+1;o[e.nodeShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.atoms.atomTypes[i]+" "+h(e.backbonePositionShift[0]+n.atoms.x[i])+" "+h(e.backbonePositionShift[1]+e.flipFactor*n.atoms.y[i])+" "+n.atoms.kwargs[i]}}(n,o,i),a(n,e,i),function(n,o,e){if(0!==e.backboneAttachNode){const i=e.bondShift,a=e.backboneAttachNode,s=n.meta.terminalNodes[0]+e.nodeShift;o[e.bondShift-1]=t.V3K_BEGIN_DATA_LINE+i+" 1 "+a+" "+s+"\n"}}(n,e,i),null!==n.meta.branchShift&&n.meta.terminalNodes.length>2&&function(t,n){n.branchAttachNode=n.nodeShift+t.meta.terminalNodes[2];for(let o=0;o<2;++o)n.branchPositionShift[o]=n.backbonePositionShift[o]+t.meta.branchShift[o]}(n,i),function(t,n){n.backboneAttachNode=n.nodeShift+t.meta.terminalNodes[1],n.bondShift+=t.bonds.atomPairs.length+1,n.nodeShift+=t.atoms.atomTypes.length,n.backbonePositionShift[0]+=t.meta.backboneShift[0],n.backbonePositionShift[1]+=n.flipFactor*t.meta.backboneShift[1]}(n,i)}function i(n,o,i,s,_){if(0===s.i)e(_.sugar,o,i,s);else for(const t of[_.phosphate,_.sugar])e(t,o,i,s);!function(n,o,e,i){(function(n,o,e){for(let i=0;i<n.atoms.atomTypes.length;++i){const a=e.nodeShift+i+1;o[e.nodeShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.atoms.atomTypes[i]+" "+h(e.branchPositionShift[0]+n.atoms.x[i])+" "+h(e.branchPositionShift[1]+e.flipFactor*n.atoms.y[i])+" "+n.atoms.kwargs[i]}})(n,o,i),a(n,e,i),function(n,o,e){const i=e.bondShift,a=e.branchAttachNode,s=n.meta.terminalNodes[0]+e.nodeShift;o[i-1]=t.V3K_BEGIN_DATA_LINE+i+" 1 "+a+" "+s+"\n"}(n,e,i);const 
s=i.bondShift,_=i.branchAttachNode,N=n.meta.terminalNodes[0]+i.nodeShift;e[s-1]=t.V3K_BEGIN_DATA_LINE+s+" 1 "+_+" "+N+"\n",i.bondShift+=n.bonds.atomPairs.length+1,i.nodeShift+=n.atoms.atomTypes.length}(n,o,i,s)}function a(n,o,e){for(let i=0;i<n.bonds.atomPairs.length;++i){const a=e.bondShift+i+1,s=n.bonds.atomPairs[i][0]+e.nodeShift,h=n.bonds.atomPairs[i][1]+e.nodeShift;let _="";if(n.bonds.bondConfiguration.has(i)){let t=n.bonds.bondConfiguration.get(i);e.flipFactor<0&&(t=1===t?3:1),_=" CFG="+t}const N=n.bonds.kwargs.has(i)?" "+n.bonds.kwargs.get(i):"";o[e.bondShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.bonds.bondTypes[i]+" "+s+" "+h+_+N+"\n"}}function s(n,o,e,i){let a=0,s=0;for(const t of n){if(""===t)continue;const n=o.get(t);a+=n.atoms.x.length,s+=n.bonds.bondTypes.length}if("PEPTIDE"===i)a+=1,s+=n.length;else{const i="DNA"===e?o.get(t.DEOXYRIBOSE):o.get(t.RIBOSE),h=o.get(t.PHOSPHATE);a+=(n.length-1)*h.atoms.x.length,a+=n.length*i.atoms.x.length,a+=1,s+=(n.length-1)*h.bonds.bondTypes.length,s+=n.length*i.bonds.bondTypes.length,s-=1,s+=3*n.length}return{atomCount:a,bondCount:s}}function h(n){return Math.round(t.PRECISION_FACTOR*n)/t.PRECISION_FACTOR}new RegExp("[rd]\\((\\w)\\)p?","g"),onmessage=t=>{const{monomerSequencesArray:o,monomersDict:e,alphabet:i,polymerType:a,start:s,end:h}=t.data,_=new Array(h-s),N=new Array(0);for(let t=s;t<h;++t)try{const h=o[t];_[t-s]=n(h,e,i,a)}catch(n){const o=`Cannot get molfile of row #${t}: ${n instanceof Error?n.message:n.toString()}.`;N.push(o)}postMessage({molfileList:_,molfileWarningList:N})},bio={}})();
|
|
2
|
-
//# sourceMappingURL=79.js.map
|