@datagrok/bio 2.12.21 → 2.12.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/286.js +2 -0
- package/dist/286.js.map +1 -0
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/files/samples/FASTA.csv +65 -65
- package/files/samples/HELM_50.csv +51 -51
- package/package.json +2 -2
- package/src/package.ts +9 -4
- package/src/tests/converters-test.ts +17 -5
- package/src/tests/renderers-test.ts +19 -2
- package/src/tests/to-atomic-level-tests.ts +9 -0
- package/src/utils/convert.ts +9 -9
- package/src/utils/helm-to-molfile/converter/simple-polymer.ts +10 -7
- package/src/utils/multiple-sequence-alignment-ui.ts +3 -11
- package/src/utils/multiple-sequence-alignment.ts +50 -3
- package/src/utils/pepsea.ts +13 -5
- package/dist/79.js +0 -2
- package/dist/79.js.map +0 -1
|
@@ -1,51 +1,51 @@
|
|
|
1
|
-
HELM,Activity
|
|
2
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$,2.1058521
|
|
3
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4416509
|
|
4
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0234375
|
|
5
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Dsu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0660219
|
|
6
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Tyr_tBu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.6578050
|
|
7
|
-
PEPTIDE1{aHyp.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.9036875
|
|
8
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.A.D-aThr.Phe_4Me}$$$$,3.8863654
|
|
9
|
-
PEPTIDE1{meI.hHis.Aca.N.T.Tyr_Me.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,4.6697459
|
|
10
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.A.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,2.7280300
|
|
11
|
-
"PEPTIDE1{meI.Pip.dK.Thr_PO3H2.[L-hArg(Et,Et)].D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$",4.4462886
|
|
12
|
-
PEPTIDE1{meI.hHis.D-Hyp.N.T.dK.Thr_PO3H2.Trp_Ome.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.3900189
|
|
13
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Phe_3Cl.meK.Phe_4Me}$$$$,3.6875632
|
|
14
|
-
PEPTIDE1{Gly_allyl.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$,6.1076937
|
|
15
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Nle.D-aThr.Phe_4Me}$$$$,3.2512414
|
|
16
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.dF.Phe_4Me}$$$$,7.2294617
|
|
17
|
-
PEPTIDE1{meI.hHis.Aca.N.T.D-Orn.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,0.6217819
|
|
18
|
-
PEPTIDE1{meI.Pip.dK.Thr_PO3H2.D-Thz.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.D-Thz.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4476070
|
|
19
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Nle.aIle.Phe_4Me}$$$$,4.9557114
|
|
20
|
-
PEPTIDE1{meY.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,1.3188239
|
|
21
|
-
PEPTIDE1{meI.Aca.N.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.6897125
|
|
22
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.hHis.D-aThr.Phe_4Me}$$$$,4.0970631
|
|
23
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.3Pal.D-aThr.Phe_4Me}$$$$,2.7782860
|
|
24
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.D-aThr.Phe_4Me}$$$$,4.9825664
|
|
25
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Nle.Phe_4Me}$$$$,4.0829563
|
|
26
|
-
PEPTIDE1{D-Nva.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.7203233
|
|
27
|
-
PEPTIDE1{meI.Thr_PO3H2.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Cys_SEt.N.Phe_3Cl.aIle.Phe_4Me}$$$$,0.7954721
|
|
28
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.seC.Tyr_ab-dehydroMe.meN.E.N.dV.Phe_4Me}$$$$,5.0775967
|
|
29
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.3Pal.D-aThr.Phe_4Me}$$$$,4.1724143
|
|
30
|
-
PEPTIDE1{D-Tic.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.dV.Phe_4Me}$$$$,3.1429222
|
|
31
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.dV.meF}$$$$,3.1327622
|
|
32
|
-
PEPTIDE1{meI.Aca.N.T.Ser_PO3H2.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,3.8640671
|
|
33
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.H.P.Phe_4Me}$$$$,4.1827374
|
|
34
|
-
PEPTIDE1{Phe_4Sdihydroorotamido.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.6165285
|
|
35
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Thz.Phe_4Me}$$$$,3.2189791
|
|
36
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.dV.E.N.H.D-aThr.Phe_4Me}$$$$,1.0362210
|
|
37
|
-
PEPTIDE1{meI.Aca.N.T.D-1Nal.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,3.8830254
|
|
38
|
-
PEPTIDE1{meI.Aca.N.T.meV.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,5.1701312
|
|
39
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Br.Phe_4Me}$$$$,3.1820068
|
|
40
|
-
PEPTIDE1{meI.Aca.N.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,4.4652672
|
|
41
|
-
PEPTIDE1{meI.Aca.Q.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,2.8669512
|
|
42
|
-
PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.aMePhe.Phe_4Me}$$$$,3.2571971
|
|
43
|
-
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.V.Phe_4Me}$$$$,4.4447875
|
|
44
|
-
PEPTIDE1{meI.Aca.Aca.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.Phe_4Me}$$$$,2.4899697
|
|
45
|
-
PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cya.N.F.Phe_4Me}$$$$,0.3957288
|
|
46
|
-
PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.F.aIle.Phe_4Me}$$$$,2.9058776
|
|
47
|
-
PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.dV.Phe_4Me}$$$$,2.1254258
|
|
48
|
-
PEPTIDE1{meI.Bux.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.Bmt.Phe_4Me}$$$$,1.7159123
|
|
49
|
-
PEPTIDE1{D-Tyr_Et.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,1.5285099
|
|
50
|
-
PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.dP.Tyr_ab-dehydroMe.dV.E.N.Bmt.Phe_4Me}$$$$,3.9470999
|
|
51
|
-
PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.Bmt.Phe_4Me}$$$$,3.7495575
|
|
1
|
+
DBID,HELM,Activity,Cluster
|
|
2
|
+
DBID55,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$,2.1058521,1
|
|
3
|
+
DBID83,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4416509,3
|
|
4
|
+
DBID02,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0234375,1
|
|
5
|
+
DBID05,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Dsu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.0660219,2
|
|
6
|
+
DBID34,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Tyr_tBu.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.6578050,4
|
|
7
|
+
DBID02,PEPTIDE1{aHyp.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.9036875,3
|
|
8
|
+
DBID64,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.A.D-aThr.Phe_4Me}$$$$,3.8863654,2
|
|
9
|
+
DBID75,PEPTIDE1{meI.hHis.Aca.N.T.Tyr_Me.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,4.6697459,2
|
|
10
|
+
DBID74,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.A.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,2.7280300,4
|
|
11
|
+
DBID60,"PEPTIDE1{meI.Pip.dK.Thr_PO3H2.[L-hArg(Et,Et)].D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$",4.4462886,2
|
|
12
|
+
DBID96,PEPTIDE1{meI.hHis.D-Hyp.N.T.dK.Thr_PO3H2.Trp_Ome.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.3900189,4
|
|
13
|
+
DBID56,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Phe_3Cl.meK.Phe_4Me}$$$$,3.6875632,4
|
|
14
|
+
DBID65,PEPTIDE1{Gly_allyl.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$,6.1076937,1
|
|
15
|
+
DBID45,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.Nle.D-aThr.Phe_4Me}$$$$,3.2512414,0
|
|
16
|
+
DBID13,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.dF.Phe_4Me}$$$$,7.2294617,0
|
|
17
|
+
DBID94,PEPTIDE1{meI.hHis.Aca.N.T.D-Orn.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,0.6217819,3
|
|
18
|
+
DBID79,PEPTIDE1{meI.Pip.dK.Thr_PO3H2.D-Thz.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.D-Thz.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.4476070,1
|
|
19
|
+
DBID21,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Nle.aIle.Phe_4Me}$$$$,4.9557114,1
|
|
20
|
+
DBID10,PEPTIDE1{meY.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.pnG.Phe_4Me}$$$$,1.3188239,1
|
|
21
|
+
DBID86,PEPTIDE1{meI.Aca.N.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,7.6897125,2
|
|
22
|
+
DBID52,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.hHis.D-aThr.Phe_4Me}$$$$,4.0970631,2
|
|
23
|
+
DBID27,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.3Pal.D-aThr.Phe_4Me}$$$$,2.7782860,3
|
|
24
|
+
DBID98,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.D-aThr.Phe_4Me}$$$$,4.9825664,2
|
|
25
|
+
DBID94,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Nle.Phe_4Me}$$$$,4.0829563,2
|
|
26
|
+
DBID86,PEPTIDE1{D-Nva.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.7203233,3
|
|
27
|
+
DBID48,PEPTIDE1{meI.Thr_PO3H2.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Cys_SEt.N.Phe_3Cl.aIle.Phe_4Me}$$$$,0.7954721,1
|
|
28
|
+
DBID22,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.seC.Tyr_ab-dehydroMe.meN.E.N.dV.Phe_4Me}$$$$,5.0775967,1
|
|
29
|
+
DBID54,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.3Pal.D-aThr.Phe_4Me}$$$$,4.1724143,3
|
|
30
|
+
DBID17,PEPTIDE1{D-Tic.Hcy.N.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.dV.Phe_4Me}$$$$,3.1429222,3
|
|
31
|
+
DBID76,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.meN.E.N.dV.meF}$$$$,3.1327622,0
|
|
32
|
+
DBID20,PEPTIDE1{meI.Aca.N.T.Ser_PO3H2.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,3.8640671,1
|
|
33
|
+
DBID01,PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.H.P.Phe_4Me}$$$$,4.1827374,3
|
|
34
|
+
DBID93,PEPTIDE1{Phe_4Sdihydroorotamido.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Me}$$$$,2.6165285,1
|
|
35
|
+
DBID89,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.D-Thz.Phe_4Me}$$$$,3.2189791,0
|
|
36
|
+
DBID30,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.dV.E.N.H.D-aThr.Phe_4Me}$$$$,1.0362210,3
|
|
37
|
+
DBID07,PEPTIDE1{meI.Aca.N.T.D-1Nal.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,3.8830254,4
|
|
38
|
+
DBID51,PEPTIDE1{meI.Aca.N.T.meV.Thr_PO3H2.Aca.D-Tyr_Et.Met_O2.D-Dap.Thr_PO3H2.N.H.D-aThr.Phe_4Me}$$$$,5.1701312,2
|
|
39
|
+
DBID95,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.pnG.Phe_4Br.Phe_4Me}$$$$,3.1820068,1
|
|
40
|
+
DBID47,PEPTIDE1{meI.Aca.N.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,4.4652672,1
|
|
41
|
+
DBID16,PEPTIDE1{meI.Aca.Q.T.W.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.dV.Phe_4Me}$$$$,2.8669512,4
|
|
42
|
+
DBID52,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.aMePhe.Phe_4Me}$$$$,3.2571971,1
|
|
43
|
+
DBID41,PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.V.Phe_4Me}$$$$,4.4447875,0
|
|
44
|
+
DBID78,PEPTIDE1{meI.Aca.Aca.T.dK.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.D-Dap.Thr_PO3H2.N.Thr_PO3H2.Phe_4Me}$$$$,2.4899697,4
|
|
45
|
+
DBID95,PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cya.N.F.Phe_4Me}$$$$,0.3957288,4
|
|
46
|
+
DBID92,PEPTIDE1{meI.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.F.aIle.Phe_4Me}$$$$,2.9058776,3
|
|
47
|
+
DBID88,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.dV.Phe_4Me}$$$$,2.1254258,0
|
|
48
|
+
DBID10,PEPTIDE1{meI.Bux.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Cys_SEt.N.Bmt.Phe_4Me}$$$$,1.7159123,0
|
|
49
|
+
DBID86,PEPTIDE1{D-Tyr_Et.hHis.Aca.Q.T.W.Q.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.dV.Phe_4Me}$$$$,1.5285099,0
|
|
50
|
+
DBID43,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.dP.Tyr_ab-dehydroMe.dV.E.N.Bmt.Phe_4Me}$$$$,3.9470999,3
|
|
51
|
+
DBID26,PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.pnG.N.Bmt.Phe_4Me}$$$$,3.7495575,4
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Aleksandr Tanas",
|
|
6
6
|
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.12.
|
|
8
|
+
"version": "2.12.22",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.41.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.41.8",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.2.5",
|
|
39
39
|
"@datagrok-libraries/math": "^1.1.5",
|
|
40
40
|
"@datagrok-libraries/ml": "^6.6.5",
|
package/src/package.ts
CHANGED
|
@@ -49,7 +49,8 @@ import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-c
|
|
|
49
49
|
import {demoBio03UI} from './demo/bio03-atomic-level';
|
|
50
50
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
51
51
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
52
|
-
import {MsaWarning
|
|
52
|
+
import {MsaWarning} from './utils/multiple-sequence-alignment';
|
|
53
|
+
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
53
54
|
import {WebLogoApp} from './apps/web-logo-app';
|
|
54
55
|
import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
|
|
55
56
|
import {splitToMonomersUI} from './utils/split-to-monomers';
|
|
@@ -560,7 +561,8 @@ export async function helmPreprocessingFunction(
|
|
|
560
561
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Column,
|
|
561
562
|
methodName: DimReductionMethods, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames,
|
|
562
563
|
plotEmbeddings: boolean, preprocessingFunction?: DG.Func, options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
563
|
-
clusterEmbeddings?: boolean
|
|
564
|
+
clusterEmbeddings?: boolean
|
|
565
|
+
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
564
566
|
if (!checkInputColumnUI(molecules, 'Sequence Space'))
|
|
565
567
|
return;
|
|
566
568
|
if (!preprocessingFunction)
|
|
@@ -600,12 +602,15 @@ export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonl
|
|
|
600
602
|
export function multipleSequenceAlignmentDialog(): void {
|
|
601
603
|
multipleSequenceAlignmentUI()
|
|
602
604
|
.catch((err: any) => {
|
|
603
|
-
const [errMsg,
|
|
605
|
+
const [errMsg, errStack] = errInfo(err);
|
|
604
606
|
if (err instanceof MsaWarning) {
|
|
607
|
+
grok.shell.warning((err as MsaWarning).element);
|
|
605
608
|
_package.logger.warning(errMsg);
|
|
606
609
|
return;
|
|
607
610
|
}
|
|
608
|
-
|
|
611
|
+
grok.shell.error(errMsg);
|
|
612
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
613
|
+
// throw err; // This error throw is not handled
|
|
609
614
|
});
|
|
610
615
|
}
|
|
611
616
|
|
|
@@ -2,13 +2,11 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
4
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
5
7
|
|
|
6
8
|
import {ConverterFunc} from './types';
|
|
7
|
-
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
9
|
|
|
10
|
-
// import {mmSemType} from '../const';
|
|
11
|
-
// import {importFasta} from '../package';
|
|
12
10
|
|
|
13
11
|
category('converters', () => {
|
|
14
12
|
enum Samples {
|
|
@@ -116,6 +114,8 @@ RNA1{p.p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r
|
|
|
116
114
|
RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p.p}$$$$`,
|
|
117
115
|
};
|
|
118
116
|
|
|
117
|
+
const bioTagsSet = new Set<string>(Object.values(bioTAGS));
|
|
118
|
+
|
|
119
119
|
/** Also detects semantic types
|
|
120
120
|
* @param {string} key
|
|
121
121
|
* @return {Promise<DG.DataFrame>}
|
|
@@ -152,7 +152,19 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
152
152
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
153
153
|
|
|
154
154
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
155
|
-
const
|
|
155
|
+
const srcSh: SeqHandler = SeqHandler.forColumn(srcCol);
|
|
156
|
+
const resSh: SeqHandler = SeqHandler.forColumn(resCol);
|
|
157
|
+
for (const [tagName, tgtTagValue] of Object.entries(tgtCol.tags)) {
|
|
158
|
+
if (
|
|
159
|
+
!bioTagsSet.has(tagName) ||
|
|
160
|
+
(srcSh.notation === NOTATION.HELM && [bioTAGS.alphabet, bioTAGS.alphabetIsMultichar].includes(tagName as bioTAGS)) ||
|
|
161
|
+
(resSh.notation === NOTATION.HELM && [bioTAGS.alphabet, bioTAGS.alphabetIsMultichar].includes(tagName as bioTAGS))
|
|
162
|
+
) continue;
|
|
163
|
+
|
|
164
|
+
const resTagValue = resCol.getTag(tagName);
|
|
165
|
+
expect(resTagValue, tgtTagValue,
|
|
166
|
+
`Tag '${tagName}' expected value '${tgtTagValue}' is not equal to actual '${resTagValue}'.`);
|
|
167
|
+
}
|
|
156
168
|
}
|
|
157
169
|
|
|
158
170
|
// FASTA tests
|
|
@@ -48,13 +48,17 @@ category('renderers', () => {
|
|
|
48
48
|
await _testAfterConvert();
|
|
49
49
|
});
|
|
50
50
|
|
|
51
|
+
test('afterConvertToHelm', async () => {
|
|
52
|
+
await _testAfterConvertToHelm();
|
|
53
|
+
});
|
|
54
|
+
|
|
51
55
|
test('selectRendererBySemType', async () => {
|
|
52
56
|
await _selectRendererBySemType();
|
|
53
57
|
});
|
|
54
58
|
|
|
55
59
|
test('scatterPlotTooltip', async () => {
|
|
56
60
|
await _testScatterPlotTooltip();
|
|
57
|
-
}
|
|
61
|
+
});
|
|
58
62
|
|
|
59
63
|
async function _rendererMacromoleculeFasta() {
|
|
60
64
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
|
|
@@ -162,7 +166,7 @@ category('renderers', () => {
|
|
|
162
166
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA_PT.csv');
|
|
163
167
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
164
168
|
|
|
165
|
-
const srcCol: DG.Column = df.
|
|
169
|
+
const srcCol: DG.Column = df.getCol('sequence')!;
|
|
166
170
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
|
|
167
171
|
if (semType)
|
|
168
172
|
srcCol.semType = semType;
|
|
@@ -182,6 +186,19 @@ category('renderers', () => {
|
|
|
182
186
|
const _sh: SeqHandler = SeqHandler.forColumn(tgtCol);
|
|
183
187
|
}
|
|
184
188
|
|
|
189
|
+
async function _testAfterConvertToHelm() {
|
|
190
|
+
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/FASTA_PT.csv');
|
|
191
|
+
const view = grok.shell.addTableView(df);
|
|
192
|
+
await awaitGrid(view.grid);
|
|
193
|
+
|
|
194
|
+
const srcCol = df.getCol('sequence');
|
|
195
|
+
const sh = SeqHandler.forColumn(srcCol);
|
|
196
|
+
const tgtCol = sh.convert(NOTATION.HELM);
|
|
197
|
+
df.columns.add(tgtCol);
|
|
198
|
+
await awaitGrid(view.grid);
|
|
199
|
+
expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'helm');
|
|
200
|
+
}
|
|
201
|
+
|
|
185
202
|
async function _selectRendererBySemType() {
|
|
186
203
|
/* There are renderers for semType Macromolecule and MacromoleculeDifference.
|
|
187
204
|
Misbehavior was by selecting Macromolecule renderers for MacromoleculeDifference semType column
|
|
@@ -100,6 +100,7 @@ category('toAtomicLevel', async () => {
|
|
|
100
100
|
fastaDna = 'fastaDna',
|
|
101
101
|
fastaRna = 'fastaRna',
|
|
102
102
|
fastaPt = 'fastaPt',
|
|
103
|
+
fastaUn = 'fastaUn',
|
|
103
104
|
|
|
104
105
|
separatorDna = 'separatorDna',
|
|
105
106
|
separatorRna = 'separatorRna',
|
|
@@ -122,6 +123,10 @@ UUCAACUUCAAC`,
|
|
|
122
123
|
FWPHEYFWPHEY
|
|
123
124
|
YNRQWYVYNRQWYV
|
|
124
125
|
MKPSEYVMKPSEYV`,
|
|
126
|
+
[csvTests.fastaUn]: `seq
|
|
127
|
+
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
128
|
+
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
129
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`,
|
|
125
130
|
[csvTests.separatorDna]: `seq
|
|
126
131
|
A/C/G/T/C/A/C/G/T/C
|
|
127
132
|
C/A/G/T/G/T/C/A/G/T/G/T
|
|
@@ -169,6 +174,10 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
|
|
|
169
174
|
await _testToAtomicLevel(await readCsv(csvTests.fastaPt), 'seq', monomerLibHelper);
|
|
170
175
|
});
|
|
171
176
|
|
|
177
|
+
test('fastaUn', async () => {
|
|
178
|
+
await _testToAtomicLevel(await readCsv(csvTests.fastaUn), 'seq', monomerLibHelper);
|
|
179
|
+
});
|
|
180
|
+
|
|
172
181
|
test('separatorDna', async () => {
|
|
173
182
|
await _testToAtomicLevel(await readCsv(csvTests.separatorDna), 'seq', monomerLibHelper);
|
|
174
183
|
});
|
package/src/utils/convert.ts
CHANGED
|
@@ -18,10 +18,10 @@ let convertDialogSubs: Subscription[] = [];
|
|
|
18
18
|
* @param {DG.column} col Column with 'Macromolecule' semantic type
|
|
19
19
|
*/
|
|
20
20
|
export function convert(col?: DG.Column): void {
|
|
21
|
-
let
|
|
22
|
-
if (!
|
|
21
|
+
let srcCol = col ?? grok.shell.t.columns.bySemType('Macromolecule')!;
|
|
22
|
+
if (!srcCol)
|
|
23
23
|
throw new Error('No column with Macromolecule semantic type found');
|
|
24
|
-
let converterSh = SeqHandler.forColumn(
|
|
24
|
+
let converterSh = SeqHandler.forColumn(srcCol);
|
|
25
25
|
let currentNotation: NOTATION = converterSh.notation;
|
|
26
26
|
const dialogHeader = ui.divText(
|
|
27
27
|
'Current notation: ' + currentNotation,
|
|
@@ -41,12 +41,12 @@ export function convert(col?: DG.Column): void {
|
|
|
41
41
|
];
|
|
42
42
|
const toggleColumn = (newCol: DG.Column) => {
|
|
43
43
|
if (newCol.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
44
|
-
targetColumnInput.value =
|
|
44
|
+
targetColumnInput.value = srcCol;
|
|
45
45
|
return;
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
-
|
|
49
|
-
converterSh = SeqHandler.forColumn(
|
|
48
|
+
srcCol = newCol;
|
|
49
|
+
converterSh = SeqHandler.forColumn(srcCol);
|
|
50
50
|
currentNotation = converterSh.notation;
|
|
51
51
|
if (currentNotation === NOTATION.HELM)
|
|
52
52
|
separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
|
|
@@ -63,7 +63,7 @@ export function convert(col?: DG.Column): void {
|
|
|
63
63
|
]));
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
-
const targetColumnInput = ui.columnInput('Column', grok.shell.t,
|
|
66
|
+
const targetColumnInput = ui.columnInput('Column', grok.shell.t, srcCol, toggleColumn);
|
|
67
67
|
|
|
68
68
|
const separatorArray = ['-', '.', '/'];
|
|
69
69
|
let filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
@@ -96,9 +96,9 @@ export function convert(col?: DG.Column): void {
|
|
|
96
96
|
]))
|
|
97
97
|
.onOK(async () => {
|
|
98
98
|
const targetNotation = targetNotationInput.value as NOTATION;
|
|
99
|
-
const separator: string | undefined = separatorInput.value
|
|
99
|
+
const separator: string | undefined = targetNotation === NOTATION.SEPARATOR ? separatorInput.value! : undefined;
|
|
100
100
|
|
|
101
|
-
await convertDo(
|
|
101
|
+
await convertDo(srcCol, targetNotation, separator);
|
|
102
102
|
})
|
|
103
103
|
.show({x: 350, y: 100});
|
|
104
104
|
|
|
@@ -47,14 +47,17 @@ export class SimplePolymer {
|
|
|
47
47
|
const monomerList: string[] = [];
|
|
48
48
|
const monomerTypeList: HELM_MONOMER_TYPE[] = [];
|
|
49
49
|
monomerGroups.forEach((monomerGroup) => {
|
|
50
|
-
const splitted = monomerGroup.split(/\(|\)/)
|
|
51
|
-
|
|
52
|
-
monomerList.push(...splitted);
|
|
50
|
+
// const splitted = monomerGroup.split(/\(|\)/).map((el) => el.replace(/[\[\]]/g, ''));
|
|
51
|
+
// monomerList.push(...splitted);
|
|
53
52
|
// WARNING: only the groups of the form r(A)p, as in RNA, are supported
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
53
|
+
|
|
54
|
+
monomerList.push(monomerGroup);
|
|
55
|
+
// const monomerTypes = splitted.map(
|
|
56
|
+
// (_, idx) => (idx % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
|
|
57
|
+
// );
|
|
58
|
+
|
|
59
|
+
// monomerTypeList.push(...monomerTypes);
|
|
60
|
+
monomerTypeList.push(HELM_MONOMER_TYPE.BACKBONE);
|
|
58
61
|
});
|
|
59
62
|
return {monomers: monomerList, monomerTypes: monomerTypeList};
|
|
60
63
|
}
|
|
@@ -7,7 +7,7 @@ import {delay} from '@datagrok-libraries/utils/src/test';
|
|
|
7
7
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
9
|
|
|
10
|
-
import {runKalign} from './multiple-sequence-alignment';
|
|
10
|
+
import {MsaWarning, runKalign} from './multiple-sequence-alignment';
|
|
11
11
|
import {pepseaMethods, runPepsea} from './pepsea';
|
|
12
12
|
import {checkInputColumnUI} from './check-input-column';
|
|
13
13
|
import {multipleSequenceAlginmentUIOptions} from './types';
|
|
@@ -18,12 +18,6 @@ import {_package} from '../package';
|
|
|
18
18
|
|
|
19
19
|
import '../../css/msa.css';
|
|
20
20
|
|
|
21
|
-
export class MsaWarning extends Error {
|
|
22
|
-
constructor(message: string, options?: ErrorOptions) {
|
|
23
|
-
super(message, options);
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
|
|
27
21
|
export async function multipleSequenceAlignmentUI(
|
|
28
22
|
options: multipleSequenceAlginmentUIOptions = {},
|
|
29
23
|
): Promise<DG.Column> {
|
|
@@ -37,9 +31,9 @@ export async function multipleSequenceAlignmentUI(
|
|
|
37
31
|
const table = options.col?.dataFrame ?? grok.shell.t;
|
|
38
32
|
const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
39
33
|
if (seqCol == null) {
|
|
40
|
-
const errMsg: string = `Multiple
|
|
34
|
+
const errMsg: string = `Multiple Sequence Alignment analysis requires a dataset with a macromolecule column.`;
|
|
41
35
|
grok.shell.warning(errMsg);
|
|
42
|
-
reject(new MsaWarning(errMsg));
|
|
36
|
+
reject(new MsaWarning(ui.divText(errMsg)));
|
|
43
37
|
return; // Prevents creating the MSA dialog
|
|
44
38
|
}
|
|
45
39
|
|
|
@@ -145,8 +139,6 @@ async function onDialogOk(
|
|
|
145
139
|
|
|
146
140
|
resolve(msaCol);
|
|
147
141
|
} catch (err: any) {
|
|
148
|
-
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
149
|
-
grok.shell.error(errMsg);
|
|
150
142
|
reject(err);
|
|
151
143
|
} finally {
|
|
152
144
|
pi.close();
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
3
|
import * as DG from 'datagrok-api/dg';
|
|
3
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
4
7
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
5
8
|
import {ALIGNMENT, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
9
|
//@ts-ignore: there are no types for this library
|
|
@@ -8,9 +11,17 @@ import Aioli from '@biowasm/aioli';
|
|
|
8
11
|
|
|
9
12
|
import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
|
|
10
13
|
import {kalignVersion} from './constants';
|
|
14
|
+
|
|
11
15
|
const fastaInputFilename = 'input.fa';
|
|
12
16
|
const fastaOutputFilename = 'result.fasta';
|
|
13
17
|
|
|
18
|
+
export class MsaWarning extends Error {
|
|
19
|
+
constructor(
|
|
20
|
+
public readonly element: HTMLElement, options?: ErrorOptions) {
|
|
21
|
+
super(element.innerText, options);
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
14
25
|
/**
|
|
15
26
|
* Converts array of sequences into simple fasta string.
|
|
16
27
|
*
|
|
@@ -57,6 +68,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
57
68
|
(fastaSequences[clusterCategoryIdx] ??= []).push(sequences[rowIdx]);
|
|
58
69
|
(clusterIndexes[clusterCategoryIdx] ??= []).push(rowIdx);
|
|
59
70
|
}
|
|
71
|
+
checkForSingleSeqClusters(clusterIndexes, clustersColCategories);
|
|
60
72
|
|
|
61
73
|
const CLI = await new Aioli([
|
|
62
74
|
'base/1.0.0',
|
|
@@ -78,8 +90,10 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
78
90
|
console.warn(output);
|
|
79
91
|
|
|
80
92
|
const buf = await CLI.cat(fastaOutputFilename);
|
|
81
|
-
if (!buf)
|
|
82
|
-
|
|
93
|
+
if (!buf) {
|
|
94
|
+
const errStr = parseKalignError(output, 1);
|
|
95
|
+
throw new Error(errStr);
|
|
96
|
+
}
|
|
83
97
|
|
|
84
98
|
const ffh = new FastaFileHandler(buf);
|
|
85
99
|
const aligned = ffh.sequencesArray; // array of sequences extracted from FASTA
|
|
@@ -115,3 +129,36 @@ export async function testMSAEnoughMemory(col: DG.Column<string>): Promise<void>
|
|
|
115
129
|
}
|
|
116
130
|
}
|
|
117
131
|
}
|
|
132
|
+
|
|
133
|
+
function parseKalignError(out: string, limit?: number): string {
|
|
134
|
+
const errLineList: string[] = [];
|
|
135
|
+
const errLineRe = /^.+ERROR : (.+)$/gm;
|
|
136
|
+
let ma: RegExpExecArray | null;
|
|
137
|
+
while ((ma = errLineRe.exec(out)) != null && (limit === undefined || errLineList.length < limit)) {
|
|
138
|
+
//
|
|
139
|
+
errLineList.push(ma[1]);
|
|
140
|
+
}
|
|
141
|
+
return errLineList.join('\n');
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
 * Rejects an MSA request when at least one cluster consists of a single sequence.
 *
 * @param clusterIndexes Row indexes grouped per cluster; the position in this array is the cluster's
 *   index into {@link clustersColCategories}. Positions without an entry (holes) are ignored.
 * @param clustersColCategories Cluster names, indexed the same way as {@link clusterIndexes}.
 * @throws {MsaWarning} When one or more clusters contain exactly one sequence. The warning element
 *   shows the number of such clusters and the names of at most the first three of them
 *   (names longer than 25 characters are shortened).
 */
export function checkForSingleSeqClusters(clusterIndexes: number[][], clustersColCategories: string[]): void {
  const maxShownClusters = 3;
  const maxNameLength = 25;

  const singleSeqClusterIdxList: number[] = [];
  // forEach does not visit holes, so clusters that never received a row are skipped.
  clusterIndexes.forEach((idxs: number[], clusterIdx: number) => {
    if (idxs?.length === 1) singleSeqClusterIdxList.push(clusterIdx);
  });
  if (singleSeqClusterIdxList.length === 0) return;

  const totalCount = singleSeqClusterIdxList.length;
  const nameEls = singleSeqClusterIdxList.slice(0, maxShownClusters).map((clusterIdx, pos) => {
    let clusterName = clustersColCategories[clusterIdx];
    if (clusterName.length > maxNameLength) clusterName = clusterName.slice(0, maxNameLength) + '...';
    // The separator depends on the position within the list of single-sequence clusters,
    // not on the cluster's category index: only the very last cluster is followed by a period.
    const separator = pos < totalCount - 1 ? ', ' : '.';
    return ui.divText(`"${clusterName}"${separator}`);
  });

  const errEl = ui.div([
    ui.divText(`MSA analysis is not available on single sequence clusters ` +
      `#${totalCount}:`),
    ...nameEls,
    ...(totalCount > maxShownClusters ? [ui.divText('...')] : []),
  ]);
  throw new MsaWarning(errEl);
}
|
package/src/utils/pepsea.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
4
|
import * as DG from 'datagrok-api/dg';
|
|
4
5
|
|
|
5
6
|
import {Subject} from 'rxjs';
|
|
@@ -8,6 +9,7 @@ import {testEvent} from '@datagrok-libraries/utils/src/test';
|
|
|
8
9
|
import {NOTATION, TAGS as bioTAGS, ALIGNMENT, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
10
|
import {ILogger} from '@datagrok-libraries/bio/src/utils/logger';
|
|
10
11
|
|
|
12
|
+
import {checkForSingleSeqClusters} from './multiple-sequence-alignment';
|
|
11
13
|
import * as C from './constants';
|
|
12
14
|
|
|
13
15
|
import {_package} from '../package';
|
|
@@ -54,20 +56,26 @@ export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
|
|
|
54
56
|
if (clustersCol.type != DG.COLUMN_TYPE.STRING)
|
|
55
57
|
clustersCol = clustersCol.convertTo(DG.TYPE.STRING);
|
|
56
58
|
|
|
57
|
-
const
|
|
58
|
-
const
|
|
59
|
+
const clustersColCategories = clustersCol.categories;
|
|
60
|
+
const clustersColData = clustersCol.getRawData();
|
|
61
|
+
const bodies: PepseaBodyUnit[][] = new Array(clustersColCategories.length);
|
|
62
|
+
const clusterIndexes: number[][] = new Array(clustersColCategories.length);
|
|
59
63
|
|
|
60
64
|
// Grouping data by clusters
|
|
61
65
|
for (let rowIndex = 0; rowIndex < peptideCount; ++rowIndex) {
|
|
62
|
-
const
|
|
66
|
+
const clusterCategoryIdx = clustersColData[rowIndex];
|
|
67
|
+
const cluster = clustersColCategories[clusterCategoryIdx];
|
|
63
68
|
if (cluster === '')
|
|
64
69
|
continue;
|
|
65
70
|
|
|
66
|
-
const clusterId =
|
|
71
|
+
const clusterId = clustersColCategories.indexOf(cluster);
|
|
67
72
|
const helmSeq = srcCol.get(rowIndex);
|
|
68
|
-
if (helmSeq)
|
|
73
|
+
if (helmSeq) {
|
|
69
74
|
(bodies[clusterId] ??= []).push({ID: rowIndex.toString(), HELM: helmSeq});
|
|
75
|
+
(clusterIndexes[clusterCategoryIdx] ??= []).push(rowIndex);
|
|
76
|
+
}
|
|
70
77
|
}
|
|
78
|
+
checkForSingleSeqClusters(clusterIndexes, clustersColCategories);
|
|
71
79
|
|
|
72
80
|
const alignedSequences: string[] = new Array(peptideCount);
|
|
73
81
|
for (const body of bodies) { // getting aligned sequences for each cluster
|
package/dist/79.js
DELETED
|
@@ -1,2 +0,0 @@
|
|
|
1
|
-
var bio;(()=>{"use strict";const t={V2K_RGP_SHIFT:8,V2K_RGP_LINE:"M RGP",V2K_A_LINE:"A ",V3K_COUNTS_SHIFT:14,V3K_IDX_SHIFT:7,V3K_HEADER_FIRST_LINE:"\nDatagrok macromolecule handler\n\n",V3K_HEADER_SECOND_LINE:" 0 0 0 0 0 0 999 V3000\n",V3K_BEGIN_CTAB_BLOCK:"M V30 BEGIN CTAB\n",V3K_END_CTAB_BLOCK:"M V30 END CTAB\n",V3K_BEGIN_COUNTS_LINE:"M V30 COUNTS ",V3K_COUNTS_LINE_ENDING:" 0 0 0\n",V3K_BEGIN_ATOM_BLOCK:"M V30 BEGIN ATOM\n",V3K_END_ATOM_BLOCK:"M V30 END ATOM\n",V3K_BEGIN_BOND_BLOCK:"M V30 BEGIN BOND\n",V3K_END_BOND_BLOCK:"M V30 END BOND\n",V3K_BOND_CONFIG:" CFG=",V3K_BEGIN_DATA_LINE:"M V30 ",V3K_END:"M END",PRECISION_FACTOR:1e4,DEOXYRIBOSE:"d",RIBOSE:"r",PHOSPHATE:"p",OXYGEN:"O",HYDROGEN:"H"};function n(n,e,a,_){if(0===n.length)return"";const N=s,{atomCount:E,bondCount:f}=N(n,e,a,_),r=new Array(E),c=new Array(f);let l,b=null,S=null;"PEPTIDE"===_?l=o:(l=i,b="DNA"===a?e.get(t.DEOXYRIBOSE):e.get(t.RIBOSE),S=e.get(t.PHOSPHATE));const d={i:0,nodeShift:0,bondShift:0,backbonePositionShift:new Array(2).fill(0),branchPositionShift:new Array(2).fill(0),backboneAttachNode:0,branchAttachNode:0,flipFactor:1},I={sugar:b,phosphate:S,seqLength:n.length,atomCount:E,bondCount:f},A=[];let O=0;for(d.i=0;d.i<I.seqLength;++d.i){const t=e.get(n[d.i]);l(t,r,c,d,I),t.stereoAtoms?.forEach((t=>A.push(t+O))),O+=t.atoms.x.length}!function(n,o,e,i){const a=e.nodeShift+1;n[i.atomCount]=t.V3K_BEGIN_DATA_LINE+a+" "+t.OXYGEN+" "+h(e.backbonePositionShift[0])+" "+e.flipFactor*h(e.backbonePositionShift[1])+" 0.000000 0\n";const s=e.backboneAttachNode,_=a;o[i.bondCount]=t.V3K_BEGIN_DATA_LINE+e.bondShift+" 1 "+s+" "+_+"\n"}(r,c,d,I);const m=t.V3K_BEGIN_COUNTS_LINE+E+" "+f+t.V3K_COUNTS_LINE_ENDING;let g="";return g+=t.V3K_HEADER_FIRST_LINE,g+=t.V3K_HEADER_SECOND_LINE,g+=t.V3K_BEGIN_CTAB_BLOCK,g+=m,g+=t.V3K_BEGIN_ATOM_BLOCK,g+=r.join(""),g+=t.V3K_END_ATOM_BLOCK,g+=t.V3K_BEGIN_BOND_BLOCK,g+=c.join(""),g+=t.V3K_END_BOND_BLOCK,A.length>0&&(g+=function(t){const n=[];let o=`M V30 MDLV30/STEABS 
ATOMS=(${t.length}`;for(let e=0;e<t.length;e++){const i=`${o} ${t[e]}`;i.length>76?(n.push(`${o} -\n`),o=`M V30 ${t[e]}`):o=i,e===t.length-1&&n.push(`${o})\n`)}return`M V30 BEGIN COLLECTION\n${n.join("")}M V30 END COLLECTION\n`}(A)),g+=t.V3K_END_CTAB_BLOCK,g+=t.V3K_END,g}function o(t,n,o,i){i.flipFactor=(-1)**(i.i%2),e(t,n,o,i)}function e(n,o,e,i){!function(n,o,e){for(let i=0;i<n.atoms.atomTypes.length;++i){const a=e.nodeShift+i+1;o[e.nodeShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.atoms.atomTypes[i]+" "+h(e.backbonePositionShift[0]+n.atoms.x[i])+" "+h(e.backbonePositionShift[1]+e.flipFactor*n.atoms.y[i])+" "+n.atoms.kwargs[i]}}(n,o,i),a(n,e,i),function(n,o,e){if(0!==e.backboneAttachNode){const i=e.bondShift,a=e.backboneAttachNode,s=n.meta.terminalNodes[0]+e.nodeShift;o[e.bondShift-1]=t.V3K_BEGIN_DATA_LINE+i+" 1 "+a+" "+s+"\n"}}(n,e,i),null!==n.meta.branchShift&&n.meta.terminalNodes.length>2&&function(t,n){n.branchAttachNode=n.nodeShift+t.meta.terminalNodes[2];for(let o=0;o<2;++o)n.branchPositionShift[o]=n.backbonePositionShift[o]+t.meta.branchShift[o]}(n,i),function(t,n){n.backboneAttachNode=n.nodeShift+t.meta.terminalNodes[1],n.bondShift+=t.bonds.atomPairs.length+1,n.nodeShift+=t.atoms.atomTypes.length,n.backbonePositionShift[0]+=t.meta.backboneShift[0],n.backbonePositionShift[1]+=n.flipFactor*t.meta.backboneShift[1]}(n,i)}function i(n,o,i,s,_){if(0===s.i)e(_.sugar,o,i,s);else for(const t of[_.phosphate,_.sugar])e(t,o,i,s);!function(n,o,e,i){(function(n,o,e){for(let i=0;i<n.atoms.atomTypes.length;++i){const a=e.nodeShift+i+1;o[e.nodeShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.atoms.atomTypes[i]+" "+h(e.branchPositionShift[0]+n.atoms.x[i])+" "+h(e.branchPositionShift[1]+e.flipFactor*n.atoms.y[i])+" "+n.atoms.kwargs[i]}})(n,o,i),a(n,e,i),function(n,o,e){const i=e.bondShift,a=e.branchAttachNode,s=n.meta.terminalNodes[0]+e.nodeShift;o[i-1]=t.V3K_BEGIN_DATA_LINE+i+" 1 "+a+" "+s+"\n"}(n,e,i);const 
s=i.bondShift,_=i.branchAttachNode,N=n.meta.terminalNodes[0]+i.nodeShift;e[s-1]=t.V3K_BEGIN_DATA_LINE+s+" 1 "+_+" "+N+"\n",i.bondShift+=n.bonds.atomPairs.length+1,i.nodeShift+=n.atoms.atomTypes.length}(n,o,i,s)}function a(n,o,e){for(let i=0;i<n.bonds.atomPairs.length;++i){const a=e.bondShift+i+1,s=n.bonds.atomPairs[i][0]+e.nodeShift,h=n.bonds.atomPairs[i][1]+e.nodeShift;let _="";if(n.bonds.bondConfiguration.has(i)){let t=n.bonds.bondConfiguration.get(i);e.flipFactor<0&&(t=1===t?3:1),_=" CFG="+t}const N=n.bonds.kwargs.has(i)?" "+n.bonds.kwargs.get(i):"";o[e.bondShift+i]=t.V3K_BEGIN_DATA_LINE+a+" "+n.bonds.bondTypes[i]+" "+s+" "+h+_+N+"\n"}}function s(n,o,e,i){let a=0,s=0;for(const t of n){if(""===t)continue;const n=o.get(t);a+=n.atoms.x.length,s+=n.bonds.bondTypes.length}if("PEPTIDE"===i)a+=1,s+=n.length;else{const i="DNA"===e?o.get(t.DEOXYRIBOSE):o.get(t.RIBOSE),h=o.get(t.PHOSPHATE);a+=(n.length-1)*h.atoms.x.length,a+=n.length*i.atoms.x.length,a+=1,s+=(n.length-1)*h.bonds.bondTypes.length,s+=n.length*i.bonds.bondTypes.length,s-=1,s+=3*n.length}return{atomCount:a,bondCount:s}}function h(n){return Math.round(t.PRECISION_FACTOR*n)/t.PRECISION_FACTOR}new RegExp("[rd]\\((\\w)\\)p?","g"),onmessage=t=>{const{monomerSequencesArray:o,monomersDict:e,alphabet:i,polymerType:a,start:s,end:h}=t.data,_=new Array(h-s),N=new Array(0);for(let t=s;t<h;++t)try{const h=o[t];_[t-s]=n(h,e,i,a)}catch(n){const o=`Cannot get molfile of row #${t}: ${n instanceof Error?n.message:n.toString()}.`;N.push(o)}postMessage({molfileList:_,molfileWarningList:N})},bio={}})();
|
|
2
|
-
//# sourceMappingURL=79.js.map
|
package/dist/79.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"79.js","mappings":"2BAAO,MAAMA,EAAqB,CAE9BC,cAAe,EACfC,aAAc,SACdC,WAAY,MAEZC,iBAAkB,GAClBC,cAAe,EACfC,sBAAuB,uCACvBC,uBAAwB,4CACxBC,qBAAsB,sBACtBC,mBAAoB,oBACpBC,sBAAuB,iBACvBC,uBAAwB,WACxBC,qBAAsB,sBACtBC,mBAAoB,oBACpBC,qBAAsB,sBACtBC,mBAAoB,oBACpBC,gBAAiB,QACjBC,oBAAqB,UACrBC,QAAS,SACTC,iBAAkB,IAElBC,YAAa,IACbC,OAAQ,IACRC,UAAW,IACXC,OAAQ,IACRC,SAAU,KCOP,SAASC,EAAoBC,EAAYC,EAAcC,EAAUC,GACpE,GAA0B,IAAtBH,EAAWI,OAEX,MAAO,GAGX,MAAMC,EAAuBC,GACvB,UAAEC,EAAS,UAAEC,GAAcH,EAAqBL,EAAYC,EAAcC,EAAUC,GAEpFM,EAAmB,IAAIC,MAAMH,GAC7BI,EAAmB,IAAID,MAAMF,GACnC,IAAII,EACAC,EAAQ,KACRC,EAAY,KACI,YAAhBX,EACAS,EAAuBG,GAGvBH,EAAuBI,EACvBH,EAAsB,QAAbX,EAAyCD,EAAagB,IAAI,EAAEvB,aAAeO,EAAagB,IAAI,EAAEtB,QACvGmB,EAAYb,EAAagB,IAAI,EAAErB,YAEnC,MAAMsB,EAAI,CACNC,EAAG,EACHC,UAAW,EACXC,UAAW,EACXC,sBAAuB,IAAIZ,MAAM,GAAGa,KAAK,GACzCC,oBAAqB,IAAId,MAAM,GAAGa,KAAK,GACvCE,mBAAoB,EACpBC,iBAAkB,EAClBC,WAAY,GAEVC,EAAK,CACPf,MAAOA,EACPC,UAAWA,EACXe,UAAW7B,EAAWI,OACtBG,UAAWA,EACXC,UAAWA,GAETsB,EAAmB,GACzB,IAAIC,EAAS,EACb,IAAKb,EAAEC,EAAI,EAAGD,EAAEC,EAAIS,EAAGC,YAAaX,EAAEC,EAAG,CACrC,MAAMa,EAAU/B,EAAagB,IAAIjB,EAAWkB,EAAEC,IAC9CP,EAAqBoB,EAASvB,EAAkBE,EAAkBO,EAAGU,GAErEI,EAAQC,aAAaC,SAASf,GAAMW,EAAiBK,KAAKhB,EAAIY,KAC9DA,GAAUC,EAAQI,MAAMC,EAAEjC,MAC9B,EAiDJ,SAA8BK,EAAkBE,EAAkBO,EAAGU,GAEjE,MAAMU,EAAUpB,EAAEE,UAAY,EAC9BX,EAAiBmB,EAAGrB,WAAa,EAAEhB,oBAAsB+C,EAAU,IAC/D,EAAEzC,OAAS,IAAM0C,EAAcrB,EAAEI,sBAAsB,IAAM,IAC7DJ,EAAES,WAAaY,EAAcrB,EAAEI,sBAAsB,IAFxB,gBAIjC,MAAMkB,EAAYtB,EAAEO,mBACdgB,EAAaH,EACnB3B,EAAiBiB,EAAGpB,WAAa,EAAEjB,oBAAsB2B,EAAEG,UAA1B,MACnBmB,EAAY,IAAMC,EAAa,IACjD,CA3DIC,CAAqBjC,EAAkBE,EAAkBO,EAAGU,GAC5D,MAAMe,EAAoB,EAAE3D,sBAAwBuB,EAAY,IAAMC,EAAY,EAAEvB,uBAIpF,IAAI2D,EAAS,GAgBb,OAfAA,GAAU,EAAEhE,sBACZgE,GAAU,EAAE/D,uBACZ+D,GAAU,EAAE9D,qBACZ8D,GAAUD,EACVC,GAAU,EAAE1D,qBACZ0D,GAAUnC,EAAiBoC,KAAK,IAChCD,GAAU,EAAEzD,mBACZyD,GAAU,EAAExD,qBACZwD,GAAUjC,EAAiBkC,KAAK,IAChCD,GAAU,EAAEvD,mBACRyC,EAAiB1B,OAAS,IAC1BwC,GAMR,SAA4BE,GAGxB,MACMC,EAAY,GAClB,IAAIC,EAAmB,+BAA+BF,EAAW1C,S
ACjE,IAAK,IAAIe,EAAI,EAAGA,EAAI2B,EAAW1C,OAAQe,IAAK,CACxC,MAAM8B,EAAa,GAAGD,KAAoBF,EAAW3B,KACjD8B,EAAW7C,OALA,IAMX2C,EAAUZ,KAAK,GAAGa,SAClBA,EAAmB,UAAUF,EAAW3B,MAGxC6B,EAAmBC,EAEnB9B,IAAM2B,EAAW1C,OAAS,GAC1B2C,EAAUZ,KAAK,GAAGa,OAC1B,CACA,MAAO,4BAA4BD,EAAUF,KAAK,4BACtD,CAzBkBK,CAAmBpB,IACjCc,GAAU,EAAE7D,mBACZ6D,GAAU,EAAEpD,QAELoD,CACX,CAsCA,SAAS7B,EAAuBiB,EAASvB,EAAkBE,EAAkBO,GACzEA,EAAES,aAAe,KAAOT,EAAEC,EAAI,GAC9BgC,EAA6BnB,EAASvB,EAAkBE,EAAkBO,EAC9E,CACA,SAASiC,EAA6BnB,EAASvB,EAAkBE,EAAkBO,IAoDnF,SAAuBc,EAASvB,EAAkBS,GAC9C,IAAK,IAAIkC,EAAI,EAAGA,EAAIpB,EAAQI,MAAMiB,UAAUjD,SAAUgD,EAAG,CACrD,MAAMd,EAAUpB,EAAEE,UAAYgC,EAAI,EAClC3C,EAAiBS,EAAEE,UAAYgC,GAAK,EAAE7D,oBAAsB+C,EAAU,IAClEN,EAAQI,MAAMiB,UAAUD,GAAK,IAC7Bb,EAAcrB,EAAEI,sBAAsB,GAAKU,EAAQI,MAAMC,EAAEe,IAAM,IACjEb,EAAcrB,EAAEI,sBAAsB,GAAKJ,EAAES,WAAaK,EAAQI,MAAMkB,EAAEF,IAC1E,IAAMpB,EAAQI,MAAMmB,OAAOH,EACnC,CACJ,CA1DII,CAAcxB,EAASvB,EAAkBS,GAEzCuC,EAAczB,EAASrB,EAAkBO,GAyF7C,SAAgCc,EAASrB,EAAkBO,GACvD,GAA6B,IAAzBA,EAAEO,mBAA0B,CAC5B,MAAMiC,EAAUxC,EAAEG,UACZmB,EAAYtB,EAAEO,mBACdgB,EAAaT,EAAQ2B,KAAKC,cAAc,GAAK1C,EAAEE,UACrDT,EAAiBO,EAAEG,UAAY,GAAK,EAAE9B,oBAAsBmE,EAAxB,MACtBlB,EAAY,IAAMC,EAAa,IACjD,CACJ,CA/FIoB,CAAuB7B,EAASrB,EAAkBO,GAEjB,OAA7Bc,EAAQ2B,KAAKG,aAAwB9B,EAAQ2B,KAAKC,cAAcxD,OAAS,GAsCjF,SAA+B4B,EAASd,GACpCA,EAAEQ,iBAAmBR,EAAEE,UAAYY,EAAQ2B,KAAKC,cAAc,GAC9D,IAAK,IAAIzC,EAAI,EAAGA,EAAI,IAAKA,EACrBD,EAAEM,oBAAoBL,GAAKD,EAAEI,sBAAsBH,GAAKa,EAAQ2B,KAAKG,YAAY3C,EACzF,CAzCQ4C,CAAsB/B,EAASd,GA8BvC,SAAuCc,EAASd,GAC5CA,EAAEO,mBAAqBP,EAAEE,UAAYY,EAAQ2B,KAAKC,cAAc,GAChE1C,EAAEG,WAAaW,EAAQgC,MAAMC,UAAU7D,OAAS,EAChDc,EAAEE,WAAaY,EAAQI,MAAMiB,UAAUjD,OACvCc,EAAEI,sBAAsB,IAAMU,EAAQ2B,KAAKO,cAAc,GACzDhD,EAAEI,sBAAsB,IAAMJ,EAAES,WAAaK,EAAQ2B,KAAKO,cAAc,EAC5E,CAlCIC,CAA8BnC,EAASd,EAC3C,CACA,SAASF,EAAwBoD,EAAY3D,EAAkBE,EAAkBO,EAAGU,GAGhF,GAAY,IAARV,EAAEC,EACFgC,EAA6BvB,EAAGf,MAAOJ,EAAkBE,EAAkBO,QAG3E,IAAK,MAAMc,IAAW,CAACJ,EAAGd,UAAWc,EAAGf,OACpCsC,EAA6BnB,EAASvB,EAAkBE,EAAkBO,IAItF,SAAoCc,EAASvB,EAAkBE,EAAkBO,IAqCjF,SAA6Bc,E
AASvB,EAAkBS,GACpD,IAAK,IAAIkC,EAAI,EAAGA,EAAIpB,EAAQI,MAAMiB,UAAUjD,SAAUgD,EAAG,CACrD,MAAMd,EAAUpB,EAAEE,UAAYgC,EAAI,EAClC3C,EAAiBS,EAAEE,UAAYgC,GAAK,EAAE7D,oBAAsB+C,EAAU,IAClEN,EAAQI,MAAMiB,UAAUD,GAAK,IAC7Bb,EAAcrB,EAAEM,oBAAoB,GAAKQ,EAAQI,MAAMC,EAAEe,IAAM,IAC/Db,EAAcrB,EAAEM,oBAAoB,GAAKN,EAAES,WAAaK,EAAQI,MAAMkB,EAAEF,IACxE,IAAMpB,EAAQI,MAAMmB,OAAOH,EACnC,CACJ,EA7CIiB,CAAoBrC,EAASvB,EAAkBS,GAC/CuC,EAAczB,EAASrB,EAAkBO,GA4E7C,SAAkCoD,EAAe3D,EAAkBO,GAC/D,MAAMwC,EAAUxC,EAAEG,UACZmB,EAAYtB,EAAEQ,iBACde,EAAa6B,EAAcX,KAAKC,cAAc,GAAK1C,EAAEE,UAC3DT,EAAiB+C,EAAU,GAAK,EAAEnE,oBAAsBmE,EAAxB,MAClBlB,EAAY,IAAMC,EAAa,IACjD,CAjFI8B,CAAyBvC,EAASrB,EAAkBO,GAEpD,MAAMwC,EAAUxC,EAAEG,UACZmB,EAAYtB,EAAEQ,iBACde,EAAaT,EAAQ2B,KAAKC,cAAc,GAAK1C,EAAEE,UACrDT,EAAiB+C,EAAU,GAAK,EAAEnE,oBAAsBmE,EAAxB,MAClBlB,EAAY,IAAMC,EAAa,KAE7CvB,EAAEG,WAAaW,EAAQgC,MAAMC,UAAU7D,OAAS,EAChDc,EAAEE,WAAaY,EAAQI,MAAMiB,UAAUjD,MAC3C,CAfIoE,CAA2BJ,EAAY3D,EAAkBE,EAAkBO,EAC/E,CAgDA,SAASuC,EAAczB,EAASrB,EAAkBO,GAE9C,IAAK,IAAIkC,EAAI,EAAGA,EAAIpB,EAAQgC,MAAMC,UAAU7D,SAAUgD,EAAG,CACrD,MAAMM,EAAUxC,EAAEG,UAAY+B,EAAI,EAC5BZ,EAAYR,EAAQgC,MAAMC,UAAUb,GAAG,GAAKlC,EAAEE,UAC9CqB,EAAaT,EAAQgC,MAAMC,UAAUb,GAAG,GAAKlC,EAAEE,UACrD,IAAIqD,EAAU,GACd,GAAIzC,EAAQgC,MAAMU,kBAAkBC,IAAIvB,GAAI,CAExC,IAAIwB,EAAc5C,EAAQgC,MAAMU,kBAAkBzD,IAAImC,GAClDlC,EAAES,WAAa,IACfiD,EAA+B,IAAhBA,EAAqB,EAAI,GAC5CH,EAAU,QAAUG,CACxB,CACA,MAAMrB,EAASvB,EAAQgC,MAAMT,OAAOoB,IAAIvB,GACpC,IAAMpB,EAAQgC,MAAMT,OAAOtC,IAAImC,GAAK,GACxCzC,EAAiBO,EAAEG,UAAY+B,GAAK,EAAE7D,oBAAsBmE,EAAU,IAClE1B,EAAQgC,MAAMa,UAAUzB,GAAK,IAC7BZ,EAAY,IAAMC,EAAagC,EAAUlB,EAAS,IAC1D,CACJ,CAyBA,SAASjD,EAA2BN,EAAYC,EAAcC,EAAUC,GACpE,IAAII,EAAY,EACZC,EAAY,EAEhB,IAAK,MAAMsE,KAAiB9E,EAAY,CACpC,GAAsB,KAAlB8E,EACA,SACJ,MAAM9C,EAAU/B,EAAagB,IAAI6D,GACjCvE,GAAayB,EAAQI,MAAMC,EAAEjC,OAC7BI,GAAawB,EAAQgC,MAAMa,UAAUzE,MACzC,CAEA,GAAoB,YAAhBD,EAEAI,GAAa,EAEbC,GAAaR,EAAWI,WAEvB,CACD,MAAMS,EAAsB,QAAbX,EACXD,EAAagB,IAAI,EAAEvB,aAAeO,EAAagB,IAAI,EAAEtB,QACnDmB,EAAYb,EAAagB,IAAI,EAAErB,WAErCW,IAAcP,EAAW
I,OAAS,GAAKU,EAAUsB,MAAMC,EAAEjC,OAEzDG,GAAaP,EAAWI,OAASS,EAAMuB,MAAMC,EAAEjC,OAE/CG,GAAa,EAEbC,IAAcR,EAAWI,OAAS,GAAKU,EAAUkD,MAAMa,UAAUzE,OAEjEI,GAAaR,EAAWI,OAASS,EAAMmD,MAAMa,UAAUzE,OAEvDI,GAAa,EAEbA,GAAiC,EAApBR,EAAWI,MAC5B,CACA,MAAO,CAAEG,YAAWC,YACxB,CAKO,SAAS+B,EAAcF,GAC1B,OAAO0C,KAAKC,MAAM,EAAEvF,iBAAmB4C,GAAK,EAAE5C,gBAClD,CCzQoC,IAAIwF,OAAO,oBAA0E,KC3CzHC,UAAaC,IACT,MAAM,sBAAEC,EAAqB,aAAEnF,EAAY,SAAEC,EAAQ,YAAEC,EAAW,MAAEkF,EAAK,IAAEC,GAAQH,EAAMI,KACnFC,EAAc,IAAI9E,MAAM4E,EAAMD,GAC9BI,EAAqB,IAAI/E,MAAM,GACrC,IAAK,IAAIgF,EAAOL,EAAOK,EAAOJ,IAAOI,EACjC,IACI,MAAM1F,EAAaoF,EAAsBM,GACzCF,EAAYE,EAAOL,GAAStF,EAAoBC,EAAYC,EAAcC,EAAUC,EACxF,CACA,MAAOwF,GACH,MACMC,EAAM,8BAA8BF,MAD3BC,aAAeE,MAAQF,EAAIG,QAAUH,EAAII,cAExDN,EAAmBtD,KAAKyD,EAC5B,CAEJI,YAAY,CAAER,cAAaC,sBAAqB,E","sources":["webpack://bio/./node_modules/@datagrok-libraries/bio/src/monomer-works/consts.js","webpack://bio/./node_modules/@datagrok-libraries/bio/src/monomer-works/to-atomic-level-utils.js","webpack://bio/./node_modules/@datagrok-libraries/bio/src/utils/const.js","webpack://bio/./node_modules/@datagrok-libraries/bio/src/monomer-works/seq-to-molfile-worker.js"],"sourcesContent":["export const monomerWorksConsts = {\n // constants for parsing molfile V2000\n V2K_RGP_SHIFT: 8,\n V2K_RGP_LINE: 'M RGP',\n V2K_A_LINE: 'A ',\n // constants for parsing/reconstruction of molfile V3000\n V3K_COUNTS_SHIFT: 14,\n V3K_IDX_SHIFT: 7,\n V3K_HEADER_FIRST_LINE: '\\nDatagrok macromolecule handler\\n\\n',\n V3K_HEADER_SECOND_LINE: ' 0 0 0 0 0 0 999 V3000\\n',\n V3K_BEGIN_CTAB_BLOCK: 'M V30 BEGIN CTAB\\n',\n V3K_END_CTAB_BLOCK: 'M V30 END CTAB\\n',\n V3K_BEGIN_COUNTS_LINE: 'M V30 COUNTS ',\n V3K_COUNTS_LINE_ENDING: ' 0 0 0\\n',\n V3K_BEGIN_ATOM_BLOCK: 'M V30 BEGIN ATOM\\n',\n V3K_END_ATOM_BLOCK: 'M V30 END ATOM\\n',\n V3K_BEGIN_BOND_BLOCK: 'M V30 BEGIN BOND\\n',\n V3K_END_BOND_BLOCK: 'M V30 END BOND\\n',\n V3K_BOND_CONFIG: ' CFG=',\n V3K_BEGIN_DATA_LINE: 'M V30 ',\n V3K_END: 'M END',\n PRECISION_FACTOR: 10000,\n // 
symbols for the corresponding monomers in HELM library\n DEOXYRIBOSE: 'd',\n RIBOSE: 'r',\n PHOSPHATE: 'p',\n OXYGEN: 'O',\n HYDROGEN: 'H',\n};\n//# sourceMappingURL=consts.js.map","import { monomerWorksConsts as C } from './consts';\nimport { HELM_CORE_FIELDS, } from '../utils/const';\n/** Get a mapping of peptide symbols to HELM monomer library objects with selected fields.\n * @param {IMonomerLib} monomerLib - Monomer library\n * @param {HELM_POLYMER_TYPE} polymerType - Polymer type\n * @param {ALPHABET} alphabet - Alphabet of the column\n * @return {Map<string, any>} - Mapping of peptide symbols to HELM monomer library objects with selected fields*/\nexport function getFormattedMonomerLib(monomerLib, polymerType, alphabet) {\n const map = new Map();\n for (const monomerSymbol of monomerLib.getMonomerSymbolsByType(polymerType)) {\n const it = monomerLib.getMonomer(polymerType, monomerSymbol);\n if (polymerType === \"RNA\" /* HELM_POLYMER_TYPE.RNA */ &&\n (it[\"monomerType\" /* HELM_FIELDS.MONOMER_TYPE */] === \"Branch\" /* HELM_MONOMER_TYPE.BRANCH */ ||\n alphabet === \"DNA\" /* ALPHABET.DNA */ && it[\"symbol\" /* HELM_FIELDS.SYMBOL */] === C.DEOXYRIBOSE ||\n alphabet === \"RNA\" /* ALPHABET.RNA */ && it[\"symbol\" /* HELM_FIELDS.SYMBOL */] === C.RIBOSE ||\n it[\"symbol\" /* HELM_FIELDS.SYMBOL */] === C.PHOSPHATE) ||\n polymerType === \"PEPTIDE\" /* HELM_POLYMER_TYPE.PEPTIDE */ &&\n it[\"monomerType\" /* HELM_FIELDS.MONOMER_TYPE */] !== \"Branch\" /* HELM_MONOMER_TYPE.BRANCH */) {\n const monomerObject = {};\n HELM_CORE_FIELDS.forEach((field) => {\n //@ts-ignore\n monomerObject[field] = it[field];\n });\n map.set(it[\"symbol\" /* HELM_FIELDS.SYMBOL */], monomerObject);\n }\n }\n return map;\n}\n/** Translate a sequence of monomer symbols into Molfile V3000\n * @param {string[]} monomerSeq - Sequence of monomer symbols\n * @param {Map<string, MolGraph>} monomersDict - Mapping of monomer symbols to MolGraph objects\n * @param {ALPHABET} alphabet - Alphabet of the 
column\n * @param {HELM_POLYMER_TYPE} polymerType - Polymer type\n * @return {string} - Molfile V3000*/\nexport function monomerSeqToMolfile(monomerSeq, monomersDict, alphabet, polymerType) {\n if (monomerSeq.length === 0) {\n // throw new Error('monomerSeq is empty');\n return '';\n }\n // define atom and bond counts, taking into account the bond type\n const getAtomAndBondCounts = getResultingAtomBondCounts;\n const { atomCount, bondCount } = getAtomAndBondCounts(monomerSeq, monomersDict, alphabet, polymerType);\n // create arrays to store lines of the resulting molfile\n const molfileAtomBlock = new Array(atomCount);\n const molfileBondBlock = new Array(bondCount);\n let addMonomerToMolblock; // todo: types?\n let sugar = null;\n let phosphate = null;\n if (polymerType === \"PEPTIDE\" /* HELM_POLYMER_TYPE.PEPTIDE */) {\n addMonomerToMolblock = addAminoAcidToMolblock;\n }\n else { // nucleotides\n addMonomerToMolblock = addNucleotideToMolblock;\n sugar = (alphabet === \"DNA\" /* ALPHABET.DNA */) ? 
monomersDict.get(C.DEOXYRIBOSE) : monomersDict.get(C.RIBOSE);\n phosphate = monomersDict.get(C.PHOSPHATE);\n }\n const v = {\n i: 0,\n nodeShift: 0,\n bondShift: 0,\n backbonePositionShift: new Array(2).fill(0),\n branchPositionShift: new Array(2).fill(0),\n backboneAttachNode: 0,\n branchAttachNode: 0,\n flipFactor: 1,\n };\n const LC = {\n sugar: sugar,\n phosphate: phosphate,\n seqLength: monomerSeq.length,\n atomCount: atomCount,\n bondCount: bondCount,\n };\n const steabsCollection = [];\n let nAtoms = 0;\n for (v.i = 0; v.i < LC.seqLength; ++v.i) {\n const monomer = monomersDict.get(monomerSeq[v.i]);\n addMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v, LC);\n //adding stereo atoms to array for further STEABS block generation\n monomer.stereoAtoms?.forEach((i) => steabsCollection.push(i + nAtoms));\n nAtoms += monomer.atoms.x.length;\n }\n capResultingMolblock(molfileAtomBlock, molfileBondBlock, v, LC);\n const molfileCountsLine = C.V3K_BEGIN_COUNTS_LINE + atomCount + ' ' + bondCount + C.V3K_COUNTS_LINE_ENDING;\n // todo: possible optimization may be achieved by replacing .join('') with +=\n // since counterintuitively joining an array into a new string is reportedly\n // slower than using += as below\n let result = '';\n result += C.V3K_HEADER_FIRST_LINE;\n result += C.V3K_HEADER_SECOND_LINE;\n result += C.V3K_BEGIN_CTAB_BLOCK;\n result += molfileCountsLine;\n result += C.V3K_BEGIN_ATOM_BLOCK;\n result += molfileAtomBlock.join('');\n result += C.V3K_END_ATOM_BLOCK;\n result += C.V3K_BEGIN_BOND_BLOCK;\n result += molfileBondBlock.join('');\n result += C.V3K_END_BOND_BLOCK;\n if (steabsCollection.length > 0)\n result += getCollectionBlock(steabsCollection);\n result += C.V3K_END_CTAB_BLOCK;\n result += C.V3K_END;\n // return molfileParts.join('');\n return result;\n}\nfunction getCollectionBlock(collection) {\n //one row in STEABS block can be no longer than 80 symbols\n //maxSymbols = 80 symbols minus ' -\\n' (4 symbols)\n const maxSymbols = 
76;\n const rowsArray = [];\n let newCollectionRow = `M V30 MDLV30/STEABS ATOMS=(${collection.length}`;\n for (let i = 0; i < collection.length; i++) {\n const updatedRow = `${newCollectionRow} ${collection[i]}`;\n if (updatedRow.length > maxSymbols) {\n rowsArray.push(`${newCollectionRow} -\\n`);\n newCollectionRow = `M V30 ${collection[i]}`;\n }\n else\n newCollectionRow = updatedRow;\n //in case last atom was added - close the block\n if (i === collection.length - 1)\n rowsArray.push(`${newCollectionRow})\\n`);\n }\n return `M V30 BEGIN COLLECTION\\n${rowsArray.join('')}M V30 END COLLECTION\\n`;\n}\n/** Cap the resulting (after sewing up all the monomers) molfile with 'O'\n * @param {string[]} molfileAtomBlock - Array of lines of the resulting molfile atom block\n * @param {string[]} molfileBondBlock - Array of lines of the resulting molfile bond block\n * @param {LoopVariables} v - Loop variables\n * @param {LoopConstants} LC - Loop constants*/\nfunction capResultingMolblock(molfileAtomBlock, molfileBondBlock, v, LC) {\n // add terminal oxygen\n const atomIdx = v.nodeShift + 1;\n molfileAtomBlock[LC.atomCount] = C.V3K_BEGIN_DATA_LINE + atomIdx + ' ' +\n C.OXYGEN + ' ' + keepPrecision(v.backbonePositionShift[0]) + ' ' +\n v.flipFactor * keepPrecision(v.backbonePositionShift[1]) + ' ' + '0.000000 0' + '\\n';\n // add terminal bond\n const firstAtom = v.backboneAttachNode;\n const secondAtom = atomIdx;\n molfileBondBlock[LC.bondCount] = C.V3K_BEGIN_DATA_LINE + v.bondShift + ' ' +\n 1 + ' ' + firstAtom + ' ' + secondAtom + '\\n';\n}\nfunction addAminoAcidToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {\n v.flipFactor = (-1) ** (v.i % 2); // to flip every even monomer over OX\n addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v);\n}\nfunction addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {\n // todo: remove these comments to the docstrings of the corr. 
functions\n // construnct the lines of V3K molfile atom block\n fillAtomLines(monomer, molfileAtomBlock, v);\n // construct the lines of V3K molfile bond block\n fillBondLines(monomer, molfileBondBlock, v);\n // peptide bond\n fillChainExtendingBond(monomer, molfileBondBlock, v);\n // update branch variables if necessary\n if (monomer.meta.branchShift !== null && monomer.meta.terminalNodes.length > 2)\n updateBranchVariables(monomer, v);\n // update loop variables\n updateChainExtendingVariables(monomer, v);\n}\nfunction addNucleotideToMolblock(nucleobase, molfileAtomBlock, molfileBondBlock, v, LC) {\n // construnct the lines of V3K molfile atom block corresponding to phosphate\n // and sugar\n if (v.i === 0) {\n addBackboneMonomerToMolblock(LC.sugar, molfileAtomBlock, molfileBondBlock, v);\n }\n else {\n for (const monomer of [LC.phosphate, LC.sugar])\n addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v);\n }\n addBranchMonomerToMolblock(nucleobase, molfileAtomBlock, molfileBondBlock, v);\n}\nfunction addBranchMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {\n fillBranchAtomLines(monomer, molfileAtomBlock, v);\n fillBondLines(monomer, molfileBondBlock, v);\n fillBackboneToBranchBond(monomer, molfileBondBlock, v);\n // C-N bond\n const bondIdx = v.bondShift;\n const firstAtom = v.branchAttachNode;\n const secondAtom = monomer.meta.terminalNodes[0] + v.nodeShift;\n molfileBondBlock[bondIdx - 1] = C.V3K_BEGIN_DATA_LINE + bondIdx + ' ' +\n 1 + ' ' + firstAtom + ' ' + secondAtom + '\\n';\n // update loop variables\n v.bondShift += monomer.bonds.atomPairs.length + 1;\n v.nodeShift += monomer.atoms.atomTypes.length;\n}\nfunction updateChainExtendingVariables(monomer, v) {\n v.backboneAttachNode = v.nodeShift + monomer.meta.terminalNodes[1];\n v.bondShift += monomer.bonds.atomPairs.length + 1;\n v.nodeShift += monomer.atoms.atomTypes.length;\n v.backbonePositionShift[0] += monomer.meta.backboneShift[0]; // todo: non-null check\n 
v.backbonePositionShift[1] += v.flipFactor * monomer.meta.backboneShift[1];\n}\nfunction updateBranchVariables(monomer, v) {\n v.branchAttachNode = v.nodeShift + monomer.meta.terminalNodes[2];\n for (let i = 0; i < 2; ++i)\n v.branchPositionShift[i] = v.backbonePositionShift[i] + monomer.meta.branchShift[i];\n}\nfunction fillAtomLines(monomer, molfileAtomBlock, v) {\n for (let j = 0; j < monomer.atoms.atomTypes.length; ++j) {\n const atomIdx = v.nodeShift + j + 1;\n molfileAtomBlock[v.nodeShift + j] = C.V3K_BEGIN_DATA_LINE + atomIdx + ' ' +\n monomer.atoms.atomTypes[j] + ' ' +\n keepPrecision(v.backbonePositionShift[0] + monomer.atoms.x[j]) + ' ' +\n keepPrecision(v.backbonePositionShift[1] + v.flipFactor * monomer.atoms.y[j]) +\n ' ' + monomer.atoms.kwargs[j];\n }\n}\n// todo: remove as quickfix\nfunction fillBranchAtomLines(monomer, molfileAtomBlock, v) {\n for (let j = 0; j < monomer.atoms.atomTypes.length; ++j) {\n const atomIdx = v.nodeShift + j + 1;\n molfileAtomBlock[v.nodeShift + j] = C.V3K_BEGIN_DATA_LINE + atomIdx + ' ' +\n monomer.atoms.atomTypes[j] + ' ' +\n keepPrecision(v.branchPositionShift[0] + monomer.atoms.x[j]) + ' ' +\n keepPrecision(v.branchPositionShift[1] + v.flipFactor * monomer.atoms.y[j]) +\n ' ' + monomer.atoms.kwargs[j];\n }\n}\nfunction fillBondLines(monomer, molfileBondBlock, v) {\n // construct the lines of V3K molfile bond block\n for (let j = 0; j < monomer.bonds.atomPairs.length; ++j) {\n const bondIdx = v.bondShift + j + 1;\n const firstAtom = monomer.bonds.atomPairs[j][0] + v.nodeShift;\n const secondAtom = monomer.bonds.atomPairs[j][1] + v.nodeShift;\n let bondCfg = '';\n if (monomer.bonds.bondConfiguration.has(j)) {\n // flip orientation when necessary\n let orientation = monomer.bonds.bondConfiguration.get(j);\n if (v.flipFactor < 0)\n orientation = (orientation === 1) ? 
3 : 1;\n bondCfg = ' CFG=' + orientation;\n }\n const kwargs = monomer.bonds.kwargs.has(j) ?\n ' ' + monomer.bonds.kwargs.get(j) : '';\n molfileBondBlock[v.bondShift + j] = C.V3K_BEGIN_DATA_LINE + bondIdx + ' ' +\n monomer.bonds.bondTypes[j] + ' ' +\n firstAtom + ' ' + secondAtom + bondCfg + kwargs + '\\n';\n }\n}\nfunction fillChainExtendingBond(monomer, molfileBondBlock, v) {\n if (v.backboneAttachNode !== 0) {\n const bondIdx = v.bondShift;\n const firstAtom = v.backboneAttachNode;\n const secondAtom = monomer.meta.terminalNodes[0] + v.nodeShift;\n molfileBondBlock[v.bondShift - 1] = C.V3K_BEGIN_DATA_LINE + bondIdx + ' ' +\n 1 + ' ' + firstAtom + ' ' + secondAtom + '\\n';\n }\n}\n// todo: remove\nfunction fillBackboneToBranchBond(branchMonomer, molfileBondBlock, v) {\n const bondIdx = v.bondShift;\n const firstAtom = v.branchAttachNode;\n const secondAtom = branchMonomer.meta.terminalNodes[0] + v.nodeShift;\n molfileBondBlock[bondIdx - 1] = C.V3K_BEGIN_DATA_LINE + bondIdx + ' ' +\n 1 + ' ' + firstAtom + ' ' + secondAtom + '\\n';\n}\n/** Compute the atom/bond counts for the resulting molfile, depending on the\n * type of polymer (peptide/nucleotide)\n * @param {string[]}monomerSeq - the sequence of monomers\n * @param {Map<string, MolGraph>}monomersDict - the dictionary of monomers\n * @param {ALPHABET}alphabet - the alphabet of the monomers\n * @param {HELM_POLYMER_TYPE}polymerType - the type of polymer\n * @return {{atomCount: number, bondCount: number}} - the atom/bond counts*/\nfunction getResultingAtomBondCounts(monomerSeq, monomersDict, alphabet, polymerType) {\n let atomCount = 0;\n let bondCount = 0;\n // sum up all the atoms/nodes provided by the sequence\n for (const monomerSymbol of monomerSeq) {\n if (monomerSymbol === '')\n continue; // Skip for gap/empty monomer in MSA\n const monomer = monomersDict.get(monomerSymbol);\n atomCount += monomer.atoms.x.length;\n bondCount += monomer.bonds.bondTypes.length;\n }\n // add extra values depending on the 
polymer type\n if (polymerType === \"PEPTIDE\" /* HELM_POLYMER_TYPE.PEPTIDE */) {\n // add the rightmost/terminating cap group 'OH' (i.e. 'O')\n atomCount += 1;\n // add chain-extending bonds (C-NH per each monomer pair and terminal C-OH)\n bondCount += monomerSeq.length;\n }\n else { // nucleotides\n const sugar = (alphabet === \"DNA\" /* ALPHABET.DNA */) ?\n monomersDict.get(C.DEOXYRIBOSE) : monomersDict.get(C.RIBOSE);\n const phosphate = monomersDict.get(C.PHOSPHATE);\n // add phosphate per each pair of nucleobase symbols\n atomCount += (monomerSeq.length - 1) * phosphate.atoms.x.length;\n // add sugar per each nucleobase symbol\n atomCount += monomerSeq.length * sugar.atoms.x.length;\n // add the leftmost cap group 'OH' (i.e. 'O')\n atomCount += 1;\n // add bonds from phosphate monomers\n bondCount += (monomerSeq.length - 1) * phosphate.bonds.bondTypes.length;\n // add bonds from sugar monomers\n bondCount += monomerSeq.length * sugar.bonds.bondTypes.length;\n // exclude the first chain-extending bond O-P (absent, no 'leftmost' phosphate)\n bondCount -= 1;\n // add chain-extending and branch bonds (O-P, C-O and C-N per each nucleotide)\n bondCount += monomerSeq.length * 3;\n }\n return { atomCount, bondCount };\n}\n/** Keep precision upon floating point operations over atom coordinates\n * @param {number}x - the floating point number\n * @return {number} - the floating point number with the same precision\n*/\nexport function keepPrecision(x) {\n return Math.round(C.PRECISION_FACTOR * x) / C.PRECISION_FACTOR;\n}\n//# sourceMappingURL=to-atomic-level-utils.js.map","import '../types/helm';\n// core fields of HELM library object used in toAtomicLevel function\nexport const HELM_CORE_FIELDS = [\n \"symbol\" /* HELM_FIELDS.SYMBOL */,\n \"molfile\" /* HELM_FIELDS.MOLFILE */,\n \"rgroups\" /* HELM_FIELDS.RGROUPS */,\n \"name\" /* HELM_FIELDS.NAME */,\n // HELM_FIELDS.MONOMER_TYPE, // add if terminal monomers for PEPTIDEs to be\n // supported\n];\nexport const 
SDF_MONOMER_NAME = 'MonomerName';\n// todo: ideally, keys should be expressed via constants\nexport const jsonSdfMonomerLibDict = {\n 'monomerType': null,\n 'smiles': null,\n 'name': 'Name',\n 'author': null,\n 'molfile': 'molecule',\n 'naturalAnalog': 'MonomerNaturalAnalogCode',\n 'rgroups': 'MonomerCaps',\n 'createDate': null,\n 'id': null,\n 'polymerType': 'MonomerType',\n 'symbol': 'MonomerName'\n};\nexport const DUMMY_MONOMER = {\n 'monomerType': 'Backbone',\n 'smiles': '',\n 'name': '',\n 'author': 'Datagrok',\n 'molfile': '',\n 'naturalAnalog': '',\n 'rgroups': [],\n 'createDate': null,\n 'id': 0,\n 'polymerType': 'PEPTIDE',\n 'symbol': ''\n};\n// range of hex nubers used in PepSea library to endode monomers\nexport const MONOMER_ENCODE_MIN = 0x100;\nexport const MONOMER_ENCODE_MAX = 0x40A;\nexport const RIBOSE_SYMBOL = 'r';\nexport const DEOXYRIBOSE_SYMBOL = 'd';\nexport const PHOSPHATE_SYMBOL = 'p';\nexport const HELM_WRAPPERS_REGEXP = new RegExp(`[${RIBOSE_SYMBOL}${DEOXYRIBOSE_SYMBOL}]\\\\((\\\\w)\\\\)${PHOSPHATE_SYMBOL}?`, 'g');\n//# sourceMappingURL=const.js.map","import { monomerSeqToMolfile } from './to-atomic-level-utils';\nonmessage = (event) => {\n const { monomerSequencesArray, monomersDict, alphabet, polymerType, start, end } = event.data;\n const molfileList = new Array(end - start);\n const molfileWarningList = new Array(0);\n for (let rowI = start; rowI < end; ++rowI) {\n try {\n const monomerSeq = monomerSequencesArray[rowI];\n molfileList[rowI - start] = monomerSeqToMolfile(monomerSeq, monomersDict, alphabet, polymerType);\n }\n catch (err) {\n const errMsg = err instanceof Error ? 
err.message : err.toString();\n const msg = `Cannot get molfile of row #${rowI}: ${errMsg}.`;\n molfileWarningList.push(msg);\n }\n }\n postMessage({ molfileList, molfileWarningList });\n};\n//# sourceMappingURL=seq-to-molfile-worker.js.map"],"names":["monomerWorksConsts","V2K_RGP_SHIFT","V2K_RGP_LINE","V2K_A_LINE","V3K_COUNTS_SHIFT","V3K_IDX_SHIFT","V3K_HEADER_FIRST_LINE","V3K_HEADER_SECOND_LINE","V3K_BEGIN_CTAB_BLOCK","V3K_END_CTAB_BLOCK","V3K_BEGIN_COUNTS_LINE","V3K_COUNTS_LINE_ENDING","V3K_BEGIN_ATOM_BLOCK","V3K_END_ATOM_BLOCK","V3K_BEGIN_BOND_BLOCK","V3K_END_BOND_BLOCK","V3K_BOND_CONFIG","V3K_BEGIN_DATA_LINE","V3K_END","PRECISION_FACTOR","DEOXYRIBOSE","RIBOSE","PHOSPHATE","OXYGEN","HYDROGEN","monomerSeqToMolfile","monomerSeq","monomersDict","alphabet","polymerType","length","getAtomAndBondCounts","getResultingAtomBondCounts","atomCount","bondCount","molfileAtomBlock","Array","molfileBondBlock","addMonomerToMolblock","sugar","phosphate","addAminoAcidToMolblock","addNucleotideToMolblock","get","v","i","nodeShift","bondShift","backbonePositionShift","fill","branchPositionShift","backboneAttachNode","branchAttachNode","flipFactor","LC","seqLength","steabsCollection","nAtoms","monomer","stereoAtoms","forEach","push","atoms","x","atomIdx","keepPrecision","firstAtom","secondAtom","capResultingMolblock","molfileCountsLine","result","join","collection","rowsArray","newCollectionRow","updatedRow","getCollectionBlock","addBackboneMonomerToMolblock","j","atomTypes","y","kwargs","fillAtomLines","fillBondLines","bondIdx","meta","terminalNodes","fillChainExtendingBond","branchShift","updateBranchVariables","bonds","atomPairs","backboneShift","updateChainExtendingVariables","nucleobase","fillBranchAtomLines","branchMonomer","fillBackboneToBranchBond","addBranchMonomerToMolblock","bondCfg","bondConfiguration","has","orientation","bondTypes","monomerSymbol","Math","round","RegExp","onmessage","event","monomerSequencesArray","start","end","data","molfileList","molfileWarningList"
,"rowI","err","msg","Error","message","toString","postMessage"],"sourceRoot":""}
|