@datagrok/bio 1.5.1 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +230 -92
- package/dist/package.js +206 -74
- package/files/samples/id.csv +313 -0
- package/files/samples/sample_HELM.csv +540 -540
- package/files/samples/sample_MSA.csv +540 -540
- package/files/samples/sar-small.csv +201 -0
- package/package.json +1 -1
- package/setup.cmd +10 -1
- package/src/const.ts +0 -1
- package/src/package.ts +16 -14
- package/src/tests/convert-test.ts +31 -0
- package/src/tests/detectors-test.ts +26 -15
- package/src/utils/convert.ts +15 -3
- package/src/utils/multiple-sequence-alignment.ts +13 -8
- package/src/utils/notation-converter.ts +131 -0
- package/src/utils/sequence-activity-cliffs.ts +8 -2
- package/src/utils/sequence-space.ts +4 -4
- package/src/utils/split-to-monomers.ts +8 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
smiles
|
|
2
|
+
O=C1CN=C(c2ccccc2N1)C3CCCCC3
|
|
3
|
+
CN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
4
|
+
CCCCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
5
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
6
|
+
O=C1CN=C(c2ccccc2N1CC3CCCCC3)C4CCCCC4
|
|
7
|
+
O=C1CN=C(c2cc(Cl)ccc2N1)C3CCCCC3
|
|
8
|
+
CN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
9
|
+
CCCCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
10
|
+
CC(C)CCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
11
|
+
O=C1CN=C(c2cc(Cl)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
12
|
+
N#Cc1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
13
|
+
CN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
14
|
+
CCCCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
15
|
+
CC(C)CCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
16
|
+
N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
17
|
+
CN(C)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
18
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)C3CCCCC3
|
|
19
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
20
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
21
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
22
|
+
O=C1CN=C(c2cc(F)ccc2N1)C3CCCCC3
|
|
23
|
+
CN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
24
|
+
CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
25
|
+
CC(C)CCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
26
|
+
O=C1CN=C(c2cc(F)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
27
|
+
O=C1CN=C(c2cc(Br)ccc2N1)C3CCCCC3
|
|
28
|
+
CN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
29
|
+
CCCCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
30
|
+
CC(C)CCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
31
|
+
O=C1CN=C(c2cc(Br)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
32
|
+
O=C1CN=C(c2cc(I)ccc2N1)C3CCCCC3
|
|
33
|
+
CN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
34
|
+
CCCCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
35
|
+
CC(C)CCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
36
|
+
O=C1CN=C(c2cc(I)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
37
|
+
NC(=O)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
38
|
+
CN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
39
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
40
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
41
|
+
NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
42
|
+
O=C1CN=C(c2cc(ccc2N1)[N+](=O)[O-])C3CCCCC3
|
|
43
|
+
CN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
44
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
45
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
46
|
+
O=C1CN=C(c2cc(ccc2N1CC3CCCCC3)[N+](=O)[O-])C4CCCCC4
|
|
47
|
+
O=C1CN=C(c2ccccc2)c3ccccc3N1
|
|
48
|
+
CN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
49
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
50
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
51
|
+
O=C1CN=C(c2ccccc2)c3ccccc3N1CC4CCCCC4
|
|
52
|
+
O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1
|
|
53
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
54
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
55
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
56
|
+
O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1CC4CCCCC4
|
|
57
|
+
N#Cc1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
58
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
59
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
60
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
61
|
+
N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
62
|
+
CN(C)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
63
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)c3ccccc3
|
|
64
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
65
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
66
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
67
|
+
O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1
|
|
68
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
69
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
70
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
71
|
+
O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1CC4CCCCC4
|
|
72
|
+
O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1
|
|
73
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
74
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
75
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
76
|
+
O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1CC4CCCCC4
|
|
77
|
+
O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1
|
|
78
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
79
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
80
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
81
|
+
O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1CC4CCCCC4
|
|
82
|
+
NC(=O)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
83
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
84
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
85
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
86
|
+
NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
87
|
+
O=C1CN=C(c2ccccc2)c3cc(ccc3N1)[N+](=O)[O-]
|
|
88
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
89
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
90
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
91
|
+
O=C1CN=C(c2ccccc2)c3cc(ccc3N1CC4CCCCC4)[N+](=O)[O-]
|
|
92
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccccc23
|
|
93
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccccc23
|
|
94
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3ccccc13
|
|
95
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccccc23
|
|
96
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccccc24
|
|
97
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Cl)cc23
|
|
98
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Cl)cc23
|
|
99
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Cl)ccc13
|
|
100
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Cl)cc23
|
|
101
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Cl)cc24
|
|
102
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(C#N)cc23
|
|
103
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(C#N)cc23
|
|
104
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(C#N)ccc13
|
|
105
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(C#N)cc23
|
|
106
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(C#N)cc24
|
|
107
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)N(C)C
|
|
108
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)N(C)C
|
|
109
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)N(C)C
|
|
110
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)N(C)C
|
|
111
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)N(C)C
|
|
112
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(F)cc23
|
|
113
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(F)cc23
|
|
114
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(F)ccc13
|
|
115
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(F)cc23
|
|
116
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(F)cc24
|
|
117
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Br)cc23
|
|
118
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Br)cc23
|
|
119
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Br)ccc13
|
|
120
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Br)cc23
|
|
121
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Br)cc24
|
|
122
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(I)cc23
|
|
123
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(I)cc23
|
|
124
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(I)ccc13
|
|
125
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(I)cc23
|
|
126
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(I)cc24
|
|
127
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)C(N)=O
|
|
128
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)C(N)=O
|
|
129
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)C(N)=O
|
|
130
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)C(N)=O
|
|
131
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)C(N)=O
|
|
132
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)[N+](=O)[O-]
|
|
133
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)[N+](=O)[O-]
|
|
134
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)[N+](=O)[O-]
|
|
135
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)[N+](=O)[O-]
|
|
136
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)[N+](=O)[O-]
|
|
137
|
+
CC1N=C(c2ccccc2NC1=O)C3CCCCC3
|
|
138
|
+
CC1N=C(c2ccccc2N(C)C1=O)C3CCCCC3
|
|
139
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
|
|
140
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
|
|
141
|
+
CC1N=C(c2ccccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
142
|
+
CC1N=C(c2cc(Cl)ccc2NC1=O)C3CCCCC3
|
|
143
|
+
CC1N=C(c2cc(Cl)ccc2N(C)C1=O)C3CCCCC3
|
|
144
|
+
CCCCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
145
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
146
|
+
CC1N=C(c2cc(Cl)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
147
|
+
CC1N=C(c2cc(C#N)ccc2NC1=O)C3CCCCC3
|
|
148
|
+
CC1N=C(c2cc(C#N)ccc2N(C)C1=O)C3CCCCC3
|
|
149
|
+
CCCCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
150
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
151
|
+
CC1N=C(c2cc(C#N)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
152
|
+
CC1N=C(c2cc(ccc2NC1=O)N(C)C)C3CCCCC3
|
|
153
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)N(C)C)C3CCCCC3
|
|
154
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
155
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
156
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)N(C)C)C4CCCCC4
|
|
157
|
+
CC1N=C(c2cc(F)ccc2NC1=O)C3CCCCC3
|
|
158
|
+
CC1N=C(c2cc(F)ccc2N(C)C1=O)C3CCCCC3
|
|
159
|
+
CCCCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
|
|
160
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
|
|
161
|
+
CC1N=C(c2cc(F)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
162
|
+
CC1N=C(c2cc(Br)ccc2NC1=O)C3CCCCC3
|
|
163
|
+
CC1N=C(c2cc(Br)ccc2N(C)C1=O)C3CCCCC3
|
|
164
|
+
CCCCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
|
|
165
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
|
|
166
|
+
CC1N=C(c2cc(Br)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
167
|
+
CC1N=C(c2cc(I)ccc2NC1=O)C3CCCCC3
|
|
168
|
+
CC1N=C(c2cc(I)ccc2N(C)C1=O)C3CCCCC3
|
|
169
|
+
CCCCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
|
|
170
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
|
|
171
|
+
CC1N=C(c2cc(I)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
172
|
+
CC1N=C(c2cc(ccc2NC1=O)C(N)=O)C3CCCCC3
|
|
173
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)C(N)=O)C3CCCCC3
|
|
174
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
175
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
176
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)C(N)=O)C4CCCCC4
|
|
177
|
+
CC1N=C(c2cc(ccc2NC1=O)[N+](=O)[O-])C3CCCCC3
|
|
178
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)[N+](=O)[O-])C3CCCCC3
|
|
179
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
180
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
181
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)[N+](=O)[O-])C4CCCCC4
|
|
182
|
+
CC1N=C(c2ccccc2)c3ccccc3NC1=O
|
|
183
|
+
CC1N=C(c2ccccc2)c3ccccc3N(C)C1=O
|
|
184
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
|
|
185
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
|
|
186
|
+
CC1N=C(c2ccccc2)c3ccccc3N(CC4CCCCC4)C1=O
|
|
187
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3NC1=O
|
|
188
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(C)C1=O
|
|
189
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
|
|
190
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
|
|
191
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(CC4CCCCC4)C1=O
|
|
192
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3NC1=O
|
|
193
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(C)C1=O
|
|
194
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
|
|
195
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
|
|
196
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(CC4CCCCC4)C1=O
|
|
197
|
+
CC1N=C(c2ccccc2)c3cc(ccc3NC1=O)N(C)C
|
|
198
|
+
CC1N=C(c2ccccc2)c3cc(ccc3N(C)C1=O)N(C)C
|
|
199
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
200
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
201
|
+
CC1N=C(c2ccccc2)c3cc(ccc3N(CC4CCCCC4)C1=O)N(C)C
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.5.1",
|
|
5
|
+
"version": "1.5.4",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
package/setup.cmd
CHANGED
|
@@ -1,10 +1,19 @@
|
|
|
1
1
|
cd ../../js-api
|
|
2
2
|
call npm install
|
|
3
3
|
call npm link
|
|
4
|
+
cd ../libraries/utils
|
|
5
|
+
call npm install
|
|
6
|
+
call npm link
|
|
7
|
+
call npm link datagrok-api
|
|
8
|
+
cd ../libraries/ml
|
|
9
|
+
call npm install
|
|
10
|
+
call npm link
|
|
11
|
+
call npm link @datagrok-libraries/utils
|
|
4
12
|
cd ../libraries/bio
|
|
5
13
|
call npm install
|
|
6
14
|
call npm link
|
|
15
|
+
call npm link @datagrok-libraries/utils
|
|
7
16
|
cd ../../packages/Bio
|
|
8
17
|
call npm install
|
|
9
|
-
call npm link datagrok-api @datagrok-libraries/bio
|
|
18
|
+
call npm link datagrok-api @datagrok-libraries/bio @datagrok-libraries/utils @datagrok-libraries/ml
|
|
10
19
|
webpack
|
package/src/const.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
export const _package = new DG.Package();
|
|
7
7
|
|
|
8
|
-
import {mmSemType} from './const';
|
|
9
8
|
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
9
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
10
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
@@ -48,8 +47,8 @@ export function vdRegionViewer() {
|
|
|
48
47
|
return new VdRegionsViewer();
|
|
49
48
|
}
|
|
50
49
|
|
|
51
|
-
//top-menu: Bio | Activity Cliffs...
|
|
52
|
-
//name: Activity Cliffs
|
|
50
|
+
//top-menu: Bio | Sequence Activity Cliffs...
|
|
51
|
+
//name: Sequence Activity Cliffs
|
|
53
52
|
//description: detect activity cliffs
|
|
54
53
|
//input: dataframe df [Input data table]
|
|
55
54
|
//input: column sequence {semType: Macromolecule}
|
|
@@ -120,20 +119,23 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
|
|
|
120
119
|
//top-menu: Bio | Composition Analysis
|
|
121
120
|
//output: viewer result
|
|
122
121
|
export async function compositionAnalysis(): Promise<void> {
|
|
123
|
-
|
|
124
|
-
|
|
122
|
+
// Higher priority for columns with MSA data to show with WebLogo.
|
|
123
|
+
const tv = grok.shell.tv;
|
|
124
|
+
const df = tv.dataFrame;
|
|
125
|
+
const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
126
|
+
let col: DG.Column | undefined = semTypeColList.find((col) => {
|
|
127
|
+
const units = col.getTag(DG.TAGS.UNITS);
|
|
128
|
+
return units ? units.indexOf('MSA') !== -1 : false;
|
|
129
|
+
});
|
|
130
|
+
if (!col)
|
|
131
|
+
col = semTypeColList[0];
|
|
132
|
+
|
|
133
|
+
if (!col) {
|
|
125
134
|
grok.shell.error('Current table does not contain sequences');
|
|
126
135
|
return;
|
|
127
136
|
}
|
|
128
137
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
for (const v of grok.shell.views) {
|
|
132
|
-
if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
133
|
-
(v as DG.TableView).dockManager.dock(wl.root, 'down');
|
|
134
|
-
break;
|
|
135
|
-
}
|
|
136
|
-
}
|
|
138
|
+
tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
137
139
|
}
|
|
138
140
|
|
|
139
141
|
// helper function for importFasta
|
|
@@ -182,7 +184,7 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
182
184
|
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
183
185
|
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
184
186
|
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
185
|
-
sequenceCol.semType =
|
|
187
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
186
188
|
const units: string = `fasta:${seqType}:${alphabet}`;
|
|
187
189
|
sequenceCol.setTag(DG.TAGS.UNITS, units);
|
|
188
190
|
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
// import {mmSemType} from '../const';
|
|
8
|
+
// import {importFasta} from '../package';
|
|
9
|
+
|
|
10
|
+
category('converters', () => {
|
|
11
|
+
// test('a', async () => {await _a();});
|
|
12
|
+
// test('b', async () => {await _b();});
|
|
13
|
+
test('testFastaToSeparator', async () => { await _testFastaToSeparator(); });
|
|
14
|
+
test('testSeparatorToFasta', async () => { await _testSeparatorToFasta(); });
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
// export async function _a() {
|
|
18
|
+
// expect(1, 1);
|
|
19
|
+
// }
|
|
20
|
+
//
|
|
21
|
+
// export async function _b() {
|
|
22
|
+
// expect(1, 2);
|
|
23
|
+
// }
|
|
24
|
+
|
|
25
|
+
export async function _testFastaToSeparator() {
|
|
26
|
+
expect(1, 1);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export async function _testSeparatorToFasta() {
|
|
30
|
+
expect(1, 2);
|
|
31
|
+
}
|
|
@@ -4,7 +4,6 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
|
-
import {mmSemType} from '../const';
|
|
8
7
|
import {importFasta} from '../package';
|
|
9
8
|
|
|
10
9
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
@@ -93,12 +92,16 @@ MWRSWY-CKHP
|
|
|
93
92
|
peptidesComplex = 'PeptidesComplex',
|
|
94
93
|
fastaCsv = 'FastaCsv',
|
|
95
94
|
msaComplex = 'MsaComplex',
|
|
95
|
+
idCsv = 'IdCsv',
|
|
96
|
+
sarSmallCsv = 'SarSmallCsv',
|
|
96
97
|
}
|
|
97
98
|
|
|
98
99
|
const samples: { [key: string]: string } = {
|
|
99
|
-
'PeptidesComplex': 'System:AppData/Bio/samples/
|
|
100
|
+
'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_msa.csv',
|
|
100
101
|
'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
101
102
|
'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
103
|
+
'IdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
104
|
+
'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
|
|
102
105
|
};
|
|
103
106
|
|
|
104
107
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -183,20 +186,28 @@ MWRSWY-CKHP
|
|
|
183
186
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
184
187
|
await _testNeg(readSamplesCsv(Samples.msaComplex), 'Activity');
|
|
185
188
|
});
|
|
189
|
+
|
|
190
|
+
test('samplesIdCsvNegativeID', async () => {
|
|
191
|
+
await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
test('samplesSarSmallCsvNegativeSmiles', async () => {
|
|
195
|
+
await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
|
|
196
|
+
});
|
|
186
197
|
});
|
|
187
198
|
|
|
188
199
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
189
200
|
const df: DG.DataFrame = await readDf();
|
|
190
201
|
|
|
191
202
|
const col: DG.Column = df.col(colName)!;
|
|
192
|
-
expect(col.semType ===
|
|
203
|
+
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
|
|
193
204
|
}
|
|
194
205
|
|
|
195
206
|
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
|
|
196
207
|
const df: DG.DataFrame = await readDf();
|
|
197
208
|
|
|
198
209
|
const col: DG.Column = df.col(colName)!;
|
|
199
|
-
expect(col.semType ===
|
|
210
|
+
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
200
211
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
201
212
|
if (separator)
|
|
202
213
|
expect(col.getTag('separator'), separator);
|
|
@@ -207,7 +218,7 @@ export async function _testN1(csvDfN1: string) {
|
|
|
207
218
|
await grok.data.detectSemanticTypes(dfN1);
|
|
208
219
|
|
|
209
220
|
const col: DG.Column = dfN1.col('seq')!;
|
|
210
|
-
expect(col.semType,
|
|
221
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
211
222
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
|
|
212
223
|
}
|
|
213
224
|
|
|
@@ -216,7 +227,7 @@ export async function _testAA1(csvDfAA1: string) {
|
|
|
216
227
|
await grok.data.detectSemanticTypes(dfAA1);
|
|
217
228
|
|
|
218
229
|
const col: DG.Column = dfAA1.col('seq')!;
|
|
219
|
-
expect(col.semType,
|
|
230
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
220
231
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
221
232
|
}
|
|
222
233
|
|
|
@@ -225,7 +236,7 @@ export async function _testMsaN1(csvDfMsaN1: string) {
|
|
|
225
236
|
await grok.data.detectSemanticTypes(dfMsaN1);
|
|
226
237
|
|
|
227
238
|
const col: DG.Column = dfMsaN1.col('seq')!;
|
|
228
|
-
expect(col.semType,
|
|
239
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
229
240
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
|
|
230
241
|
}
|
|
231
242
|
|
|
@@ -234,7 +245,7 @@ export async function _testMsaAA1(csvDfMsaAA1: string) {
|
|
|
234
245
|
await grok.data.detectSemanticTypes(dfMsaAA1);
|
|
235
246
|
|
|
236
247
|
const col: DG.Column = dfMsaAA1.col('seq')!;
|
|
237
|
-
expect(col.semType,
|
|
248
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
238
249
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
|
|
239
250
|
}
|
|
240
251
|
|
|
@@ -243,7 +254,7 @@ export async function _testSepNt(csv: string, separator: string) {
|
|
|
243
254
|
await grok.data.detectSemanticTypes(df);
|
|
244
255
|
|
|
245
256
|
const col: DG.Column = df.col('seq')!;
|
|
246
|
-
expect(col.semType,
|
|
257
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
247
258
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:NT');
|
|
248
259
|
expect(col.getTag('separator'), separator);
|
|
249
260
|
}
|
|
@@ -253,7 +264,7 @@ export async function _testSepPt(csv: string, separator: string) {
|
|
|
253
264
|
await grok.data.detectSemanticTypes(df);
|
|
254
265
|
|
|
255
266
|
const col: DG.Column = df.col('seq')!;
|
|
256
|
-
expect(col.semType,
|
|
267
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
257
268
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:PT');
|
|
258
269
|
expect(col.getTag('separator'), separator);
|
|
259
270
|
}
|
|
@@ -263,7 +274,7 @@ export async function _testSepUn(csv: string, separator: string) {
|
|
|
263
274
|
await grok.data.detectSemanticTypes(df);
|
|
264
275
|
|
|
265
276
|
const col: DG.Column = df.col('seq')!;
|
|
266
|
-
expect(col.semType,
|
|
277
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
267
278
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
|
|
268
279
|
expect(col.getTag('separator'), separator);
|
|
269
280
|
}
|
|
@@ -273,7 +284,7 @@ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
|
|
|
273
284
|
await grok.data.detectSemanticTypes(dfSepMsaN1);
|
|
274
285
|
|
|
275
286
|
const col: DG.Column = dfSepMsaN1.col('seq')!;
|
|
276
|
-
expect(col.semType,
|
|
287
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
277
288
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
|
|
278
289
|
}
|
|
279
290
|
|
|
@@ -283,7 +294,7 @@ export async function _testSamplesFastaCsvPt() {
|
|
|
283
294
|
await grok.data.detectSemanticTypes(df);
|
|
284
295
|
|
|
285
296
|
const col: DG.Column = df.col('sequence')!;
|
|
286
|
-
expect(col.semType,
|
|
297
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
287
298
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
288
299
|
expect(col.getTag('separator'), null);
|
|
289
300
|
}
|
|
@@ -293,7 +304,7 @@ export async function _testSamplesFastaFastaPt() {
|
|
|
293
304
|
const df: DG.DataFrame = importFasta(fasta)[0];
|
|
294
305
|
|
|
295
306
|
const col: DG.Column = df.col('sequence')!;
|
|
296
|
-
expect(col.semType,
|
|
307
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
297
308
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
298
309
|
expect(col.getTag('separator'), null);
|
|
299
310
|
}
|
|
@@ -304,7 +315,7 @@ export async function _testSamplesPeptidesComplexUn() {
|
|
|
304
315
|
await grok.data.detectSemanticTypes(df);
|
|
305
316
|
|
|
306
317
|
const col: DG.Column = df.col('AlignedSequence')!;
|
|
307
|
-
expect(col.semType,
|
|
318
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
308
319
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
|
|
309
320
|
expect(col.getTag('separator'), '-');
|
|
310
321
|
}
|
package/src/utils/convert.ts
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import {NotationConverter} from './notation-converter';
|
|
3
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Converts notations of a Macromolecule column
|
|
7
|
+
*
|
|
8
|
+
* @param {DG.column} col Column with 'Macromolecule' semantic type
|
|
9
|
+
*/
|
|
4
10
|
export function convert(col: DG.Column): void {
|
|
5
11
|
const current = col.tags[DG.TAGS.UNITS];
|
|
6
12
|
//TODO: read all notations
|
|
7
|
-
const
|
|
8
|
-
|
|
13
|
+
const units = [
|
|
14
|
+
'fasta',
|
|
15
|
+
'separator',
|
|
16
|
+
'HELM'
|
|
17
|
+
];
|
|
18
|
+
const choices = ui.choiceInput('convert to', '', units.filter((e) => e !== current));
|
|
9
19
|
|
|
10
20
|
ui.dialog('Convert sequence')
|
|
11
21
|
.add(
|
|
@@ -17,7 +27,9 @@ export function convert(col: DG.Column): void {
|
|
|
17
27
|
)
|
|
18
28
|
.onOK(() => {
|
|
19
29
|
//TODO: create new converted column
|
|
20
|
-
|
|
30
|
+
const converter = new NotationConverter(col, choices.value!);
|
|
31
|
+
const newColumn = converter.convert();
|
|
32
|
+
col.dataFrame.columns.add(newColumn);
|
|
21
33
|
})
|
|
22
34
|
.show();
|
|
23
35
|
}
|
|
@@ -29,12 +29,12 @@ function _fastaToStrings(fasta: string): string[] {
|
|
|
29
29
|
/**
|
|
30
30
|
* Runs Aioli environment with kalign tool.
|
|
31
31
|
*
|
|
32
|
-
* @param {DG.Column}
|
|
32
|
+
* @param {DG.Column} srcCol Column with sequences.
|
|
33
33
|
* @param {boolean} isAligned Whether the column is aligned.
|
|
34
34
|
* @return {Promise<DG.Column>} Aligned sequences.
|
|
35
35
|
*/
|
|
36
|
-
export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.Column> {
|
|
37
|
-
let sequences =
|
|
36
|
+
export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<DG.Column> {
|
|
37
|
+
let sequences = srcCol.toList();
|
|
38
38
|
|
|
39
39
|
if (isAligned)
|
|
40
40
|
sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
|
|
@@ -55,15 +55,20 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
55
55
|
console.warn(output);
|
|
56
56
|
|
|
57
57
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
|
|
59
|
+
|
|
60
|
+
// units
|
|
61
|
+
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
62
|
+
const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
|
|
63
|
+
|
|
64
|
+
tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
|
|
65
|
+
tgtCol.semType = C.SEM_TYPES.Macro_Molecule;
|
|
66
|
+
return tgtCol;
|
|
62
67
|
}
|
|
63
68
|
|
|
64
69
|
export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
|
|
65
70
|
const sequencesCount = col.length;
|
|
66
|
-
const delta = sequencesCount/100;
|
|
71
|
+
const delta = sequencesCount / 100;
|
|
67
72
|
|
|
68
73
|
for (let i = delta; i < sequencesCount; i += delta) {
|
|
69
74
|
try {
|