@datagrok/bio 1.5.3 → 1.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +11 -0
- package/dist/package-test.js +548 -86
- package/dist/package.js +520 -67
- package/files/samples/sample_HELM.csv +540 -540
- package/files/samples/sample_MSA.csv +540 -540
- package/files/samples/sar-small.csv +201 -0
- package/package.json +5 -4
- package/src/const.ts +0 -1
- package/src/package.ts +37 -14
- package/src/tests/activity-cliffs-tests.ts +1 -0
- package/src/tests/convert-test.ts +31 -0
- package/src/tests/detectors-test.ts +30 -15
- package/src/utils/atomic-works.ts +252 -0
- package/src/utils/convert.ts +15 -3
- package/src/utils/multiple-sequence-alignment.ts +13 -8
- package/src/utils/notation-converter.ts +131 -0
- package/src/utils/sequence-space.ts +4 -4
- package/src/utils/split-to-monomers.ts +8 -0
- package/src/utils/utils.ts +40 -0
- package/tsconfig.json +1 -1
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
smiles
|
|
2
|
+
O=C1CN=C(c2ccccc2N1)C3CCCCC3
|
|
3
|
+
CN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
4
|
+
CCCCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
5
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
6
|
+
O=C1CN=C(c2ccccc2N1CC3CCCCC3)C4CCCCC4
|
|
7
|
+
O=C1CN=C(c2cc(Cl)ccc2N1)C3CCCCC3
|
|
8
|
+
CN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
9
|
+
CCCCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
10
|
+
CC(C)CCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
11
|
+
O=C1CN=C(c2cc(Cl)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
12
|
+
N#Cc1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
13
|
+
CN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
14
|
+
CCCCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
15
|
+
CC(C)CCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
16
|
+
N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
17
|
+
CN(C)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
18
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)C3CCCCC3
|
|
19
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
20
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
21
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
22
|
+
O=C1CN=C(c2cc(F)ccc2N1)C3CCCCC3
|
|
23
|
+
CN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
24
|
+
CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
25
|
+
CC(C)CCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
26
|
+
O=C1CN=C(c2cc(F)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
27
|
+
O=C1CN=C(c2cc(Br)ccc2N1)C3CCCCC3
|
|
28
|
+
CN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
29
|
+
CCCCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
30
|
+
CC(C)CCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
31
|
+
O=C1CN=C(c2cc(Br)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
32
|
+
O=C1CN=C(c2cc(I)ccc2N1)C3CCCCC3
|
|
33
|
+
CN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
34
|
+
CCCCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
35
|
+
CC(C)CCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
36
|
+
O=C1CN=C(c2cc(I)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
37
|
+
NC(=O)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
38
|
+
CN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
39
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
40
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
41
|
+
NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
42
|
+
O=C1CN=C(c2cc(ccc2N1)[N+](=O)[O-])C3CCCCC3
|
|
43
|
+
CN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
44
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
45
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
46
|
+
O=C1CN=C(c2cc(ccc2N1CC3CCCCC3)[N+](=O)[O-])C4CCCCC4
|
|
47
|
+
O=C1CN=C(c2ccccc2)c3ccccc3N1
|
|
48
|
+
CN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
49
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
50
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
51
|
+
O=C1CN=C(c2ccccc2)c3ccccc3N1CC4CCCCC4
|
|
52
|
+
O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1
|
|
53
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
54
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
55
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
56
|
+
O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1CC4CCCCC4
|
|
57
|
+
N#Cc1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
58
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
59
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
60
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
61
|
+
N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
62
|
+
CN(C)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
63
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)c3ccccc3
|
|
64
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
65
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
66
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
67
|
+
O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1
|
|
68
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
69
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
70
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
71
|
+
O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1CC4CCCCC4
|
|
72
|
+
O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1
|
|
73
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
74
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
75
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
76
|
+
O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1CC4CCCCC4
|
|
77
|
+
O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1
|
|
78
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
79
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
80
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
81
|
+
O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1CC4CCCCC4
|
|
82
|
+
NC(=O)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
83
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
84
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
85
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
86
|
+
NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
87
|
+
O=C1CN=C(c2ccccc2)c3cc(ccc3N1)[N+](=O)[O-]
|
|
88
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
89
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
90
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
91
|
+
O=C1CN=C(c2ccccc2)c3cc(ccc3N1CC4CCCCC4)[N+](=O)[O-]
|
|
92
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccccc23
|
|
93
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccccc23
|
|
94
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3ccccc13
|
|
95
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccccc23
|
|
96
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccccc24
|
|
97
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Cl)cc23
|
|
98
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Cl)cc23
|
|
99
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Cl)ccc13
|
|
100
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Cl)cc23
|
|
101
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Cl)cc24
|
|
102
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(C#N)cc23
|
|
103
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(C#N)cc23
|
|
104
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(C#N)ccc13
|
|
105
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(C#N)cc23
|
|
106
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(C#N)cc24
|
|
107
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)N(C)C
|
|
108
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)N(C)C
|
|
109
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)N(C)C
|
|
110
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)N(C)C
|
|
111
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)N(C)C
|
|
112
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(F)cc23
|
|
113
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(F)cc23
|
|
114
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(F)ccc13
|
|
115
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(F)cc23
|
|
116
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(F)cc24
|
|
117
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Br)cc23
|
|
118
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Br)cc23
|
|
119
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Br)ccc13
|
|
120
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Br)cc23
|
|
121
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Br)cc24
|
|
122
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(I)cc23
|
|
123
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(I)cc23
|
|
124
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(I)ccc13
|
|
125
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(I)cc23
|
|
126
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(I)cc24
|
|
127
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)C(N)=O
|
|
128
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)C(N)=O
|
|
129
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)C(N)=O
|
|
130
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)C(N)=O
|
|
131
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)C(N)=O
|
|
132
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)[N+](=O)[O-]
|
|
133
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)[N+](=O)[O-]
|
|
134
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)[N+](=O)[O-]
|
|
135
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)[N+](=O)[O-]
|
|
136
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)[N+](=O)[O-]
|
|
137
|
+
CC1N=C(c2ccccc2NC1=O)C3CCCCC3
|
|
138
|
+
CC1N=C(c2ccccc2N(C)C1=O)C3CCCCC3
|
|
139
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
|
|
140
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
|
|
141
|
+
CC1N=C(c2ccccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
142
|
+
CC1N=C(c2cc(Cl)ccc2NC1=O)C3CCCCC3
|
|
143
|
+
CC1N=C(c2cc(Cl)ccc2N(C)C1=O)C3CCCCC3
|
|
144
|
+
CCCCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
145
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
146
|
+
CC1N=C(c2cc(Cl)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
147
|
+
CC1N=C(c2cc(C#N)ccc2NC1=O)C3CCCCC3
|
|
148
|
+
CC1N=C(c2cc(C#N)ccc2N(C)C1=O)C3CCCCC3
|
|
149
|
+
CCCCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
150
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
151
|
+
CC1N=C(c2cc(C#N)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
152
|
+
CC1N=C(c2cc(ccc2NC1=O)N(C)C)C3CCCCC3
|
|
153
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)N(C)C)C3CCCCC3
|
|
154
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
155
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
156
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)N(C)C)C4CCCCC4
|
|
157
|
+
CC1N=C(c2cc(F)ccc2NC1=O)C3CCCCC3
|
|
158
|
+
CC1N=C(c2cc(F)ccc2N(C)C1=O)C3CCCCC3
|
|
159
|
+
CCCCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
|
|
160
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
|
|
161
|
+
CC1N=C(c2cc(F)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
162
|
+
CC1N=C(c2cc(Br)ccc2NC1=O)C3CCCCC3
|
|
163
|
+
CC1N=C(c2cc(Br)ccc2N(C)C1=O)C3CCCCC3
|
|
164
|
+
CCCCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
|
|
165
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
|
|
166
|
+
CC1N=C(c2cc(Br)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
167
|
+
CC1N=C(c2cc(I)ccc2NC1=O)C3CCCCC3
|
|
168
|
+
CC1N=C(c2cc(I)ccc2N(C)C1=O)C3CCCCC3
|
|
169
|
+
CCCCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
|
|
170
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
|
|
171
|
+
CC1N=C(c2cc(I)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
172
|
+
CC1N=C(c2cc(ccc2NC1=O)C(N)=O)C3CCCCC3
|
|
173
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)C(N)=O)C3CCCCC3
|
|
174
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
175
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
176
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)C(N)=O)C4CCCCC4
|
|
177
|
+
CC1N=C(c2cc(ccc2NC1=O)[N+](=O)[O-])C3CCCCC3
|
|
178
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)[N+](=O)[O-])C3CCCCC3
|
|
179
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
180
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
181
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)[N+](=O)[O-])C4CCCCC4
|
|
182
|
+
CC1N=C(c2ccccc2)c3ccccc3NC1=O
|
|
183
|
+
CC1N=C(c2ccccc2)c3ccccc3N(C)C1=O
|
|
184
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
|
|
185
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
|
|
186
|
+
CC1N=C(c2ccccc2)c3ccccc3N(CC4CCCCC4)C1=O
|
|
187
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3NC1=O
|
|
188
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(C)C1=O
|
|
189
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
|
|
190
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
|
|
191
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(CC4CCCCC4)C1=O
|
|
192
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3NC1=O
|
|
193
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(C)C1=O
|
|
194
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
|
|
195
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
|
|
196
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(CC4CCCCC4)C1=O
|
|
197
|
+
CC1N=C(c2ccccc2)c3cc(ccc3NC1=O)N(C)C
|
|
198
|
+
CC1N=C(c2ccccc2)c3cc(ccc3N(C)C1=O)N(C)C
|
|
199
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
200
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
201
|
+
CC1N=C(c2ccccc2)c3cc(ccc3N(CC4CCCCC4)C1=O)N(C)C
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.5.3",
|
|
5
|
+
"version": "1.5.6",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,14 +11,15 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.4.0",
|
|
15
15
|
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
|
-
"@datagrok-libraries/ml": "^2.0.
|
|
16
|
+
"@datagrok-libraries/ml": "^2.0.9",
|
|
17
17
|
"cash-dom": "latest",
|
|
18
18
|
"datagrok-api": "^1.4.12",
|
|
19
19
|
"dayjs": "latest",
|
|
20
20
|
"ts-loader": "^9.2.5",
|
|
21
|
-
"typescript": "^4.4.2"
|
|
21
|
+
"typescript": "^4.4.2",
|
|
22
|
+
"openchemlib": "6.0.1"
|
|
22
23
|
},
|
|
23
24
|
"devDependencies": {
|
|
24
25
|
"@types/jest": "^27.0.0",
|
package/src/const.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
export const _package = new DG.Package();
|
|
7
7
|
|
|
8
|
-
import {mmSemType} from './const';
|
|
9
8
|
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
9
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
10
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
@@ -17,6 +16,8 @@ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
|
17
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
18
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
19
18
|
import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
|
|
19
|
+
import { getMolfilesFromSeq, HELM_CORE_LIB_FILENAME } from './utils/utils';
|
|
20
|
+
import {getMacroMol} from './utils/atomic-works';
|
|
20
21
|
|
|
21
22
|
//name: sequenceAlignment
|
|
22
23
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -51,7 +52,7 @@ export function vdRegionViewer() {
|
|
|
51
52
|
//top-menu: Bio | Sequence Activity Cliffs...
|
|
52
53
|
//name: Sequence Activity Cliffs
|
|
53
54
|
//description: detect activity cliffs
|
|
54
|
-
//input: dataframe
|
|
55
|
+
//input: dataframe table [Input data table]
|
|
55
56
|
//input: column sequence {semType: Macromolecule}
|
|
56
57
|
//input: column activities
|
|
57
58
|
//input: double similarity = 80 [Similarity cutoff]
|
|
@@ -67,6 +68,7 @@ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, acti
|
|
|
67
68
|
df,
|
|
68
69
|
sequence,
|
|
69
70
|
axesNames,
|
|
71
|
+
'Activity cliffs',
|
|
70
72
|
activities,
|
|
71
73
|
similarity,
|
|
72
74
|
'Levenshtein',
|
|
@@ -102,11 +104,29 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
102
104
|
if (plotEmbeddings) {
|
|
103
105
|
for (const v of grok.shell.views) {
|
|
104
106
|
if (v.name === table.name)
|
|
105
|
-
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
|
|
107
|
+
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
106
108
|
}
|
|
107
109
|
}
|
|
108
110
|
};
|
|
109
111
|
|
|
112
|
+
//top-menu: Bio | Molfiles From HELM...
|
|
113
|
+
//name: Molfiles From HELM
|
|
114
|
+
//description: returns molfiles for each monomer from HELM library
|
|
115
|
+
//input: dataframe df [Input data table]
|
|
116
|
+
//input: column sequence {semType: Macromolecule}
|
|
117
|
+
export async function molfilesFromHELM(df: DG.DataFrame, sequence: DG.Column): Promise<void> {
|
|
118
|
+
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
119
|
+
const monomersLibDf = DG.DataFrame.fromJson(monomersLibFile);
|
|
120
|
+
const atomicCodes = getMolfilesFromSeq(sequence, monomersLibDf);
|
|
121
|
+
|
|
122
|
+
let result: string[] = [];
|
|
123
|
+
for(let i = 0; i < atomicCodes!.length; i++)
|
|
124
|
+
result.push(getMacroMol(atomicCodes![i]));
|
|
125
|
+
|
|
126
|
+
df.columns.add(DG.Column.fromStrings('regenerated', result));
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
|
|
110
130
|
//top-menu: Bio | MSA...
|
|
111
131
|
//name: MSA
|
|
112
132
|
//input: dataframe table
|
|
@@ -120,20 +140,23 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
|
|
|
120
140
|
//top-menu: Bio | Composition Analysis
|
|
121
141
|
//output: viewer result
|
|
122
142
|
export async function compositionAnalysis(): Promise<void> {
|
|
123
|
-
|
|
124
|
-
|
|
143
|
+
// Higher priority for columns with MSA data to show with WebLogo.
|
|
144
|
+
const tv = grok.shell.tv;
|
|
145
|
+
const df = tv.dataFrame;
|
|
146
|
+
const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
147
|
+
let col: DG.Column | undefined = semTypeColList.find((col) => {
|
|
148
|
+
const units = col.getTag(DG.TAGS.UNITS);
|
|
149
|
+
return units ? units.indexOf('MSA') !== -1 : false;
|
|
150
|
+
});
|
|
151
|
+
if (!col)
|
|
152
|
+
col = semTypeColList[0];
|
|
153
|
+
|
|
154
|
+
if (!col) {
|
|
125
155
|
grok.shell.error('Current table does not contain sequences');
|
|
126
156
|
return;
|
|
127
157
|
}
|
|
128
158
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
for (const v of grok.shell.views) {
|
|
132
|
-
if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
133
|
-
(v as DG.TableView).dockManager.dock(wl.root, 'down');
|
|
134
|
-
break;
|
|
135
|
-
}
|
|
136
|
-
}
|
|
159
|
+
tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
137
160
|
}
|
|
138
161
|
|
|
139
162
|
// helper function for importFasta
|
|
@@ -182,7 +205,7 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
182
205
|
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
183
206
|
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
184
207
|
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
185
|
-
sequenceCol.semType = mmSemType;
|
|
208
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
186
209
|
const units: string = `fasta:${seqType}:${alphabet}`;
|
|
187
210
|
sequenceCol.setTag(DG.TAGS.UNITS, units);
|
|
188
211
|
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
// import {mmSemType} from '../const';
|
|
8
|
+
// import {importFasta} from '../package';
|
|
9
|
+
|
|
10
|
+
category('converters', () => {
|
|
11
|
+
// test('a', async () => {await _a();});
|
|
12
|
+
// test('b', async () => {await _b();});
|
|
13
|
+
test('testFastaToSeparator', async () => { await _testFastaToSeparator(); });
|
|
14
|
+
test('testSeparatorToFasta', async () => { await _testSeparatorToFasta(); });
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
// export async function _a() {
|
|
18
|
+
// expect(1, 1);
|
|
19
|
+
// }
|
|
20
|
+
//
|
|
21
|
+
// export async function _b() {
|
|
22
|
+
// expect(1, 2);
|
|
23
|
+
// }
|
|
24
|
+
|
|
25
|
+
export async function _testFastaToSeparator() {
|
|
26
|
+
expect(1, 1);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export async function _testSeparatorToFasta() {
|
|
30
|
+
expect(1, 2);
|
|
31
|
+
}
|
|
@@ -4,7 +4,6 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
|
-
import {mmSemType} from '../const';
|
|
8
7
|
import {importFasta} from '../package';
|
|
9
8
|
|
|
10
9
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
@@ -94,6 +93,8 @@ MWRSWY-CKHP
|
|
|
94
93
|
fastaCsv = 'FastaCsv',
|
|
95
94
|
msaComplex = 'MsaComplex',
|
|
96
95
|
idCsv = 'IdCsv',
|
|
96
|
+
sarSmallCsv = 'SarSmallCsv',
|
|
97
|
+
HelmCsv = 'HelmCsv',
|
|
97
98
|
}
|
|
98
99
|
|
|
99
100
|
const samples: { [key: string]: string } = {
|
|
@@ -101,6 +102,8 @@ MWRSWY-CKHP
|
|
|
101
102
|
'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
102
103
|
'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
103
104
|
'IdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
105
|
+
'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
|
|
106
|
+
'HelmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
|
|
104
107
|
};
|
|
105
108
|
|
|
106
109
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -189,20 +192,32 @@ MWRSWY-CKHP
|
|
|
189
192
|
test('samplesIdCsvNegativeID', async () => {
|
|
190
193
|
await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
|
|
191
194
|
});
|
|
195
|
+
|
|
196
|
+
test('samplesSarSmallCsvNegativeSmiles', async () => {
|
|
197
|
+
await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
test('samplesHelmCsvHELM', async () => {
|
|
201
|
+
await _testPos(readSamplesCsv(Samples.HelmCsv), 'HELM', 'HELM', null);
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
test('samplesHelmCsvNegativeActivity', async () => {
|
|
205
|
+
await _testNeg(readSamplesCsv(Samples.HelmCsv), 'Activity');
|
|
206
|
+
});
|
|
192
207
|
});
|
|
193
208
|
|
|
194
209
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
195
210
|
const df: DG.DataFrame = await readDf();
|
|
196
211
|
|
|
197
212
|
const col: DG.Column = df.col(colName)!;
|
|
198
|
-
expect(col.semType === mmSemType, false);
|
|
213
|
+
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
|
|
199
214
|
}
|
|
200
215
|
|
|
201
|
-
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
|
|
216
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
|
|
202
217
|
const df: DG.DataFrame = await readDf();
|
|
203
218
|
|
|
204
219
|
const col: DG.Column = df.col(colName)!;
|
|
205
|
-
expect(col.semType === mmSemType, true);
|
|
220
|
+
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
206
221
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
207
222
|
if (separator)
|
|
208
223
|
expect(col.getTag('separator'), separator);
|
|
@@ -213,7 +228,7 @@ export async function _testN1(csvDfN1: string) {
|
|
|
213
228
|
await grok.data.detectSemanticTypes(dfN1);
|
|
214
229
|
|
|
215
230
|
const col: DG.Column = dfN1.col('seq')!;
|
|
216
|
-
expect(col.semType, mmSemType);
|
|
231
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
217
232
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
|
|
218
233
|
}
|
|
219
234
|
|
|
@@ -222,7 +237,7 @@ export async function _testAA1(csvDfAA1: string) {
|
|
|
222
237
|
await grok.data.detectSemanticTypes(dfAA1);
|
|
223
238
|
|
|
224
239
|
const col: DG.Column = dfAA1.col('seq')!;
|
|
225
|
-
expect(col.semType, mmSemType);
|
|
240
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
226
241
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
227
242
|
}
|
|
228
243
|
|
|
@@ -231,7 +246,7 @@ export async function _testMsaN1(csvDfMsaN1: string) {
|
|
|
231
246
|
await grok.data.detectSemanticTypes(dfMsaN1);
|
|
232
247
|
|
|
233
248
|
const col: DG.Column = dfMsaN1.col('seq')!;
|
|
234
|
-
expect(col.semType, mmSemType);
|
|
249
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
235
250
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
|
|
236
251
|
}
|
|
237
252
|
|
|
@@ -240,7 +255,7 @@ export async function _testMsaAA1(csvDfMsaAA1: string) {
|
|
|
240
255
|
await grok.data.detectSemanticTypes(dfMsaAA1);
|
|
241
256
|
|
|
242
257
|
const col: DG.Column = dfMsaAA1.col('seq')!;
|
|
243
|
-
expect(col.semType, mmSemType);
|
|
258
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
244
259
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
|
|
245
260
|
}
|
|
246
261
|
|
|
@@ -249,7 +264,7 @@ export async function _testSepNt(csv: string, separator: string) {
|
|
|
249
264
|
await grok.data.detectSemanticTypes(df);
|
|
250
265
|
|
|
251
266
|
const col: DG.Column = df.col('seq')!;
|
|
252
|
-
expect(col.semType, mmSemType);
|
|
267
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
253
268
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:NT');
|
|
254
269
|
expect(col.getTag('separator'), separator);
|
|
255
270
|
}
|
|
@@ -259,7 +274,7 @@ export async function _testSepPt(csv: string, separator: string) {
|
|
|
259
274
|
await grok.data.detectSemanticTypes(df);
|
|
260
275
|
|
|
261
276
|
const col: DG.Column = df.col('seq')!;
|
|
262
|
-
expect(col.semType, mmSemType);
|
|
277
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
263
278
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:PT');
|
|
264
279
|
expect(col.getTag('separator'), separator);
|
|
265
280
|
}
|
|
@@ -269,7 +284,7 @@ export async function _testSepUn(csv: string, separator: string) {
|
|
|
269
284
|
await grok.data.detectSemanticTypes(df);
|
|
270
285
|
|
|
271
286
|
const col: DG.Column = df.col('seq')!;
|
|
272
|
-
expect(col.semType, mmSemType);
|
|
287
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
273
288
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
|
|
274
289
|
expect(col.getTag('separator'), separator);
|
|
275
290
|
}
|
|
@@ -279,7 +294,7 @@ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
|
|
|
279
294
|
await grok.data.detectSemanticTypes(dfSepMsaN1);
|
|
280
295
|
|
|
281
296
|
const col: DG.Column = dfSepMsaN1.col('seq')!;
|
|
282
|
-
expect(col.semType, mmSemType);
|
|
297
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
283
298
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
|
|
284
299
|
}
|
|
285
300
|
|
|
@@ -289,7 +304,7 @@ export async function _testSamplesFastaCsvPt() {
|
|
|
289
304
|
await grok.data.detectSemanticTypes(df);
|
|
290
305
|
|
|
291
306
|
const col: DG.Column = df.col('sequence')!;
|
|
292
|
-
expect(col.semType, mmSemType);
|
|
307
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
293
308
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
294
309
|
expect(col.getTag('separator'), null);
|
|
295
310
|
}
|
|
@@ -299,7 +314,7 @@ export async function _testSamplesFastaFastaPt() {
|
|
|
299
314
|
const df: DG.DataFrame = importFasta(fasta)[0];
|
|
300
315
|
|
|
301
316
|
const col: DG.Column = df.col('sequence')!;
|
|
302
|
-
expect(col.semType, mmSemType);
|
|
317
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
303
318
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
304
319
|
expect(col.getTag('separator'), null);
|
|
305
320
|
}
|
|
@@ -310,7 +325,7 @@ export async function _testSamplesPeptidesComplexUn() {
|
|
|
310
325
|
await grok.data.detectSemanticTypes(df);
|
|
311
326
|
|
|
312
327
|
const col: DG.Column = df.col('AlignedSequence')!;
|
|
313
|
-
expect(col.semType, mmSemType);
|
|
328
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
314
329
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
|
|
315
330
|
expect(col.getTag('separator'), '-');
|
|
316
331
|
}
|