@datagrok/bio 1.5.3 → 1.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ smiles
2
+ O=C1CN=C(c2ccccc2N1)C3CCCCC3
3
+ CN1C(=O)CN=C(c2ccccc12)C3CCCCC3
4
+ CCCCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
5
+ CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
6
+ O=C1CN=C(c2ccccc2N1CC3CCCCC3)C4CCCCC4
7
+ O=C1CN=C(c2cc(Cl)ccc2N1)C3CCCCC3
8
+ CN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
9
+ CCCCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
10
+ CC(C)CCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
11
+ O=C1CN=C(c2cc(Cl)ccc2N1CC3CCCCC3)C4CCCCC4
12
+ N#Cc1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
13
+ CN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
14
+ CCCCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
15
+ CC(C)CCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
16
+ N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
17
+ CN(C)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
18
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)C3CCCCC3
19
+ CCCCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
20
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
21
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
22
+ O=C1CN=C(c2cc(F)ccc2N1)C3CCCCC3
23
+ CN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
24
+ CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
25
+ CC(C)CCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
26
+ O=C1CN=C(c2cc(F)ccc2N1CC3CCCCC3)C4CCCCC4
27
+ O=C1CN=C(c2cc(Br)ccc2N1)C3CCCCC3
28
+ CN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
29
+ CCCCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
30
+ CC(C)CCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
31
+ O=C1CN=C(c2cc(Br)ccc2N1CC3CCCCC3)C4CCCCC4
32
+ O=C1CN=C(c2cc(I)ccc2N1)C3CCCCC3
33
+ CN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
34
+ CCCCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
35
+ CC(C)CCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
36
+ O=C1CN=C(c2cc(I)ccc2N1CC3CCCCC3)C4CCCCC4
37
+ NC(=O)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
38
+ CN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
39
+ CCCCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
40
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
41
+ NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
42
+ O=C1CN=C(c2cc(ccc2N1)[N+](=O)[O-])C3CCCCC3
43
+ CN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
44
+ CCCCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
45
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
46
+ O=C1CN=C(c2cc(ccc2N1CC3CCCCC3)[N+](=O)[O-])C4CCCCC4
47
+ O=C1CN=C(c2ccccc2)c3ccccc3N1
48
+ CN1C(=O)CN=C(c2ccccc2)c3ccccc13
49
+ CCCCN1C(=O)CN=C(c2ccccc2)c3ccccc13
50
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3ccccc13
51
+ O=C1CN=C(c2ccccc2)c3ccccc3N1CC4CCCCC4
52
+ O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1
53
+ CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
54
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
55
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
56
+ O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1CC4CCCCC4
57
+ N#Cc1ccc2NC(=O)CN=C(c3ccccc3)c2c1
58
+ CN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
59
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
60
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
61
+ N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
62
+ CN(C)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
63
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)c3ccccc3
64
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
65
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
66
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
67
+ O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1
68
+ CN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
69
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
70
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
71
+ O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1CC4CCCCC4
72
+ O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1
73
+ CN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
74
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
75
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
76
+ O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1CC4CCCCC4
77
+ O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1
78
+ CN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
79
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
80
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
81
+ O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1CC4CCCCC4
82
+ NC(=O)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
83
+ CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
84
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
85
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
86
+ NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
87
+ O=C1CN=C(c2ccccc2)c3cc(ccc3N1)[N+](=O)[O-]
88
+ CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
89
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
90
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
91
+ O=C1CN=C(c2ccccc2)c3cc(ccc3N1CC4CCCCC4)[N+](=O)[O-]
92
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccccc23
93
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccccc23
94
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3ccccc13
95
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccccc23
96
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccccc24
97
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Cl)cc23
98
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Cl)cc23
99
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Cl)ccc13
100
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Cl)cc23
101
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Cl)cc24
102
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(C#N)cc23
103
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(C#N)cc23
104
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(C#N)ccc13
105
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(C#N)cc23
106
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(C#N)cc24
107
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)N(C)C
108
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)N(C)C
109
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)N(C)C
110
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)N(C)C
111
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)N(C)C
112
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(F)cc23
113
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(F)cc23
114
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(F)ccc13
115
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(F)cc23
116
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(F)cc24
117
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Br)cc23
118
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Br)cc23
119
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Br)ccc13
120
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Br)cc23
121
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Br)cc24
122
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(I)cc23
123
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(I)cc23
124
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(I)ccc13
125
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(I)cc23
126
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(I)cc24
127
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)C(N)=O
128
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)C(N)=O
129
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)C(N)=O
130
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)C(N)=O
131
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)C(N)=O
132
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)[N+](=O)[O-]
133
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)[N+](=O)[O-]
134
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)[N+](=O)[O-]
135
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)[N+](=O)[O-]
136
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)[N+](=O)[O-]
137
+ CC1N=C(c2ccccc2NC1=O)C3CCCCC3
138
+ CC1N=C(c2ccccc2N(C)C1=O)C3CCCCC3
139
+ CCCCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
140
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
141
+ CC1N=C(c2ccccc2N(CC3CCCCC3)C1=O)C4CCCCC4
142
+ CC1N=C(c2cc(Cl)ccc2NC1=O)C3CCCCC3
143
+ CC1N=C(c2cc(Cl)ccc2N(C)C1=O)C3CCCCC3
144
+ CCCCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
145
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
146
+ CC1N=C(c2cc(Cl)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
147
+ CC1N=C(c2cc(C#N)ccc2NC1=O)C3CCCCC3
148
+ CC1N=C(c2cc(C#N)ccc2N(C)C1=O)C3CCCCC3
149
+ CCCCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
150
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
151
+ CC1N=C(c2cc(C#N)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
152
+ CC1N=C(c2cc(ccc2NC1=O)N(C)C)C3CCCCC3
153
+ CC1N=C(c2cc(ccc2N(C)C1=O)N(C)C)C3CCCCC3
154
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
155
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
156
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)N(C)C)C4CCCCC4
157
+ CC1N=C(c2cc(F)ccc2NC1=O)C3CCCCC3
158
+ CC1N=C(c2cc(F)ccc2N(C)C1=O)C3CCCCC3
159
+ CCCCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
160
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
161
+ CC1N=C(c2cc(F)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
162
+ CC1N=C(c2cc(Br)ccc2NC1=O)C3CCCCC3
163
+ CC1N=C(c2cc(Br)ccc2N(C)C1=O)C3CCCCC3
164
+ CCCCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
165
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
166
+ CC1N=C(c2cc(Br)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
167
+ CC1N=C(c2cc(I)ccc2NC1=O)C3CCCCC3
168
+ CC1N=C(c2cc(I)ccc2N(C)C1=O)C3CCCCC3
169
+ CCCCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
170
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
171
+ CC1N=C(c2cc(I)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
172
+ CC1N=C(c2cc(ccc2NC1=O)C(N)=O)C3CCCCC3
173
+ CC1N=C(c2cc(ccc2N(C)C1=O)C(N)=O)C3CCCCC3
174
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
175
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
176
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)C(N)=O)C4CCCCC4
177
+ CC1N=C(c2cc(ccc2NC1=O)[N+](=O)[O-])C3CCCCC3
178
+ CC1N=C(c2cc(ccc2N(C)C1=O)[N+](=O)[O-])C3CCCCC3
179
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
180
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
181
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)[N+](=O)[O-])C4CCCCC4
182
+ CC1N=C(c2ccccc2)c3ccccc3NC1=O
183
+ CC1N=C(c2ccccc2)c3ccccc3N(C)C1=O
184
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
185
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
186
+ CC1N=C(c2ccccc2)c3ccccc3N(CC4CCCCC4)C1=O
187
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3NC1=O
188
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(C)C1=O
189
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
190
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
191
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(CC4CCCCC4)C1=O
192
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3NC1=O
193
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(C)C1=O
194
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
195
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
196
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(CC4CCCCC4)C1=O
197
+ CC1N=C(c2ccccc2)c3cc(ccc3NC1=O)N(C)C
198
+ CC1N=C(c2ccccc2)c3cc(ccc3N(C)C1=O)N(C)C
199
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
200
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
201
+ CC1N=C(c2ccccc2)c3cc(ccc3N(CC4CCCCC4)C1=O)N(C)C
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@datagrok/bio",
3
3
  "beta": false,
4
4
  "friendlyName": "Bio",
5
- "version": "1.5.3",
5
+ "version": "1.5.6",
6
6
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
7
  "repository": {
8
8
  "type": "git",
@@ -11,14 +11,15 @@
11
11
  },
12
12
  "dependencies": {
13
13
  "@biowasm/aioli": ">=2.4.0",
14
- "@datagrok-libraries/bio": "^2.3.1",
14
+ "@datagrok-libraries/bio": "^2.4.0",
15
15
  "@datagrok-libraries/utils": "^1.0.0",
16
- "@datagrok-libraries/ml": "^2.0.8",
16
+ "@datagrok-libraries/ml": "^2.0.9",
17
17
  "cash-dom": "latest",
18
18
  "datagrok-api": "^1.4.12",
19
19
  "dayjs": "latest",
20
20
  "ts-loader": "^9.2.5",
21
- "typescript": "^4.4.2"
21
+ "typescript": "^4.4.2",
22
+ "openchemlib": "6.0.1"
22
23
  },
23
24
  "devDependencies": {
24
25
  "@types/jest": "^27.0.0",
package/src/const.ts CHANGED
@@ -2,4 +2,3 @@ import * as ui from 'datagrok-api/ui';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- export const mmSemType = 'Macromolecule';
package/src/package.ts CHANGED
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  export const _package = new DG.Package();
7
7
 
8
- import {mmSemType} from './const';
9
8
  import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
9
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
10
  import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
@@ -17,6 +16,8 @@ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
17
16
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
18
17
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
19
18
  import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
19
+ import { getMolfilesFromSeq, HELM_CORE_LIB_FILENAME } from './utils/utils';
20
+ import {getMacroMol} from './utils/atomic-works';
20
21
 
21
22
  //name: sequenceAlignment
22
23
  //input: string alignType {choices: ['Local alignment', 'Global alignment']}
@@ -51,7 +52,7 @@ export function vdRegionViewer() {
51
52
  //top-menu: Bio | Sequence Activity Cliffs...
52
53
  //name: Sequence Activity Cliffs
53
54
  //description: detect activity cliffs
54
- //input: dataframe df [Input data table]
55
+ //input: dataframe table [Input data table]
55
56
  //input: column sequence {semType: Macromolecule}
56
57
  //input: column activities
57
58
  //input: double similarity = 80 [Similarity cutoff]
@@ -67,6 +68,7 @@ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, acti
67
68
  df,
68
69
  sequence,
69
70
  axesNames,
71
+ 'Activity cliffs',
70
72
  activities,
71
73
  similarity,
72
74
  'Levenshtein',
@@ -102,11 +104,29 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
102
104
  if (plotEmbeddings) {
103
105
  for (const v of grok.shell.views) {
104
106
  if (v.name === table.name)
105
- (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
107
+ (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
106
108
  }
107
109
  }
108
110
  };
109
111
 
112
+ //top-menu: Bio | Molfiles From HELM...
113
+ //name: Molfiles From HELM
114
+ //description: returns molfiles for each monomer from HELM library
115
+ //input: dataframe df [Input data table]
116
+ //input: column sequence {semType: Macromolecule}
117
+ export async function molfilesFromHELM(df: DG.DataFrame, sequence: DG.Column): Promise<void> {
118
+ const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
119
+ const monomersLibDf = DG.DataFrame.fromJson(monomersLibFile);
120
+ const atomicCodes = getMolfilesFromSeq(sequence, monomersLibDf);
121
+
122
+ let result: string[] = [];
123
+ for(let i = 0; i < atomicCodes!.length; i++)
124
+ result.push(getMacroMol(atomicCodes![i]));
125
+
126
+ df.columns.add(DG.Column.fromStrings('regenerated', result));
127
+ }
128
+
129
+
110
130
  //top-menu: Bio | MSA...
111
131
  //name: MSA
112
132
  //input: dataframe table
@@ -120,20 +140,23 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
120
140
  //top-menu: Bio | Composition Analysis
121
141
  //output: viewer result
122
142
  export async function compositionAnalysis(): Promise<void> {
123
- const col = grok.shell.t.columns.bySemType('Macromolecule');//DG.SEMTYPE.MACROMOLECULE);
124
- if (col === null) {
143
+ // Higher priority for columns with MSA data to show with WebLogo.
144
+ const tv = grok.shell.tv;
145
+ const df = tv.dataFrame;
146
+ const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
147
+ let col: DG.Column | undefined = semTypeColList.find((col) => {
148
+ const units = col.getTag(DG.TAGS.UNITS);
149
+ return units ? units.indexOf('MSA') !== -1 : false;
150
+ });
151
+ if (!col)
152
+ col = semTypeColList[0];
153
+
154
+ if (!col) {
125
155
  grok.shell.error('Current table does not contain sequences');
126
156
  return;
127
157
  }
128
158
 
129
- const wl = await col.dataFrame.plot.fromType('WebLogo', {});
130
-
131
- for (const v of grok.shell.views) {
132
- if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
133
- (v as DG.TableView).dockManager.dock(wl.root, 'down');
134
- break;
135
- }
136
- }
159
+ tv.addViewer('WebLogo', {sequenceColumnName: col.name});
137
160
  }
138
161
 
139
162
  // helper function for importFasta
@@ -182,7 +205,7 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
182
205
  (c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
183
206
  const maxCos = Math.max(...alphabetCandidatesSim);
184
207
  const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
185
- sequenceCol.semType = mmSemType;
208
+ sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
186
209
  const units: string = `fasta:${seqType}:${alphabet}`;
187
210
  sequenceCol.setTag(DG.TAGS.UNITS, units);
188
211
 
@@ -26,6 +26,7 @@ category('activityCliffs', async () => {
26
26
  actCliffsDf,
27
27
  actCliffsDf.col('MSA')!,
28
28
  axesNames,
29
+ 'Activity cliffs',
29
30
  actCliffsDf.col('Activity')!,
30
31
  50,
31
32
  'Levenshtein',
@@ -0,0 +1,31 @@
1
+ import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ // import {mmSemType} from '../const';
8
+ // import {importFasta} from '../package';
9
+
10
+ category('converters', () => {
11
+ // test('a', async () => {await _a();});
12
+ // test('b', async () => {await _b();});
13
+ test('testFastaToSeparator', async () => { await _testFastaToSeparator(); });
14
+ test('testSeparatorToFasta', async () => { await _testSeparatorToFasta(); });
15
+ });
16
+
17
+ // export async function _a() {
18
+ // expect(1, 1);
19
+ // }
20
+ //
21
+ // export async function _b() {
22
+ // expect(1, 2);
23
+ // }
24
+
25
+ export async function _testFastaToSeparator() {
26
+ expect(1, 1);
27
+ }
28
+
29
+ export async function _testSeparatorToFasta() {
30
+ expect(1, 2);
31
+ }
@@ -4,7 +4,6 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
- import {mmSemType} from '../const';
8
7
  import {importFasta} from '../package';
9
8
 
10
9
  type DfReaderFunc = () => Promise<DG.DataFrame>;
@@ -94,6 +93,8 @@ MWRSWY-CKHP
94
93
  fastaCsv = 'FastaCsv',
95
94
  msaComplex = 'MsaComplex',
96
95
  idCsv = 'IdCsv',
96
+ sarSmallCsv = 'SarSmallCsv',
97
+ HelmCsv = 'HelmCsv',
97
98
  }
98
99
 
99
100
  const samples: { [key: string]: string } = {
@@ -101,6 +102,8 @@ MWRSWY-CKHP
101
102
  'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
102
103
  'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
103
104
  'IdCsv': 'System:AppData/Bio/samples/id.csv',
105
+ 'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
106
+ 'HelmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
104
107
  };
105
108
 
106
109
  const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
@@ -189,20 +192,32 @@ MWRSWY-CKHP
189
192
  test('samplesIdCsvNegativeID', async () => {
190
193
  await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
191
194
  });
195
+
196
+ test('samplesSarSmallCsvNegativeSmiles', async () => {
197
+ await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
198
+ });
199
+
200
+ test('samplesHelmCsvHELM', async () => {
201
+ await _testPos(readSamplesCsv(Samples.HelmCsv), 'HELM', 'HELM', null);
202
+ });
203
+
204
+ test('samplesHelmCsvNegativeActivity', async () => {
205
+ await _testNeg(readSamplesCsv(Samples.HelmCsv), 'Activity');
206
+ });
192
207
  });
193
208
 
194
209
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
195
210
  const df: DG.DataFrame = await readDf();
196
211
 
197
212
  const col: DG.Column = df.col(colName)!;
198
- expect(col.semType === mmSemType, false);
213
+ expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
199
214
  }
200
215
 
201
- export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
216
+ export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
202
217
  const df: DG.DataFrame = await readDf();
203
218
 
204
219
  const col: DG.Column = df.col(colName)!;
205
- expect(col.semType === mmSemType, true);
220
+ expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
206
221
  expect(col.getTag(DG.TAGS.UNITS), units);
207
222
  if (separator)
208
223
  expect(col.getTag('separator'), separator);
@@ -213,7 +228,7 @@ export async function _testN1(csvDfN1: string) {
213
228
  await grok.data.detectSemanticTypes(dfN1);
214
229
 
215
230
  const col: DG.Column = dfN1.col('seq')!;
216
- expect(col.semType, mmSemType);
231
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
217
232
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
218
233
  }
219
234
 
@@ -222,7 +237,7 @@ export async function _testAA1(csvDfAA1: string) {
222
237
  await grok.data.detectSemanticTypes(dfAA1);
223
238
 
224
239
  const col: DG.Column = dfAA1.col('seq')!;
225
- expect(col.semType, mmSemType);
240
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
226
241
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
227
242
  }
228
243
 
@@ -231,7 +246,7 @@ export async function _testMsaN1(csvDfMsaN1: string) {
231
246
  await grok.data.detectSemanticTypes(dfMsaN1);
232
247
 
233
248
  const col: DG.Column = dfMsaN1.col('seq')!;
234
- expect(col.semType, mmSemType);
249
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
235
250
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
236
251
  }
237
252
 
@@ -240,7 +255,7 @@ export async function _testMsaAA1(csvDfMsaAA1: string) {
240
255
  await grok.data.detectSemanticTypes(dfMsaAA1);
241
256
 
242
257
  const col: DG.Column = dfMsaAA1.col('seq')!;
243
- expect(col.semType, mmSemType);
258
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
244
259
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
245
260
  }
246
261
 
@@ -249,7 +264,7 @@ export async function _testSepNt(csv: string, separator: string) {
249
264
  await grok.data.detectSemanticTypes(df);
250
265
 
251
266
  const col: DG.Column = df.col('seq')!;
252
- expect(col.semType, mmSemType);
267
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
253
268
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:NT');
254
269
  expect(col.getTag('separator'), separator);
255
270
  }
@@ -259,7 +274,7 @@ export async function _testSepPt(csv: string, separator: string) {
259
274
  await grok.data.detectSemanticTypes(df);
260
275
 
261
276
  const col: DG.Column = df.col('seq')!;
262
- expect(col.semType, mmSemType);
277
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
263
278
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:PT');
264
279
  expect(col.getTag('separator'), separator);
265
280
  }
@@ -269,7 +284,7 @@ export async function _testSepUn(csv: string, separator: string) {
269
284
  await grok.data.detectSemanticTypes(df);
270
285
 
271
286
  const col: DG.Column = df.col('seq')!;
272
- expect(col.semType, mmSemType);
287
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
273
288
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
274
289
  expect(col.getTag('separator'), separator);
275
290
  }
@@ -279,7 +294,7 @@ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
279
294
  await grok.data.detectSemanticTypes(dfSepMsaN1);
280
295
 
281
296
  const col: DG.Column = dfSepMsaN1.col('seq')!;
282
- expect(col.semType, mmSemType);
297
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
283
298
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
284
299
  }
285
300
 
@@ -289,7 +304,7 @@ export async function _testSamplesFastaCsvPt() {
289
304
  await grok.data.detectSemanticTypes(df);
290
305
 
291
306
  const col: DG.Column = df.col('sequence')!;
292
- expect(col.semType, mmSemType);
307
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
293
308
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
294
309
  expect(col.getTag('separator'), null);
295
310
  }
@@ -299,7 +314,7 @@ export async function _testSamplesFastaFastaPt() {
299
314
  const df: DG.DataFrame = importFasta(fasta)[0];
300
315
 
301
316
  const col: DG.Column = df.col('sequence')!;
302
- expect(col.semType, mmSemType);
317
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
303
318
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
304
319
  expect(col.getTag('separator'), null);
305
320
  }
@@ -310,7 +325,7 @@ export async function _testSamplesPeptidesComplexUn() {
310
325
  await grok.data.detectSemanticTypes(df);
311
326
 
312
327
  const col: DG.Column = df.col('AlignedSequence')!;
313
- expect(col.semType, mmSemType);
328
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
314
329
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
315
330
  expect(col.getTag('separator'), '-');
316
331
  }