@datagrok/bio 1.4.2 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -4
- package/dist/package-test.js +862 -635
- package/dist/package.js +664 -584
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +1665 -1651
- package/files/sample_MSA.csv +541 -0
- package/files/samples/id.csv +313 -0
- package/package.json +7 -6
- package/setup.cmd +10 -1
- package/src/package-test.ts +1 -0
- package/src/package.ts +70 -25
- package/src/tests/activity-cliffs-tests.ts +49 -0
- package/src/tests/detectors-test.ts +138 -34
- package/src/tests/sequence-space-test.ts +21 -19
- package/src/tests/utils.ts +9 -3
- package/src/utils/convert.ts +8 -9
- package/src/utils/multiple-sequence-alignment.ts +1 -1
- package/src/utils/sequence-activity-cliffs.ts +36 -0
- package/src/utils/sequence-space.ts +30 -30
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
ID
|
|
2
|
+
ID000000670
|
|
3
|
+
ID000000691
|
|
4
|
+
ID000001221
|
|
5
|
+
ID000001243
|
|
6
|
+
ID000001350
|
|
7
|
+
ID000001393
|
|
8
|
+
ID000001466
|
|
9
|
+
ID000001471
|
|
10
|
+
ID000001472
|
|
11
|
+
ID000001590
|
|
12
|
+
ID000001591
|
|
13
|
+
ID000001599
|
|
14
|
+
ID000002433
|
|
15
|
+
ID000002434
|
|
16
|
+
ID000002437
|
|
17
|
+
ID000002440
|
|
18
|
+
ID000002441
|
|
19
|
+
ID000002442
|
|
20
|
+
ID000002443
|
|
21
|
+
ID000002444
|
|
22
|
+
ID000002516
|
|
23
|
+
ID000002878
|
|
24
|
+
ID000002880
|
|
25
|
+
ID000002957
|
|
26
|
+
ID000002998
|
|
27
|
+
ID000003026
|
|
28
|
+
ID000003046
|
|
29
|
+
ID000003200
|
|
30
|
+
ID000003673
|
|
31
|
+
ID000004018
|
|
32
|
+
ID000004090
|
|
33
|
+
ID000004163
|
|
34
|
+
ID000004176
|
|
35
|
+
ID000004222
|
|
36
|
+
ID000004240
|
|
37
|
+
ID000004245
|
|
38
|
+
ID000004730
|
|
39
|
+
ID000004829
|
|
40
|
+
ID000004912
|
|
41
|
+
ID000004969
|
|
42
|
+
ID000005051
|
|
43
|
+
ID000005057
|
|
44
|
+
ID000005060
|
|
45
|
+
ID000005073
|
|
46
|
+
ID000005104
|
|
47
|
+
ID000006942
|
|
48
|
+
ID000007364
|
|
49
|
+
ID000007419
|
|
50
|
+
ID000007757
|
|
51
|
+
ID000007935
|
|
52
|
+
ID000007936
|
|
53
|
+
ID000007946
|
|
54
|
+
ID000008044
|
|
55
|
+
ID000008059
|
|
56
|
+
ID000008118
|
|
57
|
+
ID000010343
|
|
58
|
+
ID000011060
|
|
59
|
+
ID000011866
|
|
60
|
+
ID000011867
|
|
61
|
+
ID000011868
|
|
62
|
+
ID000011869
|
|
63
|
+
ID000011870
|
|
64
|
+
ID000011871
|
|
65
|
+
ID000011927
|
|
66
|
+
ID000011928
|
|
67
|
+
ID000011929
|
|
68
|
+
ID000011930
|
|
69
|
+
ID000011931
|
|
70
|
+
ID000011932
|
|
71
|
+
ID000011933
|
|
72
|
+
ID000011934
|
|
73
|
+
ID000011935
|
|
74
|
+
ID000011943
|
|
75
|
+
ID000011945
|
|
76
|
+
ID000012738
|
|
77
|
+
ID000012739
|
|
78
|
+
ID000012845
|
|
79
|
+
ID000012906
|
|
80
|
+
ID000012932
|
|
81
|
+
ID000012948
|
|
82
|
+
ID000012983
|
|
83
|
+
ID000012989
|
|
84
|
+
ID000012993
|
|
85
|
+
ID000012998
|
|
86
|
+
ID000013000
|
|
87
|
+
ID000013006
|
|
88
|
+
ID000013008
|
|
89
|
+
ID000013034
|
|
90
|
+
ID000013043
|
|
91
|
+
ID000013045
|
|
92
|
+
ID000013046
|
|
93
|
+
ID000013047
|
|
94
|
+
ID000013052
|
|
95
|
+
ID000013054
|
|
96
|
+
ID000013055
|
|
97
|
+
ID000013172
|
|
98
|
+
ID000013174
|
|
99
|
+
ID000013176
|
|
100
|
+
ID000013184
|
|
101
|
+
ID000013199
|
|
102
|
+
ID000013200
|
|
103
|
+
ID000013210
|
|
104
|
+
ID000013215
|
|
105
|
+
ID000013237
|
|
106
|
+
ID000013265
|
|
107
|
+
ID000013270
|
|
108
|
+
ID000013273
|
|
109
|
+
ID000013290
|
|
110
|
+
ID000013332
|
|
111
|
+
ID000013450
|
|
112
|
+
ID000013453
|
|
113
|
+
ID000013492
|
|
114
|
+
ID000013508
|
|
115
|
+
ID000013510
|
|
116
|
+
ID000013588
|
|
117
|
+
ID000013621
|
|
118
|
+
ID000013645
|
|
119
|
+
ID000013658
|
|
120
|
+
ID000013675
|
|
121
|
+
ID000013759
|
|
122
|
+
ID000013769
|
|
123
|
+
ID000013783
|
|
124
|
+
ID000013802
|
|
125
|
+
ID000013822
|
|
126
|
+
ID000013846
|
|
127
|
+
ID000013895
|
|
128
|
+
ID000013896
|
|
129
|
+
ID000013996
|
|
130
|
+
ID000014680
|
|
131
|
+
ID000014681
|
|
132
|
+
ID000014715
|
|
133
|
+
ID000014815
|
|
134
|
+
ID000014870
|
|
135
|
+
ID000015313
|
|
136
|
+
ID000015441
|
|
137
|
+
ID000015447
|
|
138
|
+
ID000015450
|
|
139
|
+
ID000015452
|
|
140
|
+
ID000015453
|
|
141
|
+
ID000015484
|
|
142
|
+
ID000015485
|
|
143
|
+
ID000015489
|
|
144
|
+
ID000015490
|
|
145
|
+
ID000015492
|
|
146
|
+
ID000015493
|
|
147
|
+
ID000015543
|
|
148
|
+
ID000015544
|
|
149
|
+
ID000015618
|
|
150
|
+
ID000015619
|
|
151
|
+
ID000015626
|
|
152
|
+
ID000015649
|
|
153
|
+
ID000015650
|
|
154
|
+
ID000015662
|
|
155
|
+
ID000015669
|
|
156
|
+
ID000015674
|
|
157
|
+
ID000015675
|
|
158
|
+
ID000015676
|
|
159
|
+
ID000015689
|
|
160
|
+
ID000015690
|
|
161
|
+
ID000015695
|
|
162
|
+
ID000015727
|
|
163
|
+
ID000015731
|
|
164
|
+
ID000015739
|
|
165
|
+
ID000015740
|
|
166
|
+
ID000015751
|
|
167
|
+
ID000015753
|
|
168
|
+
ID000015754
|
|
169
|
+
ID000015755
|
|
170
|
+
ID000015758
|
|
171
|
+
ID000015771
|
|
172
|
+
ID000015772
|
|
173
|
+
ID000015773
|
|
174
|
+
ID000015774
|
|
175
|
+
ID000015809
|
|
176
|
+
ID000015829
|
|
177
|
+
ID000015830
|
|
178
|
+
ID000015840
|
|
179
|
+
ID000015841
|
|
180
|
+
ID000015842
|
|
181
|
+
ID000015843
|
|
182
|
+
ID000015845
|
|
183
|
+
ID000015846
|
|
184
|
+
ID000015847
|
|
185
|
+
ID000015848
|
|
186
|
+
ID000015849
|
|
187
|
+
ID000015850
|
|
188
|
+
ID000015851
|
|
189
|
+
ID000015852
|
|
190
|
+
ID000015853
|
|
191
|
+
ID000015864
|
|
192
|
+
ID000015880
|
|
193
|
+
ID000015914
|
|
194
|
+
ID000015940
|
|
195
|
+
ID000015942
|
|
196
|
+
ID000015945
|
|
197
|
+
ID000015956
|
|
198
|
+
ID000015963
|
|
199
|
+
ID000015964
|
|
200
|
+
ID000015968
|
|
201
|
+
ID000015970
|
|
202
|
+
ID000015984
|
|
203
|
+
ID000015985
|
|
204
|
+
ID000016053
|
|
205
|
+
ID000016873
|
|
206
|
+
ID000016946
|
|
207
|
+
ID000017085
|
|
208
|
+
ID000017086
|
|
209
|
+
ID000017164
|
|
210
|
+
ID000017265
|
|
211
|
+
ID000017311
|
|
212
|
+
ID000017366
|
|
213
|
+
ID000017405
|
|
214
|
+
ID000017418
|
|
215
|
+
ID000017494
|
|
216
|
+
ID000017552
|
|
217
|
+
ID000017581
|
|
218
|
+
ID000017589
|
|
219
|
+
ID000017601
|
|
220
|
+
ID000017606
|
|
221
|
+
ID000017607
|
|
222
|
+
ID000017638
|
|
223
|
+
ID000017672
|
|
224
|
+
ID000017774
|
|
225
|
+
ID000017798
|
|
226
|
+
ID000017802
|
|
227
|
+
ID000017822
|
|
228
|
+
ID000017823
|
|
229
|
+
ID000017848
|
|
230
|
+
ID000017880
|
|
231
|
+
ID000017881
|
|
232
|
+
ID000017901
|
|
233
|
+
ID000017990
|
|
234
|
+
ID000018043
|
|
235
|
+
ID000018216
|
|
236
|
+
ID000018619
|
|
237
|
+
ID000018628
|
|
238
|
+
ID000018691
|
|
239
|
+
ID000018696
|
|
240
|
+
ID000018750
|
|
241
|
+
ID000018765
|
|
242
|
+
ID000018823
|
|
243
|
+
ID000018854
|
|
244
|
+
ID000018870
|
|
245
|
+
ID000018873
|
|
246
|
+
ID000018875
|
|
247
|
+
ID000018877
|
|
248
|
+
ID000018878
|
|
249
|
+
ID000018901
|
|
250
|
+
ID000018902
|
|
251
|
+
ID000018904
|
|
252
|
+
ID000018905
|
|
253
|
+
ID000018936
|
|
254
|
+
ID000018937
|
|
255
|
+
ID000018938
|
|
256
|
+
ID000018939
|
|
257
|
+
ID000018970
|
|
258
|
+
ID000018971
|
|
259
|
+
ID000018972
|
|
260
|
+
ID000019024
|
|
261
|
+
ID000019025
|
|
262
|
+
ID000019038
|
|
263
|
+
ID000019052
|
|
264
|
+
ID000019081
|
|
265
|
+
ID000019082
|
|
266
|
+
ID000019101
|
|
267
|
+
ID000019102
|
|
268
|
+
ID000019145
|
|
269
|
+
ID000019146
|
|
270
|
+
ID000019165
|
|
271
|
+
ID000019166
|
|
272
|
+
ID000019193
|
|
273
|
+
ID000019210
|
|
274
|
+
ID000019264
|
|
275
|
+
ID000019299
|
|
276
|
+
ID000019405
|
|
277
|
+
ID000020309
|
|
278
|
+
ID000020357
|
|
279
|
+
ID000021168
|
|
280
|
+
ID000021170
|
|
281
|
+
ID000021178
|
|
282
|
+
ID000021183
|
|
283
|
+
ID000021261
|
|
284
|
+
ID000021390
|
|
285
|
+
ID000023014
|
|
286
|
+
ID000027519
|
|
287
|
+
ID000028882
|
|
288
|
+
ID000028889
|
|
289
|
+
ID000029205
|
|
290
|
+
ID000029281
|
|
291
|
+
ID000029359
|
|
292
|
+
ID000029365
|
|
293
|
+
ID000029522
|
|
294
|
+
ID000029542
|
|
295
|
+
ID000029554
|
|
296
|
+
ID000029555
|
|
297
|
+
ID000029558
|
|
298
|
+
ID000030669
|
|
299
|
+
ID000030793
|
|
300
|
+
ID000031061
|
|
301
|
+
ID000031069
|
|
302
|
+
ID000031088
|
|
303
|
+
ID000031119
|
|
304
|
+
ID000031141
|
|
305
|
+
ID000031167
|
|
306
|
+
ID000031168
|
|
307
|
+
ID000031207
|
|
308
|
+
ID000031215
|
|
309
|
+
ID000031261
|
|
310
|
+
ID000031278
|
|
311
|
+
ID000031279
|
|
312
|
+
ID000031817
|
|
313
|
+
ID000031820
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.
|
|
5
|
+
"version": "1.5.3",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,11 +11,11 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.
|
|
15
|
-
"@datagrok-libraries/utils": "^0.
|
|
16
|
-
"@datagrok-libraries/ml": "^2.0.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.3.1",
|
|
15
|
+
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
|
+
"@datagrok-libraries/ml": "^2.0.8",
|
|
17
17
|
"cash-dom": "latest",
|
|
18
|
-
"datagrok-api": "^1.4.
|
|
18
|
+
"datagrok-api": "^1.4.12",
|
|
19
19
|
"dayjs": "latest",
|
|
20
20
|
"ts-loader": "^9.2.5",
|
|
21
21
|
"typescript": "^4.4.2"
|
|
@@ -41,7 +41,8 @@
|
|
|
41
41
|
"debug-sequences1": "grok publish --rebuild",
|
|
42
42
|
"release-sequences1": "grok publish --rebuild --release",
|
|
43
43
|
"build-sequences1": "webpack",
|
|
44
|
-
"local
|
|
44
|
+
"debug-local": "grok publish local",
|
|
45
|
+
"release-local": "grok publish local --release",
|
|
45
46
|
"build": "webpack",
|
|
46
47
|
"debug-sequences1-public": "grok publish public --rebuild",
|
|
47
48
|
"release-sequences1-public": "grok publish public --rebuild --release",
|
package/setup.cmd
CHANGED
|
@@ -1,10 +1,19 @@
|
|
|
1
1
|
cd ../../js-api
|
|
2
2
|
call npm install
|
|
3
3
|
call npm link
|
|
4
|
+
cd ../libraries/utils
|
|
5
|
+
call npm install
|
|
6
|
+
call npm link
|
|
7
|
+
call npm link datagrok-api
|
|
8
|
+
cd ../libraries/ml
|
|
9
|
+
call npm install
|
|
10
|
+
call npm link
|
|
11
|
+
call npm link @datagrok-libraries/utils
|
|
4
12
|
cd ../libraries/bio
|
|
5
13
|
call npm install
|
|
6
14
|
call npm link
|
|
15
|
+
call npm link @datagrok-libraries/utils
|
|
7
16
|
cd ../../packages/Bio
|
|
8
17
|
call npm install
|
|
9
|
-
call npm link datagrok-api @datagrok-libraries/bio
|
|
18
|
+
call npm link datagrok-api @datagrok-libraries/bio @datagrok-libraries/utils @datagrok-libraries/ml
|
|
10
19
|
webpack
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -2,17 +2,21 @@
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
-
import {SequenceAlignment, Aligned} from './seq_align';
|
|
6
5
|
|
|
7
6
|
export const _package = new DG.Package();
|
|
8
7
|
|
|
9
|
-
import {
|
|
8
|
+
import {mmSemType} from './const';
|
|
9
|
+
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
12
|
+
import {SequenceAlignment, Aligned} from './seq_align';
|
|
13
|
+
import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
|
+
import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
12
15
|
import {convert} from './utils/convert';
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
16
|
+
import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
17
|
+
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
18
|
+
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
19
|
+
import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
|
|
16
20
|
|
|
17
21
|
//name: sequenceAlignment
|
|
18
22
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -44,16 +48,35 @@ export function vdRegionViewer() {
|
|
|
44
48
|
return new VdRegionsViewer();
|
|
45
49
|
}
|
|
46
50
|
|
|
47
|
-
//top-menu: Bio | Activity Cliffs...
|
|
48
|
-
//name: Activity Cliffs
|
|
51
|
+
//top-menu: Bio | Sequence Activity Cliffs...
|
|
52
|
+
//name: Sequence Activity Cliffs
|
|
49
53
|
//description: detect activity cliffs
|
|
50
54
|
//input: dataframe df [Input data table]
|
|
51
|
-
//input: column
|
|
55
|
+
//input: column sequence {semType: Macromolecule}
|
|
52
56
|
//input: column activities
|
|
53
57
|
//input: double similarity = 80 [Similarity cutoff]
|
|
54
58
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
55
|
-
export async function activityCliffs(df: DG.DataFrame,
|
|
59
|
+
export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, activities: DG.Column,
|
|
56
60
|
similarity: number, methodName: string): Promise<void> {
|
|
61
|
+
const axesNames = getEmbeddingColsNames(df);
|
|
62
|
+
const options = {
|
|
63
|
+
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
64
|
+
};
|
|
65
|
+
const units = sequence!.tags[DG.TAGS.UNITS];
|
|
66
|
+
await getActivityCliffs(
|
|
67
|
+
df,
|
|
68
|
+
sequence,
|
|
69
|
+
axesNames,
|
|
70
|
+
activities,
|
|
71
|
+
similarity,
|
|
72
|
+
'Levenshtein',
|
|
73
|
+
methodName,
|
|
74
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
75
|
+
units,
|
|
76
|
+
sequenceSpace,
|
|
77
|
+
sequenceGetSimilarities,
|
|
78
|
+
drawTooltip,
|
|
79
|
+
(options as any)[methodName]);
|
|
57
80
|
}
|
|
58
81
|
|
|
59
82
|
//top-menu: Bio | Sequence Space...
|
|
@@ -64,18 +87,24 @@ export async function activityCliffs(df: DG.DataFrame, smiles: DG.Column, activi
|
|
|
64
87
|
//input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
|
|
65
88
|
//input: bool plotEmbeddings = true
|
|
66
89
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
67
|
-
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
90
|
+
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
|
|
91
|
+
const embedColsNames = getEmbeddingColsNames(table);
|
|
92
|
+
const chemSpaceParams = {
|
|
93
|
+
seqCol: macroMolecule,
|
|
94
|
+
methodName: methodName,
|
|
95
|
+
similarityMetric: similarityMetric,
|
|
96
|
+
embedAxesNames: embedColsNames
|
|
97
|
+
};
|
|
98
|
+
const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
|
|
99
|
+
const embeddings = sequenceSpaceRes.coordinates;
|
|
100
|
+
for (const col of embeddings)
|
|
101
|
+
table.columns.add(col);
|
|
102
|
+
if (plotEmbeddings) {
|
|
103
|
+
for (const v of grok.shell.views) {
|
|
104
|
+
if (v.name === table.name)
|
|
105
|
+
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
|
|
106
|
+
}
|
|
107
|
+
}
|
|
79
108
|
};
|
|
80
109
|
|
|
81
110
|
//top-menu: Bio | MSA...
|
|
@@ -100,7 +129,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
100
129
|
const wl = await col.dataFrame.plot.fromType('WebLogo', {});
|
|
101
130
|
|
|
102
131
|
for (const v of grok.shell.views) {
|
|
103
|
-
if (v instanceof TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
132
|
+
if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
104
133
|
(v as DG.TableView).dockManager.dock(wl.root, 'down');
|
|
105
134
|
break;
|
|
106
135
|
}
|
|
@@ -122,10 +151,10 @@ function parseMacromolecule(
|
|
|
122
151
|
//description: Opens FASTA file
|
|
123
152
|
//tags: file-handler
|
|
124
153
|
//meta.ext: fasta, fna, ffn, faa, frn, fa
|
|
125
|
-
//input: string
|
|
154
|
+
//input: string fileContent
|
|
126
155
|
//output: list tables
|
|
127
156
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
128
|
-
const regex = /^>(.*)$/gm; // match
|
|
157
|
+
const regex = /^>(.*)$/gm; // match lines starting with >
|
|
129
158
|
const descriptionsArray = [];
|
|
130
159
|
const sequencesArray: string[] = [];
|
|
131
160
|
let startOfSequence = 0;
|
|
@@ -141,6 +170,22 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
141
170
|
const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
142
171
|
const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
143
172
|
sequenceCol.semType = 'Macromolecule';
|
|
173
|
+
|
|
174
|
+
const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
|
|
175
|
+
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
176
|
+
const alphabetCandidates: [string, Set<string>][] = [
|
|
177
|
+
['NT', new Set(Object.keys(Nucleotides.Names))],
|
|
178
|
+
['PT', new Set(Object.keys(Aminoacids.Names))],
|
|
179
|
+
];
|
|
180
|
+
// Calculate likelihoods for alphabet_candidates
|
|
181
|
+
const alphabetCandidatesSim: number[] = alphabetCandidates.map(
|
|
182
|
+
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
183
|
+
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
184
|
+
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
185
|
+
sequenceCol.semType = mmSemType;
|
|
186
|
+
const units: string = `fasta:${seqType}:${alphabet}`;
|
|
187
|
+
sequenceCol.setTag(DG.TAGS.UNITS, units);
|
|
188
|
+
|
|
144
189
|
return [DG.DataFrame.fromColumns([
|
|
145
190
|
descriptionsArrayCol,
|
|
146
191
|
sequenceCol,
|
|
@@ -153,4 +198,4 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
153
198
|
//input: column col {semType: Macromolecule}
|
|
154
199
|
export function convertPanel(col: DG.Column): void {
|
|
155
200
|
convert(col);
|
|
156
|
-
}
|
|
201
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import {after, before, category, expect, expectFloat, test} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import {createTableView, readDataframe} from './utils';
|
|
4
|
+
import {_package} from '../package-test';
|
|
5
|
+
import {getEmbeddingColsNames, sequenceSpace} from '../utils/sequence-space';
|
|
6
|
+
import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
|
|
7
|
+
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
category('activityCliffs', async () => {
|
|
11
|
+
let actCliffsTableView: DG.TableView;
|
|
12
|
+
let actCliffsDf: DG.DataFrame;
|
|
13
|
+
|
|
14
|
+
before(async () => {
|
|
15
|
+
actCliffsTableView = await createTableView('sample_MSA.csv');
|
|
16
|
+
actCliffsDf = await readDataframe('sample_MSA.csv');
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
test('activityCliffsOpen', async () => {
|
|
20
|
+
const axesNames = getEmbeddingColsNames(actCliffsDf);
|
|
21
|
+
const units = actCliffsDf.col('MSA')!.tags[DG.TAGS.UNITS];
|
|
22
|
+
const options = {
|
|
23
|
+
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
24
|
+
};
|
|
25
|
+
const scatterPlot = await getActivityCliffs(
|
|
26
|
+
actCliffsDf,
|
|
27
|
+
actCliffsDf.col('MSA')!,
|
|
28
|
+
axesNames,
|
|
29
|
+
actCliffsDf.col('Activity')!,
|
|
30
|
+
50,
|
|
31
|
+
'Levenshtein',
|
|
32
|
+
't-SNE',
|
|
33
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
34
|
+
units,
|
|
35
|
+
sequenceSpace,
|
|
36
|
+
sequenceGetSimilarities,
|
|
37
|
+
drawTooltip);
|
|
38
|
+
|
|
39
|
+
expect(scatterPlot != null, true);
|
|
40
|
+
|
|
41
|
+
const cliffsLink = (Array.from(scatterPlot.root.children) as Element[])
|
|
42
|
+
.filter((it) => it.className === 'ui-btn ui-btn-ok');
|
|
43
|
+
expect((cliffsLink[0] as HTMLElement).innerText, '101 cliffs');
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
after(async () => {
|
|
47
|
+
actCliffsTableView.close();
|
|
48
|
+
});
|
|
49
|
+
});
|