@datagrok/bio 1.5.2 → 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ smiles
2
+ O=C1CN=C(c2ccccc2N1)C3CCCCC3
3
+ CN1C(=O)CN=C(c2ccccc12)C3CCCCC3
4
+ CCCCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
5
+ CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
6
+ O=C1CN=C(c2ccccc2N1CC3CCCCC3)C4CCCCC4
7
+ O=C1CN=C(c2cc(Cl)ccc2N1)C3CCCCC3
8
+ CN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
9
+ CCCCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
10
+ CC(C)CCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
11
+ O=C1CN=C(c2cc(Cl)ccc2N1CC3CCCCC3)C4CCCCC4
12
+ N#Cc1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
13
+ CN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
14
+ CCCCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
15
+ CC(C)CCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
16
+ N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
17
+ CN(C)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
18
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)C3CCCCC3
19
+ CCCCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
20
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
21
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
22
+ O=C1CN=C(c2cc(F)ccc2N1)C3CCCCC3
23
+ CN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
24
+ CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
25
+ CC(C)CCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
26
+ O=C1CN=C(c2cc(F)ccc2N1CC3CCCCC3)C4CCCCC4
27
+ O=C1CN=C(c2cc(Br)ccc2N1)C3CCCCC3
28
+ CN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
29
+ CCCCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
30
+ CC(C)CCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
31
+ O=C1CN=C(c2cc(Br)ccc2N1CC3CCCCC3)C4CCCCC4
32
+ O=C1CN=C(c2cc(I)ccc2N1)C3CCCCC3
33
+ CN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
34
+ CCCCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
35
+ CC(C)CCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
36
+ O=C1CN=C(c2cc(I)ccc2N1CC3CCCCC3)C4CCCCC4
37
+ NC(=O)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
38
+ CN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
39
+ CCCCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
40
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
41
+ NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
42
+ O=C1CN=C(c2cc(ccc2N1)[N+](=O)[O-])C3CCCCC3
43
+ CN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
44
+ CCCCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
45
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
46
+ O=C1CN=C(c2cc(ccc2N1CC3CCCCC3)[N+](=O)[O-])C4CCCCC4
47
+ O=C1CN=C(c2ccccc2)c3ccccc3N1
48
+ CN1C(=O)CN=C(c2ccccc2)c3ccccc13
49
+ CCCCN1C(=O)CN=C(c2ccccc2)c3ccccc13
50
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3ccccc13
51
+ O=C1CN=C(c2ccccc2)c3ccccc3N1CC4CCCCC4
52
+ O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1
53
+ CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
54
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
55
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
56
+ O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1CC4CCCCC4
57
+ N#Cc1ccc2NC(=O)CN=C(c3ccccc3)c2c1
58
+ CN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
59
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
60
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
61
+ N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
62
+ CN(C)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
63
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)c3ccccc3
64
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
65
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
66
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
67
+ O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1
68
+ CN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
69
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
70
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
71
+ O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1CC4CCCCC4
72
+ O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1
73
+ CN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
74
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
75
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
76
+ O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1CC4CCCCC4
77
+ O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1
78
+ CN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
79
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
80
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
81
+ O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1CC4CCCCC4
82
+ NC(=O)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
83
+ CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
84
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
85
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
86
+ NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
87
+ O=C1CN=C(c2ccccc2)c3cc(ccc3N1)[N+](=O)[O-]
88
+ CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
89
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
90
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
91
+ O=C1CN=C(c2ccccc2)c3cc(ccc3N1CC4CCCCC4)[N+](=O)[O-]
92
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccccc23
93
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccccc23
94
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3ccccc13
95
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccccc23
96
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccccc24
97
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Cl)cc23
98
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Cl)cc23
99
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Cl)ccc13
100
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Cl)cc23
101
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Cl)cc24
102
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(C#N)cc23
103
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(C#N)cc23
104
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(C#N)ccc13
105
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(C#N)cc23
106
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(C#N)cc24
107
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)N(C)C
108
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)N(C)C
109
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)N(C)C
110
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)N(C)C
111
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)N(C)C
112
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(F)cc23
113
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(F)cc23
114
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(F)ccc13
115
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(F)cc23
116
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(F)cc24
117
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Br)cc23
118
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Br)cc23
119
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Br)ccc13
120
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Br)cc23
121
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Br)cc24
122
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(I)cc23
123
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(I)cc23
124
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(I)ccc13
125
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(I)cc23
126
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(I)cc24
127
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)C(N)=O
128
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)C(N)=O
129
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)C(N)=O
130
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)C(N)=O
131
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)C(N)=O
132
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)[N+](=O)[O-]
133
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)[N+](=O)[O-]
134
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)[N+](=O)[O-]
135
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)[N+](=O)[O-]
136
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)[N+](=O)[O-]
137
+ CC1N=C(c2ccccc2NC1=O)C3CCCCC3
138
+ CC1N=C(c2ccccc2N(C)C1=O)C3CCCCC3
139
+ CCCCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
140
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
141
+ CC1N=C(c2ccccc2N(CC3CCCCC3)C1=O)C4CCCCC4
142
+ CC1N=C(c2cc(Cl)ccc2NC1=O)C3CCCCC3
143
+ CC1N=C(c2cc(Cl)ccc2N(C)C1=O)C3CCCCC3
144
+ CCCCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
145
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
146
+ CC1N=C(c2cc(Cl)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
147
+ CC1N=C(c2cc(C#N)ccc2NC1=O)C3CCCCC3
148
+ CC1N=C(c2cc(C#N)ccc2N(C)C1=O)C3CCCCC3
149
+ CCCCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
150
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
151
+ CC1N=C(c2cc(C#N)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
152
+ CC1N=C(c2cc(ccc2NC1=O)N(C)C)C3CCCCC3
153
+ CC1N=C(c2cc(ccc2N(C)C1=O)N(C)C)C3CCCCC3
154
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
155
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
156
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)N(C)C)C4CCCCC4
157
+ CC1N=C(c2cc(F)ccc2NC1=O)C3CCCCC3
158
+ CC1N=C(c2cc(F)ccc2N(C)C1=O)C3CCCCC3
159
+ CCCCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
160
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
161
+ CC1N=C(c2cc(F)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
162
+ CC1N=C(c2cc(Br)ccc2NC1=O)C3CCCCC3
163
+ CC1N=C(c2cc(Br)ccc2N(C)C1=O)C3CCCCC3
164
+ CCCCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
165
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
166
+ CC1N=C(c2cc(Br)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
167
+ CC1N=C(c2cc(I)ccc2NC1=O)C3CCCCC3
168
+ CC1N=C(c2cc(I)ccc2N(C)C1=O)C3CCCCC3
169
+ CCCCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
170
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
171
+ CC1N=C(c2cc(I)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
172
+ CC1N=C(c2cc(ccc2NC1=O)C(N)=O)C3CCCCC3
173
+ CC1N=C(c2cc(ccc2N(C)C1=O)C(N)=O)C3CCCCC3
174
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
175
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
176
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)C(N)=O)C4CCCCC4
177
+ CC1N=C(c2cc(ccc2NC1=O)[N+](=O)[O-])C3CCCCC3
178
+ CC1N=C(c2cc(ccc2N(C)C1=O)[N+](=O)[O-])C3CCCCC3
179
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
180
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
181
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)[N+](=O)[O-])C4CCCCC4
182
+ CC1N=C(c2ccccc2)c3ccccc3NC1=O
183
+ CC1N=C(c2ccccc2)c3ccccc3N(C)C1=O
184
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
185
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
186
+ CC1N=C(c2ccccc2)c3ccccc3N(CC4CCCCC4)C1=O
187
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3NC1=O
188
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(C)C1=O
189
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
190
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
191
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(CC4CCCCC4)C1=O
192
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3NC1=O
193
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(C)C1=O
194
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
195
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
196
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(CC4CCCCC4)C1=O
197
+ CC1N=C(c2ccccc2)c3cc(ccc3NC1=O)N(C)C
198
+ CC1N=C(c2ccccc2)c3cc(ccc3N(C)C1=O)N(C)C
199
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
200
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
201
+ CC1N=C(c2ccccc2)c3cc(ccc3N(CC4CCCCC4)C1=O)N(C)C
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@datagrok/bio",
3
3
  "beta": false,
4
4
  "friendlyName": "Bio",
5
- "version": "1.5.2",
5
+ "version": "1.5.5",
6
6
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
7
  "repository": {
8
8
  "type": "git",
package/src/const.ts CHANGED
@@ -2,4 +2,3 @@ import * as ui from 'datagrok-api/ui';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- export const mmSemType = 'Macromolecule';
package/src/package.ts CHANGED
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  export const _package = new DG.Package();
7
7
 
8
- import {mmSemType} from './const';
9
8
  import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
9
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
10
  import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
@@ -120,20 +119,23 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
120
119
  //top-menu: Bio | Composition Analysis
121
120
  //output: viewer result
122
121
  export async function compositionAnalysis(): Promise<void> {
123
- const col = grok.shell.t.columns.bySemType('Macromolecule');//DG.SEMTYPE.MACROMOLECULE);
124
- if (col === null) {
122
+ // Higher priority for columns with MSA data to show with WebLogo.
123
+ const tv = grok.shell.tv;
124
+ const df = tv.dataFrame;
125
+ const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
126
+ let col: DG.Column | undefined = semTypeColList.find((col) => {
127
+ const units = col.getTag(DG.TAGS.UNITS);
128
+ return units ? units.indexOf('MSA') !== -1 : false;
129
+ });
130
+ if (!col)
131
+ col = semTypeColList[0];
132
+
133
+ if (!col) {
125
134
  grok.shell.error('Current table does not contain sequences');
126
135
  return;
127
136
  }
128
137
 
129
- const wl = await col.dataFrame.plot.fromType('WebLogo', {});
130
-
131
- for (const v of grok.shell.views) {
132
- if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
133
- (v as DG.TableView).dockManager.dock(wl.root, 'down');
134
- break;
135
- }
136
- }
138
+ tv.addViewer('WebLogo', {sequenceColumnName: col.name});
137
139
  }
138
140
 
139
141
  // helper function for importFasta
@@ -182,7 +184,7 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
182
184
  (c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
183
185
  const maxCos = Math.max(...alphabetCandidatesSim);
184
186
  const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
185
- sequenceCol.semType = mmSemType;
187
+ sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
186
188
  const units: string = `fasta:${seqType}:${alphabet}`;
187
189
  sequenceCol.setTag(DG.TAGS.UNITS, units);
188
190
 
@@ -0,0 +1,31 @@
1
+ import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ // import {mmSemType} from '../const';
8
+ // import {importFasta} from '../package';
9
+
10
+ category('converters', () => {
11
+ // test('a', async () => {await _a();});
12
+ // test('b', async () => {await _b();});
13
+ test('testFastaToSeparator', async () => { await _testFastaToSeparator(); });
14
+ test('testSeparatorToFasta', async () => { await _testSeparatorToFasta(); });
15
+ });
16
+
17
+ // export async function _a() {
18
+ // expect(1, 1);
19
+ // }
20
+ //
21
+ // export async function _b() {
22
+ // expect(1, 2);
23
+ // }
24
+
25
+ export async function _testFastaToSeparator() {
26
+ expect(1, 1);
27
+ }
28
+
29
+ export async function _testSeparatorToFasta() {
30
+ expect(1, 2);
31
+ }
@@ -4,7 +4,6 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
- import {mmSemType} from '../const';
8
7
  import {importFasta} from '../package';
9
8
 
10
9
  type DfReaderFunc = () => Promise<DG.DataFrame>;
@@ -93,12 +92,18 @@ MWRSWY-CKHP
93
92
  peptidesComplex = 'PeptidesComplex',
94
93
  fastaCsv = 'FastaCsv',
95
94
  msaComplex = 'MsaComplex',
95
+ idCsv = 'IdCsv',
96
+ sarSmallCsv = 'SarSmallCsv',
97
+ HelmCsv = 'HelmCsv',
96
98
  }
97
99
 
98
100
  const samples: { [key: string]: string } = {
99
- 'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_aligned.csv',
101
+ 'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_msa.csv',
100
102
  'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
101
103
  'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
104
+ 'IdCsv': 'System:AppData/Bio/samples/id.csv',
105
+ 'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
106
+ 'HelmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
102
107
  };
103
108
 
104
109
  const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
@@ -183,20 +188,36 @@ MWRSWY-CKHP
183
188
  test('samplesMsaComplexNegativeActivity', async () => {
184
189
  await _testNeg(readSamplesCsv(Samples.msaComplex), 'Activity');
185
190
  });
191
+
192
+ test('samplesIdCsvNegativeID', async () => {
193
+ await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
194
+ });
195
+
196
+ test('samplesSarSmallCsvNegativeSmiles', async () => {
197
+ await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
198
+ });
199
+
200
+ test('samplesHelmCsvHELM', async () => {
201
+ await _testPos(readSamplesCsv(Samples.HelmCsv), 'HELM', 'HELM', null);
202
+ });
203
+
204
+ test('samplesHelmCsvNegativeActivity', async () => {
205
+ await _testNeg(readSamplesCsv(Samples.HelmCsv), 'Activity');
206
+ });
186
207
  });
187
208
 
188
209
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
189
210
  const df: DG.DataFrame = await readDf();
190
211
 
191
212
  const col: DG.Column = df.col(colName)!;
192
- expect(col.semType === mmSemType, false);
213
+ expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
193
214
  }
194
215
 
195
- export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
216
+ export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
196
217
  const df: DG.DataFrame = await readDf();
197
218
 
198
219
  const col: DG.Column = df.col(colName)!;
199
- expect(col.semType === mmSemType, true);
220
+ expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
200
221
  expect(col.getTag(DG.TAGS.UNITS), units);
201
222
  if (separator)
202
223
  expect(col.getTag('separator'), separator);
@@ -207,7 +228,7 @@ export async function _testN1(csvDfN1: string) {
207
228
  await grok.data.detectSemanticTypes(dfN1);
208
229
 
209
230
  const col: DG.Column = dfN1.col('seq')!;
210
- expect(col.semType, mmSemType);
231
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
211
232
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
212
233
  }
213
234
 
@@ -216,7 +237,7 @@ export async function _testAA1(csvDfAA1: string) {
216
237
  await grok.data.detectSemanticTypes(dfAA1);
217
238
 
218
239
  const col: DG.Column = dfAA1.col('seq')!;
219
- expect(col.semType, mmSemType);
240
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
220
241
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
221
242
  }
222
243
 
@@ -225,7 +246,7 @@ export async function _testMsaN1(csvDfMsaN1: string) {
225
246
  await grok.data.detectSemanticTypes(dfMsaN1);
226
247
 
227
248
  const col: DG.Column = dfMsaN1.col('seq')!;
228
- expect(col.semType, mmSemType);
249
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
229
250
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
230
251
  }
231
252
 
@@ -234,7 +255,7 @@ export async function _testMsaAA1(csvDfMsaAA1: string) {
234
255
  await grok.data.detectSemanticTypes(dfMsaAA1);
235
256
 
236
257
  const col: DG.Column = dfMsaAA1.col('seq')!;
237
- expect(col.semType, mmSemType);
258
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
238
259
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
239
260
  }
240
261
 
@@ -243,7 +264,7 @@ export async function _testSepNt(csv: string, separator: string) {
243
264
  await grok.data.detectSemanticTypes(df);
244
265
 
245
266
  const col: DG.Column = df.col('seq')!;
246
- expect(col.semType, mmSemType);
267
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
247
268
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:NT');
248
269
  expect(col.getTag('separator'), separator);
249
270
  }
@@ -253,7 +274,7 @@ export async function _testSepPt(csv: string, separator: string) {
253
274
  await grok.data.detectSemanticTypes(df);
254
275
 
255
276
  const col: DG.Column = df.col('seq')!;
256
- expect(col.semType, mmSemType);
277
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
257
278
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:PT');
258
279
  expect(col.getTag('separator'), separator);
259
280
  }
@@ -263,7 +284,7 @@ export async function _testSepUn(csv: string, separator: string) {
263
284
  await grok.data.detectSemanticTypes(df);
264
285
 
265
286
  const col: DG.Column = df.col('seq')!;
266
- expect(col.semType, mmSemType);
287
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
267
288
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
268
289
  expect(col.getTag('separator'), separator);
269
290
  }
@@ -273,7 +294,7 @@ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
273
294
  await grok.data.detectSemanticTypes(dfSepMsaN1);
274
295
 
275
296
  const col: DG.Column = dfSepMsaN1.col('seq')!;
276
- expect(col.semType, mmSemType);
297
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
277
298
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
278
299
  }
279
300
 
@@ -283,7 +304,7 @@ export async function _testSamplesFastaCsvPt() {
283
304
  await grok.data.detectSemanticTypes(df);
284
305
 
285
306
  const col: DG.Column = df.col('sequence')!;
286
- expect(col.semType, mmSemType);
307
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
287
308
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
288
309
  expect(col.getTag('separator'), null);
289
310
  }
@@ -293,7 +314,7 @@ export async function _testSamplesFastaFastaPt() {
293
314
  const df: DG.DataFrame = importFasta(fasta)[0];
294
315
 
295
316
  const col: DG.Column = df.col('sequence')!;
296
- expect(col.semType, mmSemType);
317
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
297
318
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
298
319
  expect(col.getTag('separator'), null);
299
320
  }
@@ -304,7 +325,7 @@ export async function _testSamplesPeptidesComplexUn() {
304
325
  await grok.data.detectSemanticTypes(df);
305
326
 
306
327
  const col: DG.Column = df.col('AlignedSequence')!;
307
- expect(col.semType, mmSemType);
328
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
308
329
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
309
330
  expect(col.getTag('separator'), '-');
310
331
  }
@@ -1,11 +1,21 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
+ import {NotationConverter} from './notation-converter';
3
4
 
5
+ /**
6
+ * Converts notations of a Macromolecule column
7
+ *
8
+ * @param {DG.column} col Column with 'Macromolecule' semantic type
9
+ */
4
10
  export function convert(col: DG.Column): void {
5
11
  const current = col.tags[DG.TAGS.UNITS];
6
12
  //TODO: read all notations
7
- const notations = ['fasta:SEQ:NT', 'fasta:SEQ:PT', 'fasta:SEQ.MSA:NT', 'fasta:SEQ.MSA:PT', 'HELM'];
8
- const choices = ui.choiceInput('convert to', '', notations.filter((e) => e !== current));
13
+ const units = [
14
+ 'fasta',
15
+ 'separator',
16
+ 'HELM'
17
+ ];
18
+ const choices = ui.choiceInput('convert to', '', units.filter((e) => e !== current));
9
19
 
10
20
  ui.dialog('Convert sequence')
11
21
  .add(
@@ -17,7 +27,9 @@ export function convert(col: DG.Column): void {
17
27
  )
18
28
  .onOK(() => {
19
29
  //TODO: create new converted column
20
- //col.dataFrame.columns.add();
30
+ const converter = new NotationConverter(col, choices.value!);
31
+ const newColumn = converter.convert();
32
+ col.dataFrame.columns.add(newColumn);
21
33
  })
22
34
  .show();
23
35
  }
@@ -29,12 +29,12 @@ function _fastaToStrings(fasta: string): string[] {
29
29
  /**
30
30
  * Runs Aioli environment with kalign tool.
31
31
  *
32
- * @param {DG.Column} col Column with sequences.
32
+ * @param {DG.Column} srcCol Column with sequences.
33
33
  * @param {boolean} isAligned Whether the column is aligned.
34
34
  * @return {Promise<DG.Column>} Aligned sequences.
35
35
  */
36
- export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.Column> {
37
- let sequences = col.toList();
36
+ export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<DG.Column> {
37
+ let sequences = srcCol.toList();
38
38
 
39
39
  if (isAligned)
40
40
  sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
@@ -55,15 +55,20 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
55
55
  console.warn(output);
56
56
 
57
57
  const aligned = _fastaToStrings(buf).slice(0, sequences.length);
58
- const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, aligned);
59
- alignedCol.setTag(DG.TAGS.UNITS, '');
60
- alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
61
- return alignedCol;
58
+ const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
59
+
60
+ // units
61
+ const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
62
+ const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
63
+
64
+ tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
65
+ tgtCol.semType = C.SEM_TYPES.Macro_Molecule;
66
+ return tgtCol;
62
67
  }
63
68
 
64
69
  export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
65
70
  const sequencesCount = col.length;
66
- const delta = sequencesCount/100;
71
+ const delta = sequencesCount / 100;
67
72
 
68
73
  for (let i = delta; i < sequencesCount; i += delta) {
69
74
  try {
@@ -0,0 +1,131 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ // export const enum NOTATION {
4
+ // // these values can be changed to "user-friendly" ones later on
5
+ // FASTA = 'fasta',
6
+ // SEPARATOR = 'separator',
7
+ // HELM = 'helm'
8
+ // }
9
+
10
+ export class NotationConverter {
11
+ private _sourceColumn: DG.Column; // the column to be converted
12
+ private _currentUnits: string; // units of the form fasta:SEQ:NT, etc.
13
+ private _sourceNotation: string; // current notation (without :SEQ:NT, etc.)
14
+ private _targetNotation: string;
15
+
16
+ private get sourceColumn(): DG.Column { return this._sourceColumn; }
17
+ private get currentUnits(): string { return this._currentUnits; }
18
+ private get sourceNotation(): string { return this._sourceNotation; }
19
+ private get targetNotation(): string { return this._targetNotation; }
20
+
21
+ // these values can be changed to "user-friendly" ones later on
22
+ private _fasta = 'fasta';
23
+ private _separator = 'separator';
24
+ private _helm = 'helm';
25
+
26
+ public isFasta(): boolean { return this.sourceNotation == this._fasta; }
27
+ public isSeparator(): boolean { return this.sourceNotation == this._separator; }
28
+ public isHelm(): boolean { return this.sourceNotation == this._helm; }
29
+
30
+ private determineSourceNotation() : string {
31
+ if (this.currentUnits.toLowerCase().startsWith('fasta'))
32
+ return 'fasta';
33
+ else if (this.currentUnits.toLowerCase().startsWith('separator'))
34
+ return 'separator';
35
+ else
36
+ // TODO: handle possible exceptions
37
+ return 'HELM';
38
+ }
39
+
40
+ private convertFastaToSeparator(): DG.Column {
41
+ // TODO: implementation
42
+ const len = this.sourceColumn.length;
43
+ const newColName = 'converted';
44
+ const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('fasta2sep'));
45
+ newColumn.semType = 'Macromolecule';
46
+ return newColumn;
47
+ }
48
+
49
+ private convertFastaToHelm(): DG.Column {
50
+ // TODO: implementation
51
+ const len = this.sourceColumn.length;
52
+ const newColName = 'converted';
53
+ const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('fasta2helm'));
54
+ newColumn.semType = 'Macromolecule';
55
+ return newColumn;
56
+ }
57
+
58
+ private convertSeparatorToFasta(): DG.Column {
59
+ // TODO: implementation
60
+ const len = this.sourceColumn.length;
61
+ const newColName = 'converted';
62
+ const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('sep2fasta'));
63
+ newColumn.semType = 'Macromolecule';
64
+ return newColumn;
65
+ }
66
+
67
+ private convertSeparatorToHelm(): DG.Column {
68
+ // TODO: implementation
69
+ const len = this.sourceColumn.length;
70
+ const newColName = 'converted';
71
+ const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('sep2helm'));
72
+ newColumn.semType = 'Macromolecule';
73
+ return newColumn;
74
+ }
75
+
76
+ private convertHelmToFasta(): DG.Column {
77
+ // TODO: implementation
78
+ const len = this.sourceColumn.length;
79
+ const newColName = 'converted';
80
+ const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('helm2fasta'));
81
+ newColumn.semType = 'Macromolecule';
82
+ return newColumn;
83
+ }
84
+
85
+ private convertHelmToSeparator(): DG.Column {
86
+ // TODO: implementation
87
+ const len = this.sourceColumn.length;
88
+ const newColName = 'converted';
89
+ const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('helm2sep'));
90
+ newColumn.semType = 'Macromolecule';
91
+ return newColumn;
92
+ }
93
+
94
+ // TODO: write the bodies of converter methods
95
+ public convert() : DG.Column {
96
+ if (
97
+ this.sourceNotation == this._fasta &&
98
+ this.targetNotation == this._separator
99
+ )
100
+ return this.convertFastaToSeparator();
101
+ else if (
102
+ this.sourceNotation == this._fasta &&
103
+ this.targetNotation == this._helm
104
+ )
105
+ return this.convertFastaToHelm();
106
+ else if (
107
+ this.sourceNotation == this._separator &&
108
+ this.targetNotation == this._fasta
109
+ )
110
+ return this.convertSeparatorToFasta();
111
+ else if (
112
+ this.sourceNotation == this._separator &&
113
+ this.targetNotation == this._helm
114
+ )
115
+ return this.convertSeparatorToHelm();
116
+ else if (
117
+ this.sourceNotation == this._helm &&
118
+ this.targetNotation == this._fasta
119
+ )
120
+ return this.convertHelmToFasta();
121
+ else
122
+ return this.convertHelmToSeparator();
123
+ }
124
+
125
+ public constructor(col: DG.Column, target: string) {
126
+ this._sourceColumn = col;
127
+ this._currentUnits = this._sourceColumn.tags[DG.TAGS.UNITS];
128
+ this._sourceNotation = this.determineSourceNotation();
129
+ this._targetNotation = target;
130
+ }
131
+ }