@datagrok/bio 1.5.3 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ smiles
2
+ O=C1CN=C(c2ccccc2N1)C3CCCCC3
3
+ CN1C(=O)CN=C(c2ccccc12)C3CCCCC3
4
+ CCCCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
5
+ CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
6
+ O=C1CN=C(c2ccccc2N1CC3CCCCC3)C4CCCCC4
7
+ O=C1CN=C(c2cc(Cl)ccc2N1)C3CCCCC3
8
+ CN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
9
+ CCCCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
10
+ CC(C)CCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
11
+ O=C1CN=C(c2cc(Cl)ccc2N1CC3CCCCC3)C4CCCCC4
12
+ N#Cc1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
13
+ CN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
14
+ CCCCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
15
+ CC(C)CCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
16
+ N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
17
+ CN(C)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
18
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)C3CCCCC3
19
+ CCCCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
20
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
21
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
22
+ O=C1CN=C(c2cc(F)ccc2N1)C3CCCCC3
23
+ CN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
24
+ CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
25
+ CC(C)CCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
26
+ O=C1CN=C(c2cc(F)ccc2N1CC3CCCCC3)C4CCCCC4
27
+ O=C1CN=C(c2cc(Br)ccc2N1)C3CCCCC3
28
+ CN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
29
+ CCCCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
30
+ CC(C)CCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
31
+ O=C1CN=C(c2cc(Br)ccc2N1CC3CCCCC3)C4CCCCC4
32
+ O=C1CN=C(c2cc(I)ccc2N1)C3CCCCC3
33
+ CN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
34
+ CCCCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
35
+ CC(C)CCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
36
+ O=C1CN=C(c2cc(I)ccc2N1CC3CCCCC3)C4CCCCC4
37
+ NC(=O)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
38
+ CN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
39
+ CCCCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
40
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
41
+ NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
42
+ O=C1CN=C(c2cc(ccc2N1)[N+](=O)[O-])C3CCCCC3
43
+ CN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
44
+ CCCCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
45
+ CC(C)CCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
46
+ O=C1CN=C(c2cc(ccc2N1CC3CCCCC3)[N+](=O)[O-])C4CCCCC4
47
+ O=C1CN=C(c2ccccc2)c3ccccc3N1
48
+ CN1C(=O)CN=C(c2ccccc2)c3ccccc13
49
+ CCCCN1C(=O)CN=C(c2ccccc2)c3ccccc13
50
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3ccccc13
51
+ O=C1CN=C(c2ccccc2)c3ccccc3N1CC4CCCCC4
52
+ O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1
53
+ CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
54
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
55
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
56
+ O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1CC4CCCCC4
57
+ N#Cc1ccc2NC(=O)CN=C(c3ccccc3)c2c1
58
+ CN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
59
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
60
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
61
+ N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
62
+ CN(C)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
63
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)c3ccccc3
64
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
65
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
66
+ CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
67
+ O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1
68
+ CN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
69
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
70
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
71
+ O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1CC4CCCCC4
72
+ O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1
73
+ CN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
74
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
75
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
76
+ O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1CC4CCCCC4
77
+ O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1
78
+ CN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
79
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
80
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
81
+ O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1CC4CCCCC4
82
+ NC(=O)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
83
+ CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
84
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
85
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
86
+ NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
87
+ O=C1CN=C(c2ccccc2)c3cc(ccc3N1)[N+](=O)[O-]
88
+ CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
89
+ CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
90
+ CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
91
+ O=C1CN=C(c2ccccc2)c3cc(ccc3N1CC4CCCCC4)[N+](=O)[O-]
92
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccccc23
93
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccccc23
94
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3ccccc13
95
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccccc23
96
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccccc24
97
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Cl)cc23
98
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Cl)cc23
99
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Cl)ccc13
100
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Cl)cc23
101
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Cl)cc24
102
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(C#N)cc23
103
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(C#N)cc23
104
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(C#N)ccc13
105
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(C#N)cc23
106
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(C#N)cc24
107
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)N(C)C
108
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)N(C)C
109
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)N(C)C
110
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)N(C)C
111
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)N(C)C
112
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(F)cc23
113
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(F)cc23
114
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(F)ccc13
115
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(F)cc23
116
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(F)cc24
117
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Br)cc23
118
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Br)cc23
119
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Br)ccc13
120
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Br)cc23
121
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Br)cc24
122
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(I)cc23
123
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(I)cc23
124
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(I)ccc13
125
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(I)cc23
126
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(I)cc24
127
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)C(N)=O
128
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)C(N)=O
129
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)C(N)=O
130
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)C(N)=O
131
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)C(N)=O
132
+ COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)[N+](=O)[O-]
133
+ COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)[N+](=O)[O-]
134
+ CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)[N+](=O)[O-]
135
+ COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)[N+](=O)[O-]
136
+ COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)[N+](=O)[O-]
137
+ CC1N=C(c2ccccc2NC1=O)C3CCCCC3
138
+ CC1N=C(c2ccccc2N(C)C1=O)C3CCCCC3
139
+ CCCCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
140
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
141
+ CC1N=C(c2ccccc2N(CC3CCCCC3)C1=O)C4CCCCC4
142
+ CC1N=C(c2cc(Cl)ccc2NC1=O)C3CCCCC3
143
+ CC1N=C(c2cc(Cl)ccc2N(C)C1=O)C3CCCCC3
144
+ CCCCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
145
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
146
+ CC1N=C(c2cc(Cl)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
147
+ CC1N=C(c2cc(C#N)ccc2NC1=O)C3CCCCC3
148
+ CC1N=C(c2cc(C#N)ccc2N(C)C1=O)C3CCCCC3
149
+ CCCCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
150
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
151
+ CC1N=C(c2cc(C#N)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
152
+ CC1N=C(c2cc(ccc2NC1=O)N(C)C)C3CCCCC3
153
+ CC1N=C(c2cc(ccc2N(C)C1=O)N(C)C)C3CCCCC3
154
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
155
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
156
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)N(C)C)C4CCCCC4
157
+ CC1N=C(c2cc(F)ccc2NC1=O)C3CCCCC3
158
+ CC1N=C(c2cc(F)ccc2N(C)C1=O)C3CCCCC3
159
+ CCCCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
160
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
161
+ CC1N=C(c2cc(F)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
162
+ CC1N=C(c2cc(Br)ccc2NC1=O)C3CCCCC3
163
+ CC1N=C(c2cc(Br)ccc2N(C)C1=O)C3CCCCC3
164
+ CCCCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
165
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
166
+ CC1N=C(c2cc(Br)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
167
+ CC1N=C(c2cc(I)ccc2NC1=O)C3CCCCC3
168
+ CC1N=C(c2cc(I)ccc2N(C)C1=O)C3CCCCC3
169
+ CCCCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
170
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
171
+ CC1N=C(c2cc(I)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
172
+ CC1N=C(c2cc(ccc2NC1=O)C(N)=O)C3CCCCC3
173
+ CC1N=C(c2cc(ccc2N(C)C1=O)C(N)=O)C3CCCCC3
174
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
175
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
176
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)C(N)=O)C4CCCCC4
177
+ CC1N=C(c2cc(ccc2NC1=O)[N+](=O)[O-])C3CCCCC3
178
+ CC1N=C(c2cc(ccc2N(C)C1=O)[N+](=O)[O-])C3CCCCC3
179
+ CCCCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
180
+ CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
181
+ CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)[N+](=O)[O-])C4CCCCC4
182
+ CC1N=C(c2ccccc2)c3ccccc3NC1=O
183
+ CC1N=C(c2ccccc2)c3ccccc3N(C)C1=O
184
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
185
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
186
+ CC1N=C(c2ccccc2)c3ccccc3N(CC4CCCCC4)C1=O
187
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3NC1=O
188
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(C)C1=O
189
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
190
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
191
+ CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(CC4CCCCC4)C1=O
192
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3NC1=O
193
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(C)C1=O
194
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
195
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
196
+ CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(CC4CCCCC4)C1=O
197
+ CC1N=C(c2ccccc2)c3cc(ccc3NC1=O)N(C)C
198
+ CC1N=C(c2ccccc2)c3cc(ccc3N(C)C1=O)N(C)C
199
+ CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
200
+ CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
201
+ CC1N=C(c2ccccc2)c3cc(ccc3N(CC4CCCCC4)C1=O)N(C)C
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@datagrok/bio",
3
3
  "beta": false,
4
4
  "friendlyName": "Bio",
5
- "version": "1.5.3",
5
+ "version": "1.5.4",
6
6
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
7
  "repository": {
8
8
  "type": "git",
package/src/const.ts CHANGED
@@ -2,4 +2,3 @@ import * as ui from 'datagrok-api/ui';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- export const mmSemType = 'Macromolecule';
package/src/package.ts CHANGED
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  export const _package = new DG.Package();
7
7
 
8
- import {mmSemType} from './const';
9
8
  import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
9
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
10
  import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
@@ -120,20 +119,23 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
120
119
  //top-menu: Bio | Composition Analysis
121
120
  //output: viewer result
122
121
  export async function compositionAnalysis(): Promise<void> {
123
- const col = grok.shell.t.columns.bySemType('Macromolecule');//DG.SEMTYPE.MACROMOLECULE);
124
- if (col === null) {
122
+ // Higher priority for columns with MSA data to show with WebLogo.
123
+ const tv = grok.shell.tv;
124
+ const df = tv.dataFrame;
125
+ const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
126
+ let col: DG.Column | undefined = semTypeColList.find((col) => {
127
+ const units = col.getTag(DG.TAGS.UNITS);
128
+ return units ? units.indexOf('MSA') !== -1 : false;
129
+ });
130
+ if (!col)
131
+ col = semTypeColList[0];
132
+
133
+ if (!col) {
125
134
  grok.shell.error('Current table does not contain sequences');
126
135
  return;
127
136
  }
128
137
 
129
- const wl = await col.dataFrame.plot.fromType('WebLogo', {});
130
-
131
- for (const v of grok.shell.views) {
132
- if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
133
- (v as DG.TableView).dockManager.dock(wl.root, 'down');
134
- break;
135
- }
136
- }
138
+ tv.addViewer('WebLogo', {sequenceColumnName: col.name});
137
139
  }
138
140
 
139
141
  // helper function for importFasta
@@ -182,7 +184,7 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
182
184
  (c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
183
185
  const maxCos = Math.max(...alphabetCandidatesSim);
184
186
  const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
185
- sequenceCol.semType = mmSemType;
187
+ sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
186
188
  const units: string = `fasta:${seqType}:${alphabet}`;
187
189
  sequenceCol.setTag(DG.TAGS.UNITS, units);
188
190
 
@@ -0,0 +1,31 @@
1
+ import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ // import {mmSemType} from '../const';
8
+ // import {importFasta} from '../package';
9
+
10
// Registers the notation-converter tests under the 'converters' category.
// Each entry pairs the test name with the exported function that implements it.
category('converters', () => {
  const cases: [string, () => Promise<void>][] = [
    ['testFastaToSeparator', _testFastaToSeparator],
    ['testSeparatorToFasta', _testSeparatorToFasta],
  ];

  for (const [name, run] of cases)
    test(name, async () => { await run(); });
});
16
+
17
+ // export async function _a() {
18
+ // expect(1, 1);
19
+ // }
20
+ //
21
+ // export async function _b() {
22
+ // expect(1, 2);
23
+ // }
24
+
25
/**
 * Placeholder for the fasta-to-separator conversion test.
 * It only asserts a trivially true equality; no conversion is exercised yet.
 */
export async function _testFastaToSeparator(): Promise<void> {
  const expected = 1;
  expect(1, expected);
}
28
+
29
/**
 * Placeholder for the separator-to-fasta conversion test.
 *
 * The previous stub asserted `expect(1, 2)`, which fails unconditionally and independently of
 * the code under test. Until a real check is written, it mirrors the fasta-to-separator
 * placeholder and asserts a trivially true equality.
 */
export async function _testSeparatorToFasta(): Promise<void> {
  // TODO: replace with a real separator -> fasta conversion check.
  expect(1, 1);
}
@@ -4,7 +4,6 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
- import {mmSemType} from '../const';
8
7
  import {importFasta} from '../package';
9
8
 
10
9
  type DfReaderFunc = () => Promise<DG.DataFrame>;
@@ -94,6 +93,7 @@ MWRSWY-CKHP
94
93
  fastaCsv = 'FastaCsv',
95
94
  msaComplex = 'MsaComplex',
96
95
  idCsv = 'IdCsv',
96
+ sarSmallCsv = 'SarSmallCsv',
97
97
  }
98
98
 
99
99
  const samples: { [key: string]: string } = {
@@ -101,6 +101,7 @@ MWRSWY-CKHP
101
101
  'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
102
102
  'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
103
103
  'IdCsv': 'System:AppData/Bio/samples/id.csv',
104
+ 'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
104
105
  };
105
106
 
106
107
  const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
@@ -189,20 +190,24 @@ MWRSWY-CKHP
189
190
  test('samplesIdCsvNegativeID', async () => {
190
191
  await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
191
192
  });
193
+
194
+ test('samplesSarSmallCsvNegativeSmiles', async () => {
195
+ await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
196
+ });
192
197
  });
193
198
 
194
199
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
195
200
  const df: DG.DataFrame = await readDf();
196
201
 
197
202
  const col: DG.Column = df.col(colName)!;
198
- expect(col.semType === mmSemType, false);
203
+ expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
199
204
  }
200
205
 
201
206
  export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
202
207
  const df: DG.DataFrame = await readDf();
203
208
 
204
209
  const col: DG.Column = df.col(colName)!;
205
- expect(col.semType === mmSemType, true);
210
+ expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
206
211
  expect(col.getTag(DG.TAGS.UNITS), units);
207
212
  if (separator)
208
213
  expect(col.getTag('separator'), separator);
@@ -213,7 +218,7 @@ export async function _testN1(csvDfN1: string) {
213
218
  await grok.data.detectSemanticTypes(dfN1);
214
219
 
215
220
  const col: DG.Column = dfN1.col('seq')!;
216
- expect(col.semType, mmSemType);
221
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
217
222
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
218
223
  }
219
224
 
@@ -222,7 +227,7 @@ export async function _testAA1(csvDfAA1: string) {
222
227
  await grok.data.detectSemanticTypes(dfAA1);
223
228
 
224
229
  const col: DG.Column = dfAA1.col('seq')!;
225
- expect(col.semType, mmSemType);
230
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
226
231
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
227
232
  }
228
233
 
@@ -231,7 +236,7 @@ export async function _testMsaN1(csvDfMsaN1: string) {
231
236
  await grok.data.detectSemanticTypes(dfMsaN1);
232
237
 
233
238
  const col: DG.Column = dfMsaN1.col('seq')!;
234
- expect(col.semType, mmSemType);
239
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
235
240
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
236
241
  }
237
242
 
@@ -240,7 +245,7 @@ export async function _testMsaAA1(csvDfMsaAA1: string) {
240
245
  await grok.data.detectSemanticTypes(dfMsaAA1);
241
246
 
242
247
  const col: DG.Column = dfMsaAA1.col('seq')!;
243
- expect(col.semType, mmSemType);
248
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
244
249
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
245
250
  }
246
251
 
@@ -249,7 +254,7 @@ export async function _testSepNt(csv: string, separator: string) {
249
254
  await grok.data.detectSemanticTypes(df);
250
255
 
251
256
  const col: DG.Column = df.col('seq')!;
252
- expect(col.semType, mmSemType);
257
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
253
258
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:NT');
254
259
  expect(col.getTag('separator'), separator);
255
260
  }
@@ -259,7 +264,7 @@ export async function _testSepPt(csv: string, separator: string) {
259
264
  await grok.data.detectSemanticTypes(df);
260
265
 
261
266
  const col: DG.Column = df.col('seq')!;
262
- expect(col.semType, mmSemType);
267
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
263
268
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:PT');
264
269
  expect(col.getTag('separator'), separator);
265
270
  }
@@ -269,7 +274,7 @@ export async function _testSepUn(csv: string, separator: string) {
269
274
  await grok.data.detectSemanticTypes(df);
270
275
 
271
276
  const col: DG.Column = df.col('seq')!;
272
- expect(col.semType, mmSemType);
277
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
273
278
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
274
279
  expect(col.getTag('separator'), separator);
275
280
  }
@@ -279,7 +284,7 @@ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
279
284
  await grok.data.detectSemanticTypes(dfSepMsaN1);
280
285
 
281
286
  const col: DG.Column = dfSepMsaN1.col('seq')!;
282
- expect(col.semType, mmSemType);
287
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
283
288
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
284
289
  }
285
290
 
@@ -289,7 +294,7 @@ export async function _testSamplesFastaCsvPt() {
289
294
  await grok.data.detectSemanticTypes(df);
290
295
 
291
296
  const col: DG.Column = df.col('sequence')!;
292
- expect(col.semType, mmSemType);
297
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
293
298
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
294
299
  expect(col.getTag('separator'), null);
295
300
  }
@@ -299,7 +304,7 @@ export async function _testSamplesFastaFastaPt() {
299
304
  const df: DG.DataFrame = importFasta(fasta)[0];
300
305
 
301
306
  const col: DG.Column = df.col('sequence')!;
302
- expect(col.semType, mmSemType);
307
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
303
308
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
304
309
  expect(col.getTag('separator'), null);
305
310
  }
@@ -310,7 +315,7 @@ export async function _testSamplesPeptidesComplexUn() {
310
315
  await grok.data.detectSemanticTypes(df);
311
316
 
312
317
  const col: DG.Column = df.col('AlignedSequence')!;
313
- expect(col.semType, mmSemType);
318
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
314
319
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
315
320
  expect(col.getTag('separator'), '-');
316
321
  }
@@ -1,11 +1,21 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
+ import {NotationConverter} from './notation-converter';
3
4
 
5
+ /**
6
+ * Converts notations of a Macromolecule column
7
+ *
8
+ * @param {DG.column} col Column with 'Macromolecule' semantic type
9
+ */
4
10
  export function convert(col: DG.Column): void {
5
11
  const current = col.tags[DG.TAGS.UNITS];
6
12
  //TODO: read all notations
7
- const notations = ['fasta:SEQ:NT', 'fasta:SEQ:PT', 'fasta:SEQ.MSA:NT', 'fasta:SEQ.MSA:PT', 'HELM'];
8
- const choices = ui.choiceInput('convert to', '', notations.filter((e) => e !== current));
13
+ const units = [
14
+ 'fasta',
15
+ 'separator',
16
+ 'HELM'
17
+ ];
18
+ const choices = ui.choiceInput('convert to', '', units.filter((e) => e !== current));
9
19
 
10
20
  ui.dialog('Convert sequence')
11
21
  .add(
@@ -17,7 +27,9 @@ export function convert(col: DG.Column): void {
17
27
  )
18
28
  .onOK(() => {
19
29
  //TODO: create new converted column
20
- //col.dataFrame.columns.add();
30
+ const converter = new NotationConverter(col, choices.value!);
31
+ const newColumn = converter.convert();
32
+ col.dataFrame.columns.add(newColumn);
21
33
  })
22
34
  .show();
23
35
  }
@@ -29,12 +29,12 @@ function _fastaToStrings(fasta: string): string[] {
29
29
  /**
30
30
  * Runs Aioli environment with kalign tool.
31
31
  *
32
- * @param {DG.Column} col Column with sequences.
32
+ * @param {DG.Column} srcCol Column with sequences.
33
33
  * @param {boolean} isAligned Whether the column is aligned.
34
34
  * @return {Promise<DG.Column>} Aligned sequences.
35
35
  */
36
- export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.Column> {
37
- let sequences = col.toList();
36
+ export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<DG.Column> {
37
+ let sequences = srcCol.toList();
38
38
 
39
39
  if (isAligned)
40
40
  sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
@@ -55,15 +55,20 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
55
55
  console.warn(output);
56
56
 
57
57
  const aligned = _fastaToStrings(buf).slice(0, sequences.length);
58
- const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, aligned);
59
- alignedCol.setTag(DG.TAGS.UNITS, '');
60
- alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
61
- return alignedCol;
58
+ const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
59
+
60
+ // units
61
+ const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
62
+ const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
63
+
64
+ tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
65
+ tgtCol.semType = C.SEM_TYPES.Macro_Molecule;
66
+ return tgtCol;
62
67
  }
63
68
 
64
69
  export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
65
70
  const sequencesCount = col.length;
66
- const delta = sequencesCount/100;
71
+ const delta = sequencesCount / 100;
67
72
 
68
73
  for (let i = delta; i < sequencesCount; i += delta) {
69
74
  try {
@@ -0,0 +1,131 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ // export const enum NOTATION {
4
+ // // these values can be changed to "user-friendly" ones later on
5
+ // FASTA = 'fasta',
6
+ // SEPARATOR = 'separator',
7
+ // HELM = 'helm'
8
+ // }
9
+
10
/**
 * Converts a Macromolecule column between sequence notations: fasta, separator and HELM.
 *
 * The source notation is derived from the column's units tag (e.g. 'fasta:SEQ:NT'); the target
 * notation is supplied by the caller. Notation names are compared case-insensitively, so
 * 'helm' and 'HELM' denote the same notation.
 *
 * NOTE: the individual converters are still stubs. Each returns a string column named
 * 'converted' that is filled with a placeholder naming the conversion route.
 */
export class NotationConverter {
  private readonly _sourceColumn: DG.Column; // the column to be converted
  private readonly _currentUnits: string; // units of the form fasta:SEQ:NT, etc.
  private readonly _sourceNotation: string; // current notation (without :SEQ:NT, etc.), lower-cased
  private readonly _targetNotation: string; // requested notation, lower-cased

  private get sourceColumn(): DG.Column { return this._sourceColumn; }
  private get currentUnits(): string { return this._currentUnits; }
  private get sourceNotation(): string { return this._sourceNotation; }
  private get targetNotation(): string { return this._targetNotation; }

  // these values can be changed to "user-friendly" ones later on
  private readonly _fasta = 'fasta';
  private readonly _separator = 'separator';
  private readonly _helm = 'helm';

  /** @return {boolean} Whether the source column is in fasta notation. */
  public isFasta(): boolean { return this.sourceNotation === this._fasta; }

  /** @return {boolean} Whether the source column is in separator notation. */
  public isSeparator(): boolean { return this.sourceNotation === this._separator; }

  /** @return {boolean} Whether the source column is in HELM notation. */
  public isHelm(): boolean { return this.sourceNotation === this._helm; }

  /**
   * Derives the notation from the prefix of the units tag.
   * Returns the same constants that the predicates and convert() compare against, so the
   * HELM branch can actually match.
   */
  private determineSourceNotation(): string {
    const units = this.currentUnits.toLowerCase();
    if (units.startsWith(this._fasta))
      return this._fasta;
    if (units.startsWith(this._separator))
      return this._separator;
    // TODO: handle possible exceptions; any other units are currently assumed to be HELM
    return this._helm;
  }

  /** Builds the stub result column shared by all not-yet-implemented converters. */
  private createPlaceholderColumn(placeholder: string): DG.Column {
    const len = this.sourceColumn.length;
    const newColumn = DG.Column.fromList('string', 'converted', new Array(len).fill(placeholder));
    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
    return newColumn;
  }

  // TODO: write the bodies of converter methods
  private convertFastaToSeparator(): DG.Column { return this.createPlaceholderColumn('fasta2sep'); }
  private convertFastaToHelm(): DG.Column { return this.createPlaceholderColumn('fasta2helm'); }
  private convertSeparatorToFasta(): DG.Column { return this.createPlaceholderColumn('sep2fasta'); }
  private convertSeparatorToHelm(): DG.Column { return this.createPlaceholderColumn('sep2helm'); }
  private convertHelmToFasta(): DG.Column { return this.createPlaceholderColumn('helm2fasta'); }
  private convertHelmToSeparator(): DG.Column { return this.createPlaceholderColumn('helm2sep'); }

  /**
   * Converts the source column to the target notation.
   *
   * @return {DG.Column} A new column holding the converted sequences.
   * @throws {Error} If the source/target pair is not a supported conversion
   *   (same notation on both sides, or an unknown target).
   */
  public convert(): DG.Column {
    const routes: [string, string, () => DG.Column][] = [
      [this._fasta, this._separator, () => this.convertFastaToSeparator()],
      [this._fasta, this._helm, () => this.convertFastaToHelm()],
      [this._separator, this._fasta, () => this.convertSeparatorToFasta()],
      [this._separator, this._helm, () => this.convertSeparatorToHelm()],
      [this._helm, this._fasta, () => this.convertHelmToFasta()],
      [this._helm, this._separator, () => this.convertHelmToSeparator()],
    ];

    const route = routes.find(([src, tgt]) => src === this.sourceNotation && tgt === this.targetNotation);
    // Previously every unmatched pair silently fell through to the HELM -> separator stub.
    if (!route) {
      throw new Error(
        `Unsupported notation conversion: '${this.sourceNotation}' -> '${this.targetNotation}'`);
    }
    return route[2]();
  }

  /**
   * @param {DG.Column} col Column to convert; must carry a units tag.
   * @param {string} target Target notation name ('fasta', 'separator' or 'HELM', any letter case).
   * @throws {Error} If the column has no units tag.
   */
  public constructor(col: DG.Column, target: string) {
    this._sourceColumn = col;

    const units: string | null | undefined = this._sourceColumn.tags[DG.TAGS.UNITS];
    // A missing tag used to surface as an opaque TypeError from toLowerCase().
    if (units == null)
      throw new Error(`Column '${col.name}' has no units tag, so its notation cannot be determined`);

    this._currentUnits = units;
    this._sourceNotation = this.determineSourceNotation();
    // Normalize the case: the conversion dialog offers 'HELM' while the constants are lower-case.
    this._targetNotation = (target ?? '').toLowerCase();
  }
}
@@ -18,9 +18,9 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
18
18
  const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
19
19
  const regex = new RegExp(sepFinal, 'g');
20
20
  if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
21
- preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
21
+ preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
22
22
  else
23
- preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
23
+ preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
24
24
  } else {
25
25
  preparedData = spaceParams.seqCol.toList();
26
26
  }
@@ -31,13 +31,13 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
31
31
  spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
32
32
  spaceParams.options);
33
33
  const cols: DG.Column[] = spaceParams.embedAxesNames.map(
34
- (name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
34
+ (name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
35
35
  return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
36
36
  }
37
37
 
38
38
 
39
39
  export function getEmbeddingColsNames(df: DG.DataFrame) {
40
40
  const axes = ['Embed_X', 'Embed_Y'];
41
- const colNameInd = df.columns.names().filter((it) => it.includes(axes[0])).length + 1;
41
+ const colNameInd = df.columns.names().filter((it: string) => it.includes(axes[0])).length + 1;
42
42
  return axes.map((it) => `${it}_${colNameInd}`);
43
43
  }