@datagrok/bio 1.5.3 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +202 -79
- package/dist/package.js +183 -62
- package/files/samples/sample_HELM.csv +540 -540
- package/files/samples/sample_MSA.csv +540 -540
- package/files/samples/sar-small.csv +201 -0
- package/package.json +1 -1
- package/src/const.ts +0 -1
- package/src/package.ts +14 -12
- package/src/tests/convert-test.ts +31 -0
- package/src/tests/detectors-test.ts +19 -14
- package/src/utils/convert.ts +15 -3
- package/src/utils/multiple-sequence-alignment.ts +13 -8
- package/src/utils/notation-converter.ts +131 -0
- package/src/utils/sequence-space.ts +4 -4
- package/src/utils/split-to-monomers.ts +8 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
smiles
|
|
2
|
+
O=C1CN=C(c2ccccc2N1)C3CCCCC3
|
|
3
|
+
CN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
4
|
+
CCCCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
5
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
6
|
+
O=C1CN=C(c2ccccc2N1CC3CCCCC3)C4CCCCC4
|
|
7
|
+
O=C1CN=C(c2cc(Cl)ccc2N1)C3CCCCC3
|
|
8
|
+
CN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
9
|
+
CCCCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
10
|
+
CC(C)CCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
11
|
+
O=C1CN=C(c2cc(Cl)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
12
|
+
N#Cc1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
13
|
+
CN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
14
|
+
CCCCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
15
|
+
CC(C)CCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
16
|
+
N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
17
|
+
CN(C)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
18
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)C3CCCCC3
|
|
19
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
20
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
21
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
22
|
+
O=C1CN=C(c2cc(F)ccc2N1)C3CCCCC3
|
|
23
|
+
CN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
24
|
+
CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
25
|
+
CC(C)CCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
26
|
+
O=C1CN=C(c2cc(F)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
27
|
+
O=C1CN=C(c2cc(Br)ccc2N1)C3CCCCC3
|
|
28
|
+
CN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
29
|
+
CCCCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
30
|
+
CC(C)CCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
31
|
+
O=C1CN=C(c2cc(Br)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
32
|
+
O=C1CN=C(c2cc(I)ccc2N1)C3CCCCC3
|
|
33
|
+
CN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
34
|
+
CCCCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
35
|
+
CC(C)CCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
36
|
+
O=C1CN=C(c2cc(I)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
37
|
+
NC(=O)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
38
|
+
CN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
39
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
40
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
41
|
+
NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
42
|
+
O=C1CN=C(c2cc(ccc2N1)[N+](=O)[O-])C3CCCCC3
|
|
43
|
+
CN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
44
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
45
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
46
|
+
O=C1CN=C(c2cc(ccc2N1CC3CCCCC3)[N+](=O)[O-])C4CCCCC4
|
|
47
|
+
O=C1CN=C(c2ccccc2)c3ccccc3N1
|
|
48
|
+
CN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
49
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
50
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
51
|
+
O=C1CN=C(c2ccccc2)c3ccccc3N1CC4CCCCC4
|
|
52
|
+
O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1
|
|
53
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
54
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
55
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
56
|
+
O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1CC4CCCCC4
|
|
57
|
+
N#Cc1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
58
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
59
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
60
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
61
|
+
N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
62
|
+
CN(C)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
63
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)c3ccccc3
|
|
64
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
65
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
66
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
67
|
+
O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1
|
|
68
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
69
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
70
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
71
|
+
O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1CC4CCCCC4
|
|
72
|
+
O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1
|
|
73
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
74
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
75
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
76
|
+
O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1CC4CCCCC4
|
|
77
|
+
O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1
|
|
78
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
79
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
80
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
81
|
+
O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1CC4CCCCC4
|
|
82
|
+
NC(=O)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
83
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
84
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
85
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
86
|
+
NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
87
|
+
O=C1CN=C(c2ccccc2)c3cc(ccc3N1)[N+](=O)[O-]
|
|
88
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
89
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
90
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
91
|
+
O=C1CN=C(c2ccccc2)c3cc(ccc3N1CC4CCCCC4)[N+](=O)[O-]
|
|
92
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccccc23
|
|
93
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccccc23
|
|
94
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3ccccc13
|
|
95
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccccc23
|
|
96
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccccc24
|
|
97
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Cl)cc23
|
|
98
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Cl)cc23
|
|
99
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Cl)ccc13
|
|
100
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Cl)cc23
|
|
101
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Cl)cc24
|
|
102
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(C#N)cc23
|
|
103
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(C#N)cc23
|
|
104
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(C#N)ccc13
|
|
105
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(C#N)cc23
|
|
106
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(C#N)cc24
|
|
107
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)N(C)C
|
|
108
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)N(C)C
|
|
109
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)N(C)C
|
|
110
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)N(C)C
|
|
111
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)N(C)C
|
|
112
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(F)cc23
|
|
113
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(F)cc23
|
|
114
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(F)ccc13
|
|
115
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(F)cc23
|
|
116
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(F)cc24
|
|
117
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Br)cc23
|
|
118
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Br)cc23
|
|
119
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Br)ccc13
|
|
120
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Br)cc23
|
|
121
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Br)cc24
|
|
122
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(I)cc23
|
|
123
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(I)cc23
|
|
124
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(I)ccc13
|
|
125
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(I)cc23
|
|
126
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(I)cc24
|
|
127
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)C(N)=O
|
|
128
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)C(N)=O
|
|
129
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)C(N)=O
|
|
130
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)C(N)=O
|
|
131
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)C(N)=O
|
|
132
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)[N+](=O)[O-]
|
|
133
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)[N+](=O)[O-]
|
|
134
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)[N+](=O)[O-]
|
|
135
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)[N+](=O)[O-]
|
|
136
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)[N+](=O)[O-]
|
|
137
|
+
CC1N=C(c2ccccc2NC1=O)C3CCCCC3
|
|
138
|
+
CC1N=C(c2ccccc2N(C)C1=O)C3CCCCC3
|
|
139
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
|
|
140
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
|
|
141
|
+
CC1N=C(c2ccccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
142
|
+
CC1N=C(c2cc(Cl)ccc2NC1=O)C3CCCCC3
|
|
143
|
+
CC1N=C(c2cc(Cl)ccc2N(C)C1=O)C3CCCCC3
|
|
144
|
+
CCCCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
145
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
146
|
+
CC1N=C(c2cc(Cl)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
147
|
+
CC1N=C(c2cc(C#N)ccc2NC1=O)C3CCCCC3
|
|
148
|
+
CC1N=C(c2cc(C#N)ccc2N(C)C1=O)C3CCCCC3
|
|
149
|
+
CCCCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
150
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
151
|
+
CC1N=C(c2cc(C#N)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
152
|
+
CC1N=C(c2cc(ccc2NC1=O)N(C)C)C3CCCCC3
|
|
153
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)N(C)C)C3CCCCC3
|
|
154
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
155
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
156
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)N(C)C)C4CCCCC4
|
|
157
|
+
CC1N=C(c2cc(F)ccc2NC1=O)C3CCCCC3
|
|
158
|
+
CC1N=C(c2cc(F)ccc2N(C)C1=O)C3CCCCC3
|
|
159
|
+
CCCCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
|
|
160
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
|
|
161
|
+
CC1N=C(c2cc(F)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
162
|
+
CC1N=C(c2cc(Br)ccc2NC1=O)C3CCCCC3
|
|
163
|
+
CC1N=C(c2cc(Br)ccc2N(C)C1=O)C3CCCCC3
|
|
164
|
+
CCCCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
|
|
165
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
|
|
166
|
+
CC1N=C(c2cc(Br)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
167
|
+
CC1N=C(c2cc(I)ccc2NC1=O)C3CCCCC3
|
|
168
|
+
CC1N=C(c2cc(I)ccc2N(C)C1=O)C3CCCCC3
|
|
169
|
+
CCCCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
|
|
170
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
|
|
171
|
+
CC1N=C(c2cc(I)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
172
|
+
CC1N=C(c2cc(ccc2NC1=O)C(N)=O)C3CCCCC3
|
|
173
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)C(N)=O)C3CCCCC3
|
|
174
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
175
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
176
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)C(N)=O)C4CCCCC4
|
|
177
|
+
CC1N=C(c2cc(ccc2NC1=O)[N+](=O)[O-])C3CCCCC3
|
|
178
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)[N+](=O)[O-])C3CCCCC3
|
|
179
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
180
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
181
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)[N+](=O)[O-])C4CCCCC4
|
|
182
|
+
CC1N=C(c2ccccc2)c3ccccc3NC1=O
|
|
183
|
+
CC1N=C(c2ccccc2)c3ccccc3N(C)C1=O
|
|
184
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
|
|
185
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
|
|
186
|
+
CC1N=C(c2ccccc2)c3ccccc3N(CC4CCCCC4)C1=O
|
|
187
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3NC1=O
|
|
188
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(C)C1=O
|
|
189
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
|
|
190
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
|
|
191
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(CC4CCCCC4)C1=O
|
|
192
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3NC1=O
|
|
193
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(C)C1=O
|
|
194
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
|
|
195
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
|
|
196
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(CC4CCCCC4)C1=O
|
|
197
|
+
CC1N=C(c2ccccc2)c3cc(ccc3NC1=O)N(C)C
|
|
198
|
+
CC1N=C(c2ccccc2)c3cc(ccc3N(C)C1=O)N(C)C
|
|
199
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
200
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
201
|
+
CC1N=C(c2ccccc2)c3cc(ccc3N(CC4CCCCC4)C1=O)N(C)C
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.5.
|
|
5
|
+
"version": "1.5.4",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
package/src/const.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
export const _package = new DG.Package();
|
|
7
7
|
|
|
8
|
-
import {mmSemType} from './const';
|
|
9
8
|
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
9
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
10
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
@@ -120,20 +119,23 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
|
|
|
120
119
|
//top-menu: Bio | Composition Analysis
|
|
121
120
|
//output: viewer result
|
|
122
121
|
export async function compositionAnalysis(): Promise<void> {
|
|
123
|
-
|
|
124
|
-
|
|
122
|
+
// Higher priority for columns with MSA data to show with WebLogo.
|
|
123
|
+
const tv = grok.shell.tv;
|
|
124
|
+
const df = tv.dataFrame;
|
|
125
|
+
const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
126
|
+
let col: DG.Column | undefined = semTypeColList.find((col) => {
|
|
127
|
+
const units = col.getTag(DG.TAGS.UNITS);
|
|
128
|
+
return units ? units.indexOf('MSA') !== -1 : false;
|
|
129
|
+
});
|
|
130
|
+
if (!col)
|
|
131
|
+
col = semTypeColList[0];
|
|
132
|
+
|
|
133
|
+
if (!col) {
|
|
125
134
|
grok.shell.error('Current table does not contain sequences');
|
|
126
135
|
return;
|
|
127
136
|
}
|
|
128
137
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
for (const v of grok.shell.views) {
|
|
132
|
-
if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
133
|
-
(v as DG.TableView).dockManager.dock(wl.root, 'down');
|
|
134
|
-
break;
|
|
135
|
-
}
|
|
136
|
-
}
|
|
138
|
+
tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
137
139
|
}
|
|
138
140
|
|
|
139
141
|
// helper function for importFasta
|
|
@@ -182,7 +184,7 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
182
184
|
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
183
185
|
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
184
186
|
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
185
|
-
sequenceCol.semType =
|
|
187
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
186
188
|
const units: string = `fasta:${seqType}:${alphabet}`;
|
|
187
189
|
sequenceCol.setTag(DG.TAGS.UNITS, units);
|
|
188
190
|
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
// import {mmSemType} from '../const';
|
|
8
|
+
// import {importFasta} from '../package';
|
|
9
|
+
|
|
10
|
+
category('converters', () => {
|
|
11
|
+
// test('a', async () => {await _a();});
|
|
12
|
+
// test('b', async () => {await _b();});
|
|
13
|
+
test('testFastaToSeparator', async () => { await _testFastaToSeparator(); });
|
|
14
|
+
test('testSeparatorToFasta', async () => { await _testSeparatorToFasta(); });
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
// export async function _a() {
|
|
18
|
+
// expect(1, 1);
|
|
19
|
+
// }
|
|
20
|
+
//
|
|
21
|
+
// export async function _b() {
|
|
22
|
+
// expect(1, 2);
|
|
23
|
+
// }
|
|
24
|
+
|
|
25
|
+
export async function _testFastaToSeparator() {
|
|
26
|
+
expect(1, 1);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export async function _testSeparatorToFasta() {
|
|
30
|
+
expect(1, 2);
|
|
31
|
+
}
|
|
@@ -4,7 +4,6 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
|
-
import {mmSemType} from '../const';
|
|
8
7
|
import {importFasta} from '../package';
|
|
9
8
|
|
|
10
9
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
@@ -94,6 +93,7 @@ MWRSWY-CKHP
|
|
|
94
93
|
fastaCsv = 'FastaCsv',
|
|
95
94
|
msaComplex = 'MsaComplex',
|
|
96
95
|
idCsv = 'IdCsv',
|
|
96
|
+
sarSmallCsv = 'SarSmallCsv',
|
|
97
97
|
}
|
|
98
98
|
|
|
99
99
|
const samples: { [key: string]: string } = {
|
|
@@ -101,6 +101,7 @@ MWRSWY-CKHP
|
|
|
101
101
|
'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
102
102
|
'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
103
103
|
'IdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
104
|
+
'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
|
|
104
105
|
};
|
|
105
106
|
|
|
106
107
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -189,20 +190,24 @@ MWRSWY-CKHP
|
|
|
189
190
|
test('samplesIdCsvNegativeID', async () => {
|
|
190
191
|
await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
|
|
191
192
|
});
|
|
193
|
+
|
|
194
|
+
test('samplesSarSmallCsvNegativeSmiles', async () => {
|
|
195
|
+
await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
|
|
196
|
+
});
|
|
192
197
|
});
|
|
193
198
|
|
|
194
199
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
195
200
|
const df: DG.DataFrame = await readDf();
|
|
196
201
|
|
|
197
202
|
const col: DG.Column = df.col(colName)!;
|
|
198
|
-
expect(col.semType ===
|
|
203
|
+
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
|
|
199
204
|
}
|
|
200
205
|
|
|
201
206
|
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
|
|
202
207
|
const df: DG.DataFrame = await readDf();
|
|
203
208
|
|
|
204
209
|
const col: DG.Column = df.col(colName)!;
|
|
205
|
-
expect(col.semType ===
|
|
210
|
+
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
206
211
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
207
212
|
if (separator)
|
|
208
213
|
expect(col.getTag('separator'), separator);
|
|
@@ -213,7 +218,7 @@ export async function _testN1(csvDfN1: string) {
|
|
|
213
218
|
await grok.data.detectSemanticTypes(dfN1);
|
|
214
219
|
|
|
215
220
|
const col: DG.Column = dfN1.col('seq')!;
|
|
216
|
-
expect(col.semType,
|
|
221
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
217
222
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
|
|
218
223
|
}
|
|
219
224
|
|
|
@@ -222,7 +227,7 @@ export async function _testAA1(csvDfAA1: string) {
|
|
|
222
227
|
await grok.data.detectSemanticTypes(dfAA1);
|
|
223
228
|
|
|
224
229
|
const col: DG.Column = dfAA1.col('seq')!;
|
|
225
|
-
expect(col.semType,
|
|
230
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
226
231
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
227
232
|
}
|
|
228
233
|
|
|
@@ -231,7 +236,7 @@ export async function _testMsaN1(csvDfMsaN1: string) {
|
|
|
231
236
|
await grok.data.detectSemanticTypes(dfMsaN1);
|
|
232
237
|
|
|
233
238
|
const col: DG.Column = dfMsaN1.col('seq')!;
|
|
234
|
-
expect(col.semType,
|
|
239
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
235
240
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
|
|
236
241
|
}
|
|
237
242
|
|
|
@@ -240,7 +245,7 @@ export async function _testMsaAA1(csvDfMsaAA1: string) {
|
|
|
240
245
|
await grok.data.detectSemanticTypes(dfMsaAA1);
|
|
241
246
|
|
|
242
247
|
const col: DG.Column = dfMsaAA1.col('seq')!;
|
|
243
|
-
expect(col.semType,
|
|
248
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
244
249
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
|
|
245
250
|
}
|
|
246
251
|
|
|
@@ -249,7 +254,7 @@ export async function _testSepNt(csv: string, separator: string) {
|
|
|
249
254
|
await grok.data.detectSemanticTypes(df);
|
|
250
255
|
|
|
251
256
|
const col: DG.Column = df.col('seq')!;
|
|
252
|
-
expect(col.semType,
|
|
257
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
253
258
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:NT');
|
|
254
259
|
expect(col.getTag('separator'), separator);
|
|
255
260
|
}
|
|
@@ -259,7 +264,7 @@ export async function _testSepPt(csv: string, separator: string) {
|
|
|
259
264
|
await grok.data.detectSemanticTypes(df);
|
|
260
265
|
|
|
261
266
|
const col: DG.Column = df.col('seq')!;
|
|
262
|
-
expect(col.semType,
|
|
267
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
263
268
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:PT');
|
|
264
269
|
expect(col.getTag('separator'), separator);
|
|
265
270
|
}
|
|
@@ -269,7 +274,7 @@ export async function _testSepUn(csv: string, separator: string) {
|
|
|
269
274
|
await grok.data.detectSemanticTypes(df);
|
|
270
275
|
|
|
271
276
|
const col: DG.Column = df.col('seq')!;
|
|
272
|
-
expect(col.semType,
|
|
277
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
273
278
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
|
|
274
279
|
expect(col.getTag('separator'), separator);
|
|
275
280
|
}
|
|
@@ -279,7 +284,7 @@ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
|
|
|
279
284
|
await grok.data.detectSemanticTypes(dfSepMsaN1);
|
|
280
285
|
|
|
281
286
|
const col: DG.Column = dfSepMsaN1.col('seq')!;
|
|
282
|
-
expect(col.semType,
|
|
287
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
283
288
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
|
|
284
289
|
}
|
|
285
290
|
|
|
@@ -289,7 +294,7 @@ export async function _testSamplesFastaCsvPt() {
|
|
|
289
294
|
await grok.data.detectSemanticTypes(df);
|
|
290
295
|
|
|
291
296
|
const col: DG.Column = df.col('sequence')!;
|
|
292
|
-
expect(col.semType,
|
|
297
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
293
298
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
294
299
|
expect(col.getTag('separator'), null);
|
|
295
300
|
}
|
|
@@ -299,7 +304,7 @@ export async function _testSamplesFastaFastaPt() {
|
|
|
299
304
|
const df: DG.DataFrame = importFasta(fasta)[0];
|
|
300
305
|
|
|
301
306
|
const col: DG.Column = df.col('sequence')!;
|
|
302
|
-
expect(col.semType,
|
|
307
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
303
308
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
304
309
|
expect(col.getTag('separator'), null);
|
|
305
310
|
}
|
|
@@ -310,7 +315,7 @@ export async function _testSamplesPeptidesComplexUn() {
|
|
|
310
315
|
await grok.data.detectSemanticTypes(df);
|
|
311
316
|
|
|
312
317
|
const col: DG.Column = df.col('AlignedSequence')!;
|
|
313
|
-
expect(col.semType,
|
|
318
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
314
319
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
|
|
315
320
|
expect(col.getTag('separator'), '-');
|
|
316
321
|
}
|
package/src/utils/convert.ts
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import {NotationConverter} from './notation-converter';
|
|
3
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Converts notations of a Macromolecule column
|
|
7
|
+
*
|
|
8
|
+
* @param {DG.column} col Column with 'Macromolecule' semantic type
|
|
9
|
+
*/
|
|
4
10
|
export function convert(col: DG.Column): void {
|
|
5
11
|
const current = col.tags[DG.TAGS.UNITS];
|
|
6
12
|
//TODO: read all notations
|
|
7
|
-
const
|
|
8
|
-
|
|
13
|
+
const units = [
|
|
14
|
+
'fasta',
|
|
15
|
+
'separator',
|
|
16
|
+
'HELM'
|
|
17
|
+
];
|
|
18
|
+
const choices = ui.choiceInput('convert to', '', units.filter((e) => e !== current));
|
|
9
19
|
|
|
10
20
|
ui.dialog('Convert sequence')
|
|
11
21
|
.add(
|
|
@@ -17,7 +27,9 @@ export function convert(col: DG.Column): void {
|
|
|
17
27
|
)
|
|
18
28
|
.onOK(() => {
|
|
19
29
|
//TODO: create new converted column
|
|
20
|
-
|
|
30
|
+
const converter = new NotationConverter(col, choices.value!);
|
|
31
|
+
const newColumn = converter.convert();
|
|
32
|
+
col.dataFrame.columns.add(newColumn);
|
|
21
33
|
})
|
|
22
34
|
.show();
|
|
23
35
|
}
|
|
@@ -29,12 +29,12 @@ function _fastaToStrings(fasta: string): string[] {
|
|
|
29
29
|
/**
|
|
30
30
|
* Runs Aioli environment with kalign tool.
|
|
31
31
|
*
|
|
32
|
-
* @param {DG.Column}
|
|
32
|
+
* @param {DG.Column} srcCol Column with sequences.
|
|
33
33
|
* @param {boolean} isAligned Whether the column is aligned.
|
|
34
34
|
* @return {Promise<DG.Column>} Aligned sequences.
|
|
35
35
|
*/
|
|
36
|
-
export async function runKalign(
|
|
37
|
-
let sequences =
|
|
36
|
+
export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<DG.Column> {
|
|
37
|
+
let sequences = srcCol.toList();
|
|
38
38
|
|
|
39
39
|
if (isAligned)
|
|
40
40
|
sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
|
|
@@ -55,15 +55,20 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
55
55
|
console.warn(output);
|
|
56
56
|
|
|
57
57
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
|
|
59
|
+
|
|
60
|
+
// units
|
|
61
|
+
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
62
|
+
const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
|
|
63
|
+
|
|
64
|
+
tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
|
|
65
|
+
tgtCol.semType = C.SEM_TYPES.Macro_Molecule;
|
|
66
|
+
return tgtCol;
|
|
62
67
|
}
|
|
63
68
|
|
|
64
69
|
export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
|
|
65
70
|
const sequencesCount = col.length;
|
|
66
|
-
const delta = sequencesCount/100;
|
|
71
|
+
const delta = sequencesCount / 100;
|
|
67
72
|
|
|
68
73
|
for (let i = delta; i < sequencesCount; i += delta) {
|
|
69
74
|
try {
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
// export const enum NOTATION {
|
|
4
|
+
// // these values can be changed to "user-friendly" ones later on
|
|
5
|
+
// FASTA = 'fasta',
|
|
6
|
+
// SEPARATOR = 'separator',
|
|
7
|
+
// HELM = 'helm'
|
|
8
|
+
// }
|
|
9
|
+
|
|
10
|
+
export class NotationConverter {
|
|
11
|
+
private _sourceColumn: DG.Column; // the column to be converted
|
|
12
|
+
private _currentUnits: string; // units of the form fasta:SEQ:NT, etc.
|
|
13
|
+
private _sourceNotation: string; // current notation (without :SEQ:NT, etc.)
|
|
14
|
+
private _targetNotation: string;
|
|
15
|
+
|
|
16
|
+
private get sourceColumn(): DG.Column { return this._sourceColumn; }
|
|
17
|
+
private get currentUnits(): string { return this._currentUnits; }
|
|
18
|
+
private get sourceNotation(): string { return this._sourceNotation; }
|
|
19
|
+
private get targetNotation(): string { return this._targetNotation; }
|
|
20
|
+
|
|
21
|
+
// these values can be changed to "user-friendly" ones later on
|
|
22
|
+
private _fasta = 'fasta';
|
|
23
|
+
private _separator = 'separator';
|
|
24
|
+
private _helm = 'helm';
|
|
25
|
+
|
|
26
|
+
public isFasta(): boolean { return this.sourceNotation == this._fasta; }
|
|
27
|
+
public isSeparator(): boolean { return this.sourceNotation == this._separator; }
|
|
28
|
+
public isHelm(): boolean { return this.sourceNotation == this._helm; }
|
|
29
|
+
|
|
30
|
+
private determineSourceNotation() : string {
|
|
31
|
+
if (this.currentUnits.toLowerCase().startsWith('fasta'))
|
|
32
|
+
return 'fasta';
|
|
33
|
+
else if (this.currentUnits.toLowerCase().startsWith('separator'))
|
|
34
|
+
return 'separator';
|
|
35
|
+
else
|
|
36
|
+
// TODO: handle possible exceptions
|
|
37
|
+
return 'HELM';
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
private convertFastaToSeparator(): DG.Column {
|
|
41
|
+
// TODO: implementation
|
|
42
|
+
const len = this.sourceColumn.length;
|
|
43
|
+
const newColName = 'converted';
|
|
44
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('fasta2sep'));
|
|
45
|
+
newColumn.semType = 'Macromolecule';
|
|
46
|
+
return newColumn;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
private convertFastaToHelm(): DG.Column {
|
|
50
|
+
// TODO: implementation
|
|
51
|
+
const len = this.sourceColumn.length;
|
|
52
|
+
const newColName = 'converted';
|
|
53
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('fasta2helm'));
|
|
54
|
+
newColumn.semType = 'Macromolecule';
|
|
55
|
+
return newColumn;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
private convertSeparatorToFasta(): DG.Column {
|
|
59
|
+
// TODO: implementation
|
|
60
|
+
const len = this.sourceColumn.length;
|
|
61
|
+
const newColName = 'converted';
|
|
62
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('sep2fasta'));
|
|
63
|
+
newColumn.semType = 'Macromolecule';
|
|
64
|
+
return newColumn;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
private convertSeparatorToHelm(): DG.Column {
|
|
68
|
+
// TODO: implementation
|
|
69
|
+
const len = this.sourceColumn.length;
|
|
70
|
+
const newColName = 'converted';
|
|
71
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('sep2helm'));
|
|
72
|
+
newColumn.semType = 'Macromolecule';
|
|
73
|
+
return newColumn;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
private convertHelmToFasta(): DG.Column {
|
|
77
|
+
// TODO: implementation
|
|
78
|
+
const len = this.sourceColumn.length;
|
|
79
|
+
const newColName = 'converted';
|
|
80
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('helm2fasta'));
|
|
81
|
+
newColumn.semType = 'Macromolecule';
|
|
82
|
+
return newColumn;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
private convertHelmToSeparator(): DG.Column {
|
|
86
|
+
// TODO: implementation
|
|
87
|
+
const len = this.sourceColumn.length;
|
|
88
|
+
const newColName = 'converted';
|
|
89
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('helm2sep'));
|
|
90
|
+
newColumn.semType = 'Macromolecule';
|
|
91
|
+
return newColumn;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// TODO: write the bodies of converter methods
|
|
95
|
+
public convert() : DG.Column {
|
|
96
|
+
if (
|
|
97
|
+
this.sourceNotation == this._fasta &&
|
|
98
|
+
this.targetNotation == this._separator
|
|
99
|
+
)
|
|
100
|
+
return this.convertFastaToSeparator();
|
|
101
|
+
else if (
|
|
102
|
+
this.sourceNotation == this._fasta &&
|
|
103
|
+
this.targetNotation == this._helm
|
|
104
|
+
)
|
|
105
|
+
return this.convertFastaToHelm();
|
|
106
|
+
else if (
|
|
107
|
+
this.sourceNotation == this._separator &&
|
|
108
|
+
this.targetNotation == this._fasta
|
|
109
|
+
)
|
|
110
|
+
return this.convertSeparatorToFasta();
|
|
111
|
+
else if (
|
|
112
|
+
this.sourceNotation == this._separator &&
|
|
113
|
+
this.targetNotation == this._helm
|
|
114
|
+
)
|
|
115
|
+
return this.convertSeparatorToHelm();
|
|
116
|
+
else if (
|
|
117
|
+
this.sourceNotation == this._helm &&
|
|
118
|
+
this.targetNotation == this._fasta
|
|
119
|
+
)
|
|
120
|
+
return this.convertHelmToFasta();
|
|
121
|
+
else
|
|
122
|
+
return this.convertHelmToSeparator();
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
public constructor(col: DG.Column, target: string) {
|
|
126
|
+
this._sourceColumn = col;
|
|
127
|
+
this._currentUnits = this._sourceColumn.tags[DG.TAGS.UNITS];
|
|
128
|
+
this._sourceNotation = this.determineSourceNotation();
|
|
129
|
+
this._targetNotation = target;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
@@ -18,9 +18,9 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
18
18
|
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
19
19
|
const regex = new RegExp(sepFinal, 'g');
|
|
20
20
|
if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
|
|
21
|
-
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
21
|
+
preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
|
|
22
22
|
else
|
|
23
|
-
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
23
|
+
preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
|
|
24
24
|
} else {
|
|
25
25
|
preparedData = spaceParams.seqCol.toList();
|
|
26
26
|
}
|
|
@@ -31,13 +31,13 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
31
31
|
spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
|
|
32
32
|
spaceParams.options);
|
|
33
33
|
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
34
|
-
(name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
34
|
+
(name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
35
35
|
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
export function getEmbeddingColsNames(df: DG.DataFrame) {
|
|
40
40
|
const axes = ['Embed_X', 'Embed_Y'];
|
|
41
|
-
const colNameInd = df.columns.names().filter((it) => it.includes(axes[0])).length + 1;
|
|
41
|
+
const colNameInd = df.columns.names().filter((it: string) => it.includes(axes[0])).length + 1;
|
|
42
42
|
return axes.map((it) => `${it}_${colNameInd}`);
|
|
43
43
|
}
|