@datagrok/bio 1.5.2 → 1.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +11 -0
- package/dist/package-test.js +215 -81
- package/dist/package.js +183 -62
- package/files/samples/id.csv +313 -0
- package/files/samples/sample_HELM.csv +540 -540
- package/files/samples/sample_MSA.csv +540 -540
- package/files/samples/sar-small.csv +201 -0
- package/package.json +1 -1
- package/src/const.ts +0 -1
- package/src/package.ts +14 -12
- package/src/tests/convert-test.ts +31 -0
- package/src/tests/detectors-test.ts +37 -16
- package/src/utils/convert.ts +15 -3
- package/src/utils/multiple-sequence-alignment.ts +13 -8
- package/src/utils/notation-converter.ts +131 -0
- package/src/utils/sequence-space.ts +4 -4
- package/src/utils/split-to-monomers.ts +8 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
smiles
|
|
2
|
+
O=C1CN=C(c2ccccc2N1)C3CCCCC3
|
|
3
|
+
CN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
4
|
+
CCCCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
5
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3
|
|
6
|
+
O=C1CN=C(c2ccccc2N1CC3CCCCC3)C4CCCCC4
|
|
7
|
+
O=C1CN=C(c2cc(Cl)ccc2N1)C3CCCCC3
|
|
8
|
+
CN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
9
|
+
CCCCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
10
|
+
CC(C)CCN1C(=O)CN=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
11
|
+
O=C1CN=C(c2cc(Cl)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
12
|
+
N#Cc1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
13
|
+
CN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
14
|
+
CCCCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
15
|
+
CC(C)CCN1C(=O)CN=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
16
|
+
N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
17
|
+
CN(C)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
18
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)C3CCCCC3
|
|
19
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
20
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
21
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
22
|
+
O=C1CN=C(c2cc(F)ccc2N1)C3CCCCC3
|
|
23
|
+
CN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
24
|
+
CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
25
|
+
CC(C)CCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
26
|
+
O=C1CN=C(c2cc(F)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
27
|
+
O=C1CN=C(c2cc(Br)ccc2N1)C3CCCCC3
|
|
28
|
+
CN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
29
|
+
CCCCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
30
|
+
CC(C)CCN1C(=O)CN=C(c2cc(Br)ccc12)C3CCCCC3
|
|
31
|
+
O=C1CN=C(c2cc(Br)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
32
|
+
O=C1CN=C(c2cc(I)ccc2N1)C3CCCCC3
|
|
33
|
+
CN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
34
|
+
CCCCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
35
|
+
CC(C)CCN1C(=O)CN=C(c2cc(I)ccc12)C3CCCCC3
|
|
36
|
+
O=C1CN=C(c2cc(I)ccc2N1CC3CCCCC3)C4CCCCC4
|
|
37
|
+
NC(=O)c1ccc2NC(=O)CN=C(c2c1)C3CCCCC3
|
|
38
|
+
CN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
39
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
40
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
41
|
+
NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)C4CCCCC4
|
|
42
|
+
O=C1CN=C(c2cc(ccc2N1)[N+](=O)[O-])C3CCCCC3
|
|
43
|
+
CN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
44
|
+
CCCCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
45
|
+
CC(C)CCN1C(=O)CN=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
46
|
+
O=C1CN=C(c2cc(ccc2N1CC3CCCCC3)[N+](=O)[O-])C4CCCCC4
|
|
47
|
+
O=C1CN=C(c2ccccc2)c3ccccc3N1
|
|
48
|
+
CN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
49
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
50
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3ccccc13
|
|
51
|
+
O=C1CN=C(c2ccccc2)c3ccccc3N1CC4CCCCC4
|
|
52
|
+
O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1
|
|
53
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
54
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
55
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13
|
|
56
|
+
O=C1CN=C(c2ccccc2)c3cc(Cl)ccc3N1CC4CCCCC4
|
|
57
|
+
N#Cc1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
58
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
59
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
60
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(C#N)ccc13
|
|
61
|
+
N#Cc1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
62
|
+
CN(C)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
63
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2C)c3ccccc3
|
|
64
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
65
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
66
|
+
CN(C)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
67
|
+
O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1
|
|
68
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
69
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
70
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(F)ccc13
|
|
71
|
+
O=C1CN=C(c2ccccc2)c3cc(F)ccc3N1CC4CCCCC4
|
|
72
|
+
O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1
|
|
73
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
74
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
75
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(Br)ccc13
|
|
76
|
+
O=C1CN=C(c2ccccc2)c3cc(Br)ccc3N1CC4CCCCC4
|
|
77
|
+
O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1
|
|
78
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
79
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
80
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(I)ccc13
|
|
81
|
+
O=C1CN=C(c2ccccc2)c3cc(I)ccc3N1CC4CCCCC4
|
|
82
|
+
NC(=O)c1ccc2NC(=O)CN=C(c3ccccc3)c2c1
|
|
83
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
84
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
85
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)C(N)=O
|
|
86
|
+
NC(=O)c1ccc2c(c1)C(=NCC(=O)N2CC3CCCCC3)c4ccccc4
|
|
87
|
+
O=C1CN=C(c2ccccc2)c3cc(ccc3N1)[N+](=O)[O-]
|
|
88
|
+
CN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
89
|
+
CCCCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
90
|
+
CC(C)CCN1C(=O)CN=C(c2ccccc2)c3cc(ccc13)[N+](=O)[O-]
|
|
91
|
+
O=C1CN=C(c2ccccc2)c3cc(ccc3N1CC4CCCCC4)[N+](=O)[O-]
|
|
92
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccccc23
|
|
93
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccccc23
|
|
94
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3ccccc13
|
|
95
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccccc23
|
|
96
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccccc24
|
|
97
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Cl)cc23
|
|
98
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Cl)cc23
|
|
99
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Cl)ccc13
|
|
100
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Cl)cc23
|
|
101
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Cl)cc24
|
|
102
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(C#N)cc23
|
|
103
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(C#N)cc23
|
|
104
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(C#N)ccc13
|
|
105
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(C#N)cc23
|
|
106
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(C#N)cc24
|
|
107
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)N(C)C
|
|
108
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)N(C)C
|
|
109
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)N(C)C
|
|
110
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)N(C)C
|
|
111
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)N(C)C
|
|
112
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(F)cc23
|
|
113
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(F)cc23
|
|
114
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(F)ccc13
|
|
115
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(F)cc23
|
|
116
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(F)cc24
|
|
117
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(Br)cc23
|
|
118
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(Br)cc23
|
|
119
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(Br)ccc13
|
|
120
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(Br)cc23
|
|
121
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(Br)cc24
|
|
122
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(I)cc23
|
|
123
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(I)cc23
|
|
124
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(I)ccc13
|
|
125
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(I)cc23
|
|
126
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(I)cc24
|
|
127
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)C(N)=O
|
|
128
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)C(N)=O
|
|
129
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)C(N)=O
|
|
130
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)C(N)=O
|
|
131
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)C(N)=O
|
|
132
|
+
COc1ccc(cc1)C2=NCC(=O)Nc3ccc(cc23)[N+](=O)[O-]
|
|
133
|
+
COc1ccc(cc1)C2=NCC(=O)N(C)c3ccc(cc23)[N+](=O)[O-]
|
|
134
|
+
CCCCN1C(=O)CN=C(c2ccc(cc2)OC)c3cc(ccc13)[N+](=O)[O-]
|
|
135
|
+
COc1ccc(cc1)C2=NCC(=O)N(CCC(C)C)c3ccc(cc23)[N+](=O)[O-]
|
|
136
|
+
COc1ccc(cc1)C2=NCC(=O)N(CC3CCCCC3)c4ccc(cc24)[N+](=O)[O-]
|
|
137
|
+
CC1N=C(c2ccccc2NC1=O)C3CCCCC3
|
|
138
|
+
CC1N=C(c2ccccc2N(C)C1=O)C3CCCCC3
|
|
139
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
|
|
140
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc12)C3CCCCC3
|
|
141
|
+
CC1N=C(c2ccccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
142
|
+
CC1N=C(c2cc(Cl)ccc2NC1=O)C3CCCCC3
|
|
143
|
+
CC1N=C(c2cc(Cl)ccc2N(C)C1=O)C3CCCCC3
|
|
144
|
+
CCCCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
145
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(Cl)ccc12)C3CCCCC3
|
|
146
|
+
CC1N=C(c2cc(Cl)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
147
|
+
CC1N=C(c2cc(C#N)ccc2NC1=O)C3CCCCC3
|
|
148
|
+
CC1N=C(c2cc(C#N)ccc2N(C)C1=O)C3CCCCC3
|
|
149
|
+
CCCCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
150
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(C#N)ccc12)C3CCCCC3
|
|
151
|
+
CC1N=C(c2cc(C#N)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
152
|
+
CC1N=C(c2cc(ccc2NC1=O)N(C)C)C3CCCCC3
|
|
153
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)N(C)C)C3CCCCC3
|
|
154
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
155
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)N(C)C)C3CCCCC3
|
|
156
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)N(C)C)C4CCCCC4
|
|
157
|
+
CC1N=C(c2cc(F)ccc2NC1=O)C3CCCCC3
|
|
158
|
+
CC1N=C(c2cc(F)ccc2N(C)C1=O)C3CCCCC3
|
|
159
|
+
CCCCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
|
|
160
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(F)ccc12)C3CCCCC3
|
|
161
|
+
CC1N=C(c2cc(F)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
162
|
+
CC1N=C(c2cc(Br)ccc2NC1=O)C3CCCCC3
|
|
163
|
+
CC1N=C(c2cc(Br)ccc2N(C)C1=O)C3CCCCC3
|
|
164
|
+
CCCCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
|
|
165
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(Br)ccc12)C3CCCCC3
|
|
166
|
+
CC1N=C(c2cc(Br)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
167
|
+
CC1N=C(c2cc(I)ccc2NC1=O)C3CCCCC3
|
|
168
|
+
CC1N=C(c2cc(I)ccc2N(C)C1=O)C3CCCCC3
|
|
169
|
+
CCCCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
|
|
170
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(I)ccc12)C3CCCCC3
|
|
171
|
+
CC1N=C(c2cc(I)ccc2N(CC3CCCCC3)C1=O)C4CCCCC4
|
|
172
|
+
CC1N=C(c2cc(ccc2NC1=O)C(N)=O)C3CCCCC3
|
|
173
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)C(N)=O)C3CCCCC3
|
|
174
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
175
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)C(N)=O)C3CCCCC3
|
|
176
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)C(N)=O)C4CCCCC4
|
|
177
|
+
CC1N=C(c2cc(ccc2NC1=O)[N+](=O)[O-])C3CCCCC3
|
|
178
|
+
CC1N=C(c2cc(ccc2N(C)C1=O)[N+](=O)[O-])C3CCCCC3
|
|
179
|
+
CCCCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
180
|
+
CC(C)CCN1C(=O)C(C)N=C(c2cc(ccc12)[N+](=O)[O-])C3CCCCC3
|
|
181
|
+
CC1N=C(c2cc(ccc2N(CC3CCCCC3)C1=O)[N+](=O)[O-])C4CCCCC4
|
|
182
|
+
CC1N=C(c2ccccc2)c3ccccc3NC1=O
|
|
183
|
+
CC1N=C(c2ccccc2)c3ccccc3N(C)C1=O
|
|
184
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
|
|
185
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3ccccc13
|
|
186
|
+
CC1N=C(c2ccccc2)c3ccccc3N(CC4CCCCC4)C1=O
|
|
187
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3NC1=O
|
|
188
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(C)C1=O
|
|
189
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
|
|
190
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(Cl)ccc13
|
|
191
|
+
CC1N=C(c2ccccc2)c3cc(Cl)ccc3N(CC4CCCCC4)C1=O
|
|
192
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3NC1=O
|
|
193
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(C)C1=O
|
|
194
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
|
|
195
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(C#N)ccc13
|
|
196
|
+
CC1N=C(c2ccccc2)c3cc(C#N)ccc3N(CC4CCCCC4)C1=O
|
|
197
|
+
CC1N=C(c2ccccc2)c3cc(ccc3NC1=O)N(C)C
|
|
198
|
+
CC1N=C(c2ccccc2)c3cc(ccc3N(C)C1=O)N(C)C
|
|
199
|
+
CCCCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
200
|
+
CC(C)CCN1C(=O)C(C)N=C(c2ccccc2)c3cc(ccc13)N(C)C
|
|
201
|
+
CC1N=C(c2ccccc2)c3cc(ccc3N(CC4CCCCC4)C1=O)N(C)C
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.5.2",
|
|
5
|
+
"version": "1.5.5",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
package/src/const.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
export const _package = new DG.Package();
|
|
7
7
|
|
|
8
|
-
import {mmSemType} from './const';
|
|
9
8
|
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
9
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
10
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
@@ -120,20 +119,23 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
|
|
|
120
119
|
//top-menu: Bio | Composition Analysis
|
|
121
120
|
//output: viewer result
|
|
122
121
|
export async function compositionAnalysis(): Promise<void> {
|
|
123
|
-
|
|
124
|
-
|
|
122
|
+
// Higher priority for columns with MSA data to show with WebLogo.
|
|
123
|
+
const tv = grok.shell.tv;
|
|
124
|
+
const df = tv.dataFrame;
|
|
125
|
+
const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
126
|
+
let col: DG.Column | undefined = semTypeColList.find((col) => {
|
|
127
|
+
const units = col.getTag(DG.TAGS.UNITS);
|
|
128
|
+
return units ? units.indexOf('MSA') !== -1 : false;
|
|
129
|
+
});
|
|
130
|
+
if (!col)
|
|
131
|
+
col = semTypeColList[0];
|
|
132
|
+
|
|
133
|
+
if (!col) {
|
|
125
134
|
grok.shell.error('Current table does not contain sequences');
|
|
126
135
|
return;
|
|
127
136
|
}
|
|
128
137
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
for (const v of grok.shell.views) {
|
|
132
|
-
if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
133
|
-
(v as DG.TableView).dockManager.dock(wl.root, 'down');
|
|
134
|
-
break;
|
|
135
|
-
}
|
|
136
|
-
}
|
|
138
|
+
tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
137
139
|
}
|
|
138
140
|
|
|
139
141
|
// helper function for importFasta
|
|
@@ -182,7 +184,7 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
182
184
|
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
183
185
|
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
184
186
|
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
185
|
-
sequenceCol.semType = mmSemType;
|
|
187
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
186
188
|
const units: string = `fasta:${seqType}:${alphabet}`;
|
|
187
189
|
sequenceCol.setTag(DG.TAGS.UNITS, units);
|
|
188
190
|
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
// import {mmSemType} from '../const';
|
|
8
|
+
// import {importFasta} from '../package';
|
|
9
|
+
|
|
10
|
+
category('converters', () => {
|
|
11
|
+
// test('a', async () => {await _a();});
|
|
12
|
+
// test('b', async () => {await _b();});
|
|
13
|
+
test('testFastaToSeparator', async () => { await _testFastaToSeparator(); });
|
|
14
|
+
test('testSeparatorToFasta', async () => { await _testSeparatorToFasta(); });
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
// export async function _a() {
|
|
18
|
+
// expect(1, 1);
|
|
19
|
+
// }
|
|
20
|
+
//
|
|
21
|
+
// export async function _b() {
|
|
22
|
+
// expect(1, 2);
|
|
23
|
+
// }
|
|
24
|
+
|
|
25
|
+
export async function _testFastaToSeparator() {
|
|
26
|
+
expect(1, 1);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export async function _testSeparatorToFasta() {
|
|
30
|
+
expect(1, 2);
|
|
31
|
+
}
|
|
@@ -4,7 +4,6 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
|
-
import {mmSemType} from '../const';
|
|
8
7
|
import {importFasta} from '../package';
|
|
9
8
|
|
|
10
9
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
@@ -93,12 +92,18 @@ MWRSWY-CKHP
|
|
|
93
92
|
peptidesComplex = 'PeptidesComplex',
|
|
94
93
|
fastaCsv = 'FastaCsv',
|
|
95
94
|
msaComplex = 'MsaComplex',
|
|
95
|
+
idCsv = 'IdCsv',
|
|
96
|
+
sarSmallCsv = 'SarSmallCsv',
|
|
97
|
+
HelmCsv = 'HelmCsv',
|
|
96
98
|
}
|
|
97
99
|
|
|
98
100
|
const samples: { [key: string]: string } = {
|
|
99
|
-
'PeptidesComplex': 'System:AppData/Bio/samples/
|
|
101
|
+
'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_msa.csv',
|
|
100
102
|
'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
101
103
|
'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
104
|
+
'IdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
105
|
+
'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
|
|
106
|
+
'HelmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
|
|
102
107
|
};
|
|
103
108
|
|
|
104
109
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -183,20 +188,36 @@ MWRSWY-CKHP
|
|
|
183
188
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
184
189
|
await _testNeg(readSamplesCsv(Samples.msaComplex), 'Activity');
|
|
185
190
|
});
|
|
191
|
+
|
|
192
|
+
test('samplesIdCsvNegativeID', async () => {
|
|
193
|
+
await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
test('samplesSarSmallCsvNegativeSmiles', async () => {
|
|
197
|
+
await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
test('samplesHelmCsvHELM', async () => {
|
|
201
|
+
await _testPos(readSamplesCsv(Samples.HelmCsv), 'HELM', 'HELM', null);
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
test('samplesHelmCsvNegativeActivity', async () => {
|
|
205
|
+
await _testNeg(readSamplesCsv(Samples.HelmCsv), 'Activity');
|
|
206
|
+
});
|
|
186
207
|
});
|
|
187
208
|
|
|
188
209
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
189
210
|
const df: DG.DataFrame = await readDf();
|
|
190
211
|
|
|
191
212
|
const col: DG.Column = df.col(colName)!;
|
|
192
|
-
expect(col.semType ===
|
|
213
|
+
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
|
|
193
214
|
}
|
|
194
215
|
|
|
195
|
-
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
|
|
216
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
|
|
196
217
|
const df: DG.DataFrame = await readDf();
|
|
197
218
|
|
|
198
219
|
const col: DG.Column = df.col(colName)!;
|
|
199
|
-
expect(col.semType ===
|
|
220
|
+
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
200
221
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
201
222
|
if (separator)
|
|
202
223
|
expect(col.getTag('separator'), separator);
|
|
@@ -207,7 +228,7 @@ export async function _testN1(csvDfN1: string) {
|
|
|
207
228
|
await grok.data.detectSemanticTypes(dfN1);
|
|
208
229
|
|
|
209
230
|
const col: DG.Column = dfN1.col('seq')!;
|
|
210
|
-
expect(col.semType,
|
|
231
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
211
232
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
|
|
212
233
|
}
|
|
213
234
|
|
|
@@ -216,7 +237,7 @@ export async function _testAA1(csvDfAA1: string) {
|
|
|
216
237
|
await grok.data.detectSemanticTypes(dfAA1);
|
|
217
238
|
|
|
218
239
|
const col: DG.Column = dfAA1.col('seq')!;
|
|
219
|
-
expect(col.semType,
|
|
240
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
220
241
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
221
242
|
}
|
|
222
243
|
|
|
@@ -225,7 +246,7 @@ export async function _testMsaN1(csvDfMsaN1: string) {
|
|
|
225
246
|
await grok.data.detectSemanticTypes(dfMsaN1);
|
|
226
247
|
|
|
227
248
|
const col: DG.Column = dfMsaN1.col('seq')!;
|
|
228
|
-
expect(col.semType,
|
|
249
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
229
250
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
|
|
230
251
|
}
|
|
231
252
|
|
|
@@ -234,7 +255,7 @@ export async function _testMsaAA1(csvDfMsaAA1: string) {
|
|
|
234
255
|
await grok.data.detectSemanticTypes(dfMsaAA1);
|
|
235
256
|
|
|
236
257
|
const col: DG.Column = dfMsaAA1.col('seq')!;
|
|
237
|
-
expect(col.semType,
|
|
258
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
238
259
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
|
|
239
260
|
}
|
|
240
261
|
|
|
@@ -243,7 +264,7 @@ export async function _testSepNt(csv: string, separator: string) {
|
|
|
243
264
|
await grok.data.detectSemanticTypes(df);
|
|
244
265
|
|
|
245
266
|
const col: DG.Column = df.col('seq')!;
|
|
246
|
-
expect(col.semType,
|
|
267
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
247
268
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:NT');
|
|
248
269
|
expect(col.getTag('separator'), separator);
|
|
249
270
|
}
|
|
@@ -253,7 +274,7 @@ export async function _testSepPt(csv: string, separator: string) {
|
|
|
253
274
|
await grok.data.detectSemanticTypes(df);
|
|
254
275
|
|
|
255
276
|
const col: DG.Column = df.col('seq')!;
|
|
256
|
-
expect(col.semType,
|
|
277
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
257
278
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:PT');
|
|
258
279
|
expect(col.getTag('separator'), separator);
|
|
259
280
|
}
|
|
@@ -263,7 +284,7 @@ export async function _testSepUn(csv: string, separator: string) {
|
|
|
263
284
|
await grok.data.detectSemanticTypes(df);
|
|
264
285
|
|
|
265
286
|
const col: DG.Column = df.col('seq')!;
|
|
266
|
-
expect(col.semType,
|
|
287
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
267
288
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
|
|
268
289
|
expect(col.getTag('separator'), separator);
|
|
269
290
|
}
|
|
@@ -273,7 +294,7 @@ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
|
|
|
273
294
|
await grok.data.detectSemanticTypes(dfSepMsaN1);
|
|
274
295
|
|
|
275
296
|
const col: DG.Column = dfSepMsaN1.col('seq')!;
|
|
276
|
-
expect(col.semType,
|
|
297
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
277
298
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
|
|
278
299
|
}
|
|
279
300
|
|
|
@@ -283,7 +304,7 @@ export async function _testSamplesFastaCsvPt() {
|
|
|
283
304
|
await grok.data.detectSemanticTypes(df);
|
|
284
305
|
|
|
285
306
|
const col: DG.Column = df.col('sequence')!;
|
|
286
|
-
expect(col.semType,
|
|
307
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
287
308
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
288
309
|
expect(col.getTag('separator'), null);
|
|
289
310
|
}
|
|
@@ -293,7 +314,7 @@ export async function _testSamplesFastaFastaPt() {
|
|
|
293
314
|
const df: DG.DataFrame = importFasta(fasta)[0];
|
|
294
315
|
|
|
295
316
|
const col: DG.Column = df.col('sequence')!;
|
|
296
|
-
expect(col.semType,
|
|
317
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
297
318
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
298
319
|
expect(col.getTag('separator'), null);
|
|
299
320
|
}
|
|
@@ -304,7 +325,7 @@ export async function _testSamplesPeptidesComplexUn() {
|
|
|
304
325
|
await grok.data.detectSemanticTypes(df);
|
|
305
326
|
|
|
306
327
|
const col: DG.Column = df.col('AlignedSequence')!;
|
|
307
|
-
expect(col.semType,
|
|
328
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
308
329
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
|
|
309
330
|
expect(col.getTag('separator'), '-');
|
|
310
331
|
}
|
package/src/utils/convert.ts
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import {NotationConverter} from './notation-converter';
|
|
3
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Converts notations of a Macromolecule column
|
|
7
|
+
*
|
|
8
|
+
* @param {DG.column} col Column with 'Macromolecule' semantic type
|
|
9
|
+
*/
|
|
4
10
|
export function convert(col: DG.Column): void {
|
|
5
11
|
const current = col.tags[DG.TAGS.UNITS];
|
|
6
12
|
//TODO: read all notations
|
|
7
|
-
const
|
|
8
|
-
|
|
13
|
+
const units = [
|
|
14
|
+
'fasta',
|
|
15
|
+
'separator',
|
|
16
|
+
'HELM'
|
|
17
|
+
];
|
|
18
|
+
const choices = ui.choiceInput('convert to', '', units.filter((e) => e !== current));
|
|
9
19
|
|
|
10
20
|
ui.dialog('Convert sequence')
|
|
11
21
|
.add(
|
|
@@ -17,7 +27,9 @@ export function convert(col: DG.Column): void {
|
|
|
17
27
|
)
|
|
18
28
|
.onOK(() => {
|
|
19
29
|
//TODO: create new converted column
|
|
20
|
-
|
|
30
|
+
const converter = new NotationConverter(col, choices.value!);
|
|
31
|
+
const newColumn = converter.convert();
|
|
32
|
+
col.dataFrame.columns.add(newColumn);
|
|
21
33
|
})
|
|
22
34
|
.show();
|
|
23
35
|
}
|
|
@@ -29,12 +29,12 @@ function _fastaToStrings(fasta: string): string[] {
|
|
|
29
29
|
/**
|
|
30
30
|
* Runs Aioli environment with kalign tool.
|
|
31
31
|
*
|
|
32
|
-
* @param {DG.Column}
|
|
32
|
+
* @param {DG.Column} srcCol Column with sequences.
|
|
33
33
|
* @param {boolean} isAligned Whether the column is aligned.
|
|
34
34
|
* @return {Promise<DG.Column>} Aligned sequences.
|
|
35
35
|
*/
|
|
36
|
-
export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.Column> {
|
|
37
|
-
let sequences =
|
|
36
|
+
export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<DG.Column> {
|
|
37
|
+
let sequences = srcCol.toList();
|
|
38
38
|
|
|
39
39
|
if (isAligned)
|
|
40
40
|
sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
|
|
@@ -55,15 +55,20 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
55
55
|
console.warn(output);
|
|
56
56
|
|
|
57
57
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
|
|
59
|
+
|
|
60
|
+
// units
|
|
61
|
+
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
62
|
+
const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
|
|
63
|
+
|
|
64
|
+
tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
|
|
65
|
+
tgtCol.semType = C.SEM_TYPES.Macro_Molecule;
|
|
66
|
+
return tgtCol;
|
|
62
67
|
}
|
|
63
68
|
|
|
64
69
|
export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
|
|
65
70
|
const sequencesCount = col.length;
|
|
66
|
-
const delta = sequencesCount/100;
|
|
71
|
+
const delta = sequencesCount / 100;
|
|
67
72
|
|
|
68
73
|
for (let i = delta; i < sequencesCount; i += delta) {
|
|
69
74
|
try {
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
// export const enum NOTATION {
|
|
4
|
+
// // these values can be changed to "user-friendly" ones later on
|
|
5
|
+
// FASTA = 'fasta',
|
|
6
|
+
// SEPARATOR = 'separator',
|
|
7
|
+
// HELM = 'helm'
|
|
8
|
+
// }
|
|
9
|
+
|
|
10
|
+
export class NotationConverter {
|
|
11
|
+
private _sourceColumn: DG.Column; // the column to be converted
|
|
12
|
+
private _currentUnits: string; // units of the form fasta:SEQ:NT, etc.
|
|
13
|
+
private _sourceNotation: string; // current notation (without :SEQ:NT, etc.)
|
|
14
|
+
private _targetNotation: string;
|
|
15
|
+
|
|
16
|
+
private get sourceColumn(): DG.Column { return this._sourceColumn; }
|
|
17
|
+
private get currentUnits(): string { return this._currentUnits; }
|
|
18
|
+
private get sourceNotation(): string { return this._sourceNotation; }
|
|
19
|
+
private get targetNotation(): string { return this._targetNotation; }
|
|
20
|
+
|
|
21
|
+
// these values can be changed to "user-friendly" ones later on
|
|
22
|
+
private _fasta = 'fasta';
|
|
23
|
+
private _separator = 'separator';
|
|
24
|
+
private _helm = 'helm';
|
|
25
|
+
|
|
26
|
+
public isFasta(): boolean { return this.sourceNotation == this._fasta; }
|
|
27
|
+
public isSeparator(): boolean { return this.sourceNotation == this._separator; }
|
|
28
|
+
public isHelm(): boolean { return this.sourceNotation == this._helm; }
|
|
29
|
+
|
|
30
|
+
private determineSourceNotation() : string {
|
|
31
|
+
if (this.currentUnits.toLowerCase().startsWith('fasta'))
|
|
32
|
+
return 'fasta';
|
|
33
|
+
else if (this.currentUnits.toLowerCase().startsWith('separator'))
|
|
34
|
+
return 'separator';
|
|
35
|
+
else
|
|
36
|
+
// TODO: handle possible exceptions
|
|
37
|
+
return 'HELM';
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
private convertFastaToSeparator(): DG.Column {
|
|
41
|
+
// TODO: implementation
|
|
42
|
+
const len = this.sourceColumn.length;
|
|
43
|
+
const newColName = 'converted';
|
|
44
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('fasta2sep'));
|
|
45
|
+
newColumn.semType = 'Macromolecule';
|
|
46
|
+
return newColumn;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
private convertFastaToHelm(): DG.Column {
|
|
50
|
+
// TODO: implementation
|
|
51
|
+
const len = this.sourceColumn.length;
|
|
52
|
+
const newColName = 'converted';
|
|
53
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('fasta2helm'));
|
|
54
|
+
newColumn.semType = 'Macromolecule';
|
|
55
|
+
return newColumn;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
private convertSeparatorToFasta(): DG.Column {
|
|
59
|
+
// TODO: implementation
|
|
60
|
+
const len = this.sourceColumn.length;
|
|
61
|
+
const newColName = 'converted';
|
|
62
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('sep2fasta'));
|
|
63
|
+
newColumn.semType = 'Macromolecule';
|
|
64
|
+
return newColumn;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
private convertSeparatorToHelm(): DG.Column {
|
|
68
|
+
// TODO: implementation
|
|
69
|
+
const len = this.sourceColumn.length;
|
|
70
|
+
const newColName = 'converted';
|
|
71
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('sep2helm'));
|
|
72
|
+
newColumn.semType = 'Macromolecule';
|
|
73
|
+
return newColumn;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
private convertHelmToFasta(): DG.Column {
|
|
77
|
+
// TODO: implementation
|
|
78
|
+
const len = this.sourceColumn.length;
|
|
79
|
+
const newColName = 'converted';
|
|
80
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('helm2fasta'));
|
|
81
|
+
newColumn.semType = 'Macromolecule';
|
|
82
|
+
return newColumn;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
private convertHelmToSeparator(): DG.Column {
|
|
86
|
+
// TODO: implementation
|
|
87
|
+
const len = this.sourceColumn.length;
|
|
88
|
+
const newColName = 'converted';
|
|
89
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('helm2sep'));
|
|
90
|
+
newColumn.semType = 'Macromolecule';
|
|
91
|
+
return newColumn;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// TODO: write the bodies of converter methods
|
|
95
|
+
public convert() : DG.Column {
|
|
96
|
+
if (
|
|
97
|
+
this.sourceNotation == this._fasta &&
|
|
98
|
+
this.targetNotation == this._separator
|
|
99
|
+
)
|
|
100
|
+
return this.convertFastaToSeparator();
|
|
101
|
+
else if (
|
|
102
|
+
this.sourceNotation == this._fasta &&
|
|
103
|
+
this.targetNotation == this._helm
|
|
104
|
+
)
|
|
105
|
+
return this.convertFastaToHelm();
|
|
106
|
+
else if (
|
|
107
|
+
this.sourceNotation == this._separator &&
|
|
108
|
+
this.targetNotation == this._fasta
|
|
109
|
+
)
|
|
110
|
+
return this.convertSeparatorToFasta();
|
|
111
|
+
else if (
|
|
112
|
+
this.sourceNotation == this._separator &&
|
|
113
|
+
this.targetNotation == this._helm
|
|
114
|
+
)
|
|
115
|
+
return this.convertSeparatorToHelm();
|
|
116
|
+
else if (
|
|
117
|
+
this.sourceNotation == this._helm &&
|
|
118
|
+
this.targetNotation == this._fasta
|
|
119
|
+
)
|
|
120
|
+
return this.convertHelmToFasta();
|
|
121
|
+
else
|
|
122
|
+
return this.convertHelmToSeparator();
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
public constructor(col: DG.Column, target: string) {
|
|
126
|
+
this._sourceColumn = col;
|
|
127
|
+
this._currentUnits = this._sourceColumn.tags[DG.TAGS.UNITS];
|
|
128
|
+
this._sourceNotation = this.determineSourceNotation();
|
|
129
|
+
this._targetNotation = target;
|
|
130
|
+
}
|
|
131
|
+
}
|