@datagrok/sequence-translator 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/salts.ts ADDED
@@ -0,0 +1,2 @@
1
/** CSV text for the salts table: a header row followed by a single placeholder row. */
export const SALTS_CSV = [
  'CHARGE,DISPLAY,MOLWEIGHT',
  ',no Data Added,100',
].join('\n');
@@ -0,0 +1,288 @@
1
/**
 * Converts a raw DNA sequence into BioSpring gapmer codes.
 *
 * Every matched token (A/T/C/G or one of the two bracketed modifications) is numbered
 * from 0 in order of appearance: tokens 5..14 are looked up in the center table, all
 * others in the edges table. Modifications are copied through unchanged but still
 * consume a position. Characters that match no token are left as they are.
 *
 * @param nucleotides sequence built from A, T, C, G, `(invabasic)` and `(GalNAc-2-JNJ)`
 * @returns the translated sequence, without the `*` that would otherwise dangle at the end
 */
//name: asoGapmersNucleotidesToBioSpring
//input: string nucleotides {semType: DNA nucleotides}
//output: string result {semType: BioSpring / Gapmers}
export function asoGapmersNucleotidesToBioSpring(nucleotides: string): string {
  let count = -1;
  const objForEdges: {[index: string]: string} = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'T': '5*', 'A': '6*', 'C': '7*', 'G': '8*'};
  const objForCenter: {[index: string]: string} = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'T': 'T*', 'A': 'A*', 'C': '9*', 'G': 'G*'};
  const translated = nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|T|C|G)/g, (x: string) => {
    count++;
    return (count > 4 && count < 15) ? objForCenter[x] : objForEdges[x];
  });
  // Only a trailing linkage marker is dropped. The previous length arithmetic assumed every
  // token produced exactly two characters and cut sequences containing a modification short.
  return translated.endsWith('*') ? translated.slice(0, -1) : translated;
}
16
+
17
/**
 * Converts a raw DNA sequence into GCRS gapmer codes.
 *
 * Matched tokens are numbered from 0 in order of appearance. Tokens 0..4 and 15+ use the
 * edges table (`moe…nps`), tokens 5..14 the center table (`…ps`). The linkage of token 4
 * is switched to `ps` and that of token 14 to `nps`. Modifications are copied through
 * unchanged (they still consume a position). The linkage left dangling at the very end
 * of the result is removed.
 *
 * @param nucleotides sequence built from A, T, C, G, `(invabasic)` and `(GalNAc-2-JNJ)`
 * @returns the translated sequence
 */
//name: asoGapmersNucleotidesToGcrs
//input: string nucleotides {semType: DNA nucleotides}
//output: string result {semType: GCRS / Gapmers}
export function asoGapmersNucleotidesToGcrs(nucleotides: string): string {
  let count = -1;
  const objForEdges: {[index: string]: string} = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'T': 'moeUnps',
    'A': 'moeAnps', 'C': 'moe5mCnps', 'G': 'moeGnps'};
  const objForCenter: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'C': '5mCps', 'A': 'Aps', 'T': 'Tps', 'G': 'Gps'};
  const translated = nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|T|C|G)/g, (x: string) => {
    count++;
    // Modifications carry no linkage suffix, so the suffix rewrite below must not touch them.
    const isModification = x.startsWith('(');
    if (count < 5)
      return (count == 4 && !isModification) ? objForEdges[x].slice(0, -3) + 'ps' : objForEdges[x];
    if (count < 15)
      return (count == 14 && !isModification) ? objForCenter[x].slice(0, -2) + 'nps' : objForCenter[x];
    return objForEdges[x];
  });
  // Strip whichever linkage ends the result ('nps' or 'ps'). A fixed three-character cut
  // removed part of the last nucleotide whenever the final linkage was the two-character 'ps',
  // and slicing to the input length truncated results that end with a modification.
  return translated.replace(/n?ps$/, '');
}
35
+
36
/**
 * Converts BioSpring gapmer codes back to raw DNA letters.
 *
 * Digits 5-9 are mapped to their nucleotide letter, `*` is removed, the two bracketed
 * modifications are kept verbatim, and any other character is left untouched.
 */
//name: asoGapmersBioSpringToNucleotides
//input: string nucleotides {semType: BioSpring / Gapmers}
//output: string result {semType: DNA nucleotides}
export function asoGapmersBioSpringToNucleotides(nucleotides: string): string {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|\*|5|6|7|8|9)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    '*': '',
    '5': 'T',
    '6': 'A',
    '7': 'C',
    '8': 'G',
    '9': 'C',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
44
+
45
/**
 * Converts BioSpring gapmer codes into GCRS gapmer codes.
 *
 * Matched tokens are numbered from 0 in order of appearance and translated through the
 * lookup table. The linkage of token 4 is switched from `nps` to `ps` and that of
 * token 14 from `ps` to `nps`. Tokens at those positions that do not carry the expected
 * linkage (modifications, codes without `*`) are emitted unchanged.
 *
 * @param nucleotides BioSpring gapmer sequence
 * @returns the GCRS representation
 */
//name: asoGapmersBioSpringToGcrs
//input: string nucleotides {semType: BioSpring / Gapmers}
//output: string result {semType: GCRS / Gapmers}
export function asoGapmersBioSpringToGcrs(nucleotides: string): string {
  let count = -1;
  const obj: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    '5*': 'moeUnps', '6*': 'moeAnps', '7*': 'moe5mCnps', '8*': 'moeGnps', '9*': '5mCps', 'A*': 'Aps', 'T*': 'Tps',
    'G*': 'Gps', 'C*': 'Cps', '5': 'moeU', '6': 'moeA', '7': 'moe5mC', '8': 'moeG',
  };
  return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|5\*|6\*|7\*|8\*|9\*|A\*|T\*|G\*|C\*|5|6|7|8)/g,
    (x: string) => {
      count++;
      const code = obj[x];
      // Rewrite the suffix only when it is really there. Blind slicing used to chop
      // characters off modifications and off codes that have no linkage.
      if (count == 4 && code.endsWith('nps'))
        return code.slice(0, -3) + 'ps';
      if (count == 14 && code.endsWith('ps') && !code.endsWith('nps'))
        return code.slice(0, -2) + 'nps';
      return code;
    });
}
60
+
61
+
62
/**
 * Converts GCRS gapmer codes into BioSpring gapmer codes.
 *
 * `moe…` and `5mC` monomers become BioSpring digits, both `nps` and `ps` linkages become
 * `*`, a bare `U` becomes `T`, and modifications are kept verbatim. Anything that matches
 * no token (for example a bare `A`, `T` or `G`) passes through unchanged.
 */
//name: asoGapmersGcrsToBioSpring
//input: string nucleotides {semType: GCRS / Gapmers}
//output: string result {semType: BioSpring / Gapmers}
export function asoGapmersGcrsToBioSpring(nucleotides: string): string {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'moeT': '5',
    'moeA': '6',
    'moe5mC': '7',
    'moeG': '8',
    'moeU': '5',
    '5mC': '9',
    'nps': '*',
    'ps': '*',
    'U': 'T',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
72
+
73
/**
 * Converts GCRS gapmer codes back to raw DNA letters.
 *
 * The `moe`, `5m`, `n` and `ps` fragments are removed, `U` becomes `T`, and modifications
 * are kept verbatim; the remaining letters pass through unchanged.
 */
//name: asoGapmersGcrsToNucleotides
//input: string nucleotides {semType: GCRS / Gapmers}
//output: string result {semType: DNA nucleotides}
export function asoGapmersGcrsToNucleotides(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|moe|5m|n|ps|U)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'moe': '',
    '5m': '',
    'n': '',
    'ps': '',
    'U': 'T',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
81
+
82
/**
 * Converts BioSpring siRNA codes to raw RNA letters.
 *
 * Digits 1-8 map to U/A/C/G (1-4 and 5-8 both cover the four letters), `*` is removed,
 * and modifications are kept verbatim. Other characters pass through unchanged.
 */
//name: siRnaBioSpringToNucleotides
//input: string nucleotides {semType: BioSpring / siRNA}
//output: string result {semType: RNA nucleotides}
export function siRnaBioSpringToNucleotides(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    '1': 'U', '2': 'A', '3': 'C', '4': 'G',
    '5': 'U', '6': 'A', '7': 'C', '8': 'G',
    '*': '',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
91
+
92
/**
 * Converts BioSpring siRNA codes to Axolabs codes.
 *
 * Digits 1-4 become `Uf/Af/Cf/Gf`, digits 5-8 become lowercase `u/a/c/g`, `*` becomes `s`,
 * and modifications are kept verbatim. Other characters pass through unchanged.
 */
//name: siRnaBioSpringToAxolabs
//input: string nucleotides {semType: BioSpring / siRNA}
//output: string result {semType: Axolabs / siRNA}
export function siRnaBioSpringToAxolabs(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    '1': 'Uf', '2': 'Af', '3': 'Cf', '4': 'Gf',
    '5': 'u', '6': 'a', '7': 'c', '8': 'g',
    '*': 's',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
101
+
102
+
103
/**
 * Converts BioSpring siRNA codes to GCRS codes.
 *
 * Digits 1-4 become `fU/fA/fC/fG`, digits 5-8 become `mU/mA/mC/mG`, `*` becomes `ps`,
 * and modifications are kept verbatim. Other characters pass through unchanged.
 */
//name: siRnaBioSpringToGcrs
//input: string nucleotides {semType: BioSpring / siRNA}
//output: string result {semType: GCRS}
export function siRnaBioSpringToGcrs(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    '1': 'fU', '2': 'fA', '3': 'fC', '4': 'fG',
    '5': 'mU', '6': 'mA', '7': 'mC', '8': 'mG',
    '*': 'ps',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
112
+
113
/**
 * Converts Axolabs siRNA codes to GCRS codes.
 *
 * `Uf/Af/Cf/Gf` become `fU/fA/fC/fG`, lowercase `u/a/c/g` become `mU/mA/mC/mG`, `s` becomes
 * `ps`. Modifications are listed first in the pattern so their inner letters are never
 * rewritten. Other characters pass through unchanged.
 */
//name: siRnaAxolabsToGcrs
//input: string nucleotides {semType: Axolabs / siRNA}
//output: string result {semType: GCRS}
export function siRnaAxolabsToGcrs(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'Uf': 'fU', 'Af': 'fA', 'Cf': 'fC', 'Gf': 'fG',
    'u': 'mU', 'a': 'mA', 'c': 'mC', 'g': 'mG',
    's': 'ps',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
123
+
124
/**
 * Converts Axolabs siRNA codes to BioSpring codes.
 *
 * `Uf/Af/Cf/Gf` become digits 1-4, lowercase `u/a/c/g` become digits 5-8, `s` becomes `*`.
 * Modifications are kept verbatim; other characters pass through unchanged.
 */
//name: siRnaAxolabsToBioSpring
//input: string nucleotides {semType: Axolabs / siRNA}
//output: string result {semType: BioSpring / siRNA}
export function siRnaAxolabsToBioSpring(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'Uf': '1', 'Af': '2', 'Cf': '3', 'Gf': '4',
    'u': '5', 'a': '6', 'c': '7', 'g': '8',
    's': '*',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
134
+
135
/**
 * Converts Axolabs siRNA codes to raw RNA letters.
 *
 * Both the `Xf` and the lowercase forms collapse to the uppercase letter, `s` is removed,
 * and modifications are kept verbatim. Other characters pass through unchanged.
 */
//name: siRnaAxolabsToNucleotides
//input: string nucleotides {semType: Axolabs / siRNA}
//output: string result {semType: RNA nucleotides}
export function siRnaAxolabsToNucleotides(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'Uf': 'U', 'Af': 'A', 'Cf': 'C', 'Gf': 'G',
    'u': 'U', 'a': 'A', 'c': 'C', 'g': 'G',
    's': '',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
145
+
146
+
147
/**
 * Converts GCRS siRNA codes to raw RNA letters.
 *
 * `fX` and `mX` monomers collapse to the uppercase letter, `ps` is removed, and
 * modifications are kept verbatim. Other characters pass through unchanged.
 */
//name: siRnaGcrsToNucleotides
//input: string nucleotides {semType: GCRS}
//output: string result {semType: RNA nucleotides}
export function siRnaGcrsToNucleotides(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'fU': 'U', 'fA': 'A', 'fC': 'C', 'fG': 'G',
    'mU': 'U', 'mA': 'A', 'mC': 'C', 'mG': 'G',
    'ps': '',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
157
+
158
/**
 * Converts GCRS siRNA codes to BioSpring codes.
 *
 * `fU/fA/fC/fG` become digits 1-4, `mU/mA/mC/mG` become digits 5-8, `ps` becomes `*`.
 * Modifications are kept verbatim; other characters pass through unchanged.
 */
//name: siRnaGcrsToBioSpring
//input: string nucleotides {semType: GCRS}
//output: string result {semType: BioSpring / siRNA}
export function siRnaGcrsToBioSpring(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'fU': '1', 'fA': '2', 'fC': '3', 'fG': '4',
    'mU': '5', 'mA': '6', 'mC': '7', 'mG': '8',
    'ps': '*',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
168
+
169
/**
 * Converts GCRS siRNA codes to Axolabs codes.
 *
 * `fU/fA/fC/fG` become `Uf/Af/Cf/Gf`, `mU/mA/mC/mG` become lowercase `u/a/c/g`, `ps`
 * becomes `s`. Modifications are kept verbatim; other characters pass through unchanged.
 */
//name: siRnaGcrsToAxolabs
//input: string nucleotides {semType: GCRS}
//output: string result {semType: Axolabs / siRNA}
export function siRnaGcrsToAxolabs(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'fU': 'Uf', 'fA': 'Af', 'fC': 'Cf', 'fG': 'Gf',
    'mU': 'u', 'mA': 'a', 'mC': 'c', 'mG': 'g',
    'ps': 's',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
179
+
180
/**
 * Converts a raw RNA sequence into BioSpring siRNA codes.
 *
 * Matched tokens (A/U/G/C or a bracketed modification) are numbered from 0 in order of
 * appearance:
 *  - tokens 0 and 1 use the left-edge table (code followed by `*`);
 *  - the last two tokens use the right-edge table (`*` followed by code);
 *  - in between, even positions use digits 1-4 and odd positions digits 5-8.
 * Modifications are copied through unchanged but still consume a position.
 *
 * @param nucleotides sequence built from A, U, G, C, `(invabasic)` and `(GalNAc-2-JNJ)`
 * @returns the BioSpring representation
 */
//name: siRnaNucleotideToBioSpringSenseStrand
//input: string nucleotides {semType: RNA nucleotides}
//output: string result {semType: BioSpring / siRNA}
export function siRnaNucleotideToBioSpringSenseStrand(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g;
  // Positions are token indices, so the right edge must be derived from the number of
  // tokens. The character length over-counts as soon as a modification is present.
  const tokenCount = (nucleotides.match(tokenPattern) || []).length;
  let count = -1;
  const objForLeftEdge: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': '6*', 'U': '5*', 'G': '8*', 'C': '7*'};
  const objForRightEdge: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': '*6', 'U': '*5', 'G': '*8', 'C': '*7'};
  const objForOddIndices: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': '6', 'U': '5', 'G': '8', 'C': '7'};
  const objForEvenIndices: {[index: string]: string} = {'(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'A': '2', 'U': '1', 'G': '4', 'C': '3'};
  return nucleotides.replace(tokenPattern, (x: string) => {
    count++;
    if (count < 2) return objForLeftEdge[x];
    if (count > tokenCount - 3) return objForRightEdge[x];
    return (count % 2 == 0) ? objForEvenIndices[x] : objForOddIndices[x];
  });
}
200
+
201
/**
 * Converts a raw RNA sequence into GCRS siRNA codes.
 *
 * Matched tokens (A/U/G/C or a bracketed modification) are numbered from 0 in order of
 * appearance:
 *  - tokens 0 and 1 use the left-edge table (`mX` followed by `ps`);
 *  - the last two tokens use the right-edge table (`ps` followed by `mX`);
 *  - in between, even positions use `fX` and odd positions `mX`.
 * Modifications are copied through unchanged but still consume a position.
 *
 * @param nucleotides sequence built from A, U, G, C, `(invabasic)` and `(GalNAc-2-JNJ)`
 * @returns the GCRS representation
 */
//name: siRnaNucleotidesToGcrs
//input: string nucleotides {semType: RNA nucleotides}
//output: string result {semType: GCRS}
export function siRnaNucleotidesToGcrs(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g;
  // Positions are token indices, so the right edge must be derived from the number of
  // tokens. The character length over-counts as soon as a modification is present.
  const tokenCount = (nucleotides.match(tokenPattern) || []).length;
  let count = -1;
  const objForLeftEdge: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'mAps', 'U': 'mUps', 'G': 'mGps', 'C': 'mCps'};
  const objForRightEdge: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'psmA', 'U': 'psmU', 'G': 'psmG', 'C': 'psmC'};
  const objForEvenIndices: {[index: string]: string} = {'(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'A': 'fA', 'U': 'fU', 'G': 'fG', 'C': 'fC'};
  const objForOddIndices: {[index: string]: string} = {'(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'A': 'mA', 'U': 'mU', 'G': 'mG', 'C': 'mC'};
  return nucleotides.replace(tokenPattern, (x: string) => {
    count++;
    if (count < 2) return objForLeftEdge[x];
    if (count > tokenCount - 3) return objForRightEdge[x];
    return (count % 2 == 0) ? objForEvenIndices[x] : objForOddIndices[x];
  });
}
221
+
222
/**
 * Converts a raw RNA sequence into Axolabs codes using the sense-strand layout.
 *
 * Matched tokens are numbered from 0 in order of appearance:
 *  - positions 0 and 1 get the lowercase letter followed by `s`;
 *  - positions 6, 8, 9 and 10 get the `Xf` form;
 *  - the token whose position equals `nucleotides.length - 1` is emitted as `a`;
 *  - every other position gets the plain lowercase letter.
 * Modifications are copied through unchanged but still consume a position.
 *
 * NOTE(review): the last-position rule compares a token index with the raw character
 * length and always emits `a` regardless of the input letter — confirm this is intended.
 */
//name: siRnaNucleotideToAxolabsSenseStrand
//input: string nucleotides {semType: RNA nucleotides}
//output: string result {semType: Axolabs}
export function siRnaNucleotideToAxolabsSenseStrand(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g;
  const withTrailingS: Record<string, string> = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'as', 'U': 'us', 'G': 'gs', 'C': 'cs',
  };
  const withTrailingF: Record<string, string> = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'Af', 'U': 'Uf', 'G': 'Gf', 'C': 'Cf',
  };
  const lowercase: Record<string, string> = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'a', 'U': 'u', 'G': 'g', 'C': 'c',
  };
  const lastIndex = nucleotides.length - 1;
  let position = -1;
  return nucleotides.replace(tokenPattern, (token: string) => {
    position += 1;
    if (position < 2)
      return withTrailingS[token];
    const usesFForm = position == 6 || (position > 7 && position < 11);
    if (usesFForm)
      return withTrailingF[token];
    return (position == lastIndex) ? 'a' : lowercase[token];
  });
}
241
+
242
/**
 * Converts a raw RNA sequence into Axolabs codes using the antisense-strand layout.
 *
 * Matched tokens are numbered from 0 in order of appearance:
 *  - position 0 is always emitted as `us`;
 *  - position 1 gets the `Xfs` form;
 *  - positions 20 and 21 get the lowercase letter followed by `s`;
 *  - positions 5, 7, 8, 13 and 15 get the `Xf` form;
 *  - every other position gets the plain lowercase letter.
 * Modifications are copied through unchanged but still consume a position.
 */
//name: siRnaNucleotideToAxolabsAntisenseStrand
//input: string nucleotides {semType: RNA nucleotides}
//output: string result {semType: Axolabs}
export function siRnaNucleotideToAxolabsAntisenseStrand(nucleotides: string) {
  const tokenPattern = /(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g;
  const lowercaseWithS: Record<string, string> = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'as', 'U': 'us', 'G': 'gs', 'C': 'cs',
  };
  const withFs: Record<string, string> = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'Afs', 'U': 'Ufs', 'G': 'Gfs', 'C': 'Cfs',
  };
  const withF: Record<string, string> = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'Af', 'U': 'Uf', 'G': 'Gf', 'C': 'Cf',
  };
  const lowercase: Record<string, string> = {
    '(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'A': 'a', 'U': 'u', 'G': 'g', 'C': 'c',
  };
  const fFormPositions = [5, 7, 8, 13, 15];
  let position = -1;
  return nucleotides.replace(tokenPattern, (token: string) => {
    position += 1;
    if (position == 0)
      return 'us';
    if (position == 1)
      return withFs[token];
    if (position == 20 || position == 21)
      return lowercaseWithS[token];
    return fFormPositions.includes(position) ? withF[token] : lowercase[token];
  });
}
263
+
264
/**
 * Converts GCRS codes to raw RNA letters.
 *
 * `mX`/`fX` monomers, with or without a trailing `ps`, collapse to the uppercase letter.
 * The `…ps` alternatives are listed first so the linkage is consumed together with its
 * monomer. Modifications are kept verbatim; other characters pass through unchanged.
 */
//name: gcrsToNucleotides
//input: string nucleotides {semType: GCRS}
//output: string result {semType: RNA nucleotides}
export function gcrsToNucleotides(nucleotides: string) {
  const tokenPattern =
    /(\(invabasic\)|\(GalNAc-2-JNJ\)|mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'mAps': 'A', 'mUps': 'U', 'mGps': 'G', 'mCps': 'C',
    'fAps': 'A', 'fUps': 'U', 'fGps': 'G', 'fCps': 'C',
    'fU': 'U', 'fA': 'A', 'fC': 'C', 'fG': 'G',
    'mU': 'U', 'mA': 'A', 'mC': 'C', 'mG': 'G',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
276
+
277
/**
 * Converts GCRS codes to Mermade 12 single-letter codes.
 *
 * Monomers followed by `ps` map to lowercase letters and bare monomers to uppercase
 * letters, as listed in the table. Modifications are kept verbatim; other characters
 * pass through unchanged.
 */
//name: gcrsToMermade12
//input: string nucleotides {semType: GCRS}
//output: string result {semType: Mermade 12 / siRNA}
export function gcrsToMermade12(nucleotides: string) {
  const tokenPattern =
    /(\(invabasic\)|\(GalNAc-2-JNJ\)|mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g;
  const replacements: Record<string, string> = {
    '(invabasic)': '(invabasic)',
    '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
    'mAps': 'e', 'mUps': 'h', 'mGps': 'g', 'mCps': 'f',
    'fAps': 'i', 'fUps': 'l', 'fGps': 'k', 'fCps': 'j',
    'fU': 'L', 'fA': 'I', 'fC': 'J', 'fG': 'K',
    'mU': 'H', 'mA': 'E', 'mC': 'F', 'mG': 'G',
  };
  return nucleotides.replace(tokenPattern, (token: string) => replacements[token]);
}
@@ -0,0 +1,73 @@
1
+ import {map, stadardPhosphateLinkSmiles, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS} from './map';
2
+ import {isValidSequence} from './sequence-codes-tools';
3
+
4
/**
 * Builds a SMILES string for a monomer-code sequence.
 *
 * The sequence is split greedily into known codes (longest monomer codes first, then the
 * modification codes), each code's SMILES fragment is appended, and a standard phosphate
 * link is inserted after a fragment unless the code is itself a link, already includes the
 * link, or is followed by an explicit link code.
 *
 * @param sequence monomer codes concatenated without separators
 * @param inverted when true, the codes are assembled in reverse order
 * @returns the concatenated SMILES
 * @throws Error when some position of `sequence` matches no known code
 */
export function sequenceToSmiles(sequence: string, inverted: boolean = false): string {
  const obj = getObjectWithCodesAndSmiles(sequence);
  const links = ['s', 'ps', '*'];
  const includesStandardLinkAlready = ['e', 'h', /*'g',*/ 'f', 'i', 'l', 'k', 'j'];
  const dropdowns = Object.keys(MODIFICATIONS);
  const codes = sortByStringLengthInDescendingOrder(Object.keys(obj)).concat(dropdowns);

  // Tokenize the sequence into codes.
  const codesList: string[] = [];
  let i = 0;
  while (i < sequence.length) {
    const code = codes.find((s: string) => s == sequence.slice(i, i + s.length));
    // Fail with a clear message instead of a TypeError on `undefined.length`;
    // an empty code would never advance `i`, so it is rejected as well.
    if (code == null || code.length == 0)
      throw new Error(`Unknown code at position ${i} of sequence '${sequence}'`);
    i += code.length;
    if (inverted)
      codesList.unshift(code);
    else
      codesList.push(code);
  }

  let smiles = '';
  for (let i = 0; i < codesList.length; i++) {
    if (dropdowns.includes(codesList[i])) {
      // Modifications have a 'left' and a 'right' form; the second half of the list uses 'right'.
      if (i == codesList.length -1 || (i < codesList.length - 1 && links.includes(codesList[i + 1]))) {
        smiles += (i >= codesList.length / 2) ?
          MODIFICATIONS[codesList[i]].right:
          MODIFICATIONS[codesList[i]].left;
      } else if (i < codesList.length - 1) {
        smiles += (i >= codesList.length / 2) ?
          MODIFICATIONS[codesList[i]].right + stadardPhosphateLinkSmiles:
          MODIFICATIONS[codesList[i]].left + stadardPhosphateLinkSmiles;
      }
    } else {
      if (links.includes(codesList[i]) ||
        includesStandardLinkAlready.includes(codesList[i]) ||
        (i < codesList.length - 1 && links.includes(codesList[i + 1]))
      )
        smiles += obj[codesList[i]];
      else
        smiles += obj[codesList[i]] + stadardPhosphateLinkSmiles;
    }
  }
  smiles = smiles.replace(/OO/g, 'O');

  // Decide whether the phosphate link appended after the final fragment has to be cut off.
  const lastCode = codesList[codesList.length - 1];
  const keepTail =
    (
      links.includes(lastCode) &&
      codesList.length > 1 &&
      !includesStandardLinkAlready.includes(codesList[codesList.length - 2])
    ) ||
    dropdowns.includes(lastCode) ||
    includesStandardLinkAlready.includes(lastCode);
  return keepTail ?
    smiles :
    smiles.slice(0, smiles.length - stadardPhosphateLinkSmiles.length + 1);
}
53
+
54
/**
 * Flattens `map` into a single code -> SMILES lookup.
 *
 * Codes are collected across every synthesizer and technology; when the same code occurs
 * more than once, the entry visited last wins. The ambiguous code `g` is then overridden
 * with the Mermade 12 or Axolabs siRNA entry when the sequence is detected as one of those.
 */
function getObjectWithCodesAndSmiles(sequence: string) {
  const smilesByCode: { [code: string]: string } = {};
  Object.keys(map).forEach((synthesizer) => {
    Object.keys(map[synthesizer]).forEach((technology) => {
      const monomers = map[synthesizer][technology];
      Object.keys(monomers).forEach((code) => {
        smilesByCode[code] = monomers[code].SMILES;
      });
    });
  });
  // TODO: create object based from synthesizer type to avoid key(codes) duplicates
  const detectedSynthesizer = isValidSequence(sequence).expectedSynthesizer;
  if (detectedSynthesizer == SYNTHESIZERS.MERMADE_12) {
    smilesByCode['g'] = map[SYNTHESIZERS.MERMADE_12][TECHNOLOGIES.SI_RNA]['g'].SMILES;
  } else if (detectedSynthesizer == SYNTHESIZERS.AXOLABS) {
    smilesByCode['g'] = map[SYNTHESIZERS.AXOLABS][TECHNOLOGIES.SI_RNA]['g'].SMILES;
  }
  return smilesByCode;
}
70
+
71
/**
 * Returns the strings ordered from longest to shortest.
 *
 * The input array is left untouched: `Array.prototype.sort` works in place, so a copy is
 * sorted instead of reordering the caller's array as a side effect.
 *
 * @param array strings to order
 * @returns a new array sorted by descending length
 */
function sortByStringLengthInDescendingOrder(array: string[]): string[] {
  return array.slice().sort((a: string, b: string) => b.length - a.length);
}
File without changes
@@ -1,22 +1,22 @@
1
1
  import * as ui from 'datagrok-api/ui';
2
- import {sequenceToSmiles} from './package';
2
+ import {sequenceToSmiles} from '../structures-works/from-monomers';
3
3
  import * as OCL from 'openchemlib/full.js';
4
4
 
5
5
  export function saveSenseAntiSense() {
6
6
  const ssInput = ui.textInput('Sense Strand 5\' ->3\'', '');
7
7
  const asInput = ui.textInput('Anti Sense 3\' ->5\'', '');
8
- const saveOption = ui.switchInput('save as one entity', false);
8
+ const saveOption = ui.switchInput('save as one entity', true);
9
9
  const saveBtn = ui.button('Save SDF', () => {
10
10
  const smiSS = sequenceToSmiles(ssInput.value);
11
11
  const smiAS = sequenceToSmiles(asInput.value, true);
12
12
  let result: string;
13
13
  if (saveOption.value)
14
- result = `${OCL.Molecule.fromSmiles(smiSS + '.' + smiAS).toMolfile()}\n\n$$$$\n`;
14
+ result = `${OCL.Molecule.fromSmiles(smiSS + '.' + smiAS).toMolfileV3()}\n\n$$$$\n`;
15
15
  else {
16
16
  result =
17
- `${OCL.Molecule.fromSmiles(smiSS).toMolfile()}\n` +
17
+ `${OCL.Molecule.fromSmiles(smiSS).toMolfileV3()}\n` +
18
18
  `> <Sequence>\nSense Strand\n\n$$$$\n` +
19
- `${OCL.Molecule.fromSmiles(smiAS).toMolfile()}\n` +
19
+ `${OCL.Molecule.fromSmiles(smiAS).toMolfileV3()}\n` +
20
20
  `> <Sequence>\nAnti Sense\n\n$$$$\n`;
21
21
  }
22
22
 
@@ -0,0 +1,236 @@
1
+ import {map, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS} from './map';
2
+ import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
3
+ asoGapmersBioSpringToNucleotides, asoGapmersBioSpringToGcrs, asoGapmersGcrsToNucleotides,
4
+ asoGapmersGcrsToBioSpring, gcrsToMermade12, siRnaNucleotideToBioSpringSenseStrand,
5
+ siRnaNucleotideToAxolabsSenseStrand, siRnaNucleotidesToGcrs, siRnaBioSpringToNucleotides,
6
+ siRnaBioSpringToAxolabs, siRnaBioSpringToGcrs, siRnaAxolabsToNucleotides,
7
+ siRnaAxolabsToBioSpring, siRnaAxolabsToGcrs, siRnaGcrsToNucleotides,
8
+ siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides} from './converters';
9
+
10
+ const noTranslationTableAvailable = 'No translation table available';
11
+ export const undefinedInputSequence = 'Type of input sequence is undefined';
12
+
13
/**
 * Detects which synthesizer and technology a sequence is written in, and how far it is valid.
 *
 * For every candidate synthesizer the sequence is scanned code by code; the synthesizer that
 * parses the longest prefix is the expected one. If that prefix is shorter than the sequence,
 * its length is reported as the index of the first invalid character and no technology is
 * determined. Otherwise the same scan is repeated per technology of the chosen synthesizer.
 *
 * NOTE(review): the technology scan uses only the technology's own codes, while the candidate
 * technologies are selected with the modification codes included — confirm this is intended.
 *
 * @param sequence sequence to analyse
 * @returns `indexOfFirstNotValidCharacter` (-1 when the whole sequence parsed) together with
 *   the expected synthesizer and technology (`null` when undetermined)
 */
export function isValidSequence(sequence: string): {
  indexOfFirstNotValidCharacter: number,
  expectedSynthesizer: string | null,
  expectedTechnology: string | null
} {
  const possibleSynthesizers = getListOfPossibleSynthesizersByFirstMatchedCode(sequence);
  if (possibleSynthesizers.length == 0)
    return {indexOfFirstNotValidCharacter: 0, expectedSynthesizer: null, expectedTechnology: null};

  const synthesizerPrefixLengths = possibleSynthesizers
    .map((synthesizer) => lengthOfValidPrefix(sequence, getAllCodesOfSynthesizer(synthesizer)));
  const longestSynthesizerPrefix = Math.max(...synthesizerPrefixLengths);
  const indexOfFirstNotValidCharacter =
    (longestSynthesizerPrefix == sequence.length) ? -1 : longestSynthesizerPrefix;
  // On ties the first candidate wins (indexOf returns the first occurrence).
  const expectedSynthesizer = possibleSynthesizers[synthesizerPrefixLengths.indexOf(longestSynthesizerPrefix)];
  if (indexOfFirstNotValidCharacter != -1) {
    return {
      indexOfFirstNotValidCharacter: indexOfFirstNotValidCharacter,
      expectedSynthesizer: expectedSynthesizer,
      expectedTechnology: null,
    };
  }

  const possibleTechnologies = getListOfPossibleTechnologiesByFirstMatchedCode(sequence, expectedSynthesizer);
  if (possibleTechnologies.length == 0)
    return {indexOfFirstNotValidCharacter: 0, expectedSynthesizer: null, expectedTechnology: null};

  const technologyPrefixLengths = possibleTechnologies
    .map((technology: string) => lengthOfValidPrefix(sequence, Object.keys(map[expectedSynthesizer][technology])));
  const longestTechnologyPrefix = Math.max(...technologyPrefixLengths);
  const expectedTechnology = possibleTechnologies[technologyPrefixLengths.indexOf(longestTechnologyPrefix)];

  return {
    indexOfFirstNotValidCharacter: indexOfFirstNotValidCharacter,
    expectedSynthesizer: expectedSynthesizer,
    expectedTechnology: expectedTechnology,
  };
}

/** Scans `sequence` from the start with the given codes and returns the index at which the scan stopped. */
function lengthOfValidPrefix(sequence: string, codes: string[]): number {
  const firstUniqueCharacters = ['r', 'd'];
  const nucleotides = ['A', 'U', 'T', 'C', 'G'];
  let index = 0;
  while (index < sequence.length) {
    const matchedCode = codes.find((c) => c == sequence.slice(index, index + c.length));

    if (matchedCode == null)
      break;

    if ( // for mistake pattern 'rAA'
      index > 1 &&
      nucleotides.includes(sequence[index]) &&
      firstUniqueCharacters.includes(sequence[index - 2])
    ) break;

    if ( // for mistake pattern 'ArA'
      firstUniqueCharacters.includes(sequence[index + 1]) &&
      nucleotides.includes(sequence[index])
    ) {
      index++;
      break;
    }

    index += matchedCode.length;
  }
  return index;
}
107
+
108
/**
 * Lists every code a synthesizer understands: the codes of each of its technologies, in
 * technology order, followed by the modification codes.
 */
function getAllCodesOfSynthesizer(synthesizer: string): string[] {
  const technologies = map[synthesizer];
  const collected: string[] = [];
  Object.keys(technologies).forEach((technology) => {
    collected.push(...Object.keys(technologies[technology]));
  });
  collected.push(...Object.keys(MODIFICATIONS));
  return collected;
}
114
+
115
/**
 * Returns the synthesizers that have a code matching the start of the sequence.
 *
 * If the sequence contains a `)` that is not its last character, matching starts right
 * after the first such `)` (i.e. after a leading bracketed modification); otherwise it
 * starts at index 0.
 *
 * @param sequence sequence to inspect
 * @returns names of the matching synthesizers, in `map` key order
 */
function getListOfPossibleSynthesizersByFirstMatchedCode(sequence: string): string[] {
  //TODO: get first non-dropdown code when there are two modifications
  // The start offset depends only on the sequence, so compute it once rather than per synthesizer.
  // If the first ')' is the last character there can be no later one, so the offset stays 0.
  const firstClosingBracket = sequence.indexOf(')');
  const start = (firstClosingBracket != -1 && firstClosingBracket != sequence.length - 1) ?
    firstClosingBracket + 1 : 0;
  return Object.keys(map).filter((synthesizer: string) =>
    getAllCodesOfSynthesizer(synthesizer).some((code: string) => sequence.startsWith(code, start)));
}
132
+
133
/**
 * Returns the technologies of `synthesizer` for which the sequence begins (at index 0) with
 * one of the technology's codes or with a modification code.
 */
function getListOfPossibleTechnologiesByFirstMatchedCode(sequence: string, synthesizer: string): string[] {
  const modificationCodes = Object.keys(MODIFICATIONS);
  return Object.keys(map[synthesizer]).filter((technology: string) => {
    const candidates = Object.keys(map[synthesizer][technology]).concat(modificationCodes);
    return candidates.some((code) => code == sequence.slice(0, code.length));
  });
}
142
+
143
/**
 * Detects the notation of `text` and returns it translated into the other notations.
 *
 * Whitespace is stripped first. If the sequence does not parse completely, an object with
 * the serialized detection result and an `Error` message is returned. Otherwise the result
 * has a `type` label plus one property per notation for which a converter exists.
 *
 * NOTE(review): the GCRS siRNA branch names the Mermade 12 property `MM12` while the other
 * GCRS branches use `Mermade12` — kept as is, callers may rely on either key.
 */
export function convertSequence(text: string) {
  const seq = text.replace(/\s/g, '');
  const output = isValidSequence(seq);
  if (output.indexOfFirstNotValidCharacter != -1) {
    return {
      indexOfFirstNotValidCharacter: JSON.stringify(output),
      Error: undefinedInputSequence,
    };
  }

  const synthesizer = output.expectedSynthesizer;
  const technology = output.expectedTechnology;

  if (synthesizer == SYNTHESIZERS.RAW_NUCLEOTIDES && technology == TECHNOLOGIES.DNA) {
    return {
      type: `${SYNTHESIZERS.RAW_NUCLEOTIDES} ${TECHNOLOGIES.DNA}`,
      Nucleotides: seq,
      BioSpring: asoGapmersNucleotidesToBioSpring(seq),
      GCRS: asoGapmersNucleotidesToGcrs(seq),
    };
  }
  if (synthesizer == SYNTHESIZERS.BIOSPRING && technology == TECHNOLOGIES.ASO_GAPMERS) {
    return {
      type: `${SYNTHESIZERS.BIOSPRING} ${TECHNOLOGIES.ASO_GAPMERS}`,
      Nucleotides: asoGapmersBioSpringToNucleotides(seq),
      BioSpring: seq,
      GCRS: asoGapmersBioSpringToGcrs(seq),
    };
  }
  if (synthesizer == SYNTHESIZERS.GCRS && technology == TECHNOLOGIES.ASO_GAPMERS) {
    return {
      type: `${SYNTHESIZERS.GCRS} ${TECHNOLOGIES.ASO_GAPMERS}`,
      Nucleotides: asoGapmersGcrsToNucleotides(seq),
      BioSpring: asoGapmersGcrsToBioSpring(seq),
      Mermade12: gcrsToMermade12(seq),
      GCRS: seq,
    };
  }
  if (synthesizer == SYNTHESIZERS.RAW_NUCLEOTIDES && technology == TECHNOLOGIES.RNA) {
    return {
      type: `${SYNTHESIZERS.RAW_NUCLEOTIDES} ${TECHNOLOGIES.RNA}`,
      Nucleotides: seq,
      BioSpring: siRnaNucleotideToBioSpringSenseStrand(seq),
      Axolabs: siRnaNucleotideToAxolabsSenseStrand(seq),
      GCRS: siRnaNucleotidesToGcrs(seq),
    };
  }
  if (synthesizer == SYNTHESIZERS.BIOSPRING && technology == TECHNOLOGIES.SI_RNA) {
    return {
      type: `${SYNTHESIZERS.BIOSPRING} ${TECHNOLOGIES.SI_RNA}`,
      Nucleotides: siRnaBioSpringToNucleotides(seq),
      BioSpring: seq,
      Axolabs: siRnaBioSpringToAxolabs(seq),
      GCRS: siRnaBioSpringToGcrs(seq),
    };
  }
  if (synthesizer == SYNTHESIZERS.AXOLABS && technology == TECHNOLOGIES.SI_RNA) {
    return {
      type: `${SYNTHESIZERS.AXOLABS} ${TECHNOLOGIES.SI_RNA}`,
      Nucleotides: siRnaAxolabsToNucleotides(seq),
      BioSpring: siRnaAxolabsToBioSpring(seq),
      Axolabs: seq,
      GCRS: siRnaAxolabsToGcrs(seq),
    };
  }
  if (synthesizer == SYNTHESIZERS.GCRS && technology == TECHNOLOGIES.SI_RNA) {
    return {
      type: `${SYNTHESIZERS.GCRS} ${TECHNOLOGIES.SI_RNA}`,
      Nucleotides: siRnaGcrsToNucleotides(seq),
      BioSpring: siRnaGcrsToBioSpring(seq),
      Axolabs: siRnaGcrsToAxolabs(seq),
      MM12: gcrsToMermade12(seq),
      GCRS: seq,
    };
  }
  // GCRS with any other technology: only the generic GCRS converters apply.
  if (synthesizer == SYNTHESIZERS.GCRS) {
    return {
      type: SYNTHESIZERS.GCRS,
      Nucleotides: gcrsToNucleotides(seq),
      GCRS: seq,
      Mermade12: gcrsToMermade12(seq),
    };
  }
  if (synthesizer == SYNTHESIZERS.MERMADE_12) {
    return {
      type: SYNTHESIZERS.MERMADE_12,
      Nucleotides: noTranslationTableAvailable,
      GCRS: noTranslationTableAvailable,
      Mermade12: seq,
    };
  }
  return {
    type: undefinedInputSequence,
    Nucleotides: undefinedInputSequence,
  };
}
@@ -1,5 +1,5 @@
1
1
  import {category, expect, test} from '@datagrok-libraries/utils/src/test';
2
- import {sequenceToSmiles} from '../package';
2
+ import {sequenceToSmiles} from '../structures-works/from-monomers';
3
3
 
4
4
  category('sequence-translator', () => {
5
5
  test('AGGTCCTCTTGACTTAGGCC', async () => {