@datagrok/sequence-translator 0.0.4 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +39 -0
- package/detectors.js +2 -12
- package/package.json +7 -3
- package/src/axolabsMap.ts +101 -99
- package/src/defineAxolabsPattern.ts +240 -211
- package/src/drawAxolabsPattern.ts +127 -92
- package/src/package-test.ts +6 -7
- package/src/package.ts +301 -604
- package/src/salts.ts +2 -0
- package/src/structures-works/converters.ts +288 -0
- package/src/structures-works/from-monomers.ts +73 -0
- package/src/structures-works/map.ts +540 -0
- package/src/structures-works/save-sense-antisense.ts +44 -0
- package/src/structures-works/sequence-codes-tools.ts +236 -0
- package/src/tests/smiles-tests.ts +448 -7
- package/src/map.ts +0 -534
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import {map, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS} from './map';
|
|
2
|
+
import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
|
|
3
|
+
asoGapmersBioSpringToNucleotides, asoGapmersBioSpringToGcrs, asoGapmersGcrsToNucleotides,
|
|
4
|
+
asoGapmersGcrsToBioSpring, gcrsToMermade12, siRnaNucleotideToBioSpringSenseStrand,
|
|
5
|
+
siRnaNucleotideToAxolabsSenseStrand, siRnaNucleotidesToGcrs, siRnaBioSpringToNucleotides,
|
|
6
|
+
siRnaBioSpringToAxolabs, siRnaBioSpringToGcrs, siRnaAxolabsToNucleotides,
|
|
7
|
+
siRnaAxolabsToBioSpring, siRnaAxolabsToGcrs, siRnaGcrsToNucleotides,
|
|
8
|
+
siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides} from './converters';
|
|
9
|
+
|
|
10
|
+
// Placeholder value returned in place of a converted sequence when no conversion exists for the detected format.
const noTranslationTableAvailable = 'No translation table available';
|
|
11
|
+
// Message reported by convertSequence when the format of the input sequence cannot be determined.
export const undefinedInputSequence = 'Type of input sequence is undefined';
|
|
12
|
+
|
|
13
|
+
/**
 * Detects which synthesizer (and, for fully valid input, which technology) the codes of `sequence` belong to.
 *
 * Candidate synthesizers are those returned by `getListOfPossibleSynthesizersByFirstMatchedCode`. For each
 * candidate the sequence is scanned from the start and the length of the prefix that can be tokenized with
 * that synthesizer's codes is recorded; the candidate with the longest prefix wins (the first one on ties).
 * If the winning prefix covers the whole sequence, the same scan is repeated per technology of that
 * synthesizer to pick the technology with the longest prefix.
 *
 * @param sequence Sequence to validate.
 * @returns An object with:
 *  - `indexOfFirstNotValidCharacter`: `-1` if the whole sequence was consumed, otherwise the index at which
 *    the best scan stopped (`0` when no synthesizer or no technology matches the beginning of the sequence);
 *  - `expectedSynthesizer`: the best-matching synthesizer, or `null` when nothing matches;
 *  - `expectedTechnology`: the best-matching technology, or `null` when the sequence is not fully valid.
 */
export function isValidSequence(sequence: string): {
  indexOfFirstNotValidCharacter: number,
  expectedSynthesizer: string | null,
  expectedTechnology: string | null
} {
  const possibleSynthesizers = getListOfPossibleSynthesizersByFirstMatchedCode(sequence);
  if (possibleSynthesizers.length === 0)
    return {indexOfFirstNotValidCharacter: 0, expectedSynthesizer: null, expectedTechnology: null};

  const synthesizerPrefixLengths: number[] = possibleSynthesizers
    .map((synthesizer) => getValidPrefixLength(sequence, getAllCodesOfSynthesizer(synthesizer)));
  const longestSynthesizerPrefix = Math.max(...synthesizerPrefixLengths);
  // indexOf returns the first maximum, so earlier candidates win ties.
  const expectedSynthesizer = possibleSynthesizers[synthesizerPrefixLengths.indexOf(longestSynthesizerPrefix)];

  if (longestSynthesizerPrefix !== sequence.length) {
    return {
      indexOfFirstNotValidCharacter: longestSynthesizerPrefix,
      expectedSynthesizer: expectedSynthesizer,
      expectedTechnology: null,
    };
  }

  const possibleTechnologies = getListOfPossibleTechnologiesByFirstMatchedCode(sequence, expectedSynthesizer);
  if (possibleTechnologies.length === 0)
    return {indexOfFirstNotValidCharacter: 0, expectedSynthesizer: null, expectedTechnology: null};

  // Only the technology's own codes are used here (modification codes are not added), as before.
  const technologyPrefixLengths: number[] = possibleTechnologies
    .map((technology) => getValidPrefixLength(sequence, Object.keys(map[expectedSynthesizer][technology])));
  const longestTechnologyPrefix = Math.max(...technologyPrefixLengths);
  const expectedTechnology = possibleTechnologies[technologyPrefixLengths.indexOf(longestTechnologyPrefix)];

  return {
    indexOfFirstNotValidCharacter: -1,
    expectedSynthesizer: expectedSynthesizer,
    expectedTechnology: expectedTechnology,
  };
}

/**
 * Scans `sequence` from index 0, repeatedly consuming the first entry of `codes` that matches at the current
 * position, and returns the index at which scanning stopped (`sequence.length` if everything was consumed).
 *
 * Empty-string codes are ignored: they match everywhere without advancing and would make the loop spin forever.
 *
 * NOTE(review): the first matching code in `codes` order is taken, not the longest one; if one code is a
 * prefix of another, the result depends on the order of the table keys — confirm this is intended.
 */
function getValidPrefixLength(sequence: string, codes: string[]): number {
  const firstUniqueCharacters = ['r', 'd'];
  const nucleotides = ['A', 'U', 'T', 'C', 'G'];

  let index = 0;
  while (index < sequence.length) {
    const position = index; // captured so the predicate below does not close over a mutating variable
    const matchedCode = codes
      .find((c) => c.length > 0 && c === sequence.slice(position, position + c.length));
    if (matchedCode == null)
      break;

    const startsWithNucleotide = nucleotides.includes(sequence[index]);

    // Mistake pattern 'rAA': a bare nucleotide letter two characters after an 'r'/'d' — stop here.
    if (index > 1 && startsWithNucleotide && firstUniqueCharacters.includes(sequence[index - 2]))
      break;

    // Mistake pattern 'ArA': a bare nucleotide letter directly followed by 'r'/'d' —
    // step over the letter so that the reported index points at the 'r'/'d'.
    if (startsWithNucleotide && firstUniqueCharacters.includes(sequence[index + 1])) {
      index++;
      break;
    }

    index += matchedCode.length;
  }
  return index;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Collects every code known for `synthesizer`: the keys of each of its technology tables in `map`
 * (in table order), followed by the keys of `MODIFICATIONS`.
 *
 * @param synthesizer Key of `map` identifying the synthesizer.
 * @returns Flat list of codes; duplicates across technologies are kept.
 */
function getAllCodesOfSynthesizer(synthesizer: string): string[] {
  const technologyTables = map[synthesizer];
  const codesPerTechnology: string[][] = Object.keys(technologyTables)
    .map((technology) => Object.keys(technologyTables[technology]));
  const emptyList: string[] = [];
  return emptyList.concat(...codesPerTechnology, Object.keys(MODIFICATIONS));
}
|
|
114
|
+
|
|
115
|
+
/**
 * Lists the synthesizers (keys of `map`) having at least one code that matches `sequence`
 * at the position where matching starts.
 *
 * Matching starts right after the first ')' in the sequence, unless that ')' is the last character
 * or there is no ')' at all — then it starts at index 0.
 *
 * @param sequence Sequence whose leading code is inspected.
 * @returns Matching synthesizers, in the key order of `map`.
 */
function getListOfPossibleSynthesizersByFirstMatchedCode(sequence: string): string[] {
  //TODO: get first non-dropdown code when there are two modifications
  const firstClosingBracket = sequence.indexOf(')');
  const bracketIsSkippable = firstClosingBracket !== -1 && firstClosingBracket !== sequence.length - 1;
  const start = bracketIsSkippable ? firstClosingBracket + 1 : 0;

  return Object.keys(map).filter((synthesizer: string) =>
    getAllCodesOfSynthesizer(synthesizer).some((code: string) => sequence.startsWith(code, start)));
}
|
|
132
|
+
|
|
133
|
+
/**
 * Lists the technologies of `synthesizer` for which `sequence` begins (at index 0) with one of the
 * technology's codes or with one of the `MODIFICATIONS` codes.
 *
 * @param sequence Sequence whose leading code is inspected.
 * @param synthesizer Key of `map` identifying the synthesizer.
 * @returns Matching technologies, in the key order of `map[synthesizer]`.
 */
function getListOfPossibleTechnologiesByFirstMatchedCode(sequence: string, synthesizer: string): string[] {
  const modificationCodes = Object.keys(MODIFICATIONS);
  const technologyTables = map[synthesizer];

  return Object.keys(technologyTables).filter((technology: string) => {
    const candidateCodes = [...Object.keys(technologyTables[technology]), ...modificationCodes];
    return candidateCodes.some((code) => sequence.startsWith(code));
  });
}
|
|
142
|
+
|
|
143
|
+
/**
 * Strips all whitespace from `text`, detects its format with `isValidSequence` and returns the sequence
 * translated into every format for which a converter is called below.
 *
 * @param text Input sequence; whitespace is ignored.
 * @returns One of:
 *  - `{indexOfFirstNotValidCharacter, Error}` when the sequence is not fully valid; here
 *    `indexOfFirstNotValidCharacter` holds the JSON-serialized result of `isValidSequence`;
 *  - an object with a `type` label and one key per output format for a recognized format;
 *  - `{type, Nucleotides}` both set to `undefinedInputSequence` when no branch applies.
 *
 * NOTE(review): the GCRS + siRNA result uses the key `MM12` while the other GCRS results use `Mermade12`
 * — kept as is; confirm against callers whether this is intentional.
 */
export function convertSequence(text: string) {
  const seq = text.replace(/\s/g, '');
  const output = isValidSequence(seq);

  if (output.indexOfFirstNotValidCharacter != -1) {
    return {
      indexOfFirstNotValidCharacter: JSON.stringify(output),
      Error: undefinedInputSequence,
    };
  }

  // True when the detected synthesizer/technology pair equals the given one.
  const detectedAs = (synthesizer: string, technology: string): boolean =>
    output.expectedSynthesizer == synthesizer && output.expectedTechnology == technology;

  if (detectedAs(SYNTHESIZERS.RAW_NUCLEOTIDES, TECHNOLOGIES.DNA)) {
    return {
      type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.DNA,
      Nucleotides: seq,
      BioSpring: asoGapmersNucleotidesToBioSpring(seq),
      GCRS: asoGapmersNucleotidesToGcrs(seq),
    };
  }

  if (detectedAs(SYNTHESIZERS.BIOSPRING, TECHNOLOGIES.ASO_GAPMERS)) {
    return {
      type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.ASO_GAPMERS,
      Nucleotides: asoGapmersBioSpringToNucleotides(seq),
      BioSpring: seq,
      GCRS: asoGapmersBioSpringToGcrs(seq),
    };
  }

  if (detectedAs(SYNTHESIZERS.GCRS, TECHNOLOGIES.ASO_GAPMERS)) {
    return {
      type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.ASO_GAPMERS,
      Nucleotides: asoGapmersGcrsToNucleotides(seq),
      BioSpring: asoGapmersGcrsToBioSpring(seq),
      Mermade12: gcrsToMermade12(seq),
      GCRS: seq,
    };
  }

  if (detectedAs(SYNTHESIZERS.RAW_NUCLEOTIDES, TECHNOLOGIES.RNA)) {
    return {
      type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.RNA,
      Nucleotides: seq,
      BioSpring: siRnaNucleotideToBioSpringSenseStrand(seq),
      Axolabs: siRnaNucleotideToAxolabsSenseStrand(seq),
      GCRS: siRnaNucleotidesToGcrs(seq),
    };
  }

  if (detectedAs(SYNTHESIZERS.BIOSPRING, TECHNOLOGIES.SI_RNA)) {
    return {
      type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.SI_RNA,
      Nucleotides: siRnaBioSpringToNucleotides(seq),
      BioSpring: seq,
      Axolabs: siRnaBioSpringToAxolabs(seq),
      GCRS: siRnaBioSpringToGcrs(seq),
    };
  }

  if (detectedAs(SYNTHESIZERS.AXOLABS, TECHNOLOGIES.SI_RNA)) {
    return {
      type: SYNTHESIZERS.AXOLABS + ' ' + TECHNOLOGIES.SI_RNA,
      Nucleotides: siRnaAxolabsToNucleotides(seq),
      BioSpring: siRnaAxolabsToBioSpring(seq),
      Axolabs: seq,
      GCRS: siRnaAxolabsToGcrs(seq),
    };
  }

  if (detectedAs(SYNTHESIZERS.GCRS, TECHNOLOGIES.SI_RNA)) {
    return {
      type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.SI_RNA,
      Nucleotides: siRnaGcrsToNucleotides(seq),
      BioSpring: siRnaGcrsToBioSpring(seq),
      Axolabs: siRnaGcrsToAxolabs(seq),
      MM12: gcrsToMermade12(seq),
      GCRS: seq,
    };
  }

  // GCRS with any technology not handled above.
  if (output.expectedSynthesizer == SYNTHESIZERS.GCRS) {
    return {
      type: SYNTHESIZERS.GCRS,
      Nucleotides: gcrsToNucleotides(seq),
      GCRS: seq,
      Mermade12: gcrsToMermade12(seq),
    };
  }

  // Mermade 12 input is passed through; the other formats get a placeholder message.
  if (output.expectedSynthesizer == SYNTHESIZERS.MERMADE_12) {
    return {
      type: SYNTHESIZERS.MERMADE_12,
      Nucleotides: noTranslationTableAvailable,
      GCRS: noTranslationTableAvailable,
      Mermade12: seq,
    };
  }

  return {
    type: undefinedInputSequence,
    Nucleotides: undefinedInputSequence,
  };
}
|