@datagrok/sequence-translator 1.0.17 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +4 -3
- package/CHANGELOG.md +36 -0
- package/detectors.js +8 -0
- package/dist/package-test.js +2 -73079
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -72284
- package/dist/package.js.map +1 -0
- package/files/axolabs-style.json +97 -0
- package/files/codes-to-symbols.json +67 -0
- package/files/formats-to-helm.json +63 -0
- package/files/linkers.json +22 -0
- package/files/monomer-lib.json +1142 -0
- package/link-bio +7 -0
- package/package.json +30 -31
- package/scripts/build-monomer-lib.py +391 -122
- package/src/demo/demo-st-ui.ts +71 -0
- package/src/demo/handle-error.ts +12 -0
- package/src/model/axolabs/axolabs-tab.ts +111 -0
- package/src/model/axolabs/const.ts +33 -0
- package/src/{axolabs-tab → model/axolabs}/draw-svg.ts +1 -1
- package/src/{axolabs-tab → model/axolabs}/helpers.ts +7 -5
- package/src/model/const.ts +18 -0
- package/src/model/data-loading-utils/const.ts +8 -0
- package/src/model/data-loading-utils/json-loader.ts +38 -0
- package/src/model/data-loading-utils/types.ts +30 -0
- package/src/model/format-translation/const.ts +8 -0
- package/src/model/format-translation/conversion-utils.ts +49 -0
- package/src/model/format-translation/format-converter.ts +109 -0
- package/src/model/helpers.ts +12 -0
- package/src/model/monomer-lib/const.ts +3 -0
- package/src/model/monomer-lib/lib-wrapper.ts +119 -0
- package/src/model/parsing-validation/format-detector.ts +57 -0
- package/src/model/parsing-validation/sequence-validator.ts +52 -0
- package/src/model/sequence-to-structure-utils/const.ts +1 -0
- package/src/{utils/structures-works → model/sequence-to-structure-utils}/mol-transformations.ts +33 -41
- package/src/model/sequence-to-structure-utils/monomer-code-parser.ts +92 -0
- package/src/model/sequence-to-structure-utils/sdf-tab.ts +97 -0
- package/src/model/sequence-to-structure-utils/sequence-to-molfile.ts +409 -0
- package/src/package-test.ts +3 -1
- package/src/package.ts +113 -91
- package/src/tests/const.ts +24 -0
- package/src/tests/formats-support.ts +40 -0
- package/src/tests/formats-to-helm.ts +53 -0
- package/src/tests/helm-to-nucleotides.ts +28 -0
- package/src/view/const/main-tab.ts +3 -0
- package/src/view/const/view.ts +10 -0
- package/src/view/css/axolabs-tab.css +1 -0
- package/src/view/css/colored-text-input.css +27 -0
- package/src/view/css/main-tab.css +46 -0
- package/src/view/css/sdf-tab.css +39 -0
- package/src/view/monomer-lib-viewer/viewer.ts +22 -0
- package/src/view/tabs/axolabs.ts +719 -0
- package/src/view/tabs/main.ts +174 -0
- package/src/view/tabs/sdf.ts +193 -0
- package/src/view/utils/app-info-dialog.ts +18 -0
- package/src/view/utils/colored-input/colored-text-input.ts +56 -0
- package/src/view/utils/colored-input/input-painters.ts +44 -0
- package/src/view/utils/draw-molecule.ts +86 -0
- package/src/view/utils/molecule-img.ts +106 -0
- package/src/view/view.ts +127 -0
- package/tsconfig.json +12 -18
- package/webpack.config.js +17 -4
- package/README.md +0 -84
- package/css/style.css +0 -18
- package/img/Sequence Translator Axolabs.png +0 -0
- package/jest.config.js +0 -33
- package/setup-unlink-clean.cmd +0 -14
- package/setup-unlink-clean.sh +0 -21
- package/setup.cmd +0 -14
- package/setup.sh +0 -37
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -97
- package/src/apps/oligo-sd-file-app.ts +0 -58
- package/src/autostart/calculations.ts +0 -40
- package/src/autostart/constants.ts +0 -37
- package/src/autostart/registration.ts +0 -306
- package/src/axolabs-tab/axolabs-tab.ts +0 -873
- package/src/axolabs-tab/define-pattern.ts +0 -874
- package/src/hardcode-to-be-eliminated/ICDs.ts +0 -3
- package/src/hardcode-to-be-eliminated/IDPs.ts +0 -3
- package/src/hardcode-to-be-eliminated/const.ts +0 -5
- package/src/hardcode-to-be-eliminated/constants.ts +0 -101
- package/src/hardcode-to-be-eliminated/converters.ts +0 -323
- package/src/hardcode-to-be-eliminated/map.ts +0 -720
- package/src/hardcode-to-be-eliminated/salts.ts +0 -2
- package/src/hardcode-to-be-eliminated/sources.ts +0 -3
- package/src/hardcode-to-be-eliminated/users.ts +0 -3
- package/src/main-tab/main-tab.ts +0 -210
- package/src/sdf-tab/sdf-tab.ts +0 -163
- package/src/sdf-tab/sequence-codes-tools.ts +0 -347
- package/src/tests/smiles-tests.ts +0 -458
- package/src/utils/const.ts +0 -0
- package/src/utils/helpers.ts +0 -28
- package/src/utils/parse.ts +0 -27
- package/src/utils/sdf-add-columns.ts +0 -118
- package/src/utils/sdf-save-table.ts +0 -56
- package/src/utils/structures-works/draw-molecule.ts +0 -84
- package/src/utils/structures-works/from-monomers.ts +0 -266
- package/test-SequenceTranslator-6288c2fbe346-695b7b55.html +0 -259
- package/vendors/openchemlib-full.js +0 -293
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {MonomerSequenceParser} from './monomer-code-parser';
|
|
7
|
+
import {MonomerLibWrapper} from '../monomer-lib/lib-wrapper';
|
|
8
|
+
|
|
9
|
+
/** Columns read from a V3000 atom block; slot `i` of every array describes the i-th atom line. */
type AtomDataV3000 = { atomIndex: number[], atomType: string[], x: number[], y: number[] };

/**
 * Converts a monomer sequence into one V3000 molfile.
 *
 * The sequence is split into monomer symbols by `MonomerSequenceParser`, a molfile is fetched
 * for every symbol from `MonomerLibWrapper`, each molfile is re-oriented, and the results are
 * merged into a single connection table by `linkV3000`.
 *
 * All parsing is positional text scanning: fields inside a V3000 line are expected to be
 * separated by exactly one space, and every connection-table line is expected to start with
 * the CTfile prefix `M  V30 ` (two spaces after `M`).
 */
export class SequenceToMolfileConverter {
  /** Prefix of every V3000 connection-table line. */
  private static readonly V30_PREFIX = 'M  V30 ';
  private static readonly BEGIN_ATOM = 'M  V30 BEGIN ATOM';
  private static readonly END_ATOM = 'M  V30 END ATOM';
  private static readonly BEGIN_BOND = 'M  V30 BEGIN BOND';
  private static readonly END_BOND = 'M  V30 END BOND';
  /** Start of an absolute-stereo collection line; the atom count follows directly. */
  private static readonly STEABS_START = 'M  V30 MDLV30/STEABS ATOMS=(';
  /** How many atom numbers are written per continuation line of the merged collection. */
  private static readonly STEABS_PER_LINE = 4;

  /**
   * @param sequence monomer sequence, passed to `MonomerSequenceParser` unchanged
   * @param invert forwarded to `MonomerSequenceParser`
   * @param format name of the source format, used to pick the code-to-symbol map
   */
  constructor(
    sequence: string, invert: boolean = false, format: string
  ) {
    this.lib = MonomerLibWrapper.getInstance();
    const codeToSymbolMap = this.lib.getCodeToSymbolMap(format);
    this.parser = new MonomerSequenceParser(sequence, invert, codeToSymbolMap);
  }

  private parser: MonomerSequenceParser;
  private lib: MonomerLibWrapper;

  /** Parses the sequence and returns the molfile of the whole polymer. */
  convert(): string {
    const monomerMolfiles: string[] = this.parser.parseSequence()
      .map((monomerSymbol, idx) => this.getMonomerMolfile(monomerSymbol, idx));
    return this.getPolymerMolfile(monomerMolfiles);
  }

  /**
   * Returns the oriented molfile of one monomer. Modifications are mirrored when they are the
   * first element of the sequence and otherwise left as stored; every other monomer is rotated.
   */
  private getMonomerMolfile(monomerSymbol: string, idx: number): string {
    const molBlock = this.lib.getMolfileBySymbol(monomerSymbol);
    if (this.lib.isModification(monomerSymbol))
      return (idx === 0) ? this.reflect(molBlock) : molBlock;
    return this.rotateNucleotidesV3000(molBlock);
  }

  private getPolymerMolfile(monomerMolfiles: string[]): string {
    return this.linkV3000(monomerMolfiles);
  }

  /**
   * Mirrors the molecule horizontally: centers it on the midpoint between atom number 1 and
   * atom number `natom` (named the 5' and 3' ends in this code), negates every x, then shifts
   * so that atom number 1 ends up at x = 0.
   */
  private reflect(molBlock: string): string {
    const coordinates = this.extractAtomDataV3000(molBlock);
    const natom = coordinates.atomIndex.length;

    const indexFivePrime = coordinates.atomIndex.indexOf(1);
    const indexThreePrime = coordinates.atomIndex.indexOf(natom);

    this.centerOnTerminalMidpoint(coordinates, indexFivePrime, indexThreePrime);

    //mirror across the vertical axis
    for (let i = 0; i < natom; i++)
      coordinates.x[i] = -coordinates.x[i];

    this.moveFivePrimeToZeroX(coordinates, indexFivePrime);

    return this.rewriteCoordinatesV3000(molBlock, coordinates.x, coordinates.y);
  }

  /** Subtracts the midpoint between the two terminal atoms from every atom position (in place). */
  private centerOnTerminalMidpoint(
    coordinates: AtomDataV3000, indexFivePrime: number, indexThreePrime: number
  ): void {
    const xCenter = (coordinates.x[indexThreePrime] + coordinates.x[indexFivePrime]) / 2;
    const yCenter = (coordinates.y[indexThreePrime] + coordinates.y[indexFivePrime]) / 2;
    for (let i = 0; i < coordinates.x.length; i++) {
      coordinates.x[i] -= xCenter;
      coordinates.y[i] -= yCenter;
    }
  }

  /** Shifts all x values (in place) so that the atom at `indexFivePrime` gets x = 0. */
  private moveFivePrimeToZeroX(coordinates: AtomDataV3000, indexFivePrime: number): void {
    const xShift = coordinates.x[indexFivePrime];
    for (let i = 0; i < coordinates.x.length; i++)
      coordinates.x[i] -= xShift;
  }

  /**
   * Returns `molBlock` with the x and y fields of the first `x.length` atom lines replaced by
   * the given values. The remaining fields of every line are left untouched.
   */
  private rewriteCoordinatesV3000(molBlock: string, x: number[], y: number[]): string {
    let index = molBlock.indexOf(SequenceToMolfileConverter.BEGIN_ATOM);
    index = molBlock.indexOf('\n', index);
    for (let i = 0; i < x.length; i++) {
      index = molBlock.indexOf('V30', index) + 4; // atom number field
      index = molBlock.indexOf(' ', index) + 1; // atom type field
      index = molBlock.indexOf(' ', index) + 1; // x field
      let indexEnd = molBlock.indexOf(' ', index) + 1; // y field
      indexEnd = molBlock.indexOf(' ', indexEnd); // space that follows y

      molBlock = molBlock.slice(0, index) + x[i] + ' ' + y[i] + molBlock.slice(indexEnd);

      index = molBlock.indexOf('\n', index) + 1;
    }
    return molBlock;
  }

  /** Reads atom number, atom type, x and y of every atom line announced by the COUNTS line. */
  private extractAtomDataV3000(molBlock: string): AtomDataV3000 {
    const numbers = this.extractAtomsBondsNumbersV3000(molBlock);
    let index = molBlock.indexOf(SequenceToMolfileConverter.BEGIN_ATOM);
    index = molBlock.indexOf('\n', index);
    let indexEnd = index;

    const indexes: number[] = Array(numbers.natom);
    const types: string[] = Array(numbers.natom);
    const x: number[] = Array(numbers.natom);
    const y: number[] = Array(numbers.natom);

    for (let i = 0; i < numbers.natom; i++) {
      index = molBlock.indexOf('V30', index) + 4;
      indexEnd = molBlock.indexOf(' ', index);
      indexes[i] = parseInt(molBlock.substring(index, indexEnd));

      index = indexEnd + 1;
      indexEnd = molBlock.indexOf(' ', index);
      types[i] = molBlock.substring(index, indexEnd);

      index = indexEnd + 1;
      indexEnd = molBlock.indexOf(' ', index);
      x[i] = parseFloat(molBlock.substring(index, indexEnd));

      index = indexEnd + 1;
      indexEnd = molBlock.indexOf(' ', index);
      y[i] = parseFloat(molBlock.substring(index, indexEnd));

      index = molBlock.indexOf('\n', index) + 1;
    }

    return {atomIndex: indexes, atomType: types, x: x, y: y};
  }

  /**
   * Reads the atom and bond counts from the `COUNTS` line.
   * @throws Error when there is no `COUNTS` line or its first two fields are not numbers
   */
  private extractAtomsBondsNumbersV3000(molBlock: string): { natom: number, nbond: number } {
    molBlock = molBlock.replace(/\r/g, ''); //equalize old and new sdf standards
    const countsPosition = molBlock.indexOf('COUNTS');
    if (countsPosition === -1)
      throw new Error('V3000 molblock has no COUNTS line');

    let index = countsPosition + 7; // first field after 'COUNTS '
    let indexEnd = molBlock.indexOf(' ', index);
    const atomsNumber = parseInt(molBlock.substring(index, indexEnd));

    index = indexEnd + 1;
    indexEnd = molBlock.indexOf(' ', index);
    const bondsNumber = parseInt(molBlock.substring(index, indexEnd));

    if (Number.isNaN(atomsNumber) || Number.isNaN(bondsNumber))
      throw new Error('V3000 molblock has a malformed COUNTS line');

    return {natom: atomsNumber, nbond: bondsNumber};
  }

  /**
   * Rotates the molecule about the midpoint of its two terminal atoms (atom number 1 and atom
   * number `natom`) by an angle derived from the position of atom number 1, then shifts it so
   * that atom number 1 has x = 0. For molecules with more than 8 atoms the position of atom
   * number 1 may first be adjusted by `fix5Prime`.
   */
  private rotateNucleotidesV3000(molBlock: string): string {
    const coordinates = this.extractAtomDataV3000(molBlock);
    const natom = coordinates.atomIndex.length;

    const indexFivePrime = coordinates.atomIndex.indexOf(1);
    const indexThreePrime = coordinates.atomIndex.indexOf(natom);

    //fix 5 prime if inadequate
    if (natom > 8)
      this.fix5Prime(coordinates, indexFivePrime, indexThreePrime);

    this.centerOnTerminalMidpoint(coordinates, indexFivePrime, indexThreePrime);

    const x5 = coordinates.x[indexFivePrime];
    const y5 = coordinates.y[indexFivePrime];
    let angle = 0;
    if (x5 === 0) {
      angle = y5 > coordinates.y[indexThreePrime] ? Math.PI / 2 : 3 * Math.PI / 2;
    } else if (y5 === 0) {
      angle = x5 > coordinates.x[indexThreePrime] ? Math.PI : 0;
    } else {
      const derivative = y5 / x5;
      angle = derivative > 0 ?
        (x5 > 0 ? Math.PI - Math.atan(derivative) : Math.PI * 2 - Math.atan(derivative)) :
        (x5 > 0 ? -Math.PI - Math.atan(derivative) : Math.atan(derivative));
    }

    const cos = Math.cos(angle);
    const sin = Math.sin(angle);

    for (let i = 0; i < natom; i++) {
      const xOld = coordinates.x[i];
      coordinates.x[i] = xOld * cos - coordinates.y[i] * sin;
      coordinates.y[i] = xOld * sin + coordinates.y[i] * cos;
    }

    this.moveFivePrimeToZeroX(coordinates, indexFivePrime);

    return this.rewriteCoordinatesV3000(molBlock, coordinates.x, coordinates.y);
  }

  /**
   * Maps an atom number of a single monomer to its number in the merged molecule.
   * For the boundary block the numbers 1 and `specLength` trade places first; then the running
   * offset is added.
   */
  private renumberAtom(field: string, offset: number, isBoundary: boolean, specLength: number): number {
    let atomNumber = parseInt(field);
    if (isBoundary) {
      if (atomNumber === 1)
        atomNumber = specLength;
      else if (atomNumber === specLength)
        atomNumber = 1;
    }
    return atomNumber + offset;
  }

  /** Renders the merged absolute-stereo collection, `STEABS_PER_LINE` atom numbers per line. */
  private formatSteabsCollection(collection: number[]): string {
    const perLine = SequenceToMolfileConverter.STEABS_PER_LINE;
    let text = SequenceToMolfileConverter.STEABS_START + collection.length + ' -\n';
    for (let start = 0; start < collection.length; start += perLine) {
      const isLastLine = start + perLine >= collection.length;
      text += SequenceToMolfileConverter.V30_PREFIX +
        collection.slice(start, start + perLine).join(' ') +
        (isLastLine ? ')\n' : ' -\n');
    }
    return text;
  }

  /**
   * Merges monomer molfiles into one V3000 molfile.
   *
   * Every block after the first loses its atom number 1; since the running atom offset grows
   * by `natom - 1` per block, references to that atom resolve to the last atom of the previous
   * block. Atoms are shifted along x so that the blocks follow each other. The input array is
   * not modified.
   *
   * @param molBlocks oriented monomer molfiles in sequence order
   * @param useChirality when true and any `MDLV30/STEABS` collection was found, a merged
   *   collection block is written; otherwise ` CFG=<digit>` markers are stripped instead
   */
  private linkV3000(molBlocks: string[], useChirality: boolean = true): string {
    const K = SequenceToMolfileConverter;
    let atomBlock = '';
    let bondBlock = '';
    const collection: number[] = [];
    let natom = 0;
    let nbond = 0;
    let xShift = 0;

    for (let i = 0; i < molBlocks.length; i++) {
      // Only a first block whose text contains 'MODIFICATION' gets the 1 <-> last renumbering.
      const isBoundary = molBlocks[i].includes('MODIFICATION') && i === 0;

      // Join continuation lines ("-\nM  V30 ") so that every record sits on one line.
      let block = molBlocks[i]
        .split('(-\n' + K.V30_PREFIX).join('(')
        .split('-\n' + K.V30_PREFIX).join('')
        .split(' )').join(')');
      const numbers = this.extractAtomsBondsNumbersV3000(block);
      const coordinates = this.extractAtomDataV3000(block);
      const specLength = isBoundary ? numbers.natom : 0;
      const totalShift = xShift - coordinates.x[0];

      const indexAtoms = block.indexOf('\n', block.indexOf(K.BEGIN_ATOM));
      let index = indexAtoms;
      let indexEnd = indexAtoms;

      for (let j = 0; j < numbers.natom; j++) {
        if (coordinates.atomIndex[j] !== 1 || i === 0) {
          //rewrite atom number
          index = block.indexOf('V30', index) + 4;
          indexEnd = block.indexOf(' ', index);
          const atomNumber = this.renumberAtom(block.substring(index, indexEnd), natom, isBoundary, specLength);
          block = block.slice(0, index) + atomNumber + block.slice(indexEnd);

          //rewrite coordinates
          index = block.indexOf(' ', index) + 1;
          index = block.indexOf(' ', index) + 1;
          indexEnd = block.indexOf(' ', index);
          let coordinate =
            Math.round(10000 * (parseFloat(block.substring(index, indexEnd)) + totalShift)) / 10000;
          block = block.slice(0, index) + coordinate + block.slice(indexEnd);

          index = block.indexOf(' ', index) + 1;
          indexEnd = block.indexOf(' ', index);
          coordinate = Math.round(10000 * (parseFloat(block.substring(index, indexEnd)))) / 10000;
          block = block.slice(0, index) + coordinate + block.slice(indexEnd);

          index = block.indexOf('\n', index) + 1;
        } else {
          // Drop the line of atom number 1 together with the newline in front of it.
          index = block.indexOf('M  V30', index) - 1;
          indexEnd = block.indexOf('\n', index + 1);
          block = block.slice(0, index) + block.slice(indexEnd);
        }
      }

      atomBlock += block.substring(indexAtoms + 1, block.indexOf(K.END_ATOM));

      const indexBonds = block.indexOf('\n', block.indexOf(K.BEGIN_BOND));
      index = indexBonds;
      indexEnd = indexBonds;

      for (let j = 0; j < numbers.nbond; j++) {
        //rewrite bond number
        index = block.indexOf('V30', index) + 4;
        indexEnd = block.indexOf(' ', index);
        const bondNumber = parseInt(block.substring(index, indexEnd)) + nbond;
        block = block.slice(0, index) + bondNumber + block.slice(indexEnd);

        //rewrite atom pair in bond
        index = block.indexOf(' ', index) + 1;
        index = block.indexOf(' ', index) + 1;
        indexEnd = block.indexOf(' ', index);
        const firstAtom = this.renumberAtom(block.substring(index, indexEnd), natom, isBoundary, specLength);
        block = block.slice(0, index) + firstAtom + block.slice(indexEnd);

        index = block.indexOf(' ', index) + 1;
        // The second atom is either the last field of the line or followed by more fields.
        indexEnd = Math.min(block.indexOf('\n', index), block.indexOf(' ', index));
        const secondAtom = this.renumberAtom(block.substring(index, indexEnd), natom, isBoundary, specLength);
        block = block.slice(0, index) + secondAtom + block.slice(indexEnd);

        index = block.indexOf('\n', index) + 1;
      }

      bondBlock += block.substring(indexBonds + 1, block.indexOf(K.END_BOND));

      let indexCollection = block.indexOf(K.STEABS_START);
      while (indexCollection !== -1) {
        indexCollection += K.STEABS_START.length;
        const collectionEnd = block.indexOf(')', indexCollection);
        // The first entry is the atom count of the collection, not an atom number.
        const collectionEntries = block.substring(indexCollection, collectionEnd).split(' ').slice(1);
        for (const entry of collectionEntries)
          collection.push(parseInt(entry) + natom);
        indexCollection = block.indexOf(K.STEABS_START, collectionEnd);
      }

      natom += numbers.natom - 1;
      nbond += numbers.nbond;
      if (isBoundary)
        xShift += Math.max(...coordinates.x);
      else
        xShift += coordinates.x[numbers.natom - 1] - coordinates.x[0];
    }

    //generate file
    natom++; // every block added natom - 1 to the offset; this counts the one remaining atom
    let macroMolBlock = '\nDatagrok macromolecule handler\n\n';
    macroMolBlock += '  0  0  0  0  0  0            999 V3000\n';
    macroMolBlock += 'M  V30 BEGIN CTAB\n';
    macroMolBlock += 'M  V30 COUNTS ' + natom + ' ' + nbond + ' 0 0 0\n';
    macroMolBlock += K.BEGIN_ATOM + '\n';
    macroMolBlock += atomBlock;
    macroMolBlock += K.END_ATOM + '\n';
    macroMolBlock += K.BEGIN_BOND + '\n';
    macroMolBlock += bondBlock;
    macroMolBlock += K.END_BOND + '\n';
    if (useChirality && collection.length > 0) {
      macroMolBlock += 'M  V30 BEGIN COLLECTION\n';
      macroMolBlock += this.formatSteabsCollection(collection);
      macroMolBlock += 'M  V30 END COLLECTION\n';
    } else {
      macroMolBlock = macroMolBlock.replace(/ CFG=\d/g, ' ');
    }

    macroMolBlock += 'M  V30 END CTAB\n';
    macroMolBlock += 'M  END';

    return macroMolBlock;
  }

  /**
   * Tries a 120 degree rotation of the 5' atom around the atom stored right after it in the
   * atom list, and keeps the rotated position when it is at least as far from the 3' atom as
   * the original position. Modifies `coordinates` in place.
   */
  private fix5Prime(coordinates: { atomIndex: number[], atomType: string[], x: number[], y: number[] },
    indexFivePrime: number, indexThreePrime: number) {
    const indexFivePrimeNeighbour = indexFivePrime + 1;
    const xShift = coordinates.x[indexFivePrimeNeighbour];
    const yShift = coordinates.y[indexFivePrimeNeighbour];
    const base3PrimeX = coordinates.x[indexThreePrime] - xShift;
    const base3PrimeY = coordinates.y[indexThreePrime] - yShift;
    const base5PrimeX = coordinates.x[indexFivePrime] - xShift;
    const base5PrimeY = coordinates.y[indexFivePrime] - yShift;

    const rotated5PrimeX = base5PrimeX * Math.cos(Math.PI * 2 / 3) - base5PrimeY * Math.sin(Math.PI * 2 / 3);
    const rotated5PrimeY = base5PrimeX * Math.sin(Math.PI * 2 / 3) + base5PrimeY * Math.cos(Math.PI * 2 / 3);

    const dx = base5PrimeX - base3PrimeX;
    const dy = base5PrimeY - base3PrimeY;
    const dxRotated = rotated5PrimeX - base3PrimeX;
    const dyRotated = rotated5PrimeY - base3PrimeY;

    if (Math.sqrt(dyRotated * dyRotated + dxRotated * dxRotated) >= Math.sqrt(dy * dy + dx * dx)) {
      coordinates.x[indexFivePrime] = rotated5PrimeX + xShift;
      coordinates.y[indexFivePrime] = rotated5PrimeY + yShift;
    }
  }
}
|
package/src/package-test.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
|
|
3
|
-
import './tests/
|
|
3
|
+
import './tests/formats-to-helm';
|
|
4
|
+
import './tests/helm-to-nucleotides';
|
|
5
|
+
import './tests/formats-support';
|
|
4
6
|
|
|
5
7
|
export const _package = new DG.Package();
|
|
6
8
|
export {tests};
|
package/src/package.ts
CHANGED
|
@@ -2,111 +2,133 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
5
|
+
import {SequenceTranslatorUI} from './view/view';
|
|
6
|
+
import {LIB_PATH, DEFAULT_LIB_FILENAME} from './model/data-loading-utils/const';
|
|
7
|
+
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
8
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
9
|
+
import {getJsonData} from './model/data-loading-utils/json-loader';
|
|
10
|
+
import {SequenceToMolfileConverter} from './model/sequence-to-structure-utils/sequence-to-molfile';
|
|
11
|
+
import {linkStrandsV3000} from './model/sequence-to-structure-utils/mol-transformations';
|
|
12
|
+
import {MonomerLibWrapper} from './model/monomer-lib/lib-wrapper';
|
|
13
|
+
import {FormatDetector} from './model/parsing-validation/format-detector';
|
|
14
|
+
import {SequenceValidator} from './model/parsing-validation/sequence-validator';
|
|
15
|
+
import {demoDesignPatternUI, demoVisualizeDuplexUI, demoTranslateSequenceUI} from './demo/demo-st-ui';
|
|
16
|
+
import {FormatConverter} from './model/format-translation/format-converter';
|
|
17
|
+
|
|
18
|
+
/** Package object that also holds the monomer library once `initMonomerLib()` has loaded it. */
class StPackage extends DG.Package {
  private _monomerLib?: IMonomerLib;

  /**
   * The loaded monomer library.
   * @throws Error when the library has not been loaded yet
   */
  get monomerLib(): IMonomerLib {
    if (!this._monomerLib)
      throw new Error('ST: monomer lib not loaded');
    return this._monomerLib;
  }

  /**
   * Loads the monomer library from `LIB_PATH` / `DEFAULT_LIB_FILENAME` unless it is already
   * loaded. A task-bar progress indicator is shown while loading and closed in every outcome.
   * @throws Error with the prefix 'Loading monomer library: ' when loading fails
   */
  public async initMonomerLib(): Promise<void> {
    if (this._monomerLib !== undefined)
      return;

    const pi: DG.TaskBarProgressIndicator = DG.TaskBarProgressIndicator.create(
      'Initializing Sequence Translator monomer library ...');
    try {
      const libHelper: IMonomerLibHelper = await getMonomerLibHelper();
      this._monomerLib = await libHelper.readLibrary(LIB_PATH, DEFAULT_LIB_FILENAME);
    } catch (err: unknown) {
      // Anything can be thrown, including null/undefined, so never dereference err blindly.
      const errMsg: string = (typeof err === 'object' && err !== null && 'message' in err) ?
        String((err as {message: unknown}).message) : String(err);
      throw new Error('Loading monomer library: ' + errMsg);
    } finally {
      pi.close();
    }
  }
}

export const _package: StPackage = new StPackage();
|
|
24
46
|
|
|
25
|
-
|
|
47
|
+
/**
 * Application entry point: awaits `initSequenceTranslatorLibData()`, then creates a
 * `SequenceTranslatorUI` and awaits its `createLayout()`. A task-bar progress indicator is
 * shown meanwhile and closed in every outcome. Failures are reported through
 * `grok.shell.error` and then rethrown unchanged.
 */
//name: Sequence Translator
//tags: app
export async function sequenceTranslatorApp(): Promise<void> {
  const pi: DG.TaskBarProgressIndicator = DG.TaskBarProgressIndicator.create('Loading Sequence Translator app ...');

  try {
    await initSequenceTranslatorLibData();
    const v = new SequenceTranslatorUI();
    await v.createLayout();
  } catch (err: unknown) {
    // Anything can be thrown, including null/undefined, so never dereference err blindly.
    const errMsg: string = (typeof err === 'object' && err !== null && 'message' in err) ?
      String((err as {message: unknown}).message) : String(err);
    grok.shell.error('Loading Sequence Translator application error: ' + errMsg);
    throw err;
  } finally {
    pi.close();
  }
}
|
|
29
64
|
|
|
30
|
-
|
|
31
|
-
|
|
65
|
+
/**
 * Prepares the data the package works with: awaits `getJsonData()` first and
 * `_package.initMonomerLib()` second. Both steps run one after the other, not in parallel.
 */
//name: initSequenceTranslatorLibData
export async function initSequenceTranslatorLibData(): Promise<void> {
  await getJsonData();
  await _package.initMonomerLib();
}
|
|
33
70
|
|
|
34
|
-
|
|
35
|
-
|
|
71
|
+
/**
 * Returns the codes-to-weights map of the shared `MonomerLibWrapper` instance as a plain
 * object, converting the wrapper's result with `Object.fromEntries`.
 */
//name: getCodeToWeightsMap
//output: object result
export function getCodeToWeightsMap(): {[key: string]: number} {
  const map = MonomerLibWrapper.getInstance().getCodesToWeightsMap();
  return Object.fromEntries(map);
}
|
|
37
77
|
|
|
38
|
-
//name:
|
|
39
|
-
//
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
const windows = grok.shell.windows;
|
|
47
|
-
windows.showProperties = false;
|
|
48
|
-
windows.showToolbox = false;
|
|
49
|
-
windows.showHelp = false;
|
|
50
|
-
|
|
51
|
-
let urlParams = new URLSearchParams(window.location.search);
|
|
52
|
-
|
|
53
|
-
let mainSeq: string = DEFAULT_SEQUENCE;
|
|
54
|
-
const view = grok.shell.newView(SEQUENCE_TRANSLATOR, []);
|
|
55
|
-
view.box = true;
|
|
56
|
-
|
|
57
|
-
const tabControl = ui.tabControl({
|
|
58
|
-
[MAIN]: await getMainTab((seq) => {
|
|
59
|
-
mainSeq = seq;
|
|
60
|
-
urlParams = new URLSearchParams();
|
|
61
|
-
urlParams.set('seq', mainSeq);
|
|
62
|
-
updatePath();
|
|
63
|
-
}),
|
|
64
|
-
[AXOLABS]: getAxolabsTab(),
|
|
65
|
-
[SDF]: getSdfTab(),
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
tabControl.onTabChanged.subscribe(() => {
|
|
69
|
-
if (tabControl.currentPane.name !== MAIN)
|
|
70
|
-
urlParams.delete('seq');
|
|
71
|
-
else
|
|
72
|
-
urlParams.set('seq', mainSeq);
|
|
73
|
-
updatePath();
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
function updatePath() {
|
|
77
|
-
const urlParamsTxt: string = Object.entries(urlParams)
|
|
78
|
-
.map(([key, value]) => `${key}=${encodeURIComponent(value)}`).join('&');
|
|
79
|
-
view.path = '/apps/SequenceTranslator/SequenceTranslator' + `/${tabControl.currentPane.name}/?${urlParamsTxt}`;
|
|
80
|
-
}
|
|
78
|
+
/**
 * Checks a sequence against the format that `FormatDetector` reports for it.
 * Returns false when no format is detected; otherwise returns the verdict of
 * `SequenceValidator.isValidSequence` for that format.
 */
//name: validateSequence
//input: string sequence
//output: bool result
export function validateSequence(sequence: string): boolean {
  const validator = new SequenceValidator(sequence);
  const detector = new FormatDetector(sequence);
  const detectedFormat = detector.getFormat();
  if (detectedFormat === null)
    return false;
  return validator.isValidSequence(detectedFormat);
}
|
|
81
86
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
+
/**
 * Converts a sequence to a molfile by running `SequenceToMolfileConverter` with the format
 * name 'GCRS'.
 * The registered function name matches the exported function, so it no longer clashes with
 * `validateSequence`.
 */
//name: getMolfileFromGcrsSequence
//input: string sequence
//input: bool invert
//output: string result
export function getMolfileFromGcrsSequence(sequence: string, invert: boolean): string {
  return (new SequenceToMolfileConverter(sequence, invert, 'GCRS')).convert();
}
|
|
87
94
|
|
|
88
|
-
|
|
89
|
-
|
|
95
|
+
/**
 * Forwards the sense and antisense strands to `linkStrandsV3000`, always passing `true` as
 * its second argument, and returns the resulting string.
 */
//name: linkStrands
//input: object strands
//output: string result
export function linkStrands(strands: { senseStrands: string[], antiStrands: string[] }): string {
  return linkStrandsV3000(strands, true);
}
|
|
91
101
|
|
|
92
|
-
//
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
102
|
+
/** Demo entry point: awaits `demoTranslateSequenceUI()`. */
//name: demoTranslateSequence
//meta.demoPath: Bioinformatics | Oligonucleotide Sequence: Translate
//description: Translate oligonucleotide sequences across various formats accepted by different synthesizers
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Oligonucleotide%20Sequence:%20Translate
export async function demoTranslateSequence(): Promise<void> {
  await demoTranslateSequenceUI();
}
|
|
96
109
|
|
|
97
|
-
//name:
|
|
98
|
-
//
|
|
99
|
-
|
|
100
|
-
|
|
110
|
+
/**
 * Demo entry point: awaits `demoDesignPatternUI()`.
 * `meta.path` mirrors this demo's own `meta.demoPath` ("Oligonucleotide Sequence: Design").
 */
//name: demoDesignPattern
//meta.demoPath: Bioinformatics | Oligonucleotide Sequence: Design
//description: Design a modification pattern for an oligonucleotide sequence
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Oligonucleotide%20Sequence:%20Design
export async function demoDesignPattern(): Promise<void> {
  await demoDesignPatternUI();
}
|
|
101
117
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
118
|
+
/** Demo entry point: awaits `demoVisualizeDuplexUI()`. */
//name: demoVisualizeDuplex
//meta.demoPath: Bioinformatics | Oligonucleotide Sequence: Visualize duplex
//description: Visualize duplex and save SDF
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Oligonucleotide%20Sequence:%20Visualize%20duplex
export async function demoVisualizeDuplex(): Promise<void> {
  await demoVisualizeDuplexUI();
}
|
|
106
125
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
126
|
+
/**
 * Translates a sequence from one format to another. Awaits
 * `initSequenceTranslatorLibData()` first, then returns the result of
 * `new FormatConverter(sequence, sourceFormat).convertTo(targetFormat)`.
 */
//name: translateOligonucleotideSequence
//input: string sequence
//input: string sourceFormat
//input: string targetFormat
//output: string result
export async function translateOligonucleotideSequence(sequence: string, sourceFormat: string, targetFormat: string): Promise<string> {
  await initSequenceTranslatorLibData();
  return (new FormatConverter(sequence, sourceFormat)).convertTo(targetFormat);
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** String-to-string lookup table. */
type Dict = Record<string, string>;

// HELM notations shared by both fixtures below, one per source format.
const AXOLABS_HELM =
  'RNA1{[fR](U)p.[fR](A)[sp].[fR](C)[sp].[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)}$$$$';
const BIOSPRING_HELM =
  'RNA1{r(A)p.r(T)[sp].r(G)p.r(C)[sp].[fR](U)p.[fR](A)p.[fR](C)p.[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)p.d([m5C])}$$$$';
const MERMADE12_HELM =
  'RNA1{[25r](U)[sp].[25r](A)[sp].[25r](C)[sp].[25r](G)[sp].[fR](U)[sp].[fR](A)[sp].[fR](C)[sp].[fR](G)[sp].[fR](U)p.[fR](A)p.[fR](C)p.[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)}$$$$';

/** Per format name: a sample sequence in that format mapped to its HELM notation. */
export const formatsToHelm: {[key: string]: Dict} = {
  'Axolabs': {'UfAfsCfsGfuacg': AXOLABS_HELM},
  'BioSpring': {'AT*GC*123456789': BIOSPRING_HELM},
  'Mermade12': {'hefglijkLIJKHEFG': MERMADE12_HELM},
};

/** HELM notation mapped to the plain nucleotide string paired with it in this fixture. */
export const helmToNucleotides: Dict = {
  [AXOLABS_HELM]: 'UACGUACG',
  [BIOSPRING_HELM]: 'ATGCUACGUACGC',
  [MERMADE12_HELM]: 'UACGUACGUACGUACG',
};

/** HELM-to-molfile fixture; it has no entries. */
export const helmToMolfile: Dict = {};
|