@datagrok/sequence-translator 1.0.16 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +4 -3
- package/CHANGELOG.md +3 -0
- package/detectors.js +8 -28
- package/dist/package-test.js +2 -72987
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -72192
- package/dist/package.js.map +1 -0
- package/files/axolabs-style.json +97 -0
- package/files/codes-to-symbols.json +66 -0
- package/files/formats-to-helm.json +59 -0
- package/files/linkers.json +22 -0
- package/files/monomer-lib.json +1094 -0
- package/link-bio +7 -0
- package/package.json +30 -26
- package/scripts/build-monomer-lib.py +391 -122
- package/src/demo/demo-st-ui.ts +71 -0
- package/src/demo/handle-error.ts +12 -0
- package/src/model/axolabs/axolabs-tab.ts +111 -0
- package/src/model/axolabs/const.ts +33 -0
- package/src/{axolabs → model/axolabs}/draw-svg.ts +1 -1
- package/src/{axolabs → model/axolabs}/helpers.ts +7 -5
- package/src/model/const.ts +19 -0
- package/src/model/data-loading-utils/const.ts +8 -0
- package/src/model/data-loading-utils/json-loader.ts +38 -0
- package/src/model/data-loading-utils/types.ts +30 -0
- package/src/model/format-translation/const.ts +8 -0
- package/src/model/format-translation/conversion-utils.ts +48 -0
- package/src/model/format-translation/format-converter.ts +107 -0
- package/src/model/helpers.ts +12 -0
- package/src/model/monomer-lib/const.ts +3 -0
- package/src/model/monomer-lib/lib-wrapper.ts +106 -0
- package/src/model/parsing-validation/format-detector.ts +57 -0
- package/src/model/parsing-validation/sequence-validator.ts +52 -0
- package/src/model/sequence-to-structure-utils/const.ts +1 -0
- package/src/{structures-works → model/sequence-to-structure-utils}/mol-transformations.ts +61 -87
- package/src/model/sequence-to-structure-utils/monomer-code-parser.ts +92 -0
- package/src/model/sequence-to-structure-utils/sdf-tab.ts +94 -0
- package/src/model/sequence-to-structure-utils/sequence-to-molfile.ts +409 -0
- package/src/package.ts +106 -77
- package/src/tests/const.ts +17 -0
- package/src/tests/smiles-tests.ts +32 -457
- package/src/view/const/main-tab.ts +3 -0
- package/src/view/const/view.ts +10 -0
- package/src/view/css/axolabs-tab.css +1 -0
- package/src/view/css/colored-text-input.css +27 -0
- package/src/view/css/main-tab.css +46 -0
- package/src/view/css/sdf-tab.css +39 -0
- package/src/view/monomer-lib-viewer/viewer.ts +22 -0
- package/src/view/tabs/axolabs.ts +720 -0
- package/src/view/tabs/main.ts +174 -0
- package/src/view/tabs/sdf.ts +173 -0
- package/src/view/utils/app-info-dialog.ts +18 -0
- package/src/view/utils/colored-input/colored-text-input.ts +56 -0
- package/src/view/utils/colored-input/input-painters.ts +44 -0
- package/src/view/utils/draw-molecule.ts +86 -0
- package/src/view/utils/molecule-img.ts +106 -0
- package/src/view/view.ts +129 -0
- package/tsconfig.json +12 -18
- package/webpack.config.js +17 -4
- package/README.md +0 -84
- package/css/style.css +0 -18
- package/img/Sequence Translator Axolabs.png +0 -0
- package/jest.config.js +0 -33
- package/setup-unlink-clean.cmd +0 -14
- package/setup.cmd +0 -14
- package/setup.sh +0 -37
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -97
- package/src/apps/oligo-sd-file-app.ts +0 -58
- package/src/autostart/ICDs.ts +0 -3
- package/src/autostart/IDPs.ts +0 -3
- package/src/autostart/calculations.ts +0 -40
- package/src/autostart/constants.ts +0 -37
- package/src/autostart/registration.ts +0 -241
- package/src/autostart/salts.ts +0 -2
- package/src/autostart/sources.ts +0 -3
- package/src/autostart/users.ts +0 -3
- package/src/axolabs/constants.ts +0 -101
- package/src/axolabs/define-pattern.ts +0 -873
- package/src/helpers.ts +0 -28
- package/src/main/main-view.ts +0 -262
- package/src/structures-works/const.ts +0 -5
- package/src/structures-works/converters.ts +0 -323
- package/src/structures-works/from-monomers.ts +0 -267
- package/src/structures-works/map.ts +0 -720
- package/src/structures-works/save-sense-antisense.ts +0 -91
- package/src/structures-works/sequence-codes-tools.ts +0 -344
- package/src/utils/parse.ts +0 -27
- package/src/utils/sdf-add-columns.ts +0 -118
- package/src/utils/sdf-save-table.ts +0 -56
- package/test-SequenceTranslator-6288c2fbe346-cce4ac1d.html +0 -259
- package/vendors/openchemlib-full.js +0 -293
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {sortByReverseLength} from '../helpers';
|
|
7
|
+
import {DEFAULT_FORMATS} from '../const';
|
|
8
|
+
import {MonomerLibWrapper} from '../monomer-lib/lib-wrapper';
|
|
9
|
+
import {codesToHelmDictionary} from '../data-loading-utils/json-loader';
|
|
10
|
+
import {SequenceValidator} from './sequence-validator';
|
|
11
|
+
|
|
12
|
+
/** Guesses the notation (format) in which a raw sequence string is written. */
export class FormatDetector {
  private libWrapper: MonomerLibWrapper;
  /** Names of all candidate formats, taken from the keys of `codesToHelmDictionary`. */
  private formats: string[];

  constructor(private sequence: string) {
    this.libWrapper = MonomerLibWrapper.getInstance();
    this.formats = Object.keys(codesToHelmDictionary);
  }

  /**
   * Detects the format of the sequence.
   *
   * @returns `DEFAULT_FORMATS.HELM` when the sequence starts with `'RNA'`;
   * `null` when no format has a code matching the beginning of the sequence;
   * otherwise the first candidate format for which the validator reports no
   * invalid code (index `-1`), or, if there is none, the first candidate whose
   * first invalid code lies furthest from the start of the sequence.
   */
  getFormat(): string | null {
    // todo: reliable criterion
    if (this.sequence.startsWith('RNA'))
      return DEFAULT_FORMATS.HELM;

    const possibleFormats = this.getListOfPossibleSynthesizersByFirstMatchedCode();
    if (possibleFormats.length === 0)
      return null;

    const validator = new SequenceValidator(this.sequence);
    // One entry per candidate format: -1 for a fully valid sequence, otherwise
    // the index at which validation stopped.
    const outputIndices = possibleFormats.map((format) => validator.getInvalidCodeIndex(format));
    const formatIdx = outputIndices.includes(-1) ? -1 : Math.max(...outputIndices);
    return possibleFormats[outputIndices.indexOf(formatIdx)];
  }

  // todo: rename
  /**
   * Collects the formats that have at least one code matching the sequence at
   * the start offset. The offset is the position right after the first `')'`,
   * unless that `')'` is the last character (or absent), in which case it is 0.
   */
  private getListOfPossibleSynthesizersByFirstMatchedCode(): string[] {
    const sequence = this.sequence;

    // The offset depends only on the sequence, so it is computed once rather
    // than once per format.
    const firstClosingParen = sequence.indexOf(')');
    const start = (firstClosingParen !== -1 && firstClosingParen !== sequence.length - 1) ?
      firstClosingParen + 1 : 0;

    const synthesizers: string[] = [];
    for (const format of this.formats) {
      const codes = sortByReverseLength(this.libWrapper.getCodesByFormat(format));
      if (codes.some((s: string) => s === sequence.slice(start, start + s.length)))
        synthesizers.push(format);
    }
    return synthesizers;
  }
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import {NUCLEOTIDES} from '../const';
|
|
2
|
+
import {MonomerLibWrapper} from '../monomer-lib/lib-wrapper';
|
|
3
|
+
import {sortByReverseLength} from '../helpers';
|
|
4
|
+
import {DEFAULT_FORMATS} from '../const';
|
|
5
|
+
|
|
6
|
+
/** Checks a sequence against the monomer codes known for a given format. */
export class SequenceValidator {
  private libWrapper: MonomerLibWrapper;

  constructor(private sequence: string) {
    this.libWrapper = MonomerLibWrapper.getInstance();
  }

  /**
   * Scans the sequence from the start, consuming one code of `format` at a
   * time (codes are tried in the order produced by `sortByReverseLength`).
   *
   * @returns `-1` when the scan reaches the end of the sequence, otherwise the
   * index at which the scan stopped. For `DEFAULT_FORMATS.HELM` no scan is
   * performed and the sequence length is returned.
   */
  getInvalidCodeIndex(format: string): number {
    const seq = this.sequence;
    if (format === DEFAULT_FORMATS.HELM)
      return seq.length;

    const prefixChars = ['r', 'd']; // what for?
    const sortedCodes = sortByReverseLength(this.libWrapper.getCodesByFormat(format));

    let position = 0;
    while (position < seq.length) {
      const hit = sortedCodes.find(
        (candidate) => seq.substring(position, position + candidate.length) === candidate
      );
      if (!hit)
        break;

      // todo: refactor the vague condition
      if (NUCLEOTIDES.includes(seq[position])) {
        // mistake pattern 'rAA': a nucleotide two positions after a prefix char
        if (position > 1 && prefixChars.includes(seq[position - 2]))
          break;

        // mistake pattern 'ArA': a nucleotide directly followed by a prefix
        // char; the reported index is the prefix char itself
        if (prefixChars.includes(seq[position + 1])) {
          position++;
          break;
        }
      }

      position += hit.length;
    }

    return position === seq.length ? -1 : position;
  }

  /** True when `getInvalidCodeIndex` finds no invalid position for `format`. */
  isValidSequence(format: string): boolean {
    return this.getInvalidCodeIndex(format) === -1;
  }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/** Symbol used for the phosphate linker (presumably a monomer symbol of the
 * monomer library -- confirm against the library contents). */
export const PHOSPHATE_SYMBOL = 'p';
|
|
@@ -1,30 +1,23 @@
|
|
|
1
1
|
export function getNucleotidesMol(codes: string[]) {
|
|
2
2
|
const molBlocks: string[] = [];
|
|
3
3
|
|
|
4
|
-
// for (let i = 0; i < smilesCodes.length - 1; i++) {
|
|
5
|
-
// smilesCodes[i] == 'OP(=O)(O)O' ? molBlocks.push(PHOSHATE) :
|
|
6
|
-
// smilesCodes[i] == 'OP(=O)(S)O' ? molBlocks.push(THIOPHOSHATE) :
|
|
7
|
-
// smilesCodes[i] == 'O[C@@H]1C[C@@H]O[C@H]1CO' ? molBlocks.push(rotateNucleotidesV3000(INVABASIC)) :
|
|
8
|
-
// smilesCodes[i] == 'OCC(O)CNC(=O)CCCC(=O)NC(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)' ? molBlocks.push(GALNAC) :
|
|
9
|
-
// smilesCodes[i] == 'C(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)NC(=O)CCCC(=O)NCC(O)CO' ? molBlocks.push(GALNACPRIME) :
|
|
10
|
-
// molBlocks.push(rotateNucleotidesV3000(smilesCodes[i]));
|
|
11
|
-
// }
|
|
12
|
-
|
|
13
4
|
for (let i = 0; i < codes.length - 1; i++) {
|
|
14
5
|
if (codes[i].includes('MODIFICATION')) {
|
|
15
|
-
if (i
|
|
6
|
+
if (i === 0)
|
|
16
7
|
molBlocks.push(reflect(codes[i]));
|
|
17
8
|
else
|
|
18
9
|
molBlocks.push(codes[i]);
|
|
19
|
-
}
|
|
20
|
-
else
|
|
10
|
+
} else {
|
|
21
11
|
molBlocks.push(rotateNucleotidesV3000(codes[i]));
|
|
12
|
+
}
|
|
22
13
|
}
|
|
23
14
|
|
|
24
15
|
return linkV3000(molBlocks);
|
|
25
16
|
}
|
|
26
17
|
|
|
27
|
-
export function linkStrandsV3000(
|
|
18
|
+
export function linkStrandsV3000(
|
|
19
|
+
strands: { senseStrands: string[], antiStrands: string[] }, useChirality: boolean = true
|
|
20
|
+
): string {
|
|
28
21
|
let macroMolBlock = '\nDatagrok macromolecule handler\n\n';
|
|
29
22
|
macroMolBlock += ' 0 0 0 0 0 0 999 V3000\n';
|
|
30
23
|
macroMolBlock += 'M V30 BEGIN CTAB\n';
|
|
@@ -36,38 +29,42 @@ export function linkStrandsV3000(strands: { senseStrands: string[], antiStrands:
|
|
|
36
29
|
let nbond = 0;
|
|
37
30
|
let xShift = 0;
|
|
38
31
|
|
|
39
|
-
// if (twoChains && molBlocks.length > 1)
|
|
40
|
-
// molBlocks[1] = invertNucleotidesV3000(molBlocks[1]);
|
|
41
|
-
|
|
42
32
|
if (strands.antiStrands.length > 0) {
|
|
43
|
-
for (let i = 0; i < strands.antiStrands.length; i++)
|
|
33
|
+
for (let i = 0; i < strands.antiStrands.length; i++)
|
|
44
34
|
strands.antiStrands[i] = invertNucleotidesV3000(strands.antiStrands[i]);
|
|
45
|
-
}
|
|
46
35
|
}
|
|
47
36
|
|
|
48
37
|
let inverted = false;
|
|
49
|
-
|
|
38
|
+
const molBlocks = strands.senseStrands.concat(strands.antiStrands);
|
|
39
|
+
/** Minimal value of AS and AS2 shift */
|
|
40
|
+
let ssYShift = 0;
|
|
50
41
|
|
|
51
42
|
for (let i = 0; i < molBlocks.length; i++) {
|
|
52
|
-
|
|
53
|
-
if (i >= strands.senseStrands.length && inverted == false) {
|
|
54
|
-
inverted = true;
|
|
55
|
-
xShift = 0;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
|
|
59
43
|
molBlocks[i] = molBlocks[i].replaceAll('(-\nM V30 ', '(')
|
|
60
44
|
.replaceAll('-\nM V30 ', '').replaceAll(' )', ')');
|
|
61
45
|
const numbers = extractAtomsBondsNumbersV3000(molBlocks[i]);
|
|
62
46
|
const coordinates = extractAtomDataV3000(molBlocks[i]);
|
|
63
47
|
|
|
48
|
+
if (i >= strands.senseStrands.length) {
|
|
49
|
+
if (inverted === false) {
|
|
50
|
+
// AS strand
|
|
51
|
+
inverted = true;
|
|
52
|
+
xShift = 0;
|
|
53
|
+
}
|
|
54
|
+
} else {
|
|
55
|
+
// SS strands
|
|
56
|
+
ssYShift = Math.min(ssYShift, Math.min(
|
|
57
|
+
...coordinates.y.filter((item) => item < 0)
|
|
58
|
+
));
|
|
59
|
+
}
|
|
60
|
+
|
|
64
61
|
if (inverted) {
|
|
65
62
|
const xShiftRight = Math.min(...coordinates.x) - xShift;
|
|
66
|
-
const yShift =
|
|
63
|
+
const yShift = Math.max(...coordinates.y) + 5;
|
|
67
64
|
for (let j = 0; j < coordinates.x.length; j++)
|
|
68
65
|
coordinates.x[j] -= xShiftRight;
|
|
69
66
|
for (let j = 0; j < coordinates.y.length; j++)
|
|
70
|
-
coordinates.y[j] -= yShift;
|
|
67
|
+
coordinates.y[j] -= yShift - ssYShift;
|
|
71
68
|
}
|
|
72
69
|
|
|
73
70
|
let indexAtoms = molBlocks[i].indexOf('M V30 BEGIN ATOM'); // V3000 index for atoms coordinates
|
|
@@ -76,7 +73,7 @@ export function linkStrandsV3000(strands: { senseStrands: string[], antiStrands:
|
|
|
76
73
|
let indexEnd = indexAtoms;
|
|
77
74
|
|
|
78
75
|
for (let j = 0; j < numbers.natom; j++) {
|
|
79
|
-
// if (coordinates.atomIndex[j]
|
|
76
|
+
// if (coordinates.atomIndex[j] !== 1 || i === 0 || twoChains) {
|
|
80
77
|
//rewrite atom number
|
|
81
78
|
index = molBlocks[i].indexOf('V30', index) + 4;
|
|
82
79
|
indexEnd = molBlocks[i].indexOf(' ', index);
|
|
@@ -138,7 +135,7 @@ export function linkStrandsV3000(strands: { senseStrands: string[], antiStrands:
|
|
|
138
135
|
|
|
139
136
|
let indexCollection = molBlocks[i].indexOf('M V30 MDLV30/STEABS ATOMS=('); // V3000 index for collections
|
|
140
137
|
|
|
141
|
-
while (indexCollection
|
|
138
|
+
while (indexCollection !== -1) {
|
|
142
139
|
indexCollection += 28;
|
|
143
140
|
const collectionEnd = molBlocks[i].indexOf(')', indexCollection);
|
|
144
141
|
const collectionEntries = molBlocks[i].substring(indexCollection, collectionEnd).split(' ').slice(1);
|
|
@@ -151,31 +148,22 @@ export function linkStrandsV3000(strands: { senseStrands: string[], antiStrands:
|
|
|
151
148
|
|
|
152
149
|
natom += true ? numbers.natom : numbers.natom - 1;
|
|
153
150
|
nbond += numbers.nbond;
|
|
154
|
-
xShift += Math.max(...coordinates.x) +
|
|
151
|
+
xShift += Math.max(...coordinates.x) + 5;//twoChains ? 0 : coordinates.x[numbers.natom - 1] - coordinates.x[0];
|
|
155
152
|
}
|
|
156
153
|
|
|
157
154
|
const entries = 4;
|
|
158
155
|
const collNumber = Math.ceil(collection.length / entries);
|
|
159
156
|
|
|
160
|
-
//if (oclRender) {
|
|
161
|
-
// collectionBlock += 'M V30 MDLV30/STEABS ATOMS=(' + collection.length;
|
|
162
|
-
|
|
163
|
-
// for (let j = 0; j < collection.length; j++)
|
|
164
|
-
// collectionBlock += ' ' + collection[j];
|
|
165
|
-
|
|
166
|
-
// collectionBlock += ')\n';
|
|
167
|
-
//} else {
|
|
168
157
|
collectionBlock += 'M V30 MDLV30/STEABS ATOMS=(' + collection.length + ' -\n';
|
|
169
158
|
for (let i = 0; i < collNumber; i++) {
|
|
170
159
|
collectionBlock += 'M V30 ';
|
|
171
|
-
const entriesCurrent = i + 1
|
|
160
|
+
const entriesCurrent = i + 1 === collNumber ? collection.length - (collNumber - 1) * entries : entries;
|
|
172
161
|
for (let j = 0; j < entriesCurrent; j++) {
|
|
173
|
-
collectionBlock += (j + 1
|
|
174
|
-
(i
|
|
162
|
+
collectionBlock += (j + 1 === entriesCurrent) ?
|
|
163
|
+
(i === collNumber - 1 ? collection[entries * i + j] + ')\n' : collection[entries * i + j] + ' -\n') :
|
|
175
164
|
collection[entries * i + j] + ' ';
|
|
176
165
|
}
|
|
177
166
|
}
|
|
178
|
-
//}
|
|
179
167
|
|
|
180
168
|
//generate file
|
|
181
169
|
true ? natom : natom++;
|
|
@@ -186,12 +174,13 @@ export function linkStrandsV3000(strands: { senseStrands: string[], antiStrands:
|
|
|
186
174
|
macroMolBlock += 'M V30 BEGIN BOND\n';
|
|
187
175
|
macroMolBlock += bondBlock;
|
|
188
176
|
macroMolBlock += 'M V30 END BOND\n';
|
|
189
|
-
if (useChirality) {
|
|
177
|
+
if (useChirality && collection.length > 0) {
|
|
190
178
|
macroMolBlock += 'M V30 BEGIN COLLECTION\n';
|
|
191
179
|
macroMolBlock += collectionBlock;
|
|
192
180
|
macroMolBlock += 'M V30 END COLLECTION\n';
|
|
193
|
-
} else
|
|
181
|
+
} else {
|
|
194
182
|
macroMolBlock = macroMolBlock.replace(/ CFG=\d/g, ' ');
|
|
183
|
+
}
|
|
195
184
|
|
|
196
185
|
macroMolBlock += 'M V30 END CTAB\n';
|
|
197
186
|
macroMolBlock += 'M END';
|
|
@@ -199,7 +188,7 @@ export function linkStrandsV3000(strands: { senseStrands: string[], antiStrands:
|
|
|
199
188
|
return macroMolBlock;
|
|
200
189
|
}
|
|
201
190
|
|
|
202
|
-
export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
191
|
+
export function linkV3000(molBlocks: string[], useChirality: boolean = true): string {
|
|
203
192
|
let macroMolBlock = '\nDatagrok macromolecule handler\n\n';
|
|
204
193
|
macroMolBlock += ' 0 0 0 0 0 0 999 V3000\n';
|
|
205
194
|
macroMolBlock += 'M V30 BEGIN CTAB\n';
|
|
@@ -212,7 +201,7 @@ export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
|
212
201
|
let xShift = 0;
|
|
213
202
|
|
|
214
203
|
for (let i = 0; i < molBlocks.length; i++) {
|
|
215
|
-
const isBoundary = molBlocks[i].includes('MODIFICATION') && i
|
|
204
|
+
const isBoundary = molBlocks[i].includes('MODIFICATION') && i === 0;
|
|
216
205
|
let specLength = 0;
|
|
217
206
|
if (isBoundary) {
|
|
218
207
|
const coordinates = extractAtomDataV3000(molBlocks[i]);
|
|
@@ -231,16 +220,16 @@ export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
|
231
220
|
let indexEnd = indexAtoms;
|
|
232
221
|
|
|
233
222
|
for (let j = 0; j < numbers.natom; j++) {
|
|
234
|
-
if (coordinates.atomIndex[j]
|
|
223
|
+
if (coordinates.atomIndex[j] !== 1 || i === 0) {
|
|
235
224
|
//rewrite atom number
|
|
236
225
|
index = molBlocks[i].indexOf('V30', index) + 4;
|
|
237
226
|
indexEnd = molBlocks[i].indexOf(' ', index);
|
|
238
227
|
let atomNumber = 0;
|
|
239
228
|
if (isBoundary) {
|
|
240
|
-
atomNumber = parseInt(molBlocks[i].substring(index, indexEnd))
|
|
241
|
-
if (atomNumber
|
|
229
|
+
atomNumber = parseInt(molBlocks[i].substring(index, indexEnd));
|
|
230
|
+
if (atomNumber === 1)
|
|
242
231
|
atomNumber = specLength;
|
|
243
|
-
else if (atomNumber
|
|
232
|
+
else if (atomNumber === specLength)
|
|
244
233
|
atomNumber = 1;
|
|
245
234
|
atomNumber += natom;
|
|
246
235
|
} else {
|
|
@@ -293,10 +282,10 @@ export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
|
293
282
|
indexEnd = molBlocks[i].indexOf(' ', index);
|
|
294
283
|
let atomNumber = 0;
|
|
295
284
|
if (isBoundary) {
|
|
296
|
-
atomNumber = parseInt(molBlocks[i].substring(index, indexEnd))
|
|
297
|
-
if (atomNumber
|
|
285
|
+
atomNumber = parseInt(molBlocks[i].substring(index, indexEnd));
|
|
286
|
+
if (atomNumber === 1)
|
|
298
287
|
atomNumber = specLength;
|
|
299
|
-
else if (atomNumber
|
|
288
|
+
else if (atomNumber === specLength)
|
|
300
289
|
atomNumber = 1;
|
|
301
290
|
atomNumber += natom;
|
|
302
291
|
} else {
|
|
@@ -308,10 +297,10 @@ export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
|
308
297
|
indexEnd = Math.min(molBlocks[i].indexOf('\n', index), molBlocks[i].indexOf(' ', index));
|
|
309
298
|
atomNumber = 0;
|
|
310
299
|
if (isBoundary) {
|
|
311
|
-
atomNumber = parseInt(molBlocks[i].substring(index, indexEnd))
|
|
312
|
-
if (atomNumber
|
|
300
|
+
atomNumber = parseInt(molBlocks[i].substring(index, indexEnd));
|
|
301
|
+
if (atomNumber === 1)
|
|
313
302
|
atomNumber = specLength;
|
|
314
|
-
else if (atomNumber
|
|
303
|
+
else if (atomNumber === specLength)
|
|
315
304
|
atomNumber = 1;
|
|
316
305
|
atomNumber += natom;
|
|
317
306
|
} else {
|
|
@@ -327,7 +316,7 @@ export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
|
327
316
|
|
|
328
317
|
let indexCollection = molBlocks[i].indexOf('M V30 MDLV30/STEABS ATOMS=('); // V3000 index for collections
|
|
329
318
|
|
|
330
|
-
while (indexCollection
|
|
319
|
+
while (indexCollection !== -1) {
|
|
331
320
|
indexCollection += 28;
|
|
332
321
|
const collectionEnd = molBlocks[i].indexOf(')', indexCollection);
|
|
333
322
|
const collectionEntries = molBlocks[i].substring(indexCollection, collectionEnd).split(' ').slice(1);
|
|
@@ -349,21 +338,13 @@ export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
|
349
338
|
const entries = 4;
|
|
350
339
|
const collNumber = Math.ceil(collection.length / entries);
|
|
351
340
|
|
|
352
|
-
//if (oclRender) {
|
|
353
|
-
// collectionBlock += 'M V30 MDLV30/STEABS ATOMS=(' + collection.length;
|
|
354
|
-
|
|
355
|
-
// for (let j = 0; j < collection.length; j++)
|
|
356
|
-
// collectionBlock += ' ' + collection[j];
|
|
357
|
-
|
|
358
|
-
// collectionBlock += ')\n';
|
|
359
|
-
//} else {
|
|
360
341
|
collectionBlock += 'M V30 MDLV30/STEABS ATOMS=(' + collection.length + ' -\n';
|
|
361
342
|
for (let i = 0; i < collNumber; i++) {
|
|
362
343
|
collectionBlock += 'M V30 ';
|
|
363
|
-
const entriesCurrent = i + 1
|
|
344
|
+
const entriesCurrent = i + 1 === collNumber ? collection.length - (collNumber - 1) * entries : entries;
|
|
364
345
|
for (let j = 0; j < entriesCurrent; j++) {
|
|
365
|
-
collectionBlock += (j + 1
|
|
366
|
-
(i
|
|
346
|
+
collectionBlock += (j + 1 === entriesCurrent) ?
|
|
347
|
+
(i === collNumber - 1 ? collection[entries * i + j] + ')\n' : collection[entries * i + j] + ' -\n') :
|
|
367
348
|
collection[entries * i + j] + ' ';
|
|
368
349
|
}
|
|
369
350
|
}
|
|
@@ -378,12 +359,11 @@ export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
|
378
359
|
macroMolBlock += 'M V30 BEGIN BOND\n';
|
|
379
360
|
macroMolBlock += bondBlock;
|
|
380
361
|
macroMolBlock += 'M V30 END BOND\n';
|
|
381
|
-
if (useChirality) {
|
|
362
|
+
if (useChirality && collection.length > 0) {
|
|
382
363
|
macroMolBlock += 'M V30 BEGIN COLLECTION\n';
|
|
383
364
|
macroMolBlock += collectionBlock;
|
|
384
365
|
macroMolBlock += 'M V30 END COLLECTION\n';
|
|
385
|
-
} else
|
|
386
|
-
macroMolBlock = macroMolBlock.replace(/ CFG=\d/g, ' ');
|
|
366
|
+
} else { macroMolBlock = macroMolBlock.replace(/ CFG=\d/g, ' '); }
|
|
387
367
|
|
|
388
368
|
macroMolBlock += 'M V30 END CTAB\n';
|
|
389
369
|
macroMolBlock += 'M END';
|
|
@@ -391,9 +371,7 @@ export function linkV3000(molBlocks: string[], useChirality: boolean = true) {
|
|
|
391
371
|
return macroMolBlock;
|
|
392
372
|
}
|
|
393
373
|
|
|
394
|
-
function rotateNucleotidesV3000(
|
|
395
|
-
// @ts-ignore
|
|
396
|
-
let molBlock = molecule.includes('M END') ? molecule : OCL.Molecule.fromSmiles(molecule).toMolfileV3();
|
|
374
|
+
function rotateNucleotidesV3000(molBlock: string): string {
|
|
397
375
|
const coordinates = extractAtomDataV3000(molBlock);
|
|
398
376
|
const natom = coordinates.atomIndex.length;
|
|
399
377
|
|
|
@@ -414,15 +392,15 @@ function rotateNucleotidesV3000(molecule: string) {
|
|
|
414
392
|
}
|
|
415
393
|
|
|
416
394
|
let angle = 0;
|
|
417
|
-
if (coordinates.x[indexFivePrime]
|
|
395
|
+
if (coordinates.x[indexFivePrime] === 0) {
|
|
418
396
|
angle = coordinates.y[indexFivePrime] > coordinates.y[indexThreePrime] ? Math.PI / 2 : 3 * Math.PI / 2;
|
|
419
|
-
else if (coordinates.y[indexFivePrime]
|
|
397
|
+
} else if (coordinates.y[indexFivePrime] === 0) {
|
|
420
398
|
angle = coordinates.x[indexFivePrime] > coordinates.x[indexThreePrime] ? Math.PI : 0;
|
|
421
|
-
else {
|
|
399
|
+
} else {
|
|
422
400
|
const derivative = coordinates.y[indexFivePrime] / coordinates.x[indexFivePrime];
|
|
423
|
-
angle = derivative > 0
|
|
424
|
-
|
|
425
|
-
|
|
401
|
+
angle = derivative > 0 ?
|
|
402
|
+
(coordinates.x[indexFivePrime] > 0 ? Math.PI - Math.atan(derivative) : Math.PI * 2 - Math.atan(derivative)) :
|
|
403
|
+
(coordinates.x[indexFivePrime] > 0 ? -Math.PI - Math.atan(derivative) : Math.atan(derivative));
|
|
426
404
|
}
|
|
427
405
|
|
|
428
406
|
const cos = Math.cos(angle);
|
|
@@ -460,9 +438,7 @@ function rotateNucleotidesV3000(molecule: string) {
|
|
|
460
438
|
return molBlock;
|
|
461
439
|
}
|
|
462
440
|
|
|
463
|
-
function reflect(
|
|
464
|
-
// @ts-ignore
|
|
465
|
-
let molBlock = molecule.includes('M END') ? molecule : OCL.Molecule.fromSmiles(molecule).toMolfileV3();
|
|
441
|
+
function reflect(molBlock: string): string {
|
|
466
442
|
const coordinates = extractAtomDataV3000(molBlock);
|
|
467
443
|
const natom = coordinates.atomIndex.length;
|
|
468
444
|
|
|
@@ -509,9 +485,7 @@ function reflect(molecule: string) {
|
|
|
509
485
|
}
|
|
510
486
|
|
|
511
487
|
|
|
512
|
-
function invertNucleotidesV3000(
|
|
513
|
-
// @ts-ignore
|
|
514
|
-
let molBlock = molecule.includes('M END') ? molecule : OCL.Molecule.fromSmiles(molecule).toMolfileV3();
|
|
488
|
+
function invertNucleotidesV3000(molBlock: string) {
|
|
515
489
|
const coordinates = extractAtomDataV3000(molBlock);
|
|
516
490
|
const natom = coordinates.atomIndex.length;
|
|
517
491
|
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {PHOSPHATE_SYMBOL} from './const';
|
|
7
|
+
import {sortByReverseLength} from '../helpers';
|
|
8
|
+
import {MonomerLibWrapper} from '../monomer-lib/lib-wrapper';
|
|
9
|
+
import {monomersWithPhosphateLinkers} from '../data-loading-utils/json-loader';
|
|
10
|
+
|
|
11
|
+
/** Wrapper for parsing a strand and getting a sequence of monomer IDs (with
 * omitted linkers, if needed) */
export class MonomerSequenceParser {
  /**
   * @param sequence raw strand string to be split into codes
   * @param invert when true, the parsed codes are collected in reverse order
   * @param codeMap maps codes of the source format to monomer symbols; its
   * keys are also the set of codes recognized while parsing
   */
  constructor(
    private sequence: string, private invert: boolean = false,
    // todo: remove from the list of parameters
    private codeMap: Map<string, string>
  ) {
    this.lib = MonomerLibWrapper.getInstance();
  }

  private lib: MonomerLibWrapper;

  /** Get sequence of parsed monomer symbols, which are unique short names for
   * the monomers within the Monomer Library
   * @throws Error when some position of the sequence matches no known code */
  parseSequence(): string[] {
    const parsedRawCodes = this.parseRawSequence();
    return this.addLinkers(parsedRawCodes);
  }

  /** Translates codes to monomer symbols and inserts `PHOSPHATE_SYMBOL`
   * between neighbours that do not already provide a phosphate linker. */
  private addLinkers(parsedRawCodes: string[]): string[] {
    // Resolve every symbol once; the loop below needs both current and next.
    const symbols = parsedRawCodes.map((code) => this.getSymbolForCode(code));
    const monomerSymbolSequence: string[] = [];

    symbols.forEach((monomerSymbol, i) => {
      // A monomer carrying its own left phosphate linker replaces whatever was
      // pushed last.
      if (i > 0 && monomerHasLeftPhosphateLinker(monomerSymbol))
        monomerSymbolSequence.pop();

      monomerSymbolSequence.push(monomerSymbol);

      const lastMonomer = i === symbols.length - 1;
      if (lastMonomer)
        return;

      // todo: refactor as molfile-specific
      const linkerAlreadyPresent = monomerIsPhosphateLinker(monomerSymbol) ||
        monomerHasRightPhosphateLinker(monomerSymbol) ||
        monomerIsPhosphateLinker(symbols[i + 1]);
      if (!linkerAlreadyPresent)
        monomerSymbolSequence.push(PHOSPHATE_SYMBOL);
    });
    return monomerSymbolSequence;
  }

  /** Returns the symbol mapped to `code`, or `code` itself when unmapped. */
  private getSymbolForCode(code: string): string {
    // todo: remove as a legacy workaround, codeMap must contain all the
    // symbols, and symbols are not codes
    return this.codeMap.get(code) ?? code;
  }

  /** Splits the sequence into codes, always trying longer codes first.
   * @throws Error when no code matches at the current position */
  private parseRawSequence(): string[] {
    const allCodesOfFormat = this.getAllCodesOfFormat();
    const parsedCodes: string[] = [];
    let i = 0;
    while (i < this.sequence.length) {
      const code = allCodesOfFormat.find((s: string) => this.sequence.startsWith(s, i));
      // Fail with a readable message instead of crashing on `undefined.length`.
      if (code === undefined) {
        throw new Error(
          `Cannot parse sequence '${this.sequence}': no known code at position ${i}`
        );
      }
      this.invert ? parsedCodes.unshift(code) : parsedCodes.push(code);
      i += code.length;
    }
    return parsedCodes;
  }

  // todo: port to monomer handler
  /** All non-empty keys of `codeMap`, ordered by `sortByReverseLength`. */
  private getAllCodesOfFormat(): string[] {
    // An empty code would match everywhere without advancing the parser.
    const allCodesInTheFormat = Array.from(this.codeMap.keys()).filter((code) => code.length > 0);
    return sortByReverseLength(allCodesInTheFormat);
  }
}
|
|
80
|
+
|
|
81
|
+
// todo: to be eliminated after full helm support
|
|
82
|
+
/** Tells whether `monomerSymbol` is listed under the `'left'` key of
 * `monomersWithPhosphateLinkers`. */
function monomerHasLeftPhosphateLinker(monomerSymbol: string): boolean {
  const symbolsWithLeftLinker = monomersWithPhosphateLinkers['left'];
  return symbolsWithLeftLinker.includes(monomerSymbol);
}
|
|
85
|
+
|
|
86
|
+
/** Tells whether `monomerSymbol` is listed under the `'right'` key of
 * `monomersWithPhosphateLinkers`. */
function monomerHasRightPhosphateLinker(monomerSymbol: string): boolean {
  const symbolsWithRightLinker = monomersWithPhosphateLinkers['right'];
  return symbolsWithRightLinker.includes(monomerSymbol);
}
|
|
89
|
+
|
|
90
|
+
/** Tells whether `monomerSymbol` is listed under the `'phosphate'` key of
 * `monomersWithPhosphateLinkers`. */
function monomerIsPhosphateLinker(monomerSymbol: string): boolean {
  const phosphateLinkerSymbols = monomersWithPhosphateLinkers['phosphate'];
  return phosphateLinkerSymbols.includes(monomerSymbol);
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
6
|
+
|
|
7
|
+
import {download} from '../helpers';
|
|
8
|
+
import {SequenceToMolfileConverter} from './sequence-to-molfile';
|
|
9
|
+
import {linkStrandsV3000} from './mol-transformations';
|
|
10
|
+
import {DEFAULT_FORMATS} from '../const';
|
|
11
|
+
|
|
12
|
+
/** Input describing one strand for molfile generation. */
export type StrandData = {
  /** Raw strand sequence; an empty string means the strand is absent. */
  strand: string,
  /** Passed through as the `invert` argument of `getMolfileForStrand`. */
  invert: boolean
}
|
|
16
|
+
|
|
17
|
+
/**
 * Converts one strand to a molfile, always using the Axolabs format.
 *
 * @param strand raw strand sequence
 * @param invert forwarded to `SequenceToMolfileConverter`
 * @returns the molfile, or an empty string when `strand` is empty or the
 * conversion throws (the error is logged to the console, not rethrown)
 */
export function getMolfileForStrand(strand: string, invert: boolean): string {
  if (strand === '')
    return '';

  const axolabsFormat = DEFAULT_FORMATS.AXOLABS;
  try {
    const converter = new SequenceToMolfileConverter(strand, invert, axolabsFormat);
    return converter.convert();
  } catch (err) {
    // Best effort: report the failure and fall back to an empty molfile.
    console.error(errorToConsole(err));
    return '';
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Get molfile for single strand or linked strands
 *
 * @param ss sense strand
 * @param as first anti-sense strand
 * @param as2 second anti-sense strand
 * @param useChiral forwarded to `linkStrandsV3000` as its chirality flag
 * @returns an empty string when all strands are empty; the molfile of the only
 * non-empty strand when there is exactly one; otherwise the linked molfile
 */
export function getLinkedMolfile(
  ss: StrandData, as: StrandData, as2: StrandData, useChiral: boolean
): string {
  const nonEmptyStrands = [ss, as, as2].filter((item) => item.strand !== '');

  // Nothing to convert: behave like getMolfileForStrand('') instead of handing
  // an empty mol block to the linker.
  if (nonEmptyStrands.length === 0)
    return '';

  if (nonEmptyStrands.length === 1)
    return getMolfileForStrand(nonEmptyStrands[0].strand, nonEmptyStrands[0].invert);

  const ssMol = getMolfileForStrand(ss.strand, ss.invert);
  const asMol = getMolfileForStrand(as.strand, as.invert);
  const as2Mol = getMolfileForStrand(as2.strand, as2.invert);

  // select only the non-empty anti-strands
  const antiStrands = [asMol, as2Mol].filter((item) => item !== '');
  // NOTE(review): ssMol is still passed when it is '' (SS not entered, or its
  // conversion failed) -- confirm linkStrandsV3000 copes with an empty block.
  return linkStrandsV3000({senseStrands: [ssMol], antiStrands: antiStrands}, useChiral);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Save sdf in case ss and as (and optionally as2) strands entered
 *
 * Shows a warning and saves nothing when no strand is entered, or when the
 * only entered strand is not the sense strand.
 *
 * @param ss sense strand
 * @param as first anti-sense strand
 * @param as2 second anti-sense strand
 * @param useChiral forwarded to `getLinkedMolfile` (used only when `oneEntity`)
 * @param oneEntity when true, all strands go into a single linked record;
 * otherwise every strand with a non-empty molfile gets its own titled record
 */
export function saveSdf(
  ss: StrandData, as: StrandData, as2: StrandData, useChiral: boolean,
  oneEntity: boolean
): void {
  const nonEmptyStrands = [ss.strand, as.strand, as2.strand].filter((item) => item !== '');
  const nothingToSave = nonEmptyStrands.length === 0 ||
    (nonEmptyStrands.length === 1 && ss.strand === '');
  if (nothingToSave) {
    grok.shell.warning('Enter SS and AS/AS2 to save SDF');
    return;
  }

  let result: string;
  if (oneEntity) {
    result = getLinkedMolfile(ss, as, as2, useChiral) + '\n$$$$\n';
  } else {
    const records: [string, string][] = [
      [getMolfileForStrand(ss.strand, ss.invert), 'Sense Strand'],
      [getMolfileForStrand(as.strand, as.invert), 'Anti Sense'],
      [getMolfileForStrand(as2.strand, as2.invert), 'Anti Sense 2'],
    ];
    // Skip strands without a molfile (not entered or failed to convert) so
    // that no record with an empty mol block is written; this now covers the
    // sense strand as well.
    result = records
      .filter(([molfile]) => molfile !== '')
      .map(([molfile, title]) => molfile + '\n' + '> <Sequence>\n' + title + '\n$$$$\n')
      .join('');
  }

  // construct date-time in the form yyyy-mm-dd_hh-mm-ss
  const date = new Date();
  const pad = (x: number): string => x.toString().padStart(2, '0');
  const dateString: string = date.getFullYear() + '-' + pad(date.getMonth() + 1) +
    '-' + pad(date.getDate()) + '_' + pad(date.getHours()) + '-' +
    pad(date.getMinutes()) + '-' + pad(date.getSeconds());

  download(`SequenceTranslator-${dateString}.sdf`, encodeURIComponent(result));
}
|