@datagrok/sequence-translator 1.0.16 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +4 -3
- package/CHANGELOG.md +3 -0
- package/detectors.js +8 -28
- package/dist/package-test.js +2 -72987
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -72192
- package/dist/package.js.map +1 -0
- package/files/axolabs-style.json +97 -0
- package/files/codes-to-symbols.json +66 -0
- package/files/formats-to-helm.json +59 -0
- package/files/linkers.json +22 -0
- package/files/monomer-lib.json +1094 -0
- package/link-bio +7 -0
- package/package.json +30 -26
- package/scripts/build-monomer-lib.py +391 -122
- package/src/demo/demo-st-ui.ts +71 -0
- package/src/demo/handle-error.ts +12 -0
- package/src/model/axolabs/axolabs-tab.ts +111 -0
- package/src/model/axolabs/const.ts +33 -0
- package/src/{axolabs → model/axolabs}/draw-svg.ts +1 -1
- package/src/{axolabs → model/axolabs}/helpers.ts +7 -5
- package/src/model/const.ts +19 -0
- package/src/model/data-loading-utils/const.ts +8 -0
- package/src/model/data-loading-utils/json-loader.ts +38 -0
- package/src/model/data-loading-utils/types.ts +30 -0
- package/src/model/format-translation/const.ts +8 -0
- package/src/model/format-translation/conversion-utils.ts +48 -0
- package/src/model/format-translation/format-converter.ts +107 -0
- package/src/model/helpers.ts +12 -0
- package/src/model/monomer-lib/const.ts +3 -0
- package/src/model/monomer-lib/lib-wrapper.ts +106 -0
- package/src/model/parsing-validation/format-detector.ts +57 -0
- package/src/model/parsing-validation/sequence-validator.ts +52 -0
- package/src/model/sequence-to-structure-utils/const.ts +1 -0
- package/src/{structures-works → model/sequence-to-structure-utils}/mol-transformations.ts +61 -87
- package/src/model/sequence-to-structure-utils/monomer-code-parser.ts +92 -0
- package/src/model/sequence-to-structure-utils/sdf-tab.ts +94 -0
- package/src/model/sequence-to-structure-utils/sequence-to-molfile.ts +409 -0
- package/src/package.ts +106 -77
- package/src/tests/const.ts +17 -0
- package/src/tests/smiles-tests.ts +32 -457
- package/src/view/const/main-tab.ts +3 -0
- package/src/view/const/view.ts +10 -0
- package/src/view/css/axolabs-tab.css +1 -0
- package/src/view/css/colored-text-input.css +27 -0
- package/src/view/css/main-tab.css +46 -0
- package/src/view/css/sdf-tab.css +39 -0
- package/src/view/monomer-lib-viewer/viewer.ts +22 -0
- package/src/view/tabs/axolabs.ts +720 -0
- package/src/view/tabs/main.ts +174 -0
- package/src/view/tabs/sdf.ts +173 -0
- package/src/view/utils/app-info-dialog.ts +18 -0
- package/src/view/utils/colored-input/colored-text-input.ts +56 -0
- package/src/view/utils/colored-input/input-painters.ts +44 -0
- package/src/view/utils/draw-molecule.ts +86 -0
- package/src/view/utils/molecule-img.ts +106 -0
- package/src/view/view.ts +129 -0
- package/tsconfig.json +12 -18
- package/webpack.config.js +17 -4
- package/README.md +0 -84
- package/css/style.css +0 -18
- package/img/Sequence Translator Axolabs.png +0 -0
- package/jest.config.js +0 -33
- package/setup-unlink-clean.cmd +0 -14
- package/setup.cmd +0 -14
- package/setup.sh +0 -37
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -97
- package/src/apps/oligo-sd-file-app.ts +0 -58
- package/src/autostart/ICDs.ts +0 -3
- package/src/autostart/IDPs.ts +0 -3
- package/src/autostart/calculations.ts +0 -40
- package/src/autostart/constants.ts +0 -37
- package/src/autostart/registration.ts +0 -241
- package/src/autostart/salts.ts +0 -2
- package/src/autostart/sources.ts +0 -3
- package/src/autostart/users.ts +0 -3
- package/src/axolabs/constants.ts +0 -101
- package/src/axolabs/define-pattern.ts +0 -873
- package/src/helpers.ts +0 -28
- package/src/main/main-view.ts +0 -262
- package/src/structures-works/const.ts +0 -5
- package/src/structures-works/converters.ts +0 -323
- package/src/structures-works/from-monomers.ts +0 -267
- package/src/structures-works/map.ts +0 -720
- package/src/structures-works/save-sense-antisense.ts +0 -91
- package/src/structures-works/sequence-codes-tools.ts +0 -344
- package/src/utils/parse.ts +0 -27
- package/src/utils/sdf-add-columns.ts +0 -118
- package/src/utils/sdf-save-table.ts +0 -56
- package/test-SequenceTranslator-6288c2fbe346-cce4ac1d.html +0 -259
- package/vendors/openchemlib-full.js +0 -293
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
import * as grok from 'datagrok-api/grok';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
|
-
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
|
|
5
|
-
import {download} from '../helpers';
|
|
6
|
-
import {sequenceToMolV3000} from '../structures-works/from-monomers';
|
|
7
|
-
import {linkStrandsV3000} from '../structures-works/mol-transformations';
|
|
8
|
-
import {getFormat} from '../structures-works/sequence-codes-tools';
|
|
9
|
-
|
|
10
|
-
/**
 * Builds an SDF document from a sense strand and one or two antisense strands
 * and passes it to `download`.
 *
 * @param as sequence of the first antisense strand
 * @param ss sequence of the sense strand; with whitespace removed it also names the downloaded file
 * @param oneEntity when true, all strands are combined by `linkStrandsV3000` into a single record;
 *   otherwise every strand is written as its own record with a `Sequence` data item
 * @param useChirality forwarded to `linkStrandsV3000` (used only when `oneEntity` is true)
 * @param invertSS forwarded to `sequenceToMolV3000` for the sense strand
 * @param invertAS forwarded to `sequenceToMolV3000` for the first antisense strand
 * @param as2 optional second antisense strand; `null` or an empty string means "absent"
 * @param invertAS2 forwarded to `sequenceToMolV3000` for the second antisense strand
 */
export function saveSdf(as: string, ss: string,
  oneEntity: boolean, useChirality: boolean,
  invertSS: boolean, invertAS: boolean,
  as2: string | null = null, invertAS2: boolean | null) {
  // NOTE(review): getFormat may return null; the non-null assertions below do not guard against that.
  const antiFormat = getFormat(as);
  const senseFormat = getFormat(ss);

  const senseMol = sequenceToMolV3000(ss, invertSS, false, senseFormat!);
  const antiMol = sequenceToMolV3000(as, invertAS, false, antiFormat!);

  // The second antisense strand is converted only when it was actually provided.
  const antiMol2: string | null = (as2 != null && as2 != '') ?
    sequenceToMolV3000(as2, invertAS2!, false, getFormat(as2)!) :
    null;

  // One SDF record: the mol block, a `Sequence` data item naming the strand, and the record terminator.
  const toRecord = (mol: string, label: string): string =>
    mol + '\n' + '> <Sequence>\n' + label + '\n$$$$\n';

  let sdf: string;
  if (!oneEntity) {
    sdf = toRecord(senseMol, 'Sense Strand') + toRecord(antiMol, 'Anti Sense');
    if (antiMol2)
      sdf += toRecord(antiMol2, 'Anti Sense 2');
  } else {
    const antiStrands = [antiMol];
    if (antiMol2 != null)
      antiStrands.push(antiMol2);
    sdf = linkStrandsV3000({senseStrands: [senseMol], antiStrands: antiStrands}, useChirality) + '\n$$$$\n';
  }

  download(ss.replace(/\s/g, '') + '.sdf', encodeURIComponent(sdf));
}
|
|
45
|
-
|
|
46
|
-
/**
 * Builds the "save sense/antisense" form: three strand inputs, a direction selector per strand,
 * the "one entity" and "use chiral" switches and a "Save SDF" button that calls `saveSdf`.
 *
 * @returns the panel element containing the form and an (initially empty) block for a molecule image
 */
export function saveSenseAntiSense() {
  const moleculeSvgDiv = ui.block([]);
  const ssInput = ui.textInput('Sense Strand', '');
  const asInput = ui.textInput('Anti Sense', '');
  const asInput2 = ui.textInput('Anti Sense 2', '');
  const straight = '5 prime -> 3 prime';
  const inverse = '3 prime -> 5 prime';
  let ssInverse = false;
  let asInverse = false;
  let as2Inverse = false;

  // Each selector keeps its own "inverse" flag in sync with the chosen direction.
  const changeSense = ui.choiceInput('SS direction', straight, [straight, inverse]);
  changeSense.onChanged(() => {ssInverse = changeSense.value == inverse;});
  const changeAntiSense = ui.choiceInput('AS direction', straight, [straight, inverse]);
  changeAntiSense.onChanged(() => {asInverse = changeAntiSense.value == inverse;});
  const changeAntiSense2 = ui.choiceInput('AS 2 direction', straight, [straight, inverse]);
  // Fix: this handler used to overwrite `asInverse` from `changeAntiSense`, so the second
  // antisense direction was never recorded and the first one could be clobbered.
  changeAntiSense2.onChanged(() => {as2Inverse = changeAntiSense2.value == inverse;});

  const saveOption = ui.switchInput('Save as one entity', true);
  const chirality = ui.switchInput('Use chiral', true);
  const saveBtn = ui.button('Save SDF', () =>
    saveSdf(asInput.value, ssInput.value, saveOption.value, chirality.value, ssInverse, asInverse,
      asInput2.value, as2Inverse));

  const saveSection = ui.panel([
    ui.div([
      ui.div([
        ui.divH([ui.h1('Inputs')]),
        ui.divV([
          ssInput,
          asInput,
          asInput2,
          // NOTE(review): `changeSense` is listed twice (wrapped in a div and directly); kept as in the original.
          ui.div([changeSense], {style: {width: '40'}}),
          changeSense,
          changeAntiSense,
          changeAntiSense2,
          saveOption,
          chirality,
          ui.buttonsInput([saveBtn]),
        ], 'ui-form'),
      ], 'ui-form'),
    ], 'ui-form'),
    moleculeSvgDiv,
  ]);

  return saveSection;
}
|
|
@@ -1,344 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
import {map, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS, DELIMITER, gcrsCodesWithoutSmiles, NUCLEOTIDES} from './map';
|
|
3
|
-
import {sortByStringLengthInDescendingOrder} from '../helpers';
|
|
4
|
-
import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
|
|
5
|
-
asoGapmersBioSpringToNucleotides, asoGapmersBioSpringToGcrs, gcrsToMermade12, siRnaNucleotideToBioSpringSenseStrand,
|
|
6
|
-
siRnaNucleotideToAxolabsSenseStrand, siRnaNucleotidesToGcrs, siRnaBioSpringToNucleotides,
|
|
7
|
-
siRnaBioSpringToAxolabs, siRnaBioSpringToGcrs, siRnaAxolabsToNucleotides,
|
|
8
|
-
siRnaAxolabsToBioSpring, siRnaAxolabsToGcrs, siRnaGcrsToNucleotides,
|
|
9
|
-
siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides, gcrsToLcms} from './converters';
|
|
10
|
-
|
|
11
|
-
/** Placeholder put into a conversion result when a target format has no translation table. */
const noTranslationTableAvailable = 'No translation table available';

/** Message used in conversion results when the format of the input sequence could not be determined. */
export const undefinedInputSequence = 'Type of input sequence is undefined';
|
|
13
|
-
|
|
14
|
-
/**
 * Guesses the format (synthesizer name) of a sequence.
 *
 * The candidates come from `getListOfPossibleSynthesizersByFirstMatchedCode`. The sequence is then
 * scanned code by code with the codes of every candidate in turn; the scan position is shared
 * between candidates, so each candidate continues where the previous one stopped.
 *
 * @param sequence the sequence to analyze
 * @returns the first candidate synthesizer, or `null` when there are no candidates, or when the
 *   whole sequence was consumed but no technology of the first candidate has a code that prefixes it.
 *   Note that a sequence that could not be consumed completely still yields the first candidate.
 */
export function getFormat(sequence: string): string | null {
  const candidates = getListOfPossibleSynthesizersByFirstMatchedCode(sequence);
  if (candidates.length == 0)
    return null;

  const firstUniqueCharacters = ['r', 'd'];
  let position = 0;

  for (const synthesizer of candidates) {
    const codes = getAllCodesOfSynthesizer(synthesizer);
    while (position < sequence.length) {
      const matchedCode = codes.find((c) => c == sequence.slice(position, position + c.length));
      if (matchedCode == null)
        break;

      // for mistake pattern 'rAA'
      if (position > 1 &&
        NUCLEOTIDES.includes(sequence[position]) &&
        firstUniqueCharacters.includes(sequence[position - 2]))
        break;

      // for mistake pattern 'ArA'
      if (firstUniqueCharacters.includes(sequence[position + 1]) &&
        NUCLEOTIDES.includes(sequence[position])) {
        position++;
        break;
      }

      position += matchedCode.length;
    }
  }

  // Not every character could be matched: report the first candidate anyway.
  if (position != sequence.length)
    return candidates[0];

  // The former second scan over technology codes only advanced a local counter that was never
  // read, so it has been removed; only the "no technology matches" check affects the result.
  if (getListOfPossibleTechnologiesByFirstMatchedCode(sequence, candidates[0]).length == 0)
    return null;

  return candidates[0];
}
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
/**
 * Checks how far a sequence can be read using the codes of the candidate synthesizers.
 *
 * When `format` is `null` the candidates are detected from the sequence, otherwise `format` is the
 * only candidate. Every candidate scans the sequence independently from position 0, trying longer
 * codes first. The candidate that got furthest wins (the first one on ties).
 *
 * Technology detection is currently disabled, so the result carries no technology information.
 *
 * @param sequence the sequence to validate
 * @param format synthesizer name to validate against, or `null` to auto-detect
 * @returns `indexOfFirstNotValidChar` is -1 when the winning candidate consumed the whole sequence,
 *   otherwise the position where its scan stopped; `synthesizer` is a one-element array with the
 *   winning candidate, or `null` (with index 0) when there are no candidates at all
 */
export function isValidSequence(sequence: string, format: string | null): {
  indexOfFirstNotValidChar: number,
  synthesizer: string[] | null,
} {
  const candidates = format == null ?
    getListOfPossibleSynthesizersByFirstMatchedCode(sequence) :
    [format];

  if (candidates.length == 0)
    return {indexOfFirstNotValidChar: 0, synthesizer: null};

  const firstUniqueCharacters = ['r', 'd'];

  // Position reached by each candidate, in candidate order.
  const reached = candidates.map((candidate) => {
    const codes = sortByStringLengthInDescendingOrder(getAllCodesOfSynthesizer(candidate));
    let position = 0;
    while (position < sequence.length) {
      const matchedCode = codes.find((c) => c == sequence.slice(position, position + c.length));
      if (matchedCode == null)
        break;

      // for mistake pattern 'rAA'
      if (position > 1 &&
        NUCLEOTIDES.includes(sequence[position]) &&
        firstUniqueCharacters.includes(sequence[position - 2]))
        break;

      // for mistake pattern 'ArA'
      if (firstUniqueCharacters.includes(sequence[position + 1]) &&
        NUCLEOTIDES.includes(sequence[position])) {
        position++;
        break;
      }

      position += matchedCode.length;
    }
    return position;
  });

  const furthest = Math.max(...reached);
  const synthesizer = candidates[reached.indexOf(furthest)];
  const indexOfFirstNotValidChar = (furthest == sequence.length) ? -1 : furthest;

  return {
    indexOfFirstNotValidChar: indexOfFirstNotValidChar,
    synthesizer: [synthesizer],
  };
}
|
|
207
|
-
|
|
208
|
-
/**
 * Collects every code known for a synthesizer.
 *
 * @param synthesizer key of `map`
 * @returns the codes of all technologies of the synthesizer (in technology order), followed by the
 *   keys of `MODIFICATIONS` and then `DELIMITER`
 */
export function getAllCodesOfSynthesizer(synthesizer: string): string[] {
  const technologies = map[synthesizer];
  const codes: string[] = [];
  Object.keys(technologies).forEach((technology) => {
    codes.push(...Object.keys(technologies[technology]));
  });
  return codes.concat(Object.keys(MODIFICATIONS), DELIMITER);
}
|
|
214
|
-
|
|
215
|
-
/**
 * Lists the synthesizers whose codes match the beginning of the sequence.
 *
 * Matching starts right after the first `)` of the sequence, unless that `)` is the last
 * character (or there is none), in which case it starts at position 0.
 *
 * @param sequence the sequence to analyze
 * @returns names of the matching synthesizers, in the order of the keys of `map`
 */
function getListOfPossibleSynthesizersByFirstMatchedCode(sequence: string): string[] {
  //TODO: get first non-dropdown code when there are two modifications
  const firstClosingParenthesis = sequence.indexOf(')');
  const start = (firstClosingParenthesis != -1 && firstClosingParenthesis != sequence.length - 1) ?
    firstClosingParenthesis + 1 :
    0;
  const matchesAtStart = (code: string): boolean => code == sequence.slice(start, start + code.length);

  let synthesizers: string[] = [];
  for (const synthesizer of Object.keys(map)) {
    let codes = sortByStringLengthInDescendingOrder(getAllCodesOfSynthesizer(synthesizer));
    if (synthesizer == 'Janssen GCRS Codes')
      codes = codes.concat(gcrsCodesWithoutSmiles);

    // NOTE(review): this reset runs on every iteration, so it discards synthesizers collected so
    // far whenever the sequence starts with a GCRS code without SMILES - confirm this is intended.
    if (gcrsCodesWithoutSmiles.some(matchesAtStart))
      synthesizers = ['Janssen GCRS Codes'];

    if (codes.some(matchesAtStart))
      synthesizers.push(synthesizer);
  }
  return synthesizers;
}
|
|
236
|
-
|
|
237
|
-
/**
 * Lists the technologies of a synthesizer that have a code (or a modification code from
 * `MODIFICATIONS`) equal to the beginning of the sequence.
 *
 * @param sequence the sequence to analyze
 * @param synthesizer key of `map`
 * @returns matching technology names, in the order of the keys of `map[synthesizer]`
 */
function getListOfPossibleTechnologiesByFirstMatchedCode(sequence: string, synthesizer: string): string[] {
  const modificationCodes = Object.keys(MODIFICATIONS);
  return Object.keys(map[synthesizer]).filter((technology: string) => {
    const codes = Object.keys(map[synthesizer][technology]).concat(modificationCodes);
    return codes.some((code) => sequence.startsWith(code));
  });
}
|
|
246
|
-
|
|
247
|
-
/**
 * Converts a sequence into the other supported notations, based on a validation result.
 *
 * @param sequence the input sequence
 * @param output validation result for `sequence` (shape of the value returned by `isValidSequence`)
 * @returns an object whose keys are notation names and whose values are the converted sequences;
 *   for an invalid sequence (`indexOfFirstNotValidChar != -1`) an object with the serialized
 *   `output` and an `Error` message instead
 *
 * NOTE(review): technology checks are disabled, so for each synthesizer only the FIRST branch
 * testing it can ever run; the later branches for the same synthesizer are unreachable. They are
 * kept so the shape of the inferred return type stays the same.
 */
export function convertSequence(sequence: string, output: {
  indexOfFirstNotValidChar: number, synthesizer: string[] | null}) {
  if (output.indexOfFirstNotValidChar != -1) {
    return {
      indexOfFirstNotValidChar: JSON.stringify(output),
      Error: undefinedInputSequence,
    };
  }

  const detectedAs = (synthesizer: string): boolean => output.synthesizer!.includes(synthesizer);

  if (detectedAs(SYNTHESIZERS.RAW_NUCLEOTIDES)) {
    return {
      type: SYNTHESIZERS.RAW_NUCLEOTIDES,
      Nucleotides: sequence,
      BioSpring: asoGapmersNucleotidesToBioSpring(sequence),
      GCRS: asoGapmersNucleotidesToGcrs(sequence),
    };
  }

  if (detectedAs(SYNTHESIZERS.BIOSPRING)) {
    return {
      type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.ASO_GAPMERS,
      Nucleotides: asoGapmersBioSpringToNucleotides(sequence),
      BioSpring: sequence,
      GCRS: asoGapmersBioSpringToGcrs(sequence),
    };
  }

  if (detectedAs(SYNTHESIZERS.GCRS)) {
    return {
      type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.ASO_GAPMERS,
      Nucleotides: gcrsToNucleotides(sequence),
      BioSpring: siRnaGcrsToBioSpring(sequence),
      Axolabs: siRnaGcrsToAxolabs(sequence),
      Mermade12: gcrsToMermade12(sequence),
      GCRS: sequence,
      LCMS: gcrsToLcms(sequence),
    };
  }

  // Unreachable while the technology check is disabled (RAW_NUCLEOTIDES is handled above).
  if (detectedAs(SYNTHESIZERS.RAW_NUCLEOTIDES)) {
    return {
      type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.RNA,
      Nucleotides: sequence,
      BioSpring: siRnaNucleotideToBioSpringSenseStrand(sequence),
      Axolabs: siRnaNucleotideToAxolabsSenseStrand(sequence),
      GCRS: siRnaNucleotidesToGcrs(sequence),
    };
  }

  // Unreachable while the technology check is disabled (BIOSPRING is handled above).
  if (detectedAs(SYNTHESIZERS.BIOSPRING)) {
    return {
      type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.SI_RNA,
      Nucleotides: siRnaBioSpringToNucleotides(sequence),
      BioSpring: sequence,
      Axolabs: siRnaBioSpringToAxolabs(sequence),
      GCRS: siRnaBioSpringToGcrs(sequence),
    };
  }

  if (detectedAs(SYNTHESIZERS.AXOLABS)) {
    return {
      type: SYNTHESIZERS.AXOLABS + ' ' + TECHNOLOGIES.SI_RNA,
      Nucleotides: siRnaAxolabsToNucleotides(sequence),
      BioSpring: siRnaAxolabsToBioSpring(sequence),
      Axolabs: sequence,
      GCRS: siRnaAxolabsToGcrs(sequence),
    };
  }

  // Unreachable while the technology check is disabled (GCRS is handled above).
  if (detectedAs(SYNTHESIZERS.GCRS)) {
    return {
      type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.SI_RNA,
      Nucleotides: siRnaGcrsToNucleotides(sequence),
      BioSpring: siRnaGcrsToBioSpring(sequence),
      Axolabs: siRnaGcrsToAxolabs(sequence),
      MM12: gcrsToMermade12(sequence),
      GCRS: sequence,
      LCMS: gcrsToLcms(sequence),
    };
  }

  // Unreachable for the same reason (GCRS is handled above).
  if (detectedAs(SYNTHESIZERS.GCRS)) {
    return {
      type: SYNTHESIZERS.GCRS,
      Nucleotides: gcrsToNucleotides(sequence),
      GCRS: sequence,
      Mermade12: gcrsToMermade12(sequence),
      LCMS: gcrsToLcms(sequence),
    };
  }

  if (detectedAs(SYNTHESIZERS.MERMADE_12)) {
    return {
      type: SYNTHESIZERS.MERMADE_12,
      Nucleotides: noTranslationTableAvailable,
      GCRS: noTranslationTableAvailable,
      Mermade12: sequence,
    };
  }

  // None of the known synthesizers was detected.
  return {
    type: undefinedInputSequence,
    Nucleotides: undefinedInputSequence,
  };
}
|
package/src/utils/parse.ts
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
/**
 * Markers that delimit the strands inside a multi-strand sequence cell.
 * The cell text starts with `BEFORE_SS`; every antisense strand begins on a new
 * CRLF-terminated line with its own marker.
 */
export const CELL_STRUCTURE = {
  // Two strands: sense + antisense.
  DUPLEX: {
    BEFORE_SS: 'SS ',
    BEFORE_AS: '\r\nAS ',
  },
  // Three strands: sense + two antisense.
  TRIPLEX_OR_DIMER: {
    BEFORE_SS: 'SS ',
    BEFORE_AS1: '\r\nAS1 ',
    BEFORE_AS2: '\r\nAS2 ',
  },
};
|
|
12
|
-
|
|
13
|
-
/**
 * Splits the text of a duplex cell into its two strands.
 *
 * The first `BEFORE_SS.length` characters are dropped without checking that they are the marker;
 * the rest is split on the antisense marker.
 *
 * @param s cell text
 * @returns the sense strand as `SS` and the antisense strand as `AS`
 *   (`AS` is `undefined` at runtime when the antisense marker is missing)
 */
export function parseStrandsFromDuplexCell(s: string): { SS: string, AS: string } {
  const {BEFORE_SS, BEFORE_AS} = CELL_STRUCTURE.DUPLEX;
  const [sense, antisense] = s.slice(BEFORE_SS.length).split(BEFORE_AS);
  return {SS: sense, AS: antisense};
}
|
|
19
|
-
|
|
20
|
-
/**
 * Splits the text of a triplex/dimer cell into its three strands.
 *
 * The first `BEFORE_SS.length` characters are dropped without checking that they are the marker;
 * the rest is split on the `AS1` marker and its second part on the `AS2` marker.
 *
 * @param s cell text
 * @returns the sense strand as `SS` and the antisense strands as `AS1` and `AS2`
 *   (`AS2` is `undefined` at runtime when its marker is missing)
 * @throws TypeError when the `AS1` marker is missing, because there is nothing to split further
 */
export function parseStrandsFromTriplexOrDimerCell(s: string): { SS: string, AS1: string, AS2: string } {
  const markers = CELL_STRUCTURE.TRIPLEX_OR_DIMER;
  const [sense, antisenses] = s.slice(markers.BEFORE_SS.length).split(markers.BEFORE_AS1);
  const [antisense1, antisense2] = antisenses.split(markers.BEFORE_AS2);
  return {SS: sense, AS1: antisense1, AS2: antisense2};
}
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {COL_NAMES, GENERATED_COL_NAMES, SEQUENCE_TYPES} from '../autostart/constants';
|
|
3
|
-
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
import {removeEmptyRows} from '../helpers';
|
|
5
|
-
import {parseStrandsFromDuplexCell, parseStrandsFromTriplexOrDimerCell} from './parse';
|
|
6
|
-
import {isValidSequence} from '../structures-works/sequence-codes-tools';
|
|
7
|
-
import {batchMolWeight, molecularWeight, saltMass, saltMolWeigth} from '../autostart/calculations';
|
|
8
|
-
import {weightsObj} from '../structures-works/map';
|
|
9
|
-
|
|
10
|
-
/** Thrown when a data frame already contains one of the columns that are about to be generated. */
export class SdfColumnsExistsError extends Error {
  /**
   * @param message human-readable description of the problem
   */
  constructor(message: string) {
    // Fix: the message used to be dropped (`super()`), leaving `error.message` empty.
    super(message);
    this.name = 'SdfColumnsExistsError';
    // Keeps `instanceof` working when the code is compiled to ES5.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
|
|
15
|
-
|
|
16
|
-
/**
 * Adds the generated columns (compound name, comments, molecular weight, salt mass,
 * salt molecular weight, batch molecular weight) to a data frame.
 *
 * Every cell is computed independently: when the computation of a cell throws, `onError` is called
 * with the row index and the error, and the cell gets its default value (`''` for string
 * columns, `NaN` for float columns).
 *
 * @param df source data frame; it is passed through `removeEmptyRows` before columns are added
 * @param saltNamesList salt names, forwarded to `saltMass` / `saltMolWeigth`
 * @param saltsMolWeightList salt molecular weights, forwarded to `saltMass` / `saltMolWeigth`
 * @param onError callback receiving the row index and the error of a failed cell
 * @returns the data frame returned by `removeEmptyRows`, with the new columns added
 * @throws SdfColumnsExistsError when any of `GENERATED_COL_NAMES` is already present
 */
export function sdfAddColumns(
  df: DG.DataFrame, saltNamesList: string[], saltsMolWeightList: number[], onError: (rowI: number, err: any) => void
): DG.DataFrame {
  const sequenceCol = df.getCol(COL_NAMES.SEQUENCE);
  const saltCol = df.getCol(COL_NAMES.SALT);
  const equivalentsCol = df.getCol(COL_NAMES.EQUIVALENTS);
  const typeCol = df.getCol(COL_NAMES.TYPE);
  const chemistryNameCol = df.getCol(COL_NAMES.CHEMISTRY_NAME);

  const alreadyGenerated = GENERATED_COL_NAMES.some((colName) => df.columns.contains(colName));
  if (alreadyGenerated)
    throw new SdfColumnsExistsError('Columns already exist');

  // NOTE(review): the column handles above were taken before this call; whether they still line up
  // with the rows of the returned frame depends on how removeEmptyRows works - confirm.
  df = removeEmptyRows(df, sequenceCol);

  /** Runs `compute`; on failure reports the error for the row and yields `fallback`. */
  function guarded<T>(rowI: number, fallback: T, compute: () => T): T {
    try {
      return compute();
    } catch (err) {
      onError(rowI, err);
      return fallback;
    }
  }

  const singleStrandTypes = [SEQUENCE_TYPES.SENSE_STRAND, SEQUENCE_TYPES.ANTISENSE_STRAND];
  const threeStrandTypes = [SEQUENCE_TYPES.DIMER, SEQUENCE_TYPES.TRIPLEX];
  const multiStrandTypes = [SEQUENCE_TYPES.DUPLEX, SEQUENCE_TYPES.DIMER, SEQUENCE_TYPES.TRIPLEX];

  const isValid = (seq: string): boolean => isValidSequence(seq, null).indexOfFirstNotValidChar == -1;
  const weigh = (seq: string) => molecularWeight(seq, weightsObj);

  // Compound name: chemistry name for multi-strand rows, the sequence itself otherwise.
  df.columns.addNewString(COL_NAMES.COMPOUND_NAME).init((i: number) =>
    guarded<string>(i, '', () =>
      multiStrandTypes.includes(typeCol.get(i)) ? chemistryNameCol.get(i) : sequenceCol.get(i)));

  // Compound comments: the sequence for single strands, a description of the strands otherwise.
  df.columns.addNewString(COL_NAMES.COMPOUND_COMMENTS).init((i: number) =>
    guarded<string>(i, '', () => {
      const type = typeCol.get(i);
      if (singleStrandTypes.includes(type))
        return sequenceCol.get(i);
      if (type == SEQUENCE_TYPES.DUPLEX) {
        const obj = parseStrandsFromDuplexCell(sequenceCol.get(i));
        return `${chemistryNameCol.get(i)}; duplex of SS: ${obj.SS} and AS: ${obj.AS}`;
      }
      if (threeStrandTypes.includes(type)) {
        const obj = parseStrandsFromTriplexOrDimerCell(sequenceCol.get(i));
        return `${chemistryNameCol.get(i)}; duplex of SS: ${obj.SS} and AS1: ${obj.AS1} and AS2: ${obj.AS2}`;
      }
      return '';
    }));

  // Compound molecular weight: sum over the strands, or FLOAT_NULL when any strand is invalid.
  df.columns.addNewFloat(COL_NAMES.COMPOUND_MOL_WEIGHT).init((i: number) =>
    guarded<number>(i, Number.NaN, () => {
      const type = typeCol.get(i);
      if (singleStrandTypes.includes(type)) {
        const seq = sequenceCol.get(i);
        return isValid(seq) ? weigh(seq) : DG.FLOAT_NULL;
      }
      if (type == SEQUENCE_TYPES.DUPLEX) {
        const obj = parseStrandsFromDuplexCell(sequenceCol.get(i));
        return [obj.SS, obj.AS].every((seq) => isValid(seq)) ?
          weigh(obj.SS) + weigh(obj.AS) :
          DG.FLOAT_NULL;
      }
      if (threeStrandTypes.includes(type)) {
        const obj = parseStrandsFromTriplexOrDimerCell(sequenceCol.get(i));
        return [obj.SS, obj.AS1, obj.AS2].every((seq) => isValid(seq)) ?
          weigh(obj.SS) + weigh(obj.AS1) + weigh(obj.AS2) :
          DG.FLOAT_NULL;
      }
      // Unknown type: the cell keeps the default value.
      return Number.NaN;
    }));

  df.columns.addNewFloat(COL_NAMES.SALT_MASS).init((i: number) =>
    guarded<number>(i, Number.NaN, () =>
      saltMass(saltNamesList, saltsMolWeightList, equivalentsCol, i, saltCol)));

  df.columns.addNewFloat(COL_NAMES.SALT_MOL_WEIGHT).init((i: number) =>
    guarded<number>(i, Number.NaN, () =>
      saltMolWeigth(saltNamesList, saltCol, saltsMolWeightList, i)));

  // The batch weight is derived from two of the columns generated above.
  const compoundMolWeightCol = df.getCol(COL_NAMES.COMPOUND_MOL_WEIGHT);
  const saltMassCol = df.getCol(COL_NAMES.SALT_MASS);
  df.columns.addNewFloat(COL_NAMES.BATCH_MOL_WEIGHT).init((i: number) =>
    guarded<number>(i, Number.NaN, () =>
      batchMolWeight(compoundMolWeightCol, saltMassCol, i)));

  return df;
}
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {COL_NAMES, GENERATED_COL_NAMES, SEQUENCE_TYPES} from '../autostart/constants';
|
|
3
|
-
import {differenceOfTwoArrays, download} from '../helpers';
|
|
4
|
-
import * as grok from 'datagrok-api/grok';
|
|
5
|
-
import {SYNTHESIZERS} from '../structures-works/map';
|
|
6
|
-
import {sequenceToMolV3000} from '../structures-works/from-monomers';
|
|
7
|
-
import {parseStrandsFromDuplexCell, parseStrandsFromTriplexOrDimerCell} from './parse';
|
|
8
|
-
import {linkStrandsV3000} from '../structures-works/mol-transformations';
|
|
9
|
-
|
|
10
|
-
/**
 * Serializes a table to SDF text and passes it to `download` under the name `<table name>.sdf`.
 *
 * Every row becomes one record: a structure built from the sequence column according to the row
 * type, followed by one data item per column except the sequence column. When building a row
 * throws, `onError` is called with the row index and the error, and the row is skipped.
 * A warning is shown when some of `GENERATED_COL_NAMES` are absent from the table.
 *
 * @param table the table to save
 * @param onError callback receiving the row index and the error of a failed row
 */
export async function sdfSaveTable(table: DG.DataFrame, onError: (rowI: number, err: any) => void) {
  const someGeneratedColumnMissing = GENERATED_COL_NAMES.some((colName) => !table.columns.contains(colName));
  if (someGeneratedColumnMissing) {
    const absentColNames = differenceOfTwoArrays(GENERATED_COL_NAMES, table.columns.names()).join(`', '`);
    grok.shell.warning(`File saved without columns '${absentColNames}'`);
  }

  const sequenceCol = table.getCol(COL_NAMES.SEQUENCE);
  const typeCol = table.getCol(COL_NAMES.TYPE);

  // The format is fixed rather than detected per row.
  const format = SYNTHESIZERS.GCRS; //getFormat(sequenceCol.get(i))!;

  // Mol block of one strand followed by a `Sequence` data item naming the strand.
  const labelledMol = (seq: string, invert: boolean, label: string): string =>
    `${sequenceToMolV3000(seq, invert, true, format)}\n> <Sequence>\n${label}\n\n`;

  const records: string[] = [];
  const rowCount = table.rowCount;
  for (let i = 0; i < rowCount; i++) {
    try {
      const type = typeCol.get(i);
      let rowStr = '';

      if (type == SEQUENCE_TYPES.SENSE_STRAND) {
        rowStr += labelledMol(sequenceCol.get(i), false, 'Sense Strand');
      } else if (type == SEQUENCE_TYPES.ANTISENSE_STRAND) {
        rowStr += labelledMol(sequenceCol.get(i), true, 'Anti Sense');
      } else if (type == SEQUENCE_TYPES.DUPLEX) {
        const obj = parseStrandsFromDuplexCell(sequenceCol.get(i));
        // NOTE(review): the strands handed to linkStrandsV3000 already carry the `Sequence`
        // data item text - confirm that the linker expects that.
        const as = labelledMol(obj.AS, true, 'Anti Sense');
        const ss = labelledMol(obj.SS, false, 'Sense Strand');
        rowStr += `${linkStrandsV3000({senseStrands: [ss], antiStrands: [as]}, true)}\n\n`;
      } else if ([SEQUENCE_TYPES.TRIPLEX, SEQUENCE_TYPES.DIMER].includes(type)) {
        const obj = parseStrandsFromTriplexOrDimerCell(sequenceCol.get(i));
        const as1 = labelledMol(obj.AS1, true, 'Anti Sense');
        const as2 = labelledMol(obj.AS2, true, 'Anti Sense');
        const ss = labelledMol(obj.SS, false, 'Sense Strand');
        rowStr += `${linkStrandsV3000({senseStrands: [ss], antiStrands: [as1, as2]}, true)}\n\n`;
      }

      // Every column except the sequence itself becomes a data item of the record.
      for (const col of table.columns) {
        if (col.name != COL_NAMES.SEQUENCE)
          rowStr += `> <${col.name}>\n${col.get(i)}\n\n`;
      }

      rowStr += '$$$$\n';

      // Added only once the row was built completely, so a failed row leaves no partial record.
      records.push(rowStr);
    } catch (err: any) {
      onError(i, err);
    }
  }

  download(`${table.name}.sdf`, encodeURIComponent(records.join('')));
}
|