@datagrok/sequence-translator 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +70674 -1436
- package/dist/package.js +69844 -4386
- package/package.json +27 -22
- package/scripts/build-monomer-lib.py +178 -0
- package/setup-unlink-clean.cmd +14 -0
- package/setup.cmd +14 -11
- package/setup.sh +37 -0
- package/src/autostart/constants.ts +12 -0
- package/src/autostart/registration.ts +18 -4
- package/src/axolabs/constants.ts +14 -14
- package/src/axolabs/define-pattern.ts +13 -12
- package/src/axolabs/draw-svg.ts +140 -201
- package/src/axolabs/helpers.ts +94 -0
- package/src/main/main-view.ts +90 -29
- package/src/package.ts +20 -2
- package/src/structures-works/const.ts +5 -0
- package/src/structures-works/converters.ts +29 -27
- package/src/structures-works/from-monomers.ts +187 -31
- package/src/structures-works/map.ts +6 -7
- package/src/structures-works/mol-transformations.ts +172 -622
- package/src/structures-works/save-sense-antisense.ts +6 -3
- package/src/structures-works/sequence-codes-tools.ts +8 -10
- package/{test-SequenceTranslator-62cc009524f3-4a9916b0.html → test-SequenceTranslator-91c83d8913ff-f94596bc.html} +10 -10
package/src/main/main-view.ts
CHANGED
|
@@ -1,21 +1,30 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import * as rxjs from 'rxjs';
|
|
4
6
|
import {convertSequence, undefinedInputSequence, isValidSequence} from '../structures-works/sequence-codes-tools';
|
|
5
|
-
import {map
|
|
7
|
+
import {map} from '../structures-works/map';
|
|
8
|
+
import {MODIFICATIONS} from '../structures-works/const';
|
|
6
9
|
import {sequenceToSmiles, sequenceToMolV3000} from '../structures-works/from-monomers';
|
|
7
10
|
import $ from 'cash-dom';
|
|
8
11
|
import {download} from '../helpers';
|
|
12
|
+
import {extractAtomDataV3000} from '../structures-works/mol-transformations';
|
|
13
|
+
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
9
14
|
|
|
10
|
-
const defaultInput = 'fAmCmGmAmCpsmU';
|
|
11
|
-
const sequenceWasCopied = 'Copied';
|
|
15
|
+
const defaultInput = 'fAmCmGmAmCpsmU'; // todo: capitalize constants
|
|
16
|
+
const sequenceWasCopied = 'Copied'; // todo: wrap hardcoded literals into constants
|
|
12
17
|
const tooltipSequence = 'Copy sequence';
|
|
13
18
|
|
|
14
|
-
export function mainView() {
|
|
15
|
-
|
|
19
|
+
export async function mainView(): Promise<HTMLDivElement> {
|
|
20
|
+
const onInput: rxjs.Subject<string> = new rxjs.Subject<string>();
|
|
21
|
+
|
|
22
|
+
async function updateTableAndMolecule(sequence: string, inputFormat: string): Promise<void> {
|
|
16
23
|
moleculeSvgDiv.innerHTML = '';
|
|
17
24
|
outputTableDiv.innerHTML = '';
|
|
18
25
|
const pi = DG.TaskBarProgressIndicator.create('Rendering table and molecule...');
|
|
26
|
+
let errorsExist = false;
|
|
27
|
+
|
|
19
28
|
try {
|
|
20
29
|
sequence = sequence.replace(/\s/g, '');
|
|
21
30
|
const output = isValidSequence(sequence, null);
|
|
@@ -52,24 +61,71 @@ export function mainView() {
|
|
|
52
61
|
);
|
|
53
62
|
|
|
54
63
|
if (outputSequenceObj.type != undefinedInputSequence && outputSequenceObj.Error != undefinedInputSequence) {
|
|
55
|
-
const
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
64
|
+
const formCanvasWidth = 500;
|
|
65
|
+
const formCanvasHeight = 170;
|
|
66
|
+
const formCanvas = ui.canvas(
|
|
67
|
+
formCanvasWidth * window.devicePixelRatio, formCanvasHeight * window.devicePixelRatio);
|
|
68
|
+
formCanvas.style.width = `${formCanvasWidth}px`;
|
|
69
|
+
formCanvas.style.height = `${formCanvasHeight}px`;
|
|
70
|
+
|
|
71
|
+
formCanvas.addEventListener('click', async () => {
|
|
72
|
+
try {
|
|
73
|
+
const mol = sequenceToMolV3000(
|
|
74
|
+
inputSequenceField.value.replace(/\s/g, ''), false, true,
|
|
75
|
+
output.synthesizer![0],
|
|
76
|
+
);
|
|
77
|
+
console.log(mol);
|
|
78
|
+
|
|
79
|
+
const addDiv = ui.div([], {style: {overflowX: 'scroll'}});
|
|
80
|
+
|
|
81
|
+
// addDiv size is required, but it is not available before dialog show()
|
|
82
|
+
const coordinates = extractAtomDataV3000(mol);
|
|
83
|
+
const cw: number = $(window).width() * 0.80; // addDiv.clientWidth
|
|
84
|
+
const ch: number = $(window).height() * 0.70; // addDiv.clientHeight
|
|
85
|
+
const molWidth: number = Math.max(...coordinates.x) - Math.min(...coordinates.x);
|
|
86
|
+
const molHeight: number = Math.max(...coordinates.y) - Math.min(...coordinates.y);
|
|
87
|
+
|
|
88
|
+
const wR: number = cw / molWidth;
|
|
89
|
+
const hR: number = ch / molHeight;
|
|
90
|
+
const r: number = hR; // Math.max(wR, hR);
|
|
91
|
+
const dlgCanvasWidth = r * molWidth;
|
|
92
|
+
const dlgCanvasHeight = r * molHeight;
|
|
93
|
+
|
|
94
|
+
const dlgCanvas = ui.canvas(dlgCanvasWidth * window.devicePixelRatio, dlgCanvasHeight * window.devicePixelRatio);
|
|
95
|
+
dlgCanvas.style.width = `${dlgCanvasWidth}px`;
|
|
96
|
+
dlgCanvas.style.height = `${dlgCanvasHeight}px`;
|
|
97
|
+
|
|
98
|
+
// // @ts-ignore
|
|
99
|
+
// OCL.StructureView.drawMolecule(dlgCanvas, OCL.Molecule.fromMolfile(mol), {suppressChiralText: true});
|
|
100
|
+
// await grok.chem.canvasMol(0, 0, dlgCanvas.width, dlgCanvas.height, dlgCanvas, mol, null,
|
|
101
|
+
// {setNewCoords: false, normalizeDepiction: false, straightenDepiction: false});
|
|
102
|
+
await grok.functions.call('Chem:canvasMol', {
|
|
103
|
+
x: 0, y: 0, w: dlgCanvas.width, h: dlgCanvas.height, canvas: dlgCanvas,
|
|
104
|
+
molString: mol, scaffoldMolString: '',
|
|
105
|
+
options: {setNewCoords: false, normalizeDepiction: false, straightenDepiction: false}
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
addDiv.appendChild(dlgCanvas);
|
|
109
|
+
ui.dialog('Molecule: ' + inputSequenceField.value)
|
|
110
|
+
.add(addDiv)
|
|
111
|
+
.showModal(true);
|
|
112
|
+
} catch (err) {
|
|
113
|
+
const errStr = errorToConsole(err);
|
|
114
|
+
console.error(errStr);
|
|
115
|
+
}
|
|
65
116
|
});
|
|
66
|
-
$(
|
|
67
|
-
$(
|
|
117
|
+
$(formCanvas).on('mouseover', () => $(formCanvas).css('cursor', 'zoom-in'));
|
|
118
|
+
$(formCanvas).on('mouseout', () => $(formCanvas).css('cursor', 'default'));
|
|
68
119
|
const mol = sequenceToMolV3000(inputSequenceField.value.replace(/\s/g, ''), false, true,
|
|
69
|
-
|
|
70
|
-
// @ts-ignore
|
|
71
|
-
OCL.StructureView.drawMolecule(
|
|
72
|
-
|
|
120
|
+
output.synthesizer![0]);
|
|
121
|
+
// // @ts-ignore
|
|
122
|
+
// OCL.StructureView.drawMolecule(formCanvas, OCL.Molecule.fromMolfile(mol), {suppressChiralText: true});
|
|
123
|
+
await grok.functions.call('Chem:canvasMol', {
|
|
124
|
+
x: 0, y: 0, w: formCanvas.width, h: formCanvas.height, canvas: formCanvas,
|
|
125
|
+
molString: mol, scaffoldMolString: '',
|
|
126
|
+
options: {setNewCoords: false, normalizeDepiction: false, straightenDepiction: false}
|
|
127
|
+
});
|
|
128
|
+
moleculeSvgDiv.append(formCanvas);
|
|
73
129
|
} else
|
|
74
130
|
moleculeSvgDiv.innerHTML = '';
|
|
75
131
|
} finally {
|
|
@@ -84,6 +140,11 @@ export function mainView() {
|
|
|
84
140
|
const moleculeSvgDiv = ui.block([]);
|
|
85
141
|
const outputTableDiv = ui.div([]);
|
|
86
142
|
const inputSequenceField = ui.textInput('', defaultInput, (sequence: string) => {
|
|
143
|
+
// Send event to DG.debounce()
|
|
144
|
+
onInput.next(sequence);
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
DG.debounce<string>(onInput, 300).subscribe((sequence) => {
|
|
87
148
|
updateTableAndMolecule(sequence, inputFormatChoiceInput.value!);
|
|
88
149
|
});
|
|
89
150
|
|
|
@@ -121,9 +182,9 @@ export function mainView() {
|
|
|
121
182
|
);
|
|
122
183
|
|
|
123
184
|
const overhangModificationsGrid = DG.Viewer.grid(
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
185
|
+
DG.DataFrame.fromColumns([
|
|
186
|
+
DG.Column.fromStrings('Name', Object.keys(MODIFICATIONS)),
|
|
187
|
+
])!, {showRowHeader: false, showCellTooltip: false, allowEdit: false},
|
|
127
188
|
);
|
|
128
189
|
updateTableAndMolecule(defaultInput, inputFormatChoiceInput.value!);
|
|
129
190
|
|
|
@@ -151,9 +212,10 @@ export function mainView() {
|
|
|
151
212
|
$(codesTablesDiv).hide(),
|
|
152
213
|
);
|
|
153
214
|
|
|
154
|
-
const downloadMolFileIcon = ui.iconFA('download', () => {
|
|
215
|
+
const downloadMolFileIcon = ui.iconFA('download', async () => {
|
|
155
216
|
const clearSequence = inputSequenceField.value.replace(/\s/g, '');
|
|
156
|
-
const result = sequenceToMolV3000(
|
|
217
|
+
const result = sequenceToMolV3000(inputSequenceField.value.replace(/\s/g, ''), false, false,
|
|
218
|
+
inputFormatChoiceInput.value!);
|
|
157
219
|
download(clearSequence + '.mol', encodeURIComponent(result));
|
|
158
220
|
}, 'Save .mol file');
|
|
159
221
|
|
|
@@ -183,9 +245,8 @@ export function mainView() {
|
|
|
183
245
|
appMainDescription,
|
|
184
246
|
ui.div([
|
|
185
247
|
ui.h1('Input sequence'),
|
|
186
|
-
ui.div([
|
|
187
|
-
|
|
188
|
-
], 'input-base'),
|
|
248
|
+
ui.div([], 'input-base'),
|
|
249
|
+
inputSequenceField.root,
|
|
189
250
|
], 'inputSequence'),
|
|
190
251
|
ui.div([inputFormatChoiceInput], {style: {padding: '5px 0'}}),
|
|
191
252
|
ui.block([
|
package/src/package.ts
CHANGED
|
@@ -5,13 +5,31 @@ import {autostartOligoSdFileSubscription} from './autostart/registration';
|
|
|
5
5
|
import {defineAxolabsPattern} from './axolabs/define-pattern';
|
|
6
6
|
import {saveSenseAntiSense} from './structures-works/save-sense-antisense';
|
|
7
7
|
import {mainView} from './main/main-view';
|
|
8
|
+
import {IMonomerLib, MonomerWorks, readLibrary} from '@datagrok-libraries/bio';
|
|
8
9
|
|
|
9
10
|
export const _package = new DG.Package();
|
|
10
11
|
|
|
12
|
+
const LIB_PATH = 'System:AppData/SequenceTranslator';
|
|
13
|
+
|
|
14
|
+
let monomerLib: IMonomerLib | null = null;
|
|
15
|
+
export let monomerWorks: MonomerWorks | null = null;
|
|
16
|
+
|
|
17
|
+
/**
 * Accessor for the module-level `monomerWorks` instance.
 *
 * @returns The current value of `monomerWorks`; it is declared with a `null` initial value,
 *   so callers must handle `null`.
 */
export function getMonomerWorks() {
  return monomerWorks;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Accessor for the module-level `monomerLib` instance.
 *
 * @returns The current value of `monomerLib`; it is declared with a `null` initial value,
 *   so callers must handle `null`.
 */
export function getMonomerLib() {
  return monomerLib;
}
|
|
11
24
|
|
|
12
25
|
//name: Sequence Translator
|
|
13
26
|
//tags: app
|
|
14
|
-
export function sequenceTranslator(): void {
|
|
27
|
+
export async function sequenceTranslator(): Promise<void> {
|
|
28
|
+
monomerLib = await readLibrary(LIB_PATH, 'helmLib.json');
|
|
29
|
+
|
|
30
|
+
if (monomerWorks == null)
|
|
31
|
+
monomerWorks = new MonomerWorks(monomerLib);
|
|
32
|
+
|
|
15
33
|
const windows = grok.shell.windows;
|
|
16
34
|
windows.showProperties = false;
|
|
17
35
|
windows.showToolbox = false;
|
|
@@ -20,7 +38,7 @@ export function sequenceTranslator(): void {
|
|
|
20
38
|
const v = grok.shell.newView('Sequence Translator', []);
|
|
21
39
|
v.box = true;
|
|
22
40
|
v.append(ui.tabControl({
|
|
23
|
-
'MAIN': mainView(),
|
|
41
|
+
'MAIN': await mainView(),
|
|
24
42
|
'AXOLABS': defineAxolabsPattern(),
|
|
25
43
|
'SDF': saveSenseAntiSense(),
|
|
26
44
|
}));
|
|
@@ -1,33 +1,35 @@
|
|
|
1
|
-
import {lcmsToGcrs} from './map';
|
|
1
|
+
import {lcmsToGcrs, MODIFICATIONS} from './map';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
-
import {
|
|
3
|
+
import {DELIMITER} from './map';
|
|
4
|
+
import {sortByStringLengthInDescendingOrder} from '../helpers';
|
|
4
5
|
//name: gcrsToLcms
|
|
5
6
|
//input: string nucleotides {semType: GCRS}
|
|
6
7
|
//output: string result {semType: LCMS}
|
|
7
8
|
export function gcrsToLcms(sequence: string): string {
  // Translates a GCRS sequence to LCMS: the sequence is consumed left to right, at each position
  // taking the first candidate code (candidates are ordered by sortByStringLengthInDescendingOrder)
  // that matches, and appending the LCMS value mapped to it. Any failure yields '<error>'.
  try {
    const table = DG.DataFrame.fromCsv(lcmsToGcrs);
    const gcrsCodes: string[] = table.getCol('GCRS').toList();
    const lcmsCodes: string[] = table.getCol('LCMS').toList();

    // GCRS code -> LCMS code; the delimiter maps to itself.
    const lcmsByGcrs: { [i: string]: string } = {};
    for (let k = 0; k < gcrsCodes.length; k++)
      lcmsByGcrs[gcrsCodes[k]] = lcmsCodes[k];
    lcmsByGcrs[DELIMITER] = DELIMITER;

    const candidates = sortByStringLengthInDescendingOrder(
      gcrsCodes.concat(DELIMITER).concat(Object.keys(MODIFICATIONS)));

    let translated = '';
    let pos = 0;
    while (pos < sequence.length) {
      // When nothing matches, reading `.length` below throws and the catch returns '<error>'.
      const matched = candidates.find((c) => c == sequence.slice(pos, pos + c.length))!;
      const consumed = matched.length;
      translated += lcmsByGcrs[sequence.slice(pos, pos + consumed)];
      pos += consumed;
    }

    // Collapse every run of consecutive slashes into a single slash.
    return translated.replace(/\/{2,}/g, '/');
  } catch {
    return '<error>';
  }
}
|
|
32
34
|
|
|
33
35
|
//name: asoGapmersNucleotidesToBioSpring
|
|
@@ -35,9 +37,9 @@ export function gcrsToLcms(sequence: string): string {
|
|
|
35
37
|
//output: string result {semType: BioSpring / Gapmers}
|
|
36
38
|
export function asoGapmersNucleotidesToBioSpring(nucleotides: string): string {
|
|
37
39
|
let count: number = -1;
|
|
38
|
-
const objForEdges: {[index: string]: string} = {
|
|
40
|
+
const objForEdges: { [index: string]: string } = {
|
|
39
41
|
'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'T': '5*', 'A': '6*', 'C': '7*', 'G': '8*'};
|
|
40
|
-
const objForCenter: {[index: string]: string} = {
|
|
42
|
+
const objForCenter: { [index: string]: string } = {
|
|
41
43
|
'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'T': 'T*', 'A': 'A*', 'C': '9*', 'G': 'G*'};
|
|
42
44
|
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|T|C|G)/g, function(x: string) {
|
|
43
45
|
count++;
|
|
@@ -51,7 +53,7 @@ export function asoGapmersNucleotidesToBioSpring(nucleotides: string): string {
|
|
|
51
53
|
//output: string result {semType: GCRS / Gapmers}
|
|
52
54
|
export function asoGapmersNucleotidesToGcrs(nucleotides: string): string {
|
|
53
55
|
let count: number = -1;
|
|
54
|
-
const objForEdges: {[index: string]: string} = {
|
|
56
|
+
const objForEdges: { [index: string]: string } = {
|
|
55
57
|
'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)', 'T': 'moeUnps',
|
|
56
58
|
'A': 'moeAnps', 'C': 'moe5mCnps', 'G': 'moeGnps'};
|
|
57
59
|
const objForCenter: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
|
|
@@ -1,44 +1,104 @@
|
|
|
1
|
-
import {map,
|
|
1
|
+
// import {map, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS, DELIMITER} from './map';
|
|
2
|
+
import {map, SYNTHESIZERS, TECHNOLOGIES, DELIMITER} from './map';
|
|
2
3
|
import {isValidSequence} from './sequence-codes-tools';
|
|
3
|
-
import {getNucleotidesMol} from './mol-transformations';
|
|
4
4
|
import {sortByStringLengthInDescendingOrder} from '../helpers';
|
|
5
|
+
import {getMonomerWorks} from '../package';
|
|
6
|
+
import {getNucleotidesMol} from './mol-transformations';
|
|
5
7
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
import {standardPhosphateLinkSmiles, MODIFICATIONS} from './const';
|
|
9
|
+
import {getMonomerLib} from '../package';
|
|
10
|
+
// todo: remove
|
|
11
|
+
// const NAME = 'name';
|
|
12
|
+
const CODES = 'codes';
|
|
13
|
+
// const SMILES = 'smiles';
|
|
14
|
+
const MOL = 'molfile';
|
|
15
|
+
|
|
16
|
+
/**
 * Builds a molfile for a sequence: splits the sequence into codes, maps each code to a monomer
 * name, looks the monomers up in the monomer library and joins their molfiles with
 * `getNucleotidesMol`.
 *
 * @param sequence - Sequence to convert.
 * @param inverted - When true, the parsed codes are assembled in reverse order.
 * @param oclRender - Not used by this implementation; kept so existing call sites stay valid.
 * @param format - Synthesizer key forwarded to `getCodeToNameMap`.
 * @returns The value produced by `getNucleotidesMol` for the collected monomer molfiles.
 * @throws Error when some position of the sequence matches none of the known codes.
 */
export function sequenceToMolV3000(
  sequence: string, inverted: boolean = false, oclRender: boolean = false,
  format: string,
): string {
  const monomerNameFromCode = getCodeToNameMap(sequence, format);
  const links = ['s', 'ps', '*'];
  const includesStandardLinkAlready = ['e', 'h', /*'g',*/ 'f', 'i', 'l', 'k', 'j'];
  const dropdowns = Object.keys(MODIFICATIONS);
  const codes = sortByStringLengthInDescendingOrder(Object.keys(monomerNameFromCode))
    .concat(dropdowns)
    .concat(DELIMITER);

  // Split the sequence into codes, taking the first candidate that matches at each position.
  const codesList: string[] = [];
  let position = 0;
  while (position < sequence.length) {
    const code = codes.find((s: string) => s === sequence.slice(position, position + s.length));
    // An unmatched position used to fail with an opaque TypeError on `code.length`;
    // an empty match would never advance `position`.
    if (!code)
      throw new Error(`Unknown code at position ${position} of sequence '${sequence}'`);
    position += code.length;
    if (inverted)
      codesList.unshift(code);
    else
      codesList.push(code);
  }

  // Map codes to monomer names; codes without a mapped name are passed through unchanged.
  const monomers: string[] = [];
  for (let i = 0; i < codesList.length; i++) {
    const code = codesList[i];
    const name = monomerNameFromCode[code];
    monomers.push(name !== undefined ? name : code);

    // No phosphate linkage is appended after a linkage code, after a code that already
    // includes the standard link, or when the next code is itself a linkage.
    const linkageAlreadyPresent = links.includes(code) ||
      includesStandardLinkAlready.includes(code) ||
      (i < codesList.length - 1 && links.includes(codesList[i + 1]));
    if (!linkageAlreadyPresent)
      monomers.push('p linkage');
  }

  // NOTE(review): entries are undefined when the library is not loaded or a monomer is missing;
  // this is passed on to getNucleotidesMol unchanged, as before.
  const lib = getMonomerLib();
  const mols: string[] = monomers.map((monomer) => lib?.getMonomer('RNA', monomer)?.molfile!);

  return getNucleotidesMol(mols);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Variant of the sequence-to-molfile conversion that delegates assembly to
 * `MonomerWorks.getAtomicLevel` instead of joining molfiles locally.
 *
 * @param sequence - Sequence to convert.
 * @param inverted - When true, the parsed codes are assembled in reverse order.
 * @param oclRender - Not used by this implementation; kept so existing call sites stay valid.
 * @param format - Synthesizer key forwarded to `getCodeToNameMap`.
 * @returns The value of `getAtomicLevel(monomers, 'RNA')`. NOTE(review): when `getMonomerWorks()`
 *   returns null the optional chain yields `undefined` despite the declared `string` return type.
 * @throws Error when some position of the sequence matches none of the known codes.
 */
export function sequenceToMolV3000_new(
  sequence: string, inverted: boolean = false, oclRender: boolean = false,
  format: string,
): string {
  const monomerNameFromCode = getCodeToNameMap(sequence, format);
  const links = ['s', 'ps', '*'];
  const includesStandardLinkAlready = ['e', 'h', /*'g',*/ 'f', 'i', 'l', 'k', 'j'];
  const dropdowns = Object.keys(MODIFICATIONS);
  const codes = sortByStringLengthInDescendingOrder(Object.keys(monomerNameFromCode))
    .concat(dropdowns)
    .concat(DELIMITER);

  // Split the sequence into codes, taking the first candidate that matches at each position.
  const codesList: string[] = [];
  let position = 0;
  while (position < sequence.length) {
    const code = codes.find((s: string) => s === sequence.slice(position, position + s.length));
    // An unmatched position used to fail with an opaque TypeError on `code.length`;
    // an empty match would never advance `position`.
    if (!code)
      throw new Error(`Unknown code at position ${position} of sequence '${sequence}'`);
    position += code.length;
    if (inverted)
      codesList.unshift(code);
    else
      codesList.push(code);
  }

  const monomers: string[] = [];
  for (let i = 0; i < codesList.length; i++) {
    const code = codesList[i];
    monomers.push(monomerNameFromCode[code]);

    // No phosphate linkage is appended after a linkage code, after a code that already
    // includes the standard link, or when the next code is itself a linkage.
    const linkageAlreadyPresent = links.includes(code) ||
      includesStandardLinkAlready.includes(code) ||
      (i < codesList.length - 1 && links.includes(codesList[i + 1]));
    if (!linkageAlreadyPresent)
      monomers.push('p linkage');
  }

  return getMonomerWorks()?.getAtomicLevel(monomers, 'RNA')!;
}
|
|
43
103
|
|
|
44
104
|
export function sequenceToSmiles(sequence: string, inverted: boolean = false, format: string): string {
|
|
@@ -50,7 +110,7 @@ export function sequenceToSmiles(sequence: string, inverted: boolean = false, fo
|
|
|
50
110
|
const links = ['s', 'ps', '*'];
|
|
51
111
|
const includesStandardLinkAlready = ['e', 'h', /*'g',*/ 'f', 'i', 'l', 'k', 'j'];
|
|
52
112
|
const dropdowns = Object.keys(MODIFICATIONS);
|
|
53
|
-
codes = codes.concat(dropdowns).concat(
|
|
113
|
+
codes = codes.concat(dropdowns).concat(DELIMITER);
|
|
54
114
|
while (i < sequence.length) {
|
|
55
115
|
const code = codes.find((s: string) => s == sequence.slice(i, i + s.length))!;
|
|
56
116
|
i += code.length;
|
|
@@ -59,8 +119,8 @@ export function sequenceToSmiles(sequence: string, inverted: boolean = false, fo
|
|
|
59
119
|
for (let i = 0; i < codesList.length; i++) {
|
|
60
120
|
if (dropdowns.includes(codesList[i])) {
|
|
61
121
|
smiles += (i >= codesList.length / 2) ?
|
|
62
|
-
MODIFICATIONS[codesList[i]].right +
|
|
63
|
-
MODIFICATIONS[codesList[i]].left +
|
|
122
|
+
MODIFICATIONS[codesList[i]].right + standardPhosphateLinkSmiles :
|
|
123
|
+
MODIFICATIONS[codesList[i]].left + standardPhosphateLinkSmiles;
|
|
64
124
|
} else {
|
|
65
125
|
if (links.includes(codesList[i]) ||
|
|
66
126
|
includesStandardLinkAlready.includes(codesList[i]) ||
|
|
@@ -68,7 +128,7 @@ export function sequenceToSmiles(sequence: string, inverted: boolean = false, fo
|
|
|
68
128
|
)
|
|
69
129
|
smiles += obj[codesList[i]];
|
|
70
130
|
else
|
|
71
|
-
smiles += obj[codesList[i]] +
|
|
131
|
+
smiles += obj[codesList[i]] + standardPhosphateLinkSmiles;
|
|
72
132
|
}
|
|
73
133
|
}
|
|
74
134
|
smiles = smiles.replace(/OO/g, 'O');
|
|
@@ -82,7 +142,34 @@ export function sequenceToSmiles(sequence: string, inverted: boolean = false, fo
|
|
|
82
142
|
includesStandardLinkAlready.includes(codesList[codesList.length - 1])
|
|
83
143
|
) ?
|
|
84
144
|
smiles :
|
|
85
|
-
smiles.slice(0, smiles.length -
|
|
145
|
+
smiles.slice(0, smiles.length - standardPhosphateLinkSmiles.length + 1);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/**
 * Builds a lookup from sequence code to monomer name out of `map`.
 *
 * When `format` is null/undefined every synthesizer in `map` contributes (later entries overwrite
 * earlier ones for duplicate codes); otherwise only `map[format]` is used. The delimiter is mapped
 * to an empty string, and the name for code 'g' is taken from the siRNA table of Mermade 12 or
 * Axolabs, whichever is found first in the synthesizers reported by `isValidSequence`.
 *
 * @param sequence - Sequence passed to `isValidSequence` to detect the synthesizer.
 * @param format - Synthesizer key of `map`, or null/undefined to merge all synthesizers.
 * @returns Object mapping each code to its monomer name.
 */
function getCodeToNameMap(sequence: string, format: string) {
  const NAME = 'name';
  const codeToName: { [code: string]: string } = {};

  const synthesizers = (format == null) ? Object.keys(map) : [format];
  for (const synthesizer of synthesizers) {
    const technologies = map[synthesizer];
    for (const technology of Object.keys(technologies)) {
      const monomers = technologies[technology];
      for (const code of Object.keys(monomers))
        codeToName[code] = monomers[code][NAME]!;
    }
  }
  codeToName[DELIMITER] = '';

  // TODO: create object based from synthesizer type to avoid key(codes) duplicates
  const detected = isValidSequence(sequence, format).synthesizer;
  for (const owner of [SYNTHESIZERS.MERMADE_12, SYNTHESIZERS.AXOLABS]) {
    if (detected!.includes(owner)) {
      codeToName['g'] = map[owner][TECHNOLOGIES.SI_RNA]['g'][NAME]!;
      break;
    }
  }
  return codeToName;
}
|
|
87
174
|
|
|
88
175
|
function getObjectWithCodesAndSmiles(sequence: string, format: string) {
|
|
@@ -100,7 +187,7 @@ function getObjectWithCodesAndSmiles(sequence: string, format: string) {
|
|
|
100
187
|
obj[code] = map[format][technology][code].SMILES;
|
|
101
188
|
}
|
|
102
189
|
}
|
|
103
|
-
obj[
|
|
190
|
+
obj[DELIMITER] = '';
|
|
104
191
|
// TODO: create object based from synthesizer type to avoid key(codes) duplicates
|
|
105
192
|
const output = isValidSequence(sequence, format);
|
|
106
193
|
if (output.synthesizer!.includes(SYNTHESIZERS.MERMADE_12))
|
|
@@ -109,3 +196,72 @@ function getObjectWithCodesAndSmiles(sequence: string, format: string) {
|
|
|
109
196
|
obj['g'] = map[SYNTHESIZERS.AXOLABS][TECHNOLOGIES.SI_RNA]['g'].SMILES;
|
|
110
197
|
return obj;
|
|
111
198
|
}
|
|
199
|
+
|
|
200
|
+
/**
 * Builds a lookup from sequence code to molfile text out of a JSON monomer library.
 *
 * Every library item whose `codes` contain the synthesizer equal to `format` contributes its
 * molfile for each of its codes. The delimiter maps to an empty string. The entry for code 'g'
 * is then overridden from the siRNA codes of Mermade 12 or Axolabs, whichever is found first in
 * the synthesizers reported by `isValidSequence`.
 *
 * @param sequence - Sequence passed to `isValidSequence` to detect the synthesizer.
 * @param format - Synthesizer key to select codes by.
 * @param libFileContent - JSON text of the library; parsed as an array of items.
 * @returns Object mapping each code to molfile text.
 */
function getObjectWithCodesAndMolsFromFile(sequence: string, format: string, libFileContent: string) {
  // todo: find another solution
  // Replaces every ' R ' token in the molfile text with ' O '.
  const patchMol = (molfile: string): string => molfile.replace(/ R /g, ' O ');

  const codeToMol: { [code: string]: string } = {};
  // todo: type
  const lib: any[] = JSON.parse(libFileContent); //consider using library

  for (const item of lib) {
    for (const synthesizer of Object.keys(item[CODES])) {
      if (synthesizer !== format)
        continue;
      for (const technology of Object.keys(item[CODES][synthesizer])) {
        const mol = patchMol(item[MOL]);
        for (const code of item[CODES][synthesizer][technology])
          codeToMol[code] = mol;
      }
    }
  }

  codeToMol[DELIMITER] = '';

  // TODO: create object based on synthesizer type to avoid key(codes) duplicates
  const output = isValidSequence(sequence, format);
  let gOwner: string | undefined;
  if (output.synthesizer!.includes(SYNTHESIZERS.MERMADE_12))
    gOwner = SYNTHESIZERS.MERMADE_12;
  else if (output.synthesizer!.includes(SYNTHESIZERS.AXOLABS))
    gOwner = SYNTHESIZERS.AXOLABS;

  if (gOwner !== undefined) {
    // todo: remove as quickfix, optimize access to 'g'
    for (const item of lib) {
      for (const synthesizer of Object.keys(item[CODES])) {
        for (const technology of Object.keys(item[CODES][synthesizer])) {
          for (const code of item[CODES][synthesizer][technology]) {
            const isTargetG =
              (code === 'g') &&
              (synthesizer === gOwner) &&
              (technology === TECHNOLOGIES.SI_RNA);
            if (isTargetG)
              codeToMol[code] = patchMol(item[MOL]);
          }
        }
      }
    }
  }
  return codeToMol;
}
|
|
@@ -2,7 +2,8 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import {getAllCodesOfSynthesizer} from './sequence-codes-tools';
|
|
3
3
|
import {differenceOfTwoArrays} from '../helpers';
|
|
4
4
|
|
|
5
|
-
export const
|
|
5
|
+
export const DELIMITER = ';';
|
|
6
|
+
export const NUCLEOTIDES = ['A', 'G', 'C', 'U', 'T'];
|
|
6
7
|
export const SYNTHESIZERS = {
|
|
7
8
|
RAW_NUCLEOTIDES: 'Raw Nucleotides',
|
|
8
9
|
BIOSPRING: 'BioSpring Codes',
|
|
@@ -46,7 +47,7 @@ export const map: {[synthesizer: string]:
|
|
|
46
47
|
'SMILES': 'OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1O',
|
|
47
48
|
},
|
|
48
49
|
'T': {
|
|
49
|
-
'name': '
|
|
50
|
+
'name': 'Thymine',
|
|
50
51
|
'weight': 304.2,
|
|
51
52
|
'normalized': 'dT',
|
|
52
53
|
'SMILES': 'OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O',
|
|
@@ -148,7 +149,7 @@ export const map: {[synthesizer: string]:
|
|
|
148
149
|
'SMILES': 'OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)C[C@@H]1O',
|
|
149
150
|
},
|
|
150
151
|
'T': {
|
|
151
|
-
'name': '
|
|
152
|
+
'name': 'Thymine',
|
|
152
153
|
'weight': 304.2,
|
|
153
154
|
'normalized': 'dT',
|
|
154
155
|
'SMILES': 'OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O',
|
|
@@ -362,13 +363,13 @@ export const map: {[synthesizer: string]:
|
|
|
362
363
|
'SMILES': 'OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)C[C@@H]1O',
|
|
363
364
|
},
|
|
364
365
|
'T': {
|
|
365
|
-
'name': '
|
|
366
|
+
'name': 'Thymine',
|
|
366
367
|
'weight': 304.2,
|
|
367
368
|
'normalized': 'dT',
|
|
368
369
|
'SMILES': 'OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O',
|
|
369
370
|
},
|
|
370
371
|
'dT': {
|
|
371
|
-
'name': '
|
|
372
|
+
'name': 'Thymine',
|
|
372
373
|
'weight': 304.2,
|
|
373
374
|
'normalized': 'dT',
|
|
374
375
|
'SMILES': 'OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O',
|
|
@@ -550,8 +551,6 @@ export const map: {[synthesizer: string]:
|
|
|
550
551
|
},
|
|
551
552
|
},
|
|
552
553
|
},
|
|
553
|
-
// 'LCMS': {
|
|
554
|
-
// 'For 2\'-OMe and 2\'-F modified siRNA': {
|
|
555
554
|
};
|
|
556
555
|
|
|
557
556
|
export const lcmsToGcrs = `LCMS, GCRS
|