@datagrok/sequence-translator 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
/**
 * Lookup table of modifications, keyed by the code exactly as it is written in a
 * sequence string (parentheses included).
 *
 * Each entry holds:
 *  - `molecularWeight`: numeric weight of the modification (units are not stated here);
 *  - `left`:  SMILES fragment that `sequenceToSmiles` appends when the code sits in the
 *             first half of the parsed code list;
 *  - `right`: SMILES fragment that `sequenceToSmiles` appends when the code sits in the
 *             second half (`i >= codesList.length / 2`).
 */
+ export const MODIFICATIONS: {[index: string]: {molecularWeight: number, left: string, right: string}} = {
2
// For this entry `left` and `right` are the same SMILES string.
+ '(invabasic)': {
3
+ molecularWeight: 118.13,
4
+ left: 'O[C@@H]1C[C@@H]O[C@H]1CO',
5
+ right: 'O[C@@H]1C[C@@H]O[C@H]1CO',
6
+ },
7
// Each SMILES below is assembled from three string literals joined with `+`.
+ '(GalNAc-2-JNJ)': {
8
+ molecularWeight: 1273.3,
9
+ left: 'C(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)' +
10
+ '(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)' +
11
+ '(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)NC(=O)CCCC(=O)NCC(O)CO',
12
+ right: 'OCC(O)CNC(=O)CCCC(=O)NC(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)' +
13
+ '(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)'+
14
+ '(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)',
15
+ },
16
+ };
17
+
18
// SMILES fragment that `sequenceToSmiles` appends after a monomer's SMILES when the
// monomer is not a link code, is not listed as already including a standard link,
// and is not followed by a link code; it is also appended after every modification.
+ export const standardPhosphateLinkSmiles = 'OP(=O)(O)O';
@@ -1,6 +1,6 @@
1
1
  import {lcmsToGcrs} from './map';
2
2
  import * as DG from 'datagrok-api/dg';
3
- import {delimiter} from './map';
3
+ import {DELIMITER} from './map';
4
4
  //name: gcrsToLcms
5
5
  //input: string nucleotides {semType: GCRS}
6
6
  //output: string result {semType: LCMS}
@@ -10,14 +10,14 @@ export function gcrsToLcms(sequence: string): string {
10
10
  const arr2 = df.getCol('LCMS').toList();
11
11
  const obj: {[i: string]: string} = {};
12
12
  arr1.forEach((element, index) => obj[element] = arr2[index]);
13
- obj[delimiter] = delimiter;
13
+ obj[DELIMITER] = DELIMITER;
14
14
  // for (let i = 0; i < arr1.length; i++) {
15
15
  // arr1[i] = arr1[i].replace('(', '\\(');
16
16
  // arr1[i] = arr1[i].replace(')', '\\)');
17
17
  // }
18
18
  // const regExp = new RegExp('(' + arr1.join('|') + ')', 'g');
19
19
  // let r1 = sequence.replace(regExp, function(code) {return obj[code];});
20
- const codes = arr1.concat(delimiter).sort(function(a, b) {return b.length - a.length;});
20
+ const codes = arr1.concat(DELIMITER).sort(function(a, b) {return b.length - a.length;});
21
21
  let i = 0;
22
22
  let r1 = '';
23
23
  while (i < sequence.length) {
@@ -1,44 +1,104 @@
1
- import {map, stadardPhosphateLinkSmiles, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS, delimiter} from './map';
1
+ // import {map, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS, DELIMITER} from './map';
2
+ import {map, SYNTHESIZERS, TECHNOLOGIES, DELIMITER} from './map';
2
3
  import {isValidSequence} from './sequence-codes-tools';
3
- import {getNucleotidesMol} from './mol-transformations';
4
4
  import {sortByStringLengthInDescendingOrder} from '../helpers';
5
+ import {getMonomerWorks} from '../package';
6
+ import {getNucleotidesMol} from './mol-transformations';
5
7
 
6
- export function sequenceToMolV3000(sequence: string, inverted: boolean = false, oclRender: boolean = false,
7
- format: string): string {
8
- const obj = getObjectWithCodesAndSmiles(sequence, format);
9
- let codes = sortByStringLengthInDescendingOrder(Object.keys(obj));
8
+ import {standardPhosphateLinkSmiles, MODIFICATIONS} from './const';
9
+ import {getMonomerLib} from '../package';
10
+ // todo: remove
11
+ // const NAME = 'name';
12
// Property names read from each entry of the parsed JSON monomer library in
// getObjectWithCodesAndMolsFromFile: `item[CODES]` (codes per synthesizer/technology)
// and `item[MOL]` (the entry's molfile text).
+ const CODES = 'codes';
13
+ // const SMILES = 'smiles';
14
+ const MOL = 'molfile';
15
+
16
/**
 * Converts a coded sequence string into a single molecule string by splitting it into
 * codes, mapping the codes to monomer names, looking each monomer up in the monomer
 * library and passing the collected molfiles to `getNucleotidesMol`.
 *
 * This span interleaves removed (`-`) lines of the previous SMILES-based implementation
 * with the added (`+`) lines; the comments describe the added version.
 *
 * @param sequence  Sequence string made of codes known to `getCodeToNameMap`,
 *                  keys of `MODIFICATIONS`, and `DELIMITER`.
 * @param inverted  When true, parsed codes are collected in reverse order (`unshift`).
 * @param oclRender Not referenced anywhere in the added body.
 * @param format    Passed through to `getCodeToNameMap` to choose which codes apply.
 * @returns The value returned by `getNucleotidesMol` for the collected molfiles.
 */
+ export function sequenceToMolV3000(
17
+ sequence: string, inverted: boolean = false, oclRender: boolean = false,
18
+ format: string,
19
+ ): string {
20
+ const monomerNameFromCode = getCodeToNameMap(sequence, format);
21
+ let codes = sortByStringLengthInDescendingOrder(Object.keys(monomerNameFromCode));
10
22
  let i = 0;
11
- const smilesCodes:string[] = [];
12
23
  const codesList = [];
13
24
  const links = ['s', 'ps', '*'];
14
25
  const includesStandardLinkAlready = ['e', 'h', /*'g',*/ 'f', 'i', 'l', 'k', 'j'];
15
26
  const dropdowns = Object.keys(MODIFICATIONS);
16
- codes = codes.concat(dropdowns).concat(delimiter);
27
// Modification codes and the delimiter are appended after the sort, so `find` below
// tries them only after every code from the name map.
+ codes = codes.concat(dropdowns).concat(DELIMITER);
17
28
// Tokenize: at each position take the first entry of `codes` that matches the text there.
  while (i < sequence.length) {
18
- const code = codes.find((s: string) => s == sequence.slice(i, i + s.length))!;
29
// NOTE(review): `find` returns undefined when nothing matches at position i; the `!`
// hides that, and the following `code.length` would then throw a TypeError.
+ const code = codes.find((s: string) => s === sequence.slice(i, i + s.length))!;
19
30
  i += code.length;
20
31
  inverted ? codesList.unshift(code) : codesList.push(code);
21
32
  }
33
+
34
+ const monomers: string[] = [];
35
+
22
36
// Translate codes to monomer names. 'p linkage' is appended after a monomer unless the
// code is itself a link, is listed in includesStandardLinkAlready, or is followed by a link.
  for (let i = 0; i < codesList.length; i++) {
23
- if (dropdowns.includes(codesList[i])) {
24
- smilesCodes.push((i >= codesList.length / 2) ?
25
- MODIFICATIONS[codesList[i]].right : MODIFICATIONS[codesList[i]].left);
26
- if (!(i < codesList.length - 1 && links.includes(codesList[i + 1])))
27
- smilesCodes.push(stadardPhosphateLinkSmiles);
28
- } else {
29
- if (links.includes(codesList[i]) ||
30
- includesStandardLinkAlready.includes(codesList[i]) ||
31
- (i < codesList.length - 1 && links.includes(codesList[i + 1]))
32
- )
33
- smilesCodes.push(obj[codesList[i]]);
34
- else {
35
- smilesCodes.push(obj[codesList[i]]);
36
- smilesCodes.push(stadardPhosphateLinkSmiles);
37
- }
37
+ if (links.includes(codesList[i]) ||
38
+ includesStandardLinkAlready.includes(codesList[i]) ||
39
+ (i < codesList.length - 1 && links.includes(codesList[i + 1]))
40
+ ) {
41
// Codes without an entry in the name map fall back to the raw code text.
+ let aa = monomerNameFromCode[codesList[i]];
42
+ if(aa !== undefined)
43
+ monomers.push(aa);
44
+ else
45
+ monomers.push(codesList[i]);
46
+ }
47
+ else {
48
+ let aa = monomerNameFromCode[codesList[i]];
49
+ if(aa !== undefined)
50
+ monomers.push(aa);
51
+ else
52
+ monomers.push(codesList[i]);
53
+ monomers.push('p linkage');
54
+ }
55
+ }
56
+
57
// Resolve every monomer name to its molfile through the monomer library ('RNA' entries).
+ const lib = getMonomerLib();
58
+ const mols: string [] = [];
59
+ for(let i = 0; i < monomers.length; i++) {
60
+ const mnmr = lib?.getMonomer('RNA', monomers[i]);
61
// NOTE(review): if `lib` is null/undefined or the monomer is not found, `mnmr?.molfile`
// is undefined and the `!` lets undefined be pushed into `mols` — confirm that
// getNucleotidesMol tolerates this.
+ mols.push(mnmr?.molfile!);
62
+ }
63
+
64
+
65
+ return getNucleotidesMol(mols);
66
+ //return getMonomerWorks()?.getAtomicLevel(monomers, 'RNA')!;
67
+ }
68
+
69
/**
 * Variant of `sequenceToMolV3000` that tokenizes the sequence the same way but hands the
 * resulting monomer-name list to `getMonomerWorks()?.getAtomicLevel(monomers, 'RNA')`
 * instead of looking molfiles up itself.
 *
 * The tail of this span contains context lines and one removed (`-`) line from the
 * previous implementation; the comments describe the added version.
 *
 * @param sequence  Sequence string to split into codes.
 * @param inverted  When true, parsed codes are collected in reverse order (`unshift`).
 * @param oclRender Not referenced anywhere in the added body.
 * @param format    Passed through to `getCodeToNameMap`.
 * @returns The result of `getAtomicLevel`.
 *          NOTE(review): the optional chain can yield undefined when `getMonomerWorks()`
 *          returns null/undefined; the trailing `!` hides that from the type checker.
 */
+ export function sequenceToMolV3000_new(
70
+ sequence: string, inverted: boolean = false, oclRender: boolean = false,
71
+ format: string,
72
+ ): string {
73
+ const monomerNameFromCode = getCodeToNameMap(sequence, format);
74
+ let codes = sortByStringLengthInDescendingOrder(Object.keys(monomerNameFromCode));
75
+ let i = 0;
76
+ const codesList = [];
77
+ const links = ['s', 'ps', '*'];
78
+ const includesStandardLinkAlready = ['e', 'h', /*'g',*/ 'f', 'i', 'l', 'k', 'j'];
79
+ const dropdowns = Object.keys(MODIFICATIONS);
80
// Modification codes and the delimiter are appended after the sort, so they are tried last.
+ codes = codes.concat(dropdowns).concat(DELIMITER);
81
+ while (i < sequence.length) {
82
// NOTE(review): `find` returns undefined when nothing matches at position i; the
// following `code.length` would then throw a TypeError.
+ const code = codes.find((s: string) => s === sequence.slice(i, i + s.length))!;
83
+ i += code.length;
84
+ inverted ? codesList.unshift(code) : codesList.push(code);
85
+ }
86
+
87
+ const monomers: string[] = [];
88
+
89
// Unlike sequenceToMolV3000 there is no fallback here: a code with no entry in the
// name map pushes undefined into `monomers`.
+ for (let i = 0; i < codesList.length; i++) {
90
+ if (links.includes(codesList[i]) ||
91
+ includesStandardLinkAlready.includes(codesList[i]) ||
92
+ (i < codesList.length - 1 && links.includes(codesList[i + 1]))
93
+ )
94
+ monomers.push(monomerNameFromCode[codesList[i]]);
95
+ else {
96
+ monomers.push(monomerNameFromCode[codesList[i]]);
97
+ monomers.push('p linkage');
38
98
  }
39
99
  }
40
100
 
41
- return getNucleotidesMol(smilesCodes);
101
+ return getMonomerWorks()?.getAtomicLevel(monomers, 'RNA')!;
42
102
  }
43
103
 
44
104
  export function sequenceToSmiles(sequence: string, inverted: boolean = false, format: string): string {
@@ -50,7 +110,7 @@ export function sequenceToSmiles(sequence: string, inverted: boolean = false, fo
50
110
  const links = ['s', 'ps', '*'];
51
111
  const includesStandardLinkAlready = ['e', 'h', /*'g',*/ 'f', 'i', 'l', 'k', 'j'];
52
112
  const dropdowns = Object.keys(MODIFICATIONS);
53
- codes = codes.concat(dropdowns).concat(delimiter);
113
+ codes = codes.concat(dropdowns).concat(DELIMITER);
54
114
  while (i < sequence.length) {
55
115
  const code = codes.find((s: string) => s == sequence.slice(i, i + s.length))!;
56
116
  i += code.length;
@@ -59,8 +119,8 @@ export function sequenceToSmiles(sequence: string, inverted: boolean = false, fo
59
119
  for (let i = 0; i < codesList.length; i++) {
60
120
  if (dropdowns.includes(codesList[i])) {
61
121
  smiles += (i >= codesList.length / 2) ?
62
- MODIFICATIONS[codesList[i]].right + stadardPhosphateLinkSmiles:
63
- MODIFICATIONS[codesList[i]].left + stadardPhosphateLinkSmiles;
122
+ MODIFICATIONS[codesList[i]].right + standardPhosphateLinkSmiles :
123
+ MODIFICATIONS[codesList[i]].left + standardPhosphateLinkSmiles;
64
124
  } else {
65
125
  if (links.includes(codesList[i]) ||
66
126
  includesStandardLinkAlready.includes(codesList[i]) ||
@@ -68,7 +128,7 @@ export function sequenceToSmiles(sequence: string, inverted: boolean = false, fo
68
128
  )
69
129
  smiles += obj[codesList[i]];
70
130
  else
71
- smiles += obj[codesList[i]] + stadardPhosphateLinkSmiles;
131
+ smiles += obj[codesList[i]] + standardPhosphateLinkSmiles;
72
132
  }
73
133
  }
74
134
  smiles = smiles.replace(/OO/g, 'O');
@@ -82,7 +142,34 @@ export function sequenceToSmiles(sequence: string, inverted: boolean = false, fo
82
142
  includesStandardLinkAlready.includes(codesList[codesList.length - 1])
83
143
  ) ?
84
144
  smiles :
85
- smiles.slice(0, smiles.length - stadardPhosphateLinkSmiles.length + 1);
145
+ smiles.slice(0, smiles.length - standardPhosphateLinkSmiles.length + 1);
146
+ }
147
+
148
/**
 * Builds a dictionary from sequence code to monomer name using the `name` field of the
 * entries in `map`.
 *
 * @param sequence Sequence string; only used for the `isValidSequence` call that decides
 *                 which synthesizer's 'g' entry wins.
 * @param format   Synthesizer key into `map`. When it is null/undefined, codes from every
 *                 synthesizer and technology are merged, later ones overwriting earlier
 *                 ones that share a code.
 * @returns Object mapping each code to its name, with `DELIMITER` mapped to ''.
 */
+ function getCodeToNameMap(sequence: string, format: string) {
149
+ const obj: { [code: string]: string } = {};
150
+ const NAME = 'name';
151
+ if (format == null) {
152
+ for (const synthesizer of Object.keys(map)) {
153
+ for (const technology of Object.keys(map[synthesizer])) {
154
+ for (const code of Object.keys(map[synthesizer][technology]))
155
+ obj[code] = map[synthesizer][technology][code][NAME]!;
156
+ }
157
+ }
158
+ } else {
159
+ for (const technology of Object.keys(map[format])) {
160
+ for (const code of Object.keys(map[format][technology]))
161
+ obj[code] = map[format][technology][code][NAME]!;
162
+ // obj[code] = map[format][technology][code].SMILES;
163
+ }
164
+ }
165
+ obj[DELIMITER] = '';
166
+ // TODO: create object based from synthesizer type to avoid key(codes) duplicates
167
// The code 'g' is overridden explicitly with the siRNA entry of the detected synthesizer.
// NOTE(review): `output.synthesizer!` throws if isValidSequence leaves `synthesizer`
// null/undefined — confirm it is always populated.
+ const output = isValidSequence(sequence, format);
168
+ if (output.synthesizer!.includes(SYNTHESIZERS.MERMADE_12))
169
+ obj['g'] = map[SYNTHESIZERS.MERMADE_12][TECHNOLOGIES.SI_RNA]['g'][NAME]!;
170
+ else if (output.synthesizer!.includes(SYNTHESIZERS.AXOLABS))
171
+ obj['g'] = map[SYNTHESIZERS.AXOLABS][TECHNOLOGIES.SI_RNA]['g'][NAME]!;
172
+ return obj;
86
173
  }
87
174
 
88
175
  function getObjectWithCodesAndSmiles(sequence: string, format: string) {
@@ -100,7 +187,7 @@ function getObjectWithCodesAndSmiles(sequence: string, format: string) {
100
187
  obj[code] = map[format][technology][code].SMILES;
101
188
  }
102
189
  }
103
- obj[delimiter] = '';
190
+ obj[DELIMITER] = '';
104
191
  // TODO: create object based from synthesizer type to avoid key(codes) duplicates
105
192
  const output = isValidSequence(sequence, format);
106
193
  if (output.synthesizer!.includes(SYNTHESIZERS.MERMADE_12))
@@ -109,3 +196,72 @@ function getObjectWithCodesAndSmiles(sequence: string, format: string) {
109
196
  obj['g'] = map[SYNTHESIZERS.AXOLABS][TECHNOLOGIES.SI_RNA]['g'].SMILES;
110
197
  return obj;
111
198
  }
199
+
200
/**
 * Builds a dictionary from sequence code to molfile text out of a JSON monomer library.
 *
 * The library is parsed as an array; each item is read through `item[CODES]`
 * (synthesizer -> technology -> list of codes) and `item[MOL]` (molfile text).
 * Every occurrence of ' R ' in a molfile is replaced with ' O ' before it is stored.
 *
 * @param sequence       Sequence string; only used for the `isValidSequence` call that
 *                       decides which synthesizer's 'g' entry wins.
 * @param format         Synthesizer name; only codes listed under this synthesizer are
 *                       collected in the first pass.
 * @param libFileContent JSON text of the monomer library.
 *                       NOTE(review): the parsed value is used without any shape validation.
 * @returns Object mapping each code to its (patched) molfile, with `DELIMITER` mapped to ''.
 */
+ function getObjectWithCodesAndMolsFromFile(sequence: string, format: string, libFileContent: string) {
201
+ const obj: { [code: string]: string } = {};
202
+ // todo: type
203
+ const lib: any[] = JSON.parse(libFileContent); //consider using library
204
+
205
// First pass: collect every code registered under the requested synthesizer.
+ for (const item of lib) {
206
+ for (const synthesizer of Object.keys(item[CODES])) {
207
+ if (synthesizer === format) {
208
+ for (const technology of Object.keys(item[CODES][synthesizer])) {
209
+ const codes = item[CODES][synthesizer][technology];
210
+ let mol: string = item[MOL];
211
+ // todo: find another solution
212
+ mol = mol.replace(/ R /g, ' O ');
213
+
214
+ for (const code of codes)
215
+ obj[code] = mol;
216
+ }
217
+ }
218
+ }
219
+ }
220
+
221
+ obj[DELIMITER] = '';
222
+ // TODO: create object based on synthesizer type to avoid key(codes) duplicates
223
// Second pass: override 'g' with the siRNA entry of the detected synthesizer. The two
// branches below are identical except for the synthesizer constant they compare against.
// NOTE(review): `output.synthesizer!` throws if `synthesizer` is null/undefined.
+ const output = isValidSequence(sequence, format);
224
+ if (output.synthesizer!.includes(SYNTHESIZERS.MERMADE_12)) {
225
+ // todo: remove as quickfix, optimize access to 'g'
226
+ for (const item of lib) {
227
+ for (const synthesizer of Object.keys(item[CODES])) {
228
+ for (const technology of Object.keys(item[CODES][synthesizer])) {
229
+ const codes = item[CODES][synthesizer][technology];
230
+ for (const code of codes) {
231
+ const condition =
232
+ (code === 'g') &&
233
+ (synthesizer === SYNTHESIZERS.MERMADE_12) &&
234
+ (technology === TECHNOLOGIES.SI_RNA);
235
+ if (condition) {
236
+ let mol: string = item[MOL];
237
+ // todo: find another solution
238
+ mol = mol.replace(/ R /g, ' O ');
239
+ obj[code] = mol;
240
+ }
241
+ }
242
+ }
243
+ }
244
+ }
245
+ } else if (output.synthesizer!.includes(SYNTHESIZERS.AXOLABS)) {
246
+ for (const item of lib) {
247
+ for (const synthesizer of Object.keys(item[CODES])) {
248
+ for (const technology of Object.keys(item[CODES][synthesizer])) {
249
+ const codes = item[CODES][synthesizer][technology];
250
+ for (const code of codes) {
251
+ const condition =
252
+ (code === 'g') &&
253
+ (synthesizer === SYNTHESIZERS.AXOLABS) &&
254
+ (technology === TECHNOLOGIES.SI_RNA);
255
+ if (condition) {
256
+ let mol: string = item[MOL];
257
+ // todo: find another solution
258
+ mol = mol.replace(/ R /g, ' O ');
259
+ obj[code] = mol;
260
+ }
261
+ }
262
+ }
263
+ }
264
+ }
265
+ }
266
+ return obj;
267
+ }
@@ -2,7 +2,8 @@ import * as DG from 'datagrok-api/dg';
2
2
  import {getAllCodesOfSynthesizer} from './sequence-codes-tools';
3
3
  import {differenceOfTwoArrays} from '../helpers';
4
4
 
5
- export const delimiter = ';';
5
// Separator token recognised inside sequence strings: the converters add it to their
// code lists, gcrsToLcms maps it to itself, and the code-lookup builders map it to ''.
+ export const DELIMITER = ';';
6
// Single-letter codes; presumably the raw nucleotide letters — no use is visible in this diff.
+ export const NUCLEOTIDES = ['A', 'G', 'C', 'U', 'T'];
6
7
  export const SYNTHESIZERS = {
7
8
  RAW_NUCLEOTIDES: 'Raw Nucleotides',
8
9
  BIOSPRING: 'BioSpring Codes',
@@ -46,7 +47,7 @@ export const map: {[synthesizer: string]:
46
47
  'SMILES': 'OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1O',
47
48
  },
48
49
  'T': {
49
- 'name': 'Tyrosine',
50
+ 'name': 'Thymine',
50
51
  'weight': 304.2,
51
52
  'normalized': 'dT',
52
53
  'SMILES': 'OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O',
@@ -148,7 +149,7 @@ export const map: {[synthesizer: string]:
148
149
  'SMILES': 'OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)C[C@@H]1O',
149
150
  },
150
151
  'T': {
151
- 'name': 'Tyrosine',
152
+ 'name': 'Thymine',
152
153
  'weight': 304.2,
153
154
  'normalized': 'dT',
154
155
  'SMILES': 'OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O',
@@ -362,13 +363,13 @@ export const map: {[synthesizer: string]:
362
363
  'SMILES': 'OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)C[C@@H]1O',
363
364
  },
364
365
  'T': {
365
- 'name': 'Tyrosine',
366
+ 'name': 'Thymine',
366
367
  'weight': 304.2,
367
368
  'normalized': 'dT',
368
369
  'SMILES': 'OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O',
369
370
  },
370
371
  'dT': {
371
- 'name': 'Tyrosine',
372
+ 'name': 'Thymine',
372
373
  'weight': 304.2,
373
374
  'normalized': 'dT',
374
375
  'SMILES': 'OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O',
@@ -550,8 +551,6 @@ export const map: {[synthesizer: string]:
550
551
  },
551
552
  },
552
553
  },
553
- // 'LCMS': {
554
- // 'For 2\'-OMe and 2\'-F modified siRNA': {
555
554
  };
556
555
 
557
556
  export const lcmsToGcrs = `LCMS, GCRS