@datagrok/sequence-translator 1.0.9 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@datagrok/sequence-translator",
3
3
  "friendlyName": "Sequence Translator",
4
- "version": "1.0.9",
4
+ "version": "1.0.10",
5
5
  "author": {
6
6
  "name": "Vadym Kovadlo",
7
7
  "email": "vkovadlo@datagrok.ai"
@@ -13,7 +13,7 @@
13
13
  "directory": "packages/SequenceTranslator"
14
14
  },
15
15
  "dependencies": {
16
- "@datagrok-libraries/utils": "^1.9.2",
16
+ "@datagrok-libraries/utils": "^1.11.1",
17
17
  "@types/react": "^18.0.15",
18
18
  "datagrok-api": "^1.6.0",
19
19
  "datagrok-tools": "^4.1.2",
@@ -1,11 +1,12 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
- import {siRnaAxolabsToGcrs, gcrsToNucleotides, asoGapmersBioSpringToGcrs, gcrsToMermade12,
5
- } from '../structures-works/converters';
4
+ import {siRnaBioSpringToGcrs, siRnaAxolabsToGcrs, gcrsToNucleotides, asoGapmersBioSpringToGcrs, gcrsToMermade12,
5
+ siRnaNucleotidesToGcrs} from '../structures-works/converters';
6
6
  import {map, COL_NAMES, MODIFICATIONS} from '../structures-works/map';
7
7
  import {isValidSequence} from '../structures-works/sequence-codes-tools';
8
8
  import {sequenceToMolV3000} from '../structures-works/from-monomers';
9
+ import {linkV3000} from '../structures-works/mol-transformations';
9
10
 
10
11
  import {SALTS_CSV} from '../salts';
11
12
  import {USERS_CSV} from '../users';
@@ -54,6 +55,10 @@ function molecularWeight(sequence: string, weightsObj: {[index: string]: number}
54
55
  return weight - 61.97;
55
56
  }
56
57
 
58
+ function parseStrandsFromDuplexCell(s: string): string[] {
59
+ return s.slice(3).split('\r\nAS ');
60
+ }
61
+
57
62
  async function saveTableAsSdFile(table: DG.DataFrame) {
58
63
  if (!table.columns.contains('Compound Name')) {
59
64
  grok.shell.warning(
@@ -67,9 +72,23 @@ async function saveTableAsSdFile(table: DG.DataFrame) {
67
72
  let result = '';
68
73
  for (let i = 0; i < table.rowCount; i++) {
69
74
  const format = 'Janssen GCRS Codes'; //getFormat(structureColumn.get(i))!;
70
- result += (typeColumn.get(i) == 'SS') ?
71
- sequenceToMolV3000(structureColumn.get(i), false, true, format) + '\n' + `> <Sequence>\nSense Strand\n\n` :
72
- sequenceToMolV3000(structureColumn.get(i), true, true, format) + '\n' + `> <Sequence>\nAnti Sense\n\n`;
75
+ if (typeColumn.get(i) == 'Duplex') {
76
+ const array = parseStrandsFromDuplexCell(structureColumn.get(i));
77
+ const as = sequenceToMolV3000(array[1], true, true, format) +
78
+ '\n' + `> <Sequence>\nAnti Sense\n\n`;
79
+ const ss = sequenceToMolV3000(array[0], false, true, format) +
80
+ '\n' + `> <Sequence>\nSense Strand\n\n`;
81
+ result += linkV3000([ss, as], true, true) + '\n\n';
82
+ } else if (typeColumn.get(i) == 'SS') {
83
+ const molSS = sequenceToMolV3000(structureColumn.get(i), false, true, format) +
84
+ '\n' + `> <Sequence>\nSense Strand\n\n`;
85
+ result += molSS;
86
+ } else if (typeColumn.get(i) == 'AS') {
87
+ const molAS = sequenceToMolV3000(structureColumn.get(i), true, true, format) +
88
+ '\n' + `> <Sequence>\nAnti Sense\n\n`;
89
+ result += molAS;
90
+ }
91
+
73
92
  for (const col of table.columns) {
74
93
  if (col.name != COL_NAMES.SEQUENCE)
75
94
  result += `> <${col.name}>\n${col.get(i)}\n\n`;
@@ -90,6 +109,13 @@ export function autostartOligoSdFileSubscription() {
90
109
  grok.events.onContextMenu.subscribe((args) => {
91
110
  const seqCol = args.args.context.table.currentCol; // /^[fsACGUacgu]{6,}$/
92
111
  if (DG.Detector.sampleCategories(seqCol,
112
+ (s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C){6,}$/.test(s))) {
113
+ args.args.menu.item('Convert raw nucleotides to GCRS', () => {
114
+ args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
115
+ return siRnaNucleotidesToGcrs(seqCol.get(i));
116
+ });
117
+ });
118
+ } else if (DG.Detector.sampleCategories(seqCol,
93
119
  (s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|f|s|A|C|G|U|a|c|g|u){6,}$/.test(s))) {
94
120
  args.args.menu.item('Convert Axolabs to GCRS', () => {
95
121
  args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
@@ -121,7 +147,7 @@ export function autostartOligoSdFileSubscription() {
121
147
  (s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|\*|1|2|3|4|5|6|7|8){6,}$/.test(s))) {
122
148
  args.args.menu.item('Convert Biospring to GCRS', () => {
123
149
  args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
124
- return siRnaAxolabsToGcrs(seqCol.get(i));
150
+ return siRnaBioSpringToGcrs(seqCol.get(i));
125
151
  });
126
152
  });
127
153
  }
@@ -141,6 +167,7 @@ export function oligoSdFile(table: DG.DataFrame) {
141
167
  const saltCol = table.getCol(COL_NAMES.SALT);
142
168
  const equivalentsCol = table.getCol(COL_NAMES.EQUIVALENTS);
143
169
  const typeColumn = table.getCol(COL_NAMES.TYPE);
170
+ const chemistryNameCol = table.getCol(COL_NAMES.CHEMISTRY_NAME);
144
171
 
145
172
  const molWeightCol = saltsDf.getCol('MOLWEIGHT');
146
173
  const saltNamesList = saltsDf.getCol('DISPLAY').toList();
@@ -154,12 +181,17 @@ export function oligoSdFile(table: DG.DataFrame) {
154
181
  t.rows.removeAt(i, 1, false);
155
182
  }
156
183
 
157
- t.columns.addNewString(COL_NAMES.COMPOUND_NAME).init((i: number) => sequenceCol.get(i));
184
+ t.columns.addNewString(COL_NAMES.COMPOUND_NAME).init((i: number) => {
185
+ return (typeColumn.get(i) == 'Duplex') ? chemistryNameCol.get(i) : sequenceCol.get(i);
186
+ });
158
187
 
159
- t.columns.addNewString(COL_NAMES.COMPOUND_COMMENTS).init((i: number) => (i > 2 && typeColumn.get(i) == 'Duplex') ?
160
- sequenceCol.get(i) + '; duplex of SS: ' + sequenceCol.get(i - 2) + ' and AS: ' + sequenceCol.get(i - 1) :
161
- sequenceCol.get(i),
162
- );
188
+ t.columns.addNewString(COL_NAMES.COMPOUND_COMMENTS).init((i: number) => {
189
+ if (typeColumn.get(i) == 'Duplex') {
190
+ const arr = parseStrandsFromDuplexCell(sequenceCol.get(i));
191
+ return chemistryNameCol.get(i) + '; duplex of SS: ' + arr[0] + ' and AS: ' + arr[1];
192
+ }
193
+ return sequenceCol.get(i);
194
+ });
163
195
 
164
196
  const weightsObj: {[code: string]: number} = {};
165
197
  for (const synthesizer of Object.keys(map)) {
@@ -172,6 +204,15 @@ export function oligoSdFile(table: DG.DataFrame) {
172
204
  weightsObj[key] = value.molecularWeight;
173
205
 
174
206
  t.columns.addNewFloat(COL_NAMES.CPD_MW).init((i: number) => {
207
+ if (typeColumn.get(i) == 'Duplex') {
208
+ const arr = parseStrandsFromDuplexCell(sequenceCol.get(i));
209
+ return (
210
+ isValidSequence(arr[0], null).indexOfFirstNotValidChar == -1 &&
211
+ isValidSequence(arr[1], null).indexOfFirstNotValidChar == -1
212
+ ) ?
213
+ molecularWeight(arr[0], weightsObj) + molecularWeight(arr[1], weightsObj) :
214
+ DG.FLOAT_NULL;
215
+ }
175
216
  return (isValidSequence(sequenceCol.get(i), null).indexOfFirstNotValidChar == -1) ?
176
217
  molecularWeight(sequenceCol.get(i), weightsObj) :
177
218
  DG.FLOAT_NULL;
@@ -1,5 +1,5 @@
1
1
  const rnaColor = 'rgb(255,230,153)';
2
- const invAbasicColor = 'rgb(255,230,153)';
2
+ const invAbasicColor = 'rgb(203,119,211)';
3
3
  export const axolabsMap:
4
4
  {[index: string]: {fullName: string, symbols: [string, string, string, string], color: string}} =
5
5
  {
@@ -703,6 +703,7 @@ export function defineAxolabsPattern() {
703
703
  grok.shell.v = grok.shell.getTableView(tables.value!.name);
704
704
  grok.shell.info(((createAsStrand.value) ? 'Columns were' : 'Column was') +
705
705
  ' added to table \'' + tables.value!.name + '\'');
706
+ updateOutputExamples();
706
707
  }
707
708
  });
708
709
 
@@ -19,8 +19,7 @@ export function mainView() {
19
19
  try {
20
20
  sequence = sequence.replace(/\s/g, '');
21
21
  const output = isValidSequence(sequence, null);
22
- output.synthesizer = [inputFormat];
23
- // inputFormatChoiceInput.value = output.synthesizer![0];
22
+ inputFormatChoiceInput.value = output.synthesizer![0];
24
23
  const outputSequenceObj = convertSequence(sequence, output);
25
24
  const tableRows = [];
26
25
 
@@ -1,13 +1,16 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
- import {runTests, tests} from '@datagrok-libraries/utils/src/test';
2
+ import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
3
3
  import './tests/smiles-tests';
4
4
 
5
5
  export const _package = new DG.Package();
6
6
  export {tests};
7
7
 
8
8
  //name: test
9
+ //input: string category {optional: true}
10
+ //input: string test {optional: true}
11
+ //input: object testContext {optional: true}
9
12
  //output: dataframe result
10
- export async function test(): Promise<DG.DataFrame> {
11
- const data = await runTests();
13
+ export async function test(category: string, test: string, testContext: TestContext): Promise<DG.DataFrame> {
14
+ const data = await runTests({category, test, testContext});
12
15
  return DG.DataFrame.fromObjects(data)!;
13
16
  }
@@ -96,9 +96,10 @@ export function asoGapmersBioSpringToGcrs(nucleotides: string): string {
96
96
  //output: string result {semType: BioSpring / Gapmers}
97
97
  export function asoGapmersGcrsToBioSpring(nucleotides: string): string {
98
98
  const obj: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
99
+ 'fU': '1', 'fA': '2', 'fC': '3', 'fG': '4', 'mU': '5', 'mA': '6', 'mC': '7', 'mG': '8',
99
100
  'moeT': '5', 'moeA': '6', 'moe5mC': '7', 'moeG': '8', 'moeU': '5', '5mC': '9', 'nps': '*', 'ps': '*', 'U': 'T',
100
101
  };
101
- return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g,
102
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g,
102
103
  function(x: string) {return obj[x];});
103
104
  }
104
105
 
@@ -78,6 +78,480 @@ M V30 END COLLECTION
78
78
  M V30 END CTAB
79
79
  M END`;
80
80
 
81
+ const GALNAC = `
82
+ Datagrok monomer library Nucleotides
83
+
84
+ 0 0 0 0 0 999 V3000
85
+ M V30 BEGIN CTAB
86
+ M V30 COUNTS 111 113 0 0 0
87
+ M V30 BEGIN ATOM
88
+ M V30 1 O -20.7313 -0.7027 0 0
89
+ M V30 2 C -19.3976 0.0673 0 0
90
+ M V30 3 C -18.0638 -0.7027 0 0
91
+ M V30 4 C -16.7303 0.0673 0 0
92
+ M V30 5 N -15.3965 -0.7027 0 0
93
+ M V30 6 C -14.0628 0.0673 0 0
94
+ M V30 7 C -12.7293 -0.7027 0 0
95
+ M V30 8 C -11.3955 0.0673 0 0
96
+ M V30 9 C -10.062 -0.7027 0 0
97
+ M V30 10 C -8.7283 0.0673 0 0
98
+ M V30 11 N -7.3947 -0.7027 0 0
99
+ M V30 12 O -18.0638 -2.2427 0 0
100
+ M V30 13 O -14.0628 1.6073 0 0
101
+ M V30 14 O -8.7283 1.6073 0 0
102
+ M V30 15 C -5.8547 -0.7027 0 0
103
+ M V30 16 C -5.8547 0.8373 0 0
104
+ M V30 17 C -5.8547 -2.2427 0 0
105
+ M V30 18 C -3.4848 -3.0127 0 0
106
+ M V30 19 C -2.4544 -4.157 0 0
107
+ M V30 20 C -0.948 -3.8368 0 0
108
+ M V30 21 N 0.0824 -4.9813 0 0
109
+ M V30 22 C 1.5888 -4.6612 0 0
110
+ M V30 23 C 2.6192 -5.8056 0 0
111
+ M V30 24 C 4.1256 -5.4855 0 0
112
+ M V30 25 N 5.156 -6.6297 0 0
113
+ M V30 26 C 6.6624 -6.3096 0 0
114
+ M V30 27 C 7.6928 -7.4541 0 0
115
+ M V30 28 C 9.1992 -7.1339 0 0
116
+ M V30 29 C 10.2296 -8.2784 0 0
117
+ M V30 30 C 11.736 -7.9583 0 0
118
+ M V30 31 O 12.7664 -9.1027 0 0
119
+ M V30 32 O -0.4722 -2.3723 0 0
120
+ M V30 33 O 7.1382 -4.845 0 0
121
+ M V30 34 C 14.2728 -8.7824 0 0
122
+ M V30 35 C 15.3032 -9.9267 0 0
123
+ M V30 36 C 16.8098 -9.6065 0 0
124
+ M V30 37 C 17.2856 -8.1421 0 0
125
+ M V30 38 C 16.2552 -6.9975 0 0
126
+ M V30 39 O 14.7486 -7.3178 0 0
127
+ M V30 40 C 16.7312 -5.5329 0 0
128
+ M V30 41 O 18.7918 -7.8218 0 0
129
+ M V30 42 O 17.8404 -10.751 0 0
130
+ M V30 43 N 14.8274 -11.3914 0 0
131
+ M V30 44 C 15.7325 -12.6372 0 0
132
+ M V30 45 C 15.2567 -14.1018 0 0
133
+ M V30 46 O 17.2537 -12.3963 0 0
134
+ M V30 47 O 18.2628 -5.372 0 0
135
+ M V30 48 O -4.9494 -3.4885 0 0
136
+ M V30 49 C -4.521 0.0673 0 0
137
+ M V30 50 C -1.9414 0.2026 0 0
138
+ M V30 51 C -0.6077 -0.5674 0 0
139
+ M V30 52 C 0.726 0.2026 0 0
140
+ M V30 53 N 2.0596 -0.5674 0 0
141
+ M V30 54 C 3.3933 0.2026 0 0
142
+ M V30 55 C 4.7271 -0.5674 0 0
143
+ M V30 56 C 6.0606 0.2026 0 0
144
+ M V30 57 N 7.3943 -0.5674 0 0
145
+ M V30 58 C 8.7281 0.2026 0 0
146
+ M V30 59 C 10.0618 -0.5674 0 0
147
+ M V30 60 C 11.3953 0.2026 0 0
148
+ M V30 61 C 14.0628 0.2026 0 0
149
+ M V30 62 O 15.3964 -0.5674 0 0
150
+ M V30 63 O 0.726 1.7426 0 0
151
+ M V30 64 O 8.7281 1.7426 0 0
152
+ M V30 65 C 16.7301 0.2026 0 0
153
+ M V30 66 C 18.0638 -0.5676 0 0
154
+ M V30 67 C 19.3976 0.2026 0 0
155
+ M V30 68 C 19.3974 1.7426 0 0
156
+ M V30 69 C 18.0638 2.5126 0 0
157
+ M V30 70 O 16.7301 1.7426 0 0
158
+ M V30 71 C 18.064 4.0526 0 0
159
+ M V30 72 O 20.7311 2.5126 0 0
160
+ M V30 73 O 20.7313 -0.5674 0 0
161
+ M V30 74 N 18.0638 -2.1076 0 0
162
+ M V30 75 C 19.3096 -3.0127 0 0
163
+ M V30 76 C 19.3096 -4.5527 0 0
164
+ M V30 77 O 20.6818 -2.3135 0 0
165
+ M V30 78 O 19.4709 4.6791 0 0
166
+ M V30 79 O -3.1872 -0.7027 0 0
167
+ M V30 80 C 12.7291 -0.5674 0 0
168
+ M V30 81 C -3.919 3.2277 0 0
169
+ M V30 82 C -2.4126 2.9076 0 0
170
+ M V30 83 C -1.3822 4.0519 0 0
171
+ M V30 84 N 0.1242 3.7317 0 0
172
+ M V30 85 C 1.1546 4.8762 0 0
173
+ M V30 86 C 2.661 4.5561 0 0
174
+ M V30 87 C 3.6914 5.7005 0 0
175
+ M V30 88 N 5.1978 5.3804 0 0
176
+ M V30 89 C 6.2282 6.5248 0 0
177
+ M V30 90 C 7.7346 6.2045 0 0
178
+ M V30 91 C 8.765 7.349 0 0
179
+ M V30 92 C 10.2714 7.0288 0 0
180
+ M V30 93 C 11.3018 8.1733 0 0
181
+ M V30 94 O 12.8082 7.8532 0 0
182
+ M V30 95 O -1.858 5.5167 0 0
183
+ M V30 96 O 5.7524 7.9894 0 0
184
+ M V30 97 C 13.8386 8.9976 0 0
185
+ M V30 98 C 15.345 8.6773 0 0
186
+ M V30 99 C 16.3756 9.8219 0 0
187
+ M V30 100 C 15.8996 11.2863 0 0
188
+ M V30 101 C 14.3934 11.6068 0 0
189
+ M V30 102 O 13.3628 10.4622 0 0
190
+ M V30 103 C 13.9176 13.0714 0 0
191
+ M V30 104 O 16.93 12.4308 0 0
192
+ M V30 105 O 17.882 9.5018 0 0
193
+ M V30 106 N 15.8208 7.2127 0 0
194
+ M V30 107 C 17.2856 6.7367 0 0
195
+ M V30 108 C 17.7614 5.2721 0 0
196
+ M V30 109 O 18.3744 7.8257 0 0
197
+ M V30 110 O 15.062 14.1018 0 0
198
+ M V30 111 O -4.8241 1.9817 0 0
199
+ M V30 END ATOM
200
+ M V30 BEGIN BOND
201
+ M V30 1 1 2 3
202
+ M V30 2 1 3 4
203
+ M V30 3 1 6 7
204
+ M V30 4 1 7 8
205
+ M V30 5 1 8 9
206
+ M V30 6 1 9 10
207
+ M V30 7 1 1 2
208
+ M V30 8 1 3 12
209
+ M V30 9 1 4 5
210
+ M V30 10 1 5 6
211
+ M V30 11 2 6 13
212
+ M V30 12 1 10 11
213
+ M V30 13 1 11 15
214
+ M V30 14 1 15 16
215
+ M V30 15 1 15 17
216
+ M V30 16 2 10 14
217
+ M V30 17 1 18 19
218
+ M V30 18 1 19 20
219
+ M V30 19 1 22 23
220
+ M V30 20 1 23 24
221
+ M V30 21 1 26 27
222
+ M V30 22 1 27 28
223
+ M V30 23 1 28 29
224
+ M V30 24 1 29 30
225
+ M V30 25 2 26 33
226
+ M V30 26 2 20 32
227
+ M V30 27 1 20 21
228
+ M V30 28 1 21 22
229
+ M V30 29 1 24 25
230
+ M V30 30 1 25 26
231
+ M V30 31 1 30 31
232
+ M V30 32 1 31 34
233
+ M V30 33 1 35 36
234
+ M V30 34 1 36 37
235
+ M V30 35 1 37 38
236
+ M V30 36 1 34 35
237
+ M V30 37 1 38 39
238
+ M V30 38 1 34 39
239
+ M V30 39 1 38 40
240
+ M V30 40 1 35 43
241
+ M V30 41 1 43 44
242
+ M V30 42 1 44 45
243
+ M V30 43 2 44 46
244
+ M V30 44 1 36 42
245
+ M V30 45 1 37 41
246
+ M V30 46 1 40 47
247
+ M V30 47 1 18 48
248
+ M V30 48 1 15 49
249
+ M V30 49 1 50 51
250
+ M V30 50 1 51 52
251
+ M V30 51 1 54 55
252
+ M V30 52 1 55 56
253
+ M V30 53 1 58 59
254
+ M V30 54 1 59 60
255
+ M V30 55 2 58 64
256
+ M V30 56 2 52 63
257
+ M V30 57 1 52 53
258
+ M V30 58 1 53 54
259
+ M V30 59 1 56 57
260
+ M V30 60 1 57 58
261
+ M V30 61 1 61 62
262
+ M V30 62 1 62 65
263
+ M V30 63 1 66 67
264
+ M V30 64 1 67 68
265
+ M V30 65 1 68 69
266
+ M V30 66 1 65 66
267
+ M V30 67 1 69 70
268
+ M V30 68 1 65 70
269
+ M V30 69 1 69 71
270
+ M V30 70 1 66 74
271
+ M V30 71 1 74 75
272
+ M V30 72 1 75 76
273
+ M V30 73 2 75 77
274
+ M V30 74 1 67 73
275
+ M V30 75 1 68 72
276
+ M V30 76 1 71 78
277
+ M V30 77 1 50 79
278
+ M V30 78 1 49 79
279
+ M V30 79 1 60 80
280
+ M V30 80 1 80 61
281
+ M V30 81 1 81 82
282
+ M V30 82 1 82 83
283
+ M V30 83 1 85 86
284
+ M V30 84 1 86 87
285
+ M V30 85 1 89 90
286
+ M V30 86 1 90 91
287
+ M V30 87 1 91 92
288
+ M V30 88 1 92 93
289
+ M V30 89 2 89 96
290
+ M V30 90 2 83 95
291
+ M V30 91 1 83 84
292
+ M V30 92 1 84 85
293
+ M V30 93 1 87 88
294
+ M V30 94 1 88 89
295
+ M V30 95 1 93 94
296
+ M V30 96 1 94 97
297
+ M V30 97 1 98 99
298
+ M V30 98 1 99 100
299
+ M V30 99 1 100 101
300
+ M V30 100 1 97 98
301
+ M V30 101 1 101 102
302
+ M V30 102 1 97 102
303
+ M V30 103 1 101 103
304
+ M V30 104 1 98 106
305
+ M V30 105 1 106 107
306
+ M V30 106 1 107 108
307
+ M V30 107 2 107 109
308
+ M V30 108 1 99 105
309
+ M V30 109 1 100 104
310
+ M V30 110 1 103 110
311
+ M V30 111 1 81 111
312
+ M V30 112 1 16 111
313
+ M V30 113 1 17 48
314
+ M V30 END BOND
315
+ M V30 END CTAB
316
+ M END`;
317
+
318
+ const GALNACPRIME = `
319
+ Datagrok monomer library Nucleotides
320
+
321
+ 0 0 0 0 0 999 V3000
322
+ M V30 BEGIN CTAB
323
+ M V30 COUNTS 111 113 0 0 0
324
+ M V30 BEGIN ATOM
325
+ M V30 1 O 20.7313 0.7027 0 0
326
+ M V30 2 C 19.3976 -0.0673 0 0
327
+ M V30 3 C 18.0638 0.7027 0 0
328
+ M V30 4 C 16.7303 -0.0673 0 0
329
+ M V30 5 N 15.3965 0.7027 0 0
330
+ M V30 6 C 14.0628 -0.0673 0 0
331
+ M V30 7 C 12.7293 0.7027 0 0
332
+ M V30 8 C 11.3955 -0.0673 0 0
333
+ M V30 9 C 10.062 0.7027 0 0
334
+ M V30 10 C 8.7283 -0.0673 0 0
335
+ M V30 11 N 7.3947 0.7027 0 0
336
+ M V30 12 O 18.0638 2.2427 0 0
337
+ M V30 13 O 14.0628 -1.6073 0 0
338
+ M V30 14 O 8.7283 -1.6073 0 0
339
+ M V30 15 C 5.8547 0.7027 0 0
340
+ M V30 16 C 5.8547 -0.8373 0 0
341
+ M V30 17 C 5.8547 2.2427 0 0
342
+ M V30 18 C 3.4848 3.0127 0 0
343
+ M V30 19 C 2.4544 4.157 0 0
344
+ M V30 20 C 0.948 3.8368 0 0
345
+ M V30 21 N -0.0824 4.9813 0 0
346
+ M V30 22 C -1.5888 4.6612 0 0
347
+ M V30 23 C -2.6192 5.8056 0 0
348
+ M V30 24 C -4.1256 5.4855 0 0
349
+ M V30 25 N -5.156 6.6297 0 0
350
+ M V30 26 C -6.6624 6.3096 0 0
351
+ M V30 27 C -7.6928 7.4541 0 0
352
+ M V30 28 C -9.1992 7.1339 0 0
353
+ M V30 29 C -10.2296 8.2784 0 0
354
+ M V30 30 C -11.736 7.9583 0 0
355
+ M V30 31 O -12.7664 9.1027 0 0
356
+ M V30 32 O 0.4722 2.3723 0 0
357
+ M V30 33 O -7.1382 4.845 0 0
358
+ M V30 34 C -14.2728 8.7824 0 0
359
+ M V30 35 C -15.3032 9.9267 0 0
360
+ M V30 36 C -16.8098 9.6065 0 0
361
+ M V30 37 C -17.2856 8.1421 0 0
362
+ M V30 38 C -16.2552 6.9975 0 0
363
+ M V30 39 O -14.7486 7.3178 0 0
364
+ M V30 40 C -16.7312 5.5329 0 0
365
+ M V30 41 O -18.7918 7.8218 0 0
366
+ M V30 42 O -17.8404 10.751 0 0
367
+ M V30 43 N -14.8274 11.3914 0 0
368
+ M V30 44 C -15.7325 12.6372 0 0
369
+ M V30 45 C -15.2567 14.1018 0 0
370
+ M V30 46 O -17.2537 12.3963 0 0
371
+ M V30 47 O -18.2628 5.372 0 0
372
+ M V30 48 O 4.9494 3.4885 0 0
373
+ M V30 49 C 4.521 -0.0673 0 0
374
+ M V30 50 C 1.9414 -0.2026 0 0
375
+ M V30 51 C 0.6077 0.5674 0 0
376
+ M V30 52 C -0.726 -0.2026 0 0
377
+ M V30 53 N -2.0596 0.5674 0 0
378
+ M V30 54 C -3.3933 -0.2026 0 0
379
+ M V30 55 C -4.7271 0.5674 0 0
380
+ M V30 56 C -6.0606 -0.2026 0 0
381
+ M V30 57 N -7.3943 0.5674 0 0
382
+ M V30 58 C -8.7281 -0.2026 0 0
383
+ M V30 59 C -10.0618 0.5674 0 0
384
+ M V30 60 C -11.3953 -0.2026 0 0
385
+ M V30 61 C -14.0628 -0.2026 0 0
386
+ M V30 62 O -15.3964 0.5674 0 0
387
+ M V30 63 O -0.726 -1.7426 0 0
388
+ M V30 64 O -8.7281 -1.7426 0 0
389
+ M V30 65 C -16.7301 -0.2026 0 0
390
+ M V30 66 C -18.0638 0.5676 0 0
391
+ M V30 67 C -19.3976 -0.2026 0 0
392
+ M V30 68 C -19.3974 -1.7426 0 0
393
+ M V30 69 C -18.0638 -2.5126 0 0
394
+ M V30 70 O -16.7301 -1.7426 0 0
395
+ M V30 71 C -18.064 -4.0526 0 0
396
+ M V30 72 O -20.7311 -2.5126 0 0
397
+ M V30 73 O -20.7313 0.5674 0 0
398
+ M V30 74 N -18.0638 2.1076 0 0
399
+ M V30 75 C -19.3096 3.0127 0 0
400
+ M V30 76 C -19.3096 4.5527 0 0
401
+ M V30 77 O -20.6818 2.3135 0 0
402
+ M V30 78 O -19.4709 -4.6791 0 0
403
+ M V30 79 O 3.1872 0.7027 0 0
404
+ M V30 80 C -12.7291 0.5674 0 0
405
+ M V30 81 C 3.919 -3.2277 0 0
406
+ M V30 82 C 2.4126 -2.9076 0 0
407
+ M V30 83 C 1.3822 -4.0519 0 0
408
+ M V30 84 N -0.1242 -3.7317 0 0
409
+ M V30 85 C -1.1546 -4.8762 0 0
410
+ M V30 86 C -2.661 -4.5561 0 0
411
+ M V30 87 C -3.6914 -5.7005 0 0
412
+ M V30 88 N -5.1978 -5.3804 0 0
413
+ M V30 89 C -6.2282 -6.5248 0 0
414
+ M V30 90 C -7.7346 -6.2045 0 0
415
+ M V30 91 C -8.765 -7.349 0 0
416
+ M V30 92 C -10.2714 -7.0288 0 0
417
+ M V30 93 C -11.3018 -8.1733 0 0
418
+ M V30 94 O -12.8082 -7.8532 0 0
419
+ M V30 95 O 1.858 -5.5167 0 0
420
+ M V30 96 O -5.7524 -7.9894 0 0
421
+ M V30 97 C -13.8386 -8.9976 0 0
422
+ M V30 98 C -15.345 -8.6773 0 0
423
+ M V30 99 C -16.3756 -9.8219 0 0
424
+ M V30 100 C -15.8996 -11.2863 0 0
425
+ M V30 101 C -14.3934 -11.6068 0 0
426
+ M V30 102 O -13.3628 -10.4622 0 0
427
+ M V30 103 C -13.9176 -13.0714 0 0
428
+ M V30 104 O -16.93 -12.4308 0 0
429
+ M V30 105 O -17.882 -9.5018 0 0
430
+ M V30 106 N -15.8208 -7.2127 0 0
431
+ M V30 107 C -17.2856 -6.7367 0 0
432
+ M V30 108 C -17.7614 -5.2721 0 0
433
+ M V30 109 O -18.3744 -7.8257 0 0
434
+ M V30 110 O -15.062 -14.1018 0 0
435
+ M V30 111 O 4.8241 -1.9817 0 0
436
+ M V30 END ATOM
437
+ M V30 BEGIN BOND
438
+ M V30 1 1 2 3
439
+ M V30 2 1 3 4
440
+ M V30 3 1 6 7
441
+ M V30 4 1 7 8
442
+ M V30 5 1 8 9
443
+ M V30 6 1 9 10
444
+ M V30 7 1 1 2
445
+ M V30 8 1 3 12
446
+ M V30 9 1 4 5
447
+ M V30 10 1 5 6
448
+ M V30 11 2 6 13
449
+ M V30 12 1 10 11
450
+ M V30 13 1 11 15
451
+ M V30 14 1 15 16
452
+ M V30 15 1 15 17
453
+ M V30 16 2 10 14
454
+ M V30 17 1 18 19
455
+ M V30 18 1 19 20
456
+ M V30 19 1 22 23
457
+ M V30 20 1 23 24
458
+ M V30 21 1 26 27
459
+ M V30 22 1 27 28
460
+ M V30 23 1 28 29
461
+ M V30 24 1 29 30
462
+ M V30 25 2 26 33
463
+ M V30 26 2 20 32
464
+ M V30 27 1 20 21
465
+ M V30 28 1 21 22
466
+ M V30 29 1 24 25
467
+ M V30 30 1 25 26
468
+ M V30 31 1 30 31
469
+ M V30 32 1 31 34
470
+ M V30 33 1 35 36
471
+ M V30 34 1 36 37
472
+ M V30 35 1 37 38
473
+ M V30 36 1 34 35
474
+ M V30 37 1 38 39
475
+ M V30 38 1 34 39
476
+ M V30 39 1 38 40
477
+ M V30 40 1 35 43
478
+ M V30 41 1 43 44
479
+ M V30 42 1 44 45
480
+ M V30 43 2 44 46
481
+ M V30 44 1 36 42
482
+ M V30 45 1 37 41
483
+ M V30 46 1 40 47
484
+ M V30 47 1 18 48
485
+ M V30 48 1 15 49
486
+ M V30 49 1 50 51
487
+ M V30 50 1 51 52
488
+ M V30 51 1 54 55
489
+ M V30 52 1 55 56
490
+ M V30 53 1 58 59
491
+ M V30 54 1 59 60
492
+ M V30 55 2 58 64
493
+ M V30 56 2 52 63
494
+ M V30 57 1 52 53
495
+ M V30 58 1 53 54
496
+ M V30 59 1 56 57
497
+ M V30 60 1 57 58
498
+ M V30 61 1 61 62
499
+ M V30 62 1 62 65
500
+ M V30 63 1 66 67
501
+ M V30 64 1 67 68
502
+ M V30 65 1 68 69
503
+ M V30 66 1 65 66
504
+ M V30 67 1 69 70
505
+ M V30 68 1 65 70
506
+ M V30 69 1 69 71
507
+ M V30 70 1 66 74
508
+ M V30 71 1 74 75
509
+ M V30 72 1 75 76
510
+ M V30 73 2 75 77
511
+ M V30 74 1 67 73
512
+ M V30 75 1 68 72
513
+ M V30 76 1 71 78
514
+ M V30 77 1 50 79
515
+ M V30 78 1 49 79
516
+ M V30 79 1 60 80
517
+ M V30 80 1 80 61
518
+ M V30 81 1 81 82
519
+ M V30 82 1 82 83
520
+ M V30 83 1 85 86
521
+ M V30 84 1 86 87
522
+ M V30 85 1 89 90
523
+ M V30 86 1 90 91
524
+ M V30 87 1 91 92
525
+ M V30 88 1 92 93
526
+ M V30 89 2 89 96
527
+ M V30 90 2 83 95
528
+ M V30 91 1 83 84
529
+ M V30 92 1 84 85
530
+ M V30 93 1 87 88
531
+ M V30 94 1 88 89
532
+ M V30 95 1 93 94
533
+ M V30 96 1 94 97
534
+ M V30 97 1 98 99
535
+ M V30 98 1 99 100
536
+ M V30 99 1 100 101
537
+ M V30 100 1 97 98
538
+ M V30 101 1 101 102
539
+ M V30 102 1 97 102
540
+ M V30 103 1 101 103
541
+ M V30 104 1 98 106
542
+ M V30 105 1 106 107
543
+ M V30 106 1 107 108
544
+ M V30 107 2 107 109
545
+ M V30 108 1 99 105
546
+ M V30 109 1 100 104
547
+ M V30 110 1 103 110
548
+ M V30 111 1 81 111
549
+ M V30 112 1 16 111
550
+ M V30 113 1 17 48
551
+ M V30 END BOND
552
+ M V30 END CTAB
553
+ M END`;
554
+
81
555
  export function getNucleotidesMol(smilesCodes: string[]) {
82
556
  const molBlocks: string[] = [];
83
557
 
@@ -85,6 +559,8 @@ export function getNucleotidesMol(smilesCodes: string[]) {
85
559
  smilesCodes[i] == 'OP(=O)(O)O' ? molBlocks.push(PHOSHATE) :
86
560
  smilesCodes[i] == 'OP(=O)(S)O' ? molBlocks.push(THIOPHOSHATE) :
87
561
  smilesCodes[i] == 'O[C@@H]1C[C@@H]O[C@H]1CO' ? molBlocks.push(rotateNucleotidesV3000(INVABASIC)) :
562
+ smilesCodes[i] == 'OCC(O)CNC(=O)CCCC(=O)NC(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)' ? molBlocks.push(GALNAC) :
563
+ smilesCodes[i] == 'C(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)NC(=O)CCCC(=O)NCC(O)CO' ? molBlocks.push(GALNACPRIME) :
88
564
  molBlocks.push(rotateNucleotidesV3000(smilesCodes[i]));
89
565
  }
90
566