@datagrok/sequence-translator 1.0.9 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +538 -40
- package/dist/package.js +572 -50
- package/package.json +2 -2
- package/src/autostart/registration.ts +52 -11
- package/src/axolabs/constants.ts +1 -1
- package/src/axolabs/define-pattern.ts +1 -0
- package/src/main/main-view.ts +1 -2
- package/src/package-test.ts +6 -3
- package/src/structures-works/converters.ts +2 -1
- package/src/structures-works/mol-transformations.ts +476 -0
- package/src/structures-works/sequence-codes-tools.ts +34 -30
- package/{test-SequenceTranslator-7770371320b2-1d307ddc.html → test-SequenceTranslator-916a90d7d48b-0c0e8404.html} +8 -8
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/sequence-translator",
|
|
3
3
|
"friendlyName": "Sequence Translator",
|
|
4
|
-
"version": "1.0.9",
|
|
4
|
+
"version": "1.0.10",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Vadym Kovadlo",
|
|
7
7
|
"email": "vkovadlo@datagrok.ai"
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"directory": "packages/SequenceTranslator"
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
|
-
"@datagrok-libraries/utils": "^1.
|
|
16
|
+
"@datagrok-libraries/utils": "^1.11.1",
|
|
17
17
|
"@types/react": "^18.0.15",
|
|
18
18
|
"datagrok-api": "^1.6.0",
|
|
19
19
|
"datagrok-tools": "^4.1.2",
|
|
package/src/autostart/registration.ts
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import {siRnaAxolabsToGcrs, gcrsToNucleotides, asoGapmersBioSpringToGcrs, gcrsToMermade12,
|
|
5
|
-
} from '../structures-works/converters';
|
|
4
|
+
import {siRnaBioSpringToGcrs, siRnaAxolabsToGcrs, gcrsToNucleotides, asoGapmersBioSpringToGcrs, gcrsToMermade12,
|
|
5
|
+
siRnaNucleotidesToGcrs} from '../structures-works/converters';
|
|
6
6
|
import {map, COL_NAMES, MODIFICATIONS} from '../structures-works/map';
|
|
7
7
|
import {isValidSequence} from '../structures-works/sequence-codes-tools';
|
|
8
8
|
import {sequenceToMolV3000} from '../structures-works/from-monomers';
|
|
9
|
+
import {linkV3000} from '../structures-works/mol-transformations';
|
|
9
10
|
|
|
10
11
|
import {SALTS_CSV} from '../salts';
|
|
11
12
|
import {USERS_CSV} from '../users';
|
|
@@ -54,6 +55,10 @@ function molecularWeight(sequence: string, weightsObj: {[index: string]: number}
|
|
|
54
55
|
return weight - 61.97;
|
|
55
56
|
}
|
|
56
57
|
|
|
58
|
+
function parseStrandsFromDuplexCell(s: string): string[] {
|
|
59
|
+
return s.slice(3).split('\r\nAS ');
|
|
60
|
+
}
|
|
61
|
+
|
|
57
62
|
async function saveTableAsSdFile(table: DG.DataFrame) {
|
|
58
63
|
if (!table.columns.contains('Compound Name')) {
|
|
59
64
|
grok.shell.warning(
|
|
@@ -67,9 +72,23 @@ async function saveTableAsSdFile(table: DG.DataFrame) {
|
|
|
67
72
|
let result = '';
|
|
68
73
|
for (let i = 0; i < table.rowCount; i++) {
|
|
69
74
|
const format = 'Janssen GCRS Codes'; //getFormat(structureColumn.get(i))!;
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
sequenceToMolV3000(
|
|
75
|
+
if (typeColumn.get(i) == 'Duplex') {
|
|
76
|
+
const array = parseStrandsFromDuplexCell(structureColumn.get(i));
|
|
77
|
+
const as = sequenceToMolV3000(array[1], true, true, format) +
|
|
78
|
+
'\n' + `> <Sequence>\nAnti Sense\n\n`;
|
|
79
|
+
const ss = sequenceToMolV3000(array[0], false, true, format) +
|
|
80
|
+
'\n' + `> <Sequence>\nSense Strand\n\n`;
|
|
81
|
+
result += linkV3000([ss, as], true, true) + '\n\n';
|
|
82
|
+
} else if (typeColumn.get(i) == 'SS') {
|
|
83
|
+
const molSS = sequenceToMolV3000(structureColumn.get(i), false, true, format) +
|
|
84
|
+
'\n' + `> <Sequence>\nSense Strand\n\n`;
|
|
85
|
+
result += molSS;
|
|
86
|
+
} else if (typeColumn.get(i) == 'AS') {
|
|
87
|
+
const molAS = sequenceToMolV3000(structureColumn.get(i), true, true, format) +
|
|
88
|
+
'\n' + `> <Sequence>\nAnti Sense\n\n`;
|
|
89
|
+
result += molAS;
|
|
90
|
+
}
|
|
91
|
+
|
|
73
92
|
for (const col of table.columns) {
|
|
74
93
|
if (col.name != COL_NAMES.SEQUENCE)
|
|
75
94
|
result += `> <${col.name}>\n${col.get(i)}\n\n`;
|
|
@@ -90,6 +109,13 @@ export function autostartOligoSdFileSubscription() {
|
|
|
90
109
|
grok.events.onContextMenu.subscribe((args) => {
|
|
91
110
|
const seqCol = args.args.context.table.currentCol; // /^[fsACGUacgu]{6,}$/
|
|
92
111
|
if (DG.Detector.sampleCategories(seqCol,
|
|
112
|
+
(s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C){6,}$/.test(s))) {
|
|
113
|
+
args.args.menu.item('Convert raw nucleotides to GCRS', () => {
|
|
114
|
+
args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
115
|
+
return siRnaNucleotidesToGcrs(seqCol.get(i));
|
|
116
|
+
});
|
|
117
|
+
});
|
|
118
|
+
} else if (DG.Detector.sampleCategories(seqCol,
|
|
93
119
|
(s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|f|s|A|C|G|U|a|c|g|u){6,}$/.test(s))) {
|
|
94
120
|
args.args.menu.item('Convert Axolabs to GCRS', () => {
|
|
95
121
|
args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
@@ -121,7 +147,7 @@ export function autostartOligoSdFileSubscription() {
|
|
|
121
147
|
(s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|\*|1|2|3|4|5|6|7|8){6,}$/.test(s))) {
|
|
122
148
|
args.args.menu.item('Convert Biospring to GCRS', () => {
|
|
123
149
|
args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
124
|
-
return
|
|
150
|
+
return siRnaBioSpringToGcrs(seqCol.get(i));
|
|
125
151
|
});
|
|
126
152
|
});
|
|
127
153
|
}
|
|
@@ -141,6 +167,7 @@ export function oligoSdFile(table: DG.DataFrame) {
|
|
|
141
167
|
const saltCol = table.getCol(COL_NAMES.SALT);
|
|
142
168
|
const equivalentsCol = table.getCol(COL_NAMES.EQUIVALENTS);
|
|
143
169
|
const typeColumn = table.getCol(COL_NAMES.TYPE);
|
|
170
|
+
const chemistryNameCol = table.getCol(COL_NAMES.CHEMISTRY_NAME);
|
|
144
171
|
|
|
145
172
|
const molWeightCol = saltsDf.getCol('MOLWEIGHT');
|
|
146
173
|
const saltNamesList = saltsDf.getCol('DISPLAY').toList();
|
|
@@ -154,12 +181,17 @@ export function oligoSdFile(table: DG.DataFrame) {
|
|
|
154
181
|
t.rows.removeAt(i, 1, false);
|
|
155
182
|
}
|
|
156
183
|
|
|
157
|
-
t.columns.addNewString(COL_NAMES.COMPOUND_NAME).init((i: number) =>
|
|
184
|
+
t.columns.addNewString(COL_NAMES.COMPOUND_NAME).init((i: number) => {
|
|
185
|
+
return (typeColumn.get(i) == 'Duplex') ? chemistryNameCol.get(i) : sequenceCol.get(i);
|
|
186
|
+
});
|
|
158
187
|
|
|
159
|
-
t.columns.addNewString(COL_NAMES.COMPOUND_COMMENTS).init((i: number) =>
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
188
|
+
t.columns.addNewString(COL_NAMES.COMPOUND_COMMENTS).init((i: number) => {
|
|
189
|
+
if (typeColumn.get(i) == 'Duplex') {
|
|
190
|
+
const arr = parseStrandsFromDuplexCell(sequenceCol.get(i));
|
|
191
|
+
return chemistryNameCol.get(i) + '; duplex of SS: ' + arr[0] + ' and AS: ' + arr[1];
|
|
192
|
+
}
|
|
193
|
+
return sequenceCol.get(i);
|
|
194
|
+
});
|
|
163
195
|
|
|
164
196
|
const weightsObj: {[code: string]: number} = {};
|
|
165
197
|
for (const synthesizer of Object.keys(map)) {
|
|
@@ -172,6 +204,15 @@ export function oligoSdFile(table: DG.DataFrame) {
|
|
|
172
204
|
weightsObj[key] = value.molecularWeight;
|
|
173
205
|
|
|
174
206
|
t.columns.addNewFloat(COL_NAMES.CPD_MW).init((i: number) => {
|
|
207
|
+
if (typeColumn.get(i) == 'Duplex') {
|
|
208
|
+
const arr = parseStrandsFromDuplexCell(sequenceCol.get(i));
|
|
209
|
+
return (
|
|
210
|
+
isValidSequence(arr[0], null).indexOfFirstNotValidChar == -1 &&
|
|
211
|
+
isValidSequence(arr[1], null).indexOfFirstNotValidChar == -1
|
|
212
|
+
) ?
|
|
213
|
+
molecularWeight(arr[0], weightsObj) + molecularWeight(arr[1], weightsObj) :
|
|
214
|
+
DG.FLOAT_NULL;
|
|
215
|
+
}
|
|
175
216
|
return (isValidSequence(sequenceCol.get(i), null).indexOfFirstNotValidChar == -1) ?
|
|
176
217
|
molecularWeight(sequenceCol.get(i), weightsObj) :
|
|
177
218
|
DG.FLOAT_NULL;
|
package/src/axolabs/constants.ts
CHANGED
|
package/src/axolabs/define-pattern.ts
CHANGED
|
@@ -703,6 +703,7 @@ export function defineAxolabsPattern() {
|
|
|
703
703
|
grok.shell.v = grok.shell.getTableView(tables.value!.name);
|
|
704
704
|
grok.shell.info(((createAsStrand.value) ? 'Columns were' : 'Column was') +
|
|
705
705
|
' added to table \'' + tables.value!.name + '\'');
|
|
706
|
+
updateOutputExamples();
|
|
706
707
|
}
|
|
707
708
|
});
|
|
708
709
|
|
package/src/main/main-view.ts
CHANGED
|
@@ -19,8 +19,7 @@ export function mainView() {
|
|
|
19
19
|
try {
|
|
20
20
|
sequence = sequence.replace(/\s/g, '');
|
|
21
21
|
const output = isValidSequence(sequence, null);
|
|
22
|
-
|
|
23
|
-
// inputFormatChoiceInput.value = output.synthesizer![0];
|
|
22
|
+
inputFormatChoiceInput.value = output.synthesizer![0];
|
|
24
23
|
const outputSequenceObj = convertSequence(sequence, output);
|
|
25
24
|
const tableRows = [];
|
|
26
25
|
|
package/src/package-test.ts
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {runTests, tests} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
|
|
3
3
|
import './tests/smiles-tests';
|
|
4
4
|
|
|
5
5
|
export const _package = new DG.Package();
|
|
6
6
|
export {tests};
|
|
7
7
|
|
|
8
8
|
//name: test
|
|
9
|
+
//input: string category {optional: true}
|
|
10
|
+
//input: string test {optional: true}
|
|
11
|
+
//input: object testContext {optional: true}
|
|
9
12
|
//output: dataframe result
|
|
10
|
-
export async function test(): Promise<DG.DataFrame> {
|
|
11
|
-
const data = await runTests();
|
|
13
|
+
export async function test(category: string, test: string, testContext: TestContext): Promise<DG.DataFrame> {
|
|
14
|
+
const data = await runTests({category, test, testContext});
|
|
12
15
|
return DG.DataFrame.fromObjects(data)!;
|
|
13
16
|
}
|
|
package/src/structures-works/converters.ts
CHANGED
|
@@ -96,9 +96,10 @@ export function asoGapmersBioSpringToGcrs(nucleotides: string): string {
|
|
|
96
96
|
//output: string result {semType: BioSpring / Gapmers}
|
|
97
97
|
export function asoGapmersGcrsToBioSpring(nucleotides: string): string {
|
|
98
98
|
const obj: {[index: string]: string} = {'(invabasic)': '(invabasic)', '(GalNAc-2-JNJ)': '(GalNAc-2-JNJ)',
|
|
99
|
+
'fU': '1', 'fA': '2', 'fC': '3', 'fG': '4', 'mU': '5', 'mA': '6', 'mC': '7', 'mG': '8',
|
|
99
100
|
'moeT': '5', 'moeA': '6', 'moe5mC': '7', 'moeG': '8', 'moeU': '5', '5mC': '9', 'nps': '*', 'ps': '*', 'U': 'T',
|
|
100
101
|
};
|
|
101
|
-
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g,
|
|
102
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g,
|
|
102
103
|
function(x: string) {return obj[x];});
|
|
103
104
|
}
|
|
104
105
|
|
|
package/src/structures-works/mol-transformations.ts
CHANGED
|
@@ -78,6 +78,480 @@ M V30 END COLLECTION
|
|
|
78
78
|
M V30 END CTAB
|
|
79
79
|
M END`;
|
|
80
80
|
|
|
81
|
+
const GALNAC = `
|
|
82
|
+
Datagrok monomer library Nucleotides
|
|
83
|
+
|
|
84
|
+
0 0 0 0 0 999 V3000
|
|
85
|
+
M V30 BEGIN CTAB
|
|
86
|
+
M V30 COUNTS 111 113 0 0 0
|
|
87
|
+
M V30 BEGIN ATOM
|
|
88
|
+
M V30 1 O -20.7313 -0.7027 0 0
|
|
89
|
+
M V30 2 C -19.3976 0.0673 0 0
|
|
90
|
+
M V30 3 C -18.0638 -0.7027 0 0
|
|
91
|
+
M V30 4 C -16.7303 0.0673 0 0
|
|
92
|
+
M V30 5 N -15.3965 -0.7027 0 0
|
|
93
|
+
M V30 6 C -14.0628 0.0673 0 0
|
|
94
|
+
M V30 7 C -12.7293 -0.7027 0 0
|
|
95
|
+
M V30 8 C -11.3955 0.0673 0 0
|
|
96
|
+
M V30 9 C -10.062 -0.7027 0 0
|
|
97
|
+
M V30 10 C -8.7283 0.0673 0 0
|
|
98
|
+
M V30 11 N -7.3947 -0.7027 0 0
|
|
99
|
+
M V30 12 O -18.0638 -2.2427 0 0
|
|
100
|
+
M V30 13 O -14.0628 1.6073 0 0
|
|
101
|
+
M V30 14 O -8.7283 1.6073 0 0
|
|
102
|
+
M V30 15 C -5.8547 -0.7027 0 0
|
|
103
|
+
M V30 16 C -5.8547 0.8373 0 0
|
|
104
|
+
M V30 17 C -5.8547 -2.2427 0 0
|
|
105
|
+
M V30 18 C -3.4848 -3.0127 0 0
|
|
106
|
+
M V30 19 C -2.4544 -4.157 0 0
|
|
107
|
+
M V30 20 C -0.948 -3.8368 0 0
|
|
108
|
+
M V30 21 N 0.0824 -4.9813 0 0
|
|
109
|
+
M V30 22 C 1.5888 -4.6612 0 0
|
|
110
|
+
M V30 23 C 2.6192 -5.8056 0 0
|
|
111
|
+
M V30 24 C 4.1256 -5.4855 0 0
|
|
112
|
+
M V30 25 N 5.156 -6.6297 0 0
|
|
113
|
+
M V30 26 C 6.6624 -6.3096 0 0
|
|
114
|
+
M V30 27 C 7.6928 -7.4541 0 0
|
|
115
|
+
M V30 28 C 9.1992 -7.1339 0 0
|
|
116
|
+
M V30 29 C 10.2296 -8.2784 0 0
|
|
117
|
+
M V30 30 C 11.736 -7.9583 0 0
|
|
118
|
+
M V30 31 O 12.7664 -9.1027 0 0
|
|
119
|
+
M V30 32 O -0.4722 -2.3723 0 0
|
|
120
|
+
M V30 33 O 7.1382 -4.845 0 0
|
|
121
|
+
M V30 34 C 14.2728 -8.7824 0 0
|
|
122
|
+
M V30 35 C 15.3032 -9.9267 0 0
|
|
123
|
+
M V30 36 C 16.8098 -9.6065 0 0
|
|
124
|
+
M V30 37 C 17.2856 -8.1421 0 0
|
|
125
|
+
M V30 38 C 16.2552 -6.9975 0 0
|
|
126
|
+
M V30 39 O 14.7486 -7.3178 0 0
|
|
127
|
+
M V30 40 C 16.7312 -5.5329 0 0
|
|
128
|
+
M V30 41 O 18.7918 -7.8218 0 0
|
|
129
|
+
M V30 42 O 17.8404 -10.751 0 0
|
|
130
|
+
M V30 43 N 14.8274 -11.3914 0 0
|
|
131
|
+
M V30 44 C 15.7325 -12.6372 0 0
|
|
132
|
+
M V30 45 C 15.2567 -14.1018 0 0
|
|
133
|
+
M V30 46 O 17.2537 -12.3963 0 0
|
|
134
|
+
M V30 47 O 18.2628 -5.372 0 0
|
|
135
|
+
M V30 48 O -4.9494 -3.4885 0 0
|
|
136
|
+
M V30 49 C -4.521 0.0673 0 0
|
|
137
|
+
M V30 50 C -1.9414 0.2026 0 0
|
|
138
|
+
M V30 51 C -0.6077 -0.5674 0 0
|
|
139
|
+
M V30 52 C 0.726 0.2026 0 0
|
|
140
|
+
M V30 53 N 2.0596 -0.5674 0 0
|
|
141
|
+
M V30 54 C 3.3933 0.2026 0 0
|
|
142
|
+
M V30 55 C 4.7271 -0.5674 0 0
|
|
143
|
+
M V30 56 C 6.0606 0.2026 0 0
|
|
144
|
+
M V30 57 N 7.3943 -0.5674 0 0
|
|
145
|
+
M V30 58 C 8.7281 0.2026 0 0
|
|
146
|
+
M V30 59 C 10.0618 -0.5674 0 0
|
|
147
|
+
M V30 60 C 11.3953 0.2026 0 0
|
|
148
|
+
M V30 61 C 14.0628 0.2026 0 0
|
|
149
|
+
M V30 62 O 15.3964 -0.5674 0 0
|
|
150
|
+
M V30 63 O 0.726 1.7426 0 0
|
|
151
|
+
M V30 64 O 8.7281 1.7426 0 0
|
|
152
|
+
M V30 65 C 16.7301 0.2026 0 0
|
|
153
|
+
M V30 66 C 18.0638 -0.5676 0 0
|
|
154
|
+
M V30 67 C 19.3976 0.2026 0 0
|
|
155
|
+
M V30 68 C 19.3974 1.7426 0 0
|
|
156
|
+
M V30 69 C 18.0638 2.5126 0 0
|
|
157
|
+
M V30 70 O 16.7301 1.7426 0 0
|
|
158
|
+
M V30 71 C 18.064 4.0526 0 0
|
|
159
|
+
M V30 72 O 20.7311 2.5126 0 0
|
|
160
|
+
M V30 73 O 20.7313 -0.5674 0 0
|
|
161
|
+
M V30 74 N 18.0638 -2.1076 0 0
|
|
162
|
+
M V30 75 C 19.3096 -3.0127 0 0
|
|
163
|
+
M V30 76 C 19.3096 -4.5527 0 0
|
|
164
|
+
M V30 77 O 20.6818 -2.3135 0 0
|
|
165
|
+
M V30 78 O 19.4709 4.6791 0 0
|
|
166
|
+
M V30 79 O -3.1872 -0.7027 0 0
|
|
167
|
+
M V30 80 C 12.7291 -0.5674 0 0
|
|
168
|
+
M V30 81 C -3.919 3.2277 0 0
|
|
169
|
+
M V30 82 C -2.4126 2.9076 0 0
|
|
170
|
+
M V30 83 C -1.3822 4.0519 0 0
|
|
171
|
+
M V30 84 N 0.1242 3.7317 0 0
|
|
172
|
+
M V30 85 C 1.1546 4.8762 0 0
|
|
173
|
+
M V30 86 C 2.661 4.5561 0 0
|
|
174
|
+
M V30 87 C 3.6914 5.7005 0 0
|
|
175
|
+
M V30 88 N 5.1978 5.3804 0 0
|
|
176
|
+
M V30 89 C 6.2282 6.5248 0 0
|
|
177
|
+
M V30 90 C 7.7346 6.2045 0 0
|
|
178
|
+
M V30 91 C 8.765 7.349 0 0
|
|
179
|
+
M V30 92 C 10.2714 7.0288 0 0
|
|
180
|
+
M V30 93 C 11.3018 8.1733 0 0
|
|
181
|
+
M V30 94 O 12.8082 7.8532 0 0
|
|
182
|
+
M V30 95 O -1.858 5.5167 0 0
|
|
183
|
+
M V30 96 O 5.7524 7.9894 0 0
|
|
184
|
+
M V30 97 C 13.8386 8.9976 0 0
|
|
185
|
+
M V30 98 C 15.345 8.6773 0 0
|
|
186
|
+
M V30 99 C 16.3756 9.8219 0 0
|
|
187
|
+
M V30 100 C 15.8996 11.2863 0 0
|
|
188
|
+
M V30 101 C 14.3934 11.6068 0 0
|
|
189
|
+
M V30 102 O 13.3628 10.4622 0 0
|
|
190
|
+
M V30 103 C 13.9176 13.0714 0 0
|
|
191
|
+
M V30 104 O 16.93 12.4308 0 0
|
|
192
|
+
M V30 105 O 17.882 9.5018 0 0
|
|
193
|
+
M V30 106 N 15.8208 7.2127 0 0
|
|
194
|
+
M V30 107 C 17.2856 6.7367 0 0
|
|
195
|
+
M V30 108 C 17.7614 5.2721 0 0
|
|
196
|
+
M V30 109 O 18.3744 7.8257 0 0
|
|
197
|
+
M V30 110 O 15.062 14.1018 0 0
|
|
198
|
+
M V30 111 O -4.8241 1.9817 0 0
|
|
199
|
+
M V30 END ATOM
|
|
200
|
+
M V30 BEGIN BOND
|
|
201
|
+
M V30 1 1 2 3
|
|
202
|
+
M V30 2 1 3 4
|
|
203
|
+
M V30 3 1 6 7
|
|
204
|
+
M V30 4 1 7 8
|
|
205
|
+
M V30 5 1 8 9
|
|
206
|
+
M V30 6 1 9 10
|
|
207
|
+
M V30 7 1 1 2
|
|
208
|
+
M V30 8 1 3 12
|
|
209
|
+
M V30 9 1 4 5
|
|
210
|
+
M V30 10 1 5 6
|
|
211
|
+
M V30 11 2 6 13
|
|
212
|
+
M V30 12 1 10 11
|
|
213
|
+
M V30 13 1 11 15
|
|
214
|
+
M V30 14 1 15 16
|
|
215
|
+
M V30 15 1 15 17
|
|
216
|
+
M V30 16 2 10 14
|
|
217
|
+
M V30 17 1 18 19
|
|
218
|
+
M V30 18 1 19 20
|
|
219
|
+
M V30 19 1 22 23
|
|
220
|
+
M V30 20 1 23 24
|
|
221
|
+
M V30 21 1 26 27
|
|
222
|
+
M V30 22 1 27 28
|
|
223
|
+
M V30 23 1 28 29
|
|
224
|
+
M V30 24 1 29 30
|
|
225
|
+
M V30 25 2 26 33
|
|
226
|
+
M V30 26 2 20 32
|
|
227
|
+
M V30 27 1 20 21
|
|
228
|
+
M V30 28 1 21 22
|
|
229
|
+
M V30 29 1 24 25
|
|
230
|
+
M V30 30 1 25 26
|
|
231
|
+
M V30 31 1 30 31
|
|
232
|
+
M V30 32 1 31 34
|
|
233
|
+
M V30 33 1 35 36
|
|
234
|
+
M V30 34 1 36 37
|
|
235
|
+
M V30 35 1 37 38
|
|
236
|
+
M V30 36 1 34 35
|
|
237
|
+
M V30 37 1 38 39
|
|
238
|
+
M V30 38 1 34 39
|
|
239
|
+
M V30 39 1 38 40
|
|
240
|
+
M V30 40 1 35 43
|
|
241
|
+
M V30 41 1 43 44
|
|
242
|
+
M V30 42 1 44 45
|
|
243
|
+
M V30 43 2 44 46
|
|
244
|
+
M V30 44 1 36 42
|
|
245
|
+
M V30 45 1 37 41
|
|
246
|
+
M V30 46 1 40 47
|
|
247
|
+
M V30 47 1 18 48
|
|
248
|
+
M V30 48 1 15 49
|
|
249
|
+
M V30 49 1 50 51
|
|
250
|
+
M V30 50 1 51 52
|
|
251
|
+
M V30 51 1 54 55
|
|
252
|
+
M V30 52 1 55 56
|
|
253
|
+
M V30 53 1 58 59
|
|
254
|
+
M V30 54 1 59 60
|
|
255
|
+
M V30 55 2 58 64
|
|
256
|
+
M V30 56 2 52 63
|
|
257
|
+
M V30 57 1 52 53
|
|
258
|
+
M V30 58 1 53 54
|
|
259
|
+
M V30 59 1 56 57
|
|
260
|
+
M V30 60 1 57 58
|
|
261
|
+
M V30 61 1 61 62
|
|
262
|
+
M V30 62 1 62 65
|
|
263
|
+
M V30 63 1 66 67
|
|
264
|
+
M V30 64 1 67 68
|
|
265
|
+
M V30 65 1 68 69
|
|
266
|
+
M V30 66 1 65 66
|
|
267
|
+
M V30 67 1 69 70
|
|
268
|
+
M V30 68 1 65 70
|
|
269
|
+
M V30 69 1 69 71
|
|
270
|
+
M V30 70 1 66 74
|
|
271
|
+
M V30 71 1 74 75
|
|
272
|
+
M V30 72 1 75 76
|
|
273
|
+
M V30 73 2 75 77
|
|
274
|
+
M V30 74 1 67 73
|
|
275
|
+
M V30 75 1 68 72
|
|
276
|
+
M V30 76 1 71 78
|
|
277
|
+
M V30 77 1 50 79
|
|
278
|
+
M V30 78 1 49 79
|
|
279
|
+
M V30 79 1 60 80
|
|
280
|
+
M V30 80 1 80 61
|
|
281
|
+
M V30 81 1 81 82
|
|
282
|
+
M V30 82 1 82 83
|
|
283
|
+
M V30 83 1 85 86
|
|
284
|
+
M V30 84 1 86 87
|
|
285
|
+
M V30 85 1 89 90
|
|
286
|
+
M V30 86 1 90 91
|
|
287
|
+
M V30 87 1 91 92
|
|
288
|
+
M V30 88 1 92 93
|
|
289
|
+
M V30 89 2 89 96
|
|
290
|
+
M V30 90 2 83 95
|
|
291
|
+
M V30 91 1 83 84
|
|
292
|
+
M V30 92 1 84 85
|
|
293
|
+
M V30 93 1 87 88
|
|
294
|
+
M V30 94 1 88 89
|
|
295
|
+
M V30 95 1 93 94
|
|
296
|
+
M V30 96 1 94 97
|
|
297
|
+
M V30 97 1 98 99
|
|
298
|
+
M V30 98 1 99 100
|
|
299
|
+
M V30 99 1 100 101
|
|
300
|
+
M V30 100 1 97 98
|
|
301
|
+
M V30 101 1 101 102
|
|
302
|
+
M V30 102 1 97 102
|
|
303
|
+
M V30 103 1 101 103
|
|
304
|
+
M V30 104 1 98 106
|
|
305
|
+
M V30 105 1 106 107
|
|
306
|
+
M V30 106 1 107 108
|
|
307
|
+
M V30 107 2 107 109
|
|
308
|
+
M V30 108 1 99 105
|
|
309
|
+
M V30 109 1 100 104
|
|
310
|
+
M V30 110 1 103 110
|
|
311
|
+
M V30 111 1 81 111
|
|
312
|
+
M V30 112 1 16 111
|
|
313
|
+
M V30 113 1 17 48
|
|
314
|
+
M V30 END BOND
|
|
315
|
+
M V30 END CTAB
|
|
316
|
+
M END`;
|
|
317
|
+
|
|
318
|
+
const GALNACPRIME = `
|
|
319
|
+
Datagrok monomer library Nucleotides
|
|
320
|
+
|
|
321
|
+
0 0 0 0 0 999 V3000
|
|
322
|
+
M V30 BEGIN CTAB
|
|
323
|
+
M V30 COUNTS 111 113 0 0 0
|
|
324
|
+
M V30 BEGIN ATOM
|
|
325
|
+
M V30 1 O 20.7313 0.7027 0 0
|
|
326
|
+
M V30 2 C 19.3976 -0.0673 0 0
|
|
327
|
+
M V30 3 C 18.0638 0.7027 0 0
|
|
328
|
+
M V30 4 C 16.7303 -0.0673 0 0
|
|
329
|
+
M V30 5 N 15.3965 0.7027 0 0
|
|
330
|
+
M V30 6 C 14.0628 -0.0673 0 0
|
|
331
|
+
M V30 7 C 12.7293 0.7027 0 0
|
|
332
|
+
M V30 8 C 11.3955 -0.0673 0 0
|
|
333
|
+
M V30 9 C 10.062 0.7027 0 0
|
|
334
|
+
M V30 10 C 8.7283 -0.0673 0 0
|
|
335
|
+
M V30 11 N 7.3947 0.7027 0 0
|
|
336
|
+
M V30 12 O 18.0638 2.2427 0 0
|
|
337
|
+
M V30 13 O 14.0628 -1.6073 0 0
|
|
338
|
+
M V30 14 O 8.7283 -1.6073 0 0
|
|
339
|
+
M V30 15 C 5.8547 0.7027 0 0
|
|
340
|
+
M V30 16 C 5.8547 -0.8373 0 0
|
|
341
|
+
M V30 17 C 5.8547 2.2427 0 0
|
|
342
|
+
M V30 18 C 3.4848 3.0127 0 0
|
|
343
|
+
M V30 19 C 2.4544 4.157 0 0
|
|
344
|
+
M V30 20 C 0.948 3.8368 0 0
|
|
345
|
+
M V30 21 N -0.0824 4.9813 0 0
|
|
346
|
+
M V30 22 C -1.5888 4.6612 0 0
|
|
347
|
+
M V30 23 C -2.6192 5.8056 0 0
|
|
348
|
+
M V30 24 C -4.1256 5.4855 0 0
|
|
349
|
+
M V30 25 N -5.156 6.6297 0 0
|
|
350
|
+
M V30 26 C -6.6624 6.3096 0 0
|
|
351
|
+
M V30 27 C -7.6928 7.4541 0 0
|
|
352
|
+
M V30 28 C -9.1992 7.1339 0 0
|
|
353
|
+
M V30 29 C -10.2296 8.2784 0 0
|
|
354
|
+
M V30 30 C -11.736 7.9583 0 0
|
|
355
|
+
M V30 31 O -12.7664 9.1027 0 0
|
|
356
|
+
M V30 32 O 0.4722 2.3723 0 0
|
|
357
|
+
M V30 33 O -7.1382 4.845 0 0
|
|
358
|
+
M V30 34 C -14.2728 8.7824 0 0
|
|
359
|
+
M V30 35 C -15.3032 9.9267 0 0
|
|
360
|
+
M V30 36 C -16.8098 9.6065 0 0
|
|
361
|
+
M V30 37 C -17.2856 8.1421 0 0
|
|
362
|
+
M V30 38 C -16.2552 6.9975 0 0
|
|
363
|
+
M V30 39 O -14.7486 7.3178 0 0
|
|
364
|
+
M V30 40 C -16.7312 5.5329 0 0
|
|
365
|
+
M V30 41 O -18.7918 7.8218 0 0
|
|
366
|
+
M V30 42 O -17.8404 10.751 0 0
|
|
367
|
+
M V30 43 N -14.8274 11.3914 0 0
|
|
368
|
+
M V30 44 C -15.7325 12.6372 0 0
|
|
369
|
+
M V30 45 C -15.2567 14.1018 0 0
|
|
370
|
+
M V30 46 O -17.2537 12.3963 0 0
|
|
371
|
+
M V30 47 O -18.2628 5.372 0 0
|
|
372
|
+
M V30 48 O 4.9494 3.4885 0 0
|
|
373
|
+
M V30 49 C 4.521 -0.0673 0 0
|
|
374
|
+
M V30 50 C 1.9414 -0.2026 0 0
|
|
375
|
+
M V30 51 C 0.6077 0.5674 0 0
|
|
376
|
+
M V30 52 C -0.726 -0.2026 0 0
|
|
377
|
+
M V30 53 N -2.0596 0.5674 0 0
|
|
378
|
+
M V30 54 C -3.3933 -0.2026 0 0
|
|
379
|
+
M V30 55 C -4.7271 0.5674 0 0
|
|
380
|
+
M V30 56 C -6.0606 -0.2026 0 0
|
|
381
|
+
M V30 57 N -7.3943 0.5674 0 0
|
|
382
|
+
M V30 58 C -8.7281 -0.2026 0 0
|
|
383
|
+
M V30 59 C -10.0618 0.5674 0 0
|
|
384
|
+
M V30 60 C -11.3953 -0.2026 0 0
|
|
385
|
+
M V30 61 C -14.0628 -0.2026 0 0
|
|
386
|
+
M V30 62 O -15.3964 0.5674 0 0
|
|
387
|
+
M V30 63 O -0.726 -1.7426 0 0
|
|
388
|
+
M V30 64 O -8.7281 -1.7426 0 0
|
|
389
|
+
M V30 65 C -16.7301 -0.2026 0 0
|
|
390
|
+
M V30 66 C -18.0638 0.5676 0 0
|
|
391
|
+
M V30 67 C -19.3976 -0.2026 0 0
|
|
392
|
+
M V30 68 C -19.3974 -1.7426 0 0
|
|
393
|
+
M V30 69 C -18.0638 -2.5126 0 0
|
|
394
|
+
M V30 70 O -16.7301 -1.7426 0 0
|
|
395
|
+
M V30 71 C -18.064 -4.0526 0 0
|
|
396
|
+
M V30 72 O -20.7311 -2.5126 0 0
|
|
397
|
+
M V30 73 O -20.7313 0.5674 0 0
|
|
398
|
+
M V30 74 N -18.0638 2.1076 0 0
|
|
399
|
+
M V30 75 C -19.3096 3.0127 0 0
|
|
400
|
+
M V30 76 C -19.3096 4.5527 0 0
|
|
401
|
+
M V30 77 O -20.6818 2.3135 0 0
|
|
402
|
+
M V30 78 O -19.4709 -4.6791 0 0
|
|
403
|
+
M V30 79 O 3.1872 0.7027 0 0
|
|
404
|
+
M V30 80 C -12.7291 0.5674 0 0
|
|
405
|
+
M V30 81 C 3.919 -3.2277 0 0
|
|
406
|
+
M V30 82 C 2.4126 -2.9076 0 0
|
|
407
|
+
M V30 83 C 1.3822 -4.0519 0 0
|
|
408
|
+
M V30 84 N -0.1242 -3.7317 0 0
|
|
409
|
+
M V30 85 C -1.1546 -4.8762 0 0
|
|
410
|
+
M V30 86 C -2.661 -4.5561 0 0
|
|
411
|
+
M V30 87 C -3.6914 -5.7005 0 0
|
|
412
|
+
M V30 88 N -5.1978 -5.3804 0 0
|
|
413
|
+
M V30 89 C -6.2282 -6.5248 0 0
|
|
414
|
+
M V30 90 C -7.7346 -6.2045 0 0
|
|
415
|
+
M V30 91 C -8.765 -7.349 0 0
|
|
416
|
+
M V30 92 C -10.2714 -7.0288 0 0
|
|
417
|
+
M V30 93 C -11.3018 -8.1733 0 0
|
|
418
|
+
M V30 94 O -12.8082 -7.8532 0 0
|
|
419
|
+
M V30 95 O 1.858 -5.5167 0 0
|
|
420
|
+
M V30 96 O -5.7524 -7.9894 0 0
|
|
421
|
+
M V30 97 C -13.8386 -8.9976 0 0
|
|
422
|
+
M V30 98 C -15.345 -8.6773 0 0
|
|
423
|
+
M V30 99 C -16.3756 -9.8219 0 0
|
|
424
|
+
M V30 100 C -15.8996 -11.2863 0 0
|
|
425
|
+
M V30 101 C -14.3934 -11.6068 0 0
|
|
426
|
+
M V30 102 O -13.3628 -10.4622 0 0
|
|
427
|
+
M V30 103 C -13.9176 -13.0714 0 0
|
|
428
|
+
M V30 104 O -16.93 -12.4308 0 0
|
|
429
|
+
M V30 105 O -17.882 -9.5018 0 0
|
|
430
|
+
M V30 106 N -15.8208 -7.2127 0 0
|
|
431
|
+
M V30 107 C -17.2856 -6.7367 0 0
|
|
432
|
+
M V30 108 C -17.7614 -5.2721 0 0
|
|
433
|
+
M V30 109 O -18.3744 -7.8257 0 0
|
|
434
|
+
M V30 110 O -15.062 -14.1018 0 0
|
|
435
|
+
M V30 111 O 4.8241 -1.9817 0 0
|
|
436
|
+
M V30 END ATOM
|
|
437
|
+
M V30 BEGIN BOND
|
|
438
|
+
M V30 1 1 2 3
|
|
439
|
+
M V30 2 1 3 4
|
|
440
|
+
M V30 3 1 6 7
|
|
441
|
+
M V30 4 1 7 8
|
|
442
|
+
M V30 5 1 8 9
|
|
443
|
+
M V30 6 1 9 10
|
|
444
|
+
M V30 7 1 1 2
|
|
445
|
+
M V30 8 1 3 12
|
|
446
|
+
M V30 9 1 4 5
|
|
447
|
+
M V30 10 1 5 6
|
|
448
|
+
M V30 11 2 6 13
|
|
449
|
+
M V30 12 1 10 11
|
|
450
|
+
M V30 13 1 11 15
|
|
451
|
+
M V30 14 1 15 16
|
|
452
|
+
M V30 15 1 15 17
|
|
453
|
+
M V30 16 2 10 14
|
|
454
|
+
M V30 17 1 18 19
|
|
455
|
+
M V30 18 1 19 20
|
|
456
|
+
M V30 19 1 22 23
|
|
457
|
+
M V30 20 1 23 24
|
|
458
|
+
M V30 21 1 26 27
|
|
459
|
+
M V30 22 1 27 28
|
|
460
|
+
M V30 23 1 28 29
|
|
461
|
+
M V30 24 1 29 30
|
|
462
|
+
M V30 25 2 26 33
|
|
463
|
+
M V30 26 2 20 32
|
|
464
|
+
M V30 27 1 20 21
|
|
465
|
+
M V30 28 1 21 22
|
|
466
|
+
M V30 29 1 24 25
|
|
467
|
+
M V30 30 1 25 26
|
|
468
|
+
M V30 31 1 30 31
|
|
469
|
+
M V30 32 1 31 34
|
|
470
|
+
M V30 33 1 35 36
|
|
471
|
+
M V30 34 1 36 37
|
|
472
|
+
M V30 35 1 37 38
|
|
473
|
+
M V30 36 1 34 35
|
|
474
|
+
M V30 37 1 38 39
|
|
475
|
+
M V30 38 1 34 39
|
|
476
|
+
M V30 39 1 38 40
|
|
477
|
+
M V30 40 1 35 43
|
|
478
|
+
M V30 41 1 43 44
|
|
479
|
+
M V30 42 1 44 45
|
|
480
|
+
M V30 43 2 44 46
|
|
481
|
+
M V30 44 1 36 42
|
|
482
|
+
M V30 45 1 37 41
|
|
483
|
+
M V30 46 1 40 47
|
|
484
|
+
M V30 47 1 18 48
|
|
485
|
+
M V30 48 1 15 49
|
|
486
|
+
M V30 49 1 50 51
|
|
487
|
+
M V30 50 1 51 52
|
|
488
|
+
M V30 51 1 54 55
|
|
489
|
+
M V30 52 1 55 56
|
|
490
|
+
M V30 53 1 58 59
|
|
491
|
+
M V30 54 1 59 60
|
|
492
|
+
M V30 55 2 58 64
|
|
493
|
+
M V30 56 2 52 63
|
|
494
|
+
M V30 57 1 52 53
|
|
495
|
+
M V30 58 1 53 54
|
|
496
|
+
M V30 59 1 56 57
|
|
497
|
+
M V30 60 1 57 58
|
|
498
|
+
M V30 61 1 61 62
|
|
499
|
+
M V30 62 1 62 65
|
|
500
|
+
M V30 63 1 66 67
|
|
501
|
+
M V30 64 1 67 68
|
|
502
|
+
M V30 65 1 68 69
|
|
503
|
+
M V30 66 1 65 66
|
|
504
|
+
M V30 67 1 69 70
|
|
505
|
+
M V30 68 1 65 70
|
|
506
|
+
M V30 69 1 69 71
|
|
507
|
+
M V30 70 1 66 74
|
|
508
|
+
M V30 71 1 74 75
|
|
509
|
+
M V30 72 1 75 76
|
|
510
|
+
M V30 73 2 75 77
|
|
511
|
+
M V30 74 1 67 73
|
|
512
|
+
M V30 75 1 68 72
|
|
513
|
+
M V30 76 1 71 78
|
|
514
|
+
M V30 77 1 50 79
|
|
515
|
+
M V30 78 1 49 79
|
|
516
|
+
M V30 79 1 60 80
|
|
517
|
+
M V30 80 1 80 61
|
|
518
|
+
M V30 81 1 81 82
|
|
519
|
+
M V30 82 1 82 83
|
|
520
|
+
M V30 83 1 85 86
|
|
521
|
+
M V30 84 1 86 87
|
|
522
|
+
M V30 85 1 89 90
|
|
523
|
+
M V30 86 1 90 91
|
|
524
|
+
M V30 87 1 91 92
|
|
525
|
+
M V30 88 1 92 93
|
|
526
|
+
M V30 89 2 89 96
|
|
527
|
+
M V30 90 2 83 95
|
|
528
|
+
M V30 91 1 83 84
|
|
529
|
+
M V30 92 1 84 85
|
|
530
|
+
M V30 93 1 87 88
|
|
531
|
+
M V30 94 1 88 89
|
|
532
|
+
M V30 95 1 93 94
|
|
533
|
+
M V30 96 1 94 97
|
|
534
|
+
M V30 97 1 98 99
|
|
535
|
+
M V30 98 1 99 100
|
|
536
|
+
M V30 99 1 100 101
|
|
537
|
+
M V30 100 1 97 98
|
|
538
|
+
M V30 101 1 101 102
|
|
539
|
+
M V30 102 1 97 102
|
|
540
|
+
M V30 103 1 101 103
|
|
541
|
+
M V30 104 1 98 106
|
|
542
|
+
M V30 105 1 106 107
|
|
543
|
+
M V30 106 1 107 108
|
|
544
|
+
M V30 107 2 107 109
|
|
545
|
+
M V30 108 1 99 105
|
|
546
|
+
M V30 109 1 100 104
|
|
547
|
+
M V30 110 1 103 110
|
|
548
|
+
M V30 111 1 81 111
|
|
549
|
+
M V30 112 1 16 111
|
|
550
|
+
M V30 113 1 17 48
|
|
551
|
+
M V30 END BOND
|
|
552
|
+
M V30 END CTAB
|
|
553
|
+
M END`;
|
|
554
|
+
|
|
81
555
|
export function getNucleotidesMol(smilesCodes: string[]) {
|
|
82
556
|
const molBlocks: string[] = [];
|
|
83
557
|
|
|
@@ -85,6 +559,8 @@ export function getNucleotidesMol(smilesCodes: string[]) {
|
|
|
85
559
|
smilesCodes[i] == 'OP(=O)(O)O' ? molBlocks.push(PHOSHATE) :
|
|
86
560
|
smilesCodes[i] == 'OP(=O)(S)O' ? molBlocks.push(THIOPHOSHATE) :
|
|
87
561
|
smilesCodes[i] == 'O[C@@H]1C[C@@H]O[C@H]1CO' ? molBlocks.push(rotateNucleotidesV3000(INVABASIC)) :
|
|
562
|
+
smilesCodes[i] == 'OCC(O)CNC(=O)CCCC(=O)NC(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)' ? molBlocks.push(GALNAC) :
|
|
563
|
+
smilesCodes[i] == 'C(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)NC(=O)CCCC(=O)NCC(O)CO' ? molBlocks.push(GALNACPRIME) :
|
|
88
564
|
molBlocks.push(rotateNucleotidesV3000(smilesCodes[i]));
|
|
89
565
|
}
|
|
90
566
|
|