@datagrok/sequence-translator 1.0.1 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -16
- package/dist/package-test.js +199 -21
- package/dist/package.js +352 -107
- package/package.json +9 -8
- package/package.png +0 -0
- package/src/__jest__/remote.test.ts +33 -14
- package/src/defineAxolabsPattern.ts +48 -24
- package/src/package.ts +46 -6
- package/src/structures-works/converters.ts +20 -0
- package/src/structures-works/map.ts +166 -1
- package/src/structures-works/mol-transformations.ts +2 -2
- package/src/structures-works/sequence-codes-tools.ts +7 -6
- package/test-SequenceTranslator-4f0c8bae6479-18ff1615.html +276 -0
- package/vendors/openchemlib-full.js +293 -0
- package/webpack.config.js +1 -1
- package/test-SequenceTranslator-089b6516ed77-2280593f.html +0 -245
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/sequence-translator",
|
|
3
3
|
"friendlyName": "Sequence Translator",
|
|
4
|
-
"version": "1.0.
|
|
4
|
+
"version": "1.0.4",
|
|
5
5
|
"description": "SequenceTranslator is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform, used to translate [oligonucleotide](https://en.wikipedia.org/wiki/Oligonucleotide) sequences between [different representations](https://github.com/datagrok-ai/public/tree/master/packages/SequenceTranslator#sequence-representations).",
|
|
6
6
|
"repository": {
|
|
7
7
|
"type": "git",
|
|
@@ -21,20 +21,21 @@
|
|
|
21
21
|
},
|
|
22
22
|
"scripts": {
|
|
23
23
|
"link-api": "npm link datagrok-api",
|
|
24
|
-
"debug-sequencetranslator": "grok publish
|
|
25
|
-
"release-sequencetranslator": "grok publish
|
|
24
|
+
"debug-sequencetranslator": "grok publish",
|
|
25
|
+
"release-sequencetranslator": "grok publish localhost --release",
|
|
26
26
|
"build-sequencetranslator": "webpack",
|
|
27
27
|
"build": "webpack",
|
|
28
|
-
"debug-sequencetranslator-public": "grok publish public
|
|
29
|
-
"release-sequencetranslator-public": "grok publish public --
|
|
30
|
-
"debug-sequencetranslator-local": "grok publish local
|
|
31
|
-
"release-sequencetranslator-local": "grok publish local --
|
|
28
|
+
"debug-sequencetranslator-public": "grok publish public",
|
|
29
|
+
"release-sequencetranslator-public": "grok publish public --release",
|
|
30
|
+
"debug-sequencetranslator-local": "grok publish local",
|
|
31
|
+
"release-sequencetranslator-local": "grok publish local --release",
|
|
32
32
|
"test": "set HOST=dev && jest",
|
|
33
33
|
"test-dev": "set HOST=dev && jest",
|
|
34
34
|
"test-local": "set HOST=localhost && jest"
|
|
35
35
|
},
|
|
36
36
|
"sources": [
|
|
37
|
-
"css/style.css"
|
|
37
|
+
"css/style.css",
|
|
38
|
+
"vendors/openchemlib-full.js"
|
|
38
39
|
],
|
|
39
40
|
"devDependencies": {
|
|
40
41
|
"@typescript-eslint/eslint-plugin": "^4.29.1",
|
package/package.png
CHANGED
|
Binary file
|
|
@@ -16,34 +16,53 @@ beforeAll(async () => {
|
|
|
16
16
|
}, P_START_TIMEOUT);
|
|
17
17
|
|
|
18
18
|
afterAll(async () => {
|
|
19
|
-
await browser
|
|
19
|
+
await browser?.close();
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
expect.extend({
|
|
23
|
+
checkOutput(received, expected, context) {
|
|
24
|
+
if (received === expected) {
|
|
25
|
+
return {
|
|
26
|
+
message: () => context,
|
|
27
|
+
pass: true
|
|
28
|
+
};
|
|
29
|
+
} else {
|
|
30
|
+
return {
|
|
31
|
+
message: () => context,
|
|
32
|
+
pass: false
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
}
|
|
20
36
|
});
|
|
21
37
|
|
|
22
38
|
it('TEST', async () => {
|
|
23
|
-
const
|
|
24
|
-
console.log(`Testing ${
|
|
39
|
+
const targetPackage:string = process.env.TARGET_PACKAGE ?? 'SequenceTranslator';
|
|
40
|
+
console.log(`Testing ${targetPackage} package`);
|
|
25
41
|
|
|
26
|
-
|
|
27
|
-
let r = await page.evaluate((target_package):Promise<object> => {
|
|
42
|
+
let r = await page.evaluate((targetPackage):Promise<object> => {
|
|
28
43
|
return new Promise<object>((resolve, reject) => {
|
|
29
|
-
(<any>window).grok.functions.eval(
|
|
44
|
+
(<any>window).grok.functions.eval(targetPackage + ':test()').then((df: any) => {
|
|
30
45
|
let cStatus = df.columns.byName('success');
|
|
31
46
|
let cMessage = df.columns.byName('result');
|
|
32
47
|
let cCat = df.columns.byName('category');
|
|
33
48
|
let cName = df.columns.byName('name');
|
|
34
49
|
let failed = false;
|
|
35
|
-
let
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
50
|
+
let passReport = '';
|
|
51
|
+
let failReport = '';
|
|
52
|
+
for (let i = 0; i < df.rowCount; i++) {
|
|
53
|
+
if (cStatus.get(i)) {
|
|
54
|
+
passReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
55
|
+
} else {
|
|
39
56
|
failed = true;
|
|
57
|
+
failReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
40
58
|
}
|
|
41
|
-
|
|
59
|
+
}
|
|
60
|
+
resolve({failReport, passReport, failed});
|
|
42
61
|
}).catch((e: any) => reject(e));
|
|
43
62
|
});
|
|
44
|
-
},
|
|
63
|
+
}, targetPackage);
|
|
45
64
|
// @ts-ignore
|
|
46
|
-
console.log(r.
|
|
65
|
+
console.log(r.passReport);
|
|
47
66
|
// @ts-ignore
|
|
48
|
-
expect(r.failed).
|
|
67
|
+
expect(r.failed).checkOutput(false, r.failReport);
|
|
49
68
|
}, 100000);
|
|
@@ -82,7 +82,9 @@ function addColumnWithIds(tableName: string, columnName: string, patternName: st
|
|
|
82
82
|
if (columns.contains(nameOfNewColumn))
|
|
83
83
|
columns.remove(nameOfNewColumn);
|
|
84
84
|
const columnWithIds = columns.byName(columnName);
|
|
85
|
-
return columns.addNewString(nameOfNewColumn).init((i: number) =>
|
|
85
|
+
return columns.addNewString(nameOfNewColumn).init((i: number) => {
|
|
86
|
+
return (columnWithIds.getString(i) == '') ? '' : columnWithIds.get(i) + '_' + patternName;
|
|
87
|
+
});
|
|
86
88
|
}
|
|
87
89
|
|
|
88
90
|
function addColumnWithTranslatedSequences(
|
|
@@ -99,8 +101,10 @@ function addColumnWithTranslatedSequences(
|
|
|
99
101
|
columns.remove(nameOfNewColumn);
|
|
100
102
|
const columnWithInputSequences = columns.byName(columnName);
|
|
101
103
|
return columns.addNewString(nameOfNewColumn).init((i: number) => {
|
|
102
|
-
return
|
|
103
|
-
|
|
104
|
+
return columnWithInputSequences.getString(i) == '' ?
|
|
105
|
+
'' :
|
|
106
|
+
translateSequence(columnWithInputSequences.getString(i), bases, ptoLinkages, startModification, endModification,
|
|
107
|
+
firstPtoExist);
|
|
104
108
|
});
|
|
105
109
|
}
|
|
106
110
|
|
|
@@ -332,7 +336,7 @@ export function defineAxolabsPattern() {
|
|
|
332
336
|
const col = tables.value!.columns.byName(colName);
|
|
333
337
|
let allLengthsAreTheSame = true;
|
|
334
338
|
for (let i = 1; i < col.length; i++) {
|
|
335
|
-
if (col.get(i - 1).length != col.get(i).length) {
|
|
339
|
+
if (col.get(i - 1).length != col.get(i).length && col.get(i).length != 0) {
|
|
336
340
|
allLengthsAreTheSame = false;
|
|
337
341
|
break;
|
|
338
342
|
}
|
|
@@ -361,12 +365,13 @@ export function defineAxolabsPattern() {
|
|
|
361
365
|
}
|
|
362
366
|
|
|
363
367
|
async function postPatternToUserStorage() {
|
|
364
|
-
const
|
|
365
|
-
|
|
366
|
-
saveAs.value
|
|
368
|
+
const currUserName = await getCurrentUserName();
|
|
369
|
+
saveAs.value = (saveAs.stringValue.includes('(created by ')) ?
|
|
370
|
+
getShortName(saveAs.value) + currUserName :
|
|
371
|
+
saveAs.stringValue + currUserName;
|
|
367
372
|
return grok.dapi.userDataStorage.postValue(
|
|
368
373
|
userStorageKey,
|
|
369
|
-
saveAs.
|
|
374
|
+
saveAs.value,
|
|
370
375
|
JSON.stringify({
|
|
371
376
|
'ssBases': ssBases.slice(0, ssLength.value!).map((e) => e.value),
|
|
372
377
|
'asBases': asBases.slice(0, asLength.value!).map((e) => e.value),
|
|
@@ -387,11 +392,12 @@ export function defineAxolabsPattern() {
|
|
|
387
392
|
const lstMy: string[] = [];
|
|
388
393
|
const lstOthers: string[] = [];
|
|
389
394
|
|
|
395
|
+
// TODO: display short name, but use long for querying userdataStorage
|
|
390
396
|
for (const ent of Object.keys(entities)) {
|
|
391
397
|
if (await isCurrentUserCreatedThisPattern(ent))
|
|
392
398
|
lstOthers.push(ent);
|
|
393
399
|
else
|
|
394
|
-
lstMy.push(getShortName(ent));
|
|
400
|
+
lstMy.push(ent);//getShortName(ent));
|
|
395
401
|
}
|
|
396
402
|
|
|
397
403
|
let loadPattern = ui.choiceInput('Load Pattern', '', lstMy, (v: string) => parsePatternAndUpdateUi(v));
|
|
@@ -526,7 +532,7 @@ export function defineAxolabsPattern() {
|
|
|
526
532
|
const col = tables.value!.columns.byName(colName);
|
|
527
533
|
if (col.type != DG.TYPE.INT)
|
|
528
534
|
grok.shell.error('Column should contain integers only');
|
|
529
|
-
else if (col.categories.length < col.length) {
|
|
535
|
+
else if (col.categories.filter((e) => e != '').length < col.toList().filter((e) => e != '').length) {
|
|
530
536
|
const duplicates = findDuplicates(col.getRawData());
|
|
531
537
|
ui.dialog('Non-unique IDs')
|
|
532
538
|
.add(ui.divText('Press \'OK\' to select rows with non-unique values'))
|
|
@@ -541,25 +547,43 @@ export function defineAxolabsPattern() {
|
|
|
541
547
|
}
|
|
542
548
|
|
|
543
549
|
const tables = ui.tableInput('Tables', grok.shell.tables[0], grok.shell.tables, (t: DG.DataFrame) => {
|
|
544
|
-
const inputSsColumn =
|
|
545
|
-
|
|
550
|
+
const inputSsColumn = ui.choiceInput('SS Column', '', t.columns.names(), (colName: string) => {
|
|
551
|
+
validateSsColumn(colName);
|
|
552
|
+
ssVar = colName;
|
|
553
|
+
});
|
|
546
554
|
inputSsColumnDiv.innerHTML = '';
|
|
547
555
|
inputSsColumnDiv.append(inputSsColumn.root);
|
|
548
|
-
const inputAsColumn =
|
|
549
|
-
|
|
556
|
+
const inputAsColumn = ui.choiceInput('AS Column', '', t.columns.names(), (colName: string) => {
|
|
557
|
+
validateAsColumn(colName);
|
|
558
|
+
asVar = colName;
|
|
559
|
+
});
|
|
550
560
|
inputAsColumnDiv.innerHTML = '';
|
|
551
561
|
inputAsColumnDiv.append(inputAsColumn.root);
|
|
552
|
-
const inputIdColumn =
|
|
553
|
-
|
|
562
|
+
const inputIdColumn = ui.choiceInput('ID Column', '', t.columns.names(), (colName: string) => {
|
|
563
|
+
validateIdsColumn(colName);
|
|
564
|
+
idVar = colName;
|
|
565
|
+
});
|
|
554
566
|
inputIdColumnDiv.innerHTML = '';
|
|
555
567
|
inputIdColumnDiv.append(inputIdColumn.root);
|
|
556
568
|
});
|
|
557
569
|
|
|
558
|
-
|
|
570
|
+
let ssVar = '';
|
|
571
|
+
const inputSsColumn = ui.choiceInput('SS Column', '', [], (colName: string) => {
|
|
572
|
+
validateSsColumn(colName);
|
|
573
|
+
ssVar = colName;
|
|
574
|
+
});
|
|
559
575
|
inputSsColumnDiv.append(inputSsColumn.root);
|
|
560
|
-
|
|
576
|
+
let asVar = '';
|
|
577
|
+
const inputAsColumn = ui.choiceInput('AS Column', '', [], (colName: string) => {
|
|
578
|
+
validateAsColumn(colName);
|
|
579
|
+
asVar = colName;
|
|
580
|
+
});
|
|
561
581
|
inputAsColumnDiv.append(inputAsColumn.root);
|
|
562
|
-
|
|
582
|
+
let idVar = '';
|
|
583
|
+
const inputIdColumn = ui.choiceInput('ID Column', '', [], (colName: string) => {
|
|
584
|
+
validateIdsColumn(colName);
|
|
585
|
+
idVar = colName;
|
|
586
|
+
});
|
|
563
587
|
inputIdColumnDiv.append(inputIdColumn.root);
|
|
564
588
|
|
|
565
589
|
updatePatternsList();
|
|
@@ -638,7 +662,7 @@ export function defineAxolabsPattern() {
|
|
|
638
662
|
});
|
|
639
663
|
|
|
640
664
|
const convertSequenceButton = ui.button('Convert Sequences', () => {
|
|
641
|
-
if (
|
|
665
|
+
if (ssVar == '' || (createAsStrand.value && asVar == ''))
|
|
642
666
|
grok.shell.info('Please select table and columns on which to apply pattern');
|
|
643
667
|
else if (ssLength.value != ssInputExample.value.length || asLength.value != asInputExample.value.length) {
|
|
644
668
|
const dialog = ui.dialog('Length Mismatch');
|
|
@@ -652,14 +676,14 @@ export function defineAxolabsPattern() {
|
|
|
652
676
|
})
|
|
653
677
|
.show();
|
|
654
678
|
} else {
|
|
655
|
-
if (
|
|
656
|
-
addColumnWithIds(tables.value!.name,
|
|
679
|
+
if (idVar != '')
|
|
680
|
+
addColumnWithIds(tables.value!.name, idVar, getShortName(saveAs.value));
|
|
657
681
|
addColumnWithTranslatedSequences(
|
|
658
|
-
tables.value!.name,
|
|
682
|
+
tables.value!.name, ssVar, ssBases, ssPtoLinkages,
|
|
659
683
|
ssFiveModification, ssThreeModification, firstSsPto.value!);
|
|
660
684
|
if (createAsStrand.value) {
|
|
661
685
|
addColumnWithTranslatedSequences(
|
|
662
|
-
tables.value!.name,
|
|
686
|
+
tables.value!.name, asVar, asBases, asPtoLinkages,
|
|
663
687
|
asFiveModification, asThreeModification, firstAsPto.value!);
|
|
664
688
|
}
|
|
665
689
|
grok.shell.v = grok.shell.getTableView(tables.value!.name);
|
package/src/package.ts
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
-
import * as OCL from 'openchemlib/full.js';
|
|
6
5
|
import $ from 'cash-dom';
|
|
7
6
|
import {defineAxolabsPattern} from './defineAxolabsPattern';
|
|
8
7
|
import {saveSenseAntiSense} from './structures-works/save-sense-antisense';
|
|
@@ -10,6 +9,8 @@ import {sequenceToSmiles, sequenceToMolV3000} from './structures-works/from-mono
|
|
|
10
9
|
import {convertSequence, undefinedInputSequence, isValidSequence, getFormat} from
|
|
11
10
|
'./structures-works/sequence-codes-tools';
|
|
12
11
|
import {map, COL_NAMES, MODIFICATIONS} from './structures-works/map';
|
|
12
|
+
import {siRnaAxolabsToGcrs, gcrsToNucleotides, asoGapmersBioSpringToGcrs, gcrsToMermade12,
|
|
13
|
+
} from './structures-works/converters';
|
|
13
14
|
import {SALTS_CSV} from './salts';
|
|
14
15
|
import {USERS_CSV} from './users';
|
|
15
16
|
import {ICDS} from './ICDs';
|
|
@@ -216,10 +217,12 @@ export function sequenceTranslator(): void {
|
|
|
216
217
|
'SDF': saveSenseAntiSense(),
|
|
217
218
|
});
|
|
218
219
|
|
|
220
|
+
$(codesTablesDiv).hide();
|
|
221
|
+
|
|
219
222
|
const v = grok.shell.newView('Sequence Translator', [tabControl]);
|
|
220
223
|
v.box = true;
|
|
221
224
|
|
|
222
|
-
const switchInput = ui.switchInput('Codes',
|
|
225
|
+
const switchInput = ui.switchInput('Codes', false, (v: boolean) => (v) ?
|
|
223
226
|
$(codesTablesDiv).show() :
|
|
224
227
|
$(codesTablesDiv).hide(),
|
|
225
228
|
);
|
|
@@ -278,7 +281,7 @@ const weightsObj: {[code: string]: number} = {};
|
|
|
278
281
|
for (const synthesizer of Object.keys(map)) {
|
|
279
282
|
for (const technology of Object.keys(map[synthesizer])) {
|
|
280
283
|
for (const code of Object.keys(map[synthesizer][technology]))
|
|
281
|
-
weightsObj[code]
|
|
284
|
+
weightsObj[code] ?? map[synthesizer][technology][code].weight;
|
|
282
285
|
}
|
|
283
286
|
}
|
|
284
287
|
for (const [key, value] of Object.entries(MODIFICATIONS))
|
|
@@ -307,8 +310,45 @@ function molecularWeight(sequence: string, weightsObj: {[index: string]: number}
|
|
|
307
310
|
//tags: autostart
|
|
308
311
|
export function autostartOligoSdFileSubscription() {
|
|
309
312
|
grok.events.onViewAdded.subscribe((v: any) => {
|
|
310
|
-
if (v.type == 'TableView'
|
|
311
|
-
|
|
313
|
+
if (v.type == 'TableView') {
|
|
314
|
+
if (v.dataFrame.columns.contains(COL_NAMES.TYPE))
|
|
315
|
+
oligoSdFile(v.dataFrame);
|
|
316
|
+
grok.events.onContextMenu.subscribe((args) => {
|
|
317
|
+
const seqCol = args.args.context.table.currentCol;
|
|
318
|
+
if (DG.Detector.sampleCategories(seqCol, (s) => /^[fsACGUacgu]{6,}$/.test(s))) {
|
|
319
|
+
args.args.menu.item('Convert Axolabs to GCRS', () => {
|
|
320
|
+
args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
321
|
+
return siRnaAxolabsToGcrs(seqCol.get(i));
|
|
322
|
+
});
|
|
323
|
+
});
|
|
324
|
+
} else if (DG.Detector.sampleCategories(seqCol, (s) => /^[fmpsACGU]{6,}$/.test(s)) ||
|
|
325
|
+
DG.Detector.sampleCategories(seqCol, (s) => /^(?=.*moe)(?=.*5mC)(?=.*ps){6,}/.test(s))) {
|
|
326
|
+
args.args.menu.item('Convert GCRS to raw', () => {
|
|
327
|
+
args.args.context.table.columns.addNewString(seqCol.name + ' to raw').init((i: number) => {
|
|
328
|
+
return gcrsToNucleotides(seqCol.get(i));
|
|
329
|
+
});
|
|
330
|
+
});
|
|
331
|
+
args.args.menu.item('Convert GCRS to MM12', () => {
|
|
332
|
+
args.args.context.table.columns.addNewString(seqCol.name + ' to MM12').init((i: number) => {
|
|
333
|
+
return gcrsToMermade12(seqCol.get(i));
|
|
334
|
+
});
|
|
335
|
+
});
|
|
336
|
+
} else if (DG.Detector.sampleCategories(seqCol, (s) => /^[*56789ATGC]{6,}$/.test(s))) {
|
|
337
|
+
args.args.menu.item('Convert Biospring to GCRS', () => {
|
|
338
|
+
const seqCol = args.args.context.table.currentCol;
|
|
339
|
+
args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
340
|
+
return asoGapmersBioSpringToGcrs(seqCol.get(i));
|
|
341
|
+
});
|
|
342
|
+
});
|
|
343
|
+
} else if (DG.Detector.sampleCategories(seqCol, (s) => /^[*1-8]{6,}$/.test(s))) {
|
|
344
|
+
args.args.menu.item('Convert Biospring to GCRS', () => {
|
|
345
|
+
args.args.context.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
346
|
+
return siRnaAxolabsToGcrs(seqCol.get(i));
|
|
347
|
+
});
|
|
348
|
+
});
|
|
349
|
+
}
|
|
350
|
+
});
|
|
351
|
+
}
|
|
312
352
|
});
|
|
313
353
|
}
|
|
314
354
|
|
|
@@ -366,7 +406,7 @@ export function oligoSdFile(table: DG.DataFrame) {
|
|
|
366
406
|
ui.button('Save SD file', () => saveTableAsSdFile(addColumnsPressed ? newDf : table)),
|
|
367
407
|
);
|
|
368
408
|
|
|
369
|
-
const view = grok.shell.getTableView(table.name)
|
|
409
|
+
const view = grok.shell.getTableView(table.name);
|
|
370
410
|
|
|
371
411
|
view.table!.col(COL_NAMES.TYPE)!.setTag(DG.TAGS.CHOICES, '["AS", "SS", "Duplex"]');
|
|
372
412
|
view.table!.col(COL_NAMES.OWNER)!.setTag(DG.TAGS.CHOICES, stringifyItems(usersDf.columns.byIndex(0).toList()));
|
|
@@ -1,3 +1,23 @@
|
|
|
1
|
+
import {lcmsToGcrs} from './map';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
|
|
4
|
+
//name: gcrsToLcms
|
|
5
|
+
//input: string nucleotides {semType: GCRS}
|
|
6
|
+
//output: string result {semType: LCMS}
|
|
7
|
+
export function gcrsToLcms(sequence: string): string {
|
|
8
|
+
const df = DG.DataFrame.fromCsv(lcmsToGcrs);
|
|
9
|
+
const arr1 = df.getCol('GCRS').toList();
|
|
10
|
+
const arr2 = df.getCol('LCMS').toList();
|
|
11
|
+
const obj: {[i: string]: string} = {};
|
|
12
|
+
arr1.forEach((element, index) => obj[element] = arr2[index]);
|
|
13
|
+
for (let i = 0; i < arr1.length; i++) {
|
|
14
|
+
arr1[i] = arr1[i].replace('(', '\\(');
|
|
15
|
+
arr1[i] = arr1[i].replace(')', '\\)');
|
|
16
|
+
}
|
|
17
|
+
const regExp = new RegExp('(' + arr1.join('|') + ')', 'g');
|
|
18
|
+
return sequence.replace(regExp, function(code) {return obj[code];});
|
|
19
|
+
}
|
|
20
|
+
|
|
1
21
|
//name: asoGapmersNucleotidesToBioSpring
|
|
2
22
|
//input: string nucleotides {semType: DNA nucleotides}
|
|
3
23
|
//output: string result {semType: BioSpring / Gapmers}
|
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {getAllCodesOfSynthesizer} from './sequence-codes-tools';
|
|
3
|
+
|
|
1
4
|
export const SYNTHESIZERS = {
|
|
2
5
|
RAW_NUCLEOTIDES: 'Raw Nucleotides',
|
|
3
6
|
BIOSPRING: 'BioSpring Codes',
|
|
4
7
|
GCRS: 'Janssen GCRS Codes',
|
|
5
8
|
AXOLABS: 'Axolabs Codes',
|
|
6
9
|
MERMADE_12: 'Mermade 12',
|
|
10
|
+
LCMS: 'LCMS',
|
|
7
11
|
};
|
|
8
12
|
export const TECHNOLOGIES = {
|
|
9
13
|
DNA: 'DNA',
|
|
@@ -52,7 +56,7 @@ export const MODIFICATIONS: {[index: string]: {molecularWeight: number, left: st
|
|
|
52
56
|
export const stadardPhosphateLinkSmiles = 'OP(=O)(O)O';
|
|
53
57
|
export const map: {[synthesizer: string]:
|
|
54
58
|
{[technology: string]: {[code: string]:
|
|
55
|
-
{'name'
|
|
59
|
+
{'name'?: string, 'weight'?: number, 'normalized'?: string, 'SMILES': string}}}} = {
|
|
56
60
|
'Raw Nucleotides': {
|
|
57
61
|
'DNA': {
|
|
58
62
|
'A': {
|
|
@@ -458,6 +462,7 @@ export const map: {[synthesizer: string]:
|
|
|
458
462
|
'SMILES': 'OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1O',
|
|
459
463
|
},
|
|
460
464
|
},
|
|
465
|
+
'Others': {},
|
|
461
466
|
},
|
|
462
467
|
'Mermade 12': {
|
|
463
468
|
'For 2\'-OMe and 2\'-F modified siRNA': {
|
|
@@ -559,4 +564,164 @@ export const map: {[synthesizer: string]:
|
|
|
559
564
|
},
|
|
560
565
|
},
|
|
561
566
|
},
|
|
567
|
+
// 'LCMS': {
|
|
568
|
+
// 'For 2\'-OMe and 2\'-F modified siRNA': {
|
|
562
569
|
};
|
|
570
|
+
|
|
571
|
+
export const lcmsToGcrs = `LCMS, GCRS
|
|
572
|
+
A, A
|
|
573
|
+
C, C
|
|
574
|
+
/5mC/, (5m)C
|
|
575
|
+
G, G
|
|
576
|
+
T, T
|
|
577
|
+
rA, rA
|
|
578
|
+
rC, rC
|
|
579
|
+
rG, rG
|
|
580
|
+
rU, rU
|
|
581
|
+
mA, mA
|
|
582
|
+
mC, mC
|
|
583
|
+
/5mmC/, (5m)mC
|
|
584
|
+
mG, mG
|
|
585
|
+
mU, mU
|
|
586
|
+
fA, fA
|
|
587
|
+
fC, fC
|
|
588
|
+
/5mfC/, (5m)fC
|
|
589
|
+
fG, fG
|
|
590
|
+
fU, fU
|
|
591
|
+
/afA/, afA
|
|
592
|
+
/afC/, afC
|
|
593
|
+
/afG/, afG
|
|
594
|
+
/afU/, afU
|
|
595
|
+
+A, lna A
|
|
596
|
+
+C, lna C
|
|
597
|
+
+G, lna G
|
|
598
|
+
+T, lna T
|
|
599
|
+
/moeA/, moeA
|
|
600
|
+
/moeC/, moeC
|
|
601
|
+
/5mmoeC/, (5m)moeC
|
|
602
|
+
/moeG/, moeG
|
|
603
|
+
/moeT/, moeT
|
|
604
|
+
/moeU/, moeU
|
|
605
|
+
/xA/, Anp
|
|
606
|
+
/xC/, Cnp
|
|
607
|
+
/x5mC/, (5m)Cnp
|
|
608
|
+
/xG/, Gnp
|
|
609
|
+
/xT/, Tnp
|
|
610
|
+
/xrA/, rAnp
|
|
611
|
+
/xrC/, rCnp
|
|
612
|
+
/xrG/, rGnp
|
|
613
|
+
/xrU/, rUnp
|
|
614
|
+
/xmA/, mAnp
|
|
615
|
+
/xmC/, mCnp
|
|
616
|
+
/x5mmC/, (5m)mCnp
|
|
617
|
+
/xmG/, mGnp
|
|
618
|
+
/xmU/, mUnp
|
|
619
|
+
/xfA/, fAnp
|
|
620
|
+
/xfC/, fCnp
|
|
621
|
+
/xfG/, fGnp
|
|
622
|
+
/xfT/, fTnp
|
|
623
|
+
/xfU/, fUnp
|
|
624
|
+
/xafA/, afAnp
|
|
625
|
+
/xafC/, afCnp
|
|
626
|
+
/xafG/, afGnp
|
|
627
|
+
/xafU/, afUnp
|
|
628
|
+
/xeA/, eAnp
|
|
629
|
+
/xeC/, eCnp
|
|
630
|
+
/xeG/, eGnp
|
|
631
|
+
/xeU/, eUnp
|
|
632
|
+
/xmoeA/, moeAnp
|
|
633
|
+
/xmoeC/, moeCnp
|
|
634
|
+
/x5mmoeC/, (5m)moeCnp
|
|
635
|
+
/xmoeG/, moeGnp
|
|
636
|
+
/xmoeU/, moeUnp
|
|
637
|
+
/UNA-A/, (UNA-A)
|
|
638
|
+
/UNA-C/, (UNA-C)
|
|
639
|
+
/UNA-G/, (UNA-G)
|
|
640
|
+
/UNA-T/, (UNA-T)
|
|
641
|
+
/UNA-U/, (UNA-U)
|
|
642
|
+
/GNA-A/, (GNA-A)
|
|
643
|
+
/GNA-C/, (GNA-C)
|
|
644
|
+
/GNA-G/, (GNA-G)
|
|
645
|
+
/GNA-T/, (GNA-T)
|
|
646
|
+
/GNA-U/, (GNA-U)
|
|
647
|
+
/5CholTEG/, (5-CholTEG)
|
|
648
|
+
/3CholTEG/, (TEGChol-3)
|
|
649
|
+
/Toco/, Toco
|
|
650
|
+
/Palm/, Palm
|
|
651
|
+
/GalNAc/, GalNAc
|
|
652
|
+
/GalNAc2/, GalNAc2
|
|
653
|
+
/GalNAc3/, GalNAc3
|
|
654
|
+
/GalNAc6/, GalNAc6
|
|
655
|
+
/GalNAc7/, GalNAc7
|
|
656
|
+
/GalNAc9/, GalNAc9
|
|
657
|
+
/GalNAc14/, GalNAc14
|
|
658
|
+
/NAG37/, NAG37
|
|
659
|
+
/HEG/, (HEG)
|
|
660
|
+
/TEG/, (TEG)
|
|
661
|
+
/AmmC6/, (NHC6)
|
|
662
|
+
/AmmC7/, (NHC7)
|
|
663
|
+
/AmmC12/, (NHC12)
|
|
664
|
+
/invAb/, (invabasic)
|
|
665
|
+
/invdT/, (invdT)
|
|
666
|
+
/VPmU/, (vinu)
|
|
667
|
+
*, ps
|
|
668
|
+
/2-C16U/, 2-C16U
|
|
669
|
+
/2-C18w9U/, 2-C18w9U
|
|
670
|
+
/JDi-Palm/, JDi-Palm
|
|
671
|
+
/J2-CONC16U/, J2-CONC16U
|
|
672
|
+
/J2-C3NC16U/, J2-C3NC16U
|
|
673
|
+
/J-C15Ada/, J-C15Ada
|
|
674
|
+
/J-2C15AdaU/, J-2C15AdaU
|
|
675
|
+
/J-C16NC6/, J-C16NC6
|
|
676
|
+
/R2-C6NH-U/, R2-C6NH-U
|
|
677
|
+
/J-M1/, J-M1
|
|
678
|
+
/J-B1/, J-B1
|
|
679
|
+
/J-B2/, J-B2
|
|
680
|
+
/J-M2/, J-M2
|
|
681
|
+
/2-C16C/, 2-C16C
|
|
682
|
+
/2-C16A/, 2-C16A
|
|
683
|
+
/2-C16G/, 2-C16G
|
|
684
|
+
/R2-C6NH-G/, R2-C6NH-G
|
|
685
|
+
/R2-C6NH-C/, R2-C6NH-C
|
|
686
|
+
/J2-CONC16A/, J2-CONC16A
|
|
687
|
+
/J2-CONC16C/, J2-CONC16C
|
|
688
|
+
/J2-CONC16G/, J2-CONC16G
|
|
689
|
+
/J2-C15AdaC/, J2-C15AdaC
|
|
690
|
+
/J2-M2U/, J2-M2U
|
|
691
|
+
/J2-B2U/, J2-B2U
|
|
692
|
+
/J2-C3NC16C/, J2-C3NC16C
|
|
693
|
+
/J2-C3NC16G/, J2-C3NC16G
|
|
694
|
+
/R2-C6NH-A/, R2-C6NH-A
|
|
695
|
+
/J2-C15AdaA/, J2-C15AdaA
|
|
696
|
+
/J2-C3NC16A/, J2-C3NC16A
|
|
697
|
+
/J-C5-SER-1/, J-C5-SER-1
|
|
698
|
+
/J-C16-SER-1/, J-C16-SER-1
|
|
699
|
+
/J-A2/, J-A2
|
|
700
|
+
/J-A1/, J-A1
|
|
701
|
+
/J2-C15AdaG/, J2-C15AdaG
|
|
702
|
+
/J-C16NAsp/, J-C16NAsp
|
|
703
|
+
/J2-C16NC6U/, J2-C16NC6U
|
|
704
|
+
/J-C5-REBO-1/, J-C5-REBO-1
|
|
705
|
+
/J-C16-REBO-1/, J-C16-REBO-1
|
|
706
|
+
/J-C16-IND-1/, J-C16-IND-1
|
|
707
|
+
/J-C5-IND-1/, J-C5-IND-1
|
|
708
|
+
/J-1C15Ada-2Man/, J-1C15Ada-2Man
|
|
709
|
+
/JG-1C15Ada-23DiMan/, JG-1C15Ada-2,3DiMan
|
|
710
|
+
/J-TriManPC/, J-TriManPC
|
|
711
|
+
/J-triManPO/, J-triManPO
|
|
712
|
+
/J-A4/, J-A4
|
|
713
|
+
/J-Ara-1/, J-Ara-1
|
|
714
|
+
/J-Ara-2/, J-Ara-2
|
|
715
|
+
/J-AcCS/, J-AcCS
|
|
716
|
+
/J-CbCS/, J-CbCS
|
|
717
|
+
/J-MtCD/, J-MtCD`;
|
|
718
|
+
|
|
719
|
+
function differenceOfTwoArrays(a: string[], b: string[]): string[] {
|
|
720
|
+
return a.filter((x) => !b.includes(x));
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
const codesWithSmiles = getAllCodesOfSynthesizer(SYNTHESIZERS.GCRS);
|
|
724
|
+
const allGcrsCodes = DG.DataFrame.fromCsv(lcmsToGcrs).getCol('GCRS').toList();
|
|
725
|
+
export const gcrsCodesWithoutSmiles = differenceOfTwoArrays(allGcrsCodes, codesWithSmiles);
|
|
726
|
+
for (const e of gcrsCodesWithoutSmiles)
|
|
727
|
+
map[SYNTHESIZERS.GCRS]['Others'][e] = {'SMILES': ''};
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import * as OCL from 'openchemlib/full.js';
|
|
2
|
-
|
|
3
1
|
const PHOSHATE = `
|
|
4
2
|
Datagrok monomer library Nucleotides
|
|
5
3
|
|
|
@@ -254,6 +252,7 @@ export function linkV3000(molBlocks: string[], twoChains: boolean = false, oclRe
|
|
|
254
252
|
}
|
|
255
253
|
|
|
256
254
|
function rotateNucleotidesV3000(molecule: string) {
|
|
255
|
+
// @ts-ignore
|
|
257
256
|
let molBlock = molecule.includes('M END') ? molecule : OCL.Molecule.fromSmiles(molecule).toMolfileV3();
|
|
258
257
|
const coordinates = extractAtomDataV3000(molBlock);
|
|
259
258
|
const natom = coordinates.atomIndex.length;
|
|
@@ -320,6 +319,7 @@ function rotateNucleotidesV3000(molecule: string) {
|
|
|
320
319
|
}
|
|
321
320
|
|
|
322
321
|
function invertNucleotidesV3000(molecule: string) {
|
|
322
|
+
// @ts-ignore
|
|
323
323
|
let molBlock = molecule.includes('M END') ? molecule : OCL.Molecule.fromSmiles(molecule).toMolfileV3();
|
|
324
324
|
const coordinates = extractAtomDataV3000(molBlock);
|
|
325
325
|
const natom = coordinates.atomIndex.length;
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
// import * as ui from 'datagrok-api/ui';
|
|
3
|
-
// import * as DG from 'datagrok-api/dg';
|
|
1
|
+
|
|
4
2
|
import {map, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS} from './map';
|
|
5
3
|
import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
|
|
6
4
|
asoGapmersBioSpringToNucleotides, asoGapmersBioSpringToGcrs, asoGapmersGcrsToNucleotides,
|
|
@@ -8,7 +6,7 @@ import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
|
|
|
8
6
|
siRnaNucleotideToAxolabsSenseStrand, siRnaNucleotidesToGcrs, siRnaBioSpringToNucleotides,
|
|
9
7
|
siRnaBioSpringToAxolabs, siRnaBioSpringToGcrs, siRnaAxolabsToNucleotides,
|
|
10
8
|
siRnaAxolabsToBioSpring, siRnaAxolabsToGcrs, siRnaGcrsToNucleotides,
|
|
11
|
-
siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides} from './converters';
|
|
9
|
+
siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides, gcrsToLcms} from './converters';
|
|
12
10
|
|
|
13
11
|
const noTranslationTableAvailable = 'No translation table available';
|
|
14
12
|
export const undefinedInputSequence = 'Type of input sequence is undefined';
|
|
@@ -206,11 +204,11 @@ export function isValidSequence(sequence: string, format: string | null): {
|
|
|
206
204
|
};
|
|
207
205
|
}
|
|
208
206
|
|
|
209
|
-
function getAllCodesOfSynthesizer(synthesizer: string): string[] {
|
|
207
|
+
export function getAllCodesOfSynthesizer(synthesizer: string): string[] {
|
|
210
208
|
let codes: string[] = [];
|
|
211
209
|
for (const technology of Object.keys(map[synthesizer]))
|
|
212
210
|
codes = codes.concat(Object.keys(map[synthesizer][technology]));
|
|
213
|
-
return codes.concat(Object.keys(MODIFICATIONS));
|
|
211
|
+
return codes.concat(Object.keys(MODIFICATIONS)).concat(',');
|
|
214
212
|
}
|
|
215
213
|
|
|
216
214
|
function getListOfPossibleSynthesizersByFirstMatchedCode(sequence: string): string[] {
|
|
@@ -273,6 +271,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
273
271
|
BioSpring: asoGapmersGcrsToBioSpring(sequence),
|
|
274
272
|
Mermade12: gcrsToMermade12(sequence),
|
|
275
273
|
GCRS: sequence,
|
|
274
|
+
LCMS: gcrsToLcms(sequence),
|
|
276
275
|
};
|
|
277
276
|
}
|
|
278
277
|
if (output.synthesizer!.includes(SYNTHESIZERS.RAW_NUCLEOTIDES) && output.technology!.includes(TECHNOLOGIES.RNA)) {
|
|
@@ -310,6 +309,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
310
309
|
Axolabs: siRnaGcrsToAxolabs(sequence),
|
|
311
310
|
MM12: gcrsToMermade12(sequence),
|
|
312
311
|
GCRS: sequence,
|
|
312
|
+
LCMS: gcrsToLcms(sequence),
|
|
313
313
|
};
|
|
314
314
|
}
|
|
315
315
|
if (output.synthesizer!.includes(SYNTHESIZERS.GCRS)) {
|
|
@@ -318,6 +318,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
318
318
|
Nucleotides: gcrsToNucleotides(sequence),
|
|
319
319
|
GCRS: sequence,
|
|
320
320
|
Mermade12: gcrsToMermade12(sequence),
|
|
321
|
+
LCMS: gcrsToLcms(sequence),
|
|
321
322
|
};
|
|
322
323
|
}
|
|
323
324
|
if (output.synthesizer!.includes(SYNTHESIZERS.MERMADE_12)) {
|