@datagrok/sequence-translator 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +195 -6
- package/dist/package.js +197 -7
- package/package.json +3 -2
- package/package.png +0 -0
- package/src/package.ts +1 -1
- package/src/structures-works/converters.ts +20 -0
- package/src/structures-works/map.ts +166 -1
- package/src/structures-works/sequence-codes-tools.ts +7 -6
- package/{test-SequenceTranslator-089b6516ed77-2280593f.html → test-SequenceTranslator-b5ff4c12f22d-f3bfb562.html} +2 -2
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/sequence-translator",
|
|
3
3
|
"friendlyName": "Sequence Translator",
|
|
4
|
-
"version": "1.0.1",
|
|
4
|
+
"version": "1.0.2",
|
|
5
5
|
"description": "SequenceTranslator is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform, used to translate [oligonucleotide](https://en.wikipedia.org/wiki/Oligonucleotide) sequences between [different representations](https://github.com/datagrok-ai/public/tree/master/packages/SequenceTranslator#sequence-representations).",
|
|
6
6
|
"repository": {
|
|
7
7
|
"type": "git",
|
|
@@ -34,7 +34,8 @@
|
|
|
34
34
|
"test-local": "set HOST=localhost && jest"
|
|
35
35
|
},
|
|
36
36
|
"sources": [
|
|
37
|
-
"css/style.css"
|
|
37
|
+
"css/style.css",
|
|
38
|
+
"common/openchemlib-full.js"
|
|
38
39
|
],
|
|
39
40
|
"devDependencies": {
|
|
40
41
|
"@typescript-eslint/eslint-plugin": "^4.29.1",
|
package/package.png
CHANGED
|
Binary file
|
package/src/package.ts
CHANGED
|
@@ -278,7 +278,7 @@ const weightsObj: {[code: string]: number} = {};
|
|
|
278
278
|
for (const synthesizer of Object.keys(map)) {
|
|
279
279
|
for (const technology of Object.keys(map[synthesizer])) {
|
|
280
280
|
for (const code of Object.keys(map[synthesizer][technology]))
|
|
281
|
-
weightsObj[code]
|
|
281
|
+
weightsObj[code] ?? map[synthesizer][technology][code].weight;
|
|
282
282
|
}
|
|
283
283
|
}
|
|
284
284
|
for (const [key, value] of Object.entries(MODIFICATIONS))
|
|
@@ -1,3 +1,23 @@
|
|
|
1
|
+
import {lcmsToGcrs} from './map';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
|
|
4
|
+
//name: gcrsToLcms
|
|
5
|
+
//input: string nucleotides {semType: GCRS}
|
|
6
|
+
//output: string result {semType: LCMS}
|
|
7
|
+
export function gcrsToLcms(sequence: string): string {
|
|
8
|
+
const df = DG.DataFrame.fromCsv(lcmsToGcrs);
|
|
9
|
+
const arr1 = df.getCol('GCRS').toList();
|
|
10
|
+
const arr2 = df.getCol('LCMS').toList();
|
|
11
|
+
const obj: {[i: string]: string} = {};
|
|
12
|
+
arr1.forEach((element, index) => obj[element] = arr2[index]);
|
|
13
|
+
for (let i = 0; i < arr1.length; i++) {
|
|
14
|
+
arr1[i] = arr1[i].replace('(', '\\(');
|
|
15
|
+
arr1[i] = arr1[i].replace(')', '\\)');
|
|
16
|
+
}
|
|
17
|
+
const regExp = new RegExp('(' + arr1.join('|') + ')', 'g');
|
|
18
|
+
return sequence.replace(regExp, function(code) {return obj[code];});
|
|
19
|
+
}
|
|
20
|
+
|
|
1
21
|
//name: asoGapmersNucleotidesToBioSpring
|
|
2
22
|
//input: string nucleotides {semType: DNA nucleotides}
|
|
3
23
|
//output: string result {semType: BioSpring / Gapmers}
|
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {getAllCodesOfSynthesizer} from './sequence-codes-tools';
|
|
3
|
+
|
|
1
4
|
export const SYNTHESIZERS = {
|
|
2
5
|
RAW_NUCLEOTIDES: 'Raw Nucleotides',
|
|
3
6
|
BIOSPRING: 'BioSpring Codes',
|
|
4
7
|
GCRS: 'Janssen GCRS Codes',
|
|
5
8
|
AXOLABS: 'Axolabs Codes',
|
|
6
9
|
MERMADE_12: 'Mermade 12',
|
|
10
|
+
LCMS: 'LCMS',
|
|
7
11
|
};
|
|
8
12
|
export const TECHNOLOGIES = {
|
|
9
13
|
DNA: 'DNA',
|
|
@@ -52,7 +56,7 @@ export const MODIFICATIONS: {[index: string]: {molecularWeight: number, left: st
|
|
|
52
56
|
export const stadardPhosphateLinkSmiles = 'OP(=O)(O)O';
|
|
53
57
|
export const map: {[synthesizer: string]:
|
|
54
58
|
{[technology: string]: {[code: string]:
|
|
55
|
-
{'name'
|
|
59
|
+
{'name'?: string, 'weight'?: number, 'normalized'?: string, 'SMILES': string}}}} = {
|
|
56
60
|
'Raw Nucleotides': {
|
|
57
61
|
'DNA': {
|
|
58
62
|
'A': {
|
|
@@ -458,6 +462,7 @@ export const map: {[synthesizer: string]:
|
|
|
458
462
|
'SMILES': 'OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1O',
|
|
459
463
|
},
|
|
460
464
|
},
|
|
465
|
+
'Others': {},
|
|
461
466
|
},
|
|
462
467
|
'Mermade 12': {
|
|
463
468
|
'For 2\'-OMe and 2\'-F modified siRNA': {
|
|
@@ -559,4 +564,164 @@ export const map: {[synthesizer: string]:
|
|
|
559
564
|
},
|
|
560
565
|
},
|
|
561
566
|
},
|
|
567
|
+
// 'LCMS': {
|
|
568
|
+
// 'For 2\'-OMe and 2\'-F modified siRNA': {
|
|
562
569
|
};
|
|
570
|
+
|
|
571
|
+
export const lcmsToGcrs = `LCMS, GCRS
|
|
572
|
+
A, A
|
|
573
|
+
C, C
|
|
574
|
+
/5mC/, (5m)C
|
|
575
|
+
G, G
|
|
576
|
+
T, T
|
|
577
|
+
rA, rA
|
|
578
|
+
rC, rC
|
|
579
|
+
rG, rG
|
|
580
|
+
rU, rU
|
|
581
|
+
mA, mA
|
|
582
|
+
mC, mC
|
|
583
|
+
/5mmC/, (5m)mC
|
|
584
|
+
mG, mG
|
|
585
|
+
mU, mU
|
|
586
|
+
fA, fA
|
|
587
|
+
fC, fC
|
|
588
|
+
/5mfC/, (5m)fC
|
|
589
|
+
fG, fG
|
|
590
|
+
fU, fU
|
|
591
|
+
/afA/, afA
|
|
592
|
+
/afC/, afC
|
|
593
|
+
/afG/, afG
|
|
594
|
+
/afU/, afU
|
|
595
|
+
+A, lna A
|
|
596
|
+
+C, lna C
|
|
597
|
+
+G, lna G
|
|
598
|
+
+T, lna T
|
|
599
|
+
/moeA/, moeA
|
|
600
|
+
/moeC/, moeC
|
|
601
|
+
/5mmoeC/, (5m)moeC
|
|
602
|
+
/moeG/, moeG
|
|
603
|
+
/moeT/, moeT
|
|
604
|
+
/moeU/, moeU
|
|
605
|
+
/xA/, Anp
|
|
606
|
+
/xC/, Cnp
|
|
607
|
+
/x5mC/, (5m)Cnp
|
|
608
|
+
/xG/, Gnp
|
|
609
|
+
/xT/, Tnp
|
|
610
|
+
/xrA/, rAnp
|
|
611
|
+
/xrC/, rCnp
|
|
612
|
+
/xrG/, rGnp
|
|
613
|
+
/xrU/, rUnp
|
|
614
|
+
/xmA/, mAnp
|
|
615
|
+
/xmC/, mCnp
|
|
616
|
+
/x5mmC/, (5m)mCnp
|
|
617
|
+
/xmG/, mGnp
|
|
618
|
+
/xmU/, mUnp
|
|
619
|
+
/xfA/, fAnp
|
|
620
|
+
/xfC/, fCnp
|
|
621
|
+
/xfG/, fGnp
|
|
622
|
+
/xfT/, fTnp
|
|
623
|
+
/xfU/, fUnp
|
|
624
|
+
/xafA/, afAnp
|
|
625
|
+
/xafC/, afCnp
|
|
626
|
+
/xafG/, afGnp
|
|
627
|
+
/xafU/, afUnp
|
|
628
|
+
/xeA/, eAnp
|
|
629
|
+
/xeC/, eCnp
|
|
630
|
+
/xeG/, eGnp
|
|
631
|
+
/xeU/, eUnp
|
|
632
|
+
/xmoeA/, moeAnp
|
|
633
|
+
/xmoeC/, moeCnp
|
|
634
|
+
/x5mmoeC/, (5m)moeCnp
|
|
635
|
+
/xmoeG/, moeGnp
|
|
636
|
+
/xmoeU/, moeUnp
|
|
637
|
+
/UNA-A/, (UNA-A)
|
|
638
|
+
/UNA-C/, (UNA-C)
|
|
639
|
+
/UNA-G/, (UNA-G)
|
|
640
|
+
/UNA-T/, (UNA-T)
|
|
641
|
+
/UNA-U/, (UNA-U)
|
|
642
|
+
/GNA-A/, (GNA-A)
|
|
643
|
+
/GNA-C/, (GNA-C)
|
|
644
|
+
/GNA-G/, (GNA-G)
|
|
645
|
+
/GNA-T/, (GNA-T)
|
|
646
|
+
/GNA-U/, (GNA-U)
|
|
647
|
+
/5CholTEG/, (5-CholTEG)
|
|
648
|
+
/3CholTEG/, (TEGChol-3)
|
|
649
|
+
/Toco/, Toco
|
|
650
|
+
/Palm/, Palm
|
|
651
|
+
/GalNAc/, GalNAc
|
|
652
|
+
/GalNAc2/, GalNAc2
|
|
653
|
+
/GalNAc3/, GalNAc3
|
|
654
|
+
/GalNAc6/, GalNAc6
|
|
655
|
+
/GalNAc7/, GalNAc7
|
|
656
|
+
/GalNAc9/, GalNAc9
|
|
657
|
+
/GalNAc14/, GalNAc14
|
|
658
|
+
/NAG37/, NAG37
|
|
659
|
+
/HEG/, (HEG)
|
|
660
|
+
/TEG/, (TEG)
|
|
661
|
+
/AmmC6/, (NHC6)
|
|
662
|
+
/AmmC7/, (NHC7)
|
|
663
|
+
/AmmC12/, (NHC12)
|
|
664
|
+
/invAb/, (invabasic)
|
|
665
|
+
/invdT/, (invdT)
|
|
666
|
+
/VPmU/, (vinu)
|
|
667
|
+
*, ps
|
|
668
|
+
/2-C16U/, 2-C16U
|
|
669
|
+
/2-C18w9U/, 2-C18w9U
|
|
670
|
+
/JDi-Palm/, JDi-Palm
|
|
671
|
+
/J2-CONC16U/, J2-CONC16U
|
|
672
|
+
/J2-C3NC16U/, J2-C3NC16U
|
|
673
|
+
/J-C15Ada/, J-C15Ada
|
|
674
|
+
/J-2C15AdaU/, J-2C15AdaU
|
|
675
|
+
/J-C16NC6/, J-C16NC6
|
|
676
|
+
/R2-C6NH-U/, R2-C6NH-U
|
|
677
|
+
/J-M1/, J-M1
|
|
678
|
+
/J-B1/, J-B1
|
|
679
|
+
/J-B2/, J-B2
|
|
680
|
+
/J-M2/, J-M2
|
|
681
|
+
/2-C16C/, 2-C16C
|
|
682
|
+
/2-C16A/, 2-C16A
|
|
683
|
+
/2-C16G/, 2-C16G
|
|
684
|
+
/R2-C6NH-G/, R2-C6NH-G
|
|
685
|
+
/R2-C6NH-C/, R2-C6NH-C
|
|
686
|
+
/J2-CONC16A/, J2-CONC16A
|
|
687
|
+
/J2-CONC16C/, J2-CONC16C
|
|
688
|
+
/J2-CONC16G/, J2-CONC16G
|
|
689
|
+
/J2-C15AdaC/, J2-C15AdaC
|
|
690
|
+
/J2-M2U/, J2-M2U
|
|
691
|
+
/J2-B2U/, J2-B2U
|
|
692
|
+
/J2-C3NC16C/, J2-C3NC16C
|
|
693
|
+
/J2-C3NC16G/, J2-C3NC16G
|
|
694
|
+
/R2-C6NH-A/, R2-C6NH-A
|
|
695
|
+
/J2-C15AdaA/, J2-C15AdaA
|
|
696
|
+
/J2-C3NC16A/, J2-C3NC16A
|
|
697
|
+
/J-C5-SER-1/, J-C5-SER-1
|
|
698
|
+
/J-C16-SER-1/, J-C16-SER-1
|
|
699
|
+
/J-A2/, J-A2
|
|
700
|
+
/J-A1/, J-A1
|
|
701
|
+
/J2-C15AdaG/, J2-C15AdaG
|
|
702
|
+
/J-C16NAsp/, J-C16NAsp
|
|
703
|
+
/J2-C16NC6U/, J2-C16NC6U
|
|
704
|
+
/J-C5-REBO-1/, J-C5-REBO-1
|
|
705
|
+
/J-C16-REBO-1/, J-C16-REBO-1
|
|
706
|
+
/J-C16-IND-1/, J-C16-IND-1
|
|
707
|
+
/J-C5-IND-1/, J-C5-IND-1
|
|
708
|
+
/J-1C15Ada-2Man/, J-1C15Ada-2Man
|
|
709
|
+
/JG-1C15Ada-23DiMan/, JG-1C15Ada-2,3DiMan
|
|
710
|
+
/J-TriManPC/, J-TriManPC
|
|
711
|
+
/J-triManPO/, J-triManPO
|
|
712
|
+
/J-A4/, J-A4
|
|
713
|
+
/J-Ara-1/, J-Ara-1
|
|
714
|
+
/J-Ara-2/, J-Ara-2
|
|
715
|
+
/J-AcCS/, J-AcCS
|
|
716
|
+
/J-CbCS/, J-CbCS
|
|
717
|
+
/J-MtCD/, J-MtCD`;
|
|
718
|
+
|
|
719
|
+
function differenceOfTwoArrays(a: string[], b: string[]): string[] {
|
|
720
|
+
return a.filter((x) => !b.includes(x));
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
const codesWithSmiles = getAllCodesOfSynthesizer(SYNTHESIZERS.GCRS);
|
|
724
|
+
const allGcrsCodes = DG.DataFrame.fromCsv(lcmsToGcrs).getCol('GCRS').toList();
|
|
725
|
+
export const gcrsCodesWithoutSmiles = differenceOfTwoArrays(allGcrsCodes, codesWithSmiles);
|
|
726
|
+
for (const e of gcrsCodesWithoutSmiles)
|
|
727
|
+
map[SYNTHESIZERS.GCRS]['Others'][e] = {'SMILES': ''};
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
// import * as ui from 'datagrok-api/ui';
|
|
3
|
-
// import * as DG from 'datagrok-api/dg';
|
|
1
|
+
|
|
4
2
|
import {map, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS} from './map';
|
|
5
3
|
import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
|
|
6
4
|
asoGapmersBioSpringToNucleotides, asoGapmersBioSpringToGcrs, asoGapmersGcrsToNucleotides,
|
|
@@ -8,7 +6,7 @@ import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
|
|
|
8
6
|
siRnaNucleotideToAxolabsSenseStrand, siRnaNucleotidesToGcrs, siRnaBioSpringToNucleotides,
|
|
9
7
|
siRnaBioSpringToAxolabs, siRnaBioSpringToGcrs, siRnaAxolabsToNucleotides,
|
|
10
8
|
siRnaAxolabsToBioSpring, siRnaAxolabsToGcrs, siRnaGcrsToNucleotides,
|
|
11
|
-
siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides} from './converters';
|
|
9
|
+
siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides, gcrsToLcms} from './converters';
|
|
12
10
|
|
|
13
11
|
const noTranslationTableAvailable = 'No translation table available';
|
|
14
12
|
export const undefinedInputSequence = 'Type of input sequence is undefined';
|
|
@@ -206,11 +204,11 @@ export function isValidSequence(sequence: string, format: string | null): {
|
|
|
206
204
|
};
|
|
207
205
|
}
|
|
208
206
|
|
|
209
|
-
function getAllCodesOfSynthesizer(synthesizer: string): string[] {
|
|
207
|
+
export function getAllCodesOfSynthesizer(synthesizer: string): string[] {
|
|
210
208
|
let codes: string[] = [];
|
|
211
209
|
for (const technology of Object.keys(map[synthesizer]))
|
|
212
210
|
codes = codes.concat(Object.keys(map[synthesizer][technology]));
|
|
213
|
-
return codes.concat(Object.keys(MODIFICATIONS));
|
|
211
|
+
return codes.concat(Object.keys(MODIFICATIONS)).concat(',');
|
|
214
212
|
}
|
|
215
213
|
|
|
216
214
|
function getListOfPossibleSynthesizersByFirstMatchedCode(sequence: string): string[] {
|
|
@@ -273,6 +271,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
273
271
|
BioSpring: asoGapmersGcrsToBioSpring(sequence),
|
|
274
272
|
Mermade12: gcrsToMermade12(sequence),
|
|
275
273
|
GCRS: sequence,
|
|
274
|
+
LCMS: gcrsToLcms(sequence),
|
|
276
275
|
};
|
|
277
276
|
}
|
|
278
277
|
if (output.synthesizer!.includes(SYNTHESIZERS.RAW_NUCLEOTIDES) && output.technology!.includes(TECHNOLOGIES.RNA)) {
|
|
@@ -310,6 +309,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
310
309
|
Axolabs: siRnaGcrsToAxolabs(sequence),
|
|
311
310
|
MM12: gcrsToMermade12(sequence),
|
|
312
311
|
GCRS: sequence,
|
|
312
|
+
LCMS: gcrsToLcms(sequence),
|
|
313
313
|
};
|
|
314
314
|
}
|
|
315
315
|
if (output.synthesizer!.includes(SYNTHESIZERS.GCRS)) {
|
|
@@ -318,6 +318,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
318
318
|
Nucleotides: gcrsToNucleotides(sequence),
|
|
319
319
|
GCRS: sequence,
|
|
320
320
|
Mermade12: gcrsToMermade12(sequence),
|
|
321
|
+
LCMS: gcrsToLcms(sequence),
|
|
321
322
|
};
|
|
322
323
|
}
|
|
323
324
|
if (output.synthesizer!.includes(SYNTHESIZERS.MERMADE_12)) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
<html><head><meta charset="utf-8"/><title>SequenceTranslator Test Report. Datagrok version datagrok/datagrok:latest SHA=089b6516ed77. Commit 2280593f.</title><style type="text/css">html,
|
|
1
|
+
<html><head><meta charset="utf-8"/><title>SequenceTranslator Test Report. Datagrok version datagrok/datagrok:latest SHA=b5ff4c12f22d. Commit f3bfb562.</title><style type="text/css">html,
|
|
2
2
|
body {
|
|
3
3
|
font-family: Arial, Helvetica, sans-serif;
|
|
4
4
|
font-size: 1rem;
|
|
@@ -229,7 +229,7 @@ header {
|
|
|
229
229
|
font-size: 1rem;
|
|
230
230
|
padding: 0 0.5rem;
|
|
231
231
|
}
|
|
232
|
-
</style></head><body><div id="jesthtml-content"><header><h1 id="title">SequenceTranslator Test Report. Datagrok version datagrok/datagrok:latest SHA=
|
|
232
|
+
</style></head><body><div id="jesthtml-content"><header><h1 id="title">SequenceTranslator Test Report. Datagrok version datagrok/datagrok:latest SHA=b5ff4c12f22d. Commit f3bfb562.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-06-30 13:09:46</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/SequenceTranslator/src/__jest__/remote.test.ts</div><div class="suite-time warn">14.488s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">0.427s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: Evaluation failed: Unable to find JS function "test"
|
|
233
233
|
at ExecutionContext._evaluateInternal (/home/runner/work/public/public/packages/SequenceTranslator/node_modules/puppeteer/src/common/ExecutionContext.ts:273:13)
|
|
234
234
|
at processTicksAndRejections (internal/process/task_queues.js:97:5)
|
|
235
235
|
at ExecutionContext.evaluate (/home/runner/work/public/public/packages/SequenceTranslator/node_modules/puppeteer/src/common/ExecutionContext.ts:140:12)</pre></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.<anonymous> (/home/runner/work/public/public/packages/SequenceTranslator/src/__jest__/test-node.ts:62:11)
|