@datagrok/sequence-translator 0.0.12 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -11
- package/dist/package-test.js +404 -148
- package/dist/package.js +409 -136
- package/package.json +9 -7
- package/package.png +0 -0
- package/src/defineAxolabsPattern.ts +58 -55
- package/src/package-test.ts +1 -1
- package/src/package.ts +29 -18
- package/src/structures-works/converters.ts +20 -0
- package/src/structures-works/from-monomers.ts +20 -12
- package/src/structures-works/map.ts +166 -1
- package/src/structures-works/mol-transformations.ts +23 -11
- package/src/structures-works/save-sense-antisense.ts +5 -2
- package/src/structures-works/sequence-codes-tools.ts +133 -49
- package/src/tests/smiles-tests.ts +31 -27
- package/{test-SequenceTranslator-c2bbc2b235db-afc0e1c5.html → test-SequenceTranslator-b5ff4c12f22d-f3bfb562.html} +2 -2
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {getAllCodesOfSynthesizer} from './sequence-codes-tools';
|
|
3
|
+
|
|
1
4
|
export const SYNTHESIZERS = {
|
|
2
5
|
RAW_NUCLEOTIDES: 'Raw Nucleotides',
|
|
3
6
|
BIOSPRING: 'BioSpring Codes',
|
|
4
7
|
GCRS: 'Janssen GCRS Codes',
|
|
5
8
|
AXOLABS: 'Axolabs Codes',
|
|
6
9
|
MERMADE_12: 'Mermade 12',
|
|
10
|
+
LCMS: 'LCMS',
|
|
7
11
|
};
|
|
8
12
|
export const TECHNOLOGIES = {
|
|
9
13
|
DNA: 'DNA',
|
|
@@ -52,7 +56,7 @@ export const MODIFICATIONS: {[index: string]: {molecularWeight: number, left: st
|
|
|
52
56
|
export const stadardPhosphateLinkSmiles = 'OP(=O)(O)O';
|
|
53
57
|
export const map: {[synthesizer: string]:
|
|
54
58
|
{[technology: string]: {[code: string]:
|
|
55
|
-
{'name'
|
|
59
|
+
{'name'?: string, 'weight'?: number, 'normalized'?: string, 'SMILES': string}}}} = {
|
|
56
60
|
'Raw Nucleotides': {
|
|
57
61
|
'DNA': {
|
|
58
62
|
'A': {
|
|
@@ -458,6 +462,7 @@ export const map: {[synthesizer: string]:
|
|
|
458
462
|
'SMILES': 'OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1O',
|
|
459
463
|
},
|
|
460
464
|
},
|
|
465
|
+
'Others': {},
|
|
461
466
|
},
|
|
462
467
|
'Mermade 12': {
|
|
463
468
|
'For 2\'-OMe and 2\'-F modified siRNA': {
|
|
@@ -559,4 +564,164 @@ export const map: {[synthesizer: string]:
|
|
|
559
564
|
},
|
|
560
565
|
},
|
|
561
566
|
},
|
|
567
|
+
// 'LCMS': {
|
|
568
|
+
// 'For 2\'-OMe and 2\'-F modified siRNA': {
|
|
562
569
|
};
|
|
570
|
+
|
|
571
|
+
export const lcmsToGcrs = `LCMS, GCRS
|
|
572
|
+
A, A
|
|
573
|
+
C, C
|
|
574
|
+
/5mC/, (5m)C
|
|
575
|
+
G, G
|
|
576
|
+
T, T
|
|
577
|
+
rA, rA
|
|
578
|
+
rC, rC
|
|
579
|
+
rG, rG
|
|
580
|
+
rU, rU
|
|
581
|
+
mA, mA
|
|
582
|
+
mC, mC
|
|
583
|
+
/5mmC/, (5m)mC
|
|
584
|
+
mG, mG
|
|
585
|
+
mU, mU
|
|
586
|
+
fA, fA
|
|
587
|
+
fC, fC
|
|
588
|
+
/5mfC/, (5m)fC
|
|
589
|
+
fG, fG
|
|
590
|
+
fU, fU
|
|
591
|
+
/afA/, afA
|
|
592
|
+
/afC/, afC
|
|
593
|
+
/afG/, afG
|
|
594
|
+
/afU/, afU
|
|
595
|
+
+A, lna A
|
|
596
|
+
+C, lna C
|
|
597
|
+
+G, lna G
|
|
598
|
+
+T, lna T
|
|
599
|
+
/moeA/, moeA
|
|
600
|
+
/moeC/, moeC
|
|
601
|
+
/5mmoeC/, (5m)moeC
|
|
602
|
+
/moeG/, moeG
|
|
603
|
+
/moeT/, moeT
|
|
604
|
+
/moeU/, moeU
|
|
605
|
+
/xA/, Anp
|
|
606
|
+
/xC/, Cnp
|
|
607
|
+
/x5mC/, (5m)Cnp
|
|
608
|
+
/xG/, Gnp
|
|
609
|
+
/xT/, Tnp
|
|
610
|
+
/xrA/, rAnp
|
|
611
|
+
/xrC/, rCnp
|
|
612
|
+
/xrG/, rGnp
|
|
613
|
+
/xrU/, rUnp
|
|
614
|
+
/xmA/, mAnp
|
|
615
|
+
/xmC/, mCnp
|
|
616
|
+
/x5mmC/, (5m)mCnp
|
|
617
|
+
/xmG/, mGnp
|
|
618
|
+
/xmU/, mUnp
|
|
619
|
+
/xfA/, fAnp
|
|
620
|
+
/xfC/, fCnp
|
|
621
|
+
/xfG/, fGnp
|
|
622
|
+
/xfT/, fTnp
|
|
623
|
+
/xfU/, fUnp
|
|
624
|
+
/xafA/, afAnp
|
|
625
|
+
/xafC/, afCnp
|
|
626
|
+
/xafG/, afGnp
|
|
627
|
+
/xafU/, afUnp
|
|
628
|
+
/xeA/, eAnp
|
|
629
|
+
/xeC/, eCnp
|
|
630
|
+
/xeG/, eGnp
|
|
631
|
+
/xeU/, eUnp
|
|
632
|
+
/xmoeA/, moeAnp
|
|
633
|
+
/xmoeC/, moeCnp
|
|
634
|
+
/x5mmoeC/, (5m)moeCnp
|
|
635
|
+
/xmoeG/, moeGnp
|
|
636
|
+
/xmoeU/, moeUnp
|
|
637
|
+
/UNA-A/, (UNA-A)
|
|
638
|
+
/UNA-C/, (UNA-C)
|
|
639
|
+
/UNA-G/, (UNA-G)
|
|
640
|
+
/UNA-T/, (UNA-T)
|
|
641
|
+
/UNA-U/, (UNA-U)
|
|
642
|
+
/GNA-A/, (GNA-A)
|
|
643
|
+
/GNA-C/, (GNA-C)
|
|
644
|
+
/GNA-G/, (GNA-G)
|
|
645
|
+
/GNA-T/, (GNA-T)
|
|
646
|
+
/GNA-U/, (GNA-U)
|
|
647
|
+
/5CholTEG/, (5-CholTEG)
|
|
648
|
+
/3CholTEG/, (TEGChol-3)
|
|
649
|
+
/Toco/, Toco
|
|
650
|
+
/Palm/, Palm
|
|
651
|
+
/GalNAc/, GalNAc
|
|
652
|
+
/GalNAc2/, GalNAc2
|
|
653
|
+
/GalNAc3/, GalNAc3
|
|
654
|
+
/GalNAc6/, GalNAc6
|
|
655
|
+
/GalNAc7/, GalNAc7
|
|
656
|
+
/GalNAc9/, GalNAc9
|
|
657
|
+
/GalNAc14/, GalNAc14
|
|
658
|
+
/NAG37/, NAG37
|
|
659
|
+
/HEG/, (HEG)
|
|
660
|
+
/TEG/, (TEG)
|
|
661
|
+
/AmmC6/, (NHC6)
|
|
662
|
+
/AmmC7/, (NHC7)
|
|
663
|
+
/AmmC12/, (NHC12)
|
|
664
|
+
/invAb/, (invabasic)
|
|
665
|
+
/invdT/, (invdT)
|
|
666
|
+
/VPmU/, (vinu)
|
|
667
|
+
*, ps
|
|
668
|
+
/2-C16U/, 2-C16U
|
|
669
|
+
/2-C18w9U/, 2-C18w9U
|
|
670
|
+
/JDi-Palm/, JDi-Palm
|
|
671
|
+
/J2-CONC16U/, J2-CONC16U
|
|
672
|
+
/J2-C3NC16U/, J2-C3NC16U
|
|
673
|
+
/J-C15Ada/, J-C15Ada
|
|
674
|
+
/J-2C15AdaU/, J-2C15AdaU
|
|
675
|
+
/J-C16NC6/, J-C16NC6
|
|
676
|
+
/R2-C6NH-U/, R2-C6NH-U
|
|
677
|
+
/J-M1/, J-M1
|
|
678
|
+
/J-B1/, J-B1
|
|
679
|
+
/J-B2/, J-B2
|
|
680
|
+
/J-M2/, J-M2
|
|
681
|
+
/2-C16C/, 2-C16C
|
|
682
|
+
/2-C16A/, 2-C16A
|
|
683
|
+
/2-C16G/, 2-C16G
|
|
684
|
+
/R2-C6NH-G/, R2-C6NH-G
|
|
685
|
+
/R2-C6NH-C/, R2-C6NH-C
|
|
686
|
+
/J2-CONC16A/, J2-CONC16A
|
|
687
|
+
/J2-CONC16C/, J2-CONC16C
|
|
688
|
+
/J2-CONC16G/, J2-CONC16G
|
|
689
|
+
/J2-C15AdaC/, J2-C15AdaC
|
|
690
|
+
/J2-M2U/, J2-M2U
|
|
691
|
+
/J2-B2U/, J2-B2U
|
|
692
|
+
/J2-C3NC16C/, J2-C3NC16C
|
|
693
|
+
/J2-C3NC16G/, J2-C3NC16G
|
|
694
|
+
/R2-C6NH-A/, R2-C6NH-A
|
|
695
|
+
/J2-C15AdaA/, J2-C15AdaA
|
|
696
|
+
/J2-C3NC16A/, J2-C3NC16A
|
|
697
|
+
/J-C5-SER-1/, J-C5-SER-1
|
|
698
|
+
/J-C16-SER-1/, J-C16-SER-1
|
|
699
|
+
/J-A2/, J-A2
|
|
700
|
+
/J-A1/, J-A1
|
|
701
|
+
/J2-C15AdaG/, J2-C15AdaG
|
|
702
|
+
/J-C16NAsp/, J-C16NAsp
|
|
703
|
+
/J2-C16NC6U/, J2-C16NC6U
|
|
704
|
+
/J-C5-REBO-1/, J-C5-REBO-1
|
|
705
|
+
/J-C16-REBO-1/, J-C16-REBO-1
|
|
706
|
+
/J-C16-IND-1/, J-C16-IND-1
|
|
707
|
+
/J-C5-IND-1/, J-C5-IND-1
|
|
708
|
+
/J-1C15Ada-2Man/, J-1C15Ada-2Man
|
|
709
|
+
/JG-1C15Ada-23DiMan/, JG-1C15Ada-2,3DiMan
|
|
710
|
+
/J-TriManPC/, J-TriManPC
|
|
711
|
+
/J-triManPO/, J-triManPO
|
|
712
|
+
/J-A4/, J-A4
|
|
713
|
+
/J-Ara-1/, J-Ara-1
|
|
714
|
+
/J-Ara-2/, J-Ara-2
|
|
715
|
+
/J-AcCS/, J-AcCS
|
|
716
|
+
/J-CbCS/, J-CbCS
|
|
717
|
+
/J-MtCD/, J-MtCD`;
|
|
718
|
+
|
|
719
|
+
function differenceOfTwoArrays(a: string[], b: string[]): string[] {
|
|
720
|
+
return a.filter((x) => !b.includes(x));
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
const codesWithSmiles = getAllCodesOfSynthesizer(SYNTHESIZERS.GCRS);
|
|
724
|
+
const allGcrsCodes = DG.DataFrame.fromCsv(lcmsToGcrs).getCol('GCRS').toList();
|
|
725
|
+
export const gcrsCodesWithoutSmiles = differenceOfTwoArrays(allGcrsCodes, codesWithSmiles);
|
|
726
|
+
for (const e of gcrsCodesWithoutSmiles)
|
|
727
|
+
map[SYNTHESIZERS.GCRS]['Others'][e] = {'SMILES': ''};
|
|
@@ -93,7 +93,7 @@ export function getNucleotidesMol(smilesCodes: string[], oclRender: boolean = fa
|
|
|
93
93
|
return linkV3000(molBlocks, false, oclRender);
|
|
94
94
|
}
|
|
95
95
|
|
|
96
|
-
export function linkV3000(molBlocks: string[],
|
|
96
|
+
export function linkV3000(molBlocks: string[], twoChains: boolean = false, oclRender: boolean = false) {
|
|
97
97
|
let macroMolBlock = '\nDatagrok macromolecule handler\n\n';
|
|
98
98
|
macroMolBlock += ' 0 0 0 0 0 0 999 V3000\n';
|
|
99
99
|
macroMolBlock += 'M V30 BEGIN CTAB\n';
|
|
@@ -103,10 +103,9 @@ export function linkV3000(molBlocks: string[], twoMolecules: boolean = false, oc
|
|
|
103
103
|
const collection: number [] = [];
|
|
104
104
|
let natom = 0;
|
|
105
105
|
let nbond = 0;
|
|
106
|
-
let sequenceShift = 0;
|
|
107
106
|
let xShift = 0;
|
|
108
107
|
|
|
109
|
-
if (
|
|
108
|
+
if (twoChains && molBlocks.length > 1)
|
|
110
109
|
molBlocks[1] = invertNucleotidesV3000(molBlocks[1]);
|
|
111
110
|
|
|
112
111
|
for (let i = 0; i < molBlocks.length; i++) {
|
|
@@ -114,13 +113,23 @@ export function linkV3000(molBlocks: string[], twoMolecules: boolean = false, oc
|
|
|
114
113
|
.replaceAll('-\nM V30 ', '').replaceAll(' )', ')');
|
|
115
114
|
const numbers = extractAtomsBondsNumbersV3000(molBlocks[i]);
|
|
116
115
|
const coordinates = extractAtomDataV3000(molBlocks[i]);
|
|
116
|
+
|
|
117
|
+
if (twoChains) {
|
|
118
|
+
const xShiftRight = Math.min(...coordinates.x);
|
|
119
|
+
const yShift = i == 0 ? Math.min(...coordinates.y) - 1 : Math.max(...coordinates.y) + 1;
|
|
120
|
+
for (let j = 0; j < coordinates.x.length; j++)
|
|
121
|
+
coordinates.x[j] -= xShiftRight;
|
|
122
|
+
for (let j = 0; j < coordinates.y.length; j++)
|
|
123
|
+
coordinates.y[j] -= yShift;
|
|
124
|
+
}
|
|
125
|
+
|
|
117
126
|
let indexAtoms = molBlocks[i].indexOf('M V30 BEGIN ATOM'); // V3000 index for atoms coordinates
|
|
118
127
|
indexAtoms = molBlocks[i].indexOf('\n', indexAtoms);
|
|
119
128
|
let index = indexAtoms;
|
|
120
129
|
let indexEnd = indexAtoms;
|
|
121
130
|
|
|
122
131
|
for (let j = 0; j < numbers.natom; j++) {
|
|
123
|
-
if (coordinates.atomIndex[j] != 1 || i == 0 ||
|
|
132
|
+
if (coordinates.atomIndex[j] != 1 || i == 0 || twoChains) {
|
|
124
133
|
//rewrite atom number
|
|
125
134
|
index = molBlocks[i].indexOf('V30', index) + 4;
|
|
126
135
|
indexEnd = molBlocks[i].indexOf(' ', index);
|
|
@@ -132,13 +141,17 @@ export function linkV3000(molBlocks: string[], twoMolecules: boolean = false, oc
|
|
|
132
141
|
index = molBlocks[i].indexOf(' ', index) + 1;
|
|
133
142
|
indexEnd = molBlocks[i].indexOf(' ', index);
|
|
134
143
|
|
|
135
|
-
const totalShift = xShift - coordinates.x[0];
|
|
136
|
-
let coordinate =
|
|
144
|
+
const totalShift = twoChains ? 0 : xShift - coordinates.x[0];
|
|
145
|
+
let coordinate = twoChains ?
|
|
146
|
+
Math.round(10000*coordinates.x[j])/10000 :
|
|
147
|
+
Math.round(10000*(parseFloat(molBlocks[i].substring(index, indexEnd)) + totalShift))/10000;
|
|
137
148
|
molBlocks[i] = molBlocks[i].slice(0, index) + coordinate + molBlocks[i].slice(indexEnd);
|
|
138
149
|
|
|
139
150
|
index = molBlocks[i].indexOf(' ', index) + 1;
|
|
140
151
|
indexEnd = molBlocks[i].indexOf(' ', index);
|
|
141
|
-
coordinate =
|
|
152
|
+
coordinate = twoChains ?
|
|
153
|
+
Math.round(10000*coordinates.y[j])/10000 :
|
|
154
|
+
Math.round(10000*(parseFloat(molBlocks[i].substring(index, indexEnd))))/10000;
|
|
142
155
|
molBlocks[i] = molBlocks[i].slice(0, index) + coordinate + molBlocks[i].slice(indexEnd);
|
|
143
156
|
|
|
144
157
|
index = molBlocks[i].indexOf('\n', index) + 1;
|
|
@@ -194,10 +207,9 @@ export function linkV3000(molBlocks: string[], twoMolecules: boolean = false, oc
|
|
|
194
207
|
indexCollection = molBlocks[i].indexOf('M V30 MDLV30/STEABS ATOMS=(', indexCollection);
|
|
195
208
|
}
|
|
196
209
|
|
|
197
|
-
natom +=
|
|
210
|
+
natom += twoChains ? numbers.natom : numbers.natom - 1;
|
|
198
211
|
nbond += numbers.nbond;
|
|
199
|
-
xShift +=
|
|
200
|
-
sequenceShift += twoMolecules ? -7 : 0;
|
|
212
|
+
xShift += twoChains ? 0 : coordinates.x[numbers.natom - 1] - coordinates.x[0];
|
|
201
213
|
}
|
|
202
214
|
|
|
203
215
|
const entries = 4;
|
|
@@ -224,7 +236,7 @@ export function linkV3000(molBlocks: string[], twoMolecules: boolean = false, oc
|
|
|
224
236
|
}
|
|
225
237
|
|
|
226
238
|
//generate file
|
|
227
|
-
|
|
239
|
+
twoChains? natom : natom++;
|
|
228
240
|
macroMolBlock += 'M V30 COUNTS ' + natom + ' ' + nbond + ' 0 0 0\n';
|
|
229
241
|
macroMolBlock += 'M V30 BEGIN ATOM\n';
|
|
230
242
|
macroMolBlock += atomBlock;
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import * as ui from 'datagrok-api/ui';
|
|
2
2
|
import {sequenceToMolV3000} from '../structures-works/from-monomers';
|
|
3
3
|
import {linkV3000} from '../structures-works/mol-transformations';
|
|
4
|
+
import {getFormat} from '../structures-works/sequence-codes-tools';
|
|
4
5
|
|
|
5
6
|
export function saveSdf(as: string, ss: string, oneEntity: boolean, fit3dx: boolean) {
|
|
6
|
-
const
|
|
7
|
-
const
|
|
7
|
+
const formatAs = getFormat(as);
|
|
8
|
+
const formatSs = getFormat(ss);
|
|
9
|
+
const molSS = sequenceToMolV3000(ss, false, false, formatSs!);
|
|
10
|
+
const molAS = sequenceToMolV3000(as, true, false, formatAs!);
|
|
8
11
|
let result: string;
|
|
9
12
|
if (oneEntity)
|
|
10
13
|
result = linkV3000([molSS, molAS], true, !fit3dx) + '\n\n$$$$\n';
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
|
-
// import * as DG from 'datagrok-api/dg';
|
|
1
|
+
|
|
4
2
|
import {map, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS} from './map';
|
|
5
3
|
import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
|
|
6
4
|
asoGapmersBioSpringToNucleotides, asoGapmersBioSpringToGcrs, asoGapmersGcrsToNucleotides,
|
|
@@ -8,29 +6,110 @@ import {asoGapmersNucleotidesToBioSpring, asoGapmersNucleotidesToGcrs,
|
|
|
8
6
|
siRnaNucleotideToAxolabsSenseStrand, siRnaNucleotidesToGcrs, siRnaBioSpringToNucleotides,
|
|
9
7
|
siRnaBioSpringToAxolabs, siRnaBioSpringToGcrs, siRnaAxolabsToNucleotides,
|
|
10
8
|
siRnaAxolabsToBioSpring, siRnaAxolabsToGcrs, siRnaGcrsToNucleotides,
|
|
11
|
-
siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides} from './converters';
|
|
9
|
+
siRnaGcrsToBioSpring, siRnaGcrsToAxolabs, gcrsToNucleotides, gcrsToLcms} from './converters';
|
|
12
10
|
|
|
13
11
|
const noTranslationTableAvailable = 'No translation table available';
|
|
14
12
|
export const undefinedInputSequence = 'Type of input sequence is undefined';
|
|
15
13
|
|
|
16
|
-
export function
|
|
14
|
+
export function getFormat(sequence: string): string | null {
|
|
15
|
+
const possibleSynthesizers = getListOfPossibleSynthesizersByFirstMatchedCode(sequence);
|
|
16
|
+
|
|
17
|
+
if (possibleSynthesizers.length == 0)
|
|
18
|
+
return null;
|
|
19
|
+
|
|
20
|
+
let outputIndex = 0;
|
|
21
|
+
|
|
22
|
+
const firstUniqueCharacters = ['r', 'd'];
|
|
23
|
+
const nucleotides = ['A', 'U', 'T', 'C', 'G'];
|
|
24
|
+
|
|
25
|
+
possibleSynthesizers.forEach((synthesizer) => {
|
|
26
|
+
const codes = getAllCodesOfSynthesizer(synthesizer);
|
|
27
|
+
while (outputIndex < sequence.length) {
|
|
28
|
+
const matchedCode = codes.find((c) => c == sequence.slice(outputIndex, outputIndex + c.length));
|
|
29
|
+
|
|
30
|
+
if (matchedCode == null)
|
|
31
|
+
break;
|
|
32
|
+
|
|
33
|
+
if ( // for mistake pattern 'rAA'
|
|
34
|
+
outputIndex > 1 &&
|
|
35
|
+
nucleotides.includes(sequence[outputIndex]) &&
|
|
36
|
+
firstUniqueCharacters.includes(sequence[outputIndex - 2])
|
|
37
|
+
) break;
|
|
38
|
+
|
|
39
|
+
if ( // for mistake pattern 'ArA'
|
|
40
|
+
firstUniqueCharacters.includes(sequence[outputIndex + 1]) &&
|
|
41
|
+
nucleotides.includes(sequence[outputIndex])
|
|
42
|
+
) {
|
|
43
|
+
outputIndex++;
|
|
44
|
+
break;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
outputIndex += matchedCode.length;
|
|
48
|
+
}
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const indexOfFirstNotValidChar = (outputIndex == sequence.length) ? -1 : outputIndex;
|
|
52
|
+
if (indexOfFirstNotValidChar != -1)
|
|
53
|
+
return possibleSynthesizers[0];
|
|
54
|
+
|
|
55
|
+
const possibleTechnologies = getListOfPossibleTechnologiesByFirstMatchedCode(sequence, possibleSynthesizers[0]);
|
|
56
|
+
|
|
57
|
+
if (possibleTechnologies.length == 0)
|
|
58
|
+
return null;
|
|
59
|
+
|
|
60
|
+
outputIndex = 0;
|
|
61
|
+
|
|
62
|
+
possibleTechnologies.forEach((technology: string) => {
|
|
63
|
+
const codes = Object.keys(map[possibleSynthesizers[0]][technology]);
|
|
64
|
+
while (outputIndex < sequence.length) {
|
|
65
|
+
const matchedCode = codes.find((c) => c == sequence.slice(outputIndex, outputIndex + c.length));
|
|
66
|
+
|
|
67
|
+
if (matchedCode == null)
|
|
68
|
+
break;
|
|
69
|
+
|
|
70
|
+
if ( // for mistake pattern 'rAA'
|
|
71
|
+
outputIndex > 1 &&
|
|
72
|
+
nucleotides.includes(sequence[outputIndex]) &&
|
|
73
|
+
firstUniqueCharacters.includes(sequence[outputIndex - 2])
|
|
74
|
+
) break;
|
|
75
|
+
|
|
76
|
+
if ( // for mistake pattern 'ArA'
|
|
77
|
+
firstUniqueCharacters.includes(sequence[outputIndex + 1]) &&
|
|
78
|
+
nucleotides.includes(sequence[outputIndex])
|
|
79
|
+
) {
|
|
80
|
+
outputIndex++;
|
|
81
|
+
break;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
outputIndex += matchedCode.length;
|
|
85
|
+
}
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
return possibleSynthesizers[0];
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
export function isValidSequence(sequence: string, format: string | null): {
|
|
17
92
|
indexOfFirstNotValidChar: number,
|
|
18
|
-
synthesizer: string | null,
|
|
19
|
-
technology: string | null
|
|
93
|
+
synthesizer: string[] | null,
|
|
94
|
+
technology: string[] | null
|
|
20
95
|
} {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
96
|
+
const possibleSynthesizers = format == null ?
|
|
97
|
+
getListOfPossibleSynthesizersByFirstMatchedCode(sequence) :
|
|
98
|
+
[format];
|
|
99
|
+
|
|
100
|
+
// if (possibleSynthesizers.length > 1) {
|
|
101
|
+
// const synthesizer = ui.choiceInput('Choose synthesizer from list: ', possibleSynthesizers[0],
|
|
102
|
+
// possibleSynthesizers);
|
|
103
|
+
// ui.dialog('Choose Synthesizer')
|
|
104
|
+
// .add(ui.panel([synthesizer.root], {style: {fontWeight: 'bold'}}))
|
|
105
|
+
// .onOK(() => possibleSynthesizers = [synthesizer.value])
|
|
106
|
+
// .onCancel(() => {
|
|
107
|
+
// possibleSynthesizers = [possibleSynthesizers[0]];
|
|
108
|
+
// grok.shell.warning('Input sequence is expected to be in format ' + possibleSynthesizers[0]);
|
|
109
|
+
// })
|
|
110
|
+
// .show();
|
|
111
|
+
// } else if (possibleSynthesizers.length == 0)
|
|
112
|
+
if (possibleSynthesizers.length == 0)
|
|
34
113
|
return {indexOfFirstNotValidChar: 0, synthesizer: null, technology: null};
|
|
35
114
|
|
|
36
115
|
let outputIndex = 0;
|
|
@@ -68,24 +147,26 @@ export function isValidSequence(sequence: string): {
|
|
|
68
147
|
if (indexOfFirstNotValidChar != -1) {
|
|
69
148
|
return {
|
|
70
149
|
indexOfFirstNotValidChar: indexOfFirstNotValidChar,
|
|
71
|
-
synthesizer: possibleSynthesizers
|
|
150
|
+
synthesizer: possibleSynthesizers,
|
|
72
151
|
technology: null,
|
|
73
152
|
};
|
|
74
153
|
}
|
|
75
154
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if (possibleTechnologies.length > 1) {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
155
|
+
const possibleTechnologies = getListOfPossibleTechnologiesByFirstMatchedCode(sequence, possibleSynthesizers[0]);
|
|
156
|
+
|
|
157
|
+
// if (possibleTechnologies.length > 1) {
|
|
158
|
+
// const technology = ui.choiceInput('Choose technology from list: ', possibleTechnologies[0],
|
|
159
|
+
// possibleTechnologies);
|
|
160
|
+
// ui.dialog('Choose Technology')
|
|
161
|
+
// .add(ui.panel([technology.root], {style: {fontWeight: 'bold'}}))
|
|
162
|
+
// .onOK(() => possibleTechnologies = [technology.value])
|
|
163
|
+
// .onCancel(() => {
|
|
164
|
+
// possibleTechnologies = [possibleTechnologies[0]];
|
|
165
|
+
// grok.shell.warning('Input sequence is expected to be in format ' + possibleTechnologies[0]);
|
|
166
|
+
// })
|
|
167
|
+
// .show();
|
|
168
|
+
// } else if (possibleTechnologies.length == 0)
|
|
169
|
+
if (possibleTechnologies.length == 0)
|
|
89
170
|
return {indexOfFirstNotValidChar: 0, synthesizer: null, technology: null};
|
|
90
171
|
|
|
91
172
|
outputIndex = 0;
|
|
@@ -118,16 +199,16 @@ export function isValidSequence(sequence: string): {
|
|
|
118
199
|
|
|
119
200
|
return {
|
|
120
201
|
indexOfFirstNotValidChar: indexOfFirstNotValidChar,
|
|
121
|
-
synthesizer: possibleSynthesizers
|
|
122
|
-
technology: possibleTechnologies[outputIndex],
|
|
202
|
+
synthesizer: possibleSynthesizers,
|
|
203
|
+
technology: [possibleTechnologies[outputIndex]],
|
|
123
204
|
};
|
|
124
205
|
}
|
|
125
206
|
|
|
126
|
-
function getAllCodesOfSynthesizer(synthesizer: string): string[] {
|
|
207
|
+
export function getAllCodesOfSynthesizer(synthesizer: string): string[] {
|
|
127
208
|
let codes: string[] = [];
|
|
128
209
|
for (const technology of Object.keys(map[synthesizer]))
|
|
129
210
|
codes = codes.concat(Object.keys(map[synthesizer][technology]));
|
|
130
|
-
return codes.concat(Object.keys(MODIFICATIONS));
|
|
211
|
+
return codes.concat(Object.keys(MODIFICATIONS)).concat(',');
|
|
131
212
|
}
|
|
132
213
|
|
|
133
214
|
function getListOfPossibleSynthesizersByFirstMatchedCode(sequence: string): string[] {
|
|
@@ -159,7 +240,7 @@ function getListOfPossibleTechnologiesByFirstMatchedCode(sequence: string, synth
|
|
|
159
240
|
}
|
|
160
241
|
|
|
161
242
|
export function convertSequence(sequence: string, output: {
|
|
162
|
-
indexOfFirstNotValidChar: number, synthesizer: string | null, technology: string | null}) {
|
|
243
|
+
indexOfFirstNotValidChar: number, synthesizer: string[] | null, technology: string[] | null}) {
|
|
163
244
|
if (output.indexOfFirstNotValidChar != -1) {
|
|
164
245
|
return {
|
|
165
246
|
// type: '',
|
|
@@ -167,15 +248,15 @@ export function convertSequence(sequence: string, output: {
|
|
|
167
248
|
Error: undefinedInputSequence,
|
|
168
249
|
};
|
|
169
250
|
}
|
|
170
|
-
if (output.synthesizer
|
|
251
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.RAW_NUCLEOTIDES)) {//&& output.technology!.includes(TECHNOLOGIES.DNA)) {
|
|
171
252
|
return {
|
|
172
|
-
type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.DNA,
|
|
253
|
+
type: SYNTHESIZERS.RAW_NUCLEOTIDES, // + ' ' + TECHNOLOGIES.DNA,
|
|
173
254
|
Nucleotides: sequence,
|
|
174
255
|
BioSpring: asoGapmersNucleotidesToBioSpring(sequence),
|
|
175
256
|
GCRS: asoGapmersNucleotidesToGcrs(sequence),
|
|
176
257
|
};
|
|
177
258
|
}
|
|
178
|
-
if (output.synthesizer
|
|
259
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.BIOSPRING) && output.technology!.includes(TECHNOLOGIES.ASO_GAPMERS)) {
|
|
179
260
|
return {
|
|
180
261
|
type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.ASO_GAPMERS,
|
|
181
262
|
Nucleotides: asoGapmersBioSpringToNucleotides(sequence),
|
|
@@ -183,16 +264,17 @@ export function convertSequence(sequence: string, output: {
|
|
|
183
264
|
GCRS: asoGapmersBioSpringToGcrs(sequence),
|
|
184
265
|
};
|
|
185
266
|
}
|
|
186
|
-
if (output.synthesizer
|
|
267
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.GCRS) && output.technology!.includes(TECHNOLOGIES.ASO_GAPMERS)) {
|
|
187
268
|
return {
|
|
188
269
|
type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.ASO_GAPMERS,
|
|
189
270
|
Nucleotides: asoGapmersGcrsToNucleotides(sequence),
|
|
190
271
|
BioSpring: asoGapmersGcrsToBioSpring(sequence),
|
|
191
272
|
Mermade12: gcrsToMermade12(sequence),
|
|
192
273
|
GCRS: sequence,
|
|
274
|
+
LCMS: gcrsToLcms(sequence),
|
|
193
275
|
};
|
|
194
276
|
}
|
|
195
|
-
if (output.synthesizer
|
|
277
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.RAW_NUCLEOTIDES) && output.technology!.includes(TECHNOLOGIES.RNA)) {
|
|
196
278
|
return {
|
|
197
279
|
type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.RNA,
|
|
198
280
|
Nucleotides: sequence,
|
|
@@ -201,7 +283,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
201
283
|
GCRS: siRnaNucleotidesToGcrs(sequence),
|
|
202
284
|
};
|
|
203
285
|
}
|
|
204
|
-
if (output.synthesizer
|
|
286
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.BIOSPRING) && output.technology!.includes(TECHNOLOGIES.SI_RNA)) {
|
|
205
287
|
return {
|
|
206
288
|
type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.SI_RNA,
|
|
207
289
|
Nucleotides: siRnaBioSpringToNucleotides(sequence),
|
|
@@ -210,7 +292,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
210
292
|
GCRS: siRnaBioSpringToGcrs(sequence),
|
|
211
293
|
};
|
|
212
294
|
}
|
|
213
|
-
if (output.synthesizer
|
|
295
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.AXOLABS)) {
|
|
214
296
|
return {
|
|
215
297
|
type: SYNTHESIZERS.AXOLABS + ' ' + TECHNOLOGIES.SI_RNA,
|
|
216
298
|
Nucleotides: siRnaAxolabsToNucleotides(sequence),
|
|
@@ -219,7 +301,7 @@ export function convertSequence(sequence: string, output: {
|
|
|
219
301
|
GCRS: siRnaAxolabsToGcrs(sequence),
|
|
220
302
|
};
|
|
221
303
|
}
|
|
222
|
-
if (output.synthesizer
|
|
304
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.GCRS) && output.technology!.includes(TECHNOLOGIES.SI_RNA)) {
|
|
223
305
|
return {
|
|
224
306
|
type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.SI_RNA,
|
|
225
307
|
Nucleotides: siRnaGcrsToNucleotides(sequence),
|
|
@@ -227,17 +309,19 @@ export function convertSequence(sequence: string, output: {
|
|
|
227
309
|
Axolabs: siRnaGcrsToAxolabs(sequence),
|
|
228
310
|
MM12: gcrsToMermade12(sequence),
|
|
229
311
|
GCRS: sequence,
|
|
312
|
+
LCMS: gcrsToLcms(sequence),
|
|
230
313
|
};
|
|
231
314
|
}
|
|
232
|
-
if (output.synthesizer
|
|
315
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.GCRS)) {
|
|
233
316
|
return {
|
|
234
317
|
type: SYNTHESIZERS.GCRS,
|
|
235
318
|
Nucleotides: gcrsToNucleotides(sequence),
|
|
236
319
|
GCRS: sequence,
|
|
237
320
|
Mermade12: gcrsToMermade12(sequence),
|
|
321
|
+
LCMS: gcrsToLcms(sequence),
|
|
238
322
|
};
|
|
239
323
|
}
|
|
240
|
-
if (output.synthesizer
|
|
324
|
+
if (output.synthesizer!.includes(SYNTHESIZERS.MERMADE_12)) {
|
|
241
325
|
return {
|
|
242
326
|
type: SYNTHESIZERS.MERMADE_12,
|
|
243
327
|
Nucleotides: noTranslationTableAvailable,
|