@datagrok/sequence-translator 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +20 -14
- package/package.json +5 -3
- package/setup.cmd +11 -0
- package/src/map.ts +185 -6
- package/src/package-test.ts +13 -0
- package/src/package.ts +314 -225
- package/src/tests/smiles-tests.ts +13 -0
- package/webpack.config.js +7 -1
- package/scripts/convertFastaToSmiles +0 -24
- package/scripts/drawAxolabsPattern +0 -50
package/detectors.js
CHANGED
|
@@ -1,29 +1,35 @@
|
|
|
1
1
|
class SequenceTranslatorPackageDetectors extends DG.Package {
  // Each predicate below tests one string against the character set of a single notation.
  // All character-class patterns require at least 6 characters.
  isDnaNucleotides(sequence) {return /^[ATGC]{6,}$/.test(sequence);}
  isRnaNucleotides(sequence) {return /^[AUGC]{6,}$/.test(sequence);}
  isAsoGapmerBioSpring(sequence) {return /^[*56789ATGC]{6,}$/.test(sequence);}
  // NOTE(review): the trailing {6,} quantifies a zero-width lookahead, so it does not
  // constrain the length; the pattern only requires 'moe', '5mC' and 'ps' to occur somewhere.
  isAsoGapmerGcrs(sequence) {return /^(?=.*moe)(?=.*5mC)(?=.*ps){6,}/.test(sequence);}
  isSiRnaBioSpring(sequence) {return /^[*1-8]{6,}$/.test(sequence);}
  isSiRnaAxolabs(sequence) {return /^[fsACGUacgu]{6,}$/.test(sequence);}
  isSiRnaGcrs(sequence) {return /^[fmpsACGU]{6,}$/.test(sequence);} // TODO: insert into detectNucleotides
  isGcrs(sequence) {return /^[fmpsACGU]{6,}$/.test(sequence);}
  isMermade12(sequence) {return /^[IiJjKkLlEeFfGgHhQq]{6,}$/.test(sequence);}

  //tags: semTypeDetector
  //input: column col
  //output: string semType
  detectNucleotides(col) {
    // Only string columns are inspected; any other column yields undefined.
    if (col.type === DG.TYPE.STRING) {
      // The predicates are instance methods, so they must be reached through `this`;
      // a bare `isDnaNucleotides(s)` is an unresolved identifier and throws ReferenceError.
      // Checks run in order and the first notation accepted by sampleCategories wins.
      if (DG.Detector.sampleCategories(col, (s) => this.isDnaNucleotides(s)))
        return 'DNA nucleotides';
      if (DG.Detector.sampleCategories(col, (s) => this.isRnaNucleotides(s)))
        return 'RNA nucleotides';
      if (DG.Detector.sampleCategories(col, (s) => this.isAsoGapmerBioSpring(s)))
        return 'BioSpring / Gapmers';
      if (DG.Detector.sampleCategories(col, (s) => this.isAsoGapmerGcrs(s)))
        return 'GCRS / Gapmers';
      if (DG.Detector.sampleCategories(col, (s) => this.isSiRnaBioSpring(s)))
        return 'BioSpring / siRNA';
      if (DG.Detector.sampleCategories(col, (s) => this.isSiRnaAxolabs(s)))
        return 'Axolabs / siRNA';
      if (DG.Detector.sampleCategories(col, (s) => this.isGcrs(s)))
        return 'GCRS';
      if (DG.Detector.sampleCategories(col, (s) => this.isMermade12(s)))
        return 'Mermade 12 / siRNA';
    }
  }
}
|
package/package.json
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/sequence-translator",
|
|
3
3
|
"friendlyName": "SequenceTranslator",
|
|
4
|
-
"version": "0.0.
|
|
4
|
+
"version": "0.0.4",
|
|
5
5
|
"description": "",
|
|
6
6
|
"dependencies": {
|
|
7
|
+
"@datagrok-libraries/utils": "^0.0.23",
|
|
7
8
|
"@types/react": "latest",
|
|
8
9
|
"datagrok-api": ">0.94.10",
|
|
9
10
|
"datagrok-tools": "^4.1.2",
|
|
10
11
|
"npm": "^7.11.2",
|
|
11
12
|
"save-svg-as-png": "^1.4.17",
|
|
12
13
|
"ts-loader": "latest",
|
|
13
|
-
"typescript": "latest"
|
|
14
|
+
"typescript": "latest",
|
|
15
|
+
"openchemlib": "6.0.1"
|
|
14
16
|
},
|
|
15
17
|
"scripts": {
|
|
16
18
|
"link-api": "npm link datagrok-api",
|
|
@@ -28,4 +30,4 @@
|
|
|
28
30
|
"webpack": "^5.31.0",
|
|
29
31
|
"webpack-cli": "^4.6.0"
|
|
30
32
|
}
|
|
31
|
-
}
|
|
33
|
+
}
|
package/setup.cmd
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
rem Install the JS API dependencies and register it as a linkable npm package.
cd ../../js-api
call npm install
call npm link
rem Install the utils library, register it, and link it against the local datagrok-api.
cd ../libraries/utils
call npm install
call npm link
call npm link datagrok-api
rem Install this package, link both local packages into it, then build it.
cd ../../packages/SequenceTranslator
call npm install
call npm link datagrok-api @datagrok-libraries/utils
rem Use "call" here as well, consistent with the npm invocations above.
call webpack
|
package/src/map.ts
CHANGED
|
@@ -1,7 +1,86 @@
|
|
|
1
|
-
export const
|
|
1
|
+
/** Display names of the supported code sets; the values are also the top-level keys of `map`. */
export const SYNTHESIZERS = {
  RAW_NUCLEOTIDES: 'Raw Nucleotides',
  BIOSPRING: 'BioSpring Codes',
  GCRS: 'Janssen GCRS Codes',
  AXOLABS: 'Axolabs Codes',
  MERMADE_12: 'Mermade 12',
};
|
|
8
|
+
/** Display names of the supported technologies; the values are used as second-level keys of `map`. */
export const TECHNOLOGIES = {
  DNA: 'DNA',
  RNA: 'RNA',
  ASO_GAPMERS: 'For ASO Gapmers',
  SI_RNA: 'For 2\'-OMe and 2\'-F modified siRNA',
};
|
|
14
|
+
// interface CODES {
|
|
15
|
+
// }
|
|
16
|
+
/**
 * Overhang modification codes mapped to their SMILES fragments.
 * Each code has two variants, `left` and `right`; which one is used is decided by the caller.
 */
export const MODIFICATIONS: {[index: string]: {left: string, right: string}} = {
  '(invabasic)': {
    left: 'OC1CCOC1COP(=O)(S[H])O',
    right: 'OP(=O)(S[H])OCC1OCCC1O',
  },
  '(GalNAc-2-JNJ)': {
    left: 'C(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)NC(=O)CCCC(=O)NCC(O)CO',
    right: 'OCC(O)CNC(=O)CCCC(=O)NC(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)',
  },
};
|
|
26
|
+
// SMILES fragment of the phosphate link appended between codes when building a sequence SMILES.
// NOTE: the identifier is misspelled ("stadard"), but package.ts imports it under this exact
// name, so it must not be renamed here alone.
export const stadardPhosphateLinkSmiles = 'OP(=O)(O)O';
|
|
2
27
|
export const map: {[synthesizer: string]: {[technology: string]: {[code: string]: {"name": string, "weight": number, "normalized": string, "SMILES": string}}}} = {
|
|
28
|
+
"Raw Nucleotides": {
    "DNA": {
      "A": {
        "name": "Adenine",
        "weight": 313.21,
        "normalized": "dA",
        "SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1O"
      },
      "T": {
        // The base coded by "T" is thymine ("Tyrosine" is an amino acid).
        "name": "Thymine",
        "weight": 304.2,
        "normalized": "dT",
        "SMILES": "OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O"
      },
      "G": {
        "name": "Guanine",
        "weight": 329.21,
        "normalized": "dG",
        // A stray ')' after the 2'-carbon left the branches unbalanced; removed so the SMILES parses.
        "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)C[C@@H]1O"
      },
      "C": {
        "name": "Cytosine",
        "weight": 289.18,
        "normalized": "dC",
        "SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1O"
      }
    },
    // NOTE(review): the RNA entries for A, G and C carry the same weight, "d"-prefixed normalized
    // code and SMILES as their DNA counterparts, while "U" uses "rU" and a 2'-hydroxyl sugar.
    // They look copied from the DNA table -- confirm and replace with ribonucleotide values.
    "RNA": {
      "A": {
        "name": "Adenine",
        "weight": 313.21,
        "normalized": "dA",
        "SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1O"
      },
      "U": {
        "name": "Uracil",
        "weight": 306.17,
        "normalized": "rU",
        "SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](O)[C@@H]1O"
      },
      "G": {
        "name": "Guanine",
        "weight": 329.21,
        "normalized": "dG",
        // Same unbalanced ')' as in the DNA entry; removed so the SMILES parses.
        "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)C[C@@H]1O"
      },
      "C": {
        "name": "Cytosine",
        "weight": 289.18,
        "normalized": "dC",
        "SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1O"
      }
    }
  },
|
|
3
82
|
"BioSpring Codes": {
|
|
4
|
-
|
|
83
|
+
'For ASO Gapmers': {
|
|
5
84
|
"5": {
|
|
6
85
|
"name": "2'MOE-5Me-rU",
|
|
7
86
|
"weight": 378.27,
|
|
@@ -36,7 +115,7 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
|
|
|
36
115
|
"name": "ps linkage",
|
|
37
116
|
"weight": 16.07,
|
|
38
117
|
"normalized": "",
|
|
39
|
-
"SMILES": "OP(=O)(O
|
|
118
|
+
"SMILES": "OP(=O)(S)O"
|
|
40
119
|
},
|
|
41
120
|
"A": {
|
|
42
121
|
"name": "Adenine",
|
|
@@ -116,7 +195,7 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
|
|
|
116
195
|
"name": "ps linkage",
|
|
117
196
|
"weight": 16.07,
|
|
118
197
|
"normalized": "",
|
|
119
|
-
"SMILES": "OP(=O)(O
|
|
198
|
+
"SMILES": "OP(=O)(S)O"
|
|
120
199
|
}
|
|
121
200
|
}
|
|
122
201
|
},
|
|
@@ -174,7 +253,7 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
|
|
|
174
253
|
"name": "ps linkage",
|
|
175
254
|
"weight": 16.07,
|
|
176
255
|
"normalized": "",
|
|
177
|
-
"SMILES": "OP(=O)(O
|
|
256
|
+
"SMILES": "OP(=O)(S)O"
|
|
178
257
|
}
|
|
179
258
|
}
|
|
180
259
|
},
|
|
@@ -226,7 +305,7 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
|
|
|
226
305
|
"name": "ps linkage",
|
|
227
306
|
"weight": 16.07,
|
|
228
307
|
"normalized": "",
|
|
229
|
-
"SMILES": "OP(=O)(O
|
|
308
|
+
"SMILES": "OP(=O)(S)O"
|
|
230
309
|
},
|
|
231
310
|
"A": {
|
|
232
311
|
"name": "Adenine",
|
|
@@ -351,5 +430,105 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
|
|
|
351
430
|
"SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1O"
|
|
352
431
|
}
|
|
353
432
|
}
|
|
433
|
+
},
|
|
434
|
+
"Mermade 12": {
|
|
435
|
+
"For 2'-OMe and 2'-F modified siRNA": {
|
|
436
|
+
"e": {
|
|
437
|
+
"name": "2'OMe-rA-ps",
|
|
438
|
+
"weight": 359.31,
|
|
439
|
+
"normalized": "rA",
|
|
440
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(S)O"
|
|
441
|
+
},
|
|
442
|
+
"h": {
|
|
443
|
+
"name": "2'OMe-rU-ps",
|
|
444
|
+
"weight": 336.27,
|
|
445
|
+
"normalized": "rU",
|
|
446
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](OC)[C@@H]1OP(=O)(S)O"
|
|
447
|
+
},
|
|
448
|
+
"g": {
|
|
449
|
+
"name": "2'OMe-rG-ps",
|
|
450
|
+
"weight": 375.31,
|
|
451
|
+
"normalized": "rG",
|
|
452
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(S)O"
|
|
453
|
+
},
|
|
454
|
+
"f": {
|
|
455
|
+
"name": "2'OMe-rC-ps",
|
|
456
|
+
"weight": 335.28,
|
|
457
|
+
"normalized": "rC",
|
|
458
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))[C@H](OC)[C@@H]1OP(=O)(S)O"
|
|
459
|
+
},
|
|
460
|
+
"i": {
|
|
461
|
+
"name": "2'-fluoro-A-ps",
|
|
462
|
+
"weight": 347.27,
|
|
463
|
+
"normalized": "rA",
|
|
464
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)[C@H](F)[C@@H]1OP(=O)(S)O"
|
|
465
|
+
},
|
|
466
|
+
"l": {
|
|
467
|
+
"name": "2'-fluoro-U-ps",
|
|
468
|
+
"weight": 324.23,
|
|
469
|
+
"normalized": "rU",
|
|
470
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](F)[C@@H]1OP(=O)(S)O"
|
|
471
|
+
},
|
|
472
|
+
"k": {
|
|
473
|
+
"name": "2'-fluoro-G-ps",
|
|
474
|
+
"weight": 363.26,
|
|
475
|
+
"normalized": "rG",
|
|
476
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](F)[C@@H]1OP(=O)(S)O"
|
|
477
|
+
},
|
|
478
|
+
"j": {
|
|
479
|
+
"name": "2'-fluoro-C-ps",
|
|
480
|
+
"weight": 323.25,
|
|
481
|
+
"normalized": "rC",
|
|
482
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))[C@H](F)[C@@H]1OP(=O)(S)O"
|
|
483
|
+
},
|
|
484
|
+
"L": {
|
|
485
|
+
"name": "2'-fluoro-U",
|
|
486
|
+
"weight": 308.16,
|
|
487
|
+
"normalized": "rU",
|
|
488
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](F)[C@@H]1O"
|
|
489
|
+
},
|
|
490
|
+
"I": {
|
|
491
|
+
"name": "2'-fluoro-A",
|
|
492
|
+
"weight": 331.2,
|
|
493
|
+
"normalized": "rA",
|
|
494
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)[C@H](F)[C@@H]1O"
|
|
495
|
+
},
|
|
496
|
+
"J": {
|
|
497
|
+
"name": "2'-fluoro-C",
|
|
498
|
+
"weight": 307.18,
|
|
499
|
+
"normalized": "rC",
|
|
500
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))[C@H](F)[C@@H]1O"
|
|
501
|
+
},
|
|
502
|
+
"K": {
|
|
503
|
+
"name": "2'-fluoro-G",
|
|
504
|
+
"weight": 347.19,
|
|
505
|
+
"normalized": "rG",
|
|
506
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](F)[C@@H]1O"
|
|
507
|
+
},
|
|
508
|
+
"H": {
|
|
509
|
+
"name": "2'OMe-rU",
|
|
510
|
+
"weight": 320.2,
|
|
511
|
+
"normalized": "rU",
|
|
512
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](OC)[C@@H]1O"
|
|
513
|
+
},
|
|
514
|
+
"E": {
|
|
515
|
+
"name": "2'OMe-rA",
|
|
516
|
+
"weight": 343.24,
|
|
517
|
+
"normalized": "rA",
|
|
518
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)[C@H](OC)[C@@H]1O"
|
|
519
|
+
},
|
|
520
|
+
"F": {
|
|
521
|
+
"name": "2'OMe-rC",
|
|
522
|
+
"weight": 319.21,
|
|
523
|
+
"normalized": "rC",
|
|
524
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))[C@H](OC)[C@@H]1O"
|
|
525
|
+
},
|
|
526
|
+
"G": {
|
|
527
|
+
"name": "2'OMe-rG",
|
|
528
|
+
"weight": 359.24,
|
|
529
|
+
"normalized": "rG",
|
|
530
|
+
"SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1O"
|
|
531
|
+
}
|
|
532
|
+
}
|
|
354
533
|
}
|
|
355
534
|
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import * as DG from "datagrok-api/dg";
|
|
2
|
+
import * as grok from "datagrok-api/grok";
|
|
3
|
+
import {runTests} from "@datagrok-libraries/utils/src/test";
|
|
4
|
+
import "./tests/smiles-tests";
|
|
5
|
+
|
|
6
|
+
export let _package = new DG.Package();
|
|
7
|
+
|
|
8
|
+
//name: test
|
|
9
|
+
//output: dataframe result
|
|
10
|
+
export async function test(): Promise<DG.DataFrame> {
  // Run the tests through runTests() and hand the collected result objects back as a dataframe.
  const results = await runTests();
  return DG.DataFrame.fromObjects(results)!;
}
|
package/src/package.ts
CHANGED
|
@@ -5,20 +5,141 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import * as OCL from 'openchemlib/full.js';
|
|
6
6
|
import $ from "cash-dom";
|
|
7
7
|
import {defineAxolabsPattern} from "./defineAxolabsPattern";
|
|
8
|
-
import {map,
|
|
8
|
+
import {map, stadardPhosphateLinkSmiles, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS} from "./map";
|
|
9
9
|
|
|
10
10
|
export let _package = new DG.Package();
|
|
11
11
|
|
|
12
12
|
const defaultInput = "AGGTCCTCTTGACTTAGGCC";
|
|
13
|
-
const minimalValidNumberOfCharacters = 6;
|
|
14
|
-
const smallNumberOfCharacters = "Length of input sequence should be at least " + minimalValidNumberOfCharacters + " characters";
|
|
15
13
|
const undefinedInputSequence = "Type of input sequence is undefined";
|
|
16
14
|
const noTranslationTableAvailable = "No translation table available";
|
|
17
15
|
const sequenceWasCopied = 'Copied';
|
|
18
16
|
const tooltipSequence = 'Copy sequence';
|
|
19
17
|
|
|
20
|
-
function
|
|
21
|
-
|
|
18
|
+
/**
 * Collects every code known for a synthesizer.
 * @param synthesizer top-level key of `map`
 * @returns the codes of all technologies of that synthesizer, in `map` order,
 *          followed by the modification codes
 */
function getAllCodesOfSynthesizer(synthesizer: string) {
  const allCodes: string[] = [];
  for (const codesOfTechnology of Object.values(map[synthesizer]))
    allCodes.push(...Object.keys(codesOfTechnology));
  allCodes.push(...Object.keys(MODIFICATIONS));
  return allCodes;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Lists the synthesizers that have a code matching the first code of the sequence.
 * @param sequence input sequence
 * @returns keys of `map`, in `map` order, with at least one matching code
 */
function getListOfPossibleSynthesizersByFirstMatchedCode(sequence: string): string[] {
  //TODO: get first non-dropdown code when there are two modifications
  // Matching starts right after the first ')' found anywhere in the sequence,
  // or at position 0 when there is none (indexOf gives -1, so -1 + 1 = 0).
  const firstCodeStart = sequence.indexOf(')') + 1;
  return Object.keys(map).filter((synthesizer: string) =>
    getAllCodesOfSynthesizer(synthesizer)
      .some((code: string) => sequence.startsWith(code, firstCodeStart)));
}
|
|
41
|
+
|
|
42
|
+
/**
 * Lists the technologies of a synthesizer whose codes (or a modification code)
 * match the very beginning of the sequence.
 * @param sequence input sequence
 * @param synthesizer top-level key of `map`
 * @returns technology keys of `map[synthesizer]`, in `map` order
 */
function getListOfPossibleTechnologiesByFirstMatchedCode(sequence: string, synthesizer: string): string[] {
  const modificationCodes = Object.keys(MODIFICATIONS);
  return Object.keys(map[synthesizer]).filter((technology: string) => {
    const candidateCodes = [...Object.keys(map[synthesizer][technology]), ...modificationCodes];
    return candidateCodes.some((code) => sequence.startsWith(code));
  });
}
|
|
51
|
+
|
|
52
|
+
/**
 * Walks the sequence code by code and returns the index at which matching stopped.
 * Returns `sequence.length` when the whole sequence was consumed.
 */
function getIndexOfFirstUnmatchedCode(sequence: string, codes: string[]): number {
  const firstUniqueCharacters = ['r', 'd'];
  const nucleotides = ['A', 'U', 'T', 'C', 'G'];
  let index = 0;
  while (index < sequence.length) {
    const matchedCode = codes.find((c) => c == sequence.slice(index, index + c.length));
    if (matchedCode == null)
      break;

    const startsWithNucleotide = nucleotides.includes(sequence[index]);

    // mistake pattern 'rAA': a nucleotide directly after a prefixed one; stop at this character
    if (index > 1 && startsWithNucleotide && firstUniqueCharacters.includes(sequence[index - 2]))
      break;

    // mistake pattern 'ArA': a nucleotide followed by a prefix character; stop right after it
    if (startsWithNucleotide && firstUniqueCharacters.includes(sequence[index + 1])) {
      index++;
      break;
    }

    index += matchedCode.length;
  }
  return index;
}

/**
 * Validates a sequence against the code tables in `map`.
 *
 * The synthesizer whose codes match the longest prefix is chosen first. If the sequence is
 * not fully matched, the index of the first unmatched character is reported with that
 * synthesizer; otherwise the best-matching technology of that synthesizer is picked.
 *
 * @param sequence input sequence
 * @returns `indexOfFirstNotValidCharacter` is -1 for a fully valid sequence;
 *          `expectedType` is the synthesizer (invalid sequence), "synthesizer technology"
 *          (valid sequence), or null when nothing matched the first code
 */
function isValidSequence(sequence: string): {indexOfFirstNotValidCharacter: number, expectedType: string | null} {
  const possibleSynthesizers = getListOfPossibleSynthesizersByFirstMatchedCode(sequence);
  if (possibleSynthesizers.length == 0)
    return {indexOfFirstNotValidCharacter: 0, expectedType: null};

  const synthesizerIndices = possibleSynthesizers
    .map((synthesizer) => getIndexOfFirstUnmatchedCode(sequence, getAllCodesOfSynthesizer(synthesizer)));
  const indexOfExpectedSynthesizer = Math.max(...synthesizerIndices);
  const indexOfFirstNotValidCharacter =
    (indexOfExpectedSynthesizer == sequence.length) ? -1 : indexOfExpectedSynthesizer;
  const expectedSynthesizer = possibleSynthesizers[synthesizerIndices.indexOf(indexOfExpectedSynthesizer)];
  if (indexOfFirstNotValidCharacter != -1)
    return {indexOfFirstNotValidCharacter: indexOfFirstNotValidCharacter, expectedType: expectedSynthesizer};

  const possibleTechnologies = getListOfPossibleTechnologiesByFirstMatchedCode(sequence, expectedSynthesizer);
  if (possibleTechnologies.length == 0) {
    // This branch used to return the key `expectedRepresentation`; every other branch and
    // the consumer of the result use `expectedType`, so the key is unified here.
    return {indexOfFirstNotValidCharacter: 0, expectedType: null};
  }

  // Modification codes are deliberately not included at the technology level (as before).
  const technologyIndices = possibleTechnologies
    .map((technology) => getIndexOfFirstUnmatchedCode(sequence, Object.keys(map[expectedSynthesizer][technology])));
  const indexOfExpectedTechnology = Math.max(...technologyIndices);
  const expectedTechnology = possibleTechnologies[technologyIndices.indexOf(indexOfExpectedTechnology)];

  return {
    indexOfFirstNotValidCharacter: indexOfFirstNotValidCharacter,
    expectedType: expectedSynthesizer + ' ' + expectedTechnology
  };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Returns the strings ordered from longest to shortest.
 * The input array is left untouched: `Array.prototype.sort` works in place,
 * so a copy is sorted instead of the caller's array.
 * @param array strings to order
 * @returns a new array sorted by descending length
 */
function sortByStringLengthInDescendingOrder(array: string[]): string[] {
  return [...array].sort((a: string, b: string) => b.length - a.length);
}
|
|
23
144
|
|
|
24
145
|
function getObjectWithCodesAndSmiles() {
|
|
@@ -30,22 +151,48 @@ function getObjectWithCodesAndSmiles() {
|
|
|
30
151
|
return obj;
|
|
31
152
|
}
|
|
32
153
|
|
|
33
|
-
function
|
|
154
|
+
/**
 * Translates a sequence of codes into a single SMILES string.
 *
 * The sequence is first split into codes (longest codes are tried first, modification codes
 * last), then the SMILES fragments of the codes are concatenated, inserting or trimming the
 * standard phosphate link between them.
 *
 * @param sequence sequence made only of known codes, without whitespace
 * @returns the assembled SMILES
 * @throws Error when some position of the sequence does not start with any known code
 */
export function sequenceToSmiles(sequence: string) {
  const obj = getObjectWithCodesAndSmiles();
  const links = ['s', 'ps', '*'];
  const includesStandardLinkAlready = ["e", "h", "g", "f", "i", "l", "k", "j"];
  const dropdowns = Object.keys(MODIFICATIONS);
  const codes = sortByStringLengthInDescendingOrder(Object.keys(obj)).concat(dropdowns);

  // Split the sequence into codes.
  const codesList: string[] = [];
  let position = 0;
  while (position < sequence.length) {
    const code = codes.find((s: string) => s == sequence.slice(position, position + s.length));
    // Previously a non-null assertion hid this case and the next line failed with an
    // opaque TypeError; an empty code would also never advance the position.
    if (!code)
      throw new Error(`Unknown code at position ${position} of sequence '${sequence}'`);
    position += code.length;
    codesList.push(code);
  }

  // Concatenate the SMILES fragments.
  let smiles = '';
  for (let i = 0; i < codesList.length; i++) {
    if (dropdowns.includes(codesList[i])) {
      // Modifications in the second half of the sequence use the right-hand variant.
      smiles += (i >= codesList.length / 2) ?
        MODIFICATIONS[codesList[i]].right :
        MODIFICATIONS[codesList[i]].left;
    } else {
      if (links.includes(codesList[i]) && i > 1 && !includesStandardLinkAlready.includes(codesList[i - 1])) {
        // NOTE(review): this branch only trims the tail of the string and does not append the
        // link's own SMILES -- confirm that this is intended.
        smiles = smiles.slice(0, smiles.length - stadardPhosphateLinkSmiles.length + 1);
      } else if (links.includes(codesList[i]) ||
        includesStandardLinkAlready.includes(codesList[i]) ||
        (i < codesList.length - 1 && (links.includes(codesList[i + 1]) || dropdowns.includes(codesList[i + 1])))
      ) {
        smiles += obj[codesList[i]];
      } else {
        smiles += obj[codesList[i]] + stadardPhosphateLinkSmiles;
      }
    }
  }
  smiles = smiles.replace(/OO/g, 'O');

  // The trailing standard phosphate link is kept only when the last code is a link (not
  // preceded by a code that already contains one), a modification, or a code with a built-in link.
  const lastCode = codesList[codesList.length - 1];
  const keepTail =
    (
      links.includes(lastCode) &&
      codesList.length > 1 &&
      !includesStandardLinkAlready.includes(codesList[codesList.length - 2])
    ) ||
    dropdowns.includes(lastCode) ||
    includesStandardLinkAlready.includes(lastCode);
  return keepTail ?
    smiles :
    smiles.slice(0, smiles.length - stadardPhosphateLinkSmiles.length + 1);
}
|
|
50
197
|
|
|
51
198
|
//name: Sequence Translator
|
|
@@ -57,71 +204,60 @@ export function sequenceTranslator() {
|
|
|
57
204
|
windows.showToolbox = false;
|
|
58
205
|
windows.showHelp = false;
|
|
59
206
|
|
|
60
|
-
function
|
|
207
|
+
function updateTableAndMolecule(sequence: string) {
|
|
61
208
|
moleculeSvgDiv.innerHTML = "";
|
|
62
209
|
outputTableDiv.innerHTML = "";
|
|
63
|
-
let
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
let flavor: string = (outputSequenceObj.Nucleotides.includes('U')) ? "RNA_both_caps" : "DNA_both_caps";
|
|
81
|
-
(async () => {
|
|
82
|
-
let smiles = (/^[ATGCU]{6,}$/.test(inputSequenceField.value.replace(/\s/g, ''))) ?
|
|
83
|
-
await nucleotidesToSmiles(outputSequenceObj.Nucleotides, flavor) :
|
|
84
|
-
modifiedToSmiles(inputSequenceField.value.replace(/\s/g, ''));
|
|
85
|
-
smiles = smiles.replace(/@/g, ''); // Remove StereoChemistry on the Nucleic acid chain and remove the Chiral label
|
|
86
|
-
moleculeSvgDiv.append(grok.chem.svgMol(smiles, 900, 300));
|
|
87
|
-
})();
|
|
88
|
-
} finally {
|
|
89
|
-
pi.close();
|
|
210
|
+
let pi = DG.TaskBarProgressIndicator.create('Rendering table and molecule...');
|
|
211
|
+
try {
|
|
212
|
+
let outputSequenceObj = convertSequence(sequence);
|
|
213
|
+
let tableRows = [];
|
|
214
|
+
for (let key of Object.keys(outputSequenceObj).slice(1)) {
|
|
215
|
+
tableRows.push({
|
|
216
|
+
'key': key,
|
|
217
|
+
'value': ("indexOfFirstNotValidCharacter" in outputSequenceObj) ?
|
|
218
|
+
ui.divH([
|
|
219
|
+
ui.divText(sequence.slice(0, JSON.parse(outputSequenceObj.indexOfFirstNotValidCharacter!).indexOfFirstNotValidCharacter), {style: {color: "grey"}}),
|
|
220
|
+
ui.tooltip.bind(
|
|
221
|
+
ui.divText(sequence.slice(JSON.parse(outputSequenceObj.indexOfFirstNotValidCharacter!).indexOfFirstNotValidCharacter), {style: {color: "red"}}),
|
|
222
|
+
"Expected format: " + JSON.parse(outputSequenceObj.indexOfFirstNotValidCharacter!).expectedType + ". Press 'SHOW CODES' button to see tables with valid codes"
|
|
223
|
+
)
|
|
224
|
+
]) : //@ts-ignore
|
|
225
|
+
ui.link(outputSequenceObj[key], () => navigator.clipboard.writeText(outputSequenceObj[key]).then(() => grok.shell.info(sequenceWasCopied)), tooltipSequence, '')
|
|
226
|
+
})
|
|
90
227
|
}
|
|
228
|
+
outputTableDiv.append(
|
|
229
|
+
ui.div([DG.HtmlTable.create(tableRows, (item: { key: string; value: string; }) => [item.key, item.value], ['Code', 'Sequence']).root], 'table')
|
|
230
|
+
);
|
|
231
|
+
semTypeOfInputSequence.textContent = 'Detected input type: ' + outputSequenceObj.type;
|
|
232
|
+
|
|
233
|
+
let width = $(window).width();
|
|
234
|
+
const canvas = ui.canvas(width, Math.round(width / 2));
|
|
235
|
+
let smiles = sequenceToSmiles(inputSequenceField.value.replace(/\s/g, ''));
|
|
236
|
+
// @ts-ignore
|
|
237
|
+
OCL.StructureView.drawMolecule(canvas, OCL.Molecule.fromSmiles(smiles), { suppressChiralText: true });
|
|
238
|
+
if (outputSequenceObj.type != undefinedInputSequence)
|
|
239
|
+
moleculeSvgDiv.append(canvas);
|
|
240
|
+
} finally {
|
|
241
|
+
pi.close();
|
|
91
242
|
}
|
|
92
243
|
}
|
|
93
244
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
ui.divText("Paste sequence into the text field below"),
|
|
97
|
-
ui.divText('\n How to convert many sequences:',{style:{'font-weight':'bolder'}}),
|
|
98
|
-
ui.divText("1. Drag & drop an Excel or CSV file with sequences into Datagrok. The platform will automatically detect columns with sequences"),
|
|
99
|
-
ui.divText('2. Right-click on the column header, then see the \'Convert\' menu'),
|
|
100
|
-
ui.divText("This will add the result column to the right of the table"),
|
|
101
|
-
], 'Convert oligonucleotide sequences between Nucleotides, BioSpring, Axolabs, and GCRS representations.'
|
|
102
|
-
);
|
|
103
|
-
|
|
104
|
-
let inputSequenceField = ui.textInput("", defaultInput, (sequence: string) => updateTableAndSVG(sequence));
|
|
105
|
-
let outputSequenceObj = convertSequence(defaultInput);
|
|
106
|
-
let semTypeOfInputSequence = ui.divText('Detected input type: ' + outputSequenceObj.type);
|
|
107
|
-
|
|
108
|
-
let tableRows = [];
|
|
109
|
-
for (let key of Object.keys(outputSequenceObj).slice(1)) {
|
|
110
|
-
//@ts-ignore
|
|
111
|
-
tableRows.push({'key': key, 'value': ui.link(outputSequenceObj[key], () => navigator.clipboard.writeText(outputSequenceObj[key]).then(() => grok.shell.info(sequenceWasCopied)), tooltipSequence, '')})
|
|
112
|
-
}
|
|
245
|
+
let semTypeOfInputSequence = ui.divText('');
|
|
246
|
+
let moleculeSvgDiv = ui.block([]);
|
|
113
247
|
let outputTableDiv = ui.div([], 'table');
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
);
|
|
248
|
+
let inputSequenceField = ui.textInput("", defaultInput, (sequence: string) => updateTableAndMolecule(sequence));
|
|
249
|
+
updateTableAndMolecule(defaultInput);
|
|
117
250
|
|
|
118
|
-
let
|
|
251
|
+
let tablesWithCodes = ui.divV([
|
|
252
|
+
DG.HtmlTable.create(Object.keys(MODIFICATIONS), (item: string) => [item], ['Overhang modification']).root,
|
|
253
|
+
ui.div([], {style: {height: '30px'}})
|
|
254
|
+
]);
|
|
119
255
|
for (let synthesizer of Object.keys(map)) {
|
|
120
256
|
for (let technology of Object.keys(map[synthesizer])) {
|
|
121
257
|
let tableRows = [];
|
|
122
258
|
for (let [key, value] of Object.entries(map[synthesizer][technology]))
|
|
123
259
|
tableRows.push({'name': value.name, 'code': key});
|
|
124
|
-
|
|
260
|
+
tablesWithCodes.append(
|
|
125
261
|
DG.HtmlTable.create(
|
|
126
262
|
tableRows,
|
|
127
263
|
(item: {name: string; code: string;}) => [item['name'], item['code']],
|
|
@@ -131,29 +267,31 @@ export function sequenceTranslator() {
|
|
|
131
267
|
);
|
|
132
268
|
}
|
|
133
269
|
}
|
|
134
|
-
|
|
135
|
-
let
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
let
|
|
144
|
-
flavor = outputSequenceObj.Nucleotides.includes('U') ? "RNA_both_caps" : "DNA_both_caps";
|
|
145
|
-
let smiles = (/^[ATGCU]{6,}$/.test(inputSequenceField.value.replace(/\s/g, ''))) ?
|
|
146
|
-
await nucleotidesToSmiles(outputSequenceObj.Nucleotides, flavor) :
|
|
147
|
-
modifiedToSmiles(inputSequenceField.value.replace(/\s/g, ''));
|
|
148
|
-
smiles = smiles.replace(/@/g, ''); // Remove StereoChemistry on the Nucleic acid chain and remove the Chiral label
|
|
149
|
-
let mol = OCL.Molecule.fromSmiles(smiles);
|
|
150
|
-
let result = `${mol.toMolfile()}\n`;// + '$$$$';
|
|
151
|
-
var element = document.createElement('a');
|
|
270
|
+
let showCodesButton = ui.button('SHOW CODES', () => ui.dialog('Codes').add(tablesWithCodes).show());
|
|
271
|
+
let copySmiles = ui.button(
|
|
272
|
+
'COPY SMILES',
|
|
273
|
+
() => navigator.clipboard.writeText(sequenceToSmiles(inputSequenceField.value.replace(/\s/g, '')))
|
|
274
|
+
.then(() => grok.shell.info(sequenceWasCopied))
|
|
275
|
+
);
|
|
276
|
+
let saveMolFileButton = ui.bigButton('SAVE MOL FILE', () => {
|
|
277
|
+
let smiles = sequenceToSmiles(inputSequenceField.value.replace(/\s/g, ''));
|
|
278
|
+
let result = `${OCL.Molecule.fromSmiles(smiles).toMolfile()}\n`;
|
|
279
|
+
let element = document.createElement('a');
|
|
152
280
|
element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(result));
|
|
153
281
|
element.setAttribute('download', inputSequenceField.value.replace(/\s/g, '') + '.mol');
|
|
154
282
|
element.click();
|
|
155
283
|
});
|
|
156
284
|
|
|
285
|
+
const appMainDescription = ui.info([
|
|
286
|
+
ui.divText('\n How to convert one sequence:',{style:{'font-weight':'bolder'}}),
|
|
287
|
+
ui.divText("Paste sequence into the text field below"),
|
|
288
|
+
ui.divText('\n How to convert many sequences:',{style:{'font-weight':'bolder'}}),
|
|
289
|
+
ui.divText("1. Drag & drop an Excel or CSV file with sequences into Datagrok. The platform will automatically detect columns with sequences"),
|
|
290
|
+
ui.divText('2. Right-click on the column header, then see the \'Convert\' menu'),
|
|
291
|
+
ui.divText("This will add the result column to the right of the table"),
|
|
292
|
+
], 'Convert oligonucleotide sequences between Nucleotides, BioSpring, Axolabs, Mermade 12 and GCRS representations.'
|
|
293
|
+
);
|
|
294
|
+
|
|
157
295
|
let v = grok.shell.newView('Sequence Translator', [
|
|
158
296
|
ui.tabControl({
|
|
159
297
|
'MAIN': ui.div([
|
|
@@ -171,7 +309,7 @@ export function sequenceTranslator() {
|
|
|
171
309
|
outputTableDiv
|
|
172
310
|
]),
|
|
173
311
|
moleculeSvgDiv,
|
|
174
|
-
ui.divH([saveMolFileButton, showCodesButton])
|
|
312
|
+
ui.divH([saveMolFileButton, showCodesButton, copySmiles])
|
|
175
313
|
], 'sequence')
|
|
176
314
|
]),
|
|
177
315
|
'AXOLABS': defineAxolabsPattern()
|
|
@@ -192,104 +330,83 @@ export function sequenceTranslator() {
|
|
|
192
330
|
.css('width','100%');
|
|
193
331
|
}
|
|
194
332
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
export function isDnaNucleotidesCode(sequence: string) {return /^[ATGC]{6,}$/.test(sequence);}
|
|
203
|
-
export function isRnaNucleotidesCode(sequence: string) {return /^[AUGC]{6,}$/.test(sequence);}
|
|
204
|
-
export function isAsoGapmerBioSpringCode(sequence: string) {return /^[*56789ATGC]{6,}$/.test(sequence);}
|
|
205
|
-
export function isAsoGapmerGcrsCode(sequence: string) {return /^(?=.*moe)(?=.*5mC)(?=.*ps){6,}/.test(sequence);}
|
|
206
|
-
export function isSiRnaBioSpringCode(sequence: string) {return /^[*1-8]{6,}$/.test(sequence);}
|
|
207
|
-
export function isSiRnaAxolabsCode(sequence: string) {return /^[fsACGUacgu]{6,}$/.test(sequence);}
|
|
208
|
-
export function isSiRnaGcrsCode(sequence: string) {return /^[fmpsACGU]{6,}$/.test(sequence);}
|
|
209
|
-
export function isGcrsCode(sequence: string) {return /^[fmpsACGU]{6,}$/.test(sequence);}
|
|
210
|
-
export function isMM12Code(sequence: string) {return /^[IiJjKkLlEeFfGgHhQq]{6,}$/.test(sequence);}
|
|
211
|
-
|
|
212
|
-
function convertSequence(seq: string) {
|
|
213
|
-
seq = seq.replace(/\s/g, '');
|
|
214
|
-
if (seq.length < minimalValidNumberOfCharacters)
|
|
333
|
+
function convertSequence(text: string) {
|
|
334
|
+
text = text.replace(/\s/g, '');
|
|
335
|
+
let seq = text;
|
|
336
|
+
let output = isValidSequence(seq);
|
|
337
|
+
if (output.indexOfFirstNotValidCharacter != -1)
|
|
215
338
|
return {
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
BioSpring: smallNumberOfCharacters,
|
|
219
|
-
Axolabs: smallNumberOfCharacters,
|
|
220
|
-
GCRS: smallNumberOfCharacters
|
|
339
|
+
indexOfFirstNotValidCharacter: JSON.stringify(output),
|
|
340
|
+
Error: undefinedInputSequence
|
|
221
341
|
};
|
|
222
|
-
if (
|
|
342
|
+
if (output.expectedType == SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.DNA)
|
|
223
343
|
return {
|
|
224
|
-
type:
|
|
344
|
+
type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.DNA,
|
|
225
345
|
Nucleotides: seq,
|
|
226
346
|
BioSpring: asoGapmersNucleotidesToBioSpring(seq),
|
|
227
|
-
Axolabs: noTranslationTableAvailable,
|
|
228
347
|
GCRS: asoGapmersNucleotidesToGcrs(seq)
|
|
229
348
|
};
|
|
230
|
-
if (
|
|
349
|
+
if (output.expectedType == SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.ASO_GAPMERS)
|
|
231
350
|
return {
|
|
232
|
-
type:
|
|
351
|
+
type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.ASO_GAPMERS,
|
|
233
352
|
Nucleotides: asoGapmersBioSpringToNucleotides(seq),
|
|
234
353
|
BioSpring: seq,
|
|
235
|
-
Axolabs: noTranslationTableAvailable,
|
|
236
354
|
GCRS: asoGapmersBioSpringToGcrs(seq)
|
|
237
355
|
};
|
|
238
|
-
if (
|
|
356
|
+
if (output.expectedType == SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.ASO_GAPMERS)
|
|
239
357
|
return {
|
|
240
|
-
type:
|
|
358
|
+
type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.ASO_GAPMERS,
|
|
241
359
|
Nucleotides: asoGapmersGcrsToNucleotides(seq),
|
|
242
360
|
BioSpring: asoGapmersGcrsToBioSpring(seq),
|
|
243
|
-
|
|
244
|
-
MM12: gcrsToMM12(seq),
|
|
361
|
+
Mermade12: gcrsToMermade12(seq),
|
|
245
362
|
GCRS: seq
|
|
246
363
|
};
|
|
247
|
-
if (
|
|
364
|
+
if (output.expectedType == SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.RNA)
|
|
248
365
|
return {
|
|
249
|
-
type:
|
|
366
|
+
type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.RNA,
|
|
250
367
|
Nucleotides: seq,
|
|
251
368
|
BioSpring: siRnaNucleotideToBioSpringSenseStrand(seq),
|
|
252
369
|
Axolabs: siRnaNucleotideToAxolabsSenseStrand(seq),
|
|
253
370
|
GCRS: siRnaNucleotidesToGcrs(seq)
|
|
254
371
|
};
|
|
255
|
-
if (
|
|
372
|
+
if (output.expectedType == SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.SI_RNA)
|
|
256
373
|
return {
|
|
257
|
-
type:
|
|
374
|
+
type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.SI_RNA,
|
|
258
375
|
Nucleotides: siRnaBioSpringToNucleotides(seq),
|
|
259
376
|
BioSpring: seq,
|
|
260
377
|
Axolabs: siRnaBioSpringToAxolabs(seq),
|
|
261
378
|
GCRS: siRnaBioSpringToGcrs(seq)
|
|
262
379
|
};
|
|
263
|
-
if (
|
|
380
|
+
if (output.expectedType == SYNTHESIZERS.AXOLABS + ' ' + TECHNOLOGIES.SI_RNA)
|
|
264
381
|
return {
|
|
265
|
-
type:
|
|
382
|
+
type: SYNTHESIZERS.AXOLABS + ' ' + TECHNOLOGIES.SI_RNA,
|
|
266
383
|
Nucleotides: siRnaAxolabsToNucleotides(seq),
|
|
267
384
|
BioSpring: siRnaAxolabsToBioSpring(seq),
|
|
268
385
|
Axolabs: seq,
|
|
269
386
|
GCRS: siRnaAxolabsToGcrs(seq)
|
|
270
387
|
};
|
|
271
|
-
if (
|
|
388
|
+
if (output.expectedType == SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.SI_RNA)
|
|
272
389
|
return {
|
|
273
|
-
type:
|
|
390
|
+
type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.SI_RNA,
|
|
274
391
|
Nucleotides: siRnaGcrsToNucleotides(seq),
|
|
275
392
|
BioSpring: siRnaGcrsToBioSpring(seq),
|
|
276
393
|
Axolabs: siRnaGcrsToAxolabs(seq),
|
|
277
|
-
MM12:
|
|
394
|
+
MM12: gcrsToMermade12(seq),
|
|
278
395
|
GCRS: seq
|
|
279
396
|
};
|
|
280
|
-
if (
|
|
397
|
+
if (output.expectedType == SYNTHESIZERS.GCRS)
|
|
281
398
|
return {
|
|
282
|
-
type:
|
|
399
|
+
type: SYNTHESIZERS.GCRS,
|
|
283
400
|
Nucleotides: gcrsToNucleotides(seq),
|
|
284
401
|
GCRS: seq,
|
|
285
|
-
|
|
402
|
+
Mermade12: gcrsToMermade12(seq)
|
|
286
403
|
}
|
|
287
|
-
if (
|
|
404
|
+
if (output.expectedType == SYNTHESIZERS.MERMADE_12)
|
|
288
405
|
return {
|
|
289
|
-
type:
|
|
406
|
+
type: SYNTHESIZERS.MERMADE_12,
|
|
290
407
|
Nucleotides: noTranslationTableAvailable,
|
|
291
408
|
GCRS: noTranslationTableAvailable,
|
|
292
|
-
|
|
409
|
+
Mermade12: seq
|
|
293
410
|
};
|
|
294
411
|
return {
|
|
295
412
|
type: undefinedInputSequence,
|
|
@@ -302,12 +419,12 @@ function convertSequence(seq: string) {
|
|
|
302
419
|
//output: string result {semType: BioSpring / Gapmers}
|
|
303
420
|
export function asoGapmersNucleotidesToBioSpring(nucleotides: string) {
|
|
304
421
|
let count: number = -1;
|
|
305
|
-
const objForEdges: {[index: string]: string} = {"T": "5*", "A": "6*", "C": "7*", "G": "8*"};
|
|
306
|
-
const objForCenter: {[index: string]: string} = {"
|
|
307
|
-
return nucleotides.replace(/
|
|
422
|
+
const objForEdges: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "T": "5*", "A": "6*", "C": "7*", "G": "8*"};
|
|
423
|
+
const objForCenter: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "T": "T*", "A": "A*", "C": "9*", "G": "G*"};
|
|
424
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|T|C|G)/g, function (x: string) {
|
|
308
425
|
count++;
|
|
309
426
|
return (count > 4 && count < 15) ? objForCenter[x] : objForEdges[x];
|
|
310
|
-
}).slice(0, 2 * count + 1);
|
|
427
|
+
}).slice(0, (nucleotides.endsWith("(invabasic)") || nucleotides.endsWith("(GalNAc-2-JNJ)")) ? nucleotides.length : 2 * count + 1);
|
|
311
428
|
}
|
|
312
429
|
|
|
313
430
|
//name: asoGapmersNucleotidesToGcrs
|
|
@@ -315,22 +432,22 @@ export function asoGapmersNucleotidesToBioSpring(nucleotides: string) {
|
|
|
315
432
|
//output: string result {semType: GCRS / Gapmers}
|
|
316
433
|
export function asoGapmersNucleotidesToGcrs(nucleotides: string) {
|
|
317
434
|
let count: number = -1;
|
|
318
|
-
const objForEdges: {[index: string]: string} = {"T": "moeUnps", "A": "moeAnps", "C": "moe5mCnps", "G": "moeGnps"};
|
|
319
|
-
const objForCenter: {[index: string]: string} = {"C": "5mCps", "A": "Aps", "T": "Tps", "G": "Gps"};
|
|
320
|
-
return nucleotides.replace(/
|
|
435
|
+
const objForEdges: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "T": "moeUnps", "A": "moeAnps", "C": "moe5mCnps", "G": "moeGnps"};
|
|
436
|
+
const objForCenter: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "C": "5mCps", "A": "Aps", "T": "Tps", "G": "Gps"};
|
|
437
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|T|C|G)/g, function (x: string) {
|
|
321
438
|
count++;
|
|
322
439
|
if (count < 5) return (count == 4) ? objForEdges[x].slice(0, -3) + 'ps' : objForEdges[x];
|
|
323
440
|
if (count < 15) return (count == 14) ? objForCenter[x].slice(0, -2) + 'nps' : objForCenter[x];
|
|
324
441
|
return objForEdges[x];
|
|
325
|
-
}).slice(0, -3);
|
|
442
|
+
}).slice(0, (nucleotides.endsWith("(invabasic)") || nucleotides.endsWith("(GalNAc-2-JNJ)")) ? nucleotides.length : -3);
|
|
326
443
|
}
|
|
327
444
|
|
|
328
445
|
//name: asoGapmersBioSpringToNucleotides
|
|
329
446
|
//input: string nucleotides {semType: BioSpring / Gapmers}
|
|
330
447
|
//output: string result {semType: DNA nucleotides}
|
|
331
448
|
export function asoGapmersBioSpringToNucleotides(nucleotides: string) {
|
|
332
|
-
const obj: {[index: string]: string} = {"*": "", "5": "T", "6": "A", "7": "C", "8": "G", "9": "C"};
|
|
333
|
-
return nucleotides.replace(/
|
|
449
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "*": "", "5": "T", "6": "A", "7": "C", "8": "G", "9": "C"};
|
|
450
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|\*|5|6|7|8|9)/g, function (x: string) {return obj[x];});
|
|
334
451
|
}
|
|
335
452
|
|
|
336
453
|
//name: asoGapmersBioSpringToGcrs
|
|
@@ -338,11 +455,11 @@ export function asoGapmersBioSpringToNucleotides(nucleotides: string) {
|
|
|
338
455
|
//output: string result {semType: GCRS / Gapmers}
|
|
339
456
|
export function asoGapmersBioSpringToGcrs(nucleotides: string) {
|
|
340
457
|
let count: number = -1;
|
|
341
|
-
const obj: {[index: string]: string} = {
|
|
458
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
342
459
|
"5*": "moeUnps", "6*": "moeAnps", "7*": "moe5mCnps", "8*": "moeGnps", "9*": "5mCps", "A*": "Aps", "T*": "Tps",
|
|
343
460
|
"G*": "Gps", "C*": "Cps", "5": "moeU", "6": "moeA", "7": "moe5mC", "8": "moeG"
|
|
344
461
|
};
|
|
345
|
-
return nucleotides.replace(/(5\*|6\*|7\*|8\*|9\*|A\*|T\*|G\*|C\*|5|6|7|8)/g, function (x: string) {
|
|
462
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|5\*|6\*|7\*|8\*|9\*|A\*|T\*|G\*|C\*|5|6|7|8)/g, function (x: string) {
|
|
346
463
|
count++;
|
|
347
464
|
return (count == 4) ? obj[x].slice(0, -3) + 'ps' : (count == 14) ? obj[x].slice(0, -2) + 'nps' : obj[x];
|
|
348
465
|
});
|
|
@@ -352,102 +469,102 @@ export function asoGapmersBioSpringToGcrs(nucleotides: string) {
|
|
|
352
469
|
//input: string nucleotides {semType: GCRS / Gapmers}
|
|
353
470
|
//output: string result {semType: BioSpring / Gapmers}
|
|
354
471
|
export function asoGapmersGcrsToBioSpring(nucleotides: string) {
|
|
355
|
-
const obj: {[index: string]: string} = {
|
|
472
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
356
473
|
"moeT": "5", "moeA": "6", "moe5mC": "7", "moeG": "8", "moeU": "5", "5mC": "9", "nps": "*", "ps": "*", "U": "T"
|
|
357
474
|
};
|
|
358
|
-
return nucleotides.replace(/(moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g, function (x: string) {return obj[x];});
|
|
475
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g, function (x: string) {return obj[x];});
|
|
359
476
|
}
|
|
360
477
|
|
|
361
478
|
//name: asoGapmersGcrsToNucleotides
|
|
362
479
|
//input: string nucleotides {semType: GCRS / Gapmers}
|
|
363
480
|
//output: string result {semType: DNA nucleotides}
|
|
364
481
|
export function asoGapmersGcrsToNucleotides(nucleotides: string) {
|
|
365
|
-
const obj: {[index: string]: string} = {"moe": "", "5m": "", "n": "", "ps": "", "U": "T"};
|
|
366
|
-
return nucleotides.replace(/(moe|5m|n|ps|U)/g, function (x: string) {return obj[x];});
|
|
482
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "moe": "", "5m": "", "n": "", "ps": "", "U": "T"};
|
|
483
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|moe|5m|n|ps|U)/g, function (x: string) {return obj[x];});
|
|
367
484
|
}
|
|
368
485
|
|
|
369
486
|
//name: siRnaBioSpringToNucleotides
|
|
370
487
|
//input: string nucleotides {semType: BioSpring / siRNA}
|
|
371
488
|
//output: string result {semType: RNA nucleotides}
|
|
372
489
|
export function siRnaBioSpringToNucleotides(nucleotides: string) {
|
|
373
|
-
const obj: {[index: string]: string} = {"1": "U", "2": "A", "3": "C", "4": "G", "5": "U", "6": "A", "7": "C", "8": "G", "*": ""};
|
|
374
|
-
return nucleotides.replace(/
|
|
490
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "1": "U", "2": "A", "3": "C", "4": "G", "5": "U", "6": "A", "7": "C", "8": "G", "*": ""};
|
|
491
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g, function (x: string) {return obj[x];});
|
|
375
492
|
}
|
|
376
493
|
|
|
377
494
|
//name: siRnaBioSpringToAxolabs
|
|
378
495
|
//input: string nucleotides {semType: BioSpring / siRNA}
|
|
379
496
|
//output: string result {semType: Axolabs / siRNA}
|
|
380
497
|
export function siRnaBioSpringToAxolabs(nucleotides: string) {
|
|
381
|
-
const obj: {[index: string]: string} = {"1": "Uf", "2": "Af", "3": "Cf", "4": "Gf", "5": "u", "6": "a", "7": "c", "8": "g", "*": "s"};
|
|
382
|
-
return nucleotides.replace(/
|
|
498
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "1": "Uf", "2": "Af", "3": "Cf", "4": "Gf", "5": "u", "6": "a", "7": "c", "8": "g", "*": "s"};
|
|
499
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g, function (x: string) {return obj[x];});
|
|
383
500
|
}
|
|
384
501
|
|
|
385
502
|
//name: siRnaBioSpringToGcrs
|
|
386
503
|
//input: string nucleotides {semType: BioSpring / siRNA}
|
|
387
504
|
//output: string result {semType: GCRS}
|
|
388
505
|
export function siRnaBioSpringToGcrs(nucleotides: string) {
|
|
389
|
-
const obj: {[index: string]: string} = {"1": "fU", "2": "fA", "3": "fC", "4": "fG", "5": "mU", "6": "mA", "7": "mC", "8": "mG", "*": "ps"};
|
|
390
|
-
return nucleotides.replace(/
|
|
506
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "1": "fU", "2": "fA", "3": "fC", "4": "fG", "5": "mU", "6": "mA", "7": "mC", "8": "mG", "*": "ps"};
|
|
507
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g, function (x: string) {return obj[x];});
|
|
391
508
|
}
|
|
392
509
|
|
|
393
510
|
//name: siRnaAxolabsToGcrs
|
|
394
511
|
//input: string nucleotides {semType: Axolabs / siRNA}
|
|
395
512
|
//output: string result {semType: GCRS}
|
|
396
513
|
export function siRnaAxolabsToGcrs(nucleotides: string) {
|
|
397
|
-
const obj: {[index: string]: string} = {
|
|
514
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
398
515
|
"Uf": "fU", "Af": "fA", "Cf": "fC", "Gf": "fG", "u": "mU", "a": "mA", "c": "mC", "g": "mG", "s": "ps"
|
|
399
516
|
};
|
|
400
|
-
return nucleotides.replace(/(Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
|
|
517
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
|
|
401
518
|
}
|
|
402
519
|
|
|
403
520
|
//name: siRnaAxolabsToBioSpring
|
|
404
521
|
//input: string nucleotides {semType: Axolabs / siRNA}
|
|
405
522
|
//output: string result {semType: BioSpring / siRNA}
|
|
406
523
|
export function siRnaAxolabsToBioSpring(nucleotides: string) {
|
|
407
|
-
const obj: {[index: string]: string} = {
|
|
524
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
408
525
|
"Uf": "1", "Af": "2", "Cf": "3", "Gf": "4", "u": "5", "a": "6", "c": "7", "g": "8", "s": "*"
|
|
409
526
|
};
|
|
410
|
-
return nucleotides.replace(/(Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
|
|
527
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
|
|
411
528
|
}
|
|
412
529
|
|
|
413
530
|
//name: siRnaAxolabsToNucleotides
|
|
414
531
|
//input: string nucleotides {semType: Axolabs / siRNA}
|
|
415
532
|
//output: string result {semType: RNA nucleotides}
|
|
416
533
|
export function siRnaAxolabsToNucleotides(nucleotides: string) {
|
|
417
|
-
const obj: {[index: string]: string} = {
|
|
534
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
418
535
|
"Uf": "U", "Af": "A", "Cf": "C", "Gf": "G", "u": "U", "a": "A", "c": "C", "g": "G", "s": ""
|
|
419
536
|
};
|
|
420
|
-
return nucleotides.replace(/(Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
|
|
537
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
|
|
421
538
|
}
|
|
422
539
|
|
|
423
540
|
//name: siRnaGcrsToNucleotides
|
|
424
541
|
//input: string nucleotides {semType: GCRS}
|
|
425
542
|
//output: string result {semType: RNA nucleotides}
|
|
426
543
|
export function siRnaGcrsToNucleotides(nucleotides: string) {
|
|
427
|
-
const obj: {[index: string]: string} = {
|
|
544
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
428
545
|
"fU": "U", "fA": "A", "fC": "C", "fG": "G", "mU": "U", "mA": "A", "mC": "C", "mG": "G", "ps": ""
|
|
429
546
|
};
|
|
430
|
-
return nucleotides.replace(/(fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
|
|
547
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
|
|
431
548
|
}
|
|
432
549
|
|
|
433
550
|
//name: siRnaGcrsToBioSpring
|
|
434
551
|
//input: string nucleotides {semType: GCRS}
|
|
435
552
|
//output: string result {semType: BioSpring / siRNA}
|
|
436
553
|
export function siRnaGcrsToBioSpring(nucleotides: string) {
|
|
437
|
-
const obj: {[index: string]: string} = {
|
|
554
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
438
555
|
"fU": "1", "fA": "2", "fC": "3", "fG": "4", "mU": "5", "mA": "6", "mC": "7", "mG": "8", "ps": "*"
|
|
439
556
|
};
|
|
440
|
-
return nucleotides.replace(/(fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
|
|
557
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
|
|
441
558
|
}
|
|
442
559
|
|
|
443
560
|
//name: siRnaGcrsToAxolabs
|
|
444
561
|
//input: string nucleotides {semType: GCRS}
|
|
445
562
|
//output: string result {semType: Axolabs / siRNA}
|
|
446
563
|
export function siRnaGcrsToAxolabs(nucleotides: string) {
|
|
447
|
-
const obj: {[index: string]: string} = {
|
|
564
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
448
565
|
"fU": "Uf", "fA": "Af", "fC": "Cf", "fG": "Gf", "mU": "u", "mA": "a", "mC": "c", "mG": "g", "ps": "s"
|
|
449
566
|
};
|
|
450
|
-
return nucleotides.replace(/(fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
|
|
567
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
|
|
451
568
|
}
|
|
452
569
|
|
|
453
570
|
//name: siRnaNucleotideToBioSpringSenseStrand
|
|
@@ -455,11 +572,11 @@ export function siRnaGcrsToAxolabs(nucleotides: string) {
|
|
|
455
572
|
//output: string result {semType: BioSpring / siRNA}
|
|
456
573
|
export function siRnaNucleotideToBioSpringSenseStrand(nucleotides: string) {
|
|
457
574
|
let count: number = -1;
|
|
458
|
-
const objForLeftEdge: {[index: string]: string} = {"A": "6*", "U": "5*", "G": "8*", "C": "7*"};
|
|
459
|
-
const objForRightEdge: {[index: string]: string} = {"A": "*6", "U": "*5", "G": "*8", "C": "*7"};
|
|
460
|
-
const objForOddIndices: {[index: string]: string} = {"A": "6", "U": "5", "G": "8", "C": "7"};
|
|
461
|
-
const objForEvenIndices: {[index: string]: string} = {"A": "2", "U": "1", "G": "4", "C": "3"};
|
|
462
|
-
return nucleotides.replace(/
|
|
575
|
+
const objForLeftEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "6*", "U": "5*", "G": "8*", "C": "7*"};
|
|
576
|
+
const objForRightEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "*6", "U": "*5", "G": "*8", "C": "*7"};
|
|
577
|
+
const objForOddIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "6", "U": "5", "G": "8", "C": "7"};
|
|
578
|
+
const objForEvenIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "2", "U": "1", "G": "4", "C": "3"};
|
|
579
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g, function (x: string) {
|
|
463
580
|
count++;
|
|
464
581
|
if (count < 2) return objForLeftEdge[x];
|
|
465
582
|
if (count > nucleotides.length - 3) return objForRightEdge[x];
|
|
@@ -472,11 +589,11 @@ export function siRnaNucleotideToBioSpringSenseStrand(nucleotides: string) {
|
|
|
472
589
|
//output: string result {semType: GCRS}
|
|
473
590
|
export function siRnaNucleotidesToGcrs(nucleotides: string) {
|
|
474
591
|
let count: number = -1;
|
|
475
|
-
const objForLeftEdge: {[index: string]: string} = {"A": "mAps", "U": "mUps", "G": "mGps", "C": "mCps"};
|
|
476
|
-
const objForRightEdge: {[index: string]: string} = {"A": "psmA", "U": "psmU", "G": "psmG", "C": "psmC"};
|
|
477
|
-
const objForEvenIndices: {[index: string]: string} = {"A": "fA", "U": "fU", "G": "fG", "C": "fC"};
|
|
478
|
-
const objForOddIndices: {[index: string]: string} = {"A": "mA", "U": "mU", "G": "mG", "C": "mC"};
|
|
479
|
-
return nucleotides.replace(/
|
|
592
|
+
const objForLeftEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "mAps", "U": "mUps", "G": "mGps", "C": "mCps"};
|
|
593
|
+
const objForRightEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "psmA", "U": "psmU", "G": "psmG", "C": "psmC"};
|
|
594
|
+
const objForEvenIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "fA", "U": "fU", "G": "fG", "C": "fC"};
|
|
595
|
+
const objForOddIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "mA", "U": "mU", "G": "mG", "C": "mC"};
|
|
596
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g, function (x: string) {
|
|
480
597
|
count++;
|
|
481
598
|
if (count < 2) return objForLeftEdge[x];
|
|
482
599
|
if (count > nucleotides.length - 3) return objForRightEdge[x];
|
|
@@ -489,10 +606,10 @@ export function siRnaNucleotidesToGcrs(nucleotides: string) {
|
|
|
489
606
|
//output: string result {semType: Axolabs}
|
|
490
607
|
export function siRnaNucleotideToAxolabsSenseStrand(nucleotides: string) {
|
|
491
608
|
let count: number = -1;
|
|
492
|
-
const objForLeftEdge: {[index: string]: string} = {"A": "as", "U": "us", "G": "gs", "C": "cs"};
|
|
493
|
-
const objForSomeIndices: {[index: string]: string} = {"A": "Af", "U": "Uf", "G": "Gf", "C": "Cf"};
|
|
494
|
-
const obj: {[index: string]: string} = {"A": "a", "U": "u", "G": "g", "C": "c"};
|
|
495
|
-
return nucleotides.replace(/
|
|
609
|
+
const objForLeftEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "as", "U": "us", "G": "gs", "C": "cs"};
|
|
610
|
+
const objForSomeIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "Af", "U": "Uf", "G": "Gf", "C": "Cf"};
|
|
611
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "a", "U": "u", "G": "g", "C": "c"};
|
|
612
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g, function (x: string) {
|
|
496
613
|
count++;
|
|
497
614
|
if (count < 2) return objForLeftEdge[x];
|
|
498
615
|
if (count == 6 || (count > 7 && count < 11)) return objForSomeIndices[x]
|
|
@@ -506,11 +623,11 @@ export function siRnaNucleotideToAxolabsSenseStrand(nucleotides: string) {
|
|
|
506
623
|
//output: string result {semType: Axolabs}
|
|
507
624
|
export function siRnaNucleotideToAxolabsAntisenseStrand(nucleotides: string) {
|
|
508
625
|
let count: number = -1;
|
|
509
|
-
const objForSmallLinkages: {[index: string]: string} = {"A": "as", "U": "us", "G": "gs", "C": "cs"};
|
|
510
|
-
const objForBigLinkages: {[index: string]: string} = {"A": "Afs", "U": "Ufs", "G": "Gfs", "C": "Cfs"};
|
|
511
|
-
const objForSomeIndices: {[index: string]: string} = {"A": "Af", "U": "Uf", "G": "Gf", "C": "Cf"};
|
|
512
|
-
const obj: {[index: string]: string} = {"A": "a", "U": "u", "G": "g", "C": "c"};
|
|
513
|
-
return nucleotides.replace(/
|
|
626
|
+
const objForSmallLinkages: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "as", "U": "us", "G": "gs", "C": "cs"};
|
|
627
|
+
const objForBigLinkages: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "Afs", "U": "Ufs", "G": "Gfs", "C": "Cfs"};
|
|
628
|
+
const objForSomeIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "Af", "U": "Uf", "G": "Gf", "C": "Cf"};
|
|
629
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "a", "U": "u", "G": "g", "C": "c"};
|
|
630
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g, function (x: string) {
|
|
514
631
|
count++;
|
|
515
632
|
if (count > 19 && count < 22) return objForSmallLinkages[x];
|
|
516
633
|
if (count == 0) return 'us';
|
|
@@ -523,48 +640,20 @@ export function siRnaNucleotideToAxolabsAntisenseStrand(nucleotides: string) {
|
|
|
523
640
|
//input: string nucleotides {semType: GCRS}
|
|
524
641
|
//output: string result {semType: RNA nucleotides}
|
|
525
642
|
export function gcrsToNucleotides(nucleotides: string) {
|
|
526
|
-
const obj: {[index: string]: string} = {
|
|
643
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
527
644
|
"mAps": "A", "mUps": "U", "mGps": "G", "mCps": "C", "fAps": "A", "fUps": "U", "fGps": "G", "fCps": "C",
|
|
528
645
|
"fU": "U", "fA": "A", "fC": "C", "fG": "G", "mU": "U", "mA": "A", "mC": "C", "mG": "G"
|
|
529
646
|
};
|
|
530
|
-
return nucleotides.replace(/(mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {return obj[x];});
|
|
531
|
-
}
|
|
532
|
-
|
|
533
|
-
//name: gcrsToOP100
|
|
534
|
-
//input: string nucleotides {semType: GCRS}
|
|
535
|
-
//output: string result {semType: OP100}
|
|
536
|
-
export function gcrsToOP100(nucleotides: string) {
|
|
537
|
-
let count: number = -1;
|
|
538
|
-
const objForEvenIndicesAtLeftEdge: {[index: string]: string} = {
|
|
539
|
-
"mAps": "a", "mUps": "u", "mGps": "g", "mCps": "c", "fAps": "a", "fUps": "u", "fGps": "g", "fCps": "c"
|
|
540
|
-
};
|
|
541
|
-
const objForOddIndicesAtLeftEdge: {[index: string]: string} = {
|
|
542
|
-
"mAps": "a*", "mUps": "u*", "mGps": "g*", "mCps": "c*", "fAps": "a*", "fUps": "u*", "fGps": "g*", "fCps": "c*"
|
|
543
|
-
};
|
|
544
|
-
const objForOddIndicesAtRightEdge: {[index: string]: string} = {
|
|
545
|
-
"mAps": "a", "mUps": "u", "mGps": "g", "mCps": "c", "fAps": "a", "fUps": "u", "fGps": "g", "fCps": "c"
|
|
546
|
-
};
|
|
547
|
-
const objForEvenIndicesAtCenter: {[index: string]: string} = {
|
|
548
|
-
"fU": "u*", "fA": "a*", "fC": "c*", "fG": "g*", "mU": "u*", "mA": "a*", "mC": "c*", "mG": "g*"
|
|
549
|
-
};
|
|
550
|
-
const objForOddIndicesAtCenter: {[index: string]: string} = {
|
|
551
|
-
"fU": "u", "fA": "a", "fC": "c", "fG": "g", "mU": "u", "mA": "a", "mC": "c", "mG": "g"
|
|
552
|
-
};
|
|
553
|
-
return nucleotides.replace(/(mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {
|
|
554
|
-
count++;
|
|
555
|
-
if (count < 3) return (count % 2 == 0) ? objForEvenIndicesAtLeftEdge[x] : objForOddIndicesAtLeftEdge[x];
|
|
556
|
-
if (count == 19) return objForOddIndicesAtRightEdge[x];
|
|
557
|
-
return (count % 2 == 1) ? objForEvenIndicesAtCenter[x] : objForOddIndicesAtCenter[x];
|
|
558
|
-
});
|
|
647
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {return obj[x];});
|
|
559
648
|
}
|
|
560
649
|
|
|
561
|
-
//name:
|
|
650
|
+
//name: gcrsToMermade12
|
|
562
651
|
//input: string nucleotides {semType: GCRS}
|
|
563
|
-
//output: string result {semType:
|
|
564
|
-
export function
|
|
565
|
-
const obj: {[index: string]: string} = {
|
|
652
|
+
//output: string result {semType: Mermade 12 / siRNA}
|
|
653
|
+
export function gcrsToMermade12(nucleotides: string) {
|
|
654
|
+
const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
|
|
566
655
|
"mAps": "e", "mUps": "h", "mGps": "g", "mCps": "f", "fAps": "i", "fUps": "l", "fGps": "k", "fCps": "j", "fU": "L",
|
|
567
656
|
"fA": "I", "fC": "J", "fG": "K", "mU": "H", "mA": "E", "mC": "F", "mG": "G"
|
|
568
657
|
};
|
|
569
|
-
return nucleotides.replace(/(mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {return obj[x]});
|
|
570
|
-
}
|
|
658
|
+
return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {return obj[x]});
|
|
659
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import {category, expect, expectFloat, test, testExpectFinish} from "@datagrok-libraries/utils/src/test";
|
|
2
|
+
import * as DG from "datagrok-api/dg";
|
|
3
|
+
import * as grok from "datagrok-api/grok";
|
|
4
|
+
import * as ui from "datagrok-api/ui";
|
|
5
|
+
import {sequenceToSmiles} from '../package'
|
|
6
|
+
|
|
7
|
+
category('sequence-translator', () => {
|
|
8
|
+
|
|
9
|
+
testExpectFinish('ts', async () => {
|
|
10
|
+
let expected = 'OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1O';
|
|
11
|
+
expect(sequenceToSmiles('AGGTCCTCTTGACTTAGGCC'), expected);
|
|
12
|
+
});
|
|
13
|
+
});
|
package/webpack.config.js
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
const path = require('path');
|
|
2
|
+
// Identifier-friendly form of the package.json name: the last path segment of
// the declared name, lower-cased, with every '-' removed. It is interpolated
// into the `${packageName}_test` library name of the test entry below.
const declaredPackageName = require('./package.json').name;
const packageName = path.parse(declaredPackageName).name.toLowerCase().replace(/-/g, '');
|
|
2
3
|
|
|
3
4
|
module.exports = {
|
|
4
5
|
mode: 'development',
|
|
5
6
|
entry: {
|
|
6
|
-
package: './src/package.ts'
|
|
7
|
+
package: ['./src/package.ts'],
|
|
8
|
+
test: {
|
|
9
|
+
filename: 'package-test.js',
|
|
10
|
+
library: {type: 'var', name: `${packageName}_test`},
|
|
11
|
+
import: './src/package-test.ts',
|
|
12
|
+
}
|
|
7
13
|
},
|
|
8
14
|
resolve: {
|
|
9
15
|
extensions: ['.wasm', '.mjs', '.js', '.json', '.ts', '.tsx'],
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
#name: convertFastaToSmiles
#description: Converts FASTA format to smiles
#language: python
#tags: demo, chem, rdkit, smiles
#input: string sequence_in_fasta_format = AGGTCTTCATGACTTCGGCC {semType: nucleotides}
#input: string flavor = DNA_no_cap {choices: ["Protein_L_amino_acids","Protein_D_amino_acids","RNA_no_cap","RNA_5_cap","RNA_3_cap","RNA_both_caps","DNA_no_cap","DNA_5_cap","DNA_3_cap","DNA_both_caps"]}
#output: string smiles {semType: Molecule}

from rdkit import Chem

# Script body: parses `sequence_in_fasta_format` with RDKit and stores the
# resulting SMILES string in the output variable `smiles`.

# Maps each selectable flavor name to the integer code that is passed as the
# `flavor` argument of Chem.MolFromFASTA.
flavors = {
    "Protein_L_amino_acids": 0,
    "Protein_D_amino_acids": 1,
    "RNA_no_cap": 2,
    "RNA_5_cap": 3,
    "RNA_3_cap": 4,
    "RNA_both_caps": 5,
    "DNA_no_cap": 6,
    "DNA_5_cap": 7,
    "DNA_3_cap": 8,
    "DNA_both_caps": 9
}

molecule = Chem.MolFromFASTA(text=sequence_in_fasta_format, flavor=flavors[flavor])

# MolFromFASTA returns None when the sequence cannot be parsed; fail with a
# readable message instead of handing None to MolToSmiles.
if molecule is None:
    raise ValueError(
        "Could not parse sequence %r with flavor %r" % (sequence_in_fasta_format, flavor)
    )

smiles = Chem.MolToSmiles(molecule)
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
#name: drawAxolabsPattern
#language: python
#input: dataframe ss_df
#input: dataframe as_df
#input: string patternName
#output: graphics pattern

import matplotlib.pyplot as plt

# Script body: draws the two strands described by `ss_df` (upper row, labelled
# "SS") and `as_df` (lower row, labelled "AS") as rows of coloured circles, one
# circle per dataframe row, plus a colour legend and a title. Both dataframes
# are read through their 'basis' and 'pto' columns. The figure is stored in the
# output variable `pattern`.

# Fill colour of a circle, keyed by the value found in the 'basis' column.
basis_color_dict = {
    "RNA": 'k',  # black
    "DNA": 'm',  # magenta
    "2'-Fluoro": 'b',  # blue
    "2'-O-Methyl": 'r',  # red
    "2'-O-MOE": 'c',  # cyan
    "Glycol nucleic acid": 'y',  # yellow
    "LNA": 'aquamarine',
    "Unlocked (UNA)": 'g'  # green
}

pattern = plt.figure()
# All drawing goes to the current axes of the new figure; fetch it once.
axes = plt.gca()

# x offset (to the left of the origin) at which the "SS:" / "AS:" captions go.
max_length = 2 * max(len(ss_df), len(as_df)) + 2.5


def _draw_strand(strand_df, row_y, star_y, caption, left_label, right_label):
    """Draw one strand as a row of circles centred on y=row_y.

    Row i of strand_df becomes a circle at x = -2 * i, coloured by its 'basis'
    value; rows with a truthy 'pto' value additionally get a red "*" at
    y=star_y. The caption and the two end labels are placed 0.5 below row_y.
    """
    bases = strand_df['basis']
    ptos = strand_df['pto']
    count = len(strand_df)
    for i in range(count - 1, -1, -1):
        # .iloc keeps the access positional regardless of the dataframe index.
        fill = basis_color_dict[bases.iloc[i]]
        axes.add_patch(plt.Circle(xy=(-2 * i, row_y), radius=1, fc=fill))
        if ptos.iloc[i]:
            axes.text(-2 * i - 1.25, star_y, "*", color='r', fontsize=20)
    label_y = row_y - 0.5
    axes.text(-2 * count - 0.25, label_y, left_label, fontsize=30)
    axes.text(-max_length, label_y, caption, fontsize=30)
    axes.text(1, label_y, right_label, fontsize=30)


_draw_strand(ss_df, row_y=3, star_y=3.25, caption='SS:', left_label="5'", right_label="3'")
_draw_strand(as_df, row_y=0, star_y=-1.25, caption='AS:', left_label="3'", right_label="5'")

# Legend: one small circle and caption per basis that occurs in either strand,
# in the order of basis_color_dict, laid out right to left in steps of 3.
used_bases = set(ss_df['basis']).union(as_df['basis'])
legend_keys = [key for key in basis_color_dict if key in used_bases]
for slot, key in enumerate(legend_keys):
    axes.add_patch(plt.Circle(xy=(-3 * slot, -2), radius=0.5, fc=basis_color_dict[key]))
    axes.text(-3 * slot - 1, -4, key, fontsize=10)

plt.axis('scaled')
plt.axis('off')
plt.title(patternName + ' for ' + str(len(ss_df)) + '/' + str(len(as_df)) + 'mer')
|