@datagrok/sequence-translator 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/detectors.js CHANGED
@@ -1,29 +1,35 @@
1
1
  class SequenceTranslatorPackageDetectors extends DG.Package {
2
+ isDnaNucleotides(sequence) {return /^[ATGC]{6,}$/.test(sequence);}
3
+ isRnaNucleotides(sequence) {return /^[AUGC]{6,}$/.test(sequence);}
4
+ isAsoGapmerBioSpring(sequence) {return /^[*56789ATGC]{6,}$/.test(sequence);}
5
+ isAsoGapmerGcrs(sequence) {return /^(?=.*moe)(?=.*5mC)(?=.*ps){6,}/.test(sequence);}
6
+ isSiRnaBioSpring(sequence) {return /^[*1-8]{6,}$/.test(sequence);}
7
+ isSiRnaAxolabs(sequence) {return /^[fsACGUacgu]{6,}$/.test(sequence);}
8
+ isSiRnaGcrs(sequence) {return /^[fmpsACGU]{6,}$/.test(sequence);} // TODO: insert into detectNucleotides
9
+ isGcrs(sequence) {return /^[fmpsACGU]{6,}$/.test(sequence);}
10
+ isMermade12(sequence) {return /^[IiJjKkLlEeFfGgHhQq]{6,}$/.test(sequence);}
11
+
2
12
  //tags: semTypeDetector
3
13
  //input: column col
4
14
  //output: string semType
5
15
  detectNucleotides(col) {
6
16
  if (col.type === DG.TYPE.STRING) {
7
- if (DG.Detector.sampleCategories(col, (s) => /^[ATGC]{10,}$/.test(s)))
17
+ if (DG.Detector.sampleCategories(col, (s) => isDnaNucleotides(s)))
8
18
  return 'DNA nucleotides';
9
- if (DG.Detector.sampleCategories(col, (s) => /^[AUGC]{10,}$/.test(s)))
19
+ if (DG.Detector.sampleCategories(col, (s) => isRnaNucleotides(s)))
10
20
  return 'RNA nucleotides';
11
- if (DG.Detector.sampleCategories(col, (s) => /^[5678ATGC]{10,}$/.test(s)))
12
- return 'ABI';
13
- if (DG.Detector.sampleCategories(col, (s) => /^[*56789ATGC]{30,}$/.test(s)))
21
+ if (DG.Detector.sampleCategories(col, (s) => isAsoGapmerBioSpring(s)))
14
22
  return 'BioSpring / Gapmers';
15
- if (DG.Detector.sampleCategories(col, (s) => /^(?=.*moe)(?=.*5mC)(?=.*ps){30,}/.test(s)))
23
+ if (DG.Detector.sampleCategories(col, (s) => isAsoGapmerGcrs(s)))
16
24
  return 'GCRS / Gapmers';
17
- if (DG.Detector.sampleCategories(col, (s) => /^[*1-8]{30,}$/.test(s)))
25
+ if (DG.Detector.sampleCategories(col, (s) => isSiRnaBioSpring(s)))
18
26
  return 'BioSpring / siRNA';
19
- if (DG.Detector.sampleCategories(col, (s) => /^[fsACGUacgu]{20,}$/.test(s)))
20
- return 'Axolabs / siRNA';
21
- if (DG.Detector.sampleCategories(col, (s) => /^[fmpsACGU]{30,}$/.test(s)))
27
+ if (DG.Detector.sampleCategories(col, (s) => isSiRnaAxolabs(s)))
28
+ return 'Axolabs / siRNA';
29
+ if (DG.Detector.sampleCategories(col, (s) => isGcrs(s)))
22
30
  return 'GCRS';
23
- if (DG.Detector.sampleCategories(col, (s) => /^[acgu*]{10,}$/.test(s)))
24
- return 'OP100';
25
- if (DG.Detector.sampleCategories(col, (s) => /^[IiJjKkLlEeFfGgHhQq]{10,}$/.test(s)))
26
- return 'MM12';
31
+ if (DG.Detector.sampleCategories(col, (s) => isMermade12(s)))
32
+ return 'Mermade 12 / siRNA';
27
33
  }
28
34
  }
29
35
  }
package/package.json CHANGED
@@ -1,16 +1,18 @@
1
1
  {
2
2
  "name": "@datagrok/sequence-translator",
3
3
  "friendlyName": "SequenceTranslator",
4
- "version": "0.0.3",
4
+ "version": "0.0.4",
5
5
  "description": "",
6
6
  "dependencies": {
7
+ "@datagrok-libraries/utils": "^0.0.23",
7
8
  "@types/react": "latest",
8
9
  "datagrok-api": ">0.94.10",
9
10
  "datagrok-tools": "^4.1.2",
10
11
  "npm": "^7.11.2",
11
12
  "save-svg-as-png": "^1.4.17",
12
13
  "ts-loader": "latest",
13
- "typescript": "latest"
14
+ "typescript": "latest",
15
+ "openchemlib": "6.0.1"
14
16
  },
15
17
  "scripts": {
16
18
  "link-api": "npm link datagrok-api",
@@ -28,4 +30,4 @@
28
30
  "webpack": "^5.31.0",
29
31
  "webpack-cli": "^4.6.0"
30
32
  }
31
- }
33
+ }
package/setup.cmd ADDED
@@ -0,0 +1,11 @@
1
+ cd ../../js-api
2
+ call npm install
3
+ call npm link
4
+ cd ../libraries/utils
5
+ call npm install
6
+ call npm link
7
+ call npm link datagrok-api
8
+ cd ../../packages/SequenceTranslator
9
+ call npm install
10
+ call npm link datagrok-api @datagrok-libraries/utils
11
+ webpack
package/src/map.ts CHANGED
@@ -1,7 +1,86 @@
1
- export const stadardPhosphateLinkSMILES = 'OP(=O)(O)O';
1
+ export const SYNTHESIZERS = {
2
+ RAW_NUCLEOTIDES: "Raw Nucleotides",
3
+ BIOSPRING: "BioSpring Codes",
4
+ GCRS: "Janssen GCRS Codes",
5
+ AXOLABS: "Axolabs Codes",
6
+ MERMADE_12: "Mermade 12"
7
+ };
8
+ export const TECHNOLOGIES = {
9
+ DNA: "DNA",
10
+ RNA: "RNA",
11
+ ASO_GAPMERS: "For ASO Gapmers",
12
+ SI_RNA: "For 2'-OMe and 2'-F modified siRNA"
13
+ };
14
+ // interface CODES {
15
+ // }
16
+ export const MODIFICATIONS: {[index: string]: {left: string, right: string}} = {
17
+ "(invabasic)": {
18
+ left: "OC1CCOC1COP(=O)(S[H])O",
19
+ right: "OP(=O)(S[H])OCC1OCCC1O"
20
+ },
21
+ "(GalNAc-2-JNJ)": {
22
+ left: "C(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)NC(=O)CCCC(=O)NCC(O)CO",
23
+ right: "OCC(O)CNC(=O)CCCC(=O)NC(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)"
24
+ }
25
+ };
26
+ export const stadardPhosphateLinkSmiles = 'OP(=O)(O)O';
2
27
  export const map: {[synthesizer: string]: {[technology: string]: {[code: string]: {"name": string, "weight": number, "normalized": string, "SMILES": string}}}} = {
28
+ "Raw Nucleotides": {
29
+ "DNA": {
30
+ "A": {
31
+ "name": "Adenine",
32
+ "weight": 313.21,
33
+ "normalized": "dA",
34
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1O"
35
+ },
36
+ "T": {
37
+ "name": "Tyrosine",
38
+ "weight": 304.2,
39
+ "normalized": "dT",
40
+ "SMILES": "OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1O"
41
+ },
42
+ "G": {
43
+ "name": "Guanine",
44
+ "weight": 329.21,
45
+ "normalized": "dG",
46
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)C)[C@@H]1O"
47
+ },
48
+ "C": {
49
+ "name": "Cytosine",
50
+ "weight": 289.18,
51
+ "normalized": "dC",
52
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1O"
53
+ }
54
+ },
55
+ "RNA": {
56
+ "A": {
57
+ "name": "Adenine",
58
+ "weight": 313.21,
59
+ "normalized": "dA",
60
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1O"
61
+ },
62
+ "U": {
63
+ "name": "Uracil",
64
+ "weight": 306.17,
65
+ "normalized": "rU",
66
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](O)[C@@H]1O"
67
+ },
68
+ "G": {
69
+ "name": "Guanine",
70
+ "weight": 329.21,
71
+ "normalized": "dG",
72
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)C)[C@@H]1O"
73
+ },
74
+ "C": {
75
+ "name": "Cytosine",
76
+ "weight": 289.18,
77
+ "normalized": "dC",
78
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1O"
79
+ }
80
+ }
81
+ },
3
82
  "BioSpring Codes": {
4
- "For ASO Gapmers": {
83
+ 'For ASO Gapmers': {
5
84
  "5": {
6
85
  "name": "2'MOE-5Me-rU",
7
86
  "weight": 378.27,
@@ -36,7 +115,7 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
36
115
  "name": "ps linkage",
37
116
  "weight": 16.07,
38
117
  "normalized": "",
39
- "SMILES": "OP(=O)(O)S"
118
+ "SMILES": "OP(=O)(S)O"
40
119
  },
41
120
  "A": {
42
121
  "name": "Adenine",
@@ -116,7 +195,7 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
116
195
  "name": "ps linkage",
117
196
  "weight": 16.07,
118
197
  "normalized": "",
119
- "SMILES": "OP(=O)(O)S"
198
+ "SMILES": "OP(=O)(S)O"
120
199
  }
121
200
  }
122
201
  },
@@ -174,7 +253,7 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
174
253
  "name": "ps linkage",
175
254
  "weight": 16.07,
176
255
  "normalized": "",
177
- "SMILES": "OP(=O)(O)S"
256
+ "SMILES": "OP(=O)(S)O"
178
257
  }
179
258
  }
180
259
  },
@@ -226,7 +305,7 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
226
305
  "name": "ps linkage",
227
306
  "weight": 16.07,
228
307
  "normalized": "",
229
- "SMILES": "OP(=O)(O)S"
308
+ "SMILES": "OP(=O)(S)O"
230
309
  },
231
310
  "A": {
232
311
  "name": "Adenine",
@@ -351,5 +430,105 @@ export const map: {[synthesizer: string]: {[technology: string]: {[code: string]
351
430
  "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1O"
352
431
  }
353
432
  }
433
+ },
434
+ "Mermade 12": {
435
+ "For 2'-OMe and 2'-F modified siRNA": {
436
+ "e": {
437
+ "name": "2'OMe-rA-ps",
438
+ "weight": 359.31,
439
+ "normalized": "rA",
440
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(S)O"
441
+ },
442
+ "h": {
443
+ "name": "2'OMe-rU-ps",
444
+ "weight": 336.27,
445
+ "normalized": "rU",
446
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](OC)[C@@H]1OP(=O)(S)O"
447
+ },
448
+ "g": {
449
+ "name": "2'OMe-rG-ps",
450
+ "weight": 375.31,
451
+ "normalized": "rG",
452
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(S)O"
453
+ },
454
+ "f": {
455
+ "name": "2'OMe-rC-ps",
456
+ "weight": 335.28,
457
+ "normalized": "rC",
458
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))[C@H](OC)[C@@H]1OP(=O)(S)O"
459
+ },
460
+ "i": {
461
+ "name": "2'-fluoro-A-ps",
462
+ "weight": 347.27,
463
+ "normalized": "rA",
464
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)[C@H](F)[C@@H]1OP(=O)(S)O"
465
+ },
466
+ "l": {
467
+ "name": "2'-fluoro-U-ps",
468
+ "weight": 324.23,
469
+ "normalized": "rU",
470
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](F)[C@@H]1OP(=O)(S)O"
471
+ },
472
+ "k": {
473
+ "name": "2'-fluoro-G-ps",
474
+ "weight": 363.26,
475
+ "normalized": "rG",
476
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](F)[C@@H]1OP(=O)(S)O"
477
+ },
478
+ "j": {
479
+ "name": "2'-fluoro-C-ps",
480
+ "weight": 323.25,
481
+ "normalized": "rC",
482
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))[C@H](F)[C@@H]1OP(=O)(S)O"
483
+ },
484
+ "L": {
485
+ "name": "2'-fluoro-U",
486
+ "weight": 308.16,
487
+ "normalized": "rU",
488
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](F)[C@@H]1O"
489
+ },
490
+ "I": {
491
+ "name": "2'-fluoro-A",
492
+ "weight": 331.2,
493
+ "normalized": "rA",
494
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)[C@H](F)[C@@H]1O"
495
+ },
496
+ "J": {
497
+ "name": "2'-fluoro-C",
498
+ "weight": 307.18,
499
+ "normalized": "rC",
500
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))[C@H](F)[C@@H]1O"
501
+ },
502
+ "K": {
503
+ "name": "2'-fluoro-G",
504
+ "weight": 347.19,
505
+ "normalized": "rG",
506
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](F)[C@@H]1O"
507
+ },
508
+ "H": {
509
+ "name": "2'OMe-rU",
510
+ "weight": 320.2,
511
+ "normalized": "rU",
512
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(=O)NC2(=O))[C@H](OC)[C@@H]1O"
513
+ },
514
+ "E": {
515
+ "name": "2'OMe-rA",
516
+ "weight": 343.24,
517
+ "normalized": "rA",
518
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)[C@H](OC)[C@@H]1O"
519
+ },
520
+ "F": {
521
+ "name": "2'OMe-rC",
522
+ "weight": 319.21,
523
+ "normalized": "rC",
524
+ "SMILES": "OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))[C@H](OC)[C@@H]1O"
525
+ },
526
+ "G": {
527
+ "name": "2'OMe-rG",
528
+ "weight": 359.24,
529
+ "normalized": "rG",
530
+ "SMILES": "OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1O"
531
+ }
532
+ }
354
533
  }
355
534
  };
@@ -0,0 +1,13 @@
1
+ import * as DG from "datagrok-api/dg";
2
+ import * as grok from "datagrok-api/grok";
3
+ import {runTests} from "@datagrok-libraries/utils/src/test";
4
+ import "./tests/smiles-tests";
5
+
6
+ export let _package = new DG.Package();
7
+
8
+ //name: test
9
+ //output: dataframe result
10
+ export async function test(): Promise<DG.DataFrame> {
11
+ let data = await runTests();
12
+ return DG.DataFrame.fromObjects(data)!;
13
+ }
package/src/package.ts CHANGED
@@ -5,20 +5,141 @@ import * as DG from 'datagrok-api/dg';
5
5
  import * as OCL from 'openchemlib/full.js';
6
6
  import $ from "cash-dom";
7
7
  import {defineAxolabsPattern} from "./defineAxolabsPattern";
8
- import {map, stadardPhosphateLinkSMILES} from "./map";
8
+ import {map, stadardPhosphateLinkSmiles, SYNTHESIZERS, TECHNOLOGIES, MODIFICATIONS} from "./map";
9
9
 
10
10
  export let _package = new DG.Package();
11
11
 
12
12
  const defaultInput = "AGGTCCTCTTGACTTAGGCC";
13
- const minimalValidNumberOfCharacters = 6;
14
- const smallNumberOfCharacters = "Length of input sequence should be at least " + minimalValidNumberOfCharacters + " characters";
15
13
  const undefinedInputSequence = "Type of input sequence is undefined";
16
14
  const noTranslationTableAvailable = "No translation table available";
17
15
  const sequenceWasCopied = 'Copied';
18
16
  const tooltipSequence = 'Copy sequence';
19
17
 
20
- function sortByStringLengthInDescendingOrderToCheckForMatchWithLongerCodesFirst(array: string[]): string[] {
21
- return array.sort(function(a, b) { return b.length - a.length; });
18
+ function getAllCodesOfSynthesizer(synthesizer: string) {
19
+ let codes: string[] = [];
20
+ for (let technology of Object.keys(map[synthesizer]))
21
+ codes = codes.concat(Object.keys(map[synthesizer][technology]));
22
+ return codes.concat(Object.keys(MODIFICATIONS));
23
+ }
24
+
25
/**
 * Lists the synthesizers that have at least one code matching the beginning of `sequence`.
 *
 * When the sequence contains a ')' (for example the end of a leading overhang modification
 * such as "(invabasic)"), matching starts right after the first ')'; otherwise it starts
 * at index 0.
 *
 * @param sequence Sequence to inspect.
 * @returns Names of the matching synthesizers, in the key order of `map`.
 */
function getListOfPossibleSynthesizersByFirstMatchedCode(sequence: string): string[] {
  //TODO: get first non-dropdown code when there are two modifications
  // The offset depends only on the sequence, so it is computed once for all synthesizers.
  // indexOf yields -1 when there is no ')', which turns into a start offset of 0.
  const start = sequence.indexOf(')') + 1;
  return Object.keys(map).filter((synthesizer: string) =>
    getAllCodesOfSynthesizer(synthesizer).some((code: string) => sequence.startsWith(code, start)));
}
41
+
42
+ function getListOfPossibleTechnologiesByFirstMatchedCode(sequence: string, synthesizer: string): string[] {
43
+ let technologies: string[] = [];
44
+ Object.keys(map[synthesizer]).forEach((technology: string) => {
45
+ const codes = Object.keys(map[synthesizer][technology]).concat(Object.keys(MODIFICATIONS));
46
+ if (codes.some((s) => s == sequence.slice(0, s.length)))
47
+ technologies.push(technology);
48
+ });
49
+ return technologies;
50
+ }
51
+
52
/**
 * Consumes `sequence` from the start, one known code at a time, and returns the index at
 * which scanning stopped. The result equals `sequence.length` when every character was
 * covered by a code.
 */
function getIndexOfFirstUnmatchedCharacter(sequence: string, codes: string[]): number {
  const firstUniqueCharacters = ['r', 'd'];
  const nucleotides = ['A', 'U', 'T', 'C', 'G'];
  let index = 0;
  while (index < sequence.length) {
    const matchedCode = codes.find((code) => sequence.startsWith(code, index));
    if (matchedCode == null)
      break;

    // Mistake pattern 'rAA': a nucleotide letter two positions after 'r' or 'd' is rejected.
    if (
      index > 1 &&
      nucleotides.includes(sequence[index]) &&
      firstUniqueCharacters.includes(sequence[index - 2])
    ) break;

    // Mistake pattern 'ArA': a nucleotide letter directly followed by 'r' or 'd' is accepted,
    // and scanning stops right after it.
    if (
      firstUniqueCharacters.includes(sequence[index + 1]) &&
      nucleotides.includes(sequence[index])
    ) {
      index++;
      break;
    }

    index += matchedCode.length;
  }
  return index;
}

/**
 * Checks whether `sequence` consists solely of codes of one synthesizer and names the
 * best-matching format.
 *
 * @param sequence Sequence to validate.
 * @returns `indexOfFirstNotValidCharacter` is -1 when the whole sequence is valid, otherwise
 *   the index where the best-matching synthesizer stopped (0 when nothing matches at all).
 *   `expectedType` is "<synthesizer> <technology>" for a valid sequence, the best-matching
 *   synthesizer for a partially valid one, and null when no candidate exists.
 */
function isValidSequence(sequence: string): {indexOfFirstNotValidCharacter: number, expectedType: string | null} {
  const possibleSynthesizers = getListOfPossibleSynthesizersByFirstMatchedCode(sequence);
  if (possibleSynthesizers.length == 0)
    return {indexOfFirstNotValidCharacter: 0, expectedType: null};

  // The synthesizer that covers the longest prefix wins; ties go to the first candidate.
  const synthesizerIndices = possibleSynthesizers.map((synthesizer) =>
    getIndexOfFirstUnmatchedCharacter(sequence, getAllCodesOfSynthesizer(synthesizer)));
  const longestSynthesizerMatch = Math.max(...synthesizerIndices);
  const expectedSynthesizer = possibleSynthesizers[synthesizerIndices.indexOf(longestSynthesizerMatch)];
  if (longestSynthesizerMatch != sequence.length)
    return {indexOfFirstNotValidCharacter: longestSynthesizerMatch, expectedType: expectedSynthesizer};

  const possibleTechnologies = getListOfPossibleTechnologiesByFirstMatchedCode(sequence, expectedSynthesizer);
  if (possibleTechnologies.length == 0)
    // Same key as every other result, so consumers reading `expectedType` get null, not undefined.
    return {indexOfFirstNotValidCharacter: 0, expectedType: null};

  // Technologies are compared on their own codes only (modification names are not included).
  const technologyIndices = possibleTechnologies.map((technology) =>
    getIndexOfFirstUnmatchedCharacter(sequence, Object.keys(map[expectedSynthesizer][technology])));
  const expectedTechnology = possibleTechnologies[technologyIndices.indexOf(Math.max(...technologyIndices))];

  return {
    indexOfFirstNotValidCharacter: -1,
    expectedType: expectedSynthesizer + ' ' + expectedTechnology
  };
}
140
+
141
/**
 * Returns the strings ordered from longest to shortest, so that longer codes are tried
 * before shorter ones that are their prefixes.
 *
 * @param array Strings to order; left unmodified.
 * @returns A new array with the same elements, longest first.
 */
function sortByStringLengthInDescendingOrder(array: string[]): string[] {
  // Array.prototype.sort works in place, so sort a copy to keep the caller's array intact.
  return [...array].sort((a: string, b: string) => b.length - a.length);
}
23
144
 
24
145
  function getObjectWithCodesAndSmiles() {
@@ -30,22 +151,48 @@ function getObjectWithCodesAndSmiles() {
30
151
  return obj;
31
152
  }
32
153
 
33
- function modifiedToSmiles(sequence: string) {
154
/**
 * Builds one SMILES string for a sequence of synthesizer codes.
 *
 * The sequence is first split into codes (longest known code first, then overhang
 * modification names). The SMILES fragments are then concatenated:
 *  - a modification contributes its `right` form when it sits in the second half of the
 *    code list, otherwise its `left` form;
 *  - a link code at position > 1 whose predecessor is not a code that already ends with a
 *    link trims the tail of the accumulated SMILES and appends nothing;
 *  - a link code, a code that already ends with a link, or a code followed by a link or a
 *    modification contributes its own SMILES only;
 *  - any other code contributes its SMILES followed by the standard phosphate link.
 * Finally every "OO" is collapsed to "O", and the trailing standard link is trimmed unless
 * the sequence ends with a modification, a code that already carries a link, or a link
 * code handled by the trimming rule above.
 *
 * @param sequence Code sequence without whitespace.
 * @returns The assembled SMILES string.
 * @throws Error when some position of the sequence matches no known code.
 */
export function sequenceToSmiles(sequence: string): string {
  const obj = getObjectWithCodesAndSmiles();
  const links = ['s', 'ps', '*'];
  // Codes whose SMILES already end with a phosphorothioate link.
  const includesStandardLinkAlready = ["e", "h", "g", "f", "i", "l", "k", "j"];
  const dropdowns = Object.keys(MODIFICATIONS);
  const codes = sortByStringLengthInDescendingOrder(Object.keys(obj)).concat(dropdowns);

  const codesList: string[] = [];
  let position = 0;
  while (position < sequence.length) {
    const code = codes.find((s: string) => sequence.startsWith(s, position));
    // Fail with a readable message instead of a TypeError on `undefined.length`.
    if (!code)
      throw new Error('Unknown code at position ' + position + ' of sequence "' + sequence + '"');
    position += code.length;
    codesList.push(code);
  }

  let smiles = '';
  for (let i = 0; i < codesList.length; i++) {
    if (dropdowns.includes(codesList[i])) {
      smiles += (i >= codesList.length / 2) ?
        MODIFICATIONS[codesList[i]].right :
        MODIFICATIONS[codesList[i]].left;
    } else {
      if (links.includes(codesList[i]) && i > 1 && !includesStandardLinkAlready.includes(codesList[i - 1]))
        smiles = smiles.slice(0, smiles.length - stadardPhosphateLinkSmiles.length + 1);
      else if (links.includes(codesList[i]) ||
        includesStandardLinkAlready.includes(codesList[i]) ||
        (i < codesList.length - 1 && (links.includes(codesList[i + 1]) || dropdowns.includes(codesList[i + 1])))
      )
        smiles += obj[codesList[i]];
      else
        smiles += obj[codesList[i]] + stadardPhosphateLinkSmiles;
    }
  }
  smiles = smiles.replace(/OO/g, 'O');

  const lastCode = codesList[codesList.length - 1];
  const endsWithHandledLink =
    links.includes(lastCode) &&
    codesList.length > 1 &&
    !includesStandardLinkAlready.includes(codesList[codesList.length - 2]);
  const keepTail =
    endsWithHandledLink ||
    dropdowns.includes(lastCode) ||
    includesStandardLinkAlready.includes(lastCode);

  return keepTail ?
    smiles :
    smiles.slice(0, smiles.length - stadardPhosphateLinkSmiles.length + 1);
}
50
197
 
51
198
  //name: Sequence Translator
@@ -57,71 +204,60 @@ export function sequenceTranslator() {
57
204
  windows.showToolbox = false;
58
205
  windows.showHelp = false;
59
206
 
60
- function updateTableAndSVG(sequence: string) {
207
+ function updateTableAndMolecule(sequence: string) {
61
208
  moleculeSvgDiv.innerHTML = "";
62
209
  outputTableDiv.innerHTML = "";
63
- let outputSequenceObj = convertSequence(sequence);
64
- let tableRows = [];
65
- for (let key of Object.keys(outputSequenceObj).slice(1)) {
66
- //@ts-ignore
67
- tableRows.push({'key': key, 'value': ui.link(outputSequenceObj[key], () => navigator.clipboard.writeText(outputSequenceObj[key]).then(() => grok.shell.info(sequenceWasCopied)), tooltipSequence, '')})
68
- }
69
- outputTableDiv.append(
70
- ui.div([
71
- DG.HtmlTable.create(
72
- tableRows, (item: { key: string; value: string; }) => [item.key, item.value], ['Code', 'Sequence']
73
- ).root
74
- ], 'table')
75
- );
76
- semTypeOfInputSequence.textContent = 'Detected input type: ' + outputSequenceObj.type;
77
- if (!(outputSequenceObj.type == undefinedInputSequence || outputSequenceObj.type == smallNumberOfCharacters)) {
78
- let pi = DG.TaskBarProgressIndicator.create('Rendering molecule...');
79
- try {
80
- let flavor: string = (outputSequenceObj.Nucleotides.includes('U')) ? "RNA_both_caps" : "DNA_both_caps";
81
- (async () => {
82
- let smiles = (/^[ATGCU]{6,}$/.test(inputSequenceField.value.replace(/\s/g, ''))) ?
83
- await nucleotidesToSmiles(outputSequenceObj.Nucleotides, flavor) :
84
- modifiedToSmiles(inputSequenceField.value.replace(/\s/g, ''));
85
- smiles = smiles.replace(/@/g, ''); // Remove StereoChemistry on the Nucleic acid chain and remove the Chiral label
86
- moleculeSvgDiv.append(grok.chem.svgMol(smiles, 900, 300));
87
- })();
88
- } finally {
89
- pi.close();
210
+ let pi = DG.TaskBarProgressIndicator.create('Rendering table and molecule...');
211
+ try {
212
+ let outputSequenceObj = convertSequence(sequence);
213
+ let tableRows = [];
214
+ for (let key of Object.keys(outputSequenceObj).slice(1)) {
215
+ tableRows.push({
216
+ 'key': key,
217
+ 'value': ("indexOfFirstNotValidCharacter" in outputSequenceObj) ?
218
+ ui.divH([
219
+ ui.divText(sequence.slice(0, JSON.parse(outputSequenceObj.indexOfFirstNotValidCharacter!).indexOfFirstNotValidCharacter), {style: {color: "grey"}}),
220
+ ui.tooltip.bind(
221
+ ui.divText(sequence.slice(JSON.parse(outputSequenceObj.indexOfFirstNotValidCharacter!).indexOfFirstNotValidCharacter), {style: {color: "red"}}),
222
+ "Expected format: " + JSON.parse(outputSequenceObj.indexOfFirstNotValidCharacter!).expectedType + ". Press 'SHOW CODES' button to see tables with valid codes"
223
+ )
224
+ ]) : //@ts-ignore
225
+ ui.link(outputSequenceObj[key], () => navigator.clipboard.writeText(outputSequenceObj[key]).then(() => grok.shell.info(sequenceWasCopied)), tooltipSequence, '')
226
+ })
90
227
  }
228
+ outputTableDiv.append(
229
+ ui.div([DG.HtmlTable.create(tableRows, (item: { key: string; value: string; }) => [item.key, item.value], ['Code', 'Sequence']).root], 'table')
230
+ );
231
+ semTypeOfInputSequence.textContent = 'Detected input type: ' + outputSequenceObj.type;
232
+
233
+ let width = $(window).width();
234
+ const canvas = ui.canvas(width, Math.round(width / 2));
235
+ let smiles = sequenceToSmiles(inputSequenceField.value.replace(/\s/g, ''));
236
+ // @ts-ignore
237
+ OCL.StructureView.drawMolecule(canvas, OCL.Molecule.fromSmiles(smiles), { suppressChiralText: true });
238
+ if (outputSequenceObj.type != undefinedInputSequence)
239
+ moleculeSvgDiv.append(canvas);
240
+ } finally {
241
+ pi.close();
91
242
  }
92
243
  }
93
244
 
94
- const appMainDescription = ui.info([
95
- ui.divText('\n How to convert one sequence:',{style:{'font-weight':'bolder'}}),
96
- ui.divText("Paste sequence into the text field below"),
97
- ui.divText('\n How to convert many sequences:',{style:{'font-weight':'bolder'}}),
98
- ui.divText("1. Drag & drop an Excel or CSV file with sequences into Datagrok. The platform will automatically detect columns with sequences"),
99
- ui.divText('2. Right-click on the column header, then see the \'Convert\' menu'),
100
- ui.divText("This will add the result column to the right of the table"),
101
- ], 'Convert oligonucleotide sequences between Nucleotides, BioSpring, Axolabs, and GCRS representations.'
102
- );
103
-
104
- let inputSequenceField = ui.textInput("", defaultInput, (sequence: string) => updateTableAndSVG(sequence));
105
- let outputSequenceObj = convertSequence(defaultInput);
106
- let semTypeOfInputSequence = ui.divText('Detected input type: ' + outputSequenceObj.type);
107
-
108
- let tableRows = [];
109
- for (let key of Object.keys(outputSequenceObj).slice(1)) {
110
- //@ts-ignore
111
- tableRows.push({'key': key, 'value': ui.link(outputSequenceObj[key], () => navigator.clipboard.writeText(outputSequenceObj[key]).then(() => grok.shell.info(sequenceWasCopied)), tooltipSequence, '')})
112
- }
245
+ let semTypeOfInputSequence = ui.divText('');
246
+ let moleculeSvgDiv = ui.block([]);
113
247
  let outputTableDiv = ui.div([], 'table');
114
- outputTableDiv.append(
115
- DG.HtmlTable.create(tableRows, (item: {key: string; value: string;}) => [item.key, item.value], ['Code', 'Sequence']).root
116
- );
248
+ let inputSequenceField = ui.textInput("", defaultInput, (sequence: string) => updateTableAndMolecule(sequence));
249
+ updateTableAndMolecule(defaultInput);
117
250
 
118
- let tables = ui.divV([]);
251
+ let tablesWithCodes = ui.divV([
252
+ DG.HtmlTable.create(Object.keys(MODIFICATIONS), (item: string) => [item], ['Overhang modification']).root,
253
+ ui.div([], {style: {height: '30px'}})
254
+ ]);
119
255
  for (let synthesizer of Object.keys(map)) {
120
256
  for (let technology of Object.keys(map[synthesizer])) {
121
257
  let tableRows = [];
122
258
  for (let [key, value] of Object.entries(map[synthesizer][technology]))
123
259
  tableRows.push({'name': value.name, 'code': key});
124
- tables.append(
260
+ tablesWithCodes.append(
125
261
  DG.HtmlTable.create(
126
262
  tableRows,
127
263
  (item: {name: string; code: string;}) => [item['name'], item['code']],
@@ -131,29 +267,31 @@ export function sequenceTranslator() {
131
267
  );
132
268
  }
133
269
  }
134
-
135
- let showCodesButton = ui.button('SHOW CODES', () => ui.dialog('Codes').add(tables).show());
136
-
137
- let moleculeSvgDiv = ui.block([]);
138
-
139
- let flavor: string = (defaultInput.includes('U')) ? "RNA_both_caps" : "DNA_both_caps";
140
- (async () => moleculeSvgDiv.append(grok.chem.svgMol(<string> await nucleotidesToSmiles(defaultInput, flavor), 900, 300)))();
141
-
142
- let saveMolFileButton = ui.bigButton('SAVE MOL FILE', async() => {
143
- let outputSequenceObj = convertSequence(inputSequenceField.value);
144
- flavor = outputSequenceObj.Nucleotides.includes('U') ? "RNA_both_caps" : "DNA_both_caps";
145
- let smiles = (/^[ATGCU]{6,}$/.test(inputSequenceField.value.replace(/\s/g, ''))) ?
146
- await nucleotidesToSmiles(outputSequenceObj.Nucleotides, flavor) :
147
- modifiedToSmiles(inputSequenceField.value.replace(/\s/g, ''));
148
- smiles = smiles.replace(/@/g, ''); // Remove StereoChemistry on the Nucleic acid chain and remove the Chiral label
149
- let mol = OCL.Molecule.fromSmiles(smiles);
150
- let result = `${mol.toMolfile()}\n`;// + '$$$$';
151
- var element = document.createElement('a');
270
+ let showCodesButton = ui.button('SHOW CODES', () => ui.dialog('Codes').add(tablesWithCodes).show());
271
+ let copySmiles = ui.button(
272
+ 'COPY SMILES',
273
+ () => navigator.clipboard.writeText(sequenceToSmiles(inputSequenceField.value.replace(/\s/g, '')))
274
+ .then(() => grok.shell.info(sequenceWasCopied))
275
+ );
276
+ let saveMolFileButton = ui.bigButton('SAVE MOL FILE', () => {
277
+ let smiles = sequenceToSmiles(inputSequenceField.value.replace(/\s/g, ''));
278
+ let result = `${OCL.Molecule.fromSmiles(smiles).toMolfile()}\n`;
279
+ let element = document.createElement('a');
152
280
  element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(result));
153
281
  element.setAttribute('download', inputSequenceField.value.replace(/\s/g, '') + '.mol');
154
282
  element.click();
155
283
  });
156
284
 
285
+ const appMainDescription = ui.info([
286
+ ui.divText('\n How to convert one sequence:',{style:{'font-weight':'bolder'}}),
287
+ ui.divText("Paste sequence into the text field below"),
288
+ ui.divText('\n How to convert many sequences:',{style:{'font-weight':'bolder'}}),
289
+ ui.divText("1. Drag & drop an Excel or CSV file with sequences into Datagrok. The platform will automatically detect columns with sequences"),
290
+ ui.divText('2. Right-click on the column header, then see the \'Convert\' menu'),
291
+ ui.divText("This will add the result column to the right of the table"),
292
+ ], 'Convert oligonucleotide sequences between Nucleotides, BioSpring, Axolabs, Mermade 12 and GCRS representations.'
293
+ );
294
+
157
295
  let v = grok.shell.newView('Sequence Translator', [
158
296
  ui.tabControl({
159
297
  'MAIN': ui.div([
@@ -171,7 +309,7 @@ export function sequenceTranslator() {
171
309
  outputTableDiv
172
310
  ]),
173
311
  moleculeSvgDiv,
174
- ui.divH([saveMolFileButton, showCodesButton])
312
+ ui.divH([saveMolFileButton, showCodesButton, copySmiles])
175
313
  ], 'sequence')
176
314
  ]),
177
315
  'AXOLABS': defineAxolabsPattern()
@@ -192,104 +330,83 @@ export function sequenceTranslator() {
192
330
  .css('width','100%');
193
331
  }
194
332
 
195
- export async function nucleotidesToSmiles(nucleotides: string, flavor: string) {
196
- return await grok.functions.call('SequenceTranslator:convertFastaToSmiles', {
197
- 'sequence_in_fasta_format': nucleotides,
198
- 'flavor': flavor
199
- });
200
- }
201
-
202
- export function isDnaNucleotidesCode(sequence: string) {return /^[ATGC]{6,}$/.test(sequence);}
203
- export function isRnaNucleotidesCode(sequence: string) {return /^[AUGC]{6,}$/.test(sequence);}
204
- export function isAsoGapmerBioSpringCode(sequence: string) {return /^[*56789ATGC]{6,}$/.test(sequence);}
205
- export function isAsoGapmerGcrsCode(sequence: string) {return /^(?=.*moe)(?=.*5mC)(?=.*ps){6,}/.test(sequence);}
206
- export function isSiRnaBioSpringCode(sequence: string) {return /^[*1-8]{6,}$/.test(sequence);}
207
- export function isSiRnaAxolabsCode(sequence: string) {return /^[fsACGUacgu]{6,}$/.test(sequence);}
208
- export function isSiRnaGcrsCode(sequence: string) {return /^[fmpsACGU]{6,}$/.test(sequence);}
209
- export function isGcrsCode(sequence: string) {return /^[fmpsACGU]{6,}$/.test(sequence);}
210
- export function isMM12Code(sequence: string) {return /^[IiJjKkLlEeFfGgHhQq]{6,}$/.test(sequence);}
211
-
212
- function convertSequence(seq: string) {
213
- seq = seq.replace(/\s/g, '');
214
- if (seq.length < minimalValidNumberOfCharacters)
333
+ function convertSequence(text: string) {
334
+ text = text.replace(/\s/g, '');
335
+ let seq = text;
336
+ let output = isValidSequence(seq);
337
+ if (output.indexOfFirstNotValidCharacter != -1)
215
338
  return {
216
- type: smallNumberOfCharacters,
217
- Nucleotides: smallNumberOfCharacters,
218
- BioSpring: smallNumberOfCharacters,
219
- Axolabs: smallNumberOfCharacters,
220
- GCRS: smallNumberOfCharacters
339
+ indexOfFirstNotValidCharacter: JSON.stringify(output),
340
+ Error: undefinedInputSequence
221
341
  };
222
- if (isDnaNucleotidesCode(seq))
342
+ if (output.expectedType == SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.DNA)
223
343
  return {
224
- type: "DNA Nucleotides Code",
344
+ type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.DNA,
225
345
  Nucleotides: seq,
226
346
  BioSpring: asoGapmersNucleotidesToBioSpring(seq),
227
- Axolabs: noTranslationTableAvailable,
228
347
  GCRS: asoGapmersNucleotidesToGcrs(seq)
229
348
  };
230
- if (isAsoGapmerBioSpringCode(seq))
349
+ if (output.expectedType == SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.ASO_GAPMERS)
231
350
  return {
232
- type: "ASO Gapmers / BioSpring Code",
351
+ type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.ASO_GAPMERS,
233
352
  Nucleotides: asoGapmersBioSpringToNucleotides(seq),
234
353
  BioSpring: seq,
235
- Axolabs: noTranslationTableAvailable,
236
354
  GCRS: asoGapmersBioSpringToGcrs(seq)
237
355
  };
238
- if (isAsoGapmerGcrsCode(seq))
356
+ if (output.expectedType == SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.ASO_GAPMERS)
239
357
  return {
240
- type: "ASO Gapmers / GCRS Code",
358
+ type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.ASO_GAPMERS,
241
359
  Nucleotides: asoGapmersGcrsToNucleotides(seq),
242
360
  BioSpring: asoGapmersGcrsToBioSpring(seq),
243
- Axolabs: noTranslationTableAvailable,
244
- MM12: gcrsToMM12(seq),
361
+ Mermade12: gcrsToMermade12(seq),
245
362
  GCRS: seq
246
363
  };
247
- if (isRnaNucleotidesCode(seq))
364
+ if (output.expectedType == SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.RNA)
248
365
  return {
249
- type: "RNA Nucleotides Code",
366
+ type: SYNTHESIZERS.RAW_NUCLEOTIDES + ' ' + TECHNOLOGIES.RNA,
250
367
  Nucleotides: seq,
251
368
  BioSpring: siRnaNucleotideToBioSpringSenseStrand(seq),
252
369
  Axolabs: siRnaNucleotideToAxolabsSenseStrand(seq),
253
370
  GCRS: siRnaNucleotidesToGcrs(seq)
254
371
  };
255
- if (isSiRnaBioSpringCode(seq))
372
+ if (output.expectedType == SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.SI_RNA)
256
373
  return {
257
- type: "siRNA / bioSpring Code",
374
+ type: SYNTHESIZERS.BIOSPRING + ' ' + TECHNOLOGIES.SI_RNA,
258
375
  Nucleotides: siRnaBioSpringToNucleotides(seq),
259
376
  BioSpring: seq,
260
377
  Axolabs: siRnaBioSpringToAxolabs(seq),
261
378
  GCRS: siRnaBioSpringToGcrs(seq)
262
379
  };
263
- if (isSiRnaAxolabsCode(seq))
380
+ if (output.expectedType == SYNTHESIZERS.AXOLABS + ' ' + TECHNOLOGIES.SI_RNA)
264
381
  return {
265
- type: "siRNA / Axolabs Code",
382
+ type: SYNTHESIZERS.AXOLABS + ' ' + TECHNOLOGIES.SI_RNA,
266
383
  Nucleotides: siRnaAxolabsToNucleotides(seq),
267
384
  BioSpring: siRnaAxolabsToBioSpring(seq),
268
385
  Axolabs: seq,
269
386
  GCRS: siRnaAxolabsToGcrs(seq)
270
387
  };
271
- if (isSiRnaGcrsCode(seq))
388
+ if (output.expectedType == SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.SI_RNA)
272
389
  return {
273
- type: "siRNA / GCRS Code",
390
+ type: SYNTHESIZERS.GCRS + ' ' + TECHNOLOGIES.SI_RNA,
274
391
  Nucleotides: siRnaGcrsToNucleotides(seq),
275
392
  BioSpring: siRnaGcrsToBioSpring(seq),
276
393
  Axolabs: siRnaGcrsToAxolabs(seq),
277
- MM12: gcrsToMM12(seq),
394
+ MM12: gcrsToMermade12(seq),
278
395
  GCRS: seq
279
396
  };
280
- if (isGcrsCode(seq))
397
+ if (output.expectedType == SYNTHESIZERS.GCRS)
281
398
  return {
282
- type: "GCRS Code",
399
+ type: SYNTHESIZERS.GCRS,
283
400
  Nucleotides: gcrsToNucleotides(seq),
284
401
  GCRS: seq,
285
- MM12: gcrsToMM12(seq)
402
+ Mermade12: gcrsToMermade12(seq)
286
403
  }
287
- if (isMM12Code(seq))
404
+ if (output.expectedType == SYNTHESIZERS.MERMADE_12)
288
405
  return {
289
- type: "MM12 Code",
406
+ type: SYNTHESIZERS.MERMADE_12,
290
407
  Nucleotides: noTranslationTableAvailable,
291
408
  GCRS: noTranslationTableAvailable,
292
- MM12: seq
409
+ Mermade12: seq
293
410
  };
294
411
  return {
295
412
  type: undefinedInputSequence,
@@ -302,12 +419,12 @@ function convertSequence(seq: string) {
302
419
  //output: string result {semType: BioSpring / Gapmers}
303
420
  export function asoGapmersNucleotidesToBioSpring(nucleotides: string) {
304
421
  let count: number = -1;
305
- const objForEdges: {[index: string]: string} = {"T": "5*", "A": "6*", "C": "7*", "G": "8*"};
306
- const objForCenter: {[index: string]: string} = {"C": "9*", "A": "A*", "T": "T*", "G": "G*"};
307
- return nucleotides.replace(/[ATCG]/g, function (x: string) {
422
+ const objForEdges: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "T": "5*", "A": "6*", "C": "7*", "G": "8*"};
423
+ const objForCenter: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "T": "T*", "A": "A*", "C": "9*", "G": "G*"};
424
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|T|C|G)/g, function (x: string) {
308
425
  count++;
309
426
  return (count > 4 && count < 15) ? objForCenter[x] : objForEdges[x];
310
- }).slice(0, 2 * count + 1);
427
+ }).slice(0, (nucleotides.endsWith("(invabasic)") || nucleotides.endsWith("(GalNAc-2-JNJ)")) ? nucleotides.length : 2 * count + 1);
311
428
  }
312
429
 
313
430
  //name: asoGapmersNucleotidesToGcrs
@@ -315,22 +432,22 @@ export function asoGapmersNucleotidesToBioSpring(nucleotides: string) {
315
432
  //output: string result {semType: GCRS / Gapmers}
316
433
  export function asoGapmersNucleotidesToGcrs(nucleotides: string) {
317
434
  let count: number = -1;
318
- const objForEdges: {[index: string]: string} = {"T": "moeUnps", "A": "moeAnps", "C": "moe5mCnps", "G": "moeGnps"};
319
- const objForCenter: {[index: string]: string} = {"C": "5mCps", "A": "Aps", "T": "Tps", "G": "Gps"};
320
- return nucleotides.replace(/[ATCG]/g, function (x: string) {
435
+ const objForEdges: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "T": "moeUnps", "A": "moeAnps", "C": "moe5mCnps", "G": "moeGnps"};
436
+ const objForCenter: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "C": "5mCps", "A": "Aps", "T": "Tps", "G": "Gps"};
437
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|T|C|G)/g, function (x: string) {
321
438
  count++;
322
439
  if (count < 5) return (count == 4) ? objForEdges[x].slice(0, -3) + 'ps' : objForEdges[x];
323
440
  if (count < 15) return (count == 14) ? objForCenter[x].slice(0, -2) + 'nps' : objForCenter[x];
324
441
  return objForEdges[x];
325
- }).slice(0, -3);
442
+ }).slice(0, (nucleotides.endsWith("(invabasic)") || nucleotides.endsWith("(GalNAc-2-JNJ)")) ? nucleotides.length : -3);
326
443
  }
327
444
 
328
445
  //name: asoGapmersBioSpringToNucleotides
329
446
  //input: string nucleotides {semType: BioSpring / Gapmers}
330
447
  //output: string result {semType: DNA nucleotides}
331
448
  export function asoGapmersBioSpringToNucleotides(nucleotides: string) {
332
- const obj: {[index: string]: string} = {"*": "", "5": "T", "6": "A", "7": "C", "8": "G", "9": "C"};
333
- return nucleotides.replace(/[*56789]/g, function (x: string) {return obj[x];});
449
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "*": "", "5": "T", "6": "A", "7": "C", "8": "G", "9": "C"};
450
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|\*|5|6|7|8|9)/g, function (x: string) {return obj[x];});
334
451
  }
335
452
 
336
453
  //name: asoGapmersBioSpringToGcrs
@@ -338,11 +455,11 @@ export function asoGapmersBioSpringToNucleotides(nucleotides: string) {
338
455
  //output: string result {semType: GCRS / Gapmers}
339
456
  export function asoGapmersBioSpringToGcrs(nucleotides: string) {
340
457
  let count: number = -1;
341
- const obj: {[index: string]: string} = {
458
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
342
459
  "5*": "moeUnps", "6*": "moeAnps", "7*": "moe5mCnps", "8*": "moeGnps", "9*": "5mCps", "A*": "Aps", "T*": "Tps",
343
460
  "G*": "Gps", "C*": "Cps", "5": "moeU", "6": "moeA", "7": "moe5mC", "8": "moeG"
344
461
  };
345
- return nucleotides.replace(/(5\*|6\*|7\*|8\*|9\*|A\*|T\*|G\*|C\*|5|6|7|8)/g, function (x: string) {
462
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|5\*|6\*|7\*|8\*|9\*|A\*|T\*|G\*|C\*|5|6|7|8)/g, function (x: string) {
346
463
  count++;
347
464
  return (count == 4) ? obj[x].slice(0, -3) + 'ps' : (count == 14) ? obj[x].slice(0, -2) + 'nps' : obj[x];
348
465
  });
@@ -352,102 +469,102 @@ export function asoGapmersBioSpringToGcrs(nucleotides: string) {
352
469
  //input: string nucleotides {semType: GCRS / Gapmers}
353
470
  //output: string result {semType: BioSpring / Gapmers}
354
471
  export function asoGapmersGcrsToBioSpring(nucleotides: string) {
355
- const obj: {[index: string]: string} = {
472
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
356
473
  "moeT": "5", "moeA": "6", "moe5mC": "7", "moeG": "8", "moeU": "5", "5mC": "9", "nps": "*", "ps": "*", "U": "T"
357
474
  };
358
- return nucleotides.replace(/(moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g, function (x: string) {return obj[x];});
475
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|moeT|moeA|moe5mC|moeG|moeU|5mC|nps|ps|U)/g, function (x: string) {return obj[x];});
359
476
  }
360
477
 
361
478
  //name: asoGapmersGcrsToNucleotides
362
479
  //input: string nucleotides {semType: GCRS / Gapmers}
363
480
  //output: string result {semType: DNA nucleotides}
364
481
  export function asoGapmersGcrsToNucleotides(nucleotides: string) {
365
- const obj: {[index: string]: string} = {"moe": "", "5m": "", "n": "", "ps": "", "U": "T"};
366
- return nucleotides.replace(/(moe|5m|n|ps|U)/g, function (x: string) {return obj[x];});
482
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "moe": "", "5m": "", "n": "", "ps": "", "U": "T"};
483
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|moe|5m|n|ps|U)/g, function (x: string) {return obj[x];});
367
484
  }
368
485
 
369
486
  //name: siRnaBioSpringToNucleotides
370
487
  //input: string nucleotides {semType: BioSpring / siRNA}
371
488
  //output: string result {semType: RNA nucleotides}
372
489
  export function siRnaBioSpringToNucleotides(nucleotides: string) {
373
- const obj: {[index: string]: string} = {"1": "U", "2": "A", "3": "C", "4": "G", "5": "U", "6": "A", "7": "C", "8": "G", "*": ""};
374
- return nucleotides.replace(/[12345678*]/g, function (x: string) {return obj[x];});
490
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "1": "U", "2": "A", "3": "C", "4": "G", "5": "U", "6": "A", "7": "C", "8": "G", "*": ""};
491
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g, function (x: string) {return obj[x];});
375
492
  }
376
493
 
377
494
  //name: siRnaBioSpringToAxolabs
378
495
  //input: string nucleotides {semType: BioSpring / siRNA}
379
496
  //output: string result {semType: Axolabs / siRNA}
380
497
  export function siRnaBioSpringToAxolabs(nucleotides: string) {
381
- const obj: {[index: string]: string} = {"1": "Uf", "2": "Af", "3": "Cf", "4": "Gf", "5": "u", "6": "a", "7": "c", "8": "g", "*": "s"};
382
- return nucleotides.replace(/[12345678*]/g, function (x: string) {return obj[x];});
498
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "1": "Uf", "2": "Af", "3": "Cf", "4": "Gf", "5": "u", "6": "a", "7": "c", "8": "g", "*": "s"};
499
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g, function (x: string) {return obj[x];});
383
500
  }
384
501
 
385
502
  //name: siRnaBioSpringToGcrs
386
503
  //input: string nucleotides {semType: BioSpring / siRNA}
387
504
  //output: string result {semType: GCRS}
388
505
  export function siRnaBioSpringToGcrs(nucleotides: string) {
389
- const obj: {[index: string]: string} = {"1": "fU", "2": "fA", "3": "fC", "4": "fG", "5": "mU", "6": "mA", "7": "mC", "8": "mG", "*": "ps"};
390
- return nucleotides.replace(/[12345678*]/g, function (x: string) {return obj[x];});
506
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "1": "fU", "2": "fA", "3": "fC", "4": "fG", "5": "mU", "6": "mA", "7": "mC", "8": "mG", "*": "ps"};
507
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|1|2|3|4|5|6|7|8|\*)/g, function (x: string) {return obj[x];});
391
508
  }
392
509
 
393
510
  //name: siRnaAxolabsToGcrs
394
511
  //input: string nucleotides {semType: Axolabs / siRNA}
395
512
  //output: string result {semType: GCRS}
396
513
  export function siRnaAxolabsToGcrs(nucleotides: string) {
397
- const obj: {[index: string]: string} = {
514
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
398
515
  "Uf": "fU", "Af": "fA", "Cf": "fC", "Gf": "fG", "u": "mU", "a": "mA", "c": "mC", "g": "mG", "s": "ps"
399
516
  };
400
- return nucleotides.replace(/(Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
517
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
401
518
  }
402
519
 
403
520
  //name: siRnaAxolabsToBioSpring
404
521
  //input: string nucleotides {semType: Axolabs / siRNA}
405
522
  //output: string result {semType: BioSpring / siRNA}
406
523
  export function siRnaAxolabsToBioSpring(nucleotides: string) {
407
- const obj: {[index: string]: string} = {
524
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
408
525
  "Uf": "1", "Af": "2", "Cf": "3", "Gf": "4", "u": "5", "a": "6", "c": "7", "g": "8", "s": "*"
409
526
  };
410
- return nucleotides.replace(/(Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
527
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
411
528
  }
412
529
 
413
530
  //name: siRnaAxolabsToNucleotides
414
531
  //input: string nucleotides {semType: Axolabs / siRNA}
415
532
  //output: string result {semType: RNA nucleotides}
416
533
  export function siRnaAxolabsToNucleotides(nucleotides: string) {
417
- const obj: {[index: string]: string} = {
534
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
418
535
  "Uf": "U", "Af": "A", "Cf": "C", "Gf": "G", "u": "U", "a": "A", "c": "C", "g": "G", "s": ""
419
536
  };
420
- return nucleotides.replace(/(Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
537
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|Uf|Af|Cf|Gf|u|a|c|g|s)/g, function (x: string) {return obj[x];});
421
538
  }
422
539
 
423
540
  //name: siRnaGcrsToNucleotides
424
541
  //input: string nucleotides {semType: GCRS}
425
542
  //output: string result {semType: RNA nucleotides}
426
543
  export function siRnaGcrsToNucleotides(nucleotides: string) {
427
- const obj: {[index: string]: string} = {
544
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
428
545
  "fU": "U", "fA": "A", "fC": "C", "fG": "G", "mU": "U", "mA": "A", "mC": "C", "mG": "G", "ps": ""
429
546
  };
430
- return nucleotides.replace(/(fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
547
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
431
548
  }
432
549
 
433
550
  //name: siRnaGcrsToBioSpring
434
551
  //input: string nucleotides {semType: GCRS}
435
552
  //output: string result {semType: BioSpring / siRNA}
436
553
  export function siRnaGcrsToBioSpring(nucleotides: string) {
437
- const obj: {[index: string]: string} = {
554
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
438
555
  "fU": "1", "fA": "2", "fC": "3", "fG": "4", "mU": "5", "mA": "6", "mC": "7", "mG": "8", "ps": "*"
439
556
  };
440
- return nucleotides.replace(/(fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
557
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
441
558
  }
442
559
 
443
560
  //name: siRnaGcrsToAxolabs
444
561
  //input: string nucleotides {semType: GCRS}
445
562
  //output: string result {semType: Axolabs / siRNA}
446
563
  export function siRnaGcrsToAxolabs(nucleotides: string) {
447
- const obj: {[index: string]: string} = {
564
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
448
565
  "fU": "Uf", "fA": "Af", "fC": "Cf", "fG": "Gf", "mU": "u", "mA": "a", "mC": "c", "mG": "g", "ps": "s"
449
566
  };
450
- return nucleotides.replace(/(fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
567
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|fU|fA|fC|fG|mU|mA|mC|mG|ps)/g, function (x: string) {return obj[x];});
451
568
  }
452
569
 
453
570
  //name: siRnaNucleotideToBioSpringSenseStrand
@@ -455,11 +572,11 @@ export function siRnaGcrsToAxolabs(nucleotides: string) {
455
572
  //output: string result {semType: BioSpring / siRNA}
456
573
  export function siRnaNucleotideToBioSpringSenseStrand(nucleotides: string) {
457
574
  let count: number = -1;
458
- const objForLeftEdge: {[index: string]: string} = {"A": "6*", "U": "5*", "G": "8*", "C": "7*"};
459
- const objForRightEdge: {[index: string]: string} = {"A": "*6", "U": "*5", "G": "*8", "C": "*7"};
460
- const objForOddIndices: {[index: string]: string} = {"A": "6", "U": "5", "G": "8", "C": "7"};
461
- const objForEvenIndices: {[index: string]: string} = {"A": "2", "U": "1", "G": "4", "C": "3"};
462
- return nucleotides.replace(/[AUGC]/g, function (x: string) {
575
+ const objForLeftEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "6*", "U": "5*", "G": "8*", "C": "7*"};
576
+ const objForRightEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "*6", "U": "*5", "G": "*8", "C": "*7"};
577
+ const objForOddIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "6", "U": "5", "G": "8", "C": "7"};
578
+ const objForEvenIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "2", "U": "1", "G": "4", "C": "3"};
579
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g, function (x: string) {
463
580
  count++;
464
581
  if (count < 2) return objForLeftEdge[x];
465
582
  if (count > nucleotides.length - 3) return objForRightEdge[x];
@@ -472,11 +589,11 @@ export function siRnaNucleotideToBioSpringSenseStrand(nucleotides: string) {
472
589
  //output: string result {semType: GCRS}
473
590
  export function siRnaNucleotidesToGcrs(nucleotides: string) {
474
591
  let count: number = -1;
475
- const objForLeftEdge: {[index: string]: string} = {"A": "mAps", "U": "mUps", "G": "mGps", "C": "mCps"};
476
- const objForRightEdge: {[index: string]: string} = {"A": "psmA", "U": "psmU", "G": "psmG", "C": "psmC"};
477
- const objForEvenIndices: {[index: string]: string} = {"A": "fA", "U": "fU", "G": "fG", "C": "fC"};
478
- const objForOddIndices: {[index: string]: string} = {"A": "mA", "U": "mU", "G": "mG", "C": "mC"};
479
- return nucleotides.replace(/[AUGC]/g, function (x: string) {
592
+ const objForLeftEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "mAps", "U": "mUps", "G": "mGps", "C": "mCps"};
593
+ const objForRightEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "psmA", "U": "psmU", "G": "psmG", "C": "psmC"};
594
+ const objForEvenIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "fA", "U": "fU", "G": "fG", "C": "fC"};
595
+ const objForOddIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "mA", "U": "mU", "G": "mG", "C": "mC"};
596
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g, function (x: string) {
480
597
  count++;
481
598
  if (count < 2) return objForLeftEdge[x];
482
599
  if (count > nucleotides.length - 3) return objForRightEdge[x];
@@ -489,10 +606,10 @@ export function siRnaNucleotidesToGcrs(nucleotides: string) {
489
606
  //output: string result {semType: Axolabs}
490
607
  export function siRnaNucleotideToAxolabsSenseStrand(nucleotides: string) {
491
608
  let count: number = -1;
492
- const objForLeftEdge: {[index: string]: string} = {"A": "as", "U": "us", "G": "gs", "C": "cs"};
493
- const objForSomeIndices: {[index: string]: string} = {"A": "Af", "U": "Uf", "G": "Gf", "C": "Cf"};
494
- const obj: {[index: string]: string} = {"A": "a", "U": "u", "G": "g", "C": "c"};
495
- return nucleotides.replace(/[AUGC]/g, function (x: string) {
609
+ const objForLeftEdge: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "as", "U": "us", "G": "gs", "C": "cs"};
610
+ const objForSomeIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "Af", "U": "Uf", "G": "Gf", "C": "Cf"};
611
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "a", "U": "u", "G": "g", "C": "c"};
612
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g, function (x: string) {
496
613
  count++;
497
614
  if (count < 2) return objForLeftEdge[x];
498
615
  if (count == 6 || (count > 7 && count < 11)) return objForSomeIndices[x]
@@ -506,11 +623,11 @@ export function siRnaNucleotideToAxolabsSenseStrand(nucleotides: string) {
506
623
  //output: string result {semType: Axolabs}
507
624
  export function siRnaNucleotideToAxolabsAntisenseStrand(nucleotides: string) {
508
625
  let count: number = -1;
509
- const objForSmallLinkages: {[index: string]: string} = {"A": "as", "U": "us", "G": "gs", "C": "cs"};
510
- const objForBigLinkages: {[index: string]: string} = {"A": "Afs", "U": "Ufs", "G": "Gfs", "C": "Cfs"};
511
- const objForSomeIndices: {[index: string]: string} = {"A": "Af", "U": "Uf", "G": "Gf", "C": "Cf"};
512
- const obj: {[index: string]: string} = {"A": "a", "U": "u", "G": "g", "C": "c"};
513
- return nucleotides.replace(/[AUGC]/g, function (x: string) {
626
+ const objForSmallLinkages: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "as", "U": "us", "G": "gs", "C": "cs"};
627
+ const objForBigLinkages: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "Afs", "U": "Ufs", "G": "Gfs", "C": "Cfs"};
628
+ const objForSomeIndices: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "Af", "U": "Uf", "G": "Gf", "C": "Cf"};
629
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)", "A": "a", "U": "u", "G": "g", "C": "c"};
630
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C)/g, function (x: string) {
514
631
  count++;
515
632
  if (count > 19 && count < 22) return objForSmallLinkages[x];
516
633
  if (count == 0) return 'us';
@@ -523,48 +640,20 @@ export function siRnaNucleotideToAxolabsAntisenseStrand(nucleotides: string) {
523
640
  //input: string nucleotides {semType: GCRS}
524
641
  //output: string result {semType: RNA nucleotides}
525
642
  export function gcrsToNucleotides(nucleotides: string) {
526
- const obj: {[index: string]: string} = {
643
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
527
644
  "mAps": "A", "mUps": "U", "mGps": "G", "mCps": "C", "fAps": "A", "fUps": "U", "fGps": "G", "fCps": "C",
528
645
  "fU": "U", "fA": "A", "fC": "C", "fG": "G", "mU": "U", "mA": "A", "mC": "C", "mG": "G"
529
646
  };
530
- return nucleotides.replace(/(mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {return obj[x];});
531
- }
532
-
533
- //name: gcrsToOP100
534
- //input: string nucleotides {semType: GCRS}
535
- //output: string result {semType: OP100}
536
- export function gcrsToOP100(nucleotides: string) {
537
- let count: number = -1;
538
- const objForEvenIndicesAtLeftEdge: {[index: string]: string} = {
539
- "mAps": "a", "mUps": "u", "mGps": "g", "mCps": "c", "fAps": "a", "fUps": "u", "fGps": "g", "fCps": "c"
540
- };
541
- const objForOddIndicesAtLeftEdge: {[index: string]: string} = {
542
- "mAps": "a*", "mUps": "u*", "mGps": "g*", "mCps": "c*", "fAps": "a*", "fUps": "u*", "fGps": "g*", "fCps": "c*"
543
- };
544
- const objForOddIndicesAtRightEdge: {[index: string]: string} = {
545
- "mAps": "a", "mUps": "u", "mGps": "g", "mCps": "c", "fAps": "a", "fUps": "u", "fGps": "g", "fCps": "c"
546
- };
547
- const objForEvenIndicesAtCenter: {[index: string]: string} = {
548
- "fU": "u*", "fA": "a*", "fC": "c*", "fG": "g*", "mU": "u*", "mA": "a*", "mC": "c*", "mG": "g*"
549
- };
550
- const objForOddIndicesAtCenter: {[index: string]: string} = {
551
- "fU": "u", "fA": "a", "fC": "c", "fG": "g", "mU": "u", "mA": "a", "mC": "c", "mG": "g"
552
- };
553
- return nucleotides.replace(/(mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {
554
- count++;
555
- if (count < 3) return (count % 2 == 0) ? objForEvenIndicesAtLeftEdge[x] : objForOddIndicesAtLeftEdge[x];
556
- if (count == 19) return objForOddIndicesAtRightEdge[x];
557
- return (count % 2 == 1) ? objForEvenIndicesAtCenter[x] : objForOddIndicesAtCenter[x];
558
- });
647
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {return obj[x];});
559
648
  }
560
649
 
561
- //name: gcrsToMM12
650
+ //name: gcrsToMermade12
562
651
  //input: string nucleotides {semType: GCRS}
563
- //output: string result {semType: MM12}
564
- export function gcrsToMM12(nucleotides: string) {
565
- const obj: {[index: string]: string} = {
652
+ //output: string result {semType: Mermade 12 / siRNA}
653
+ export function gcrsToMermade12(nucleotides: string) {
654
+ const obj: {[index: string]: string} = {"(invabasic)": "(invabasic)", "(GalNAc-2-JNJ)": "(GalNAc-2-JNJ)",
566
655
  "mAps": "e", "mUps": "h", "mGps": "g", "mCps": "f", "fAps": "i", "fUps": "l", "fGps": "k", "fCps": "j", "fU": "L",
567
656
  "fA": "I", "fC": "J", "fG": "K", "mU": "H", "mA": "E", "mC": "F", "mG": "G"
568
657
  };
569
- return nucleotides.replace(/(mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {return obj[x]});
570
- }
658
+ return nucleotides.replace(/(\(invabasic\)|\(GalNAc-2-JNJ\)|mAps|mUps|mGps|mCps|fAps|fUps|fGps|fCps|fU|fA|fC|fG|mU|mA|mC|mG)/g, function (x: string) {return obj[x]});
659
+ }
@@ -0,0 +1,13 @@
1
+ import {category, expect, expectFloat, test, testExpectFinish} from "@datagrok-libraries/utils/src/test";
2
+ import * as DG from "datagrok-api/dg";
3
+ import * as grok from "datagrok-api/grok";
4
+ import * as ui from "datagrok-api/ui";
5
+ import {sequenceToSmiles} from '../package'
6
+
7
+ category('sequence-translator', () => {
8
+
9
+ testExpectFinish('ts', async () => {
10
+ let expected = 'OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=C(C)C(=O)NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=CN=C(N)C=3N=C2)C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C3N=C(N)NC(=O)C=3N=C2)[C@H](OC)[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1OP(=O)(O)OC[C@H]1O[C@@H](N2C=CC(N)=NC2(=O))C[C@@H]1O';
11
+ expect(sequenceToSmiles('AGGTCCTCTTGACTTAGGCC'), expected);
12
+ });
13
+ });
package/webpack.config.js CHANGED
@@ -1,9 +1,15 @@
1
1
  const path = require('path');
2
+ const packageName = path.parse(require('./package.json').name).name.toLowerCase().replace(/-/g, '');
2
3
 
3
4
  module.exports = {
4
5
  mode: 'development',
5
6
  entry: {
6
- package: './src/package.ts'
7
+ package: ['./src/package.ts'],
8
+ test: {
9
+ filename: 'package-test.js',
10
+ library: {type: 'var', name: `${packageName}_test`},
11
+ import: './src/package-test.ts',
12
+ }
7
13
  },
8
14
  resolve: {
9
15
  extensions: ['.wasm', '.mjs', '.js', '.json', '.ts', '.tsx'],
@@ -1,24 +0,0 @@
1
- #name: convertFastaToSmiles
2
- #description: Converts FASTA format to smiles
3
- #language: python
4
- #tags: demo, chem, rdkit, smiles
5
- #input: string sequence_in_fasta_format = AGGTCTTCATGACTTCGGCC {semType: nucleotides}
6
- #input: string flavor = DNA_no_cap {choices: ["Protein_L_amino_acids","Protein_D_amino_acids","RNA_no_cap","RNA_5_cap","RNA_3_cap","RNA_both_caps","DNA_no_cap","DNA_5_cap","DNA_3_cap","DNA_both_caps"]}
7
- #output: string smiles {semType: Molecule}
8
-
9
- from rdkit import Chem
10
-
11
- flavors = {
12
- "Protein_L_amino_acids": 0,
13
- "Protein_D_amino_acids": 1,
14
- "RNA_no_cap": 2,
15
- "RNA_5_cap": 3,
16
- "RNA_3_cap": 4,
17
- "RNA_both_caps": 5,
18
- "DNA_no_cap": 6,
19
- "DNA_5_cap": 7,
20
- "DNA_3_cap": 8,
21
- "DNA_both_caps": 9
22
- }
23
-
24
- smiles = Chem.MolToSmiles(Chem.MolFromFASTA(text=sequence_in_fasta_format, flavor=flavors[flavor]))
@@ -1,50 +0,0 @@
1
- #name: drawAxolabsPattern
2
- #language: python
3
- #input: dataframe ss_df
4
- #input: dataframe as_df
5
- #input: string patternName
6
- #output: graphics pattern
7
-
8
- import matplotlib.pyplot as plt
9
-
10
- basis_color_dict = {
11
- "RNA": 'k', # black
12
- "DNA": 'm', # magenta
13
- "2'-Fluoro": 'b', # blue
14
- "2'-O-Methyl": 'r', # red
15
- "2'-O-MOE": 'c', # cyan
16
- "Glycol nucleic acid": 'y', # yellow
17
- "LNA": 'aquamarine',
18
- "Unlocked (UNA)": 'g' # green
19
- }
20
-
21
- pattern = plt.figure()
22
-
23
- max_length = 2 * max(len(ss_df), len(as_df)) + 2.5
24
-
25
- for i in range(len(ss_df) - 1, -1, -1):
26
- plt.gca().add_patch(plt.Circle(xy=(-2 * i, 3), radius=1, fc=basis_color_dict[ss_df['basis'][i]]))
27
- if ss_df['pto'][i]:
28
- plt.gca()._add_text(plt.Text(x=-2 * i - 1.25, y=3.25, text="*", color='r', fontsize=20))
29
- plt.gca()._add_text(plt.Text(x=-2 * len(ss_df) - 0.25, y=2.5, text="5'", fontsize=30))
30
- plt.gca()._add_text(plt.Text(x=-max_length, y=2.5, text='SS:', fontsize=30))
31
- plt.gca()._add_text(plt.Text(x=1, y=2.5, text="3'", fontsize=30))
32
-
33
- for i in range(len(as_df) - 1, -1, -1):
34
- plt.gca().add_patch(plt.Circle(xy=(-2 * i, 0), radius=1, fc=basis_color_dict[as_df['basis'][i]]))
35
- if as_df['pto'][i]:
36
- plt.gca()._add_text(plt.Text(x=-2 * i - 1.25, y=-1.25, text="*", color='r', fontsize=20))
37
- plt.gca()._add_text(plt.Text(x=-2 * len(as_df) - 0.25, y=-0.5, text="3'", fontsize=30))
38
- plt.gca()._add_text(plt.Text(x=-max_length, y=-0.5, text='AS:', fontsize=30))
39
- plt.gca()._add_text(plt.Text(x=1, y=-0.5, text="5'", fontsize=30))
40
-
41
- scaler = -1
42
- for index, key in enumerate(basis_color_dict):
43
- if key in as_df['basis'].unique() or key in ss_df['basis'].unique():
44
- scaler += 1
45
- plt.gca().add_patch(plt.Circle(xy=(-3 * scaler, -2), radius=0.5, fc=basis_color_dict[key]))
46
- plt.gca()._add_text(plt.Text(x=-3 * scaler - 1, y=-4, text=key, fontsize=10))
47
-
48
- plt.axis('scaled')
49
- plt.axis('off')
50
- plt.title(patternName + ' for ' + str(len(ss_df)) + '/' + str(len(as_df)) + 'mer')