@datagrok/sequence-translator 1.1.0 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  {
2
2
  "Axolabs": {
3
+ "(GalNAc)": "(GalNAc)",
3
4
  "Af": "2'-fluoro-A",
4
5
  "Cf": "2'-fluoro-C",
5
6
  "Gf": "2'-fluoro-G",
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "Axolabs": {
3
3
  "nucleoside": {
4
+ "(GalNAc)": "[GalNAc]",
4
5
  "Uf": "[fR](U)p",
5
6
  "Af": "[fR](A)p",
6
7
  "Cf": "[fR](C)p",
@@ -16,6 +17,7 @@
16
17
  },
17
18
  "BioSpring": {
18
19
  "nucleoside": {
20
+ "(GalNAc)": "[GalNAc]",
19
21
  "A": "r(A)p",
20
22
  "T": "r(T)p",
21
23
  "G": "r(G)p",
@@ -36,6 +38,7 @@
36
38
  },
37
39
  "Mermade12": {
38
40
  "nucleoside": {
41
+ "(GalNAc)": "[GalNAc]",
39
42
  "h": "[25r](U)[sp]",
40
43
  "e": "[25r](A)[sp]",
41
44
  "f": "[25r](C)[sp]",
@@ -54,6 +57,7 @@
54
57
  "G": "[25r](G)p"
55
58
  },
56
59
  "phosphateBackbone": {
60
+ "<empty>": "[sp]"
57
61
  }
58
62
  }
59
63
  }
@@ -1,4 +1,32 @@
1
1
  [
2
+ {
3
+ "symbol": "(GalNAc)",
4
+ "name": "GalNAc",
5
+ "molfile": "\nDatagrok monomer library Nucleotides MODIFICATION|GalNAc2\n\n 0 0 0 0 0 0 0 0 0 0999 V3000\nM V30 BEGIN CTAB\nM V30 COUNTS 111 113 0 0 0\nM V30 BEGIN ATOM\nM V30 1 O -20.731300 -0.702700 0.000000 0\nM V30 2 C -19.397600 0.067300 0.000000 0\nM V30 3 C -18.063800 -0.702700 0.000000 0\nM V30 4 C -16.730300 0.067300 0.000000 0\nM V30 5 N -15.396500 -0.702700 0.000000 0\nM V30 6 C -14.062800 0.067300 0.000000 0\nM V30 7 C -12.729300 -0.702700 0.000000 0\nM V30 8 C -11.395500 0.067300 0.000000 0\nM V30 9 C -10.062000 -0.702700 0.000000 0\nM V30 10 C -8.728300 0.067300 0.000000 0\nM V30 11 N -7.394700 -0.702700 0.000000 0\nM V30 12 O -18.063800 -2.242700 0.000000 0\nM V30 13 O -14.062800 1.607300 0.000000 0\nM V30 14 O -8.728300 1.607300 0.000000 0\nM V30 15 C -5.854700 -0.702700 0.000000 0\nM V30 16 C -5.854700 0.837300 0.000000 0\nM V30 17 C -5.854700 -2.242700 0.000000 0\nM V30 18 C -3.484800 -3.012700 0.000000 0\nM V30 19 C -2.454400 -4.157000 0.000000 0\nM V30 20 C -0.948000 -3.836800 0.000000 0\nM V30 21 N 0.082400 -4.981300 0.000000 0\nM V30 22 C 1.588800 -4.661200 0.000000 0\nM V30 23 C 2.619200 -5.805600 0.000000 0\nM V30 24 C 4.125600 -5.485500 0.000000 0\nM V30 25 N 5.156000 -6.629700 0.000000 0\nM V30 26 C 6.662400 -6.309600 0.000000 0\nM V30 27 C 7.692800 -7.454100 0.000000 0\nM V30 28 C 9.199200 -7.133900 0.000000 0\nM V30 29 C 10.229600 -8.278400 0.000000 0\nM V30 30 C 11.736000 -7.958300 0.000000 0\nM V30 31 O 12.766400 -9.102700 0.000000 0\nM V30 32 O -0.472200 -2.372300 0.000000 0\nM V30 33 O 7.138200 -4.845000 0.000000 0\nM V30 34 C 14.272800 -8.782400 0.000000 0\nM V30 35 C 15.303200 -9.926700 0.000000 0\nM V30 36 C 16.809800 -9.606500 0.000000 0\nM V30 37 C 17.285600 -8.142100 0.000000 0\nM V30 38 C 16.255200 -6.997500 0.000000 0\nM V30 39 O 14.748600 -7.317800 0.000000 0\nM V30 40 C 16.731200 -5.532900 0.000000 0\nM V30 41 O 18.791800 -7.821800 0.000000 0\nM V30 42 O 17.840400 -10.751000 0.000000 0\nM V30 43 N 14.827400 -11.391400 0.000000 0\nM V30 44 C 15.732500 -12.637200 0.000000 0\nM V30 45 C 15.256700 -14.101800 0.000000 0\nM V30 46 O 17.253700 -12.396300 0.000000 0\nM V30 47 O 18.262800 -5.372000 0.000000 0\nM V30 48 O -4.949400 -3.488500 0.000000 0\nM V30 49 C -4.521000 0.067300 0.000000 0\nM V30 50 C -1.941400 0.202600 0.000000 0\nM V30 51 C -0.607700 -0.567400 0.000000 0\nM V30 52 C 0.726000 0.202600 0.000000 0\nM V30 53 N 2.059600 -0.567400 0.000000 0\nM V30 54 C 3.393300 0.202600 0.000000 0\nM V30 55 C 4.727100 -0.567400 0.000000 0\nM V30 56 C 6.060600 0.202600 0.000000 0\nM V30 57 N 7.394300 -0.567400 0.000000 0\nM V30 58 C 8.728100 0.202600 0.000000 0\nM V30 59 C 10.061800 -0.567400 0.000000 0\nM V30 60 C 11.395300 0.202600 0.000000 0\nM V30 61 C 14.062800 0.202600 0.000000 0\nM V30 62 O 15.396400 -0.567400 0.000000 0\nM V30 63 O 0.726000 1.742600 0.000000 0\nM V30 64 O 8.728100 1.742600 0.000000 0\nM V30 65 C 16.730100 0.202600 0.000000 0\nM V30 66 C 18.063800 -0.567600 0.000000 0\nM V30 67 C 19.397600 0.202600 0.000000 0\nM V30 68 C 19.397400 1.742600 0.000000 0\nM V30 69 C 18.063800 2.512600 0.000000 0\nM V30 70 O 16.730100 1.742600 0.000000 0\nM V30 71 C 18.064000 4.052600 0.000000 0\nM V30 72 O 20.731100 2.512600 0.000000 0\nM V30 73 O 20.731300 -0.567400 0.000000 0\nM V30 74 N 18.063800 -2.107600 0.000000 0\nM V30 75 C 19.309600 -3.012700 0.000000 0\nM V30 76 C 19.309600 -4.552700 0.000000 0\nM V30 77 O 20.681800 -2.313500 0.000000 0\nM V30 78 O 19.470900 4.679100 0.000000 0\nM V30 79 O -3.187200 -0.702700 0.000000 0\nM V30 80 C 12.729100 -0.567400 0.000000 0\nM V30 81 C -3.919000 3.227700 0.000000 0\nM V30 82 C -2.412600 2.907600 0.000000 0\nM V30 83 C -1.382200 4.051900 0.000000 0\nM V30 84 N 0.124200 3.731700 0.000000 0\nM V30 85 C 1.154600 4.876200 0.000000 0\nM V30 86 C 2.661000 4.556100 0.000000 0\nM V30 87 C 3.691400 5.700500 0.000000 0\nM V30 88 N 5.197800 5.380400 0.000000 0\nM V30 89 C 6.228200 6.524800 0.000000 0\nM V30 90 C 7.734600 6.204500 0.000000 0\nM V30 91 C 8.765000 7.349000 0.000000 0\nM V30 92 C 10.271400 7.028800 0.000000 0\nM V30 93 C 11.301800 8.173300 0.000000 0\nM V30 94 O 12.808200 7.853200 0.000000 0\nM V30 95 O -1.858000 5.516700 0.000000 0\nM V30 96 O 5.752400 7.989400 0.000000 0\nM V30 97 C 13.838600 8.997600 0.000000 0\nM V30 98 C 15.345000 8.677300 0.000000 0\nM V30 99 C 16.375600 9.821900 0.000000 0\nM V30 100 C 15.899600 11.286300 0.000000 0\nM V30 101 C 14.393400 11.606800 0.000000 0\nM V30 102 O 13.362800 10.462200 0.000000 0\nM V30 103 C 13.917600 13.071400 0.000000 0\nM V30 104 O 16.930000 12.430800 0.000000 0\nM V30 105 O 17.882000 9.501800 0.000000 0\nM V30 106 N 15.820800 7.212700 0.000000 0\nM V30 107 C 17.285600 6.736700 0.000000 0\nM V30 108 C 17.761400 5.272100 0.000000 0\nM V30 109 O 18.374400 7.825700 0.000000 0\nM V30 110 O 15.062000 14.101800 0.000000 0\nM V30 111 O -4.824100 1.981700 0.000000 0\nM V30 END ATOM\nM V30 BEGIN BOND\nM V30 1 1 2 3\nM V30 2 1 3 4\nM V30 3 1 6 7\nM V30 4 1 7 8\nM V30 5 1 8 9\nM V30 6 1 9 10\nM V30 7 1 1 2\nM V30 8 1 3 12\nM V30 9 1 4 5\nM V30 10 1 5 6\nM V30 11 2 6 13\nM V30 12 1 10 11\nM V30 13 1 11 15\nM V30 14 1 15 16\nM V30 15 1 15 17\nM V30 16 2 10 14\nM V30 17 1 18 19\nM V30 18 1 19 20\nM V30 19 1 22 23\nM V30 20 1 23 24\nM V30 21 1 26 27\nM V30 22 1 27 28\nM V30 23 1 28 29\nM V30 24 1 29 30\nM V30 25 2 26 33\nM V30 26 2 20 32\nM V30 27 1 20 21\nM V30 28 1 21 22\nM V30 29 1 24 25\nM V30 30 1 25 26\nM V30 31 1 30 31\nM V30 32 1 31 34\nM V30 33 1 35 36\nM V30 34 1 36 37\nM V30 35 1 37 38\nM V30 36 1 34 35\nM V30 37 1 38 39\nM V30 38 1 34 39\nM V30 39 1 38 40\nM V30 40 1 35 43\nM V30 41 1 43 44\nM V30 42 1 44 45\nM V30 43 2 44 46\nM V30 44 1 36 42\nM V30 45 1 37 41\nM V30 46 1 40 47\nM V30 47 1 18 48\nM V30 48 1 15 49\nM V30 49 1 50 51\nM V30 50 1 51 52\nM V30 51 1 54 55\nM V30 52 1 55 56\nM V30 53 1 58 59\nM V30 54 1 59 60\nM V30 55 2 58 64\nM V30 56 2 52 63\nM V30 57 1 52 53\nM V30 58 1 53 54\nM V30 59 1 56 57\nM V30 60 1 57 58\nM V30 61 1 61 62\nM V30 62 1 62 65\nM V30 63 1 66 67\nM V30 64 1 67 68\nM V30 65 1 68 69\nM V30 66 1 65 66\nM V30 67 1 69 70\nM V30 68 1 65 70\nM V30 69 1 69 71\nM V30 70 1 66 74\nM V30 71 1 74 75\nM V30 72 1 75 76\nM V30 73 2 75 77\nM V30 74 1 67 73\nM V30 75 1 68 72\nM V30 76 1 71 78\nM V30 77 1 50 79\nM V30 78 1 49 79\nM V30 79 1 60 80\nM V30 80 1 80 61\nM V30 81 1 81 82\nM V30 82 1 82 83\nM V30 83 1 85 86\nM V30 84 1 86 87\nM V30 85 1 89 90\nM V30 86 1 90 91\nM V30 87 1 91 92\nM V30 88 1 92 93\nM V30 89 2 89 96\nM V30 90 2 83 95\nM V30 91 1 83 84\nM V30 92 1 84 85\nM V30 93 1 87 88\nM V30 94 1 88 89\nM V30 95 1 93 94\nM V30 96 1 94 97\nM V30 97 1 98 99\nM V30 98 1 99 100\nM V30 99 1 100 101\nM V30 100 1 97 98\nM V30 101 1 101 102\nM V30 102 1 97 102\nM V30 103 1 101 103\nM V30 104 1 98 106\nM V30 105 1 106 107\nM V30 106 1 107 108\nM V30 107 2 107 109\nM V30 108 1 99 105\nM V30 109 1 100 104\nM V30 110 1 103 110\nM V30 111 1 81 111\nM V30 112 1 16 111\nM V30 113 1 17 48\nM V30 END BOND\nM V30 END CTAB\nM END\n",
6
+ "author": "SequenceTranslator",
7
+ "id": 0,
8
+ "rgroups": [
9
+ {
10
+ "capGroupSmiles": "O[*:1]",
11
+ "alternateId": "R1-OH",
12
+ "capGroupName": "OH",
13
+ "label": "R1"
14
+ },
15
+ {
16
+ "capGroupSmiles": "O[*:2]",
17
+ "alternateId": "R2-OH",
18
+ "capGroupName": "OH",
19
+ "label": "R2"
20
+ }
21
+ ],
22
+ "smiles": "C(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)(COCCC(=O)NCCCNC(=O)CCCCOC2OC(CO)C(O)C(O)C2NC(=O)C)NC(=O)CCCC(=O)NCC(O)CO",
23
+ "polymerType": "RNA",
24
+ "monomerType": "Backbone",
25
+ "createDate": null,
26
+ "meta": {
27
+ "molecularWeight": 1273.3
28
+ }
29
+ },
2
30
  {
3
31
  "symbol": "2'-fluoro-A",
4
32
  "name": "2'-fluoro-A",
@@ -1090,5 +1118,25 @@
1090
1118
  "meta": {
1091
1119
  "molecularWeight": 306.17
1092
1120
  }
1121
+ },
1122
+ {
1123
+ "monomerType": "Branch",
1124
+ "smiles": "[H:1]n1cc(C)c(N)nc1=O",
1125
+ "name": "5-Methylcytosine",
1126
+ "author": "Pistoia Alliance HELM project",
1127
+ "molfile": "HELM Core Monomer library\nHELMMonomers071816.sdf \n\n 10 10 0 0 0 0 0 0 0 0999 V2000\n 23.0624 -3.8327 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 23.0624 -5.3327 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 21.7674 -6.0827 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 20.4624 -5.3327 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 20.4624 -3.8327 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 21.7674 -3.0827 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 21.7674 -1.5827 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 21.7674 -7.5827 0.0000 R1 0 0 0 0 0 0 0 0 0 0 0 0\n 24.3614 -6.0827 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 19.1634 -3.0827 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0\n 2 3 1 0 0\n 3 4 1 0 0\n 4 5 2 0 0\n 5 6 1 0 0\n 1 6 2 0 0\n 6 7 1 0 0\n 3 8 1 0 0\n 2 9 2 0 0\n 5 10 1 0 0\nA 8\nR1\nM END\n",
1128
+ "naturalAnalog": "C",
1129
+ "rgroups": [
1130
+ {
1131
+ "capGroupSMILES": "[*:1][H]",
1132
+ "alternateId": "R1-H",
1133
+ "capGroupName": "H",
1134
+ "label": "R1"
1135
+ }
1136
+ ],
1137
+ "symbol": "m5C",
1138
+ "createDate": null,
1139
+ "polymerType": "RNA",
1140
+ "id": 0
1093
1141
  }
1094
1142
  ]
package/package.json CHANGED
@@ -1,33 +1,33 @@
1
1
  {
2
2
  "name": "@datagrok/sequence-translator",
3
3
  "friendlyName": "Sequence Translator",
4
- "version": "1.1.0",
4
+ "version": "1.1.5",
5
5
  "author": {
6
6
  "name": "Alexey Choposky",
7
7
  "email": "achopovsky@datagrok.ai"
8
8
  },
9
- "description": "SequenceTranslator is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform, used to translate [oligonucleotide](https://en.wikipedia.org/wiki/Oligonucleotide) sequences between [different representations](https://github.com/datagrok-ai/public/tree/master/packages/SequenceTranslator#sequence-representations).",
9
+ "description": "SequenceTranslator translates [oligonucleotide](https://en.wikipedia.org/wiki/Oligonucleotide) sequences between [different representations](https://github.com/datagrok-ai/public/tree/master/packages/SequenceTranslator#sequence-representations).",
10
10
  "repository": {
11
11
  "type": "git",
12
12
  "url": "https://github.com/datagrok-ai/public.git",
13
13
  "directory": "packages/SequenceTranslator"
14
14
  },
15
15
  "dependencies": {
16
+ "@datagrok-libraries/bio": "^5.32.1",
16
17
  "@datagrok-libraries/chem-meta": "^1.0.9",
17
- "@datagrok-libraries/utils": "^1.17.2",
18
18
  "@datagrok-libraries/tutorials": "^1.3.2",
19
- "cash-dom": "^8.1.0",
20
- "datagrok-api": "^1.10.2",
19
+ "@datagrok-libraries/utils": "^1.17.2",
21
20
  "@types/react": "^18.0.15",
22
- "@datagrok-libraries/bio": "^5.32.1",
23
- "datagrok-tools": "^4.1.2",
24
- "npm": "^8.11.0",
21
+ "cash-dom": "^8.1.0",
22
+ "datagrok-api": "^1.15.2",
25
23
  "openchemlib": "6.0.1",
26
24
  "save-svg-as-png": "^1.4.17",
27
25
  "ts-loader": "^9.3.1",
28
26
  "typescript": "^4.7.4"
29
27
  },
30
28
  "devDependencies": {
29
+ "@datagrok/bio": "^2.10.0",
30
+ "@datagrok/chem": "1.7.2",
31
31
  "@types/jquery": "^3.5.14",
32
32
  "@types/js-yaml": "^4.0.5",
33
33
  "@types/node-fetch": "^2.6.2",
@@ -35,16 +35,13 @@
35
35
  "@typescript-eslint/eslint-plugin": "latest",
36
36
  "@typescript-eslint/parser": "parser",
37
37
  "css-loader": "^6.7.3",
38
- "datagrok-tools": "^4.7.10",
39
38
  "eslint": "^7.32.0",
40
39
  "eslint-config-google": "latest",
41
40
  "style-loader": "^3.3.1",
42
41
  "ts-loader": "^9.3.1",
43
42
  "typescript": "^4.7.4",
44
43
  "webpack": "^5.75.0",
45
- "webpack-cli": "latest",
46
- "@datagrok/chem": "1.5.7",
47
- "@datagrok/bio": "^2.1.12"
44
+ "webpack-cli": "latest"
48
45
  },
49
46
  "scripts": {
50
47
  "link-api": "npm link datagrok-api",
@@ -20,8 +20,8 @@ export const SS = 'SS' as const;
20
20
  export const AS = 'AS' as const;
21
21
  export const STRANDS = [SS, AS];
22
22
  export const STRAND_NAME = {
23
- [SS]: 'Sense Strand',
24
- [AS]: 'Antisense Strand',
23
+ [SS]: 'Sense strand',
24
+ [AS]: 'Anti sense',
25
25
  }
26
26
 
27
27
  export const THREE_PRIME = 'THREE_PRIME' as const;
@@ -3,7 +3,6 @@ import * as grok from 'datagrok-api/grok';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
5
5
 
6
- export const DELIMITER = ';'; // what is the need for this?
7
6
  export const NUCLEOTIDES = ['A', 'G', 'C', 'U', 'T'];
8
7
 
9
8
  export const TECHNOLOGIES = {
@@ -27,13 +27,14 @@ export function getTranslatedSequences(sequence: string, indexOfFirstInvalidChar
27
27
  return [format, translation];
28
28
  }).filter(([format, translation]) => translation)
29
29
  )
30
- const nucleotides = getNucleotidesSequence(result[DEFAULT_FORMATS.HELM], MonomerLibWrapper.getInstance());
30
+ const helm = (sourceFormat === DEFAULT_FORMATS.HELM) ? sequence : result[DEFAULT_FORMATS.HELM];
31
+ const nucleotides = getNucleotidesSequence(helm, MonomerLibWrapper.getInstance());
31
32
  if (nucleotides)
32
33
  result['Nucleotides'] = nucleotides;
33
34
  return result;
34
35
  }
35
36
 
36
- function getNucleotidesSequence(helmString: string, monomerLib: MonomerLibWrapper): string | null {
37
+ export function getNucleotidesSequence(helmString: string, monomerLib: MonomerLibWrapper): string | null {
37
38
  const re = new RegExp('\\([^()]*\\)', 'g');
38
39
  const branches = helmString.match(re);
39
40
  if (!branches)
@@ -48,7 +48,7 @@ function getHelmToCodeDict(infoObj: CodesInfo) {
48
48
  const result: {[key: string]: string | string[]} = {};
49
49
  Object.values(infoObj).forEach((obj: {[code: string]: string}) => {
50
50
  Object.entries(obj).forEach(([code, helm]) => {
51
- const key = helm.replace(/\)p/g, ')');
51
+ const key = helm.replace(/\)p/g, ')').replace(/\]p/g, ']');
52
52
  if (result[key] === undefined) {
53
53
  result[key] = [code];
54
54
  } else {
@@ -76,6 +76,7 @@ function helmToFormat(helmSequence: string, targetFormat: string): string {
76
76
  return helmCodes.includes(match) ? dict[match] :
77
77
  (match === 'p' || match === '.') ? match : '?';
78
78
  }).replace(/\?+/g, UNKNOWN_SYMBOL).replace(/p\.|\./g, '');
79
+ result = result.replace(/<empty>/g, '');
79
80
  // remove double slash in LCMS codes
80
81
  result = result.replace(/\/\//g, '/');
81
82
  return result;
@@ -103,5 +104,6 @@ function formatToHelm(sequence: string, sourceFormat: string): string {
103
104
  if (helm[helm.length - 1] === PHOSPHATE_SYMBOL)
104
105
  helm = helm.slice(0, -1);
105
106
  helm = helm.replace(phosphateRegExp, (match, group) => group);
107
+ helm = helm.replace(/<empty>/g, '');
106
108
  return `${HELM_WRAPPER.LEFT + helm + HELM_WRAPPER.RIGHT}`;
107
109
  }
@@ -4,6 +4,7 @@ import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  import {_package} from '../../package';
7
+ import {DEFAULT_FORMATS} from '../const';
7
8
 
8
9
  import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types';
9
10
 
@@ -29,6 +30,14 @@ export class MonomerLibWrapper {
29
30
  formattedObject[REQ.NAME] = sourceObj[REQ.SYMBOL];
30
31
  formattedObject[REQ.SYMBOL] = sourceObj[REQ.SYMBOL];
31
32
  formattedObject[REQ.MOLFILE] = sourceObj[REQ.MOLFILE];
33
+ const formats = this.getAllFormats();
34
+ formats.forEach((format) => {
35
+ if (format === DEFAULT_FORMATS.HELM)
36
+ return;
37
+ const map = codesToSymbolsDictionary[format];
38
+ const codes = Object.keys(map).filter((code) => map[code] === sourceObj.symbol);
39
+ formattedObject[format] = codes.join(', ');
40
+ })
32
41
 
33
42
  return formattedObject;
34
43
  }
@@ -86,6 +95,10 @@ export class MonomerLibWrapper {
86
95
  return Object.keys(codesToSymbolsDictionary[format]);
87
96
  }
88
97
 
98
+ getAllFormats(): string[] {
99
+ return Object.keys(codesToSymbolsDictionary);
100
+ }
101
+
89
102
  getTableForViewer(): DG.DataFrame {
90
103
  const formattedObjects = this.allMonomers.map((monomer) => this.formatMonomerForViewer(monomer));
91
104
  const df = DG.DataFrame.fromObjects(formattedObjects)!;
@@ -8,6 +8,7 @@ import {download} from '../helpers';
8
8
  import {SequenceToMolfileConverter} from './sequence-to-molfile';
9
9
  import {linkStrandsV3000} from './mol-transformations';
10
10
  import {DEFAULT_FORMATS} from '../const';
11
+ import {FormatDetector} from '../parsing-validation/format-detector';
11
12
 
12
13
  export type StrandData = {
13
14
  strand: string,
@@ -18,7 +19,9 @@ export type StrandData = {
18
19
  export function getMolfileForStrand(strand: string, invert: boolean): string {
19
20
  if (strand === '')
20
21
  return '';
21
- const format = DEFAULT_FORMATS.AXOLABS;
22
+ const format = (new FormatDetector(strand)).getFormat();
23
+ if (!format)
24
+ return '';
22
25
  let molfile = '';
23
26
  try {
24
27
  molfile = (new SequenceToMolfileConverter(strand, invert, format)).convert();
@@ -1,6 +1,8 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
3
- import './tests/smiles-tests';
3
+ import './tests/formats-to-helm';
4
+ import './tests/helm-to-nucleotides';
5
+ import './tests/formats-support';
4
6
 
5
7
  export const _package = new DG.Package();
6
8
  export {tests};
package/src/package.ts CHANGED
@@ -13,6 +13,7 @@ import {MonomerLibWrapper} from './model/monomer-lib/lib-wrapper';
13
13
  import {FormatDetector} from './model/parsing-validation/format-detector';
14
14
  import {SequenceValidator} from './model/parsing-validation/sequence-validator';
15
15
  import {demoDesignPatternUI, demoVisualizeDuplexUI, demoTranslateSequenceUI} from './demo/demo-st-ui';
16
+ import {FormatConverter} from './model/format-translation/format-converter';
16
17
 
17
18
  class StPackage extends DG.Package {
18
19
  private _monomerLib?: IMonomerLib;
@@ -34,7 +35,7 @@ class StPackage extends DG.Package {
34
35
  this._monomerLib = await libHelper.readLibrary(LIB_PATH, DEFAULT_LIB_FILENAME);
35
36
  } catch (err: any) {
36
37
  const errMsg: string = err.hasOwnProperty('message') ? err.message : err.toString();
37
- throw new Error('Sequence Translator: Loading monomer library error: ' + errMsg);
38
+ throw new Error('Loading monomer library: ' + errMsg);
38
39
  } finally {
39
40
  pi.close();
40
41
  }
@@ -93,7 +94,6 @@ export function getMolfileFromGcrsSequence(sequence: string, invert: boolean): s
93
94
 
94
95
  //name: linkStrands
95
96
  //input: object strands
96
- //input: bool invert
97
97
  //output: string result
98
98
  export function linkStrands(strands: { senseStrands: string[], antiStrands: string[] }): string {
99
99
  return linkStrandsV3000(strands, true);
@@ -122,3 +122,13 @@ export async function demoDesignPattern(): Promise<void> {
122
122
  export async function demoVisualizeDuplex(): Promise<void> {
123
123
  await demoVisualizeDuplexUI();
124
124
  }
125
+
126
+ //name: translateOligonucleotideSequence
127
+ //input: string sequence
128
+ //input: string sourceFormat
129
+ //input: string targetFormat
130
+ //output: string result
131
+ export async function translateOligonucleotideSequence(sequence: string, sourceFormat: string, targetFormat: string): Promise<string> {
132
+ await initSequenceTranslatorLibData();
133
+ return (new FormatConverter(sequence, sourceFormat)).convertTo(targetFormat);
134
+ }
@@ -1,17 +1,24 @@
1
- export const axolabsToSmiles: {[key: string]: string} = {
2
- 'usCfCfUfGfAf': 'CO[C@@H]1[C@H](OP(=O)(S)OC[C@H]2O[C@@H](n3ccc(N)nc3=O)[C@H](F)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3ccc(N)nc3=O)[C@H](F)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](F)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](F)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3cnc4c(N)ncnc43)[C@H](F)[C@@H]2O)[C@@H](CO)O[C@H]1n1ccc(=O)[nH]c1=O',
1
+ type Dict = {[key: string]: string};
3
2
 
4
- 'usAfsusgsgsg': 'CO[C@@H]1[C@H](OP(=O)(S)OC[C@H]2O[C@@H](n3cnc4c(N)ncnc43)[C@H](F)[C@@H]2OP(=O)(S)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](OC)[C@@H]2OP(=O)(S)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](OC)[C@@H]2OP(=O)(S)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](OC)[C@@H]2OP(=O)(S)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](OC)[C@@H]2O)[C@@H](CO)O[C@H]1n1ccc(=O)[nH]c1=O',
5
-
6
- 'UfUfUfsCfsuacg': 'CO[C@@H]1[C@H](OP(=O)(O)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](OC)[C@@H]2O)[C@@H](COP(=O)(O)O[C@@H]2[C@@H](COP(=O)(O)O[C@@H]3[C@@H](COP(=O)(S)O[C@@H]4[C@@H](COP(=O)(S)O[C@@H]5[C@@H](COP(=O)(O)O[C@@H]6[C@@H](COP(=O)(O)O[C@@H]7[C@@H](CO)O[C@@H](n8ccc(=O)[nH]c8=O)[C@@H]7F)O[C@@H](n7ccc(=O)[nH]c7=O)[C@@H]6F)O[C@@H](n6ccc(=O)[nH]c6=O)[C@@H]5F)O[C@@H](n5ccc(N)nc5=O)[C@@H]4F)O[C@@H](n4ccc(=O)[nH]c4=O)[C@@H]3OC)O[C@@H](n3cnc4c(N)ncnc43)[C@@H]2OC)O[C@H]1n1ccc(N)nc1=O',
7
-
8
- 'susususauasu': 'CO[C@@H]1[C@H](O)[C@@H](COP(=O)(S)O[C@@H]2[C@@H](COP(=O)(O)O[C@@H]3[C@@H](COP(=O)(O)O[C@@H]4[C@@H](COP(=O)(S)O[C@@H]5[C@@H](COP(=O)(S)O[C@@H]6[C@@H](COP(=O)(S)O[C@@H]7[C@@H](COP(=O)(O)S)O[C@@H](n8ccc(=O)[nH]c8=O)[C@@H]7OC)O[C@@H](n7ccc(=O)[nH]c7=O)[C@@H]6OC)O[C@@H](n6ccc(=O)[nH]c6=O)[C@@H]5OC)O[C@@H](n5cnc6c(N)ncnc65)[C@@H]4OC)O[C@@H](n4ccc(=O)[nH]c4=O)[C@@H]3OC)O[C@@H](n3cnc4c(N)ncnc43)[C@@H]2OC)O[C@H]1n1ccc(=O)[nH]c1=O',
3
+ export const formatsToHelm: {[key: string]: Dict} = {
4
+ 'Axolabs': {
5
+ 'UfAfsCfsGfuacg': 'RNA1{[fR](U)p.[fR](A)[sp].[fR](C)[sp].[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)}$$$$'
6
+ },
7
+ 'BioSpring': {
8
+ 'AT*GC*123456789': 'RNA1{r(A)p.r(T)[sp].r(G)p.r(C)[sp].[fR](U)p.[fR](A)p.[fR](C)p.[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)p.d([m5C])}$$$$'
9
+ },
10
+ 'Mermade12': {
11
+ 'hefglijkLIJKHEFG': 'RNA1{[25r](U)[sp].[25r](A)[sp].[25r](C)[sp].[25r](G)[sp].[fR](U)[sp].[fR](A)[sp].[fR](C)[sp].[fR](G)[sp].[fR](U)p.[fR](A)p.[fR](C)p.[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)}$$$$'
12
+ }
13
+ }
9
14
 
10
- 'CfGfCfsGfsCf': 'Nc1ccn([C@@H]2O[C@H](COP(=O)(S)O[C@@H]3[C@@H](COP(=O)(S)O[C@@H]4[C@@H](COP(=O)(O)O[C@@H]5[C@@H](COP(=O)(O)O[C@@H]6[C@@H](CO)O[C@@H](n7ccc(N)nc7=O)[C@@H]6F)O[C@@H](n6cnc7c(=O)[nH]c(N)nc76)[C@@H]5F)O[C@@H](n5ccc(N)nc5=O)[C@@H]4F)O[C@@H](n4cnc5c(=O)[nH]c(N)nc54)[C@@H]3F)[C@@H](O)[C@H]2F)c(=O)n1',
15
+ export const helmToNucleotides: Dict = {
16
+ 'RNA1{[fR](U)p.[fR](A)[sp].[fR](C)[sp].[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)}$$$$': 'UACGUACG',
11
17
 
12
- 'acacacsacsac': 'CO[C@@H]1[C@H](O)[C@@H](COP(=O)(O)O[C@@H]2[C@@H](COP(=O)(S)O[C@@H]3[C@@H](COP(=O)(O)O[C@@H]4[C@@H](COP(=O)(S)O[C@@H]5[C@@H](COP(=O)(O)O[C@@H]6[C@@H](COP(=O)(O)O[C@@H]7[C@@H](COP(=O)(O)O[C@@H]8[C@@H](COP(=O)(O)O[C@@H]9[C@@H](COP(=O)(O)O[C@@H]%10[C@@H](CO)O[C@@H](n%11cnc%12c(N)ncnc%12%11)[C@@H]%10OC)O[C@@H](n%10ccc(N)nc%10=O)[C@@H]9OC)O[C@@H](n9cnc%10c(N)ncnc%109)[C@@H]8OC)O[C@@H](n8ccc(N)nc8=O)[C@@H]7OC)O[C@@H](n7cnc8c(N)ncnc87)[C@@H]6OC)O[C@@H](n6ccc(N)nc6=O)[C@@H]5OC)O[C@@H](n5cnc6c(N)ncnc65)[C@@H]4OC)O[C@@H](n4ccc(N)nc4=O)[C@@H]3OC)O[C@@H](n3cnc4c(N)ncnc43)[C@@H]2OC)O[C@H]1n1ccc(N)nc1=O',
18
+ 'RNA1{r(A)p.r(T)[sp].r(G)p.r(C)[sp].[fR](U)p.[fR](A)p.[fR](C)p.[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)p.d([m5C])}$$$$': 'ATGCUACGUACGC',
13
19
 
14
- 'cccgggusug': 'CO[C@@H]1[C@H](OP(=O)(O)OC[C@H]2O[C@@H](n3ccc(N)nc3=O)[C@H](OC)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3ccc(N)nc3=O)[C@H](OC)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](OC)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](OC)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](OC)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](OC)[C@@H]2OP(=O)(S)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](OC)[C@@H]2OP(=O)(O)OC[C@H]2O[C@@H](n3cnc4c(=O)[nH]c(N)nc43)[C@H](OC)[C@@H]2O)[C@@H](CO)O[C@H]1n1ccc(N)nc1=O',
20
+ 'RNA1{[25r](U)[sp].[25r](A)[sp].[25r](C)[sp].[25r](G)[sp].[fR](U)[sp].[fR](A)[sp].[fR](C)[sp].[fR](G)[sp].[fR](U)p.[fR](A)p.[fR](C)p.[fR](G)p.[25r](U)p.[25r](A)p.[25r](C)p.[25r](G)}$$$$': 'UACGUACGUACGUACG'
21
+ }
15
22
 
16
- 'UfAfCfGfGfCfAfUf': 'Nc1ccn([C@@H]2O[C@H](COP(=O)(O)O[C@@H]3[C@@H](COP(=O)(O)O[C@@H]4[C@@H](CO)O[C@@H](n5ccc(=O)[nH]c5=O)[C@@H]4F)O[C@@H](n4cnc5c(N)ncnc54)[C@@H]3F)[C@@H](OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(=O)[nH]c(N)nc54)[C@H](F)[C@@H]3OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(=O)[nH]c(N)nc54)[C@H](F)[C@@H]3OP(=O)(O)OC[C@H]3O[C@@H](n4ccc(N)nc4=O)[C@H](F)[C@@H]3OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](F)[C@@H]3OP(=O)(O)OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](F)[C@@H]3O)[C@H]2F)c(=O)n1'
23
+ export const helmToMolfile: Dict = {
17
24
  }
@@ -0,0 +1,40 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {before, category, expect, test} from '@datagrok-libraries/utils/src/test';
7
+ import {DEFAULT_FORMATS} from '../model/const';
8
+ import {FormatConverter} from '../model/format-translation/format-converter';
9
+ import {getJsonData} from '../model/data-loading-utils/json-loader';
10
+ import {formatsToHelm} from './const';
11
+ import {SequenceValidator} from '../model/parsing-validation/sequence-validator';
12
+ import {getTranslatedSequences} from '../model/format-translation/conversion-utils';
13
+ import {_package} from '../package';
14
+
15
+ function getTranslationObject(sequence: string, format: string): {[format: string]: string} {
16
+ const indexOfInvalidChar = (new SequenceValidator(sequence)).getInvalidCodeIndex(format);
17
+ return getTranslatedSequences(sequence, indexOfInvalidChar, format);
18
+ }
19
+
20
+ const inputs = {
21
+ [DEFAULT_FORMATS.AXOLABS]: 'Afcgacsu',
22
+ [DEFAULT_FORMATS.HELM]: 'RNA1{[fR](A)p.[25r](C)p.[25r](G)p.[25r](A)p.[25r](C)[sp].[25r](U)}$$$$'
23
+ }
24
+
25
+ category('Formats support', () => {
26
+ before(async () => {
27
+ await getJsonData();
28
+ await _package.initMonomerLib();
29
+ });
30
+
31
+ Object.entries(inputs).forEach(([format, sequence]) => {
32
+ test(`All formats for ${format}`, async () => {
33
+ const output = getTranslationObject(sequence, format);
34
+ const result = Object.keys(output).length;
35
+ // +1 due to nucleotides
36
+ const expected = Object.keys(formatsToHelm).length + 1;
37
+ expect(result, expected);
38
+ });
39
+ });
40
+ });
@@ -0,0 +1,53 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {before, category, expect, test} from '@datagrok-libraries/utils/src/test';
7
+ import {DEFAULT_FORMATS} from '../model/const';
8
+ import {FormatConverter} from '../model/format-translation/format-converter';
9
+ import {getJsonData} from '../model/data-loading-utils/json-loader';
10
+ import {formatsToHelm} from './const';
11
+ import {_package} from '../package';
12
+
13
+ function getHelm(strand: string, format: string): string {
14
+ return (new FormatConverter(strand, format).convertTo(DEFAULT_FORMATS.HELM));
15
+ }
16
+
17
+ function getFromat(helm: string, format: string): string {
18
+ return (new FormatConverter(helm, DEFAULT_FORMATS.HELM).convertTo(format));
19
+ }
20
+
21
+ category('Formats to HELM', () => {
22
+ before(async () => {
23
+ await getJsonData();
24
+ await _package.initMonomerLib();
25
+ });
26
+
27
+ for (const format of Object.keys(formatsToHelm)) {
28
+ for (const [strand, helm] of Object.entries(formatsToHelm[format])) {
29
+ test(`${format} to HELM`, async () => {
30
+ const expected = helm;
31
+ const result = getHelm(strand, format);
32
+ expect(result, expected);
33
+ });
34
+ }
35
+ }
36
+ });
37
+
38
+ category('HELM to Formats', () => {
39
+ before(async () => {
40
+ await getJsonData();
41
+ await _package.initMonomerLib();
42
+ });
43
+
44
+ for (const format of Object.keys(formatsToHelm)) {
45
+ for (const [strand, helm] of Object.entries(formatsToHelm[format])) {
46
+ test(`${format} to HELM`, async () => {
47
+ const expected = strand;
48
+ const result = getFromat(helm, format);
49
+ expect(result, expected);
50
+ });
51
+ }
52
+ }
53
+ });
@@ -0,0 +1,28 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {before, category, expect, test} from '@datagrok-libraries/utils/src/test';
7
+ import {DEFAULT_FORMATS} from '../model/const';
8
+ import {FormatConverter} from '../model/format-translation/format-converter';
9
+ import {getNucleotidesSequence} from '../model/format-translation/conversion-utils';
10
+ import {getJsonData} from '../model/data-loading-utils/json-loader';
11
+ import {helmToNucleotides} from './const';
12
+ import {_package} from '../package';
13
+ import {MonomerLibWrapper} from '../model/monomer-lib/lib-wrapper';
14
+
15
+ category('HELM to Nucleotides', () => {
16
+ before(async () => {
17
+ await getJsonData();
18
+ await _package.initMonomerLib();
19
+ });
20
+
21
+ Object.entries(helmToNucleotides).forEach(([helm, nucleotide], idx) => {
22
+ test(`Sequence ${idx + 1} to nucleotides`, async () => {
23
+ const expected = nucleotide;
24
+ const result = getNucleotidesSequence(helm, MonomerLibWrapper.getInstance());
25
+ expect(result, expected);
26
+ });
27
+ })
28
+ });
@@ -5,6 +5,6 @@ import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  export const MAIN_TAB = 'SEQUENCE';
7
7
  export const AXOLABS_TAB = 'PATTERN';
8
- export const SDF_TAB = 'DUPLEX';
8
+ export const SDF_TAB = 'SDF';
9
9
 
10
10
  export const DEFAULT_AXOLABS_INPUT = 'Afcgacsu';