@datagrok/sequence-translator 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "@datagrok/sequence-translator",
3
3
  "friendlyName": "Sequence Translator",
4
- "version": "1.0.13",
4
+ "version": "1.0.14",
5
5
  "author": {
6
- "name": "Vadym Kovadlo",
7
- "email": "vkovadlo@datagrok.ai"
6
+ "name": "Alexey Choposky",
7
+ "email": "achopovsky@datagrok.ai"
8
8
  },
9
9
  "description": "SequenceTranslator is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform, used to translate [oligonucleotide](https://en.wikipedia.org/wiki/Oligonucleotide) sequences between [different representations](https://github.com/datagrok-ai/public/tree/master/packages/SequenceTranslator#sequence-representations).",
10
10
  "repository": {
@@ -15,6 +15,7 @@
15
15
  "dependencies": {
16
16
  "@datagrok-libraries/utils": "^1.15.5",
17
17
  "@types/react": "^18.0.15",
18
+ "@datagrok-libraries/bio": "^5.11.1",
18
19
  "datagrok-api": "^1.7.2",
19
20
  "datagrok-tools": "^4.1.2",
20
21
  "npm": "^8.11.0",
@@ -25,6 +26,7 @@
25
26
  },
26
27
  "scripts": {
27
28
  "link-api": "npm link datagrok-api",
29
+ "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio",
28
30
  "debug-sequencetranslator": "grok publish",
29
31
  "release-sequencetranslator": "grok publish localhost --release",
30
32
  "build-sequencetranslator": "webpack",
@@ -0,0 +1,140 @@
1
+ from io import TextIOWrapper
2
+
3
+ from rdkit.Chem import AllChem
4
+ from rdkit import Chem
5
+
6
+ import orjson
7
+ import json
8
+
9
+ import click
10
+
11
+ from click_default_group import DefaultGroup
12
+ from rdkit.Chem.rdchem import Mol
13
+
14
+
15
def smiles2molfile(smiles: str) -> str:
    """Convert a SMILES string into a V3000 molblock.

    Args:
        smiles: SMILES notation of the molecule.

    Returns:
        The molblock text produced by RDKit with ``forceV3000=True``.

    Raises:
        ValueError: If RDKit cannot parse ``smiles``.
    """
    mol: Mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit reports a parse failure by returning None; fail here with the
        # offending input instead of inside MolToMolBlock with an opaque error.
        raise ValueError(f'Cannot parse SMILES: {smiles!r}')
    return Chem.MolToMolBlock(mol, forceV3000=True)
19
+
20
+
21
def molV2000toMolV3000(molV2K: str) -> str:
    """Re-encode a V2000 molblock as a V3000 molblock.

    Args:
        molV2K: Molblock text in V2000 format.

    Returns:
        The V3000 molblock, with every occurrence of ``'Pol'`` replaced by
        ``'O  '`` (same width).

    Raises:
        ValueError: If RDKit cannot parse ``molV2K``.
    """
    mol: Mol = Chem.MolFromMolBlock(molV2K)
    if mol is None:
        # RDKit reports a parse failure by returning None.
        raise ValueError('Cannot parse V2000 molblock')
    res: str = Chem.MolToMolBlock(mol, forceV3000=True)
    # NOTE(review): presumably 'Pol' is a polymer placeholder atom that is
    # swapped for oxygen; the replacement keeps the column width unchanged.
    return res.replace('Pol', 'O  ')
25
+
26
+
27
# Codes of one monomer, nested as {codes source: {codes type: [code, ...]}}.
CodesType = dict[str, dict[str, list[str]]]


class Monomer:
    """One monomer entry of the generated monomer library.

    Instances carry fixed defaults (backbone monomer of polymer type RNA with
    two OH-capped R-groups) plus the symbol, name, SMILES, molfile and codes
    supplied by the caller.
    """

    def __init__(self,
                 symbol: str, name: str, smiles: str,
                 codes: CodesType,
                 molfile: 'str | None' = None):
        """Create a monomer.

        Args:
            symbol: Monomer symbol.
            name: Monomer name.
            smiles: SMILES of the monomer.
            codes: Codes grouped by source and type.
            molfile: Ready-made molfile text. When omitted (``None``) the
                molfile is generated from ``smiles`` via ``smiles2molfile``.
        """
        self.monomerType = 'Backbone'
        self.smiles = smiles
        self.name = name
        self.author = 'SequenceTranslator'
        # Only run the SMILES conversion when no molfile was supplied, so a
        # stored molfile is never recomputed just to be thrown away.
        self.molfile = smiles2molfile(smiles) if molfile is None else molfile
        self.naturalAnalog = ''
        self.rgroups = [
            {
                "capGroupSmiles": "O[*:1]",
                "alternateId": "R1-OH",
                "capGroupName": "OH",
                "label": "R1"
            },
            {
                "capGroupSmiles": "O[*:2]",
                "alternateId": "R2-OH",
                "capGroupName": "OH",
                "label": "R2"
            }]
        self.createDate = None
        self.id = 0
        self.polymerType = 'RNA'
        self.symbol = symbol
        self.codes: CodesType = codes

    @classmethod
    def from_json(cls, src_json: dict) -> 'Monomer':
        """Build a monomer from a library JSON object.

        Args:
            src_json: Mapping with the keys ``symbol``, ``name``, ``smiles``,
                ``codes`` and ``molfile``.

        Returns:
            A monomer that reuses the stored molfile; a ``None`` molfile is
            regenerated from the SMILES.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        return cls(src_json['symbol'], src_json['name'], src_json['smiles'], src_json['codes'],
                   molfile=src_json['molfile'])

    def to_json(self) -> dict:
        """Return the monomer as a plain dict ready for JSON serialization."""
        return {
            'monomerType': self.monomerType,
            'smiles': self.smiles,
            'name': self.name,
            'author': self.author,
            'molfile': self.molfile,
            'naturalAnalog': self.naturalAnalog,
            'rgroups': self.rgroups,
            'createDate': self.createDate,
            'id': self.id,
            'polymerType': self.polymerType,
            'symbol': self.symbol,
            'codes': self.codes,
        }
80
+
81
+
82
def codes2monomers(codes_json: dict) -> dict[str, Monomer]:
    """Collect monomers from a nested codes table.

    Args:
        codes_json: Mapping nested as
            ``{codes source: {codes type: {code: monomer json}}}`` where each
            monomer json has the keys ``name`` and ``SMILES``.

    Returns:
        Monomers keyed by name. A monomer is created on the first occurrence
        of its name (the name is also used as its symbol); every occurrence
        appends its code under ``codes[source][type]``.
    """
    monomers_res: dict[str, Monomer] = {}
    for codes_src, src_dict in codes_json.items():
        for codes_type, monomers_dict in src_dict.items():
            for codes_code, monomer_json in monomers_dict.items():
                monomer_name = monomer_json['name']
                monomer = monomers_res.get(monomer_name)
                if monomer is None:
                    monomer = Monomer(monomer_name, monomer_name, monomer_json['SMILES'], {})
                    monomers_res[monomer_name] = monomer
                # setdefault creates the missing source/type levels on demand.
                monomer.codes.setdefault(codes_src, {}).setdefault(codes_type, []).append(codes_code)
    return monomers_res
100
+
101
+
102
# Root command group. DefaultGroup routes an invocation without an explicit
# subcommand to 'main'. A '#' comment is used instead of a docstring because
# click would surface a docstring as help text.
@click.group(cls=DefaultGroup, default='main')
def cli() -> None:
    return None
105
+
106
+
107
# Build the monomer library: read the initial codes table, merge additional
# libraries on top of it (entries with the same monomer name replace earlier
# ones) and write the result as indented JSON.
# '#' comments are used instead of a docstring because click would surface a
# docstring as the command's help text.
@cli.command()
@click.pass_context
@click.option('--initial', 'initial_f', required=True,
              help='Initial monomers source file.',
              type=click.File('r', 'utf-8'))
# Binary mode because orjson.dumps returns bytes; a binary stream takes no
# text encoding, so none is passed.
@click.option('--lib', 'lib_f', required=True,
              help='Output library (HELM format) file.',
              type=click.File('wb'))
@click.option('--add', 'add_f_list', multiple=True,
              help='Additional libraries to build.',
              type=click.File('r', 'utf-8'))
def main(ctx, initial_f: TextIOWrapper, lib_f: TextIOWrapper, add_f_list: list[TextIOWrapper]):
    # ctx is injected by @click.pass_context and is not used here.
    # --initial and --lib are required: without them click passes None and the
    # first read()/write() call would fail with AttributeError.
    monomers: dict[str, Monomer] = codes2monomers(orjson.loads(initial_f.read()))

    for add_f in add_f_list:
        for add_m in orjson.loads(add_f.read()):
            m = Monomer.from_json(add_m)
            monomers[m.name] = m

    lib_json = [m.to_json() for m in monomers.values()]
    lib_f.write(orjson.dumps(lib_json, option=orjson.OPT_INDENT_2))


if __name__ == '__main__':
    cli()
@@ -0,0 +1,14 @@
1
rem Remove the global npm links and the node_modules / dist directories of
rem js-api, libraries\utils, libraries\bio and this package.
set package_dir=%cd%

set dirs=^
\..\..\js-api\ ^
\..\..\libraries\utils\ ^
\..\..\libraries\bio\ ^
\

call npm uninstall -g datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio

rem "&&" instead of "&": delete only when the directory change succeeded,
rem otherwise rmdir would run in whatever directory is current.
for %%p in (%dirs%) do cd %package_dir%\%%p && rmdir /s /q node_modules
for %%p in (%dirs%) do cd %package_dir%\%%p && rmdir /s /q dist

rem for %%p in (%dirs%) do cd %package_dir%\%%p & del "package-lock.json"
package/setup.cmd CHANGED
@@ -1,11 +1,14 @@
1
- cd ../../js-api
2
- call npm install
3
- call npm link
4
- cd ../libraries/utils
5
- call npm install
6
- call npm link
7
- call npm link datagrok-api
8
- cd ../../packages/SequenceTranslator
9
- call npm install
10
- call npm link datagrok-api @datagrok-libraries/utils
11
- webpack
1
rem Clean previous links/builds, then install, link and build js-api,
rem libraries\utils, libraries\bio and this package, in that order.
call setup-unlink-clean.cmd

set package_dir=%cd%

set dirs=^
\..\..\js-api\ ^
\..\..\libraries\utils\ ^
\..\..\libraries\bio\ ^
\

rem "&&" instead of "&": run npm only when the directory change succeeded,
rem otherwise the command would run in whatever directory is current.
for %%p in (%dirs%) do cd %package_dir%\%%p && call npm install
for %%p in (%dirs%) do cd %package_dir%\%%p && call npm link
rem NOTE(review): setup.sh skips link-all for js-api; confirm whether js-api
rem defines a link-all script before relying on this step there.
for %%p in (%dirs%) do cd %package_dir%\%%p && call npm run link-all
for %%p in (%dirs%) do cd %package_dir%\%%p && call npm run build
package/setup.sh ADDED
@@ -0,0 +1,37 @@
1
#!/bin/bash

# Clean previous links/builds, then install, link and build js-api,
# libraries/utils and libraries/bio, and finally link them into this package.
# Must be started from the package directory (paths are relative to it).

./setup-unlink-clean.sh

GREEN='\e[0;32m'
NO_COLOR='\e[0m'

package_dir=$(pwd)

dirs=(
  "../../js-api/"
  "../../libraries/utils/"
  "../../libraries/bio/"
)

# Pass 1: install dependencies and register every project as a global link.
# Expansions are quoted so paths containing spaces are not split, and a failed
# cd aborts instead of running npm in the wrong directory.
for dir in "${dirs[@]}"; do
  cd "$package_dir/$dir" || exit 1
  echo -e "$GREEN npm install in $(pwd) $NO_COLOR"
  npm install
  echo -e "$GREEN npm link in $(pwd) $NO_COLOR"
  npm link
done

# Pass 2: link sibling projects into each other and build.
for dir in "${dirs[@]}"; do
  cd "$package_dir/$dir" || exit 1
  # js-api is skipped for link-all.
  if [ "$dir" != "../../js-api/" ]; then
    echo -e "$GREEN npm link-all in $(pwd) $NO_COLOR"
    npm run link-all
  fi
  echo -e "$GREEN npm run build in $(pwd) $NO_COLOR"
  npm run build || exit
done

cd "$package_dir" || exit 1
npm run link-all
@@ -6,6 +6,18 @@ export const SEQUENCE_TYPES = {
6
6
  DIMER: 'Dimer',
7
7
  };
8
8
 
9
/** Literal markers that precede each strand inside a multi-strand cell value. */
export const CELL_STRUCTURE = {
  // Two strands: sense (SS) and antisense (AS).
  DUPLEX: {BEFORE_SS: 'SS ', BEFORE_AS: '\r\nAS '},
  // Three strands: sense (SS) and two antisense strands (AS1, AS2).
  TRIPLEX_OR_DIMER: {BEFORE_SS: 'SS ', BEFORE_AS1: '\r\nAS1 ', BEFORE_AS2: '\r\nAS2 '},
};
20
+
9
21
  export const COL_NAMES = {
10
22
  CHEMISTRY: 'Chemistry',
11
23
  NUMBER: 'Number',
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
4
4
  import {siRnaBioSpringToGcrs, siRnaAxolabsToGcrs, gcrsToNucleotides, asoGapmersBioSpringToGcrs, gcrsToMermade12,
5
5
  siRnaNucleotidesToGcrs} from '../structures-works/converters';
6
6
  import {weightsObj, SYNTHESIZERS} from '../structures-works/map';
7
- import {SEQUENCE_TYPES, COL_NAMES, GENERATED_COL_NAMES} from './constants';
7
+ import {SEQUENCE_TYPES, COL_NAMES, GENERATED_COL_NAMES, CELL_STRUCTURE} from './constants';
8
8
  import {saltMass, saltMolWeigth, molecularWeight, batchMolWeight} from './calculations';
9
9
  import {isValidSequence} from '../structures-works/sequence-codes-tools';
10
10
  import {sequenceToMolV3000} from '../structures-works/from-monomers';
@@ -19,13 +19,18 @@ import {IDPS} from './IDPs';
19
19
 
20
20
 
21
21
  function parseStrandsFromDuplexCell(s: string): {SS: string, AS: string} {
22
- const arr = s.slice(3).split('\r\nAS ');
22
+ const arr = s
23
+ .slice(CELL_STRUCTURE.DUPLEX.BEFORE_SS.length)
24
+ .split(CELL_STRUCTURE.DUPLEX.BEFORE_AS);
23
25
  return {SS: arr[0], AS: arr[1]};
24
26
  }
25
27
 
26
28
  function parseStrandsFromTriplexOrDimerCell(s: string): {SS: string, AS1: string, AS2: string} {
27
- const arr1 = s.slice(3).split('\r\nAS1 ');
28
- const arr2 = arr1[1].split('\r\nAS2 ');
29
+ const arr1 = s
30
+ .slice(CELL_STRUCTURE.TRIPLEX_OR_DIMER.BEFORE_SS.length)
31
+ .split(CELL_STRUCTURE.TRIPLEX_OR_DIMER.BEFORE_AS1);
32
+ const arr2 = arr1[1]
33
+ .split(CELL_STRUCTURE.TRIPLEX_OR_DIMER.BEFORE_AS2);
29
34
  return {SS: arr1[0], AS1: arr2[0], AS2: arr2[1]};
30
35
  }
31
36
 
@@ -249,6 +254,15 @@ export function oligoSdFile(table: DG.DataFrame) {
249
254
  if ([COL_NAMES.SALT, COL_NAMES.EQUIVALENTS, COL_NAMES.SALT_MOL_WEIGHT].includes(colName))
250
255
  updateCalculatedColumns(view.dataFrame, view.dataFrame.currentRowIdx);
251
256
  });
257
+
258
/**
 * Recomputes the salt mass, salt molecular weight and batch molecular weight
 * cells of row `i` in table `t`.
 * Relies on `saltNamesList`, `molWeightCol`, `equivalentsCol` and `saltCol`
 * from the enclosing scope.
 */
function updateCalculatedColumns(t: DG.DataFrame, i: number): void {
  // The value is computed first (as an argument), then the target column is
  // looked up and written; `false` is the third argument of `set`
  // (presumably "do not notify" -- confirm against the DG API).
  const store = (colName: string, value: any): void => {
    t.getCol(colName).set(i, value, false);
  };

  store(COL_NAMES.SALT_MASS, saltMass(saltNamesList, molWeightCol, equivalentsCol, i, saltCol));
  store(COL_NAMES.SALT_MOL_WEIGHT, saltMolWeigth(saltNamesList, saltCol, molWeightCol, i));
  // Batch weight is derived from the freshly written salt mass.
  store(COL_NAMES.BATCH_MOL_WEIGHT,
    batchMolWeight(t.getCol(COL_NAMES.COMPOUND_MOL_WEIGHT), t.getCol(COL_NAMES.SALT_MASS), i));
}
252
266
  }),
253
267
  ]);
254
268
  grok.shell.v.setRibbonPanels([[d]]);
@@ -1,12 +1,12 @@
1
- const rnaColor = 'rgb(255,230,153)';
2
- const invAbasicColor = 'rgb(203,119,211)';
3
- export const axolabsMap:
1
+ const RNA_COLOR = 'rgb(255,230,153)';
2
+ const INVABASIC_COLOR = 'rgb(203,119,211)';
3
+ export const AXOLABS_MAP:
4
4
  {[index: string]: {fullName: string, symbols: [string, string, string, string], color: string}} =
5
5
  {
6
6
  'RNA': {
7
7
  fullName: 'RNA nucleotides',
8
8
  symbols: ['A', 'C', 'G', 'U'],
9
- color: rnaColor,
9
+ color: RNA_COLOR,
10
10
  },
11
11
  'DNA': {
12
12
  fullName: 'DNA nucleotides',
@@ -46,22 +46,22 @@ export const axolabsMap:
46
46
  'A': {
47
47
  fullName: 'Adenine',
48
48
  symbols: ['a', 'a', 'a', 'a'],
49
- color: rnaColor,
49
+ color: RNA_COLOR,
50
50
  },
51
51
  'C': {
52
52
  fullName: 'Cytosine',
53
53
  symbols: ['c', 'c', 'c', 'c'],
54
- color: rnaColor,
54
+ color: RNA_COLOR,
55
55
  },
56
56
  'G': {
57
57
  fullName: 'Guanine',
58
58
  symbols: ['g', 'g', 'g', 'g'],
59
- color: rnaColor,
59
+ color: RNA_COLOR,
60
60
  },
61
61
  'U': {
62
62
  fullName: 'Uracil',
63
63
  symbols: ['u', 'u', 'u', 'u'],
64
- color: rnaColor,
64
+ color: RNA_COLOR,
65
65
  },
66
66
  'X-New': {
67
67
  fullName: '',
@@ -81,7 +81,7 @@ export const axolabsMap:
81
81
  'InvAbasic': {
82
82
  fullName: 'Inverted abasic capped',
83
83
  symbols: ['(invabasic)', '(invabasic)', '(invabasic)', '(invabasic)'],
84
- color: invAbasicColor,
84
+ color: INVABASIC_COLOR,
85
85
  },
86
86
  "5\"-vinylps": {
87
87
  fullName: '5\'-vinylphosphonate-2\'-OMe-uridine',
@@ -91,7 +91,7 @@ export const axolabsMap:
91
91
  'InvAbasic(o)': {
92
92
  fullName: 'Inverted abasic capped (overhang)',
93
93
  symbols: ['(invabasic)', '(invabasic)', '(invabasic)', '(invabasic)'],
94
- color: invAbasicColor,
94
+ color: INVABASIC_COLOR,
95
95
  },
96
96
  "2\"-OMe-U(o)": {
97
97
  fullName: 'Nucleotide Uridine with 2\'O-Methyl protection (overhang)',
@@ -6,9 +6,10 @@ import * as svg from 'save-svg-as-png';
6
6
  import $ from 'cash-dom';
7
7
 
8
8
  import {drawAxolabsPattern} from './draw-svg';
9
- import {axolabsMap} from './constants';
9
+ import {AXOLABS_MAP} from './constants';
10
+ import {isOverhang} from './helpers';
10
11
 
11
- const baseChoices: string[] = Object.keys(axolabsMap);
12
+ const baseChoices: string[] = Object.keys(AXOLABS_MAP);
12
13
  const defaultBase: string = baseChoices[0];
13
14
  const defaultPto: boolean = true;
14
15
  const defaultSequenceLength: number = 23;
@@ -17,7 +18,7 @@ const userStorageKey: string = 'SequenceTranslator';
17
18
  const exampleMinWidth: string = '400px';
18
19
 
19
20
  function generateExample(sequenceLength: number, sequenceBasis: string): string {
20
- const uniqueSymbols = axolabsMap[sequenceBasis].symbols.join('');
21
+ const uniqueSymbols = AXOLABS_MAP[sequenceBasis].symbols.join('');
21
22
  return uniqueSymbols.repeat(Math.floor(sequenceLength / 4)) + uniqueSymbols.slice(0, sequenceLength % 4);
22
23
  }
23
24
 
@@ -64,12 +65,12 @@ function translateSequence(
64
65
  let i: number = -1;
65
66
  let mainSequence = sequence.replace(/[AUGC]/g, function(x: string) {
66
67
  i++;
67
- const indexOfSymbol = axolabsMap['RNA']['symbols'].indexOf(x);
68
- let symbol = axolabsMap[bases[i].value]['symbols'][indexOfSymbol];
69
- if (bases[i].value.slice(-3) == '(o)') {
70
- if (i < sequence.length / 2 && bases[i + 1].value.slice(-3) != '(o)')
68
+ const indexOfSymbol = AXOLABS_MAP['RNA']['symbols'].indexOf(x);
69
+ let symbol = AXOLABS_MAP[bases[i].value]['symbols'][indexOfSymbol];
70
+ if (isOverhang(bases[i].value)) {
71
+ if (i < sequence.length / 2 && !isOverhang(bases[i + 1].value))
71
72
  symbol = symbol + x + 'f';
72
- else if (i > sequence.length / 2 && bases[i - 1].value.slice(-3) != '(o)')
73
+ else if (i > sequence.length / 2 && !isOverhang(bases[i - 1].value))
73
74
  symbol = x + 'f' + symbol;
74
75
  }
75
76
  return (ptoLinkages[i].value) ? symbol + 's' : symbol;
@@ -150,12 +151,12 @@ export function defineAxolabsPattern() {
150
151
  updateSvgScheme();
151
152
  updateOutputExamples();
152
153
  });
153
- if (asBases[i].value.slice(-3) != '(o)')
154
+ if (!isOverhang(asBases[i].value))
154
155
  nucleotideCounter++;
155
156
 
156
157
  asModificationItems.append(
157
158
  ui.divH([
158
- ui.div([ui.label(asBases[i].value.slice(-3) == '(o)' ? '' : String(nucleotideCounter))],
159
+ ui.div([ui.label(isOverhang(asBases[i].value) ? '' : String(nucleotideCounter))],
159
160
  {style: {width: '20px'}})!,
160
161
  ui.block75([asBases[i]])!,
161
162
  ui.div([asPtoLinkages[i]])!,
@@ -196,12 +197,12 @@ export function defineAxolabsPattern() {
196
197
  updateSvgScheme();
197
198
  updateOutputExamples();
198
199
  });
199
- if (ssBases[i].value.slice(-3) != '(o)')
200
+ if (!isOverhang(ssBases[i].value))
200
201
  nucleotideCounter++;
201
202
 
202
203
  ssModificationItems.append(
203
204
  ui.divH([
204
- ui.div([ui.label(ssBases[i].value.slice(-3) == '(o)' ? '' : String(nucleotideCounter))],
205
+ ui.div([ui.label(isOverhang(ssBases[i].value) ? '' : String(nucleotideCounter))],
205
206
  {style: {width: '20px'}})!,
206
207
  ui.block75([ssBases[i]])!,
207
208
  ui.div([ssPtoLinkages[i]])!,