@datagrok/sequence-translator 1.0.12 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +70739 -1417
- package/dist/package.js +70546 -4914
- package/package.json +6 -4
- package/scripts/build-monomer-lib.py +140 -0
- package/setup-unlink-clean.cmd +14 -0
- package/setup.cmd +14 -11
- package/setup.sh +37 -0
- package/src/__jest__/remote.test.ts +11 -3
- package/src/{ICDs.ts → autostart/ICDs.ts} +0 -0
- package/src/{IDPs.ts → autostart/IDPs.ts} +0 -0
- package/src/autostart/calculations.ts +37 -0
- package/src/autostart/constants.ts +49 -0
- package/src/autostart/registration.ts +101 -122
- package/src/{salts.ts → autostart/salts.ts} +0 -0
- package/src/{sources.ts → autostart/sources.ts} +0 -0
- package/src/{users.ts → autostart/users.ts} +0 -0
- package/src/axolabs/constants.ts +10 -10
- package/src/axolabs/define-pattern.ts +13 -12
- package/src/axolabs/draw-svg.ts +140 -201
- package/src/axolabs/helpers.ts +94 -0
- package/src/helpers.ts +28 -0
- package/src/main/main-view.ts +85 -87
- package/src/package.ts +25 -8
- package/src/structures-works/const.ts +18 -0
- package/src/structures-works/converters.ts +3 -3
- package/src/structures-works/from-monomers.ts +185 -32
- package/src/structures-works/map.ts +20 -35
- package/src/structures-works/mol-transformations.ts +295 -582
- package/src/structures-works/save-sense-antisense.ts +35 -11
- package/src/structures-works/sequence-codes-tools.ts +9 -13
- package/{test-SequenceTranslator-49ff04f38f57-128d0678.html → test-SequenceTranslator-e8c06047b7e7-eb4db608.html} +10 -7
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/sequence-translator",
|
|
3
3
|
"friendlyName": "Sequence Translator",
|
|
4
|
-
"version": "1.0.
|
|
4
|
+
"version": "1.0.14",
|
|
5
5
|
"author": {
|
|
6
|
-
"name": "
|
|
7
|
-
"email": "
|
|
6
|
+
"name": "Alexey Choposky",
|
|
7
|
+
"email": "achopovsky@datagrok.ai"
|
|
8
8
|
},
|
|
9
9
|
"description": "SequenceTranslator is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform, used to translate [oligonucleotide](https://en.wikipedia.org/wiki/Oligonucleotide) sequences between [different representations](https://github.com/datagrok-ai/public/tree/master/packages/SequenceTranslator#sequence-representations).",
|
|
10
10
|
"repository": {
|
|
@@ -13,8 +13,9 @@
|
|
|
13
13
|
"directory": "packages/SequenceTranslator"
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
|
-
"@datagrok-libraries/utils": "^1.
|
|
16
|
+
"@datagrok-libraries/utils": "^1.15.5",
|
|
17
17
|
"@types/react": "^18.0.15",
|
|
18
|
+
"@datagrok-libraries/bio": "^5.11.1",
|
|
18
19
|
"datagrok-api": "^1.7.2",
|
|
19
20
|
"datagrok-tools": "^4.1.2",
|
|
20
21
|
"npm": "^8.11.0",
|
|
@@ -25,6 +26,7 @@
|
|
|
25
26
|
},
|
|
26
27
|
"scripts": {
|
|
27
28
|
"link-api": "npm link datagrok-api",
|
|
29
|
+
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio",
|
|
28
30
|
"debug-sequencetranslator": "grok publish",
|
|
29
31
|
"release-sequencetranslator": "grok publish localhost --release",
|
|
30
32
|
"build-sequencetranslator": "webpack",
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
from io import TextIOWrapper
|
|
2
|
+
|
|
3
|
+
from rdkit.Chem import AllChem
|
|
4
|
+
from rdkit import Chem
|
|
5
|
+
|
|
6
|
+
import orjson
|
|
7
|
+
import json
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
|
|
11
|
+
from click_default_group import DefaultGroup
|
|
12
|
+
from rdkit.Chem.rdchem import Mol
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def smiles2molfile(smiles: str) -> str:
    """Convert a SMILES string into a molblock written in V3000 format.

    :param smiles: SMILES representation of the molecule.
    :return: Molblock text produced by RDKit with ``forceV3000=True``.
    :raises ValueError: If RDKit cannot parse ``smiles``.
    """
    mol: Mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit reports a parse failure by returning None; fail here with the
        # offending input rather than with an opaque error from MolToMolBlock.
        raise ValueError(f'Cannot parse SMILES: {smiles!r}')
    return Chem.MolToMolBlock(mol, forceV3000=True)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def molV2000toMolV3000(molV2K: str) -> str:
    """Re-encode a molblock in V3000 format.

    After conversion every occurrence of the text ``Pol`` in the result is
    replaced (see the return statement).

    :param molV2K: Source molblock text.
    :return: V3000 molblock text with the replacement applied.
    :raises ValueError: If RDKit cannot parse ``molV2K``.
    """
    mol: Mol = Chem.MolFromMolBlock(molV2K)
    if mol is None:
        # RDKit reports a parse failure by returning None.
        raise ValueError('Cannot parse molblock')
    res: str = Chem.MolToMolBlock(mol, forceV3000=True)
    # NOTE(review): presumably swaps a polymer placeholder atom label for oxygen - confirm.
    return res.replace('Pol', 'O ')
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Code lists grouped by code source, then by code type: codes[source][type] -> [code, ...].
CodesType = dict[str, dict[str, list[str]]]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Monomer:
    """One monomer entry of the generated library.

    Apart from ``symbol``, ``name``, ``smiles``, ``codes`` and ``molfile`` every
    field is filled with a fixed value (backbone RNA monomer authored by
    'SequenceTranslator' with two OH-capped R-groups).
    """

    def __init__(self,
                 symbol: str, name: str, smiles: str,
                 codes: 'CodesType',
                 molfile: 'str | None' = None):
        """
        :param symbol: Monomer symbol.
        :param name: Monomer name.
        :param smiles: SMILES of the monomer.
        :param codes: Codes of the monomer grouped by source and type.
        :param molfile: Ready-made molfile text. When omitted (or None) the
            molfile is generated from ``smiles``.
        """
        self.monomerType = 'Backbone'
        self.smiles = smiles
        self.name = name
        self.author = 'SequenceTranslator'
        # Only run the SMILES conversion when no molfile was supplied, so that
        # entries carrying their own molfile do not depend on parsable SMILES.
        self.molfile = smiles2molfile(smiles) if molfile is None else molfile
        self.naturalAnalog = ''
        self.rgroups = [
            {
                "capGroupSmiles": "O[*:1]",
                "alternateId": "R1-OH",
                "capGroupName": "OH",
                "label": "R1"
            },
            {
                "capGroupSmiles": "O[*:2]",
                "alternateId": "R2-OH",
                "capGroupName": "OH",
                "label": "R2"
            }]
        self.createDate = None
        self.id = 0
        self.polymerType = 'RNA'
        self.symbol = symbol
        self.codes: CodesType = codes

    @staticmethod
    def from_json(src_json: dict) -> 'Monomer':
        """Build a monomer from a JSON object.

        Reads the keys 'symbol', 'name', 'smiles', 'codes' and 'molfile'; the
        stored molfile is used as is instead of being regenerated from SMILES.
        A null 'molfile' falls back to generation from 'smiles'.

        :raises KeyError: If one of the keys is missing.
        """
        return Monomer(src_json['symbol'], src_json['name'], src_json['smiles'], src_json['codes'],
                       molfile=src_json['molfile'])

    def to_json(self):
        """Return the monomer as a JSON-serializable dict of all its fields."""
        return {
            'monomerType': self.monomerType,
            'smiles': self.smiles,
            'name': self.name,
            'author': self.author,
            'molfile': self.molfile,
            'naturalAnalog': self.naturalAnalog,
            'rgroups': self.rgroups,
            'createDate': self.createDate,
            'id': self.id,
            'polymerType': self.polymerType,
            'symbol': self.symbol,
            'codes': self.codes,
        }
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def codes2monomers(codes_json: {}) -> dict[str, Monomer]:
    """Collect monomers from a nested codes JSON object.

    The input is walked as source -> code type -> code -> monomer description.
    A monomer is created the first time its 'name' is seen (the name is used
    both as symbol and as name, 'SMILES' supplies the structure); every code is
    then appended to that monomer's ``codes[source][code type]`` list.

    :param codes_json: Nested mapping described above.
    :return: Monomers keyed by name, in order of first appearance.
    """
    by_name: dict[str, Monomer] = {}
    for source, types_of_source in codes_json.items():
        for code_type, entries in types_of_source.items():
            for code, entry in entries.items():
                key = entry['name']
                monomer = by_name.get(key)
                if monomer is None:
                    monomer = Monomer(key, key, entry['SMILES'], {})
                    by_name[key] = monomer
                monomer.codes.setdefault(source, {}).setdefault(code_type, []).append(code)
    return by_name
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# Command group of the script; 'main' runs when no sub-command is given.
@click.group(cls=DefaultGroup, default='main')
def cli():
    pass


# Builds the monomer library: monomers derived from the --initial codes file,
# overridden/extended by the monomers of every --add library, written as
# indented JSON to --lib.
@cli.command()
@click.pass_context
@click.option('--initial', 'initial_f', required=True,
              help='Initial monomers source file.',
              type=click.File('r', 'utf-8'))
@click.option('--lib', 'lib_f', required=True,
              help='Output library (HELM format) file.',
              type=click.File('wb', 'utf-8'))
@click.option('--add', 'add_f_list', multiple=True,
              help='Additional libraries to build.',
              type=click.File('r', 'utf-8'))
def main(ctx, initial_f: TextIOWrapper, lib_f, add_f_list: list[TextIOWrapper]):
    # --initial and --lib are required: both are used unconditionally below, and
    # omitting one used to surface as an AttributeError on None.
    monomers: dict[str, Monomer] = codes2monomers(orjson.loads(initial_f.read()))

    # Monomers of additional libraries are keyed by name, so a later entry
    # replaces an earlier one with the same name.
    for add_f in add_f_list:
        for add_m in orjson.loads(add_f.read()):
            m = Monomer.from_json(add_m)
            monomers[m.name] = m

    lib_json = [m.to_json() for m in monomers.values()]

    # orjson.dumps returns bytes, which is why lib_f is opened in binary mode
    # (and is not a text stream).
    lib_f.write(orjson.dumps(lib_json, option=orjson.OPT_INDENT_2))
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
if __name__ == '__main__':
|
|
140
|
+
cli()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
set package_dir=%cd%
|
|
2
|
+
|
|
3
|
+
set dirs=^
|
|
4
|
+
\..\..\js-api\ ^
|
|
5
|
+
\..\..\libraries\utils\ ^
|
|
6
|
+
\..\..\libraries\bio\ ^
|
|
7
|
+
\
|
|
8
|
+
|
|
9
|
+
call npm uninstall -g datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio
|
|
10
|
+
|
|
11
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & rmdir /s /q node_modules
|
|
12
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & rmdir /s /q dist
|
|
13
|
+
|
|
14
|
+
rem for %%p in (%dirs%) do cd %package_dir%\%%p & del "package-lock.json"
|
package/setup.cmd
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
call setup-unlink-clean.cmd
|
|
2
|
+
|
|
3
|
+
set package_dir=%cd%
|
|
4
|
+
|
|
5
|
+
set dirs=^
|
|
6
|
+
\..\..\js-api\ ^
|
|
7
|
+
\..\..\libraries\utils\ ^
|
|
8
|
+
\..\..\libraries\bio\ ^
|
|
9
|
+
\
|
|
10
|
+
|
|
11
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & call npm install
|
|
12
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & call npm link
|
|
13
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & call npm run link-all
|
|
14
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & call npm run build
|
package/setup.sh
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/bin/bash

# Installs, links and builds the local js-api and libraries this package
# depends on, then links them into the package itself.

./setup-unlink-clean.sh

GREEN='\e[0;32m'
NO_COLOR='\e[0m'

package_dir=$(pwd)

# Paths are relative to the package directory.
dirs=(
  "../../js-api/"
  "../../libraries/utils/"
  "../../libraries/bio/"
)

# First pass: install dependencies and register every directory with npm link.
for dir in "${dirs[@]}"; do
  # Stop if a directory is missing; otherwise npm would run in the wrong place.
  cd "$package_dir" || exit 1
  cd "$dir" || exit 1
  echo -e "$GREEN npm install in $(pwd) $NO_COLOR"
  npm install
  echo -e "$GREEN npm link in $(pwd) $NO_COLOR"
  npm link
done

# Second pass: link the local dependencies into each library and build it.
for dir in "${dirs[@]}"; do
  cd "$package_dir" || exit 1
  cd "$dir" || exit 1
  # link-all is not run for js-api.
  if [ "$dir" != "../../js-api/" ]; then
    echo -e "$GREEN npm link-all in $(pwd) $NO_COLOR"
    npm run link-all
  fi
  echo -e "$GREEN npm run build in $(pwd) $NO_COLOR"
  npm run build || exit
done

cd "$package_dir" || exit 1
npm run link-all
|
|
@@ -43,27 +43,35 @@ it('TEST', async () => {
|
|
|
43
43
|
return new Promise<object>((resolve, reject) => {
|
|
44
44
|
(<any>window).grok.functions.eval(targetPackage + ':test()').then((df: any) => {
|
|
45
45
|
const cStatus = df.columns.byName('success');
|
|
46
|
+
const cSkipped = df.columns.byName('skipped');
|
|
46
47
|
const cMessage = df.columns.byName('result');
|
|
47
48
|
const cCat = df.columns.byName('category');
|
|
48
49
|
const cName = df.columns.byName('name');
|
|
49
50
|
const cTime = df.columns.byName('ms');
|
|
50
51
|
let failed = false;
|
|
52
|
+
let skipReport = '';
|
|
51
53
|
let passReport = '';
|
|
52
54
|
let failReport = '';
|
|
53
55
|
for (let i = 0; i < df.rowCount; i++) {
|
|
54
56
|
if (cStatus.get(i)) {
|
|
55
|
-
|
|
57
|
+
if (cSkipped.get(i)) {
|
|
58
|
+
skipReport += `Test result : Skipped : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
59
|
+
} else {
|
|
60
|
+
passReport += `Test result : Success : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
61
|
+
}
|
|
56
62
|
} else {
|
|
57
63
|
failed = true;
|
|
58
64
|
failReport += `Test result : Failed : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
59
65
|
}
|
|
60
66
|
}
|
|
61
|
-
resolve({failReport, passReport, failed});
|
|
67
|
+
resolve({failReport, skipReport, passReport, failed});
|
|
62
68
|
}).catch((e: any) => reject(e));
|
|
63
69
|
});
|
|
64
70
|
}, targetPackage);
|
|
65
71
|
// @ts-ignore
|
|
66
72
|
console.log(r.passReport);
|
|
67
73
|
// @ts-ignore
|
|
74
|
+
console.log(r.skipReport);
|
|
75
|
+
// @ts-ignore
|
|
68
76
|
expect(r.failed).checkOutput(false, r.failReport);
|
|
69
|
-
},
|
|
77
|
+
}, 7200000);
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {sortByStringLengthInDescendingOrder} from '../helpers';
|
|
3
|
+
import {MODIFICATIONS} from '../structures-works/map';
|
|
4
|
+
|
|
5
|
+
/**
 * Salt mass for row `i`: molecular weight of the row's salt multiplied by the row's equivalents.
 *
 * The salt name stored in `saltCol` at row `i` is looked up in `saltNames`; its position there is
 * used as the row index into `molWeightCol`.
 *
 * @returns `DG.FLOAT_NULL` when the salt name is not in `saltNames`, when the salt's weight is
 * `DG.FLOAT_NULL`, or when the equivalents value of row `i` is `DG.INT_NULL`; the product otherwise.
 */
export function saltMass(
  saltNames: string[], molWeightCol: DG.Column, equivalentsCol: DG.Column, i: number, saltCol: DG.Column,
): number {
  const saltIdx = saltNames.indexOf(saltCol.get(i));
  if (saltIdx == -1)
    return DG.FLOAT_NULL;
  if (molWeightCol.get(saltIdx) == DG.FLOAT_NULL)
    return DG.FLOAT_NULL;
  if (equivalentsCol.get(i) == DG.INT_NULL)
    return DG.FLOAT_NULL;
  return molWeightCol.get(saltIdx) * equivalentsCol.get(i);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Molecular weight of the salt named in `saltCol` at row `i`.
 *
 * The name's position in `saltNamesList` is used as the row index into `molWeightCol`.
 *
 * @returns The stored weight, or `DG.FLOAT_NULL` when the name is not in `saltNamesList`.
 */
export function saltMolWeigth(saltNamesList: string[], saltCol: DG.Column, molWeightCol: DG.Column, i: number): number {
  const saltIdx = saltNamesList.indexOf(saltCol.get(i));
  if (saltIdx == -1)
    return DG.FLOAT_NULL;
  return molWeightCol.get(saltIdx);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Batch molecular weight for row `i`: compound molecular weight plus salt mass.
 *
 * @returns `DG.FLOAT_NULL` when either column's string value at row `i` is empty, the sum otherwise.
 */
export function batchMolWeight(compoundMolWeightCol: DG.Column, saltMassCol: DG.Column, i: number): number {
  if (compoundMolWeightCol.getString(i) == '')
    return DG.FLOAT_NULL;
  if (saltMassCol.getString(i) == '')
    return DG.FLOAT_NULL;
  return compoundMolWeightCol.get(i) + saltMassCol.get(i);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Computes the molecular weight of a sequence by splitting it, left to right, into known codes
 * and summing the weights of those codes.
 *
 * Candidate codes are the keys of `weightsObj` (ordered by `sortByStringLengthInDescendingOrder`)
 * followed by the keys of `MODIFICATIONS`; at every position the first candidate that matches wins.
 *
 * @param sequence Sequence to weigh.
 * @param weightsObj Weight of every code, keyed by code.
 * @returns Sum of the matched codes' weights minus 61.97.
 * @throws Error when some position of `sequence` does not start with any known code.
 */
export function molecularWeight(sequence: string, weightsObj: {[index: string]: number}): number {
  // Empty codes are dropped: they would match everywhere without consuming input,
  // so the loop below would never advance.
  const codes = sortByStringLengthInDescendingOrder(Object.keys(weightsObj))
    .concat(Object.keys(MODIFICATIONS))
    .filter((code) => code.length > 0);
  let weight = 0;
  let i = 0;
  while (i < sequence.length) {
    const matchedCode = codes.find((code) => code == sequence.slice(i, i + code.length));
    if (matchedCode === undefined)
      throw new Error(`Unrecognized code at position ${i} of sequence '${sequence}'`);
    // NOTE(review): a code taken from MODIFICATIONS that has no entry in weightsObj
    // contributes `undefined` here and turns the result into NaN - confirm intended.
    weight += weightsObj[matchedCode];
    i += matchedCode.length;
  }
  // NOTE(review): 61.97 is presumably a terminal-group mass correction - confirm.
  return weight - 61.97;
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
export const SEQUENCE_TYPES = {
|
|
2
|
+
SENSE_STRAND: 'SS',
|
|
3
|
+
ANTISENSE_STRAND: 'AS',
|
|
4
|
+
DUPLEX: 'Duplex',
|
|
5
|
+
TRIPLEX: 'Triplex',
|
|
6
|
+
DIMER: 'Dimer',
|
|
7
|
+
};
|
|
8
|
+
|
|
9
|
+
export const CELL_STRUCTURE = {
|
|
10
|
+
DUPLEX: {
|
|
11
|
+
BEFORE_SS: 'SS ',
|
|
12
|
+
BEFORE_AS: '\r\nAS ',
|
|
13
|
+
},
|
|
14
|
+
TRIPLEX_OR_DIMER: {
|
|
15
|
+
BEFORE_SS: 'SS ',
|
|
16
|
+
BEFORE_AS1: '\r\nAS1 ',
|
|
17
|
+
BEFORE_AS2: '\r\nAS2 ',
|
|
18
|
+
},
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
export const COL_NAMES = {
|
|
22
|
+
CHEMISTRY: 'Chemistry',
|
|
23
|
+
NUMBER: 'Number',
|
|
24
|
+
TYPE: 'Type',
|
|
25
|
+
CHEMISTRY_NAME: 'Chemistry Name',
|
|
26
|
+
INTERNAL_COMPOUND_ID: 'Internal compound ID',
|
|
27
|
+
IDP: 'IDP',
|
|
28
|
+
SEQUENCE: 'Sequence',
|
|
29
|
+
COMPOUND_NAME: 'Compound Name',
|
|
30
|
+
COMPOUND_COMMENTS: 'Compound Comments',
|
|
31
|
+
SALT: 'Salt',
|
|
32
|
+
EQUIVALENTS: 'Equivalents',
|
|
33
|
+
PURITY: 'Purity',
|
|
34
|
+
COMPOUND_MOL_WEIGHT: 'Cpd MW',
|
|
35
|
+
SALT_MOL_WEIGHT: 'Salt MW',
|
|
36
|
+
SALT_MASS: 'Salt mass',
|
|
37
|
+
BATCH_MOL_WEIGHT: 'Batch MW',
|
|
38
|
+
SOURCE: 'Source',
|
|
39
|
+
ICD: 'ICD',
|
|
40
|
+
OWNER: 'Owner',
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
export const GENERATED_COL_NAMES = [
|
|
44
|
+
COL_NAMES.COMPOUND_NAME,
|
|
45
|
+
COL_NAMES.COMPOUND_COMMENTS,
|
|
46
|
+
COL_NAMES.COMPOUND_MOL_WEIGHT,
|
|
47
|
+
COL_NAMES.SALT_MASS,
|
|
48
|
+
COL_NAMES.BATCH_MOL_WEIGHT,
|
|
49
|
+
];
|