@datagrok/sequence-translator 1.0.14 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +16 -3
- package/dist/package-test.js +715 -316
- package/dist/package.js +684 -301
- package/package.json +25 -22
- package/scripts/build-monomer-lib.py +52 -14
- package/src/apps/oligo-sd-file-app.ts +58 -0
- package/src/autostart/calculations.ts +9 -6
- package/src/autostart/constants.ts +0 -12
- package/src/autostart/registration.ts +121 -149
- package/src/axolabs/constants.ts +4 -4
- package/src/main/main-view.ts +90 -49
- package/src/package.ts +52 -5
- package/src/structures-works/const.ts +3 -16
- package/src/structures-works/converters.ts +28 -26
- package/src/structures-works/mol-transformations.ts +73 -75
- package/src/utils/parse.ts +27 -0
- package/src/utils/sdf-add-columns.ts +118 -0
- package/src/utils/sdf-save-table.ts +56 -0
- package/{test-SequenceTranslator-e8c06047b7e7-eb4db608.html → test-SequenceTranslator-6288c2fbe346-cce4ac1d.html} +6 -6
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/sequence-translator",
|
|
3
3
|
"friendlyName": "Sequence Translator",
|
|
4
|
-
"version": "1.0.
|
|
4
|
+
"version": "1.0.16",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Alexey Choposky",
|
|
7
7
|
"email": "achopovsky@datagrok.ai"
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
"directory": "packages/SequenceTranslator"
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
|
-
"@datagrok-libraries/utils": "^1.
|
|
16
|
+
"@datagrok-libraries/utils": "^1.17.2",
|
|
17
17
|
"@types/react": "^18.0.15",
|
|
18
18
|
"@datagrok-libraries/bio": "^5.11.1",
|
|
19
|
-
"datagrok-api": "^1.
|
|
19
|
+
"datagrok-api": "^1.8.2",
|
|
20
20
|
"datagrok-tools": "^4.1.2",
|
|
21
21
|
"npm": "^8.11.0",
|
|
22
22
|
"openchemlib": "6.0.1",
|
|
@@ -24,25 +24,6 @@
|
|
|
24
24
|
"ts-loader": "^9.3.1",
|
|
25
25
|
"typescript": "^4.7.4"
|
|
26
26
|
},
|
|
27
|
-
"scripts": {
|
|
28
|
-
"link-api": "npm link datagrok-api",
|
|
29
|
-
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio",
|
|
30
|
-
"debug-sequencetranslator": "grok publish",
|
|
31
|
-
"release-sequencetranslator": "grok publish localhost --release",
|
|
32
|
-
"build-sequencetranslator": "webpack",
|
|
33
|
-
"build": "webpack",
|
|
34
|
-
"debug-sequencetranslator-public": "grok publish public",
|
|
35
|
-
"release-sequencetranslator-public": "grok publish public --release",
|
|
36
|
-
"debug-sequencetranslator-local": "grok publish local",
|
|
37
|
-
"release-sequencetranslator-local": "grok publish local --release",
|
|
38
|
-
"test": "jest",
|
|
39
|
-
"test-dev": "set HOST=dev && jest",
|
|
40
|
-
"test-local": "set HOST=localhost && jest"
|
|
41
|
-
},
|
|
42
|
-
"sources": [
|
|
43
|
-
"css/style.css",
|
|
44
|
-
"vendors/openchemlib-full.js"
|
|
45
|
-
],
|
|
46
27
|
"devDependencies": {
|
|
47
28
|
"@types/jest": "^27.0.0",
|
|
48
29
|
"@types/jquery": "^3.5.14",
|
|
@@ -62,5 +43,27 @@
|
|
|
62
43
|
"@types/node-fetch": "^2.6.2",
|
|
63
44
|
"node-fetch": "^2.6.7"
|
|
64
45
|
},
|
|
46
|
+
"grokDependencies": {
|
|
47
|
+
"@datagrok/chem": "1.3.32"
|
|
48
|
+
},
|
|
49
|
+
"scripts": {
|
|
50
|
+
"link-api": "npm link datagrok-api",
|
|
51
|
+
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio",
|
|
52
|
+
"debug-sequencetranslator": "grok publish",
|
|
53
|
+
"release-sequencetranslator": "grok publish localhost --release",
|
|
54
|
+
"build-sequencetranslator": "webpack",
|
|
55
|
+
"build": "webpack",
|
|
56
|
+
"debug-sequencetranslator-public": "grok publish public",
|
|
57
|
+
"release-sequencetranslator-public": "grok publish public --release",
|
|
58
|
+
"debug-sequencetranslator-local": "grok publish local",
|
|
59
|
+
"release-sequencetranslator-local": "grok publish local --release",
|
|
60
|
+
"test": "jest",
|
|
61
|
+
"test-dev": "set HOST=dev && jest",
|
|
62
|
+
"test-local": "set HOST=localhost && jest"
|
|
63
|
+
},
|
|
64
|
+
"sources": [
|
|
65
|
+
"css/style.css",
|
|
66
|
+
"vendors/openchemlib-full.js"
|
|
67
|
+
],
|
|
65
68
|
"category": "Bioinformatics"
|
|
66
69
|
}
|
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
from io import TextIOWrapper
|
|
2
2
|
|
|
3
|
-
from rdkit.Chem import AllChem
|
|
4
3
|
from rdkit import Chem
|
|
5
4
|
|
|
6
5
|
import orjson
|
|
7
|
-
import json
|
|
8
6
|
|
|
9
7
|
import click
|
|
10
8
|
|
|
@@ -12,16 +10,55 @@ from click_default_group import DefaultGroup
|
|
|
12
10
|
from rdkit.Chem.rdchem import Mol
|
|
13
11
|
|
|
14
12
|
|
|
15
|
-
def
|
|
16
|
-
|
|
13
|
+
def molAddCollection(mol: Mol, name: str, title: str = None) -> str:
|
|
14
|
+
"""
|
|
15
|
+
Get and postprocess (atom's CFG, title, e.t.c.) molblock
|
|
16
|
+
:param mol: Mol molecule structure / object
|
|
17
|
+
:param name: Monomer name to add to molblock title
|
|
18
|
+
:param title: title to replace in Chem.MolToMolBlock() string output
|
|
19
|
+
:return: molblock string
|
|
20
|
+
"""
|
|
17
21
|
res: str = Chem.MolToMolBlock(mol, forceV3000=True) # MolToMolFile
|
|
18
|
-
return res
|
|
19
22
|
|
|
23
|
+
mb_line_list: list[str] = res.split('\n')
|
|
24
|
+
if title:
|
|
25
|
+
mb_line_list[1] = title
|
|
26
|
+
|
|
27
|
+
if name and name not in mb_line_list[1]:
|
|
28
|
+
mb_line_list[1] += '|' + name
|
|
29
|
+
|
|
30
|
+
end_bond_idx: int = mb_line_list.index('M V30 END BOND')
|
|
31
|
+
chirality = [atom.GetChiralTag() for atom in mol.GetAtoms()]
|
|
32
|
+
begin_atom_idx = mb_line_list.index('M V30 BEGIN ATOM')
|
|
33
|
+
end_atom_idx = mb_line_list.index('M V30 END ATOM')
|
|
34
|
+
for atom_idx in range(1, end_atom_idx - begin_atom_idx):
|
|
35
|
+
line_idx = begin_atom_idx + atom_idx
|
|
36
|
+
atom_ch = chirality[atom_idx - 1]
|
|
37
|
+
if atom_ch != Chem.rdchem.CHI_UNSPECIFIED:
|
|
38
|
+
mb_line_list[line_idx] += " CFG={0}".format(int(atom_ch))
|
|
39
|
+
|
|
40
|
+
steabs: list[int] = [i + 1 for (i, ch) in enumerate(chirality) if ch != Chem.rdchem.CHI_UNSPECIFIED]
|
|
41
|
+
if len(steabs) > 0:
|
|
42
|
+
steabs_str: str = "M V30 MDLV30/STEABS ATOMS=({count} {list})" \
|
|
43
|
+
.format(count=len(steabs), list=' '.join([str(idx) for idx in steabs]))
|
|
44
|
+
|
|
45
|
+
mb_line_list = mb_line_list[:(end_bond_idx + 1)] + \
|
|
46
|
+
["M V30 BEGIN COLLECTION", steabs_str, "M V30 END COLLECTION"] + \
|
|
47
|
+
mb_line_list[(end_bond_idx + 1):]
|
|
20
48
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
49
|
+
return '\n'.join(mb_line_list)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def molfile2molfile(src_mol: str, name: str) -> str:
|
|
53
|
+
mol: Mol = Chem.MolFromMolBlock(src_mol)
|
|
54
|
+
src_mf_lines = src_mol.split('\n')
|
|
55
|
+
title = src_mf_lines[1]
|
|
56
|
+
return molAddCollection(mol, name, title=title)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def smiles2molfile(smiles: str, name: str) -> str:
|
|
60
|
+
mol: Mol = Chem.MolFromSmiles(smiles)
|
|
61
|
+
return molAddCollection(mol, name)
|
|
25
62
|
|
|
26
63
|
|
|
27
64
|
CodesType = dict[str, dict[str, list[str]]]
|
|
@@ -29,13 +66,13 @@ CodesType = dict[str, dict[str, list[str]]]
|
|
|
29
66
|
|
|
30
67
|
class Monomer:
|
|
31
68
|
def __init__(self,
|
|
32
|
-
symbol: str, name: str, smiles: str,
|
|
69
|
+
symbol: str, name: str, molfile: str, smiles: str,
|
|
33
70
|
codes: CodesType):
|
|
34
71
|
self.monomerType = 'Backbone'
|
|
35
72
|
self.smiles = smiles
|
|
36
73
|
self.name = name
|
|
37
74
|
self.author = 'SequenceTranslator'
|
|
38
|
-
self.molfile = smiles2molfile(smiles)
|
|
75
|
+
self.molfile = molfile2molfile(molfile, name) if molfile else smiles2molfile(smiles, name)
|
|
39
76
|
self.naturalAnalog = ''
|
|
40
77
|
self.rgroups = [
|
|
41
78
|
{
|
|
@@ -58,8 +95,9 @@ class Monomer:
|
|
|
58
95
|
|
|
59
96
|
@staticmethod
|
|
60
97
|
def from_json(src_json: {}):
|
|
61
|
-
obj = Monomer(src_json['symbol'], src_json['name'],
|
|
62
|
-
|
|
98
|
+
obj = Monomer(src_json['symbol'], src_json['name'],
|
|
99
|
+
src_json['molfile'], src_json['smiles'],
|
|
100
|
+
src_json['codes'])
|
|
63
101
|
return obj
|
|
64
102
|
|
|
65
103
|
def to_json(self):
|
|
@@ -89,7 +127,7 @@ def codes2monomers(codes_json: {}) -> dict[str, Monomer]:
|
|
|
89
127
|
symbol = monomer_json['name']
|
|
90
128
|
name = monomer_json['name']
|
|
91
129
|
smiles = monomer_json['SMILES']
|
|
92
|
-
monomers_res[monomer_name] = Monomer(symbol, name, smiles, {})
|
|
130
|
+
monomers_res[monomer_name] = Monomer(symbol, name, None, smiles, {})
|
|
93
131
|
codes = monomers_res[monomer_name].codes
|
|
94
132
|
if codes_src not in codes:
|
|
95
133
|
codes[codes_src] = {}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import {_package} from '../package';
|
|
5
|
+
import {FileSource} from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
export class OligoSdFileApp {
|
|
8
|
+
df!: DG.DataFrame;
|
|
9
|
+
|
|
10
|
+
constructor() {}
|
|
11
|
+
|
|
12
|
+
async init(srcDf?: DG.DataFrame): Promise<void> {
|
|
13
|
+
let dataDf: DG.DataFrame;
|
|
14
|
+
if (srcDf) {
|
|
15
|
+
dataDf = srcDf;
|
|
16
|
+
} else {
|
|
17
|
+
const dfFn: string = 'System:AppData/SequenceTranslator/test input_Nov28_Duplex_dimer.xlsx';
|
|
18
|
+
dataDf = await this.loadData(dfFn);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
this.setData(dataDf);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
async loadData(dfFn: string): Promise<DG.DataFrame> {
|
|
25
|
+
//
|
|
26
|
+
const dataDf: DG.DataFrame = await grok.data.files.openTable(dfFn);
|
|
27
|
+
return dataDf;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
async setData(df: DG.DataFrame): Promise<void> {
|
|
31
|
+
if (this.viewed) {
|
|
32
|
+
await this.destroyView();
|
|
33
|
+
this.viewed = false;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
this.df = df;
|
|
37
|
+
|
|
38
|
+
if (!this.viewed) {
|
|
39
|
+
await this.buildView();
|
|
40
|
+
this.viewed = true;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
private viewed: boolean = false;
|
|
45
|
+
private tView?: DG.TableView;
|
|
46
|
+
|
|
47
|
+
async destroyView(): Promise<void> {
|
|
48
|
+
this.tView!.close();
|
|
49
|
+
delete this.tView;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
async buildView(): Promise<void> {
|
|
53
|
+
console.debug('SequenceTranslator: OligoSdFileApp.buildView() ');
|
|
54
|
+
|
|
55
|
+
this.tView = grok.shell.addTableView(this.df);
|
|
56
|
+
this.tView.path = this.tView.basePath = 'func/SequenceTranslator.oligoSdFileApp';
|
|
57
|
+
}
|
|
58
|
+
}
|
|
@@ -1,21 +1,24 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
2
3
|
import {sortByStringLengthInDescendingOrder} from '../helpers';
|
|
3
4
|
import {MODIFICATIONS} from '../structures-works/map';
|
|
4
5
|
|
|
5
6
|
export function saltMass(
|
|
6
|
-
saltNames: string[],
|
|
7
|
+
saltNames: string[], saltsMolWeightList: number[], equivalentsCol: DG.Column, i: number, saltCol: DG.Column
|
|
7
8
|
): number {
|
|
8
9
|
const saltRowIndex = saltNames.indexOf(saltCol.get(i));
|
|
9
10
|
return (
|
|
10
|
-
saltRowIndex == -1 ||
|
|
11
|
+
saltRowIndex == -1 || saltsMolWeightList[saltRowIndex] == DG.FLOAT_NULL || equivalentsCol.get(i) == DG.INT_NULL
|
|
11
12
|
) ?
|
|
12
13
|
DG.FLOAT_NULL :
|
|
13
|
-
|
|
14
|
+
saltsMolWeightList[saltRowIndex] * equivalentsCol.get(i);
|
|
14
15
|
}
|
|
15
16
|
|
|
16
|
-
export function saltMolWeigth(
|
|
17
|
+
export function saltMolWeigth(
|
|
18
|
+
saltNamesList: string[], saltCol: DG.Column, saltsMolWeightList: number[], i: number
|
|
19
|
+
): number {
|
|
17
20
|
const saltRowIndex = saltNamesList.indexOf(saltCol.get(i));
|
|
18
|
-
return (saltRowIndex == -1) ? DG.FLOAT_NULL :
|
|
21
|
+
return (saltRowIndex == -1) ? DG.FLOAT_NULL : saltsMolWeightList[saltRowIndex];
|
|
19
22
|
}
|
|
20
23
|
|
|
21
24
|
export function batchMolWeight(compoundMolWeightCol: DG.Column, saltMassCol: DG.Column, i: number): number {
|
|
@@ -24,7 +27,7 @@ export function batchMolWeight(compoundMolWeightCol: DG.Column, saltMassCol: DG.
|
|
|
24
27
|
compoundMolWeightCol.get(i) + saltMassCol.get(i);
|
|
25
28
|
}
|
|
26
29
|
|
|
27
|
-
export function molecularWeight(sequence: string, weightsObj: {[index: string]: number}): number {
|
|
30
|
+
export function molecularWeight(sequence: string, weightsObj: { [index: string]: number }): number {
|
|
28
31
|
const codes = sortByStringLengthInDescendingOrder(Object.keys(weightsObj)).concat(Object.keys(MODIFICATIONS));
|
|
29
32
|
let weight = 0;
|
|
30
33
|
let i = 0;
|
|
@@ -6,18 +6,6 @@ export const SEQUENCE_TYPES = {
|
|
|
6
6
|
DIMER: 'Dimer',
|
|
7
7
|
};
|
|
8
8
|
|
|
9
|
-
export const CELL_STRUCTURE = {
|
|
10
|
-
DUPLEX: {
|
|
11
|
-
BEFORE_SS: 'SS ',
|
|
12
|
-
BEFORE_AS: '\r\nAS ',
|
|
13
|
-
},
|
|
14
|
-
TRIPLEX_OR_DIMER: {
|
|
15
|
-
BEFORE_SS: 'SS ',
|
|
16
|
-
BEFORE_AS1: '\r\nAS1 ',
|
|
17
|
-
BEFORE_AS2: '\r\nAS2 ',
|
|
18
|
-
},
|
|
19
|
-
};
|
|
20
|
-
|
|
21
9
|
export const COL_NAMES = {
|
|
22
10
|
CHEMISTRY: 'Chemistry',
|
|
23
11
|
NUMBER: 'Number',
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import {
|
|
5
|
-
|
|
4
|
+
import {
|
|
5
|
+
siRnaBioSpringToGcrs, siRnaAxolabsToGcrs, gcrsToNucleotides, asoGapmersBioSpringToGcrs, gcrsToMermade12,
|
|
6
|
+
siRnaNucleotidesToGcrs
|
|
7
|
+
} from '../structures-works/converters';
|
|
6
8
|
import {weightsObj, SYNTHESIZERS} from '../structures-works/map';
|
|
7
|
-
import {SEQUENCE_TYPES, COL_NAMES, GENERATED_COL_NAMES
|
|
9
|
+
import {SEQUENCE_TYPES, COL_NAMES, GENERATED_COL_NAMES} from './constants';
|
|
8
10
|
import {saltMass, saltMolWeigth, molecularWeight, batchMolWeight} from './calculations';
|
|
9
11
|
import {isValidSequence} from '../structures-works/sequence-codes-tools';
|
|
10
12
|
import {sequenceToMolV3000} from '../structures-works/from-monomers';
|
|
@@ -17,109 +19,137 @@ import {ICDS} from './ICDs';
|
|
|
17
19
|
import {SOURCES} from './sources';
|
|
18
20
|
import {IDPS} from './IDPs';
|
|
19
21
|
|
|
22
|
+
import {sdfAddColumns} from '../utils/sdf-add-columns';
|
|
23
|
+
import {sdfSaveTable} from '../utils/sdf-save-table';
|
|
20
24
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
const enum SEQ_TYPE {
|
|
26
|
+
AS = 'AS',
|
|
27
|
+
SS = 'SS',
|
|
28
|
+
DUPLEX = 'Duplex',
|
|
29
|
+
DIMER = 'Dimer',
|
|
26
30
|
}
|
|
27
31
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
.split(CELL_STRUCTURE.TRIPLEX_OR_DIMER.BEFORE_AS2);
|
|
34
|
-
return {SS: arr1[0], AS1: arr2[0], AS2: arr2[1]};
|
|
32
|
+
/** Computable classes of sequence types */
|
|
33
|
+
const enum SEQ_TYPE_CLASS {
|
|
34
|
+
AS_OR_SS,
|
|
35
|
+
DUPLEX,
|
|
36
|
+
DIMER,
|
|
35
37
|
}
|
|
36
38
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
/** Style used for a cell with invalid value */
|
|
40
|
+
const errorStyle = {
|
|
41
|
+
'background-color': '#ff8080',
|
|
42
|
+
'width': '100%',
|
|
43
|
+
'height': '100%',
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
export function sdfHandleErrorUI(msgPrefix: string, df: DG.DataFrame, rowI: number, err: any) {
|
|
47
|
+
const errStr: string = err.toString();
|
|
48
|
+
const errMsg: string = msgPrefix + `row #${rowI + 1}, name: '${df.get('Chemistry Name', rowI)}', ` +
|
|
49
|
+
`type: ${df.get('Type', rowI)} error: ${errStr}.`;
|
|
50
|
+
grok.shell.warning(errMsg);
|
|
51
|
+
}
|
|
42
52
|
|
|
43
|
-
|
|
44
|
-
|
|
53
|
+
// todo: use a dictionary instead?
|
|
54
|
+
function getActualTypeClass(actualType: string): SEQ_TYPE_CLASS {
|
|
55
|
+
if (actualType === SEQ_TYPE.AS || actualType === SEQ_TYPE.SS)
|
|
56
|
+
return SEQ_TYPE_CLASS.AS_OR_SS;
|
|
57
|
+
else if (actualType === SEQ_TYPE.DIMER)
|
|
58
|
+
return SEQ_TYPE_CLASS.DIMER;
|
|
59
|
+
else if (actualType === SEQ_TYPE.DUPLEX)
|
|
60
|
+
return SEQ_TYPE_CLASS.DUPLEX;
|
|
61
|
+
else
|
|
62
|
+
throw new Error('Some types in \'Types\' column are invalid ');
|
|
63
|
+
}
|
|
45
64
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
result += `${linkStrandsV3000({senseStrands: [ss], antiStrands: [as1, as2]}, true)}\n\n`;
|
|
64
|
-
}
|
|
65
|
+
function inferTypeClassFromSequence(seq: string): SEQ_TYPE_CLASS {
|
|
66
|
+
const lines = seq.split('\n');
|
|
67
|
+
if (lines.length === 1)
|
|
68
|
+
return SEQ_TYPE_CLASS.AS_OR_SS;
|
|
69
|
+
else if (lines.length === 2)
|
|
70
|
+
return SEQ_TYPE_CLASS.DUPLEX;
|
|
71
|
+
else if (lines.length === 3)
|
|
72
|
+
return SEQ_TYPE_CLASS.DIMER;
|
|
73
|
+
else
|
|
74
|
+
throw new Error('Wrong formatting of sequences in \'Sequence\' column');
|
|
75
|
+
//todo: throw in the case of wrong formatting
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/** Compare type specified in 'Type' column to that computed from 'Sequence' column */
|
|
79
|
+
function validateType(actualType: string, seq: string): boolean {
|
|
80
|
+
return getActualTypeClass(actualType) === inferTypeClassFromSequence(seq);
|
|
81
|
+
}
|
|
65
82
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
83
|
+
function oligoSdFileGrid(view: DG.TableView): void {
|
|
84
|
+
const typeColName = 'Type';
|
|
85
|
+
const seqColName = 'Sequence';
|
|
86
|
+
const grid = view.grid;
|
|
87
|
+
const df = view.dataFrame;
|
|
88
|
+
const typeCol = df.getCol(typeColName);
|
|
89
|
+
grid.columns.byName(typeColName)!.cellType = 'html';
|
|
90
|
+
const seqCol = df.getCol(seqColName);
|
|
91
|
+
grid.onCellPrepare((gridCell: DG.GridCell) => {
|
|
92
|
+
if (gridCell.isTableCell && gridCell.gridColumn.column!.name === typeColName) {
|
|
93
|
+
const isValidType = validateType(gridCell.cell.value, seqCol.get(gridCell.tableRow!.idx));
|
|
94
|
+
gridCell.style.element = ui.div(gridCell.cell.value, isValidType ? {} : {style: errorStyle});
|
|
69
95
|
}
|
|
70
|
-
|
|
71
|
-
}
|
|
72
|
-
download(`${table.name}.sdf`, encodeURIComponent(result));
|
|
96
|
+
});
|
|
73
97
|
}
|
|
74
98
|
|
|
75
99
|
export function autostartOligoSdFileSubscription() {
|
|
76
100
|
grok.events.onViewAdded.subscribe((v: any) => {
|
|
77
|
-
if (v.type
|
|
78
|
-
if (v.dataFrame.columns.contains(COL_NAMES.TYPE))
|
|
101
|
+
if (v.type === DG.VIEW_TYPE.TABLE_VIEW) {
|
|
102
|
+
if (v.dataFrame.columns.contains(COL_NAMES.TYPE)) {
|
|
103
|
+
oligoSdFileGrid(v);
|
|
79
104
|
oligoSdFile(v.dataFrame);
|
|
105
|
+
}
|
|
80
106
|
|
|
81
107
|
// Should be removed after fixing bug https://github.com/datagrok-ai/public/issues/808
|
|
82
108
|
grok.events.onContextMenu.subscribe((args) => {
|
|
83
|
-
|
|
109
|
+
if (!(args.args.context instanceof DG.Grid)) return;
|
|
110
|
+
const grid: DG.Grid = args.args.context as DG.Grid;
|
|
111
|
+
const menu: DG.Menu = args.args.menu;
|
|
112
|
+
|
|
113
|
+
const seqCol = grid.table.currentCol; // /^[fsACGUacgu]{6,}$/
|
|
84
114
|
if (DG.Detector.sampleCategories(seqCol,
|
|
85
115
|
(s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|A|U|G|C){6,}$/.test(s))) {
|
|
86
|
-
|
|
87
|
-
|
|
116
|
+
menu.item('Convert raw nucleotides to GCRS', () => {
|
|
117
|
+
grid.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
88
118
|
return siRnaNucleotidesToGcrs(seqCol.get(i));
|
|
89
119
|
});
|
|
90
120
|
});
|
|
91
121
|
} else if (DG.Detector.sampleCategories(seqCol,
|
|
92
122
|
(s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|f|s|A|C|G|U|a|c|g|u){6,}$/.test(s))) {
|
|
93
|
-
|
|
94
|
-
|
|
123
|
+
menu.item('Convert Axolabs to GCRS', () => {
|
|
124
|
+
grid.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
95
125
|
return siRnaAxolabsToGcrs(seqCol.get(i));
|
|
96
126
|
});
|
|
97
127
|
}); // /^[fmpsACGU]{6,}$/
|
|
98
128
|
} else if (DG.Detector.sampleCategories(seqCol,
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
129
|
+
(s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|f|m|ps|A|C|G|U){6,}$/.test(s)) ||
|
|
130
|
+
DG.Detector.sampleCategories(seqCol, (s) => /^(?=.*moe)(?=.*5mC)(?=.*ps){6,}/.test(s))) {
|
|
131
|
+
menu.item('Convert GCRS to raw', () => {
|
|
132
|
+
grid.table.columns.addNewString(seqCol.name + ' to raw').init((i: number) => {
|
|
103
133
|
return gcrsToNucleotides(seqCol.get(i));
|
|
104
134
|
});
|
|
105
135
|
});
|
|
106
|
-
|
|
107
|
-
|
|
136
|
+
menu.item('Convert GCRS to MM12', () => {
|
|
137
|
+
grid.table.columns.addNewString(seqCol.name + ' to MM12').init((i: number) => {
|
|
108
138
|
return gcrsToMermade12(seqCol.get(i));
|
|
109
139
|
});
|
|
110
140
|
}); // /^[*56789ATGC]{6,}$/
|
|
111
141
|
} else if (DG.Detector.sampleCategories(seqCol,
|
|
112
142
|
(s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|\*|5|6|7|8|9|A|T|G|C){6,}$/.test(s))) {
|
|
113
|
-
|
|
114
|
-
const seqCol =
|
|
115
|
-
|
|
143
|
+
menu.item('Convert Biospring to GCRS', () => {
|
|
144
|
+
const seqCol = grid.table.currentCol;
|
|
145
|
+
grid.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
116
146
|
return asoGapmersBioSpringToGcrs(seqCol.get(i));
|
|
117
147
|
});
|
|
118
148
|
}); // /^[*1-8]{6,}$/
|
|
119
149
|
} else if (DG.Detector.sampleCategories(seqCol,
|
|
120
150
|
(s) => /(\(invabasic\)|\(GalNAc-2-JNJ\)|\*|1|2|3|4|5|6|7|8){6,}$/.test(s))) {
|
|
121
|
-
|
|
122
|
-
|
|
151
|
+
menu.item('Convert Biospring to GCRS', () => {
|
|
152
|
+
grid.table.columns.addNewString(seqCol.name + ' to GCRS').init((i: number) => {
|
|
123
153
|
return siRnaBioSpringToGcrs(seqCol.get(i));
|
|
124
154
|
});
|
|
125
155
|
});
|
|
@@ -136,85 +166,13 @@ export function oligoSdFile(table: DG.DataFrame) {
|
|
|
136
166
|
const icdsDf = DG.DataFrame.fromCsv(ICDS);
|
|
137
167
|
const idpsDf = DG.DataFrame.fromCsv(IDPS);
|
|
138
168
|
|
|
139
|
-
const sequenceCol = table.getCol(COL_NAMES.SEQUENCE);
|
|
140
169
|
const saltCol = table.getCol(COL_NAMES.SALT);
|
|
141
170
|
const equivalentsCol = table.getCol(COL_NAMES.EQUIVALENTS);
|
|
142
|
-
const typeCol = table.getCol(COL_NAMES.TYPE);
|
|
143
|
-
const chemistryNameCol = table.getCol(COL_NAMES.CHEMISTRY_NAME);
|
|
144
|
-
|
|
145
|
-
const molWeightCol = saltsDf.getCol('MOLWEIGHT');
|
|
146
|
-
const saltNamesList = saltsDf.getCol('DISPLAY').toList();
|
|
147
|
-
|
|
148
|
-
let newDf: DG.DataFrame;
|
|
149
|
-
let addColumnsPressed = false;
|
|
150
171
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
return grok.shell.error('Columns already exist');
|
|
172
|
+
const saltsMolWeightList: number[] = saltsDf.getCol('MOLWEIGHT').toList();
|
|
173
|
+
const saltNamesList: string[] = saltsDf.getCol('DISPLAY').toList();
|
|
154
174
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
t.columns.addNewString(COL_NAMES.COMPOUND_NAME).init((i: number) => {
|
|
158
|
-
return ([SEQUENCE_TYPES.DUPLEX, SEQUENCE_TYPES.DIMER, SEQUENCE_TYPES.TRIPLEX].includes(typeCol.get(i))) ?
|
|
159
|
-
chemistryNameCol.get(i) :
|
|
160
|
-
sequenceCol.get(i);
|
|
161
|
-
});
|
|
162
|
-
|
|
163
|
-
t.columns.addNewString(COL_NAMES.COMPOUND_COMMENTS).init((i: number) => {
|
|
164
|
-
if ([SEQUENCE_TYPES.SENSE_STRAND, SEQUENCE_TYPES.ANTISENSE_STRAND].includes(typeCol.get(i)))
|
|
165
|
-
return sequenceCol.get(i);
|
|
166
|
-
else if (typeCol.get(i) == SEQUENCE_TYPES.DUPLEX) {
|
|
167
|
-
const obj = parseStrandsFromDuplexCell(sequenceCol.get(i));
|
|
168
|
-
return `${chemistryNameCol.get(i)}; duplex of SS: ${obj.SS} and AS: ${obj.AS}`;
|
|
169
|
-
} else if ([SEQUENCE_TYPES.DIMER, SEQUENCE_TYPES.TRIPLEX].includes(typeCol.get(i))) {
|
|
170
|
-
const obj = parseStrandsFromTriplexOrDimerCell(sequenceCol.get(i));
|
|
171
|
-
return `${chemistryNameCol.get(i)}; duplex of SS: ${obj.SS} and AS1: ${obj.AS1} and AS2: ${obj.AS2}`;
|
|
172
|
-
}
|
|
173
|
-
});
|
|
174
|
-
|
|
175
|
-
t.columns.addNewFloat(COL_NAMES.COMPOUND_MOL_WEIGHT).init((i: number) => {
|
|
176
|
-
if ([SEQUENCE_TYPES.SENSE_STRAND, SEQUENCE_TYPES.ANTISENSE_STRAND].includes(typeCol.get(i))) {
|
|
177
|
-
return (isValidSequence(sequenceCol.get(i), null).indexOfFirstNotValidChar == -1) ?
|
|
178
|
-
molecularWeight(sequenceCol.get(i), weightsObj) :
|
|
179
|
-
DG.FLOAT_NULL;
|
|
180
|
-
} else if (typeCol.get(i) == SEQUENCE_TYPES.DUPLEX) {
|
|
181
|
-
const obj = parseStrandsFromDuplexCell(sequenceCol.get(i));
|
|
182
|
-
return (Object.values(obj).every((seq) => isValidSequence(seq, null).indexOfFirstNotValidChar == -1)) ?
|
|
183
|
-
molecularWeight(obj.SS, weightsObj) + molecularWeight(obj.AS, weightsObj) :
|
|
184
|
-
DG.FLOAT_NULL;
|
|
185
|
-
} else if ([SEQUENCE_TYPES.DIMER, SEQUENCE_TYPES.TRIPLEX].includes(typeCol.get(i))) {
|
|
186
|
-
const obj = parseStrandsFromTriplexOrDimerCell(sequenceCol.get(i));
|
|
187
|
-
return (Object.values(obj).every((seq) => isValidSequence(seq, null).indexOfFirstNotValidChar == -1)) ?
|
|
188
|
-
molecularWeight(obj.SS, weightsObj) + molecularWeight(obj.AS1, weightsObj) +
|
|
189
|
-
molecularWeight(obj.AS2, weightsObj) :
|
|
190
|
-
DG.FLOAT_NULL;
|
|
191
|
-
}
|
|
192
|
-
});
|
|
193
|
-
|
|
194
|
-
t.columns.addNewFloat(COL_NAMES.SALT_MASS).init((i: number) =>
|
|
195
|
-
saltMass(saltNamesList, molWeightCol, equivalentsCol, i, saltCol));
|
|
196
|
-
|
|
197
|
-
t.columns.addNewFloat(COL_NAMES.SALT_MOL_WEIGHT).init((i: number) =>
|
|
198
|
-
saltMolWeigth(saltNamesList, saltCol, molWeightCol, i));
|
|
199
|
-
|
|
200
|
-
const compoundMolWeightCol = t.getCol(COL_NAMES.COMPOUND_MOL_WEIGHT);
|
|
201
|
-
const saltMassCol = t.getCol(COL_NAMES.SALT_MASS);
|
|
202
|
-
t.columns.addNewFloat(COL_NAMES.BATCH_MOL_WEIGHT).init((i: number) =>
|
|
203
|
-
batchMolWeight(compoundMolWeightCol, saltMassCol, i));
|
|
204
|
-
|
|
205
|
-
grok.shell.getTableView(table.name).grid.columns.setOrder(Object.values(COL_NAMES));
|
|
206
|
-
addColumnsPressed = true;
|
|
207
|
-
return newDf = t;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
function updateCalculatedColumns(t: DG.DataFrame, i: number): void {
|
|
211
|
-
const smValue = saltMass(saltNamesList, molWeightCol, equivalentsCol, i, saltCol);
|
|
212
|
-
t.getCol(COL_NAMES.SALT_MASS).set(i, smValue, false);
|
|
213
|
-
const smwValue = saltMolWeigth(saltNamesList, saltCol, molWeightCol, i);
|
|
214
|
-
t.getCol(COL_NAMES.SALT_MOL_WEIGHT).set(i, smwValue, false);
|
|
215
|
-
const bmw = batchMolWeight(t.getCol(COL_NAMES.COMPOUND_MOL_WEIGHT), t.getCol(COL_NAMES.SALT_MASS), i);
|
|
216
|
-
t.getCol(COL_NAMES.BATCH_MOL_WEIGHT).set(i, bmw, false);
|
|
217
|
-
}
|
|
175
|
+
let newDf: DG.DataFrame | undefined = undefined;
|
|
218
176
|
|
|
219
177
|
const d = ui.div([
|
|
220
178
|
ui.icons.edit(() => {
|
|
@@ -223,9 +181,19 @@ export function oligoSdFile(table: DG.DataFrame) {
|
|
|
223
181
|
table.changeColumnType(COL_NAMES.IDP, DG.COLUMN_TYPE.STRING);
|
|
224
182
|
d.append(
|
|
225
183
|
ui.divH([
|
|
226
|
-
ui.
|
|
227
|
-
|
|
228
|
-
|
|
184
|
+
ui.button('Add columns',
|
|
185
|
+
() => {
|
|
186
|
+
newDf = sdfAddColumns(table, saltNamesList, saltsMolWeightList,
|
|
187
|
+
(rowI, err) => { sdfHandleErrorUI('Error on ', table, rowI, err); });
|
|
188
|
+
grok.shell.getTableView(newDf.name).grid.columns.setOrder(Object.values(COL_NAMES));
|
|
189
|
+
},
|
|
190
|
+
`Add columns: '${GENERATED_COL_NAMES.join(`', '`)}'`),
|
|
191
|
+
ui.bigButton('Save SDF', () => {
|
|
192
|
+
const df: DG.DataFrame = newDf ?? table;
|
|
193
|
+
sdfSaveTable(df,
|
|
194
|
+
(rowI, err) => { sdfHandleErrorUI('Skip ', df, rowI, err); });
|
|
195
|
+
}, 'Save SD file'),
|
|
196
|
+
])
|
|
229
197
|
);
|
|
230
198
|
|
|
231
199
|
const view = grok.shell.getTableView(table.name);
|
|
@@ -238,11 +206,15 @@ export function oligoSdFile(table: DG.DataFrame) {
|
|
|
238
206
|
view.dataFrame.getCol(COL_NAMES.IDP).setTag(DG.TAGS.CHOICES, stringify(idpsDf.columns.byIndex(0).toList()));
|
|
239
207
|
|
|
240
208
|
grok.events.onContextMenu.subscribe((args) => {
|
|
209
|
+
if (!(args.args.context instanceof DG.Grid)) return;
|
|
210
|
+
const grid: DG.Grid = args.args.context as DG.Grid;
|
|
211
|
+
const menu: DG.Menu = args.args.menu;
|
|
212
|
+
|
|
241
213
|
if ([COL_NAMES.TYPE, COL_NAMES.OWNER, COL_NAMES.SALT, COL_NAMES.SOURCE, COL_NAMES.ICD, COL_NAMES.IDP]
|
|
242
|
-
.includes(
|
|
243
|
-
|
|
244
|
-
const v =
|
|
245
|
-
|
|
214
|
+
.includes(grid.table.currentCol.name)) {
|
|
215
|
+
menu.item('Fill Column With Value', () => {
|
|
216
|
+
const v = grid.table.currentCell.value;
|
|
217
|
+
grid.table.currentCell.column.init(v);
|
|
246
218
|
for (let i = 0; i < view.dataFrame.rowCount; i++)
|
|
247
219
|
updateCalculatedColumns(view.dataFrame, i);
|
|
248
220
|
});
|
|
@@ -256,9 +228,9 @@ export function oligoSdFile(table: DG.DataFrame) {
|
|
|
256
228
|
});
|
|
257
229
|
|
|
258
230
|
function updateCalculatedColumns(t: DG.DataFrame, i: number): void {
|
|
259
|
-
const smValue = saltMass(saltNamesList,
|
|
231
|
+
const smValue = saltMass(saltNamesList, saltsMolWeightList, equivalentsCol, i, saltCol);
|
|
260
232
|
t.getCol(COL_NAMES.SALT_MASS).set(i, smValue, false);
|
|
261
|
-
const smwValue = saltMolWeigth(saltNamesList, saltCol,
|
|
233
|
+
const smwValue = saltMolWeigth(saltNamesList, saltCol, saltsMolWeightList, i);
|
|
262
234
|
t.getCol(COL_NAMES.SALT_MOL_WEIGHT).set(i, smwValue, false);
|
|
263
235
|
const bmw = batchMolWeight(t.getCol(COL_NAMES.COMPOUND_MOL_WEIGHT), t.getCol(COL_NAMES.SALT_MASS), i);
|
|
264
236
|
t.getCol(COL_NAMES.BATCH_MOL_WEIGHT).set(i, bmw, false);
|