@datagrok/bio 2.8.1 → 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -4
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/samples/FASTA.csv +65 -0
- package/files/samples/HELM.csv +541 -0
- package/files/samples/HELM_cyclic.tsv +42 -0
- package/files/samples/MSA.csv +541 -0
- package/files/samples/natural-sequences.csv +5063 -0
- package/files/samples/synthetic-sequences.csv +5044 -0
- package/package.json +3 -3
- package/scripts/sequence_generator.py +6 -3
- package/src/demo/bio03-atomic-level.ts +1 -1
- package/src/demo/bio05-helm-msa-sequence-space.ts +1 -1
- package/src/package.ts +2 -2
- package/src/tests/bio-tests.ts +33 -3
- package/src/tests/detectors-tests.ts +3 -3
- package/src/tests/renderers-monomer-placer.ts +7 -7
- package/src/tests/renderers-test.ts +1 -1
- package/src/tests/splitters-test.ts +1 -1
- package/scripts/admet-requirements.txt +0 -7
- package/scripts/admet-run.py +0 -27
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.8.1",
|
|
8
|
+
"version": "2.8.3",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
{
|
|
17
17
|
"name": "MaxMonomerLength",
|
|
18
18
|
"propertyType": "int",
|
|
19
|
-
"defaultValue":
|
|
19
|
+
"defaultValue": 4,
|
|
20
20
|
"nullable": false
|
|
21
21
|
},
|
|
22
22
|
{
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.34.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.34.1",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
39
|
"@datagrok-libraries/ml": "^6.3.39",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# input: bool disable_cliffs = False [Disable generation of cliffs]
|
|
14
14
|
# input: double cliff_probability = 0.01 [Probability to make activity cliff of a sequence]
|
|
15
15
|
# input: double cliff_strength = 4.0 [Strength of cliff]
|
|
16
|
-
# input:
|
|
16
|
+
# input: string fasta_separator = '' {nullable: true}
|
|
17
17
|
# output: dataframe sequences
|
|
18
18
|
|
|
19
19
|
"""
|
|
@@ -54,6 +54,7 @@ alphabets: Dict[str, str] = {
|
|
|
54
54
|
"PT": "A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y",
|
|
55
55
|
"DNA": "A,T,G,C",
|
|
56
56
|
"RNA": "A,U,G,C",
|
|
57
|
+
"PT_HELM": "A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,dA,dC,dD,dE,dF,dH,dI,dK,dL,dM,dN,dP,dQ,dR,dS,dT,dV,dW,dY,meA,meD,meS,meT,meV,meY,meE,meG,meI,meK,meM,meN,meQ,meC,meR,meW,meF,meH,meL,Nle,Nva,Orn,Iva,aIle,gGlu,Hcy,Hse,Hyp,D-gGlu,D-Nle,D-hPhe,D-Hyp,D-Nva,D-Orn,Pyr,Phe_3Cl,Phe_4Cl,Phe_4NH2,Phg,Ser_tBu,Tyr_Bn,Tza,1Nal,Cha,Lys_Boc,aThr,D-2Nal,D-2Thi,D-aHyp,D-aIle,D-Phg,D-Ser_tBu,Cya,Lys_Me3,Pen,Phe_4Me,Ser_Bn,Tyr_tBu,2Nal,Thi,aHyp,Ala_tBu,hPhe,D-1Nal,D-aThr,D-Cha,D-Pen,D-Phe_4Cl,D-Ser_Bn,Wil,Oic_3aS-7aS,Pip,3Pal,4Pal,Abu,Apm,Chg,Dab,Dap,D-3Pal,D-aMeAbu,D-Chg,D-Cit,D-Dab,D-Pip,D-Tic,Aca,Tic,Aad,Cit,Aze,Ac5c,Aib,D-2Pal,D-Abu,D-Dap,Asu,D-Thz,D-Trp_For,D-Tyr_Et,Lys_Ac,Asp_OMe,Phe_ab-dehydro,Sta_3xi4xi,Tyr_ab-dehydroMe,App,Cap,Cys_SEt,Dsu,pnC,pnG,Pqa,Pro_4Me3OH,Met_O2,Phe_2Me,Phe_34diCl,Phe_4Br,Phe_4I,Phe_4Sdihydroorotamido,Pyl,Ser_PO3H2,Thr_PO3H2,Thz,Trp_Me,Tyr_26diMe,Tyr_3I,Tyr_3NO2,Tyr_Ph4OH,Tyr_SO3H,Val_3OH,xiIle,NMe2Abz,NMebAla,aMePhe,aMePro,aMeTyr_3OH,Bmt,Bmt_E,Cys_Bn,Gla,hHis,His_1Me,Gly_allyl,Gly_cPr,Asp_Ph2NH2,Azi,2Abz,3Abz,4Abz,Ac3c,Ac6c,bAla,D-Bmt,D-Bmt_E,D-hArg,D-Phe_4F,D-Trp_2Me,D-Tyr_Me,D-xiIle,Lys_iPr,Phe_ab-dehydro_3NO2,Sta_3S4S,Bux,Dpm,pnA,pnT,seC,Met_O,nTyr,Oic_3aR-7aS,Oic_3axi-7axi,Phe_2F,Phe_3F,Phe_4F,Phe_4NO2,Phe_bbdiMe,Trp_5OH,Trp_Ome,Tyr_35diI,Tyr_3OH,Tyr_Me,Tyr_PO3H2,xiHyp,xiThr,NMe4Abz,aMeTyr,Aoda,Bpa,Cys_Me,Dip,hArg,His_1Bn,His_3Me,Hyl_5xi,Bip,Abu_23dehydro,D-Dip,Dha,D-hArg_Et2,D-Met_S-O,D-His_1Bn,D-nTyr,D-Phe_4ureido",
|
|
57
58
|
}
|
|
58
59
|
|
|
59
60
|
|
|
@@ -392,7 +393,9 @@ if not grok:
|
|
|
392
393
|
helm_library_file = args.helm_library_file
|
|
393
394
|
helm_connection_mode = args.helm_connection_mode
|
|
394
395
|
|
|
395
|
-
|
|
396
|
+
helm_init = "helm_library_file" in globals() and helm_library_file is not None
|
|
397
|
+
|
|
398
|
+
if not helm_init:
|
|
396
399
|
alphabet: Alphabet = (
|
|
397
400
|
alphabets[alphabet_key].split(",")
|
|
398
401
|
if alphabet_key in alphabets
|
|
@@ -414,7 +417,7 @@ header, data = generate_sequences(
|
|
|
414
417
|
cliff_probability,
|
|
415
418
|
cliff_strength,
|
|
416
419
|
)
|
|
417
|
-
if
|
|
420
|
+
if not helm_init:
|
|
418
421
|
data_formatted = convert_to_fasta(data, fasta_separator)
|
|
419
422
|
else:
|
|
420
423
|
data_formatted = convert_to_helm(data, helm_connection_mode)
|
|
@@ -7,7 +7,7 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
|
7
7
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
8
|
|
|
9
9
|
export async function demoBio03UI(): Promise<void> {
|
|
10
|
-
const dataFn: string = 'samples/
|
|
10
|
+
const dataFn: string = 'samples/HELM.csv';
|
|
11
11
|
const seqColName = 'HELM';
|
|
12
12
|
|
|
13
13
|
let df: DG.DataFrame;
|
|
@@ -11,7 +11,7 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
|
11
11
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
12
12
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
13
13
|
|
|
14
|
-
const helmFn: string = 'samples/
|
|
14
|
+
const helmFn: string = 'samples/HELM.csv';
|
|
15
15
|
|
|
16
16
|
export async function demoBio05UI(): Promise<void> {
|
|
17
17
|
let view: DG.TableView;
|
package/src/package.ts
CHANGED
|
@@ -167,7 +167,7 @@ export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
|
167
167
|
export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
|
|
168
168
|
const widget = new PackageSettingsEditorWidget(propList);
|
|
169
169
|
widget.init().then(); // Ignore promise returned
|
|
170
|
-
return widget;
|
|
170
|
+
return widget as DG.Widget;
|
|
171
171
|
}
|
|
172
172
|
|
|
173
173
|
// -- Cell renderers --
|
|
@@ -246,7 +246,7 @@ export function webLogoViewer() {
|
|
|
246
246
|
//tags: viewer, panel
|
|
247
247
|
//meta.icon: files/icons/vdregions-viewer.svg
|
|
248
248
|
//output: viewer result
|
|
249
|
-
export function
|
|
249
|
+
export function vdRegionsViewer() {
|
|
250
250
|
return new VdRegionsViewer();
|
|
251
251
|
}
|
|
252
252
|
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {category, test, expect, expectObject, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {
|
|
7
7
|
getAlphabetSimilarity,
|
|
8
8
|
monomerToShort,
|
|
@@ -83,13 +83,43 @@ category('WebLogo.monomerToShort', () => {
|
|
|
83
83
|
expect(monomerToShort('Short', 5), 'Short');
|
|
84
84
|
});
|
|
85
85
|
test('longMonomerLong56', async () => {
|
|
86
|
-
expect(monomerToShort('Long56',
|
|
86
|
+
expect(monomerToShort('Long56', 6), 'Long56');
|
|
87
87
|
});
|
|
88
88
|
test('longMonomerComplexFirstPartShort', async () => {
|
|
89
89
|
expect(monomerToShort('Long-long', 5), 'Long…');
|
|
90
90
|
});
|
|
91
91
|
test('longMonomerComplexFirstPartLong56', async () => {
|
|
92
|
-
expect(monomerToShort('Long56-long',
|
|
92
|
+
expect(monomerToShort('Long56-long', 6), 'Long5…');
|
|
93
|
+
});
|
|
94
|
+
test('monomerToShort', async () => {
|
|
95
|
+
const pairs = [
|
|
96
|
+
['AbC', 'AbC'],
|
|
97
|
+
['AbCd', 'Ab…'],
|
|
98
|
+
['ABc', 'ABc'],
|
|
99
|
+
['ABcd', 'AB…'],
|
|
100
|
+
['A_b', 'A_b'],
|
|
101
|
+
['A_bc', 'A…'],
|
|
102
|
+
['Ab_c', 'Ab…'],
|
|
103
|
+
['A1_b', 'A1…'],
|
|
104
|
+
['Abc_d', 'Ab…'],
|
|
105
|
+
['Abcd_e', 'Ab…'],
|
|
106
|
+
['A-b', 'A-b'],
|
|
107
|
+
['A-bc', 'A…'],
|
|
108
|
+
['Ab-c', 'Ab…'],
|
|
109
|
+
['A1-b', 'A1…'],
|
|
110
|
+
['Abc-d', 'Ab…'],
|
|
111
|
+
['Abcd-e', 'Ab…'],
|
|
112
|
+
['A', 'A'],
|
|
113
|
+
['Ab', 'Ab'],
|
|
114
|
+
['Abc', 'Abc'],
|
|
115
|
+
['Ab…', 'Ab…'],
|
|
116
|
+
['Abcd', 'Ab…'],
|
|
117
|
+
['Abcde', 'Ab…'],
|
|
118
|
+
];
|
|
119
|
+
const src: string[] = pairs.map((p) => p[0]);
|
|
120
|
+
const tgt: string[] = pairs.map((p) => p[1]);
|
|
121
|
+
const res: string [] = src.map((m) => monomerToShort(m, 3));
|
|
122
|
+
expectArray(res, tgt);
|
|
93
123
|
});
|
|
94
124
|
});
|
|
95
125
|
|
|
@@ -146,9 +146,9 @@ MWRSWY-CKHP`;
|
|
|
146
146
|
const samples: { [key: string]: string } = {
|
|
147
147
|
[Samples.fastaFasta]: 'System:AppData/Bio/data/sample_FASTA.fasta',
|
|
148
148
|
[Samples.fastaPtCsv]: 'System:AppData/Bio/data/sample_FASTA_PT.csv',
|
|
149
|
-
[Samples.msaComplex]: 'System:AppData/Bio/samples/
|
|
150
|
-
[Samples.fastaCsv]: 'System:AppData/Bio/samples/
|
|
151
|
-
[Samples.helmCsv]: 'System:AppData/Bio/samples/
|
|
149
|
+
[Samples.msaComplex]: 'System:AppData/Bio/samples/MSA.csv',
|
|
150
|
+
[Samples.fastaCsv]: 'System:AppData/Bio/samples/FASTA.csv',
|
|
151
|
+
[Samples.helmCsv]: 'System:AppData/Bio/samples/HELM.csv',
|
|
152
152
|
[Samples.peptidesComplex]: 'System:AppData/Bio/tests/peptides_complex_msa.csv',
|
|
153
153
|
[Samples.peptidesSimple]: 'System:AppData/Bio/tests/peptides_simple_msa.csv',
|
|
154
154
|
[Samples.testDemogCsv]: 'System:AppData/Bio/tests/testDemog.csv',
|
|
@@ -26,10 +26,10 @@ category('renderers: monomerPlacer', () => {
|
|
|
26
26
|
{src: {row: 1, x: 5}, tgt: {pos: 0}},
|
|
27
27
|
{src: {row: 1, x: 6}, tgt: {pos: 0}},
|
|
28
28
|
{src: {row: 1, x: 26}, tgt: {pos: 1}},
|
|
29
|
-
{src: {row: 1, x:
|
|
30
|
-
{src: {row: 1, x:
|
|
31
|
-
{src: {row: 2, x:
|
|
32
|
-
{src: {row: 2, x:
|
|
29
|
+
{src: {row: 1, x: 160}, tgt: {pos: 6}},
|
|
30
|
+
{src: {row: 1, x: 185}, tgt: {pos: 7}},
|
|
31
|
+
{src: {row: 2, x: 140}, tgt: {pos: 5}},
|
|
32
|
+
{src: {row: 2, x: 145}, tgt: {pos: 5}},
|
|
33
33
|
]
|
|
34
34
|
},
|
|
35
35
|
splitterMsa: {
|
|
@@ -46,9 +46,9 @@ category('renderers: monomerPlacer', () => {
|
|
|
46
46
|
{src: {row: 1, x: 1}, tgt: {pos: null}},
|
|
47
47
|
{src: {row: 1, x: 26}, tgt: {pos: 0}},
|
|
48
48
|
{src: {row: 1, x: 170}, tgt: {pos: 4}},
|
|
49
|
-
{src: {row: 1, x:
|
|
50
|
-
{src: {row: 2, x:
|
|
51
|
-
{src: {row: 2, x:
|
|
49
|
+
{src: {row: 1, x: 200}, tgt: {pos: 5}},
|
|
50
|
+
{src: {row: 2, x: 200}, tgt: {pos: 5}},
|
|
51
|
+
{src: {row: 2, x: 203}, tgt: {pos: 5}},
|
|
52
52
|
]
|
|
53
53
|
},
|
|
54
54
|
fastaMsa: {
|
|
@@ -52,7 +52,7 @@ category('renderers', () => {
|
|
|
52
52
|
}, {skipReason: 'GROK-11212'});
|
|
53
53
|
|
|
54
54
|
async function _rendererMacromoleculeFasta() {
|
|
55
|
-
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/
|
|
55
|
+
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
|
|
56
56
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
57
57
|
|
|
58
58
|
const seqCol = df.getCol('Sequence');
|
|
@@ -79,7 +79,7 @@ category('splitters', async () => {
|
|
|
79
79
|
test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
|
|
80
80
|
|
|
81
81
|
test('splitToMonomers', async () => {
|
|
82
|
-
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/
|
|
82
|
+
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/MSA.csv');
|
|
83
83
|
|
|
84
84
|
const seqCol = df.getCol('MSA');
|
|
85
85
|
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
package/scripts/admet-run.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python2
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
Created on Mon Feb 19 17:30:46 2018
|
|
5
|
-
|
|
6
|
-
@author: cbdd
|
|
7
|
-
"""
|
|
8
|
-
from sklearn.externals import joblib
|
|
9
|
-
import numpy as np
|
|
10
|
-
import pandas as pd
|
|
11
|
-
import os
|
|
12
|
-
|
|
13
|
-
###################################### Load model ##########
|
|
14
|
-
# current_path = os.path.split(os.path.realpath(__file__))[0]
|
|
15
|
-
cf = joblib.load('CYP3A4-substrate.pkl')
|
|
16
|
-
|
|
17
|
-
###################################### Load descriptors ##########
|
|
18
|
-
fingerprint_content = pd.read_csv('des.csv').ix[:, 1:]
|
|
19
|
-
des_list = np.array(fingerprint_content)
|
|
20
|
-
|
|
21
|
-
###################################### Prediction ##########
|
|
22
|
-
y_predict_label = cf.predict(des_list)
|
|
23
|
-
y_predict_proba = cf.predict_proba(des_list)
|
|
24
|
-
print('#' * 10 + 'Results labels' + '#' * 10)
|
|
25
|
-
print(y_predict_label)
|
|
26
|
-
print('#' * 10 + 'Results probabilities' + '#' * 10)
|
|
27
|
-
print(y_predict_proba)
|