@datagrok/bio 2.8.1 → 2.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -4
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/samples/FASTA.csv +65 -0
- package/files/samples/HELM.csv +541 -0
- package/files/samples/HELM_cyclic.tsv +42 -0
- package/files/samples/MSA.csv +541 -0
- package/files/samples/natural-sequences.csv +5063 -0
- package/files/samples/synthetic-sequences.csv +5044 -0
- package/package.json +3 -3
- package/scripts/sequence_generator.py +6 -3
- package/src/demo/bio03-atomic-level.ts +1 -1
- package/src/demo/bio05-helm-msa-sequence-space.ts +1 -1
- package/src/package.ts +1 -1
- package/src/tests/bio-tests.ts +33 -3
- package/src/tests/detectors-tests.ts +3 -3
- package/src/tests/renderers-monomer-placer.ts +7 -7
- package/src/tests/renderers-test.ts +1 -1
- package/src/tests/splitters-test.ts +1 -1
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.8.1",
|
|
8
|
+
"version": "2.8.2",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
{
|
|
17
17
|
"name": "MaxMonomerLength",
|
|
18
18
|
"propertyType": "int",
|
|
19
|
-
"defaultValue":
|
|
19
|
+
"defaultValue": 4,
|
|
20
20
|
"nullable": false
|
|
21
21
|
},
|
|
22
22
|
{
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.34.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.34.1",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
39
|
"@datagrok-libraries/ml": "^6.3.39",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
package/scripts/sequence_generator.py
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# input: bool disable_cliffs = False [Disable generation of cliffs]
|
|
14
14
|
# input: double cliff_probability = 0.01 [Probability to make activity cliff of a sequence]
|
|
15
15
|
# input: double cliff_strength = 4.0 [Strength of cliff]
|
|
16
|
-
# input:
|
|
16
|
+
# input: string fasta_separator = '' {nullable: true}
|
|
17
17
|
# output: dataframe sequences
|
|
18
18
|
|
|
19
19
|
"""
|
|
@@ -54,6 +54,7 @@ alphabets: Dict[str, str] = {
|
|
|
54
54
|
"PT": "A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y",
|
|
55
55
|
"DNA": "A,T,G,C",
|
|
56
56
|
"RNA": "A,U,G,C",
|
|
57
|
+
"PT_HELM": "A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,dA,dC,dD,dE,dF,dH,dI,dK,dL,dM,dN,dP,dQ,dR,dS,dT,dV,dW,dY,meA,meD,meS,meT,meV,meY,meE,meG,meI,meK,meM,meN,meQ,meC,meR,meW,meF,meH,meL,Nle,Nva,Orn,Iva,aIle,gGlu,Hcy,Hse,Hyp,D-gGlu,D-Nle,D-hPhe,D-Hyp,D-Nva,D-Orn,Pyr,Phe_3Cl,Phe_4Cl,Phe_4NH2,Phg,Ser_tBu,Tyr_Bn,Tza,1Nal,Cha,Lys_Boc,aThr,D-2Nal,D-2Thi,D-aHyp,D-aIle,D-Phg,D-Ser_tBu,Cya,Lys_Me3,Pen,Phe_4Me,Ser_Bn,Tyr_tBu,2Nal,Thi,aHyp,Ala_tBu,hPhe,D-1Nal,D-aThr,D-Cha,D-Pen,D-Phe_4Cl,D-Ser_Bn,Wil,Oic_3aS-7aS,Pip,3Pal,4Pal,Abu,Apm,Chg,Dab,Dap,D-3Pal,D-aMeAbu,D-Chg,D-Cit,D-Dab,D-Pip,D-Tic,Aca,Tic,Aad,Cit,Aze,Ac5c,Aib,D-2Pal,D-Abu,D-Dap,Asu,D-Thz,D-Trp_For,D-Tyr_Et,Lys_Ac,Asp_OMe,Phe_ab-dehydro,Sta_3xi4xi,Tyr_ab-dehydroMe,App,Cap,Cys_SEt,Dsu,pnC,pnG,Pqa,Pro_4Me3OH,Met_O2,Phe_2Me,Phe_34diCl,Phe_4Br,Phe_4I,Phe_4Sdihydroorotamido,Pyl,Ser_PO3H2,Thr_PO3H2,Thz,Trp_Me,Tyr_26diMe,Tyr_3I,Tyr_3NO2,Tyr_Ph4OH,Tyr_SO3H,Val_3OH,xiIle,NMe2Abz,NMebAla,aMePhe,aMePro,aMeTyr_3OH,Bmt,Bmt_E,Cys_Bn,Gla,hHis,His_1Me,Gly_allyl,Gly_cPr,Asp_Ph2NH2,Azi,2Abz,3Abz,4Abz,Ac3c,Ac6c,bAla,D-Bmt,D-Bmt_E,D-hArg,D-Phe_4F,D-Trp_2Me,D-Tyr_Me,D-xiIle,Lys_iPr,Phe_ab-dehydro_3NO2,Sta_3S4S,Bux,Dpm,pnA,pnT,seC,Met_O,nTyr,Oic_3aR-7aS,Oic_3axi-7axi,Phe_2F,Phe_3F,Phe_4F,Phe_4NO2,Phe_bbdiMe,Trp_5OH,Trp_Ome,Tyr_35diI,Tyr_3OH,Tyr_Me,Tyr_PO3H2,xiHyp,xiThr,NMe4Abz,aMeTyr,Aoda,Bpa,Cys_Me,Dip,hArg,His_1Bn,His_3Me,Hyl_5xi,Bip,Abu_23dehydro,D-Dip,Dha,D-hArg_Et2,D-Met_S-O,D-His_1Bn,D-nTyr,D-Phe_4ureido",
|
|
57
58
|
}
|
|
58
59
|
|
|
59
60
|
|
|
@@ -392,7 +393,9 @@ if not grok:
|
|
|
392
393
|
helm_library_file = args.helm_library_file
|
|
393
394
|
helm_connection_mode = args.helm_connection_mode
|
|
394
395
|
|
|
395
|
-
|
|
396
|
+
helm_init = "helm_library_file" in globals() and helm_library_file is not None
|
|
397
|
+
|
|
398
|
+
if not helm_init:
|
|
396
399
|
alphabet: Alphabet = (
|
|
397
400
|
alphabets[alphabet_key].split(",")
|
|
398
401
|
if alphabet_key in alphabets
|
|
@@ -414,7 +417,7 @@ header, data = generate_sequences(
|
|
|
414
417
|
cliff_probability,
|
|
415
418
|
cliff_strength,
|
|
416
419
|
)
|
|
417
|
-
if
|
|
420
|
+
if not helm_init:
|
|
418
421
|
data_formatted = convert_to_fasta(data, fasta_separator)
|
|
419
422
|
else:
|
|
420
423
|
data_formatted = convert_to_helm(data, helm_connection_mode)
|
|
package/src/demo/bio03-atomic-level.ts
CHANGED
|
@@ -7,7 +7,7 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
|
7
7
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
8
|
|
|
9
9
|
export async function demoBio03UI(): Promise<void> {
|
|
10
|
-
const dataFn: string = 'samples/
|
|
10
|
+
const dataFn: string = 'samples/HELM.csv';
|
|
11
11
|
const seqColName = 'HELM';
|
|
12
12
|
|
|
13
13
|
let df: DG.DataFrame;
|
|
package/src/demo/bio05-helm-msa-sequence-space.ts
CHANGED
|
@@ -11,7 +11,7 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
|
11
11
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
12
12
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
13
13
|
|
|
14
|
-
const helmFn: string = 'samples/
|
|
14
|
+
const helmFn: string = 'samples/HELM.csv';
|
|
15
15
|
|
|
16
16
|
export async function demoBio05UI(): Promise<void> {
|
|
17
17
|
let view: DG.TableView;
|
package/src/package.ts
CHANGED
|
@@ -167,7 +167,7 @@ export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
|
167
167
|
export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
|
|
168
168
|
const widget = new PackageSettingsEditorWidget(propList);
|
|
169
169
|
widget.init().then(); // Ignore promise returned
|
|
170
|
-
return widget;
|
|
170
|
+
return widget as DG.Widget;
|
|
171
171
|
}
|
|
172
172
|
|
|
173
173
|
// -- Cell renderers --
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {category, test, expect, expectObject, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {
|
|
7
7
|
getAlphabetSimilarity,
|
|
8
8
|
monomerToShort,
|
|
@@ -83,13 +83,43 @@ category('WebLogo.monomerToShort', () => {
|
|
|
83
83
|
expect(monomerToShort('Short', 5), 'Short');
|
|
84
84
|
});
|
|
85
85
|
test('longMonomerLong56', async () => {
|
|
86
|
-
expect(monomerToShort('Long56',
|
|
86
|
+
expect(monomerToShort('Long56', 6), 'Long56');
|
|
87
87
|
});
|
|
88
88
|
test('longMonomerComplexFirstPartShort', async () => {
|
|
89
89
|
expect(monomerToShort('Long-long', 5), 'Long…');
|
|
90
90
|
});
|
|
91
91
|
test('longMonomerComplexFirstPartLong56', async () => {
|
|
92
|
-
expect(monomerToShort('Long56-long',
|
|
92
|
+
expect(monomerToShort('Long56-long', 6), 'Long5…');
|
|
93
|
+
});
|
|
94
|
+
test('monomerToShort', async () => {
|
|
95
|
+
const pairs = [
|
|
96
|
+
['AbC', 'AbC'],
|
|
97
|
+
['AbCd', 'Ab…'],
|
|
98
|
+
['ABc', 'ABc'],
|
|
99
|
+
['ABcd', 'AB…'],
|
|
100
|
+
['A_b', 'A_b'],
|
|
101
|
+
['A_bc', 'A…'],
|
|
102
|
+
['Ab_c', 'Ab…'],
|
|
103
|
+
['A1_b', 'A1…'],
|
|
104
|
+
['Abc_d', 'Ab…'],
|
|
105
|
+
['Abcd_e', 'Ab…'],
|
|
106
|
+
['A-b', 'A-b'],
|
|
107
|
+
['A-bc', 'A…'],
|
|
108
|
+
['Ab-c', 'Ab…'],
|
|
109
|
+
['A1-b', 'A1…'],
|
|
110
|
+
['Abc-d', 'Ab…'],
|
|
111
|
+
['Abcd-e', 'Ab…'],
|
|
112
|
+
['A', 'A'],
|
|
113
|
+
['Ab', 'Ab'],
|
|
114
|
+
['Abc', 'Abc'],
|
|
115
|
+
['Ab…', 'Ab…'],
|
|
116
|
+
['Abcd', 'Ab…'],
|
|
117
|
+
['Abcde', 'Ab…'],
|
|
118
|
+
];
|
|
119
|
+
const src: string[] = pairs.map((p) => p[0]);
|
|
120
|
+
const tgt: string[] = pairs.map((p) => p[1]);
|
|
121
|
+
const res: string [] = src.map((m) => monomerToShort(m, 3));
|
|
122
|
+
expectArray(res, tgt);
|
|
93
123
|
});
|
|
94
124
|
});
|
|
95
125
|
|
|
package/src/tests/detectors-tests.ts
CHANGED
|
@@ -146,9 +146,9 @@ MWRSWY-CKHP`;
|
|
|
146
146
|
const samples: { [key: string]: string } = {
|
|
147
147
|
[Samples.fastaFasta]: 'System:AppData/Bio/data/sample_FASTA.fasta',
|
|
148
148
|
[Samples.fastaPtCsv]: 'System:AppData/Bio/data/sample_FASTA_PT.csv',
|
|
149
|
-
[Samples.msaComplex]: 'System:AppData/Bio/samples/
|
|
150
|
-
[Samples.fastaCsv]: 'System:AppData/Bio/samples/
|
|
151
|
-
[Samples.helmCsv]: 'System:AppData/Bio/samples/
|
|
149
|
+
[Samples.msaComplex]: 'System:AppData/Bio/samples/MSA.csv',
|
|
150
|
+
[Samples.fastaCsv]: 'System:AppData/Bio/samples/FASTA.csv',
|
|
151
|
+
[Samples.helmCsv]: 'System:AppData/Bio/samples/HELM.csv',
|
|
152
152
|
[Samples.peptidesComplex]: 'System:AppData/Bio/tests/peptides_complex_msa.csv',
|
|
153
153
|
[Samples.peptidesSimple]: 'System:AppData/Bio/tests/peptides_simple_msa.csv',
|
|
154
154
|
[Samples.testDemogCsv]: 'System:AppData/Bio/tests/testDemog.csv',
|
|
package/src/tests/renderers-monomer-placer.ts
CHANGED
|
@@ -26,10 +26,10 @@ category('renderers: monomerPlacer', () => {
|
|
|
26
26
|
{src: {row: 1, x: 5}, tgt: {pos: 0}},
|
|
27
27
|
{src: {row: 1, x: 6}, tgt: {pos: 0}},
|
|
28
28
|
{src: {row: 1, x: 26}, tgt: {pos: 1}},
|
|
29
|
-
{src: {row: 1, x:
|
|
30
|
-
{src: {row: 1, x:
|
|
31
|
-
{src: {row: 2, x:
|
|
32
|
-
{src: {row: 2, x:
|
|
29
|
+
{src: {row: 1, x: 160}, tgt: {pos: 6}},
|
|
30
|
+
{src: {row: 1, x: 185}, tgt: {pos: 7}},
|
|
31
|
+
{src: {row: 2, x: 140}, tgt: {pos: 5}},
|
|
32
|
+
{src: {row: 2, x: 145}, tgt: {pos: 5}},
|
|
33
33
|
]
|
|
34
34
|
},
|
|
35
35
|
splitterMsa: {
|
|
@@ -46,9 +46,9 @@ category('renderers: monomerPlacer', () => {
|
|
|
46
46
|
{src: {row: 1, x: 1}, tgt: {pos: null}},
|
|
47
47
|
{src: {row: 1, x: 26}, tgt: {pos: 0}},
|
|
48
48
|
{src: {row: 1, x: 170}, tgt: {pos: 4}},
|
|
49
|
-
{src: {row: 1, x:
|
|
50
|
-
{src: {row: 2, x:
|
|
51
|
-
{src: {row: 2, x:
|
|
49
|
+
{src: {row: 1, x: 200}, tgt: {pos: 5}},
|
|
50
|
+
{src: {row: 2, x: 200}, tgt: {pos: 5}},
|
|
51
|
+
{src: {row: 2, x: 203}, tgt: {pos: 5}},
|
|
52
52
|
]
|
|
53
53
|
},
|
|
54
54
|
fastaMsa: {
|
|
package/src/tests/renderers-test.ts
CHANGED
|
@@ -52,7 +52,7 @@ category('renderers', () => {
|
|
|
52
52
|
}, {skipReason: 'GROK-11212'});
|
|
53
53
|
|
|
54
54
|
async function _rendererMacromoleculeFasta() {
|
|
55
|
-
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/
|
|
55
|
+
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
|
|
56
56
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
57
57
|
|
|
58
58
|
const seqCol = df.getCol('Sequence');
|
|
package/src/tests/splitters-test.ts
CHANGED
|
@@ -79,7 +79,7 @@ category('splitters', async () => {
|
|
|
79
79
|
test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
|
|
80
80
|
|
|
81
81
|
test('splitToMonomers', async () => {
|
|
82
|
-
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/
|
|
82
|
+
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/MSA.csv');
|
|
83
83
|
|
|
84
84
|
const seqCol = df.getCol('MSA');
|
|
85
85
|
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|