@datagrok/bio 2.8.1 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.8.1",
8
+ "version": "2.8.3",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -16,7 +16,7 @@
16
16
  {
17
17
  "name": "MaxMonomerLength",
18
18
  "propertyType": "int",
19
- "defaultValue": 3,
19
+ "defaultValue": 4,
20
20
  "nullable": false
21
21
  },
22
22
  {
@@ -34,7 +34,7 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.34.0",
37
+ "@datagrok-libraries/bio": "^5.34.1",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
39
  "@datagrok-libraries/ml": "^6.3.39",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
@@ -13,7 +13,7 @@
13
13
  # input: bool disable_cliffs = False [Disable generation of cliffs]
14
14
  # input: double cliff_probability = 0.01 [Probability to make activity cliff of a sequence]
15
15
  # input: double cliff_strength = 4.0 [Strength of cliff]
16
- # input: double fasta_separator = '' [Separator for a FASTA notation]
16
+ # input: string fasta_separator = '' {nullable: true}
17
17
  # output: dataframe sequences
18
18
 
19
19
  """
@@ -54,6 +54,7 @@ alphabets: Dict[str, str] = {
54
54
  "PT": "A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y",
55
55
  "DNA": "A,T,G,C",
56
56
  "RNA": "A,U,G,C",
57
+ "PT_HELM": "A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,dA,dC,dD,dE,dF,dH,dI,dK,dL,dM,dN,dP,dQ,dR,dS,dT,dV,dW,dY,meA,meD,meS,meT,meV,meY,meE,meG,meI,meK,meM,meN,meQ,meC,meR,meW,meF,meH,meL,Nle,Nva,Orn,Iva,aIle,gGlu,Hcy,Hse,Hyp,D-gGlu,D-Nle,D-hPhe,D-Hyp,D-Nva,D-Orn,Pyr,Phe_3Cl,Phe_4Cl,Phe_4NH2,Phg,Ser_tBu,Tyr_Bn,Tza,1Nal,Cha,Lys_Boc,aThr,D-2Nal,D-2Thi,D-aHyp,D-aIle,D-Phg,D-Ser_tBu,Cya,Lys_Me3,Pen,Phe_4Me,Ser_Bn,Tyr_tBu,2Nal,Thi,aHyp,Ala_tBu,hPhe,D-1Nal,D-aThr,D-Cha,D-Pen,D-Phe_4Cl,D-Ser_Bn,Wil,Oic_3aS-7aS,Pip,3Pal,4Pal,Abu,Apm,Chg,Dab,Dap,D-3Pal,D-aMeAbu,D-Chg,D-Cit,D-Dab,D-Pip,D-Tic,Aca,Tic,Aad,Cit,Aze,Ac5c,Aib,D-2Pal,D-Abu,D-Dap,Asu,D-Thz,D-Trp_For,D-Tyr_Et,Lys_Ac,Asp_OMe,Phe_ab-dehydro,Sta_3xi4xi,Tyr_ab-dehydroMe,App,Cap,Cys_SEt,Dsu,pnC,pnG,Pqa,Pro_4Me3OH,Met_O2,Phe_2Me,Phe_34diCl,Phe_4Br,Phe_4I,Phe_4Sdihydroorotamido,Pyl,Ser_PO3H2,Thr_PO3H2,Thz,Trp_Me,Tyr_26diMe,Tyr_3I,Tyr_3NO2,Tyr_Ph4OH,Tyr_SO3H,Val_3OH,xiIle,NMe2Abz,NMebAla,aMePhe,aMePro,aMeTyr_3OH,Bmt,Bmt_E,Cys_Bn,Gla,hHis,His_1Me,Gly_allyl,Gly_cPr,Asp_Ph2NH2,Azi,2Abz,3Abz,4Abz,Ac3c,Ac6c,bAla,D-Bmt,D-Bmt_E,D-hArg,D-Phe_4F,D-Trp_2Me,D-Tyr_Me,D-xiIle,Lys_iPr,Phe_ab-dehydro_3NO2,Sta_3S4S,Bux,Dpm,pnA,pnT,seC,Met_O,nTyr,Oic_3aR-7aS,Oic_3axi-7axi,Phe_2F,Phe_3F,Phe_4F,Phe_4NO2,Phe_bbdiMe,Trp_5OH,Trp_Ome,Tyr_35diI,Tyr_3OH,Tyr_Me,Tyr_PO3H2,xiHyp,xiThr,NMe4Abz,aMeTyr,Aoda,Bpa,Cys_Me,Dip,hArg,His_1Bn,His_3Me,Hyl_5xi,Bip,Abu_23dehydro,D-Dip,Dha,D-hArg_Et2,D-Met_S-O,D-His_1Bn,D-nTyr,D-Phe_4ureido",
57
58
  }
58
59
 
59
60
 
@@ -392,7 +393,9 @@ if not grok:
392
393
  helm_library_file = args.helm_library_file
393
394
  helm_connection_mode = args.helm_connection_mode
394
395
 
395
- if helm_library_file is None:
396
+ helm_init = "helm_library_file" in globals() and helm_library_file is not None
397
+
398
+ if not helm_init:
396
399
  alphabet: Alphabet = (
397
400
  alphabets[alphabet_key].split(",")
398
401
  if alphabet_key in alphabets
@@ -414,7 +417,7 @@ header, data = generate_sequences(
414
417
  cliff_probability,
415
418
  cliff_strength,
416
419
  )
417
- if helm_library_file is None:
420
+ if not helm_init:
418
421
  data_formatted = convert_to_fasta(data, fasta_separator)
419
422
  else:
420
423
  data_formatted = convert_to_helm(data, helm_connection_mode)
@@ -7,7 +7,7 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
7
7
  import {delay} from '@datagrok-libraries/utils/src/test';
8
8
 
9
9
  export async function demoBio03UI(): Promise<void> {
10
- const dataFn: string = 'samples/sample_HELM.csv';
10
+ const dataFn: string = 'samples/HELM.csv';
11
11
  const seqColName = 'HELM';
12
12
 
13
13
  let df: DG.DataFrame;
@@ -11,7 +11,7 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
11
11
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
12
12
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
13
13
 
14
- const helmFn: string = 'samples/sample_HELM.csv';
14
+ const helmFn: string = 'samples/HELM.csv';
15
15
 
16
16
  export async function demoBio05UI(): Promise<void> {
17
17
  let view: DG.TableView;
package/src/package.ts CHANGED
@@ -167,7 +167,7 @@ export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
167
167
  export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
168
168
  const widget = new PackageSettingsEditorWidget(propList);
169
169
  widget.init().then(); // Ignore promise returned
170
- return widget;
170
+ return widget as DG.Widget;
171
171
  }
172
172
 
173
173
  // -- Cell renderers --
@@ -246,7 +246,7 @@ export function webLogoViewer() {
246
246
  //tags: viewer, panel
247
247
  //meta.icon: files/icons/vdregions-viewer.svg
248
248
  //output: viewer result
249
- export function vdRegionViewer() {
249
+ export function vdRegionsViewer() {
250
250
  return new VdRegionsViewer();
251
251
  }
252
252
 
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
5
+ import {category, test, expect, expectObject, expectArray} from '@datagrok-libraries/utils/src/test';
6
6
  import {
7
7
  getAlphabetSimilarity,
8
8
  monomerToShort,
@@ -83,13 +83,43 @@ category('WebLogo.monomerToShort', () => {
83
83
  expect(monomerToShort('Short', 5), 'Short');
84
84
  });
85
85
  test('longMonomerLong56', async () => {
86
- expect(monomerToShort('Long56', 5), 'Long5…');
86
+ expect(monomerToShort('Long56', 6), 'Long56');
87
87
  });
88
88
  test('longMonomerComplexFirstPartShort', async () => {
89
89
  expect(monomerToShort('Long-long', 5), 'Long…');
90
90
  });
91
91
  test('longMonomerComplexFirstPartLong56', async () => {
92
- expect(monomerToShort('Long56-long', 5), 'Long5…');
92
+ expect(monomerToShort('Long56-long', 6), 'Long5…');
93
+ });
94
+ test('monomerToShort', async () => {
95
+ const pairs = [
96
+ ['AbC', 'AbC'],
97
+ ['AbCd', 'Ab…'],
98
+ ['ABc', 'ABc'],
99
+ ['ABcd', 'AB…'],
100
+ ['A_b', 'A_b'],
101
+ ['A_bc', 'A…'],
102
+ ['Ab_c', 'Ab…'],
103
+ ['A1_b', 'A1…'],
104
+ ['Abc_d', 'Ab…'],
105
+ ['Abcd_e', 'Ab…'],
106
+ ['A-b', 'A-b'],
107
+ ['A-bc', 'A…'],
108
+ ['Ab-c', 'Ab…'],
109
+ ['A1-b', 'A1…'],
110
+ ['Abc-d', 'Ab…'],
111
+ ['Abcd-e', 'Ab…'],
112
+ ['A', 'A'],
113
+ ['Ab', 'Ab'],
114
+ ['Abc', 'Abc'],
115
+ ['Ab…', 'Ab…'],
116
+ ['Abcd', 'Ab…'],
117
+ ['Abcde', 'Ab…'],
118
+ ];
119
+ const src: string[] = pairs.map((p) => p[0]);
120
+ const tgt: string[] = pairs.map((p) => p[1]);
121
+ const res: string [] = src.map((m) => monomerToShort(m, 3));
122
+ expectArray(res, tgt);
93
123
  });
94
124
  });
95
125
 
@@ -146,9 +146,9 @@ MWRSWY-CKHP`;
146
146
  const samples: { [key: string]: string } = {
147
147
  [Samples.fastaFasta]: 'System:AppData/Bio/data/sample_FASTA.fasta',
148
148
  [Samples.fastaPtCsv]: 'System:AppData/Bio/data/sample_FASTA_PT.csv',
149
- [Samples.msaComplex]: 'System:AppData/Bio/samples/sample_MSA.csv',
150
- [Samples.fastaCsv]: 'System:AppData/Bio/samples/sample_FASTA.csv',
151
- [Samples.helmCsv]: 'System:AppData/Bio/samples/sample_HELM.csv',
149
+ [Samples.msaComplex]: 'System:AppData/Bio/samples/MSA.csv',
150
+ [Samples.fastaCsv]: 'System:AppData/Bio/samples/FASTA.csv',
151
+ [Samples.helmCsv]: 'System:AppData/Bio/samples/HELM.csv',
152
152
  [Samples.peptidesComplex]: 'System:AppData/Bio/tests/peptides_complex_msa.csv',
153
153
  [Samples.peptidesSimple]: 'System:AppData/Bio/tests/peptides_simple_msa.csv',
154
154
  [Samples.testDemogCsv]: 'System:AppData/Bio/tests/testDemog.csv',
@@ -26,10 +26,10 @@ category('renderers: monomerPlacer', () => {
26
26
  {src: {row: 1, x: 5}, tgt: {pos: 0}},
27
27
  {src: {row: 1, x: 6}, tgt: {pos: 0}},
28
28
  {src: {row: 1, x: 26}, tgt: {pos: 1}},
29
- {src: {row: 1, x: 170}, tgt: {pos: 6}},
30
- {src: {row: 1, x: 208}, tgt: {pos: 7}},
31
- {src: {row: 2, x: 170}, tgt: {pos: 5}},
32
- {src: {row: 2, x: 175}, tgt: {pos: 5}},
29
+ {src: {row: 1, x: 160}, tgt: {pos: 6}},
30
+ {src: {row: 1, x: 185}, tgt: {pos: 7}},
31
+ {src: {row: 2, x: 140}, tgt: {pos: 5}},
32
+ {src: {row: 2, x: 145}, tgt: {pos: 5}},
33
33
  ]
34
34
  },
35
35
  splitterMsa: {
@@ -46,9 +46,9 @@ category('renderers: monomerPlacer', () => {
46
46
  {src: {row: 1, x: 1}, tgt: {pos: null}},
47
47
  {src: {row: 1, x: 26}, tgt: {pos: 0}},
48
48
  {src: {row: 1, x: 170}, tgt: {pos: 4}},
49
- {src: {row: 1, x: 227}, tgt: {pos: 5}},
50
- {src: {row: 2, x: 220}, tgt: {pos: 5}},
51
- {src: {row: 2, x: 227}, tgt: {pos: 5}},
49
+ {src: {row: 1, x: 200}, tgt: {pos: 5}},
50
+ {src: {row: 2, x: 200}, tgt: {pos: 5}},
51
+ {src: {row: 2, x: 203}, tgt: {pos: 5}},
52
52
  ]
53
53
  },
54
54
  fastaMsa: {
@@ -52,7 +52,7 @@ category('renderers', () => {
52
52
  }, {skipReason: 'GROK-11212'});
53
53
 
54
54
  async function _rendererMacromoleculeFasta() {
55
- const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
55
+ const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
56
56
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
57
57
 
58
58
  const seqCol = df.getCol('Sequence');
@@ -79,7 +79,7 @@ category('splitters', async () => {
79
79
  test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
80
80
 
81
81
  test('splitToMonomers', async () => {
82
- const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/sample_MSA.csv');
82
+ const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/MSA.csv');
83
83
 
84
84
  const seqCol = df.getCol('MSA');
85
85
  const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
@@ -1,7 +0,0 @@
1
- numpy==1.16.6
2
- pandas==0.24.2
3
- python-dateutil==2.8.2
4
- pytz==2022.5
5
- scikit-learn==0.17.1
6
- scipy==1.2.3
7
- six==1.16.0
@@ -1,27 +0,0 @@
1
- #!/usr/bin/env python2
2
- # -*- coding: utf-8 -*-
3
- """
4
- Created on Mon Feb 19 17:30:46 2018
5
-
6
- @author: cbdd
7
- """
8
- from sklearn.externals import joblib
9
- import numpy as np
10
- import pandas as pd
11
- import os
12
-
13
- ###################################### Load model ##########
14
- # current_path = os.path.split(os.path.realpath(__file__))[0]
15
- cf = joblib.load('CYP3A4-substrate.pkl')
16
-
17
- ###################################### Load descriptors ##########
18
- fingerprint_content = pd.read_csv('des.csv').ix[:, 1:]
19
- des_list = np.array(fingerprint_content)
20
-
21
- ###################################### Prediction ##########
22
- y_predict_label = cf.predict(des_list)
23
- y_predict_proba = cf.predict_proba(des_list)
24
- print('#' * 10 + 'Results labels' + '#' * 10)
25
- print(y_predict_label)
26
- print('#' * 10 + 'Results probabilities' + '#' * 10)
27
- print(y_predict_proba)