@datagrok/bio 2.4.18 → 2.4.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.18",
8
+ "version": "2.4.19",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,10 +14,10 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": "^3.1.0",
17
- "@datagrok-libraries/bio": "^5.29.0",
17
+ "@datagrok-libraries/bio": "^5.29.3",
18
18
  "@datagrok-libraries/chem-meta": "^1.0.1",
19
- "@datagrok-libraries/ml": "^6.3.16",
20
- "@datagrok-libraries/tutorials": "^1.2.1",
19
+ "@datagrok-libraries/ml": "^6.3.23",
20
+ "@datagrok-libraries/tutorials": "^1.3.1",
21
21
  "@datagrok-libraries/utils": "^2.1.3",
22
22
  "cash-dom": "^8.0.0",
23
23
  "css-loader": "^6.7.3",
@@ -16,7 +16,9 @@ export async function demoBio01UI() {
16
16
  let df: DG.DataFrame;
17
17
 
18
18
  try {
19
- const demoScript = new DemoScript('Demo', 'Sequence similarity / diversity search');
19
+ const demoScript = new DemoScript(
20
+ 'Similarity, Diversity',
21
+ 'Sequence similarity tracking and evaluation dataset diversity');
20
22
  await demoScript
21
23
  .step(`Load DNA sequences`, async () => {
22
24
  grok.shell.windows.showContextPanel = false;
@@ -30,7 +32,7 @@ export async function demoBio01UI() {
30
32
  // TODO: Fix column width
31
33
  }, {
32
34
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
33
- delay: 1200
35
+ delay: 2000
34
36
  })
35
37
  .step('Find the most similar sequences to the current one', async () => {
36
38
  const simViewer = await df.plot.fromType('Sequence Similarity Search', {
@@ -40,7 +42,7 @@ export async function demoBio01UI() {
40
42
  view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
41
43
  }, {
42
44
  description: `Add 'Sequence Similarity Search' viewer.`,
43
- delay: 1600
45
+ delay: 2000
44
46
  })
45
47
  .step('Explore most diverse sequences in a dataset', async () => {
46
48
  const divViewer = await df.plot.fromType('Sequence Diversity Search', {
@@ -50,19 +52,19 @@ export async function demoBio01UI() {
50
52
  view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
51
53
  }, {
52
54
  description: `Add 'Sequence Deversity Search' viewer.`,
53
- delay: 1600
55
+ delay: 2000
54
56
  })
55
57
  .step('Choose another sequence for similarity search', async () => {
56
58
  df.currentRowIdx = 3;
57
59
  }, {
58
60
  description: 'Handling current row changed of data frame showing update of similar sequences.',
59
- delay: 1600,
61
+ delay: 2000,
60
62
  })
61
63
  .step('One more sequence for similarity search', async () => {
62
64
  df.currentRowIdx = 7;
63
65
  }, {
64
66
  description: 'Just one more sequence to search similar ones.',
65
- delay: 1600,
67
+ delay: 2000,
66
68
  })
67
69
  .start();
68
70
  } catch (err: any) {
@@ -26,7 +26,9 @@ export async function demoBio01aUI() {
26
26
  const embedCols: { [colName: string]: DG.Column<number> } = {};
27
27
 
28
28
  try {
29
- const demoScript = new DemoScript('Demo', 'Exploring sequence space');
29
+ const demoScript = new DemoScript(
30
+ 'Demo',
31
+ 'Exploring sequence space of Macromolecules, comparison with hierarchical clustering results');
30
32
  await demoScript
31
33
  .step(`Load DNA sequences`, async () => {
32
34
  [df, treeHelper, dendrogramSvc] = await Promise.all([
@@ -41,13 +43,13 @@ export async function demoBio01aUI() {
41
43
  grok.shell.windows.showProperties = false;
42
44
  }, {
43
45
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
44
- delay: 1600,
46
+ delay: 2000,
45
47
  })
46
48
  .step('Build sequence space', async () => {
47
49
  spViewer = await demoSequenceSpace(view, df, seqColName, method);
48
50
  }, {
49
51
  description: `Reduce sequence space dimensionality to display on 2D representation.`,
50
- delay: 1600
52
+ delay: 2000
51
53
  })
52
54
  .step('Cluster sequences', async () => {
53
55
  const seqCol: DG.Column<string> = df.getCol(seqColName);
@@ -60,20 +62,20 @@ export async function demoBio01aUI() {
60
62
  dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
61
63
  }, {
62
64
  description: `Perform hierarchical clustering to reveal relationships between sequences.`,
63
- delay: 1600,
65
+ delay: 2000,
64
66
  })
65
67
  .step('Select a sequence', async () => {
66
68
  df.selection.init((idx: number) => [15].includes(idx));
67
69
  }, {
68
70
  description: `Handling selection of data frame row reflecting on linked viewers.`,
69
- delay: 1600,
71
+ delay: 2000,
70
72
  })
71
73
  .step('Select a bunch of sequences', async () => {
72
74
  df.selection.init((idx: number) => [21, 9, 58].includes(idx));
73
75
  df.currentRowIdx = 27;
74
76
  }, {
75
77
  description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
76
- delay: 1600,
78
+ delay: 2000,
77
79
  })
78
80
  .start();
79
81
  } catch (err: any) {
@@ -13,20 +13,23 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
13
13
  import {handleError} from './utils';
14
14
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
15
15
 
16
- const dataFn = 'samples/sample_FASTA.csv';
16
+ const dataFn: string = 'samples/sample_FASTA.csv';
17
17
 
18
18
  export async function demoBio01bUI() {
19
19
  let treeHelper: ITreeHelper;
20
20
  let dendrogramSvc: IDendrogramService;
21
- let view: DG.TableView;
21
+
22
22
  let df: DG.DataFrame;
23
+ let view: DG.TableView;
23
24
  let activityCliffsViewer: DG.ScatterPlotViewer;
24
25
 
25
26
  const method: string = 'UMAP';
26
27
  const idRows: { [id: number]: number } = {};
27
28
 
28
29
  try {
29
- const demoScript = new DemoScript('Demo', '');
30
+ const demoScript = new DemoScript(
31
+ 'Activity Cliffs',
32
+ 'Activity Cliffs analysis on Macromolecules data');
30
33
  await demoScript
31
34
  .step(`Load DNA sequences`, async () => {
32
35
  grok.shell.windows.showContextPanel = false;
@@ -46,7 +49,7 @@ export async function demoBio01bUI() {
46
49
  lengthGCol.width = 0;
47
50
  }, {
48
51
  description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
49
- delay: 1600,
52
+ delay: 2000,
50
53
  })
51
54
  .step('Find activity cliffs', async () => {
52
55
  activityCliffsViewer = (await activityCliffs(
@@ -60,7 +63,7 @@ export async function demoBio01bUI() {
60
63
  cliffsLink.click();
61
64
  }, {
62
65
  description: 'Reveal similar sequences with a cliff of activity.',
63
- delay: 1600
66
+ delay: 2000
64
67
  })
65
68
  .step('Cluster sequences', async () => {
66
69
  const seqCol: DG.Column<string> = df.getCol('sequence');
@@ -77,7 +80,7 @@ export async function demoBio01bUI() {
77
80
  activityGCol.scrollIntoView();
78
81
  }, {
79
82
  description: 'Perform hierarchical clustering to reveal relationships between sequences.',
80
- delay: 1600
83
+ delay: 2000
81
84
  })
82
85
  .step('Browse the cliff', async () => {
83
86
  //cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
@@ -97,7 +100,7 @@ export async function demoBio01bUI() {
97
100
  // }
98
101
  }, {
99
102
  description: 'Zoom in to explore selected activity cliff details.',
100
- delay: 1600
103
+ delay: 2000
101
104
  })
102
105
  .start();
103
106
  } catch (err: any) {
@@ -7,12 +7,14 @@ import {_package, toAtomicLevel} from '../package';
7
7
  import $ from 'cash-dom';
8
8
  import {handleError} from './utils';
9
9
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
10
+ import {delay} from '@datagrok-libraries/utils/src/test';
10
11
 
11
- const dataFn: string = 'sample/sample_FASTA.csv';
12
+ const dataFn: string = 'samples/sample_FASTA.csv';
12
13
 
13
14
  export async function demoBio03UI(): Promise<void> {
14
15
  let df: DG.DataFrame;
15
16
  let view: DG.TableView;
17
+ let dlg: DG.Dialog;
16
18
 
17
19
  try {
18
20
  await new DemoScript(
@@ -20,6 +22,9 @@ export async function demoBio03UI(): Promise<void> {
20
22
  'Atomic level structure of Macromolecules'
21
23
  )
22
24
  .step(`Loading Macromolecules notation 'Helm'`, async () => {
25
+ grok.shell.windows.showContextPanel = false;
26
+ grok.shell.windows.showProperties = false;
27
+
23
28
  df = await _package.files.readCsv(dataFn);
24
29
  view = grok.shell.addTableView(df);
25
30
  for (let colI: number = 0; colI < view.grid.columns.length; colI++) {
@@ -28,14 +33,31 @@ export async function demoBio03UI(): Promise<void> {
28
33
  }
29
34
  }, {
30
35
  description: `Load dataset with macromolecules of 'fasta' notation, 'PT' alphabet (protein, aminoacids).`,
31
- delay: 1600,
36
+ delay: 2000,
32
37
  })
33
38
  .step('To atomic level', async () => {
34
39
  const seqCol = df.getCol('Sequence');
35
40
  await toAtomicLevel(df, seqCol);
36
41
  }, {
37
42
  description: 'Get atomic level structures of Macromolecules.',
38
- delay: 1600,
43
+ delay: 2000,
44
+ })
45
+ .step('Sketcher', async () => {
46
+ const molColName: string = 'molfile(Sequence)';
47
+ df.currentCell = df.cell(1, molColName);
48
+ const mol: string = df.currentCell.value;
49
+
50
+ const sketcher = new DG.chem.Sketcher(DG.chem.SKETCHER_MODE.INPLACE);
51
+ sketcher.setMolFile(mol);
52
+
53
+ dlg = ui.dialog()
54
+ .add(sketcher)
55
+ .show();
56
+ await delay(3000);
57
+ dlg.close();
58
+ }, {
59
+ description: 'Display atomic level structure within a sketcher.',
60
+ delay: 2000,
39
61
  })
40
62
  .start();
41
63
  } catch (err: any) {
@@ -24,7 +24,9 @@ export async function demoBio05UI(): Promise<void> {
24
24
  const msaHelmColName: string = 'msa(HELM)';
25
25
 
26
26
  try {
27
- const demoScript = new DemoScript('Demo', 'MSA and composition analysis on Helm data.');
27
+ const demoScript = new DemoScript(
28
+ 'Helm, MSA, Sequence Space',
29
+ 'MSA and composition analysis on Helm data');
28
30
  await demoScript
29
31
  .step(`Load peptides with non-natural aminoacids in 'HELM' notation`, async () => {
30
32
  view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
@@ -33,7 +35,7 @@ export async function demoBio05UI(): Promise<void> {
33
35
  grok.shell.windows.showProperties = false;
34
36
  }, {
35
37
  description: 'Load dataset with macromolecules of \'Helm\' notation.',
36
- delay: 1600,
38
+ delay: 2000,
37
39
  })
38
40
  .step('Align paptides with non-natural aminoacids with PepSeA', async () => {
39
41
  helmCol = df.getCol(helmColName);
@@ -45,7 +47,7 @@ export async function demoBio05UI(): Promise<void> {
45
47
  await grok.data.detectSemanticTypes(df);
46
48
  }, {
47
49
  description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
48
- delay: 1600,
50
+ delay: 2000,
49
51
  })
50
52
  .step('Build sequence space', async () => {
51
53
  const method: string = 'UMAP';
@@ -54,7 +56,7 @@ export async function demoBio05UI(): Promise<void> {
54
56
  view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
55
57
  }, {
56
58
  description: 'Reduce sequence space dimensionality to display on 2D representation.',
57
- delay: 1600
59
+ delay: 2000
58
60
  })
59
61
  .step('Analyse sequence composition', async () => {
60
62
  wlViewer = await df.plot.fromType('WebLogo', {
@@ -64,7 +66,7 @@ export async function demoBio05UI(): Promise<void> {
64
66
  view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
65
67
  }, {
66
68
  description: 'Composition analysis allows to reveal functional features of sequences like motifs, or variable loops.',
67
- delay: 1600,
69
+ delay: 2000,
68
70
  })
69
71
  .start();
70
72
  } catch (err: any) {
@@ -22,6 +22,7 @@ import './tests/substructure-filters-tests';
22
22
  import './tests/pepsea-tests';
23
23
  import './tests/viewers';
24
24
  import './tests/units-handler-tests';
25
+ import './tests/mm-distance-tests';
25
26
 
26
27
  // Tests hanging github CI
27
28
  import './tests/activity-cliffs-tests';
@@ -0,0 +1,138 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {category, expect, test} from '@datagrok-libraries/utils/src/test';
6
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
+ import {MmDistanceFunctionsNames, mmDistanceFunctions}
8
+ from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
9
+
10
+ category('Distance', async () => {
11
+ const scoringMatrix = [
12
+ [1, 0, 0, 0],
13
+ [0, 1, 0, 0],
14
+ [0, 0, 1, 0],
15
+ [0, 0, 0, 1],
16
+ ];
17
+
18
+ const alphabetIndexes = {'F': 0, 'W': 1, 'R': 2, 'Y': 3};
19
+
20
+ const prot1 = 'FWRWY';
21
+ const prot2 = 'FWRWW';
22
+
23
+ const prot3 = 'FWY';
24
+ const prot4 = 'FWRWY';
25
+
26
+ const prot5 = 'FWY';
27
+ const prot6 = 'FWRRRRY';
28
+
29
+ const protTable = `seq
30
+ FWRWYVKHP
31
+ YNRWYVKHP
32
+ MWRSWYCKHP`;
33
+
34
+ const DNATable = `seq
35
+ ATAACG
36
+ ATCGA
37
+ ATCGA`;
38
+
39
+ const MSATable = `seq
40
+ ATAAC
41
+ ATCGA
42
+ ATCGA`;
43
+ test('protein-distance-function', async () => {
44
+ const uh = await _initMacromoleculeColumn(protTable);
45
+ const distFunc = uh.getDistanceFunctionName();
46
+ expect(distFunc, MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH);
47
+ });
48
+
49
+ test('DNA-distance-function', async () => {
50
+ const uh = await _initMacromoleculeColumn(DNATable);
51
+ const distFunc = uh.getDistanceFunctionName();
52
+ expect(distFunc, MmDistanceFunctionsNames.LEVENSHTEIN);
53
+ });
54
+
55
+ test('MSA-distance-function', async () => {
56
+ const uh = await _initMacromoleculeColumn(MSATable);
57
+ const distFunc = uh.getDistanceFunctionName();
58
+ expect(distFunc, MmDistanceFunctionsNames.HAMMING);
59
+ });
60
+
61
+ test('levenstein-sub', async () => {
62
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.LEVENSHTEIN]();
63
+ _testDistance(prot1, prot2, df, 1);
64
+ });
65
+ test('levenstein-del', async () => {
66
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.LEVENSHTEIN]();
67
+ _testDistance(prot3, prot4, df, 2);
68
+ });
69
+
70
+ test('hamming', async () => {
71
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.HAMMING]();
72
+ _testDistance(prot3, prot4, df, 3);
73
+ });
74
+
75
+ // Note that here the result is actually an inverted value of the alignment score, which is correlated with distance
76
+ // tests using default BLOSUM62 matrix are in agreement with the results of the online tool
77
+ test('needleman-blosum62', async () => {
78
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]();
79
+ _testDistance(prot1, prot2, df, -35);
80
+ });
81
+
82
+ test('needleman-blosum62-del', async () => {
83
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]();
84
+ _testDistance(prot3, prot4, df, -14);
85
+ });
86
+
87
+ test('needleman-custom-sub', async () => {
88
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
89
+ {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
90
+ );
91
+ _testDistance(prot1, prot2, df, -4);
92
+ });
93
+
94
+ test('needleman-custom-del', async () => {
95
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
96
+ {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
97
+ );
98
+ _testDistance(prot3, prot4, df, -1);
99
+ });
100
+
101
+ test('needleman-custom-zero-extend', async () => {
102
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
103
+ {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 0}
104
+ );
105
+ _testDistance(prot5, prot6, df, -2);
106
+ });
107
+
108
+ test('needleman-custom-half-extend', async () => {
109
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
110
+ {scoringMatrix, alphabetIndexes, gapOpen: 2, gapExtend: 1}
111
+ );
112
+ _testDistance(prot5, prot6, df, 2);
113
+ });
114
+
115
+ test('needleman-custom-same-extend', async () => {
116
+ const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
117
+ {scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
118
+ );
119
+ _testDistance(prot5, prot6, df, 1);
120
+ });
121
+ });
122
+
123
+ async function _initMacromoleculeColumn(csv: string): Promise<UnitsHandler> {
124
+ const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(csv);
125
+ const seqCol = srcDf.col('seq')!;
126
+ const semType: string = await grok.functions
127
+ .call('Bio:detectMacromolecule', {col: seqCol}) as unknown as string;
128
+ if (semType)
129
+ seqCol.semType = semType;
130
+ await grok.data.detectSemanticTypes(srcDf);
131
+ const uh = new UnitsHandler(seqCol);
132
+ return uh;
133
+ }
134
+
135
+ function _testDistance(seq1: string, seq2: string, df: (a: string, b: string) => number, expected: number) {
136
+ const d = df(seq1, seq2);
137
+ expect(d, expected);
138
+ }