@datagrok/bio 2.4.12 → 2.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.12",
8
+ "version": "2.4.13",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -17,6 +17,7 @@
17
17
  "@datagrok-libraries/bio": "^5.28.4",
18
18
  "@datagrok-libraries/chem-meta": "^1.0.1",
19
19
  "@datagrok-libraries/ml": "^6.3.16",
20
+ "@datagrok-libraries/tutorials": "^1.2.1",
20
21
  "@datagrok-libraries/utils": "^2.1.3",
21
22
  "cash-dom": "^8.0.0",
22
23
  "css-loader": "^6.7.3",
@@ -32,12 +33,12 @@
32
33
  "devDependencies": {
33
34
  "@types/node": "^17.0.24",
34
35
  "@types/wu": "latest",
35
- "@typescript-eslint/eslint-plugin": "^4.20.0",
36
- "@typescript-eslint/parser": "^4.20.0",
37
- "eslint": "^7.23.0",
36
+ "@typescript-eslint/eslint-plugin": "latest",
37
+ "@typescript-eslint/parser": "latest",
38
+ "eslint": "latest",
38
39
  "eslint-config-google": "latest",
39
40
  "ts-loader": "^9.2.5",
40
- "typescript": "^4.2.3",
41
+ "typescript": "^5.0.4",
41
42
  "webpack": "^5.76.0",
42
43
  "webpack-bundle-analyzer": "latest",
43
44
  "webpack-cli": "^4.6.0",
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import random
4
+ from math import sqrt
5
+ import argparse
6
+ import sys
7
+
8
+ from typing import List, Tuple
9
+
10
+ letter_choice_type = List[str]
11
+ motif_template_type = List[letter_choice_type]
12
+
13
+ default_alphabet = 'A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y'
14
+
15
+ def meanrange(mean:int,disp:int) -> int:  # Random integer drawn uniformly from [mean - disp, mean + disp], both ends inclusive
16
+ return random.randint(mean - disp, mean + disp)
17
+
18
+ def generate_modif_template(motif_length:int, alphabet:List[str], max_variants_cluster:int, prob_any:float=0.2) -> motif_template_type: # Builds a motif template: one list of allowed letters per position, from which concrete random motifs are later drawn
19
+ motif_template = []
20
+ for position in range(motif_length):
21
+ # Selecting letters for this position; the first and last positions are never turned into a wildcard
22
+ if (0 < position < motif_length-1) and (random.random() <= prob_any):
23
+ letters = ['?'] # '?' is the wildcard marker: any symbol of the alphabet is allowed here
24
+ else:
25
+ n_variants = random.randrange(max_variants_cluster) + 1  # between 1 and max_variants_cluster letters
26
+ letters = [ random.choice(alphabet) for i in range(n_variants)]  # drawn with replacement, so the same letter may repeat
27
+ motif_template.append(letters)
28
+ return motif_template
29
+
30
+ def generate_motif(template: motif_template_type, alphabet:List[str]) -> str:  # Draws one concrete motif string from the template
31
+ # Substituting the whole alphabet for every position whose letter list contains the '?' wildcard
32
+ template_with_any = [ (letters if not '?' in letters else alphabet) for letters in template ]
33
+ return ''.join([ random.choice(letters) for letters in template_with_any ])  # one random letter per position
34
+
35
+ def motif_notation(motif_template: motif_template_type) -> str:  # Renders a template as a compact string, e.g. A[CD]?E
36
+ def motif_notation_code(letter_choice:letter_choice_type) -> str:  # notation for a single position
37
+ if len(letter_choice) == 1:
38
+ return(letter_choice[0])  # a single allowed letter (or the '?' wildcard) is written as-is
39
+ else:
40
+ return f"[{''.join(letter_choice)}]"  # several alternatives are wrapped in brackets; repeated letters are not removed
41
+
42
+ return ''.join([ motif_notation_code(letter_choice) for letter_choice in motif_template])
43
+
44
+ def generate_random(n:int, alphabet:List[str]) -> str:  # Random string of n symbols, each drawn independently from alphabet
45
+ return ''.join([ random.choice(alphabet) for i in range(n) ])  # range(n) is empty for n <= 0, giving ''
46
+
47
+ def make_cliff(motif_template:motif_template_type, alphabet:List[str] , motif:str) -> str:  # Returns motif with one non-wildcard position replaced by a letter the template does not allow there
48
+ # Picking a non-wildcard position: start at a random one and scan forward with wrap-around
49
+ pos = random.randrange(len(motif_template))
50
+ while '?' in motif_template[pos]:
51
+ pos = (pos + 1) % len(motif_template) # terminates for templates built by generate_modif_template, which never makes the first or last position a wildcard
52
+ outlier_letters = list(set(alphabet) - set (motif_template[pos]))  # NOTE(review): empty when this position already allows every alphabet letter; random.choice below would then raise IndexError
53
+ return motif[:pos] + random.choice(outlier_letters) + motif[pos+1:]  # swap in the outlier letter at pos, keep the rest
54
+
55
+ # ====================================================================================
56
+
57
+ parser = argparse.ArgumentParser(prog='MotifSequencesGenerator',
58
+ description='The program generates set of sequences containing sequence motifs for SAR fucntionality testing',
59
+ epilog='Unitity support: Gennadii Zakharov ')
60
+
61
+ parser.add_argument("-a", "--alphabet", type=str, default=default_alphabet, help="Alphabet to generate sequences, separated by comma",)
62
+ parser.add_argument("-c", "--clusters", type=int, default=1, help="Number of clusters")
63
+ parser.add_argument("-s", "--sequences", type=int, default=500, help="Number of sequences in each cluster",)
64
+ parser.add_argument("-m,", "--motif", type=int, default=12, help="Average length of motif",)
65
+ parser.add_argument("-r,", "--random", type=int, default=4, help="Average length of random sequence parts before and after motif",)
66
+ parser.add_argument("-d,", "--dispersion", type=int, default=2, help="Variation of total sequence lengths",)
67
+
68
+ parser.add_argument("--max-variants-position", type=int, default=3, help="maximum number of different letters in motif position",)
69
+ parser.add_argument("--cliff-probability", type=float, default=0.01, help="Probabaility to make activity cliff of a sequence",)
70
+ parser.add_argument("--cliff-strength", type=float, default=4.0, help="Strength of cliff",)
71
+
72
+ args = parser.parse_args()
73
+
74
+ alphabet:List[str] = args.alphabet.split(',')  # split the comma-separated alphabet into individual symbols
75
+
76
+ print('cluster\tsequence_id\tsequence\tactivity\tis_cliff')  # TSV header row on stdout
77
+
78
+ line_number = 0  # running counter used in sequence ids; not reset between clusters
79
+
80
+ for n_cluster in range(args.clusters):
81
+ activity_average = random.random() * 10  # mean activity for this cluster, in [0, 10)
82
+ activity_dispersion = random.random()  # used below as the sigma of the Gaussian activity noise, in [0, 1)
83
+
84
+ # Generating the motif template for this cluster
85
+ motif_length = meanrange(args.motif, args.dispersion)
86
+ motif_template = generate_modif_template(motif_length, alphabet, args.max_variants_position)
87
+ sys.stderr.write(f"Cluster {n_cluster:2} motif template: {motif_notation(motif_template)}\n")  # diagnostics go to stderr; the data rows are printed to stdout
88
+
89
+ total_length = meanrange(args.random * 2, args.dispersion) + motif_length  # lengths are drawn before the per-sequence loop starts
90
+ prefix_length = meanrange(args.random, args.dispersion//2)
91
+ suffix_length = total_length - motif_length - prefix_length  # whatever remains after the prefix and the motif
92
+
93
+ cliff_made = False  # NOTE(review): never read anywhere in the visible script
94
+ for n_seq in range(args.sequences):
95
+ line_number +=1
96
+ activity = random.gauss(activity_average, activity_dispersion)
97
+
98
+ motif = generate_motif(motif_template, alphabet)
99
+ prefix = generate_random(prefix_length, alphabet)
100
+ suffix = generate_random(suffix_length, alphabet)
101
+ seq = prefix + motif + suffix
102
+
103
+ is_cliff = random.random() <= args.cliff_probability  # decide whether this sequence gets an activity-cliff partner
104
+ if is_cliff:
105
+ # Making the activity-cliff partner: same prefix and suffix, motif altered by make_cliff
106
+ cliff_motif = make_cliff(motif_template, alphabet, motif)
107
+ cliff_seq = prefix + cliff_motif + suffix
108
+ # Recalculating activity: the pair is placed symmetrically around the cluster mean
109
+ cliff_disp = activity_dispersion * args.cliff_strength * (0.5 + random.random())
110
+ activity = activity_average - cliff_disp  # the regular sequence goes below the mean
111
+ cliff_activity = activity_average + cliff_disp  # its partner goes above by the same amount
112
+
113
+ sys.stderr.write(f"Cliff for sequence #{line_number:4}, cluster {n_cluster} \n")
114
+ sys.stderr.write(f"{activity_average}\t{motif}\t{activity}\n")
115
+ sys.stderr.write(f"{activity_average}\t{cliff_motif}\t{cliff_activity}\n")
116
+ print(f"{n_cluster}\tc{n_cluster}_seq{line_number}\t{cliff_seq}\t{cliff_activity:5.2f}\t{is_cliff}")  # the partner row is emitted first
117
+ line_number +=1  # the partner row used the current id; take a fresh one for the regular row
118
+ print(f"{n_cluster}\tc{n_cluster}_seq{line_number}\t{seq}\t{activity:5.2f}\t{is_cliff}")  # NOTE(review): indentation is lost in this rendering; presumably this runs for every sequence, not only in the cliff branch - confirm
119
+
@@ -3,43 +3,54 @@ import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {_package} from '../package';
6
+ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
6
7
  import {delay} from '@datagrok-libraries/utils/src/test';
7
- import {step} from './utils';
8
+ import {handleError} from './utils';
8
9
 
9
10
  const dataFn = 'data/sample_FASTA_DNA.csv';
10
11
 
11
- export async function demoBio01UI(funcPath: string) {
12
+ export async function demoBio01UI() {
12
13
  let view: DG.TableView;
13
14
  let df: DG.DataFrame;
14
15
 
15
16
  try {
16
- await step(`Loading DNA notation 'fasta'.`, async () => {
17
- df = await _package.files.readCsv(dataFn);
18
- view = grok.shell.addTableView(df);
19
- view.path = view.basePath = funcPath;
20
- })();
21
-
22
- await step('Sequence similarity search.', async () => {
23
- const simViewer = await df.plot.fromType('Sequence Similarity Search') as DG.Viewer;
24
- view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
25
- })();
26
-
27
- await step('Sequence diversity search.', async () => {
28
- const divViewer = await df.plot.fromType('Sequence Diversity Search') as DG.Viewer;
29
- view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
30
- })();
31
-
32
- await step('Current row 3.', async () => {
33
- df.currentRowIdx = 3;
34
- })();
35
-
36
- await step('Current row 7', async () => {
37
- df.currentRowIdx = 7;
38
- });
17
+ const demoScript = new DemoScript('Demo', 'Sequence similarity / diversity search');
18
+ await demoScript
19
+ .step(`Loading DNA notation 'fasta'`, async () => {
20
+ df = await _package.files.readCsv(dataFn);
21
+ view = grok.shell.addTableView(df);
22
+ }, {
23
+ description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
24
+ delay: 1600
25
+ })
26
+ .step('Sequence similarity search', async () => {
27
+ const simViewer = await df.plot.fromType('Sequence Similarity Search') as DG.Viewer;
28
+ view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
29
+ }, {
30
+ description: `Add 'Sequence Similarity Search' viewer.`,
31
+ delay: 1600
32
+ })
33
+ .step('Sequence diversity search', async () => {
34
+ const divViewer = await df.plot.fromType('Sequence Diversity Search') as DG.Viewer;
35
+ view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
36
+ }, {
37
+ description: `Add 'Sequence Deversity Search' viewer.`,
38
+ delay: 1600
39
+ })
40
+ .step('Set current row 3', async () => {
41
+ df.currentRowIdx = 3;
42
+ }, {
43
+ description: 'Handling current row changed of data frame showing update of similar sequences.',
44
+ delay: 1600,
45
+ })
46
+ .step('Set current row 7', async () => {
47
+ df.currentRowIdx = 7;
48
+ }, {
49
+ description: 'Changing current row to another.',
50
+ delay: 1600,
51
+ })
52
+ .start();
39
53
  } catch (err: any) {
40
- if (err instanceof Error)
41
- _package.logger.error(err.message, undefined, err.stack);
42
- else
43
- _package.logger.error(err.toString());
54
+ handleError(err);
44
55
  }
45
56
  }
@@ -8,12 +8,13 @@ import * as lev from 'fastest-levenshtein';
8
8
  import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
9
9
  import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
10
10
  import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
11
- import {demoSequenceSpace, step} from './utils';
11
+ import {demoSequenceSpace, handleError} from './utils';
12
+ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
12
13
 
13
14
  const dataFn = 'data/sample_FASTA_DNA.csv';
14
15
  const seqColName = 'sequence';
15
16
 
16
- export async function demoBio01aUI(funcPath: string) {
17
+ export async function demoBio01aUI() {
17
18
  let treeHelper: ITreeHelper;
18
19
  let dendrogramSvc: IDendrogramService;
19
20
  let view: DG.TableView;
@@ -25,44 +26,54 @@ export async function demoBio01aUI(funcPath: string) {
25
26
  const embedCols: { [colName: string]: DG.Column<number> } = {};
26
27
 
27
28
  try {
28
- await step(`Loading DNA notation 'fasta'.`, async () => {
29
- [df, treeHelper, dendrogramSvc] = await Promise.all([
30
- _package.files.readCsv(dataFn),
31
- getTreeHelper(),
32
- getDendrogramService()
33
- ]);
34
- view = grok.shell.addTableView(df);
35
- view.grid.props.rowHeight = 22;
36
- view.path = view.basePath = funcPath;
37
- })();
38
-
39
- await step('Building sequence space.', async () => {
40
- spViewer = await demoSequenceSpace(view, df, seqColName, method);
41
- })();
42
-
43
- await step('Hierarchical clustering.', async () => {
44
- const seqCol: DG.Column<string> = df.getCol(seqColName);
45
- const seqList = seqCol.toList();
46
- const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
47
- const levDistance = lev.distance(aSeq, bSeq);
48
- return levDistance / ((aSeq.length + bSeq.length) / 2);
49
- });
50
- const treeRoot = await treeHelper.hierarchicalClusteringByDistance(distance, 'ward');
51
- dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
52
- })();
53
-
54
- await step('Selection.', async () => {
55
- df.selection.init((idx: number) => [15].includes(idx));
56
- })();
57
-
58
- await step('Select a bunch of sequences.', async () => {
59
- df.selection.init((idx: number) => [21, 9, 58].includes(idx));
60
- df.currentRowIdx = 27;
61
- })();
29
+ const demoScript = new DemoScript('Demo', 'Exploring sequence space');
30
+ await demoScript
31
+ .step(`Loading DNA notation 'fasta'`, async () => {
32
+ [df, treeHelper, dendrogramSvc] = await Promise.all([
33
+ _package.files.readCsv(dataFn),
34
+ getTreeHelper(),
35
+ getDendrogramService()
36
+ ]);
37
+ view = grok.shell.addTableView(df);
38
+ view.grid.props.rowHeight = 22;
39
+ }, {
40
+ description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
41
+ delay: 1600,
42
+ })
43
+ .step('Building sequence space', async () => {
44
+ spViewer = await demoSequenceSpace(view, df, seqColName, method);
45
+ }, {
46
+ description: `Reduce sequence space dimensionality to display on 2D representation.`,
47
+ delay: 1600
48
+ })
49
+ .step('Hierarchical clustering', async () => {
50
+ const seqCol: DG.Column<string> = df.getCol(seqColName);
51
+ const seqList = seqCol.toList();
52
+ const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
53
+ const levDistance = lev.distance(aSeq, bSeq);
54
+ return levDistance / ((aSeq.length + bSeq.length) / 2);
55
+ });
56
+ const treeRoot = await treeHelper.hierarchicalClusteringByDistance(distance, 'ward');
57
+ dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
58
+ }, {
59
+ description: `Perform hierarchical clustering to reveal relationships between sequences.`,
60
+ delay: 1600,
61
+ })
62
+ .step('Selection', async () => {
63
+ df.selection.init((idx: number) => [15].includes(idx));
64
+ }, {
65
+ description: `Handling selection of data frame row reflecting on linked viewers.`,
66
+ delay: 1600,
67
+ })
68
+ .step('Select a bunch of sequences', async () => {
69
+ df.selection.init((idx: number) => [21, 9, 58].includes(idx));
70
+ df.currentRowIdx = 27;
71
+ }, {
72
+ description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
73
+ delay: 1600,
74
+ })
75
+ .start();
62
76
  } catch (err: any) {
63
- if (err instanceof Error)
64
- _package.logger.error(err.message, undefined, err.stack);
65
- else
66
- _package.logger.error(err.toString());
77
+ handleError(err);
67
78
  }
68
79
  }
@@ -10,11 +10,12 @@ import * as lev from 'fastest-levenshtein';
10
10
  import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
11
11
  import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
12
12
  import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
13
- import {step} from './utils';
13
+ import {handleError} from './utils';
14
+ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
14
15
 
15
16
  const dataFn = 'samples/sample_FASTA.csv';
16
17
 
17
- export async function demoBio01bUI(funcPath: string) {
18
+ export async function demoBio01bUI() {
18
19
  let treeHelper: ITreeHelper;
19
20
  let dendrogramSvc: IDendrogramService;
20
21
  let view: DG.TableView;
@@ -25,70 +26,78 @@ export async function demoBio01bUI(funcPath: string) {
25
26
  const idRows: { [id: number]: number } = {};
26
27
 
27
28
  try {
28
- await step('Loading DNA notation \'fasta\'.', async () => {
29
- [df, treeHelper, dendrogramSvc] = await Promise.all([
30
- _package.files.readCsv(dataFn),
31
- getTreeHelper(),
32
- getDendrogramService()
33
- ]);
29
+ const demoScript = new DemoScript('Demo', '');
30
+ await demoScript
31
+ .step(`Loading DNA notation \'fasta\'`, async () => {
32
+ [df, treeHelper, dendrogramSvc] = await Promise.all([
33
+ _package.files.readCsv(dataFn),
34
+ getTreeHelper(),
35
+ getDendrogramService()
36
+ ]);
34
37
 
35
- view = grok.shell.addTableView(df);
36
- view.path = view.basePath = funcPath;
37
- view.grid.props.rowHeight = 22;
38
- const uniProtKbGCol = view.grid.columns.byName('UniProtKB')!;
39
- uniProtKbGCol.width = 75;
40
- const lengthGCol = view.grid.columns.byName('Length')!;
41
- lengthGCol.width = 0;
42
- })();
38
+ view = grok.shell.addTableView(df);
39
+ view.grid.props.rowHeight = 22;
40
+ const uniProtKbGCol = view.grid.columns.byName('UniProtKB')!;
41
+ uniProtKbGCol.width = 75;
42
+ const lengthGCol = view.grid.columns.byName('Length')!;
43
+ lengthGCol.width = 0;
44
+ }, {
45
+ description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
46
+ delay: 1600,
47
+ })
48
+ .step('Analyze for activity cliffs', async () => {
49
+ activityCliffsViewer = (await activityCliffs(
50
+ df, df.getCol('Sequence'), df.getCol('Activity'),
51
+ 80, method)) as DG.ScatterPlotViewer;
52
+ view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
43
53
 
44
- await step('Analyze for activity cliffs.', async () => {
45
- activityCliffsViewer = (await activityCliffs(
46
- df, df.getCol('Sequence'), df.getCol('Activity'),
47
- 80, method)) as DG.ScatterPlotViewer;
48
- view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
54
+ // Show grid viewer with the cliffs
55
+ const cliffsLink: HTMLButtonElement = $(activityCliffsViewer.root)
56
+ .find('button.scatter_plot_link,cliffs_grid').get()[0] as HTMLButtonElement;
57
+ cliffsLink.click();
58
+ }, {
59
+ description: 'Reveal similar sequences with a cliff of activity.',
60
+ delay: 1600
61
+ })
62
+ .step('Hierarchical clustering', async () => {
63
+ const seqCol: DG.Column<string> = df.getCol('sequence');
64
+ const seqList = seqCol.toList();
65
+ const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
66
+ const levDistance = lev.distance(aSeq, bSeq);
67
+ return levDistance / ((aSeq.length + bSeq.length) / 2);
68
+ });
69
+ const treeRoot = await treeHelper.hierarchicalClusteringByDistance(distance, 'ward');
70
+ dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
49
71
 
50
- // Show grid viewer with the cliffs
51
- const cliffsLink: HTMLButtonElement = $(activityCliffsViewer.root)
52
- .find('button.scatter_plot_link,cliffs_grid').get()[0] as HTMLButtonElement;
53
- cliffsLink.click();
54
- })();
72
+ // adjust for visual
73
+ const activityGCol = view.grid.columns.byName('Activity')!;
74
+ activityGCol.scrollIntoView();
75
+ }, {
76
+ description: 'Perform hierarchical clustering to reveal relationships between sequences.',
77
+ delay: 1600
78
+ })
79
+ .step('Browse the cliff', async () => {
80
+ //cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
81
+ const cliffsDfGrid: DG.Grid = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
82
+ //cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
83
+ cliffsDfGrid.dataFrame.currentRowIdx = 0;
84
+ //cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);
55
85
 
56
- await step('Hierarchical clustering.', async () => {
57
- const seqCol: DG.Column<string> = df.getCol('sequence');
58
- const seqList = seqCol.toList();
59
- const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
60
- const levDistance = lev.distance(aSeq, bSeq);
61
- return levDistance / ((aSeq.length + bSeq.length) / 2);
62
- });
63
- const treeRoot = await treeHelper.hierarchicalClusteringByDistance(distance, 'ward');
64
- dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
65
-
66
- // adjust for visual
67
- const activityGCol = view.grid.columns.byName('Activity')!;
68
- activityGCol.scrollIntoView();
69
- })();
70
-
71
- await step('Browse the cliff.', async () => {
72
- //cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
73
- const cliffsDfGrid: DG.Grid = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
74
- //cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
75
- cliffsDfGrid.dataFrame.currentRowIdx = 0;
76
- //cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);
77
-
78
- // /* workaround to select rows of the cliff */
79
- // const entryCol: DG.Column = df.getCol('Entry');
80
- // df.selection.init((rowIdx) => ['UPI00000BFE1D', 'UPI00000BFE17'].includes(entryCol.get(rowIdx)));
81
- //
82
- // const selectionIdxList: Int32Array = df.selection.getSelectedIndexes();
83
- // if (selectionIdxList.length > 0) {
84
- // df.currentRowIdx = selectionIdxList[0];
85
- // view.grid.scrollToCell('UniProtKB', view.grid.tableRowToGrid(selectionIdxList[0]));
86
- // }
87
- })();
86
+ // /* workaround to select rows of the cliff */
87
+ // const entryCol: DG.Column = df.getCol('Entry');
88
+ // df.selection.init((rowIdx) => ['UPI00000BFE1D', 'UPI00000BFE17'].includes(entryCol.get(rowIdx)));
89
+ //
90
+ // const selectionIdxList: Int32Array = df.selection.getSelectedIndexes();
91
+ // if (selectionIdxList.length > 0) {
92
+ // df.currentRowIdx = selectionIdxList[0];
93
+ // view.grid.scrollToCell('UniProtKB', view.grid.tableRowToGrid(selectionIdxList[0]));
94
+ // }
95
+ }, {
96
+ description: 'Zoom in to explore selected activity cliff details.',
97
+ delay: 1600
98
+ })
99
+ .start();
88
100
  } catch (err: any) {
89
- if (err instanceof Error)
90
- _package.logger.error(err.message, undefined, err.stack);
91
- else
92
- _package.logger.error(err.toString());
101
+ handleError(err);
93
102
  }
94
103
  }
@@ -3,15 +3,16 @@ import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {_package, sequenceSpaceTopMenu} from '../package';
6
- import {step} from './utils';
6
+ import {handleError} from './utils';
7
7
 
8
8
  import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
9
9
  import {pepseaMethods, runPepsea} from '../utils/pepsea';
10
10
  import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
11
+ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
11
12
 
12
13
  const helmFn: string = 'samples/sample_HELM.csv';
13
14
 
14
- export async function demoBio05UI(funcPath: string): Promise<void> {
15
+ export async function demoBio05UI(): Promise<void> {
15
16
  let view: DG.TableView;
16
17
  let df: DG.DataFrame;
17
18
  let helmCol: DG.Column<string>;
@@ -23,38 +24,46 @@ export async function demoBio05UI(funcPath: string): Promise<void> {
23
24
  const msaHelmColName: string = 'msa(HELM)';
24
25
 
25
26
  try {
26
- await step(`Loading peptides notation 'HELM'.`, async () => {
27
- view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
28
- view.path = view.basePath = funcPath;
29
- })();
30
-
31
- await step('MSA on non-natural aminoacids with PepSeA.', async () => {
32
- helmCol = df.getCol(helmColName);
33
- const method: string = pepseaMethods[0];
34
- const gapOpen: number = 1.53;
35
- const gapExtend: number = 0;
36
- msaHelmCol = await runPepsea(helmCol, msaHelmColName, method, gapOpen, gapExtend, undefined);
37
- df.columns.add(msaHelmCol);
38
- await grok.data.detectSemanticTypes(df);
39
- })();
40
-
41
- await step('Composition analysis on MSA results', async () => {
42
- wlViewer = await df.plot.fromType('WebLogo', {
43
- sequenceColumnName: msaHelmColName
44
- }) as DG.Viewer & IWebLogoViewer;
45
- view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
46
- })();
47
-
48
- await step('Building sequence space.', async () => {
49
- const method: string = 'UMAP';
50
- ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
51
- 'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
52
- view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
53
- })();
27
+ const demoScript = new DemoScript('Demo', 'MSA and composition analysis on Helm data.');
28
+ await demoScript
29
+ .step(`Loading peptides notation 'HELM'`, async () => {
30
+ view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
31
+ }, {
32
+ description: 'Load dataset with macromolecules of \'Helm\' notation.',
33
+ delay: 1600,
34
+ })
35
+ .step('MSA on non-natural aminoacids with PepSeA', async () => {
36
+ helmCol = df.getCol(helmColName);
37
+ const method: string = pepseaMethods[0];
38
+ const gapOpen: number = 1.53;
39
+ const gapExtend: number = 0;
40
+ msaHelmCol = await runPepsea(helmCol, msaHelmColName, method, gapOpen, gapExtend, undefined);
41
+ df.columns.add(msaHelmCol);
42
+ await grok.data.detectSemanticTypes(df);
43
+ }, {
44
+ description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
45
+ delay: 1600,
46
+ })
47
+ .step('Composition analysis on MSA results', async () => {
48
+ wlViewer = await df.plot.fromType('WebLogo', {
49
+ sequenceColumnName: msaHelmColName
50
+ }) as DG.Viewer & IWebLogoViewer;
51
+ view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
52
+ }, {
53
+ description: 'Composition analysis allows to reveal functional features of sequences like motifs, or variable loops.',
54
+ delay: 1600,
55
+ })
56
+ .step('Building sequence space', async () => {
57
+ const method: string = 'UMAP';
58
+ ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
59
+ 'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
60
+ view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
61
+ }, {
62
+ description: 'Reduce sequence space dimensionality to display on 2D representation.',
63
+ delay: 1600
64
+ })
65
+ .start();
54
66
  } catch (err: any) {
55
- if (err instanceof Error)
56
- _package.logger.error(err.message, undefined, err.stack);
57
- else
58
- _package.logger.error(err.toString());
67
+ handleError(err);
59
68
  }
60
69
  }
package/src/demo/utils.ts CHANGED
@@ -7,19 +7,6 @@ import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/s
7
7
  import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
8
8
  import {delay} from '@datagrok-libraries/utils/src/test';
9
9
 
10
- export function step(message: string, action: () => Promise<void>, delayMs: number = 1600): () => Promise<void> {
11
- return async function() {
12
- grok.shell.info(message);
13
- const pi = DG.TaskBarProgressIndicator.create(message);
14
- try {
15
- await action();
16
- } finally {
17
- pi.close();
18
- await delay(delayMs);
19
- }
20
- };
21
- }
22
-
23
10
  enum EMBED_COL_NAMES {
24
11
  X = 'Embed_X',
25
12
  Y = 'Embed_Y'
@@ -93,3 +80,10 @@ export async function demoSequenceSpace(
93
80
  view.dockManager.dock(resSpaceViewer!, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
94
81
  return resSpaceViewer;
95
82
  }
83
+
84
+ export function handleError(err: any): void { // Shows the error to the user and writes it to the package log; err may be any thrown value, not only an Error
85
+ const errMsg: string = err instanceof Error ? err.message : String(err); // String() also copes with null/undefined, where calling .toString() would itself throw
86
+ const stack: string | undefined = err instanceof Error ? err.stack : undefined;
87
+ grok.shell.error(errMsg);
88
+ _package.logger.error(errMsg, undefined, stack); // log the normalized text: err.message is undefined for non-Error values
89
+ }