@datagrok/bio 2.4.17 → 2.4.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/864.js +1 -1
- package/dist/864.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/src/demo/bio01-similarity-diversity.ts +14 -12
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +12 -10
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +13 -10
- package/src/demo/bio03-atomic-level.ts +66 -0
- package/src/demo/bio05-helm-msa-sequence-space.ts +11 -9
- package/src/package-test.ts +1 -0
- package/src/package.ts +31 -23
- package/src/tests/mm-distance-tests.ts +138 -0
- package/src/tests/monomer-libraries-tests.ts +2 -2
- package/src/utils/monomer-lib.ts +35 -5
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.
|
|
8
|
+
"version": "2.4.19",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -16,8 +16,8 @@
|
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
17
|
"@datagrok-libraries/bio": "^5.29.3",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
|
-
"@datagrok-libraries/ml": "^6.3.
|
|
20
|
-
"@datagrok-libraries/tutorials": "^1.
|
|
19
|
+
"@datagrok-libraries/ml": "^6.3.23",
|
|
20
|
+
"@datagrok-libraries/tutorials": "^1.3.1",
|
|
21
21
|
"@datagrok-libraries/utils": "^2.1.3",
|
|
22
22
|
"cash-dom": "^8.0.0",
|
|
23
23
|
"css-loader": "^6.7.3",
|
|
@@ -16,9 +16,11 @@ export async function demoBio01UI() {
|
|
|
16
16
|
let df: DG.DataFrame;
|
|
17
17
|
|
|
18
18
|
try {
|
|
19
|
-
const demoScript = new DemoScript(
|
|
19
|
+
const demoScript = new DemoScript(
|
|
20
|
+
'Similarity, Diversity',
|
|
21
|
+
'Sequence similarity tracking and evaluation dataset diversity');
|
|
20
22
|
await demoScript
|
|
21
|
-
.step(`
|
|
23
|
+
.step(`Load DNA sequences`, async () => {
|
|
22
24
|
grok.shell.windows.showContextPanel = false;
|
|
23
25
|
grok.shell.windows.showProperties = false;
|
|
24
26
|
|
|
@@ -30,9 +32,9 @@ export async function demoBio01UI() {
|
|
|
30
32
|
// TODO: Fix column width
|
|
31
33
|
}, {
|
|
32
34
|
description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
|
|
33
|
-
delay:
|
|
35
|
+
delay: 2000
|
|
34
36
|
})
|
|
35
|
-
.step('
|
|
37
|
+
.step('Find the most similar sequences to the current one', async () => {
|
|
36
38
|
const simViewer = await df.plot.fromType('Sequence Similarity Search', {
|
|
37
39
|
moleculeColumnName: 'sequence',
|
|
38
40
|
similarColumnLabel: 'Similar to current',
|
|
@@ -40,9 +42,9 @@ export async function demoBio01UI() {
|
|
|
40
42
|
view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
|
|
41
43
|
}, {
|
|
42
44
|
description: `Add 'Sequence Similarity Search' viewer.`,
|
|
43
|
-
delay:
|
|
45
|
+
delay: 2000
|
|
44
46
|
})
|
|
45
|
-
.step('
|
|
47
|
+
.step('Explore most diverse sequences in a dataset', async () => {
|
|
46
48
|
const divViewer = await df.plot.fromType('Sequence Diversity Search', {
|
|
47
49
|
moleculeColumnName: 'sequence',
|
|
48
50
|
diverseColumnLabel: 'Top diverse sequences of all data'
|
|
@@ -50,19 +52,19 @@ export async function demoBio01UI() {
|
|
|
50
52
|
view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
|
|
51
53
|
}, {
|
|
52
54
|
description: `Add 'Sequence Deversity Search' viewer.`,
|
|
53
|
-
delay:
|
|
55
|
+
delay: 2000
|
|
54
56
|
})
|
|
55
|
-
.step('
|
|
57
|
+
.step('Choose another sequence for similarity search', async () => {
|
|
56
58
|
df.currentRowIdx = 3;
|
|
57
59
|
}, {
|
|
58
60
|
description: 'Handling current row changed of data frame showing update of similar sequences.',
|
|
59
|
-
delay:
|
|
61
|
+
delay: 2000,
|
|
60
62
|
})
|
|
61
|
-
.step('
|
|
63
|
+
.step('One more sequence for similarity search', async () => {
|
|
62
64
|
df.currentRowIdx = 7;
|
|
63
65
|
}, {
|
|
64
|
-
description: '
|
|
65
|
-
delay:
|
|
66
|
+
description: 'Just one more sequence to search similar ones.',
|
|
67
|
+
delay: 2000,
|
|
66
68
|
})
|
|
67
69
|
.start();
|
|
68
70
|
} catch (err: any) {
|
|
@@ -26,9 +26,11 @@ export async function demoBio01aUI() {
|
|
|
26
26
|
const embedCols: { [colName: string]: DG.Column<number> } = {};
|
|
27
27
|
|
|
28
28
|
try {
|
|
29
|
-
const demoScript = new DemoScript(
|
|
29
|
+
const demoScript = new DemoScript(
|
|
30
|
+
'Demo',
|
|
31
|
+
'Exploring sequence space of Macromolecules, comparison with hierarchical clustering results');
|
|
30
32
|
await demoScript
|
|
31
|
-
.step(`
|
|
33
|
+
.step(`Load DNA sequences`, async () => {
|
|
32
34
|
[df, treeHelper, dendrogramSvc] = await Promise.all([
|
|
33
35
|
_package.files.readCsv(dataFn),
|
|
34
36
|
getTreeHelper(),
|
|
@@ -41,15 +43,15 @@ export async function demoBio01aUI() {
|
|
|
41
43
|
grok.shell.windows.showProperties = false;
|
|
42
44
|
}, {
|
|
43
45
|
description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
|
|
44
|
-
delay:
|
|
46
|
+
delay: 2000,
|
|
45
47
|
})
|
|
46
|
-
.step('
|
|
48
|
+
.step('Build sequence space', async () => {
|
|
47
49
|
spViewer = await demoSequenceSpace(view, df, seqColName, method);
|
|
48
50
|
}, {
|
|
49
51
|
description: `Reduce sequence space dimensionality to display on 2D representation.`,
|
|
50
|
-
delay:
|
|
52
|
+
delay: 2000
|
|
51
53
|
})
|
|
52
|
-
.step('
|
|
54
|
+
.step('Cluster sequences', async () => {
|
|
53
55
|
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
54
56
|
const seqList = seqCol.toList();
|
|
55
57
|
const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
|
|
@@ -60,20 +62,20 @@ export async function demoBio01aUI() {
|
|
|
60
62
|
dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
|
|
61
63
|
}, {
|
|
62
64
|
description: `Perform hierarchical clustering to reveal relationships between sequences.`,
|
|
63
|
-
delay:
|
|
65
|
+
delay: 2000,
|
|
64
66
|
})
|
|
65
|
-
.step('
|
|
67
|
+
.step('Select a sequence', async () => {
|
|
66
68
|
df.selection.init((idx: number) => [15].includes(idx));
|
|
67
69
|
}, {
|
|
68
70
|
description: `Handling selection of data frame row reflecting on linked viewers.`,
|
|
69
|
-
delay:
|
|
71
|
+
delay: 2000,
|
|
70
72
|
})
|
|
71
73
|
.step('Select a bunch of sequences', async () => {
|
|
72
74
|
df.selection.init((idx: number) => [21, 9, 58].includes(idx));
|
|
73
75
|
df.currentRowIdx = 27;
|
|
74
76
|
}, {
|
|
75
77
|
description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
|
|
76
|
-
delay:
|
|
78
|
+
delay: 2000,
|
|
77
79
|
})
|
|
78
80
|
.start();
|
|
79
81
|
} catch (err: any) {
|
|
@@ -13,22 +13,25 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
13
13
|
import {handleError} from './utils';
|
|
14
14
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
15
15
|
|
|
16
|
-
const dataFn = 'samples/sample_FASTA.csv';
|
|
16
|
+
const dataFn: string = 'samples/sample_FASTA.csv';
|
|
17
17
|
|
|
18
18
|
export async function demoBio01bUI() {
|
|
19
19
|
let treeHelper: ITreeHelper;
|
|
20
20
|
let dendrogramSvc: IDendrogramService;
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
let df: DG.DataFrame;
|
|
23
|
+
let view: DG.TableView;
|
|
23
24
|
let activityCliffsViewer: DG.ScatterPlotViewer;
|
|
24
25
|
|
|
25
26
|
const method: string = 'UMAP';
|
|
26
27
|
const idRows: { [id: number]: number } = {};
|
|
27
28
|
|
|
28
29
|
try {
|
|
29
|
-
const demoScript = new DemoScript(
|
|
30
|
+
const demoScript = new DemoScript(
|
|
31
|
+
'Activity Cliffs',
|
|
32
|
+
'Activity Cliffs analysis on Macromolecules data');
|
|
30
33
|
await demoScript
|
|
31
|
-
.step(`
|
|
34
|
+
.step(`Load DNA sequences`, async () => {
|
|
32
35
|
grok.shell.windows.showContextPanel = false;
|
|
33
36
|
grok.shell.windows.showProperties = false;
|
|
34
37
|
|
|
@@ -46,9 +49,9 @@ export async function demoBio01bUI() {
|
|
|
46
49
|
lengthGCol.width = 0;
|
|
47
50
|
}, {
|
|
48
51
|
description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
|
|
49
|
-
delay:
|
|
52
|
+
delay: 2000,
|
|
50
53
|
})
|
|
51
|
-
.step('
|
|
54
|
+
.step('Find activity cliffs', async () => {
|
|
52
55
|
activityCliffsViewer = (await activityCliffs(
|
|
53
56
|
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
54
57
|
80, method)) as DG.ScatterPlotViewer;
|
|
@@ -60,9 +63,9 @@ export async function demoBio01bUI() {
|
|
|
60
63
|
cliffsLink.click();
|
|
61
64
|
}, {
|
|
62
65
|
description: 'Reveal similar sequences with a cliff of activity.',
|
|
63
|
-
delay:
|
|
66
|
+
delay: 2000
|
|
64
67
|
})
|
|
65
|
-
.step('
|
|
68
|
+
.step('Cluster sequences', async () => {
|
|
66
69
|
const seqCol: DG.Column<string> = df.getCol('sequence');
|
|
67
70
|
const seqList = seqCol.toList();
|
|
68
71
|
const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
|
|
@@ -77,7 +80,7 @@ export async function demoBio01bUI() {
|
|
|
77
80
|
activityGCol.scrollIntoView();
|
|
78
81
|
}, {
|
|
79
82
|
description: 'Perform hierarchical clustering to reveal relationships between sequences.',
|
|
80
|
-
delay:
|
|
83
|
+
delay: 2000
|
|
81
84
|
})
|
|
82
85
|
.step('Browse the cliff', async () => {
|
|
83
86
|
//cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
|
|
@@ -97,7 +100,7 @@ export async function demoBio01bUI() {
|
|
|
97
100
|
// }
|
|
98
101
|
}, {
|
|
99
102
|
description: 'Zoom in to explore selected activity cliff details.',
|
|
100
|
-
delay:
|
|
103
|
+
delay: 2000
|
|
101
104
|
})
|
|
102
105
|
.start();
|
|
103
106
|
} catch (err: any) {
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import {_package, toAtomicLevel} from '../package';
|
|
7
|
+
import $ from 'cash-dom';
|
|
8
|
+
import {handleError} from './utils';
|
|
9
|
+
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
10
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
11
|
+
|
|
12
|
+
const dataFn: string = 'samples/sample_FASTA.csv';
|
|
13
|
+
|
|
14
|
+
export async function demoBio03UI(): Promise<void> {
|
|
15
|
+
let df: DG.DataFrame;
|
|
16
|
+
let view: DG.TableView;
|
|
17
|
+
let dlg: DG.Dialog;
|
|
18
|
+
|
|
19
|
+
try {
|
|
20
|
+
await new DemoScript(
|
|
21
|
+
'Atomic Level',
|
|
22
|
+
'Atomic level structure of Macromolecules'
|
|
23
|
+
)
|
|
24
|
+
.step(`Loading Macromolecules notation 'Helm'`, async () => {
|
|
25
|
+
grok.shell.windows.showContextPanel = false;
|
|
26
|
+
grok.shell.windows.showProperties = false;
|
|
27
|
+
|
|
28
|
+
df = await _package.files.readCsv(dataFn);
|
|
29
|
+
view = grok.shell.addTableView(df);
|
|
30
|
+
for (let colI: number = 0; colI < view.grid.columns.length; colI++) {
|
|
31
|
+
const gCol: DG.GridColumn = view.grid.columns.byIndex(colI)!;
|
|
32
|
+
if (!(['Sequence', 'Activity'].includes(gCol.name))) gCol.visible = false;
|
|
33
|
+
}
|
|
34
|
+
}, {
|
|
35
|
+
description: `Load dataset with macromolecules of 'fasta' notation, 'PT' alphabet (protein, aminoacids).`,
|
|
36
|
+
delay: 2000,
|
|
37
|
+
})
|
|
38
|
+
.step('To atomic level', async () => {
|
|
39
|
+
const seqCol = df.getCol('Sequence');
|
|
40
|
+
await toAtomicLevel(df, seqCol);
|
|
41
|
+
}, {
|
|
42
|
+
description: 'Get atomic level structures of Macromolecules.',
|
|
43
|
+
delay: 2000,
|
|
44
|
+
})
|
|
45
|
+
.step('Sketcher', async () => {
|
|
46
|
+
const molColName: string = 'molfile(Sequence)';
|
|
47
|
+
df.currentCell = df.cell(1, molColName);
|
|
48
|
+
const mol: string = df.currentCell.value;
|
|
49
|
+
|
|
50
|
+
const sketcher = new DG.chem.Sketcher(DG.chem.SKETCHER_MODE.INPLACE);
|
|
51
|
+
sketcher.setMolFile(mol);
|
|
52
|
+
|
|
53
|
+
dlg = ui.dialog()
|
|
54
|
+
.add(sketcher)
|
|
55
|
+
.show();
|
|
56
|
+
await delay(3000);
|
|
57
|
+
dlg.close();
|
|
58
|
+
}, {
|
|
59
|
+
description: 'Display atomic level structure within a sketcher.',
|
|
60
|
+
delay: 2000,
|
|
61
|
+
})
|
|
62
|
+
.start();
|
|
63
|
+
} catch (err: any) {
|
|
64
|
+
handleError(err);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
@@ -24,18 +24,20 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
24
24
|
const msaHelmColName: string = 'msa(HELM)';
|
|
25
25
|
|
|
26
26
|
try {
|
|
27
|
-
const demoScript = new DemoScript(
|
|
27
|
+
const demoScript = new DemoScript(
|
|
28
|
+
'Helm, MSA, Sequence Space',
|
|
29
|
+
'MSA and composition analysis on Helm data');
|
|
28
30
|
await demoScript
|
|
29
|
-
.step(`
|
|
31
|
+
.step(`Load peptides with non-natural aminoacids in 'HELM' notation`, async () => {
|
|
30
32
|
view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
|
|
31
33
|
|
|
32
34
|
grok.shell.windows.showContextPanel = false;
|
|
33
35
|
grok.shell.windows.showProperties = false;
|
|
34
36
|
}, {
|
|
35
37
|
description: 'Load dataset with macromolecules of \'Helm\' notation.',
|
|
36
|
-
delay:
|
|
38
|
+
delay: 2000,
|
|
37
39
|
})
|
|
38
|
-
.step('
|
|
40
|
+
.step('Align paptides with non-natural aminoacids with PepSeA', async () => {
|
|
39
41
|
helmCol = df.getCol(helmColName);
|
|
40
42
|
const method: string = pepseaMethods[0];
|
|
41
43
|
const gapOpen: number = 1.53;
|
|
@@ -45,18 +47,18 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
45
47
|
await grok.data.detectSemanticTypes(df);
|
|
46
48
|
}, {
|
|
47
49
|
description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
|
|
48
|
-
delay:
|
|
50
|
+
delay: 2000,
|
|
49
51
|
})
|
|
50
|
-
.step('
|
|
52
|
+
.step('Build sequence space', async () => {
|
|
51
53
|
const method: string = 'UMAP';
|
|
52
54
|
ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
|
|
53
55
|
'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
|
|
54
56
|
view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
55
57
|
}, {
|
|
56
58
|
description: 'Reduce sequence space dimensionality to display on 2D representation.',
|
|
57
|
-
delay:
|
|
59
|
+
delay: 2000
|
|
58
60
|
})
|
|
59
|
-
.step('
|
|
61
|
+
.step('Analyse sequence composition', async () => {
|
|
60
62
|
wlViewer = await df.plot.fromType('WebLogo', {
|
|
61
63
|
sequenceColumnName: msaHelmColName,
|
|
62
64
|
maxHeight: 50,
|
|
@@ -64,7 +66,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
64
66
|
view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
|
|
65
67
|
}, {
|
|
66
68
|
description: 'Composition analysis allows to reveal functional features of sequences like motifs, or variable loops.',
|
|
67
|
-
delay:
|
|
69
|
+
delay: 2000,
|
|
68
70
|
})
|
|
69
71
|
.start();
|
|
70
72
|
} catch (err: any) {
|
package/src/package-test.ts
CHANGED
|
@@ -22,6 +22,7 @@ import './tests/substructure-filters-tests';
|
|
|
22
22
|
import './tests/pepsea-tests';
|
|
23
23
|
import './tests/viewers';
|
|
24
24
|
import './tests/units-handler-tests';
|
|
25
|
+
import './tests/mm-distance-tests';
|
|
25
26
|
|
|
26
27
|
// Tests hanging github CI
|
|
27
28
|
import './tests/activity-cliffs-tests';
|
package/src/package.ts
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
} from './utils/cell-renderer';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {SequenceAlignment} from './seq_align';
|
|
12
|
-
import {getEmbeddingColsNames, sequenceSpaceByFingerprints
|
|
12
|
+
import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
13
13
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
14
|
import {
|
|
15
15
|
createLinesGrid,
|
|
@@ -38,7 +38,10 @@ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
|
38
38
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
39
39
|
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
40
40
|
import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
41
|
-
import {
|
|
41
|
+
import {
|
|
42
|
+
LIB_PATH, MonomerLibHelper,
|
|
43
|
+
LIB_STORAGE_NAME, LibSettings, getUserLibSettings, setUserLibSetting, getLibFileNameList
|
|
44
|
+
} from './utils/monomer-lib';
|
|
42
45
|
import {getMacromoleculeColumn} from './utils/ui-utils';
|
|
43
46
|
import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
44
47
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
@@ -46,10 +49,10 @@ import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionE
|
|
|
46
49
|
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
47
50
|
import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
|
|
48
51
|
import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
|
|
52
|
+
import {demoBio03UI} from './demo/bio03-atomic-level';
|
|
49
53
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
50
54
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
51
55
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
52
|
-
import { runKalign } from './utils/multiple-sequence-alignment';
|
|
53
56
|
|
|
54
57
|
export const _package = new DG.Package();
|
|
55
58
|
|
|
@@ -150,25 +153,25 @@ export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
|
150
153
|
//@ts-ignore
|
|
151
154
|
const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
|
|
152
155
|
const divInputs: HTMLDivElement = ui.div();
|
|
153
|
-
const libFileNameList: string[] =
|
|
154
|
-
.map((it) => it.fileName);
|
|
156
|
+
const libFileNameList: string[] = await getLibFileNameList();
|
|
155
157
|
const librariesUserSettingsSet: Set<string> = new Set<string>(Object.keys(
|
|
156
158
|
await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true)));
|
|
157
159
|
|
|
158
160
|
let userStoragePromise: Promise<void> = Promise.resolve();
|
|
159
161
|
for (const libFileName of libFileNameList) {
|
|
160
|
-
const
|
|
162
|
+
const settings = await getUserLibSettings();
|
|
163
|
+
const libInput: DG.InputBase<boolean | null> = ui.boolInput(libFileName, !settings.exclude.includes(libFileName),
|
|
161
164
|
() => {
|
|
162
165
|
userStoragePromise = userStoragePromise.then(async () => {
|
|
163
166
|
if (libInput.value == true) {
|
|
164
|
-
//
|
|
165
|
-
|
|
166
|
-
await MonomerLibHelper.instance.loadLibraries(); // from libraryPanel()
|
|
167
|
+
// Checked library remove from excluded list
|
|
168
|
+
settings.exclude = settings.exclude.filter((l) => l != libFileName);
|
|
167
169
|
} else {
|
|
168
|
-
//
|
|
169
|
-
|
|
170
|
-
await MonomerLibHelper.instance.loadLibraries(true); // from libraryPanel()
|
|
170
|
+
// Unchecked library add to excluded list
|
|
171
|
+
if (!settings.exclude.includes(libFileName)) settings.exclude.push(libFileName);
|
|
171
172
|
}
|
|
173
|
+
await setUserLibSetting(settings);
|
|
174
|
+
await MonomerLibHelper.instance.loadLibraries(true); // from libraryPanel()
|
|
172
175
|
grok.shell.info('Monomer library user settings saved.');
|
|
173
176
|
});
|
|
174
177
|
});
|
|
@@ -287,23 +290,19 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
287
290
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
288
291
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
289
292
|
};
|
|
290
|
-
const uh = new UnitsHandler(macroMolecule);
|
|
291
|
-
let columnDistanceMetric = 'Tanimoto';
|
|
292
|
-
if (uh.isFasta())
|
|
293
|
-
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
294
293
|
const sp = await getActivityCliffs(
|
|
295
294
|
df,
|
|
296
295
|
macroMolecule,
|
|
297
296
|
null,
|
|
298
297
|
axesNames,
|
|
299
|
-
|
|
298
|
+
'Activity cliffs',
|
|
300
299
|
activities,
|
|
301
300
|
similarity,
|
|
302
301
|
'Tanimoto',
|
|
303
302
|
methodName,
|
|
304
303
|
DG.SEMTYPE.MACROMOLECULE,
|
|
305
304
|
tags,
|
|
306
|
-
|
|
305
|
+
sequenceSpaceByFingerprints,
|
|
307
306
|
getChemSimilaritiesMatrix,
|
|
308
307
|
createTooltipElement,
|
|
309
308
|
createPropPanelElement,
|
|
@@ -354,7 +353,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
354
353
|
embedAxesNames: embedColsNames,
|
|
355
354
|
options: options
|
|
356
355
|
};
|
|
357
|
-
const sequenceSpaceRes = await
|
|
356
|
+
const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
|
|
358
357
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
359
358
|
for (const col of embeddings) {
|
|
360
359
|
const listValues = col.toList();
|
|
@@ -672,7 +671,7 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
|
672
671
|
// demoBio01
|
|
673
672
|
//name: demoBioSimilarityDiversity
|
|
674
673
|
//meta.demoPath: Bioinformatics | Similarity, Diversity
|
|
675
|
-
//description:
|
|
674
|
+
//description: Sequence similarity tracking and evaluation dataset diversity
|
|
676
675
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
|
|
677
676
|
export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
678
677
|
await demoBio01UI();
|
|
@@ -681,7 +680,7 @@ export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
|
681
680
|
// demoBio01a
|
|
682
681
|
//name:demoBioSequenceSpace
|
|
683
682
|
//meta.demoPath: Bioinformatics | Sequence Space
|
|
684
|
-
//description:
|
|
683
|
+
//description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
|
|
685
684
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
|
|
686
685
|
export async function demoBioSequenceSpace(): Promise<void> {
|
|
687
686
|
await demoBio01aUI();
|
|
@@ -690,16 +689,25 @@ export async function demoBioSequenceSpace(): Promise<void> {
|
|
|
690
689
|
// demoBio01b
|
|
691
690
|
//name: demoBioActivityCliffs
|
|
692
691
|
//meta.demoPath: Bioinformatics | Activity Cliffs
|
|
693
|
-
//description:
|
|
692
|
+
//description: Activity Cliffs analysis on Macromolecules data
|
|
694
693
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
|
|
695
694
|
export async function demoBioActivityCliffs(): Promise<void> {
|
|
696
695
|
await demoBio01bUI();
|
|
697
696
|
}
|
|
698
697
|
|
|
698
|
+
// demoBio03
|
|
699
|
+
//name: demoBioAtomicLevel
|
|
700
|
+
//meta.demoPath: Bioinformatics | Atomic Level
|
|
701
|
+
//description: Atomic level structure of Macromolecules
|
|
702
|
+
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
|
|
703
|
+
export async function demoBioAtomicLevel(): Promise<void> {
|
|
704
|
+
await demoBio03UI();
|
|
705
|
+
}
|
|
706
|
+
|
|
699
707
|
// demoBio05
|
|
700
708
|
//name: demoBioHelmMsaSequenceSpace
|
|
701
709
|
//meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
|
|
702
|
-
//description:
|
|
710
|
+
//description: MSA and composition analysis on Helm data
|
|
703
711
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Helm,%20MSA,%20Sequence%20Space
|
|
704
712
|
export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
|
|
705
713
|
await demoBio05UI();
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
7
|
+
import {MmDistanceFunctionsNames, mmDistanceFunctions}
|
|
8
|
+
from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
9
|
+
|
|
10
|
+
category('Distance', async () => {
|
|
11
|
+
const scoringMatrix = [
|
|
12
|
+
[1, 0, 0, 0],
|
|
13
|
+
[0, 1, 0, 0],
|
|
14
|
+
[0, 0, 1, 0],
|
|
15
|
+
[0, 0, 0, 1],
|
|
16
|
+
];
|
|
17
|
+
|
|
18
|
+
const alphabetIndexes = {'F': 0, 'W': 1, 'R': 2, 'Y': 3};
|
|
19
|
+
|
|
20
|
+
const prot1 = 'FWRWY';
|
|
21
|
+
const prot2 = 'FWRWW';
|
|
22
|
+
|
|
23
|
+
const prot3 = 'FWY';
|
|
24
|
+
const prot4 = 'FWRWY';
|
|
25
|
+
|
|
26
|
+
const prot5 = 'FWY';
|
|
27
|
+
const prot6 = 'FWRRRRY';
|
|
28
|
+
|
|
29
|
+
const protTable = `seq
|
|
30
|
+
FWRWYVKHP
|
|
31
|
+
YNRWYVKHP
|
|
32
|
+
MWRSWYCKHP`;
|
|
33
|
+
|
|
34
|
+
const DNATable = `seq
|
|
35
|
+
ATAACG
|
|
36
|
+
ATCGA
|
|
37
|
+
ATCGA`;
|
|
38
|
+
|
|
39
|
+
const MSATable = `seq
|
|
40
|
+
ATAAC
|
|
41
|
+
ATCGA
|
|
42
|
+
ATCGA`;
|
|
43
|
+
test('protein-distance-function', async () => {
|
|
44
|
+
const uh = await _initMacromoleculeColumn(protTable);
|
|
45
|
+
const distFunc = uh.getDistanceFunctionName();
|
|
46
|
+
expect(distFunc, MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH);
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
test('DNA-distance-function', async () => {
|
|
50
|
+
const uh = await _initMacromoleculeColumn(DNATable);
|
|
51
|
+
const distFunc = uh.getDistanceFunctionName();
|
|
52
|
+
expect(distFunc, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
test('MSA-distance-function', async () => {
|
|
56
|
+
const uh = await _initMacromoleculeColumn(MSATable);
|
|
57
|
+
const distFunc = uh.getDistanceFunctionName();
|
|
58
|
+
expect(distFunc, MmDistanceFunctionsNames.HAMMING);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
test('levenstein-sub', async () => {
|
|
62
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.LEVENSHTEIN]();
|
|
63
|
+
_testDistance(prot1, prot2, df, 1);
|
|
64
|
+
});
|
|
65
|
+
test('levenstein-del', async () => {
|
|
66
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.LEVENSHTEIN]();
|
|
67
|
+
_testDistance(prot3, prot4, df, 2);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
test('hamming', async () => {
|
|
71
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.HAMMING]();
|
|
72
|
+
_testDistance(prot3, prot4, df, 3);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
// Note that here the result is actually an inverted value of alignment score, which is coorelated with distance
|
|
76
|
+
// tests using default BLOSUM62 matrix are in agreement with the results of the online tool
|
|
77
|
+
test('needleman-blosum62', async () => {
|
|
78
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]();
|
|
79
|
+
_testDistance(prot1, prot2, df, -35);
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
test('needleman-blosum62-del', async () => {
|
|
83
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]();
|
|
84
|
+
_testDistance(prot3, prot4, df, -14);
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
test('needleman-custom-sub', async () => {
|
|
88
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
89
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
|
|
90
|
+
);
|
|
91
|
+
_testDistance(prot1, prot2, df, -4);
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
test('needleman-custom-del', async () => {
|
|
95
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
96
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
|
|
97
|
+
);
|
|
98
|
+
_testDistance(prot3, prot4, df, -1);
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
test('needleman-custom-zero-extend', async () => {
|
|
102
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
103
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 0}
|
|
104
|
+
);
|
|
105
|
+
_testDistance(prot5, prot6, df, -2);
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
test('needleman-custom-half-extend', async () => {
|
|
109
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
110
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 2, gapExtend: 1}
|
|
111
|
+
);
|
|
112
|
+
_testDistance(prot5, prot6, df, 2);
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
test('needleman-custom-same-extend', async () => {
|
|
116
|
+
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
117
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
|
|
118
|
+
);
|
|
119
|
+
_testDistance(prot5, prot6, df, 1);
|
|
120
|
+
});
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
async function _initMacromoleculeColumn(csv: string): Promise<UnitsHandler> {
|
|
124
|
+
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
125
|
+
const seqCol = srcDf.col('seq')!;
|
|
126
|
+
const semType: string = await grok.functions
|
|
127
|
+
.call('Bio:detectMacromolecule', {col: seqCol}) as unknown as string;
|
|
128
|
+
if (semType)
|
|
129
|
+
seqCol.semType = semType;
|
|
130
|
+
await grok.data.detectSemanticTypes(srcDf);
|
|
131
|
+
const uh = new UnitsHandler(seqCol);
|
|
132
|
+
return uh;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
function _testDistance(seq1: string, seq2: string, df: (a: string, b: string) => number, expected: number) {
|
|
136
|
+
const d = df(seq1, seq2);
|
|
137
|
+
expect(d, expected);
|
|
138
|
+
}
|
|
@@ -27,8 +27,8 @@ category('monomerLibraries', () => {
|
|
|
27
27
|
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
|
|
28
28
|
await monomerLibHelper.loadLibraries(true); // test defaultLib
|
|
29
29
|
|
|
30
|
-
// Currently default monomer lib is
|
|
30
|
+
// Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
|
|
31
31
|
const currentMonomerLib = monomerLibHelper.getBioLib();
|
|
32
|
-
expect(currentMonomerLib.getTypes().length,
|
|
32
|
+
expect(currentMonomerLib.getTypes().length > 0, true);
|
|
33
33
|
});
|
|
34
34
|
});
|