@datagrok/bio 2.4.3 → 2.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +1 -2
- package/dist/153.js +2 -0
- package/dist/153.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +3 -2
- package/src/demo/bio01-similarity-diversity.ts +45 -0
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +68 -0
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +94 -0
- package/src/demo/bio05-helm-msa-sequence-space.ts +59 -0
- package/src/demo/utils.ts +95 -0
- package/src/package.ts +44 -9
- package/src/tests/similarity-diversity-tests.ts +1 -0
- package/src/utils/pepsea.ts +9 -5
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.3",
|
|
8
|
+
"version": "2.4.5",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -16,12 +16,13 @@
|
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
17
|
"@datagrok-libraries/bio": "^5.27.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
|
-
"@datagrok-libraries/ml": "^6.3.
|
|
19
|
+
"@datagrok-libraries/ml": "^6.3.16",
|
|
20
20
|
"@datagrok-libraries/utils": "^2.1.3",
|
|
21
21
|
"cash-dom": "^8.0.0",
|
|
22
22
|
"css-loader": "^6.7.3",
|
|
23
23
|
"datagrok-api": "^1.13.3",
|
|
24
24
|
"dayjs": "^1.11.4",
|
|
25
|
+
"fastest-levenshtein": "^1.0.16",
|
|
25
26
|
"openchemlib": "6.0.1",
|
|
26
27
|
"rxjs": "^6.5.5",
|
|
27
28
|
"source-map-loader": "^4.0.1",
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {_package} from '../package';
|
|
6
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
7
|
+
import {step} from './utils';
|
|
8
|
+
|
|
9
|
+
const dataFn = 'data/sample_FASTA_DNA.csv';
|
|
10
|
+
|
|
11
|
+
export async function demoBio01UI(funcPath: string) {
|
|
12
|
+
let view: DG.TableView;
|
|
13
|
+
let df: DG.DataFrame;
|
|
14
|
+
|
|
15
|
+
try {
|
|
16
|
+
await step(`Loading DNA notation 'fasta'.`, async () => {
|
|
17
|
+
df = await _package.files.readCsv(dataFn);
|
|
18
|
+
view = grok.shell.addTableView(df);
|
|
19
|
+
view.path = view.basePath = funcPath;
|
|
20
|
+
})();
|
|
21
|
+
|
|
22
|
+
await step('Sequence similarity search.', async () => {
|
|
23
|
+
const simViewer = await df.plot.fromType('Sequence Similarity Search') as DG.Viewer;
|
|
24
|
+
view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
|
|
25
|
+
})();
|
|
26
|
+
|
|
27
|
+
await step('Sequence diversity search.', async () => {
|
|
28
|
+
const divViewer = await df.plot.fromType('Sequence Diversity Search') as DG.Viewer;
|
|
29
|
+
view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
|
|
30
|
+
})();
|
|
31
|
+
|
|
32
|
+
await step('Current row 3.', async () => {
|
|
33
|
+
df.currentRowIdx = 3;
|
|
34
|
+
})();
|
|
35
|
+
|
|
36
|
+
await step('Current row 7', async () => {
|
|
37
|
+
df.currentRowIdx = 7;
|
|
38
|
+
});
|
|
39
|
+
} catch (err: any) {
|
|
40
|
+
if (err instanceof Error)
|
|
41
|
+
_package.logger.error(err.message, undefined, err.stack);
|
|
42
|
+
else
|
|
43
|
+
_package.logger.error(err.toString());
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {_package} from '../package';
|
|
6
|
+
|
|
7
|
+
import * as lev from 'fastest-levenshtein';
|
|
8
|
+
import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
|
|
9
|
+
import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
|
|
10
|
+
import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
|
|
11
|
+
import {demoSequenceSpace, step} from './utils';
|
|
12
|
+
|
|
13
|
+
const dataFn = 'data/sample_FASTA_DNA.csv';
|
|
14
|
+
const seqColName = 'sequence';
|
|
15
|
+
|
|
16
|
+
export async function demoBio01aUI(funcPath: string) {
|
|
17
|
+
let treeHelper: ITreeHelper;
|
|
18
|
+
let dendrogramSvc: IDendrogramService;
|
|
19
|
+
let view: DG.TableView;
|
|
20
|
+
let df: DG.DataFrame;
|
|
21
|
+
let spViewer: DG.ScatterPlotViewer;
|
|
22
|
+
|
|
23
|
+
const method: string = 'UMAP';
|
|
24
|
+
const idRows: { [id: number]: number } = {};
|
|
25
|
+
const embedCols: { [colName: string]: DG.Column<number> } = {};
|
|
26
|
+
|
|
27
|
+
try {
|
|
28
|
+
await step(`Loading DNA notation 'fasta'.`, async () => {
|
|
29
|
+
[df, treeHelper, dendrogramSvc] = await Promise.all([
|
|
30
|
+
_package.files.readCsv(dataFn),
|
|
31
|
+
getTreeHelper(),
|
|
32
|
+
getDendrogramService()
|
|
33
|
+
]);
|
|
34
|
+
view = grok.shell.addTableView(df);
|
|
35
|
+
view.grid.props.rowHeight = 22;
|
|
36
|
+
view.path = view.basePath = funcPath;
|
|
37
|
+
})();
|
|
38
|
+
|
|
39
|
+
await step('Building sequence space.', async () => {
|
|
40
|
+
spViewer = await demoSequenceSpace(view, df, seqColName, method);
|
|
41
|
+
})();
|
|
42
|
+
|
|
43
|
+
await step('Hierarchical clustering.', async () => {
|
|
44
|
+
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
45
|
+
const seqList = seqCol.toList();
|
|
46
|
+
const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
|
|
47
|
+
const levDistance = lev.distance(aSeq, bSeq);
|
|
48
|
+
return levDistance / ((aSeq.length + bSeq.length) / 2);
|
|
49
|
+
});
|
|
50
|
+
const treeRoot = await treeHelper.hierarchicalClusteringByDistance(distance, 'ward');
|
|
51
|
+
dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
|
|
52
|
+
})();
|
|
53
|
+
|
|
54
|
+
await step('Selection.', async () => {
|
|
55
|
+
df.selection.init((idx: number) => [15].includes(idx));
|
|
56
|
+
})();
|
|
57
|
+
|
|
58
|
+
await step('Select bunch of sequences.', async () => {
|
|
59
|
+
df.selection.init((idx: number) => [21, 9, 58].includes(idx));
|
|
60
|
+
df.currentRowIdx = 27;
|
|
61
|
+
})();
|
|
62
|
+
} catch (err: any) {
|
|
63
|
+
if (err instanceof Error)
|
|
64
|
+
_package.logger.error(err.message, undefined, err.stack);
|
|
65
|
+
else
|
|
66
|
+
_package.logger.error(err.toString());
|
|
67
|
+
}
|
|
68
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {_package, activityCliffs,} from '../package';
|
|
6
|
+
import $ from 'cash-dom';
|
|
7
|
+
|
|
8
|
+
import {TEMPS as acTEMPS} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
9
|
+
import * as lev from 'fastest-levenshtein';
|
|
10
|
+
import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
|
|
11
|
+
import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
|
|
12
|
+
import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
|
|
13
|
+
import {step} from './utils';
|
|
14
|
+
|
|
15
|
+
const dataFn = 'samples/sample_FASTA.csv';
|
|
16
|
+
|
|
17
|
+
export async function demoBio01bUI(funcPath: string) {
|
|
18
|
+
let treeHelper: ITreeHelper;
|
|
19
|
+
let dendrogramSvc: IDendrogramService;
|
|
20
|
+
let view: DG.TableView;
|
|
21
|
+
let df: DG.DataFrame;
|
|
22
|
+
let activityCliffsViewer: DG.ScatterPlotViewer;
|
|
23
|
+
|
|
24
|
+
const method: string = 'UMAP';
|
|
25
|
+
const idRows: { [id: number]: number } = {};
|
|
26
|
+
|
|
27
|
+
try {
|
|
28
|
+
await step('Loading DNA notation \'fasta\'.', async () => {
|
|
29
|
+
[df, treeHelper, dendrogramSvc] = await Promise.all([
|
|
30
|
+
_package.files.readCsv(dataFn),
|
|
31
|
+
getTreeHelper(),
|
|
32
|
+
getDendrogramService()
|
|
33
|
+
]);
|
|
34
|
+
|
|
35
|
+
view = grok.shell.addTableView(df);
|
|
36
|
+
view.path = view.basePath = funcPath;
|
|
37
|
+
view.grid.props.rowHeight = 22;
|
|
38
|
+
const uniProtKbGCol = view.grid.columns.byName('UniProtKB')!;
|
|
39
|
+
uniProtKbGCol.width = 75;
|
|
40
|
+
const lengthGCol = view.grid.columns.byName('Length')!;
|
|
41
|
+
lengthGCol.width = 0;
|
|
42
|
+
})();
|
|
43
|
+
|
|
44
|
+
await step('Analyze for activity cliffs.', async () => {
|
|
45
|
+
activityCliffsViewer = (await activityCliffs(
|
|
46
|
+
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
47
|
+
80, method)) as DG.ScatterPlotViewer;
|
|
48
|
+
view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
|
|
49
|
+
|
|
50
|
+
// Show grid viewer with the cliffs
|
|
51
|
+
const cliffsLink: HTMLButtonElement = $(activityCliffsViewer.root)
|
|
52
|
+
.find('button.scatter_plot_link,cliffs_grid').get()[0] as HTMLButtonElement;
|
|
53
|
+
cliffsLink.click();
|
|
54
|
+
})();
|
|
55
|
+
|
|
56
|
+
await step('Hierarchical clustering.', async () => {
|
|
57
|
+
const seqCol: DG.Column<string> = df.getCol('sequence');
|
|
58
|
+
const seqList = seqCol.toList();
|
|
59
|
+
const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
|
|
60
|
+
const levDistance = lev.distance(aSeq, bSeq);
|
|
61
|
+
return levDistance / ((aSeq.length + bSeq.length) / 2);
|
|
62
|
+
});
|
|
63
|
+
const treeRoot = await treeHelper.hierarchicalClusteringByDistance(distance, 'ward');
|
|
64
|
+
dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
|
|
65
|
+
|
|
66
|
+
// adjust for visual
|
|
67
|
+
const activityGCol = view.grid.columns.byName('Activity')!;
|
|
68
|
+
activityGCol.scrollIntoView();
|
|
69
|
+
})();
|
|
70
|
+
|
|
71
|
+
await step('Browse the cliff.', async () => {
|
|
72
|
+
//cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
|
|
73
|
+
const cliffsDfGrid: DG.Grid = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
|
|
74
|
+
//cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
|
|
75
|
+
cliffsDfGrid.dataFrame.currentRowIdx = 0;
|
|
76
|
+
//cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);
|
|
77
|
+
|
|
78
|
+
// /* workaround to select rows of the cliff */
|
|
79
|
+
// const entryCol: DG.Column = df.getCol('Entry');
|
|
80
|
+
// df.selection.init((rowIdx) => ['UPI00000BFE1D', 'UPI00000BFE17'].includes(entryCol.get(rowIdx)));
|
|
81
|
+
//
|
|
82
|
+
// const selectionIdxList: Int32Array = df.selection.getSelectedIndexes();
|
|
83
|
+
// if (selectionIdxList.length > 0) {
|
|
84
|
+
// df.currentRowIdx = selectionIdxList[0];
|
|
85
|
+
// view.grid.scrollToCell('UniProtKB', view.grid.tableRowToGrid(selectionIdxList[0]));
|
|
86
|
+
// }
|
|
87
|
+
})();
|
|
88
|
+
} catch (err: any) {
|
|
89
|
+
if (err instanceof Error)
|
|
90
|
+
_package.logger.error(err.message, undefined, err.stack);
|
|
91
|
+
else
|
|
92
|
+
_package.logger.error(err.toString());
|
|
93
|
+
}
|
|
94
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {_package, sequenceSpaceTopMenu} from '../package';
|
|
6
|
+
import {step} from './utils';
|
|
7
|
+
|
|
8
|
+
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
9
|
+
import {pepseaMethods, runPepsea} from '../utils/pepsea';
|
|
10
|
+
import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
11
|
+
|
|
12
|
+
const helmFn: string = 'samples/sample_HELM.csv';
|
|
13
|
+
|
|
14
|
+
export async function demoBio05UI(funcPath: string): Promise<void> {
|
|
15
|
+
let view: DG.TableView;
|
|
16
|
+
let df: DG.DataFrame;
|
|
17
|
+
let helmCol: DG.Column<string>;
|
|
18
|
+
let msaHelmCol: DG.Column<string>;
|
|
19
|
+
let wlViewer: DG.Viewer & IWebLogoViewer;
|
|
20
|
+
let ssViewer: DG.ScatterPlotViewer;
|
|
21
|
+
|
|
22
|
+
const helmColName: string = 'HELM';
|
|
23
|
+
const msaHelmColName: string = 'msa(HELM)';
|
|
24
|
+
|
|
25
|
+
try {
|
|
26
|
+
await step(`Loading peptides notation 'HELM'.`, async () => {
|
|
27
|
+
view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
|
|
28
|
+
view.path = view.basePath = funcPath;
|
|
29
|
+
})();
|
|
30
|
+
|
|
31
|
+
await step('MSA on non-natural aminoacids with PepSeA.', async () => {
|
|
32
|
+
helmCol = df.getCol(helmColName);
|
|
33
|
+
const method: string = pepseaMethods[0];
|
|
34
|
+
const gapOpen: number = 1.53;
|
|
35
|
+
const gapExtend: number = 0;
|
|
36
|
+
msaHelmCol = await runPepsea(helmCol, msaHelmColName, method, gapOpen, gapExtend, undefined);
|
|
37
|
+
df.columns.add(msaHelmCol);
|
|
38
|
+
})();
|
|
39
|
+
|
|
40
|
+
await step('Composition analysis on MSA results', async () => {
|
|
41
|
+
wlViewer = await df.plot.fromType('WebLogo', {
|
|
42
|
+
sequenceColumnName: msaHelmColName
|
|
43
|
+
}) as DG.Viewer & IWebLogoViewer;
|
|
44
|
+
view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
|
|
45
|
+
})();
|
|
46
|
+
|
|
47
|
+
await step('Building sequence space.', async () => {
|
|
48
|
+
const method: string = 'UMAP';
|
|
49
|
+
ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
|
|
50
|
+
'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
|
|
51
|
+
view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
52
|
+
})();
|
|
53
|
+
} catch (err: any) {
|
|
54
|
+
if (err instanceof Error)
|
|
55
|
+
_package.logger.error(err.message, undefined, err.stack);
|
|
56
|
+
else
|
|
57
|
+
_package.logger.error(err.toString());
|
|
58
|
+
}
|
|
59
|
+
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {_package, sequenceSpaceTopMenu} from '../package';
|
|
6
|
+
import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
|
|
7
|
+
import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
9
|
+
|
|
10
|
+
export function step(message: string, action: () => Promise<void>, delayMs: number = 1600): () => Promise<void> {
|
|
11
|
+
return async function() {
|
|
12
|
+
grok.shell.info(message);
|
|
13
|
+
const pi = DG.TaskBarProgressIndicator.create(message);
|
|
14
|
+
try {
|
|
15
|
+
await action();
|
|
16
|
+
} finally {
|
|
17
|
+
pi.close();
|
|
18
|
+
await delay(delayMs);
|
|
19
|
+
}
|
|
20
|
+
};
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
enum EMBED_COL_NAMES {
|
|
24
|
+
X = 'Embed_X',
|
|
25
|
+
Y = 'Embed_Y'
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export async function demoSequenceSpace(
|
|
29
|
+
view: DG.TableView, df: DG.DataFrame, colName: string, method: string
|
|
30
|
+
): Promise<DG.ScatterPlotViewer> {
|
|
31
|
+
let resSpaceViewer: DG.ScatterPlotViewer;
|
|
32
|
+
if (true) {
|
|
33
|
+
// Custom sequence space implementation that more closely resembles the hierarchical clustering results.
|
|
34
|
+
const embedColNameList = Object.values(EMBED_COL_NAMES);
|
|
35
|
+
// ensure embed columns exist
|
|
36
|
+
for (let embedI: number = 0; embedI < embedColNameList.length; embedI++) {
|
|
37
|
+
const embedColName: string = embedColNameList[embedI];
|
|
38
|
+
const embedCol: DG.Column | null = df.col(embedColName);
|
|
39
|
+
if (!embedCol) {
|
|
40
|
+
// Notification is required so that the added data frame Embed_<X> columns are reflected in the grid columns
|
|
41
|
+
// MolecularLiabilityBrowser.setView() corrects grid columns' names with .replace('_', ' ');
|
|
42
|
+
const notify: boolean = embedI == embedColNameList.length - 1; // notify on adding last Embed_<X> column
|
|
43
|
+
df.columns.add(DG.Column.float(embedColName, df.rowCount), notify);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if (df.rowCount >= 1) {
|
|
48
|
+
const seqCol: DG.Column<string> = df.getCol(colName);
|
|
49
|
+
const seqList = seqCol.toList();
|
|
50
|
+
|
|
51
|
+
const t1: number = Date.now();
|
|
52
|
+
_package.logger.debug('Bio: demoBio01aUI(), calc reduceDimensionality start...');
|
|
53
|
+
const redDimRes = await reduceDimensinalityWithNormalization( // TODO: Rename method typo
|
|
54
|
+
seqList, method, StringMetricsNames.Levenshtein, {});
|
|
55
|
+
const t2: number = Date.now();
|
|
56
|
+
_package.logger.debug('Bio: demoBio01aUI(), calc reduceDimensionality ' +
|
|
57
|
+
`ET: ${((t2 - t1) / 1000)} s`);
|
|
58
|
+
|
|
59
|
+
for (let embedI: number = 0; embedI < embedColNameList.length; embedI++) {
|
|
60
|
+
const embedColName: string = embedColNameList[embedI];
|
|
61
|
+
const embedCol: DG.Column = df.getCol(embedColName);
|
|
62
|
+
const embedColData: Float32Array = redDimRes.embedding[embedI];
|
|
63
|
+
// TODO: Use DG.Column.setRawData()
|
|
64
|
+
// embedCol.setRawData(embedColData);
|
|
65
|
+
embedCol.init((rowI) => { return embedColData[rowI]; });
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const rowCount: number = df.rowCount;
|
|
69
|
+
const idCol: DG.Column = df.getCol('id');
|
|
70
|
+
for (let idRowI = 0; idRowI < rowCount; idRowI++) {
|
|
71
|
+
const id = idCol.get(idRowI);
|
|
72
|
+
//idRows[id] = idRowI;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
for (const embedColName of Object.values(EMBED_COL_NAMES)) {
|
|
76
|
+
const embedCol: DG.Column<number> = df.getCol(embedColName);
|
|
77
|
+
//embedCols[embedColName] = embedCol;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const t3: number = Date.now();
|
|
81
|
+
_package.logger.debug('MLB: MlbVrSpaceBrowser.buildView(), postprocess reduceDimensionality ' +
|
|
82
|
+
`ET: ${((t3 - t2) / 1000)} s`);
|
|
83
|
+
}
|
|
84
|
+
resSpaceViewer = (await df.plot.fromType(DG.VIEWER.SCATTER_PLOT, {
|
|
85
|
+
'xColumnName': EMBED_COL_NAMES.X,
|
|
86
|
+
'yColumnName': EMBED_COL_NAMES.Y,
|
|
87
|
+
'lassoTool': true,
|
|
88
|
+
})) as DG.ScatterPlotViewer;
|
|
89
|
+
} else {
|
|
90
|
+
resSpaceViewer = (await sequenceSpaceTopMenu(df, df.getCol(colName),
|
|
91
|
+
'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
|
|
92
|
+
}
|
|
93
|
+
view.dockManager.dock(resSpaceViewer!, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
94
|
+
return resSpaceViewer;
|
|
95
|
+
}
|
package/src/package.ts
CHANGED
|
@@ -48,6 +48,10 @@ import {getMacromoleculeColumn} from './utils/ui-utils';
|
|
|
48
48
|
import {IUMAPOptions, ITSNEOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
49
49
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
50
50
|
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
51
|
+
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
52
|
+
import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
|
|
53
|
+
import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
|
|
54
|
+
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
51
55
|
|
|
52
56
|
// /** Avoid reassigning {@link monomerLib} because consumers subscribe to the {@link IMonomerLib.onChanged} event */
|
|
53
57
|
// let monomerLib: MonomerLib | null = null;
|
|
@@ -303,7 +307,7 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
|
303
307
|
.show();
|
|
304
308
|
}
|
|
305
309
|
|
|
306
|
-
//top-menu: Bio |
|
|
310
|
+
//top-menu: Bio | SAR | Activity Cliffs...
|
|
307
311
|
//name: Sequence Activity Cliffs
|
|
308
312
|
//description: detect activity cliffs
|
|
309
313
|
//input: dataframe table [Input data table]
|
|
@@ -360,7 +364,7 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
360
364
|
.show();
|
|
361
365
|
}
|
|
362
366
|
|
|
363
|
-
//top-menu: Bio | Sequence Space...
|
|
367
|
+
//top-menu: Bio | Structure | Sequence Space...
|
|
364
368
|
//name: Sequence Space
|
|
365
369
|
//input: dataframe table
|
|
366
370
|
//input: column molecules { semType: Macromolecule }
|
|
@@ -370,7 +374,8 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
370
374
|
//input: object options {optional: true}
|
|
371
375
|
//editor: Bio:SequenceSpaceEditor
|
|
372
376
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
373
|
-
similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions
|
|
377
|
+
similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions
|
|
378
|
+
): Promise<DG.Viewer | undefined> {
|
|
374
379
|
// A delay is required so that the initial function dialog can close before invalidation of molfiles starts.
|
|
375
380
|
// Otherwise, the dialog freezes
|
|
376
381
|
await delay(10);
|
|
@@ -430,7 +435,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
430
435
|
} */
|
|
431
436
|
};
|
|
432
437
|
|
|
433
|
-
//top-menu: Bio | To Atomic Level...
|
|
438
|
+
//top-menu: Bio | Atomic Level | To Atomic Level...
|
|
434
439
|
//name: To Atomic Level
|
|
435
440
|
//description: returns molfiles for each monomer from HELM library
|
|
436
441
|
//input: dataframe df [Input data table]
|
|
@@ -447,7 +452,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
447
452
|
await _toAtomicLevel(df, macroMolecule, monomersLibObject);
|
|
448
453
|
}
|
|
449
454
|
|
|
450
|
-
//top-menu: Bio | MSA...
|
|
455
|
+
//top-menu: Bio | Alignment | MSA...
|
|
451
456
|
//name: MSA...
|
|
452
457
|
//tags: bio, panel
|
|
453
458
|
export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = null): void {
|
|
@@ -515,8 +520,8 @@ export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = nul
|
|
|
515
520
|
.show();
|
|
516
521
|
}
|
|
517
522
|
|
|
523
|
+
//top-menu: Bio | Structure | Composition Analysis
|
|
518
524
|
//name: Composition Analysis
|
|
519
|
-
//top-menu: Bio | Composition Analysis
|
|
520
525
|
//meta.icon: files/icons/composition-analysis.svg
|
|
521
526
|
//output: viewer result
|
|
522
527
|
export async function compositionAnalysis(): Promise<void> {
|
|
@@ -574,8 +579,8 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
574
579
|
await handler(col);
|
|
575
580
|
}
|
|
576
581
|
|
|
577
|
-
//top-menu: Bio | SDF to JSON
|
|
578
|
-
//name: SDF to JSON
|
|
582
|
+
//top-menu: Bio | Atomic Level | SDF to JSON Library...
|
|
583
|
+
//name: SDF to JSON Library
|
|
579
584
|
//input: dataframe table
|
|
580
585
|
export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
581
586
|
const jsonMonomerLibrary = createJsonMonomerLibFromSdf(table);
|
|
@@ -728,7 +733,7 @@ export function diversitySearchTopMenu() {
|
|
|
728
733
|
view.dockManager.dock(viewer, 'down');
|
|
729
734
|
}
|
|
730
735
|
|
|
731
|
-
//top-menu: Bio | Substructure Search ...
|
|
736
|
+
//top-menu: Bio | Structure | Substructure Search ...
|
|
732
737
|
//name: bioSubstructureSearch
|
|
733
738
|
export function bioSubstructureSearch(): void {
|
|
734
739
|
const col = getMacromoleculeColumn();
|
|
@@ -750,3 +755,33 @@ export function saveAsFasta() {
|
|
|
750
755
|
export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
751
756
|
return new BioSubstructureFilter();
|
|
752
757
|
}
|
|
758
|
+
|
|
759
|
+
// -- Demo --
|
|
760
|
+
|
|
761
|
+
//name: demoBio01
|
|
762
|
+
//meta.demoPath: Bioinformatics | Similarity & Diversity
|
|
763
|
+
//description:
|
|
764
|
+
export async function demoBio01(): Promise<void> {
|
|
765
|
+
await demoBio01UI('func/Bio.demoBio01');
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
//name:demoBio01a
|
|
769
|
+
//meta.demoPath: Bioinformatics | Hierarchical Clustering & Sequence Space
|
|
770
|
+
//description:
|
|
771
|
+
export async function demoBio01a(): Promise<void> {
|
|
772
|
+
await demoBio01aUI('func/Bio.demoBio01a');
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
//name: demoBio01c
|
|
776
|
+
//meta.demoPath: Bioinformatics | Hierarchical Clustering & Activity Cliffs
|
|
777
|
+
//description:
|
|
778
|
+
export async function demoBio01b(): Promise<void> {
|
|
779
|
+
await demoBio01bUI('func/Bio.demoBio01b');
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
//name: demoBio05
|
|
783
|
+
//meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
|
|
784
|
+
//description:
|
|
785
|
+
export async function demoBio05(): Promise<void> {
|
|
786
|
+
await demoBio05UI('func/demoBio05');
|
|
787
|
+
}
|
package/src/utils/pepsea.ts
CHANGED
|
@@ -6,18 +6,21 @@ import * as C from './constants';
|
|
|
6
6
|
|
|
7
7
|
export const pepseaMethods = ['mafft --auto', 'mafft', 'linsi', 'ginsi', 'einsi', 'fftns', 'fftnsi', 'nwns', 'nwnsi'];
|
|
8
8
|
const alignmentObjectMetaKeys = ['AlignedSeq', 'AlignedSubpeptide', 'HELM', 'ID', 'PolymerID'];
|
|
9
|
-
type
|
|
9
|
+
type PepseaResponse = {
|
|
10
10
|
Alignment: {
|
|
11
11
|
PolymerID: string, AlignedSubpeptide: string, HELM: string, ID: string, AlignedSeq: string, [key: string]: string,
|
|
12
12
|
}[],
|
|
13
|
-
AlignmentScore: {[key: string]: number | null},
|
|
13
|
+
AlignmentScore: { [key: string]: number | null },
|
|
14
14
|
};
|
|
15
|
-
type PepseaBodyUnit = {ID: string, HELM: string};
|
|
15
|
+
type PepseaBodyUnit = { ID: string, HELM: string };
|
|
16
16
|
|
|
17
|
+
/** Gets the column containing MSA sequences produced by the 'PepSeA' tool from the {@link srcCol} column.
|
|
18
|
+
* Does not add the result column to the dataframe of {@link srcCol}.
|
|
19
|
+
*/
|
|
17
20
|
export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
|
|
18
21
|
method: typeof pepseaMethods[number] = 'ginsi', gapOpen: number = 1.53, gapExtend: number = 0.0,
|
|
19
22
|
clustersCol: DG.Column<string | number> | null = null,
|
|
20
|
-
|
|
23
|
+
): Promise<DG.Column<string>> {
|
|
21
24
|
const peptideCount = srcCol.length;
|
|
22
25
|
clustersCol ??= DG.Column.int('Clusters', peptideCount).init(0);
|
|
23
26
|
if (clustersCol.type != DG.COLUMN_TYPE.STRING)
|
|
@@ -58,13 +61,14 @@ export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
|
|
|
58
61
|
alignedSequencesCol.setTag(bioTAGS.separator, C.PEPSEA.SEPARATOR);
|
|
59
62
|
alignedSequencesCol.setTag(bioTAGS.aligned, ALIGNMENT.SEQ_MSA);
|
|
60
63
|
alignedSequencesCol.setTag(bioTAGS.alphabet, ALPHABET.UN);
|
|
64
|
+
alignedSequencesCol.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
61
65
|
alignedSequencesCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
62
66
|
|
|
63
67
|
return alignedSequencesCol;
|
|
64
68
|
}
|
|
65
69
|
|
|
66
70
|
async function requestAlignedObjects(dockerfileId: string, body: PepseaBodyUnit[], method: string, gapOpen: number,
|
|
67
|
-
gapExtend: number): Promise<
|
|
71
|
+
gapExtend: number): Promise<PepseaResponse> {
|
|
68
72
|
const params = {
|
|
69
73
|
method: 'POST',
|
|
70
74
|
headers: {'Accept': 'application/json', 'Content-Type': 'application/json'},
|