@datagrok/bio 2.4.18 → 2.4.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -8
- package/dist/864.js +1 -1
- package/dist/864.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/data/sample_FASTA_PT_activity.csv +100 -0
- package/files/tests/to-atomic-level-dna-fasta-input.csv +11 -0
- package/files/tests/to-atomic-level-dna-output.csv +15299 -0
- package/files/tests/to-atomic-level-msa-output.csv +3594 -0
- package/files/tests/to-atomic-level-msa-separator-input.csv +12 -0
- package/files/tests/to-atomic-level-peptides-fasta-input.csv +65 -0
- package/files/tests/to-atomic-level-peptides-output.csv +34901 -0
- package/package.json +4 -4
- package/src/demo/bio01-similarity-diversity.ts +15 -9
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +18 -10
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +16 -14
- package/src/demo/bio03-atomic-level.ts +25 -3
- package/src/demo/bio05-helm-msa-sequence-space.ts +10 -8
- package/src/demo/utils.ts +0 -12
- package/src/package-test.ts +2 -0
- package/src/package.ts +18 -8
- package/src/tests/converters-test.ts +24 -24
- package/src/tests/mm-distance-tests.ts +138 -0
- package/src/tests/to-atomic-level-tests.ts +187 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.18",
|
|
8
|
+
"version": "2.4.23",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.30.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
|
-
"@datagrok-libraries/ml": "^6.3.
|
|
20
|
-
"@datagrok-libraries/tutorials": "^1.2
|
|
19
|
+
"@datagrok-libraries/ml": "^6.3.23",
|
|
20
|
+
"@datagrok-libraries/tutorials": "^1.3.2",
|
|
21
21
|
"@datagrok-libraries/utils": "^2.1.3",
|
|
22
22
|
"cash-dom": "^8.0.0",
|
|
23
23
|
"css-loader": "^6.7.3",
|
|
package/src/demo/bio01-similarity-diversity.ts
CHANGED
|
@@ -9,14 +9,16 @@ import {handleError} from './utils';
|
|
|
9
9
|
import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
|
|
10
10
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
11
11
|
|
|
12
|
-
const dataFn: string = 'data/
|
|
12
|
+
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
13
13
|
|
|
14
14
|
export async function demoBio01UI() {
|
|
15
15
|
let view: DG.TableView;
|
|
16
16
|
let df: DG.DataFrame;
|
|
17
17
|
|
|
18
18
|
try {
|
|
19
|
-
const demoScript = new DemoScript(
|
|
19
|
+
const demoScript = new DemoScript(
|
|
20
|
+
'Similarity, Diversity',
|
|
21
|
+
'Sequence similarity tracking and evaluation dataset diversity');
|
|
20
22
|
await demoScript
|
|
21
23
|
.step(`Load DNA sequences`, async () => {
|
|
22
24
|
grok.shell.windows.showContextPanel = false;
|
|
@@ -25,12 +27,16 @@ export async function demoBio01UI() {
|
|
|
25
27
|
df = await _package.files.readCsv(dataFn);
|
|
26
28
|
view = grok.shell.addTableView(df);
|
|
27
29
|
|
|
28
|
-
view.grid.columns.byName('
|
|
29
|
-
view.grid.columns.byName('
|
|
30
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
31
|
+
view.grid.columns.byName('sequence_id')!.visible = false;
|
|
32
|
+
view.grid.columns.byName('sequence')!.width = 300;
|
|
33
|
+
view.grid.columns.byName('activity')!.visible = false;
|
|
34
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
35
|
+
|
|
30
36
|
// TODO: Fix column width
|
|
31
37
|
}, {
|
|
32
38
|
description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
|
|
33
|
-
delay:
|
|
39
|
+
delay: 2000
|
|
34
40
|
})
|
|
35
41
|
.step('Find the most similar sequences to the current one', async () => {
|
|
36
42
|
const simViewer = await df.plot.fromType('Sequence Similarity Search', {
|
|
@@ -40,7 +46,7 @@ export async function demoBio01UI() {
|
|
|
40
46
|
view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
|
|
41
47
|
}, {
|
|
42
48
|
description: `Add 'Sequence Similarity Search' viewer.`,
|
|
43
|
-
delay:
|
|
49
|
+
delay: 2000
|
|
44
50
|
})
|
|
45
51
|
.step('Explore most diverse sequences in a dataset', async () => {
|
|
46
52
|
const divViewer = await df.plot.fromType('Sequence Diversity Search', {
|
|
@@ -50,19 +56,19 @@ export async function demoBio01UI() {
|
|
|
50
56
|
view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
|
|
51
57
|
}, {
|
|
52
58
|
description: `Add 'Sequence Deversity Search' viewer.`,
|
|
53
|
-
delay:
|
|
59
|
+
delay: 2000
|
|
54
60
|
})
|
|
55
61
|
.step('Choose another sequence for similarity search', async () => {
|
|
56
62
|
df.currentRowIdx = 3;
|
|
57
63
|
}, {
|
|
58
64
|
description: 'Handling current row changed of data frame showing update of similar sequences.',
|
|
59
|
-
delay:
|
|
65
|
+
delay: 2000,
|
|
60
66
|
})
|
|
61
67
|
.step('One more sequence for similarity search', async () => {
|
|
62
68
|
df.currentRowIdx = 7;
|
|
63
69
|
}, {
|
|
64
70
|
description: 'Just one more sequence to search similar ones.',
|
|
65
|
-
delay:
|
|
71
|
+
delay: 2000,
|
|
66
72
|
})
|
|
67
73
|
.start();
|
|
68
74
|
} catch (err: any) {
|
|
package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts
CHANGED
|
@@ -11,7 +11,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
11
11
|
import {demoSequenceSpace, handleError} from './utils';
|
|
12
12
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
13
13
|
|
|
14
|
-
const dataFn = 'data/
|
|
14
|
+
const dataFn = 'data/sample_FASTA_PT_activity.csv';
|
|
15
15
|
const seqColName = 'sequence';
|
|
16
16
|
|
|
17
17
|
export async function demoBio01aUI() {
|
|
@@ -21,12 +21,14 @@ export async function demoBio01aUI() {
|
|
|
21
21
|
let df: DG.DataFrame;
|
|
22
22
|
let spViewer: DG.ScatterPlotViewer;
|
|
23
23
|
|
|
24
|
-
const
|
|
24
|
+
const dimRedMethod: string = 'UMAP';
|
|
25
25
|
const idRows: { [id: number]: number } = {};
|
|
26
26
|
const embedCols: { [colName: string]: DG.Column<number> } = {};
|
|
27
27
|
|
|
28
28
|
try {
|
|
29
|
-
const demoScript = new DemoScript(
|
|
29
|
+
const demoScript = new DemoScript(
|
|
30
|
+
'Demo',
|
|
31
|
+
'Exploring sequence space of Macromolecules, comparison with hierarchical clustering results');
|
|
30
32
|
await demoScript
|
|
31
33
|
.step(`Load DNA sequences`, async () => {
|
|
32
34
|
[df, treeHelper, dendrogramSvc] = await Promise.all([
|
|
@@ -36,18 +38,21 @@ export async function demoBio01aUI() {
|
|
|
36
38
|
]);
|
|
37
39
|
view = grok.shell.addTableView(df);
|
|
38
40
|
view.grid.props.rowHeight = 22;
|
|
41
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
42
|
+
view.grid.columns.byName('sequence')!.width = 200;
|
|
43
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
39
44
|
|
|
40
45
|
grok.shell.windows.showContextPanel = false;
|
|
41
46
|
grok.shell.windows.showProperties = false;
|
|
42
47
|
}, {
|
|
43
48
|
description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
|
|
44
|
-
delay:
|
|
49
|
+
delay: 2000,
|
|
45
50
|
})
|
|
46
51
|
.step('Build sequence space', async () => {
|
|
47
|
-
spViewer = await demoSequenceSpace(view, df, seqColName,
|
|
52
|
+
spViewer = await demoSequenceSpace(view, df, seqColName, dimRedMethod);
|
|
48
53
|
}, {
|
|
49
54
|
description: `Reduce sequence space dimensionality to display on 2D representation.`,
|
|
50
|
-
delay:
|
|
55
|
+
delay: 2000
|
|
51
56
|
})
|
|
52
57
|
.step('Cluster sequences', async () => {
|
|
53
58
|
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
@@ -60,20 +65,23 @@ export async function demoBio01aUI() {
|
|
|
60
65
|
dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
|
|
61
66
|
}, {
|
|
62
67
|
description: `Perform hierarchical clustering to reveal relationships between sequences.`,
|
|
63
|
-
delay:
|
|
68
|
+
delay: 2000,
|
|
64
69
|
})
|
|
65
70
|
.step('Select a sequence', async () => {
|
|
66
71
|
df.selection.init((idx: number) => [15].includes(idx));
|
|
67
72
|
}, {
|
|
68
73
|
description: `Handling selection of data frame row reflecting on linked viewers.`,
|
|
69
|
-
delay:
|
|
74
|
+
delay: 2000,
|
|
70
75
|
})
|
|
71
76
|
.step('Select a bunch of sequences', async () => {
|
|
72
|
-
|
|
77
|
+
const seqIdCol: DG.Column<string> = df.getCol('sequence_id');
|
|
78
|
+
df.selection.init((rowI: number) => {
|
|
79
|
+
return ['c0_seq120', 'c0_seq105', 'c0_seq121', 'c0_seq93'].includes(seqIdCol.get(rowI)!);
|
|
80
|
+
});
|
|
73
81
|
df.currentRowIdx = 27;
|
|
74
82
|
}, {
|
|
75
83
|
description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
|
|
76
|
-
delay:
|
|
84
|
+
delay: 2000,
|
|
77
85
|
})
|
|
78
86
|
.start();
|
|
79
87
|
} catch (err: any) {
|
|
package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts
CHANGED
|
@@ -13,20 +13,23 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
13
13
|
import {handleError} from './utils';
|
|
14
14
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
15
15
|
|
|
16
|
-
const dataFn = '
|
|
16
|
+
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
17
17
|
|
|
18
18
|
export async function demoBio01bUI() {
|
|
19
19
|
let treeHelper: ITreeHelper;
|
|
20
20
|
let dendrogramSvc: IDendrogramService;
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
let df: DG.DataFrame;
|
|
23
|
+
let view: DG.TableView;
|
|
23
24
|
let activityCliffsViewer: DG.ScatterPlotViewer;
|
|
24
25
|
|
|
25
|
-
const
|
|
26
|
+
const dimRedMethod: string = 'UMAP';
|
|
26
27
|
const idRows: { [id: number]: number } = {};
|
|
27
28
|
|
|
28
29
|
try {
|
|
29
|
-
const demoScript = new DemoScript(
|
|
30
|
+
const demoScript = new DemoScript(
|
|
31
|
+
'Activity Cliffs',
|
|
32
|
+
'Activity Cliffs analysis on Macromolecules data');
|
|
30
33
|
await demoScript
|
|
31
34
|
.step(`Load DNA sequences`, async () => {
|
|
32
35
|
grok.shell.windows.showContextPanel = false;
|
|
@@ -40,18 +43,17 @@ export async function demoBio01bUI() {
|
|
|
40
43
|
|
|
41
44
|
view = grok.shell.addTableView(df);
|
|
42
45
|
view.grid.props.rowHeight = 22;
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
lengthGCol.width = 0;
|
|
46
|
+
view.grid.columns.byName('cluster')!.visible = false;
|
|
47
|
+
view.grid.columns.byName('sequence')!.width = 300;
|
|
48
|
+
view.grid.columns.byName('is_cliff')!.visible = false;
|
|
47
49
|
}, {
|
|
48
50
|
description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
|
|
49
|
-
delay:
|
|
51
|
+
delay: 2000,
|
|
50
52
|
})
|
|
51
53
|
.step('Find activity cliffs', async () => {
|
|
52
54
|
activityCliffsViewer = (await activityCliffs(
|
|
53
55
|
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
54
|
-
80,
|
|
56
|
+
80, dimRedMethod)) as DG.ScatterPlotViewer;
|
|
55
57
|
view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
|
|
56
58
|
|
|
57
59
|
// Show grid viewer with the cliffs
|
|
@@ -60,7 +62,7 @@ export async function demoBio01bUI() {
|
|
|
60
62
|
cliffsLink.click();
|
|
61
63
|
}, {
|
|
62
64
|
description: 'Reveal similar sequences with a cliff of activity.',
|
|
63
|
-
delay:
|
|
65
|
+
delay: 2000
|
|
64
66
|
})
|
|
65
67
|
.step('Cluster sequences', async () => {
|
|
66
68
|
const seqCol: DG.Column<string> = df.getCol('sequence');
|
|
@@ -77,13 +79,13 @@ export async function demoBio01bUI() {
|
|
|
77
79
|
activityGCol.scrollIntoView();
|
|
78
80
|
}, {
|
|
79
81
|
description: 'Perform hierarchical clustering to reveal relationships between sequences.',
|
|
80
|
-
delay:
|
|
82
|
+
delay: 2000
|
|
81
83
|
})
|
|
82
84
|
.step('Browse the cliff', async () => {
|
|
83
85
|
//cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
|
|
84
86
|
const cliffsDfGrid: DG.Grid = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
|
|
85
87
|
//cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
|
|
86
|
-
cliffsDfGrid.dataFrame.currentRowIdx = 0;
|
|
88
|
+
if (cliffsDfGrid.dataFrame.rowCount > 0) cliffsDfGrid.dataFrame.currentRowIdx = 0;
|
|
87
89
|
//cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);
|
|
88
90
|
|
|
89
91
|
// /* workaround to select rows of the cliff */
|
|
@@ -97,7 +99,7 @@ export async function demoBio01bUI() {
|
|
|
97
99
|
// }
|
|
98
100
|
}, {
|
|
99
101
|
description: 'Zoom in to explore selected activity cliff details.',
|
|
100
|
-
delay:
|
|
102
|
+
delay: 2000
|
|
101
103
|
})
|
|
102
104
|
.start();
|
|
103
105
|
} catch (err: any) {
|
|
package/src/demo/bio03-atomic-level.ts
CHANGED
|
@@ -7,12 +7,14 @@ import {_package, toAtomicLevel} from '../package';
|
|
|
7
7
|
import $ from 'cash-dom';
|
|
8
8
|
import {handleError} from './utils';
|
|
9
9
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
10
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
10
11
|
|
|
11
|
-
const dataFn: string = '
|
|
12
|
+
const dataFn: string = 'samples/sample_FASTA.csv';
|
|
12
13
|
|
|
13
14
|
export async function demoBio03UI(): Promise<void> {
|
|
14
15
|
let df: DG.DataFrame;
|
|
15
16
|
let view: DG.TableView;
|
|
17
|
+
let dlg: DG.Dialog;
|
|
16
18
|
|
|
17
19
|
try {
|
|
18
20
|
await new DemoScript(
|
|
@@ -20,6 +22,9 @@ export async function demoBio03UI(): Promise<void> {
|
|
|
20
22
|
'Atomic level structure of Macromolecules'
|
|
21
23
|
)
|
|
22
24
|
.step(`Loading Macromolecules notation 'Helm'`, async () => {
|
|
25
|
+
grok.shell.windows.showContextPanel = false;
|
|
26
|
+
grok.shell.windows.showProperties = false;
|
|
27
|
+
|
|
23
28
|
df = await _package.files.readCsv(dataFn);
|
|
24
29
|
view = grok.shell.addTableView(df);
|
|
25
30
|
for (let colI: number = 0; colI < view.grid.columns.length; colI++) {
|
|
@@ -28,14 +33,31 @@ export async function demoBio03UI(): Promise<void> {
|
|
|
28
33
|
}
|
|
29
34
|
}, {
|
|
30
35
|
description: `Load dataset with macromolecules of 'fasta' notation, 'PT' alphabet (protein, aminoacids).`,
|
|
31
|
-
delay:
|
|
36
|
+
delay: 2000,
|
|
32
37
|
})
|
|
33
38
|
.step('To atomic level', async () => {
|
|
34
39
|
const seqCol = df.getCol('Sequence');
|
|
35
40
|
await toAtomicLevel(df, seqCol);
|
|
36
41
|
}, {
|
|
37
42
|
description: 'Get atomic level structures of Macromolecules.',
|
|
38
|
-
delay:
|
|
43
|
+
delay: 2000,
|
|
44
|
+
})
|
|
45
|
+
.step('Sketcher', async () => {
|
|
46
|
+
const molColName: string = 'molfile(Sequence)';
|
|
47
|
+
df.currentCell = df.cell(1, molColName);
|
|
48
|
+
const mol: string = df.currentCell.value;
|
|
49
|
+
|
|
50
|
+
const sketcher = new DG.chem.Sketcher(DG.chem.SKETCHER_MODE.INPLACE);
|
|
51
|
+
sketcher.setMolFile(mol);
|
|
52
|
+
|
|
53
|
+
dlg = ui.dialog()
|
|
54
|
+
.add(sketcher)
|
|
55
|
+
.show();
|
|
56
|
+
await delay(3000);
|
|
57
|
+
dlg.close();
|
|
58
|
+
}, {
|
|
59
|
+
description: 'Display atomic level structure within a sketcher.',
|
|
60
|
+
delay: 2000,
|
|
39
61
|
})
|
|
40
62
|
.start();
|
|
41
63
|
} catch (err: any) {
|
|
package/src/demo/bio05-helm-msa-sequence-space.ts
CHANGED
|
@@ -22,9 +22,12 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
22
22
|
|
|
23
23
|
const helmColName: string = 'HELM';
|
|
24
24
|
const msaHelmColName: string = 'msa(HELM)';
|
|
25
|
+
const dimRedMethod: string = 'UMAP';
|
|
25
26
|
|
|
26
27
|
try {
|
|
27
|
-
const demoScript = new DemoScript(
|
|
28
|
+
const demoScript = new DemoScript(
|
|
29
|
+
'Helm, MSA, Sequence Space',
|
|
30
|
+
'MSA and composition analysis on Helm data');
|
|
28
31
|
await demoScript
|
|
29
32
|
.step(`Load peptides with non-natural aminoacids in 'HELM' notation`, async () => {
|
|
30
33
|
view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
|
|
@@ -33,9 +36,9 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
33
36
|
grok.shell.windows.showProperties = false;
|
|
34
37
|
}, {
|
|
35
38
|
description: 'Load dataset with macromolecules of \'Helm\' notation.',
|
|
36
|
-
delay:
|
|
39
|
+
delay: 2000,
|
|
37
40
|
})
|
|
38
|
-
.step('Align
|
|
41
|
+
.step('Align peptides with non-natural aminoacids with PepSeA', async () => {
|
|
39
42
|
helmCol = df.getCol(helmColName);
|
|
40
43
|
const method: string = pepseaMethods[0];
|
|
41
44
|
const gapOpen: number = 1.53;
|
|
@@ -45,16 +48,15 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
45
48
|
await grok.data.detectSemanticTypes(df);
|
|
46
49
|
}, {
|
|
47
50
|
description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
|
|
48
|
-
delay:
|
|
51
|
+
delay: 2000,
|
|
49
52
|
})
|
|
50
53
|
.step('Build sequence space', async () => {
|
|
51
|
-
const method: string = 'UMAP';
|
|
52
54
|
ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
|
|
53
|
-
|
|
55
|
+
dimRedMethod, StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
|
|
54
56
|
view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
55
57
|
}, {
|
|
56
58
|
description: 'Reduce sequence space dimensionality to display on 2D representation.',
|
|
57
|
-
delay:
|
|
59
|
+
delay: 2000
|
|
58
60
|
})
|
|
59
61
|
.step('Analyse sequence composition', async () => {
|
|
60
62
|
wlViewer = await df.plot.fromType('WebLogo', {
|
|
@@ -64,7 +66,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
64
66
|
view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
|
|
65
67
|
}, {
|
|
66
68
|
description: 'Composition analysis allows to reveal functional features of sequences like motifs, or variable loops.',
|
|
67
|
-
delay:
|
|
69
|
+
delay: 2000,
|
|
68
70
|
})
|
|
69
71
|
.start();
|
|
70
72
|
} catch (err: any) {
|
package/src/demo/utils.ts
CHANGED
|
@@ -52,18 +52,6 @@ export async function demoSequenceSpace(
|
|
|
52
52
|
embedCol.init((rowI) => { return embedColData[rowI]; });
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
const rowCount: number = df.rowCount;
|
|
56
|
-
const idCol: DG.Column = df.getCol('id');
|
|
57
|
-
for (let idRowI = 0; idRowI < rowCount; idRowI++) {
|
|
58
|
-
const id = idCol.get(idRowI);
|
|
59
|
-
//idRows[id] = idRowI;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
for (const embedColName of Object.values(EMBED_COL_NAMES)) {
|
|
63
|
-
const embedCol: DG.Column<number> = df.getCol(embedColName);
|
|
64
|
-
//embedCols[embedColName] = embedCol;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
55
|
const t3: number = Date.now();
|
|
68
56
|
_package.logger.debug('MLB: MlbVrSpaceBrowser.buildView(), postprocess reduceDimensionality ' +
|
|
69
57
|
`ET: ${((t3 - t2) / 1000)} s`);
|
package/src/package-test.ts
CHANGED
|
@@ -22,6 +22,8 @@ import './tests/substructure-filters-tests';
|
|
|
22
22
|
import './tests/pepsea-tests';
|
|
23
23
|
import './tests/viewers';
|
|
24
24
|
import './tests/units-handler-tests';
|
|
25
|
+
import './tests/to-atomic-level-tests';
|
|
26
|
+
import './tests/mm-distance-tests';
|
|
25
27
|
|
|
26
28
|
// Tests hanging github CI
|
|
27
29
|
import './tests/activity-cliffs-tests';
|
package/src/package.ts
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
} from './utils/cell-renderer';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {SequenceAlignment} from './seq_align';
|
|
12
|
-
import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
12
|
+
import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
|
|
13
13
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
14
|
import {
|
|
15
15
|
createLinesGrid,
|
|
@@ -290,19 +290,23 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
290
290
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
291
291
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
292
292
|
};
|
|
293
|
+
const uh = new UnitsHandler(macroMolecule);
|
|
294
|
+
let columnDistanceMetric = 'Tanimoto';
|
|
295
|
+
if (uh.isFasta())
|
|
296
|
+
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
293
297
|
const sp = await getActivityCliffs(
|
|
294
298
|
df,
|
|
295
299
|
macroMolecule,
|
|
296
300
|
null,
|
|
297
301
|
axesNames,
|
|
298
|
-
'Activity cliffs',
|
|
302
|
+
'Activity cliffs', //scatterTitle
|
|
299
303
|
activities,
|
|
300
304
|
similarity,
|
|
301
|
-
|
|
305
|
+
columnDistanceMetric, //similarityMetric
|
|
302
306
|
methodName,
|
|
303
307
|
DG.SEMTYPE.MACROMOLECULE,
|
|
304
308
|
tags,
|
|
305
|
-
|
|
309
|
+
getSequenceSpace,
|
|
306
310
|
getChemSimilaritiesMatrix,
|
|
307
311
|
createTooltipElement,
|
|
308
312
|
createPropPanelElement,
|
|
@@ -353,7 +357,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
353
357
|
embedAxesNames: embedColsNames,
|
|
354
358
|
options: options
|
|
355
359
|
};
|
|
356
|
-
const sequenceSpaceRes = await
|
|
360
|
+
const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
|
|
357
361
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
358
362
|
for (const col of embeddings) {
|
|
359
363
|
const listValues = col.toList();
|
|
@@ -407,9 +411,15 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
407
411
|
}
|
|
408
412
|
if (!checkInputColumnUI(macroMolecule, 'To Atomic Level'))
|
|
409
413
|
return;
|
|
410
|
-
const
|
|
411
|
-
const
|
|
412
|
-
|
|
414
|
+
const monomerLib: IMonomerLib = (await getMonomerLibHelper()).getBioLib();
|
|
415
|
+
const atomicLevelRes = await _toAtomicLevel(df, macroMolecule, monomerLib);
|
|
416
|
+
if (atomicLevelRes.col !== null) {
|
|
417
|
+
df.columns.add(atomicLevelRes.col, true);
|
|
418
|
+
await grok.data.detectSemanticTypes(df);
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
if (atomicLevelRes.warnings && atomicLevelRes.warnings.length > 0)
|
|
422
|
+
grok.shell.warning(ui.list(atomicLevelRes.warnings));
|
|
413
423
|
}
|
|
414
424
|
|
|
415
425
|
//top-menu: Bio | Alignment | MSA...
|
|
package/src/tests/converters-test.ts
CHANGED
|
@@ -53,9 +53,9 @@ Y-N-R-Q-W-Y-V
|
|
|
53
53
|
M-K-P-S-E-Y-V
|
|
54
54
|
`,
|
|
55
55
|
helmPt: `seq
|
|
56
|
-
PEPTIDE1{F.W.P.H.E.Y}
|
|
57
|
-
PEPTIDE1{Y.N.R.Q.W.Y.V}
|
|
58
|
-
PEPTIDE1{M.K.P.S.E.Y.V}
|
|
56
|
+
PEPTIDE1{F.W.P.H.E.Y}$$$$
|
|
57
|
+
PEPTIDE1{Y.N.R.Q.W.Y.V}$$$$
|
|
58
|
+
PEPTIDE1{M.K.P.S.E.Y.V}$$$$
|
|
59
59
|
`,
|
|
60
60
|
fastaDna: `seq
|
|
61
61
|
ACGTC
|
|
@@ -68,9 +68,9 @@ C/A/G/T/G/T
|
|
|
68
68
|
T/T/C/A/A/C
|
|
69
69
|
`,
|
|
70
70
|
helmDna: `seq
|
|
71
|
-
DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}
|
|
72
|
-
DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}
|
|
73
|
-
DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}
|
|
71
|
+
DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$$
|
|
72
|
+
DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$$
|
|
73
|
+
DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$$
|
|
74
74
|
`,
|
|
75
75
|
fastaRna: `seq
|
|
76
76
|
ACGUC
|
|
@@ -83,9 +83,9 @@ C*A*G*U*G*U
|
|
|
83
83
|
U*U*C*A*A*C
|
|
84
84
|
`,
|
|
85
85
|
helmRna: `seq
|
|
86
|
-
RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}
|
|
87
|
-
RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}
|
|
88
|
-
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}
|
|
86
|
+
RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
|
|
87
|
+
RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
|
|
88
|
+
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$$
|
|
89
89
|
`,
|
|
90
90
|
fastaGaps: `seq
|
|
91
91
|
FW-PH-EYY
|
|
@@ -98,9 +98,9 @@ F/Y/N/R/Q/W/Y/V/
|
|
|
98
98
|
F/K/P//Q//S/E/Y/V
|
|
99
99
|
`,
|
|
100
100
|
helmGaps: `seq
|
|
101
|
-
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}
|
|
102
|
-
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}
|
|
103
|
-
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}
|
|
101
|
+
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$$
|
|
102
|
+
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$$
|
|
103
|
+
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$$
|
|
104
104
|
`,
|
|
105
105
|
|
|
106
106
|
fastaUn: `seq
|
|
@@ -114,24 +114,24 @@ meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
|
114
114
|
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
115
115
|
`,
|
|
116
116
|
helmUn: `seq
|
|
117
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}
|
|
118
|
-
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}
|
|
119
|
-
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}
|
|
117
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
|
|
118
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
119
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
120
120
|
`,
|
|
121
121
|
helmLoneDeoxyribose: `seq
|
|
122
|
-
DNA1{D(A).D(C).D(G).D(T).D(C)}
|
|
123
|
-
DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}
|
|
124
|
-
DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}
|
|
122
|
+
DNA1{D(A).D(C).D(G).D(T).D(C)}$$$$
|
|
123
|
+
DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$$
|
|
124
|
+
DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$$
|
|
125
125
|
`,
|
|
126
126
|
helmLoneRibose: `seq
|
|
127
|
-
RNA1{R(A).R(C).R(G).R(U).R(C)}
|
|
128
|
-
RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}
|
|
129
|
-
RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}
|
|
127
|
+
RNA1{R(A).R(C).R(G).R(U).R(C)}$$$$
|
|
128
|
+
RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$$
|
|
129
|
+
RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$$
|
|
130
130
|
`,
|
|
131
131
|
helmLonePhosphorus: `seq
|
|
132
|
-
RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}
|
|
133
|
-
RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}
|
|
134
|
-
RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}
|
|
132
|
+
RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
|
|
133
|
+
RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
|
|
134
|
+
RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$
|
|
135
135
|
`,
|
|
136
136
|
};
|
|
137
137
|
|