@datagrok/bio 2.19.0 → 2.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +2 -2
- package/dist/242.js +1 -1
- package/dist/242.js.map +1 -1
- package/dist/284.js +1 -1
- package/dist/284.js.map +1 -1
- package/dist/589.js +1 -1
- package/dist/589.js.map +1 -1
- package/dist/731.js +1 -1
- package/dist/731.js.map +1 -1
- package/dist/810.js +2 -0
- package/dist/810.js.map +1 -0
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/dockerfiles/container.json +1 -1
- package/files/samples/HELM_BI_CYCLIC.csv +7 -0
- package/files/samples/peptides-non-natural.csv +1001 -0
- package/files/tests/helm_cyclic_cliffs.csv +225019 -0
- package/package.json +6 -6
- package/projects/seq_space_demo.zip +0 -0
- package/src/analysis/sequence-diversity-viewer.ts +22 -14
- package/src/analysis/sequence-search-base-viewer.ts +7 -73
- package/src/analysis/sequence-similarity-viewer.ts +41 -41
- package/src/demo/bio01-similarity-diversity.ts +21 -2
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +7 -0
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +38 -1
- package/src/demo/bio03-atomic-level.ts +15 -0
- package/src/package.ts +11 -34
- package/src/tests/activity-cliffs-utils.ts +7 -6
- package/src/tests/similarity-diversity-tests.ts +2 -2
- package/src/utils/cell-renderer.ts +11 -1
- package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +8 -4
- package/src/utils/pepsea.ts +22 -39
- package/src/utils/seq-helper/seq-helper.ts +14 -34
- package/src/utils/ui-utils.ts +23 -0
- package/test-console-output-1.log +0 -7396
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.19.0",
|
|
8
|
+
"version": "2.20.1",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -47,10 +47,10 @@
|
|
|
47
47
|
"@datagrok-libraries/bio": "^5.50.1",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.7",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.4",
|
|
50
|
-
"@datagrok-libraries/ml": "^6.
|
|
51
|
-
"@datagrok-libraries/tutorials": "^1.
|
|
52
|
-
"@datagrok-libraries/utils": "^4.
|
|
53
|
-
"datagrok-api": "^1.
|
|
50
|
+
"@datagrok-libraries/ml": "^6.10.0",
|
|
51
|
+
"@datagrok-libraries/tutorials": "^1.6.0",
|
|
52
|
+
"@datagrok-libraries/utils": "^4.5.0",
|
|
53
|
+
"datagrok-api": "^1.25.0",
|
|
54
54
|
"@webgpu/types": "^0.1.40",
|
|
55
55
|
"ajv": "^8.12.0",
|
|
56
56
|
"ajv-errors": "^3.0.0",
|
|
@@ -109,7 +109,7 @@
|
|
|
109
109
|
"analyze": "webpack --profile --json > ./stats.json && npx webpack-bundle-analyzer ./stats.json"
|
|
110
110
|
},
|
|
111
111
|
"canEdit": [
|
|
112
|
-
"
|
|
112
|
+
"Administrators"
|
|
113
113
|
],
|
|
114
114
|
"canView": [
|
|
115
115
|
"All users"
|
|
Binary file
|
|
@@ -5,12 +5,13 @@ import * as grok from 'datagrok-api/grok';
|
|
|
5
5
|
import {getDiverseSubset} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
6
6
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
7
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
|
-
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
8
|
+
import {adjustGridcolAfterRender, updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
10
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
11
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
12
12
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
13
13
|
import {DistanceMatrixService, dmLinearIndex} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
14
|
+
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
14
15
|
|
|
15
16
|
export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
16
17
|
diverseColumnLabel: string | null; // Use postfix Label to prevent activating table column selection editor
|
|
@@ -22,7 +23,7 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
22
23
|
constructor(
|
|
23
24
|
private readonly seqHelper: ISeqHelper,
|
|
24
25
|
) {
|
|
25
|
-
super('diversity');
|
|
26
|
+
super('diversity', DG.SEMTYPE.MACROMOLECULE);
|
|
26
27
|
this.diverseColumnLabel = this.string('diverseColumnLabel', null);
|
|
27
28
|
}
|
|
28
29
|
|
|
@@ -30,27 +31,34 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
30
31
|
if (!this.beforeRender())
|
|
31
32
|
return;
|
|
32
33
|
if (this.dataFrame) {
|
|
33
|
-
if (computeData && this.
|
|
34
|
-
const sh = this.seqHelper.getSeqHandler(this.
|
|
34
|
+
if (computeData && this.targetColumn) {
|
|
35
|
+
const sh = this.seqHelper.getSeqHandler(this.targetColumn);
|
|
35
36
|
await (sh.isFasta() ? this.computeByMM() : this.computeByChem());
|
|
36
37
|
|
|
37
38
|
const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
|
|
38
|
-
`diverse (${this.
|
|
39
|
+
`diverse (${this.targetColumnName})`;
|
|
39
40
|
const resCol = DG.Column.string(diverseColumnName, this.renderMolIds!.length)
|
|
40
|
-
.init((i) => this.
|
|
41
|
+
.init((i) => this.targetColumn?.get(this.renderMolIds![i]));
|
|
41
42
|
resCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
|
-
this.tags.forEach((tag) => resCol.setTag(tag, this.
|
|
43
|
+
this.tags.forEach((tag) => resCol.setTag(tag, this.targetColumn!.getTag(tag)));
|
|
43
44
|
const resDf = DG.DataFrame.fromColumns([resCol]);
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
resCol.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
46
|
+
|
|
47
|
+
const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
|
|
48
|
+
this.dataFrame.currentRowIdx = this.renderMolIds![resDf.currentRowIdx];
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const grid = resDf.plot.grid();
|
|
52
|
+
adjustGridcolAfterRender(grid, resCol.name, 450, 30);
|
|
53
|
+
|
|
54
|
+
updateDivInnerHTML(this.root, grid.root);
|
|
47
55
|
this.computeCompleted.next(true);
|
|
48
56
|
}
|
|
49
57
|
}
|
|
50
58
|
}
|
|
51
59
|
|
|
52
60
|
private async computeByChem() {
|
|
53
|
-
const monomericMols = await getMonomericMols(this.
|
|
61
|
+
const monomericMols = await getMonomericMols(this.targetColumn!, this.seqHelper);
|
|
54
62
|
//need to create df to calculate fingerprints
|
|
55
63
|
const _monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
|
|
56
64
|
this.renderMolIds = await grok.functions.call('Chem:callChemDiversitySearch', {
|
|
@@ -63,15 +71,15 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
63
71
|
|
|
64
72
|
private async computeByMM() {
|
|
65
73
|
const encodedSequences =
|
|
66
|
-
(await getEncodedSeqSpaceCol(this.
|
|
74
|
+
(await getEncodedSeqSpaceCol(this.targetColumn!, MmDistanceFunctionsNames.LEVENSHTEIN)).seqList;
|
|
67
75
|
const distanceMatrixService = new DistanceMatrixService(true, false);
|
|
68
76
|
const distanceMatrixData = await distanceMatrixService.calc(encodedSequences, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
69
77
|
distanceMatrixService.terminate();
|
|
70
|
-
const len = this.
|
|
78
|
+
const len = this.targetColumn!.length;
|
|
71
79
|
const linearizeFunc = dmLinearIndex(len);
|
|
72
80
|
this.renderMolIds = getDiverseSubset(len, Math.min(len, this.limit),
|
|
73
81
|
(i1: number, i2: number) => {
|
|
74
|
-
return this.
|
|
82
|
+
return this.targetColumn!.isNone(i1) || this.targetColumn!.isNone(i2) ? 0 :
|
|
75
83
|
distanceMatrixData[linearizeFunc(i1, i2)];
|
|
76
84
|
});
|
|
77
85
|
}
|
|
@@ -4,94 +4,28 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import {CHEM_SIMILARITY_METRICS} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
6
6
|
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
import {SearchBaseViewer} from '@datagrok-libraries/ml/src/viewers/search-base-viewer';
|
|
7
8
|
|
|
8
9
|
const MAX_ROWS_FOR_DISTANCE_MATRIX = 22000;
|
|
9
10
|
|
|
10
|
-
export class SequenceSearchBaseViewer extends
|
|
11
|
-
name: string = '';
|
|
11
|
+
export class SequenceSearchBaseViewer extends SearchBaseViewer {
|
|
12
12
|
distanceMetric: string;
|
|
13
|
-
limit: number;
|
|
14
13
|
fingerprint: string;
|
|
15
14
|
metricsProperties = ['distanceMetric', 'fingerprint'];
|
|
16
15
|
fingerprintChoices = ['Morgan', 'Pattern'];
|
|
17
|
-
|
|
18
|
-
moleculeColumnName: string;
|
|
19
|
-
initialized: boolean = false;
|
|
20
|
-
tags = [DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.separator, bioTAGS.alphabet];
|
|
16
|
+
tags = [DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.separator, bioTAGS.alphabet, 'cell.renderer'];
|
|
21
17
|
preComputeDistanceMatrix: boolean = false;
|
|
22
18
|
|
|
23
|
-
constructor(name: string) {
|
|
24
|
-
super();
|
|
19
|
+
constructor(name: string, semType: string) {
|
|
20
|
+
super(name, semType);
|
|
25
21
|
this.fingerprint = this.string('fingerprint', this.fingerprintChoices[0], {choices: this.fingerprintChoices});
|
|
26
|
-
this.limit = this.int('limit', 10);
|
|
27
22
|
this.distanceMetric = this.string('distanceMetric', CHEM_SIMILARITY_METRICS[0], {choices: CHEM_SIMILARITY_METRICS});
|
|
28
|
-
this.moleculeColumnName = this.string('moleculeColumnName');
|
|
29
|
-
this.name = name;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
init(): void {
|
|
33
|
-
this.initialized = true;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
detach(): void {
|
|
37
|
-
this.subs.forEach((sub) => sub.unsubscribe());
|
|
38
23
|
}
|
|
39
24
|
|
|
40
25
|
async onTableAttached(): Promise<void> {
|
|
41
|
-
|
|
26
|
+
super.onTableAttached();
|
|
42
27
|
|
|
43
|
-
if (this.dataFrame)
|
|
28
|
+
if (this.dataFrame)
|
|
44
29
|
this.preComputeDistanceMatrix = this.dataFrame.rowCount <= MAX_ROWS_FOR_DISTANCE_MATRIX;
|
|
45
|
-
this.subs.push(DG.debounce(this.dataFrame.onRowsRemoved, 50)
|
|
46
|
-
.subscribe((_: any) => this.render(true)));
|
|
47
|
-
const compute = this.name !== 'diversity';
|
|
48
|
-
this.subs.push(DG.debounce(this.dataFrame.onCurrentRowChanged, 50)
|
|
49
|
-
.subscribe((_: any) => this.render(compute)));
|
|
50
|
-
this.subs.push(DG.debounce(this.dataFrame.selection.onChanged, 50)
|
|
51
|
-
.subscribe((_: any) => this.render(false)));
|
|
52
|
-
this.subs.push(DG.debounce(ui.onSizeChanged(this.root), 50)
|
|
53
|
-
.subscribe((_: any) => this.render(false)));
|
|
54
|
-
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE) as DG.Column<string>;
|
|
55
|
-
this.moleculeColumnName = this.moleculeColumn?.name!;
|
|
56
|
-
this.getProperty('limit')!.fromOptions({min: 1, max: this.dataFrame.rowCount});
|
|
57
|
-
}
|
|
58
|
-
this.render();
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
onPropertyChanged(property: DG.Property): void {
|
|
62
|
-
super.onPropertyChanged(property);
|
|
63
|
-
if (!this.initialized)
|
|
64
|
-
return;
|
|
65
|
-
if (property.name === 'moleculeColumnName') {
|
|
66
|
-
const col = this.dataFrame.col(property.get(this))!;
|
|
67
|
-
if (col.semType === DG.SEMTYPE.MACROMOLECULE)
|
|
68
|
-
this.moleculeColumn = col;
|
|
69
|
-
}
|
|
70
|
-
this.render();
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
/** For tests */ public computeRequested: boolean = false;
|
|
74
|
-
public renderPromise: Promise<void> = Promise.resolve();
|
|
75
|
-
|
|
76
|
-
protected render(computeData = true): void {
|
|
77
|
-
this.renderPromise = this.renderPromise.then(async () => {
|
|
78
|
-
this.computeRequested = this.computeRequested || computeData;
|
|
79
|
-
await this.renderInt(computeData);
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
async renderInt(_computeData: boolean): Promise<void> {
|
|
84
|
-
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
beforeRender() {
|
|
88
|
-
if (!this.initialized)
|
|
89
|
-
return false;
|
|
90
|
-
if (this.dataFrame && this.moleculeColumnName &&
|
|
91
|
-
this.dataFrame.col(this.moleculeColumnName)!.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
92
|
-
grok.shell.error(`${this.moleculeColumnName} is not Macromolecule type`);
|
|
93
|
-
return false;
|
|
94
|
-
}
|
|
95
|
-
return true;
|
|
96
30
|
}
|
|
97
31
|
}
|
|
@@ -3,15 +3,15 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
6
|
-
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
7
6
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
|
-
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
7
|
+
import {adjustGridcolAfterRender, updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
8
|
import {Subject} from 'rxjs';
|
|
10
9
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
10
|
import {alignSequencePair} from '@datagrok-libraries/bio/src/utils/macromolecule/alignment';
|
|
12
11
|
import {KnnResult, SparseMatrixService} from '@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service';
|
|
13
12
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
14
13
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
14
|
+
import {MmcrTemps, tempTAGS} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
15
15
|
|
|
16
16
|
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
17
17
|
cutoff: number;
|
|
@@ -31,12 +31,14 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
31
31
|
knn?: KnnResult;
|
|
32
32
|
kPrevNeighbors: number = 0;
|
|
33
33
|
demo?: boolean;
|
|
34
|
+
analysisGrid?: DG.Grid;
|
|
35
|
+
subInited: boolean = false;
|
|
34
36
|
|
|
35
37
|
constructor(
|
|
36
38
|
private readonly seqHelper: ISeqHelper,
|
|
37
39
|
demo?: boolean,
|
|
38
40
|
) {
|
|
39
|
-
super('similarity');
|
|
41
|
+
super('similarity', DG.SEMTYPE.MACROMOLECULE);
|
|
40
42
|
this.cutoff = this.float('cutoff', 0.01, {min: 0, max: 1});
|
|
41
43
|
this.hotSearch = this.bool('hotSearch', true);
|
|
42
44
|
this.similarColumnLabel = this.string('similarColumnLabel', null);
|
|
@@ -51,63 +53,60 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
51
53
|
override async renderInt(computeData: boolean): Promise<void> {
|
|
52
54
|
if (!this.beforeRender())
|
|
53
55
|
return;
|
|
54
|
-
if (this.
|
|
56
|
+
if (this.targetColumn) {
|
|
55
57
|
this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
56
58
|
if (computeData && !this.gridSelect) {
|
|
57
|
-
this.targetMoleculeIdx = this.dataFrame!.currentRowIdx
|
|
58
|
-
const sh = this.seqHelper.getSeqHandler(this.moleculeColumn!);
|
|
59
|
+
this.targetMoleculeIdx = (this.dataFrame!.currentRowIdx ?? -1) < 0 ? 0 : this.dataFrame!.currentRowIdx;
|
|
59
60
|
|
|
60
|
-
await
|
|
61
|
+
await this.computeByMM();
|
|
61
62
|
const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
|
|
62
|
-
`similar (${this.
|
|
63
|
+
`similar (${this.targetColumn})`;
|
|
63
64
|
this.molCol = DG.Column.string(similarColumnName,
|
|
64
|
-
this.idxs!.length).init((i) => this.
|
|
65
|
+
this.idxs!.length).init((i) => this.targetColumn?.get(this.idxs?.get(i)));
|
|
65
66
|
this.molCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
66
|
-
this.tags.forEach((tag) => this.molCol!.setTag(tag, this.
|
|
67
|
+
this.tags.forEach((tag) => this.molCol!.setTag(tag, this.targetColumn!.getTag(tag)));
|
|
67
68
|
const resDf = DG.DataFrame.fromColumns([this.idxs!, this.molCol!, this.scores!]);
|
|
68
|
-
resDf.
|
|
69
|
+
await resDf.meta.detectSemanticTypes();
|
|
70
|
+
await grok.data.detectSemanticTypes(resDf);
|
|
71
|
+
this.molCol.temp[tempTAGS.referenceSequence] = this.targetColumn!.get(this.targetMoleculeIdx);
|
|
72
|
+
this.molCol.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
73
|
+
let prevTimer: any = null;
|
|
74
|
+
const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
|
|
75
|
+
prevTimer && clearTimeout(prevTimer);
|
|
69
76
|
this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx);
|
|
70
|
-
setTimeout(() => { this.createPropertyPanel(resDf); },
|
|
77
|
+
prevTimer = setTimeout(() => { this.createPropertyPanel(resDf); }, 300);
|
|
71
78
|
this.gridSelect = true;
|
|
72
79
|
});
|
|
73
|
-
|
|
74
|
-
|
|
80
|
+
if (!this.analysisGrid) {
|
|
81
|
+
this.analysisGrid = resDf.plot.grid();
|
|
82
|
+
updateDivInnerHTML(this.root, this.analysisGrid.root);
|
|
83
|
+
} else {
|
|
84
|
+
this.analysisGrid.dataFrame = resDf;
|
|
85
|
+
this.analysisGrid.invalidate();
|
|
86
|
+
}
|
|
87
|
+
this.analysisGrid.col('indexes')!.visible = false;
|
|
88
|
+
adjustGridcolAfterRender(this.analysisGrid, this.molCol!.name, 450, 30, true);
|
|
75
89
|
const targetMolRow = this.idxs?.getRawData().findIndex((it) => it == this.targetMoleculeIdx);
|
|
76
|
-
const targetScoreCell =
|
|
90
|
+
const targetScoreCell = this.analysisGrid.cell('score', targetMolRow!);
|
|
77
91
|
targetScoreCell.cell.value = null;
|
|
78
|
-
const view =
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
92
|
+
const view = grok.shell.tv;
|
|
93
|
+
if (!this.subInited) {
|
|
94
|
+
view.grid.root.addEventListener('click', (_event: MouseEvent) => {
|
|
95
|
+
this.gridSelect = false;
|
|
96
|
+
});
|
|
97
|
+
this.subInited = true;
|
|
98
|
+
}
|
|
83
99
|
this.computeCompleted.next(true);
|
|
84
100
|
}
|
|
85
101
|
}
|
|
86
102
|
}
|
|
87
103
|
|
|
88
|
-
private async computeByChem() {
|
|
89
|
-
const monomericMols = await getMonomericMols(this.moleculeColumn!, this.seqHelper);
|
|
90
|
-
//need to create df to calculate fingerprints
|
|
91
|
-
const _monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
|
|
92
|
-
const df = await grok.functions.call('Chem:callChemSimilaritySearch', {
|
|
93
|
-
df: this.dataFrame,
|
|
94
|
-
col: monomericMols,
|
|
95
|
-
molecule: monomericMols.get(this.targetMoleculeIdx),
|
|
96
|
-
metricName: this.distanceMetric,
|
|
97
|
-
limit: this.limit,
|
|
98
|
-
minScore: this.cutoff,
|
|
99
|
-
fingerprint: this.fingerprint,
|
|
100
|
-
});
|
|
101
|
-
this.idxs = df.getCol('indexes');
|
|
102
|
-
this.scores = df.getCol('score');
|
|
103
|
-
}
|
|
104
|
-
|
|
105
104
|
private async computeByMM() {
|
|
106
|
-
const len = this.
|
|
105
|
+
const len = this.targetColumn!.length;
|
|
107
106
|
const actualLimit = Math.min(this.limit, len - 1);
|
|
108
107
|
if (!this.knn || this.kPrevNeighbors !== actualLimit) {
|
|
109
108
|
const encodedSequences =
|
|
110
|
-
(await getEncodedSeqSpaceCol(this.
|
|
109
|
+
(await getEncodedSeqSpaceCol(this.targetColumn!, MmDistanceFunctionsNames.LEVENSHTEIN)).seqList;
|
|
111
110
|
|
|
112
111
|
this.kPrevNeighbors = actualLimit;
|
|
113
112
|
this.knn = await (new SparseMatrixService()
|
|
@@ -128,12 +127,13 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
128
127
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
129
128
|
const molColName = this.molCol?.name!;
|
|
130
129
|
const resCol: DG.Column<string> = resDf.col(molColName)!;
|
|
131
|
-
const molColSh = this.seqHelper.getSeqHandler(this.
|
|
130
|
+
const molColSh = this.seqHelper.getSeqHandler(this.targetColumn!);
|
|
132
131
|
const resSh = this.seqHelper.getSeqHandler(resCol);
|
|
133
132
|
const subParts1 = molColSh.getSplitted(this.targetMoleculeIdx);
|
|
134
133
|
const subParts2 = resSh.getSplitted(resDf.currentRowIdx);
|
|
135
134
|
const alignment = alignSequencePair(subParts1, subParts2);
|
|
136
|
-
const canvas =
|
|
135
|
+
const canvas =
|
|
136
|
+
createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted, resSh.defaultBiotype, molDifferences);
|
|
137
137
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
138
138
|
if (subParts1.length !== subParts2.length) {
|
|
139
139
|
propPanel.append(ui.divV([
|
|
@@ -8,11 +8,30 @@ import {handleError} from './utils';
|
|
|
8
8
|
import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
|
|
9
9
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
10
10
|
import {getSeqHelper, ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
|
+
import {adjustGridcolAfterRender} from '../utils/ui-utils';
|
|
12
|
+
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
11
13
|
|
|
12
14
|
const dataFn: string = 'samples/FASTA_PT_activity.csv';
|
|
13
15
|
|
|
14
|
-
export async function
|
|
15
|
-
|
|
16
|
+
export async function demoBioSimDiv() {
|
|
17
|
+
const t = await _package.files.readCsv('samples/peptides-non-natural.csv');
|
|
18
|
+
t.name = 'Similarity and Diversity Demo';
|
|
19
|
+
t.col('activity')!.setTag('format', '3 significant digits');
|
|
20
|
+
t.col('sequence')!.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
21
|
+
const tv = grok.shell.addTableView(t);
|
|
22
|
+
await t.meta.detectSemanticTypes();
|
|
23
|
+
await grok.data.detectSemanticTypes(t);
|
|
24
|
+
const simV = tv.addViewer('Sequence Similarity Search', {limit: 20});
|
|
25
|
+
const dn = tv.dockManager.dock(simV, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.45);
|
|
26
|
+
adjustGridcolAfterRender(tv.grid, 'sequence', 500, 30);
|
|
27
|
+
const divV = tv.addViewer('Sequence Diversity Search', {limit: 20});
|
|
28
|
+
tv.dockManager.dock(divV, DG.DOCK_TYPE.DOWN, dn, 'Diversity search', 0.4);
|
|
29
|
+
grok.functions.call('Dendrogram:HierarchicalClustering',
|
|
30
|
+
{df: grok.shell.t, colNameList: ['sequence'], distance: 'euclidian', linkage: 'complete'});
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export async function demoBio01UISteps() {
|
|
34
|
+
const seqHelper: ISeqHelper = await getSeqHelper();
|
|
16
35
|
|
|
17
36
|
let view: DG.TableView;
|
|
18
37
|
let df: DG.DataFrame;
|
|
@@ -12,6 +12,13 @@ import {getClusterMatrixWorker} from '@datagrok-libraries/math';
|
|
|
12
12
|
const dataFn = 'samples/FASTA_PT_activity.csv';
|
|
13
13
|
const seqColName = 'sequence';
|
|
14
14
|
|
|
15
|
+
export async function demoSeqSpace() {
|
|
16
|
+
const p = await grok.functions.eval('Bio:SeqSpaceDemo');
|
|
17
|
+
const project = await grok.dapi.projects.find(p.id);
|
|
18
|
+
await project.open();
|
|
19
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#sequence-space');
|
|
20
|
+
}
|
|
21
|
+
|
|
15
22
|
export async function demoBio01aUI() {
|
|
16
23
|
let treeHelper: ITreeHelper;
|
|
17
24
|
let dendrogramSvc: IDendrogramService;
|
|
@@ -28,7 +28,7 @@ export async function demoBio01bUI() {
|
|
|
28
28
|
|
|
29
29
|
try {
|
|
30
30
|
const demoScript = new DemoScript('Activity Cliffs', 'Activity Cliffs analysis on Macromolecules data', false,
|
|
31
|
-
{autoStartFirstStep: true});
|
|
31
|
+
{autoStartFirstStep: true, path: 'Bioinformatics/Activity Cliffs'});
|
|
32
32
|
await demoScript
|
|
33
33
|
.step(`Load DNA sequences`, async () => {
|
|
34
34
|
grok.shell.windows.showContextPanel = false;
|
|
@@ -107,3 +107,40 @@ export async function demoBio01bUI() {
|
|
|
107
107
|
handleError(err);
|
|
108
108
|
}
|
|
109
109
|
}
|
|
110
|
+
|
|
111
|
+
export async function demoActivityCliffsCyclic() {
|
|
112
|
+
const df = await _package.files.readCsv('tests/helm_cyclic_cliffs.csv');
|
|
113
|
+
df.name = 'Activity Cliffs Demo';
|
|
114
|
+
await grok.data.detectSemanticTypes(df);
|
|
115
|
+
await df.meta.detectSemanticTypes();
|
|
116
|
+
const tv = grok.shell.addTableView(df);
|
|
117
|
+
ui.setUpdateIndicator(tv.root, true);
|
|
118
|
+
try {
|
|
119
|
+
const seqEncodingFunc = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
120
|
+
const activityCliffsViewer = (await activityCliffs(
|
|
121
|
+
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
122
|
+
96, DimReductionMethods.UMAP, MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE,
|
|
123
|
+
seqEncodingFunc, {}, true)) as DG.ScatterPlotViewer;
|
|
124
|
+
tv.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.65);
|
|
125
|
+
await DG.delay(100);
|
|
126
|
+
const cliffsLink: HTMLButtonElement = $(activityCliffsViewer.root)
|
|
127
|
+
.find('button.scatter_plot_link,cliffs_grid').get()[0] as HTMLButtonElement;
|
|
128
|
+
cliffsLink.click();
|
|
129
|
+
await DG.delay(100);
|
|
130
|
+
tv.grid.props.rowHeight = 180;
|
|
131
|
+
tv.grid.col('sequence') && (tv.grid.col('sequence')!.width = 300);
|
|
132
|
+
tv.grid.col('structure') && (tv.grid.col('structure')!.width = 300);
|
|
133
|
+
const cliffsGrid = Array.from(tv.viewers).find((v) => v !== tv.grid && v.type === DG.VIEWER.GRID) as DG.Grid;
|
|
134
|
+
if (cliffsGrid) {
|
|
135
|
+
cliffsGrid.props.rowHeight = 40;
|
|
136
|
+
cliffsGrid.col('seq_diff')!.width = 600;
|
|
137
|
+
tv.dockManager.dock(cliffsGrid, DG.DOCK_TYPE.DOWN, null, 'Cliffs', 0.35);
|
|
138
|
+
tv.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.55);
|
|
139
|
+
}
|
|
140
|
+
} catch (err: any) {
|
|
141
|
+
handleError(err);
|
|
142
|
+
} finally {
|
|
143
|
+
ui.setUpdateIndicator(tv.root, false);
|
|
144
|
+
}
|
|
145
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#activity-cliffs');
|
|
146
|
+
}
|
|
@@ -5,6 +5,21 @@ import {_package, toAtomicLevel} from '../package';
|
|
|
5
5
|
import {handleError} from './utils';
|
|
6
6
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
7
7
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
|
+
import {adjustGridcolAfterRender} from '../utils/ui-utils';
|
|
9
|
+
|
|
10
|
+
export async function demoToAtomicLevel(): Promise<void> {
|
|
11
|
+
const data = await _package.files.readCsv('samples/HELM_BI_CYCLIC.csv');
|
|
12
|
+
data.name = 'To Atomic Level';
|
|
13
|
+
await data.meta.detectSemanticTypes();
|
|
14
|
+
await grok.data.detectSemanticTypes(data);
|
|
15
|
+
const view = grok.shell.addTableView(data);
|
|
16
|
+
const seqCol = data.col('HELM')!;
|
|
17
|
+
await toAtomicLevel(data, seqCol, true, false);
|
|
18
|
+
adjustGridcolAfterRender(view.grid, 'molfile(HELM)', 500, 300, true);
|
|
19
|
+
adjustGridcolAfterRender(view.grid, 'HELM', 500, undefined, true);
|
|
20
|
+
grok.shell.info('Hover over monomers in HELM column to highlight them in molecular structure.', {timeout: 10});
|
|
21
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#get-atomic-level-structure');
|
|
22
|
+
}
|
|
8
23
|
|
|
9
24
|
export async function demoBio03UI(): Promise<void> {
|
|
10
25
|
const dataFn: string = 'samples/HELM.csv';
|
package/src/package.ts
CHANGED
|
@@ -47,11 +47,10 @@ import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
|
47
47
|
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
48
48
|
import {MonomerLibManager} from './utils/monomer-lib/lib-manager';
|
|
49
49
|
import {getMonomerLibraryManagerLink, showManageLibrariesDialog, showManageLibrariesView} from './utils/monomer-lib/library-file-manager/ui';
|
|
50
|
-
import {
|
|
51
|
-
import {
|
|
52
|
-
import {
|
|
53
|
-
import {
|
|
54
|
-
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
50
|
+
import {demoBioSimDiv} from './demo/bio01-similarity-diversity';
|
|
51
|
+
import {demoSeqSpace} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
|
|
52
|
+
import {demoActivityCliffsCyclic} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
|
|
53
|
+
import {demoToAtomicLevel} from './demo/bio03-atomic-level';
|
|
55
54
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
56
55
|
import {MsaWarning} from './utils/multiple-sequence-alignment';
|
|
57
56
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
@@ -587,7 +586,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
587
586
|
plotEmbeddings: boolean, preprocessingFunction?: DG.Func, options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
588
587
|
clusterEmbeddings?: boolean, isDemo?: boolean
|
|
589
588
|
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
590
|
-
const tableView =
|
|
589
|
+
const tableView =
|
|
591
590
|
grok.shell.tv.dataFrame == table ? grok.shell.tv : undefined;
|
|
592
591
|
if (!checkInputColumnUI(molecules, 'Sequence Space'))
|
|
593
592
|
return;
|
|
@@ -967,21 +966,14 @@ export async function manageLibrariesApp(): Promise<DG.View> {
|
|
|
967
966
|
|
|
968
967
|
//name: Monomer Manager Tree Browser
|
|
969
968
|
//input: dynamic treeNode
|
|
970
|
-
//input:
|
|
971
|
-
export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup,
|
|
969
|
+
//input: dynamic browsePanel
|
|
970
|
+
export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup, browsePanel: DG.BrowsePanel) {
|
|
972
971
|
const libraries = (await (await MonomerLibManager.getInstance()).getFileManager()).getValidLibraryPaths();
|
|
973
972
|
libraries.forEach((libName) => {
|
|
974
973
|
const nodeName = libName.endsWith('.json') ? libName.substring(0, libName.length - 5) : libName;
|
|
975
974
|
const libNode = treeNode.item(nodeName);
|
|
976
975
|
// eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
|
|
977
976
|
libNode.onSelected.subscribe(async () => {
|
|
978
|
-
const monomerManager = await MonomerManager.getNewInstance();
|
|
979
|
-
browseView.preview = await monomerManager.getViewRoot(libName, false);
|
|
980
|
-
});
|
|
981
|
-
|
|
982
|
-
libNode.root.addEventListener('dblclick', async (e) => {
|
|
983
|
-
e.preventDefault();
|
|
984
|
-
e.stopImmediatePropagation();
|
|
985
977
|
const monomerManager = await MonomerManager.getInstance();
|
|
986
978
|
await monomerManager.getViewRoot(libName, true);
|
|
987
979
|
monomerManager.resetCurrentRowFollowing();
|
|
@@ -1103,10 +1095,9 @@ export function addCopyMenu(cell: DG.Cell, menu: DG.Menu): void {
|
|
|
1103
1095
|
//meta.demoPath: Bioinformatics | Similarity, Diversity
|
|
1104
1096
|
//description: Sequence similarity tracking and evaluation dataset diversity
|
|
1105
1097
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
|
|
1106
|
-
//meta.isDemoScript: True
|
|
1107
1098
|
//meta.demoSkip: GROK-14320
|
|
1108
1099
|
export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
1109
|
-
await
|
|
1100
|
+
await demoBioSimDiv();
|
|
1110
1101
|
}
|
|
1111
1102
|
|
|
1112
1103
|
// demoBio01a
|
|
@@ -1114,10 +1105,9 @@ export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
|
1114
1105
|
//meta.demoPath: Bioinformatics | Sequence Space
|
|
1115
1106
|
//description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
|
|
1116
1107
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
|
|
1117
|
-
//meta.isDemoScript: True
|
|
1118
1108
|
//meta.demoSkip: GROK-14320
|
|
1119
1109
|
export async function demoBioSequenceSpace(): Promise<void> {
|
|
1120
|
-
await
|
|
1110
|
+
await demoSeqSpace();
|
|
1121
1111
|
}
|
|
1122
1112
|
|
|
1123
1113
|
// demoBio01b
|
|
@@ -1125,10 +1115,9 @@ export async function demoBioSequenceSpace(): Promise<void> {
|
|
|
1125
1115
|
//meta.demoPath: Bioinformatics | Activity Cliffs
|
|
1126
1116
|
//description: Activity Cliffs analysis on Macromolecules data
|
|
1127
1117
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
|
|
1128
|
-
//meta.isDemoScript: True
|
|
1129
1118
|
//meta.demoSkip: GROK-14320
|
|
1130
1119
|
export async function demoBioActivityCliffs(): Promise<void> {
|
|
1131
|
-
await
|
|
1120
|
+
await demoActivityCliffsCyclic();
|
|
1132
1121
|
}
|
|
1133
1122
|
|
|
1134
1123
|
// demoBio03
|
|
@@ -1136,21 +1125,9 @@ export async function demoBioActivityCliffs(): Promise<void> {
|
|
|
1136
1125
|
//meta.demoPath: Bioinformatics | Atomic Level
|
|
1137
1126
|
//description: Atomic level structure of Macromolecules
|
|
1138
1127
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
|
|
1139
|
-
//meta.isDemoScript: True
|
|
1140
1128
|
//meta.demoSkip: GROK-14320
|
|
1141
1129
|
export async function demoBioAtomicLevel(): Promise<void> {
|
|
1142
|
-
await
|
|
1143
|
-
}
|
|
1144
|
-
|
|
1145
|
-
// demoBio05
|
|
1146
|
-
//name: demoBioHelmMsaSequenceSpace
|
|
1147
|
-
//meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
|
|
1148
|
-
//description: MSA and composition analysis on Helm data
|
|
1149
|
-
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Helm,%20MSA,%20Sequence%20Space
|
|
1150
|
-
//meta.isDemoScript: True
|
|
1151
|
-
//meta.demoSkip: GROK-14320
|
|
1152
|
-
export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
|
|
1153
|
-
await demoBio05UI();
|
|
1130
|
+
await demoToAtomicLevel();
|
|
1154
1131
|
}
|
|
1155
1132
|
|
|
1156
1133
|
//name: SDF to JSON Library
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
|
-
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {awaitCheck, expect} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
6
6
|
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
7
7
|
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
@@ -25,9 +25,10 @@ export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimRed
|
|
|
25
25
|
})) as DG.Viewer | undefined;
|
|
26
26
|
expect(scatterPlot != null, true);
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
28
|
+
await awaitCheck(() => {
|
|
29
|
+
const link = Array.from(scatterPlot!.root.getElementsByClassName('scatter_plot_link'));
|
|
30
|
+
if (link.length)
|
|
31
|
+
return (link[0] as HTMLElement).innerText.toLowerCase() === `${tgtNumberCliffs} cliffs`;
|
|
32
|
+
return true;
|
|
33
|
+
}, 'incorrect cliffs link', 3000);
|
|
33
34
|
}
|
|
@@ -37,7 +37,7 @@ async function _testSimilaritySearchViewer() {
|
|
|
37
37
|
await awaitCheck(() => getSearchViewer(moleculesView, 'Sequence Similarity Search') !== undefined,
|
|
38
38
|
'Sequence Similarity Search viewer has not been created', 100);
|
|
39
39
|
if (!viewer.initialized) throw new Error('The viewer is not initialized.');
|
|
40
|
-
if (!viewer.
|
|
40
|
+
if (!viewer.targetColumn) throw new Error('The viewer has not molecule column (onTableAttached).');
|
|
41
41
|
if (!viewer.beforeRender()) throw new Error('The viewer is not able to render.');
|
|
42
42
|
if (!viewer.computeRequested) throw new Error('The viewer has not compute requested even.');
|
|
43
43
|
if (!computeCompleted) throw new Error('The viewer has not compute completed.');
|
|
@@ -87,7 +87,7 @@ async function _testDiversitySearchViewer() {
|
|
|
87
87
|
await awaitCheck(() => getSearchViewer(moleculesView, 'Sequence Diversity Search') !== undefined,
|
|
88
88
|
'Sequence Diversity Search viewer has not been created', 100);
|
|
89
89
|
if (!viewer.initialized) throw new Error('The viewer is not initialized.');
|
|
90
|
-
if (!viewer.
|
|
90
|
+
if (!viewer.targetColumn) throw new Error('The viewer has not molecule column (onTableAttached).');
|
|
91
91
|
if (!viewer.beforeRender()) throw new Error('The viewer is not able to render.');
|
|
92
92
|
if (!viewer.computeRequested) throw new Error('The viewer has not compute requested even.');
|
|
93
93
|
if (!computeCompleted) throw new Error('The viewer has not compute completed.');
|