@datagrok/bio 2.19.0 → 2.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/242.js +1 -1
- package/dist/242.js.map +1 -1
- package/dist/284.js +1 -1
- package/dist/284.js.map +1 -1
- package/dist/589.js +1 -1
- package/dist/589.js.map +1 -1
- package/dist/731.js +1 -1
- package/dist/731.js.map +1 -1
- package/dist/810.js +2 -0
- package/dist/810.js.map +1 -0
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/dockerfiles/container.json +1 -1
- package/files/samples/HELM_BI_CYCLIC.csv +7 -0
- package/files/samples/peptides-non-natural.csv +1001 -0
- package/package.json +6 -6
- package/projects/seq_space_demo.zip +0 -0
- package/src/analysis/sequence-diversity-viewer.ts +22 -14
- package/src/analysis/sequence-search-base-viewer.ts +6 -72
- package/src/analysis/sequence-similarity-viewer.ts +42 -23
- package/src/demo/bio01-similarity-diversity.ts +21 -2
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +7 -0
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +1 -1
- package/src/demo/bio03-atomic-level.ts +15 -0
- package/src/package.ts +9 -31
- package/src/tests/activity-cliffs-utils.ts +7 -6
- package/src/tests/similarity-diversity-tests.ts +2 -2
- package/src/utils/cell-renderer.ts +11 -1
- package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +8 -4
- package/src/utils/pepsea.ts +22 -39
- package/src/utils/seq-helper/seq-helper.ts +14 -34
- package/src/utils/ui-utils.ts +23 -0
- package/test-console-output-1.log +0 -7396
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.19.0",
|
|
8
|
+
"version": "2.20.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -47,10 +47,10 @@
|
|
|
47
47
|
"@datagrok-libraries/bio": "^5.50.1",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.7",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.4",
|
|
50
|
-
"@datagrok-libraries/ml": "^6.
|
|
51
|
-
"@datagrok-libraries/tutorials": "^1.
|
|
52
|
-
"@datagrok-libraries/utils": "^4.
|
|
53
|
-
"datagrok-api": "^1.
|
|
50
|
+
"@datagrok-libraries/ml": "^6.10.0",
|
|
51
|
+
"@datagrok-libraries/tutorials": "^1.6.0",
|
|
52
|
+
"@datagrok-libraries/utils": "^4.5.0",
|
|
53
|
+
"datagrok-api": "^1.25.0",
|
|
54
54
|
"@webgpu/types": "^0.1.40",
|
|
55
55
|
"ajv": "^8.12.0",
|
|
56
56
|
"ajv-errors": "^3.0.0",
|
|
@@ -109,7 +109,7 @@
|
|
|
109
109
|
"analyze": "webpack --profile --json > ./stats.json && npx webpack-bundle-analyzer ./stats.json"
|
|
110
110
|
},
|
|
111
111
|
"canEdit": [
|
|
112
|
-
"
|
|
112
|
+
"Administrators"
|
|
113
113
|
],
|
|
114
114
|
"canView": [
|
|
115
115
|
"All users"
|
|
Binary file
|
|
@@ -5,12 +5,13 @@ import * as grok from 'datagrok-api/grok';
|
|
|
5
5
|
import {getDiverseSubset} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
6
6
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
7
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
|
-
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
8
|
+
import {adjustGridcolAfterRender, updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
10
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
11
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
12
12
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
13
13
|
import {DistanceMatrixService, dmLinearIndex} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
14
|
+
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
14
15
|
|
|
15
16
|
export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
16
17
|
diverseColumnLabel: string | null; // Use postfix Label to prevent activating table column selection editor
|
|
@@ -22,7 +23,7 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
22
23
|
constructor(
|
|
23
24
|
private readonly seqHelper: ISeqHelper,
|
|
24
25
|
) {
|
|
25
|
-
super('diversity');
|
|
26
|
+
super('diversity', DG.SEMTYPE.MACROMOLECULE);
|
|
26
27
|
this.diverseColumnLabel = this.string('diverseColumnLabel', null);
|
|
27
28
|
}
|
|
28
29
|
|
|
@@ -30,27 +31,34 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
30
31
|
if (!this.beforeRender())
|
|
31
32
|
return;
|
|
32
33
|
if (this.dataFrame) {
|
|
33
|
-
if (computeData && this.
|
|
34
|
-
const sh = this.seqHelper.getSeqHandler(this.
|
|
34
|
+
if (computeData && this.targetColumn) {
|
|
35
|
+
const sh = this.seqHelper.getSeqHandler(this.targetColumn);
|
|
35
36
|
await (sh.isFasta() ? this.computeByMM() : this.computeByChem());
|
|
36
37
|
|
|
37
38
|
const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
|
|
38
|
-
`diverse (${this.
|
|
39
|
+
`diverse (${this.targetColumnName})`;
|
|
39
40
|
const resCol = DG.Column.string(diverseColumnName, this.renderMolIds!.length)
|
|
40
|
-
.init((i) => this.
|
|
41
|
+
.init((i) => this.targetColumn?.get(this.renderMolIds![i]));
|
|
41
42
|
resCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
|
-
this.tags.forEach((tag) => resCol.setTag(tag, this.
|
|
43
|
+
this.tags.forEach((tag) => resCol.setTag(tag, this.targetColumn!.getTag(tag)));
|
|
43
44
|
const resDf = DG.DataFrame.fromColumns([resCol]);
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
resCol.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
46
|
+
|
|
47
|
+
const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
|
|
48
|
+
this.dataFrame.currentRowIdx = this.renderMolIds![resDf.currentRowIdx];
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const grid = resDf.plot.grid();
|
|
52
|
+
adjustGridcolAfterRender(grid, resCol.name, 450, 30);
|
|
53
|
+
|
|
54
|
+
updateDivInnerHTML(this.root, grid.root);
|
|
47
55
|
this.computeCompleted.next(true);
|
|
48
56
|
}
|
|
49
57
|
}
|
|
50
58
|
}
|
|
51
59
|
|
|
52
60
|
private async computeByChem() {
|
|
53
|
-
const monomericMols = await getMonomericMols(this.
|
|
61
|
+
const monomericMols = await getMonomericMols(this.targetColumn!, this.seqHelper);
|
|
54
62
|
//need to create df to calculate fingerprints
|
|
55
63
|
const _monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
|
|
56
64
|
this.renderMolIds = await grok.functions.call('Chem:callChemDiversitySearch', {
|
|
@@ -63,15 +71,15 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
63
71
|
|
|
64
72
|
private async computeByMM() {
|
|
65
73
|
const encodedSequences =
|
|
66
|
-
(await getEncodedSeqSpaceCol(this.
|
|
74
|
+
(await getEncodedSeqSpaceCol(this.targetColumn!, MmDistanceFunctionsNames.LEVENSHTEIN)).seqList;
|
|
67
75
|
const distanceMatrixService = new DistanceMatrixService(true, false);
|
|
68
76
|
const distanceMatrixData = await distanceMatrixService.calc(encodedSequences, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
69
77
|
distanceMatrixService.terminate();
|
|
70
|
-
const len = this.
|
|
78
|
+
const len = this.targetColumn!.length;
|
|
71
79
|
const linearizeFunc = dmLinearIndex(len);
|
|
72
80
|
this.renderMolIds = getDiverseSubset(len, Math.min(len, this.limit),
|
|
73
81
|
(i1: number, i2: number) => {
|
|
74
|
-
return this.
|
|
82
|
+
return this.targetColumn!.isNone(i1) || this.targetColumn!.isNone(i2) ? 0 :
|
|
75
83
|
distanceMatrixData[linearizeFunc(i1, i2)];
|
|
76
84
|
});
|
|
77
85
|
}
|
|
@@ -4,94 +4,28 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import {CHEM_SIMILARITY_METRICS} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
6
6
|
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
import {SearchBaseViewer} from '@datagrok-libraries/ml/src/viewers/search-base-viewer';
|
|
7
8
|
|
|
8
9
|
const MAX_ROWS_FOR_DISTANCE_MATRIX = 22000;
|
|
9
10
|
|
|
10
|
-
export class SequenceSearchBaseViewer extends
|
|
11
|
-
name: string = '';
|
|
11
|
+
export class SequenceSearchBaseViewer extends SearchBaseViewer {
|
|
12
12
|
distanceMetric: string;
|
|
13
|
-
limit: number;
|
|
14
13
|
fingerprint: string;
|
|
15
14
|
metricsProperties = ['distanceMetric', 'fingerprint'];
|
|
16
15
|
fingerprintChoices = ['Morgan', 'Pattern'];
|
|
17
|
-
moleculeColumn?: DG.Column<string>;
|
|
18
|
-
moleculeColumnName: string;
|
|
19
|
-
initialized: boolean = false;
|
|
20
16
|
tags = [DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.separator, bioTAGS.alphabet];
|
|
21
17
|
preComputeDistanceMatrix: boolean = false;
|
|
22
18
|
|
|
23
|
-
constructor(name: string) {
|
|
24
|
-
super();
|
|
19
|
+
constructor(name: string, semType: string) {
|
|
20
|
+
super(name, semType);
|
|
25
21
|
this.fingerprint = this.string('fingerprint', this.fingerprintChoices[0], {choices: this.fingerprintChoices});
|
|
26
|
-
this.limit = this.int('limit', 10);
|
|
27
22
|
this.distanceMetric = this.string('distanceMetric', CHEM_SIMILARITY_METRICS[0], {choices: CHEM_SIMILARITY_METRICS});
|
|
28
|
-
this.moleculeColumnName = this.string('moleculeColumnName');
|
|
29
|
-
this.name = name;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
init(): void {
|
|
33
|
-
this.initialized = true;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
detach(): void {
|
|
37
|
-
this.subs.forEach((sub) => sub.unsubscribe());
|
|
38
23
|
}
|
|
39
24
|
|
|
40
25
|
async onTableAttached(): Promise<void> {
|
|
41
|
-
|
|
26
|
+
super.onTableAttached();
|
|
42
27
|
|
|
43
|
-
if (this.dataFrame)
|
|
28
|
+
if (this.dataFrame)
|
|
44
29
|
this.preComputeDistanceMatrix = this.dataFrame.rowCount <= MAX_ROWS_FOR_DISTANCE_MATRIX;
|
|
45
|
-
this.subs.push(DG.debounce(this.dataFrame.onRowsRemoved, 50)
|
|
46
|
-
.subscribe((_: any) => this.render(true)));
|
|
47
|
-
const compute = this.name !== 'diversity';
|
|
48
|
-
this.subs.push(DG.debounce(this.dataFrame.onCurrentRowChanged, 50)
|
|
49
|
-
.subscribe((_: any) => this.render(compute)));
|
|
50
|
-
this.subs.push(DG.debounce(this.dataFrame.selection.onChanged, 50)
|
|
51
|
-
.subscribe((_: any) => this.render(false)));
|
|
52
|
-
this.subs.push(DG.debounce(ui.onSizeChanged(this.root), 50)
|
|
53
|
-
.subscribe((_: any) => this.render(false)));
|
|
54
|
-
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE) as DG.Column<string>;
|
|
55
|
-
this.moleculeColumnName = this.moleculeColumn?.name!;
|
|
56
|
-
this.getProperty('limit')!.fromOptions({min: 1, max: this.dataFrame.rowCount});
|
|
57
|
-
}
|
|
58
|
-
this.render();
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
onPropertyChanged(property: DG.Property): void {
|
|
62
|
-
super.onPropertyChanged(property);
|
|
63
|
-
if (!this.initialized)
|
|
64
|
-
return;
|
|
65
|
-
if (property.name === 'moleculeColumnName') {
|
|
66
|
-
const col = this.dataFrame.col(property.get(this))!;
|
|
67
|
-
if (col.semType === DG.SEMTYPE.MACROMOLECULE)
|
|
68
|
-
this.moleculeColumn = col;
|
|
69
|
-
}
|
|
70
|
-
this.render();
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
/** For tests */ public computeRequested: boolean = false;
|
|
74
|
-
public renderPromise: Promise<void> = Promise.resolve();
|
|
75
|
-
|
|
76
|
-
protected render(computeData = true): void {
|
|
77
|
-
this.renderPromise = this.renderPromise.then(async () => {
|
|
78
|
-
this.computeRequested = this.computeRequested || computeData;
|
|
79
|
-
await this.renderInt(computeData);
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
async renderInt(_computeData: boolean): Promise<void> {
|
|
84
|
-
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
beforeRender() {
|
|
88
|
-
if (!this.initialized)
|
|
89
|
-
return false;
|
|
90
|
-
if (this.dataFrame && this.moleculeColumnName &&
|
|
91
|
-
this.dataFrame.col(this.moleculeColumnName)!.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
92
|
-
grok.shell.error(`${this.moleculeColumnName} is not Macromolecule type`);
|
|
93
|
-
return false;
|
|
94
|
-
}
|
|
95
|
-
return true;
|
|
96
30
|
}
|
|
97
31
|
}
|
|
@@ -5,13 +5,14 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
6
6
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
7
7
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
|
-
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
8
|
+
import {adjustGridcolAfterRender, updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
10
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
11
|
import {alignSequencePair} from '@datagrok-libraries/bio/src/utils/macromolecule/alignment';
|
|
12
12
|
import {KnnResult, SparseMatrixService} from '@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service';
|
|
13
13
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
14
14
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
15
|
+
import {MmcrTemps, tempTAGS} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
15
16
|
|
|
16
17
|
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
17
18
|
cutoff: number;
|
|
@@ -31,12 +32,14 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
31
32
|
knn?: KnnResult;
|
|
32
33
|
kPrevNeighbors: number = 0;
|
|
33
34
|
demo?: boolean;
|
|
35
|
+
analysisGrid?: DG.Grid;
|
|
36
|
+
subInited: boolean = false;
|
|
34
37
|
|
|
35
38
|
constructor(
|
|
36
39
|
private readonly seqHelper: ISeqHelper,
|
|
37
40
|
demo?: boolean,
|
|
38
41
|
) {
|
|
39
|
-
super('similarity');
|
|
42
|
+
super('similarity', DG.SEMTYPE.MACROMOLECULE);
|
|
40
43
|
this.cutoff = this.float('cutoff', 0.01, {min: 0, max: 1});
|
|
41
44
|
this.hotSearch = this.bool('hotSearch', true);
|
|
42
45
|
this.similarColumnLabel = this.string('similarColumnLabel', null);
|
|
@@ -51,42 +54,57 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
51
54
|
override async renderInt(computeData: boolean): Promise<void> {
|
|
52
55
|
if (!this.beforeRender())
|
|
53
56
|
return;
|
|
54
|
-
if (this.
|
|
57
|
+
if (this.targetColumn) {
|
|
55
58
|
this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
56
59
|
if (computeData && !this.gridSelect) {
|
|
57
60
|
this.targetMoleculeIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
58
|
-
const sh = this.seqHelper.getSeqHandler(this.
|
|
61
|
+
const sh = this.seqHelper.getSeqHandler(this.targetColumn!);
|
|
59
62
|
|
|
60
|
-
await
|
|
63
|
+
await this.computeByMM();
|
|
61
64
|
const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
|
|
62
|
-
`similar (${this.
|
|
65
|
+
`similar (${this.targetColumn})`;
|
|
63
66
|
this.molCol = DG.Column.string(similarColumnName,
|
|
64
|
-
this.idxs!.length).init((i) => this.
|
|
67
|
+
this.idxs!.length).init((i) => this.targetColumn?.get(this.idxs?.get(i)));
|
|
65
68
|
this.molCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
66
|
-
this.tags.forEach((tag) => this.molCol!.setTag(tag, this.
|
|
69
|
+
this.tags.forEach((tag) => this.molCol!.setTag(tag, this.targetColumn!.getTag(tag)));
|
|
67
70
|
const resDf = DG.DataFrame.fromColumns([this.idxs!, this.molCol!, this.scores!]);
|
|
68
|
-
resDf.
|
|
71
|
+
await resDf.meta.detectSemanticTypes();
|
|
72
|
+
await grok.data.detectSemanticTypes(resDf);
|
|
73
|
+
this.molCol.temp[tempTAGS.referenceSequence] = this.targetColumn!.get(this.targetMoleculeIdx);
|
|
74
|
+
this.molCol.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
75
|
+
let prevTimer: any = null;
|
|
76
|
+
const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
|
|
77
|
+
prevTimer && clearTimeout(prevTimer);
|
|
69
78
|
this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx);
|
|
70
|
-
setTimeout(() => { this.createPropertyPanel(resDf); },
|
|
79
|
+
prevTimer = setTimeout(() => { this.createPropertyPanel(resDf); }, 300);
|
|
71
80
|
this.gridSelect = true;
|
|
72
81
|
});
|
|
73
|
-
|
|
74
|
-
|
|
82
|
+
if (!this.analysisGrid) {
|
|
83
|
+
this.analysisGrid = resDf.plot.grid();
|
|
84
|
+
updateDivInnerHTML(this.root, this.analysisGrid.root);
|
|
85
|
+
} else {
|
|
86
|
+
this.analysisGrid.dataFrame = resDf;
|
|
87
|
+
this.analysisGrid.invalidate();
|
|
88
|
+
}
|
|
89
|
+
this.analysisGrid.col('indexes')!.visible = false;
|
|
90
|
+
adjustGridcolAfterRender(this.analysisGrid, this.molCol!.name, 450, 30, true);
|
|
75
91
|
const targetMolRow = this.idxs?.getRawData().findIndex((it) => it == this.targetMoleculeIdx);
|
|
76
|
-
const targetScoreCell =
|
|
92
|
+
const targetScoreCell = this.analysisGrid.cell('score', targetMolRow!);
|
|
77
93
|
targetScoreCell.cell.value = null;
|
|
78
|
-
const view =
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
94
|
+
const view = grok.shell.tv;
|
|
95
|
+
if (!this.subInited) {
|
|
96
|
+
view.grid.root.addEventListener('click', (_event: MouseEvent) => {
|
|
97
|
+
this.gridSelect = false;
|
|
98
|
+
});
|
|
99
|
+
this.subInited = true;
|
|
100
|
+
}
|
|
83
101
|
this.computeCompleted.next(true);
|
|
84
102
|
}
|
|
85
103
|
}
|
|
86
104
|
}
|
|
87
105
|
|
|
88
106
|
private async computeByChem() {
|
|
89
|
-
const monomericMols = await getMonomericMols(this.
|
|
107
|
+
const monomericMols = await getMonomericMols(this.targetColumn!, this.seqHelper);
|
|
90
108
|
//need to create df to calculate fingerprints
|
|
91
109
|
const _monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
|
|
92
110
|
const df = await grok.functions.call('Chem:callChemSimilaritySearch', {
|
|
@@ -103,11 +121,11 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
103
121
|
}
|
|
104
122
|
|
|
105
123
|
private async computeByMM() {
|
|
106
|
-
const len = this.
|
|
124
|
+
const len = this.targetColumn!.length;
|
|
107
125
|
const actualLimit = Math.min(this.limit, len - 1);
|
|
108
126
|
if (!this.knn || this.kPrevNeighbors !== actualLimit) {
|
|
109
127
|
const encodedSequences =
|
|
110
|
-
(await getEncodedSeqSpaceCol(this.
|
|
128
|
+
(await getEncodedSeqSpaceCol(this.targetColumn!, MmDistanceFunctionsNames.LEVENSHTEIN)).seqList;
|
|
111
129
|
|
|
112
130
|
this.kPrevNeighbors = actualLimit;
|
|
113
131
|
this.knn = await (new SparseMatrixService()
|
|
@@ -128,12 +146,13 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
128
146
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
129
147
|
const molColName = this.molCol?.name!;
|
|
130
148
|
const resCol: DG.Column<string> = resDf.col(molColName)!;
|
|
131
|
-
const molColSh = this.seqHelper.getSeqHandler(this.
|
|
149
|
+
const molColSh = this.seqHelper.getSeqHandler(this.targetColumn!);
|
|
132
150
|
const resSh = this.seqHelper.getSeqHandler(resCol);
|
|
133
151
|
const subParts1 = molColSh.getSplitted(this.targetMoleculeIdx);
|
|
134
152
|
const subParts2 = resSh.getSplitted(resDf.currentRowIdx);
|
|
135
153
|
const alignment = alignSequencePair(subParts1, subParts2);
|
|
136
|
-
const canvas =
|
|
154
|
+
const canvas =
|
|
155
|
+
createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted, resSh.defaultBiotype, molDifferences);
|
|
137
156
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
138
157
|
if (subParts1.length !== subParts2.length) {
|
|
139
158
|
propPanel.append(ui.divV([
|
|
@@ -8,11 +8,30 @@ import {handleError} from './utils';
|
|
|
8
8
|
import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
|
|
9
9
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
10
10
|
import {getSeqHelper, ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
|
+
import {adjustGridcolAfterRender} from '../utils/ui-utils';
|
|
12
|
+
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
11
13
|
|
|
12
14
|
const dataFn: string = 'samples/FASTA_PT_activity.csv';
|
|
13
15
|
|
|
14
|
-
export async function
|
|
15
|
-
|
|
16
|
+
export async function demoBioSimDiv() {
|
|
17
|
+
const t = await _package.files.readCsv('samples/peptides-non-natural.csv');
|
|
18
|
+
t.name = 'Similarity and Diversity Demo';
|
|
19
|
+
t.col('activity')!.setTag('format', '3 significant digits');
|
|
20
|
+
t.col('sequence')!.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
21
|
+
const tv = grok.shell.addTableView(t);
|
|
22
|
+
await t.meta.detectSemanticTypes();
|
|
23
|
+
await grok.data.detectSemanticTypes(t);
|
|
24
|
+
const simV = tv.addViewer('Sequence Similarity Search', {limit: 20});
|
|
25
|
+
const dn = tv.dockManager.dock(simV, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.45);
|
|
26
|
+
adjustGridcolAfterRender(tv.grid, 'sequence', 500, 30);
|
|
27
|
+
const divV = tv.addViewer('Sequence Diversity Search', {limit: 20});
|
|
28
|
+
tv.dockManager.dock(divV, DG.DOCK_TYPE.DOWN, dn, 'Diversity search', 0.4);
|
|
29
|
+
grok.functions.call('Dendrogram:HierarchicalClustering',
|
|
30
|
+
{df: grok.shell.t, colNameList: ['sequence'], distance: 'euclidian', linkage: 'complete'});
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export async function demoBio01UISteps() {
|
|
34
|
+
const seqHelper: ISeqHelper = await getSeqHelper();
|
|
16
35
|
|
|
17
36
|
let view: DG.TableView;
|
|
18
37
|
let df: DG.DataFrame;
|
|
@@ -12,6 +12,13 @@ import {getClusterMatrixWorker} from '@datagrok-libraries/math';
|
|
|
12
12
|
const dataFn = 'samples/FASTA_PT_activity.csv';
|
|
13
13
|
const seqColName = 'sequence';
|
|
14
14
|
|
|
15
|
+
export async function demoSeqSpace() {
|
|
16
|
+
const p = await grok.functions.eval('Bio:SeqSpaceDemo');
|
|
17
|
+
const project = await grok.dapi.projects.find(p.id);
|
|
18
|
+
await project.open();
|
|
19
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#sequence-space');
|
|
20
|
+
}
|
|
21
|
+
|
|
15
22
|
export async function demoBio01aUI() {
|
|
16
23
|
let treeHelper: ITreeHelper;
|
|
17
24
|
let dendrogramSvc: IDendrogramService;
|
|
@@ -28,7 +28,7 @@ export async function demoBio01bUI() {
|
|
|
28
28
|
|
|
29
29
|
try {
|
|
30
30
|
const demoScript = new DemoScript('Activity Cliffs', 'Activity Cliffs analysis on Macromolecules data', false,
|
|
31
|
-
{autoStartFirstStep: true});
|
|
31
|
+
{autoStartFirstStep: true, path: 'Bioinformatics/Activity Cliffs'});
|
|
32
32
|
await demoScript
|
|
33
33
|
.step(`Load DNA sequences`, async () => {
|
|
34
34
|
grok.shell.windows.showContextPanel = false;
|
|
@@ -5,6 +5,21 @@ import {_package, toAtomicLevel} from '../package';
|
|
|
5
5
|
import {handleError} from './utils';
|
|
6
6
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
7
7
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
|
+
import {adjustGridcolAfterRender} from '../utils/ui-utils';
|
|
9
|
+
|
|
10
|
+
export async function demoToAtomicLevel(): Promise<void> {
|
|
11
|
+
const data = await _package.files.readCsv('samples/HELM_BI_CYCLIC.csv');
|
|
12
|
+
data.name = 'To Atomic Level';
|
|
13
|
+
await data.meta.detectSemanticTypes();
|
|
14
|
+
await grok.data.detectSemanticTypes(data);
|
|
15
|
+
const view = grok.shell.addTableView(data);
|
|
16
|
+
const seqCol = data.col('HELM')!;
|
|
17
|
+
await toAtomicLevel(data, seqCol, true, false);
|
|
18
|
+
adjustGridcolAfterRender(view.grid, 'molfile(HELM)', 500, 300, true);
|
|
19
|
+
adjustGridcolAfterRender(view.grid, 'HELM', 500, undefined, true);
|
|
20
|
+
grok.shell.info('Hover over monomers in HELM column to highlight them in molecular structure.', {timeout: 10});
|
|
21
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#get-atomic-level-structure');
|
|
22
|
+
}
|
|
8
23
|
|
|
9
24
|
export async function demoBio03UI(): Promise<void> {
|
|
10
25
|
const dataFn: string = 'samples/HELM.csv';
|
package/src/package.ts
CHANGED
|
@@ -47,11 +47,10 @@ import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
|
47
47
|
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
48
48
|
import {MonomerLibManager} from './utils/monomer-lib/lib-manager';
|
|
49
49
|
import {getMonomerLibraryManagerLink, showManageLibrariesDialog, showManageLibrariesView} from './utils/monomer-lib/library-file-manager/ui';
|
|
50
|
-
import {
|
|
51
|
-
import {
|
|
50
|
+
import {demoBioSimDiv} from './demo/bio01-similarity-diversity';
|
|
51
|
+
import {demoSeqSpace} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
|
|
52
52
|
import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
|
|
53
|
-
import {
|
|
54
|
-
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
53
|
+
import {demoToAtomicLevel} from './demo/bio03-atomic-level';
|
|
55
54
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
56
55
|
import {MsaWarning} from './utils/multiple-sequence-alignment';
|
|
57
56
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
@@ -587,7 +586,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
587
586
|
plotEmbeddings: boolean, preprocessingFunction?: DG.Func, options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
588
587
|
clusterEmbeddings?: boolean, isDemo?: boolean
|
|
589
588
|
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
590
|
-
const tableView =
|
|
589
|
+
const tableView =
|
|
591
590
|
grok.shell.tv.dataFrame == table ? grok.shell.tv : undefined;
|
|
592
591
|
if (!checkInputColumnUI(molecules, 'Sequence Space'))
|
|
593
592
|
return;
|
|
@@ -967,21 +966,14 @@ export async function manageLibrariesApp(): Promise<DG.View> {
|
|
|
967
966
|
|
|
968
967
|
//name: Monomer Manager Tree Browser
|
|
969
968
|
//input: dynamic treeNode
|
|
970
|
-
//input:
|
|
971
|
-
export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup,
|
|
969
|
+
//input: dynamic browsePanel
|
|
970
|
+
export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup, browsePanel: DG.BrowsePanel) {
|
|
972
971
|
const libraries = (await (await MonomerLibManager.getInstance()).getFileManager()).getValidLibraryPaths();
|
|
973
972
|
libraries.forEach((libName) => {
|
|
974
973
|
const nodeName = libName.endsWith('.json') ? libName.substring(0, libName.length - 5) : libName;
|
|
975
974
|
const libNode = treeNode.item(nodeName);
|
|
976
975
|
// eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
|
|
977
976
|
libNode.onSelected.subscribe(async () => {
|
|
978
|
-
const monomerManager = await MonomerManager.getNewInstance();
|
|
979
|
-
browseView.preview = await monomerManager.getViewRoot(libName, false);
|
|
980
|
-
});
|
|
981
|
-
|
|
982
|
-
libNode.root.addEventListener('dblclick', async (e) => {
|
|
983
|
-
e.preventDefault();
|
|
984
|
-
e.stopImmediatePropagation();
|
|
985
977
|
const monomerManager = await MonomerManager.getInstance();
|
|
986
978
|
await monomerManager.getViewRoot(libName, true);
|
|
987
979
|
monomerManager.resetCurrentRowFollowing();
|
|
@@ -1103,10 +1095,9 @@ export function addCopyMenu(cell: DG.Cell, menu: DG.Menu): void {
|
|
|
1103
1095
|
//meta.demoPath: Bioinformatics | Similarity, Diversity
|
|
1104
1096
|
//description: Sequence similarity tracking and evaluation dataset diversity
|
|
1105
1097
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
|
|
1106
|
-
//meta.isDemoScript: True
|
|
1107
1098
|
//meta.demoSkip: GROK-14320
|
|
1108
1099
|
export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
1109
|
-
await
|
|
1100
|
+
await demoBioSimDiv();
|
|
1110
1101
|
}
|
|
1111
1102
|
|
|
1112
1103
|
// demoBio01a
|
|
@@ -1114,10 +1105,9 @@ export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
|
1114
1105
|
//meta.demoPath: Bioinformatics | Sequence Space
|
|
1115
1106
|
//description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
|
|
1116
1107
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
|
|
1117
|
-
//meta.isDemoScript: True
|
|
1118
1108
|
//meta.demoSkip: GROK-14320
|
|
1119
1109
|
export async function demoBioSequenceSpace(): Promise<void> {
|
|
1120
|
-
await
|
|
1110
|
+
await demoSeqSpace();
|
|
1121
1111
|
}
|
|
1122
1112
|
|
|
1123
1113
|
// demoBio01b
|
|
@@ -1136,21 +1126,9 @@ export async function demoBioActivityCliffs(): Promise<void> {
|
|
|
1136
1126
|
//meta.demoPath: Bioinformatics | Atomic Level
|
|
1137
1127
|
//description: Atomic level structure of Macromolecules
|
|
1138
1128
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
|
|
1139
|
-
//meta.isDemoScript: True
|
|
1140
1129
|
//meta.demoSkip: GROK-14320
|
|
1141
1130
|
export async function demoBioAtomicLevel(): Promise<void> {
|
|
1142
|
-
await
|
|
1143
|
-
}
|
|
1144
|
-
|
|
1145
|
-
// demoBio05
|
|
1146
|
-
//name: demoBioHelmMsaSequenceSpace
|
|
1147
|
-
//meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
|
|
1148
|
-
//description: MSA and composition analysis on Helm data
|
|
1149
|
-
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Helm,%20MSA,%20Sequence%20Space
|
|
1150
|
-
//meta.isDemoScript: True
|
|
1151
|
-
//meta.demoSkip: GROK-14320
|
|
1152
|
-
export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
|
|
1153
|
-
await demoBio05UI();
|
|
1131
|
+
await demoToAtomicLevel();
|
|
1154
1132
|
}
|
|
1155
1133
|
|
|
1156
1134
|
//name: SDF to JSON Library
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
|
-
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {awaitCheck, expect} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
6
6
|
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
7
7
|
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
@@ -25,9 +25,10 @@ export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimRed
|
|
|
25
25
|
})) as DG.Viewer | undefined;
|
|
26
26
|
expect(scatterPlot != null, true);
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
28
|
+
await awaitCheck(() => {
|
|
29
|
+
const link = Array.from(scatterPlot!.root.getElementsByClassName('scatter_plot_link'));
|
|
30
|
+
if (link.length)
|
|
31
|
+
return (link[0] as HTMLElement).innerText.toLowerCase() === `${tgtNumberCliffs} cliffs`;
|
|
32
|
+
return true;
|
|
33
|
+
}, 'incorrect cliffs link', 3000);
|
|
33
34
|
}
|
|
@@ -37,7 +37,7 @@ async function _testSimilaritySearchViewer() {
|
|
|
37
37
|
await awaitCheck(() => getSearchViewer(moleculesView, 'Sequence Similarity Search') !== undefined,
|
|
38
38
|
'Sequence Similarity Search viewer has not been created', 100);
|
|
39
39
|
if (!viewer.initialized) throw new Error('The viewer is not initialized.');
|
|
40
|
-
if (!viewer.
|
|
40
|
+
if (!viewer.targetColumn) throw new Error('The viewer has not molecule column (onTableAttached).');
|
|
41
41
|
if (!viewer.beforeRender()) throw new Error('The viewer is not able to render.');
|
|
42
42
|
if (!viewer.computeRequested) throw new Error('The viewer has not compute requested even.');
|
|
43
43
|
if (!computeCompleted) throw new Error('The viewer has not compute completed.');
|
|
@@ -87,7 +87,7 @@ async function _testDiversitySearchViewer() {
|
|
|
87
87
|
await awaitCheck(() => getSearchViewer(moleculesView, 'Sequence Diversity Search') !== undefined,
|
|
88
88
|
'Sequence Diversity Search viewer has not been created', 100);
|
|
89
89
|
if (!viewer.initialized) throw new Error('The viewer is not initialized.');
|
|
90
|
-
if (!viewer.
|
|
90
|
+
if (!viewer.targetColumn) throw new Error('The viewer has not molecule column (onTableAttached).');
|
|
91
91
|
if (!viewer.beforeRender()) throw new Error('The viewer is not able to render.');
|
|
92
92
|
if (!viewer.computeRequested) throw new Error('The viewer has not compute requested even.');
|
|
93
93
|
if (!computeCompleted) throw new Error('The viewer has not compute completed.');
|
|
@@ -58,7 +58,6 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
58
58
|
type RendererGridCellTemp = {
|
|
59
59
|
[MmcrTemps.monomerPlacer]: MonomerPlacer
|
|
60
60
|
}
|
|
61
|
-
|
|
62
61
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
63
62
|
private readonly seqHelper: ISeqHelper;
|
|
64
63
|
|
|
@@ -70,6 +69,8 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
70
69
|
|
|
71
70
|
get defaultWidth(): number | null { return 230; }
|
|
72
71
|
|
|
72
|
+
hasMouseOver: boolean = false;
|
|
73
|
+
|
|
73
74
|
constructor() {
|
|
74
75
|
super();
|
|
75
76
|
this.seqHelper = _package.seqHelper;
|
|
@@ -101,6 +102,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
101
102
|
override onMouseEnter(gridCell: DG.GridCell, e: MouseEvent) {
|
|
102
103
|
const back = this.getRendererBack(gridCell);
|
|
103
104
|
back?.onMouseEnter(gridCell, e);
|
|
105
|
+
this.hasMouseOver = true;
|
|
104
106
|
}
|
|
105
107
|
|
|
106
108
|
override onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
@@ -110,6 +112,14 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
110
112
|
|
|
111
113
|
override onMouseLeave(gridCell: DG.GridCell, _e: MouseEvent) {
|
|
112
114
|
execMonomerHoverLinks(gridCell, null);
|
|
115
|
+
if (gridCell?.grid) {
|
|
116
|
+
const sub = gridCell.grid.onEvent('d4-grid-show-tooltip').subscribe((e) => {
|
|
117
|
+
sub.unsubscribe();
|
|
118
|
+
if (this.hasMouseOver)
|
|
119
|
+
e.preventDefault();
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
this.hasMouseOver = false;
|
|
113
123
|
}
|
|
114
124
|
|
|
115
125
|
override onDoubleClick(gridCell: DG.GridCell, e: MouseEvent) {
|
|
@@ -10,7 +10,7 @@ import {MolfileWrapperFactory} from './mol-wrapper-factory';
|
|
|
10
10
|
export class MonomerWrapper {
|
|
11
11
|
private readonly molfileWrapper: MolfileWrapper;
|
|
12
12
|
private capGroupElements: string[] = [];
|
|
13
|
-
|
|
13
|
+
private static molfileV2KToV3KCache: Map<string, string> = new Map();
|
|
14
14
|
constructor(
|
|
15
15
|
public readonly monomerSymbol: string,
|
|
16
16
|
public readonly monomerIdx: number,
|
|
@@ -39,12 +39,16 @@ export class MonomerWrapper {
|
|
|
39
39
|
public get bondCount() { return this.molfileWrapper.bondCount; }
|
|
40
40
|
|
|
41
41
|
private convertMolfileToV3KFormat(molfileV2K: string, monomerSymbol: string, rdKitModule: RDModule): string {
|
|
42
|
+
if (MonomerWrapper.molfileV2KToV3KCache.has(molfileV2K))
|
|
43
|
+
return MonomerWrapper.molfileV2KToV3KCache.get(molfileV2K)!;
|
|
42
44
|
let mol: RDMol | null = null;
|
|
43
45
|
try {
|
|
44
46
|
mol = rdKitModule.get_mol(molfileV2K, JSON.stringify({mergeQueryHs: true}));
|
|
45
|
-
if (mol)
|
|
46
|
-
|
|
47
|
-
|
|
47
|
+
if (mol) {
|
|
48
|
+
const res = mol.get_v3Kmolblock();
|
|
49
|
+
MonomerWrapper.molfileV2KToV3KCache.set(molfileV2K, res);
|
|
50
|
+
return res;
|
|
51
|
+
} else
|
|
48
52
|
throw new Error(`Cannot convert ${monomerSymbol} to molV3000`);
|
|
49
53
|
} finally {
|
|
50
54
|
mol?.delete();
|