@datagrok/bio 2.18.4 → 2.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +2 -2
- package/dist/242.js +1 -1
- package/dist/242.js.map +1 -1
- package/dist/284.js +1 -1
- package/dist/284.js.map +1 -1
- package/dist/589.js +1 -1
- package/dist/589.js.map +1 -1
- package/dist/731.js +1 -1
- package/dist/731.js.map +1 -1
- package/dist/810.js +2 -0
- package/dist/810.js.map +1 -0
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/dockerfiles/container.json +1 -1
- package/files/monomer-libraries/HELMCoreLibrary.json +18926 -18215
- package/files/samples/HELM_BI_CYCLIC.csv +7 -0
- package/files/samples/peptides-non-natural.csv +1001 -0
- package/files/tests/to-atomic-level-dna-fasta-output.csv +15077 -15077
- package/files/tests/to-atomic-level-msa-fasta-output.csv +1903 -1903
- package/files/tests/to-atomic-level-msa-separator-output.csv +3236 -3236
- package/files/tests/to-atomic-level-peptides-fasta-output.csv +32262 -32262
- package/files/tests/to-atomic-level-pt-fasta-2.mol +29 -29
- package/package.json +7 -7
- package/projects/seq_space_demo.zip +0 -0
- package/src/analysis/sequence-diversity-viewer.ts +22 -14
- package/src/analysis/sequence-search-base-viewer.ts +6 -72
- package/src/analysis/sequence-similarity-viewer.ts +42 -23
- package/src/demo/bio01-similarity-diversity.ts +21 -2
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +7 -0
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +1 -1
- package/src/demo/bio03-atomic-level.ts +15 -0
- package/src/package.ts +29 -59
- package/src/tests/activity-cliffs-utils.ts +7 -6
- package/src/tests/scoring.ts +2 -2
- package/src/tests/similarity-diversity-tests.ts +2 -2
- package/src/utils/cell-renderer.ts +11 -1
- package/src/utils/helm-to-molfile/converter/converter.ts +21 -9
- package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +8 -4
- package/src/utils/helm-to-molfile/converter/simple-polymer.ts +18 -9
- package/src/utils/monomer-lib/monomer-lib-base.ts +4 -4
- package/src/utils/pepsea.ts +22 -39
- package/src/utils/seq-helper/seq-handler.ts +2 -2
- package/src/utils/seq-helper/seq-helper.ts +50 -34
- package/src/utils/sequence-to-mol.ts +1 -1
- package/src/utils/ui-utils.ts +23 -0
- package/test-console-output-1.log +0 -7387
- package/test-record-1.mp4 +0 -0
|
@@ -5,41 +5,41 @@ Datagrok macromolecule handler
|
|
|
5
5
|
M V30 BEGIN CTAB
|
|
6
6
|
M V30 COUNTS 17 16 0 0 0
|
|
7
7
|
M V30 BEGIN ATOM
|
|
8
|
-
M V30 1 C
|
|
9
|
-
M V30 2 C
|
|
8
|
+
M V30 1 C 1.2991 -2.25 0.000000 0
|
|
9
|
+
M V30 2 C 1.2991 -0.75 0.000000 0
|
|
10
10
|
M V30 3 N 0 0 0.000000 0
|
|
11
|
-
M V30 4 C
|
|
12
|
-
M V30 5 O
|
|
13
|
-
M V30 6 N
|
|
14
|
-
M V30 7 C
|
|
15
|
-
M V30 8 N
|
|
16
|
-
M V30 9 N
|
|
17
|
-
M V30 10 C
|
|
18
|
-
M V30 11 C
|
|
19
|
-
M V30 12 C
|
|
20
|
-
M V30 13 C
|
|
21
|
-
M V30 14 N
|
|
22
|
-
M V30 15 C
|
|
23
|
-
M V30 16 O
|
|
24
|
-
M V30 17 O
|
|
11
|
+
M V30 4 C 2.5981 0 0.000000 0
|
|
12
|
+
M V30 5 O 3.8971 -0.75 0.000000 0
|
|
13
|
+
M V30 6 N 7.7941 9 0.000000 0
|
|
14
|
+
M V30 7 C 6.4951 8.2501 0.000000 0
|
|
15
|
+
M V30 8 N 5.196 9 0.000000 0
|
|
16
|
+
M V30 9 N 6.4951 6.7501 0.000000 0
|
|
17
|
+
M V30 10 C 5.1961 6 0.000000 0
|
|
18
|
+
M V30 11 C 5.1961 4.5 0.000000 0
|
|
19
|
+
M V30 12 C 3.8971 3.75 0.000000 0
|
|
20
|
+
M V30 13 C 3.8971 2.25 0.000000 0
|
|
21
|
+
M V30 14 N 2.5981 1.5 0.000000 0
|
|
22
|
+
M V30 15 C 5.1962 1.5 0.000000 0
|
|
23
|
+
M V30 16 O 6.4952 2.25 0.000000 0
|
|
24
|
+
M V30 17 O 5.1962 -0.0001 0.000000 0
|
|
25
25
|
M V30 END ATOM
|
|
26
26
|
M V30 BEGIN BOND
|
|
27
|
-
M V30 1 1 2 1 CFG=3
|
|
28
|
-
M V30 2 1 2 3
|
|
27
|
+
M V30 1 1 2 1 CFG=3
|
|
28
|
+
M V30 2 1 2 3
|
|
29
29
|
M V30 3 1 2 4
|
|
30
|
-
M V30 4 2 4 5
|
|
30
|
+
M V30 4 2 4 5
|
|
31
31
|
M V30 5 1 4 14
|
|
32
|
-
M V30 6
|
|
33
|
-
M V30 7
|
|
34
|
-
M V30 8 1 7 9
|
|
35
|
-
M V30 9 1 9 10
|
|
36
|
-
M V30 10 1 10 11
|
|
37
|
-
M V30 11 1 11 12
|
|
38
|
-
M V30 12 1 13 12 CFG=1
|
|
39
|
-
M V30 13 1 13 14
|
|
32
|
+
M V30 6 2 6 7
|
|
33
|
+
M V30 7 1 7 8
|
|
34
|
+
M V30 8 1 7 9
|
|
35
|
+
M V30 9 1 9 10
|
|
36
|
+
M V30 10 1 10 11
|
|
37
|
+
M V30 11 1 11 12
|
|
38
|
+
M V30 12 1 13 12 CFG=1
|
|
39
|
+
M V30 13 1 13 14
|
|
40
40
|
M V30 14 1 13 15
|
|
41
|
-
M V30 15 2 15 16
|
|
41
|
+
M V30 15 2 15 16
|
|
42
42
|
M V30 16 1 15 17
|
|
43
43
|
M V30 END BOND
|
|
44
44
|
M V30 END CTAB
|
|
45
|
-
M END
|
|
45
|
+
M END
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.18.4",
|
|
8
|
+
"version": "2.20.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,13 +44,13 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.50.1",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.7",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.4",
|
|
50
|
-
"@datagrok-libraries/ml": "^6.
|
|
51
|
-
"@datagrok-libraries/tutorials": "^1.
|
|
52
|
-
"@datagrok-libraries/utils": "^4.
|
|
53
|
-
"datagrok-api": "^1.
|
|
50
|
+
"@datagrok-libraries/ml": "^6.10.0",
|
|
51
|
+
"@datagrok-libraries/tutorials": "^1.6.0",
|
|
52
|
+
"@datagrok-libraries/utils": "^4.5.0",
|
|
53
|
+
"datagrok-api": "^1.25.0",
|
|
54
54
|
"@webgpu/types": "^0.1.40",
|
|
55
55
|
"ajv": "^8.12.0",
|
|
56
56
|
"ajv-errors": "^3.0.0",
|
|
@@ -109,7 +109,7 @@
|
|
|
109
109
|
"analyze": "webpack --profile --json > ./stats.json && npx webpack-bundle-analyzer ./stats.json"
|
|
110
110
|
},
|
|
111
111
|
"canEdit": [
|
|
112
|
-
"
|
|
112
|
+
"Administrators"
|
|
113
113
|
],
|
|
114
114
|
"canView": [
|
|
115
115
|
"All users"
|
|
Binary file
|
|
@@ -5,12 +5,13 @@ import * as grok from 'datagrok-api/grok';
|
|
|
5
5
|
import {getDiverseSubset} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
6
6
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
7
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
|
-
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
8
|
+
import {adjustGridcolAfterRender, updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
10
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
11
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
12
12
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
13
13
|
import {DistanceMatrixService, dmLinearIndex} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
14
|
+
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
14
15
|
|
|
15
16
|
export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
16
17
|
diverseColumnLabel: string | null; // Use postfix Label to prevent activating table column selection editor
|
|
@@ -22,7 +23,7 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
22
23
|
constructor(
|
|
23
24
|
private readonly seqHelper: ISeqHelper,
|
|
24
25
|
) {
|
|
25
|
-
super('diversity');
|
|
26
|
+
super('diversity', DG.SEMTYPE.MACROMOLECULE);
|
|
26
27
|
this.diverseColumnLabel = this.string('diverseColumnLabel', null);
|
|
27
28
|
}
|
|
28
29
|
|
|
@@ -30,27 +31,34 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
30
31
|
if (!this.beforeRender())
|
|
31
32
|
return;
|
|
32
33
|
if (this.dataFrame) {
|
|
33
|
-
if (computeData && this.
|
|
34
|
-
const sh = this.seqHelper.getSeqHandler(this.
|
|
34
|
+
if (computeData && this.targetColumn) {
|
|
35
|
+
const sh = this.seqHelper.getSeqHandler(this.targetColumn);
|
|
35
36
|
await (sh.isFasta() ? this.computeByMM() : this.computeByChem());
|
|
36
37
|
|
|
37
38
|
const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
|
|
38
|
-
`diverse (${this.
|
|
39
|
+
`diverse (${this.targetColumnName})`;
|
|
39
40
|
const resCol = DG.Column.string(diverseColumnName, this.renderMolIds!.length)
|
|
40
|
-
.init((i) => this.
|
|
41
|
+
.init((i) => this.targetColumn?.get(this.renderMolIds![i]));
|
|
41
42
|
resCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
|
-
this.tags.forEach((tag) => resCol.setTag(tag, this.
|
|
43
|
+
this.tags.forEach((tag) => resCol.setTag(tag, this.targetColumn!.getTag(tag)));
|
|
43
44
|
const resDf = DG.DataFrame.fromColumns([resCol]);
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
resCol.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
46
|
+
|
|
47
|
+
const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
|
|
48
|
+
this.dataFrame.currentRowIdx = this.renderMolIds![resDf.currentRowIdx];
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const grid = resDf.plot.grid();
|
|
52
|
+
adjustGridcolAfterRender(grid, resCol.name, 450, 30);
|
|
53
|
+
|
|
54
|
+
updateDivInnerHTML(this.root, grid.root);
|
|
47
55
|
this.computeCompleted.next(true);
|
|
48
56
|
}
|
|
49
57
|
}
|
|
50
58
|
}
|
|
51
59
|
|
|
52
60
|
private async computeByChem() {
|
|
53
|
-
const monomericMols = await getMonomericMols(this.
|
|
61
|
+
const monomericMols = await getMonomericMols(this.targetColumn!, this.seqHelper);
|
|
54
62
|
//need to create df to calculate fingerprints
|
|
55
63
|
const _monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
|
|
56
64
|
this.renderMolIds = await grok.functions.call('Chem:callChemDiversitySearch', {
|
|
@@ -63,15 +71,15 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
|
63
71
|
|
|
64
72
|
private async computeByMM() {
|
|
65
73
|
const encodedSequences =
|
|
66
|
-
(await getEncodedSeqSpaceCol(this.
|
|
74
|
+
(await getEncodedSeqSpaceCol(this.targetColumn!, MmDistanceFunctionsNames.LEVENSHTEIN)).seqList;
|
|
67
75
|
const distanceMatrixService = new DistanceMatrixService(true, false);
|
|
68
76
|
const distanceMatrixData = await distanceMatrixService.calc(encodedSequences, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
69
77
|
distanceMatrixService.terminate();
|
|
70
|
-
const len = this.
|
|
78
|
+
const len = this.targetColumn!.length;
|
|
71
79
|
const linearizeFunc = dmLinearIndex(len);
|
|
72
80
|
this.renderMolIds = getDiverseSubset(len, Math.min(len, this.limit),
|
|
73
81
|
(i1: number, i2: number) => {
|
|
74
|
-
return this.
|
|
82
|
+
return this.targetColumn!.isNone(i1) || this.targetColumn!.isNone(i2) ? 0 :
|
|
75
83
|
distanceMatrixData[linearizeFunc(i1, i2)];
|
|
76
84
|
});
|
|
77
85
|
}
|
|
@@ -4,94 +4,28 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import {CHEM_SIMILARITY_METRICS} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
6
6
|
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
import {SearchBaseViewer} from '@datagrok-libraries/ml/src/viewers/search-base-viewer';
|
|
7
8
|
|
|
8
9
|
const MAX_ROWS_FOR_DISTANCE_MATRIX = 22000;
|
|
9
10
|
|
|
10
|
-
export class SequenceSearchBaseViewer extends
|
|
11
|
-
name: string = '';
|
|
11
|
+
export class SequenceSearchBaseViewer extends SearchBaseViewer {
|
|
12
12
|
distanceMetric: string;
|
|
13
|
-
limit: number;
|
|
14
13
|
fingerprint: string;
|
|
15
14
|
metricsProperties = ['distanceMetric', 'fingerprint'];
|
|
16
15
|
fingerprintChoices = ['Morgan', 'Pattern'];
|
|
17
|
-
moleculeColumn?: DG.Column<string>;
|
|
18
|
-
moleculeColumnName: string;
|
|
19
|
-
initialized: boolean = false;
|
|
20
16
|
tags = [DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.separator, bioTAGS.alphabet];
|
|
21
17
|
preComputeDistanceMatrix: boolean = false;
|
|
22
18
|
|
|
23
|
-
constructor(name: string) {
|
|
24
|
-
super();
|
|
19
|
+
constructor(name: string, semType: string) {
|
|
20
|
+
super(name, semType);
|
|
25
21
|
this.fingerprint = this.string('fingerprint', this.fingerprintChoices[0], {choices: this.fingerprintChoices});
|
|
26
|
-
this.limit = this.int('limit', 10);
|
|
27
22
|
this.distanceMetric = this.string('distanceMetric', CHEM_SIMILARITY_METRICS[0], {choices: CHEM_SIMILARITY_METRICS});
|
|
28
|
-
this.moleculeColumnName = this.string('moleculeColumnName');
|
|
29
|
-
this.name = name;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
init(): void {
|
|
33
|
-
this.initialized = true;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
detach(): void {
|
|
37
|
-
this.subs.forEach((sub) => sub.unsubscribe());
|
|
38
23
|
}
|
|
39
24
|
|
|
40
25
|
async onTableAttached(): Promise<void> {
|
|
41
|
-
|
|
26
|
+
super.onTableAttached();
|
|
42
27
|
|
|
43
|
-
if (this.dataFrame)
|
|
28
|
+
if (this.dataFrame)
|
|
44
29
|
this.preComputeDistanceMatrix = this.dataFrame.rowCount <= MAX_ROWS_FOR_DISTANCE_MATRIX;
|
|
45
|
-
this.subs.push(DG.debounce(this.dataFrame.onRowsRemoved, 50)
|
|
46
|
-
.subscribe((_: any) => this.render(true)));
|
|
47
|
-
const compute = this.name !== 'diversity';
|
|
48
|
-
this.subs.push(DG.debounce(this.dataFrame.onCurrentRowChanged, 50)
|
|
49
|
-
.subscribe((_: any) => this.render(compute)));
|
|
50
|
-
this.subs.push(DG.debounce(this.dataFrame.selection.onChanged, 50)
|
|
51
|
-
.subscribe((_: any) => this.render(false)));
|
|
52
|
-
this.subs.push(DG.debounce(ui.onSizeChanged(this.root), 50)
|
|
53
|
-
.subscribe((_: any) => this.render(false)));
|
|
54
|
-
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE) as DG.Column<string>;
|
|
55
|
-
this.moleculeColumnName = this.moleculeColumn?.name!;
|
|
56
|
-
this.getProperty('limit')!.fromOptions({min: 1, max: this.dataFrame.rowCount});
|
|
57
|
-
}
|
|
58
|
-
this.render();
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
onPropertyChanged(property: DG.Property): void {
|
|
62
|
-
super.onPropertyChanged(property);
|
|
63
|
-
if (!this.initialized)
|
|
64
|
-
return;
|
|
65
|
-
if (property.name === 'moleculeColumnName') {
|
|
66
|
-
const col = this.dataFrame.col(property.get(this))!;
|
|
67
|
-
if (col.semType === DG.SEMTYPE.MACROMOLECULE)
|
|
68
|
-
this.moleculeColumn = col;
|
|
69
|
-
}
|
|
70
|
-
this.render();
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
/** For tests */ public computeRequested: boolean = false;
|
|
74
|
-
public renderPromise: Promise<void> = Promise.resolve();
|
|
75
|
-
|
|
76
|
-
protected render(computeData = true): void {
|
|
77
|
-
this.renderPromise = this.renderPromise.then(async () => {
|
|
78
|
-
this.computeRequested = this.computeRequested || computeData;
|
|
79
|
-
await this.renderInt(computeData);
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
async renderInt(_computeData: boolean): Promise<void> {
|
|
84
|
-
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
beforeRender() {
|
|
88
|
-
if (!this.initialized)
|
|
89
|
-
return false;
|
|
90
|
-
if (this.dataFrame && this.moleculeColumnName &&
|
|
91
|
-
this.dataFrame.col(this.moleculeColumnName)!.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
92
|
-
grok.shell.error(`${this.moleculeColumnName} is not Macromolecule type`);
|
|
93
|
-
return false;
|
|
94
|
-
}
|
|
95
|
-
return true;
|
|
96
30
|
}
|
|
97
31
|
}
|
|
@@ -5,13 +5,14 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
6
6
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
7
7
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
|
-
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
8
|
+
import {adjustGridcolAfterRender, updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
9
|
import {Subject} from 'rxjs';
|
|
10
10
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
11
|
import {alignSequencePair} from '@datagrok-libraries/bio/src/utils/macromolecule/alignment';
|
|
12
12
|
import {KnnResult, SparseMatrixService} from '@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service';
|
|
13
13
|
import {getEncodedSeqSpaceCol} from './sequence-space';
|
|
14
14
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
15
|
+
import {MmcrTemps, tempTAGS} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
15
16
|
|
|
16
17
|
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
17
18
|
cutoff: number;
|
|
@@ -31,12 +32,14 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
31
32
|
knn?: KnnResult;
|
|
32
33
|
kPrevNeighbors: number = 0;
|
|
33
34
|
demo?: boolean;
|
|
35
|
+
analysisGrid?: DG.Grid;
|
|
36
|
+
subInited: boolean = false;
|
|
34
37
|
|
|
35
38
|
constructor(
|
|
36
39
|
private readonly seqHelper: ISeqHelper,
|
|
37
40
|
demo?: boolean,
|
|
38
41
|
) {
|
|
39
|
-
super('similarity');
|
|
42
|
+
super('similarity', DG.SEMTYPE.MACROMOLECULE);
|
|
40
43
|
this.cutoff = this.float('cutoff', 0.01, {min: 0, max: 1});
|
|
41
44
|
this.hotSearch = this.bool('hotSearch', true);
|
|
42
45
|
this.similarColumnLabel = this.string('similarColumnLabel', null);
|
|
@@ -51,42 +54,57 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
51
54
|
override async renderInt(computeData: boolean): Promise<void> {
|
|
52
55
|
if (!this.beforeRender())
|
|
53
56
|
return;
|
|
54
|
-
if (this.
|
|
57
|
+
if (this.targetColumn) {
|
|
55
58
|
this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
56
59
|
if (computeData && !this.gridSelect) {
|
|
57
60
|
this.targetMoleculeIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
58
|
-
const sh = this.seqHelper.getSeqHandler(this.
|
|
61
|
+
const sh = this.seqHelper.getSeqHandler(this.targetColumn!);
|
|
59
62
|
|
|
60
|
-
await
|
|
63
|
+
await this.computeByMM();
|
|
61
64
|
const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
|
|
62
|
-
`similar (${this.
|
|
65
|
+
`similar (${this.targetColumn})`;
|
|
63
66
|
this.molCol = DG.Column.string(similarColumnName,
|
|
64
|
-
this.idxs!.length).init((i) => this.
|
|
67
|
+
this.idxs!.length).init((i) => this.targetColumn?.get(this.idxs?.get(i)));
|
|
65
68
|
this.molCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
66
|
-
this.tags.forEach((tag) => this.molCol!.setTag(tag, this.
|
|
69
|
+
this.tags.forEach((tag) => this.molCol!.setTag(tag, this.targetColumn!.getTag(tag)));
|
|
67
70
|
const resDf = DG.DataFrame.fromColumns([this.idxs!, this.molCol!, this.scores!]);
|
|
68
|
-
resDf.
|
|
71
|
+
await resDf.meta.detectSemanticTypes();
|
|
72
|
+
await grok.data.detectSemanticTypes(resDf);
|
|
73
|
+
this.molCol.temp[tempTAGS.referenceSequence] = this.targetColumn!.get(this.targetMoleculeIdx);
|
|
74
|
+
this.molCol.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
75
|
+
let prevTimer: any = null;
|
|
76
|
+
const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
|
|
77
|
+
prevTimer && clearTimeout(prevTimer);
|
|
69
78
|
this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx);
|
|
70
|
-
setTimeout(() => { this.createPropertyPanel(resDf); },
|
|
79
|
+
prevTimer = setTimeout(() => { this.createPropertyPanel(resDf); }, 300);
|
|
71
80
|
this.gridSelect = true;
|
|
72
81
|
});
|
|
73
|
-
|
|
74
|
-
|
|
82
|
+
if (!this.analysisGrid) {
|
|
83
|
+
this.analysisGrid = resDf.plot.grid();
|
|
84
|
+
updateDivInnerHTML(this.root, this.analysisGrid.root);
|
|
85
|
+
} else {
|
|
86
|
+
this.analysisGrid.dataFrame = resDf;
|
|
87
|
+
this.analysisGrid.invalidate();
|
|
88
|
+
}
|
|
89
|
+
this.analysisGrid.col('indexes')!.visible = false;
|
|
90
|
+
adjustGridcolAfterRender(this.analysisGrid, this.molCol!.name, 450, 30, true);
|
|
75
91
|
const targetMolRow = this.idxs?.getRawData().findIndex((it) => it == this.targetMoleculeIdx);
|
|
76
|
-
const targetScoreCell =
|
|
92
|
+
const targetScoreCell = this.analysisGrid.cell('score', targetMolRow!);
|
|
77
93
|
targetScoreCell.cell.value = null;
|
|
78
|
-
const view =
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
94
|
+
const view = grok.shell.tv;
|
|
95
|
+
if (!this.subInited) {
|
|
96
|
+
view.grid.root.addEventListener('click', (_event: MouseEvent) => {
|
|
97
|
+
this.gridSelect = false;
|
|
98
|
+
});
|
|
99
|
+
this.subInited = true;
|
|
100
|
+
}
|
|
83
101
|
this.computeCompleted.next(true);
|
|
84
102
|
}
|
|
85
103
|
}
|
|
86
104
|
}
|
|
87
105
|
|
|
88
106
|
private async computeByChem() {
|
|
89
|
-
const monomericMols = await getMonomericMols(this.
|
|
107
|
+
const monomericMols = await getMonomericMols(this.targetColumn!, this.seqHelper);
|
|
90
108
|
//need to create df to calculate fingerprints
|
|
91
109
|
const _monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
|
|
92
110
|
const df = await grok.functions.call('Chem:callChemSimilaritySearch', {
|
|
@@ -103,11 +121,11 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
103
121
|
}
|
|
104
122
|
|
|
105
123
|
private async computeByMM() {
|
|
106
|
-
const len = this.
|
|
124
|
+
const len = this.targetColumn!.length;
|
|
107
125
|
const actualLimit = Math.min(this.limit, len - 1);
|
|
108
126
|
if (!this.knn || this.kPrevNeighbors !== actualLimit) {
|
|
109
127
|
const encodedSequences =
|
|
110
|
-
(await getEncodedSeqSpaceCol(this.
|
|
128
|
+
(await getEncodedSeqSpaceCol(this.targetColumn!, MmDistanceFunctionsNames.LEVENSHTEIN)).seqList;
|
|
111
129
|
|
|
112
130
|
this.kPrevNeighbors = actualLimit;
|
|
113
131
|
this.knn = await (new SparseMatrixService()
|
|
@@ -128,12 +146,13 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
128
146
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
129
147
|
const molColName = this.molCol?.name!;
|
|
130
148
|
const resCol: DG.Column<string> = resDf.col(molColName)!;
|
|
131
|
-
const molColSh = this.seqHelper.getSeqHandler(this.
|
|
149
|
+
const molColSh = this.seqHelper.getSeqHandler(this.targetColumn!);
|
|
132
150
|
const resSh = this.seqHelper.getSeqHandler(resCol);
|
|
133
151
|
const subParts1 = molColSh.getSplitted(this.targetMoleculeIdx);
|
|
134
152
|
const subParts2 = resSh.getSplitted(resDf.currentRowIdx);
|
|
135
153
|
const alignment = alignSequencePair(subParts1, subParts2);
|
|
136
|
-
const canvas =
|
|
154
|
+
const canvas =
|
|
155
|
+
createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted, resSh.defaultBiotype, molDifferences);
|
|
137
156
|
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
138
157
|
if (subParts1.length !== subParts2.length) {
|
|
139
158
|
propPanel.append(ui.divV([
|
|
@@ -8,11 +8,30 @@ import {handleError} from './utils';
|
|
|
8
8
|
import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
|
|
9
9
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
10
10
|
import {getSeqHelper, ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
|
+
import {adjustGridcolAfterRender} from '../utils/ui-utils';
|
|
12
|
+
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
11
13
|
|
|
12
14
|
const dataFn: string = 'samples/FASTA_PT_activity.csv';
|
|
13
15
|
|
|
14
|
-
export async function
|
|
15
|
-
|
|
16
|
+
export async function demoBioSimDiv() {
|
|
17
|
+
const t = await _package.files.readCsv('samples/peptides-non-natural.csv');
|
|
18
|
+
t.name = 'Similarity and Diversity Demo';
|
|
19
|
+
t.col('activity')!.setTag('format', '3 significant digits');
|
|
20
|
+
t.col('sequence')!.temp[MmcrTemps.maxMonomerLength] = 4;
|
|
21
|
+
const tv = grok.shell.addTableView(t);
|
|
22
|
+
await t.meta.detectSemanticTypes();
|
|
23
|
+
await grok.data.detectSemanticTypes(t);
|
|
24
|
+
const simV = tv.addViewer('Sequence Similarity Search', {limit: 20});
|
|
25
|
+
const dn = tv.dockManager.dock(simV, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.45);
|
|
26
|
+
adjustGridcolAfterRender(tv.grid, 'sequence', 500, 30);
|
|
27
|
+
const divV = tv.addViewer('Sequence Diversity Search', {limit: 20});
|
|
28
|
+
tv.dockManager.dock(divV, DG.DOCK_TYPE.DOWN, dn, 'Diversity search', 0.4);
|
|
29
|
+
grok.functions.call('Dendrogram:HierarchicalClustering',
|
|
30
|
+
{df: grok.shell.t, colNameList: ['sequence'], distance: 'euclidian', linkage: 'complete'});
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export async function demoBio01UISteps() {
|
|
34
|
+
const seqHelper: ISeqHelper = await getSeqHelper();
|
|
16
35
|
|
|
17
36
|
let view: DG.TableView;
|
|
18
37
|
let df: DG.DataFrame;
|
|
@@ -12,6 +12,13 @@ import {getClusterMatrixWorker} from '@datagrok-libraries/math';
|
|
|
12
12
|
const dataFn = 'samples/FASTA_PT_activity.csv';
|
|
13
13
|
const seqColName = 'sequence';
|
|
14
14
|
|
|
15
|
+
export async function demoSeqSpace() {
|
|
16
|
+
const p = await grok.functions.eval('Bio:SeqSpaceDemo');
|
|
17
|
+
const project = await grok.dapi.projects.find(p.id);
|
|
18
|
+
await project.open();
|
|
19
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#sequence-space');
|
|
20
|
+
}
|
|
21
|
+
|
|
15
22
|
export async function demoBio01aUI() {
|
|
16
23
|
let treeHelper: ITreeHelper;
|
|
17
24
|
let dendrogramSvc: IDendrogramService;
|
|
@@ -28,7 +28,7 @@ export async function demoBio01bUI() {
|
|
|
28
28
|
|
|
29
29
|
try {
|
|
30
30
|
const demoScript = new DemoScript('Activity Cliffs', 'Activity Cliffs analysis on Macromolecules data', false,
|
|
31
|
-
{autoStartFirstStep: true});
|
|
31
|
+
{autoStartFirstStep: true, path: 'Bioinformatics/Activity Cliffs'});
|
|
32
32
|
await demoScript
|
|
33
33
|
.step(`Load DNA sequences`, async () => {
|
|
34
34
|
grok.shell.windows.showContextPanel = false;
|
|
@@ -5,6 +5,21 @@ import {_package, toAtomicLevel} from '../package';
|
|
|
5
5
|
import {handleError} from './utils';
|
|
6
6
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
7
7
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
|
+
import {adjustGridcolAfterRender} from '../utils/ui-utils';
|
|
9
|
+
|
|
10
|
+
export async function demoToAtomicLevel(): Promise<void> {
|
|
11
|
+
const data = await _package.files.readCsv('samples/HELM_BI_CYCLIC.csv');
|
|
12
|
+
data.name = 'To Atomic Level';
|
|
13
|
+
await data.meta.detectSemanticTypes();
|
|
14
|
+
await grok.data.detectSemanticTypes(data);
|
|
15
|
+
const view = grok.shell.addTableView(data);
|
|
16
|
+
const seqCol = data.col('HELM')!;
|
|
17
|
+
await toAtomicLevel(data, seqCol, true, false);
|
|
18
|
+
adjustGridcolAfterRender(view.grid, 'molfile(HELM)', 500, 300, true);
|
|
19
|
+
adjustGridcolAfterRender(view.grid, 'HELM', 500, undefined, true);
|
|
20
|
+
grok.shell.info('Hover over monomers in HELM column to highlight them in molecular structure.', {timeout: 10});
|
|
21
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#get-atomic-level-structure');
|
|
22
|
+
}
|
|
8
23
|
|
|
9
24
|
export async function demoBio03UI(): Promise<void> {
|
|
10
25
|
const dataFn: string = 'samples/HELM.csv';
|