@datagrok/bio 2.0.12 → 2.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +1864 -1162
- package/dist/package.js +4021 -3444
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +135 -152
- package/files/data/sample_HELM_empty_vals.csv +537 -537
- package/files/samples/sample_HELM.csv +540 -540
- package/package.json +19 -12
- package/scripts/read-tree-pkl.py +215 -0
- package/src/__jest__/remote.test.ts +6 -4
- package/src/__jest__/test-node.ts +1 -1
- package/src/{utils → analysis}/sequence-activity-cliffs.ts +56 -37
- package/src/analysis/sequence-diversity-viewer.ts +48 -0
- package/src/analysis/sequence-search-base-viewer.ts +81 -0
- package/src/analysis/sequence-similarity-viewer.ts +107 -0
- package/src/{utils → analysis}/sequence-space.ts +0 -0
- package/src/calculations/monomerLevelMols.ts +64 -20
- package/src/package-test.ts +2 -1
- package/src/package.ts +57 -3
- package/src/substructure-search/substructure-search.ts +65 -0
- package/src/tests/WebLogo-test.ts +6 -6
- package/src/tests/activity-cliffs-tests.ts +8 -16
- package/src/tests/activity-cliffs-utils.ts +3 -2
- package/src/tests/renderers-test.ts +1 -1
- package/src/tests/sequence-space-test.ts +7 -14
- package/src/tests/similarity-diversity-tests.ts +78 -0
- package/src/utils/cell-renderer.ts +81 -68
- package/src/utils/ui-utils.ts +4 -0
- package/src/viewers/vd-regions-viewer.ts +2 -1
- package/src/widgets/representations.ts +58 -0
- package/test-Bio-7770371320b2-f955a7eb.html +0 -374
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.12",
|
|
8
|
+
"version": "2.0.16",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,35 +14,42 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": ">=2.4.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^4.4.
|
|
17
|
+
"@datagrok-libraries/bio": "^4.4.3",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.0",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.0",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.6.2",
|
|
21
|
+
"@deck.gl/core": "^8.7.5",
|
|
22
|
+
"@deck.gl/layers": "^8.7.5",
|
|
23
|
+
"@luma.gl/constants": "^8.5.10",
|
|
24
|
+
"@luma.gl/core": "^8.5.10",
|
|
25
|
+
"@phylocanvas/phylocanvas.gl": "^1.43.0",
|
|
21
26
|
"cash-dom": "latest",
|
|
22
|
-
"datagrok-api": "^1.6.
|
|
27
|
+
"datagrok-api": "^1.6.12",
|
|
23
28
|
"dayjs": "^1.11.4",
|
|
24
29
|
"openchemlib": "6.0.1",
|
|
25
30
|
"rxjs": "^6.5.5",
|
|
26
|
-
"typescript": "^4.4.2",
|
|
27
31
|
"wu": "latest"
|
|
28
32
|
},
|
|
29
33
|
"devDependencies": {
|
|
30
|
-
"@types/jest": "^27.
|
|
34
|
+
"@types/jest": "^27.5.1",
|
|
35
|
+
"@types/js-yaml": "^4.0.5",
|
|
36
|
+
"@types/node": "^17.0.24",
|
|
37
|
+
"@types/node-fetch": "^2.6.2",
|
|
38
|
+
"@types/wu": "latest",
|
|
31
39
|
"@typescript-eslint/eslint-plugin": "latest",
|
|
32
40
|
"@typescript-eslint/parser": "latest",
|
|
33
|
-
"eslint": "
|
|
41
|
+
"eslint": "latest",
|
|
34
42
|
"eslint-config-google": "latest",
|
|
35
|
-
"jest": "^27.
|
|
43
|
+
"jest": "^27.5.1",
|
|
36
44
|
"jest-html-reporter": "^3.6.0",
|
|
45
|
+
"js-yaml": "^4.1.0",
|
|
46
|
+
"node-fetch": "^2.6.7",
|
|
37
47
|
"puppeteer": "^13.7.0",
|
|
38
48
|
"ts-jest": "^27.0.0",
|
|
39
49
|
"ts-loader": "^9.2.5",
|
|
50
|
+
"typescript": "^4.5.4",
|
|
40
51
|
"webpack": "latest",
|
|
41
|
-
"webpack-cli": "^4.10.0"
|
|
42
|
-
"@types/js-yaml": "^4.0.5",
|
|
43
|
-
"js-yaml": "^4.1.0",
|
|
44
|
-
"@types/node-fetch": "^2.6.2",
|
|
45
|
-
"node-fetch": "^2.6.7"
|
|
52
|
+
"webpack-cli": "^4.10.0"
|
|
46
53
|
},
|
|
47
54
|
"scripts": {
|
|
48
55
|
"link-api": "npm link datagrok-api",
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
import ete3
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pickle
|
|
7
|
+
import six
|
|
8
|
+
from sklearn.cluster import AgglomerativeClustering
|
|
9
|
+
from Bio import Phylo as ph
|
|
10
|
+
from Bio.Phylo.BaseTree import TreeElement
|
|
11
|
+
from Bio.Phylo import parse
|
|
12
|
+
|
|
13
|
+
import click
|
|
14
|
+
from click_default_group import DefaultGroup
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TreeNode:
|
|
18
|
+
|
|
19
|
+
def __init__(self, node_id: int, distance: float, children: list['TreeNode']):
|
|
20
|
+
"""
|
|
21
|
+
:param node_id:
|
|
22
|
+
:param distance:
|
|
23
|
+
:param children: list of child nodes, set None for leaf node
|
|
24
|
+
"""
|
|
25
|
+
self._node_id: int = node_id
|
|
26
|
+
self._distance: float = distance
|
|
27
|
+
self._children: [] = children
|
|
28
|
+
|
|
29
|
+
def set_height(self, height):
|
|
30
|
+
""" Sets distance based on overall height of children"""
|
|
31
|
+
self._distance = height - max((ch.get_height() for ch in self._children))
|
|
32
|
+
|
|
33
|
+
def get_height(self):
|
|
34
|
+
if self._children is None or len(self._children) == 0:
|
|
35
|
+
return self._distance
|
|
36
|
+
else:
|
|
37
|
+
return self._distance + max(ch.get_height() for ch in self._children)
|
|
38
|
+
|
|
39
|
+
def __str__(self):
|
|
40
|
+
if self._children is None or len(self._children) == 0:
|
|
41
|
+
return "{id}:{distance}".format(id=self._node_id, distance=self._distance)
|
|
42
|
+
else:
|
|
43
|
+
return "({children}){id}:{distance}".format(
|
|
44
|
+
children=','.join((str(ch) for ch in self._children)),
|
|
45
|
+
id=self._node_id,
|
|
46
|
+
distance=self._distance)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@click.group(cls=DefaultGroup, default='main')
|
|
50
|
+
def cli():
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@cli.command()
|
|
55
|
+
@click.option('--pkl', 'pkl_f',
|
|
56
|
+
help="Pickle file with tree data",
|
|
57
|
+
type=click.File('rb'))
|
|
58
|
+
@click.option('--nwk', 'nwk_f',
|
|
59
|
+
help="Output file for tree in newick format",
|
|
60
|
+
type=click.File('w'))
|
|
61
|
+
@click.pass_context
|
|
62
|
+
def main(ctx, pkl_f, nwk_f):
|
|
63
|
+
data = pickle.load(pkl_f)
|
|
64
|
+
|
|
65
|
+
all_nodes = {}
|
|
66
|
+
|
|
67
|
+
# https://stackoverflow.com/questions/27386641/how-to-traverse-a-tree-from-sklearn-agglomerativeclustering
|
|
68
|
+
# The hint from user76284
|
|
69
|
+
v = dict(enumerate(data.children_, data.n_leaves_))
|
|
70
|
+
|
|
71
|
+
root: TreeNode = None # The last created node is a root
|
|
72
|
+
for row_i in range(data.children_.shape[0]):
|
|
73
|
+
height = data.distances_[row_i] # height of connection - distance between nodes (groups) -
|
|
74
|
+
|
|
75
|
+
node_id = data.n_leaves_ + row_i
|
|
76
|
+
child_id_list = data.children_[row_i, :].tolist()
|
|
77
|
+
|
|
78
|
+
child_node_list = []
|
|
79
|
+
for child_id in child_id_list:
|
|
80
|
+
child_node: TreeNode = None
|
|
81
|
+
if child_id < data.n_leaves_:
|
|
82
|
+
# leaf
|
|
83
|
+
# create child node on height
|
|
84
|
+
child_node = TreeNode(child_id, height, None)
|
|
85
|
+
pass
|
|
86
|
+
else:
|
|
87
|
+
# internal node
|
|
88
|
+
child_node = all_nodes[child_id]
|
|
89
|
+
child_node.set_height(height);
|
|
90
|
+
pass
|
|
91
|
+
|
|
92
|
+
child_node_list.append(child_node)
|
|
93
|
+
|
|
94
|
+
# create node with distance 0, set distance to parent node later based on connection height
|
|
95
|
+
root = TreeNode(node_id, 0, child_node_list)
|
|
96
|
+
all_nodes[node_id] = root
|
|
97
|
+
|
|
98
|
+
k = 11
|
|
99
|
+
|
|
100
|
+
nwk_f.write(str(root))
|
|
101
|
+
nwk_f.write(';')
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def build_Newick_tree(children, n_leaves, X, leaf_labels, spanner):
|
|
105
|
+
"""
|
|
106
|
+
build_Newick_tree(children,n_leaves,X,leaf_labels,spanner)
|
|
107
|
+
|
|
108
|
+
Get a string representation (Newick tree) from the sklearn
|
|
109
|
+
AgglomerativeClustering.fit output.
|
|
110
|
+
|
|
111
|
+
Input:
|
|
112
|
+
children: AgglomerativeClustering.children_
|
|
113
|
+
n_leaves: AgglomerativeClustering.n_leaves_
|
|
114
|
+
X: parameters supplied to AgglomerativeClustering.fit
|
|
115
|
+
leaf_labels: The label of each parameter array in X
|
|
116
|
+
spanner: Callable that computes the dendrite's span
|
|
117
|
+
|
|
118
|
+
Output:
|
|
119
|
+
ntree: A str with the Newick tree representation
|
|
120
|
+
|
|
121
|
+
"""
|
|
122
|
+
return go_down_tree(children, n_leaves, X, leaf_labels, len(children) + n_leaves - 1, spanner)[0] + ';'
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def go_down_tree(children, n_leaves, X, leaf_labels, nodename, spanner):
|
|
126
|
+
"""
|
|
127
|
+
go_down_tree(children,n_leaves,X,leaf_labels,nodename,spanner)
|
|
128
|
+
|
|
129
|
+
Recursive function that traverses the subtree that descends from
|
|
130
|
+
nodename and returns the Newick representation of the subtree.
|
|
131
|
+
|
|
132
|
+
Input:
|
|
133
|
+
children: AgglomerativeClustering.children_
|
|
134
|
+
n_leaves: AgglomerativeClustering.n_leaves_
|
|
135
|
+
X: parameters supplied to AgglomerativeClustering.fit
|
|
136
|
+
leaf_labels: The label of each parameter array in X
|
|
137
|
+
nodename: An int that is the intermediate node name whose
|
|
138
|
+
children are located in children[nodename-n_leaves].
|
|
139
|
+
spanner: Callable that computes the dendrite's span
|
|
140
|
+
|
|
141
|
+
Output:
|
|
142
|
+
ntree: A str with the Newick tree representation
|
|
143
|
+
|
|
144
|
+
"""
|
|
145
|
+
nodeindex = nodename - n_leaves
|
|
146
|
+
if nodename < n_leaves:
|
|
147
|
+
return leaf_labels[nodeindex], np.array([X[nodeindex]])
|
|
148
|
+
else:
|
|
149
|
+
node_children = children[nodeindex]
|
|
150
|
+
branch0, branch0samples = go_down_tree(children, n_leaves, X, leaf_labels, node_children[0])
|
|
151
|
+
branch1, branch1samples = go_down_tree(children, n_leaves, X, leaf_labels, node_children[1])
|
|
152
|
+
node = np.vstack((branch0samples, branch1samples))
|
|
153
|
+
branch0span = spanner(branch0samples)
|
|
154
|
+
branch1span = spanner(branch1samples)
|
|
155
|
+
nodespan = spanner(node)
|
|
156
|
+
branch0distance = nodespan - branch0span
|
|
157
|
+
branch1distance = nodespan - branch1span
|
|
158
|
+
nodename = '({branch0}:{branch0distance},{branch1}:{branch1distance})'.format(branch0=branch0,
|
|
159
|
+
branch0distance=branch0distance,
|
|
160
|
+
branch1=branch1,
|
|
161
|
+
branch1distance=branch1distance)
|
|
162
|
+
return nodename, node
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_cluster_spanner(aggClusterer):
|
|
166
|
+
"""
|
|
167
|
+
spanner = get_cluster_spanner(aggClusterer)
|
|
168
|
+
|
|
169
|
+
Input:
|
|
170
|
+
aggClusterer: sklearn.cluster.AgglomerativeClustering instance
|
|
171
|
+
|
|
172
|
+
Get a callable that computes a given cluster's span. To compute
|
|
173
|
+
a cluster's span, call spanner(cluster)
|
|
174
|
+
|
|
175
|
+
The cluster must be a 2D numpy array, where the axis=0 holds
|
|
176
|
+
separate cluster members and the axis=1 holds the different
|
|
177
|
+
variables.
|
|
178
|
+
|
|
179
|
+
"""
|
|
180
|
+
if aggClusterer.linkage == 'ward':
|
|
181
|
+
if aggClusterer.affinity == 'euclidean':
|
|
182
|
+
spanner = lambda x: np.sum((x - aggClusterer.pooling_func(x, axis=0)) ** 2)
|
|
183
|
+
elif aggClusterer.linkage == 'complete':
|
|
184
|
+
if aggClusterer.affinity == 'euclidean':
|
|
185
|
+
spanner = lambda x: np.max(np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=2))
|
|
186
|
+
elif aggClusterer.affinity == 'l1' or aggClusterer.affinity == 'manhattan':
|
|
187
|
+
spanner = lambda x: np.max(np.sum(np.abs(x[:, None, :] - x[None, :, :]), axis=2))
|
|
188
|
+
elif aggClusterer.affinity == 'l2':
|
|
189
|
+
spanner = lambda x: np.max(np.sqrt(np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=2)))
|
|
190
|
+
elif aggClusterer.affinity == 'cosine':
|
|
191
|
+
spanner = lambda x: np.max(np.sum((x[:, None, :] * x[None, :, :])) / (
|
|
192
|
+
np.sqrt(np.sum(x[:, None, :] * x[:, None, :], axis=2, keepdims=True)) * np.sqrt(
|
|
193
|
+
np.sum(x[None, :, :] * x[None, :, :], axis=2, keepdims=True))))
|
|
194
|
+
else:
|
|
195
|
+
raise AttributeError('Unknown affinity attribute value {0}.'.format(aggClusterer.affinity))
|
|
196
|
+
elif aggClusterer.linkage == 'average':
|
|
197
|
+
if aggClusterer.affinity == 'euclidean':
|
|
198
|
+
spanner = lambda x: np.mean(np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=2))
|
|
199
|
+
elif aggClusterer.affinity == 'l1' or aggClusterer.affinity == 'manhattan':
|
|
200
|
+
spanner = lambda x: np.mean(np.sum(np.abs(x[:, None, :] - x[None, :, :]), axis=2))
|
|
201
|
+
elif aggClusterer.affinity == 'l2':
|
|
202
|
+
spanner = lambda x: np.mean(np.sqrt(np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=2)))
|
|
203
|
+
elif aggClusterer.affinity == 'cosine':
|
|
204
|
+
spanner = lambda x: np.mean(np.sum((x[:, None, :] * x[None, :, :])) / (
|
|
205
|
+
np.sqrt(np.sum(x[:, None, :] * x[:, None, :], axis=2, keepdims=True)) * np.sqrt(
|
|
206
|
+
np.sum(x[None, :, :] * x[None, :, :], axis=2, keepdims=True))))
|
|
207
|
+
else:
|
|
208
|
+
raise AttributeError('Unknown affinity attribute value {0}.'.format(aggClusterer.affinity))
|
|
209
|
+
else:
|
|
210
|
+
raise AttributeError('Unknown linkage attribute value {0}.'.format(aggClusterer.linkage))
|
|
211
|
+
return spanner
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
if __name__ == '__main__':
|
|
215
|
+
cli()
|
|
@@ -36,10 +36,10 @@ expect.extend({
|
|
|
36
36
|
});
|
|
37
37
|
|
|
38
38
|
it('TEST', async () => {
|
|
39
|
-
const targetPackage:string = process.env.TARGET_PACKAGE ?? 'Bio';
|
|
39
|
+
const targetPackage: string = process.env.TARGET_PACKAGE ?? 'Bio';
|
|
40
40
|
console.log(`Testing ${targetPackage} package`);
|
|
41
41
|
|
|
42
|
-
const r = await page.evaluate((targetPackage):Promise<object> => {
|
|
42
|
+
const r = await page.evaluate((targetPackage): Promise<object> => {
|
|
43
43
|
return new Promise<object>((resolve, reject) => {
|
|
44
44
|
(<any>window).grok.functions.eval(targetPackage + ':test()').then((df: any) => {
|
|
45
45
|
const cStatus = df.columns.byName('success');
|
|
@@ -52,10 +52,12 @@ it('TEST', async () => {
|
|
|
52
52
|
let failReport = '';
|
|
53
53
|
for (let i = 0; i < df.rowCount; i++) {
|
|
54
54
|
if (cStatus.get(i)) {
|
|
55
|
-
passReport += `Test result : Success : ${cTime.get(i)} :
|
|
55
|
+
passReport += `Test result : Success : ${cTime.get(i)} : ` +
|
|
56
|
+
`${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
56
57
|
} else {
|
|
57
58
|
failed = true;
|
|
58
|
-
failReport += `Test result : Failed : ${cTime.get(i)} :
|
|
59
|
+
failReport += `Test result : Failed : ${cTime.get(i)} : ` +
|
|
60
|
+
`${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
59
61
|
}
|
|
60
62
|
}
|
|
61
63
|
resolve({failReport, passReport, failed});
|
|
@@ -76,7 +76,7 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
|
|
|
76
76
|
}, token);
|
|
77
77
|
await page.goto(url);
|
|
78
78
|
try {
|
|
79
|
-
//
|
|
79
|
+
// await page.waitForSelector('.grok-preloader', { timeout: 1800000 });
|
|
80
80
|
await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout: 3600000});
|
|
81
81
|
} catch (error) {
|
|
82
82
|
throw error;
|
|
@@ -4,10 +4,10 @@ import * as ui from 'datagrok-api/ui';
|
|
|
4
4
|
import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
5
5
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
6
6
|
import * as grok from 'datagrok-api/grok';
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
7
|
+
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
10
|
-
import {
|
|
10
|
+
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
11
11
|
|
|
12
12
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
13
13
|
const stringArray = col.toList();
|
|
@@ -21,18 +21,17 @@ export async function getDistances(col: DG.Column, seq: string): Promise<Array<n
|
|
|
21
21
|
|
|
22
22
|
export async function getSimilaritiesMarix(dim: number, seqCol: DG.Column, df: DG.DataFrame, colName: string, simArr: DG.Column[])
|
|
23
23
|
: Promise<DG.Column[]> {
|
|
24
|
-
|
|
25
24
|
const distances = new Array(simArr.length).fill(null);
|
|
26
25
|
for (let i = 0; i != dim - 1; ++i) {
|
|
27
|
-
const seq: string = seqCol.get(i);
|
|
26
|
+
const seq: string = seqCol.get(i);
|
|
28
27
|
df.rows.removeAt(0, 1, false);
|
|
29
28
|
distances[i] = (await getDistances(df.col(colName)!, seq))!;
|
|
30
29
|
}
|
|
31
30
|
|
|
32
31
|
for (let i = 0; i < distances.length; i++) {
|
|
33
|
-
for (let j = 0; j < distances[i].length; j++)
|
|
32
|
+
for (let j = 0; j < distances[i].length; j++)
|
|
34
33
|
distances[i][j] = getSimilarityFromDistance(distances[i][j]);
|
|
35
|
-
|
|
34
|
+
|
|
36
35
|
simArr[i] = DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances[i]);
|
|
37
36
|
}
|
|
38
37
|
return simArr;
|
|
@@ -62,11 +61,10 @@ export function createTooltipElement(params: ITooltipAndPanelParams): HTMLDivEle
|
|
|
62
61
|
}
|
|
63
62
|
|
|
64
63
|
function moleculeInfo(df: DG.DataFrame, idx: number, seqColName: string): HTMLElement {
|
|
65
|
-
|
|
66
|
-
for (
|
|
67
|
-
if(col.name !== seqColName)
|
|
64
|
+
const dict: {[key: string]: string} = {};
|
|
65
|
+
for (const col of df.columns) {
|
|
66
|
+
if (col.name !== seqColName)
|
|
68
67
|
dict[col.name] = df.get(col.name, idx);
|
|
69
|
-
}
|
|
70
68
|
}
|
|
71
69
|
return ui.tableFromMap(dict);
|
|
72
70
|
}
|
|
@@ -75,7 +73,7 @@ function moleculeInfo(df: DG.DataFrame, idx: number, seqColName: string): HTMLEl
|
|
|
75
73
|
export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivElement {
|
|
76
74
|
const propPanel = ui.div();
|
|
77
75
|
|
|
78
|
-
propPanel.append(ui.divText(params.seqCol.name, {
|
|
76
|
+
propPanel.append(ui.divText(params.seqCol.name, {style: {fontWeight: 'bold'}}));
|
|
79
77
|
|
|
80
78
|
const sequencesArray = new Array<string>(2);
|
|
81
79
|
const activitiesArray = new Array<number>(2);
|
|
@@ -83,39 +81,60 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
83
81
|
sequencesArray[idx] = params.seqCol.get(molIdx);
|
|
84
82
|
activitiesArray[idx] = params.activityCol.get(molIdx);
|
|
85
83
|
});
|
|
84
|
+
|
|
85
|
+
const molDifferences: {[key: number]: HTMLCanvasElement} = {};
|
|
86
|
+
const units = params.seqCol.getTag(DG.TAGS.UNITS);
|
|
87
|
+
const separator = params.seqCol.getTag(TAGS.SEPARATOR);
|
|
88
|
+
const splitter = WebLogo.getSplitter(units, separator);
|
|
89
|
+
const subParts1 = splitter(sequencesArray[0]);
|
|
90
|
+
const subParts2 = splitter(sequencesArray[1]);
|
|
91
|
+
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
92
|
+
propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
|
|
93
|
+
|
|
94
|
+
propPanel.append(createDifferencesWithPositions(molDifferences));
|
|
95
|
+
|
|
96
|
+
propPanel.append(createPropPanelField('Activity delta', Math.abs(activitiesArray[0] - activitiesArray[1])));
|
|
97
|
+
propPanel.append(createPropPanelField('Cliff', params.sali!));
|
|
98
|
+
|
|
99
|
+
return propPanel;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
function createPropPanelField(name: string, value: number): HTMLDivElement {
|
|
103
|
+
return ui.divH([
|
|
104
|
+
ui.divText(`${name}: `, {style: {fontWeight: 'bold', paddingRight: '5px'}}),
|
|
105
|
+
ui.divText(value.toFixed(2))
|
|
106
|
+
], {style: {paddingTop: '5px'}});
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
export function createDifferenceCanvas(
|
|
110
|
+
subParts1: string[],
|
|
111
|
+
subParts2: string[],
|
|
112
|
+
units: string,
|
|
113
|
+
molDifferences: { [key: number]: HTMLCanvasElement }): HTMLCanvasElement {
|
|
86
114
|
const canvas = document.createElement('canvas');
|
|
87
115
|
const context = canvas.getContext('2d');
|
|
88
116
|
canvas.height = 30;
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
117
|
+
drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30, subParts1, subParts2, units, true, molDifferences);
|
|
118
|
+
return canvas;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
export function createDifferencesWithPositions(
|
|
122
|
+
molDifferences: { [key: number]: HTMLCanvasElement }): HTMLDivElement {
|
|
123
|
+
const div = ui.div();
|
|
95
124
|
if (Object.keys(molDifferences).length > 0) {
|
|
96
125
|
const diffsPanel = ui.divV([]);
|
|
97
126
|
diffsPanel.append(ui.divH([
|
|
98
|
-
ui.divText('Pos', {
|
|
99
|
-
ui.divText('Difference', {
|
|
100
|
-
]))
|
|
101
|
-
for (
|
|
127
|
+
ui.divText('Pos', {style: {fontWeight: 'bold', width: '30px', borderBottom: '1px solid'}}),
|
|
128
|
+
ui.divText('Difference', {style: {fontWeight: 'bold', borderBottom: '1px solid'}})
|
|
129
|
+
]));
|
|
130
|
+
for (const key of Object.keys(molDifferences)) {
|
|
131
|
+
molDifferences[key as any].style.borderBottom = '1px solid lightgray';
|
|
102
132
|
diffsPanel.append(ui.divH([
|
|
103
|
-
ui.divText(key, {
|
|
133
|
+
ui.divText((parseInt(key) + 1).toString(), {style: {width: '30px', borderBottom: '1px solid lightgray'}}),
|
|
104
134
|
molDifferences[key as any]
|
|
105
135
|
]));
|
|
106
136
|
}
|
|
107
|
-
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
function addFiledToPropPanel(name: string, value: number) {
|
|
111
|
-
propPanel.append(ui.divH([
|
|
112
|
-
ui.divText(`${name}: `, { style: { fontWeight: 'bold', paddingRight: '5px' } }),
|
|
113
|
-
ui.divText(value.toFixed(2))
|
|
114
|
-
], { style: { paddingTop: '5px' } }));
|
|
137
|
+
div.append(diffsPanel);
|
|
115
138
|
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
addFiledToPropPanel('Cliff', params.sali!);
|
|
119
|
-
|
|
120
|
-
return propPanel;
|
|
121
|
-
}
|
|
139
|
+
return div;
|
|
140
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
5
|
+
import {similarityMetric, getDiverseSubset} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
6
|
+
import $ from 'cash-dom';
|
|
7
|
+
import {ArrayUtils} from '@datagrok-libraries/utils/src/array-utils';
|
|
8
|
+
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
9
|
+
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
10
|
+
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
11
|
+
import { Subject } from 'rxjs';
|
|
12
|
+
|
|
13
|
+
export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
14
|
+
renderMolIds: number[] | null = null;
|
|
15
|
+
columnNames = [];
|
|
16
|
+
computeCompleted = new Subject<boolean>();
|
|
17
|
+
|
|
18
|
+
constructor() {
|
|
19
|
+
super('diversity');
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async render(computeData = true): Promise<void> {
|
|
24
|
+
if (!this.beforeRender())
|
|
25
|
+
return;
|
|
26
|
+
if (this.dataFrame) {
|
|
27
|
+
if (computeData && this.moleculeColumn) {
|
|
28
|
+
const monomericMols = await getMonomericMols(this.moleculeColumn);
|
|
29
|
+
//need to create df to calculate fingerprints
|
|
30
|
+
const monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
|
|
31
|
+
this.renderMolIds =
|
|
32
|
+
await grok.functions.call('Chem:callChemDiversitySearch', {
|
|
33
|
+
col: monomericMols,
|
|
34
|
+
metricName: this.distanceMetric,
|
|
35
|
+
limit: this.limit,
|
|
36
|
+
fingerprint: this.fingerprint
|
|
37
|
+
});
|
|
38
|
+
const resCol = DG.Column.string('sequence', this.renderMolIds!.length)
|
|
39
|
+
.init((i) => this.moleculeColumn?.get(this.renderMolIds![i]));
|
|
40
|
+
resCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
41
|
+
this.tags.forEach((tag) => resCol.setTag(tag, this.moleculeColumn!.getTag(tag)));
|
|
42
|
+
const resDf = DG.DataFrame.fromColumns([resCol]);
|
|
43
|
+
updateDivInnerHTML(this.root, resDf.plot.grid().root);
|
|
44
|
+
this.computeCompleted.next(true);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import {CHEM_SIMILARITY_METRICS} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
5
|
+
import * as C from '../utils/constants';
|
|
6
|
+
|
|
7
|
+
export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
8
|
+
name: string = '';
|
|
9
|
+
distanceMetric: string;
|
|
10
|
+
limit: number;
|
|
11
|
+
fingerprint: string;
|
|
12
|
+
metricsProperties = ['distanceMetric', 'fingerprint'];
|
|
13
|
+
fingerprintChoices = ['Morgan', 'Pattern'];
|
|
14
|
+
moleculeColumn?: DG.Column|null;
|
|
15
|
+
moleculeColumnName: string;
|
|
16
|
+
initialized: boolean = false;
|
|
17
|
+
tags = [DG.TAGS.UNITS, C.TAGS.ALIGNED, C.TAGS.SEPARATOR, C.TAGS.ALPHABET];
|
|
18
|
+
|
|
19
|
+
constructor(name: string) {
|
|
20
|
+
super();
|
|
21
|
+
this.fingerprint = this.string('fingerprint', this.fingerprintChoices[0], {choices: this.fingerprintChoices});
|
|
22
|
+
this.limit = this.int('limit', 10);
|
|
23
|
+
this.distanceMetric = this.string('distanceMetric', CHEM_SIMILARITY_METRICS[0], {choices: CHEM_SIMILARITY_METRICS});
|
|
24
|
+
this.moleculeColumnName = this.string('moleculeColumnName');
|
|
25
|
+
this.name = name;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
init(): void {
|
|
29
|
+
this.initialized = true;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
detach(): void {
|
|
33
|
+
this.subs.forEach((sub) => sub.unsubscribe());
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
async onTableAttached(): Promise<void> {
|
|
37
|
+
this.init();
|
|
38
|
+
|
|
39
|
+
if (this.dataFrame) {
|
|
40
|
+
this.subs.push(DG.debounce(this.dataFrame.onRowsRemoved, 50).subscribe(async (_: any) => await this.render()));
|
|
41
|
+
const compute = this.name !== 'diversity';
|
|
42
|
+
this.subs.push(DG.debounce(this.dataFrame.onCurrentRowChanged, 50)
|
|
43
|
+
.subscribe(async (_: any) => await this.render(compute)));
|
|
44
|
+
this.subs.push(DG.debounce(this.dataFrame.selection.onChanged, 50)
|
|
45
|
+
.subscribe(async (_: any) => await this.render(false)));
|
|
46
|
+
this.subs.push(DG.debounce(ui.onSizeChanged(this.root), 50)
|
|
47
|
+
.subscribe(async (_: any) => await this.render(false)));
|
|
48
|
+
this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
49
|
+
this.moleculeColumnName = this.moleculeColumn?.name!;
|
|
50
|
+
this.getProperty('limit')!.fromOptions({min: 1, max: this.dataFrame.rowCount});
|
|
51
|
+
}
|
|
52
|
+
await this.render();
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
onPropertyChanged(property: DG.Property): void {
|
|
56
|
+
super.onPropertyChanged(property);
|
|
57
|
+
if (!this.initialized)
|
|
58
|
+
return;
|
|
59
|
+
if (property.name === 'moleculeColumnName') {
|
|
60
|
+
const col = this.dataFrame.col(property.get(this))!;
|
|
61
|
+
if (col.semType === DG.SEMTYPE.MACROMOLECULE)
|
|
62
|
+
this.moleculeColumn = col;
|
|
63
|
+
}
|
|
64
|
+
this.render();
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
async render(computeData = true) {
|
|
68
|
+
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
beforeRender() {
|
|
72
|
+
if (!this.initialized)
|
|
73
|
+
return false;
|
|
74
|
+
if (this.dataFrame && this.moleculeColumnName &&
|
|
75
|
+
this.dataFrame.col(this.moleculeColumnName)!.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
76
|
+
grok.shell.error(`${this.moleculeColumnName} is not Macromolecule type`);
|
|
77
|
+
return false;
|
|
78
|
+
}
|
|
79
|
+
return true;
|
|
80
|
+
}
|
|
81
|
+
}
|