@datagrok/bio 1.7.25 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +1 -1
- package/dist/package-test.js +983 -246
- package/dist/package.js +812 -168
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +26 -10
- package/files/samples/sample_HELM_empty_vals.csv +541 -0
- package/package.json +5 -5
- package/setup.sh +3 -0
- package/src/package.ts +28 -29
- package/src/tests/activity-cliffs-tests.ts +15 -34
- package/src/tests/activity-cliffs-utils.ts +19 -0
- package/src/tests/detectors-test.ts +2 -2
- package/src/tests/renderers-test.ts +6 -5
- package/src/tests/sequence-space-test.ts +17 -12
- package/src/tests/sequence-space-utils.ts +10 -0
- package/src/utils/cell-renderer.ts +59 -81
- package/src/utils/multiple-sequence-alignment.ts +8 -2
- package/src/utils/sequence-activity-cliffs.ts +5 -21
- package/src/viewers/vd-regions-viewer.ts +7 -0
- package/test-Bio-d4ef1f35c295-90ae719f.html +245 -0
- package/test-Bio-4f0c8bae6479-17115d45.html +0 -358
package/src/package.ts
CHANGED
|
@@ -15,7 +15,7 @@ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
|
15
15
|
import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
16
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
|
-
import {
|
|
18
|
+
import {drawSequences, sequenceGetSimilarities} from './utils/sequence-activity-cliffs';
|
|
19
19
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
@@ -26,6 +26,10 @@ import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler'
|
|
|
26
26
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
|
|
27
27
|
|
|
28
28
|
|
|
29
|
+
//tags: init
|
|
30
|
+
export async function initBio() {
|
|
31
|
+
}
|
|
32
|
+
|
|
29
33
|
//name: fastaSequenceCellRenderer
|
|
30
34
|
//tags: cellRenderer
|
|
31
35
|
//meta.cellType: Sequence
|
|
@@ -46,15 +50,16 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
|
|
|
46
50
|
|
|
47
51
|
function checkInputColumn(col: DG.Column, name: string,
|
|
48
52
|
allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
|
|
49
|
-
const
|
|
53
|
+
const notation: string = col.getTag(DG.TAGS.UNITS);
|
|
54
|
+
const alphabet: string = col.getTag('alphabet')
|
|
50
55
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
51
56
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
52
57
|
return false;
|
|
53
58
|
} else if (
|
|
54
59
|
(allowedAlphabets.length > 0 &&
|
|
55
|
-
!allowedAlphabets.some((a) =>
|
|
60
|
+
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))) ||
|
|
56
61
|
(allowedNotations.length > 0 &&
|
|
57
|
-
!allowedNotations.some((n) =>
|
|
62
|
+
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase())))
|
|
58
63
|
) {
|
|
59
64
|
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
60
65
|
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
@@ -107,7 +112,7 @@ export function vdRegionViewer() {
|
|
|
107
112
|
//input: double similarity = 80 [Similarity cutoff]
|
|
108
113
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
109
114
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
110
|
-
similarity: number, methodName: string): Promise<
|
|
115
|
+
similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
|
|
111
116
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
112
117
|
return;
|
|
113
118
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
@@ -117,8 +122,13 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
117
122
|
const options = {
|
|
118
123
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
119
124
|
};
|
|
120
|
-
const
|
|
121
|
-
|
|
125
|
+
const tags = {
|
|
126
|
+
'units': macroMolecule.tags['units'],
|
|
127
|
+
'aligned': macroMolecule.tags['aligned'],
|
|
128
|
+
'separator': macroMolecule.tags['separator'],
|
|
129
|
+
'alphabet': macroMolecule.tags['alphabet'],
|
|
130
|
+
}
|
|
131
|
+
const sp = await getActivityCliffs(
|
|
122
132
|
df,
|
|
123
133
|
macroMolecule,
|
|
124
134
|
encodedCol,
|
|
@@ -129,11 +139,12 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
129
139
|
'Levenshtein',
|
|
130
140
|
methodName,
|
|
131
141
|
DG.SEMTYPE.MACROMOLECULE,
|
|
132
|
-
|
|
142
|
+
tags,
|
|
133
143
|
sequenceSpace,
|
|
134
144
|
sequenceGetSimilarities,
|
|
135
|
-
|
|
145
|
+
drawSequences,
|
|
136
146
|
(options as any)[methodName]);
|
|
147
|
+
return sp;
|
|
137
148
|
}
|
|
138
149
|
|
|
139
150
|
//top-menu: Bio | Sequence Space...
|
|
@@ -144,7 +155,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
144
155
|
//input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
|
|
145
156
|
//input: bool plotEmbeddings = true
|
|
146
157
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
147
|
-
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<
|
|
158
|
+
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer|undefined> {
|
|
148
159
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
149
160
|
return;
|
|
150
161
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
@@ -165,14 +176,16 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
165
176
|
for (const col of embeddings) {
|
|
166
177
|
const listValues = col.toList();
|
|
167
178
|
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
168
|
-
table.columns.add(DG.Column.
|
|
169
|
-
}
|
|
179
|
+
table.columns.add(DG.Column.fromList('double', col.name, listValues));
|
|
180
|
+
}
|
|
181
|
+
let sp;
|
|
170
182
|
if (plotEmbeddings) {
|
|
171
183
|
for (const v of grok.shell.views) {
|
|
172
184
|
if (v.name === table.name)
|
|
173
|
-
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
185
|
+
sp = (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
174
186
|
}
|
|
175
187
|
}
|
|
188
|
+
return sp;
|
|
176
189
|
};
|
|
177
190
|
|
|
178
191
|
//top-menu: Bio | To Atomic Level...
|
|
@@ -188,22 +201,6 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
188
201
|
if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
|
|
189
202
|
return;
|
|
190
203
|
|
|
191
|
-
let currentView: DG.TableView;
|
|
192
|
-
for (const view of grok.shell.tableViews) {
|
|
193
|
-
if (df.name === view.name)
|
|
194
|
-
currentView = view;
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
// Some hack to activate Chem Molecule rendering
|
|
198
|
-
const file2 = await _package.files.readAsText('tests/sar-small.csv');
|
|
199
|
-
const df2 = DG.DataFrame.fromCsv(file2);
|
|
200
|
-
const v2 = grok.shell.addTableView(df2);
|
|
201
|
-
setTimeout(() => {
|
|
202
|
-
grok.shell.closeTable(df2);
|
|
203
|
-
v2.close();
|
|
204
|
-
grok.shell.v = currentView;
|
|
205
|
-
}, 100);
|
|
206
|
-
|
|
207
204
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
208
205
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
209
206
|
const atomicCodes = getMolfilesFromSeq(macroMolecule, monomersLibObject);
|
|
@@ -213,6 +210,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
213
210
|
col.semType = DG.SEMTYPE.MOLECULE;
|
|
214
211
|
col.tags[DG.TAGS.UNITS] = 'molblock';
|
|
215
212
|
df.columns.add(col, true);
|
|
213
|
+
await grok.data.detectSemanticTypes(df);
|
|
216
214
|
}
|
|
217
215
|
|
|
218
216
|
|
|
@@ -404,3 +402,4 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
404
402
|
resDf.name = `datasets_detectMacromolecule_${path}`;
|
|
405
403
|
return resDf;
|
|
406
404
|
}
|
|
405
|
+
|
|
@@ -1,58 +1,39 @@
|
|
|
1
|
-
import {after, before, category,
|
|
1
|
+
import {after, before, category, test} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
5
|
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
|
-
import {
|
|
8
|
-
import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
|
|
9
|
-
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
10
|
-
import {encodeMonomers} from '../utils/utils';
|
|
7
|
+
import { _testActivityCliffsOpen } from './activity-cliffs-utils';
|
|
11
8
|
|
|
12
9
|
|
|
13
10
|
category('activityCliffs', async () => {
|
|
14
11
|
let actCliffsTableView: DG.TableView;
|
|
15
12
|
let actCliffsDf: DG.DataFrame;
|
|
13
|
+
let actCliffsTableViewWithEmptyRows: DG.TableView;
|
|
14
|
+
let actCliffsDfWithEmptyRows: DG.DataFrame;
|
|
15
|
+
|
|
16
16
|
|
|
17
17
|
before(async () => {
|
|
18
18
|
actCliffsDf = await readDataframe('samples/sample_MSA.csv');
|
|
19
19
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
actCliffsDfWithEmptyRows = await readDataframe('samples/sample_HELM_empty_vals.csv');
|
|
21
|
+
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
22
22
|
});
|
|
23
23
|
|
|
24
24
|
after(async () => {
|
|
25
25
|
grok.shell.closeTable(actCliffsDf);
|
|
26
26
|
actCliffsTableView.close();
|
|
27
|
+
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
28
|
+
actCliffsTableViewWithEmptyRows.close();
|
|
27
29
|
});
|
|
28
30
|
|
|
29
31
|
test('activityCliffsOpen', async () => {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
const encodedCol = encodeMonomers(actCliffsDf.col('MSA')!) as DG.Column;
|
|
36
|
-
const scatterPlot = await getActivityCliffs(
|
|
37
|
-
actCliffsDf,
|
|
38
|
-
actCliffsDf.col('MSA')!,
|
|
39
|
-
encodedCol,
|
|
40
|
-
axesNames,
|
|
41
|
-
'Activity cliffs',
|
|
42
|
-
actCliffsDf.col('Activity')!,
|
|
43
|
-
50,
|
|
44
|
-
'Levenshtein',
|
|
45
|
-
't-SNE',
|
|
46
|
-
DG.SEMTYPE.MACROMOLECULE,
|
|
47
|
-
units,
|
|
48
|
-
sequenceSpace,
|
|
49
|
-
sequenceGetSimilarities,
|
|
50
|
-
drawTooltip);
|
|
51
|
-
|
|
52
|
-
expect(scatterPlot != null, true);
|
|
53
|
-
|
|
54
|
-
const cliffsLink = (Array.from(scatterPlot.root.children) as Element[])
|
|
55
|
-
.filter((it) => it.className === 'ui-btn ui-btn-ok');
|
|
56
|
-
expect((cliffsLink[0] as HTMLElement).innerText, '2362 cliffs');
|
|
32
|
+
await _testActivityCliffsOpen(actCliffsDf, 53, 'UMAP', 'MSA');
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
test('activityCliffsOpenWithEmptyRows', async () => {
|
|
36
|
+
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 53, 'UMAP', 'HELM');
|
|
57
37
|
});
|
|
38
|
+
|
|
58
39
|
});
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {delay, expect} from '@datagrok-libraries/utils/src/test';
|
|
3
|
+
import {_package} from '../package-test';
|
|
4
|
+
import { activityCliffs } from '../package';
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: string, colName: string) {
|
|
8
|
+
const scatterPlot = await activityCliffs(
|
|
9
|
+
df,
|
|
10
|
+
df.col(colName)!,
|
|
11
|
+
df.col('Activity')!,
|
|
12
|
+
80,
|
|
13
|
+
method);
|
|
14
|
+
|
|
15
|
+
expect(scatterPlot != null, true);
|
|
16
|
+
|
|
17
|
+
const cliffsLink = Array.from(scatterPlot!.root.children).filter(it => it.className === 'ui-btn ui-btn-ok');
|
|
18
|
+
expect((cliffsLink[0] as HTMLElement).innerText, `${numberCliffs} cliffs`);
|
|
19
|
+
}
|
|
@@ -281,7 +281,7 @@ MWRSWY-CKHP
|
|
|
281
281
|
});
|
|
282
282
|
|
|
283
283
|
test('samplesHelmCsvHELM', async () => {
|
|
284
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM', '
|
|
284
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, null);
|
|
285
285
|
});
|
|
286
286
|
|
|
287
287
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -297,7 +297,7 @@ MWRSWY-CKHP
|
|
|
297
297
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
298
298
|
});
|
|
299
299
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
300
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', '
|
|
300
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, null);
|
|
301
301
|
});
|
|
302
302
|
test('samplesTestHelmNegativeValid', async () => {
|
|
303
303
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {after, before, category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
1
|
+
import {after, before, category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
@@ -12,6 +12,7 @@ category('renderers', () => {
|
|
|
12
12
|
let dfList: DG.DataFrame[];
|
|
13
13
|
|
|
14
14
|
before(async () => {
|
|
15
|
+
await grok.functions.call('Bio:initBio');
|
|
15
16
|
tvList = [];
|
|
16
17
|
dfList = [];
|
|
17
18
|
});
|
|
@@ -46,16 +47,16 @@ category('renderers', () => {
|
|
|
46
47
|
expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
|
|
47
48
|
expect(srcSeqCol!.getTag('aligned'), 'SEQ');
|
|
48
49
|
expect(srcSeqCol!.getTag('alphabet'), 'PT');
|
|
49
|
-
expect(srcSeqCol!.getTag('cell.renderer'), '
|
|
50
|
+
expect(srcSeqCol!.getTag('cell.renderer'), 'sequence');
|
|
50
51
|
|
|
51
52
|
const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
|
|
52
53
|
tv.grid.invalidate();
|
|
53
|
-
|
|
54
|
+
|
|
54
55
|
expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
55
56
|
expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
|
|
56
57
|
expect(msaSeqCol!.getTag('aligned'), 'SEQ.MSA');
|
|
57
58
|
expect(msaSeqCol!.getTag('alphabet'), 'PT');
|
|
58
|
-
expect(msaSeqCol!.getTag('cell.renderer'), '
|
|
59
|
+
expect(msaSeqCol!.getTag('cell.renderer'), 'sequence');
|
|
59
60
|
|
|
60
61
|
dfList.push(df);
|
|
61
62
|
tvList.push(tv);
|
|
@@ -69,7 +70,7 @@ category('renderers', () => {
|
|
|
69
70
|
|
|
70
71
|
const srcCol: DG.Column = df.col('sequence')!;
|
|
71
72
|
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
72
|
-
expect(tgtCol.getTag('cell.renderer'), '
|
|
73
|
+
expect(tgtCol.getTag('cell.renderer'), 'sequence');
|
|
73
74
|
|
|
74
75
|
tvList.push(tv);
|
|
75
76
|
dfList.push(df);
|
|
@@ -1,30 +1,35 @@
|
|
|
1
|
-
import {after, before, category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
1
|
+
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
-
import {sequenceSpace} from '../utils/sequence-space';
|
|
4
3
|
import {readDataframe} from './utils';
|
|
5
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
|
+
import { _testSequenceSpaceReturnsResult } from './sequence-space-utils';
|
|
6
6
|
|
|
7
7
|
category('sequenceSpace', async () => {
|
|
8
8
|
let testFastaDf: DG.DataFrame;
|
|
9
|
+
let testFastaTableView: DG.TableView;
|
|
10
|
+
let testHelmWithEmptyRows: DG.DataFrame;
|
|
11
|
+
let testHelmWithEmptyRowsTableView: DG.TableView;
|
|
9
12
|
|
|
10
13
|
before(async () => {
|
|
11
14
|
testFastaDf = await readDataframe('samples/sample_FASTA.csv');
|
|
12
|
-
|
|
15
|
+
testFastaTableView = grok.shell.addTableView(testFastaDf);
|
|
16
|
+
testHelmWithEmptyRows = await readDataframe('samples/sample_HELM_empty_vals.csv');
|
|
17
|
+
testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
|
|
13
18
|
});
|
|
14
19
|
|
|
15
20
|
after(async () => {
|
|
16
21
|
grok.shell.closeTable(testFastaDf);
|
|
22
|
+
testFastaTableView.close();
|
|
23
|
+
grok.shell.closeTable(testHelmWithEmptyRows);
|
|
24
|
+
testHelmWithEmptyRowsTableView.close();
|
|
17
25
|
});
|
|
18
26
|
|
|
19
27
|
test('sequenceSpaceOpens', async () => {
|
|
20
|
-
|
|
21
|
-
seqCol: testFastaDf.col('Sequence')!,
|
|
22
|
-
methodName: 't-SNE',
|
|
23
|
-
similarityMetric: 'Levenshtein',
|
|
24
|
-
embedAxesNames: ['Embed_X', 'Embed_Y']
|
|
25
|
-
};
|
|
26
|
-
const res = await sequenceSpace(sequenceSpaceParams);
|
|
27
|
-
expect(res.coordinates != undefined, true);
|
|
28
|
-
expect(res.distance != undefined, true);
|
|
28
|
+
await _testSequenceSpaceReturnsResult(testFastaDf, 'UMAP', 'Sequence');
|
|
29
29
|
});
|
|
30
|
+
|
|
31
|
+
test('sequenceSpaceOpensWithEmptyRows', async () => {
|
|
32
|
+
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, 'UMAP', 'HELM');
|
|
33
|
+
});
|
|
34
|
+
|
|
30
35
|
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import { expect } from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import { sequenceSpaceTopMenu } from '../package';
|
|
5
|
+
|
|
6
|
+
export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: string, colName: string) {
|
|
7
|
+
await grok.data.detectSemanticTypes(df);
|
|
8
|
+
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, 'Levenshtein', true);
|
|
9
|
+
expect(sp != null, true);
|
|
10
|
+
}
|
|
@@ -6,9 +6,9 @@ import {UnknownSeqPalette, UnknownSeqPalettes} from '@datagrok-libraries/bio/src
|
|
|
6
6
|
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
7
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
8
8
|
import * as ui from 'datagrok-api/ui';
|
|
9
|
+
import {printLeftOrCentered} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
9
10
|
|
|
10
11
|
const undefinedColor = 'rgb(100,100,100)';
|
|
11
|
-
const grayColor = '#808080';
|
|
12
12
|
|
|
13
13
|
function getPalleteByType(paletteType: string): SeqPalette {
|
|
14
14
|
switch (paletteType) {
|
|
@@ -44,71 +44,6 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
/**
|
|
48
|
-
* A function that prints a string aligned to left or centered.
|
|
49
|
-
*
|
|
50
|
-
* @param {number} x x coordinate.
|
|
51
|
-
* @param {number} y y coordinate.
|
|
52
|
-
* @param {number} w Width.
|
|
53
|
-
* @param {number} h Height.
|
|
54
|
-
* @param {CanvasRenderingContext2D} g Canvas rendering context.
|
|
55
|
-
* @param {string} s String to print.
|
|
56
|
-
* @param {string} [color=undefinedColor] String color.
|
|
57
|
-
* @param {number} [pivot=0] Pirvot.
|
|
58
|
-
* @param {boolean} [left=false] Is left aligned.
|
|
59
|
-
* @param {number} [transparencyRate=0.0] Transparency rate where 1.0 is fully transparent
|
|
60
|
-
* @param {string} [separator=''] Is separator for sequence.
|
|
61
|
-
* @param {boolean} [last=false] Is checker if element last or not.
|
|
62
|
-
* @return {number} x coordinate to start printing at.
|
|
63
|
-
*/
|
|
64
|
-
export function printLeftOrCentered(
|
|
65
|
-
x: number, y: number, w: number, h: number,
|
|
66
|
-
g: CanvasRenderingContext2D, s: string, color = undefinedColor,
|
|
67
|
-
pivot: number = 0, left = false, transparencyRate: number = 1.0,
|
|
68
|
-
separator: string = '', last: boolean = false, drawStyle: string = 'classic', maxWord: any = {}, maxWordIdx: number = 0, gridCell: any = {}): number {
|
|
69
|
-
g.textAlign = 'start';
|
|
70
|
-
const colorPart = s.substring(0);
|
|
71
|
-
let grayPart = last ? '' : separator;
|
|
72
|
-
if (drawStyle === 'msa') {
|
|
73
|
-
grayPart = '';
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
let textSize: any = g.measureText(colorPart + grayPart);
|
|
77
|
-
const indent = 5;
|
|
78
|
-
|
|
79
|
-
let colorTextSize = g.measureText(colorPart).width;
|
|
80
|
-
const dy = (textSize.fontBoundingBoxAscent + textSize.fontBoundingBoxDescent) / 2;
|
|
81
|
-
textSize = textSize.width;
|
|
82
|
-
if (drawStyle === 'msa') {
|
|
83
|
-
if (colorTextSize > maxWord) {
|
|
84
|
-
maxWord[maxWordIdx] = colorTextSize;
|
|
85
|
-
gridCell.cell.column.temp = maxWord;
|
|
86
|
-
}
|
|
87
|
-
colorTextSize = maxWord[maxWordIdx];
|
|
88
|
-
textSize = maxWord[maxWordIdx];
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
function draw(dx1: number, dx2: number): void {
|
|
92
|
-
g.fillStyle = color;
|
|
93
|
-
g.globalAlpha = transparencyRate;
|
|
94
|
-
g.fillText(colorPart, x + dx1, y + dy);
|
|
95
|
-
if (drawStyle === 'classic') {
|
|
96
|
-
g.fillStyle = grayColor;
|
|
97
|
-
g.fillText(grayPart, x + dx2, y + dy);
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
if (left || textSize > w) {
|
|
102
|
-
draw(indent, indent + colorTextSize);
|
|
103
|
-
return x + colorTextSize + g.measureText(grayPart).width;
|
|
104
|
-
} else {
|
|
105
|
-
const dx = (w - textSize) / 2;
|
|
106
|
-
draw(dx, dx + colorTextSize);
|
|
107
|
-
return x + dx + colorTextSize;
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
|
|
112
47
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
113
48
|
get name(): string { return 'sequence'; }
|
|
114
49
|
|
|
@@ -118,6 +53,45 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
118
53
|
|
|
119
54
|
get defaultWidth(): number { return 230; }
|
|
120
55
|
|
|
56
|
+
onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
57
|
+
if (gridCell.cell.column.getTag('aligned') !== 'SEQ.MSA') {
|
|
58
|
+
return;
|
|
59
|
+
}
|
|
60
|
+
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|
|
61
|
+
if (maxLengthWordsSum == null) {
|
|
62
|
+
gridCell.cell.column.setTag('.calculatedCellRender', 'unexist');
|
|
63
|
+
}
|
|
64
|
+
const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
|
|
65
|
+
//@ts-ignore
|
|
66
|
+
const argsX = e.layerX - gridCell.gridColumn.left - ((gridCell.bounds.x<0) ? gridCell.bounds.x : 0);
|
|
67
|
+
let left = 0;
|
|
68
|
+
let right = maxIndex;
|
|
69
|
+
let found = false;
|
|
70
|
+
maxLengthWordsSum[maxIndex + 1] = argsX + 1;
|
|
71
|
+
let mid = 0;
|
|
72
|
+
if (argsX > maxLengthWordsSum[0]) {
|
|
73
|
+
while (!found) {
|
|
74
|
+
mid = Math.floor((right + left) / 2);
|
|
75
|
+
if (argsX >= maxLengthWordsSum[mid] && argsX <= maxLengthWordsSum[mid + 1]) {
|
|
76
|
+
left = mid;
|
|
77
|
+
found = true;
|
|
78
|
+
} else if (argsX < maxLengthWordsSum[mid]) {
|
|
79
|
+
right = mid - 1;
|
|
80
|
+
} else if (argsX > maxLengthWordsSum[mid + 1]) {
|
|
81
|
+
left = mid + 1;
|
|
82
|
+
}
|
|
83
|
+
if (left == right) {
|
|
84
|
+
found = true;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
left = (argsX >= maxLengthWordsSum[left]) ? left + 1 : left;
|
|
89
|
+
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
90
|
+
const splitterFunc: SplitterFunc = WebLogo.getSplitter('separator', separator);
|
|
91
|
+
const subParts: string[] = splitterFunc(gridCell.cell.value);
|
|
92
|
+
ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16);
|
|
93
|
+
}
|
|
94
|
+
|
|
121
95
|
/**
|
|
122
96
|
* Cell renderer function.
|
|
123
97
|
*
|
|
@@ -136,7 +110,6 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
136
110
|
): void {
|
|
137
111
|
const grid = gridCell.gridRow !== -1 ? gridCell.grid : undefined;
|
|
138
112
|
const cell = gridCell.cell;
|
|
139
|
-
const tag = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
140
113
|
const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
|
|
141
114
|
w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
142
115
|
g.save();
|
|
@@ -153,40 +126,46 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
153
126
|
const palette = getPalleteByType(paletteType);
|
|
154
127
|
|
|
155
128
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
156
|
-
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units,
|
|
129
|
+
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, separator);
|
|
157
130
|
|
|
158
131
|
const columns = gridCell.cell.column.categories;
|
|
159
132
|
let monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
|
|
160
133
|
let maxLengthOfMonomer = 8;
|
|
161
134
|
|
|
162
|
-
let maxLengthWords = {};
|
|
163
|
-
// check if gridCell.cell.column.temp is array
|
|
135
|
+
let maxLengthWords: any = {};
|
|
164
136
|
if (gridCell.cell.column.getTag('.calculatedCellRender') !== 'exist') {
|
|
165
137
|
for (let i = 0; i < columns.length; i++) {
|
|
166
138
|
let subParts: string[] = splitterFunc(columns[i]);
|
|
167
139
|
subParts.forEach((amino, index) => {
|
|
168
|
-
//@ts-ignore
|
|
169
140
|
let textSizeWidth = g.measureText(monomerToShortFunction(amino, maxLengthOfMonomer));
|
|
170
|
-
//@ts-ignore
|
|
171
141
|
if (textSizeWidth.width > (maxLengthWords[index] ?? 0)) {
|
|
172
|
-
//@ts-ignore
|
|
173
142
|
maxLengthWords[index] = textSizeWidth.width;
|
|
174
143
|
}
|
|
144
|
+
if (index > (maxLengthWords['bio-maxIndex'] ?? 0)) {
|
|
145
|
+
maxLengthWords['bio-maxIndex'] = index;
|
|
146
|
+
}
|
|
175
147
|
});
|
|
176
148
|
}
|
|
177
|
-
|
|
149
|
+
let maxLengthWordSum: any = {};
|
|
150
|
+
maxLengthWordSum[0] = maxLengthWords[0];
|
|
151
|
+
for (let i = 1; i <= maxLengthWords['bio-maxIndex']; i++) {
|
|
152
|
+
maxLengthWordSum[i] = maxLengthWordSum[i - 1] + maxLengthWords[i];
|
|
153
|
+
}
|
|
154
|
+
gridCell.cell.column.temp = {
|
|
155
|
+
'bio-sum-maxLengthWords': maxLengthWordSum,
|
|
156
|
+
'bio-maxIndex': maxLengthWords['bio-maxIndex'],
|
|
157
|
+
'bio-maxLengthWords': maxLengthWords
|
|
158
|
+
};
|
|
178
159
|
gridCell.cell.column.setTag('.calculatedCellRender', 'exist');
|
|
179
160
|
} else {
|
|
180
|
-
maxLengthWords = gridCell.cell.column.temp;
|
|
161
|
+
maxLengthWords = gridCell.cell.column.temp['bio-maxLengthWords'];
|
|
181
162
|
}
|
|
182
163
|
|
|
183
164
|
const subParts: string[] = splitterFunc(cell.value);
|
|
184
165
|
let x1 = x;
|
|
185
166
|
let color = undefinedColor;
|
|
186
|
-
// get max length word in subParts
|
|
187
|
-
let tagUnits = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
188
167
|
let drawStyle = 'classic';
|
|
189
|
-
if (
|
|
168
|
+
if (gridCell.cell.column.getTag('aligned').includes('MSA')) {
|
|
190
169
|
drawStyle = 'msa';
|
|
191
170
|
}
|
|
192
171
|
subParts.forEach((amino, index) => {
|
|
@@ -290,9 +269,8 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
290
269
|
// 28 is the height of the two substitutions on top of each other + space
|
|
291
270
|
const updatedY = Math.max(y, y + (h - 28) / 2);
|
|
292
271
|
|
|
293
|
-
let palette: SeqPalette = UnknownSeqPalettes.Color
|
|
294
|
-
|
|
295
|
-
palette = getPalleteByType(units.substring(units.length - 2));
|
|
272
|
+
let palette: SeqPalette = units == 'HELM' ? UnknownSeqPalettes.Color :
|
|
273
|
+
getPalleteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
|
|
296
274
|
|
|
297
275
|
const vShift = 7;
|
|
298
276
|
for (let i = 0; i < subParts1.length; i++) {
|
|
@@ -57,9 +57,15 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
|
|
|
57
57
|
|
|
58
58
|
// units
|
|
59
59
|
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
60
|
-
|
|
60
|
+
//aligned
|
|
61
|
+
const srcAligned = srcCol.getTag('aligned');
|
|
62
|
+
const tgtAligned = srcAligned + '.MSA';
|
|
63
|
+
//alphabet
|
|
64
|
+
const srcAlphabet = srcCol.getTag('alphabet');
|
|
61
65
|
|
|
62
|
-
tgtCol.setTag(DG.TAGS.UNITS,
|
|
66
|
+
tgtCol.setTag(DG.TAGS.UNITS, srcUnits);
|
|
67
|
+
tgtCol.setTag('aligned', tgtAligned);
|
|
68
|
+
tgtCol.setTag('alphabet', srcAlphabet);
|
|
63
69
|
tgtCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
64
70
|
return tgtCol;
|
|
65
71
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
@@ -12,25 +12,9 @@ export async function sequenceGetSimilarities(col: DG.Column, seq: string): Prom
|
|
|
12
12
|
return DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances);
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
export function
|
|
16
|
-
params.
|
|
17
|
-
|
|
18
|
-
ui.divText(
|
|
19
|
-
ui.divText(params.activity.name),
|
|
20
|
-
]);
|
|
21
|
-
columnNames.style.fontWeight = 'bold';
|
|
22
|
-
columnNames.style.display = 'flex';
|
|
23
|
-
columnNames.style.justifyContent = 'space-between';
|
|
24
|
-
params.tooltips[params.line.id].append(columnNames);
|
|
25
|
-
params.line.mols.forEach((mol: number) => {
|
|
26
|
-
const seq = ui.divText(params.df.get(params.seqCol.name, mol));
|
|
27
|
-
const activity = ui.divText(params.df.get(params.activity.name, mol).toFixed(2));
|
|
28
|
-
activity.style.display = 'flex';
|
|
29
|
-
activity.style.justifyContent = 'left';
|
|
30
|
-
activity.style.paddingLeft = '30px';
|
|
31
|
-
params.tooltips[params.line.id].append(ui.divV([
|
|
32
|
-
seq,
|
|
33
|
-
activity,
|
|
34
|
-
], {style: {paddingLeft: '5px'}}));
|
|
15
|
+
export function drawSequences(params: ITooltipAndPanelParams) {
|
|
16
|
+
params.line.mols.forEach((mol: number, index: number) => {
|
|
17
|
+
ui.empty(params.hosts[index]);
|
|
18
|
+
params.hosts[index].append(ui.divText(params.seqCol.get(mol)));
|
|
35
19
|
});
|
|
36
20
|
}
|
|
@@ -51,6 +51,7 @@ export class VdRegionsViewer extends DG.JsViewer {
|
|
|
51
51
|
public sequenceColumnNamePostfix: string;
|
|
52
52
|
|
|
53
53
|
public skipEmptyPositions: boolean;
|
|
54
|
+
public positionWidth: number;
|
|
54
55
|
|
|
55
56
|
|
|
56
57
|
public get df(): DG.DataFrame {
|
|
@@ -77,6 +78,7 @@ export class VdRegionsViewer extends DG.JsViewer {
|
|
|
77
78
|
this.sequenceColumnNamePostfix = this.string('sequenceColumnNamePostfix', 'chain sequence');
|
|
78
79
|
|
|
79
80
|
this.skipEmptyPositions = this.bool('skipEmptyPositions', false);
|
|
81
|
+
this.positionWidth = this.float('positionWidth', 16);
|
|
80
82
|
}
|
|
81
83
|
|
|
82
84
|
public async init() {
|
|
@@ -135,6 +137,10 @@ export class VdRegionsViewer extends DG.JsViewer {
|
|
|
135
137
|
await this.destroyView();
|
|
136
138
|
await this.buildView();
|
|
137
139
|
break;
|
|
140
|
+
case 'positionWidth':
|
|
141
|
+
await this.destroyView();
|
|
142
|
+
await this.buildView();
|
|
143
|
+
break;
|
|
138
144
|
}
|
|
139
145
|
}
|
|
140
146
|
}
|
|
@@ -205,6 +211,7 @@ export class VdRegionsViewer extends DG.JsViewer {
|
|
|
205
211
|
endPositionName: region!.positionEndName,
|
|
206
212
|
fixWidth: true,
|
|
207
213
|
skipEmptyPositions: this.skipEmptyPositions,
|
|
214
|
+
positionWidth: this.positionWidth,
|
|
208
215
|
})) as unknown as WebLogo;
|
|
209
216
|
}
|
|
210
217
|
// WebLogo creation fires onRootSizeChanged event even before control being added to this.logos
|