@datagrok/bio 1.11.0 → 1.11.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -13
- package/dist/package-test.js +282 -206
- package/dist/package.js +197 -163
- package/package.json +10 -5
- package/src/package.ts +4 -3
- package/src/tests/checkInputColumn-tests.ts +41 -38
- package/src/tests/convert-test.ts +6 -3
- package/src/tests/renderers-test.ts +51 -17
- package/src/tests/sequence-space-utils.ts +8 -3
- package/src/tests/test-sequnces-generators.ts +16 -21
- package/src/utils/cell-renderer.ts +16 -16
- package/src/utils/convert.ts +4 -1
- package/src/utils/sequence-activity-cliffs.ts +106 -9
- package/test-Bio-a3ba57cf36f3-3bb13d3a.html +261 -0
- package/test-Bio-eb4783c07294-8e35df79.html +0 -359
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "1.11.0",
|
|
8
|
+
"version": "1.11.3",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,12 +14,12 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": ">=2.4.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^4.2.
|
|
17
|
+
"@datagrok-libraries/bio": "^4.2.1",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.0",
|
|
19
|
-
"@datagrok-libraries/ml": "^
|
|
19
|
+
"@datagrok-libraries/ml": "^6.0.0",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.6.2",
|
|
21
21
|
"cash-dom": "latest",
|
|
22
|
-
"datagrok-api": "^1.6.
|
|
22
|
+
"datagrok-api": "^1.6.7",
|
|
23
23
|
"dayjs": "^1.11.4",
|
|
24
24
|
"openchemlib": "6.0.1",
|
|
25
25
|
"rxjs": "^6.5.5",
|
|
@@ -48,6 +48,7 @@
|
|
|
48
48
|
"link-api": "npm link datagrok-api",
|
|
49
49
|
"link-bio": "npm link @datagrok-libraries/bio",
|
|
50
50
|
"link-ml": "npm link @datagrok-libraries/ml",
|
|
51
|
+
"link-utils": "npm link @datagrok-libraries/utils",
|
|
51
52
|
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio @datagrok-libraries/ml",
|
|
52
53
|
"debug-sequences1": "grok publish",
|
|
53
54
|
"release-sequences1": "grok publish --release",
|
|
@@ -69,7 +70,11 @@
|
|
|
69
70
|
"Developers"
|
|
70
71
|
],
|
|
71
72
|
"sources": [
|
|
72
|
-
"css/helm.css"
|
|
73
|
+
"css/helm.css",
|
|
74
|
+
"https://ajax.googleapis.com/ajax/libs/dojo/1.10.4/dojo/dojo.js",
|
|
75
|
+
"helm/JSDraw/Scilligence.JSDraw2.Lite.js",
|
|
76
|
+
"helm/JSDraw/Scilligence.JSDraw2.Resources.js",
|
|
77
|
+
"helm/JSDraw/Pistoia.HELM-uncompressed.js"
|
|
73
78
|
],
|
|
74
79
|
"category": "Bioinformatics"
|
|
75
80
|
}
|
package/src/package.ts
CHANGED
|
@@ -15,7 +15,7 @@ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
|
15
15
|
import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
16
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
|
-
import {
|
|
18
|
+
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './utils/sequence-activity-cliffs';
|
|
19
19
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
@@ -180,8 +180,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
180
180
|
DG.SEMTYPE.MACROMOLECULE,
|
|
181
181
|
tags,
|
|
182
182
|
sequenceSpace,
|
|
183
|
-
|
|
184
|
-
|
|
183
|
+
getSimilaritiesMarix,
|
|
184
|
+
createTooltipElement,
|
|
185
|
+
createPropPanelElement,
|
|
185
186
|
(options as any)[methodName]);
|
|
186
187
|
return sp;
|
|
187
188
|
}
|
|
@@ -4,7 +4,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
|
-
import {checkInputColumn} from '../package';
|
|
7
|
+
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
8
8
|
import {UNITS} from 'datagrok-api/dg';
|
|
9
9
|
import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
10
|
|
|
@@ -17,53 +17,56 @@ seq2,
|
|
|
17
17
|
seq3,
|
|
18
18
|
seq4`;
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
test('testMsaPos', async () => {
|
|
21
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
22
|
+
const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
21
23
|
|
|
22
|
-
|
|
23
|
-
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
24
|
-
const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
24
|
+
let k = 11;
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
27
|
+
const col: DG.Column = df.getCol('seq');
|
|
28
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
29
|
+
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
30
|
+
col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
27
31
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
31
|
-
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
32
|
-
col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
32
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
33
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
33
34
|
|
|
34
|
-
|
|
35
|
-
|
|
35
|
+
expect(res, true);
|
|
36
|
+
});
|
|
36
37
|
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
test('testMsaNegHelm', async () => {
|
|
39
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
40
|
+
const col: DG.Column = df.getCol('seq');
|
|
41
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
|
+
col.setTag(DG.TAGS.UNITS, 'helm');
|
|
43
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
44
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
39
45
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const col: DG.Column = df.getCol('seq');
|
|
43
|
-
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
44
|
-
col.setTag(DG.TAGS.UNITS, 'helm');
|
|
45
|
-
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
46
|
-
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
46
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
47
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
47
48
|
|
|
48
|
-
|
|
49
|
-
|
|
49
|
+
expect(res, false);
|
|
50
|
+
});
|
|
50
51
|
|
|
51
|
-
|
|
52
|
-
|
|
52
|
+
test('testMsaNegUN', async () => {
|
|
53
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
54
|
+
const col: DG.Column = df.getCol('seq');
|
|
55
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
56
|
+
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
57
|
+
col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
|
|
58
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
59
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
53
60
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
const col: DG.Column = df.getCol('seq');
|
|
57
|
-
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
58
|
-
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
59
|
-
col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
|
|
60
|
-
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
61
|
-
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
61
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
62
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
62
63
|
|
|
63
|
-
|
|
64
|
-
|
|
64
|
+
expect(res, false);
|
|
65
|
+
});
|
|
65
66
|
|
|
66
|
-
|
|
67
|
-
});
|
|
67
|
+
test('testGetActionFunctionMeta', async () => {
|
|
68
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
69
|
+
const sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
70
|
+
let k = 11;
|
|
68
71
|
});
|
|
69
72
|
});
|
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
import {ConverterFunc} from './types';
|
|
7
7
|
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
8
|
-
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
|
|
10
10
|
// import {mmSemType} from '../const';
|
|
11
11
|
// import {importFasta} from '../package';
|
|
@@ -145,14 +145,17 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
145
145
|
|
|
146
146
|
async function _testConvert(srcKey: string, converter: ConverterFunc, tgtKey: string) {
|
|
147
147
|
const srcDf: DG.DataFrame = await readCsv(srcKey);
|
|
148
|
-
const srcCol: DG.Column = srcDf.
|
|
148
|
+
const srcCol: DG.Column = srcDf.getCol('seq');
|
|
149
149
|
|
|
150
|
+
// conversion results
|
|
150
151
|
const resCol: DG.Column = converter(srcCol);
|
|
151
152
|
|
|
153
|
+
// The correct reference data to compare conversion results with.
|
|
152
154
|
const tgtDf: DG.DataFrame = await readCsv(tgtKey);
|
|
153
|
-
const tgtCol: DG.Column = tgtDf.
|
|
155
|
+
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
154
156
|
|
|
155
157
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
158
|
+
const uh: UnitsHandler = new UnitsHandler(resCol);
|
|
156
159
|
}
|
|
157
160
|
|
|
158
161
|
// FASTA tests
|
|
@@ -3,9 +3,9 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
|
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
6
|
-
import {readDataframe} from './utils';
|
|
7
6
|
import {convertDo} from '../utils/convert';
|
|
8
7
|
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
9
9
|
|
|
10
10
|
category('renderers', () => {
|
|
11
11
|
let tvList: DG.TableView[];
|
|
@@ -30,33 +30,45 @@ category('renderers', () => {
|
|
|
30
30
|
await _testAfterConvert();
|
|
31
31
|
});
|
|
32
32
|
|
|
33
|
+
test('setRenderer', async () => {
|
|
34
|
+
await _setRendererManually();
|
|
35
|
+
});
|
|
36
|
+
|
|
33
37
|
async function _testAfterMsa() {
|
|
34
38
|
const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
|
|
35
39
|
const df: DG.DataFrame = importFasta(fastaTxt)[0];
|
|
40
|
+
|
|
41
|
+
const srcSeqCol: DG.Column = df.getCol('sequence');
|
|
42
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
|
|
43
|
+
if (semType)
|
|
44
|
+
srcSeqCol.semType = semType;
|
|
45
|
+
|
|
36
46
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
47
|
+
// call to calculate 'cell.renderer' tag
|
|
37
48
|
await grok.data.detectSemanticTypes(df);
|
|
38
|
-
console.log('Bio: tests/renderers/afterMsa, table view');
|
|
39
49
|
|
|
40
|
-
|
|
41
|
-
expect(srcSeqCol !== null, true);
|
|
50
|
+
console.log('Bio: tests/renderers/afterMsa, table view');
|
|
42
51
|
|
|
43
52
|
console.log('Bio: tests/renderers/afterMsa, src before test ' +
|
|
44
53
|
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
|
|
45
54
|
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
46
|
-
expect(srcSeqCol
|
|
47
|
-
expect(srcSeqCol
|
|
48
|
-
expect(srcSeqCol
|
|
49
|
-
expect(srcSeqCol
|
|
50
|
-
expect(srcSeqCol
|
|
55
|
+
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
56
|
+
expect(srcSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
57
|
+
expect(srcSeqCol.getTag(UnitsHandler.TAGS.aligned), 'SEQ');
|
|
58
|
+
expect(srcSeqCol.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
|
|
59
|
+
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
51
60
|
|
|
52
|
-
const msaSeqCol: DG.Column
|
|
61
|
+
const msaSeqCol: DG.Column = (await multipleSequenceAlignmentAny(df, srcSeqCol!))!;
|
|
53
62
|
tv.grid.invalidate();
|
|
54
63
|
|
|
55
|
-
expect(msaSeqCol
|
|
56
|
-
expect(msaSeqCol
|
|
57
|
-
expect(msaSeqCol
|
|
58
|
-
expect(msaSeqCol
|
|
59
|
-
expect(msaSeqCol
|
|
64
|
+
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
65
|
+
expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
66
|
+
expect(msaSeqCol.getTag(UnitsHandler.TAGS.aligned), 'SEQ.MSA');
|
|
67
|
+
expect(msaSeqCol.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
|
|
68
|
+
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
69
|
+
|
|
70
|
+
// check newColumn with UnitsHandler constructor
|
|
71
|
+
const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
|
|
60
72
|
|
|
61
73
|
dfList.push(df);
|
|
62
74
|
tvList.push(tv);
|
|
@@ -66,13 +78,35 @@ category('renderers', () => {
|
|
|
66
78
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA_PT.csv');
|
|
67
79
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
68
80
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
69
|
-
await grok.data.detectSemanticTypes(df);
|
|
70
81
|
|
|
71
82
|
const srcCol: DG.Column = df.col('sequence')!;
|
|
83
|
+
// await grok.data.detectSemanticTypes(df);
|
|
84
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
|
|
85
|
+
if (semType)
|
|
86
|
+
srcCol.semType = semType;
|
|
87
|
+
await grok.data.detectSemanticTypes(df);
|
|
88
|
+
|
|
72
89
|
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
73
90
|
expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
74
91
|
|
|
92
|
+
// check tgtCol with UnitsHandler constructor
|
|
93
|
+
const uh: UnitsHandler = new UnitsHandler(tgtCol);
|
|
94
|
+
|
|
75
95
|
tvList.push(tv);
|
|
76
96
|
dfList.push(df);
|
|
77
|
-
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
async function _setRendererManually() {
|
|
100
|
+
const df = DG.DataFrame.fromColumns([DG.Column.fromStrings(
|
|
101
|
+
'SequencesDiff', ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV'])]);
|
|
102
|
+
df.col('SequencesDiff')!.tags[DG.TAGS.UNITS] = 'separator';
|
|
103
|
+
df.col('SequencesDiff')!.tags[TAGS.SEPARATOR] = '/';
|
|
104
|
+
df.col('SequencesDiff')!.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
105
|
+
const tw = grok.shell.addTableView(df);
|
|
106
|
+
await delay(100);
|
|
107
|
+
const renderer = tw.dataFrame.col('SequencesDiff')?.getTag(DG.TAGS.CELL_RENDERER);
|
|
108
|
+
if (renderer !== 'MacromoleculeDifferenceCR')
|
|
109
|
+
throw new Error(`Units 'separator', separator '/' and semType 'MacromoleculeDifference' have been ` +
|
|
110
|
+
`manually set on column but after df aws added as table view renderer has been reset to '${renderer}'`);
|
|
111
|
+
}
|
|
78
112
|
});
|
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
3
|
+
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {sequenceSpaceTopMenu} from '../package';
|
|
5
5
|
|
|
6
6
|
export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: string, colName: string) {
|
|
7
|
-
await grok.data.detectSemanticTypes(df);
|
|
7
|
+
// await grok.data.detectSemanticTypes(df);
|
|
8
|
+
const col: DG.Column = df.getCol(colName);
|
|
9
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
10
|
+
if (semType)
|
|
11
|
+
col.semType = semType;
|
|
12
|
+
|
|
8
13
|
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, 'Levenshtein', true);
|
|
9
14
|
expect(sp != null, true);
|
|
10
15
|
}
|
|
@@ -1,27 +1,22 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import {DataFrame} from 'datagrok-api/dg';
|
|
3
4
|
|
|
4
|
-
export function generateManySequences(): string {
|
|
5
|
-
let
|
|
6
|
-
meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
}
|
|
10
|
-
return csvData;
|
|
5
|
+
export function generateManySequences(): DG.Column[] {
|
|
6
|
+
let columns: DG.Column[] = [];
|
|
7
|
+
columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 6).fill('meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me')));
|
|
8
|
+
columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 6).fill('5.30751')));
|
|
9
|
+
return columns;
|
|
11
10
|
}
|
|
12
11
|
|
|
13
|
-
export function generateLongSequence(): string {
|
|
14
|
-
let
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
let csvData = `MSA,Activity `;
|
|
20
|
-
for (let i = 0; i <= 10 ** 1 * 4; i++) {
|
|
21
|
-
csvData += `\n ${longSequence}`;
|
|
22
|
-
}
|
|
23
|
-
return csvData;
|
|
12
|
+
export function generateLongSequence(): DG.Column[] {
|
|
13
|
+
let columns: DG.Column[] = [];
|
|
14
|
+
const longSequence = `meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr`.repeat(10 ** 5);
|
|
15
|
+
columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 2).fill(longSequence)));
|
|
16
|
+
columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 2).fill('7.30751')));
|
|
17
|
+
return columns;
|
|
24
18
|
}
|
|
19
|
+
|
|
25
20
|
export function setTagsMacromolecule(col: DG.Column) {
|
|
26
21
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
27
22
|
col.setTag('units', 'separator');
|
|
@@ -31,10 +26,10 @@ export function setTagsMacromolecule(col: DG.Column) {
|
|
|
31
26
|
return col;
|
|
32
27
|
}
|
|
33
28
|
|
|
34
|
-
export function performanceTest(generateFunc: () => string, testName: string) {
|
|
29
|
+
export function performanceTest(generateFunc: () => DG.Column[], testName: string) {
|
|
30
|
+
const columns = generateFunc();
|
|
31
|
+
const df: DG.DataFrame = DG.DataFrame.fromColumns(columns);
|
|
35
32
|
const startTime: number = Date.now();
|
|
36
|
-
const csv = generateFunc();
|
|
37
|
-
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
38
33
|
const col: DG.Column = df.columns.byName('MSA');
|
|
39
34
|
setTagsMacromolecule(col);
|
|
40
35
|
grok.shell.addTableView(df);
|
|
@@ -2,7 +2,7 @@ import * as C from './constants';
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
4
4
|
import {NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
5
|
-
import {
|
|
5
|
+
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
6
6
|
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
7
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
8
8
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -14,7 +14,7 @@ const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => str
|
|
|
14
14
|
const gapRenderer = 5;
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
function getPalleteByType(paletteType: string): SeqPalette {
|
|
17
|
+
function getPaletteByType(paletteType: string): SeqPalette {
|
|
18
18
|
switch (paletteType) {
|
|
19
19
|
case 'PT':
|
|
20
20
|
return AminoacidsPalettes.GrokGroups;
|
|
@@ -30,6 +30,10 @@ function getPalleteByType(paletteType: string): SeqPalette {
|
|
|
30
30
|
}
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
+
function getUpdatedWidth(grid: DG.Grid | null, g: CanvasRenderingContext2D, x: number, w: number): number {
|
|
34
|
+
return grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
35
|
+
}
|
|
36
|
+
|
|
33
37
|
export function processSequence(subParts: string[]): [string[], boolean] {
|
|
34
38
|
const simplified = !subParts.some((amino, index) =>
|
|
35
39
|
amino.length > 1 &&
|
|
@@ -63,8 +67,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
63
67
|
}
|
|
64
68
|
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|
|
65
69
|
const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
|
|
66
|
-
|
|
67
|
-
const argsX = e.layerX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
|
|
70
|
+
const argsX = e.offsetX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
|
|
68
71
|
let left = 0;
|
|
69
72
|
let right = maxIndex;
|
|
70
73
|
let found = false;
|
|
@@ -109,23 +112,22 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
109
112
|
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
110
113
|
cellStyle: DG.GridCellStyle
|
|
111
114
|
): void {
|
|
112
|
-
const grid = gridCell.gridRow !== -1 ? gridCell.grid :
|
|
115
|
+
const grid = gridCell.gridRow !== -1 ? gridCell.grid : null;
|
|
113
116
|
const cell = gridCell.cell;
|
|
114
|
-
const
|
|
117
|
+
const paletteType = gridCell.cell.column.getTag(C.TAGS.ALPHABET);
|
|
115
118
|
const minDistanceRenderer = 50;
|
|
116
|
-
w =
|
|
119
|
+
w = getUpdatedWidth(grid, g, x, w);
|
|
117
120
|
g.save();
|
|
118
121
|
g.beginPath();
|
|
119
122
|
g.rect(x, y, w, h);
|
|
120
123
|
g.clip();
|
|
121
124
|
g.font = '12px monospace';
|
|
122
125
|
g.textBaseline = 'top';
|
|
123
|
-
const s: string = cell.value ?? '';
|
|
124
126
|
|
|
125
127
|
//TODO: can this be replaced/merged with splitSequence?
|
|
126
128
|
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
127
129
|
|
|
128
|
-
const palette =
|
|
130
|
+
const palette = getPaletteByType(paletteType);
|
|
129
131
|
|
|
130
132
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
131
133
|
const splitLimit = gridCell.bounds.width / 5;
|
|
@@ -184,10 +186,8 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
184
186
|
g.fillStyle = undefinedColor;
|
|
185
187
|
let last = index === subParts.length - 1;
|
|
186
188
|
x1 = printLeftOrCentered(x1, y, w, h, g, monomerToShortFunction(amino, maxLengthOfMonomer), color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell);
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
}
|
|
190
|
-
return true;
|
|
189
|
+
return x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) <= gridCell.bounds.width;
|
|
190
|
+
|
|
191
191
|
});
|
|
192
192
|
|
|
193
193
|
g.restore();
|
|
@@ -226,7 +226,7 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
|
|
|
226
226
|
g.font = `12px monospace`;
|
|
227
227
|
g.textBaseline = 'top';
|
|
228
228
|
|
|
229
|
-
const palette =
|
|
229
|
+
const palette = getPaletteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
|
|
230
230
|
const s: string = gridCell.cell.value ? gridCell.cell.value : '-';
|
|
231
231
|
const color = palette.get(s);
|
|
232
232
|
|
|
@@ -262,7 +262,7 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
262
262
|
const grid = gridCell.grid;
|
|
263
263
|
const cell = gridCell.cell;
|
|
264
264
|
|
|
265
|
-
w =
|
|
265
|
+
w = getUpdatedWidth(grid, g, w, x);
|
|
266
266
|
g.save();
|
|
267
267
|
g.beginPath();
|
|
268
268
|
g.rect(x, y, w, h);
|
|
@@ -286,7 +286,7 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
286
286
|
|
|
287
287
|
let palette: SeqPalette = UnknownSeqPalettes.Color;
|
|
288
288
|
if (units != 'HELM')
|
|
289
|
-
palette =
|
|
289
|
+
palette = getPaletteByType(units.substring(units.length - 2));
|
|
290
290
|
|
|
291
291
|
const vShift = 7;
|
|
292
292
|
for (let i = 0; i < subParts1.length; i++) {
|
package/src/utils/convert.ts
CHANGED
|
@@ -5,7 +5,7 @@ import $ from 'cash-dom';
|
|
|
5
5
|
|
|
6
6
|
import {Subscription} from 'rxjs';
|
|
7
7
|
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
8
|
-
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
let convertDialog: DG.Dialog | null = null;
|
|
@@ -85,6 +85,9 @@ export async function convertDo(
|
|
|
85
85
|
const converter = new NotationConverter(srcCol);
|
|
86
86
|
const newColumn = converter.convert(targetNotation, separator);
|
|
87
87
|
srcCol.dataFrame.columns.add(newColumn);
|
|
88
|
+
|
|
89
|
+
// call to calculate 'cell.renderer' tag
|
|
88
90
|
await grok.data.detectSemanticTypes(srcCol.dataFrame);
|
|
91
|
+
|
|
89
92
|
return newColumn;
|
|
90
93
|
}
|
|
@@ -3,18 +3,115 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
5
5
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
6
|
+
import * as grok from 'datagrok-api/grok';
|
|
7
|
+
import { SplitterFunc, WebLogo } from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
|
+
import { UnitsHandler } from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
6
9
|
|
|
7
|
-
export async function
|
|
10
|
+
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
8
11
|
const stringArray = col.toList();
|
|
9
|
-
const distances = new Array(stringArray.length).fill(0
|
|
10
|
-
for (let i = 0; i < stringArray.length; ++i)
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
const distances = new Array(stringArray.length).fill(0);
|
|
13
|
+
for (let i = 0; i < stringArray.length; ++i) {
|
|
14
|
+
const distance = stringArray[i] ? AvailableMetrics['String']['Levenshtein'](stringArray[i], seq) : null;
|
|
15
|
+
distances[i] = distance ? distance/Math.max((stringArray[i] as string).length, seq.length) : null;
|
|
16
|
+
}
|
|
17
|
+
return distances;
|
|
13
18
|
}
|
|
14
19
|
|
|
15
|
-
export function
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
20
|
+
export async function getSimilaritiesMarix(dim: number, seqCol: DG.Column, df: DG.DataFrame, colName: string, simArr: DG.Column[])
|
|
21
|
+
: Promise<DG.Column[]> {
|
|
22
|
+
|
|
23
|
+
const distances = new Array(simArr.length).fill(null);
|
|
24
|
+
for (let i = 0; i != dim - 1; ++i) {
|
|
25
|
+
const seq: string = seqCol.get(i);
|
|
26
|
+
df.rows.removeAt(0, 1, false);
|
|
27
|
+
distances[i] = (await getDistances(df.col(colName)!, seq))!;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
for (let i = 0; i < distances.length; i++) {
|
|
31
|
+
for (let j = 0; j < distances[i].length; j++) {
|
|
32
|
+
distances[i][j] = getSimilarityFromDistance(distances[i][j]);
|
|
33
|
+
}
|
|
34
|
+
simArr[i] = DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances[i]);
|
|
35
|
+
}
|
|
36
|
+
return simArr;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
export function createTooltipElement(params: ITooltipAndPanelParams): HTMLDivElement {
|
|
40
|
+
const tooltipElement = ui.divH([]);
|
|
41
|
+
const columnNames = ui.divV([
|
|
42
|
+
ui.divText(params.seqCol.name),
|
|
43
|
+
ui.divText(params.activityCol.name),
|
|
44
|
+
]);
|
|
45
|
+
columnNames.style.fontWeight = 'bold';
|
|
46
|
+
columnNames.style.display = 'flex';
|
|
47
|
+
columnNames.style.justifyContent = 'space-between';
|
|
48
|
+
tooltipElement.append(columnNames);
|
|
49
|
+
params.line.mols.forEach((molIdx: number, idx: number) => {
|
|
50
|
+
const activity = ui.divText(params.activityCol.get(molIdx).toFixed(2));
|
|
51
|
+
activity.style.display = 'flex';
|
|
52
|
+
activity.style.justifyContent = 'left';
|
|
53
|
+
activity.style.paddingLeft = '30px';
|
|
54
|
+
tooltipElement.append(ui.divV([
|
|
55
|
+
ui.divText(params.seqCol.get(molIdx)),
|
|
56
|
+
activity,
|
|
57
|
+
]));
|
|
19
58
|
});
|
|
59
|
+
return tooltipElement;
|
|
20
60
|
}
|
|
61
|
+
|
|
62
|
+
function moleculeInfo(df: DG.DataFrame, idx: number, seqColName: string): HTMLElement {
|
|
63
|
+
let dict: {[key: string]: string} = {};
|
|
64
|
+
for (let col of df.columns) {
|
|
65
|
+
if(col.name !== seqColName) {
|
|
66
|
+
dict[col.name] = df.get(col.name, idx);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
return ui.tableFromMap(dict);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivElement {
|
|
74
|
+
const propPanel = ui.divV([]);
|
|
75
|
+
const columnNames = ui.divH([
|
|
76
|
+
ui.divText(params.seqCol.name),
|
|
77
|
+
ui.divText(params.activityCol.name),
|
|
78
|
+
]);
|
|
79
|
+
columnNames.style.fontWeight = 'bold';
|
|
80
|
+
columnNames.style.justifyContent = 'space-between';
|
|
81
|
+
propPanel.append(columnNames);
|
|
82
|
+
const hosts: HTMLDivElement[] = [];
|
|
83
|
+
params.line.mols.forEach((molIdx: number, hostIdx: number) => {
|
|
84
|
+
const activity = ui.divText(params.activityCol.get(molIdx).toFixed(2));
|
|
85
|
+
activity.style.paddingLeft = '15px';
|
|
86
|
+
activity.style.paddingLeft = '10px';
|
|
87
|
+
const molHost = ui.divText(params.seqCol.get(molIdx));
|
|
88
|
+
if (params.df.currentRowIdx === molIdx) {
|
|
89
|
+
molHost.style.border = 'solid 1px lightgrey';
|
|
90
|
+
}
|
|
91
|
+
//@ts-ignore
|
|
92
|
+
ui.tooltip.bind(molHost, () => moleculeInfo(params.df, molIdx, params.seqCol.name));
|
|
93
|
+
molHost.onclick = () => {
|
|
94
|
+
const obj = grok.shell.o;
|
|
95
|
+
molHost.style.border = 'solid 1px lightgrey';
|
|
96
|
+
params.df.currentRowIdx = molIdx;
|
|
97
|
+
hosts.forEach((h, i) => {
|
|
98
|
+
if (i !== hostIdx) {
|
|
99
|
+
h.style.border = '';
|
|
100
|
+
}
|
|
101
|
+
})
|
|
102
|
+
setTimeout(() => {
|
|
103
|
+
grok.shell.o = obj
|
|
104
|
+
}, 1000);
|
|
105
|
+
};
|
|
106
|
+
propPanel.append(ui.divH([
|
|
107
|
+
molHost,
|
|
108
|
+
activity,
|
|
109
|
+
]));
|
|
110
|
+
hosts.push(molHost);
|
|
111
|
+
});
|
|
112
|
+
propPanel.append(ui.divH([
|
|
113
|
+
ui.divText(`Cliff: `, {style: {fontWeight: 'bold', paddingRight: '5px'}}),
|
|
114
|
+
ui.divText(params.sali!.toFixed(2))
|
|
115
|
+
]));
|
|
116
|
+
return propPanel;
|
|
117
|
+
}
|