@datagrok/bio 1.10.2 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/detectors.js +45 -20
- package/dist/package-test.js +600 -323
- package/dist/package.js +404 -264
- package/package.json +5 -4
- package/src/package-test.ts +1 -0
- package/src/package.ts +73 -30
- package/src/tests/WebLogo-positions-test.ts +10 -9
- package/src/tests/checkInputColumn-tests.ts +72 -0
- package/src/tests/convert-test.ts +6 -3
- package/src/tests/detectors-test.ts +3 -3
- package/src/tests/renderers-test.ts +56 -22
- package/src/tests/sequence-space-utils.ts +8 -3
- package/src/tests/splitters-test.ts +15 -0
- package/src/tests/test-sequnces-generators.ts +16 -21
- package/src/utils/cell-renderer.ts +18 -17
- package/src/utils/constants.ts +3 -5
- package/src/utils/convert.ts +5 -2
- package/src/utils/multiple-sequence-alignment.ts +5 -4
- package/src/utils/sequence-activity-cliffs.ts +120 -8
- package/src/utils/sequence-space.ts +1 -1
- package/src/utils/utils.ts +3 -2
- package/test-Bio-f1ac5a5eade4-a0f7e8c0.html +261 -0
- package/src/utils/split-to-monomers.ts +0 -8
- package/test-Bio-eb4783c07294-0aa1538b.html +0 -355
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "1.10.2",
|
|
8
|
+
"version": "1.11.2",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,12 +14,12 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": ">=2.4.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^4.1
|
|
17
|
+
"@datagrok-libraries/bio": "^4.2.1",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.0",
|
|
19
|
-
"@datagrok-libraries/ml": "^
|
|
19
|
+
"@datagrok-libraries/ml": "^6.0.0",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.6.2",
|
|
21
21
|
"cash-dom": "latest",
|
|
22
|
-
"datagrok-api": "^1.
|
|
22
|
+
"datagrok-api": "^1.6.7",
|
|
23
23
|
"dayjs": "^1.11.4",
|
|
24
24
|
"openchemlib": "6.0.1",
|
|
25
25
|
"rxjs": "^6.5.5",
|
|
@@ -48,6 +48,7 @@
|
|
|
48
48
|
"link-api": "npm link datagrok-api",
|
|
49
49
|
"link-bio": "npm link @datagrok-libraries/bio",
|
|
50
50
|
"link-ml": "npm link @datagrok-libraries/ml",
|
|
51
|
+
"link-utils": "npm link @datagrok-libraries/utils",
|
|
51
52
|
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio @datagrok-libraries/ml",
|
|
52
53
|
"debug-sequences1": "grok publish",
|
|
53
54
|
"release-sequences1": "grok publish --release",
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -15,7 +15,7 @@ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
|
15
15
|
import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
16
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
|
-
import {
|
|
18
|
+
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './utils/sequence-activity-cliffs';
|
|
19
19
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
@@ -30,6 +30,9 @@ import {
|
|
|
30
30
|
performanceTest
|
|
31
31
|
} from './tests/test-sequnces-generators';
|
|
32
32
|
|
|
33
|
+
import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
|
|
34
|
+
import * as C from './utils/constants';
|
|
35
|
+
|
|
33
36
|
//tags: init
|
|
34
37
|
export async function initBio() {
|
|
35
38
|
}
|
|
@@ -63,29 +66,50 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
|
|
|
63
66
|
return new MacromoleculeSequenceCellRenderer();
|
|
64
67
|
}
|
|
65
68
|
|
|
66
|
-
function
|
|
67
|
-
allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
68
|
-
|
|
69
|
-
const
|
|
69
|
+
function checkInputColumnUi(
|
|
70
|
+
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
71
|
+
): boolean {
|
|
72
|
+
const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
|
|
73
|
+
if (!res)
|
|
74
|
+
grok.shell.warning(msg);
|
|
75
|
+
return res;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
export function checkInputColumn(
|
|
79
|
+
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
80
|
+
): [boolean, string] {
|
|
81
|
+
let res: boolean = true;
|
|
82
|
+
let msg: string = '';
|
|
83
|
+
|
|
84
|
+
const uh = new UnitsHandler(col);
|
|
70
85
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
71
86
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
72
|
-
|
|
73
|
-
} else
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
87
|
+
res = false;
|
|
88
|
+
} else {
|
|
89
|
+
const notation: string = uh.notation;
|
|
90
|
+
if (allowedNotations.length > 0 &&
|
|
91
|
+
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
|
|
92
|
+
) {
|
|
93
|
+
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
94
|
+
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
95
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
|
|
96
|
+
res = false;
|
|
97
|
+
} else if (!uh.isHelm()) {
|
|
98
|
+
// alphabet is not specified for 'helm' notation
|
|
99
|
+
const alphabet: string = uh.alphabet;
|
|
100
|
+
if (
|
|
101
|
+
allowedAlphabets.length > 0 &&
|
|
102
|
+
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
|
|
103
|
+
) {
|
|
104
|
+
const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
|
|
105
|
+
(`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
106
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with alphabet ${alphabetAdd}.`;
|
|
107
|
+
res = false;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
86
110
|
}
|
|
87
111
|
|
|
88
|
-
return
|
|
112
|
+
return [res, msg];
|
|
89
113
|
}
|
|
90
114
|
|
|
91
115
|
//name: sequenceAlignment
|
|
@@ -128,7 +152,7 @@ export function vdRegionViewer() {
|
|
|
128
152
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
129
153
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
130
154
|
similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
|
|
131
|
-
if (!
|
|
155
|
+
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
132
156
|
return;
|
|
133
157
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
134
158
|
if (!encodedCol)
|
|
@@ -156,8 +180,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
156
180
|
DG.SEMTYPE.MACROMOLECULE,
|
|
157
181
|
tags,
|
|
158
182
|
sequenceSpace,
|
|
159
|
-
|
|
160
|
-
|
|
183
|
+
getSimilaritiesMarix,
|
|
184
|
+
createTooltipElement,
|
|
185
|
+
createPropPanelElement,
|
|
161
186
|
(options as any)[methodName]);
|
|
162
187
|
return sp;
|
|
163
188
|
}
|
|
@@ -171,7 +196,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
171
196
|
//input: bool plotEmbeddings = true
|
|
172
197
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
173
198
|
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
|
|
174
|
-
if (!
|
|
199
|
+
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
175
200
|
return;
|
|
176
201
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
177
202
|
if (!encodedCol)
|
|
@@ -213,7 +238,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
213
238
|
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
214
239
|
return;
|
|
215
240
|
}
|
|
216
|
-
if (!
|
|
241
|
+
if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
|
|
217
242
|
return;
|
|
218
243
|
|
|
219
244
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
@@ -232,10 +257,12 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
232
257
|
//top-menu: Bio | MSA...
|
|
233
258
|
//name: MSA
|
|
234
259
|
//input: dataframe table
|
|
235
|
-
//input: column sequence { semType: Macromolecule }
|
|
260
|
+
//input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
|
|
236
261
|
//output: column result
|
|
237
262
|
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
|
|
238
|
-
|
|
263
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
264
|
+
|
|
265
|
+
if (!checkInputColumnUi(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
239
266
|
return null;
|
|
240
267
|
|
|
241
268
|
const unUsedName = table.columns.getUnusedName(`msa(${col.name})`);
|
|
@@ -276,7 +303,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
276
303
|
});
|
|
277
304
|
|
|
278
305
|
const handler = async (col: DG.Column) => {
|
|
279
|
-
if (!
|
|
306
|
+
if (!checkInputColumnUi(col, 'Composition'))
|
|
280
307
|
return;
|
|
281
308
|
|
|
282
309
|
const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
@@ -391,11 +418,11 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
391
418
|
//console.warn(`file: ${fileInfo.path}, column: ${col.name}, ` +
|
|
392
419
|
// `semType: ${semType}, units: ${col.getTag(DG.TAGS.UNITS)}`);
|
|
393
420
|
// console.warn('file: "' + fileInfo.path + '", semType: "' + semType + '", ' +
|
|
394
|
-
// 'units: "' + col.getTag(
|
|
421
|
+
// 'units: "' + col.getTag(DG.TAGS.UNITS) + '"');
|
|
395
422
|
|
|
396
423
|
res.push({
|
|
397
424
|
file: fileInfo.path, result: 'detected', column: col.name,
|
|
398
|
-
message: `units: ${col.getTag(
|
|
425
|
+
message: `units: ${col.getTag(DG.TAGS.UNITS)}`
|
|
399
426
|
});
|
|
400
427
|
}
|
|
401
428
|
}
|
|
@@ -418,3 +445,19 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
418
445
|
return resDf;
|
|
419
446
|
}
|
|
420
447
|
|
|
448
|
+
//name: Bio | Split to monomers
|
|
449
|
+
//tags: panel, bio
|
|
450
|
+
//input: column col {semType: Macromolecule}
|
|
451
|
+
export function splitToMonomers(col: DG.Column<string>): void {
|
|
452
|
+
if (!col.getTag(UnitsHandler.TAGS.aligned).includes(C.MSA))
|
|
453
|
+
return grok.shell.error('Splitting is applicable only for aligned sequences');
|
|
454
|
+
|
|
455
|
+
const tempDf = splitAlignedSequences(col);
|
|
456
|
+
const originalDf = col.dataFrame;
|
|
457
|
+
for (const tempCol of tempDf.columns) {
|
|
458
|
+
const newCol = originalDf.columns.add(tempCol);
|
|
459
|
+
newCol.semType = C.SEM_TYPES.MONOMER;
|
|
460
|
+
// newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
|
|
461
|
+
newCol.setTag(C.TAGS.ALPHABET, col.getTag(C.TAGS.ALPHABET));
|
|
462
|
+
}
|
|
463
|
+
}
|
|
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
7
|
import {Column} from 'datagrok-api/dg';
|
|
8
|
+
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
9
|
|
|
9
10
|
category('WebLogo-positions', () => {
|
|
10
11
|
let tvList: DG.TableView[];
|
|
@@ -35,9 +36,9 @@ ATC-G-TTGC--
|
|
|
35
36
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
36
37
|
|
|
37
38
|
const seqCol: DG.Column = df.getCol('seq');
|
|
38
|
-
seqCol.semType =
|
|
39
|
-
seqCol.setTag(
|
|
40
|
-
seqCol.setTag(
|
|
39
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
40
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
41
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
41
42
|
|
|
42
43
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
|
|
43
44
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
@@ -84,9 +85,9 @@ ATC-G-TTGC--
|
|
|
84
85
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
85
86
|
|
|
86
87
|
const seqCol: DG.Column = df.getCol('seq');
|
|
87
|
-
seqCol.semType =
|
|
88
|
-
seqCol.setTag(
|
|
89
|
-
seqCol.setTag(
|
|
88
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
89
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
90
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
90
91
|
|
|
91
92
|
df.filter.init((i) => {
|
|
92
93
|
return i > 2;
|
|
@@ -129,9 +130,9 @@ ATC-G-TTGC--
|
|
|
129
130
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
130
131
|
|
|
131
132
|
const seqCol: DG.Column = df.getCol('seq');
|
|
132
|
-
seqCol.semType =
|
|
133
|
-
seqCol.setTag(
|
|
134
|
-
seqCol.setTag(
|
|
133
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
134
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
135
136
|
|
|
136
137
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
|
|
137
138
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
8
|
+
import {UNITS} from 'datagrok-api/dg';
|
|
9
|
+
import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
category('checkInputColumn', () => {
|
|
13
|
+
|
|
14
|
+
const csv = `seq
|
|
15
|
+
seq1,
|
|
16
|
+
seq2,
|
|
17
|
+
seq3,
|
|
18
|
+
seq4`;
|
|
19
|
+
|
|
20
|
+
test('testMsaPos', async () => {
|
|
21
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
22
|
+
const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
23
|
+
|
|
24
|
+
let k = 11;
|
|
25
|
+
|
|
26
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
27
|
+
const col: DG.Column = df.getCol('seq');
|
|
28
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
29
|
+
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
30
|
+
col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
31
|
+
|
|
32
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
33
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
34
|
+
|
|
35
|
+
expect(res, true);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test('testMsaNegHelm', async () => {
|
|
39
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
40
|
+
const col: DG.Column = df.getCol('seq');
|
|
41
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
|
+
col.setTag(DG.TAGS.UNITS, 'helm');
|
|
43
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
44
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
45
|
+
|
|
46
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
47
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
48
|
+
|
|
49
|
+
expect(res, false);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
test('testMsaNegUN', async () => {
|
|
53
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
54
|
+
const col: DG.Column = df.getCol('seq');
|
|
55
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
56
|
+
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
57
|
+
col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
|
|
58
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
59
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
60
|
+
|
|
61
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
62
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
63
|
+
|
|
64
|
+
expect(res, false);
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
test('testGetActionFunctionMeta', async () => {
|
|
68
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
69
|
+
const sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
70
|
+
let k = 11;
|
|
71
|
+
});
|
|
72
|
+
});
|
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
import {ConverterFunc} from './types';
|
|
7
7
|
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
8
|
-
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
|
|
10
10
|
// import {mmSemType} from '../const';
|
|
11
11
|
// import {importFasta} from '../package';
|
|
@@ -145,14 +145,17 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
145
145
|
|
|
146
146
|
async function _testConvert(srcKey: string, converter: ConverterFunc, tgtKey: string) {
|
|
147
147
|
const srcDf: DG.DataFrame = await readCsv(srcKey);
|
|
148
|
-
const srcCol: DG.Column = srcDf.
|
|
148
|
+
const srcCol: DG.Column = srcDf.getCol('seq');
|
|
149
149
|
|
|
150
|
+
// conversion results
|
|
150
151
|
const resCol: DG.Column = converter(srcCol);
|
|
151
152
|
|
|
153
|
+
// The correct reference data to compare conversion results with.
|
|
152
154
|
const tgtDf: DG.DataFrame = await readCsv(tgtKey);
|
|
153
|
-
const tgtCol: DG.Column = tgtDf.
|
|
155
|
+
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
154
156
|
|
|
155
157
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
158
|
+
const uh: UnitsHandler = new UnitsHandler(resCol);
|
|
156
159
|
}
|
|
157
160
|
|
|
158
161
|
// FASTA tests
|
|
@@ -400,10 +400,10 @@ export async function _testPos(
|
|
|
400
400
|
|
|
401
401
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
402
402
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
403
|
-
expect(col.getTag(
|
|
404
|
-
expect(col.getTag(
|
|
403
|
+
expect(col.getTag(UnitsHandler.TAGS.aligned), aligned);
|
|
404
|
+
expect(col.getTag(UnitsHandler.TAGS.alphabet), alphabet);
|
|
405
405
|
if (separator)
|
|
406
|
-
expect(col.getTag(
|
|
406
|
+
expect(col.getTag(UnitsHandler.TAGS.separator), separator);
|
|
407
407
|
|
|
408
408
|
const uh = new UnitsHandler(col);
|
|
409
409
|
expect(uh.getAlphabetSize(), alphabetSize);
|
|
@@ -3,9 +3,9 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
|
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
6
|
-
import {readDataframe} from './utils';
|
|
7
6
|
import {convertDo} from '../utils/convert';
|
|
8
|
-
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
7
|
+
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
9
9
|
|
|
10
10
|
category('renderers', () => {
|
|
11
11
|
let tvList: DG.TableView[];
|
|
@@ -30,33 +30,45 @@ category('renderers', () => {
|
|
|
30
30
|
await _testAfterConvert();
|
|
31
31
|
});
|
|
32
32
|
|
|
33
|
+
test('setRenderer', async () => {
|
|
34
|
+
await _setRendererManually();
|
|
35
|
+
});
|
|
36
|
+
|
|
33
37
|
async function _testAfterMsa() {
|
|
34
38
|
const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
|
|
35
39
|
const df: DG.DataFrame = importFasta(fastaTxt)[0];
|
|
40
|
+
|
|
41
|
+
const srcSeqCol: DG.Column = df.getCol('sequence');
|
|
42
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
|
|
43
|
+
if (semType)
|
|
44
|
+
srcSeqCol.semType = semType;
|
|
45
|
+
|
|
36
46
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
47
|
+
// call to calculate 'cell.renderer' tag
|
|
37
48
|
await grok.data.detectSemanticTypes(df);
|
|
38
|
-
console.log('Bio: tests/renderers/afterMsa, table view');
|
|
39
49
|
|
|
40
|
-
|
|
41
|
-
expect(srcSeqCol !== null, true);
|
|
50
|
+
console.log('Bio: tests/renderers/afterMsa, table view');
|
|
42
51
|
|
|
43
52
|
console.log('Bio: tests/renderers/afterMsa, src before test ' +
|
|
44
53
|
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
|
|
45
|
-
`cell.renderer="${srcSeqCol!.getTag(
|
|
46
|
-
expect(srcSeqCol
|
|
47
|
-
expect(srcSeqCol
|
|
48
|
-
expect(srcSeqCol
|
|
49
|
-
expect(srcSeqCol
|
|
50
|
-
expect(srcSeqCol
|
|
51
|
-
|
|
52
|
-
const msaSeqCol: DG.Column
|
|
54
|
+
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
55
|
+
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
56
|
+
expect(srcSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
57
|
+
expect(srcSeqCol.getTag(UnitsHandler.TAGS.aligned), 'SEQ');
|
|
58
|
+
expect(srcSeqCol.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
|
|
59
|
+
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
60
|
+
|
|
61
|
+
const msaSeqCol: DG.Column = (await multipleSequenceAlignmentAny(df, srcSeqCol!))!;
|
|
53
62
|
tv.grid.invalidate();
|
|
54
|
-
|
|
55
|
-
expect(msaSeqCol
|
|
56
|
-
expect(msaSeqCol
|
|
57
|
-
expect(msaSeqCol
|
|
58
|
-
expect(msaSeqCol
|
|
59
|
-
expect(msaSeqCol
|
|
63
|
+
|
|
64
|
+
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
65
|
+
expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
66
|
+
expect(msaSeqCol.getTag(UnitsHandler.TAGS.aligned), 'SEQ.MSA');
|
|
67
|
+
expect(msaSeqCol.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
|
|
68
|
+
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
69
|
+
|
|
70
|
+
// check newColumn with UnitsHandler constructor
|
|
71
|
+
const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
|
|
60
72
|
|
|
61
73
|
dfList.push(df);
|
|
62
74
|
tvList.push(tv);
|
|
@@ -66,13 +78,35 @@ category('renderers', () => {
|
|
|
66
78
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA_PT.csv');
|
|
67
79
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
68
80
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
69
|
-
await grok.data.detectSemanticTypes(df);
|
|
70
81
|
|
|
71
82
|
const srcCol: DG.Column = df.col('sequence')!;
|
|
83
|
+
// await grok.data.detectSemanticTypes(df);
|
|
84
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
|
|
85
|
+
if (semType)
|
|
86
|
+
srcCol.semType = semType;
|
|
87
|
+
await grok.data.detectSemanticTypes(df);
|
|
88
|
+
|
|
72
89
|
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
73
|
-
expect(tgtCol.getTag(
|
|
90
|
+
expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
91
|
+
|
|
92
|
+
// check tgtCol with UnitsHandler constructor
|
|
93
|
+
const uh: UnitsHandler = new UnitsHandler(tgtCol);
|
|
74
94
|
|
|
75
95
|
tvList.push(tv);
|
|
76
96
|
dfList.push(df);
|
|
77
|
-
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
async function _setRendererManually() {
|
|
100
|
+
const df = DG.DataFrame.fromColumns([DG.Column.fromStrings(
|
|
101
|
+
'SequencesDiff', ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV'])]);
|
|
102
|
+
df.col('SequencesDiff')!.tags[DG.TAGS.UNITS] = 'separator';
|
|
103
|
+
df.col('SequencesDiff')!.tags[TAGS.SEPARATOR] = '/';
|
|
104
|
+
df.col('SequencesDiff')!.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
105
|
+
const tw = grok.shell.addTableView(df);
|
|
106
|
+
await delay(100);
|
|
107
|
+
const renderer = tw.dataFrame.col('SequencesDiff')?.getTag(DG.TAGS.CELL_RENDERER);
|
|
108
|
+
if (renderer !== 'MacromoleculeDifferenceCR')
|
|
109
|
+
throw new Error(`Units 'separator', separator '/' and semType 'MacromoleculeDifference' have been ` +
|
|
110
|
+
`manually set on column but after df aws added as table view renderer has been reset to '${renderer}'`);
|
|
111
|
+
}
|
|
78
112
|
});
|
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
3
|
+
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {sequenceSpaceTopMenu} from '../package';
|
|
5
5
|
|
|
6
6
|
export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: string, colName: string) {
|
|
7
|
-
await grok.data.detectSemanticTypes(df);
|
|
7
|
+
// await grok.data.detectSemanticTypes(df);
|
|
8
|
+
const col: DG.Column = df.getCol(colName);
|
|
9
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
10
|
+
if (semType)
|
|
11
|
+
col.semType = semType;
|
|
12
|
+
|
|
8
13
|
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, 'Levenshtein', true);
|
|
9
14
|
expect(sp != null, true);
|
|
10
15
|
}
|
|
@@ -4,6 +4,8 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
import {splitToMonomers, _package} from '../package';
|
|
8
|
+
import * as C from '../utils/constants';
|
|
7
9
|
|
|
8
10
|
category('splitters', () => {
|
|
9
11
|
const helm1 = 'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$';
|
|
@@ -51,6 +53,19 @@ category('splitters', () => {
|
|
|
51
53
|
test('testHelm1', async () => { await _testHelmSplitter(data.testHelm1[0], data.testHelm1[1]); });
|
|
52
54
|
test('testHelm2', async () => { await _testHelmSplitter(data.testHelm2[0], data.testHelm2[1]); });
|
|
53
55
|
test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
|
|
56
|
+
|
|
57
|
+
test('splitToMonomers', async () => {
|
|
58
|
+
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/sample_MSA.csv');
|
|
59
|
+
|
|
60
|
+
const seqCol = df.getCol('MSA');
|
|
61
|
+
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
62
|
+
if (semType)
|
|
63
|
+
seqCol.semType = semType;
|
|
64
|
+
seqCol.setTag(C.TAGS.ALIGNED, C.MSA);
|
|
65
|
+
|
|
66
|
+
splitToMonomers(seqCol);
|
|
67
|
+
expect(df.columns.names().includes('17'), true);
|
|
68
|
+
});
|
|
54
69
|
});
|
|
55
70
|
|
|
56
71
|
export async function _testHelmSplitter(src: string, tgt: string[]) {
|
|
@@ -1,27 +1,22 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import {DataFrame} from 'datagrok-api/dg';
|
|
3
4
|
|
|
4
|
-
export function generateManySequences():
|
|
5
|
-
let
|
|
6
|
-
meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
}
|
|
10
|
-
return csvData;
|
|
5
|
+
export function generateManySequences(): DG.Column[] {
|
|
6
|
+
let columns: DG.Column[] = [];
|
|
7
|
+
columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 6).fill('meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me')));
|
|
8
|
+
columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 6).fill('5.30751')));
|
|
9
|
+
return columns;
|
|
11
10
|
}
|
|
12
11
|
|
|
13
|
-
export function generateLongSequence():
|
|
14
|
-
let
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
let csvData = `MSA,Activity `;
|
|
20
|
-
for (let i = 0; i <= 10 ** 1 * 4; i++) {
|
|
21
|
-
csvData += `\n ${longSequence}`;
|
|
22
|
-
}
|
|
23
|
-
return csvData;
|
|
12
|
+
export function generateLongSequence(): DG.Column[] {
|
|
13
|
+
let columns: DG.Column[] = [];
|
|
14
|
+
const longSequence = `meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr`.repeat(10 ** 5);
|
|
15
|
+
columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 2).fill(longSequence)));
|
|
16
|
+
columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 2).fill('7.30751')));
|
|
17
|
+
return columns;
|
|
24
18
|
}
|
|
19
|
+
|
|
25
20
|
export function setTagsMacromolecule(col: DG.Column) {
|
|
26
21
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
27
22
|
col.setTag('units', 'separator');
|
|
@@ -31,10 +26,10 @@ export function setTagsMacromolecule(col: DG.Column) {
|
|
|
31
26
|
return col;
|
|
32
27
|
}
|
|
33
28
|
|
|
34
|
-
export function performanceTest(generateFunc: () =>
|
|
29
|
+
export function performanceTest(generateFunc: () => DG.Column[], testName: string) {
|
|
30
|
+
const columns = generateFunc();
|
|
31
|
+
const df: DG.DataFrame = DG.DataFrame.fromColumns(columns);
|
|
35
32
|
const startTime: number = Date.now();
|
|
36
|
-
const csv = generateFunc();
|
|
37
|
-
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
38
33
|
const col: DG.Column = df.columns.byName('MSA');
|
|
39
34
|
setTagsMacromolecule(col);
|
|
40
35
|
grok.shell.addTableView(df);
|