@datagrok/bio 1.10.2 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +45 -20
- package/dist/package-test.js +342 -157
- package/dist/package.js +196 -106
- package/package.json +4 -8
- package/src/package-test.ts +1 -0
- package/src/package.ts +69 -27
- package/src/tests/WebLogo-positions-test.ts +10 -9
- package/src/tests/checkInputColumn-tests.ts +69 -0
- package/src/tests/detectors-test.ts +3 -3
- package/src/tests/renderers-test.ts +12 -12
- package/src/tests/splitters-test.ts +15 -0
- package/src/utils/cell-renderer.ts +2 -1
- package/src/utils/constants.ts +3 -5
- package/src/utils/convert.ts +1 -1
- package/src/utils/multiple-sequence-alignment.ts +5 -4
- package/src/utils/sequence-space.ts +1 -1
- package/src/utils/utils.ts +3 -2
- package/{test-Bio-eb4783c07294-0aa1538b.html → test-Bio-eb4783c07294-8e35df79.html} +6 -2
- package/src/utils/split-to-monomers.ts +0 -8
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "1.
|
|
8
|
+
"version": "1.11.0",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,12 +14,12 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": ">=2.4.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^4.
|
|
17
|
+
"@datagrok-libraries/bio": "^4.2.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.0",
|
|
19
19
|
"@datagrok-libraries/ml": "^4.0.0",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.6.2",
|
|
21
21
|
"cash-dom": "latest",
|
|
22
|
-
"datagrok-api": "^1.
|
|
22
|
+
"datagrok-api": "^1.6.6",
|
|
23
23
|
"dayjs": "^1.11.4",
|
|
24
24
|
"openchemlib": "6.0.1",
|
|
25
25
|
"rxjs": "^6.5.5",
|
|
@@ -69,11 +69,7 @@
|
|
|
69
69
|
"Developers"
|
|
70
70
|
],
|
|
71
71
|
"sources": [
|
|
72
|
-
"css/helm.css"
|
|
73
|
-
"https://ajax.googleapis.com/ajax/libs/dojo/1.10.4/dojo/dojo.js",
|
|
74
|
-
"helm/JSDraw/Scilligence.JSDraw2.Lite.js",
|
|
75
|
-
"helm/JSDraw/Scilligence.JSDraw2.Resources.js",
|
|
76
|
-
"helm/JSDraw/Pistoia.HELM-uncompressed.js"
|
|
72
|
+
"css/helm.css"
|
|
77
73
|
],
|
|
78
74
|
"category": "Bioinformatics"
|
|
79
75
|
}
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -30,6 +30,9 @@ import {
|
|
|
30
30
|
performanceTest
|
|
31
31
|
} from './tests/test-sequnces-generators';
|
|
32
32
|
|
|
33
|
+
import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
|
|
34
|
+
import * as C from './utils/constants';
|
|
35
|
+
|
|
33
36
|
//tags: init
|
|
34
37
|
export async function initBio() {
|
|
35
38
|
}
|
|
@@ -63,29 +66,50 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
|
|
|
63
66
|
return new MacromoleculeSequenceCellRenderer();
|
|
64
67
|
}
|
|
65
68
|
|
|
66
|
-
function
|
|
67
|
-
allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
68
|
-
|
|
69
|
-
const
|
|
69
|
+
function checkInputColumnUi(
|
|
70
|
+
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
71
|
+
): boolean {
|
|
72
|
+
const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
|
|
73
|
+
if (!res)
|
|
74
|
+
grok.shell.warning(msg);
|
|
75
|
+
return res;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
export function checkInputColumn(
|
|
79
|
+
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
80
|
+
): [boolean, string] {
|
|
81
|
+
let res: boolean = true;
|
|
82
|
+
let msg: string = '';
|
|
83
|
+
|
|
84
|
+
const uh = new UnitsHandler(col);
|
|
70
85
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
71
86
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
72
|
-
|
|
73
|
-
} else
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
87
|
+
res = false;
|
|
88
|
+
} else {
|
|
89
|
+
const notation: string = uh.notation;
|
|
90
|
+
if (allowedNotations.length > 0 &&
|
|
91
|
+
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
|
|
92
|
+
) {
|
|
93
|
+
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
94
|
+
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
95
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
|
|
96
|
+
res = false;
|
|
97
|
+
} else if (!uh.isHelm()) {
|
|
98
|
+
// alphabet is not specified for 'helm' notation
|
|
99
|
+
const alphabet: string = uh.alphabet;
|
|
100
|
+
if (
|
|
101
|
+
allowedAlphabets.length > 0 &&
|
|
102
|
+
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
|
|
103
|
+
) {
|
|
104
|
+
const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
|
|
105
|
+
(`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
106
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with alphabet ${alphabetAdd}.`;
|
|
107
|
+
res = false;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
86
110
|
}
|
|
87
111
|
|
|
88
|
-
return
|
|
112
|
+
return [res, msg];
|
|
89
113
|
}
|
|
90
114
|
|
|
91
115
|
//name: sequenceAlignment
|
|
@@ -128,7 +152,7 @@ export function vdRegionViewer() {
|
|
|
128
152
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
129
153
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
130
154
|
similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
|
|
131
|
-
if (!
|
|
155
|
+
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
132
156
|
return;
|
|
133
157
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
134
158
|
if (!encodedCol)
|
|
@@ -171,7 +195,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
171
195
|
//input: bool plotEmbeddings = true
|
|
172
196
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
173
197
|
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
|
|
174
|
-
if (!
|
|
198
|
+
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
175
199
|
return;
|
|
176
200
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
177
201
|
if (!encodedCol)
|
|
@@ -213,7 +237,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
213
237
|
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
214
238
|
return;
|
|
215
239
|
}
|
|
216
|
-
if (!
|
|
240
|
+
if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
|
|
217
241
|
return;
|
|
218
242
|
|
|
219
243
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
@@ -232,10 +256,12 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
232
256
|
//top-menu: Bio | MSA...
|
|
233
257
|
//name: MSA
|
|
234
258
|
//input: dataframe table
|
|
235
|
-
//input: column sequence { semType: Macromolecule }
|
|
259
|
+
//input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
|
|
236
260
|
//output: column result
|
|
237
261
|
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
|
|
238
|
-
|
|
262
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
263
|
+
|
|
264
|
+
if (!checkInputColumnUi(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
239
265
|
return null;
|
|
240
266
|
|
|
241
267
|
const unUsedName = table.columns.getUnusedName(`msa(${col.name})`);
|
|
@@ -276,7 +302,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
276
302
|
});
|
|
277
303
|
|
|
278
304
|
const handler = async (col: DG.Column) => {
|
|
279
|
-
if (!
|
|
305
|
+
if (!checkInputColumnUi(col, 'Composition'))
|
|
280
306
|
return;
|
|
281
307
|
|
|
282
308
|
const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
@@ -391,11 +417,11 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
391
417
|
//console.warn(`file: ${fileInfo.path}, column: ${col.name}, ` +
|
|
392
418
|
// `semType: ${semType}, units: ${col.getTag(DG.TAGS.UNITS)}`);
|
|
393
419
|
// console.warn('file: "' + fileInfo.path + '", semType: "' + semType + '", ' +
|
|
394
|
-
// 'units: "' + col.getTag(
|
|
420
|
+
// 'units: "' + col.getTag(DG.TAGS.UNITS) + '"');
|
|
395
421
|
|
|
396
422
|
res.push({
|
|
397
423
|
file: fileInfo.path, result: 'detected', column: col.name,
|
|
398
|
-
message: `units: ${col.getTag(
|
|
424
|
+
message: `units: ${col.getTag(DG.TAGS.UNITS)}`
|
|
399
425
|
});
|
|
400
426
|
}
|
|
401
427
|
}
|
|
@@ -418,3 +444,19 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
418
444
|
return resDf;
|
|
419
445
|
}
|
|
420
446
|
|
|
447
|
+
//name: Bio | Split to monomers
|
|
448
|
+
//tags: panel, bio
|
|
449
|
+
//input: column col {semType: Macromolecule}
|
|
450
|
+
export function splitToMonomers(col: DG.Column<string>): void {
|
|
451
|
+
if (!col.getTag(UnitsHandler.TAGS.aligned).includes(C.MSA))
|
|
452
|
+
return grok.shell.error('Splitting is applicable only for aligned sequences');
|
|
453
|
+
|
|
454
|
+
const tempDf = splitAlignedSequences(col);
|
|
455
|
+
const originalDf = col.dataFrame;
|
|
456
|
+
for (const tempCol of tempDf.columns) {
|
|
457
|
+
const newCol = originalDf.columns.add(tempCol);
|
|
458
|
+
newCol.semType = C.SEM_TYPES.MONOMER;
|
|
459
|
+
// newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
|
|
460
|
+
newCol.setTag(C.TAGS.ALPHABET, col.getTag(C.TAGS.ALPHABET));
|
|
461
|
+
}
|
|
462
|
+
}
|
|
package/src/tests/WebLogo-positions-test.ts
CHANGED
|
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
7
|
import {Column} from 'datagrok-api/dg';
|
|
8
|
+
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
9
|
|
|
9
10
|
category('WebLogo-positions', () => {
|
|
10
11
|
let tvList: DG.TableView[];
|
|
@@ -35,9 +36,9 @@ ATC-G-TTGC--
|
|
|
35
36
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
36
37
|
|
|
37
38
|
const seqCol: DG.Column = df.getCol('seq');
|
|
38
|
-
seqCol.semType =
|
|
39
|
-
seqCol.setTag(
|
|
40
|
-
seqCol.setTag(
|
|
39
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
40
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
41
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
41
42
|
|
|
42
43
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
|
|
43
44
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
@@ -84,9 +85,9 @@ ATC-G-TTGC--
|
|
|
84
85
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
85
86
|
|
|
86
87
|
const seqCol: DG.Column = df.getCol('seq');
|
|
87
|
-
seqCol.semType =
|
|
88
|
-
seqCol.setTag(
|
|
89
|
-
seqCol.setTag(
|
|
88
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
89
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
90
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
90
91
|
|
|
91
92
|
df.filter.init((i) => {
|
|
92
93
|
return i > 2;
|
|
@@ -129,9 +130,9 @@ ATC-G-TTGC--
|
|
|
129
130
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
130
131
|
|
|
131
132
|
const seqCol: DG.Column = df.getCol('seq');
|
|
132
|
-
seqCol.semType =
|
|
133
|
-
seqCol.setTag(
|
|
134
|
-
seqCol.setTag(
|
|
133
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
134
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
135
136
|
|
|
136
137
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
|
|
137
138
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
package/src/tests/checkInputColumn-tests.ts
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
import {checkInputColumn} from '../package';
|
|
8
|
+
import {UNITS} from 'datagrok-api/dg';
|
|
9
|
+
import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
category('checkInputColumn', () => {
|
|
13
|
+
|
|
14
|
+
const csv = `seq
|
|
15
|
+
seq1,
|
|
16
|
+
seq2,
|
|
17
|
+
seq3,
|
|
18
|
+
seq4`;
|
|
19
|
+
|
|
20
|
+
category('MSA', () => {
|
|
21
|
+
|
|
22
|
+
test('testMsaPos', async () => {
|
|
23
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
24
|
+
const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
25
|
+
|
|
26
|
+
let k = 11;
|
|
27
|
+
|
|
28
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
29
|
+
const col: DG.Column = df.getCol('seq');
|
|
30
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
31
|
+
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
32
|
+
col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
33
|
+
|
|
34
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
35
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
36
|
+
|
|
37
|
+
expect(res, true);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
test('testMsaNegHelm', async () => {
|
|
41
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
42
|
+
const col: DG.Column = df.getCol('seq');
|
|
43
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
44
|
+
col.setTag(DG.TAGS.UNITS, 'helm');
|
|
45
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
46
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
47
|
+
|
|
48
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
49
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
50
|
+
|
|
51
|
+
expect(res, false);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test('testMsaNegUN', async () => {
|
|
55
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
56
|
+
const col: DG.Column = df.getCol('seq');
|
|
57
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
58
|
+
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
59
|
+
col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
|
|
60
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
61
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
62
|
+
|
|
63
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
64
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
65
|
+
|
|
66
|
+
expect(res, false);
|
|
67
|
+
});
|
|
68
|
+
});
|
|
69
|
+
});
|
|
package/src/tests/detectors-test.ts
CHANGED
|
@@ -400,10 +400,10 @@ export async function _testPos(
|
|
|
400
400
|
|
|
401
401
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
402
402
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
403
|
-
expect(col.getTag(
|
|
404
|
-
expect(col.getTag(
|
|
403
|
+
expect(col.getTag(UnitsHandler.TAGS.aligned), aligned);
|
|
404
|
+
expect(col.getTag(UnitsHandler.TAGS.alphabet), alphabet);
|
|
405
405
|
if (separator)
|
|
406
|
-
expect(col.getTag(
|
|
406
|
+
expect(col.getTag(UnitsHandler.TAGS.separator), separator);
|
|
407
407
|
|
|
408
408
|
const uh = new UnitsHandler(col);
|
|
409
409
|
expect(uh.getAlphabetSize(), alphabetSize);
|
|
package/src/tests/renderers-test.ts
CHANGED
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
7
|
import {convertDo} from '../utils/convert';
|
|
8
|
-
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
|
|
10
10
|
category('renderers', () => {
|
|
11
11
|
let tvList: DG.TableView[];
|
|
@@ -42,21 +42,21 @@ category('renderers', () => {
|
|
|
42
42
|
|
|
43
43
|
console.log('Bio: tests/renderers/afterMsa, src before test ' +
|
|
44
44
|
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
|
|
45
|
-
`cell.renderer="${srcSeqCol!.getTag(
|
|
45
|
+
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
46
46
|
expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
47
|
-
expect(srcSeqCol!.getTag(DG.TAGS.UNITS),
|
|
48
|
-
expect(srcSeqCol!.getTag(
|
|
49
|
-
expect(srcSeqCol!.getTag(
|
|
50
|
-
expect(srcSeqCol!.getTag(
|
|
47
|
+
expect(srcSeqCol!.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
48
|
+
expect(srcSeqCol!.getTag(UnitsHandler.TAGS.aligned), 'SEQ');
|
|
49
|
+
expect(srcSeqCol!.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
|
|
50
|
+
expect(srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
51
51
|
|
|
52
52
|
const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
|
|
53
53
|
tv.grid.invalidate();
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
56
|
-
expect(msaSeqCol!.getTag(DG.TAGS.UNITS),
|
|
57
|
-
expect(msaSeqCol!.getTag(
|
|
58
|
-
expect(msaSeqCol!.getTag(
|
|
59
|
-
expect(msaSeqCol!.getTag(
|
|
56
|
+
expect(msaSeqCol!.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
57
|
+
expect(msaSeqCol!.getTag(UnitsHandler.TAGS.aligned), 'SEQ.MSA');
|
|
58
|
+
expect(msaSeqCol!.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
|
|
59
|
+
expect(msaSeqCol!.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
60
60
|
|
|
61
61
|
dfList.push(df);
|
|
62
62
|
tvList.push(tv);
|
|
@@ -70,7 +70,7 @@ category('renderers', () => {
|
|
|
70
70
|
|
|
71
71
|
const srcCol: DG.Column = df.col('sequence')!;
|
|
72
72
|
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
73
|
-
expect(tgtCol.getTag(
|
|
73
|
+
expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
74
74
|
|
|
75
75
|
tvList.push(tv);
|
|
76
76
|
dfList.push(df);
|
|
package/src/tests/splitters-test.ts
CHANGED
|
@@ -4,6 +4,8 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
import {splitToMonomers, _package} from '../package';
|
|
8
|
+
import * as C from '../utils/constants';
|
|
7
9
|
|
|
8
10
|
category('splitters', () => {
|
|
9
11
|
const helm1 = 'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$';
|
|
@@ -51,6 +53,19 @@ category('splitters', () => {
|
|
|
51
53
|
test('testHelm1', async () => { await _testHelmSplitter(data.testHelm1[0], data.testHelm1[1]); });
|
|
52
54
|
test('testHelm2', async () => { await _testHelmSplitter(data.testHelm2[0], data.testHelm2[1]); });
|
|
53
55
|
test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
|
|
56
|
+
|
|
57
|
+
test('splitToMonomers', async () => {
|
|
58
|
+
const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/sample_MSA.csv');
|
|
59
|
+
|
|
60
|
+
const seqCol = df.getCol('MSA');
|
|
61
|
+
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
62
|
+
if (semType)
|
|
63
|
+
seqCol.semType = semType;
|
|
64
|
+
seqCol.setTag(C.TAGS.ALIGNED, C.MSA);
|
|
65
|
+
|
|
66
|
+
splitToMonomers(seqCol);
|
|
67
|
+
expect(df.columns.names().includes('17'), true);
|
|
68
|
+
});
|
|
54
69
|
});
|
|
55
70
|
|
|
56
71
|
export async function _testHelmSplitter(src: string, tgt: string[]) {
|
|
package/src/utils/cell-renderer.ts
CHANGED
|
@@ -7,6 +7,7 @@ import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-log
|
|
|
7
7
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
8
8
|
import * as ui from 'datagrok-api/ui';
|
|
9
9
|
import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
10
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
11
|
|
|
11
12
|
const undefinedColor = 'rgb(100,100,100)';
|
|
12
13
|
const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
|
|
@@ -57,7 +58,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
57
58
|
get defaultWidth(): number { return 230; }
|
|
58
59
|
|
|
59
60
|
onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
60
|
-
if (gridCell.cell.column.getTag(
|
|
61
|
+
if (gridCell.cell.column.getTag(UnitsHandler.TAGS.aligned) !== 'SEQ.MSA') {
|
|
61
62
|
return;
|
|
62
63
|
}
|
|
63
64
|
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|
package/src/utils/constants.ts
CHANGED
|
@@ -9,17 +9,13 @@ export enum COLUMNS_NAMES {
|
|
|
9
9
|
MEAN_DIFFERENCE = 'Mean difference',
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
-
export enum CATEGORIES {
|
|
13
|
-
OTHER = 'Other',
|
|
14
|
-
ALL = 'All',
|
|
15
|
-
}
|
|
16
|
-
|
|
17
12
|
export enum TAGS {
|
|
18
13
|
AAR = 'AAR',
|
|
19
14
|
POSITION = 'Pos',
|
|
20
15
|
SEPARATOR = 'separator',
|
|
21
16
|
SELECTION = 'selection',
|
|
22
17
|
ALPHABET = 'alphabet',
|
|
18
|
+
ALIGNED = 'aligned',
|
|
23
19
|
}
|
|
24
20
|
|
|
25
21
|
export enum SEM_TYPES {
|
|
@@ -30,6 +26,8 @@ export enum SEM_TYPES {
|
|
|
30
26
|
MACROMOLECULE = 'Macromolecule',
|
|
31
27
|
}
|
|
32
28
|
|
|
29
|
+
export const MSA = 'MSA';
|
|
30
|
+
|
|
33
31
|
export const STATS = 'stats';
|
|
34
32
|
|
|
35
33
|
export const EMBEDDING_STATUS = 'embeddingStatus';
|
package/src/utils/convert.ts
CHANGED
|
package/src/utils/multiple-sequence-alignment.ts
CHANGED
|
@@ -9,6 +9,7 @@ import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler'
|
|
|
9
9
|
import Aioli from '@biowasm/aioli';
|
|
10
10
|
|
|
11
11
|
import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
|
|
12
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
12
13
|
|
|
13
14
|
/**
|
|
14
15
|
* Converts array of sequences into simple fasta string.
|
|
@@ -58,14 +59,14 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
|
|
|
58
59
|
// units
|
|
59
60
|
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
60
61
|
//aligned
|
|
61
|
-
const srcAligned = srcCol.getTag(
|
|
62
|
+
const srcAligned = srcCol.getTag(UnitsHandler.TAGS.aligned);
|
|
62
63
|
const tgtAligned = srcAligned + '.MSA';
|
|
63
64
|
//alphabet
|
|
64
|
-
const srcAlphabet = srcCol.getTag(
|
|
65
|
+
const srcAlphabet = srcCol.getTag(UnitsHandler.TAGS.alphabet);
|
|
65
66
|
|
|
66
67
|
tgtCol.setTag(DG.TAGS.UNITS, srcUnits);
|
|
67
|
-
tgtCol.setTag(
|
|
68
|
-
tgtCol.setTag(
|
|
68
|
+
tgtCol.setTag(UnitsHandler.TAGS.aligned, tgtAligned);
|
|
69
|
+
tgtCol.setTag(UnitsHandler.TAGS.alphabet, srcAlphabet);
|
|
69
70
|
tgtCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
70
71
|
return tgtCol;
|
|
71
72
|
}
|
|
package/src/utils/sequence-space.ts
CHANGED
|
@@ -16,7 +16,7 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
16
16
|
// code deprecated since seqCol is encoded
|
|
17
17
|
/* let preparedData: any;
|
|
18
18
|
if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
|
|
19
|
-
const sep = spaceParams.seqCol.getTag(
|
|
19
|
+
const sep = spaceParams.seqCol.getTag(UnitsHandler.TAGS.separator);
|
|
20
20
|
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
21
21
|
const regex = new RegExp(sepFinal, 'g');
|
|
22
22
|
if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
|
package/src/utils/utils.ts
CHANGED
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, MONOMER_SYMBOL,
|
|
6
6
|
RGROUP_ALTER_ID, RGROUP_FIELD, RGROUP_LABEL, SDF_MONOMER_NAME
|
|
7
7
|
} from '../const';
|
|
8
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
9
|
|
|
9
10
|
export const HELM_CORE_LIB_FILENAME = '/samples/HELMCoreLibrary.json';
|
|
10
11
|
export const HELM_CORE_LIB_MONOMER_SYMBOL = 'symbol';
|
|
@@ -16,7 +17,7 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
|
|
|
16
17
|
let encodeSymbol = MONOMER_ENCODE_MIN;
|
|
17
18
|
const monomerSymbolDict: { [key: string]: number } = {};
|
|
18
19
|
const units = col.tags[DG.TAGS.UNITS];
|
|
19
|
-
const sep = col.getTag(
|
|
20
|
+
const sep = col.getTag(UnitsHandler.TAGS.separator);
|
|
20
21
|
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, sep);
|
|
21
22
|
const encodedStringArray = [];
|
|
22
23
|
for (let i = 0; i < col.length; ++i) {
|
|
@@ -25,7 +26,7 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
|
|
|
25
26
|
monomers.forEach((m) => {
|
|
26
27
|
if (!monomerSymbolDict[m]) {
|
|
27
28
|
if (encodeSymbol > MONOMER_ENCODE_MAX) {
|
|
28
|
-
grok.shell.error(`Not
|
|
29
|
+
grok.shell.error(`Not enough symbols to encode monomers`);
|
|
29
30
|
return null;
|
|
30
31
|
}
|
|
31
32
|
monomerSymbolDict[m] = encodeSymbol;
|
|
package/{test-Bio-eb4783c07294-0aa1538b.html → test-Bio-eb4783c07294-8e35df79.html}
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=eb4783c07294. Commit
|
|
1
|
+
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=eb4783c07294. Commit 8e35df79.</title><style type="text/css">html,
|
|
2
2
|
body {
|
|
3
3
|
font-family: Arial, Helvetica, sans-serif;
|
|
4
4
|
font-size: 1rem;
|
|
@@ -229,7 +229,7 @@ header {
|
|
|
229
229
|
font-size: 1rem;
|
|
230
230
|
padding: 0 0.5rem;
|
|
231
231
|
}
|
|
232
|
-
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=eb4783c07294. Commit
|
|
232
|
+
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=eb4783c07294. Commit 8e35df79.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-09-05 13:58:32</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">47.737s</div></div><div class="suite-tests"><div class="test-result passed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">passed</div><div class="test-duration">26.873s</div></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.<anonymous> (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:63:11)
|
|
233
233
|
at Generator.next (<anonymous>)
|
|
234
234
|
at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:28:58)
|
|
235
235
|
at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message">Using web root: http://localhost:8080</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:40:11
|
|
@@ -309,6 +309,9 @@ Test result : Success : Bio.detectors.samplesTestDmvOfficesNegativeCity : OK
|
|
|
309
309
|
Test result : Success : Bio.detectors.samplesTestAlertCollectionNegativeSmarts : OK
|
|
310
310
|
Test result : Success : Bio.MSA.isCorrect : OK
|
|
311
311
|
Test result : Success : Bio.MSA.isCorrectLong : OK
|
|
312
|
+
Test result : Success : Bio.MSA.testMsaPos : OK
|
|
313
|
+
Test result : Success : Bio.MSA.testMsaNegHelm : OK
|
|
314
|
+
Test result : Success : Bio.MSA.testMsaNegUN : OK
|
|
312
315
|
Test result : Success : Bio.sequenceSpace.sequenceSpaceOpens : OK
|
|
313
316
|
Test result : Success : Bio.sequenceSpace.sequenceSpaceOpensWithEmptyRows : OK
|
|
314
317
|
Test result : Success : Bio.activityCliffs.activityCliffsOpen : OK
|
|
@@ -319,6 +322,7 @@ Test result : Success : Bio.splitters.helm3-multichar : OK
|
|
|
319
322
|
Test result : Success : Bio.splitters.testHelm1 : OK
|
|
320
323
|
Test result : Success : Bio.splitters.testHelm2 : OK
|
|
321
324
|
Test result : Success : Bio.splitters.testHelm3 : OK
|
|
325
|
+
Test result : Success : Bio.splitters.splitToMonomers : OK
|
|
322
326
|
Test result : Success : Bio.renderers.afterMsa : OK
|
|
323
327
|
Test result : Success : Bio.renderers.afterConvert : OK
|
|
324
328
|
Test result : Success : Bio.converters.testFastaPtToSeparator : OK
|