@datagrok/bio 1.9.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +77 -15
- package/dist/package-test.js +662 -5567
- package/dist/package.js +481 -5495
- package/package.json +8 -10
- package/src/package-test.ts +1 -0
- package/src/package.ts +94 -37
- package/src/tests/WebLogo-positions-test.ts +18 -13
- package/src/tests/checkInputColumn-tests.ts +69 -0
- package/src/tests/detectors-test.ts +38 -23
- package/src/tests/msa-tests.ts +2 -1
- package/src/tests/renderers-test.ts +12 -12
- package/src/tests/splitters-test.ts +15 -0
- package/src/tests/test-sequnces-generators.ts +45 -0
- package/src/utils/atomic-works.ts +2 -0
- package/src/utils/cell-renderer.ts +40 -24
- package/src/utils/constants.ts +3 -5
- package/src/utils/convert.ts +1 -1
- package/src/utils/multiple-sequence-alignment.ts +5 -4
- package/src/utils/sequence-space.ts +6 -4
- package/src/utils/utils.ts +3 -2
- package/src/widgets/representations.ts +1 -1
- package/test-Bio-eb4783c07294-8e35df79.html +359 -0
- package/webpack.config.js +1 -0
- package/src/utils/split-to-monomers.ts +0 -8
- package/test-Bio-d4ef1f35c295-90ae719f.html +0 -245
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "1.
|
|
8
|
+
"version": "1.11.0",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,16 +14,17 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": ">=2.4.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^
|
|
17
|
+
"@datagrok-libraries/bio": "^4.2.0",
|
|
18
|
+
"@datagrok-libraries/chem-meta": "1.0.0",
|
|
18
19
|
"@datagrok-libraries/ml": "^4.0.0",
|
|
19
20
|
"@datagrok-libraries/utils": "^1.6.2",
|
|
20
21
|
"cash-dom": "latest",
|
|
21
|
-
"datagrok-api": "^1.
|
|
22
|
+
"datagrok-api": "^1.6.6",
|
|
22
23
|
"dayjs": "^1.11.4",
|
|
23
24
|
"openchemlib": "6.0.1",
|
|
24
25
|
"rxjs": "^6.5.5",
|
|
25
|
-
"
|
|
26
|
-
"
|
|
26
|
+
"typescript": "^4.4.2",
|
|
27
|
+
"wu": "latest"
|
|
27
28
|
},
|
|
28
29
|
"devDependencies": {
|
|
29
30
|
"@types/jest": "^27.0.0",
|
|
@@ -35,6 +36,7 @@
|
|
|
35
36
|
"jest-html-reporter": "^3.6.0",
|
|
36
37
|
"puppeteer": "^13.7.0",
|
|
37
38
|
"ts-jest": "^27.0.0",
|
|
39
|
+
"ts-loader": "^9.2.5",
|
|
38
40
|
"webpack": "latest",
|
|
39
41
|
"webpack-cli": "^4.10.0",
|
|
40
42
|
"@types/js-yaml": "^4.0.5",
|
|
@@ -67,11 +69,7 @@
|
|
|
67
69
|
"Developers"
|
|
68
70
|
],
|
|
69
71
|
"sources": [
|
|
70
|
-
"css/helm.css"
|
|
71
|
-
"https://ajax.googleapis.com/ajax/libs/dojo/1.10.4/dojo/dojo.js",
|
|
72
|
-
"helm/JSDraw/Scilligence.JSDraw2.Lite.js",
|
|
73
|
-
"helm/JSDraw/Scilligence.JSDraw2.Resources.js",
|
|
74
|
-
"helm/JSDraw/Pistoia.HELM-uncompressed.js"
|
|
72
|
+
"css/helm.css"
|
|
75
73
|
],
|
|
76
74
|
"category": "Bioinformatics"
|
|
77
75
|
}
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -23,13 +23,31 @@ import {convert} from './utils/convert';
|
|
|
23
23
|
import {representationsWidget} from './widgets/representations';
|
|
24
24
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
25
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
|
-
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
|
|
26
|
+
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
27
|
+
import {
|
|
28
|
+
generateManySequences,
|
|
29
|
+
generateLongSequence,
|
|
30
|
+
performanceTest
|
|
31
|
+
} from './tests/test-sequnces-generators';
|
|
27
32
|
|
|
33
|
+
import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
|
|
34
|
+
import * as C from './utils/constants';
|
|
28
35
|
|
|
29
36
|
//tags: init
|
|
30
37
|
export async function initBio() {
|
|
31
38
|
}
|
|
32
39
|
|
|
40
|
+
//name: testManySequencesPerformance
|
|
41
|
+
export function testManySequencesPerformance(): void {
|
|
42
|
+
performanceTest(generateManySequences, 'Many sequences');
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
//name: testLongSequencesPerformance
|
|
46
|
+
export function testLongSequencesPerformance(): void {
|
|
47
|
+
performanceTest(generateLongSequence, 'Long sequences');
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
|
|
33
51
|
//name: fastaSequenceCellRenderer
|
|
34
52
|
//tags: cellRenderer
|
|
35
53
|
//meta.cellType: Sequence
|
|
@@ -48,29 +66,50 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
|
|
|
48
66
|
return new MacromoleculeSequenceCellRenderer();
|
|
49
67
|
}
|
|
50
68
|
|
|
51
|
-
function
|
|
52
|
-
allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
53
|
-
|
|
54
|
-
const
|
|
69
|
+
function checkInputColumnUi(
|
|
70
|
+
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
71
|
+
): boolean {
|
|
72
|
+
const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
|
|
73
|
+
if (!res)
|
|
74
|
+
grok.shell.warning(msg);
|
|
75
|
+
return res;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
export function checkInputColumn(
|
|
79
|
+
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
80
|
+
): [boolean, string] {
|
|
81
|
+
let res: boolean = true;
|
|
82
|
+
let msg: string = '';
|
|
83
|
+
|
|
84
|
+
const uh = new UnitsHandler(col);
|
|
55
85
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
56
86
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
57
|
-
|
|
58
|
-
} else
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
87
|
+
res = false;
|
|
88
|
+
} else {
|
|
89
|
+
const notation: string = uh.notation;
|
|
90
|
+
if (allowedNotations.length > 0 &&
|
|
91
|
+
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
|
|
92
|
+
) {
|
|
93
|
+
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
94
|
+
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
95
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
|
|
96
|
+
res = false;
|
|
97
|
+
} else if (!uh.isHelm()) {
|
|
98
|
+
// alphabet is not specified for 'helm' notation
|
|
99
|
+
const alphabet: string = uh.alphabet;
|
|
100
|
+
if (
|
|
101
|
+
allowedAlphabets.length > 0 &&
|
|
102
|
+
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
|
|
103
|
+
) {
|
|
104
|
+
const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
|
|
105
|
+
(`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
106
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with alphabet ${alphabetAdd}.`;
|
|
107
|
+
res = false;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
71
110
|
}
|
|
72
111
|
|
|
73
|
-
return
|
|
112
|
+
return [res, msg];
|
|
74
113
|
}
|
|
75
114
|
|
|
76
115
|
//name: sequenceAlignment
|
|
@@ -113,7 +152,7 @@ export function vdRegionViewer() {
|
|
|
113
152
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
114
153
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
115
154
|
similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
|
|
116
|
-
if (!
|
|
155
|
+
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
117
156
|
return;
|
|
118
157
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
119
158
|
if (!encodedCol)
|
|
@@ -127,7 +166,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
127
166
|
'aligned': macroMolecule.tags['aligned'],
|
|
128
167
|
'separator': macroMolecule.tags['separator'],
|
|
129
168
|
'alphabet': macroMolecule.tags['alphabet'],
|
|
130
|
-
}
|
|
169
|
+
};
|
|
131
170
|
const sp = await getActivityCliffs(
|
|
132
171
|
df,
|
|
133
172
|
macroMolecule,
|
|
@@ -144,7 +183,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
144
183
|
sequenceGetSimilarities,
|
|
145
184
|
drawSequences,
|
|
146
185
|
(options as any)[methodName]);
|
|
147
|
-
|
|
186
|
+
return sp;
|
|
148
187
|
}
|
|
149
188
|
|
|
150
189
|
//top-menu: Bio | Sequence Space...
|
|
@@ -155,18 +194,18 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
155
194
|
//input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
|
|
156
195
|
//input: bool plotEmbeddings = true
|
|
157
196
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
158
|
-
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer|undefined> {
|
|
159
|
-
if (!
|
|
197
|
+
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
|
|
198
|
+
if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
|
|
160
199
|
return;
|
|
161
200
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
162
201
|
if (!encodedCol)
|
|
163
202
|
return;
|
|
164
203
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
165
|
-
const withoutEmptyValues = DG.DataFrame.fromColumns([
|
|
204
|
+
const withoutEmptyValues = DG.DataFrame.fromColumns([encodedCol]).clone();
|
|
166
205
|
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, encodedCol);
|
|
167
206
|
|
|
168
207
|
const chemSpaceParams = {
|
|
169
|
-
seqCol: withoutEmptyValues.col(
|
|
208
|
+
seqCol: withoutEmptyValues.col(encodedCol.name)!,
|
|
170
209
|
methodName: methodName,
|
|
171
210
|
similarityMetric: similarityMetric,
|
|
172
211
|
embedAxesNames: embedColsNames
|
|
@@ -174,11 +213,11 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
174
213
|
const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
|
|
175
214
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
176
215
|
for (const col of embeddings) {
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
216
|
+
const listValues = col.toList();
|
|
217
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
218
|
+
table.columns.add(DG.Column.fromList('double', col.name, listValues));
|
|
180
219
|
}
|
|
181
|
-
let sp;
|
|
220
|
+
let sp;
|
|
182
221
|
if (plotEmbeddings) {
|
|
183
222
|
for (const v of grok.shell.views) {
|
|
184
223
|
if (v.name === table.name)
|
|
@@ -198,7 +237,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
198
237
|
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
199
238
|
return;
|
|
200
239
|
}
|
|
201
|
-
if (!
|
|
240
|
+
if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
|
|
202
241
|
return;
|
|
203
242
|
|
|
204
243
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
@@ -217,10 +256,12 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
217
256
|
//top-menu: Bio | MSA...
|
|
218
257
|
//name: MSA
|
|
219
258
|
//input: dataframe table
|
|
220
|
-
//input: column sequence { semType: Macromolecule }
|
|
259
|
+
//input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
|
|
221
260
|
//output: column result
|
|
222
261
|
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
|
|
223
|
-
|
|
262
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
263
|
+
|
|
264
|
+
if (!checkInputColumnUi(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
224
265
|
return null;
|
|
225
266
|
|
|
226
267
|
const unUsedName = table.columns.getUnusedName(`msa(${col.name})`);
|
|
@@ -261,7 +302,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
261
302
|
});
|
|
262
303
|
|
|
263
304
|
const handler = async (col: DG.Column) => {
|
|
264
|
-
if (!
|
|
305
|
+
if (!checkInputColumnUi(col, 'Composition'))
|
|
265
306
|
return;
|
|
266
307
|
|
|
267
308
|
const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
@@ -376,11 +417,11 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
376
417
|
//console.warn(`file: ${fileInfo.path}, column: ${col.name}, ` +
|
|
377
418
|
// `semType: ${semType}, units: ${col.getTag(DG.TAGS.UNITS)}`);
|
|
378
419
|
// console.warn('file: "' + fileInfo.path + '", semType: "' + semType + '", ' +
|
|
379
|
-
// 'units: "' + col.getTag(
|
|
420
|
+
// 'units: "' + col.getTag(DG.TAGS.UNITS) + '"');
|
|
380
421
|
|
|
381
422
|
res.push({
|
|
382
423
|
file: fileInfo.path, result: 'detected', column: col.name,
|
|
383
|
-
message: `units: ${col.getTag(
|
|
424
|
+
message: `units: ${col.getTag(DG.TAGS.UNITS)}`
|
|
384
425
|
});
|
|
385
426
|
}
|
|
386
427
|
}
|
|
@@ -403,3 +444,19 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
403
444
|
return resDf;
|
|
404
445
|
}
|
|
405
446
|
|
|
447
|
+
//name: Bio | Split to monomers
|
|
448
|
+
//tags: panel, bio
|
|
449
|
+
//input: column col {semType: Macromolecule}
|
|
450
|
+
export function splitToMonomers(col: DG.Column<string>): void {
|
|
451
|
+
if (!col.getTag(UnitsHandler.TAGS.aligned).includes(C.MSA))
|
|
452
|
+
return grok.shell.error('Splitting is applicable only for aligned sequences');
|
|
453
|
+
|
|
454
|
+
const tempDf = splitAlignedSequences(col);
|
|
455
|
+
const originalDf = col.dataFrame;
|
|
456
|
+
for (const tempCol of tempDf.columns) {
|
|
457
|
+
const newCol = originalDf.columns.add(tempCol);
|
|
458
|
+
newCol.semType = C.SEM_TYPES.MONOMER;
|
|
459
|
+
// newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
|
|
460
|
+
newCol.setTag(C.TAGS.ALPHABET, col.getTag(C.TAGS.ALPHABET));
|
|
461
|
+
}
|
|
462
|
+
}
|
|
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
7
|
import {Column} from 'datagrok-api/dg';
|
|
8
|
+
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
9
|
|
|
9
10
|
category('WebLogo-positions', () => {
|
|
10
11
|
let tvList: DG.TableView[];
|
|
@@ -12,11 +13,11 @@ category('WebLogo-positions', () => {
|
|
|
12
13
|
let currentView: DG.View;
|
|
13
14
|
|
|
14
15
|
const csvDf1 = `seq
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
16
|
+
ATC-G-TTGC--
|
|
17
|
+
ATC-G-TTGC--
|
|
18
|
+
-TC-G-TTGC--
|
|
19
|
+
-TC-GCTTGC--
|
|
20
|
+
-TC-GCTTGC--`;
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
before(async () => {
|
|
@@ -34,8 +35,10 @@ category('WebLogo-positions', () => {
|
|
|
34
35
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
35
36
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
36
37
|
|
|
37
|
-
df.getCol('seq')
|
|
38
|
-
|
|
38
|
+
const seqCol: DG.Column = df.getCol('seq');
|
|
39
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
40
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
41
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
39
42
|
|
|
40
43
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
|
|
41
44
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
@@ -81,10 +84,11 @@ category('WebLogo-positions', () => {
|
|
|
81
84
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf2);
|
|
82
85
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
83
86
|
|
|
84
|
-
df.getCol('seq')
|
|
85
|
-
|
|
87
|
+
const seqCol: DG.Column = df.getCol('seq');
|
|
88
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
89
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
90
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
86
91
|
|
|
87
|
-
let seq: Column = df.getCol('seq');
|
|
88
92
|
df.filter.init((i) => {
|
|
89
93
|
return i > 2;
|
|
90
94
|
});
|
|
@@ -125,8 +129,10 @@ category('WebLogo-positions', () => {
|
|
|
125
129
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
126
130
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
127
131
|
|
|
128
|
-
df.getCol('seq')
|
|
129
|
-
|
|
132
|
+
const seqCol: DG.Column = df.getCol('seq');
|
|
133
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
134
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
|
+
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
130
136
|
|
|
131
137
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
|
|
132
138
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
@@ -156,7 +162,6 @@ category('WebLogo-positions', () => {
|
|
|
156
162
|
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
157
163
|
}
|
|
158
164
|
}
|
|
159
|
-
|
|
160
165
|
});
|
|
161
166
|
|
|
162
167
|
});
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
import {checkInputColumn} from '../package';
|
|
8
|
+
import {UNITS} from 'datagrok-api/dg';
|
|
9
|
+
import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
category('checkInputColumn', () => {
|
|
13
|
+
|
|
14
|
+
const csv = `seq
|
|
15
|
+
seq1,
|
|
16
|
+
seq2,
|
|
17
|
+
seq3,
|
|
18
|
+
seq4`;
|
|
19
|
+
|
|
20
|
+
category('MSA', () => {
|
|
21
|
+
|
|
22
|
+
test('testMsaPos', async () => {
|
|
23
|
+
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
24
|
+
const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
25
|
+
|
|
26
|
+
let k = 11;
|
|
27
|
+
|
|
28
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
29
|
+
const col: DG.Column = df.getCol('seq');
|
|
30
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
31
|
+
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
32
|
+
col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
33
|
+
|
|
34
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
35
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
36
|
+
|
|
37
|
+
expect(res, true);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
test('testMsaNegHelm', async () => {
|
|
41
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
42
|
+
const col: DG.Column = df.getCol('seq');
|
|
43
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
44
|
+
col.setTag(DG.TAGS.UNITS, 'helm');
|
|
45
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
46
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
47
|
+
|
|
48
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
49
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
50
|
+
|
|
51
|
+
expect(res, false);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test('testMsaNegUN', async () => {
|
|
55
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
56
|
+
const col: DG.Column = df.getCol('seq');
|
|
57
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
58
|
+
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
59
|
+
col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
|
|
60
|
+
col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
|
|
61
|
+
col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
|
|
62
|
+
|
|
63
|
+
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
64
|
+
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
65
|
+
|
|
66
|
+
expect(res, false);
|
|
67
|
+
});
|
|
68
|
+
});
|
|
69
|
+
});
|
|
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
7
|
import {importFasta} from '../package';
|
|
8
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
9
|
|
|
9
10
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
10
11
|
|
|
@@ -198,44 +199,44 @@ MWRSWY-CKHP
|
|
|
198
199
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
199
200
|
|
|
200
201
|
test('Dna1', async () => {
|
|
201
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA');
|
|
202
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA', 4, false);
|
|
202
203
|
});
|
|
203
204
|
test('Rna1', async () => {
|
|
204
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA');
|
|
205
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA', 4, false);
|
|
205
206
|
});
|
|
206
207
|
test('AA1', async () => {
|
|
207
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT');
|
|
208
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT', 20, false);
|
|
208
209
|
});
|
|
209
210
|
test('MsaDna1', async () => {
|
|
210
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA');
|
|
211
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA', 4, false);
|
|
211
212
|
});
|
|
212
213
|
|
|
213
214
|
test('MsaAA1', async () => {
|
|
214
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT');
|
|
215
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT', 20, false);
|
|
215
216
|
});
|
|
216
217
|
|
|
217
218
|
test('SepDna', async () => {
|
|
218
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', '*');
|
|
219
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', 4, false, '*');
|
|
219
220
|
});
|
|
220
221
|
test('SepRna', async () => {
|
|
221
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', '*');
|
|
222
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', 4, false, '*');
|
|
222
223
|
});
|
|
223
224
|
test('SepPt', async () => {
|
|
224
|
-
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', '-');
|
|
225
|
+
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', 20, false, '-');
|
|
225
226
|
});
|
|
226
227
|
test('SepUn1', async () => {
|
|
227
|
-
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', '-');
|
|
228
|
+
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', 8, true, '-');
|
|
228
229
|
});
|
|
229
230
|
test('SepUn2', async () => {
|
|
230
|
-
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', '/');
|
|
231
|
+
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', 9, true, '/');
|
|
231
232
|
});
|
|
232
233
|
|
|
233
234
|
test('SepMsaN1', async () => {
|
|
234
|
-
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', '-');
|
|
235
|
+
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', 4, false, '-');
|
|
235
236
|
});
|
|
236
237
|
|
|
237
238
|
test('SamplesFastaCsvPt', async () => {
|
|
238
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
239
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
|
|
239
240
|
});
|
|
240
241
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
241
242
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -248,7 +249,7 @@ MWRSWY-CKHP
|
|
|
248
249
|
});
|
|
249
250
|
|
|
250
251
|
test('SamplesFastaFastaPt', async () => {
|
|
251
|
-
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
252
|
+
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
|
|
252
253
|
});
|
|
253
254
|
|
|
254
255
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -266,7 +267,7 @@ MWRSWY-CKHP
|
|
|
266
267
|
});
|
|
267
268
|
|
|
268
269
|
test('samplesMsaComplexUn', async () => {
|
|
269
|
-
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', '/');
|
|
270
|
+
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', 161, true, '/');
|
|
270
271
|
});
|
|
271
272
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
272
273
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -281,7 +282,7 @@ MWRSWY-CKHP
|
|
|
281
282
|
});
|
|
282
283
|
|
|
283
284
|
test('samplesHelmCsvHELM', async () => {
|
|
284
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, null);
|
|
285
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, 160, true, null);
|
|
285
286
|
});
|
|
286
287
|
|
|
287
288
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -297,7 +298,7 @@ MWRSWY-CKHP
|
|
|
297
298
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
298
299
|
});
|
|
299
300
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
300
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, null);
|
|
301
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, 9, true, null);
|
|
301
302
|
});
|
|
302
303
|
test('samplesTestHelmNegativeValid', async () => {
|
|
303
304
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -329,7 +330,7 @@ MWRSWY-CKHP
|
|
|
329
330
|
});
|
|
330
331
|
|
|
331
332
|
test('samplesFastaPtPosSequence', async () => {
|
|
332
|
-
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
333
|
+
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
|
|
333
334
|
});
|
|
334
335
|
|
|
335
336
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -371,7 +372,8 @@ MWRSWY-CKHP
|
|
|
371
372
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
372
373
|
const df: DG.DataFrame = await readDf();
|
|
373
374
|
const col: DG.Column = df.col(colName)!;
|
|
374
|
-
const semType: string = await grok.functions
|
|
375
|
+
const semType: string = await grok.functions
|
|
376
|
+
.call('Bio:detectMacromolecule', {col: col}) as unknown as string;
|
|
375
377
|
if (semType)
|
|
376
378
|
col.semType = semType;
|
|
377
379
|
|
|
@@ -384,18 +386,31 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
384
386
|
}
|
|
385
387
|
}
|
|
386
388
|
|
|
387
|
-
export async function _testPos(
|
|
389
|
+
export async function _testPos(
|
|
390
|
+
readDf: DfReaderFunc, colName: string, units: string,
|
|
391
|
+
aligned: string | null, alphabet: string | null, alphabetSize: number, alphabetIsMultichar: boolean,
|
|
392
|
+
separator: string | null = null
|
|
393
|
+
) {
|
|
388
394
|
const df: DG.DataFrame = await readDf();
|
|
389
395
|
const col: DG.Column = df.col(colName)!;
|
|
390
|
-
const semType: string = await grok.functions
|
|
396
|
+
const semType: string = await grok.functions
|
|
397
|
+
.call('Bio:detectMacromolecule', {col: col}) as unknown as string;
|
|
391
398
|
if (semType)
|
|
392
399
|
col.semType = semType;
|
|
393
400
|
|
|
394
401
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
395
402
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
396
|
-
expect(col.getTag(
|
|
397
|
-
expect(col.getTag(
|
|
403
|
+
expect(col.getTag(UnitsHandler.TAGS.aligned), aligned);
|
|
404
|
+
expect(col.getTag(UnitsHandler.TAGS.alphabet), alphabet);
|
|
398
405
|
if (separator)
|
|
399
|
-
expect(col.getTag(
|
|
406
|
+
expect(col.getTag(UnitsHandler.TAGS.separator), separator);
|
|
407
|
+
|
|
408
|
+
const uh = new UnitsHandler(col);
|
|
409
|
+
expect(uh.getAlphabetSize(), alphabetSize);
|
|
410
|
+
expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
411
|
+
if (!uh.isHelm()) {
|
|
412
|
+
expect(uh.aligned, aligned);
|
|
413
|
+
expect(uh.alphabet, alphabet);
|
|
414
|
+
}
|
|
400
415
|
}
|
|
401
416
|
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -49,7 +49,8 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
|
|
|
49
49
|
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
50
50
|
|
|
51
51
|
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
52
|
-
const semType: string = await grok.functions
|
|
52
|
+
const semType: string = await grok.functions
|
|
53
|
+
.call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
|
|
53
54
|
if (semType)
|
|
54
55
|
srcCol.semType = semType;
|
|
55
56
|
|
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
7
|
import {convertDo} from '../utils/convert';
|
|
8
|
-
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
|
|
10
10
|
category('renderers', () => {
|
|
11
11
|
let tvList: DG.TableView[];
|
|
@@ -42,21 +42,21 @@ category('renderers', () => {
|
|
|
42
42
|
|
|
43
43
|
console.log('Bio: tests/renderers/afterMsa, src before test ' +
|
|
44
44
|
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
|
|
45
|
-
`cell.renderer="${srcSeqCol!.getTag(
|
|
45
|
+
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
46
46
|
expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
47
|
-
expect(srcSeqCol!.getTag(DG.TAGS.UNITS),
|
|
48
|
-
expect(srcSeqCol!.getTag(
|
|
49
|
-
expect(srcSeqCol!.getTag(
|
|
50
|
-
expect(srcSeqCol!.getTag(
|
|
47
|
+
expect(srcSeqCol!.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
48
|
+
expect(srcSeqCol!.getTag(UnitsHandler.TAGS.aligned), 'SEQ');
|
|
49
|
+
expect(srcSeqCol!.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
|
|
50
|
+
expect(srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
51
51
|
|
|
52
52
|
const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
|
|
53
53
|
tv.grid.invalidate();
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
56
|
-
expect(msaSeqCol!.getTag(DG.TAGS.UNITS),
|
|
57
|
-
expect(msaSeqCol!.getTag(
|
|
58
|
-
expect(msaSeqCol!.getTag(
|
|
59
|
-
expect(msaSeqCol!.getTag(
|
|
56
|
+
expect(msaSeqCol!.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
57
|
+
expect(msaSeqCol!.getTag(UnitsHandler.TAGS.aligned), 'SEQ.MSA');
|
|
58
|
+
expect(msaSeqCol!.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
|
|
59
|
+
expect(msaSeqCol!.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
60
60
|
|
|
61
61
|
dfList.push(df);
|
|
62
62
|
tvList.push(tv);
|
|
@@ -70,7 +70,7 @@ category('renderers', () => {
|
|
|
70
70
|
|
|
71
71
|
const srcCol: DG.Column = df.col('sequence')!;
|
|
72
72
|
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
73
|
-
expect(tgtCol.getTag(
|
|
73
|
+
expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
74
74
|
|
|
75
75
|
tvList.push(tv);
|
|
76
76
|
dfList.push(df);
|