@datagrok/bio 1.7.23 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +7 -4
- package/dist/package-test.js +448 -286
- package/dist/package.js +276 -200
- package/files/samples/sample_HELM_empty_vals.csv +541 -0
- package/package.json +10 -7
- package/{setup-unlink-clean → setup-unlink-clean.sh} +0 -0
- package/{setup → setup.sh} +4 -1
- package/src/__jest__/remote.test.ts +4 -4
- package/src/__jest__/test-node.ts +3 -2
- package/src/package-test.ts +0 -1
- package/src/package.ts +42 -47
- package/src/tests/WebLogo-positions-test.ts +43 -21
- package/src/tests/activity-cliffs-tests.ts +15 -34
- package/src/tests/activity-cliffs-utils.ts +19 -0
- package/src/tests/detectors-test.ts +21 -18
- package/src/tests/renderers-test.ts +12 -7
- package/src/tests/sequence-space-test.ts +17 -12
- package/src/tests/sequence-space-utils.ts +10 -0
- package/src/utils/cell-renderer.ts +124 -134
- package/src/utils/convert.ts +12 -2
- package/src/utils/multiple-sequence-alignment.ts +8 -2
- package/src/utils/sequence-activity-cliffs.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +7 -0
- package/test-Bio-dc07f068a0b2-cdad4cfb.html +358 -0
- package/test-Bio-4f0c8bae6479-5b129baa.html +0 -358
package/src/__jest__/remote.test.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
import * as utils from './test-node';
|
|
6
6
|
import puppeteer from 'puppeteer';
|
|
7
7
|
|
|
8
|
-
const P_START_TIMEOUT: number =
|
|
8
|
+
const P_START_TIMEOUT: number = 3600000;
|
|
9
9
|
let browser: puppeteer.Browser;
|
|
10
10
|
let page: puppeteer.Page;
|
|
11
11
|
|
|
@@ -51,10 +51,10 @@ it('TEST', async () => {
|
|
|
51
51
|
let failReport = '';
|
|
52
52
|
for (let i = 0; i < df.rowCount; i++) {
|
|
53
53
|
if (cStatus.get(i)) {
|
|
54
|
-
passReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
54
|
+
passReport += `Test result : Success : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
55
55
|
} else {
|
|
56
56
|
failed = true;
|
|
57
|
-
failReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
57
|
+
failReport += `Test result : Failed : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
60
|
resolve({failReport, passReport, failed});
|
|
@@ -65,4 +65,4 @@ it('TEST', async () => {
|
|
|
65
65
|
console.log(r.passReport);
|
|
66
66
|
// @ts-ignore
|
|
67
67
|
expect(r.failed).checkOutput(false, r.failReport);
|
|
68
|
-
},
|
|
68
|
+
}, 3600000);
|
package/src/__jest__/test-node.ts
CHANGED
|
@@ -68,6 +68,7 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
|
|
|
68
68
|
});
|
|
69
69
|
|
|
70
70
|
const page = await browser.newPage();
|
|
71
|
+
await page.setDefaultNavigationTimeout(0);
|
|
71
72
|
await page.goto(`${url}/oauth/`);
|
|
72
73
|
await page.setCookie({name: 'auth', value: token});
|
|
73
74
|
await page.evaluate((token: any) => {
|
|
@@ -75,8 +76,8 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
|
|
|
75
76
|
}, token);
|
|
76
77
|
await page.goto(url);
|
|
77
78
|
try {
|
|
78
|
-
await page.waitForSelector('.grok-preloader');
|
|
79
|
-
await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout:
|
|
79
|
+
await page.waitForSelector('.grok-preloader', { timeout: 1800000 });
|
|
80
|
+
await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout: 3600000});
|
|
80
81
|
} catch (error) {
|
|
81
82
|
throw error;
|
|
82
83
|
}
|
package/src/package-test.ts
CHANGED
|
@@ -22,7 +22,6 @@ export {tests};
|
|
|
22
22
|
//input: string category {optional: true}
|
|
23
23
|
//input: string test {optional: true}
|
|
24
24
|
//output: dataframe result
|
|
25
|
-
//top-menu: Tools | Dev | JS API Tests
|
|
26
25
|
export async function test(category: string, test: string): Promise<DG.DataFrame> {
|
|
27
26
|
const data = await runTests({category, test});
|
|
28
27
|
return DG.DataFrame.fromObjects(data)!;
|
package/src/package.ts
CHANGED
|
@@ -20,51 +20,46 @@ import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CO
|
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
22
|
import {convert} from './utils/convert';
|
|
23
|
-
import {lru} from './utils/cell-renderer';
|
|
24
23
|
import {representationsWidget} from './widgets/representations';
|
|
25
24
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
26
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
|
+
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
//tags: init
|
|
30
|
-
export async function initBio()
|
|
31
|
-
// apparently HELMWebEditor requires dojo to be initialized first
|
|
32
|
-
const funcList: DG.Func[] = DG.Func.find({package: 'Helm', name: 'initHelm'});
|
|
33
|
-
console.debug(`Bio: initBio() funcList.length = ${funcList.length}`);
|
|
34
|
-
if (funcList.length === 1)
|
|
35
|
-
await grok.functions.call('Helm:initHelp');
|
|
36
|
-
|
|
37
|
-
return new Promise((resolve, reject) => {
|
|
38
|
-
// @ts-ignore
|
|
39
|
-
dojo.ready(function() { resolve(null); });
|
|
40
|
-
});
|
|
30
|
+
export async function initBio() {
|
|
41
31
|
}
|
|
42
32
|
|
|
43
|
-
//name:
|
|
44
|
-
//
|
|
45
|
-
|
|
46
|
-
|
|
33
|
+
//name: fastaSequenceCellRenderer
|
|
34
|
+
//tags: cellRenderer
|
|
35
|
+
//meta.cellType: Sequence
|
|
36
|
+
//meta.columnTags: units=fasta
|
|
37
|
+
//output: grid_cell_renderer result
|
|
38
|
+
export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
39
|
+
return new MacromoleculeSequenceCellRenderer();
|
|
47
40
|
}
|
|
48
41
|
|
|
49
|
-
//name:
|
|
42
|
+
//name: separatorSequenceCellRenderer
|
|
50
43
|
//tags: cellRenderer
|
|
51
|
-
//meta.cellType:
|
|
44
|
+
//meta.cellType: Sequence
|
|
45
|
+
//meta.columnTags: units=separator
|
|
52
46
|
//output: grid_cell_renderer result
|
|
53
|
-
export function
|
|
47
|
+
export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
54
48
|
return new MacromoleculeSequenceCellRenderer();
|
|
55
49
|
}
|
|
56
50
|
|
|
57
51
|
function checkInputColumn(col: DG.Column, name: string,
|
|
58
52
|
allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
|
|
59
|
-
const
|
|
53
|
+
const notation: string = col.getTag(DG.TAGS.UNITS);
|
|
54
|
+
const alphabet: string = col.getTag('alphabet')
|
|
60
55
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
61
56
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
62
57
|
return false;
|
|
63
58
|
} else if (
|
|
64
59
|
(allowedAlphabets.length > 0 &&
|
|
65
|
-
!allowedAlphabets.some((a) =>
|
|
60
|
+
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))) ||
|
|
66
61
|
(allowedNotations.length > 0 &&
|
|
67
|
-
!allowedNotations.some((n) =>
|
|
62
|
+
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase())))
|
|
68
63
|
) {
|
|
69
64
|
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
70
65
|
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
@@ -117,7 +112,7 @@ export function vdRegionViewer() {
|
|
|
117
112
|
//input: double similarity = 80 [Similarity cutoff]
|
|
118
113
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
119
114
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
120
|
-
similarity: number, methodName: string): Promise<
|
|
115
|
+
similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
|
|
121
116
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
122
117
|
return;
|
|
123
118
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
@@ -127,8 +122,13 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
127
122
|
const options = {
|
|
128
123
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
129
124
|
};
|
|
130
|
-
const
|
|
131
|
-
|
|
125
|
+
const tags = {
|
|
126
|
+
'units': macroMolecule.tags['units'],
|
|
127
|
+
'aligned': macroMolecule.tags['aligned'],
|
|
128
|
+
'separator': macroMolecule.tags['separator'],
|
|
129
|
+
'alphabet': macroMolecule.tags['alphabet'],
|
|
130
|
+
}
|
|
131
|
+
const sp = await getActivityCliffs(
|
|
132
132
|
df,
|
|
133
133
|
macroMolecule,
|
|
134
134
|
encodedCol,
|
|
@@ -139,11 +139,12 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
139
139
|
'Levenshtein',
|
|
140
140
|
methodName,
|
|
141
141
|
DG.SEMTYPE.MACROMOLECULE,
|
|
142
|
-
|
|
142
|
+
tags,
|
|
143
143
|
sequenceSpace,
|
|
144
144
|
sequenceGetSimilarities,
|
|
145
145
|
drawTooltip,
|
|
146
146
|
(options as any)[methodName]);
|
|
147
|
+
return sp;
|
|
147
148
|
}
|
|
148
149
|
|
|
149
150
|
//top-menu: Bio | Sequence Space...
|
|
@@ -154,29 +155,37 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
154
155
|
//input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
|
|
155
156
|
//input: bool plotEmbeddings = true
|
|
156
157
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
157
|
-
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<
|
|
158
|
+
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer|undefined> {
|
|
158
159
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
159
160
|
return;
|
|
160
161
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
161
162
|
if (!encodedCol)
|
|
162
163
|
return;
|
|
163
164
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
165
|
+
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
166
|
+
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, encodedCol);
|
|
167
|
+
|
|
164
168
|
const chemSpaceParams = {
|
|
165
|
-
seqCol:
|
|
169
|
+
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
166
170
|
methodName: methodName,
|
|
167
171
|
similarityMetric: similarityMetric,
|
|
168
172
|
embedAxesNames: embedColsNames
|
|
169
173
|
};
|
|
170
174
|
const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
|
|
171
175
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
172
|
-
for (const col of embeddings)
|
|
173
|
-
|
|
176
|
+
for (const col of embeddings) {
|
|
177
|
+
const listValues = col.toList();
|
|
178
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
179
|
+
table.columns.add(DG.Column.fromList('double', col.name, listValues));
|
|
180
|
+
}
|
|
181
|
+
let sp;
|
|
174
182
|
if (plotEmbeddings) {
|
|
175
183
|
for (const v of grok.shell.views) {
|
|
176
184
|
if (v.name === table.name)
|
|
177
|
-
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
185
|
+
sp = (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
178
186
|
}
|
|
179
187
|
}
|
|
188
|
+
return sp;
|
|
180
189
|
};
|
|
181
190
|
|
|
182
191
|
//top-menu: Bio | To Atomic Level...
|
|
@@ -192,22 +201,6 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
192
201
|
if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
|
|
193
202
|
return;
|
|
194
203
|
|
|
195
|
-
let currentView: DG.TableView;
|
|
196
|
-
for (const view of grok.shell.tableViews) {
|
|
197
|
-
if (df.name === view.name)
|
|
198
|
-
currentView = view;
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
// Some hack to activate Chem Molecule rendering
|
|
202
|
-
const file2 = await _package.files.readAsText('tests/sar-small.csv');
|
|
203
|
-
const df2 = DG.DataFrame.fromCsv(file2);
|
|
204
|
-
const v2 = grok.shell.addTableView(df2);
|
|
205
|
-
setTimeout(() => {
|
|
206
|
-
grok.shell.closeTable(df2);
|
|
207
|
-
v2.close();
|
|
208
|
-
grok.shell.v = currentView;
|
|
209
|
-
}, 100);
|
|
210
|
-
|
|
211
204
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
212
205
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
213
206
|
const atomicCodes = getMolfilesFromSeq(macroMolecule, monomersLibObject);
|
|
@@ -217,6 +210,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
217
210
|
col.semType = DG.SEMTYPE.MOLECULE;
|
|
218
211
|
col.tags[DG.TAGS.UNITS] = 'molblock';
|
|
219
212
|
df.columns.add(col, true);
|
|
213
|
+
await grok.data.detectSemanticTypes(df);
|
|
220
214
|
}
|
|
221
215
|
|
|
222
216
|
|
|
@@ -408,3 +402,4 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
408
402
|
resDf.name = `datasets_detectMacromolecule_${path}`;
|
|
409
403
|
return resDf;
|
|
410
404
|
}
|
|
405
|
+
|
package/src/tests/WebLogo-positions-test.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
2
1
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
3
2
|
|
|
4
3
|
import * as grok from 'datagrok-api/grok';
|
|
5
4
|
import * as ui from 'datagrok-api/ui';
|
|
6
5
|
import * as DG from 'datagrok-api/dg';
|
|
7
6
|
import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
import {Column} from 'datagrok-api/dg';
|
|
8
|
+
|
|
8
9
|
category('WebLogo-positions', () => {
|
|
9
10
|
let tvList: DG.TableView[];
|
|
10
11
|
let dfList: DG.DataFrame[];
|
|
@@ -18,8 +19,6 @@ category('WebLogo-positions', () => {
|
|
|
18
19
|
-TC-GCTTGC--`;
|
|
19
20
|
|
|
20
21
|
|
|
21
|
-
const resShrinkEmptyTailDf1: PositionInfo[] = [];
|
|
22
|
-
|
|
23
22
|
before(async () => {
|
|
24
23
|
tvList = [];
|
|
25
24
|
dfList = [];
|
|
@@ -35,6 +34,9 @@ category('WebLogo-positions', () => {
|
|
|
35
34
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
36
35
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
37
36
|
|
|
37
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
38
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
39
|
+
|
|
38
40
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
|
|
39
41
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
40
42
|
|
|
@@ -57,9 +59,9 @@ category('WebLogo-positions', () => {
|
|
|
57
59
|
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
58
60
|
new PositionInfo('12', {'-': new PositionMonomerInfo(5)})
|
|
59
61
|
];
|
|
60
|
-
|
|
61
|
-
expect(positions.length,resAllDf1.length);
|
|
62
|
-
|
|
62
|
+
|
|
63
|
+
expect(positions.length, resAllDf1.length);
|
|
64
|
+
|
|
63
65
|
for (let i = 0; i < positions.length; i++) {
|
|
64
66
|
expect(positions[i].name, resAllDf1[i].name);
|
|
65
67
|
for (const key in positions[i].freq) {
|
|
@@ -68,11 +70,27 @@ category('WebLogo-positions', () => {
|
|
|
68
70
|
}
|
|
69
71
|
|
|
70
72
|
});
|
|
71
|
-
test('positions with shrinkEmptyTail option', async () => {
|
|
72
|
-
|
|
73
|
+
test('positions with shrinkEmptyTail option true (filterd)', async () => {
|
|
74
|
+
let csvDf2 = `seq
|
|
75
|
+
-TC-G-TTGC--
|
|
76
|
+
-TC-GCTTGC--
|
|
77
|
+
-T--C-GT-
|
|
78
|
+
-T--C-GT-
|
|
79
|
+
-T--C-GT-
|
|
80
|
+
-T--CCGT-`;
|
|
81
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf2);
|
|
73
82
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
74
83
|
|
|
84
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
85
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
86
|
+
|
|
87
|
+
let seq: Column = df.getCol('seq');
|
|
88
|
+
df.filter.init((i) => {
|
|
89
|
+
return i > 2;
|
|
90
|
+
});
|
|
91
|
+
df.filter.fireChanged();
|
|
75
92
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'shrinkEmptyTail': true}) as unknown as WebLogo;
|
|
93
|
+
|
|
76
94
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
77
95
|
|
|
78
96
|
tvList.push(tv);
|
|
@@ -81,19 +99,19 @@ category('WebLogo-positions', () => {
|
|
|
81
99
|
const positions: PositionInfo[] = wlViewer['positions'];
|
|
82
100
|
|
|
83
101
|
const resAllDf1: PositionInfo[] = [
|
|
84
|
-
new PositionInfo('1', {'
|
|
85
|
-
new PositionInfo('2', {'T': new PositionMonomerInfo(
|
|
86
|
-
new PositionInfo('3', {'
|
|
87
|
-
new PositionInfo('4', {'-': new PositionMonomerInfo(
|
|
88
|
-
new PositionInfo('5', {'
|
|
89
|
-
new PositionInfo('6', {'-': new PositionMonomerInfo(
|
|
90
|
-
new PositionInfo('7', {'
|
|
91
|
-
new PositionInfo('8', {'T': new PositionMonomerInfo(
|
|
92
|
-
new PositionInfo('9', {'
|
|
93
|
-
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
102
|
+
new PositionInfo('1', {'-': new PositionMonomerInfo(3)}),
|
|
103
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(3)}),
|
|
104
|
+
new PositionInfo('3', {'-': new PositionMonomerInfo(3)}),
|
|
105
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(3)}),
|
|
106
|
+
new PositionInfo('5', {'C': new PositionMonomerInfo(3)}),
|
|
107
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(2), 'C': new PositionMonomerInfo(1)}),
|
|
108
|
+
new PositionInfo('7', {'G': new PositionMonomerInfo(3)}),
|
|
109
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(3)}),
|
|
110
|
+
new PositionInfo('9', {'-': new PositionMonomerInfo(3)}),
|
|
94
111
|
];
|
|
95
112
|
|
|
96
|
-
|
|
113
|
+
expect(positions.length, resAllDf1.length);
|
|
114
|
+
|
|
97
115
|
for (let i = 0; i < positions.length; i++) {
|
|
98
116
|
expect(positions[i].name, resAllDf1[i].name);
|
|
99
117
|
for (const key in positions[i].freq) {
|
|
@@ -107,7 +125,10 @@ category('WebLogo-positions', () => {
|
|
|
107
125
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
108
126
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
109
127
|
|
|
110
|
-
|
|
128
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
129
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
130
|
+
|
|
131
|
+
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
|
|
111
132
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
112
133
|
|
|
113
134
|
tvList.push(tv);
|
|
@@ -126,7 +147,8 @@ category('WebLogo-positions', () => {
|
|
|
126
147
|
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
127
148
|
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
128
149
|
];
|
|
129
|
-
|
|
150
|
+
|
|
151
|
+
expect(positions.length, resAllDf1.length);
|
|
130
152
|
|
|
131
153
|
for (let i = 0; i < positions.length; i++) {
|
|
132
154
|
expect(positions[i].name, resAllDf1[i].name);
|
package/src/tests/activity-cliffs-tests.ts
CHANGED
|
@@ -1,58 +1,39 @@
|
|
|
1
|
-
import {after, before, category,
|
|
1
|
+
import {after, before, category, test} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
5
|
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
|
-
import {
|
|
8
|
-
import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
|
|
9
|
-
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
10
|
-
import {encodeMonomers} from '../utils/utils';
|
|
7
|
+
import { _testActivityCliffsOpen } from './activity-cliffs-utils';
|
|
11
8
|
|
|
12
9
|
|
|
13
10
|
category('activityCliffs', async () => {
|
|
14
11
|
let actCliffsTableView: DG.TableView;
|
|
15
12
|
let actCliffsDf: DG.DataFrame;
|
|
13
|
+
let actCliffsTableViewWithEmptyRows: DG.TableView;
|
|
14
|
+
let actCliffsDfWithEmptyRows: DG.DataFrame;
|
|
15
|
+
|
|
16
16
|
|
|
17
17
|
before(async () => {
|
|
18
18
|
actCliffsDf = await readDataframe('samples/sample_MSA.csv');
|
|
19
19
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
actCliffsDfWithEmptyRows = await readDataframe('samples/sample_HELM_empty_vals.csv');
|
|
21
|
+
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
22
22
|
});
|
|
23
23
|
|
|
24
24
|
after(async () => {
|
|
25
25
|
grok.shell.closeTable(actCliffsDf);
|
|
26
26
|
actCliffsTableView.close();
|
|
27
|
+
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
28
|
+
actCliffsTableViewWithEmptyRows.close();
|
|
27
29
|
});
|
|
28
30
|
|
|
29
31
|
test('activityCliffsOpen', async () => {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
const encodedCol = encodeMonomers(actCliffsDf.col('MSA')!) as DG.Column;
|
|
36
|
-
const scatterPlot = await getActivityCliffs(
|
|
37
|
-
actCliffsDf,
|
|
38
|
-
actCliffsDf.col('MSA')!,
|
|
39
|
-
encodedCol,
|
|
40
|
-
axesNames,
|
|
41
|
-
'Activity cliffs',
|
|
42
|
-
actCliffsDf.col('Activity')!,
|
|
43
|
-
50,
|
|
44
|
-
'Levenshtein',
|
|
45
|
-
't-SNE',
|
|
46
|
-
DG.SEMTYPE.MACROMOLECULE,
|
|
47
|
-
units,
|
|
48
|
-
sequenceSpace,
|
|
49
|
-
sequenceGetSimilarities,
|
|
50
|
-
drawTooltip);
|
|
51
|
-
|
|
52
|
-
expect(scatterPlot != null, true);
|
|
53
|
-
|
|
54
|
-
const cliffsLink = (Array.from(scatterPlot.root.children) as Element[])
|
|
55
|
-
.filter((it) => it.className === 'ui-btn ui-btn-ok');
|
|
56
|
-
expect((cliffsLink[0] as HTMLElement).innerText, '2362 cliffs');
|
|
32
|
+
await _testActivityCliffsOpen(actCliffsDf, 53, 'UMAP', 'MSA');
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
test('activityCliffsOpenWithEmptyRows', async () => {
|
|
36
|
+
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 53, 'UMAP', 'HELM');
|
|
57
37
|
});
|
|
38
|
+
|
|
58
39
|
});
|
package/src/tests/activity-cliffs-utils.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {delay, expect} from '@datagrok-libraries/utils/src/test';
|
|
3
|
+
import {_package} from '../package-test';
|
|
4
|
+
import { activityCliffs } from '../package';
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: string, colName: string) {
|
|
8
|
+
const scatterPlot = await activityCliffs(
|
|
9
|
+
df,
|
|
10
|
+
df.col(colName)!,
|
|
11
|
+
df.col('Activity')!,
|
|
12
|
+
80,
|
|
13
|
+
method);
|
|
14
|
+
|
|
15
|
+
expect(scatterPlot != null, true);
|
|
16
|
+
|
|
17
|
+
const cliffsLink = Array.from(scatterPlot!.root.children).filter(it => it.className === 'ui-btn ui-btn-ok');
|
|
18
|
+
expect((cliffsLink[0] as HTMLElement).innerText, `${numberCliffs} cliffs`);
|
|
19
|
+
}
|
package/src/tests/detectors-test.ts
CHANGED
|
@@ -190,6 +190,7 @@ MWRSWY-CKHP
|
|
|
190
190
|
};
|
|
191
191
|
};
|
|
192
192
|
|
|
193
|
+
|
|
193
194
|
test('NegativeEmpty', async () => { await _testNeg(readCsv('csvDfEmpty', csvDfEmpty), 'col1'); });
|
|
194
195
|
test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
|
|
195
196
|
test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
|
|
@@ -197,44 +198,44 @@ MWRSWY-CKHP
|
|
|
197
198
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
198
199
|
|
|
199
200
|
test('Dna1', async () => {
|
|
200
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta
|
|
201
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA');
|
|
201
202
|
});
|
|
202
203
|
test('Rna1', async () => {
|
|
203
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta
|
|
204
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA');
|
|
204
205
|
});
|
|
205
206
|
test('AA1', async () => {
|
|
206
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta
|
|
207
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT');
|
|
207
208
|
});
|
|
208
209
|
test('MsaDna1', async () => {
|
|
209
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta
|
|
210
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA');
|
|
210
211
|
});
|
|
211
212
|
|
|
212
213
|
test('MsaAA1', async () => {
|
|
213
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta
|
|
214
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT');
|
|
214
215
|
});
|
|
215
216
|
|
|
216
217
|
test('SepDna', async () => {
|
|
217
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator
|
|
218
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', '*');
|
|
218
219
|
});
|
|
219
220
|
test('SepRna', async () => {
|
|
220
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator
|
|
221
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', '*');
|
|
221
222
|
});
|
|
222
223
|
test('SepPt', async () => {
|
|
223
|
-
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator
|
|
224
|
+
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', '-');
|
|
224
225
|
});
|
|
225
226
|
test('SepUn1', async () => {
|
|
226
|
-
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator
|
|
227
|
+
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', '-');
|
|
227
228
|
});
|
|
228
229
|
test('SepUn2', async () => {
|
|
229
|
-
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator
|
|
230
|
+
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', '/');
|
|
230
231
|
});
|
|
231
232
|
|
|
232
233
|
test('SepMsaN1', async () => {
|
|
233
|
-
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator
|
|
234
|
+
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', '-');
|
|
234
235
|
});
|
|
235
236
|
|
|
236
237
|
test('SamplesFastaCsvPt', async () => {
|
|
237
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta
|
|
238
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
238
239
|
});
|
|
239
240
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
240
241
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -247,7 +248,7 @@ MWRSWY-CKHP
|
|
|
247
248
|
});
|
|
248
249
|
|
|
249
250
|
test('SamplesFastaFastaPt', async () => {
|
|
250
|
-
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta
|
|
251
|
+
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
251
252
|
});
|
|
252
253
|
|
|
253
254
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -265,7 +266,7 @@ MWRSWY-CKHP
|
|
|
265
266
|
});
|
|
266
267
|
|
|
267
268
|
test('samplesMsaComplexUn', async () => {
|
|
268
|
-
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator
|
|
269
|
+
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', '/');
|
|
269
270
|
});
|
|
270
271
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
271
272
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -280,7 +281,7 @@ MWRSWY-CKHP
|
|
|
280
281
|
});
|
|
281
282
|
|
|
282
283
|
test('samplesHelmCsvHELM', async () => {
|
|
283
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'HELM', null);
|
|
284
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'HELM', null, null, null);
|
|
284
285
|
});
|
|
285
286
|
|
|
286
287
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -296,7 +297,7 @@ MWRSWY-CKHP
|
|
|
296
297
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
297
298
|
});
|
|
298
299
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
299
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'HELM');
|
|
300
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'HELM', null, null, null);
|
|
300
301
|
});
|
|
301
302
|
test('samplesTestHelmNegativeValid', async () => {
|
|
302
303
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -328,7 +329,7 @@ MWRSWY-CKHP
|
|
|
328
329
|
});
|
|
329
330
|
|
|
330
331
|
test('samplesFastaPtPosSequence', async () => {
|
|
331
|
-
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta
|
|
332
|
+
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
332
333
|
});
|
|
333
334
|
|
|
334
335
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -383,7 +384,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
383
384
|
}
|
|
384
385
|
}
|
|
385
386
|
|
|
386
|
-
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
|
|
387
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, aligned: string | null, alphabet: string | null, separator: string | null = null) {
|
|
387
388
|
const df: DG.DataFrame = await readDf();
|
|
388
389
|
const col: DG.Column = df.col(colName)!;
|
|
389
390
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
@@ -392,6 +393,8 @@ export async function _testPos(readDf: DfReaderFunc, colName: string, units: str
|
|
|
392
393
|
|
|
393
394
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
394
395
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
396
|
+
expect(col.getTag('aligned'), aligned);
|
|
397
|
+
expect(col.getTag('alphabet'), alphabet);
|
|
395
398
|
if (separator)
|
|
396
399
|
expect(col.getTag('separator'), separator);
|
|
397
400
|
}
|
package/src/tests/renderers-test.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {after, before, category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
1
|
+
import {after, before, category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
@@ -12,6 +12,7 @@ category('renderers', () => {
|
|
|
12
12
|
let dfList: DG.DataFrame[];
|
|
13
13
|
|
|
14
14
|
before(async () => {
|
|
15
|
+
await grok.functions.call('Bio:initBio');
|
|
15
16
|
tvList = [];
|
|
16
17
|
dfList = [];
|
|
17
18
|
});
|
|
@@ -43,15 +44,19 @@ category('renderers', () => {
|
|
|
43
44
|
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
|
|
44
45
|
`cell.renderer="${srcSeqCol!.getTag('cell.renderer')}"`);
|
|
45
46
|
expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
46
|
-
expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta
|
|
47
|
-
expect(srcSeqCol!.getTag('
|
|
47
|
+
expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
|
|
48
|
+
expect(srcSeqCol!.getTag('aligned'), 'SEQ');
|
|
49
|
+
expect(srcSeqCol!.getTag('alphabet'), 'PT');
|
|
50
|
+
expect(srcSeqCol!.getTag('cell.renderer'), 'sequence');
|
|
48
51
|
|
|
49
52
|
const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
|
|
50
53
|
tv.grid.invalidate();
|
|
51
|
-
|
|
54
|
+
|
|
52
55
|
expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
53
|
-
expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta
|
|
54
|
-
expect(msaSeqCol!.getTag('
|
|
56
|
+
expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
|
|
57
|
+
expect(msaSeqCol!.getTag('aligned'), 'SEQ.MSA');
|
|
58
|
+
expect(msaSeqCol!.getTag('alphabet'), 'PT');
|
|
59
|
+
expect(msaSeqCol!.getTag('cell.renderer'), 'sequence');
|
|
55
60
|
|
|
56
61
|
dfList.push(df);
|
|
57
62
|
tvList.push(tv);
|
|
@@ -65,7 +70,7 @@ category('renderers', () => {
|
|
|
65
70
|
|
|
66
71
|
const srcCol: DG.Column = df.col('sequence')!;
|
|
67
72
|
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
68
|
-
expect(tgtCol.getTag('cell.renderer'), '
|
|
73
|
+
expect(tgtCol.getTag('cell.renderer'), 'sequence');
|
|
69
74
|
|
|
70
75
|
tvList.push(tv);
|
|
71
76
|
dfList.push(df);
|